author     Jonathan Gray <jsg@cvs.openbsd.org>   2023-11-02 04:53:47 +0000
committer  Jonathan Gray <jsg@cvs.openbsd.org>   2023-11-02 04:53:47 +0000
commit     b44518130b33cadb5c1d619e9e936ae0e0dbf7cb (patch)
tree       6069eb03c39fbc79808a7d94f857118cce75cbe3 /lib/mesa/src/gallium/auxiliary
parent     32aeb3c41fedbbd7b11aacfec48e8f699d16bff0 (diff)
Merge Mesa 23.1.9
Diffstat (limited to 'lib/mesa/src/gallium/auxiliary')
89 files changed, 1583 insertions, 2144 deletions
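The largest functional change in this merge is in cso_context: the context now embeds a small cso_context_base that holds the pipe_context pointer plus a cached draw_vbo entry point, selected once at context creation (and updated when u_vbuf is bound or unbound), so the inline cso_draw_vbo becomes a single indirect call instead of re-checking the vbuf state on every draw. A minimal sketch of that dispatch pattern, using hypothetical stand-in types rather than the real Mesa structures:

/* Sketch only: stand-in types, not the actual Mesa declarations. */
struct pipe_context_sketch;            /* stand-in for struct pipe_context */
struct draw_args_sketch;               /* stand-in for the draw parameters  */

typedef void (*draw_vbo_fn)(struct pipe_context_sketch *pipe,
                            const struct draw_args_sketch *args);

struct cso_base_sketch {
   struct pipe_context_sketch *pipe;
   draw_vbo_fn draw_vbo;               /* either the u_vbuf path or the driver path */
};

static inline void
cso_draw_vbo_sketch(struct cso_base_sketch *base,
                    const struct draw_args_sketch *args)
{
   /* The target was chosen once when the context was set up, so the
    * per-draw cost is just this indirect call. */
   base->draw_vbo(base->pipe, args);
}
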
diff --git a/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.c b/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.c index efce6f673..d41fa27fa 100644 --- a/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.c @@ -50,6 +50,7 @@ #include "cso_cache/cso_hash.h" #include "cso_context.h" #include "driver_trace/tr_dump.h" +#include "util/u_threaded_context.h" /** * Per-shader sampler information. @@ -63,7 +64,7 @@ struct sampler_info struct cso_context { - struct pipe_context *pipe; + struct cso_context_base base; struct u_vbuf *vbuf; struct u_vbuf *vbuf_current; @@ -124,13 +125,6 @@ struct cso_context { }; -struct pipe_context * -cso_get_pipe_context(struct cso_context *cso) -{ - return cso->pipe; -} - - static inline boolean delete_cso(struct cso_context *ctx, void *state, enum cso_cache_type type) @@ -163,7 +157,7 @@ delete_cso(struct cso_context *ctx, assert(0); } - cso_delete_state(ctx->pipe, state, type); + cso_delete_state(ctx->base.pipe, state, type); return true; } @@ -253,18 +247,34 @@ cso_init_vbuf(struct cso_context *cso, unsigned flags) bool uses_user_vertex_buffers = !(flags & CSO_NO_USER_VERTEX_BUFFERS); bool needs64b = !(flags & CSO_NO_64B_VERTEX_BUFFERS); - u_vbuf_get_caps(cso->pipe->screen, &caps, needs64b); + u_vbuf_get_caps(cso->base.pipe->screen, &caps, needs64b); /* Enable u_vbuf if needed. */ if (caps.fallback_always || (uses_user_vertex_buffers && caps.fallback_only_for_user_vbuffers)) { - cso->vbuf = u_vbuf_create(cso->pipe, &caps); + assert(!cso->base.pipe->vbuf); + cso->vbuf = u_vbuf_create(cso->base.pipe, &caps); + cso->base.pipe->vbuf = cso->vbuf; cso->always_use_vbuf = caps.fallback_always; - cso->vbuf_current = caps.fallback_always ? cso->vbuf : NULL; + cso->vbuf_current = cso->base.pipe->vbuf = + caps.fallback_always ? cso->vbuf : NULL; } } +static void +cso_draw_vbo_default(struct pipe_context *pipe, + const struct pipe_draw_info *info, + unsigned drawid_offset, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws) +{ + if (pipe->vbuf) + u_vbuf_draw_vbo(pipe, info, drawid_offset, indirect, draws, num_draws); + else + pipe->draw_vbo(pipe, info, drawid_offset, indirect, draws, num_draws); +} struct cso_context * cso_create_context(struct pipe_context *pipe, unsigned flags) @@ -276,12 +286,27 @@ cso_create_context(struct pipe_context *pipe, unsigned flags) cso_cache_init(&ctx->cache, pipe); cso_cache_set_sanitize_callback(&ctx->cache, sanitize_hash, ctx); - ctx->pipe = pipe; + ctx->base.pipe = pipe; ctx->sample_mask = ~0; if (!(flags & CSO_NO_VBUF)) cso_init_vbuf(ctx, flags); + /* Only drivers using u_threaded_context benefit from the direct call. + * This is because drivers can change draw_vbo, but u_threaded_context + * never changes it. 
+ */ + if (pipe->draw_vbo == tc_draw_vbo) { + if (ctx->vbuf_current) + ctx->base.draw_vbo = u_vbuf_draw_vbo; + else + ctx->base.draw_vbo = pipe->draw_vbo; + } else if (ctx->always_use_vbuf) { + ctx->base.draw_vbo = u_vbuf_draw_vbo; + } else { + ctx->base.draw_vbo = cso_draw_vbo_default; + } + /* Enable for testing: */ if (0) cso_set_maximum_cache_size(&ctx->cache, 4); @@ -330,15 +355,15 @@ cso_unbind_context(struct cso_context *ctx) bool dumping = trace_dumping_enabled_locked(); if (dumping) trace_dumping_stop_locked(); - if (ctx->pipe) { - ctx->pipe->bind_blend_state(ctx->pipe, NULL); - ctx->pipe->bind_rasterizer_state(ctx->pipe, NULL); + if (ctx->base.pipe) { + ctx->base.pipe->bind_blend_state(ctx->base.pipe, NULL); + ctx->base.pipe->bind_rasterizer_state(ctx->base.pipe, NULL); { static struct pipe_sampler_view *views[PIPE_MAX_SHADER_SAMPLER_VIEWS] = { NULL }; static struct pipe_shader_buffer ssbos[PIPE_MAX_SHADER_BUFFERS] = { 0 }; static void *zeros[PIPE_MAX_SAMPLERS] = { NULL }; - struct pipe_screen *scr = ctx->pipe->screen; + struct pipe_screen *scr = ctx->base.pipe->screen; enum pipe_shader_type sh; for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) { switch (sh) { @@ -375,44 +400,47 @@ cso_unbind_context(struct cso_context *ctx) assert(maxcb <= PIPE_MAX_CONSTANT_BUFFERS); assert(maximg <= PIPE_MAX_SHADER_IMAGES); if (maxsam > 0) { - ctx->pipe->bind_sampler_states(ctx->pipe, sh, 0, maxsam, zeros); + ctx->base.pipe->bind_sampler_states(ctx->base.pipe, sh, 0, maxsam, zeros); } if (maxview > 0) { - ctx->pipe->set_sampler_views(ctx->pipe, sh, 0, maxview, 0, false, views); + ctx->base.pipe->set_sampler_views(ctx->base.pipe, sh, 0, maxview, 0, false, views); } if (maxssbo > 0) { - ctx->pipe->set_shader_buffers(ctx->pipe, sh, 0, maxssbo, ssbos, 0); + ctx->base.pipe->set_shader_buffers(ctx->base.pipe, sh, 0, maxssbo, ssbos, 0); } if (maximg > 0) { - ctx->pipe->set_shader_images(ctx->pipe, sh, 0, 0, maximg, NULL); + ctx->base.pipe->set_shader_images(ctx->base.pipe, sh, 0, 0, maximg, NULL); } for (int i = 0; i < maxcb; i++) { - ctx->pipe->set_constant_buffer(ctx->pipe, sh, i, false, NULL); + ctx->base.pipe->set_constant_buffer(ctx->base.pipe, sh, i, false, NULL); } } } - ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, NULL); + ctx->base.pipe->bind_depth_stencil_alpha_state(ctx->base.pipe, NULL); struct pipe_stencil_ref sr = {0}; - ctx->pipe->set_stencil_ref(ctx->pipe, sr); - ctx->pipe->bind_fs_state(ctx->pipe, NULL); - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, false, NULL); - ctx->pipe->bind_vs_state(ctx->pipe, NULL); - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_VERTEX, 0, false, NULL); + ctx->base.pipe->set_stencil_ref(ctx->base.pipe, sr); + ctx->base.pipe->bind_fs_state(ctx->base.pipe, NULL); + ctx->base.pipe->set_constant_buffer(ctx->base.pipe, PIPE_SHADER_FRAGMENT, 0, false, NULL); + ctx->base.pipe->bind_vs_state(ctx->base.pipe, NULL); + ctx->base.pipe->set_constant_buffer(ctx->base.pipe, PIPE_SHADER_VERTEX, 0, false, NULL); if (ctx->has_geometry_shader) { - ctx->pipe->bind_gs_state(ctx->pipe, NULL); + ctx->base.pipe->bind_gs_state(ctx->base.pipe, NULL); } if (ctx->has_tessellation) { - ctx->pipe->bind_tcs_state(ctx->pipe, NULL); - ctx->pipe->bind_tes_state(ctx->pipe, NULL); + ctx->base.pipe->bind_tcs_state(ctx->base.pipe, NULL); + ctx->base.pipe->bind_tes_state(ctx->base.pipe, NULL); } if (ctx->has_compute_shader) { - ctx->pipe->bind_compute_state(ctx->pipe, NULL); + ctx->base.pipe->bind_compute_state(ctx->base.pipe, NULL); } - 
ctx->pipe->bind_vertex_elements_state(ctx->pipe, NULL); + ctx->base.pipe->bind_vertex_elements_state(ctx->base.pipe, NULL); if (ctx->has_streamout) - ctx->pipe->set_stream_output_targets(ctx->pipe, 0, NULL, NULL); + ctx->base.pipe->set_stream_output_targets(ctx->base.pipe, 0, NULL, NULL); + + struct pipe_framebuffer_state fb = {0}; + ctx->base.pipe->set_framebuffer_state(ctx->base.pipe, &fb); } util_unreference_framebuffer_state(&ctx->fb); @@ -432,9 +460,9 @@ cso_unbind_context(struct cso_context *ctx) * If the cso context is reused (with the same pipe context), * need to really make sure the context state doesn't get out of sync. */ - ctx->pipe->set_sample_mask(ctx->pipe, ctx->sample_mask); - if (ctx->pipe->set_min_samples) - ctx->pipe->set_min_samples(ctx->pipe, ctx->min_samples); + ctx->base.pipe->set_sample_mask(ctx->base.pipe, ctx->sample_mask); + if (ctx->base.pipe->set_min_samples) + ctx->base.pipe->set_min_samples(ctx->base.pipe, ctx->min_samples); if (dumping) trace_dumping_start_locked(); } @@ -451,6 +479,8 @@ cso_destroy_context(struct cso_context *ctx) if (ctx->vbuf) u_vbuf_destroy(ctx->vbuf); + + ctx->base.pipe->vbuf = NULL; FREE(ctx); } @@ -499,7 +529,7 @@ cso_set_blend(struct cso_context *ctx, memset(&cso->state, 0, sizeof cso->state); memcpy(&cso->state, templ, key_size); - cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state); + cso->data = ctx->base.pipe->create_blend_state(ctx->base.pipe, &cso->state); iter = cso_insert_state(&ctx->cache, hash_key, CSO_BLEND, cso); if (cso_hash_iter_is_null(iter)) { @@ -514,7 +544,7 @@ cso_set_blend(struct cso_context *ctx, if (ctx->blend != handle) { ctx->blend = handle; - ctx->pipe->bind_blend_state(ctx->pipe, handle); + ctx->base.pipe->bind_blend_state(ctx->base.pipe, handle); } return PIPE_OK; } @@ -533,7 +563,7 @@ cso_restore_blend(struct cso_context *ctx) { if (ctx->blend != ctx->blend_saved) { ctx->blend = ctx->blend_saved; - ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend_saved); + ctx->base.pipe->bind_blend_state(ctx->base.pipe, ctx->blend_saved); } ctx->blend_saved = NULL; } @@ -558,7 +588,7 @@ cso_set_depth_stencil_alpha(struct cso_context *ctx, return PIPE_ERROR_OUT_OF_MEMORY; memcpy(&cso->state, templ, sizeof(*templ)); - cso->data = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, + cso->data = ctx->base.pipe->create_depth_stencil_alpha_state(ctx->base.pipe, &cso->state); iter = cso_insert_state(&ctx->cache, hash_key, @@ -576,7 +606,7 @@ cso_set_depth_stencil_alpha(struct cso_context *ctx, if (ctx->depth_stencil != handle) { ctx->depth_stencil = handle; - ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, handle); + ctx->base.pipe->bind_depth_stencil_alpha_state(ctx->base.pipe, handle); } return PIPE_OK; } @@ -595,7 +625,7 @@ cso_restore_depth_stencil_alpha(struct cso_context *ctx) { if (ctx->depth_stencil != ctx->depth_stencil_saved) { ctx->depth_stencil = ctx->depth_stencil_saved; - ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, + ctx->base.pipe->bind_depth_stencil_alpha_state(ctx->base.pipe, ctx->depth_stencil_saved); } ctx->depth_stencil_saved = NULL; @@ -625,7 +655,7 @@ cso_set_rasterizer(struct cso_context *ctx, return PIPE_ERROR_OUT_OF_MEMORY; memcpy(&cso->state, templ, sizeof(*templ)); - cso->data = ctx->pipe->create_rasterizer_state(ctx->pipe, &cso->state); + cso->data = ctx->base.pipe->create_rasterizer_state(ctx->base.pipe, &cso->state); iter = cso_insert_state(&ctx->cache, hash_key, CSO_RASTERIZER, cso); if (cso_hash_iter_is_null(iter)) { @@ -643,7 +673,7 @@ cso_set_rasterizer(struct 
cso_context *ctx, ctx->flatshade_first = templ->flatshade_first; if (ctx->vbuf) u_vbuf_set_flatshade_first(ctx->vbuf, ctx->flatshade_first); - ctx->pipe->bind_rasterizer_state(ctx->pipe, handle); + ctx->base.pipe->bind_rasterizer_state(ctx->base.pipe, handle); } return PIPE_OK; } @@ -666,7 +696,7 @@ cso_restore_rasterizer(struct cso_context *ctx) ctx->flatshade_first = ctx->flatshade_first_saved; if (ctx->vbuf) u_vbuf_set_flatshade_first(ctx->vbuf, ctx->flatshade_first); - ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rasterizer_saved); + ctx->base.pipe->bind_rasterizer_state(ctx->base.pipe, ctx->rasterizer_saved); } ctx->rasterizer_saved = NULL; } @@ -677,7 +707,7 @@ cso_set_fragment_shader_handle(struct cso_context *ctx, void *handle) { if (ctx->fragment_shader != handle) { ctx->fragment_shader = handle; - ctx->pipe->bind_fs_state(ctx->pipe, handle); + ctx->base.pipe->bind_fs_state(ctx->base.pipe, handle); } } @@ -694,7 +724,7 @@ static void cso_restore_fragment_shader(struct cso_context *ctx) { if (ctx->fragment_shader_saved != ctx->fragment_shader) { - ctx->pipe->bind_fs_state(ctx->pipe, ctx->fragment_shader_saved); + ctx->base.pipe->bind_fs_state(ctx->base.pipe, ctx->fragment_shader_saved); ctx->fragment_shader = ctx->fragment_shader_saved; } ctx->fragment_shader_saved = NULL; @@ -706,7 +736,7 @@ cso_set_vertex_shader_handle(struct cso_context *ctx, void *handle) { if (ctx->vertex_shader != handle) { ctx->vertex_shader = handle; - ctx->pipe->bind_vs_state(ctx->pipe, handle); + ctx->base.pipe->bind_vs_state(ctx->base.pipe, handle); } } @@ -723,7 +753,7 @@ static void cso_restore_vertex_shader(struct cso_context *ctx) { if (ctx->vertex_shader_saved != ctx->vertex_shader) { - ctx->pipe->bind_vs_state(ctx->pipe, ctx->vertex_shader_saved); + ctx->base.pipe->bind_vs_state(ctx->base.pipe, ctx->vertex_shader_saved); ctx->vertex_shader = ctx->vertex_shader_saved; } ctx->vertex_shader_saved = NULL; @@ -736,7 +766,7 @@ cso_set_framebuffer(struct cso_context *ctx, { if (memcmp(&ctx->fb, fb, sizeof(*fb)) != 0) { util_copy_framebuffer_state(&ctx->fb, fb); - ctx->pipe->set_framebuffer_state(ctx->pipe, fb); + ctx->base.pipe->set_framebuffer_state(ctx->base.pipe, fb); } } @@ -753,7 +783,7 @@ cso_restore_framebuffer(struct cso_context *ctx) { if (memcmp(&ctx->fb, &ctx->fb_saved, sizeof(ctx->fb))) { util_copy_framebuffer_state(&ctx->fb, &ctx->fb_saved); - ctx->pipe->set_framebuffer_state(ctx->pipe, &ctx->fb); + ctx->base.pipe->set_framebuffer_state(ctx->base.pipe, &ctx->fb); util_unreference_framebuffer_state(&ctx->fb_saved); } } @@ -765,7 +795,7 @@ cso_set_viewport(struct cso_context *ctx, { if (memcmp(&ctx->vp, vp, sizeof(*vp))) { ctx->vp = *vp; - ctx->pipe->set_viewport_states(ctx->pipe, 0, 1, vp); + ctx->base.pipe->set_viewport_states(ctx->base.pipe, 0, 1, vp); } } @@ -805,7 +835,7 @@ cso_restore_viewport(struct cso_context *ctx) { if (memcmp(&ctx->vp, &ctx->vp_saved, sizeof(ctx->vp))) { ctx->vp = ctx->vp_saved; - ctx->pipe->set_viewport_states(ctx->pipe, 0, 1, &ctx->vp); + ctx->base.pipe->set_viewport_states(ctx->base.pipe, 0, 1, &ctx->vp); } } @@ -815,7 +845,7 @@ cso_set_sample_mask(struct cso_context *ctx, unsigned sample_mask) { if (ctx->sample_mask != sample_mask) { ctx->sample_mask = sample_mask; - ctx->pipe->set_sample_mask(ctx->pipe, sample_mask); + ctx->base.pipe->set_sample_mask(ctx->base.pipe, sample_mask); } } @@ -837,9 +867,9 @@ cso_restore_sample_mask(struct cso_context *ctx) void cso_set_min_samples(struct cso_context *ctx, unsigned min_samples) { - if (ctx->min_samples != 
min_samples && ctx->pipe->set_min_samples) { + if (ctx->min_samples != min_samples && ctx->base.pipe->set_min_samples) { ctx->min_samples = min_samples; - ctx->pipe->set_min_samples(ctx->pipe, min_samples); + ctx->base.pipe->set_min_samples(ctx->base.pipe, min_samples); } } @@ -864,7 +894,7 @@ cso_set_stencil_ref(struct cso_context *ctx, { if (memcmp(&ctx->stencil_ref, &sr, sizeof(ctx->stencil_ref))) { ctx->stencil_ref = sr; - ctx->pipe->set_stencil_ref(ctx->pipe, sr); + ctx->base.pipe->set_stencil_ref(ctx->base.pipe, sr); } } @@ -882,7 +912,7 @@ cso_restore_stencil_ref(struct cso_context *ctx) if (memcmp(&ctx->stencil_ref, &ctx->stencil_ref_saved, sizeof(ctx->stencil_ref))) { ctx->stencil_ref = ctx->stencil_ref_saved; - ctx->pipe->set_stencil_ref(ctx->pipe, ctx->stencil_ref); + ctx->base.pipe->set_stencil_ref(ctx->base.pipe, ctx->stencil_ref); } } @@ -893,7 +923,7 @@ cso_set_render_condition(struct cso_context *ctx, boolean condition, enum pipe_render_cond_flag mode) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; if (ctx->render_condition != query || ctx->render_condition_mode != mode || @@ -931,7 +961,7 @@ cso_set_geometry_shader_handle(struct cso_context *ctx, void *handle) if (ctx->has_geometry_shader && ctx->geometry_shader != handle) { ctx->geometry_shader = handle; - ctx->pipe->bind_gs_state(ctx->pipe, handle); + ctx->base.pipe->bind_gs_state(ctx->base.pipe, handle); } } @@ -956,7 +986,7 @@ cso_restore_geometry_shader(struct cso_context *ctx) } if (ctx->geometry_shader_saved != ctx->geometry_shader) { - ctx->pipe->bind_gs_state(ctx->pipe, ctx->geometry_shader_saved); + ctx->base.pipe->bind_gs_state(ctx->base.pipe, ctx->geometry_shader_saved); ctx->geometry_shader = ctx->geometry_shader_saved; } ctx->geometry_shader_saved = NULL; @@ -970,7 +1000,7 @@ cso_set_tessctrl_shader_handle(struct cso_context *ctx, void *handle) if (ctx->has_tessellation && ctx->tessctrl_shader != handle) { ctx->tessctrl_shader = handle; - ctx->pipe->bind_tcs_state(ctx->pipe, handle); + ctx->base.pipe->bind_tcs_state(ctx->base.pipe, handle); } } @@ -995,7 +1025,7 @@ cso_restore_tessctrl_shader(struct cso_context *ctx) } if (ctx->tessctrl_shader_saved != ctx->tessctrl_shader) { - ctx->pipe->bind_tcs_state(ctx->pipe, ctx->tessctrl_shader_saved); + ctx->base.pipe->bind_tcs_state(ctx->base.pipe, ctx->tessctrl_shader_saved); ctx->tessctrl_shader = ctx->tessctrl_shader_saved; } ctx->tessctrl_shader_saved = NULL; @@ -1009,7 +1039,7 @@ cso_set_tesseval_shader_handle(struct cso_context *ctx, void *handle) if (ctx->has_tessellation && ctx->tesseval_shader != handle) { ctx->tesseval_shader = handle; - ctx->pipe->bind_tes_state(ctx->pipe, handle); + ctx->base.pipe->bind_tes_state(ctx->base.pipe, handle); } } @@ -1034,7 +1064,7 @@ cso_restore_tesseval_shader(struct cso_context *ctx) } if (ctx->tesseval_shader_saved != ctx->tesseval_shader) { - ctx->pipe->bind_tes_state(ctx->pipe, ctx->tesseval_shader_saved); + ctx->base.pipe->bind_tes_state(ctx->base.pipe, ctx->tesseval_shader_saved); ctx->tesseval_shader = ctx->tesseval_shader_saved; } ctx->tesseval_shader_saved = NULL; @@ -1048,7 +1078,7 @@ cso_set_compute_shader_handle(struct cso_context *ctx, void *handle) if (ctx->has_compute_shader && ctx->compute_shader != handle) { ctx->compute_shader = handle; - ctx->pipe->bind_compute_state(ctx->pipe, handle); + ctx->base.pipe->bind_compute_state(ctx->base.pipe, handle); } } @@ -1073,7 +1103,7 @@ cso_restore_compute_shader(struct cso_context *ctx) } if (ctx->compute_shader_saved != 
ctx->compute_shader) { - ctx->pipe->bind_compute_state(ctx->pipe, ctx->compute_shader_saved); + ctx->base.pipe->bind_compute_state(ctx->base.pipe, ctx->compute_shader_saved); ctx->compute_shader = ctx->compute_shader_saved; } ctx->compute_shader_saved = NULL; @@ -1143,7 +1173,7 @@ cso_set_vertex_elements_direct(struct cso_context *ctx, struct pipe_vertex_element tmp[PIPE_MAX_ATTRIBS]; util_lower_uint64_vertex_elements(&new_elems, &new_count, tmp); - cso->data = ctx->pipe->create_vertex_elements_state(ctx->pipe, new_count, + cso->data = ctx->base.pipe->create_vertex_elements_state(ctx->base.pipe, new_count, new_elems); iter = cso_insert_state(&ctx->cache, hash_key, CSO_VELEMENTS, cso); @@ -1159,7 +1189,7 @@ cso_set_vertex_elements_direct(struct cso_context *ctx, if (ctx->velements != handle) { ctx->velements = handle; - ctx->pipe->bind_vertex_elements_state(ctx->pipe, handle); + ctx->base.pipe->bind_vertex_elements_state(ctx->base.pipe, handle); } } @@ -1207,7 +1237,7 @@ cso_restore_vertex_elements(struct cso_context *ctx) if (ctx->velements != ctx->velements_saved) { ctx->velements = ctx->velements_saved; - ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->velements_saved); + ctx->base.pipe->bind_vertex_elements_state(ctx->base.pipe, ctx->velements_saved); } ctx->velements_saved = NULL; } @@ -1232,7 +1262,7 @@ cso_set_vertex_buffers(struct cso_context *ctx, return; } - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; pipe->set_vertex_buffers(pipe, start_slot, count, unbind_trailing_count, take_ownership, buffers); } @@ -1260,7 +1290,7 @@ cso_set_vertex_buffers_and_elements(struct cso_context *ctx, const struct pipe_vertex_buffer *vbuffers) { struct u_vbuf *vbuf = ctx->vbuf; - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; if (vbuf && (ctx->always_use_vbuf || uses_user_vertex_buffers)) { if (!ctx->vbuf_current) { @@ -1271,7 +1301,9 @@ cso_set_vertex_buffers_and_elements(struct cso_context *ctx, /* Unset this to make sure the CSO is re-bound on the next use. */ ctx->velements = NULL; - ctx->vbuf_current = vbuf; + ctx->vbuf_current = pipe->vbuf = vbuf; + if (pipe->draw_vbo == tc_draw_vbo) + ctx->base.draw_vbo = u_vbuf_draw_vbo; unbind_trailing_vb_count = 0; } @@ -1292,7 +1324,9 @@ cso_set_vertex_buffers_and_elements(struct cso_context *ctx, /* Unset this to make sure the CSO is re-bound on the next use. 
*/ u_vbuf_unset_vertex_elements(vbuf); - ctx->vbuf_current = NULL; + ctx->vbuf_current = pipe->vbuf = NULL; + if (pipe->draw_vbo == tc_draw_vbo) + ctx->base.draw_vbo = pipe->draw_vbo; unbind_trailing_vb_count = 0; } @@ -1322,7 +1356,7 @@ set_sampler(struct cso_context *ctx, enum pipe_shader_type shader_stage, return false; memcpy(&cso->state, templ, sizeof(*templ)); - cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); + cso->data = ctx->base.pipe->create_sampler_state(ctx->base.pipe, &cso->state); cso->hash_key = hash_key; iter = cso_insert_state(&ctx->cache, hash_key, CSO_SAMPLER, cso); @@ -1381,7 +1415,7 @@ cso_single_sampler_done(struct cso_context *ctx, if (ctx->max_sampler_seen == -1) return; - ctx->pipe->bind_sampler_states(ctx->pipe, shader_stage, 0, + ctx->base.pipe->bind_sampler_states(ctx->base.pipe, shader_stage, 0, ctx->max_sampler_seen + 1, info->samplers); ctx->max_sampler_seen = -1; @@ -1497,7 +1531,7 @@ cso_set_stream_outputs(struct cso_context *ctx, struct pipe_stream_output_target **targets, const unsigned *offsets) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; uint i; if (!ctx->has_streamout) { @@ -1544,7 +1578,7 @@ cso_save_stream_outputs(struct cso_context *ctx) static void cso_restore_stream_outputs(struct cso_context *ctx) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; uint i; unsigned offset[PIPE_MAX_SO_BUFFERS]; @@ -1624,7 +1658,7 @@ cso_save_state(struct cso_context *cso, unsigned state_mask) if (state_mask & CSO_BIT_VIEWPORT) cso_save_viewport(cso); if (state_mask & CSO_BIT_PAUSE_QUERIES) - cso->pipe->set_active_query_state(cso->pipe, false); + cso->base.pipe->set_active_query_state(cso->base.pipe, false); } @@ -1653,15 +1687,15 @@ cso_restore_state(struct cso_context *cso, unsigned unbind) if (state_mask & CSO_BIT_VERTEX_SHADER) cso_restore_vertex_shader(cso); if (unbind & CSO_UNBIND_FS_SAMPLERVIEWS) - cso->pipe->set_sampler_views(cso->pipe, PIPE_SHADER_FRAGMENT, 0, 0, + cso->base.pipe->set_sampler_views(cso->base.pipe, PIPE_SHADER_FRAGMENT, 0, 0, cso->max_fs_samplerviews, false, NULL); if (unbind & CSO_UNBIND_FS_SAMPLERVIEW0) - cso->pipe->set_sampler_views(cso->pipe, PIPE_SHADER_FRAGMENT, 0, 0, + cso->base.pipe->set_sampler_views(cso->base.pipe, PIPE_SHADER_FRAGMENT, 0, 0, 1, false, NULL); if (state_mask & CSO_BIT_FRAGMENT_SAMPLERS) cso_restore_fragment_samplers(cso); if (unbind & CSO_UNBIND_FS_IMAGE0) - cso->pipe->set_shader_images(cso->pipe, PIPE_SHADER_FRAGMENT, 0, 0, 1, NULL); + cso->base.pipe->set_shader_images(cso->base.pipe, PIPE_SHADER_FRAGMENT, 0, 0, 1, NULL); if (state_mask & CSO_BIT_FRAMEBUFFER) cso_restore_framebuffer(cso); if (state_mask & CSO_BIT_BLEND) @@ -1677,17 +1711,17 @@ cso_restore_state(struct cso_context *cso, unsigned unbind) if (state_mask & CSO_BIT_VIEWPORT) cso_restore_viewport(cso); if (unbind & CSO_UNBIND_VS_CONSTANTS) - cso->pipe->set_constant_buffer(cso->pipe, PIPE_SHADER_VERTEX, 0, false, NULL); + cso->base.pipe->set_constant_buffer(cso->base.pipe, PIPE_SHADER_VERTEX, 0, false, NULL); if (unbind & CSO_UNBIND_FS_CONSTANTS) - cso->pipe->set_constant_buffer(cso->pipe, PIPE_SHADER_FRAGMENT, 0, false, NULL); + cso->base.pipe->set_constant_buffer(cso->base.pipe, PIPE_SHADER_FRAGMENT, 0, false, NULL); if (state_mask & CSO_BIT_VERTEX_ELEMENTS) cso_restore_vertex_elements(cso); if (unbind & CSO_UNBIND_VERTEX_BUFFER0) - cso->pipe->set_vertex_buffers(cso->pipe, 0, 0, 1, false, NULL); + cso->base.pipe->set_vertex_buffers(cso->base.pipe, 0, 
0, 1, false, NULL); if (state_mask & CSO_BIT_STREAM_OUTPUTS) cso_restore_stream_outputs(cso); if (state_mask & CSO_BIT_PAUSE_QUERIES) - cso->pipe->set_active_query_state(cso->pipe, true); + cso->base.pipe->set_active_query_state(cso->base.pipe, true); cso->saved_state = 0; } @@ -1736,53 +1770,6 @@ cso_restore_compute_state(struct cso_context *cso) /* drawing */ void -cso_draw_vbo(struct cso_context *cso, - const struct pipe_draw_info *info, - unsigned drawid_offset, - const struct pipe_draw_indirect_info *indirect, - const struct pipe_draw_start_count_bias draw) -{ - struct u_vbuf *vbuf = cso->vbuf_current; - - /* We can't have both indirect drawing and SO-vertex-count drawing */ - assert(!indirect || - indirect->buffer == NULL || - indirect->count_from_stream_output == NULL); - - /* We can't have SO-vertex-count drawing with an index buffer */ - assert(info->index_size == 0 || - !indirect || - indirect->count_from_stream_output == NULL); - - if (vbuf) { - u_vbuf_draw_vbo(vbuf, info, drawid_offset, indirect, &draw, 1); - } else { - struct pipe_context *pipe = cso->pipe; - pipe->draw_vbo(pipe, info, drawid_offset, indirect, &draw, 1); - } -} - -/* info->draw_id can be changed by the callee if increment_draw_id is true. */ -void -cso_multi_draw(struct cso_context *cso, - struct pipe_draw_info *info, - unsigned drawid_offset, - const struct pipe_draw_start_count_bias *draws, - unsigned num_draws) -{ - struct u_vbuf *vbuf = cso->vbuf_current; - - if (vbuf) { - u_vbuf_draw_vbo(vbuf, info, drawid_offset, NULL, draws, num_draws); - } else { - struct pipe_context *pipe = cso->pipe; - - pipe->draw_vbo(pipe, info, drawid_offset, NULL, draws, num_draws); - } -} - - -void cso_draw_arrays(struct cso_context *cso, uint mode, uint start, uint count) { struct pipe_draw_info info; @@ -1799,7 +1786,7 @@ cso_draw_arrays(struct cso_context *cso, uint mode, uint start, uint count) draw.count = count; draw.index_bias = 0; - cso_draw_vbo(cso, &info, 0, NULL, draw); + cso_draw_vbo(cso, &info, 0, NULL, &draw, 1); } @@ -1824,5 +1811,5 @@ cso_draw_arrays_instanced(struct cso_context *cso, uint mode, draw.count = count; draw.index_bias = 0; - cso_draw_vbo(cso, &info, 0, NULL, draw); + cso_draw_vbo(cso, &info, 0, NULL, &draw, 1); } diff --git a/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.h b/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.h index 4b9ec2098..85f98644d 100644 --- a/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.h @@ -42,6 +42,18 @@ extern "C" { struct cso_context; struct u_vbuf; +struct cso_context_base { + struct pipe_context *pipe; + + /* This is equal to either pipe_context::draw_vbo or u_vbuf_draw_vbo. 
*/ + void (*draw_vbo)(struct pipe_context *pipe, + const struct pipe_draw_info *info, + unsigned drawid_offset, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws); +}; + #define CSO_NO_USER_VERTEX_BUFFERS (1 << 0) #define CSO_NO_64B_VERTEX_BUFFERS (1 << 1) #define CSO_NO_VBUF (1 << 2) @@ -55,9 +67,6 @@ cso_unbind_context(struct cso_context *ctx); void cso_destroy_context(struct cso_context *cso); -struct pipe_context * -cso_get_pipe_context(struct cso_context *cso); - enum pipe_error cso_set_blend(struct cso_context *cso, const struct pipe_blend_state *blend); @@ -210,21 +219,6 @@ cso_set_vertex_buffers_and_elements(struct cso_context *ctx, const struct pipe_vertex_buffer *vbuffers); void -cso_draw_vbo(struct cso_context *cso, - const struct pipe_draw_info *info, - unsigned drawid_offset, - const struct pipe_draw_indirect_info *indirect, - const struct pipe_draw_start_count_bias draw); - -/* info->draw_id can be changed by the callee if increment_draw_id is true. */ -void -cso_multi_draw(struct cso_context *cso, - struct pipe_draw_info *info, - unsigned drawid_offset, - const struct pipe_draw_start_count_bias *draws, - unsigned num_draws); - -void cso_draw_arrays_instanced(struct cso_context *cso, uint mode, uint start, uint count, uint start_instance, uint instance_count); @@ -232,6 +226,43 @@ cso_draw_arrays_instanced(struct cso_context *cso, uint mode, void cso_draw_arrays(struct cso_context *cso, uint mode, uint start, uint count); +/* Inline functions. */ + +static inline struct pipe_context * +cso_get_pipe_context(struct cso_context *cso) +{ + struct cso_context_base *cso_base = (struct cso_context_base *)cso; + + return cso_base->pipe; +} + +static ALWAYS_INLINE void +cso_draw_vbo(struct cso_context *cso, + struct pipe_draw_info *info, + unsigned drawid_offset, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws) +{ + /* We can't have both indirect drawing and SO-vertex-count drawing */ + assert(!indirect || + indirect->buffer == NULL || + indirect->count_from_stream_output == NULL); + + /* We can't have SO-vertex-count drawing with an index buffer */ + assert(info->index_size == 0 || + !indirect || + indirect->count_from_stream_output == NULL); + + /* Indirect only uses indirect->draw_count, not num_draws. 
*/ + assert(!indirect || num_draws == 1); + + struct cso_context_base *cso_base = (struct cso_context_base *)cso; + + cso_base->draw_vbo(cso_base->pipe, info, drawid_offset, indirect, draws, + num_draws); +} + #ifdef __cplusplus } #endif diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_context.h b/lib/mesa/src/gallium/auxiliary/draw/draw_context.h index 3986d6469..44acf0116 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_context.h +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_context.h @@ -39,6 +39,7 @@ #include "pipe/p_state.h" +#include "nir.h" struct pipe_context; struct draw_context; @@ -130,7 +131,8 @@ boolean draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe); boolean -draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe); +draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe, + nir_alu_type bool_type); boolean draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe); diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c b/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c index 50c157bc3..8b3a15227 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c @@ -784,7 +784,7 @@ draw_llvm_create(struct draw_context *draw, LLVMContextRef context) if (!llvm->context) { llvm->context = LLVMContextCreate(); -#if LLVM_VERSION_MAJOR >= 15 +#if LLVM_VERSION_MAJOR == 15 LLVMContextSetOpaquePointers(llvm->context, false); #endif diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 7dac40785..d2bc475b2 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -107,6 +107,7 @@ struct aa_transform_context { uint64_t tempsUsed; /**< bitmask */ int colorOutput; /**< which output is the primary color */ int maxInput, maxGeneric; /**< max input index found */ + int numImm; /**< number of immediate regsters */ int colorTemp, aaTemp; /**< temp registers */ }; @@ -147,6 +148,18 @@ aa_transform_decl(struct tgsi_transform_context *ctx, ctx->emit_declaration(ctx, decl); } +/** + * TGSI immediate declaration transform callback. + */ +static void +aa_immediate(struct tgsi_transform_context *ctx, + struct tgsi_full_immediate *imm) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *)ctx; + + ctx->emit_immediate(ctx, imm); + aactx->numImm++; +} /** * Find the lowest zero bit, or -1 if bitfield is all ones. 
@@ -182,6 +195,9 @@ aa_transform_prolog(struct tgsi_transform_context *ctx) /* declare new temp regs */ tgsi_transform_temp_decl(ctx, aactx->aaTemp); tgsi_transform_temp_decl(ctx, aactx->colorTemp); + + /* declare new immediate reg */ + tgsi_transform_immediate_decl(ctx, 2.0, -1.0, 0.0, 0.25); } @@ -215,6 +231,26 @@ aa_transform_epilog(struct tgsi_transform_context *ctx) inst.Src[1].Register.Negate = true; ctx->emit_instruction(ctx, &inst); + /* linelength * 2 - 1 */ + tgsi_transform_op3_swz_inst(ctx, TGSI_OPCODE_MAD, + TGSI_FILE_TEMPORARY, aactx->aaTemp, + TGSI_WRITEMASK_Y, + TGSI_FILE_INPUT, aactx->maxInput + 1, + TGSI_SWIZZLE_W, false, + TGSI_FILE_IMMEDIATE, aactx->numImm, + TGSI_SWIZZLE_X, + TGSI_FILE_IMMEDIATE, aactx->numImm, + TGSI_SWIZZLE_Y); + + /* MIN height alpha */ + tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN, + TGSI_FILE_TEMPORARY, aactx->aaTemp, + TGSI_WRITEMASK_Z, + TGSI_FILE_TEMPORARY, aactx->aaTemp, + TGSI_SWIZZLE_Z, + TGSI_FILE_TEMPORARY, aactx->aaTemp, + TGSI_SWIZZLE_Y, false); + /* MUL width / height alpha */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, aactx->aaTemp, @@ -292,6 +328,7 @@ generate_aaline_fs(struct aaline_stage *aaline) transform.base.epilog = aa_transform_epilog; transform.base.transform_instruction = aa_transform_inst; transform.base.transform_declaration = aa_transform_decl; + transform.base.transform_immediate = aa_immediate; aaline_fs.tokens = tgsi_transform_shader(orig_fs->tokens, newLen, &transform.base); if (!aaline_fs.tokens) @@ -324,7 +361,7 @@ generate_aaline_fs_nir(struct aaline_stage *aaline) if (!aaline_fs.ir.nir) return FALSE; - nir_lower_aaline_fs(aaline_fs.ir.nir, &aaline->fs->generic_attrib); + nir_lower_aaline_fs(aaline_fs.ir.nir, &aaline->fs->generic_attrib, NULL, NULL); aaline->fs->aaline_fs = aaline->driver_create_fs_state(pipe, &aaline_fs); if (aaline->fs->aaline_fs == NULL) return FALSE; @@ -383,36 +420,13 @@ aaline_line(struct draw_stage *stage, struct prim_header *header) float *pos, *tex; float dx = header->v[1]->data[posPos][0] - header->v[0]->data[posPos][0]; float dy = header->v[1]->data[posPos][1] - header->v[0]->data[posPos][1]; - float a = atan2f(dy, dx); - float c_a = cosf(a), s_a = sinf(a); - float half_length; + float length = sqrtf(dx * dx + dy * dy); + float c_a = dx / length, s_a = dy / length; + float half_length = 0.5 * length; float t_l, t_w; uint i; - half_length = 0.5f * sqrtf(dx * dx + dy * dy); - - if (half_length < 0.5f) { - /* - * The logic we use for "normal" sized segments is incorrect - * for very short segments (basically because we only have - * one value to interpolate, not a distance to each endpoint). - * Therefore, we calculate half_length differently, so that for - * original line length (near) 0, we get alpha 0 - otherwise - * max alpha would still be 0.5. This also prevents us from - * artifacts due to degenerated lines (the endpoints being - * identical, which would still receive anywhere from alpha - * 0-0.5 otherwise) (at least the pstipple stage may generate - * such lines due to float inaccuracies if line length is very - * close to a integer). - * Might not be fully accurate neither (because the "strength" of - * the line is going to be determined by how close to the pixel - * center those 1 or 2 fragments are) but it's probably the best - * we can do. 
- */ - half_length = 2.0f * half_length; - } else { - half_length = half_length + 0.5f; - } + half_length = half_length + 0.5f; t_w = half_width; t_l = 0.5f; diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 56fff8788..b1b66f653 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -99,6 +99,9 @@ struct aapoint_stage /** vertex attrib slot containing position */ uint pos_slot; + /** Type of Boolean variables on this hardware. */ + nir_alu_type bool_type; + /** Currently bound fragment shader */ struct aapoint_fragment_shader *fs; @@ -418,7 +421,7 @@ generate_aapoint_fs_nir(struct aapoint_stage *aapoint) if (!aapoint_fs.ir.nir) return FALSE; - nir_lower_aapoint_fs(aapoint_fs.ir.nir, &aapoint->fs->generic_attrib); + nir_lower_aapoint_fs(aapoint_fs.ir.nir, &aapoint->fs->generic_attrib, aapoint->bool_type); aapoint->fs->aapoint_fs = aapoint->driver_create_fs_state(pipe, &aapoint_fs); if (aapoint->fs->aapoint_fs == NULL) goto fail; @@ -689,7 +692,7 @@ draw_aapoint_prepare_outputs(struct draw_context *draw, static struct aapoint_stage * -draw_aapoint_stage(struct draw_context *draw) +draw_aapoint_stage(struct draw_context *draw, nir_alu_type bool_type) { struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage); if (!aapoint) @@ -704,6 +707,7 @@ draw_aapoint_stage(struct draw_context *draw) aapoint->stage.flush = aapoint_flush; aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter; aapoint->stage.destroy = aapoint_destroy; + aapoint->bool_type = bool_type; if (!draw_alloc_temp_verts(&aapoint->stage, 4)) goto fail; @@ -793,7 +797,8 @@ aapoint_delete_fs_state(struct pipe_context *pipe, void *fs) */ boolean draw_install_aapoint_stage(struct draw_context *draw, - struct pipe_context *pipe) + struct pipe_context *pipe, + nir_alu_type bool_type) { struct aapoint_stage *aapoint; @@ -802,7 +807,7 @@ draw_install_aapoint_stage(struct draw_context *draw, /* * Create / install AA point drawing / prim stage */ - aapoint = draw_aapoint_stage(draw); + aapoint = draw_aapoint_stage(draw, bool_type); if (!aapoint) return FALSE; diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_private.h b/lib/mesa/src/gallium/auxiliary/draw/draw_private.h index 6b3de1d40..1780070fa 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_private.h +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_private.h @@ -184,6 +184,7 @@ struct draw_context enum pipe_prim_type prim; unsigned opt; /**< bitmask of PT_x flags */ unsigned eltSize; /* saved eltSize for flushing */ + unsigned viewid; /* saved viewid for flushing */ ubyte vertices_per_patch; boolean rebind_parameters; @@ -588,21 +589,4 @@ draw_clamp_viewport_idx(int idx) return ((PIPE_MAX_VIEWPORTS > idx && idx >= 0) ? idx : 0); } - -/** - * Adds two unsigned integers and if the addition - * overflows then it returns the value from - * the overflow_value variable. 
- */ -static inline unsigned -draw_overflow_uadd(unsigned a, unsigned b, - unsigned overflow_value) -{ - unsigned res = a + b; - if (res < a) { - res = overflow_value; - } - return res; -} - #endif /* DRAW_PRIVATE_H */ diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pt.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pt.c index 10908e2d2..3d5d5f088 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_pt.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pt.c @@ -103,9 +103,9 @@ draw_pt_arrays(struct draw_context *draw, */ draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); frontend = NULL; - } else if (draw->pt.eltSize != draw->pt.user.eltSize) { - /* Flush draw state if eltSize changed. - * This could be improved so only the frontend is flushed since it + } else if (draw->pt.eltSize != draw->pt.user.eltSize || draw->pt.viewid != draw->pt.user.viewid) { + /* Flush draw state if eltSize or viewid changed. + * eltSize changes could be improved so only the frontend is flushed since it * converts all indices to ushorts and the fetch part of the middle * always prepares both linear and indexed. */ @@ -121,6 +121,7 @@ draw_pt_arrays(struct draw_context *draw, draw->pt.frontend = frontend; draw->pt.eltSize = draw->pt.user.eltSize; + draw->pt.viewid = draw->pt.user.viewid; draw->pt.prim = prim; draw->pt.opt = opt; } @@ -360,28 +361,26 @@ prim_restart_loop(struct draw_context *draw, struct pipe_draw_start_count_bias cur = *draw_info; cur.count = 0; - /* The largest index within a loop using the i variable as the index. - * Used for overflow detection */ - const unsigned MAX_LOOP_IDX = 0xffffffff; - for (unsigned j = 0; j < draw_info->count; j++) { - unsigned restart_idx = 0; - unsigned i = draw_overflow_uadd(draw_info->start, j, MAX_LOOP_IDX); - switch (draw->pt.user.eltSize) { - case 1: - restart_idx = ((const uint8_t*)elements)[i]; - break; - case 2: - restart_idx = ((const uint16_t*)elements)[i]; - break; - case 4: - restart_idx = ((const uint32_t*)elements)[i]; - break; - default: - assert(0 && "bad eltSize in draw_arrays()"); + unsigned index = 0; + unsigned i = util_clamped_uadd(draw_info->start, j); + if (i < elt_max) { + switch (draw->pt.user.eltSize) { + case 1: + index = ((const uint8_t*)elements)[i]; + break; + case 2: + index = ((const uint16_t*)elements)[i]; + break; + case 4: + index = ((const uint32_t*)elements)[i]; + break; + default: + assert(0 && "bad eltSize in draw_arrays()"); + } } - if (i < elt_max && restart_idx == info->restart_index) { + if (index == info->restart_index) { if (cur.count > 0) { /* draw elts up to prev pos */ draw_pt_arrays(draw, info->mode, info->index_bias_varies, &cur, 1); diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 2df7c53a8..edaa78af3 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -120,7 +120,7 @@ llvm_middle_end_prepare_gs(struct llvm_middle_end *fpme) } } - variant = draw_gs_llvm_create_variant(llvm, gs->info.num_outputs, key); + variant = draw_gs_llvm_create_variant(llvm, draw_total_gs_outputs(draw), key); if (variant) { list_add(&variant->list_item_local.list, &shader->variants.list); @@ -406,7 +406,7 @@ llvm_middle_end_prepare(struct draw_pt_middle_end *middle, static unsigned get_num_consts_robust(struct draw_context *draw, unsigned *sizes, unsigned idx) { - unsigned const_bytes = sizes[idx]; + uint64_t const_bytes = 
sizes[idx]; if (const_bytes < sizeof(float)) return 0; @@ -829,7 +829,7 @@ llvm_middle_end_linear_run(struct draw_pt_middle_end *middle, fetch_info.elts = NULL; prim_info.linear = TRUE; - prim_info.start = 0; + prim_info.start = start; prim_info.count = count; prim_info.elts = NULL; prim_info.prim = prim_type(fpme->input_prim, prim_flags); diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit.c index 0455f40df..86548b817 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit.c @@ -23,6 +23,7 @@ * DEALINGS IN THE SOFTWARE. */ +#include "util/macros.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -33,9 +34,6 @@ #define SEGMENT_SIZE 1024 #define MAP_SIZE 256 -/* The largest possible index within an index buffer */ -#define MAX_ELT_IDX 0xffffffff - struct vsplit_frontend { struct draw_pt_front_end base; struct draw_context *draw; @@ -116,7 +114,7 @@ vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch) static inline unsigned vsplit_get_base_idx(unsigned start, unsigned fetch) { - return draw_overflow_uadd(start, fetch, MAX_ELT_IDX); + return util_clamped_uadd(start, fetch); } diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h index ee4fd56e2..53fa51091 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h @@ -23,9 +23,6 @@ * DEALINGS IN THE SOFTWARE. */ -#define CONCAT2(name, elt_type) name ## elt_type -#define CONCAT(name, elt_type) CONCAT2(name, elt_type) - #ifdef ELT_TYPE /** @@ -33,8 +30,8 @@ * (rebased) index buffer as the draw elements. */ static boolean -CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit, - unsigned istart, unsigned icount) +CONCAT2(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit, + unsigned istart, unsigned icount) { struct draw_context *draw = vsplit->draw; const ELT_TYPE *ib = (const ELT_TYPE *) draw->pt.user.elts; @@ -127,11 +124,11 @@ CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit, * appended. 
*/ static inline void -CONCAT(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit, - unsigned flags, - unsigned istart, unsigned icount, - boolean spoken, unsigned ispoken, - boolean close, unsigned iclose) +CONCAT2(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit, + unsigned flags, + unsigned istart, unsigned icount, + boolean spoken, unsigned ispoken, + boolean close, unsigned iclose) { struct draw_context *draw = vsplit->draw; const ELT_TYPE *ib = (const ELT_TYPE *) draw->pt.user.elts; @@ -168,41 +165,41 @@ CONCAT(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit, static void -CONCAT(vsplit_segment_simple_, ELT_TYPE)(struct vsplit_frontend *vsplit, - unsigned flags, - unsigned istart, - unsigned icount) +CONCAT2(vsplit_segment_simple_, ELT_TYPE)(struct vsplit_frontend *vsplit, + unsigned flags, + unsigned istart, + unsigned icount) { - CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit, - flags, istart, icount, FALSE, 0, FALSE, 0); + CONCAT2(vsplit_segment_cache_, ELT_TYPE)(vsplit, + flags, istart, icount, FALSE, 0, FALSE, 0); } static void -CONCAT(vsplit_segment_loop_, ELT_TYPE)(struct vsplit_frontend *vsplit, - unsigned flags, - unsigned istart, - unsigned icount, - unsigned i0) +CONCAT2(vsplit_segment_loop_, ELT_TYPE)(struct vsplit_frontend *vsplit, + unsigned flags, + unsigned istart, + unsigned icount, + unsigned i0) { const boolean close_loop = ((flags) == DRAW_SPLIT_BEFORE); - CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit, - flags, istart, icount, FALSE, 0, close_loop, i0); + CONCAT2(vsplit_segment_cache_, ELT_TYPE)(vsplit, + flags, istart, icount, FALSE, 0, close_loop, i0); } static void -CONCAT(vsplit_segment_fan_, ELT_TYPE)(struct vsplit_frontend *vsplit, - unsigned flags, - unsigned istart, - unsigned icount, - unsigned i0) +CONCAT2(vsplit_segment_fan_, ELT_TYPE)(struct vsplit_frontend *vsplit, + unsigned flags, + unsigned istart, + unsigned icount, + unsigned i0) { const boolean use_spoken = (((flags) & DRAW_SPLIT_BEFORE) != 0); - CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit, - flags, istart, icount, use_spoken, i0, FALSE, 0); + CONCAT2(vsplit_segment_cache_, ELT_TYPE)(vsplit, + flags, istart, icount, use_spoken, i0, FALSE, 0); } @@ -214,7 +211,7 @@ CONCAT(vsplit_segment_fan_, ELT_TYPE)(struct vsplit_frontend *vsplit, const unsigned max_count_fan = vsplit->segment_size; #define PRIMITIVE(istart, icount) \ - CONCAT(vsplit_primitive_, ELT_TYPE)(vsplit, istart, icount) + CONCAT2(vsplit_primitive_, ELT_TYPE)(vsplit, istart, icount) #else /* ELT_TYPE */ @@ -294,18 +291,15 @@ vsplit_segment_fan_linear(struct vsplit_frontend *vsplit, unsigned flags, unsigned count #define SEGMENT_SIMPLE(flags, istart, icount) \ - CONCAT(vsplit_segment_simple_, ELT_TYPE)(vsplit, flags, istart, icount) + CONCAT2(vsplit_segment_simple_, ELT_TYPE)(vsplit, flags, istart, icount) #define SEGMENT_LOOP(flags, istart, icount, i0) \ - CONCAT(vsplit_segment_loop_, ELT_TYPE)(vsplit, flags, istart, icount, i0) + CONCAT2(vsplit_segment_loop_, ELT_TYPE)(vsplit, flags, istart, icount, i0) #define SEGMENT_FAN(flags, istart, icount, i0) \ - CONCAT(vsplit_segment_fan_, ELT_TYPE)(vsplit, flags, istart, icount, i0) + CONCAT2(vsplit_segment_fan_, ELT_TYPE)(vsplit, flags, istart, icount, i0) #include "draw_split_tmp.h" -#undef CONCAT2 -#undef CONCAT - #undef ELT_TYPE #undef ADD_CACHE diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_tess.c b/lib/mesa/src/gallium/auxiliary/draw/draw_tess.c index c0430b02a..4240b48e5 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_tess.c 
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_tess.c @@ -189,6 +189,7 @@ int draw_tess_ctrl_shader_run(struct draw_tess_ctrl_shader *shader, shader->draw->statistics.hs_invocations += num_patches; } #ifdef DRAW_LLVM_AVAILABLE + unsigned first_patch = input_prim->start / shader->draw->pt.vertices_per_patch; for (unsigned i = 0; i < num_patches; i++) { uint32_t vert_start = output_verts->count; @@ -196,7 +197,7 @@ int draw_tess_ctrl_shader_run(struct draw_tess_ctrl_shader *shader, llvm_fetch_tcs_input(shader, input_prim, i, shader->draw->pt.vertices_per_patch); - llvm_tcs_run(shader, i); + llvm_tcs_run(shader, first_patch + i); uint32_t old_verts = util_align_npot(vert_start, 16); uint32_t new_verts = util_align_npot(output_verts->count, 16); diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_arit.c index dde7f06b2..511c57834 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -65,7 +65,7 @@ #include "lp_bld_arit.h" #include "lp_bld_flow.h" -#if defined(PIPE_ARCH_SSE) +#if DETECT_ARCH_SSE #include <xmmintrin.h> #endif @@ -137,7 +137,7 @@ lp_build_min_simple(struct lp_build_context *bld, else if (type.floating && util_get_cpu_caps()->has_altivec) { if (nan_behavior == GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) { debug_printf("%s: altivec doesn't support nan return nan behavior\n", - __FUNCTION__); + __func__); } if (type.width == 32 && type.length == 4) { intrinsic = "llvm.ppc.altivec.vminfp"; @@ -291,7 +291,7 @@ lp_build_max_simple(struct lp_build_context *bld, else if (type.floating && util_get_cpu_caps()->has_altivec) { if (nan_behavior == GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) { debug_printf("%s: altivec doesn't support nan return nan behavior\n", - __FUNCTION__); + __func__); } if (type.width == 32 || type.length == 4) { intrinsic = "llvm.ppc.altivec.vmaxfp"; @@ -2800,7 +2800,7 @@ lp_build_fast_rsqrt(struct lp_build_context *bld, return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a); } else { - debug_printf("%s: emulating fast rsqrt with rcp/sqrt\n", __FUNCTION__); + debug_printf("%s: emulating fast rsqrt with rcp/sqrt\n", __func__); } return lp_build_rcp(bld, lp_build_sqrt(bld, a)); } @@ -3073,7 +3073,7 @@ lp_build_pow(struct lp_build_context *bld, if (gallivm_debug & GALLIVM_DEBUG_PERF && LLVMIsConstant(x) && LLVMIsConstant(y)) { debug_printf("%s: inefficient/imprecise constant arithmetic\n", - __FUNCTION__); + __func__); } LLVMValueRef cmp = lp_build_cmp(bld, PIPE_FUNC_EQUAL, x, lp_build_const_vec(bld->gallivm, bld->type, 0.0f)); @@ -3157,7 +3157,7 @@ lp_build_polynomial(struct lp_build_context *bld, if (gallivm_debug & GALLIVM_DEBUG_PERF && LLVMIsConstant(x)) { debug_printf("%s: inefficient/imprecise constant arithmetic\n", - __FUNCTION__); + __func__); } /* @@ -3253,7 +3253,7 @@ lp_build_exp2(struct lp_build_context *bld, if (gallivm_debug & GALLIVM_DEBUG_PERF && LLVMIsConstant(x)) { debug_printf("%s: inefficient/imprecise constant arithmetic\n", - __FUNCTION__); + __func__); } assert(type.floating && type.width == 32); @@ -3444,7 +3444,7 @@ lp_build_log2_approx(struct lp_build_context *bld, if (gallivm_debug & GALLIVM_DEBUG_PERF && LLVMIsConstant(x)) { debug_printf("%s: inefficient/imprecise constant arithmetic\n", - __FUNCTION__); + __func__); } assert(type.floating && type.width == 32); diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format.h b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format.h index 
74a625dce..c4505b2fc 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -37,7 +37,7 @@ #include "gallivm/lp_bld.h" #include "gallivm/lp_bld_init.h" -#include "pipe/p_format.h" +#include "util/format/u_formats.h" struct util_format_description; struct lp_type; diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index 86cca0bb0..49aa549f8 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -656,7 +656,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) { debug_printf("%s: unpacking %s with floating point\n", - __FUNCTION__, format_desc->short_name); + __func__, format_desc->short_name); } conv_type = lp_float32_vec4_type(); @@ -813,7 +813,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, if (gallivm_debug & GALLIVM_DEBUG_PERF) { debug_printf("%s: falling back to util_format_%s_fetch_rgba_8unorm\n", - __FUNCTION__, format_desc->short_name); + __func__, format_desc->short_name); } /* @@ -919,7 +919,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, if (gallivm_debug & GALLIVM_DEBUG_PERF) { debug_printf("%s: falling back to util_format_%s_fetch_rgba_float\n", - __FUNCTION__, format_desc->short_name); + __func__, format_desc->short_name); } /* diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index 784b8dc66..a22bf3e4e 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -832,7 +832,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, if (gallivm_debug & GALLIVM_DEBUG_PERF) { debug_printf("%s: AoS fetch fallback for %s\n", - __FUNCTION__, format_desc->short_name); + __func__, format_desc->short_name); } tmp_type = type; diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c index 3b346f37d..066cb54a6 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c @@ -83,7 +83,7 @@ uyvy_to_yuv_soa(struct gallivm_state *gallivm, * v = (uyvy >> 8) & 0xff */ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) +#if DETECT_ARCH_X86 || DETECT_ARCH_X86_64 /* * Avoid shift with per-element count. * No support on x86, gets translated to roughly 5 instructions @@ -167,7 +167,7 @@ yuyv_to_yuv_soa(struct gallivm_state *gallivm, * v = (yuyv) & 0xff */ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) +#if DETECT_ARCH_X86 || DETECT_ARCH_X86_64 /* * Avoid shift with per-element count. 
* No support on x86, gets translated to roughly 5 instructions diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_gather.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_gather.c index 2f2506803..f55f2f314 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_gather.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_gather.c @@ -598,3 +598,62 @@ lp_build_gather_values(struct gallivm_state * gallivm, } return vec; } + +LLVMValueRef +lp_build_masked_gather(struct gallivm_state *gallivm, + unsigned length, + unsigned bit_size, + LLVMTypeRef vec_type, + LLVMValueRef offset_ptr, + LLVMValueRef exec_mask) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef args[4]; + char intrin_name[64]; + +#if LLVM_VERSION_MAJOR >= 16 + snprintf(intrin_name, 64, "llvm.masked.gather.v%ui%u.v%up0", + length, bit_size, length); +#else + snprintf(intrin_name, 64, "llvm.masked.gather.v%ui%u.v%up0i%u", + length, bit_size, length, bit_size); +#endif + + args[0] = offset_ptr; + args[1] = lp_build_const_int32(gallivm, bit_size / 8); + args[2] = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, + LLVMConstNull(LLVMTypeOf(exec_mask)), ""); + args[3] = LLVMConstNull(vec_type); + return lp_build_intrinsic(builder, intrin_name, vec_type, + args, 4, 0); + +} + +void +lp_build_masked_scatter(struct gallivm_state *gallivm, + unsigned length, + unsigned bit_size, + LLVMValueRef offset_ptr, + LLVMValueRef value_vec, + LLVMValueRef exec_mask) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef args[4]; + char intrin_name[64]; + +#if LLVM_VERSION_MAJOR >= 16 + snprintf(intrin_name, 64, "llvm.masked.scatter.v%ui%u.v%up0", + length, bit_size, length); +#else + snprintf(intrin_name, 64, "llvm.masked.scatter.v%ui%u.v%up0i%u", + length, bit_size, length, bit_size); +#endif + + args[0] = value_vec; + args[1] = offset_ptr; + args[2] = lp_build_const_int32(gallivm, bit_size / 8); + args[3] = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, + LLVMConstNull(LLVMTypeOf(exec_mask)), ""); + lp_build_intrinsic(builder, intrin_name, LLVMVoidTypeInContext(gallivm->context), + args, 4, 0); +} diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_gather.h b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_gather.h index 7930864e6..5fabed956 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_gather.h +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_gather.h @@ -66,4 +66,20 @@ lp_build_gather_values(struct gallivm_state * gallivm, LLVMValueRef * values, unsigned value_count); +LLVMValueRef +lp_build_masked_gather(struct gallivm_state *gallivm, + unsigned length, + unsigned bit_size, + LLVMTypeRef vec_type, + LLVMValueRef offset_ptr, + LLVMValueRef exec_mask); + +void +lp_build_masked_scatter(struct gallivm_state *gallivm, + unsigned length, + unsigned bit_size, + LLVMValueRef offset_ptr, + LLVMValueRef value_vec, + LLVMValueRef exec_mask); + #endif /* LP_BLD_GATHER_H_ */ diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_init.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_init.c index 584ea7386..24d082398 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_config.h" +#include "util/detect.h" #include "pipe/p_compiler.h" #include "util/macros.h" #include "util/u_cpu_detect.h" @@ -50,7 +50,7 @@ #if GALLIVM_USE_NEW_PASS == 1 #include <llvm-c/Transforms/PassBuilder.h> #elif GALLIVM_HAVE_CORO == 1 -#if LLVM_VERSION_MAJOR 
<= 8 && (defined(PIPE_ARCH_AARCH64) || defined (PIPE_ARCH_ARM) || defined(PIPE_ARCH_S390) || defined(PIPE_ARCH_MIPS64)) +#if LLVM_VERSION_MAJOR <= 8 && (DETECT_ARCH_AARCH64 || DETECT_ARCH_ARM || DETECT_ARCH_S390 || DETECT_ARCH_MIPS64) #include <llvm-c/Transforms/IPO.h> #endif #include <llvm-c/Transforms/Coroutines.h> @@ -67,7 +67,6 @@ static const struct debug_named_value lp_bld_perf_flags[] = { DEBUG_NAMED_VALUE_END }; -#ifdef DEBUG unsigned gallivm_debug = 0; static const struct debug_named_value lp_bld_debug_flags[] = { @@ -76,12 +75,14 @@ static const struct debug_named_value lp_bld_debug_flags[] = { { "asm", GALLIVM_DEBUG_ASM, NULL }, { "perf", GALLIVM_DEBUG_PERF, NULL }, { "gc", GALLIVM_DEBUG_GC, NULL }, +/* Don't allow setting DUMP_BC for release builds, since writing the files may be an issue with setuid. */ +#ifdef DEBUG { "dumpbc", GALLIVM_DEBUG_DUMP_BC, NULL }, +#endif DEBUG_NAMED_VALUE_END }; DEBUG_GET_ONCE_FLAGS_OPTION(gallivm_debug, "GALLIVM_DEBUG", lp_bld_debug_flags, 0) -#endif static boolean gallivm_initialized = FALSE; @@ -140,7 +141,7 @@ create_pass_manager(struct gallivm_state *gallivm) } #if GALLIVM_HAVE_CORO == 1 -#if LLVM_VERSION_MAJOR <= 8 && (defined(PIPE_ARCH_AARCH64) || defined (PIPE_ARCH_ARM) || defined(PIPE_ARCH_S390) || defined(PIPE_ARCH_MIPS64)) +#if LLVM_VERSION_MAJOR <= 8 && (DETECT_ARCH_AARCH64 || DETECT_ARCH_ARM || DETECT_ARCH_S390 || DETECT_ARCH_MIPS64) LLVMAddArgumentPromotionPass(gallivm->cgpassmgr); LLVMAddFunctionAttrsPass(gallivm->cgpassmgr); #endif @@ -355,7 +356,7 @@ init_gallivm_state(struct gallivm_state *gallivm, const char *name, if (!gallivm->module) goto fail; -#if defined(PIPE_ARCH_X86) +#if DETECT_ARCH_X86 lp_set_module_stack_alignment_override(gallivm->module, 4); #endif @@ -419,10 +420,23 @@ fail: return FALSE; } +unsigned +lp_build_init_native_width(void) +{ + // Default to 256 until we're confident llvmpipe with 512 is as correct and not slower than 256 + lp_native_vector_width = MIN2(util_get_cpu_caps()->max_vector_bits, 256); + assert(lp_native_vector_width); + + lp_native_vector_width = debug_get_num_option("LP_NATIVE_VECTOR_WIDTH", lp_native_vector_width); + assert(lp_native_vector_width); + + return lp_native_vector_width; +} boolean lp_build_init(void) { + lp_build_init_native_width(); if (gallivm_initialized) return TRUE; @@ -433,21 +447,13 @@ lp_build_init(void) */ LLVMLinkInMCJIT(); -#ifdef DEBUG gallivm_debug = debug_get_option_gallivm_debug(); -#endif gallivm_perf = debug_get_flags_option("GALLIVM_PERF", lp_bld_perf_flags, 0 ); lp_set_target_options(); - // Default to 256 until we're confident llvmpipe with 512 is as correct and not slower than 256 - lp_native_vector_width = MIN2(util_get_cpu_caps()->max_vector_bits, 256); - - lp_native_vector_width = debug_get_num_option("LP_NATIVE_VECTOR_WIDTH", - lp_native_vector_width); - -#ifdef PIPE_ARCH_PPC_64 +#if DETECT_ARCH_PPC_64 /* Set the NJ bit in VSCR to 0 so denormalized values are handled as * specified by IEEE standard (PowerISA 2.06 - Section 6.3). 
This guarantees * that some rounding and half-float to float handling does not round @@ -624,7 +630,7 @@ gallivm_compile_module(struct gallivm_state *gallivm) /* Disable frame pointer omission on debug/profile builds */ /* XXX: And workaround http://llvm.org/PR21435 */ -#if defined(DEBUG) || defined(PROFILE) || defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) +#if defined(DEBUG) || defined(PROFILE) || DETECT_ARCH_X86 || DETECT_ARCH_X86_64 LLVMAddTargetDependentFunctionAttr(func, "no-frame-pointer-elim", "true"); LLVMAddTargetDependentFunctionAttr(func, "no-frame-pointer-elim-non-leaf", "true"); #endif diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_intr.c index e52ce3713..542a4ba0d 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_intr.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_intr.c @@ -138,10 +138,6 @@ static const char *attr_to_str(enum lp_func_attr attr) case LP_FUNC_ATTR_INREG: return "inreg"; case LP_FUNC_ATTR_NOALIAS: return "noalias"; case LP_FUNC_ATTR_NOUNWIND: return "nounwind"; - case LP_FUNC_ATTR_READNONE: return "readnone"; - case LP_FUNC_ATTR_READONLY: return "readonly"; - case LP_FUNC_ATTR_WRITEONLY: return "writeonly"; - case LP_FUNC_ATTR_INACCESSIBLE_MEM_ONLY: return "inaccessiblememonly"; case LP_FUNC_ATTR_CONVERGENT: return "convergent"; case LP_FUNC_ATTR_PRESPLITCORO: return "presplitcoroutine"; default: @@ -182,7 +178,6 @@ lp_add_func_attributes(LLVMValueRef function, unsigned attrib_mask) * Set it for all intrinsics. */ attrib_mask |= LP_FUNC_ATTR_NOUNWIND; - attrib_mask &= ~LP_FUNC_ATTR_LEGACY; while (attrib_mask) { enum lp_func_attr attr = 1u << u_bit_scan(&attrib_mask); @@ -200,7 +195,6 @@ lp_build_intrinsic(LLVMBuilderRef builder, { LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); LLVMValueRef function, call; - bool set_callsite_attrs = !(attr_mask & LP_FUNC_ATTR_LEGACY); LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS]; @@ -229,17 +223,13 @@ lp_build_intrinsic(LLVMBuilderRef builder, abort(); } - if (!set_callsite_attrs) - lp_add_func_attributes(function, attr_mask); - if (gallivm_debug & GALLIVM_DEBUG_IR) { lp_debug_dump_value(function); } } call = LLVMBuildCall2(builder, function_type, function, args, num_args, ""); - if (set_callsite_attrs) - lp_add_func_attributes(call, attr_mask); + lp_add_func_attributes(call, attr_mask); return call; } @@ -335,7 +325,7 @@ lp_build_intrinsic_binary_anylength(struct gallivm_state *gallivm, * so crash and burn. */ debug_printf("%s: should handle arbitrary vector size\n", - __FUNCTION__); + __func__); assert(0); return NULL; } diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_intr.h index a73f64c0d..47a81cba2 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_intr.h +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_intr.h @@ -53,18 +53,8 @@ enum lp_func_attr { LP_FUNC_ATTR_INREG = (1 << 2), LP_FUNC_ATTR_NOALIAS = (1 << 3), LP_FUNC_ATTR_NOUNWIND = (1 << 4), - LP_FUNC_ATTR_READNONE = (1 << 5), - LP_FUNC_ATTR_READONLY = (1 << 6), - LP_FUNC_ATTR_WRITEONLY = (1 << 7), - LP_FUNC_ATTR_INACCESSIBLE_MEM_ONLY = (1 << 8), - LP_FUNC_ATTR_CONVERGENT = (1 << 9), - LP_FUNC_ATTR_PRESPLITCORO = (1 << 10), - - /* Legacy intrinsic that needs attributes on function declarations - * and they must match the internal LLVM definition exactly, otherwise - * intrinsic selection fails. 
- */ - LP_FUNC_ATTR_LEGACY = (1u << 31), + LP_FUNC_ATTR_CONVERGENT = (1 << 5), + LP_FUNC_ATTR_PRESPLITCORO = (1 << 6), }; void diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_limits.h index b0b854ad9..30e5c74fc 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_limits.h +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_limits.h @@ -150,8 +150,6 @@ gallivm_get_shader_param(enum pipe_shader_cap param) case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 1; case PIPE_SHADER_CAP_DROUND_SUPPORTED: - case PIPE_SHADER_CAP_DFRACEXP_DLDEXP_SUPPORTED: - case PIPE_SHADER_CAP_LDEXP_SUPPORTED: case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: return 0; diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_logic.c index 711db17d9..3bf4534da 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -189,7 +189,7 @@ lp_build_compare(struct gallivm_state *gallivm, assert(func > PIPE_FUNC_NEVER); assert(func < PIPE_FUNC_ALWAYS); -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) +#if DETECT_ARCH_X86 || DETECT_ARCH_X86_64 /* * There are no unsigned integer comparison instructions in SSE. */ @@ -203,7 +203,7 @@ lp_build_compare(struct gallivm_state *gallivm, func == PIPE_FUNC_GEQUAL) && (gallivm_debug & GALLIVM_DEBUG_PERF)) { debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n", - __FUNCTION__, type.length, type.width); + __func__, type.length, type.width); } #endif diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 2279e5acb..5e7a30a6c 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -56,11 +56,9 @@ #include <llvm-c/ExecutionEngine.h> #include <llvm/Target/TargetOptions.h> #include <llvm/ExecutionEngine/ExecutionEngine.h> -#include <llvm/ADT/Triple.h> #include <llvm/Analysis/TargetLibraryInfo.h> #include <llvm/ExecutionEngine/SectionMemoryManager.h> #include <llvm/Support/CommandLine.h> -#include <llvm/Support/Host.h> #include <llvm/Support/PrettyStackTrace.h> #include <llvm/ExecutionEngine/ObjectCache.h> #include <llvm/Support/TargetSelect.h> @@ -68,6 +66,14 @@ #include <llvm/Support/MemoryBuffer.h> #endif +#if LLVM_VERSION_MAJOR >= 17 +#include <llvm/TargetParser/Host.h> +#include <llvm/TargetParser/Triple.h> +#else +#include <llvm/Support/Host.h> +#include <llvm/ADT/Triple.h> +#endif + #if LLVM_VERSION_MAJOR < 11 #include <llvm/IR/CallSite.h> #endif @@ -86,8 +92,8 @@ #endif #include "c11/threads.h" -#include "os/os_thread.h" -#include "pipe/p_config.h" +#include "util/u_thread.h" +#include "util/detect.h" #include "util/u_debug.h" #include "util/u_cpu_detect.h" @@ -353,7 +359,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, * friends for configuring code generation options, like stack alignment. */ TargetOptions options; -#if defined(PIPE_ARCH_X86) && LLVM_VERSION_MAJOR < 13 +#if DETECT_ARCH_X86 && LLVM_VERSION_MAJOR < 13 options.StackAlignmentOverride = 4; #endif @@ -362,7 +368,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, .setTargetOptions(options) .setOptLevel((CodeGenOpt::Level)OptLevel); -#ifdef _WIN32 +#if DETECT_OS_WINDOWS /* * MCJIT works on Windows, but currently only through ELF object format. 
* @@ -370,16 +376,20 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, * different strings for MinGW/MSVC, so better play it safe and be * explicit. */ -# ifdef _WIN64 +# if DETECT_ARCH_X86_64 LLVMSetTarget(M, "x86_64-pc-win32-elf"); -# else +# elif DETECT_ARCH_X86 LLVMSetTarget(M, "i686-pc-win32-elf"); +# elif DETECT_ARCH_AARCH64 + LLVMSetTarget(M, "aarch64-pc-win32-elf"); +# else +# error Unsupported architecture for MCJIT on Windows. # endif #endif llvm::SmallVector<std::string, 16> MAttrs; -#if defined(PIPE_ARCH_ARM) +#if DETECT_ARCH_ARM /* llvm-3.3+ implements sys::getHostCPUFeatures for Arm, * which allows us to enable/disable code generation based * on the results of cpuid on these architectures. @@ -392,7 +402,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, ++f) { MAttrs.push_back(((*f).second ? "+" : "-") + (*f).first().str()); } -#elif defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) +#elif DETECT_ARCH_X86 || DETECT_ARCH_X86_64 /* * Because we can override cpu caps with environment variables, * so we do not use llvm::sys::getHostCPUFeatures to detect cpu features @@ -424,7 +434,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, MAttrs.push_back(util_get_cpu_caps()->has_avx512dq ? "+avx512dq" : "-avx512dq"); MAttrs.push_back(util_get_cpu_caps()->has_avx512vl ? "+avx512vl" : "-avx512vl"); #endif -#if defined(PIPE_ARCH_ARM) +#if DETECT_ARCH_ARM if (!util_get_cpu_caps()->has_neon) { MAttrs.push_back("-neon"); MAttrs.push_back("-crypto"); @@ -432,7 +442,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, } #endif -#if defined(PIPE_ARCH_PPC) +#if DETECT_ARCH_PPC MAttrs.push_back(util_get_cpu_caps()->has_altivec ? "+altivec" : "-altivec"); /* * Bug 25503 is fixed, by the same fix that fixed @@ -449,7 +459,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, } #endif -#if defined(PIPE_ARCH_MIPS64) +#if DETECT_ARCH_MIPS64 MAttrs.push_back(util_get_cpu_caps()->has_msa ? "+msa" : "-msa"); /* MSA requires a 64-bit FPU register file */ MAttrs.push_back("+fp64"); @@ -481,7 +491,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, * can't handle. Not entirely sure if we really need to do anything yet. */ -#ifdef PIPE_ARCH_PPC_64 +#if DETECT_ARCH_PPC_64 /* * Large programs, e.g. gnome-shell and firefox, may tax the addressability * of the Medium code model once dynamically generated JIT-compiled shader @@ -508,7 +518,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, #endif #endif -#if defined(PIPE_ARCH_MIPS64) +#if DETECT_ARCH_MIPS64 /* * ls3a4000 CPU and ls2k1000 SoC is a mips64r5 compatible with MSA SIMD * instruction set implemented, while ls3a3000 is mips64r2 compatible diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 3c2a44419..1b1c7b0c0 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -56,6 +56,7 @@ */ #define BRILINEAR_FACTOR 2 + /** * Does the given texture wrap mode allow sampling the texture border color? * XXX maybe move this into gallium util code. 
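/*
 * Editor's note, a minimal sketch that is not part of the merged diff: the
 * lp_bld_init.c, lp_bld_logic.c and lp_bld_misc.cpp hunks above replace the
 * old defined(PIPE_ARCH_*) / _WIN32 checks with the DETECT_ARCH_* and
 * DETECT_OS_* macros from util/detect.h.  The assumption illustrated below
 * is that those macros are always defined, to either 0 or 1, so they can be
 * combined in plain #if expressions without defined().
 */
#include "util/detect.h"

#if DETECT_ARCH_X86 || DETECT_ARCH_X86_64
/* x86-only code path, e.g. the 32-bit stack-alignment override seen above */
#endif

#if DETECT_OS_WINDOWS && DETECT_ARCH_AARCH64
/* combination newly handled above: MCJIT on Windows/ARM64 gets an ELF triple */
#endif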
@@ -106,7 +107,7 @@ lp_sampler_static_texture_state(struct lp_static_texture_state *state, const struct pipe_resource *texture = view->texture; state->format = view->format; - state->res_format = view->texture->format; + state->res_format = texture->format; state->swizzle_r = view->swizzle_r; state->swizzle_g = view->swizzle_g; state->swizzle_b = view->swizzle_b; @@ -116,7 +117,12 @@ lp_sampler_static_texture_state(struct lp_static_texture_state *state, assert(state->swizzle_b < PIPE_SWIZZLE_NONE); assert(state->swizzle_a < PIPE_SWIZZLE_NONE); - state->target = view->target; + /* check if it is a tex2d created from buf */ + if (view->is_tex2d_from_buf) + state->target = PIPE_TEXTURE_2D; + else + state->target = view->target; + state->pot_width = util_is_power_of_two_or_zero(texture->width0); state->pot_height = util_is_power_of_two_or_zero(texture->height0); state->pot_depth = util_is_power_of_two_or_zero(texture->depth0); @@ -128,6 +134,7 @@ lp_sampler_static_texture_state(struct lp_static_texture_state *state, */ } + /** * Initialize lp_sampler_static_texture_state object with the gallium * texture/sampler_view state (this contains the parts which are @@ -145,7 +152,7 @@ lp_sampler_static_texture_state_image(struct lp_static_texture_state *state, const struct pipe_resource *resource = view->resource; state->format = view->format; - state->res_format = view->resource->format; + state->res_format = resource->format; state->swizzle_r = PIPE_SWIZZLE_X; state->swizzle_g = PIPE_SWIZZLE_Y; state->swizzle_b = PIPE_SWIZZLE_Z; @@ -155,7 +162,7 @@ lp_sampler_static_texture_state_image(struct lp_static_texture_state *state, assert(state->swizzle_b < PIPE_SWIZZLE_NONE); assert(state->swizzle_a < PIPE_SWIZZLE_NONE); - state->target = view->resource->target; + state->target = resource->target; state->pot_width = util_is_power_of_two_or_zero(resource->width0); state->pot_height = util_is_power_of_two_or_zero(resource->height0); state->pot_depth = util_is_power_of_two_or_zero(resource->depth0); @@ -167,6 +174,7 @@ lp_sampler_static_texture_state_image(struct lp_static_texture_state *state, */ } + /** * Initialize lp_sampler_static_sampler_state object with the gallium sampler * state (this contains the parts which are considered static). 
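/*
 * Editor's sketch, not part of the diff: lp_sampler_static_texture_state()
 * above precomputes pot_width/pot_height/pot_depth because power-of-two
 * sizes let the wrap code further down (lp_build_sample_wrap_linear/_nearest)
 * replace a modulo with a bit mask.  Scalar equivalent of that fast path,
 * assuming the usual (v & (v - 1)) == 0 test that
 * util_is_power_of_two_or_zero() performs:
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool is_pot_or_zero(uint32_t v)
{
   return (v & (v - 1)) == 0;
}

static uint32_t wrap_repeat(int32_t coord, uint32_t size)
{
   assert(size > 0);
   if (is_pot_or_zero(size))
      return (uint32_t)coord & (size - 1);            /* cheap AND, as in the LLVM IR */
   int32_t m = coord % (int32_t)size;                 /* general case: floored modulo */
   return (uint32_t)(m < 0 ? m + (int32_t)size : m);
}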
@@ -241,6 +249,7 @@ lp_sampler_static_sampler_state(struct lp_static_sampler_state *state, state->normalized_coords = !sampler->unnormalized_coords; } + /* build aniso pmin value */ static LLVMValueRef lp_build_pmin(struct lp_build_sample_context *bld, @@ -260,10 +269,9 @@ lp_build_pmin(struct lp_build_sample_context *bld, LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0); LLVMValueRef ddx_ddy = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t); LLVMValueRef int_size, float_size; - unsigned length = coord_bld->type.length; - unsigned num_quads = length / 4; - boolean pmin_per_quad = pmin_bld->type.length != length; - unsigned i; + const unsigned length = coord_bld->type.length; + const unsigned num_quads = length / 4; + const boolean pmin_per_quad = pmin_bld->type.length != length; int_size = lp_build_minify(int_size_bld, bld->int_size, first_level, TRUE); float_size = lp_build_int_to_float(float_size_bld, int_size); @@ -280,7 +288,7 @@ lp_build_pmin(struct lp_build_sample_context *bld, }; LLVMValueRef ddx_ddys, ddx_ddyt, floatdim, shuffles[LP_MAX_VECTOR_LENGTH / 4]; - for (i = 0; i < num_quads; i++) { + for (unsigned i = 0; i < num_quads; i++) { shuffles[i*4+0] = shuffles[i*4+1] = index0; shuffles[i*4+2] = shuffles[i*4+3] = index1; } @@ -326,6 +334,7 @@ lp_build_pmin(struct lp_build_sample_context *bld, return pmin2; } + /** * Generate code to compute coordinate gradient (rho). * \param derivs partial derivatives of (s, t, r, q) with respect to X and Y @@ -354,34 +363,35 @@ lp_build_rho(struct lp_build_sample_context *bld, LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0); LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0); LLVMValueRef rho_vec; - LLVMValueRef int_size, float_size; LLVMValueRef rho; unsigned length = coord_bld->type.length; unsigned num_quads = length / 4; boolean rho_per_quad = rho_bld->type.length != length; boolean no_rho_opt = bld->no_rho_approx && (dims > 1); - unsigned i; LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); LLVMValueRef rho_xvec, rho_yvec; - /* Note that all simplified calculations will only work for isotropic filtering */ + /* Note that all simplified calculations will only work for isotropic + * filtering + */ /* * rho calcs are always per quad except for explicit derivs (excluding * the messy cube maps for now) when requested. 
*/ - int_size = lp_build_minify(int_size_bld, bld->int_size, first_level, TRUE); - float_size = lp_build_int_to_float(float_size_bld, int_size); + LLVMValueRef int_size = + lp_build_minify(int_size_bld, bld->int_size, first_level, TRUE); + LLVMValueRef float_size = lp_build_int_to_float(float_size_bld, int_size); if (derivs) { LLVMValueRef ddmax[3] = { NULL }, ddx[3] = { NULL }, ddy[3] = { NULL }; - for (i = 0; i < dims; i++) { - LLVMValueRef floatdim; + for (unsigned i = 0; i < dims; i++) { LLVMValueRef indexi = lp_build_const_int32(gallivm, i); - floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type, - coord_bld->type, float_size, indexi); + LLVMValueRef floatdim = + lp_build_extract_broadcast(gallivm, bld->float_size_in_type, + coord_bld->type, float_size, indexi); /* * note that for rho_per_quad case could reduce math (at some shuffle @@ -392,11 +402,9 @@ lp_build_rho(struct lp_build_sample_context *bld, ddy[i] = lp_build_mul(coord_bld, floatdim, derivs->ddy[i]); ddx[i] = lp_build_mul(coord_bld, ddx[i], ddx[i]); ddy[i] = lp_build_mul(coord_bld, ddy[i], ddy[i]); - } - else { - LLVMValueRef tmpx, tmpy; - tmpx = lp_build_abs(coord_bld, derivs->ddx[i]); - tmpy = lp_build_abs(coord_bld, derivs->ddy[i]); + } else { + LLVMValueRef tmpx = lp_build_abs(coord_bld, derivs->ddx[i]); + LLVMValueRef tmpy = lp_build_abs(coord_bld, derivs->ddy[i]); ddmax[i] = lp_build_max(coord_bld, tmpx, tmpy); ddmax[i] = lp_build_mul(coord_bld, floatdim, ddmax[i]); } @@ -410,8 +418,7 @@ lp_build_rho(struct lp_build_sample_context *bld, } rho = lp_build_max(coord_bld, rho_xvec, rho_yvec); /* skipping sqrt hence returning rho squared */ - } - else { + } else { rho = ddmax[0]; if (dims > 1) { rho = lp_build_max(coord_bld, rho, ddmax[1]); @@ -421,7 +428,8 @@ lp_build_rho(struct lp_build_sample_context *bld, } } - LLVMValueRef rho_is_inf = lp_build_is_inf_or_nan(gallivm, coord_bld->type, rho); + LLVMValueRef rho_is_inf = lp_build_is_inf_or_nan(gallivm, + coord_bld->type, rho); rho = lp_build_select(coord_bld, rho_is_inf, coord_bld->zero, rho); if (rho_per_quad) { @@ -431,8 +439,7 @@ lp_build_rho(struct lp_build_sample_context *bld, rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type, rho_bld->type, rho, 0); } - } - else { + } else { /* * This looks all a bit complex, but it's not that bad * (the shuffle code makes it look worse than it is). 
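/*
 * Editor's sketch, not part of the diff: scalar version of what
 * lp_build_rho() and lp_build_lod_selector() above compute per quad.  rho is
 * the larger screen-space scale factor of the texel footprint; on the
 * no_rho_approx path the code keeps rho squared and halves the log2 instead
 * of taking a square root, since log2(sqrt(x)) == 0.5 * log2(x).  The names
 * below are illustrative only.
 */
#include <math.h>

static float lod_from_derivatives(float dudx, float dvdx,
                                  float dudy, float dvdy,
                                  float width, float height)
{
   /* scale the derivatives into texel space */
   float dsdx = dudx * width,  dtdx = dvdx * height;
   float dsdy = dudy * width,  dtdy = dvdy * height;

   /* rho^2 = max of the squared lengths of the two footprint axes */
   float rho2 = fmaxf(dsdx * dsdx + dtdx * dtdx,
                      dsdy * dsdy + dtdy * dtdy);

   /* lod = log2(rho) = 0.5 * log2(rho^2), so the sqrt never has to be taken */
   return 0.5f * log2f(rho2);
}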
@@ -453,8 +460,7 @@ lp_build_rho(struct lp_build_sample_context *bld, if (dims < 2) { ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s); - } - else if (dims >= 2) { + } else if (dims >= 2) { ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t); if (dims > 2) { ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r); @@ -470,14 +476,16 @@ lp_build_rho(struct lp_build_sample_context *bld, 2, 3, LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE }; - LLVMValueRef ddx_ddys, ddx_ddyt, floatdim, shuffles[LP_MAX_VECTOR_LENGTH / 4]; + LLVMValueRef ddx_ddys, ddx_ddyt, floatdim; + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; - for (i = 0; i < num_quads; i++) { + for (unsigned i = 0; i < num_quads; i++) { shuffles[i*4+0] = shuffles[i*4+1] = index0; shuffles[i*4+2] = shuffles[i*4+3] = index1; } floatdim = LLVMBuildShuffleVector(builder, float_size, float_size, - LLVMConstVector(shuffles, length), ""); + LLVMConstVector(shuffles, length), + ""); ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], floatdim); ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], ddx_ddy[0]); ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle01); @@ -504,26 +512,22 @@ lp_build_rho(struct lp_build_sample_context *bld, if (rho_per_quad) { rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type, rho_bld->type, rho, 0); - } - else { + } else { rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4); } /* skipping sqrt hence returning rho squared */ - } - else { + } else { ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]); if (dims > 2) { ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]); - } - else { + } else { ddx_ddy[1] = NULL; /* silence compiler warning */ } if (dims < 2) { rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle0); rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle2); - } - else if (dims == 2) { + } else if (dims == 2) { static const unsigned char swizzle02[] = { 0, 2, LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE @@ -534,12 +538,11 @@ lp_build_rho(struct lp_build_sample_context *bld, }; rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle02); rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle13); - } - else { + } else { LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH]; LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH]; assert(dims == 3); - for (i = 0; i < num_quads; i++) { + for (unsigned i = 0; i < num_quads; i++) { shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i); shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2); shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i); @@ -562,20 +565,19 @@ lp_build_rho(struct lp_build_sample_context *bld, if (dims > 1) { /* could use some broadcast_vector helper for this? 
*/ LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4]; - for (i = 0; i < num_quads; i++) { + for (unsigned i = 0; i < num_quads; i++) { src[i] = float_size; } - float_size = lp_build_concat(bld->gallivm, src, float_size_bld->type, num_quads); - } - else { + float_size = lp_build_concat(bld->gallivm, src, + float_size_bld->type, num_quads); + } else { float_size = lp_build_broadcast_scalar(coord_bld, float_size); } rho_vec = lp_build_mul(coord_bld, rho_vec, float_size); if (dims <= 1) { rho = rho_vec; - } - else { + } else { if (dims >= 2) { LLVMValueRef rho_s, rho_t, rho_r; @@ -593,12 +595,10 @@ lp_build_rho(struct lp_build_sample_context *bld, if (rho_per_quad) { rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type, rho_bld->type, rho, 0); - } - else { + } else { rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4); } - } - else { + } else { if (dims <= 1) { rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, ""); } @@ -606,8 +606,7 @@ lp_build_rho(struct lp_build_sample_context *bld, if (dims <= 1) { rho = rho_vec; - } - else { + } else { if (dims >= 2) { LLVMValueRef rho_s, rho_t, rho_r; @@ -717,9 +716,6 @@ lp_build_brilinear_rho(struct lp_build_context *bld, LLVMValueRef *out_lod_ipart, LLVMValueRef *out_lod_fpart) { - LLVMValueRef lod_ipart; - LLVMValueRef lod_fpart; - const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor); const double post_offset = 1 - 2*factor; @@ -736,14 +732,15 @@ lp_build_brilinear_rho(struct lp_build_context *bld, lp_build_const_vec(bld->gallivm, bld->type, pre_factor)); /* ipart = ifloor(log2(rho)) */ - lod_ipart = lp_build_extract_exponent(bld, rho, 0); + LLVMValueRef lod_ipart = lp_build_extract_exponent(bld, rho, 0); /* fpart = rho / 2**ipart */ - lod_fpart = lp_build_extract_mantissa(bld, rho); + LLVMValueRef lod_fpart = lp_build_extract_mantissa(bld, rho); - lod_fpart = lp_build_mad(bld, lod_fpart, - lp_build_const_vec(bld->gallivm, bld->type, factor), - lp_build_const_vec(bld->gallivm, bld->type, post_offset)); + lod_fpart = + lp_build_mad(bld, lod_fpart, + lp_build_const_vec(bld->gallivm, bld->type, factor), + lp_build_const_vec(bld->gallivm, bld->type, post_offset)); /* * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since: @@ -768,7 +765,6 @@ lp_build_ilog2_sqrt(struct lp_build_context *bld, LLVMValueRef x) { LLVMBuilderRef builder = bld->gallivm->builder; - LLVMValueRef ipart; struct lp_type i_type = lp_int_type(bld->type); LLVMValueRef one = lp_build_const_int_vec(bld->gallivm, i_type, 1); @@ -777,7 +773,7 @@ lp_build_ilog2_sqrt(struct lp_build_context *bld, assert(lp_check_value(bld->type, x)); /* ipart = log2(x) + 0.5 = 0.5*(log2(x^2) + 1.0) */ - ipart = lp_build_extract_exponent(bld, x, 1); + LLVMValueRef ipart = lp_build_extract_exponent(bld, x, 1); ipart = LLVMBuildAShr(builder, ipart, one, ""); return ipart; @@ -824,21 +820,21 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, *out_lod_fpart = lodf_bld->zero; /* - * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture Magnification: - * "Implementations may either unconditionally assume c = 0 for the minification - * vs. magnification switch-over point, or may choose to make c depend on the - * combination of minification and magnification modes as follows: if the - * magnification filter is given by LINEAR and the minification filter is given - * by NEAREST_MIPMAP_NEAREST or NEAREST_MIPMAP_LINEAR, then c = 0.5. This is - * done to ensure that a minified texture does not appear "sharper" than a - * magnified texture. Otherwise c = 0." 
- * And 3.9.11 Texture Minification: - * "If lod is less than or equal to the constant c (see section 3.9.12) the - * texture is said to be magnified; if it is greater, the texture is minified." - * So, using 0 as switchover point always, and using magnification for lod == 0. - * Note that the always c = 0 behavior is new (first appearing in GL 3.1 spec), - * old GL versions required 0.5 for the modes listed above. - * I have no clue about the (undocumented) wishes of d3d9/d3d10 here! + * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture + * Magnification: "Implementations may either unconditionally assume c = 0 + * for the minification vs. magnification switch-over point, or may choose + * to make c depend on the combination of minification and magnification + * modes as follows: if the magnification filter is given by LINEAR and the + * minification filter is given by NEAREST_MIPMAP_NEAREST or + * NEAREST_MIPMAP_LINEAR, then c = 0.5. This is done to ensure that a + * minified texture does not appear "sharper" than a magnified + * texture. Otherwise c = 0." And 3.9.11 Texture Minification: "If lod is + * less than or equal to the constant c (see section 3.9.12) the texture is + * said to be magnified; if it is greater, the texture is minified." So, + * using 0 as switchover point always, and using magnification for lod == + * 0. Note that the always c = 0 behavior is new (first appearing in GL + * 3.1 spec), old GL versions required 0.5 for the modes listed above. I + * have no clue about the (undocumented) wishes of d3d9/d3d10 here! */ if (bld->static_sampler_state->min_max_lod_equal && !is_lodq) { @@ -850,16 +846,14 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, bld->context_ptr, sampler_unit); lod = lp_build_broadcast_scalar(lodf_bld, min_lod); - } - else { + } else { if (explicit_lod) { if (bld->num_lods != bld->coord_type.length) lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type, lodf_bld->type, explicit_lod, 0); else lod = explicit_lod; - } - else { + } else { LLVMValueRef rho; boolean rho_squared = bld->no_rho_approx && (bld->dims > 1); @@ -867,8 +861,9 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, !explicit_lod) { rho = lp_build_pmin(bld, first_level, s, t, max_aniso); rho_squared = true; - } else + } else { rho = lp_build_rho(bld, first_level, s, t, r, derivs); + } /* * Compute lod = log2(rho) @@ -893,8 +888,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, */ if (rho_squared) { *out_lod_ipart = lp_build_ilog2_sqrt(lodf_bld, rho); - } - else { + } else { *out_lod_ipart = lp_build_ilog2(lodf_bld, rho); } *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER, @@ -920,8 +914,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, if (0) { lod = lp_build_log2(lodf_bld, rho); - } - else { + } else { /* get more accurate results if we just sqaure rho always */ if (!rho_squared) rho = lp_build_mul(lodf_bld, rho, rho); @@ -930,13 +923,16 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, /* log2(x^2) == 0.5*log2(x) */ lod = lp_build_mul(lodf_bld, lod, - lp_build_const_vec(bld->gallivm, lodf_bld->type, 0.5F)); + lp_build_const_vec(bld->gallivm, + lodf_bld->type, 0.5F)); /* add shader lod bias */ if (lod_bias) { if (bld->num_lods != bld->coord_type.length) - lod_bias = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type, - lodf_bld->type, lod_bias, 0); + lod_bias = lp_build_pack_aos_scalars(bld->gallivm, + bld->coord_bld.type, + lodf_bld->type, + lod_bias, 0); lod = 
LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias"); } } @@ -988,14 +984,12 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, if (!bld->no_brilinear) { lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR, out_lod_ipart, out_lod_fpart); - } - else { + } else { lp_build_ifloor_fract(lodf_bld, lod, out_lod_ipart, out_lod_fpart); } lp_build_name(*out_lod_fpart, "lod_fpart"); - } - else { + } else { *out_lod_ipart = lp_build_iround(lodf_bld, lod); } @@ -1022,9 +1016,7 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, LLVMValueRef *out_of_bounds) { struct lp_build_context *leveli_bld = &bld->leveli_bld; - LLVMValueRef level; - - level = lp_build_add(leveli_bld, lod_ipart, first_level); + LLVMValueRef level = lp_build_add(leveli_bld, lod_ipart, first_level); if (out_of_bounds) { LLVMValueRef out, out1; @@ -1033,21 +1025,19 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, out = lp_build_or(leveli_bld, out, out1); if (bld->num_mips == bld->coord_bld.type.length) { *out_of_bounds = out; - } - else if (bld->num_mips == 1) { + } else if (bld->num_mips == 1) { *out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld, out); - } - else { + } else { assert(bld->num_mips == bld->coord_bld.type.length / 4); - *out_of_bounds = lp_build_unpack_broadcast_aos_scalars(bld->gallivm, - leveli_bld->type, - bld->int_coord_bld.type, - out); + *out_of_bounds = + lp_build_unpack_broadcast_aos_scalars(bld->gallivm, + leveli_bld->type, + bld->int_coord_bld.type, + out); } level = lp_build_andnot(&bld->int_coord_bld, level, *out_of_bounds); *level_out = level; - } - else { + } else { /* clamp level to legal range of levels */ *level_out = lp_build_clamp(leveli_bld, level, first_level, last_level); @@ -1059,7 +1049,8 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad (or per element) int LOD(s) * to two (per-quad) (adjacent) mipmap level indexes, and fix up float lod * part accordingly. - * Later, we'll sample from those two mipmap levels and interpolate between them. + * Later, we'll sample from those two mipmap levels and interpolate between + * them. */ void lp_build_linear_mip_levels(struct lp_build_sample_context *bld, @@ -1083,9 +1074,9 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld, *level1_out = lp_build_add(leveli_bld, *level0_out, leveli_bld->one); /* - * Clamp both *level0_out and *level1_out to [first_level, last_level], with - * the minimum number of comparisons, and zeroing lod_fpart in the extreme - * ends in the process. + * Clamp both *level0_out and *level1_out to [first_level, last_level], + * with the minimum number of comparisons, and zeroing lod_fpart in the + * extreme ends in the process. */ /* *level0_out < first_level */ @@ -1121,18 +1112,25 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld, lp_build_name(*lod_fpart_inout, "texture%u_mipweight", texture_unit); } + /** * A helper function that factorizes this common pattern. 
*/ static LLVMValueRef load_mip(struct gallivm_state *gallivm, - LLVMTypeRef ptr_type, LLVMValueRef offsets, LLVMValueRef index1) { + LLVMTypeRef ptr_type, + LLVMValueRef offsets, + LLVMValueRef index1) +{ LLVMValueRef zero = lp_build_const_int32(gallivm, 0); LLVMValueRef indexes[2] = {zero, index1}; - LLVMValueRef ptr = LLVMBuildGEP2(gallivm->builder, ptr_type, offsets, indexes, ARRAY_SIZE(indexes), ""); - return LLVMBuildLoad2(gallivm->builder, LLVMInt32TypeInContext(gallivm->context), ptr, ""); + LLVMValueRef ptr = LLVMBuildGEP2(gallivm->builder, ptr_type, offsets, + indexes, ARRAY_SIZE(indexes), ""); + return LLVMBuildLoad2(gallivm->builder, + LLVMInt32TypeInContext(gallivm->context), ptr, ""); } + /** * Return pointer to a single mipmap level. * \param level integer mipmap level @@ -1141,14 +1139,17 @@ LLVMValueRef lp_build_get_mipmap_level(struct lp_build_sample_context *bld, LLVMValueRef level) { - LLVMValueRef mip_offset = load_mip(bld->gallivm, bld->mip_offsets_type, bld->mip_offsets, level); + LLVMValueRef mip_offset = load_mip(bld->gallivm, bld->mip_offsets_type, + bld->mip_offsets, level); LLVMBuilderRef builder = bld->gallivm->builder; - LLVMValueRef data_ptr = LLVMBuildGEP2(builder, - LLVMInt8TypeInContext(bld->gallivm->context), - bld->base_ptr, &mip_offset, 1, ""); + LLVMValueRef data_ptr = + LLVMBuildGEP2(builder, + LLVMInt8TypeInContext(bld->gallivm->context), + bld->base_ptr, &mip_offset, 1, ""); return data_ptr; } + /** * Return (per-pixel) offsets to mip levels. * \param level integer mipmap level @@ -1163,29 +1164,32 @@ lp_build_get_mip_offsets(struct lp_build_sample_context *bld, if (bld->num_mips == 1) { offset1 = load_mip(bld->gallivm, bld->mip_offsets_type, bld->mip_offsets, level); offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1); - } - else if (bld->num_mips == bld->coord_bld.type.length / 4) { - unsigned i; - + } else if (bld->num_mips == bld->coord_bld.type.length / 4) { offsets = bld->int_coord_bld.undef; - for (i = 0; i < bld->num_mips; i++) { + for (unsigned i = 0; i < bld->num_mips; i++) { LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); - offset1 = load_mip(bld->gallivm, bld->mip_offsets_type, bld->mip_offsets, LLVMBuildExtractElement(builder, level, indexi, "")); + offset1 = load_mip(bld->gallivm, bld->mip_offsets_type, + bld->mip_offsets, + LLVMBuildExtractElement(builder, level, + indexi, "")); LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i); - offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexo, ""); + offsets = LLVMBuildInsertElement(builder, offsets, offset1, + indexo, ""); } - offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, offsets, 0, 4); - } - else { - unsigned i; - + offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, + offsets, 0, 4); + } else { assert (bld->num_mips == bld->coord_bld.type.length); offsets = bld->int_coord_bld.undef; - for (i = 0; i < bld->num_mips; i++) { + for (unsigned i = 0; i < bld->num_mips; i++) { LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); - offset1 = load_mip(bld->gallivm, bld->mip_offsets_type, bld->mip_offsets, LLVMBuildExtractElement(builder, level, indexi, "")); - offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexi, ""); + offset1 = load_mip(bld->gallivm, bld->mip_offsets_type, + bld->mip_offsets, + LLVMBuildExtractElement(builder, level, + indexi, "")); + offsets = LLVMBuildInsertElement(builder, offsets, offset1, + indexi, ""); } } return offsets; @@ -1210,16 +1214,14 @@ lp_build_minify(struct 
lp_build_context *bld, if (level == bld->zero) { /* if we're using mipmap level zero, no minification is needed */ return base_size; - } - else { + } else { LLVMValueRef size; assert(bld->type.sign); if (lod_scalar || (util_get_cpu_caps()->has_avx2 || !util_get_cpu_caps()->has_sse)) { size = LLVMBuildLShr(builder, base_size, level, "minify"); size = lp_build_max(bld, size, bld->one); - } - else { + } else { /* * emulate shift with float mul, since intel "forgot" shifts with * per-element shift count until avx2, which results in terrible @@ -1259,6 +1261,7 @@ lp_build_minify(struct lp_build_context *bld, } } + /* * Scale image dimensions with block sizes. * @@ -1278,14 +1281,18 @@ lp_build_scale_view_dims(struct lp_build_context *bld, LLVMValueRef size, LLVMValueRef view_blocksize) { LLVMBuilderRef builder = bld->gallivm->builder; - LLVMValueRef ret; - - ret = LLVMBuildAdd(builder, size, LLVMBuildSub(builder, tex_blocksize, lp_build_const_int_vec(bld->gallivm, bld->type, 1), ""), ""); + LLVMValueRef ret = + LLVMBuildAdd(builder, size, + LLVMBuildSub(builder, tex_blocksize, + lp_build_const_int_vec(bld->gallivm, + bld->type, 1), ""), + ""); ret = LLVMBuildLShr(builder, ret, tex_blocksize_log2, ""); ret = LLVMBuildMul(builder, ret, view_blocksize, ""); return ret; } + /* * Scale a single image dimension. * @@ -1296,18 +1303,22 @@ LLVMValueRef lp_build_scale_view_dim(struct gallivm_state *gallivm, LLVMValueRef size, unsigned tex_blocksize, unsigned view_blocksize) { - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef ret; - if (tex_blocksize == view_blocksize) return size; - ret = LLVMBuildAdd(builder, size, lp_build_const_int32(gallivm, tex_blocksize - 1), ""); - ret = LLVMBuildLShr(builder, ret, lp_build_const_int32(gallivm, util_logbase2(tex_blocksize)), ""); - ret = LLVMBuildMul(builder, ret, lp_build_const_int32(gallivm, view_blocksize), ""); + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef ret = + LLVMBuildAdd(builder, size, + lp_build_const_int32(gallivm, tex_blocksize - 1), ""); + ret = LLVMBuildLShr(builder, ret, + lp_build_const_int32(gallivm, + util_logbase2(tex_blocksize)), ""); + ret = LLVMBuildMul(builder, ret, + lp_build_const_int32(gallivm, view_blocksize), ""); return ret; } + /** * Dereference stride_array[mipmap_level] array to get a stride. * Return stride as a vector. 
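/*
 * Editor's sketch, not part of the diff: scalar equivalents of
 * lp_build_minify() and lp_build_scale_view_dim() above.  A mip level is the
 * base size shifted right by the level but clamped to 1, and a view whose
 * format has a different block size than the underlying texture (for example
 * an uncompressed view of a compressed resource) is rescaled in whole blocks.
 * Shift and divide are interchangeable here because tex_blocksize is a power
 * of two, which is what the LLVM version exploits.
 */
static unsigned minify(unsigned base_size, unsigned level)
{
   unsigned size = base_size >> level;
   return size ? size : 1;                 /* never drop below one texel */
}

static unsigned scale_view_dim(unsigned size,
                               unsigned tex_blocksize,   /* power of two */
                               unsigned view_blocksize)
{
   if (tex_blocksize == view_blocksize)
      return size;
   unsigned blocks = (size + tex_blocksize - 1) / tex_blocksize;
   return blocks * view_blocksize;
}
/* e.g. scale_view_dim(18, 4, 1) == 5: 18 texels cover 5 compressed blocks */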
@@ -1319,33 +1330,34 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, { LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef stride, stride1; + if (bld->num_mips == 1) { stride1 = load_mip(bld->gallivm, stride_type, stride_array, level); stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1); - } - else if (bld->num_mips == bld->coord_bld.type.length / 4) { + } else if (bld->num_mips == bld->coord_bld.type.length / 4) { LLVMValueRef stride1; - unsigned i; stride = bld->int_coord_bld.undef; - for (i = 0; i < bld->num_mips; i++) { + for (unsigned i = 0; i < bld->num_mips; i++) { LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); - stride1 = load_mip(bld->gallivm, stride_type, stride_array, LLVMBuildExtractElement(builder, level, indexi, "")); + stride1 = load_mip(bld->gallivm, stride_type, stride_array, + LLVMBuildExtractElement(builder, level, + indexi, "")); LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i); stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, ""); } stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0, 4); - } - else { + } else { LLVMValueRef stride1; - unsigned i; assert (bld->num_mips == bld->coord_bld.type.length); stride = bld->int_coord_bld.undef; - for (i = 0; i < bld->coord_bld.type.length; i++) { + for (unsigned i = 0; i < bld->coord_bld.type.length; i++) { LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); - stride1 = load_mip(bld->gallivm, stride_type, stride_array, LLVMBuildExtractElement(builder, level, indexi, "")); + stride1 = load_mip(bld->gallivm, stride_type, stride_array, + LLVMBuildExtractElement(builder, level, + indexi, "")); stride = LLVMBuildInsertElement(builder, stride, stride1, indexi, ""); } } @@ -1373,19 +1385,18 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, */ if (bld->num_mips == 1) { ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel); - *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec, TRUE); + *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, + ilevel_vec, TRUE); *out_size = lp_build_scale_view_dims(&bld->int_size_bld, *out_size, bld->int_tex_blocksize, bld->int_tex_blocksize_log2, bld->int_view_blocksize); - } - else { + } else { LLVMValueRef int_size_vec; LLVMValueRef int_tex_blocksize_vec, int_tex_blocksize_log2_vec; LLVMValueRef int_view_blocksize_vec; LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; - unsigned num_quads = bld->coord_bld.type.length / 4; - unsigned i; + const unsigned num_quads = bld->coord_bld.type.length / 4; if (bld->num_mips == num_quads) { /* @@ -1408,14 +1419,13 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, assert(bld->int_size_in_bld.type.length == 1); int_size_vec = lp_build_broadcast_scalar(&bld4, bld->int_size); - int_tex_blocksize_vec = lp_build_broadcast_scalar(&bld4, - bld->int_tex_blocksize); - int_tex_blocksize_log2_vec = lp_build_broadcast_scalar(&bld4, - bld->int_tex_blocksize_log2); - int_view_blocksize_vec = lp_build_broadcast_scalar(&bld4, - bld->int_view_blocksize); - } - else { + int_tex_blocksize_vec = + lp_build_broadcast_scalar(&bld4, bld->int_tex_blocksize); + int_tex_blocksize_log2_vec = + lp_build_broadcast_scalar(&bld4, bld->int_tex_blocksize_log2); + int_view_blocksize_vec = + lp_build_broadcast_scalar(&bld4, bld->int_view_blocksize); + } else { assert(bld->int_size_in_bld.type.length == 4); int_size_vec = bld->int_size; int_tex_blocksize_vec = bld->int_tex_blocksize; @@ -1423,7 +1433,7 @@ 
lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, int_view_blocksize_vec = bld->int_view_blocksize; } - for (i = 0; i < num_quads; i++) { + for (unsigned i = 0; i < num_quads; i++) { LLVMValueRef ileveli; LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); @@ -1439,51 +1449,59 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, int_view_blocksize_vec); } /* - * out_size is [w0, h0, d0, _, w1, h1, d1, _, ...] vector for dims > 1, - * [w0, w0, w0, w0, w1, w1, w1, w1, ...] otherwise. + * out_size is [w0, h0, d0, _, w1, h1, d1, _, ...] vector for + * dims > 1, [w0, w0, w0, w0, w1, w1, w1, w1, ...] otherwise. */ *out_size = lp_build_concat(bld->gallivm, tmp, bld4.type, num_quads); - } - else { - /* FIXME: this is terrible and results in _huge_ vector - * (for the dims > 1 case). - * Should refactor this (together with extract_image_sizes) and do - * something more useful. Could for instance if we have width,height - * with 4-wide vector pack all elements into a 8xi16 vector - * (on which we can still do useful math) instead of using a 16xi32 - * vector. - * For dims == 1 this will create [w0, w1, w2, w3, ...] vector. - * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] vector. - */ + } else { + /* FIXME: this is terrible and results in _huge_ vector + * (for the dims > 1 case). + * Should refactor this (together with extract_image_sizes) and do + * something more useful. Could for instance if we have width,height + * with 4-wide vector pack all elements into a 8xi16 vector + * (on which we can still do useful math) instead of using a 16xi32 + * vector. + * For dims == 1 this will create [w0, w1, w2, w3, ...] vector. + * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] + * vector. + */ assert(bld->num_mips == bld->coord_bld.type.length); if (bld->dims == 1) { assert(bld->int_size_in_bld.type.length == 1); int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, bld->int_size); - int_tex_blocksize_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, - bld->int_tex_blocksize); - int_tex_blocksize_log2_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, - bld->int_tex_blocksize_log2); - int_view_blocksize_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, - bld->int_view_blocksize); - *out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec, ilevel, FALSE); - *out_size = lp_build_scale_view_dims(&bld->int_coord_bld, *out_size, + int_tex_blocksize_vec = + lp_build_broadcast_scalar(&bld->int_coord_bld, + bld->int_tex_blocksize); + int_tex_blocksize_log2_vec = + lp_build_broadcast_scalar(&bld->int_coord_bld, + bld->int_tex_blocksize_log2); + int_view_blocksize_vec = + lp_build_broadcast_scalar(&bld->int_coord_bld, + bld->int_view_blocksize); + *out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec, + ilevel, FALSE); + *out_size = lp_build_scale_view_dims(&bld->int_coord_bld, + *out_size, int_tex_blocksize_vec, int_tex_blocksize_log2_vec, int_view_blocksize_vec); - } - else { + } else { LLVMValueRef ilevel1; - for (i = 0; i < bld->num_mips; i++) { + for (unsigned i = 0; i < bld->num_mips; i++) { LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); - ilevel1 = lp_build_extract_broadcast(bld->gallivm, bld->int_coord_type, - bld->int_size_in_bld.type, ilevel, indexi); + ilevel1 = lp_build_extract_broadcast(bld->gallivm, + bld->int_coord_type, + bld->int_size_in_bld.type, + ilevel, indexi); tmp[i] = bld->int_size; - tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i], ilevel1, TRUE); - tmp[i] = 
lp_build_scale_view_dims(&bld->int_size_in_bld, tmp[i], + tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i], + ilevel1, TRUE); + tmp[i] = lp_build_scale_view_dims(&bld->int_size_in_bld, + tmp[i], bld->int_tex_blocksize, bld->int_tex_blocksize_log2, bld->int_view_blocksize); @@ -1552,14 +1570,12 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld, LLVMConstInt(i32t, 2, 0)); } } - } - else { + } else { unsigned num_quads = bld->coord_bld.type.length / 4; if (dims == 1) { *out_width = size; - } - else if (bld->num_mips == num_quads) { + } else if (bld->num_mips == num_quads) { *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4); if (dims >= 2) { *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4); @@ -1567,8 +1583,7 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld, *out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2, 4); } } - } - else { + } else { assert(bld->num_mips == bld->coord_type.length); *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type, coord_type, size, 0); @@ -1620,6 +1635,7 @@ lp_build_unnormalized_coords(struct lp_build_sample_context *bld, } } + /** * Generate new coords and faces for cubemap texels falling off the face. * @@ -1785,8 +1801,7 @@ lp_build_select3(struct lp_build_context *sel_bld, LLVMValueRef val1, LLVMValueRef val2) { - LLVMValueRef tmp; - tmp = lp_build_select(sel_bld, sel0, val0, val1); + LLVMValueRef tmp = lp_build_select(sel_bld, sel0, val0, val1); return lp_build_select(sel_bld, sel1, val2, tmp); } @@ -1874,8 +1889,8 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy, tdynewz; LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy; /* - * s = 1/2 * ( sc / ma + 1) - * t = 1/2 * ( tc / ma + 1) + * s = 1/2 * (sc / ma + 1) + * t = 1/2 * (tc / ma + 1) * * s' = 1/2 * (sc' * ma - sc * ma') / ma^2 * t' = 1/2 * (tc' * ma - tc * ma') / ma^2 @@ -1901,8 +1916,7 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, ddy[0] = lp_build_ddy(coord_bld, s); ddy[1] = lp_build_ddy(coord_bld, t); ddy[2] = lp_build_ddy(coord_bld, r); - } - else { + } else { ddx[0] = derivs_in->ddx[0]; ddx[1] = derivs_in->ddx[1]; ddx[2] = derivs_in->ddx[2]; @@ -2095,8 +2109,7 @@ lp_build_sample_partial_offset(struct lp_build_context *bld, if (block_length == 1) { subcoord = bld->zero; - } - else { + } else { /* * Pixel blocks have power of two dimensions. LLVM should convert the * rem/div to bit arithmetic. 
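/*
 * Editor's sketch, not part of the diff: what lp_build_sample_partial_offset()
 * above computes for one coordinate, in scalar form.  The coordinate is split
 * into a block index (scaled by the per-block stride) and a residual
 * coordinate inside the block; with power-of-two block dimensions the
 * division and remainder reduce to a shift and a mask, which is the strength
 * reduction the comment above expects LLVM to perform.
 */
static void sample_partial_offset(unsigned coord,
                                  unsigned block_length,  /* power of two */
                                  unsigned stride,        /* bytes per block step */
                                  unsigned *out_offset,
                                  unsigned *out_subcoord)
{
   unsigned block = coord / block_length;   /* lowered to a right shift */
   *out_subcoord  = coord % block_length;   /* lowered to an AND mask */
   *out_offset    = block * stride;
}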
@@ -2165,8 +2178,7 @@ lp_build_sample_offset(struct lp_build_context *bld, y, y_stride, &y_offset, out_j); offset = lp_build_add(bld, offset, y_offset); - } - else { + } else { *out_j = bld->zero; } @@ -2183,6 +2195,7 @@ lp_build_sample_offset(struct lp_build_context *bld, *out_offset = offset; } + static LLVMValueRef lp_build_sample_min(struct lp_build_context *bld, LLVMValueRef x, @@ -2200,6 +2213,7 @@ lp_build_sample_min(struct lp_build_context *bld, return lp_build_select(bld, mask, min, v0); } + static LLVMValueRef lp_build_sample_max(struct lp_build_context *bld, LLVMValueRef x, @@ -2217,6 +2231,7 @@ lp_build_sample_max(struct lp_build_context *bld, return lp_build_select(bld, mask, max, v0); } + static LLVMValueRef lp_build_sample_min_2d(struct lp_build_context *bld, LLVMValueRef x, @@ -2231,6 +2246,7 @@ lp_build_sample_min_2d(struct lp_build_context *bld, return lp_build_sample_min(bld, y, v0, v1); } + static LLVMValueRef lp_build_sample_max_2d(struct lp_build_context *bld, LLVMValueRef x, @@ -2245,6 +2261,7 @@ lp_build_sample_max_2d(struct lp_build_context *bld, return lp_build_sample_max(bld, y, v0, v1); } + static LLVMValueRef lp_build_sample_min_3d(struct lp_build_context *bld, LLVMValueRef x, @@ -2260,6 +2277,7 @@ lp_build_sample_min_3d(struct lp_build_context *bld, return lp_build_sample_min(bld, z, v0, v1); } + static LLVMValueRef lp_build_sample_max_3d(struct lp_build_context *bld, LLVMValueRef x, @@ -2275,6 +2293,7 @@ lp_build_sample_max_3d(struct lp_build_context *bld, return lp_build_sample_max(bld, z, v0, v1); } + void lp_build_reduce_filter(struct lp_build_context *bld, enum pipe_tex_reduction_mode mode, @@ -2303,6 +2322,7 @@ lp_build_reduce_filter(struct lp_build_context *bld, } } + void lp_build_reduce_filter_2d(struct lp_build_context *bld, enum pipe_tex_reduction_mode mode, @@ -2316,24 +2336,27 @@ lp_build_reduce_filter_2d(struct lp_build_context *bld, LLVMValueRef *v11, LLVMValueRef *out) { - unsigned chan; switch (mode) { case PIPE_TEX_REDUCTION_MIN: - for (chan = 0; chan < num_chan; chan++) - out[chan] = lp_build_sample_min_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan]); + for (unsigned chan = 0; chan < num_chan; chan++) + out[chan] = lp_build_sample_min_2d(bld, x, y, v00[chan], v01[chan], + v10[chan], v11[chan]); break; case PIPE_TEX_REDUCTION_MAX: - for (chan = 0; chan < num_chan; chan++) - out[chan] = lp_build_sample_max_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan]); + for (unsigned chan = 0; chan < num_chan; chan++) + out[chan] = lp_build_sample_max_2d(bld, x, y, v00[chan], v01[chan], + v10[chan], v11[chan]); break; case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE: default: - for (chan = 0; chan < num_chan; chan++) - out[chan] = lp_build_lerp_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan], flags); + for (unsigned chan = 0; chan < num_chan; chan++) + out[chan] = lp_build_lerp_2d(bld, x, y, v00[chan], v01[chan], + v10[chan], v11[chan], flags); break; } } + void lp_build_reduce_filter_3d(struct lp_build_context *bld, enum pipe_tex_reduction_mode mode, @@ -2352,23 +2375,22 @@ lp_build_reduce_filter_3d(struct lp_build_context *bld, LLVMValueRef *v111, LLVMValueRef *out) { - unsigned chan; switch (mode) { case PIPE_TEX_REDUCTION_MIN: - for (chan = 0; chan < num_chan; chan++) + for (unsigned chan = 0; chan < num_chan; chan++) out[chan] = lp_build_sample_min_3d(bld, x, y, z, v000[chan], v001[chan], v010[chan], v011[chan], v100[chan], v101[chan], v110[chan], v111[chan]); break; case PIPE_TEX_REDUCTION_MAX: - for (chan = 0; chan < num_chan; 
chan++) + for (unsigned chan = 0; chan < num_chan; chan++) out[chan] = lp_build_sample_max_3d(bld, x, y, z, v000[chan], v001[chan], v010[chan], v011[chan], v100[chan], v101[chan], v110[chan], v111[chan]); break; case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE: default: - for (chan = 0; chan < num_chan; chan++) + for (unsigned chan = 0; chan < num_chan; chan++) out[chan] = lp_build_lerp_3d(bld, x, y, z, v000[chan], v001[chan], v010[chan], v011[chan], v100[chan], v101[chan], v110[chan], v111[chan], @@ -2377,6 +2399,7 @@ lp_build_reduce_filter_3d(struct lp_build_context *bld, } } + /* * generated from * const float alpha = 2; @@ -2515,6 +2538,7 @@ static const float aniso_filter_table[1024] = { 0.137200, 0.136932, 0.136665, 0.136398, 0.136131, 0.135865, 0.135600, 0.135335, }; + const float * lp_build_sample_aniso_filter_table(void) { diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 19150f995..140568b99 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -116,8 +116,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, if (use_border) { use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1"); use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2"); - } - else { + } else { use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2"); } } @@ -132,8 +131,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, if (use_border) { use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1"); use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2"); - } - else { + } else { use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2"); } } @@ -174,8 +172,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, * Ex: * if (use_border) { * texel = border_color; - * } - * else { + * } else { * texel = sample_texture(coord); * } * As it is now, we always sample the texture, then selectively replace @@ -185,7 +182,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, if (use_border) { /* select texel color or border color depending on use_border. */ const struct util_format_description *format_desc = bld->format_desc; - int chan; struct lp_type border_type = bld->texel_type; border_type.length = 4; /* @@ -193,7 +189,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, * get optimized away eventually by sampler_view swizzle anyway but it's * easier too. */ - for (chan = 0; chan < 4; chan++) { + for (unsigned chan = 0; chan < 4; chan++) { unsigned chan_s; /* reverse-map channel... 
*/ if (util_format_has_stencil(format_desc)) { @@ -201,8 +197,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, chan_s = 0; else break; - } - else { + } else { for (chan_s = 0; chan_s < 4; chan_s++) { if (chan_s == format_desc->swizzle[chan]) { break; @@ -335,7 +330,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); LLVMValueRef coord0, coord1, weight; - switch(wrap_mode) { + switch (wrap_mode) { case PIPE_TEX_WRAP_REPEAT: if (is_pot) { /* mul by size and subtract 0.5 */ @@ -351,8 +346,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, /* repeat wrap */ coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, ""); coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, ""); - } - else { + } else { LLVMValueRef mask; if (offset) { offset = lp_build_int_to_float(coord_bld, offset); @@ -696,8 +690,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); LLVMValueRef icoord; - - switch(wrap_mode) { + + switch (wrap_mode) { case PIPE_TEX_WRAP_REPEAT: if (is_pot) { coord = lp_build_mul(coord_bld, coord, length_f); @@ -706,8 +700,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, icoord = lp_build_add(int_coord_bld, icoord, offset); } icoord = LLVMBuildAnd(builder, icoord, length_minus_one, ""); - } - else { + } else { if (offset) { offset = lp_build_int_to_float(coord_bld, offset); offset = lp_build_div(coord_bld, offset, length_f); @@ -845,10 +838,10 @@ lp_build_sample_comparefunc(struct lp_build_sample_context *bld, * are ordered except NOT_EQUAL which is unordered. 
*/ if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) { - res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func, + res = lp_build_cmp_ordered(texel_bld, + bld->static_sampler_state->compare_func, p, texel); - } - else { + } else { res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func, p, texel); } @@ -923,8 +916,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) { /* add cube layer to face */ z = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]); - } - else { + } else { z = coords[2]; } lp_build_name(z, "tex.z.layer"); @@ -1038,7 +1030,6 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL; LLVMValueRef xs[4], ys[4], zs[4]; LLVMValueRef neighbors[2][2][4]; - int chan, texel_index; boolean seamless_cube_filter, accurate_cube_corners; unsigned chan_swiz = bld->static_texture_state->swizzle_r; @@ -1049,7 +1040,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, case 2: chan_swiz = bld->static_texture_state->swizzle_b; break; case 3: chan_swiz = bld->static_texture_state->swizzle_a; break; default: - break; + break; } } @@ -1122,15 +1113,13 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, /* add cube layer to face */ z00 = z01 = z10 = z11 = z1 = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]); - } - else { + } else { z00 = z01 = z10 = z11 = z1 = coords[2]; /* cube face or layer */ } lp_build_name(z00, "tex.z0.layer"); lp_build_name(z1, "tex.z1.layer"); } - } - else { + } else { struct lp_build_if_state edge_if; LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2]; LLVMValueRef coord0, coord1, have_edge, have_corner; @@ -1186,7 +1175,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, /* needed for accurate corner filtering branch later, rely on 0 init */ have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner"); - for (texel_index = 0; texel_index < 4; texel_index++) { + for (unsigned texel_index = 0; texel_index < 4; texel_index++) { xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs"); ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys"); zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs"); @@ -1292,8 +1281,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMBuildStore(builder, cube_layer, zs[1]); LLVMBuildStore(builder, cube_layer, zs[2]); LLVMBuildStore(builder, cube_layer, zs[3]); - } - else { + } else { LLVMBuildStore(builder, face, zs[0]); LLVMBuildStore(builder, face, zs[1]); LLVMBuildStore(builder, face, zs[2]); @@ -1369,8 +1357,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, neighbors[0][0], neighbors[0][1], colors_out); - } - else { + } else { LLVMValueRef cmpval0, cmpval1; cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]); @@ -1379,8 +1366,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, cmpval0, cmpval1); colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0]; } - } - else { + } else { /* 2D/3D texture */ struct lp_build_if_state corner_if; LLVMValueRef colors0[4], colorss[4] = { 0 }; @@ -1470,7 +1456,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) { - for 
(chan = 0; chan < 4; chan++) { + for (unsigned chan = 0; chan < 4; chan++) { colors0[chan] = lp_build_mul(coord_bld, w00, neighbors[0][0][chan]); tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]); @@ -1480,8 +1466,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]); colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]); } - } - else { + } else { LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11; cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); @@ -1512,8 +1497,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]); colors0[1] = colors0[2] = colors0[3] = colors0[0]; } - } - else { + } else { /* * We don't have any weights to adjust, so instead calculate * the fourth texel as simply the average of the other 3. @@ -1619,8 +1603,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, colors0[3] = lp_build_swizzle_soa_channel(texel_bld, neighbors[0][0], chan_swiz); - } - else { + } else { /* Bilinear interpolate the four samples from the 2D image / 3D slice */ lp_build_reduce_filter_2d(texel_bld, bld->static_sampler_state->reduction_mode, @@ -1634,8 +1617,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, neighbors[1][1], colors0); } - } - else { + } else { LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11; cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]); @@ -1652,8 +1634,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, texel_bld->one, texel_bld->zero); colors0[3] = lp_build_select(texel_bld, cmpval00, texel_bld->one, texel_bld->zero); - } - else { + } else { colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart, cmpval00, cmpval01, cmpval10, cmpval11); colors0[1] = colors0[2] = colors0[3] = colors0[0]; @@ -1726,8 +1707,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, colors0, colors1, colors_out); - } - else { + } else { LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11; cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]); @@ -1742,10 +1722,9 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, 0); colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0]; } - } - else { + } else { /* 2D tex */ - for (chan = 0; chan < 4; chan++) { + for (unsigned chan = 0; chan < 4; chan++) { colors_out[chan] = colors0[chan]; } } @@ -1761,11 +1740,11 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, * gather on a channel which will always return 0 or 1 in any case... 
*/ if (chan_swiz == PIPE_SWIZZLE_1) { - for (chan = 0; chan < 4; chan++) { + for (unsigned chan = 0; chan < 4; chan++) { colors_out[chan] = texel_bld->one; } } else if (chan_swiz == PIPE_SWIZZLE_0) { - for (chan = 0; chan < 4; chan++) { + for (unsigned chan = 0; chan < 4; chan++) { colors_out[chan] = texel_bld->zero; } } @@ -1803,7 +1782,6 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef mipoff0 = NULL; LLVMValueRef mipoff1 = NULL; LLVMValueRef colors0[4], colors1[4]; - unsigned chan; /* sample the first mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel0, @@ -1811,19 +1789,18 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, &row_stride0_vec, &img_stride0_vec); if (bld->num_mips == 1) { data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); - } - else { + } else { /* This path should work for num_lods 1 too but slightly less efficient */ data_ptr0 = bld->base_ptr; mipoff0 = lp_build_get_mip_offsets(bld, ilevel0); } + if (img_filter == PIPE_TEX_FILTER_NEAREST) { lp_build_sample_image_nearest(bld, size0, row_stride0_vec, img_stride0_vec, data_ptr0, mipoff0, coords, offsets, colors0); - } - else { + } else { assert(img_filter == PIPE_TEX_FILTER_LINEAR); lp_build_sample_image_linear(bld, is_gather, size0, NULL, row_stride0_vec, img_stride0_vec, @@ -1832,7 +1809,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, } /* Store the first level's colors in the output variables */ - for (chan = 0; chan < 4; chan++) { + for (unsigned chan = 0; chan < 4; chan++) { LLVMBuildStore(builder, colors0[chan], colors_out[chan]); } @@ -1845,8 +1822,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT, lod_fpart, bld->lodf_bld.zero, "need_lerp"); - } - else { + } else { /* * We'll do mip filtering if any of the quads (or individual * pixel in case of per-pixel lod) need it. 
@@ -1875,8 +1851,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, &row_stride1_vec, &img_stride1_vec); if (bld->num_mips == 1) { data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); - } - else { + } else { data_ptr1 = bld->base_ptr; mipoff1 = lp_build_get_mip_offsets(bld, ilevel1); } @@ -1885,8 +1860,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, row_stride1_vec, img_stride1_vec, data_ptr1, mipoff1, coords, offsets, colors1); - } - else { + } else { lp_build_sample_image_linear(bld, FALSE, size1, NULL, row_stride1_vec, img_stride1_vec, data_ptr1, mipoff1, coords, offsets, @@ -1901,7 +1875,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, bld->texel_bld.type, lod_fpart); - for (chan = 0; chan < 4; chan++) { + for (unsigned chan = 0; chan < 4; chan++) { colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, colors0[chan], colors1[chan], 0); @@ -1946,7 +1920,6 @@ lp_build_sample_mipmap_both(struct lp_build_sample_context *bld, LLVMValueRef mipoff0 = NULL; LLVMValueRef mipoff1 = NULL; LLVMValueRef colors0[4], colors1[4]; - unsigned chan; /* sample the first mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel0, @@ -1954,8 +1927,7 @@ lp_build_sample_mipmap_both(struct lp_build_sample_context *bld, &row_stride0_vec, &img_stride0_vec); if (bld->num_mips == 1) { data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); - } - else { + } else { /* This path should work for num_lods 1 too but slightly less efficient */ data_ptr0 = bld->base_ptr; mipoff0 = lp_build_get_mip_offsets(bld, ilevel0); @@ -1967,7 +1939,7 @@ lp_build_sample_mipmap_both(struct lp_build_sample_context *bld, colors0); /* Store the first level's colors in the output variables */ - for (chan = 0; chan < 4; chan++) { + for (unsigned chan = 0; chan < 4; chan++) { LLVMBuildStore(builder, colors0[chan], colors_out[chan]); } @@ -2000,8 +1972,7 @@ lp_build_sample_mipmap_both(struct lp_build_sample_context *bld, &row_stride1_vec, &img_stride1_vec); if (bld->num_mips == 1) { data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); - } - else { + } else { data_ptr1 = bld->base_ptr; mipoff1 = lp_build_get_mip_offsets(bld, ilevel1); } @@ -2019,7 +1990,7 @@ lp_build_sample_mipmap_both(struct lp_build_sample_context *bld, bld->texel_bld.type, lod_fpart); - for (chan = 0; chan < 4; chan++) { + for (unsigned chan = 0; chan < 4; chan++) { colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, colors0[chan], colors1[chan], 0); @@ -2057,8 +2028,7 @@ lp_build_layer_coord(struct lp_build_sample_context *bld, out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers); *out_of_bounds = lp_build_or(int_coord_bld, out, out1); return layer; - } - else { + } else { LLVMValueRef maxlayer; LLVMValueRef s = is_cube_array ? 
lp_build_const_int32(bld->gallivm, 6) : bld->int_bld.one; @@ -2088,8 +2058,10 @@ lp_build_sample_ms_offset(struct lp_build_context *int_coord_bld, *offset = lp_build_add(int_coord_bld, *offset, sample_offset); } + #define WEIGHT_LUT_SIZE 1024 + static void lp_build_sample_aniso(struct lp_build_sample_context *bld, unsigned img_filter, @@ -2122,8 +2094,7 @@ lp_build_sample_aniso(struct lp_build_sample_context *bld, &row_stride0_vec, &img_stride0_vec); if (bld->num_mips == 1) { data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); - } - else { + } else { /* This path should work for num_lods 1 too but slightly less efficient */ data_ptr0 = bld->base_ptr; mipoff0 = lp_build_get_mip_offsets(bld, ilevel0); @@ -2169,14 +2140,15 @@ lp_build_sample_aniso(struct lp_build_sample_context *bld, scaling = lp_build_rcp(&bld->levelf_bld, scaling); if (bld->num_lods != length) { - if (bld->levelf_bld.type.length == 1) + if (bld->levelf_bld.type.length == 1) { scaling = lp_build_broadcast_scalar(coord_bld, scaling); - else + } else { scaling = lp_build_unpack_broadcast_aos_scalars(bld->gallivm, bld->levelf_bld.type, coord_bld->type, scaling); + } } ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, scaling); @@ -2564,7 +2536,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld, struct lp_derivatives cube_derivs; /* - printf("%s mip %d min %d mag %d\n", __FUNCTION__, + printf("%s mip %d min %d mag %d\n", __func__, mip_filter, min_filter, mag_filter); */ @@ -2582,11 +2554,10 @@ lp_build_sample_common(struct lp_build_sample_context *bld, * calculate / transform derivatives. */ if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) { - boolean need_derivs; - need_derivs = ((min_filter != mag_filter || - mip_filter != PIPE_TEX_MIPFILTER_NONE) && - !bld->static_sampler_state->min_max_lod_equal && - !explicit_lod); + boolean need_derivs = ((min_filter != mag_filter || + mip_filter != PIPE_TEX_MIPFILTER_NONE) && + !bld->static_sampler_state->min_max_lod_equal && + !explicit_lod); lp_build_cube_lookup(bld, coords, derivs, &cube_derivs, need_derivs); if (need_derivs) derivs = &cube_derivs; @@ -2599,8 +2570,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld, coords[3] = lp_build_layer_coord(bld, texture_index, TRUE, layer, NULL); /* because of seamless filtering can't add it to face (coords[2]) here. */ } - } - else if ((target == PIPE_TEXTURE_1D_ARRAY || + } else if ((target == PIPE_TEXTURE_1D_ARRAY || target == PIPE_TEXTURE_2D_ARRAY) && !is_lodq) { coords[2] = lp_build_iround(&bld->coord_bld, coords[2]); coords[2] = lp_build_layer_coord(bld, texture_index, FALSE, coords[2], NULL); @@ -2620,11 +2590,11 @@ lp_build_sample_common(struct lp_build_sample_context *bld, * too or do some other tricks to make it work). */ const struct util_format_description *format_desc = bld->format_desc; - unsigned chan_type; /* not entirely sure we couldn't end up with non-valid swizzle here */ - chan_type = format_desc->swizzle[0] <= PIPE_SWIZZLE_W ? - format_desc->channel[format_desc->swizzle[0]].type : - UTIL_FORMAT_TYPE_FLOAT; + const enum util_format_type chan_type = + format_desc->swizzle[0] <= PIPE_SWIZZLE_W + ? 
format_desc->channel[format_desc->swizzle[0]].type + : UTIL_FORMAT_TYPE_FLOAT; if (chan_type != UTIL_FORMAT_TYPE_FLOAT) { coords[4] = lp_build_clamp(&bld->coord_bld, coords[4], bld->coord_bld.zero, bld->coord_bld.one); @@ -2647,7 +2617,8 @@ lp_build_sample_common(struct lp_build_sample_context *bld, /* Need to compute lod either to choose mipmap levels or to * distinguish between minification/magnification with one mipmap level. */ - LLVMValueRef first_level_vec = lp_build_broadcast_scalar(&bld->int_size_in_bld, first_level); + LLVMValueRef first_level_vec = + lp_build_broadcast_scalar(&bld->int_size_in_bld, first_level); lp_build_lod_selector(bld, is_lodq, sampler_index, first_level_vec, coords[0], coords[1], coords[2], @@ -2780,8 +2751,7 @@ lp_build_clamp_border_color(struct lp_build_sample_context *bld, /* d/s needs special handling because both present means just sampling depth */ if (util_format_is_depth_and_stencil(format_desc->format)) { chan = format_desc->swizzle[0]; - } - else { + } else { chan = util_format_get_first_non_void_channel(format_desc->format); } if (chan >= 0 && chan <= PIPE_SWIZZLE_W) { @@ -2792,8 +2762,7 @@ lp_build_clamp_border_color(struct lp_build_sample_context *bld, if (chan_norm) { min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F); max_clamp = vec4_bld.one; - } - else if (chan_pure) { + } else if (chan_pure) { /* * Border color was stored as int, hence need min/max clamp * only if chan has less than 32 bits.. @@ -2807,24 +2776,23 @@ lp_build_clamp_border_color(struct lp_build_sample_context *bld, } } /* TODO: no idea about non-pure, non-normalized! */ - } - else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) { + } else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) { if (chan_norm) { min_clamp = vec4_bld.zero; max_clamp = vec4_bld.one; - } - /* - * Need a ugly hack here, because we don't have Z32_FLOAT_X8X24 - * we use Z32_FLOAT_S8X24 to imply sampling depth component - * and ignoring stencil, which will blow up here if we try to - * do a uint clamp in a float texel build... - * And even if we had that format, mesa st also thinks using z24s8 - * means depth sampling ignoring stencil. - */ - else if (chan_pure) { + } else if (chan_pure) { /* - * Border color was stored as uint, hence never need min - * clamp, and only need max clamp if chan has less than 32 bits. + * Need a ugly hack here, because we don't have Z32_FLOAT_X8X24 + * we use Z32_FLOAT_S8X24 to imply sampling depth component and + * ignoring stencil, which will blow up here if we try to do a + * uint clamp in a float texel build... And even if we had + * that format, mesa st also thinks using z24s8 means depth + * sampling ignoring stencil. + */ + + /* + * Border color was stored as uint, hence never need min clamp, + * and only need max clamp if chan has less than 32 bits. */ unsigned chan_size = format_desc->channel[chan].size; if (chan_size < 32) { @@ -2833,8 +2801,7 @@ lp_build_clamp_border_color(struct lp_build_sample_context *bld, } /* TODO: no idea about non-pure, non-normalized! */ } - } - else if (chan_type == UTIL_FORMAT_TYPE_FIXED) { + } else if (chan_type == UTIL_FORMAT_TYPE_FIXED) { /* TODO: I have no idea what clamp this would need if any! 
*/ } } @@ -2862,17 +2829,15 @@ lp_build_clamp_border_color(struct lp_build_sample_context *bld, default: break; } - } - else { + } else { /* cannot figure this out from format description */ if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { /* s3tc formats are always unorm */ min_clamp = vec4_bld.zero; max_clamp = vec4_bld.one; - } - else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC || - format_desc->layout == UTIL_FORMAT_LAYOUT_ETC || - format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { + } else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC || + format_desc->layout == UTIL_FORMAT_LAYOUT_ETC || + format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { switch (format_desc->format) { case PIPE_FORMAT_RGTC1_UNORM: case PIPE_FORMAT_RGTC2_UNORM: @@ -2901,12 +2866,11 @@ lp_build_clamp_border_color(struct lp_build_sample_context *bld, assert(0); break; } - } - /* - * all others from subsampled/other group, though we don't care - * about yuv (and should not have any from zs here) - */ - else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){ + } else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){ + /* + * all others from subsampled/other group, though we don't care + * about yuv (and should not have any from zs here) + */ switch (format_desc->format) { case PIPE_FORMAT_R8G8_B8G8_UNORM: case PIPE_FORMAT_G8R8_G8B8_UNORM: @@ -3016,8 +2980,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, coords, offsets, ilevel0, ilevel1, lod_fpart, texels); - } - else { + } else { /* * Could also get rid of the if-logic and always use mipmap_both, both * for the single lod and multi-lod case if nothing really uses this. @@ -3050,8 +3013,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, texels); } lp_build_endif(&if_ctx); - } - else { + } else { LLVMValueRef need_linear, linear_mask; unsigned mip_filter_for_nearest; struct lp_build_if_state if_ctx; @@ -3059,8 +3021,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, if (min_filter == PIPE_TEX_FILTER_LINEAR) { linear_mask = lod_positive; mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE; - } - else { + } else { linear_mask = lp_build_not(&bld->lodi_bld, lod_positive); mip_filter_for_nearest = mip_filter; } @@ -3152,8 +3113,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld, if (bld->num_mips != int_coord_bld->type.length) { ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type, perquadi_bld->type, explicit_lod, 0); - } - else { + } else { ilevel = explicit_lod; } LLVMValueRef last_level = bld->dynamic_state->last_level(bld->gallivm, @@ -3165,13 +3125,11 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld, first_level, last_level, ilevel, &ilevel, out_of_bound_ret_zero ? 
&out_of_bounds : NULL); - } - else { + } else { assert(bld->num_mips == 1); if (bld->static_texture_state->target != PIPE_BUFFER) { ilevel = first_level; - } - else { + } else { ilevel = lp_build_const_int32(bld->gallivm, 0); } } @@ -3186,8 +3144,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld, if (out_of_bound_ret_zero) { z = lp_build_layer_coord(bld, texture_unit, FALSE, z, &out1); out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); - } - else { + } else { z = lp_build_layer_coord(bld, texture_unit, FALSE, z, NULL); } } @@ -3311,8 +3268,7 @@ lp_build_texel_type(struct lp_type texel_type, } else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) { texel_type = lp_type_uint_vec(texel_type.width, texel_type.width * texel_type.length); } - } - else if (util_format_has_stencil(format_desc) && + } else if (util_format_has_stencil(format_desc) && !util_format_has_depth(format_desc)) { /* for stencil only formats, sample stencil (uint) */ texel_type = lp_type_uint_vec(texel_type.width, texel_type.width * texel_type.length); @@ -3389,17 +3345,14 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, lod_bias = lod; assert(lod); assert(derivs == NULL); - } - else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) { + } else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) { explicit_lod = lod; assert(lod); assert(derivs == NULL); - } - else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { + } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { assert(derivs); assert(lod == NULL); - } - else { + } else { assert(derivs == NULL); assert(lod == NULL); } @@ -3537,15 +3490,13 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, */ bld.num_mips = type.length; bld.num_lods = type.length; - } - else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT || + } else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT || (explicit_lod || lod_bias || derivs)) { if ((!op_is_tex && target != PIPE_BUFFER) || (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { bld.num_mips = type.length; bld.num_lods = type.length; - } - else if (op_is_tex && min_img_filter != mag_img_filter) { + } else if (op_is_tex && min_img_filter != mag_img_filter) { bld.num_mips = 1; bld.num_lods = type.length; } @@ -3555,8 +3506,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { bld.num_mips = num_quads; bld.num_lods = num_quads; - } - else if (op_is_tex && min_img_filter != mag_img_filter) { + } else if (op_is_tex && min_img_filter != mag_img_filter) { bld.num_mips = 1; bld.num_lods = num_quads; } @@ -3644,8 +3594,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, bld.int_tex_blocksize = LLVMConstInt(i32t, res_bw, 0); bld.int_tex_blocksize_log2 = LLVMConstInt(i32t, util_logbase2(res_bw), 0); bld.int_view_blocksize = LLVMConstInt(i32t, bw, 0); - } - else { + } else { bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef, tex_width, LLVMConstInt(i32t, 0, 0), ""); @@ -3769,7 +3718,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, if ((gallivm_debug & GALLIVM_DEBUG_PERF) && !use_aos && util_format_fits_8unorm(bld.format_desc)) { debug_printf("%s: using floating point linear filtering for %s\n", - __FUNCTION__, bld.format_desc->short_name); + __func__, bld.format_desc->short_name); debug_printf(" min_img %d mag_img %d mip %d target %d seamless %d" " wraps %d wrapt %d wrapr %d\n", derived_sampler_state.min_img_filter, @@ -3825,8 +3774,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, ilevel0, 
ilevel1, texel_out); } - } - else { + } else { struct lp_build_sample_context bld4; struct lp_type type4 = type; LLVMValueRef texelout4[4]; @@ -3887,8 +3835,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { bld4.num_mips = type4.length; bld4.num_lods = type4.length; - } - else if (op_is_tex && min_img_filter != mag_img_filter) { + } else if (op_is_tex && min_img_filter != mag_img_filter) { bld4.num_mips = 1; bld4.num_lods = type4.length; } @@ -3963,9 +3910,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, lod_positive4, lod_fpart4, ilevel04, ilevel14, texelout4); - } - - else { + } else { /* this path is currently unreachable and hence might break easily... */ LLVMValueRef newcoords4[5]; newcoords4[0] = s4; @@ -4068,15 +4013,14 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm, struct lp_derivatives *deriv_ptr = NULL; unsigned num_param = 0; unsigned num_coords, num_derivs, num_offsets, layer; - enum lp_sampler_lod_control lod_control; - enum lp_sampler_op_type op_type; boolean need_cache = FALSE; - lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >> - LP_SAMPLER_LOD_CONTROL_SHIFT; + const enum lp_sampler_lod_control lod_control = + (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) + >> LP_SAMPLER_LOD_CONTROL_SHIFT; - op_type = (sample_key & LP_SAMPLER_OP_TYPE_MASK) >> - LP_SAMPLER_OP_TYPE_SHIFT; + const enum lp_sampler_op_type op_type = + (sample_key & LP_SAMPLER_OP_TYPE_MASK) >> LP_SAMPLER_OP_TYPE_SHIFT; get_target_info(static_texture_state->target, &num_coords, &num_derivs, &num_offsets, &layer); @@ -4124,8 +4068,7 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm, if (lod_control == LP_SAMPLER_LOD_BIAS || lod_control == LP_SAMPLER_LOD_EXPLICIT) { lod = LLVMGetParam(function, num_param++); - } - else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { + } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { for (unsigned i = 0; i < num_derivs; i++) { derivs.ddx[i] = LLVMGetParam(function, num_param++); derivs.ddy[i] = LLVMGetParam(function, num_param++); @@ -4269,8 +4212,7 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm, if (lod_control == LP_SAMPLER_LOD_BIAS || lod_control == LP_SAMPLER_LOD_EXPLICIT) { arg_types[num_param++] = LLVMTypeOf(params->lod); - } - else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { + } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { for (unsigned i = 0; i < num_derivs; i++) { arg_types[num_param++] = LLVMTypeOf(derivs->ddx[i]); arg_types[num_param++] = LLVMTypeOf(derivs->ddy[i]); @@ -4339,8 +4281,7 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm, if (lod_control == LP_SAMPLER_LOD_BIAS || lod_control == LP_SAMPLER_LOD_EXPLICIT) { args[num_args++] = params->lod; - } - else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { + } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { for (unsigned i = 0; i < num_derivs; i++) { args[num_args++] = derivs->ddx[i]; args[num_args++] = derivs->ddy[i]; @@ -4353,7 +4294,6 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm, LLVMBasicBlockRef bb = LLVMGetInsertBlock(builder); LLVMValueRef inst = LLVMGetLastInstruction(bb); LLVMSetInstructionCallConv(inst, LLVMFastCallConv); - } @@ -4416,8 +4356,7 @@ lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state, params->texel[i] = LLVMBuildExtractValue(gallivm->builder, tex_ret, i, ""); } - } - else { + } else { lp_build_sample_soa_code(gallivm, static_texture_state, static_sampler_state, @@ -4644,8 +4583,7 @@ 
lp_build_size_query_soa(struct gallivm_state *gallivm, out = lp_build_or(&leveli_bld, out, out1); if (num_lods == 1) { out = lp_build_broadcast_scalar(&bld_int_vec4, out); - } - else { + } else { /* TODO */ assert(0); } @@ -4678,8 +4616,7 @@ lp_build_size_query_soa(struct gallivm_state *gallivm, LLVMValueRef num_levels; if (static_state->level_zero_only) { num_levels = bld_int_scalar.one; - } - else { + } else { LLVMValueRef last_level; last_level = dynamic_state->last_level(gallivm, context_type, @@ -4694,6 +4631,14 @@ lp_build_size_query_soa(struct gallivm_state *gallivm, lp_build_vec_type(gallivm, params->int_type), num_levels); } + + if (target == PIPE_BUFFER) { + struct lp_build_context bld_int; + lp_build_context_init(&bld_int, gallivm, params->int_type); + + params->sizes_out[0] = lp_build_min(&bld_int, params->sizes_out[0], + lp_build_const_int_vec(gallivm, params->int_type, LP_MAX_TEXEL_BUFFER_ELEMENTS)); + } } @@ -4932,12 +4877,13 @@ lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state, outdata[chan] = lp_build_select(&texel_bld, out_of_bounds, texel_bld.zero, outdata[chan]); } - if (format_desc->swizzle[3] == PIPE_SWIZZLE_1) + if (format_desc->swizzle[3] == PIPE_SWIZZLE_1) { outdata[3] = lp_build_select(&texel_bld, out_of_bounds, texel_bld.one, outdata[3]); - else + } else { outdata[3] = lp_build_select(&texel_bld, out_of_bounds, texel_bld.zero, outdata[3]); + } } else if (params->img_op == LP_IMG_STORE) { lp_build_store_rgba_soa(gallivm, format_desc, params->type, params->exec_mask, base_ptr, offset, diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index edf9cf89a..d859e6ef9 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -37,7 +37,7 @@ * @author Jose Fonseca <jfonseca@vmware.com> */ -#include "pipe/p_config.h" +#include "util/detect.h" #include "pipe/p_shader_tokens.h" #include "util/u_debug.h" #include "util/u_math.h" diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index d98d20e11..916386d31 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -36,7 +36,7 @@ * Brian Paul, and others. 
*/ -#include "pipe/p_config.h" +#include "util/detect.h" #include "pipe/p_shader_tokens.h" #include "util/u_debug.h" #include "util/u_math.h" diff --git a/lib/mesa/src/gallium/auxiliary/hud/hud_context.c b/lib/mesa/src/gallium/auxiliary/hud/hud_context.c index 6b6e15653..f5b11c425 100644 --- a/lib/mesa/src/gallium/auxiliary/hud/hud_context.c +++ b/lib/mesa/src/gallium/auxiliary/hud/hud_context.c @@ -68,12 +68,18 @@ #include "tgsi/tgsi_text.h" #include "tgsi/tgsi_dump.h" -/* Control the visibility of all HUD contexts */ -static boolean huds_visible = TRUE; -static int hud_scale = 1; +#define HUD_DEFAULT_VISIBILITY TRUE +#define HUD_DEFAULT_SCALE 1 +#define HUD_DEFAULT_ROTATION 0 +#define HUD_DEFAULT_OPACITY 66 +/* Control the visibility of all HUD contexts */ +static boolean huds_visible = HUD_DEFAULT_VISIBILITY; +static int hud_scale = HUD_DEFAULT_SCALE; +static int hud_rotate = HUD_DEFAULT_ROTATION; +static float hud_opacity = HUD_DEFAULT_OPACITY / 100.0f; -#ifdef PIPE_OS_UNIX +#if DETECT_OS_UNIX static void signal_visible_handler(int sig, siginfo_t *siginfo, void *context) { @@ -219,6 +225,24 @@ hud_draw_string(struct hud_context *hud, unsigned x, unsigned y, hud->text.num_vertices += num/4; } +static const char * +get_float_modifier(double d) +{ + /* Round to 3 decimal places so as not to print trailing zeros. */ + if (d*1000 != (int)(d*1000)) + d = round(d * 1000) / 1000; + + /* Show at least 4 digits with at most 3 decimal places, but not zeros. */ + if (d >= 1000 || d == (int)d) + return "%.0f"; + else if (d >= 100 || d*10 == (int)(d*10)) + return "%.1f"; + else if (d >= 10 || d*100 == (int)(d*100)) + return "%.2f"; + else + return "%.3f"; +} + static void number_to_human_readable(double num, enum pipe_driver_query_type type, char *out) @@ -295,20 +319,9 @@ number_to_human_readable(double num, enum pipe_driver_query_type type, d /= divisor; unit++; } - - /* Round to 3 decimal places so as not to print trailing zeros. */ - if (d*1000 != (int)(d*1000)) - d = round(d * 1000) / 1000; - - /* Show at least 4 digits with at most 3 decimal places, but not zeros. 
*/ - if (d >= 1000 || d == (int)d) - sprintf(out, "%.0f%s", d, units[unit]); - else if (d >= 100 || d*10 == (int)(d*10)) - sprintf(out, "%.1f%s", d, units[unit]); - else if (d >= 10 || d*100 == (int)(d*100)) - sprintf(out, "%.2f%s", d, units[unit]); - else - sprintf(out, "%.3f%s", d, units[unit]); + int n = sprintf(out, get_float_modifier(d), d); + if (n > 0) + sprintf(&out[n], "%s", units[unit]); } static void @@ -486,8 +499,21 @@ hud_draw_results(struct hud_context *hud, struct pipe_resource *tex) hud->fb_width = tex->width0; hud->fb_height = tex->height0; - hud->constants.two_div_fb_width = 2.0f / hud->fb_width; - hud->constants.two_div_fb_height = 2.0f / hud->fb_height; + float th = hud_rotate * (M_PI / 180.0f); + hud->constants.rotate[0] = cos(th); + hud->constants.rotate[1] = -sin(th); + hud->constants.rotate[2] = sin(th); + hud->constants.rotate[3] = cos(th); + + /* invert the aspect ratio when we rotate the hud */ + if (hud_rotate % 180 == 90) { + hud->constants.two_div_fb_height = 2.0f / hud->fb_width; + hud->constants.two_div_fb_width = 2.0f / hud->fb_height; + } else { + assert(hud_rotate % 180 == 0); + hud->constants.two_div_fb_width = 2.0f / hud->fb_width; + hud->constants.two_div_fb_height = 2.0f / hud->fb_height; + } cso_save_state(cso, (CSO_BIT_FRAMEBUFFER | CSO_BIT_SAMPLE_MASK | @@ -530,6 +556,7 @@ hud_draw_results(struct hud_context *hud, struct pipe_resource *tex) fb.zsbuf = NULL; fb.width = hud->fb_width; fb.height = hud->fb_height; + fb.resolve = NULL; viewport.scale[0] = 0.5f * hud->fb_width; viewport.scale[1] = 0.5f * hud->fb_height; @@ -568,7 +595,7 @@ hud_draw_results(struct hud_context *hud, struct pipe_resource *tex) hud->constants.color[0] = 0; hud->constants.color[1] = 0; hud->constants.color[2] = 0; - hud->constants.color[3] = 0.666f; + hud->constants.color[3] = hud_opacity; hud->constants.translate[0] = 0; hud->constants.translate[1] = 0; hud->constants.scale[0] = hud_scale; @@ -627,10 +654,10 @@ done: /* restore states not restored by cso */ if (hud->st) { - hud->st->invalidate_state(hud->st, - ST_INVALIDATE_FS_SAMPLER_VIEWS | - ST_INVALIDATE_VS_CONSTBUF0 | - ST_INVALIDATE_VERTEX_BUFFERS); + hud->st_invalidate_state(hud->st, + ST_INVALIDATE_FS_SAMPLER_VIEWS | + ST_INVALIDATE_VS_CONSTBUF0 | + ST_INVALIDATE_VERTEX_BUFFERS); } pipe_surface_reference(&surf, NULL); @@ -971,8 +998,12 @@ hud_graph_add_value(struct hud_graph *gr, double value) value = value > gr->pane->ceiling ? 
gr->pane->ceiling : value; if (gr->fd) { + if (gr->fd == stdout) { + fprintf(gr->fd, "%s: ", gr->name); + } if (fabs(value - lround(value)) > FLT_EPSILON) { - fprintf(gr->fd, "%f\n", value); + fprintf(gr->fd, get_float_modifier(value), value); + fprintf(gr->fd, "\n"); } else { fprintf(gr->fd, "%" PRIu64 "\n", (uint64_t) lround(value)); @@ -1042,11 +1073,9 @@ static void strcat_without_spaces(char *dst, const char *src) * is a HUD variable such as "fps", or "cpu" */ static void -hud_graph_set_dump_file(struct hud_graph *gr) +hud_graph_set_dump_file(struct hud_graph *gr, const char *hud_dump_dir, bool to_stdout) { - const char *hud_dump_dir = getenv("GALLIUM_HUD_DUMP_DIR"); - - if (hud_dump_dir && access(hud_dump_dir, W_OK) == 0) { + if (hud_dump_dir) { char *dump_file = malloc(strlen(hud_dump_dir) + sizeof(PATH_SEP) + sizeof(gr->name)); if (dump_file) { @@ -1054,12 +1083,15 @@ hud_graph_set_dump_file(struct hud_graph *gr) strcat(dump_file, PATH_SEP); strcat_without_spaces(dump_file, gr->name); gr->fd = fopen(dump_file, "w+"); - if (gr->fd) { - /* flush output after each line is written */ - setvbuf(gr->fd, NULL, _IOLBF, 0); - } free(dump_file); } + } else if (to_stdout) { + gr->fd = stdout; + } + + if (gr->fd) { + /* flush output after each line is written */ + setvbuf(gr->fd, NULL, _IOLBF, 0); } } @@ -1185,7 +1217,7 @@ has_pipeline_stats_query(struct pipe_screen *screen) static void hud_parse_env_var(struct hud_context *hud, struct pipe_screen *screen, - const char *env) + const char *env, unsigned period_ms) { unsigned num, i; char name_a[256], s[256]; @@ -1193,12 +1225,13 @@ hud_parse_env_var(struct hud_context *hud, struct pipe_screen *screen, struct hud_pane *pane = NULL; unsigned x = 10, y = 10, y_simple = 10; unsigned width = 251, height = 100; - unsigned period = 500 * 1000; /* default period (1/2 second) */ + unsigned period = period_ms * 1000; uint64_t ceiling = UINT64_MAX; unsigned column_width = 251; boolean dyn_ceiling = false; boolean reset_colors = false; boolean sort_items = false; + boolean to_stdout = false; const char *period_env; if (strncmp(env, "simple,", 7) == 0) { @@ -1359,6 +1392,9 @@ hud_parse_env_var(struct hud_context *hud, struct pipe_screen *screen, PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE, 0); } + else if (strcmp(name, "stdout") == 0) { + to_stdout = true; + } else { boolean processed = FALSE; @@ -1509,11 +1545,14 @@ hud_parse_env_var(struct hud_context *hud, struct pipe_screen *screen, } } - LIST_FOR_EACH_ENTRY(pane, &hud->pane_list, head) { - struct hud_graph *gr; + const char *hud_dump_dir = getenv("GALLIUM_HUD_DUMP_DIR"); + if ((hud_dump_dir && access(hud_dump_dir, W_OK) == 0) || to_stdout) { + LIST_FOR_EACH_ENTRY(pane, &hud->pane_list, head) { + struct hud_graph *gr; - LIST_FOR_EACH_ENTRY(gr, &pane->graph_list, head) { - hud_graph_set_dump_file(gr); + LIST_FOR_EACH_ENTRY(gr, &pane->graph_list, head) { + hud_graph_set_dump_file(gr, hud_dump_dir, to_stdout); + } } } } @@ -1570,6 +1609,7 @@ print_help(struct pipe_screen *screen) puts(" Example: GALLIUM_HUD=\".w256.h64.x1600.y520.d.c1000fps+cpu,.datom-count\""); puts(""); puts(" Available names:"); + puts(" stdout (prints the counters value to stdout)"); puts(" fps"); puts(" frametime"); puts(" cpu"); @@ -1660,7 +1700,8 @@ hud_unset_draw_context(struct hud_context *hud) static bool hud_set_draw_context(struct hud_context *hud, struct cso_context *cso, - struct st_context_iface *st) + struct st_context *st, + hud_st_invalidate_state_func st_invalidate_state) { struct pipe_context *pipe = 
cso_get_pipe_context(cso); @@ -1668,6 +1709,7 @@ hud_set_draw_context(struct hud_context *hud, struct cso_context *cso, hud->pipe = pipe; hud->cso = cso; hud->st = st; + hud->st_invalidate_state = st_invalidate_state; struct pipe_sampler_view view_templ; u_sampler_view_default_template( @@ -1721,15 +1763,20 @@ hud_set_draw_context(struct hud_context *hud, struct cso_context *cso, "DCL OUT[2], GENERIC[0]\n" /* texcoord */ /* [0] = color, * [1] = (2/fb_width, 2/fb_height, xoffset, yoffset) - * [2] = (xscale, yscale, 0, 0) */ - "DCL CONST[0][0..2]\n" - "DCL TEMP[0]\n" + * [2] = (xscale, yscale, 0, 0) + * [3] = rotation_matrix */ + "DCL CONST[0][0..3]\n" + "DCL TEMP[0..2]\n" "IMM[0] FLT32 { -1, 0, 0, 1 }\n" /* v = in * (xscale, yscale) + (xoffset, yoffset) */ "MAD TEMP[0].xy, IN[0], CONST[0][2].xyyy, CONST[0][1].zwww\n" - /* pos = v * (2 / fb_width, 2 / fb_height) - (1, 1) */ - "MAD OUT[0].xy, TEMP[0], CONST[0][1].xyyy, IMM[0].xxxx\n" + /* v = v * (2 / fb_width, 2 / fb_height) - (1, 1) */ + "MAD TEMP[1].xy, TEMP[0], CONST[0][1].xyyy, IMM[0].xxxx\n" + + /* pos = rotation_matrix * v */ + "MUL TEMP[2].xyzw, TEMP[1].xyxy, CONST[0][3].xyzw\n" + "ADD OUT[0].xy, TEMP[2].xzzz, TEMP[2].ywww\n" "MOV OUT[0].zw, IMM[0]\n" "MOV OUT[1], CONST[0][0]\n" @@ -1758,16 +1805,21 @@ hud_set_draw_context(struct hud_context *hud, struct cso_context *cso, "DCL OUT[1], GENERIC[0]\n" /* texcoord */ /* [0] = color, * [1] = (2/fb_width, 2/fb_height, xoffset, yoffset) - * [2] = (xscale, yscale, 0, 0) */ - "DCL CONST[0][0..2]\n" - "DCL TEMP[0]\n" + * [2] = (xscale, yscale, 0, 0) + * [3] = rotation_matrix */ + "DCL CONST[0][0..3]\n" + "DCL TEMP[0..2]\n" "IMM[0] FLT32 { -1, 0, 0, 1 }\n" "IMM[1] FLT32 { 0.0078125, 0.00390625, 1, 1 }\n" // 1.0 / 128, 1.0 / 256, 1, 1 /* v = in * (xscale, yscale) + (xoffset, yoffset) */ "MAD TEMP[0].xy, IN[0], CONST[0][2].xyyy, CONST[0][1].zwww\n" /* pos = v * (2 / fb_width, 2 / fb_height) - (1, 1) */ - "MAD OUT[0].xy, TEMP[0], CONST[0][1].xyyy, IMM[0].xxxx\n" + "MAD TEMP[1].xy, TEMP[0], CONST[0][1].xyyy, IMM[0].xxxx\n" + + /* pos = rotation_matrix * v */ + "MUL TEMP[2].xyzw, TEMP[1].xyxy, CONST[0][3].xyzw\n" + "ADD OUT[0].xy, TEMP[2].xzzz, TEMP[2].ywww\n" "MOV OUT[0].zw, IMM[0]\n" "MUL OUT[1], IN[1], IMM[1]\n" @@ -1831,8 +1883,9 @@ hud_set_record_context(struct hud_context *hud, struct pipe_context *pipe) * record queries in one context and draw them in another. 
*/ struct hud_context * -hud_create(struct cso_context *cso, struct st_context_iface *st, - struct hud_context *share) +hud_create(struct cso_context *cso, struct hud_context *share, + struct st_context *st, + hud_st_invalidate_state_func st_invalidate_state) { const char *share_env = debug_get_option("GALLIUM_HUD_SHARE", NULL); unsigned record_ctx = 0, draw_ctx = 0; @@ -1856,7 +1909,7 @@ hud_create(struct cso_context *cso, struct st_context_iface *st, if (context_id == draw_ctx) { assert(!share->pipe); - hud_set_draw_context(share, cso, st); + hud_set_draw_context(share, cso, st, st_invalidate_state); } return share; @@ -1865,16 +1918,36 @@ hud_create(struct cso_context *cso, struct st_context_iface *st, struct pipe_screen *screen = cso_get_pipe_context(cso)->screen; struct hud_context *hud; unsigned i; - const char *env = debug_get_option("GALLIUM_HUD", NULL); -#ifdef PIPE_OS_UNIX + unsigned default_period_ms = 500;/* default period (1/2 second) */ + const char *show_fps = getenv("LIBGL_SHOW_FPS"); + bool emulate_libgl_show_fps = false; + if (show_fps) { + default_period_ms = atoi(show_fps) * 1000; + if (default_period_ms) + emulate_libgl_show_fps = true; + else + default_period_ms = 500; + } + const char *env = debug_get_option("GALLIUM_HUD", + emulate_libgl_show_fps ? "stdout,fps" : NULL); +#if DETECT_OS_UNIX unsigned signo = debug_get_num_option("GALLIUM_HUD_TOGGLE_SIGNAL", 0); static boolean sig_handled = FALSE; struct sigaction action; memset(&action, 0, sizeof(action)); #endif - huds_visible = debug_get_bool_option("GALLIUM_HUD_VISIBLE", TRUE); - hud_scale = debug_get_num_option("GALLIUM_HUD_SCALE", 1); + huds_visible = debug_get_bool_option("GALLIUM_HUD_VISIBLE", !emulate_libgl_show_fps); + hud_opacity = debug_get_num_option("GALLIUM_HUD_OPACITY", HUD_DEFAULT_OPACITY) / 100.0f; + hud_scale = debug_get_num_option("GALLIUM_HUD_SCALE", HUD_DEFAULT_SCALE); + hud_rotate = debug_get_num_option("GALLIUM_HUD_ROTATION", HUD_DEFAULT_ROTATION) % 360; + if (hud_rotate < 0) { + hud_rotate += 360; + } + if (hud_rotate % 90 != 0) { + fprintf(stderr, "gallium_hud: rotation must be a multiple of 90. 
Falling back to 0.\n"); + hud_rotate = 0; + } if (!env || !*env) return NULL; @@ -1953,7 +2026,7 @@ hud_create(struct cso_context *cso, struct st_context_iface *st, list_inithead(&hud->pane_list); /* setup sig handler once for all hud contexts */ -#ifdef PIPE_OS_UNIX +#if DETECT_OS_UNIX if (!sig_handled && signo != 0) { action.sa_sigaction = &signal_visible_handler; action.sa_flags = SA_SIGINFO; @@ -1971,9 +2044,9 @@ hud_create(struct cso_context *cso, struct st_context_iface *st, if (record_ctx == 0) hud_set_record_context(hud, cso_get_pipe_context(cso)); if (draw_ctx == 0) - hud_set_draw_context(hud, cso, st); + hud_set_draw_context(hud, cso, st, st_invalidate_state); - hud_parse_env_var(hud, screen, env); + hud_parse_env_var(hud, screen, env, default_period_ms); return hud; } diff --git a/lib/mesa/src/gallium/auxiliary/hud/hud_context.h b/lib/mesa/src/gallium/auxiliary/hud/hud_context.h index ed5dd5dbf..ad495970d 100644 --- a/lib/mesa/src/gallium/auxiliary/hud/hud_context.h +++ b/lib/mesa/src/gallium/auxiliary/hud/hud_context.h @@ -33,11 +33,15 @@ struct cso_context; struct pipe_context; struct pipe_resource; struct util_queue_monitoring; -struct st_context_iface; +struct st_context; + +typedef void (*hud_st_invalidate_state_func)(struct st_context *st, + unsigned flags); struct hud_context * -hud_create(struct cso_context *cso, struct st_context_iface *st, - struct hud_context *share); +hud_create(struct cso_context *cso, struct hud_context *share, + struct st_context *st, + hud_st_invalidate_state_func st_invalidate_state); void hud_destroy(struct hud_context *hud, struct cso_context *cso); diff --git a/lib/mesa/src/gallium/auxiliary/hud/hud_cpu.c b/lib/mesa/src/gallium/auxiliary/hud/hud_cpu.c index 820e7d710..a4313b2c7 100644 --- a/lib/mesa/src/gallium/auxiliary/hud/hud_cpu.c +++ b/lib/mesa/src/gallium/auxiliary/hud/hud_cpu.c @@ -30,18 +30,18 @@ #include "hud/hud_private.h" #include "util/os_time.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "util/u_memory.h" #include "util/u_queue.h" #include <stdio.h> #include <inttypes.h> -#ifdef PIPE_OS_WINDOWS +#if DETECT_OS_WINDOWS #include <windows.h> #endif -#if defined(PIPE_OS_BSD) +#if DETECT_OS_BSD #include <sys/types.h> #include <sys/sysctl.h> -#if defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD) +#if DETECT_OS_NETBSD || DETECT_OS_OPENBSD #include <sys/sched.h> #else #include <sys/resource.h> @@ -49,7 +49,7 @@ #endif -#ifdef PIPE_OS_WINDOWS +#if DETECT_OS_WINDOWS static inline uint64_t filetime_to_scalar(FILETIME ft) @@ -95,12 +95,12 @@ get_cpu_stats(unsigned cpu_index, uint64_t *busy_time, uint64_t *total_time) return TRUE; } -#elif defined(PIPE_OS_BSD) +#elif DETECT_OS_BSD static boolean get_cpu_stats(unsigned cpu_index, uint64_t *busy_time, uint64_t *total_time) { -#if defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD) +#if DETECT_OS_NETBSD || DETECT_OS_OPENBSD uint64_t cp_time[CPUSTATES]; #else long cp_time[CPUSTATES]; @@ -110,12 +110,12 @@ get_cpu_stats(unsigned cpu_index, uint64_t *busy_time, uint64_t *total_time) if (cpu_index == ALL_CPUS) { len = sizeof(cp_time); -#if defined(PIPE_OS_NETBSD) +#if DETECT_OS_NETBSD int mib[] = { CTL_KERN, KERN_CP_TIME }; if (sysctl(mib, ARRAY_SIZE(mib), cp_time, &len, NULL, 0) == -1) return FALSE; -#elif defined(PIPE_OS_OPENBSD) +#elif DETECT_OS_OPENBSD int mib[] = { CTL_KERN, KERN_CPTIME }; long sum_cp_time[CPUSTATES]; @@ -130,13 +130,13 @@ get_cpu_stats(unsigned cpu_index, uint64_t *busy_time, uint64_t *total_time) return FALSE; #endif } else { -#if defined(PIPE_OS_NETBSD) 
+#if DETECT_OS_NETBSD int mib[] = { CTL_KERN, KERN_CP_TIME, cpu_index }; len = sizeof(cp_time); if (sysctl(mib, ARRAY_SIZE(mib), cp_time, &len, NULL, 0) == -1) return FALSE; -#elif defined(PIPE_OS_OPENBSD) +#elif DETECT_OS_OPENBSD int mib[] = { CTL_KERN, KERN_CPTIME2, cpu_index }; len = sizeof(cp_time); diff --git a/lib/mesa/src/gallium/auxiliary/hud/hud_cpufreq.c b/lib/mesa/src/gallium/auxiliary/hud/hud_cpufreq.c index 9c7b90f73..bf1be1e9b 100644 --- a/lib/mesa/src/gallium/auxiliary/hud/hud_cpufreq.c +++ b/lib/mesa/src/gallium/auxiliary/hud/hud_cpufreq.c @@ -36,7 +36,7 @@ #include "hud/hud_private.h" #include "util/list.h" #include "util/os_time.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "util/u_memory.h" #include <stdio.h> #include <unistd.h> diff --git a/lib/mesa/src/gallium/auxiliary/hud/hud_diskstat.c b/lib/mesa/src/gallium/auxiliary/hud/hud_diskstat.c index b5d9710ef..55bb9023a 100644 --- a/lib/mesa/src/gallium/auxiliary/hud/hud_diskstat.c +++ b/lib/mesa/src/gallium/auxiliary/hud/hud_diskstat.c @@ -35,7 +35,7 @@ #include "hud/hud_private.h" #include "util/list.h" #include "util/os_time.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "util/u_memory.h" #include "util/u_string.h" #include <stdio.h> diff --git a/lib/mesa/src/gallium/auxiliary/hud/hud_nic.c b/lib/mesa/src/gallium/auxiliary/hud/hud_nic.c index b10247e44..b54af2add 100644 --- a/lib/mesa/src/gallium/auxiliary/hud/hud_nic.c +++ b/lib/mesa/src/gallium/auxiliary/hud/hud_nic.c @@ -35,7 +35,7 @@ #include "hud/hud_private.h" #include "util/list.h" #include "util/os_time.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "util/u_memory.h" #include "util/u_string.h" #include <stdio.h> diff --git a/lib/mesa/src/gallium/auxiliary/hud/hud_private.h b/lib/mesa/src/gallium/auxiliary/hud/hud_private.h index 3604760c7..0c3fbbba4 100644 --- a/lib/mesa/src/gallium/auxiliary/hud/hud_private.h +++ b/lib/mesa/src/gallium/auxiliary/hud/hud_private.h @@ -32,6 +32,7 @@ #include "pipe/p_state.h" #include "util/list.h" #include "hud/font.h" +#include "hud/hud_context.h" #include "cso_cache/cso_context.h" enum hud_counter { @@ -51,7 +52,10 @@ struct hud_context { /* Context where the HUD is drawn: */ struct pipe_context *pipe; struct cso_context *cso; - struct st_context_iface *st; + + /* For notifying st_context to rebind states that we clobbered. 
*/ + struct st_context *st; + hud_st_invalidate_state_func st_invalidate_state; struct hud_batch_query_context *batch_query; struct list_head pane_list; @@ -79,6 +83,7 @@ struct hud_context { float translate[2]; float scale[2]; float padding[2]; + float rotate[4]; } constants; struct pipe_constant_buffer constbuf; diff --git a/lib/mesa/src/gallium/auxiliary/hud/hud_sensors_temp.c b/lib/mesa/src/gallium/auxiliary/hud/hud_sensors_temp.c index f99752f38..96876d3a7 100644 --- a/lib/mesa/src/gallium/auxiliary/hud/hud_sensors_temp.c +++ b/lib/mesa/src/gallium/auxiliary/hud/hud_sensors_temp.c @@ -32,7 +32,7 @@ #include "hud/hud_private.h" #include "util/list.h" #include "util/os_time.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "util/u_memory.h" #include "util/u_string.h" #include <stdio.h> diff --git a/lib/mesa/src/gallium/auxiliary/indices/u_indices.c b/lib/mesa/src/gallium/auxiliary/indices/u_indices.c index 53dbb760d..e43072351 100644 --- a/lib/mesa/src/gallium/auxiliary/indices/u_indices.c +++ b/lib/mesa/src/gallium/auxiliary/indices/u_indices.c @@ -57,6 +57,10 @@ u_index_prim_type_convert(unsigned hw_mask, enum pipe_prim_type prim, bool pv_ma case PIPE_PRIM_TRIANGLE_FAN: case PIPE_PRIM_QUADS: case PIPE_PRIM_QUAD_STRIP: + if ((hw_mask & (1<<PIPE_PRIM_QUADS)) && pv_matches) + return PIPE_PRIM_QUADS; + else + return PIPE_PRIM_TRIANGLES; case PIPE_PRIM_POLYGON: return PIPE_PRIM_TRIANGLES; case PIPE_PRIM_LINES_ADJACENCY: @@ -140,8 +144,9 @@ u_index_translator(unsigned hw_mask, return U_TRANSLATE_MEMCPY; } - *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim_restart][prim]; *out_prim = u_index_prim_type_convert(hw_mask, prim, in_pv == out_pv); + *out_translate = (*out_prim == PIPE_PRIM_QUADS ? translate_quads : translate) + [in_idx][out_idx][in_pv][out_pv][prim_restart][prim]; *out_nr = u_index_count_converted_indices(hw_mask, in_pv == out_pv, prim, nr); return ret; @@ -170,9 +175,9 @@ u_index_count_converted_indices(unsigned hw_mask, bool pv_matches, enum pipe_pri case PIPE_PRIM_TRIANGLE_FAN: return (nr - 2) * 3; case PIPE_PRIM_QUADS: - return (nr / 4) * 6; + return ((hw_mask & (1<<PIPE_PRIM_QUADS)) && pv_matches) ? nr : (nr / 4) * 6; case PIPE_PRIM_QUAD_STRIP: - return (nr - 2) * 3; + return ((hw_mask & (1<<PIPE_PRIM_QUADS)) && pv_matches) ? (nr - 2) * 2 : (nr - 2) * 3; case PIPE_PRIM_POLYGON: return (nr - 2) * 3; case PIPE_PRIM_LINES_ADJACENCY: @@ -237,9 +242,11 @@ u_index_generator(unsigned hw_mask, if ((hw_mask & (1<<prim)) && (in_pv == out_pv)) { - *out_generate = generate[out_idx][in_pv][out_pv][PIPE_PRIM_POINTS]; + *out_generate = (*out_prim == PIPE_PRIM_QUADS ? generate_quads : generate) + [out_idx][in_pv][out_pv][PIPE_PRIM_POINTS]; return U_GENERATE_LINEAR; } - *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_generate = (*out_prim == PIPE_PRIM_QUADS ? generate_quads : generate) + [out_idx][in_pv][out_pv][prim]; return prim == PIPE_PRIM_LINE_LOOP ? 
U_GENERATE_ONE_OFF : U_GENERATE_REUSABLE; } diff --git a/lib/mesa/src/gallium/auxiliary/indices/u_indices_gen.py b/lib/mesa/src/gallium/auxiliary/indices/u_indices_gen.py index de3bf9570..03640699c 100644 --- a/lib/mesa/src/gallium/auxiliary/indices/u_indices_gen.py +++ b/lib/mesa/src/gallium/auxiliary/indices/u_indices_gen.py @@ -51,6 +51,8 @@ PRIMS=('points', 'trisadj', 'tristripadj') +OUT_TRIS, OUT_QUADS = 'tris', 'quads' + LONGPRIMS=('PIPE_PRIM_POINTS', 'PIPE_PRIM_LINES', 'PIPE_PRIM_LINE_STRIP', @@ -91,6 +93,9 @@ def prolog(f: 'T.TextIO') -> None: static u_translate_func translate[IN_COUNT][OUT_COUNT][PV_COUNT][PV_COUNT][PR_COUNT][PRIM_COUNT]; static u_generate_func generate[OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT]; +static u_translate_func translate_quads[IN_COUNT][OUT_COUNT][PV_COUNT][PV_COUNT][PR_COUNT][PRIM_COUNT]; +static u_generate_func generate_quads[OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT]; + ''') @@ -121,13 +126,21 @@ def do_tri(f: 'T.TextIO', intype, outtype, ptr, v0, v1, v2, inpv, outpv ): else: shape(f, intype, outtype, ptr, v2, v0, v1 ) -def do_quad(f: 'T.TextIO', intype, outtype, ptr, v0, v1, v2, v3, inpv, outpv ): - if inpv == LAST: - do_tri(f, intype, outtype, ptr+'+0', v0, v1, v3, inpv, outpv ); - do_tri(f, intype, outtype, ptr+'+3', v1, v2, v3, inpv, outpv ); +def do_quad(f: 'T.TextIO', intype, outtype, ptr, v0, v1, v2, v3, inpv, outpv, out_prim ): + if out_prim == OUT_TRIS: + if inpv == LAST: + do_tri(f, intype, outtype, ptr+'+0', v0, v1, v3, inpv, outpv ); + do_tri(f, intype, outtype, ptr+'+3', v1, v2, v3, inpv, outpv ); + else: + do_tri(f, intype, outtype, ptr+'+0', v0, v1, v2, inpv, outpv ); + do_tri(f, intype, outtype, ptr+'+3', v0, v2, v3, inpv, outpv ); else: - do_tri(f, intype, outtype, ptr+'+0', v0, v1, v2, inpv, outpv ); - do_tri(f, intype, outtype, ptr+'+3', v0, v2, v3, inpv, outpv ); + if inpv == outpv: + shape(f, intype, outtype, ptr, v0, v1, v2, v3) + elif inpv == FIRST: + shape(f, intype, outtype, ptr, v1, v2, v3, v0) + else: + shape(f, intype, outtype, ptr, v3, v0, v1, v2) def do_lineadj(f: 'T.TextIO', intype, outtype, ptr, v0, v1, v2, v3, inpv, outpv ): if inpv == outpv: @@ -141,14 +154,14 @@ def do_triadj(f: 'T.TextIO', intype, outtype, ptr, v0, v1, v2, v3, v4, v5, inpv, else: shape(f, intype, outtype, ptr, v4, v5, v0, v1, v2, v3 ) -def name(intype, outtype, inpv, outpv, pr, prim): +def name(intype, outtype, inpv, outpv, pr, prim, out_prim): if intype == GENERATE: - return 'generate_' + prim + '_' + outtype + '_' + inpv + '2' + outpv + return 'generate_' + prim + '_' + outtype + '_' + inpv + '2' + outpv + '_' + str(out_prim) else: - return 'translate_' + prim + '_' + intype + '2' + outtype + '_' + inpv + '2' + outpv + '_' + pr + return 'translate_' + prim + '_' + intype + '2' + outtype + '_' + inpv + '2' + outpv + '_' + pr + '_' + str(out_prim) -def preamble(f: 'T.TextIO', intype, outtype, inpv, outpv, pr, prim): - f.write('static void ' + name( intype, outtype, inpv, outpv, pr, prim ) + '(\n') +def preamble(f: 'T.TextIO', intype, outtype, inpv, outpv, pr, prim, out_prim): + f.write('static void ' + name( intype, outtype, inpv, outpv, pr, prim, out_prim ) + '(\n') if intype != GENERATE: f.write(' const void * restrict _in,\n') f.write(' unsigned start,\n') @@ -186,28 +199,28 @@ def prim_restart(f: 'T.TextIO', in_verts, out_verts, out_prims, close_func = Non f.write(' }\n') def points(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='points') + preamble(f, intype, outtype, inpv, outpv, pr, 
out_prim=OUT_TRIS, prim='points') f.write(' for (i = start, j = 0; j < out_nr; j++, i++) {\n') do_point(f, intype, outtype, 'out+j', 'i' ); f.write(' }\n') postamble(f) def lines(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='lines') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='lines') f.write(' for (i = start, j = 0; j < out_nr; j+=2, i+=2) {\n') do_line(f, intype, outtype, 'out+j', 'i', 'i+1', inpv, outpv ); f.write(' }\n') postamble(f) def linestrip(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='linestrip') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='linestrip') f.write(' for (i = start, j = 0; j < out_nr; j+=2, i++) {\n') do_line(f, intype, outtype, 'out+j', 'i', 'i+1', inpv, outpv ); f.write(' }\n') postamble(f) def lineloop(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='lineloop') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='lineloop') f.write(' unsigned end = start;\n') f.write(' for (i = start, j = 0; j < out_nr - 2; j+=2, i++) {\n') if pr == PRENABLE: @@ -226,7 +239,7 @@ def lineloop(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): postamble(f) def tris(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='tris') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='tris') f.write(' for (i = start, j = 0; j < out_nr; j+=3, i+=3) {\n') do_tri(f, intype, outtype, 'out+j', 'i', 'i+1', 'i+2', inpv, outpv ); f.write(' }\n') @@ -234,7 +247,7 @@ def tris(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): def tristrip(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='tristrip') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='tristrip') f.write(' for (i = start, j = 0; j < out_nr; j+=3, i++) {\n') if inpv == FIRST: do_tri(f, intype, outtype, 'out+j', 'i', 'i+1+(i&1)', 'i+2-(i&1)', inpv, outpv ); @@ -245,7 +258,7 @@ def tristrip(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): def trifan(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='trifan') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='trifan') f.write(' for (i = start, j = 0; j < out_nr; j+=3, i++) {\n') if pr == PRENABLE: @@ -264,7 +277,7 @@ def trifan(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): def polygon(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='polygon') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='polygon') f.write(' for (i = start, j = 0; j < out_nr; j+=3, i++) {\n') if pr == PRENABLE: def close_func(index): @@ -279,33 +292,43 @@ def polygon(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): postamble(f) -def quads(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='quads') - f.write(' for (i = start, j = 0; j < out_nr; j+=6, i+=4) {\n') - if pr == PRENABLE: +def quads(f: 'T.TextIO', intype, outtype, inpv, outpv, pr, out_prim): + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=out_prim, prim='quads') + if out_prim == OUT_TRIS: + f.write(' for (i = start, j = 0; j < out_nr; j+=6, i+=4) {\n') + else: + f.write(' for (i = start, j = 0; j < out_nr; j+=4, i+=4) {\n') + if pr == PRENABLE and out_prim == 
OUT_TRIS: prim_restart(f, 4, 3, 2) + elif pr == PRENABLE: + prim_restart(f, 4, 4, 1) - do_quad(f, intype, outtype, 'out+j', 'i+0', 'i+1', 'i+2', 'i+3', inpv, outpv ); + do_quad(f, intype, outtype, 'out+j', 'i+0', 'i+1', 'i+2', 'i+3', inpv, outpv, out_prim ); f.write(' }\n') postamble(f) -def quadstrip(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='quadstrip') - f.write(' for (i = start, j = 0; j < out_nr; j+=6, i+=2) {\n') - if pr == PRENABLE: +def quadstrip(f: 'T.TextIO', intype, outtype, inpv, outpv, pr, out_prim): + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=out_prim, prim='quadstrip') + if out_prim == OUT_TRIS: + f.write(' for (i = start, j = 0; j < out_nr; j+=6, i+=2) {\n') + else: + f.write(' for (i = start, j = 0; j < out_nr; j+=4, i+=2) {\n') + if pr == PRENABLE and out_prim == OUT_TRIS: prim_restart(f, 4, 3, 2) + elif pr == PRENABLE: + prim_restart(f, 4, 4, 1) if inpv == LAST: - do_quad(f, intype, outtype, 'out+j', 'i+2', 'i+0', 'i+1', 'i+3', inpv, outpv ); + do_quad(f, intype, outtype, 'out+j', 'i+2', 'i+0', 'i+1', 'i+3', inpv, outpv, out_prim ); else: - do_quad(f, intype, outtype, 'out+j', 'i+0', 'i+1', 'i+3', 'i+2', inpv, outpv ); + do_quad(f, intype, outtype, 'out+j', 'i+0', 'i+1', 'i+3', 'i+2', inpv, outpv, out_prim ); f.write(' }\n') postamble(f) def linesadj(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='linesadj') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='linesadj') f.write(' for (i = start, j = 0; j < out_nr; j+=4, i+=4) {\n') do_lineadj(f, intype, outtype, 'out+j', 'i+0', 'i+1', 'i+2', 'i+3', inpv, outpv ) f.write(' }\n') @@ -313,7 +336,7 @@ def linesadj(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): def linestripadj(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='linestripadj') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='linestripadj') f.write(' for (i = start, j = 0; j < out_nr; j+=4, i++) {\n') do_lineadj(f, intype, outtype, 'out+j', 'i+0', 'i+1', 'i+2', 'i+3', inpv, outpv ) f.write(' }\n') @@ -321,7 +344,7 @@ def linestripadj(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): def trisadj(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='trisadj') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='trisadj') f.write(' for (i = start, j = 0; j < out_nr; j+=6, i+=6) {\n') do_triadj(f, intype, outtype, 'out+j', 'i+0', 'i+1', 'i+2', 'i+3', 'i+4', 'i+5', inpv, outpv ) @@ -330,7 +353,7 @@ def trisadj(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): def tristripadj(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='tristripadj') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='tristripadj') f.write(' for (i = start, j = 0; j < out_nr; i+=2, j+=6) {\n') f.write(' if (i % 4 == 0) {\n') f.write(' /* even triangle */\n') @@ -357,31 +380,44 @@ def emit_funcs(f: 'T.TextIO') -> None: tris(f, intype, outtype, inpv, outpv, pr) tristrip(f, intype, outtype, inpv, outpv, pr) trifan(f, intype, outtype, inpv, outpv, pr) - quads(f, intype, outtype, inpv, outpv, pr) - quadstrip(f, intype, outtype, inpv, outpv, pr) + quads(f, intype, outtype, inpv, outpv, pr, OUT_TRIS) + quadstrip(f, intype, outtype, inpv, outpv, pr, OUT_TRIS) polygon(f, intype, outtype, inpv, outpv, pr) linesadj(f, intype, outtype, 
inpv, outpv, pr) linestripadj(f, intype, outtype, inpv, outpv, pr) trisadj(f, intype, outtype, inpv, outpv, pr) tristripadj(f, intype, outtype, inpv, outpv, pr) -def init(f: 'T.TextIO', intype, outtype, inpv, outpv, pr, prim): + for intype, outtype, inpv, outpv, pr in itertools.product( + INTYPES, OUTTYPES, [FIRST, LAST], [FIRST, LAST], [PRDISABLE, PRENABLE]): + if pr == PRENABLE and intype == GENERATE: + continue + quads(f, intype, outtype, inpv, outpv, pr, OUT_QUADS) + quadstrip(f, intype, outtype, inpv, outpv, pr, OUT_QUADS) + +def init(f: 'T.TextIO', intype, outtype, inpv, outpv, pr, prim, out_prim=OUT_TRIS): + generate_name = 'generate' + translate_name = 'translate' + if out_prim == OUT_QUADS: + generate_name = 'generate_quads' + translate_name = 'translate_quads' + if intype == GENERATE: - f.write('generate[' + + f.write(f'{generate_name}[' + outtype_idx[outtype] + '][' + pv_idx[inpv] + '][' + pv_idx[outpv] + '][' + longprim[prim] + - '] = ' + name( intype, outtype, inpv, outpv, pr, prim ) + ';\n') + '] = ' + name( intype, outtype, inpv, outpv, pr, prim, out_prim ) + ';\n') else: - f.write('translate[' + + f.write(f'{translate_name}[' + intype_idx[intype] + '][' + outtype_idx[outtype] + '][' + pv_idx[inpv] + '][' + pv_idx[outpv] + '][' + pr_idx[pr] + '][' + longprim[prim] + - '] = ' + name( intype, outtype, inpv, outpv, pr, prim ) + ';\n') + '] = ' + name( intype, outtype, inpv, outpv, pr, prim, out_prim ) + ';\n') def emit_all_inits(f: 'T.TextIO'): @@ -389,6 +425,10 @@ def emit_all_inits(f: 'T.TextIO'): INTYPES, OUTTYPES, PVS, PVS, PRS, PRIMS): init(f,intype, outtype, inpv, outpv, pr, prim) + for intype, outtype, inpv, outpv, pr, prim in itertools.product( + INTYPES, OUTTYPES, PVS, PVS, PRS, ['quads', 'quadstrip']): + init(f,intype, outtype, inpv, outpv, pr, prim, OUT_QUADS) + def emit_init(f: 'T.TextIO'): f.write('void u_index_init( void )\n') f.write('{\n') diff --git a/lib/mesa/src/gallium/auxiliary/indices/u_primconvert.c b/lib/mesa/src/gallium/auxiliary/indices/u_primconvert.c index dcbc9052d..526e7b644 100644 --- a/lib/mesa/src/gallium/auxiliary/indices/u_primconvert.c +++ b/lib/mesa/src/gallium/auxiliary/indices/u_primconvert.c @@ -130,9 +130,15 @@ primconvert_init_draw(struct primconvert_context *pc, return false; util_draw_init_info(new_info); - new_info->index_bounds_valid = info->index_bounds_valid; - new_info->min_index = info->min_index; - new_info->max_index = info->max_index; + + /* Because we've changed the index buffer, the original min_index/max_index + * for the draw are no longer valid. That's ok, but we need to tell drivers + * so they don't optimize incorrectly. + */ + new_info->index_bounds_valid = false; + new_info->min_index = 0; + new_info->max_index = ~0; + new_info->start_instance = info->start_instance; new_info->instance_count = info->instance_count; new_info->primitive_restart = info->primitive_restart; diff --git a/lib/mesa/src/gallium/auxiliary/nir/nir_helpers.c b/lib/mesa/src/gallium/auxiliary/nir/nir_helpers.c deleted file mode 100644 index 1b5b7968e..000000000 --- a/lib/mesa/src/gallium/auxiliary/nir/nir_helpers.c +++ /dev/null @@ -1,56 +0,0 @@ -/************************************************************************** - * - * Copyright 2021 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **************************************************************************/ - -#include "nir_helpers.h" -#include "nir_xfb_info.h" - -void -nir_gather_stream_output_info(nir_shader *nir, - struct pipe_stream_output_info *so) -{ - int slot_to_register[NUM_TOTAL_VARYING_SLOTS]; - nir_xfb_info *info = nir_gather_xfb_info_from_intrinsics(nir, slot_to_register); - - memset(so, 0, sizeof(*so)); - - if (!info) - return; - - so->num_outputs = info->output_count; - - for (unsigned i = 0; i < info->output_count; i++) { - so->output[i].start_component = info->outputs[i].component_offset; - so->output[i].num_components = util_bitcount(info->outputs[i].component_mask); - so->output[i].output_buffer = info->outputs[i].buffer; - so->output[i].dst_offset = info->outputs[i].offset / 4; - so->output[i].stream = info->buffer_to_stream[info->outputs[i].buffer]; - so->output[i].register_index = slot_to_register[info->outputs[i].location]; - } - - for (unsigned i = 0; i < MAX_XFB_BUFFERS; i++) - so->stride[i] = info->buffers[i].stride; - - free(info); -} diff --git a/lib/mesa/src/gallium/auxiliary/nir/nir_helpers.h b/lib/mesa/src/gallium/auxiliary/nir/nir_helpers.h deleted file mode 100644 index 08761b045..000000000 --- a/lib/mesa/src/gallium/auxiliary/nir/nir_helpers.h +++ /dev/null @@ -1,36 +0,0 @@ -/************************************************************************** - * - * Copyright 2021 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **************************************************************************/ - -#ifndef GALLIUM_NIR_HELPERS -#define GALLIUM_NIR_HELPERS - -#include "nir.h" -#include "pipe/p_state.h" - -void -nir_gather_stream_output_info(nir_shader *nir, - struct pipe_stream_output_info *so); - -#endif diff --git a/lib/mesa/src/gallium/auxiliary/nir/tgsi_to_nir.c b/lib/mesa/src/gallium/auxiliary/nir/tgsi_to_nir.c index ad9ed85ec..6b046a85f 100644 --- a/lib/mesa/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/lib/mesa/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -957,7 +957,7 @@ ttn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, unsigned dest_bitsize, { nir_ssa_def *def = nir_build_alu_src_arr(b, op, src); if (def->bit_size == 1) - def = nir_ineg(b, nir_b2i(b, def, dest_bitsize)); + def = nir_ineg(b, nir_b2iN(b, def, dest_bitsize)); assert(def->bit_size == dest_bitsize); if (dest_bitsize == 64) { if (def->num_components > 2) { diff --git a/lib/mesa/src/gallium/auxiliary/os/os_mman.h b/lib/mesa/src/gallium/auxiliary/os/os_mman.h deleted file mode 100644 index 1d07ce654..000000000 --- a/lib/mesa/src/gallium/auxiliary/os/os_mman.h +++ /dev/null @@ -1,84 +0,0 @@ -/************************************************************************** - * - * Copyright 2011 LunarG, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * OS independent memory mapping (with large file support). 
- * - * @author Chia-I Wu <olvaffe@gmail.com> - */ - -#ifndef _OS_MMAN_H_ -#define _OS_MMAN_H_ - - -#include "pipe/p_config.h" -#include "pipe/p_compiler.h" - -#if defined(PIPE_OS_UNIX) -# include <sys/mman.h> -#else -# error Unsupported OS -#endif - -#ifdef __cplusplus -extern "C" { -#endif - - -#if defined(PIPE_OS_ANDROID) && !defined(__LP64__) -/* 32-bit needs mmap64 for 64-bit offsets */ -# define os_mmap(addr, length, prot, flags, fd, offset) \ - mmap64(addr, length, prot, flags, fd, offset) - -# define os_munmap(addr, length) \ - munmap(addr, length) - -#else -/* assume large file support exists */ -# define os_mmap(addr, length, prot, flags, fd, offset) \ - mmap(addr, length, prot, flags, fd, offset) - -static inline int os_munmap(void *addr, size_t length) -{ - /* Copied from configure code generated by AC_SYS_LARGEFILE */ -#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + \ - (((off_t) 1 << 31) << 31)) - STATIC_ASSERT(LARGE_OFF_T % 2147483629 == 721 && - LARGE_OFF_T % 2147483647 == 1); -#undef LARGE_OFF_T - - return munmap(addr, length); -} -#endif - - -#ifdef __cplusplus -} -#endif - -#endif /* _OS_MMAN_H_ */ diff --git a/lib/mesa/src/gallium/auxiliary/os/os_process.c b/lib/mesa/src/gallium/auxiliary/os/os_process.c deleted file mode 100644 index b00ff2b0d..000000000 --- a/lib/mesa/src/gallium/auxiliary/os/os_process.c +++ /dev/null @@ -1,146 +0,0 @@ -/************************************************************************** - * - * Copyright 2013 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "pipe/p_config.h" -#include "os/os_process.h" -#include "util/u_memory.h" -#include "util/u_process.h" - -#if defined(PIPE_OS_WINDOWS) -# include <windows.h> -#elif defined(PIPE_OS_HAIKU) -# include <kernel/OS.h> -# include <kernel/image.h> -#endif - -#if defined(PIPE_OS_LINUX) -# include <fcntl.h> -#endif - - -/** - * Return the name of the current process. - * \param procname returns the process name - * \param size size of the procname buffer - * \return TRUE or FALSE for success, failure - */ -boolean -os_get_process_name(char *procname, size_t size) -{ - const char *name; - - /* First, check if the GALLIUM_PROCESS_NAME env var is set to - * override the normal process name query. 
- */ - name = os_get_option("GALLIUM_PROCESS_NAME"); - - if (!name) { - /* do normal query */ - -#if defined(PIPE_OS_WINDOWS) - char szProcessPath[MAX_PATH]; - char *lpProcessName; - char *lpProcessExt; - - GetModuleFileNameA(NULL, szProcessPath, ARRAY_SIZE(szProcessPath)); - - lpProcessName = strrchr(szProcessPath, '\\'); - lpProcessName = lpProcessName ? lpProcessName + 1 : szProcessPath; - - lpProcessExt = strrchr(lpProcessName, '.'); - if (lpProcessExt) { - *lpProcessExt = '\0'; - } - - name = lpProcessName; - -#elif defined(PIPE_OS_HAIKU) - image_info info; - get_image_info(B_CURRENT_TEAM, &info); - name = info.name; -#else - name = util_get_process_name(); -#endif - } - - assert(size > 0); - assert(procname); - - if (name && procname && size > 0) { - strncpy(procname, name, size); - procname[size - 1] = '\0'; - return TRUE; - } - else { - return FALSE; - } -} - - -/** - * Return the command line for the calling process. This is basically - * the argv[] array with the arguments separated by spaces. - * \param cmdline returns the command line string - * \param size size of the cmdline buffer - * \return TRUE or FALSE for success, failure - */ -boolean -os_get_command_line(char *cmdline, size_t size) -{ -#if defined(PIPE_OS_WINDOWS) - const char *args = GetCommandLineA(); - if (args) { - strncpy(cmdline, args, size); - // make sure we terminate the string - cmdline[size - 1] = 0; - return TRUE; - } -#elif defined(PIPE_OS_LINUX) - int f = open("/proc/self/cmdline", O_RDONLY); - if (f != -1) { - const int n = read(f, cmdline, size - 1); - int i; - assert(n < size); - // The arguments are separated by '\0' chars. Convert them to spaces. - for (i = 0; i < n; i++) { - if (cmdline[i] == 0) { - cmdline[i] = ' '; - } - } - // terminate the string - cmdline[n] = 0; - close(f); - return TRUE; - } -#endif - - /* XXX to-do: implement this function for other operating systems */ - - cmdline[0] = 0; - return FALSE; -} diff --git a/lib/mesa/src/gallium/auxiliary/os/os_thread.h b/lib/mesa/src/gallium/auxiliary/os/os_thread.h deleted file mode 100644 index 7ca65a21d..000000000 --- a/lib/mesa/src/gallium/auxiliary/os/os_thread.h +++ /dev/null @@ -1,158 +0,0 @@ -/************************************************************************** - * - * Copyright 1999-2006 Brian Paul - * Copyright 2008 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -/** - * @file - * - * Thread, mutex, condition variable, barrier, semaphore and - * thread-specific data functions. - */ - - -#ifndef OS_THREAD_H_ -#define OS_THREAD_H_ - - -#include "pipe/p_compiler.h" -#include "util/u_debug.h" /* for assert */ -#include "util/u_thread.h" - - -#define pipe_mutex_assert_locked(mutex) \ - __pipe_mutex_assert_locked(&(mutex)) - -static inline void -__pipe_mutex_assert_locked(mtx_t *mutex) -{ -#ifdef DEBUG - /* NOTE: this would not work for recursive mutexes, but - * mtx_t doesn't support those - */ - int ret = mtx_trylock(mutex); - assert(ret == thrd_busy); - if (ret == thrd_success) - mtx_unlock(mutex); -#else - (void)mutex; -#endif -} - - -/* - * Semaphores - */ - -typedef struct -{ - mtx_t mutex; - cnd_t cond; - int counter; -} pipe_semaphore; - - -static inline void -pipe_semaphore_init(pipe_semaphore *sema, int init_val) -{ - (void) mtx_init(&sema->mutex, mtx_plain); - cnd_init(&sema->cond); - sema->counter = init_val; -} - -static inline void -pipe_semaphore_destroy(pipe_semaphore *sema) -{ - mtx_destroy(&sema->mutex); - cnd_destroy(&sema->cond); -} - -/** Signal/increment semaphore counter */ -static inline void -pipe_semaphore_signal(pipe_semaphore *sema) -{ - mtx_lock(&sema->mutex); - sema->counter++; - cnd_signal(&sema->cond); - mtx_unlock(&sema->mutex); -} - -/** Wait for semaphore counter to be greater than zero */ -static inline void -pipe_semaphore_wait(pipe_semaphore *sema) -{ - mtx_lock(&sema->mutex); - while (sema->counter <= 0) { - cnd_wait(&sema->cond, &sema->mutex); - } - sema->counter--; - mtx_unlock(&sema->mutex); -} - - - -/* - * Thread-specific data. - */ - -typedef struct { - tss_t key; - int initMagic; -} pipe_tsd; - - -#define PIPE_TSD_INIT_MAGIC 0xff8adc98 - - -static inline void -pipe_tsd_init(pipe_tsd *tsd) -{ - if (tss_create(&tsd->key, NULL/*free*/) != 0) { - exit(-1); - } - tsd->initMagic = PIPE_TSD_INIT_MAGIC; -} - -static inline void * -pipe_tsd_get(pipe_tsd *tsd) -{ - if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) { - pipe_tsd_init(tsd); - } - return tss_get(tsd->key); -} - -static inline void -pipe_tsd_set(pipe_tsd *tsd, void *value) -{ - if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) { - pipe_tsd_init(tsd); - } - if (tss_set(tsd->key, value) != 0) { - exit(-1); - } -} - -#endif /* OS_THREAD_H_ */ diff --git a/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader.c b/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader.c index 1c58eaefd..5b69599ee 100644 --- a/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader.c +++ b/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader.c @@ -97,8 +97,12 @@ merge_driconf(const driOptionDescription *driver_driconf, unsigned driver_count, return NULL; } - memcpy(merged, gallium_driconf, sizeof(*merged) * gallium_count); - memcpy(&merged[gallium_count], driver_driconf, sizeof(*merged) * driver_count); + if (gallium_count) + memcpy(merged, gallium_driconf, sizeof(*merged) * gallium_count); + if (driver_count) { + memcpy(&merged[gallium_count], driver_driconf, + sizeof(*merged) * driver_count); + } *merged_count = driver_count + gallium_count; return merged; diff --git a/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c index 12bc79305..e11837f2c 100644 --- a/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c +++ b/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c @@ -69,13 +69,13 @@ static const struct 
sw_driver_descriptor driver_descriptors = { #ifdef HAVE_DRI { .name = "dri", - .create_winsys = dri_create_sw_winsys, + .create_winsys_dri = dri_create_sw_winsys, }, #endif #ifdef HAVE_DRISW_KMS { .name = "kms_dri", - .create_winsys = kms_dri_create_winsys, + .create_winsys_kms_dri = kms_dri_create_winsys, }, #endif #ifndef __ANDROID__ @@ -85,7 +85,7 @@ static const struct sw_driver_descriptor driver_descriptors = { }, { .name = "wrapped", - .create_winsys = wrapper_sw_winsys_wrap_pipe_screen, + .create_winsys_wrapped = wrapper_sw_winsys_wrap_pipe_screen, }, #endif { 0 }, @@ -99,12 +99,12 @@ static const struct sw_driver_descriptor kopper_driver_descriptors = { .winsys = { { .name = "dri", - .create_winsys = dri_create_sw_winsys, + .create_winsys_dri = dri_create_sw_winsys, }, #ifdef HAVE_DRISW_KMS { .name = "kms_dri", - .create_winsys = kms_dri_create_winsys, + .create_winsys_kms_dri = kms_dri_create_winsys, }, #endif #ifndef __ANDROID__ @@ -114,7 +114,7 @@ static const struct sw_driver_descriptor kopper_driver_descriptors = { }, { .name = "wrapped", - .create_winsys = wrapper_sw_winsys_wrap_pipe_screen, + .create_winsys_wrapped = wrapper_sw_winsys_wrap_pipe_screen, }, #endif { 0 }, @@ -216,7 +216,7 @@ pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, const struct drisw_lo for (i = 0; sdev->dd->winsys[i].name; i++) { if (strcmp(sdev->dd->winsys[i].name, "dri") == 0) { - sdev->ws = sdev->dd->winsys[i].create_winsys(drisw_lf); + sdev->ws = sdev->dd->winsys[i].create_winsys_dri(drisw_lf); break; } } @@ -246,7 +246,7 @@ pipe_loader_vk_probe_dri(struct pipe_loader_device **devs, const struct drisw_lo for (i = 0; sdev->dd->winsys[i].name; i++) { if (strcmp(sdev->dd->winsys[i].name, "dri") == 0) { - sdev->ws = sdev->dd->winsys[i].create_winsys(drisw_lf); + sdev->ws = sdev->dd->winsys[i].create_winsys_dri(drisw_lf); break; } } @@ -282,7 +282,7 @@ pipe_loader_sw_probe_kms(struct pipe_loader_device **devs, int fd) for (i = 0; sdev->dd->winsys[i].name; i++) { if (strcmp(sdev->dd->winsys[i].name, "kms_dri") == 0) { - sdev->ws = sdev->dd->winsys[i].create_winsys(sdev->fd); + sdev->ws = sdev->dd->winsys[i].create_winsys_kms_dri(sdev->fd); break; } } @@ -360,7 +360,7 @@ pipe_loader_sw_probe_wrapped(struct pipe_loader_device **dev, for (i = 0; sdev->dd->winsys[i].name; i++) { if (strcmp(sdev->dd->winsys[i].name, "wrapped") == 0) { - sdev->ws = sdev->dd->winsys[i].create_winsys(screen); + sdev->ws = sdev->dd->winsys[i].create_winsys_wrapped(screen); break; } } @@ -382,6 +382,7 @@ pipe_loader_sw_release(struct pipe_loader_device **dev) UNUSED struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(*dev); + sdev->ws->destroy(sdev->ws); #ifndef GALLIUM_STATIC_TARGETS if (sdev->lib) util_dl_close(sdev->lib); diff --git a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 41e5c8386..76608b371 100644 --- a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -34,9 +34,9 @@ */ -#include "pipe/p_config.h" +#include "util/detect.h" -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if DETECT_OS_LINUX || DETECT_OS_BSD || DETECT_OS_SOLARIS #include <unistd.h> #include <sched.h> #endif @@ -45,7 +45,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_defines.h" #include "util/u_debug.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "util/u_memory.h" #include "util/list.h" @@ -979,7 +979,7 @@ 
fenced_bufmgr_destroy(struct pb_manager *mgr) /* Wait on outstanding fences. */ while (fenced_mgr->num_fenced) { mtx_unlock(&fenced_mgr->mutex); -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if DETECT_OS_LINUX || DETECT_OS_BSD || DETECT_OS_SOLARIS sched_yield(); #endif mtx_lock(&fenced_mgr->mutex); diff --git a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index 9a10def98..f2fb620dd 100644 --- a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -35,7 +35,7 @@ #include "pipe/p_compiler.h" #include "util/u_debug.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/list.h" @@ -307,7 +307,7 @@ pb_debug_buffer_validate(struct pb_buffer *_buf, mtx_lock(&buf->mutex); if(buf->map_count) { - debug_printf("%s: attempting to validate a mapped buffer\n", __FUNCTION__); + debug_printf("%s: attempting to validate a mapped buffer\n", __func__); debug_printf("last map backtrace is\n"); debug_backtrace_dump(buf->map_backtrace, PB_DEBUG_MAP_BACKTRACE); } @@ -390,7 +390,7 @@ pb_debug_manager_create_buffer(struct pb_manager *_mgr, FREE(buf); #if 0 mtx_lock(&mgr->mutex); - debug_printf("%s: failed to create buffer\n", __FUNCTION__); + debug_printf("%s: failed to create buffer\n", __func__); if(!list_is_empty(&mgr->list)) pb_debug_manager_dump_locked(mgr); mtx_unlock(&mgr->mutex); @@ -445,7 +445,7 @@ pb_debug_manager_destroy(struct pb_manager *_mgr) mtx_lock(&mgr->mutex); if(!list_is_empty(&mgr->list)) { - debug_printf("%s: unfreed buffers\n", __FUNCTION__); + debug_printf("%s: unfreed buffers\n", __func__); pb_debug_manager_dump_locked(mgr); } mtx_unlock(&mgr->mutex); diff --git a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index 5cc63b93d..397e42eed 100644 --- a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -35,7 +35,7 @@ #include "pipe/p_defines.h" #include "util/u_debug.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "util/u_memory.h" #include "util/list.h" #include "util/u_mm.h" diff --git a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index d1928dcaf..f078ff0b1 100644 --- a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -38,7 +38,7 @@ #include "pipe/p_compiler.h" #include "util/u_debug.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "pipe/p_defines.h" #include "util/u_memory.h" #include "util/list.h" diff --git a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_cache.h b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_cache.h index cda0f9984..c5d62c7f9 100644 --- a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_cache.h +++ b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_cache.h @@ -32,7 +32,7 @@ #include "pb_buffer.h" #include "util/simple_mtx.h" #include "util/list.h" -#include "os/os_thread.h" +#include "util/u_thread.h" /** * Statically inserted into the driver-specific buffer structure. 
diff --git a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_slab.h b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_slab.h index e8e8f7687..4fa5fd8d7 100644 --- a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_slab.h +++ b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_slab.h @@ -47,7 +47,7 @@ #include "pb_buffer.h" #include "util/simple_mtx.h" #include "util/list.h" -#include "os/os_thread.h" +#include "util/u_thread.h" struct pb_slab; struct pb_slabs; diff --git a/lib/mesa/src/gallium/auxiliary/renderonly/renderonly.c b/lib/mesa/src/gallium/auxiliary/renderonly/renderonly.c index d8628a36b..157570b80 100644 --- a/lib/mesa/src/gallium/auxiliary/renderonly/renderonly.c +++ b/lib/mesa/src/gallium/auxiliary/renderonly/renderonly.c @@ -66,7 +66,7 @@ renderonly_create_kms_dumb_buffer_for_resource(struct pipe_resource *rsc, struct renderonly *ro, struct winsys_handle *out_handle) { - struct renderonly_scanout *scanout; + struct renderonly_scanout *scanout = NULL; int err; struct drm_mode_create_dumb create_dumb = { .width = rsc->width0, @@ -114,7 +114,13 @@ renderonly_create_kms_dumb_buffer_for_resource(struct pipe_resource *rsc, return scanout; free_dumb: - destroy_dumb.handle = scanout->handle; + /* If an error occured, make sure we reset the scanout object before + * leaving. + */ + if (scanout) + memset(scanout, 0, sizeof(*scanout)); + + destroy_dumb.handle = create_dumb.handle; drmIoctl(ro->kms_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destroy_dumb); return NULL; diff --git a/lib/mesa/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/lib/mesa/src/gallium/auxiliary/rtasm/rtasm_execmem.c index 8d5195ac0..2aa545919 100644 --- a/lib/mesa/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/lib/mesa/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -33,7 +33,7 @@ #include "pipe/p_compiler.h" #include "util/u_debug.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "util/u_memory.h" #include "rtasm_execmem.h" @@ -42,11 +42,11 @@ #define MAP_ANONYMOUS MAP_ANON #endif -#if defined(PIPE_OS_WINDOWS) +#if DETECT_OS_WINDOWS #include <windows.h> #endif -#if defined(PIPE_OS_UNIX) +#if DETECT_OS_UNIX /* @@ -137,7 +137,7 @@ rtasm_exec_free(void *addr) } -#elif defined(PIPE_OS_WINDOWS) +#elif DETECT_OS_WINDOWS /* diff --git a/lib/mesa/src/gallium/auxiliary/target-helpers/drm_helper.h b/lib/mesa/src/gallium/auxiliary/target-helpers/drm_helper.h index 7dff0b66c..323832ecc 100644 --- a/lib/mesa/src/gallium/auxiliary/target-helpers/drm_helper.h +++ b/lib/mesa/src/gallium/auxiliary/target-helpers/drm_helper.h @@ -264,14 +264,21 @@ pipe_msm_create_screen(int fd, const struct pipe_screen_config *config) { struct pipe_screen *screen; - screen = fd_drm_screen_create(fd, NULL, config); + screen = fd_drm_screen_create_renderonly(fd, NULL, config); return screen ? 
debug_screen_wrap(screen) : NULL; } -DRM_DRIVER_DESCRIPTOR(msm, NULL, 0) + +const driOptionDescription msm_driconf[] = { +#ifdef GALLIUM_FREEDRENO + #include "freedreno/driinfo_freedreno.h" +#endif +}; +DRM_DRIVER_DESCRIPTOR(msm, msm_driconf, ARRAY_SIZE(msm_driconf)) +DRM_DRIVER_DESCRIPTOR_ALIAS(msm, kgsl, msm_driconf, ARRAY_SIZE(msm_driconf)) #else DRM_DRIVER_DESCRIPTOR_STUB(msm) +DRM_DRIVER_DESCRIPTOR_STUB(kgsl) #endif -DRM_DRIVER_DESCRIPTOR_ALIAS(msm, kgsl, NULL, 0) #if defined(GALLIUM_VIRGL) || (defined(GALLIUM_FREEDRENO) && !defined(PIPE_LOADER_DYNAMIC)) #include "virgl/drm/virgl_drm_public.h" @@ -285,7 +292,7 @@ pipe_virtio_gpu_create_screen(int fd, const struct pipe_screen_config *config) /* Try native guest driver(s) first, and then fallback to virgl: */ #ifdef GALLIUM_FREEDRENO if (!screen) - screen = fd_drm_screen_create(fd, NULL, config); + screen = fd_drm_screen_create_renderonly(fd, NULL, config); #endif #ifdef GALLIUM_VIRGL if (!screen) @@ -295,9 +302,7 @@ pipe_virtio_gpu_create_screen(int fd, const struct pipe_screen_config *config) } const driOptionDescription virgl_driconf[] = { -#ifdef GALLIUM_VIRGL #include "virgl/virgl_driinfo.h.in" -#endif }; DRM_DRIVER_DESCRIPTOR(virtio_gpu, virgl_driconf, ARRAY_SIZE(virgl_driconf)) diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c index 73d1eb26a..5ec2605aa 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -26,7 +26,7 @@ **************************************************************************/ #include "util/u_debug.h" -#include "pipe/p_format.h" +#include "util/format/u_formats.h" #include "pipe/p_shader_tokens.h" #include "tgsi_build.h" #include "tgsi_parse.h" diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c index 5e440353e..abafc6dc0 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -73,18 +73,27 @@ #define TILE_BOTTOM_LEFT 2 #define TILE_BOTTOM_RIGHT 3 +static_assert(alignof(union tgsi_exec_channel) == 16, ""); +static_assert(alignof(struct tgsi_exec_vector) == 16, ""); +static_assert(alignof(struct tgsi_exec_machine) == 16, ""); + union tgsi_double_channel { + alignas(16) double d[TGSI_QUAD_SIZE]; unsigned u[TGSI_QUAD_SIZE][2]; uint64_t u64[TGSI_QUAD_SIZE]; int64_t i64[TGSI_QUAD_SIZE]; -} ALIGN16; +}; -struct ALIGN16 tgsi_double_vector { +struct tgsi_double_vector { + alignas(16) union tgsi_double_channel xy; union tgsi_double_channel zw; }; +static_assert(alignof(union tgsi_double_channel) == 16, ""); +static_assert(alignof(struct tgsi_double_vector) == 16, ""); + static void micro_abs(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) @@ -399,17 +408,6 @@ micro_dldexp(union tgsi_double_channel *dst, } static void -micro_dfracexp(union tgsi_double_channel *dst, - union tgsi_exec_channel *dst_exp, - const union tgsi_double_channel *src) -{ - dst->d[0] = frexp(src->d[0], &dst_exp->i[0]); - dst->d[1] = frexp(src->d[1], &dst_exp->i[1]); - dst->d[2] = frexp(src->d[2], &dst_exp->i[2]); - dst->d[3] = frexp(src->d[3], &dst_exp->i[3]); -} - -static void micro_exp2(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { @@ -3559,26 +3557,6 @@ exec_dldexp(struct tgsi_exec_machine *mach, } static void -exec_dfracexp(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - union tgsi_double_channel src; - union tgsi_double_channel 
dst; - union tgsi_exec_channel dst_exp; - - fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); - micro_dfracexp(&dst, &dst_exp, &src); - if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) - store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); - if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) - store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); - for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[1].Register.WriteMask & (1 << chan)) - store_dest(mach, &dst_exp, &inst->Dst[1], inst, chan); - } -} - -static void exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, micro_dop_sop op) @@ -5795,10 +5773,6 @@ exec_instruction( exec_dldexp(mach, inst); break; - case TGSI_OPCODE_DFRACEXP: - exec_dfracexp(mach, inst); - break; - case TGSI_OPCODE_I2D: exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_FLOAT); break; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h index 80acf6359..6e13618fb 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -73,17 +73,18 @@ extern "C" { */ union tgsi_exec_channel { + alignas(16) float f[TGSI_QUAD_SIZE]; int i[TGSI_QUAD_SIZE]; unsigned u[TGSI_QUAD_SIZE]; -} ALIGN16; +}; /** * A vector[RGBA] of channels[4 pixels] */ -struct ALIGN16 tgsi_exec_vector +struct tgsi_exec_vector { - union tgsi_exec_channel xyzw[TGSI_NUM_CHANNELS]; + alignas(16) union tgsi_exec_channel xyzw[TGSI_NUM_CHANNELS]; }; /** @@ -286,10 +287,11 @@ typedef void (* apply_sample_offset_func)( /** * Run-time virtual machine state for executing TGSI shader. 
*/ -struct ALIGN16 tgsi_exec_machine +struct tgsi_exec_machine { /* Total = program temporaries + internal temporaries */ + alignas(16) struct tgsi_exec_vector Temps[TGSI_EXEC_NUM_TEMPS]; unsigned ImmsReserved; @@ -480,8 +482,6 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) return 1 << PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 1; - case PIPE_SHADER_CAP_DFRACEXP_DLDEXP_SUPPORTED: - case PIPE_SHADER_CAP_LDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 1; case PIPE_SHADER_CAP_DROUND_SUPPORTED: diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c index 477876d7e..8b926baf7 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -201,7 +201,6 @@ tgsi_opcode_infer_type(enum tgsi_opcode opcode) case TGSI_OPCODE_DSQRT: case TGSI_OPCODE_DMAD: case TGSI_OPCODE_DLDEXP: - case TGSI_OPCODE_DFRACEXP: case TGSI_OPCODE_DFRAC: case TGSI_OPCODE_DRSQ: case TGSI_OPCODE_DTRUNC: @@ -335,8 +334,5 @@ tgsi_opcode_infer_src_type(enum tgsi_opcode opcode, uint src_idx) enum tgsi_opcode_type tgsi_opcode_infer_dst_type(enum tgsi_opcode opcode, uint dst_idx) { - if (dst_idx == 1 && opcode == TGSI_OPCODE_DFRACEXP) - return TGSI_TYPE_SIGNED; - return tgsi_opcode_infer_type(opcode); } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c index 7802f1049..29e337278 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -30,6 +30,7 @@ #include "util/u_prim.h" #include "pipe/p_defines.h" #include "util/u_inlines.h" +#include "util/strtod.h" #include "tgsi_text.h" #include "tgsi_build.h" #include "tgsi_info.h" @@ -231,52 +232,9 @@ static boolean parse_identifier( const char **pcur, char *ret, size_t len ) static boolean parse_float( const char **pcur, float *val ) { const char *cur = *pcur; - boolean integral_part = FALSE; - boolean fractional_part = FALSE; - - if (*cur == '0' && *(cur + 1) == 'x') { - union fi fi; - fi.ui = strtoul(cur, NULL, 16); - *val = fi.f; - cur += 10; - goto out; - } - - *val = (float) atof( cur ); - if (*cur == '-' || *cur == '+') - cur++; - if (is_digit( cur )) { - cur++; - integral_part = TRUE; - while (is_digit( cur )) - cur++; - } - if (*cur == '.') { - cur++; - if (is_digit( cur )) { - cur++; - fractional_part = TRUE; - while (is_digit( cur )) - cur++; - } - } - if (!integral_part && !fractional_part) + *val = _mesa_strtof(cur, (char**)pcur); + if (*pcur == cur) return FALSE; - if (uprcase( *cur ) == 'E') { - cur++; - if (*cur == '-' || *cur == '+') - cur++; - if (is_digit( cur )) { - cur++; - while (is_digit( cur )) - cur++; - } - else - return FALSE; - } - -out: - *pcur = cur; return TRUE; } @@ -288,7 +246,7 @@ static boolean parse_double( const char **pcur, uint32_t *val0, uint32_t *val1) uint32_t uval[2]; } v; - v.dval = strtod(cur, (char**)pcur); + v.dval = _mesa_strtod(cur, (char**)pcur); if (*pcur == cur) return FALSE; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 398d59b95..b4cfae0e6 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -35,13 +35,13 @@ #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_sanity.h" +#include "util/glheader.h" #include "util/u_debug.h" #include "util/u_inlines.h" #include "util/u_memory.h" #include 
"util/u_math.h" #include "util/u_prim.h" #include "util/u_bitmask.h" -#include "GL/gl.h" #include "compiler/shader_info.h" union tgsi_any_token { @@ -2131,7 +2131,7 @@ const struct tgsi_token *ureg_finalize( struct ureg_program *ureg ) if (ureg->domain[0].tokens == error_tokens || ureg->domain[1].tokens == error_tokens) { - debug_printf("%s: error in generated shader\n", __FUNCTION__); + debug_printf("%s: error in generated shader\n", __func__); assert(0); return NULL; } @@ -2139,7 +2139,7 @@ const struct tgsi_token *ureg_finalize( struct ureg_program *ureg ) tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token; if (0) { - debug_printf("%s: emitted shader %d tokens:\n", __FUNCTION__, + debug_printf("%s: emitted shader %d tokens:\n", __func__, ureg->domain[DOMAIN_DECL].count); tgsi_dump( tokens, 0 ); } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 59041e94d..5c4efbe8c 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -29,7 +29,7 @@ #define TGSI_UREG_H #include "pipe/p_defines.h" -#include "pipe/p_format.h" +#include "util/format/u_formats.h" #include "pipe/p_compiler.h" #include "pipe/p_shader_tokens.h" #include "util/u_debug.h" diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c index a101cce24..d879d2e26 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -146,7 +146,6 @@ tgsi_util_get_src_usage_mask(enum tgsi_opcode opcode, case TGSI_OPCODE_DP2: case TGSI_OPCODE_PK2H: case TGSI_OPCODE_PK2US: - case TGSI_OPCODE_DFRACEXP: case TGSI_OPCODE_F2D: case TGSI_OPCODE_I2D: case TGSI_OPCODE_U2D: diff --git a/lib/mesa/src/gallium/auxiliary/translate/translate_generic.c b/lib/mesa/src/gallium/auxiliary/translate/translate_generic.c index 20e2de341..c4213fb5d 100644 --- a/lib/mesa/src/gallium/auxiliary/translate/translate_generic.c +++ b/lib/mesa/src/gallium/auxiliary/translate/translate_generic.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 VMware, Inc. + * Copyright 2007-2023 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -584,12 +584,13 @@ get_emit_func(enum pipe_format format) } } -static ALWAYS_INLINE void PIPE_CDECL +static ALWAYS_INLINE void UTIL_CDECL generic_run_one(struct translate_generic *tg, unsigned elt, unsigned start_instance, unsigned instance_id, - void *vert) + void *vert, + unsigned index_size) { unsigned nr_attrs = tg->nr_attrib; unsigned attr; @@ -613,8 +614,10 @@ generic_run_one(struct translate_generic *tg, } else { index = elt; - /* clamp to avoid going out of bounds */ - index = MIN2(index, tg->attrib[attr].max_index); + if (index_size > 0) { + /* clamp to avoid going out of bounds */ + index = MIN2(index, tg->attrib[attr].max_index); + } } src = tg->attrib[attr].input_ptr + @@ -651,7 +654,7 @@ generic_run_one(struct translate_generic *tg, /** * Fetch vertex attributes for 'count' vertices. 
*/ -static void PIPE_CDECL +static void UTIL_CDECL generic_run_elts(struct translate *translate, const unsigned *elts, unsigned count, @@ -664,12 +667,12 @@ generic_run_elts(struct translate *translate, unsigned i; for (i = 0; i < count; i++) { - generic_run_one(tg, *elts++, start_instance, instance_id, vert); + generic_run_one(tg, *elts++, start_instance, instance_id, vert, 4); vert += tg->translate.key.output_stride; } } -static void PIPE_CDECL +static void UTIL_CDECL generic_run_elts16(struct translate *translate, const uint16_t *elts, unsigned count, @@ -682,12 +685,12 @@ generic_run_elts16(struct translate *translate, unsigned i; for (i = 0; i < count; i++) { - generic_run_one(tg, *elts++, start_instance, instance_id, vert); + generic_run_one(tg, *elts++, start_instance, instance_id, vert, 2); vert += tg->translate.key.output_stride; } } -static void PIPE_CDECL +static void UTIL_CDECL generic_run_elts8(struct translate *translate, const uint8_t *elts, unsigned count, @@ -700,12 +703,12 @@ generic_run_elts8(struct translate *translate, unsigned i; for (i = 0; i < count; i++) { - generic_run_one(tg, *elts++, start_instance, instance_id, vert); + generic_run_one(tg, *elts++, start_instance, instance_id, vert, 1); vert += tg->translate.key.output_stride; } } -static void PIPE_CDECL +static void UTIL_CDECL generic_run(struct translate *translate, unsigned start, unsigned count, @@ -718,7 +721,7 @@ generic_run(struct translate *translate, unsigned i; for (i = 0; i < count; i++) { - generic_run_one(tg, start + i, start_instance, instance_id, vert); + generic_run_one(tg, start + i, start_instance, instance_id, vert, 0); vert += tg->translate.key.output_stride; } } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_blitter.c b/lib/mesa/src/gallium/auxiliary/util/u_blitter.c index 4c0c960de..5552e04c7 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_blitter.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_blitter.c @@ -2412,6 +2412,7 @@ void util_blitter_clear_render_target(struct blitter_context *blitter, fb_state.nr_cbufs = 1; fb_state.cbufs[0] = dstsurf; fb_state.zsbuf = NULL; + fb_state.resolve = NULL; pipe->set_framebuffer_state(pipe, &fb_state); pipe->set_sample_mask(pipe, ~0); if (pipe->set_min_samples) @@ -2497,6 +2498,7 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, fb_state.nr_cbufs = 0; fb_state.cbufs[0] = NULL; fb_state.zsbuf = dstsurf; + fb_state.resolve = NULL; pipe->set_framebuffer_state(pipe, &fb_state); pipe->set_sample_mask(pipe, ~0); if (pipe->set_min_samples) @@ -2568,6 +2570,7 @@ void util_blitter_custom_depth_stencil(struct blitter_context *blitter, fb_state.nr_cbufs = 0; } fb_state.zsbuf = zsurf; + fb_state.resolve = NULL; pipe->set_framebuffer_state(pipe, &fb_state); pipe->set_sample_mask(pipe, sample_mask); if (pipe->set_min_samples) @@ -2706,6 +2709,7 @@ void util_blitter_custom_resolve_color(struct blitter_context *blitter, fb_state.cbufs[0] = srcsurf; fb_state.cbufs[1] = dstsurf; fb_state.zsbuf = NULL; + fb_state.resolve = NULL; pipe->set_framebuffer_state(pipe, &fb_state); blitter_set_common_draw_rect_state(ctx, false, @@ -2755,6 +2759,7 @@ void util_blitter_custom_color(struct blitter_context *blitter, fb_state.nr_cbufs = 1; fb_state.cbufs[0] = dstsurf; fb_state.zsbuf = NULL; + fb_state.resolve = NULL; pipe->set_framebuffer_state(pipe, &fb_state); pipe->set_sample_mask(pipe, ~0); if (pipe->set_min_samples) @@ -2818,6 +2823,7 @@ void util_blitter_custom_shader(struct blitter_context *blitter, fb_state.height = dstsurf->height; 
fb_state.nr_cbufs = 1; fb_state.cbufs[0] = dstsurf; + fb_state.resolve = NULL; pipe->set_framebuffer_state(pipe, &fb_state); pipe->set_sample_mask(pipe, ~0); if (pipe->set_min_samples) @@ -2914,6 +2920,7 @@ util_blitter_stencil_fallback(struct blitter_context *blitter, fb_state.width = dstbox->x + dstbox->width; fb_state.height = dstbox->y + dstbox->height; fb_state.zsbuf = dst_view; + fb_state.resolve = NULL; pipe->set_framebuffer_state(pipe, &fb_state); pipe->set_sample_mask(pipe, ~0); if (pipe->set_min_samples) diff --git a/lib/mesa/src/gallium/auxiliary/util/u_debug_flush.c b/lib/mesa/src/gallium/auxiliary/util/u_debug_flush.c index 55e6d7aa9..5aa8e4faf 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_debug_flush.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_debug_flush.c @@ -48,7 +48,7 @@ #include "util/list.h" #include "util/u_inlines.h" #include "util/u_string.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include <stdio.h> /* Future improvement: Use realloc instead? */ @@ -337,7 +337,7 @@ out_no_item: "for this command batch.\n"); } -static enum pipe_error +static int debug_flush_might_flush_cb(UNUSED void *key, void *value, void *data) { struct debug_flush_item *item = @@ -360,7 +360,7 @@ debug_flush_might_flush_cb(UNUSED void *key, void *value, void *data) } mtx_unlock(&fbuf->mutex); - return PIPE_OK; + return 0; } /** @@ -378,7 +378,7 @@ debug_flush_might_flush(struct debug_flush_ctx *fctx) "Might flush"); } -static enum pipe_error +static int debug_flush_flush_cb(UNUSED void *key, void *value, UNUSED void *data) { struct debug_flush_item *item = @@ -386,7 +386,7 @@ debug_flush_flush_cb(UNUSED void *key, void *value, UNUSED void *data) debug_flush_item_destroy(item); - return PIPE_OK; + return 0; } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_dirty_flags.h b/lib/mesa/src/gallium/auxiliary/util/u_dirty_flags.h deleted file mode 100644 index 40539f0b0..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_dirty_flags.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef U_DIRTY_FLAGS_H -#define U_DIRTY_FLAGS_H - -/* Here's a convenient list of dirty flags to use in a driver. Either - * include it directly or use it as a starting point for your own - * list. 
- */ -#define U_NEW_VIEWPORT 0x1 -#define U_NEW_RASTERIZER 0x2 -#define U_NEW_FS 0x4 -#define U_NEW_FS_CONSTANTS 0x8 -#define U_NEW_FS_SAMPLER_VIEW 0x10 -#define U_NEW_FS_SAMPLER_STATES 0x20 -#define U_NEW_VS 0x40 -#define U_NEW_VS_CONSTANTS 0x80 -#define U_NEW_VS_SAMPLER_VIEW 0x100 -#define U_NEW_VS_SAMPLER_STATES 0x200 -#define U_NEW_BLEND 0x400 -#define U_NEW_CLIP 0x800 -#define U_NEW_SCISSOR 0x1000 -#define U_NEW_POLYGON_STIPPLE 0x2000 -#define U_NEW_FRAMEBUFFER 0x4000 -#define U_NEW_VERTEX_ELEMENTS 0x8000 -#define U_NEW_VERTEX_BUFFER 0x10000 -#define U_NEW_QUERY 0x20000 -#define U_NEW_DEPTH_STENCIL 0x40000 -#define U_NEW_GS 0x80000 -#define U_NEW_GS_CONSTANTS 0x100000 -#define U_NEW_GS_SAMPLER_VIEW 0x200000 -#define U_NEW_GS_SAMPLER_STATES 0x400000 - -#endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_dirty_surfaces.h b/lib/mesa/src/gallium/auxiliary/util/u_dirty_surfaces.h deleted file mode 100644 index ccde8a8c1..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_dirty_surfaces.h +++ /dev/null @@ -1,119 +0,0 @@ -/************************************************************************** - * - * Copyright 2010 Luca Barbieri - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef U_DIRTY_SURFACES_H_ -#define U_DIRTY_SURFACES_H_ - -#include "pipe/p_state.h" - -#include "util/list.h" -#include "util/u_math.h" - -struct pipe_context; - -typedef void (*util_dirty_surface_flush_t) (struct pipe_context *, struct pipe_surface *); - -struct util_dirty_surfaces -{ - struct list_head dirty_list; -}; - -struct util_dirty_surface -{ - struct pipe_surface base; - struct list_head dirty_list; -}; - -static inline void -util_dirty_surfaces_init(struct util_dirty_surfaces *ds) -{ - LIST_INITHEAD(&ds->dirty_list); -} - -static inline void -util_dirty_surfaces_use_for_sampling(struct pipe_context *pipe, struct util_dirty_surfaces *dss, util_dirty_surface_flush_t flush) -{ - struct list_head *p, *next; - for(p = dss->dirty_list.next; p != &dss->dirty_list; p = next) - { - struct util_dirty_surface *ds = LIST_ENTRY(struct util_dirty_surface, p, dirty_list); - next = p->next; - - flush(pipe, &ds->base); - } -} - -static inline void -util_dirty_surfaces_use_levels_for_sampling(struct pipe_context *pipe, struct util_dirty_surfaces *dss, unsigned first, unsigned last, util_dirty_surface_flush_t flush) -{ - struct list_head *p, *next; - if(first > last) - return; - for(p = dss->dirty_list.next; p != &dss->dirty_list; p = next) - { - struct util_dirty_surface *ds = LIST_ENTRY(struct util_dirty_surface, p, dirty_list); - next = p->next; - - if(ds->base.u.tex.level >= first && ds->base.u.tex.level <= last) - flush(pipe, &ds->base); - } -} - -static inline void -util_dirty_surfaces_use_for_sampling_with(struct pipe_context *pipe, struct util_dirty_surfaces *dss, struct pipe_sampler_view *psv, struct pipe_sampler_state *pss, util_dirty_surface_flush_t flush) -{ - if(!LIST_IS_EMPTY(&dss->dirty_list)) - util_dirty_surfaces_use_levels_for_sampling(pipe, dss, (unsigned)pss->min_lod + psv->u.tex.first_level, - MIN2((unsigned)ceilf(pss->max_lod) + psv->u.tex.first_level, psv->u.tex.last_level), flush); -} - -static inline void -util_dirty_surface_init(struct util_dirty_surface *ds) -{ - LIST_INITHEAD(&ds->dirty_list); -} - -static inline boolean -util_dirty_surface_is_dirty(struct util_dirty_surface *ds) -{ - return !LIST_IS_EMPTY(&ds->dirty_list); -} - -static inline void -util_dirty_surface_set_dirty(struct util_dirty_surfaces *dss, struct util_dirty_surface *ds) -{ - if(LIST_IS_EMPTY(&ds->dirty_list)) - LIST_ADDTAIL(&ds->dirty_list, &dss->dirty_list); -} - -static inline void -util_dirty_surface_set_clean(struct util_dirty_surfaces *dss, struct util_dirty_surface *ds) -{ - if(!LIST_IS_EMPTY(&ds->dirty_list)) - LIST_DELINIT(&ds->dirty_list); -} - -#endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_draw.c b/lib/mesa/src/gallium/auxiliary/util/u_draw.c index ed1e294a5..aac16c0f4 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_draw.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_draw.c @@ -115,7 +115,7 @@ util_draw_max_index( * indices/instances and simply start clamping against buffer * size. 
*/ debug_printf("%s: too many instances for vertex buffer\n", - __FUNCTION__); + __func__); return 0; } } @@ -147,7 +147,7 @@ util_draw_indirect_read(struct pipe_context *pipe, indirect->indirect_draw_count_offset, 4, PIPE_MAP_READ, &dc_transfer); if (!dc_transfer) { - debug_printf("%s: failed to map indirect draw count buffer\n", __FUNCTION__); + debug_printf("%s: failed to map indirect draw count buffer\n", __func__); return NULL; } draw_count = dc_param[0]; @@ -169,7 +169,7 @@ util_draw_indirect_read(struct pipe_context *pipe, PIPE_MAP_READ, &transfer); if (!transfer) { - debug_printf("%s: failed to map indirect buffer\n", __FUNCTION__); + debug_printf("%s: failed to map indirect buffer\n", __func__); free(draws); return NULL; } @@ -215,7 +215,7 @@ util_draw_indirect(struct pipe_context *pipe, indirect->indirect_draw_count_offset, 4, PIPE_MAP_READ, &dc_transfer); if (!dc_transfer) { - debug_printf("%s: failed to map indirect draw count buffer\n", __FUNCTION__); + debug_printf("%s: failed to map indirect draw count buffer\n", __func__); return; } if (dc_param[0] < draw_count) @@ -223,6 +223,9 @@ util_draw_indirect(struct pipe_context *pipe, pipe_buffer_unmap(pipe, dc_transfer); } + if (!draw_count) + return; + if (indirect->stride) num_params = MIN2(indirect->stride / 4, num_params); params = (uint32_t *) @@ -233,7 +236,7 @@ util_draw_indirect(struct pipe_context *pipe, PIPE_MAP_READ, &transfer); if (!transfer) { - debug_printf("%s: failed to map indirect buffer\n", __FUNCTION__); + debug_printf("%s: failed to map indirect buffer\n", __func__); return; } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_helpers.c b/lib/mesa/src/gallium/auxiliary/util/u_helpers.c index e0e91aab4..2b5350015 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_helpers.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_helpers.c @@ -32,6 +32,7 @@ #include "util/u_upload_mgr.h" #include "util/u_thread.h" #include "util/os_time.h" +#include "util/perf/cpu_trace.h" #include <inttypes.h> /** @@ -396,6 +397,8 @@ util_throttle_memory_usage(struct pipe_context *pipe, if (!t->max_mem_usage) return; + MESA_TRACE_FUNC(); + struct pipe_screen *screen = pipe->screen; struct pipe_fence_handle **fence = NULL; unsigned ring_size = ARRAY_SIZE(t->ring); @@ -460,6 +463,21 @@ util_throttle_memory_usage(struct pipe_context *pipe, t->ring[t->flush_index].mem_usage += memory_size; } +void +util_sw_query_memory_info(struct pipe_screen *pscreen, + struct pipe_memory_info *info) +{ + /* Provide query_memory_info from CPU reported memory */ + uint64_t size; + + if (!os_get_available_system_memory(&size)) + return; + info->avail_staging_memory = size / 1024; + if (!os_get_total_physical_memory(&size)) + return; + info->total_staging_memory = size / 1024; +} + bool util_lower_clearsize_to_dword(const void *clearValue, int *clearValueSize, uint32_t *clamped) { diff --git a/lib/mesa/src/gallium/auxiliary/util/u_helpers.h b/lib/mesa/src/gallium/auxiliary/util/u_helpers.h index 299c67980..2d12d6f17 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_helpers.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_helpers.h @@ -117,6 +117,8 @@ void util_throttle_init(struct util_throttle *t, uint64_t max_mem_usage); void util_throttle_deinit(struct pipe_screen *screen, struct util_throttle *t); void util_throttle_memory_usage(struct pipe_context *pipe, struct util_throttle *t, uint64_t memory_size); +void util_sw_query_memory_info(struct pipe_screen *pscreen, + struct pipe_memory_info *info); bool util_lower_clearsize_to_dword(const void *clearValue, int 
*clearValueSize, uint32_t *clamped); diff --git a/lib/mesa/src/gallium/auxiliary/util/u_inlines.h b/lib/mesa/src/gallium/auxiliary/util/u_inlines.h index 1e6ec06c9..f42f368e2 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_inlines.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_inlines.h @@ -891,13 +891,17 @@ util_writes_stencil(const struct pipe_stencil_state *s) } static inline bool -util_writes_depth_stencil(const struct pipe_depth_stencil_alpha_state *zsa) +util_writes_depth(const struct pipe_depth_stencil_alpha_state *zsa) { - if (zsa->depth_enabled && zsa->depth_writemask && - (zsa->depth_func != PIPE_FUNC_NEVER)) - return true; + return zsa->depth_enabled && zsa->depth_writemask && + (zsa->depth_func != PIPE_FUNC_NEVER); +} - return util_writes_stencil(&zsa->stencil[0]) || +static inline bool +util_writes_depth_stencil(const struct pipe_depth_stencil_alpha_state *zsa) +{ + return util_writes_depth(zsa) || + util_writes_stencil(&zsa->stencil[0]) || util_writes_stencil(&zsa->stencil[1]); } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_linear.c b/lib/mesa/src/gallium/auxiliary/util/u_linear.c deleted file mode 100644 index f1aef2167..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_linear.c +++ /dev/null @@ -1,101 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Functions for converting tiled data to linear and vice versa. 
- */ - - -#include "util/u_debug.h" -#include "u_linear.h" - -void -pipe_linear_to_tile(size_t src_stride, const void *src_ptr, - struct pipe_tile_info *t, void *dst_ptr) -{ - int x, y, z; - char *ptr; - size_t bytes = t->cols * t->block.size; - char *dst_ptr2 = (char *) dst_ptr; - - assert(pipe_linear_check_tile(t)); - - /* lets write lineary to the tiled buffer */ - for (y = 0; y < t->tiles_y; y++) { - for (x = 0; x < t->tiles_x; x++) { - /* this inner loop could be replace with SSE magic */ - ptr = (char*)src_ptr + src_stride * t->rows * y + bytes * x; - for (z = 0; z < t->rows; z++) { - memcpy(dst_ptr2, ptr, bytes); - dst_ptr2 += bytes; - ptr += src_stride; - } - } - } -} - -void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr, - size_t dst_stride, void *dst_ptr) -{ - int x, y, z; - char *ptr; - size_t bytes = t->cols * t->block.size; - const char *src_ptr2 = (const char *) src_ptr; - - /* lets read lineary from the tiled buffer */ - for (y = 0; y < t->tiles_y; y++) { - for (x = 0; x < t->tiles_x; x++) { - /* this inner loop could be replace with SSE magic */ - ptr = (char*)dst_ptr + dst_stride * t->rows * y + bytes * x; - for (z = 0; z < t->rows; z++) { - memcpy(ptr, src_ptr2, bytes); - src_ptr2 += bytes; - ptr += dst_stride; - } - } - } -} - -void -pipe_linear_fill_info(struct pipe_tile_info *t, - const struct u_linear_format_block *block, - unsigned tile_width, unsigned tile_height, - unsigned tiles_x, unsigned tiles_y) -{ - t->block = *block; - - t->tile.width = tile_width; - t->tile.height = tile_height; - t->cols = t->tile.width / t->block.width; - t->rows = t->tile.height / t->block.height; - t->tile.size = t->cols * t->rows * t->block.size; - - t->tiles_x = tiles_x; - t->tiles_y = tiles_y; - t->stride = t->cols * t->tiles_x * t->block.size; - t->size = t->tiles_x * t->tiles_y * t->tile.size; -} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_linear.h b/lib/mesa/src/gallium/auxiliary/util/u_linear.h deleted file mode 100644 index 87e52a344..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_linear.h +++ /dev/null @@ -1,106 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Functions for converting tiled data to linear and vice versa. 
- */ - - -#ifndef U_LINEAR_H -#define U_LINEAR_H - -#include "pipe/p_compiler.h" -#include "pipe/p_format.h" - -struct u_linear_format_block -{ - /** Block size in bytes */ - unsigned size; - - /** Block width in pixels */ - unsigned width; - - /** Block height in pixels */ - unsigned height; -}; - - -struct pipe_tile_info -{ - unsigned size; - unsigned stride; - - /* The number of tiles */ - unsigned tiles_x; - unsigned tiles_y; - - /* size of each tile expressed in blocks */ - unsigned cols; - unsigned rows; - - /* Describe the tile in pixels */ - struct u_linear_format_block tile; - - /* Describe each block within the tile */ - struct u_linear_format_block block; -}; - -void pipe_linear_to_tile(size_t src_stride, const void *src_ptr, - struct pipe_tile_info *t, void *dst_ptr); - -void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr, - size_t dst_stride, void *dst_ptr); - -/** - * Convenience function to fillout a pipe_tile_info struct. - * @t info to fill out. - * @block block info about pixel layout - * @tile_width the width of the tile in pixels - * @tile_height the height of the tile in pixels - * @tiles_x number of tiles in x axis - * @tiles_y number of tiles in y axis - */ -void pipe_linear_fill_info(struct pipe_tile_info *t, - const struct u_linear_format_block *block, - unsigned tile_width, unsigned tile_height, - unsigned tiles_x, unsigned tiles_y); - -static inline boolean pipe_linear_check_tile(const struct pipe_tile_info *t) -{ - if (t->tile.size != t->block.size * t->cols * t->rows) - return FALSE; - - if (t->stride != t->block.size * t->cols * t->tiles_x) - return FALSE; - - if (t->size < t->stride * t->rows * t->tiles_y) - return FALSE; - - return TRUE; -} - -#endif /* U_LINEAR_H */ diff --git a/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.c b/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.c index 85ce9f174..82fca6788 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.c @@ -90,84 +90,6 @@ util_translate_prim_restart_data(unsigned index_size, } } -/** - * Translate an index buffer for primitive restart. - * Create a new index buffer which is a copy of the original index buffer - * except that instances of 'restart_index' are converted to 0xffff or - * 0xffffffff. - * Also, index buffers using 1-byte indexes are converted to 2-byte indexes. 
- */ -enum pipe_error -util_translate_prim_restart_ib(struct pipe_context *context, - const struct pipe_draw_info *info, - const struct pipe_draw_indirect_info *indirect_info, - const struct pipe_draw_start_count_bias *draw, - struct pipe_resource **dst_buffer) -{ - struct pipe_screen *screen = context->screen; - struct pipe_transfer *src_transfer = NULL, *dst_transfer = NULL; - void *src_map = NULL, *dst_map = NULL; - const unsigned src_index_size = info->index_size; - unsigned dst_index_size; - DrawElementsIndirectCommand indirect; - unsigned count = draw->count; - unsigned start = draw->start; - - /* 1-byte indexes are converted to 2-byte indexes, 4-byte stays 4-byte */ - dst_index_size = MAX2(2, info->index_size); - assert(dst_index_size == 2 || dst_index_size == 4); - - if (indirect_info && indirect_info->buffer) { - indirect = read_indirect_elements(context, indirect_info); - count = indirect.count; - start = indirect.firstIndex; - } - - /* Create new index buffer */ - *dst_buffer = pipe_buffer_create(screen, PIPE_BIND_INDEX_BUFFER, - PIPE_USAGE_STREAM, - count * dst_index_size); - if (!*dst_buffer) - goto error; - - /* Map new / dest index buffer */ - dst_map = pipe_buffer_map(context, *dst_buffer, - PIPE_MAP_WRITE, &dst_transfer); - if (!dst_map) - goto error; - - if (info->has_user_indices) - src_map = (unsigned char*)info->index.user + start * src_index_size; - else - /* Map original / src index buffer */ - src_map = pipe_buffer_map_range(context, info->index.resource, - start * src_index_size, - count * src_index_size, - PIPE_MAP_READ, - &src_transfer); - if (!src_map) - goto error; - - util_translate_prim_restart_data(src_index_size, src_map, dst_map, - count, info->restart_index); - - if (src_transfer) - pipe_buffer_unmap(context, src_transfer); - pipe_buffer_unmap(context, dst_transfer); - - return PIPE_OK; - -error: - if (src_transfer) - pipe_buffer_unmap(context, src_transfer); - if (dst_transfer) - pipe_buffer_unmap(context, dst_transfer); - if (*dst_buffer) - pipe_resource_reference(dst_buffer, NULL); - return PIPE_ERROR_OUT_OF_MEMORY; -} - - /** Helper structs for util_draw_vbo_without_prim_restart() */ struct range_info { diff --git a/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.h b/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.h index eb06b8e77..45038d468 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.h @@ -46,13 +46,6 @@ util_translate_prim_restart_data(unsigned index_size, void *src_map, void *dst_map, unsigned count, unsigned restart_index); -enum pipe_error -util_translate_prim_restart_ib(struct pipe_context *context, - const struct pipe_draw_info *info, - const struct pipe_draw_indirect_info *indirect, - const struct pipe_draw_start_count_bias *draw, - struct pipe_resource **dst_buffer); - struct pipe_draw_start_count_bias * util_prim_restart_convert_to_direct(const void *index_map, const struct pipe_draw_info *info, diff --git a/lib/mesa/src/gallium/auxiliary/util/u_range.h b/lib/mesa/src/gallium/auxiliary/util/u_range.h index 90dc80bbc..1ade98381 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_range.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_range.h @@ -34,7 +34,7 @@ #ifndef U_RANGE_H #define U_RANGE_H -#include "os/os_thread.h" +#include "util/u_thread.h" #include "pipe/p_state.h" #include "pipe/p_screen.h" #include "util/u_atomic.h" diff --git a/lib/mesa/src/gallium/auxiliary/util/u_surface.c b/lib/mesa/src/gallium/auxiliary/util/u_surface.c index af406e826..cd51fd34b 
100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_surface.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_surface.c @@ -783,9 +783,11 @@ util_can_blit_via_copy_region(const struct pipe_blit_info *blit, } else { /* do loose format compatibility checking */ - if (blit->src.resource->format != blit->src.format || - blit->dst.resource->format != blit->dst.format || - !util_is_format_compatible(src_desc, dst_desc)) { + if ((blit->src.format != blit->dst.format || + src_desc != dst_desc) && + (blit->src.resource->format != blit->src.format || + blit->dst.resource->format != blit->dst.format || + !util_is_format_compatible(src_desc, dst_desc))) { return FALSE; } } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_tests.c b/lib/mesa/src/gallium/auxiliary/util/u_tests.c index aab3ca52c..bec15df2a 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_tests.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_tests.c @@ -516,7 +516,7 @@ disabled_fragment_shader(struct pipe_context *ctx) util_report_result(qresult.u64 == 2); } -#if defined(PIPE_OS_LINUX) && defined(HAVE_LIBDRM) +#if DETECT_OS_LINUX && defined(HAVE_LIBDRM) #include <libsync.h> #else #define sync_merge(str, fd1, fd2) (-1) @@ -594,7 +594,7 @@ test_sync_file_fences(struct pipe_context *ctx) pass = pass && screen->fence_finish(screen, NULL, final_fence, 0); /* Cleanup. */ -#ifndef PIPE_OS_WINDOWS +#if !DETECT_OS_WINDOWS if (buf_fd >= 0) close(buf_fd); if (tex_fd >= 0) diff --git a/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.c b/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.c index 6b3929d89..828b8847b 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.c @@ -116,29 +116,70 @@ tc_clear_driver_thread(struct threaded_context *tc) #endif } +struct tc_batch_rp_info { + /* this is what drivers can see */ + struct tc_renderpass_info info; + /* determines whether the info can be "safely" read by drivers or if it may still be in use */ + struct util_queue_fence ready; + /* when a batch is full, the rp info rollsover onto 'next' */ + struct tc_batch_rp_info *next; + /* when rp info has rolled over onto this struct, 'prev' is used to update pointers for realloc */ + struct tc_batch_rp_info *prev; +}; + +static struct tc_batch_rp_info * +tc_batch_rp_info(struct tc_renderpass_info *info) +{ + return (struct tc_batch_rp_info *)info; +} + +static void +tc_sanitize_renderpass_info(struct threaded_context *tc) +{ + tc->renderpass_info_recording->cbuf_invalidate = 0; + tc->renderpass_info_recording->zsbuf_invalidate = false; + tc->renderpass_info_recording->cbuf_load |= (~tc->renderpass_info_recording->cbuf_clear) & BITFIELD_MASK(PIPE_MAX_COLOR_BUFS); + if (tc->fb_resources[PIPE_MAX_COLOR_BUFS] && !tc_renderpass_info_is_zsbuf_used(tc->renderpass_info_recording)) + /* this should be a "safe" way to indicate to the driver that both loads and stores are required; + * driver can always detect invalidation + */ + tc->renderpass_info_recording->zsbuf_clear_partial = true; + if (tc->num_queries_active) + tc->renderpass_info_recording->has_query_ends = true; +} + /* ensure the batch's array of renderpass data is large enough for the current index */ static void -tc_batch_renderpass_infos_resize(struct tc_batch *batch) +tc_batch_renderpass_infos_resize(struct threaded_context *tc, struct tc_batch *batch) { unsigned size = batch->renderpass_infos.capacity; - unsigned cur_num = batch->renderpass_info_idx; + unsigned cur_num = MAX2(batch->renderpass_info_idx, 0); - if (size / 
sizeof(struct tc_renderpass_info) > cur_num) + if (size / sizeof(struct tc_batch_rp_info) > cur_num) return; - if (!util_dynarray_resize(&batch->renderpass_infos, struct tc_renderpass_info, cur_num + 10)) + struct tc_batch_rp_info *infos = batch->renderpass_infos.data; + unsigned old_idx = batch->renderpass_info_idx - 1; + bool redo = tc->renderpass_info_recording && + tc->renderpass_info_recording == &infos[old_idx].info; + if (!util_dynarray_resize(&batch->renderpass_infos, struct tc_batch_rp_info, cur_num + 10)) mesa_loge("tc: memory alloc fail!"); if (size != batch->renderpass_infos.capacity) { /* zero new allocation region */ uint8_t *data = batch->renderpass_infos.data; memset(data + size, 0, batch->renderpass_infos.capacity - size); - unsigned start = size / sizeof(struct tc_renderpass_info); + unsigned start = size / sizeof(struct tc_batch_rp_info); unsigned count = (batch->renderpass_infos.capacity - size) / - sizeof(struct tc_renderpass_info); - struct tc_renderpass_info *infos = batch->renderpass_infos.data; + sizeof(struct tc_batch_rp_info); + infos = batch->renderpass_infos.data; + if (infos->prev) + infos->prev->next = infos; for (unsigned i = 0; i < count; i++) util_queue_fence_init(&infos[start + i].ready); + /* re-set current recording info on resize */ + if (redo) + tc->renderpass_info_recording = &infos[old_idx].info; } } @@ -147,43 +188,75 @@ static void tc_signal_renderpass_info_ready(struct threaded_context *tc) { if (tc->renderpass_info_recording && - !util_queue_fence_is_signalled(&tc->renderpass_info_recording->ready)) - util_queue_fence_signal(&tc->renderpass_info_recording->ready); + !util_queue_fence_is_signalled(&tc_batch_rp_info(tc->renderpass_info_recording)->ready)) + util_queue_fence_signal(&tc_batch_rp_info(tc->renderpass_info_recording)->ready); } /* increment the current renderpass info struct for recording * 'full_copy' is used for preserving data across non-blocking tc batch flushes */ static void -tc_batch_increment_renderpass_info(struct threaded_context *tc, bool full_copy) +tc_batch_increment_renderpass_info(struct threaded_context *tc, unsigned batch_idx, bool full_copy) { - struct tc_batch *batch = &tc->batch_slots[tc->next]; - struct tc_renderpass_info *tc_info = batch->renderpass_infos.data; + struct tc_batch *batch = &tc->batch_slots[batch_idx]; + struct tc_batch_rp_info *tc_info = batch->renderpass_infos.data; - /* signal existing info since it will not be used anymore */ - tc_signal_renderpass_info_ready(tc); + if (tc_info[0].next || batch->num_total_slots) { + /* deadlock condition detected: all batches are in flight, renderpass hasn't ended + * (probably a cts case) + */ + struct tc_batch_rp_info *info = tc_batch_rp_info(tc->renderpass_info_recording); + if (!util_queue_fence_is_signalled(&info->ready)) { + /* this batch is actively executing and the driver is waiting on the recording fence to signal */ + /* force all buffer usage to avoid data loss */ + info->info.cbuf_load = ~(BITFIELD_MASK(8) & info->info.cbuf_clear); + info->info.zsbuf_clear_partial = true; + info->info.has_query_ends = tc->num_queries_active > 0; + /* ensure threaded_context_get_renderpass_info() won't deadlock */ + info->next = NULL; + util_queue_fence_signal(&info->ready); + } + /* always wait on the batch to finish since this will otherwise overwrite thread data */ + util_queue_fence_wait(&batch->fence); + } /* increment rp info and initialize it */ batch->renderpass_info_idx++; - tc_batch_renderpass_infos_resize(batch); + tc_batch_renderpass_infos_resize(tc, 
batch); tc_info = batch->renderpass_infos.data; if (full_copy) { + /* this should only be called when changing batches */ + assert(batch->renderpass_info_idx == 0); /* copy the previous data in its entirety: this is still the same renderpass */ - if (tc->renderpass_info_recording) - tc_info[batch->renderpass_info_idx].data = tc->renderpass_info_recording->data; - else - tc_info[batch->renderpass_info_idx].data = 0; + if (tc->renderpass_info_recording) { + tc_info[batch->renderpass_info_idx].info.data = tc->renderpass_info_recording->data; + tc_batch_rp_info(tc->renderpass_info_recording)->next = &tc_info[batch->renderpass_info_idx]; + tc_info[batch->renderpass_info_idx].prev = tc_batch_rp_info(tc->renderpass_info_recording); + /* guard against deadlock scenario */ + assert(&tc_batch_rp_info(tc->renderpass_info_recording)->next->info != tc->renderpass_info_recording); + } else { + tc_info[batch->renderpass_info_idx].info.data = 0; + tc_info[batch->renderpass_info_idx].prev = NULL; + } } else { /* selectively copy: only the CSO metadata is copied, and a new framebuffer state will be added later */ - tc_info[batch->renderpass_info_idx].data = 0; - if (tc->renderpass_info_recording) - tc_info[batch->renderpass_info_idx].data16[2] = tc->renderpass_info_recording->data16[2]; + tc_info[batch->renderpass_info_idx].info.data = 0; + if (tc->renderpass_info_recording) { + tc_info[batch->renderpass_info_idx].info.data16[2] = tc->renderpass_info_recording->data16[2]; + tc_batch_rp_info(tc->renderpass_info_recording)->next = NULL; + tc_info[batch->renderpass_info_idx].prev = NULL; + } } + assert(!full_copy || !tc->renderpass_info_recording || tc_batch_rp_info(tc->renderpass_info_recording)->next); + /* signal existing info since it will not be used anymore */ + tc_signal_renderpass_info_ready(tc); util_queue_fence_reset(&tc_info[batch->renderpass_info_idx].ready); - assert(tc->renderpass_info_recording != &tc_info[batch->renderpass_info_idx]); + /* guard against deadlock scenario */ + assert(tc->renderpass_info_recording != &tc_info[batch->renderpass_info_idx].info); /* this is now the current recording renderpass info */ - tc->renderpass_info_recording = &tc_info[batch->renderpass_info_idx]; + tc->renderpass_info_recording = &tc_info[batch->renderpass_info_idx].info; + batch->max_renderpass_info_idx = batch->renderpass_info_idx; } static ALWAYS_INLINE struct tc_renderpass_info * @@ -207,10 +280,12 @@ tc_parse_draw(struct threaded_context *tc) info->cbuf_invalidate = 0; info->zsbuf_invalidate = false; info->has_draw = true; + info->has_query_ends |= tc->query_ended; } tc->in_renderpass = true; tc->seen_fb_state = true; + tc->query_ended = false; } static void * @@ -291,6 +366,13 @@ tc_drop_vertex_state_references(struct pipe_vertex_state *dst, int num_refs) #define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \ offsetof(struct pipe_draw_info, min_index) +ALWAYS_INLINE static struct tc_renderpass_info * +incr_rp_info(struct tc_renderpass_info *tc_info) +{ + struct tc_batch_rp_info *info = tc_batch_rp_info(tc_info); + return &info[1].info; +} + ALWAYS_INLINE static void batch_execute(struct tc_batch *batch, struct pipe_context *pipe, uint64_t *last, bool parsing) { @@ -314,7 +396,7 @@ batch_execute(struct tc_batch *batch, struct pipe_context *pipe, uint64_t *last, if (parsing) { if (call->call_id == TC_CALL_flush) { /* always increment renderpass info for non-deferred flushes */ - batch->tc->renderpass_info++; + batch->tc->renderpass_info = incr_rp_info(batch->tc->renderpass_info); /* if a flush happens, 
renderpass info is always incremented after */ first = false; } else if (call->call_id == TC_CALL_set_framebuffer_state) { @@ -322,7 +404,7 @@ batch_execute(struct tc_batch *batch, struct pipe_context *pipe, uint64_t *last, * so don't increment on the first set_framebuffer_state call */ if (!first) - batch->tc->renderpass_info++; + batch->tc->renderpass_info = incr_rp_info(batch->tc->renderpass_info); first = false; } else if (call->call_id >= TC_CALL_draw_single && call->call_id <= TC_CALL_draw_vstate_multi) { @@ -350,10 +432,18 @@ tc_batch_execute(void *job, UNUSED void *gdata, int thread_index) /* setup renderpass info */ batch->tc->renderpass_info = batch->renderpass_infos.data; - if (batch->tc->options.parse_renderpass_info) + if (batch->tc->options.parse_renderpass_info) { batch_execute(batch, pipe, last, true); - else + + struct tc_batch_rp_info *info = batch->renderpass_infos.data; + for (unsigned i = 0; i < batch->max_renderpass_info_idx + 1; i++) { + if (info[i].next) + info[i].next->prev = NULL; + info[i].next = NULL; + } + } else { batch_execute(batch, pipe, last, false); + } /* Add the fence to the list of fences for the driver to signal at the next * flush, which we use for tracking which buffers are referenced by @@ -383,6 +473,7 @@ tc_batch_execute(void *job, UNUSED void *gdata, int thread_index) batch->num_total_slots = 0; batch->last_mergeable_call = NULL; batch->first_set_fb = false; + batch->max_renderpass_info_idx = 0; } static void @@ -406,6 +497,7 @@ static void tc_batch_flush(struct threaded_context *tc, bool full_copy) { struct tc_batch *next = &tc->batch_slots[tc->next]; + unsigned next_id = (tc->next + 1) % TC_MAX_BATCHES; tc_assert(next->num_total_slots != 0); tc_batch_check(next); @@ -420,19 +512,20 @@ tc_batch_flush(struct threaded_context *tc, bool full_copy) /* reset renderpass info index for subsequent use */ next->renderpass_info_idx = -1; - util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute, - NULL, 0); - tc->last = tc->next; - tc->next = (tc->next + 1) % TC_MAX_BATCHES; - tc_begin_next_buffer_list(tc); - /* always increment renderpass info on batch flush; * renderpass info can only be accessed by its owner batch during execution */ if (tc->renderpass_info_recording) { - tc->batch_slots[tc->next].first_set_fb = full_copy; - tc_batch_increment_renderpass_info(tc, full_copy); + tc->batch_slots[next_id].first_set_fb = full_copy; + tc_batch_increment_renderpass_info(tc, next_id, full_copy); } + + util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute, + NULL, 0); + tc->last = tc->next; + tc->next = next_id; + tc_begin_next_buffer_list(tc); + } /* This is the function that adds variable-sized calls into the current @@ -553,6 +646,18 @@ _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char tc_debug_check(tc); + if (tc->options.parse_renderpass_info && tc->in_renderpass && !tc->flushing) { + /* corner case: if tc syncs for any reason but a driver flush during a renderpass, + * then the current renderpass info MUST be signaled to avoid deadlocking the driver + * + * this is not a "complete" signal operation, however, as it's unknown what calls may + * come after this one, which means that framebuffer attachment data is unreliable + * + * to avoid erroneously passing bad state to the driver (e.g., allowing zsbuf elimination), + * force all attachments active and assume the app was going to get bad perf here anyway + */ + tc_sanitize_renderpass_info(tc); + } tc_signal_renderpass_info_ready(tc); /* Only wait 
for queued calls... */ @@ -590,12 +695,18 @@ _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char if (tc->options.parse_renderpass_info) { int renderpass_info_idx = next->renderpass_info_idx; if (renderpass_info_idx > 0) { + /* don't reset if fb state is unflushed */ + bool fb_no_draw = tc->seen_fb_state && !tc->renderpass_info_recording->has_draw; + uint32_t fb_info = tc->renderpass_info_recording->data32[0]; next->renderpass_info_idx = -1; - tc_batch_increment_renderpass_info(tc, false); + tc_batch_increment_renderpass_info(tc, tc->next, false); + if (fb_no_draw) + tc->renderpass_info_recording->data32[0] = fb_info; } else if (tc->renderpass_info_recording->has_draw) { tc->renderpass_info_recording->data32[0] = 0; } tc->seen_fb_state = false; + tc->query_ended = false; } MESA_TRACE_END(); @@ -632,40 +743,10 @@ threaded_context_flush(struct pipe_context *_pipe, } } -/* Must be called before TC binds, maps, invalidates, or adds a buffer to a buffer list. */ -static void tc_touch_buffer(struct threaded_context *tc, struct threaded_resource *buf) -{ - const struct threaded_context *first_user = buf->first_user; - - /* Fast path exit to avoid additional branches */ - if (likely(first_user == tc)) - return; - - if (!first_user) - first_user = p_atomic_cmpxchg_ptr(&buf->first_user, NULL, tc); - - /* The NULL check might seem unnecessary here but it's actually critical: - * p_atomic_cmpxchg will return NULL if it succeeds, meaning that NULL is - * equivalent to "we're the first user" here. (It's equally important not - * to ignore the result of the cmpxchg above, since it might fail.) - * Without the NULL check, we'd set the flag unconditionally, which is bad. - */ - if (first_user && first_user != tc && !buf->used_by_multiple_contexts) - buf->used_by_multiple_contexts = true; -} - -static bool tc_is_buffer_shared(struct threaded_resource *buf) -{ - return buf->is_shared || buf->used_by_multiple_contexts; -} - static void tc_add_to_buffer_list(struct threaded_context *tc, struct tc_buffer_list *next, struct pipe_resource *buf) { - struct threaded_resource *tbuf = threaded_resource(buf); - tc_touch_buffer(tc, tbuf); - - uint32_t id = tbuf->buffer_id_unique; + uint32_t id = threaded_resource(buf)->buffer_id_unique; BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK); } @@ -673,10 +754,7 @@ tc_add_to_buffer_list(struct threaded_context *tc, struct tc_buffer_list *next, static void tc_bind_buffer(struct threaded_context *tc, uint32_t *binding, struct tc_buffer_list *next, struct pipe_resource *buf) { - struct threaded_resource *tbuf = threaded_resource(buf); - tc_touch_buffer(tc, tbuf); - - uint32_t id = tbuf->buffer_id_unique; + uint32_t id = threaded_resource(buf)->buffer_id_unique; *binding = id; BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK); } @@ -934,8 +1012,6 @@ threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage) { struct threaded_resource *tres = threaded_resource(res); - tres->first_user = NULL; - tres->used_by_multiple_contexts = false; tres->latest = &tres->b; tres->cpu_storage = NULL; util_range_init(&tres->valid_buffer_range); @@ -1084,6 +1160,7 @@ static bool tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query) { struct threaded_context *tc = threaded_context(_pipe); + tc->num_queries_active++; tc_add_call(tc, TC_CALL_begin_query, tc_query_call)->query = query; return true; /* we don't care about the return value for this call */ @@ -1115,11 +1192,13 @@ tc_end_query(struct pipe_context *_pipe, struct pipe_query 
*query) struct threaded_query *tq = threaded_query(query); struct tc_end_query_call *call = tc_add_call(tc, TC_CALL_end_query, tc_end_query_call); + tc->num_queries_active--; call->tc = tc; call->query = query; tq->flushed = false; + tc->query_ended = true; return true; /* we don't care about the return value for this call */ } @@ -1363,6 +1442,7 @@ tc_call_set_framebuffer_state(struct pipe_context *pipe, void *call, uint64_t *l for (unsigned i = 0; i < nr_cbufs; i++) tc_drop_surface_reference(p->cbufs[i]); tc_drop_surface_reference(p->zsbuf); + tc_drop_resource_reference(p->resolve); return call_size(tc_framebuffer); } @@ -1383,6 +1463,13 @@ tc_set_framebuffer_state(struct pipe_context *_pipe, if (tc->options.parse_renderpass_info) { + /* ensure this is treated as the first fb set if no fb activity has occurred */ + if (!tc->renderpass_info_recording->has_draw && + !tc->renderpass_info_recording->cbuf_clear && + !tc->renderpass_info_recording->cbuf_load && + !tc->renderpass_info_recording->zsbuf_load && + !tc->renderpass_info_recording->zsbuf_clear_partial) + tc->batch_slots[tc->next].first_set_fb = false; /* store existing zsbuf data for possible persistence */ uint8_t zsbuf = tc->renderpass_info_recording->has_draw ? 0 : @@ -1400,9 +1487,10 @@ tc_set_framebuffer_state(struct pipe_context *_pipe, sizeof(void*) * (PIPE_MAX_COLOR_BUFS - nr_cbufs)); tc->fb_resources[PIPE_MAX_COLOR_BUFS] = fb->zsbuf ? fb->zsbuf->texture : NULL; + tc->fb_resolve = fb->resolve; if (tc->seen_fb_state) { /* this is the end of a renderpass, so increment the renderpass info */ - tc_batch_increment_renderpass_info(tc, false); + tc_batch_increment_renderpass_info(tc, tc->next, false); /* if zsbuf hasn't changed (i.e., possibly just adding a color buffer): * keep zsbuf usage data */ @@ -1425,6 +1513,8 @@ tc_set_framebuffer_state(struct pipe_context *_pipe, tc->in_renderpass = false; p->state.zsbuf = NULL; pipe_surface_reference(&p->state.zsbuf, fb->zsbuf); + p->state.resolve = NULL; + pipe_resource_reference(&p->state.resolve, fb->resolve); } struct tc_tess_state { @@ -2323,9 +2413,7 @@ tc_call_replace_buffer_storage(struct pipe_context *pipe, void *call, uint64_t * return call_size(tc_replace_buffer_storage); } -/* Return true if the buffer has been invalidated or is idle. - * Note that callers must've called tc_touch_buffer before calling - * this function. */ +/* Return true if the buffer has been invalidated or is idle. */ static bool tc_invalidate_buffer(struct threaded_context *tc, struct threaded_resource *tbuf) @@ -2346,7 +2434,7 @@ tc_invalidate_buffer(struct threaded_context *tc, struct pipe_resource *new_buf; /* Shared, pinned, and sparse buffers can't be reallocated. */ - if (tc_is_buffer_shared(tbuf) || + if (tbuf->is_shared || tbuf->is_user_ptr || tbuf->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE)) return false; @@ -2391,8 +2479,6 @@ tc_invalidate_buffer(struct threaded_context *tc, return true; } -/* Note that callers must've called tc_touch_buffer first before - * calling tc_improve_map_buffer_flags. */ static unsigned tc_improve_map_buffer_flags(struct threaded_context *tc, struct threaded_resource *tres, unsigned usage, @@ -2507,14 +2593,6 @@ tc_buffer_map(struct pipe_context *_pipe, if (usage & PIPE_MAP_THREAD_SAFE) tc_buffer_disable_cpu_storage(resource); - tc_touch_buffer(tc, tres); - - /* CPU storage relies on buffer invalidation never failing. With shared buffers, - * invalidation might not always be possible, so CPU storage can't be used. 
- */ - if (tc_is_buffer_shared(tres)) - tc_buffer_disable_cpu_storage(resource); - usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width); /* If the CPU storage is enabled, return it directly. */ @@ -2817,10 +2895,7 @@ tc_buffer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer) assert(tres->cpu_storage); if (tres->cpu_storage) { - /* Invalidations shouldn't fail as long as CPU storage is allowed. */ - ASSERTED bool invalidated = tc_invalidate_buffer(tc, tres); - assert(invalidated); - + tc_invalidate_buffer(tc, tres); tc_buffer_subdata(&tc->base, &tres->b, PIPE_MAP_UNSYNCHRONIZED | TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE, @@ -2948,8 +3023,6 @@ tc_buffer_subdata(struct pipe_context *_pipe, if (!size) return; - tc_touch_buffer(tc, tres); - usage |= PIPE_MAP_WRITE; /* PIPE_MAP_DIRECTLY supresses implicit DISCARD_RANGE. */ @@ -3084,11 +3157,68 @@ tc_texture_subdata(struct pipe_context *_pipe, } else { struct pipe_context *pipe = tc->pipe; - tc_sync(tc); - tc_set_driver_thread(tc); - pipe->texture_subdata(pipe, resource, level, usage, box, data, - stride, layer_stride); - tc_clear_driver_thread(tc); + if (resource->usage != PIPE_USAGE_STAGING && + tc->options.parse_renderpass_info && tc->in_renderpass) { + enum pipe_format format = resource->format; + if (usage & PIPE_MAP_DEPTH_ONLY) + format = util_format_get_depth_only(format); + else if (usage & PIPE_MAP_STENCIL_ONLY) + format = PIPE_FORMAT_S8_UINT; + unsigned fmt_stride = util_format_get_stride(format, box->width); + unsigned fmt_layer_stride = util_format_get_2d_size(format, stride, box->height); + + struct pipe_resource *pres = pipe_buffer_create(pipe->screen, 0, PIPE_USAGE_STREAM, layer_stride * box->depth); + pipe->buffer_subdata(pipe, pres, PIPE_MAP_WRITE | TC_TRANSFER_MAP_THREADED_UNSYNC, 0, layer_stride * box->depth, data); + struct pipe_box src_box = *box; + src_box.x = src_box.y = src_box.z = 0; + + if (fmt_stride == stride && fmt_layer_stride == layer_stride) { + /* if stride matches, single copy is fine*/ + tc->base.resource_copy_region(&tc->base, resource, level, box->x, box->y, box->z, pres, 0, &src_box); + } else { + /* if stride doesn't match, inline util_copy_box on the GPU and assume the driver will optimize */ + src_box.depth = 1; + for (unsigned z = 0; z < box->depth; ++z, src_box.x = z * layer_stride) { + unsigned dst_x = box->x, dst_y = box->y, width = box->width, height = box->height, dst_z = box->z + z; + int blocksize = util_format_get_blocksize(format); + int blockwidth = util_format_get_blockwidth(format); + int blockheight = util_format_get_blockheight(format); + + assert(blocksize > 0); + assert(blockwidth > 0); + assert(blockheight > 0); + + dst_x /= blockwidth; + dst_y /= blockheight; + width = DIV_ROUND_UP(width, blockwidth); + height = DIV_ROUND_UP(height, blockheight); + + width *= blocksize; + + if (width == fmt_stride && width == (unsigned)stride) { + ASSERTED uint64_t size = (uint64_t)height * width; + + assert(size <= SIZE_MAX); + assert(dst_x + src_box.width < u_minify(pres->width0, level)); + assert(dst_y + src_box.height < u_minify(pres->height0, level)); + assert(pres->target != PIPE_TEXTURE_3D || z + src_box.depth < u_minify(pres->depth0, level)); + tc->base.resource_copy_region(&tc->base, resource, level, dst_x, dst_y, dst_z, pres, 0, &src_box); + } else { + src_box.height = 1; + for (unsigned i = 0; i < height; i++, dst_y++, src_box.x += stride) + tc->base.resource_copy_region(&tc->base, resource, level, dst_x, dst_y, dst_z, pres, 0, &src_box); + } + } + } + + 
pipe_resource_reference(&pres, NULL); + } else { + tc_sync(tc); + tc_set_driver_thread(tc); + pipe->texture_subdata(pipe, resource, level, usage, box, data, + stride, layer_stride); + tc_clear_driver_thread(tc); + } } } @@ -3117,7 +3247,6 @@ tc_get_sample_position(struct pipe_context *_pipe, struct threaded_context *tc = threaded_context(_pipe); struct pipe_context *pipe = tc->pipe; - tc_sync(tc); pipe->get_sample_position(pipe, sample_count, sample_index, out_value); } @@ -3404,8 +3533,10 @@ tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence, struct pipe_context *pipe = tc->pipe; struct pipe_screen *screen = pipe->screen; bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC); + bool deferred = (flags & PIPE_FLUSH_DEFERRED) > 0; - tc->in_renderpass = false; + if (!deferred || !fence) + tc->in_renderpass = false; if (async && tc->options.create_fence) { if (fence) { @@ -3427,7 +3558,7 @@ tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence, } struct tc_flush_call *p; - if (flags & PIPE_FLUSH_DEFERRED) { + if (deferred) { /* these have identical fields */ p = (struct tc_flush_call *)tc_add_call(tc, TC_CALL_flush_deferred, tc_flush_deferred_call); } else { @@ -3437,7 +3568,7 @@ tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence, p->fence = fence ? *fence : NULL; p->flags = flags | TC_FLUSH_ASYNC; - if (!(flags & PIPE_FLUSH_DEFERRED)) { + if (!deferred) { /* non-deferred async flushes indicate completion of existing renderpass info */ tc_signal_renderpass_info_ready(tc); tc_batch_flush(tc, false); @@ -3448,17 +3579,20 @@ tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence, } out_of_memory: + tc->flushing = true; /* renderpass info is signaled during sync */ tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" : flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal"); - if (!(flags & PIPE_FLUSH_DEFERRED)) { + if (!deferred) { tc_flush_queries(tc); tc->seen_fb_state = false; + tc->query_ended = false; } tc_set_driver_thread(tc); pipe->flush(pipe, fence, flags); tc_clear_driver_thread(tc); + tc->flushing = false; } struct tc_draw_single { @@ -3670,7 +3804,8 @@ tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info, struct threaded_context *tc = threaded_context(_pipe); unsigned index_size = info->index_size; bool has_user_indices = info->has_user_indices; - tc_parse_draw(tc); + if (tc->options.parse_renderpass_info) + tc_parse_draw(tc); if (unlikely(indirect)) { assert(!has_user_indices); @@ -3990,7 +4125,8 @@ tc_draw_vertex_state(struct pipe_context *_pipe, unsigned num_draws) { struct threaded_context *tc = threaded_context(_pipe); - tc_parse_draw(tc); + if (tc->options.parse_renderpass_info) + tc_parse_draw(tc); if (num_draws == 1) { /* Single draw. 
*/ @@ -4171,6 +4307,11 @@ tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info) tc_set_resource_reference(&blit->info.dst.resource, info->dst.resource); tc_set_resource_reference(&blit->info.src.resource, info->src.resource); memcpy(&blit->info, info, sizeof(*info)); + if (tc->options.parse_renderpass_info) { + tc->renderpass_info_recording->has_resolve = info->src.resource->nr_samples > 1 && + info->dst.resource->nr_samples <= 1 && + tc->fb_resolve == info->dst.resource; + } } struct tc_generate_mipmap { @@ -4275,10 +4416,7 @@ tc_invalidate_resource(struct pipe_context *_pipe, struct threaded_context *tc = threaded_context(_pipe); if (resource->target == PIPE_BUFFER) { - /* This can fail, in which case we simply ignore the invalidation request. */ - struct threaded_resource *tbuf = threaded_resource(resource); - tc_touch_buffer(tc, tbuf); - tc_invalidate_buffer(tc, tbuf); + tc_invalidate_buffer(tc, threaded_resource(resource)); return; } @@ -4340,8 +4478,13 @@ tc_clear(struct pipe_context *_pipe, unsigned buffers, const struct pipe_scissor if (info) { /* full clears use a different load operation, but are only valid if draws haven't occurred yet */ info->cbuf_clear |= (buffers >> 2) & ~info->cbuf_load; - if (buffers & PIPE_CLEAR_DEPTHSTENCIL && !info->zsbuf_load && !info->zsbuf_clear_partial) - info->zsbuf_clear = true; + if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { + if (!info->zsbuf_load && !info->zsbuf_clear_partial) + info->zsbuf_clear = true; + else if (!info->zsbuf_clear) + /* this is a clear that occurred after a draw: flag as partial to ensure it isn't ignored */ + info->zsbuf_clear_partial = true; + } } } p->scissor_state_set = !!scissor_state; @@ -4812,8 +4955,11 @@ threaded_context_create(struct pipe_context *pipe, return NULL; } - if (options) + if (options) { + /* this is unimplementable */ + assert(!(options->parse_renderpass_info && options->driver_calls_flush_notify)); tc->options = *options; + } pipe = trace_context_create_threaded(pipe->screen, pipe, &replace_buffer, &tc->options); @@ -4858,7 +5004,7 @@ threaded_context_create(struct pipe_context *pipe, tc->batch_slots[i].renderpass_info_idx = -1; if (tc->options.parse_renderpass_info) { util_dynarray_init(&tc->batch_slots[i].renderpass_infos, NULL); - tc_batch_renderpass_infos_resize(&tc->batch_slots[i]); + tc_batch_renderpass_infos_resize(tc, &tc->batch_slots[i]); } } for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) @@ -5022,7 +5168,7 @@ threaded_context_create(struct pipe_context *pipe, tc_begin_next_buffer_list(tc); if (tc->options.parse_renderpass_info) - tc_batch_increment_renderpass_info(tc, false); + tc_batch_increment_renderpass_info(tc, tc->next, false); return &tc->base; fail: @@ -5042,9 +5188,14 @@ threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned d } const struct tc_renderpass_info * -threaded_context_get_renderpass_info(struct threaded_context *tc, bool wait) -{ - if (tc->renderpass_info && wait) - util_queue_fence_wait(&tc->renderpass_info->ready); - return tc->renderpass_info; -}
\ No newline at end of file +threaded_context_get_renderpass_info(struct threaded_context *tc) +{ + assert(tc->renderpass_info && tc->options.parse_renderpass_info); + struct tc_batch_rp_info *info = tc_batch_rp_info(tc->renderpass_info); + while (1) { + util_queue_fence_wait(&info->ready); + if (!info->next) + return &info->info; + info = info->next; + } +} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.h b/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.h index e87b0061e..dbc5d6962 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.h @@ -78,6 +78,7 @@ * - transfer_map (only unsychronized buffer mappings) * - get_query_result (when threaded_query::flushed == true) * - create_stream_output_target + * - get_sample_position * * * Transfer_map rules for buffer mappings @@ -316,17 +317,6 @@ typedef bool (*tc_is_resource_busy)(struct pipe_screen *screen, struct threaded_resource { struct pipe_resource b; - /* Pointer to the TC that first used this threaded_resource (buffer). This is used to - * allow TCs to determine whether they have been given a buffer that was created by a - * different TC, in which case all TCs have to disable busyness tracking and buffer - * replacement for that particular buffer. - * DO NOT DEREFERENCE. The only operation allowed on this pointer is equality-checking - * since it might be dangling if a buffer has been shared and its first_user has - * already been destroyed. The pointer is const void to discourage such disallowed usage. - * This is NULL if no TC has used this buffer yet. - */ - const void *first_user; - /* Since buffer invalidations are queued, we can't use the base resource * for unsychronized mappings. This points to the latest version of * the buffer after the latest invalidation. It's only used for unsychro- @@ -354,12 +344,6 @@ struct threaded_resource { */ struct util_range valid_buffer_range; - /* True if multiple threaded contexts have accessed this buffer. - * Disables non-multicontext-safe optimizations in TC. - * We can't just re-use is_shared for that purpose as that would confuse drivers. - */ - bool used_by_multiple_contexts; - /* Drivers are required to update this for shared resources and user * pointers. 
*/ bool is_shared; @@ -444,7 +428,11 @@ struct tc_renderpass_info { bool zsbuf_invalidate : 1; /* whether a draw occurs */ bool has_draw : 1; - uint8_t pad : 3; + /* whether a framebuffer resolve occurs on cbuf[0] */ + bool has_resolve : 1; + /* whether queries are ended during this renderpass */ + bool has_query_ends : 1; + uint8_t pad : 1; /* 32 bits offset */ /* bitmask of color buffers using fbfetch */ uint8_t cbuf_fbfetch; @@ -467,8 +455,6 @@ struct tc_renderpass_info { /* zsbuf fb info is in data8[3] */ uint8_t data8[8]; }; - /* determines whether the info can be "safely" read by drivers or if it may still be in use */ - struct util_queue_fence ready; }; static inline bool @@ -482,6 +468,23 @@ tc_renderpass_info_is_zsbuf_used(const struct tc_renderpass_info *info) info->zsbuf_fbfetch; } +/* if a driver ends a renderpass early for some reason, + * this function can be called to reset any stored renderpass info + * to a "safe" state that will avoid data loss on framebuffer attachments + * + * note: ending a renderpass early if invalidate hints are applied will + * result in data loss + */ +static inline void +tc_renderpass_info_reset(struct tc_renderpass_info *info) +{ + info->data32[0] = 0; + info->cbuf_load = BITFIELD_MASK(8); + info->zsbuf_clear_partial = true; + info->has_draw = true; + info->has_query_ends = true; +} + struct tc_batch { struct threaded_context *tc; #if !defined(NDEBUG) && TC_DEBUG >= 1 @@ -490,7 +493,8 @@ struct tc_batch { uint16_t num_total_slots; uint16_t buffer_list_index; /* the index of the current renderpass info for recording */ - int renderpass_info_idx; + int16_t renderpass_info_idx; + uint16_t max_renderpass_info_idx; /* The last mergeable call that was added to this batch (i.e. * buffer subdata). This might be out-of-date or NULL. @@ -559,6 +563,7 @@ struct threaded_context { bool use_forced_staging_uploads; bool add_all_gfx_bindings_to_buffer_list; bool add_all_compute_bindings_to_buffer_list; + uint8_t num_queries_active; /* Estimation of how much vram/gtt bytes are mmap'd in * the current tc_batch. 
@@ -585,6 +590,10 @@ struct threaded_context { bool seen_fb_state; /* whether a renderpass is currently active */ bool in_renderpass; + /* whether a query has ended more recently than a draw */ + bool query_ended; + /* whether pipe_context::flush has been called */ + bool flushing; bool seen_streamout_buffers; bool seen_shader_buffers[PIPE_SHADER_TYPES]; @@ -619,8 +628,9 @@ struct threaded_context { struct tc_batch batch_slots[TC_MAX_BATCHES]; struct tc_buffer_list buffer_lists[TC_MAX_BUFFER_LISTS]; - /* the curent framebuffer attachments; [PIPE_MAX_COLOR_BUFS] is the zsbuf */ + /* the current framebuffer attachments; [PIPE_MAX_COLOR_BUFS] is the zsbuf */ struct pipe_resource *fb_resources[PIPE_MAX_COLOR_BUFS + 1]; + struct pipe_resource *fb_resolve; /* accessed by main thread; preserves info across batches */ struct tc_renderpass_info *renderpass_info_recording; /* accessed by driver thread */ @@ -634,17 +644,18 @@ struct pipe_context *threaded_context_unwrap_sync(struct pipe_context *pipe); void tc_driver_internal_flush_notify(struct threaded_context *tc); /** function for getting the current renderpass info: - * - renderpass info is always valid - * - set 'wait=true' when calling during normal execution - * - set 'wait=true' when calling from flush + * - renderpass info is always non-null * * Rules: - * 1) this must be called with 'wait=true' after the driver receives a pipe_context::set_framebuffer_state callback - * 2) this should be called with 'wait=false' when the driver receives a blocking pipe_context::flush call - * 3) this must not be used during any internal driver operations (e.g., u_blitter) + * - threaded context must have been created with parse_renderpass_info=true + * - must be called after the driver receives a pipe_context::set_framebuffer_state callback + * - must be called after the driver receives a non-deferrable pipe_context::flush callback + * - renderpass info must not be used during any internal driver operations (e.g., u_blitter) + * - must not be called before the driver receives its first pipe_context::set_framebuffer_state callback + * - renderpass info is invalidated only for non-deferrable flushes and new framebuffer states */ const struct tc_renderpass_info * -threaded_context_get_renderpass_info(struct threaded_context *tc, bool wait); +threaded_context_get_renderpass_info(struct threaded_context *tc); struct pipe_context * threaded_context_create(struct pipe_context *pipe, diff --git a/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c b/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c index 202fbed0a..7b8f95560 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c @@ -1450,17 +1450,17 @@ u_vbuf_split_indexed_multidraw(struct u_vbuf *mgr, struct pipe_draw_info *info, draw.index_bias = indirect_data[offset + 3]; info->start_instance = indirect_data[offset + 4]; - u_vbuf_draw_vbo(mgr, info, drawid_offset, NULL, &draw, 1); + u_vbuf_draw_vbo(mgr->pipe, info, drawid_offset, NULL, &draw, 1); } } -void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info, +void u_vbuf_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info, unsigned drawid_offset, const struct pipe_draw_indirect_info *indirect, const struct pipe_draw_start_count_bias *draws, unsigned num_draws) { - struct pipe_context *pipe = mgr->pipe; + struct u_vbuf *mgr = pipe->vbuf; int start_vertex; unsigned min_index; unsigned num_vertices; @@ -1512,6 +1512,9 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct 
pipe_draw_info *info, if (indirect && indirect->buffer) { unsigned draw_count = 0; + /* num_draws can only be 1 with indirect draws. */ + assert(num_draws == 1); + /* Get the number of draws. */ if (indirect->indirect_draw_count) { pipe_buffer_read(pipe, indirect->indirect_draw_count, @@ -1547,6 +1550,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info, u_vbuf_split_indexed_multidraw(mgr, &new_info, drawid_offset, data, indirect->stride, draw_count); free(data); + /* We're done (as num_draws is 1), so return early. */ return; } @@ -1563,6 +1567,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info, u_vbuf_split_indexed_multidraw(mgr, &new_info, drawid_offset, data, indirect->stride, draw_count); free(data); + /* We're done (as num_draws is 1), so return early. */ return; } @@ -1724,6 +1729,8 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info, } if (unroll_indices) { + if (!new_info.has_user_indices && info->take_index_buffer_ownership) + pipe_drop_resource_references(new_info.index.resource, 1); new_info.index_size = 0; new_draw.index_bias = 0; new_info.index_bounds_valid = true; diff --git a/lib/mesa/src/gallium/auxiliary/util/u_vbuf.h b/lib/mesa/src/gallium/auxiliary/util/u_vbuf.h index 2d6ca434d..bb3568fb3 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_vbuf.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_vbuf.h @@ -35,7 +35,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_format.h" +#include "util/format/u_formats.h" struct cso_context; struct cso_velems_state; @@ -85,7 +85,8 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, unsigned unbind_num_trailing_slots, bool take_ownership, const struct pipe_vertex_buffer *bufs); -void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info, +void u_vbuf_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info, unsigned drawid_offset, const struct pipe_draw_indirect_info *indirect, const struct pipe_draw_start_count_bias *draws, diff --git a/lib/mesa/src/gallium/auxiliary/vl/vl_compositor.c b/lib/mesa/src/gallium/auxiliary/vl/vl_compositor.c index b545b9c29..92e7462c1 100644 --- a/lib/mesa/src/gallium/auxiliary/vl/vl_compositor.c +++ b/lib/mesa/src/gallium/auxiliary/vl/vl_compositor.c @@ -363,6 +363,13 @@ set_yuv_layer(struct vl_compositor_state *s, struct vl_compositor *c, s->layers[layer].cs = (y) ? c->cs_yuv.bob.y : c->cs_yuv.bob.uv; break; + case VL_COMPOSITOR_NONE: + if (c->pipe_cs_composit_supported) { + s->layers[layer].cs = (y) ? c->cs_yuv.progressive.y : c->cs_yuv.progressive.uv; + break; + } + FALLTHROUGH; + default: if (c->pipe_gfx_supported) s->layers[layer].fs = (y) ? 
c->fs_yuv.weave.y : c->fs_yuv.weave.uv; diff --git a/lib/mesa/src/gallium/auxiliary/vl/vl_compositor.h b/lib/mesa/src/gallium/auxiliary/vl/vl_compositor.h index 32ce82f73..be82e156c 100644 --- a/lib/mesa/src/gallium/auxiliary/vl/vl_compositor.h +++ b/lib/mesa/src/gallium/auxiliary/vl/vl_compositor.h @@ -149,6 +149,10 @@ struct vl_compositor void *y; void *uv; } bob; + struct { + void *y; + void *uv; + } progressive; } cs_yuv; struct { diff --git a/lib/mesa/src/gallium/auxiliary/vl/vl_winsys.h b/lib/mesa/src/gallium/auxiliary/vl/vl_winsys.h index 919f86c34..a3945b67e 100644 --- a/lib/mesa/src/gallium/auxiliary/vl/vl_winsys.h +++ b/lib/mesa/src/gallium/auxiliary/vl/vl_winsys.h @@ -39,7 +39,7 @@ #include <windows.h> #endif #include "pipe/p_defines.h" -#include "pipe/p_format.h" +#include "util/format/u_formats.h" struct pipe_screen; struct pipe_surface; diff --git a/lib/mesa/src/gallium/auxiliary/vl/vl_winsys_dri3.c b/lib/mesa/src/gallium/auxiliary/vl/vl_winsys_dri3.c index baa12fa95..073630e55 100644 --- a/lib/mesa/src/gallium/auxiliary/vl/vl_winsys_dri3.c +++ b/lib/mesa/src/gallium/auxiliary/vl/vl_winsys_dri3.c @@ -133,13 +133,21 @@ dri3_handle_stamps(struct vl_dri3_screen *scrn, uint64_t ust, uint64_t msc) scrn->last_msc = msc; } -static void +/* XXX this belongs in presentproto */ +#ifndef PresentWindowDestroyed +#define PresentWindowDestroyed (1 << 0) +#endif +static bool dri3_handle_present_event(struct vl_dri3_screen *scrn, xcb_present_generic_event_t *ge) { switch (ge->evtype) { case XCB_PRESENT_CONFIGURE_NOTIFY: { xcb_present_configure_notify_event_t *ce = (void *) ge; + if (ce->pixmap_flags & PresentWindowDestroyed) { + free(ge); + return false; + } scrn->width = ce->width; scrn->height = ce->height; break; @@ -171,6 +179,7 @@ dri3_handle_present_event(struct vl_dri3_screen *scrn, } } free(ge); + return true; } static void @@ -179,8 +188,10 @@ dri3_flush_present_events(struct vl_dri3_screen *scrn) if (scrn->special_event) { xcb_generic_event_t *ev; while ((ev = xcb_poll_for_special_event( - scrn->conn, scrn->special_event)) != NULL) - dri3_handle_present_event(scrn, (xcb_present_generic_event_t *)ev); + scrn->conn, scrn->special_event)) != NULL) { + if (!dri3_handle_present_event(scrn, (xcb_present_generic_event_t *)ev)) + break; + } } } @@ -192,8 +203,7 @@ dri3_wait_present_events(struct vl_dri3_screen *scrn) ev = xcb_wait_for_special_event(scrn->conn, scrn->special_event); if (!ev) return false; - dri3_handle_present_event(scrn, (xcb_present_generic_event_t *)ev); - return true; + return dri3_handle_present_event(scrn, (xcb_present_generic_event_t *)ev); } return false; } @@ -811,7 +821,7 @@ vl_dri3_screen_create(Display *display, int screen) fcntl(fd, F_SETFD, FD_CLOEXEC); free(open_reply); - fd = loader_get_user_preferred_fd(fd, &scrn->is_different_gpu); + scrn->is_different_gpu = loader_get_user_preferred_fd(&fd, NULL); geom_cookie = xcb_get_geometry(scrn->conn, RootWindow(display, screen)); geom_reply = xcb_get_geometry_reply(scrn->conn, geom_cookie, NULL);