diff options
Diffstat (limited to 'lib/mesa/src/gallium/auxiliary')
77 files changed, 3381 insertions, 760 deletions
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_cliptest_tmp.h b/lib/mesa/src/gallium/auxiliary/draw/draw_cliptest_tmp.h index b7c77bfd8..dceae1fd0 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_cliptest_tmp.h +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_cliptest_tmp.h @@ -26,6 +26,7 @@ **************************************************************************/ #include "util/u_bitcast.h" +#include <math.h> static boolean TAG(do_cliptest)( struct pt_post_vs *pvs, struct draw_vertex_info *info, @@ -182,11 +183,10 @@ static boolean TAG(do_cliptest)( struct pt_post_vs *pvs, * to NaN to help catch potential errors later. */ else { - float zero = 0.0f; position[0] = position[1] = position[2] = - position[3] = zero / zero; /* MSVC doesn't accept 0.0 / 0.0 */ + position[3] = NAN; } #endif diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_decompose_tmp.h b/lib/mesa/src/gallium/auxiliary/draw/draw_decompose_tmp.h index e9f3b1d88..7686afebe 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_decompose_tmp.h +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_decompose_tmp.h @@ -170,7 +170,10 @@ FUNC(FUNC_VARS) idx[1] = GET_ELT(i + 1); idx[2] = GET_ELT(i + 2); idx[3] = GET_ELT(i + 3); - +#ifdef PASS_QUADS + QUAD(0, idx[0], idx[1], + idx[2], idx[3]); +#else flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2; @@ -180,6 +183,7 @@ FUNC(FUNC_VARS) flags = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1; TRIANGLE(flags, idx[1], idx[2], idx[3]); +#endif } } else { @@ -188,7 +192,10 @@ FUNC(FUNC_VARS) idx[1] = GET_ELT(i + 1); idx[2] = GET_ELT(i + 2); idx[3] = GET_ELT(i + 3); - +#ifdef PASS_QUADS + QUAD(0, idx[0], idx[1], + idx[2], idx[3]); +#else flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1; @@ -204,6 +211,7 @@ FUNC(FUNC_VARS) TRIANGLE(flags, idx[3], idx[1], idx[2]); else TRIANGLE(flags, idx[0], idx[2], idx[3]); +#endif } } break; @@ -220,6 +228,10 @@ FUNC(FUNC_VARS) idx[2] = GET_ELT(i + 2); idx[3] = 
GET_ELT(i + 3); +#ifdef PASS_QUADS + QUAD(0, idx[2], idx[0], + idx[1], idx[3]); +#else /* always emit idx[3] last */ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_0 | @@ -229,6 +241,7 @@ FUNC(FUNC_VARS) flags = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1; TRIANGLE(flags, idx[0], idx[1], idx[3]); +#endif } } else { @@ -238,6 +251,10 @@ FUNC(FUNC_VARS) idx[2] = GET_ELT(i + 2); idx[3] = GET_ELT(i + 3); +#ifdef PASS_QUADS + QUAD(0, idx[3], idx[2], + idx[0], idx[1]); +#else flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1; @@ -253,6 +270,7 @@ FUNC(FUNC_VARS) TRIANGLE(flags, idx[3], idx[0], idx[1]); else TRIANGLE(flags, idx[0], idx[1], idx[3]); +#endif } } } diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_gs.c b/lib/mesa/src/gallium/auxiliary/draw/draw_gs.c index ed698e920..90e66b643 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_gs.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_gs.c @@ -834,12 +834,18 @@ draw_create_geometry_shader(struct draw_context *draw, gs->primitive_boundary = gs->max_output_vertices + 1; gs->position_output = -1; + bool found_clipvertex = false; for (i = 0; i < gs->info.num_outputs; i++) { if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION && gs->info.output_semantic_index[i] == 0) gs->position_output = i; if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX) gs->viewport_index_output = i; + if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPVERTEX && + gs->info.output_semantic_index[i] == 0) { + found_clipvertex = true; + gs->clipvertex_output = i; + } if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) { debug_assert(gs->info.output_semantic_index[i] < PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT); @@ -847,6 +853,9 @@ draw_create_geometry_shader(struct draw_context *draw, } } + if (!found_clipvertex) + gs->clipvertex_output = gs->position_output; + gs->machine = draw->gs.tgsi.machine; gs->num_vertex_streams = 1; @@ -900,6 +909,7 @@ void 
draw_bind_geometry_shader(struct draw_context *draw, draw->gs.geometry_shader = dgs; draw->gs.num_gs_outputs = dgs->info.num_outputs; draw->gs.position_output = dgs->position_output; + draw->gs.clipvertex_output = dgs->clipvertex_output; draw_geometry_shader_prepare(dgs, draw); } else { diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_gs.h b/lib/mesa/src/gallium/auxiliary/draw/draw_gs.h index 9449ec509..10969426f 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_gs.h +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_gs.h @@ -75,6 +75,7 @@ struct draw_geometry_shader { struct tgsi_shader_info info; unsigned position_output; unsigned viewport_index_output; + unsigned clipvertex_output; unsigned ccdistance_output[PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT]; unsigned max_output_vertices; diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_llvm_sample.c b/lib/mesa/src/gallium/auxiliary/draw/draw_llvm_sample.c index a3895c798..b3e98be55 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_llvm_sample.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_llvm_sample.c @@ -289,6 +289,7 @@ DRAW_LLVM_SAMPLER_MEMBER(min_lod, DRAW_JIT_SAMPLER_MIN_LOD, TRUE) DRAW_LLVM_SAMPLER_MEMBER(max_lod, DRAW_JIT_SAMPLER_MAX_LOD, TRUE) DRAW_LLVM_SAMPLER_MEMBER(lod_bias, DRAW_JIT_SAMPLER_LOD_BIAS, TRUE) DRAW_LLVM_SAMPLER_MEMBER(border_color, DRAW_JIT_SAMPLER_BORDER_COLOR, FALSE) +DRAW_LLVM_SAMPLER_MEMBER(max_aniso, DRAW_JIT_SAMPLER_MAX_ANISO, TRUE) #define DRAW_LLVM_IMAGE_MEMBER(_name, _index, _emit_load) \ static LLVMValueRef \ @@ -405,6 +406,7 @@ draw_llvm_sampler_soa_create(const struct draw_sampler_static_state *static_stat sampler->dynamic_state.base.max_lod = draw_llvm_sampler_max_lod; sampler->dynamic_state.base.lod_bias = draw_llvm_sampler_lod_bias; sampler->dynamic_state.base.border_color = draw_llvm_sampler_border_color; + sampler->dynamic_state.base.max_aniso = draw_llvm_sampler_max_aniso; sampler->dynamic_state.static_state = static_state; sampler->nr_samplers = 
nr_samplers; diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_offset.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_offset.c index 08d47f005..87db9cdda 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_offset.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -97,7 +97,7 @@ static void do_offset_tri( struct draw_stage *stage, if (stage->draw->floating_point_depth) { float bias; union fi maxz; - maxz.f = MAX3(v0[2], v1[2], v2[2]); + maxz.f = MAX3(fabs(v0[2]), fabs(v1[2]), fabs(v2[2])); /* just do the math directly on shifted number */ maxz.ui &= 0xff << 23; maxz.i -= 23 << 23; @@ -187,7 +187,7 @@ static void offset_first_tri( struct draw_stage *stage, if (stage->draw->floating_point_depth) { offset->units = (float) rast->offset_units; } else { - offset->units = (float) (rast->offset_units * stage->draw->mrd); + offset->units = (float) (rast->offset_units * stage->draw->mrd * 2); } } else { diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index eb7ad8bf2..d5f757eb1 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -109,6 +109,7 @@ struct pstip_stage enum pipe_shader_type shader, unsigned start, unsigned count, unsigned unbind_num_trailing_slots, + bool take_ownership, struct pipe_sampler_view **); void (*driver_set_polygon_stipple)(struct pipe_context *, @@ -224,7 +225,8 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) num_samplers, pstip->state.samplers); pstip->driver_set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, - num_sampler_views, 0, pstip->state.sampler_views); + num_sampler_views, 0, false, + pstip->state.sampler_views); draw->suspend_flushing = FALSE; @@ -253,7 +255,7 @@ pstip_flush(struct draw_stage *stage, unsigned flags) pstip->state.samplers); pstip->driver_set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, - 
pstip->num_sampler_views, 0, + pstip->num_sampler_views, 0, false, pstip->state.sampler_views); draw->suspend_flushing = FALSE; @@ -418,6 +420,7 @@ pstip_set_sampler_views(struct pipe_context *pipe, enum pipe_shader_type shader, unsigned start, unsigned num, unsigned unbind_num_trailing_slots, + bool take_ownership, struct pipe_sampler_view **views) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); @@ -438,7 +441,7 @@ pstip_set_sampler_views(struct pipe_context *pipe, /* pass-through */ pstip->driver_set_sampler_views(pstip->pipe, shader, start, num, - unbind_num_trailing_slots, views); + unbind_num_trailing_slots, take_ownership, views); } diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_prim_assembler.c b/lib/mesa/src/gallium/auxiliary/draw/draw_prim_assembler.c index e628a143d..9a957f33f 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_prim_assembler.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_prim_assembler.c @@ -201,6 +201,28 @@ prim_tri(struct draw_assembler *asmblr, copy_verts(asmblr, indices, 3); } +static void +prim_quad(struct draw_assembler *asmblr, + unsigned i0, unsigned i1, + unsigned i2, unsigned i3) +{ + unsigned indices[4]; + + if (asmblr->needs_primid) { + inject_primid(asmblr, i0, asmblr->primid); + inject_primid(asmblr, i1, asmblr->primid); + inject_primid(asmblr, i2, asmblr->primid); + inject_primid(asmblr, i3, asmblr->primid++); + } + indices[0] = i0; + indices[1] = i1; + indices[2] = i2; + indices[3] = i3; + + add_prim(asmblr, 4); + copy_verts(asmblr, indices, 4); +} + void draw_prim_assembler_prepare_outputs(struct draw_assembler *ia) { @@ -244,7 +266,9 @@ draw_prim_assembler_run(struct draw_context *draw, { struct draw_assembler *asmblr = draw->ia; unsigned start, i; - unsigned assembled_prim = u_reduced_prim(input_prims->prim); + unsigned assembled_prim = (input_prims->prim == PIPE_PRIM_QUADS || + input_prims->prim == PIPE_PRIM_QUAD_STRIP) ? 
+ PIPE_PRIM_QUADS : u_reduced_prim(input_prims->prim); unsigned max_primitives = u_decomposed_prims_for_vertices( input_prims->prim, input_prims->count); unsigned max_verts = u_vertices_per_prim(assembled_prim) * max_primitives; diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pt.h b/lib/mesa/src/gallium/auxiliary/draw/draw_pt.h index 00527527f..5201676e1 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_pt.h +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pt.h @@ -147,12 +147,7 @@ struct draw_pt_front_end *draw_pt_vsplit(struct draw_context *draw); * Currently one general-purpose case which can do all possibilities, * at the slight expense of creating a vertex_header in some cases * unecessarily. - * - * The special case fetch_emit code avoids pipeline vertices - * altogether and builds hardware vertices directly from API - * vertex_elements. */ -struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw); struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw); diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pt_fetch.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pt_fetch.c index 17fcfa067..b826b3381 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -50,11 +50,6 @@ struct pt_fetch { * Perform the fetch from API vertex elements & vertex buffers, to a * contiguous set of float[4] attributes as required for the * vertex_shader->run_linear() method. - * - * This is used in all cases except pure passthrough - * (draw_pt_fetch_emit.c) which has its own version to translate - * directly to hw vertices. 
- * */ void draw_pt_fetch_prepare(struct pt_fetch *fetch, diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_vs.c b/lib/mesa/src/gallium/auxiliary/draw/draw_vs.c index e8dbc11bc..41e6c7a04 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_vs.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_vs.c @@ -35,6 +35,8 @@ #include "util/u_memory.h" #include "pipe/p_shader_tokens.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" #include "draw_private.h" #include "draw_context.h" @@ -46,6 +48,8 @@ #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_exec.h" +#include "nir/nir_to_tgsi.h" + DEBUG_GET_ONCE_BOOL_OPTION(gallium_dump_vs, "GALLIUM_DUMP_VS", FALSE) @@ -54,6 +58,7 @@ draw_create_vertex_shader(struct draw_context *draw, const struct pipe_shader_state *shader) { struct draw_vertex_shader *vs = NULL; + struct pipe_shader_state state = *shader; if (draw->dump_vs) { tgsi_dump(shader->tokens, 0); @@ -61,12 +66,22 @@ draw_create_vertex_shader(struct draw_context *draw, #ifdef DRAW_LLVM_AVAILABLE if (draw->pt.middle.llvm) { - vs = draw_create_vs_llvm(draw, shader); + struct pipe_screen *screen = draw->pipe->screen; + if (shader->type == PIPE_SHADER_IR_NIR && + ((!screen->get_shader_param(screen, PIPE_SHADER_VERTEX, + PIPE_SHADER_CAP_INTEGERS)) || + (screen->get_shader_param(screen, PIPE_SHADER_VERTEX, + PIPE_SHADER_CAP_PREFERRED_IR) == + PIPE_SHADER_IR_TGSI))) { + state.type = PIPE_SHADER_IR_TGSI; + state.tokens = nir_to_tgsi(shader->ir.nir, screen); + } + vs = draw_create_vs_llvm(draw, &state); } #endif if (!vs) { - vs = draw_create_vs_exec( draw, shader ); + vs = draw_create_vs_exec( draw, &state ); } if (vs) diff --git a/lib/mesa/src/gallium/auxiliary/driver_ddebug/dd_context.c b/lib/mesa/src/gallium/auxiliary/driver_ddebug/dd_context.c index d24b2c55c..53b68a352 100644 --- a/lib/mesa/src/gallium/auxiliary/driver_ddebug/dd_context.c +++ b/lib/mesa/src/gallium/auxiliary/driver_ddebug/dd_context.c @@ -411,6 +411,15 @@ static void dd_context_set_tess_state(struct 
pipe_context *_pipe, pipe->set_tess_state(pipe, default_outer_level, default_inner_level); } +static void dd_context_set_patch_vertices(struct pipe_context *_pipe, + uint8_t patch_vertices) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + + pipe->set_patch_vertices(pipe, patch_vertices); +} + static void dd_context_set_window_rectangles(struct pipe_context *_pipe, bool include, unsigned num_rectangles, @@ -511,6 +520,7 @@ dd_context_set_sampler_views(struct pipe_context *_pipe, enum pipe_shader_type shader, unsigned start, unsigned num, unsigned unbind_num_trailing_slots, + bool take_ownership, struct pipe_sampler_view **views) { struct dd_context *dctx = dd_context(_pipe); @@ -520,7 +530,7 @@ dd_context_set_sampler_views(struct pipe_context *_pipe, sizeof(views[0]) * num); safe_memcpy(&dctx->draw_state.sampler_views[shader][start + num], views, sizeof(views[0]) * unbind_num_trailing_slots); - pipe->set_sampler_views(pipe, shader, start, num, + pipe->set_sampler_views(pipe, shader, start, num, take_ownership, unbind_num_trailing_slots, views); } @@ -903,6 +913,7 @@ dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe) CTX_INIT(set_viewport_states); CTX_INIT(set_sampler_views); CTX_INIT(set_tess_state); + CTX_INIT(set_patch_vertices); CTX_INIT(set_shader_buffers); CTX_INIT(set_shader_images); CTX_INIT(set_vertex_buffers); diff --git a/lib/mesa/src/gallium/auxiliary/driver_ddebug/dd_draw.c b/lib/mesa/src/gallium/auxiliary/driver_ddebug/dd_draw.c index 9fc776d75..5e70765f5 100644 --- a/lib/mesa/src/gallium/auxiliary/driver_ddebug/dd_draw.c +++ b/lib/mesa/src/gallium/auxiliary/driver_ddebug/dd_draw.c @@ -352,13 +352,15 @@ dd_dump_flush(struct dd_draw_state *dstate, struct call_flush *info, FILE *f) static void dd_dump_draw_vbo(struct dd_draw_state *dstate, struct pipe_draw_info *info, + unsigned drawid_offset, const struct pipe_draw_indirect_info *indirect, - const struct pipe_draw_start_count *draw, FILE 
*f) + const struct pipe_draw_start_count_bias *draw, FILE *f) { int sh, i; DUMP(draw_info, info); - DUMP(draw_start_count, draw); + PRINT_NAMED(int, "drawid offset", drawid_offset); + DUMP(draw_start_count_bias, draw); if (indirect) { if (indirect->buffer) DUMP_M(resource, indirect, buffer); @@ -636,6 +638,7 @@ dd_dump_call(FILE *f, struct dd_draw_state *state, struct dd_call *call) break; case CALL_DRAW_VBO: dd_dump_draw_vbo(state, &call->info.draw_vbo.info, + call->info.draw_vbo.drawid_offset, &call->info.draw_vbo.indirect, &call->info.draw_vbo.draw, f); break; @@ -1303,8 +1306,9 @@ dd_context_flush(struct pipe_context *_pipe, static void dd_context_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info, + unsigned drawid_offset, const struct pipe_draw_indirect_info *indirect, - const struct pipe_draw_start_count *draws, + const struct pipe_draw_start_count_bias *draws, unsigned num_draws) { struct dd_context *dctx = dd_context(_pipe); @@ -1313,6 +1317,7 @@ dd_context_draw_vbo(struct pipe_context *_pipe, record->call.type = CALL_DRAW_VBO; record->call.info.draw_vbo.info = *info; + record->call.info.draw_vbo.drawid_offset = drawid_offset; record->call.info.draw_vbo.draw = draws[0]; if (info->index_size && !info->has_user_indices) { record->call.info.draw_vbo.info.index.resource = NULL; @@ -1336,7 +1341,7 @@ dd_context_draw_vbo(struct pipe_context *_pipe, } dd_before_draw(dctx, record); - pipe->draw_vbo(pipe, info, indirect, draws, num_draws); + pipe->draw_vbo(pipe, info, drawid_offset, indirect, draws, num_draws); dd_after_draw(dctx, record); } @@ -1594,10 +1599,10 @@ dd_context_clear_texture(struct pipe_context *_pipe, */ static void * -dd_context_transfer_map(struct pipe_context *_pipe, - struct pipe_resource *resource, unsigned level, - unsigned usage, const struct pipe_box *box, - struct pipe_transfer **transfer) +dd_context_buffer_map(struct pipe_context *_pipe, + struct pipe_resource *resource, unsigned level, + unsigned usage, const struct 
pipe_box *box, + struct pipe_transfer **transfer) { struct dd_context *dctx = dd_context(_pipe); struct pipe_context *pipe = dctx->pipe; @@ -1609,7 +1614,41 @@ dd_context_transfer_map(struct pipe_context *_pipe, dd_before_draw(dctx, record); } - void *ptr = pipe->transfer_map(pipe, resource, level, usage, box, transfer); + void *ptr = pipe->buffer_map(pipe, resource, level, usage, box, transfer); + if (record) { + record->call.info.transfer_map.transfer_ptr = *transfer; + record->call.info.transfer_map.ptr = ptr; + if (*transfer) { + record->call.info.transfer_map.transfer = **transfer; + record->call.info.transfer_map.transfer.resource = NULL; + pipe_resource_reference(&record->call.info.transfer_map.transfer.resource, + (*transfer)->resource); + } else { + memset(&record->call.info.transfer_map.transfer, 0, sizeof(struct pipe_transfer)); + } + + dd_after_draw(dctx, record); + } + return ptr; +} + +static void * +dd_context_texture_map(struct pipe_context *_pipe, + struct pipe_resource *resource, unsigned level, + unsigned usage, const struct pipe_box *box, + struct pipe_transfer **transfer) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + struct dd_draw_record *record = + dd_screen(dctx->base.screen)->transfers ? 
dd_create_record(dctx) : NULL; + + if (record) { + record->call.type = CALL_TRANSFER_MAP; + + dd_before_draw(dctx, record); + } + void *ptr = pipe->texture_map(pipe, resource, level, usage, box, transfer); if (record) { record->call.info.transfer_map.transfer_ptr = *transfer; record->call.info.transfer_map.ptr = ptr; @@ -1655,7 +1694,32 @@ dd_context_transfer_flush_region(struct pipe_context *_pipe, } static void -dd_context_transfer_unmap(struct pipe_context *_pipe, +dd_context_buffer_unmap(struct pipe_context *_pipe, + struct pipe_transfer *transfer) +{ + struct dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + struct dd_draw_record *record = + dd_screen(dctx->base.screen)->transfers ? dd_create_record(dctx) : NULL; + + if (record) { + record->call.type = CALL_TRANSFER_UNMAP; + record->call.info.transfer_unmap.transfer_ptr = transfer; + record->call.info.transfer_unmap.transfer = *transfer; + record->call.info.transfer_unmap.transfer.resource = NULL; + pipe_resource_reference( + &record->call.info.transfer_unmap.transfer.resource, + transfer->resource); + + dd_before_draw(dctx, record); + } + pipe->buffer_unmap(pipe, transfer); + if (record) + dd_after_draw(dctx, record); +} + +static void +dd_context_texture_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer) { struct dd_context *dctx = dd_context(_pipe); @@ -1674,7 +1738,7 @@ dd_context_transfer_unmap(struct pipe_context *_pipe, dd_before_draw(dctx, record); } - pipe->transfer_unmap(pipe, transfer); + pipe->texture_unmap(pipe, transfer); if (record) dd_after_draw(dctx, record); } @@ -1754,9 +1818,11 @@ dd_init_draw_functions(struct dd_context *dctx) CTX_INIT(flush_resource); CTX_INIT(generate_mipmap); CTX_INIT(get_query_result_resource); - CTX_INIT(transfer_map); + CTX_INIT(buffer_map); + CTX_INIT(texture_map); CTX_INIT(transfer_flush_region); - CTX_INIT(transfer_unmap); + CTX_INIT(buffer_unmap); + CTX_INIT(texture_unmap); CTX_INIT(buffer_subdata); 
CTX_INIT(texture_subdata); } diff --git a/lib/mesa/src/gallium/auxiliary/driver_ddebug/dd_pipe.h b/lib/mesa/src/gallium/auxiliary/driver_ddebug/dd_pipe.h index 25bfc74fb..e7e23fe1c 100644 --- a/lib/mesa/src/gallium/auxiliary/driver_ddebug/dd_pipe.h +++ b/lib/mesa/src/gallium/auxiliary/driver_ddebug/dd_pipe.h @@ -123,8 +123,9 @@ struct call_flush { struct call_draw_info { struct pipe_draw_info info; + unsigned drawid_offset; struct pipe_draw_indirect_info indirect; - struct pipe_draw_start_count draw; + struct pipe_draw_start_count_bias draw; }; struct call_get_query_result_resource { diff --git a/lib/mesa/src/gallium/auxiliary/driver_ddebug/dd_screen.c b/lib/mesa/src/gallium/auxiliary/driver_ddebug/dd_screen.c index dadcde63a..b9a60b1a3 100644 --- a/lib/mesa/src/gallium/auxiliary/driver_ddebug/dd_screen.c +++ b/lib/mesa/src/gallium/auxiliary/driver_ddebug/dd_screen.c @@ -414,12 +414,12 @@ dd_screen_memobj_destroy(struct pipe_screen *_screen, * screen */ -static void -dd_screen_finalize_nir(struct pipe_screen *_screen, void *nir, bool optimize) +static char * +dd_screen_finalize_nir(struct pipe_screen *_screen, void *nir) { struct pipe_screen *screen = dd_screen(_screen)->screen; - screen->finalize_nir(screen, nir, optimize); + return screen->finalize_nir(screen, nir); } static void diff --git a/lib/mesa/src/gallium/auxiliary/driver_noop/noop_pipe.c b/lib/mesa/src/gallium/auxiliary/driver_noop/noop_pipe.c index f107c71ec..73d35d003 100644 --- a/lib/mesa/src/gallium/auxiliary/driver_noop/noop_pipe.c +++ b/lib/mesa/src/gallium/auxiliary/driver_noop/noop_pipe.c @@ -29,7 +29,9 @@ #include "util/u_memory.h" #include "util/u_inlines.h" #include "util/format/u_format.h" +#include "util/u_helpers.h" #include "util/u_upload_mgr.h" +#include "util/u_threaded_context.h" #include "noop_public.h" DEBUG_GET_ONCE_BOOL_OPTION(noop, "GALLIUM_NOOP", false) @@ -39,12 +41,14 @@ void noop_init_state_functions(struct pipe_context *ctx); struct noop_pipe_screen { struct pipe_screen 
pscreen; struct pipe_screen *oscreen; + struct slab_parent_pool pool_transfers; }; /* * query */ struct noop_query { + struct threaded_query b; unsigned query; }; static struct pipe_query *noop_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) @@ -90,7 +94,7 @@ noop_set_active_query_state(struct pipe_context *pipe, bool enable) * resource */ struct noop_resource { - struct pipe_resource base; + struct threaded_resource b; unsigned size; char *data; struct sw_displaytarget *dt; @@ -107,16 +111,34 @@ static struct pipe_resource *noop_resource_create(struct pipe_screen *screen, return NULL; stride = util_format_get_stride(templ->format, templ->width0); - nresource->base = *templ; - nresource->base.screen = screen; + nresource->b.b = *templ; + nresource->b.b.screen = screen; nresource->size = stride * templ->height0 * templ->depth0; nresource->data = MALLOC(nresource->size); - pipe_reference_init(&nresource->base.reference, 1); + pipe_reference_init(&nresource->b.b.reference, 1); if (nresource->data == NULL) { FREE(nresource); return NULL; } - return &nresource->base; + threaded_resource_init(&nresource->b.b); + return &nresource->b.b; +} + +static struct pipe_resource * +noop_resource_create_with_modifiers(struct pipe_screen *screen, + const struct pipe_resource *templ, + const uint64_t *modifiers, int count) +{ + struct noop_pipe_screen *noop_screen = (struct noop_pipe_screen*)screen; + struct pipe_screen *oscreen = noop_screen->oscreen; + struct pipe_resource *result; + struct pipe_resource *noop_resource; + + result = oscreen->resource_create_with_modifiers(oscreen, templ, + modifiers, count); + noop_resource = noop_resource_create(screen, result); + pipe_resource_reference(&result, NULL); + return noop_resource; } static struct pipe_resource *noop_resource_from_handle(struct pipe_screen *screen, @@ -187,6 +209,7 @@ static void noop_resource_destroy(struct pipe_screen *screen, { struct noop_resource *nresource = (struct noop_resource 
*)resource; + threaded_resource_deinit(resource); FREE(nresource->data); FREE(resource); } @@ -205,7 +228,7 @@ static void *noop_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer; struct noop_resource *nresource = (struct noop_resource *)resource; - transfer = CALLOC_STRUCT(pipe_transfer); + transfer = (struct pipe_transfer*)CALLOC_STRUCT(threaded_transfer); if (!transfer) return NULL; pipe_resource_reference(&transfer->resource, resource); @@ -310,8 +333,13 @@ static void noop_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence, unsigned flags) { - if (fence) - *fence = NULL; + if (fence) { + struct pipe_reference *f = MALLOC_STRUCT(pipe_reference); + f->count = 1; + + ctx->screen->fence_reference(ctx->screen, fence, NULL); + *fence = (struct pipe_fence_handle*)f; + } } static void noop_destroy_context(struct pipe_context *ctx) @@ -319,6 +347,7 @@ static void noop_destroy_context(struct pipe_context *ctx) if (ctx->stream_uploader) u_upload_destroy(ctx->stream_uploader); + p_atomic_dec(&ctx->screen->num_contexts); FREE(ctx); } @@ -348,6 +377,32 @@ static void noop_set_frontend_noop(struct pipe_context *ctx, bool enable) { } +static void noop_replace_buffer_storage(struct pipe_context *ctx, + struct pipe_resource *dst, + struct pipe_resource *src, + unsigned num_rebinds, + uint32_t rebind_mask, + uint32_t delete_buffer_id) +{ +} + +static struct pipe_fence_handle * +noop_create_fence(struct pipe_context *ctx, + struct tc_unflushed_batch_token *tc_token) +{ + struct pipe_reference *f = MALLOC_STRUCT(pipe_reference); + + f->count = 1; + return (struct pipe_fence_handle*)f; +} + +static bool noop_is_resource_busy(struct pipe_screen *screen, + struct pipe_resource *resource, + unsigned usage) +{ + return false; +} + static struct pipe_context *noop_create_context(struct pipe_screen *screen, void *priv, unsigned flags) { @@ -381,9 +436,11 @@ static struct pipe_context *noop_create_context(struct pipe_screen *screen, ctx->end_query = 
noop_end_query; ctx->get_query_result = noop_get_query_result; ctx->set_active_query_state = noop_set_active_query_state; - ctx->transfer_map = noop_transfer_map; + ctx->buffer_map = noop_transfer_map; + ctx->texture_map = noop_transfer_map; ctx->transfer_flush_region = noop_transfer_flush_region; - ctx->transfer_unmap = noop_transfer_unmap; + ctx->buffer_unmap = noop_transfer_unmap; + ctx->texture_unmap = noop_transfer_unmap; ctx->buffer_subdata = noop_buffer_subdata; ctx->texture_subdata = noop_texture_subdata; ctx->invalidate_resource = noop_invalidate_resource; @@ -391,7 +448,25 @@ static struct pipe_context *noop_create_context(struct pipe_screen *screen, ctx->set_frontend_noop = noop_set_frontend_noop; noop_init_state_functions(ctx); - return ctx; + p_atomic_inc(&screen->num_contexts); + + if (!(flags & PIPE_CONTEXT_PREFER_THREADED)) + return ctx; + + struct pipe_context *tc = + threaded_context_create(ctx, + &((struct noop_pipe_screen*)screen)->pool_transfers, + noop_replace_buffer_storage, + &(struct threaded_context_options) { + .create_fence = noop_create_fence, + .is_resource_busy = noop_is_resource_busy, + }, + NULL); + + if (tc && tc != ctx) + threaded_context_init_bytes_mapped_limit((struct threaded_context *)tc, 4); + + return tc; } @@ -479,6 +554,7 @@ static void noop_destroy_screen(struct pipe_screen *screen) struct pipe_screen *oscreen = noop_screen->oscreen; oscreen->destroy(oscreen); + slab_destroy_parent(&noop_screen->pool_transfers); FREE(screen); } @@ -486,6 +562,11 @@ static void noop_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **ptr, struct pipe_fence_handle *fence) { + if (pipe_reference((struct pipe_reference*)*ptr, + (struct pipe_reference*)fence)) + FREE(*ptr); + + *ptr = fence; } static bool noop_fence_finish(struct pipe_screen *screen, @@ -521,11 +602,104 @@ static const void *noop_get_compiler_options(struct pipe_screen *pscreen, return screen->get_compiler_options(screen, ir, shader); } -static void 
noop_finalize_nir(struct pipe_screen *pscreen, void *nir, bool optimize) +static char *noop_finalize_nir(struct pipe_screen *pscreen, void *nir) { struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen; - screen->finalize_nir(screen, nir, optimize); + return screen->finalize_nir(screen, nir); +} + +static bool noop_check_resource_capability(struct pipe_screen *screen, + struct pipe_resource *resource, + unsigned bind) +{ + return true; +} + +static void noop_set_max_shader_compiler_threads(struct pipe_screen *screen, + unsigned max_threads) +{ +} + +static bool noop_is_parallel_shader_compilation_finished(struct pipe_screen *screen, + void *shader, + unsigned shader_type) +{ + return true; +} + +static bool noop_is_dmabuf_modifier_supported(struct pipe_screen *screen, + uint64_t modifier, enum pipe_format format, + bool *external_only) +{ + struct noop_pipe_screen *noop_screen = (struct noop_pipe_screen*)screen; + struct pipe_screen *oscreen = noop_screen->oscreen; + + return oscreen->is_dmabuf_modifier_supported(oscreen, modifier, format, external_only); +} + +static unsigned int noop_get_dmabuf_modifier_planes(struct pipe_screen *screen, + uint64_t modifier, + enum pipe_format format) +{ + struct noop_pipe_screen *noop_screen = (struct noop_pipe_screen*)screen; + struct pipe_screen *oscreen = noop_screen->oscreen; + + return oscreen->get_dmabuf_modifier_planes(oscreen, modifier, format); +} + +static void noop_get_driver_uuid(struct pipe_screen *screen, char *uuid) +{ + struct noop_pipe_screen *noop_screen = (struct noop_pipe_screen*)screen; + struct pipe_screen *oscreen = noop_screen->oscreen; + + oscreen->get_driver_uuid(oscreen, uuid); +} + +static void noop_get_device_uuid(struct pipe_screen *screen, char *uuid) +{ + struct noop_pipe_screen *noop_screen = (struct noop_pipe_screen*)screen; + struct pipe_screen *oscreen = noop_screen->oscreen; + + oscreen->get_device_uuid(oscreen, uuid); +} + +static void noop_query_dmabuf_modifiers(struct 
pipe_screen *screen, + enum pipe_format format, int max, + uint64_t *modifiers, + unsigned int *external_only, int *count) +{ + struct noop_pipe_screen *noop_screen = (struct noop_pipe_screen*)screen; + struct pipe_screen *oscreen = noop_screen->oscreen; + + oscreen->query_dmabuf_modifiers(oscreen, format, max, modifiers, + external_only, count); +} + +static struct pipe_vertex_state * +noop_create_vertex_state(struct pipe_screen *screen, + struct pipe_vertex_buffer *buffer, + const struct pipe_vertex_element *elements, + unsigned num_elements, + struct pipe_resource *indexbuf, + uint32_t full_velem_mask) +{ + struct pipe_vertex_state *state = CALLOC_STRUCT(pipe_vertex_state); + + if (!state) + return NULL; + + util_init_pipe_vertex_state(screen, buffer, elements, num_elements, indexbuf, + full_velem_mask, state); + return state; +} + +static void noop_vertex_state_destroy(struct pipe_screen *screen, + struct pipe_vertex_state *state) +{ + pipe_vertex_buffer_unreference(&state->input.vbuffer); + pipe_resource_reference(&state->input.indexbuf, NULL); + FREE(state); } struct pipe_screen *noop_screen_create(struct pipe_screen *oscreen) @@ -568,6 +742,20 @@ struct pipe_screen *noop_screen_create(struct pipe_screen *oscreen) screen->get_disk_shader_cache = noop_get_disk_shader_cache; screen->get_compiler_options = noop_get_compiler_options; screen->finalize_nir = noop_finalize_nir; + screen->check_resource_capability = noop_check_resource_capability; + screen->set_max_shader_compiler_threads = noop_set_max_shader_compiler_threads; + screen->is_parallel_shader_compilation_finished = noop_is_parallel_shader_compilation_finished; + screen->is_dmabuf_modifier_supported = noop_is_dmabuf_modifier_supported; + screen->get_dmabuf_modifier_planes = noop_get_dmabuf_modifier_planes; + screen->get_driver_uuid = noop_get_driver_uuid; + screen->get_device_uuid = noop_get_device_uuid; + screen->query_dmabuf_modifiers = noop_query_dmabuf_modifiers; + 
screen->resource_create_with_modifiers = noop_resource_create_with_modifiers; + screen->create_vertex_state = noop_create_vertex_state; + screen->vertex_state_destroy = noop_vertex_state_destroy; + + slab_create_parent(&noop_screen->pool_transfers, + sizeof(struct pipe_transfer), 64); return screen; } diff --git a/lib/mesa/src/gallium/auxiliary/driver_noop/noop_state.c b/lib/mesa/src/gallium/auxiliary/driver_noop/noop_state.c index fb90a1ec4..56036e22e 100644 --- a/lib/mesa/src/gallium/auxiliary/driver_noop/noop_state.c +++ b/lib/mesa/src/gallium/auxiliary/driver_noop/noop_state.c @@ -31,12 +31,22 @@ #include "util/u_transfer.h" static void noop_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info, + unsigned drawid_offset, const struct pipe_draw_indirect_info *indirect, - const struct pipe_draw_start_count *draws, + const struct pipe_draw_start_count_bias *draws, unsigned num_draws) { } +static void noop_draw_vertex_state(struct pipe_context *ctx, + struct pipe_vertex_state *state, + uint32_t partial_velem_mask, + struct pipe_draw_vertex_state_info info, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws) +{ +} + static void noop_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info) { @@ -115,6 +125,7 @@ static void noop_set_sampler_views(struct pipe_context *ctx, enum pipe_shader_type shader, unsigned start, unsigned count, unsigned unbind_num_trailing_slots, + bool take_ownership, struct pipe_sampler_view **views) { } @@ -267,6 +278,139 @@ static void noop_set_window_rectangles(struct pipe_context *ctx, { } +static void noop_set_shader_buffers(struct pipe_context *ctx, + enum pipe_shader_type shader, + unsigned start_slot, unsigned count, + const struct pipe_shader_buffer *buffers, + unsigned writable_bitmask) +{ +} + +static void noop_set_shader_images(struct pipe_context *ctx, + enum pipe_shader_type shader, + unsigned start_slot, unsigned count, + unsigned unbind_num_trailing_slots, + const struct 
pipe_image_view *images) +{ +} + +static void noop_render_condition( struct pipe_context *pipe, + struct pipe_query *query, + bool condition, + enum pipe_render_cond_flag mode ) +{ +} + +static void noop_get_query_result_resource(struct pipe_context *pipe, + struct pipe_query *q, + bool wait, + enum pipe_query_value_type result_type, + int index, + struct pipe_resource *resource, + unsigned offset) +{ +} + +static void noop_set_min_samples( struct pipe_context *ctx, + unsigned min_samples ) +{ +} + +static void noop_set_sample_locations( struct pipe_context *ctx, + size_t size, const uint8_t *locations ) +{ +} + +static void noop_set_tess_state(struct pipe_context *ctx, + const float default_outer_level[4], + const float default_inner_level[2]) +{ +} + +static void noop_clear_texture(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned level, + const struct pipe_box *box, + const void *data) +{ +} + +static void noop_clear_buffer(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned offset, + unsigned size, + const void *clear_value, + int clear_value_size) +{ +} + +static void noop_fence_server_sync(struct pipe_context *pipe, + struct pipe_fence_handle *fence) +{ +} + +static void noop_texture_barrier(struct pipe_context *ctx, unsigned flags) +{ +} + +static void noop_memory_barrier(struct pipe_context *ctx, unsigned flags) +{ +} + +static bool noop_resource_commit(struct pipe_context *ctx, struct pipe_resource *res, + unsigned level, struct pipe_box *box, bool commit) +{ + return true; +} + +static void noop_get_sample_position(struct pipe_context *context, + unsigned sample_count, + unsigned sample_index, + float *out_value) +{ +} + +static enum pipe_reset_status noop_get_device_reset_status(struct pipe_context *ctx) +{ + return PIPE_NO_RESET; +} + +static uint64_t noop_create_texture_handle(struct pipe_context *ctx, + struct pipe_sampler_view *view, + const struct pipe_sampler_state *state) +{ + return 1; +} + +static void 
noop_delete_texture_handle(struct pipe_context *ctx, uint64_t handle) +{ +} + +static void noop_make_texture_handle_resident(struct pipe_context *ctx, + uint64_t handle, bool resident) +{ +} + +static uint64_t noop_create_image_handle(struct pipe_context *ctx, + const struct pipe_image_view *image) +{ + return 2; +} + +static void noop_delete_image_handle(struct pipe_context *ctx, uint64_t handle) +{ +} + +static void noop_make_image_handle_resident(struct pipe_context *ctx, uint64_t handle, + unsigned access, bool resident) +{ +} + +static void noop_set_patch_vertices(struct pipe_context *ctx, + uint8_t patch_vertices) +{ +} + void noop_init_state_functions(struct pipe_context *ctx); void noop_init_state_functions(struct pipe_context *ctx) @@ -311,6 +455,8 @@ void noop_init_state_functions(struct pipe_context *ctx) ctx->set_constant_buffer = noop_set_constant_buffer; ctx->set_inlinable_constants = noop_set_inlinable_constants; ctx->set_sampler_views = noop_set_sampler_views; + ctx->set_shader_buffers = noop_set_shader_buffers; + ctx->set_shader_images = noop_set_shader_images; ctx->set_framebuffer_state = noop_set_framebuffer_state; ctx->set_polygon_stipple = noop_set_polygon_stipple; ctx->set_sample_mask = noop_set_sample_mask; @@ -322,8 +468,29 @@ void noop_init_state_functions(struct pipe_context *ctx) ctx->sampler_view_destroy = noop_sampler_view_destroy; ctx->surface_destroy = noop_surface_destroy; ctx->draw_vbo = noop_draw_vbo; + ctx->draw_vertex_state = noop_draw_vertex_state; ctx->launch_grid = noop_launch_grid; ctx->create_stream_output_target = noop_create_stream_output_target; ctx->stream_output_target_destroy = noop_stream_output_target_destroy; ctx->set_stream_output_targets = noop_set_stream_output_targets; + ctx->render_condition = noop_render_condition; + ctx->get_query_result_resource = noop_get_query_result_resource; + ctx->set_min_samples = noop_set_min_samples; + ctx->set_sample_locations = noop_set_sample_locations; + ctx->set_tess_state = 
noop_set_tess_state; + ctx->clear_texture = noop_clear_texture; + ctx->clear_buffer = noop_clear_buffer; + ctx->fence_server_sync = noop_fence_server_sync; + ctx->texture_barrier = noop_texture_barrier; + ctx->memory_barrier = noop_memory_barrier; + ctx->resource_commit = noop_resource_commit; + ctx->get_sample_position = noop_get_sample_position; + ctx->get_device_reset_status = noop_get_device_reset_status; + ctx->create_texture_handle = noop_create_texture_handle; + ctx->delete_texture_handle = noop_delete_texture_handle; + ctx->make_texture_handle_resident = noop_make_texture_handle_resident; + ctx->create_image_handle = noop_create_image_handle; + ctx->delete_image_handle = noop_delete_image_handle; + ctx->make_image_handle_resident = noop_make_image_handle_resident; + ctx->set_patch_vertices = noop_set_patch_vertices; } diff --git a/lib/mesa/src/gallium/auxiliary/driver_rbug/rbug_context.c b/lib/mesa/src/gallium/auxiliary/driver_rbug/rbug_context.c index 4f72eb891..b0c283d9b 100644 --- a/lib/mesa/src/gallium/auxiliary/driver_rbug/rbug_context.c +++ b/lib/mesa/src/gallium/auxiliary/driver_rbug/rbug_context.c @@ -115,8 +115,9 @@ rbug_draw_block_locked(struct rbug_context *rb_pipe, int flag) static void rbug_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *_info, + unsigned _drawid_offset, const struct pipe_draw_indirect_info *_indirect, - const struct pipe_draw_start_count *draws, + const struct pipe_draw_start_count_bias *draws, unsigned num_draws) { struct rbug_context *rb_pipe = rbug_context(_pipe); @@ -135,7 +136,7 @@ rbug_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *_info, if (!(rb_pipe->curr.shader[PIPE_SHADER_FRAGMENT] && rb_pipe->curr.shader[PIPE_SHADER_FRAGMENT]->disabled) && !(rb_pipe->curr.shader[PIPE_SHADER_GEOMETRY] && rb_pipe->curr.shader[PIPE_SHADER_GEOMETRY]->disabled) && !(rb_pipe->curr.shader[PIPE_SHADER_VERTEX] && rb_pipe->curr.shader[PIPE_SHADER_VERTEX]->disabled)) - pipe->draw_vbo(pipe, &info, 
_indirect, draws, num_draws); + pipe->draw_vbo(pipe, &info, _drawid_offset, _indirect, draws, num_draws); mtx_unlock(&rb_pipe->call_mutex); rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_AFTER); @@ -739,6 +740,7 @@ rbug_set_sampler_views(struct pipe_context *_pipe, unsigned start, unsigned num, unsigned unbind_num_trailing_slots, + bool take_ownership, struct pipe_sampler_view **_views) { struct rbug_context *rb_pipe = rbug_context(_pipe); @@ -768,7 +770,7 @@ rbug_set_sampler_views(struct pipe_context *_pipe, } pipe->set_sampler_views(pipe, shader, start, num, - unbind_num_trailing_slots, views); + unbind_num_trailing_slots, take_ownership, views); mtx_unlock(&rb_pipe->call_mutex); } @@ -1106,7 +1108,7 @@ rbug_context_surface_destroy(struct pipe_context *_pipe, static void * -rbug_context_transfer_map(struct pipe_context *_context, +rbug_context_buffer_map(struct pipe_context *_context, struct pipe_resource *_resource, unsigned level, unsigned usage, @@ -1121,7 +1123,34 @@ rbug_context_transfer_map(struct pipe_context *_context, void *map; mtx_lock(&rb_pipe->call_mutex); - map = context->transfer_map(context, + map = context->buffer_map(context, + resource, + level, + usage, + box, &result); + mtx_unlock(&rb_pipe->call_mutex); + + *transfer = rbug_transfer_create(rb_pipe, rb_resource, result); + return *transfer ? 
map : NULL; +} + +static void * +rbug_context_texture_map(struct pipe_context *_context, + struct pipe_resource *_resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **transfer) +{ + struct rbug_context *rb_pipe = rbug_context(_context); + struct rbug_resource *rb_resource = rbug_resource(_resource); + struct pipe_context *context = rb_pipe->pipe; + struct pipe_resource *resource = rb_resource->resource; + struct pipe_transfer *result; + void *map; + + mtx_lock(&rb_pipe->call_mutex); + map = context->texture_map(context, resource, level, usage, @@ -1151,7 +1180,24 @@ rbug_context_transfer_flush_region(struct pipe_context *_context, static void -rbug_context_transfer_unmap(struct pipe_context *_context, +rbug_context_buffer_unmap(struct pipe_context *_context, + struct pipe_transfer *_transfer) +{ + struct rbug_context *rb_pipe = rbug_context(_context); + struct rbug_transfer *rb_transfer = rbug_transfer(_transfer); + struct pipe_context *context = rb_pipe->pipe; + struct pipe_transfer *transfer = rb_transfer->transfer; + + mtx_lock(&rb_pipe->call_mutex); + context->buffer_unmap(context, + transfer); + rbug_transfer_destroy(rb_pipe, + rb_transfer); + mtx_unlock(&rb_pipe->call_mutex); +} + +static void +rbug_context_texture_unmap(struct pipe_context *_context, struct pipe_transfer *_transfer) { struct rbug_context *rb_pipe = rbug_context(_context); @@ -1160,7 +1206,7 @@ rbug_context_transfer_unmap(struct pipe_context *_context, struct pipe_transfer *transfer = rb_transfer->transfer; mtx_lock(&rb_pipe->call_mutex); - context->transfer_unmap(context, + context->texture_unmap(context, transfer); rbug_transfer_destroy(rb_pipe, rb_transfer); @@ -1307,8 +1353,10 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) rb_pipe->base.sampler_view_destroy = rbug_context_sampler_view_destroy; rb_pipe->base.create_surface = rbug_context_create_surface; rb_pipe->base.surface_destroy = 
rbug_context_surface_destroy; - rb_pipe->base.transfer_map = rbug_context_transfer_map; - rb_pipe->base.transfer_unmap = rbug_context_transfer_unmap; + rb_pipe->base.buffer_map = rbug_context_buffer_map; + rb_pipe->base.buffer_unmap = rbug_context_buffer_unmap; + rb_pipe->base.texture_map = rbug_context_texture_map; + rb_pipe->base.texture_unmap = rbug_context_texture_unmap; rb_pipe->base.transfer_flush_region = rbug_context_transfer_flush_region; rb_pipe->base.buffer_subdata = rbug_context_buffer_subdata; rb_pipe->base.texture_subdata = rbug_context_texture_subdata; diff --git a/lib/mesa/src/gallium/auxiliary/driver_rbug/rbug_core.c b/lib/mesa/src/gallium/auxiliary/driver_rbug/rbug_core.c index 6d6ca7ec0..aad4487f4 100644 --- a/lib/mesa/src/gallium/auxiliary/driver_rbug/rbug_core.c +++ b/lib/mesa/src/gallium/auxiliary/driver_rbug/rbug_core.c @@ -267,7 +267,7 @@ rbug_texture_read(struct rbug_rbug *tr_rbug, struct rbug_header *header, uint32_ } tex = tr_tex->resource; - map = pipe_transfer_map(context, tex, + map = pipe_texture_map(context, tex, gptr->level, gptr->face + gptr->zslice, PIPE_MAP_READ, gptr->x, gptr->y, gptr->w, gptr->h, &t); @@ -283,7 +283,7 @@ rbug_texture_read(struct rbug_rbug *tr_rbug, struct rbug_header *header, uint32_ t->stride, NULL); - context->transfer_unmap(context, t); + context->texture_unmap(context, t); mtx_unlock(&rb_screen->list_mutex); diff --git a/lib/mesa/src/gallium/auxiliary/driver_rbug/rbug_objects.c b/lib/mesa/src/gallium/auxiliary/driver_rbug/rbug_objects.c index bf2790cf2..09455cf6f 100644 --- a/lib/mesa/src/gallium/auxiliary/driver_rbug/rbug_objects.c +++ b/lib/mesa/src/gallium/auxiliary/driver_rbug/rbug_objects.c @@ -186,7 +186,10 @@ rbug_transfer_create(struct rbug_context *rb_context, return &rb_transfer->base; error: - rb_context->pipe->transfer_unmap(rb_context->pipe, transfer); + if (rb_resource->base.target == PIPE_BUFFER) + rb_context->pipe->buffer_unmap(rb_context->pipe, transfer); + else + 
rb_context->pipe->texture_unmap(rb_context->pipe, transfer); return NULL; } diff --git a/lib/mesa/src/gallium/auxiliary/driver_rbug/rbug_screen.c b/lib/mesa/src/gallium/auxiliary/driver_rbug/rbug_screen.c index 25d0fcf0e..9eb9ba379 100644 --- a/lib/mesa/src/gallium/auxiliary/driver_rbug/rbug_screen.c +++ b/lib/mesa/src/gallium/auxiliary/driver_rbug/rbug_screen.c @@ -410,12 +410,12 @@ rbug_screen_fence_get_fd(struct pipe_screen *_screen, return screen->fence_get_fd(screen, fence); } -static void -rbug_screen_finalize_nir(struct pipe_screen *_screen, void *nir, bool optimize) +static char * +rbug_screen_finalize_nir(struct pipe_screen *_screen, void *nir) { struct pipe_screen *screen = rbug_screen(_screen)->screen; - screen->finalize_nir(screen, nir, optimize); + return screen->finalize_nir(screen, nir); } bool diff --git a/lib/mesa/src/gallium/auxiliary/driver_trace/tr_context.c b/lib/mesa/src/gallium/auxiliary/driver_trace/tr_context.c index 1252d367b..7e28a4028 100644 --- a/lib/mesa/src/gallium/auxiliary/driver_trace/tr_context.c +++ b/lib/mesa/src/gallium/auxiliary/driver_trace/tr_context.c @@ -45,6 +45,7 @@ struct trace_query { + struct threaded_query base; unsigned type; struct pipe_query *query; @@ -110,8 +111,9 @@ dump_fb_state(struct trace_context *tr_ctx, static void trace_context_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info, + unsigned drawid_offset, const struct pipe_draw_indirect_info *indirect, - const struct pipe_draw_start_count *draws, + const struct pipe_draw_start_count_bias *draws, unsigned num_draws) { struct trace_context *tr_ctx = trace_context(_pipe); @@ -124,6 +126,7 @@ trace_context_draw_vbo(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(draw_info, info); + trace_dump_arg(int, drawid_offset); trace_dump_arg(draw_indirect_info, indirect); trace_dump_arg_begin("draws"); trace_dump_struct_array(draw_start_count, draws, num_draws); @@ -132,12 +135,45 @@ trace_context_draw_vbo(struct pipe_context 
*_pipe, trace_dump_trace_flush(); - pipe->draw_vbo(pipe, info, indirect, draws, num_draws); + pipe->draw_vbo(pipe, info, drawid_offset, indirect, draws, num_draws); trace_dump_call_end(); } +static void +trace_context_draw_vertex_state(struct pipe_context *_pipe, + struct pipe_vertex_state *state, + uint32_t partial_velem_mask, + struct pipe_draw_vertex_state_info info, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + if (!tr_ctx->seen_fb_state && trace_dump_is_triggered()) + dump_fb_state(tr_ctx, "current_framebuffer_state", true); + + trace_dump_call_begin("pipe_context", "draw_vertex_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + trace_dump_arg(uint, partial_velem_mask); + trace_dump_arg(draw_vertex_state_info, info); + trace_dump_arg_begin("draws"); + trace_dump_struct_array(draw_start_count, draws, num_draws); + trace_dump_arg_end(); + trace_dump_arg(uint, num_draws); + + trace_dump_trace_flush(); + + pipe->draw_vertex_state(pipe, state, partial_velem_mask, info, draws, + num_draws); + trace_dump_call_end(); +} + + static struct pipe_query * trace_context_create_query(struct pipe_context *_pipe, unsigned query_type, @@ -222,19 +258,21 @@ trace_context_begin_query(struct pipe_context *_pipe, static bool trace_context_end_query(struct pipe_context *_pipe, - struct pipe_query *query) + struct pipe_query *_query) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; bool ret; - query = trace_query_unwrap(query); + struct pipe_query *query = trace_query_unwrap(_query); trace_dump_call_begin("pipe_context", "end_query"); trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, query); + if (tr_ctx->threaded) + threaded_query(query)->flushed = trace_query(_query)->base.flushed; ret = pipe->end_query(pipe, query); trace_dump_call_end(); @@ -258,6 +296,10 @@ 
trace_context_get_query_result(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, query); + trace_dump_arg(bool, wait); + + if (tr_ctx->threaded) + threaded_query(query)->flushed = trace_query(_query)->base.flushed; ret = pipe->get_query_result(pipe, query, wait, result); @@ -1013,6 +1055,8 @@ trace_context_create_sampler_view(struct pipe_context *_pipe, pipe_resource_reference(&tr_view->base.texture, resource); tr_view->base.context = _pipe; tr_view->sampler_view = result; + result->reference.count += 100000000; + tr_view->refcount = 100000000; result = &tr_view->base; return result; @@ -1028,13 +1072,12 @@ trace_context_sampler_view_destroy(struct pipe_context *_pipe, struct pipe_context *pipe = tr_ctx->pipe; struct pipe_sampler_view *view = tr_view->sampler_view; - assert(_view->context == _pipe); - trace_dump_call_begin("pipe_context", "sampler_view_destroy"); trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, view); + p_atomic_add(&tr_view->sampler_view->reference.count, -tr_view->refcount); pipe_sampler_view_reference(&tr_view->sampler_view, NULL); trace_dump_call_end(); @@ -1105,6 +1148,7 @@ trace_context_set_sampler_views(struct pipe_context *_pipe, unsigned start, unsigned num, unsigned unbind_num_trailing_slots, + bool take_ownership, struct pipe_sampler_view **views) { struct trace_context *tr_ctx = trace_context(_pipe); @@ -1118,6 +1162,13 @@ trace_context_set_sampler_views(struct pipe_context *_pipe, for (i = 0; i < num; ++i) { tr_view = trace_sampler_view(views[i]); + if (tr_view) { + tr_view->refcount--; + if (!tr_view->refcount) { + tr_view->refcount = 100000000; + p_atomic_add(&tr_view->sampler_view->reference.count, tr_view->refcount); + } + } unwrapped_views[i] = tr_view ? 
tr_view->sampler_view : NULL; } views = unwrapped_views; @@ -1129,10 +1180,11 @@ trace_context_set_sampler_views(struct pipe_context *_pipe, trace_dump_arg(uint, start); trace_dump_arg(uint, num); trace_dump_arg(uint, unbind_num_trailing_slots); + trace_dump_arg(bool, take_ownership); trace_dump_arg_array(ptr, views, num); pipe->set_sampler_views(pipe, shader, start, num, - unbind_num_trailing_slots, views); + unbind_num_trailing_slots, take_ownership, views); trace_dump_call_end(); } @@ -1405,6 +1457,32 @@ trace_context_clear_depth_stencil(struct pipe_context *_pipe, } static inline void +trace_context_clear_buffer(struct pipe_context *_pipe, + struct pipe_resource *res, + unsigned offset, + unsigned size, + const void *clear_value, + int clear_value_size) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + + trace_dump_call_begin("pipe_context", "clear_buffer"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, res); + trace_dump_arg(uint, offset); + trace_dump_arg(uint, size); + trace_dump_arg(ptr, clear_value); + trace_dump_arg(int, clear_value_size); + + pipe->clear_buffer(pipe, res, offset, size, clear_value, clear_value_size); + + trace_dump_call_end(); +} + +static inline void trace_context_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res, unsigned level, @@ -1563,20 +1641,29 @@ trace_context_transfer_map(struct pipe_context *_context, struct pipe_transfer **transfer) { struct trace_context *tr_context = trace_context(_context); - struct pipe_context *context = tr_context->pipe; - struct pipe_transfer *result = NULL; + struct pipe_context *pipe = tr_context->pipe; + struct pipe_transfer *xfer = NULL; void *map; - /* - * Map and transfers can't be serialized so we convert all write transfers - * to texture/buffer_subdata and ignore read transfers. 
- */ - - map = context->transfer_map(context, resource, level, usage, box, &result); + if (resource->target == PIPE_BUFFER) + map = pipe->buffer_map(pipe, resource, level, usage, box, &xfer); + else + map = pipe->texture_map(pipe, resource, level, usage, box, &xfer); if (!map) return NULL; + *transfer = trace_transfer_create(tr_context, resource, xfer); + trace_dump_call_begin("pipe_context", resource->target == PIPE_BUFFER ? "buffer_map" : "texture_map"); - *transfer = trace_transfer_create(tr_context, resource, result); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, resource); + trace_dump_arg(uint, level); + trace_dump_arg(uint, usage); + trace_dump_arg(box, box); + + trace_dump_arg(ptr, xfer); + trace_dump_ret(ptr, map); + + trace_dump_call_end(); if (map) { if (usage & PIPE_MAP_WRITE) { @@ -1594,10 +1681,18 @@ trace_context_transfer_flush_region( struct pipe_context *_context, { struct trace_context *tr_context = trace_context(_context); struct trace_transfer *tr_transfer = trace_transfer(_transfer); - struct pipe_context *context = tr_context->pipe; + struct pipe_context *pipe = tr_context->pipe; struct pipe_transfer *transfer = tr_transfer->transfer; - context->transfer_flush_region(context, transfer, box); + trace_dump_call_begin("pipe_context", "transfer_flush_region"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, transfer); + trace_dump_arg(box, box); + + trace_dump_call_end(); + + pipe->transfer_flush_region(pipe, transfer, box); } static void @@ -1609,7 +1704,15 @@ trace_context_transfer_unmap(struct pipe_context *_context, struct pipe_context *context = tr_ctx->pipe; struct pipe_transfer *transfer = tr_trans->transfer; - if (tr_trans->map) { + + trace_dump_call_begin("pipe_context", "transfer_unmap"); + + trace_dump_arg(ptr, context); + trace_dump_arg(ptr, transfer); + + trace_dump_call_end(); + + if (tr_trans->map && !tr_ctx->threaded) { /* * Fake a texture/buffer_subdata */ @@ -1672,7 +1775,10 @@ trace_context_transfer_unmap(struct 
pipe_context *_context, tr_trans->map = NULL; } - context->transfer_unmap(context, transfer); + if (transfer->resource->target == PIPE_BUFFER) + context->buffer_unmap(context, transfer); + else + context->texture_unmap(context, transfer); trace_transfer_destroy(tr_ctx, tr_trans); } @@ -1781,6 +1887,21 @@ trace_context_set_context_param(struct pipe_context *_context, } static void +trace_context_set_debug_callback(struct pipe_context *_context, const struct pipe_debug_callback *cb) +{ + struct trace_context *tr_context = trace_context(_context); + struct pipe_context *context = tr_context->pipe; + + trace_dump_call_begin("pipe_context", "set_debug_callback"); + + trace_dump_arg(ptr, context); + + trace_dump_call_end(); + + context->set_debug_callback(context, cb); +} + +static void trace_context_render_condition(struct pipe_context *_context, struct pipe_query *query, bool condition, @@ -1873,6 +1994,20 @@ trace_context_set_tess_state(struct pipe_context *_context, context->set_tess_state(context, default_outer_level, default_inner_level); } +static void +trace_context_set_patch_vertices(struct pipe_context *_context, + uint8_t patch_vertices) +{ + struct trace_context *tr_context = trace_context(_context); + struct pipe_context *context = tr_context->pipe; + + trace_dump_call_begin("pipe_context", "set_patch_vertices"); + trace_dump_arg(ptr, context); + trace_dump_arg(uint, patch_vertices); + trace_dump_call_end(); + + context->set_patch_vertices(context, patch_vertices); +} static void trace_context_set_shader_buffers(struct pipe_context *_context, enum pipe_shader_type shader, @@ -2075,6 +2210,7 @@ trace_context_create(struct trace_screen *tr_scr, tr_ctx->base . _member = pipe -> _member ? 
trace_context_ ## _member : NULL TR_CTX_INIT(draw_vbo); + TR_CTX_INIT(draw_vertex_state); TR_CTX_INIT(render_condition); TR_CTX_INIT(create_query); TR_CTX_INIT(destroy_query); @@ -2135,6 +2271,8 @@ trace_context_create(struct trace_screen *tr_scr, TR_CTX_INIT(create_stream_output_target); TR_CTX_INIT(stream_output_target_destroy); TR_CTX_INIT(set_stream_output_targets); + /* this is lavapipe-only and can't be traced */ + tr_ctx->base.stream_output_target_offset = pipe->stream_output_target_offset; TR_CTX_INIT(resource_copy_region); TR_CTX_INIT(blit); TR_CTX_INIT(flush_resource); @@ -2142,6 +2280,7 @@ trace_context_create(struct trace_screen *tr_scr, TR_CTX_INIT(clear_render_target); TR_CTX_INIT(clear_depth_stencil); TR_CTX_INIT(clear_texture); + TR_CTX_INIT(clear_buffer); TR_CTX_INIT(flush); TR_CTX_INIT(create_fence_fd); TR_CTX_INIT(fence_server_sync); @@ -2150,6 +2289,7 @@ trace_context_create(struct trace_screen *tr_scr, TR_CTX_INIT(memory_barrier); TR_CTX_INIT(resource_commit); TR_CTX_INIT(set_tess_state); + TR_CTX_INIT(set_patch_vertices); TR_CTX_INIT(set_shader_buffers); TR_CTX_INIT(launch_grid); TR_CTX_INIT(set_shader_images); @@ -2160,13 +2300,14 @@ trace_context_create(struct trace_screen *tr_scr, TR_CTX_INIT(delete_image_handle); TR_CTX_INIT(make_image_handle_resident); - TR_CTX_INIT(transfer_map); - TR_CTX_INIT(transfer_unmap); + tr_ctx->base.buffer_map = tr_ctx->base.texture_map = trace_context_transfer_map; + tr_ctx->base.buffer_unmap = tr_ctx->base.texture_unmap = trace_context_transfer_unmap; TR_CTX_INIT(transfer_flush_region); TR_CTX_INIT(buffer_subdata); TR_CTX_INIT(texture_subdata); TR_CTX_INIT(invalidate_resource); TR_CTX_INIT(set_context_param); + TR_CTX_INIT(set_debug_callback); #undef TR_CTX_INIT @@ -2189,3 +2330,12 @@ trace_context_check(const struct pipe_context *pipe) ASSERTED struct trace_context *tr_ctx = (struct trace_context *) pipe; assert(tr_ctx->base.destroy == trace_context_destroy); } + +/** + * Threaded context is not wrapped, and 
so it may call fence functions directly + */ +struct pipe_context * +trace_get_possibly_threaded_context(struct pipe_context *pipe) +{ + return pipe->destroy == trace_context_destroy ? ((struct trace_context*)pipe)->pipe : pipe; +} diff --git a/lib/mesa/src/gallium/auxiliary/driver_trace/tr_context.h b/lib/mesa/src/gallium/auxiliary/driver_trace/tr_context.h index 95469e875..f687fa293 100644 --- a/lib/mesa/src/gallium/auxiliary/driver_trace/tr_context.h +++ b/lib/mesa/src/gallium/auxiliary/driver_trace/tr_context.h @@ -33,6 +33,7 @@ #include "util/u_debug.h" #include "util/hash_table.h" #include "pipe/p_context.h" +#include "util/u_threaded_context.h" #include "tr_screen.h" @@ -52,15 +53,20 @@ struct trace_context struct hash_table depth_stencil_alpha_states; struct pipe_context *pipe; + tc_replace_buffer_storage_func replace_buffer_storage; + tc_create_fence_func create_fence; struct pipe_framebuffer_state unwrapped_state; bool seen_fb_state; + + bool threaded; }; void trace_context_check(const struct pipe_context *pipe); - +struct pipe_context * +trace_get_possibly_threaded_context(struct pipe_context *pipe); static inline struct trace_context * trace_context(struct pipe_context *pipe) @@ -77,7 +83,10 @@ struct pipe_context * trace_context_create(struct trace_screen *tr_scr, struct pipe_context *pipe); - +struct pipe_context * +trace_context_create_threaded(struct pipe_screen *screen, struct pipe_context *pipe, + tc_replace_buffer_storage_func *replace_buffer, + struct threaded_context_options *options); #ifdef __cplusplus } #endif diff --git a/lib/mesa/src/gallium/auxiliary/driver_trace/tr_dump_state.c b/lib/mesa/src/gallium/auxiliary/driver_trace/tr_dump_state.c index ec479b596..1f5da8019 100644 --- a/lib/mesa/src/gallium/auxiliary/driver_trace/tr_dump_state.c +++ b/lib/mesa/src/gallium/auxiliary/driver_trace/tr_dump_state.c @@ -552,6 +552,7 @@ void trace_dump_sampler_view_template(const struct pipe_sampler_view *state, 
trace_dump_struct_begin("pipe_sampler_view"); trace_dump_member(format, state, format); + trace_dump_member(ptr, state, texture); trace_dump_member_begin("u"); trace_dump_struct_begin(""); /* anonymous */ @@ -604,6 +605,7 @@ void trace_dump_surface_template(const struct pipe_surface *state, trace_dump_struct_begin("pipe_surface"); trace_dump_member(format, state, format); + trace_dump_member(ptr, state, texture); trace_dump_member(uint, state, width); trace_dump_member(uint, state, height); @@ -698,6 +700,10 @@ void trace_dump_vertex_element(const struct pipe_vertex_element *state) trace_dump_member(uint, state, vertex_buffer_index); + trace_dump_member(uint, state, instance_divisor); + + trace_dump_member(bool, state, dual_slot); + trace_dump_member(format, state, src_format); trace_dump_struct_end(); @@ -780,6 +786,26 @@ void trace_dump_image_view(const struct pipe_image_view *state) } +void trace_dump_memory_info(const struct pipe_memory_info *state) +{ + if (!trace_dumping_enabled_locked()) + return; + + if (!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_memory_info"); + trace_dump_member(uint, state, total_device_memory); + trace_dump_member(uint, state, avail_device_memory); + trace_dump_member(uint, state, total_staging_memory); + trace_dump_member(uint, state, avail_staging_memory); + trace_dump_member(uint, state, device_memory_evicted); + trace_dump_member(uint, state, nr_device_memory_evictions); + trace_dump_struct_end(); +} + void trace_dump_draw_info(const struct pipe_draw_info *state) { if (!trace_dumping_enabled_locked()) @@ -798,9 +824,6 @@ void trace_dump_draw_info(const struct pipe_draw_info *state) trace_dump_member(uint, state, start_instance); trace_dump_member(uint, state, instance_count); - trace_dump_member(uint, state, vertices_per_patch); - - trace_dump_member(int, state, index_bias); trace_dump_member(uint, state, min_index); trace_dump_member(uint, state, max_index); @@ -811,14 +834,26 @@ void 
trace_dump_draw_info(const struct pipe_draw_info *state) trace_dump_struct_end(); } -void trace_dump_draw_start_count(const struct pipe_draw_start_count *state) +void trace_dump_draw_vertex_state_info(struct pipe_draw_vertex_state_info state) +{ + if (!trace_dumping_enabled_locked()) + return; + + trace_dump_struct_begin("pipe_draw_vertex_state_info"); + trace_dump_member(uint, &state, mode); + trace_dump_member(uint, &state, take_vertex_state_ownership); + trace_dump_struct_end(); +} + +void trace_dump_draw_start_count(const struct pipe_draw_start_count_bias *state) { if (!trace_dumping_enabled_locked()) return; - trace_dump_struct_begin("pipe_draw_start_count"); + trace_dump_struct_begin("pipe_draw_start_count_bias"); trace_dump_member(uint, state, start); trace_dump_member(uint, state, count); + trace_dump_member(int, state, index_bias); trace_dump_struct_end(); } diff --git a/lib/mesa/src/gallium/auxiliary/driver_trace/tr_dump_state.h b/lib/mesa/src/gallium/auxiliary/driver_trace/tr_dump_state.h index 1a969c750..f5633b3be 100644 --- a/lib/mesa/src/gallium/auxiliary/driver_trace/tr_dump_state.h +++ b/lib/mesa/src/gallium/auxiliary/driver_trace/tr_dump_state.h @@ -86,7 +86,9 @@ void trace_dump_shader_buffer(const struct pipe_shader_buffer *buffer); void trace_dump_draw_info(const struct pipe_draw_info *state); -void trace_dump_draw_start_count(const struct pipe_draw_start_count *state); +void trace_dump_draw_vertex_state_info(struct pipe_draw_vertex_state_info state); + +void trace_dump_draw_start_count(const struct pipe_draw_start_count_bias *state); void trace_dump_draw_indirect_info(const struct pipe_draw_indirect_info *state); @@ -99,4 +101,5 @@ void trace_dump_grid_info(const struct pipe_grid_info *state); void trace_dump_image_view(const struct pipe_image_view *view); +void trace_dump_memory_info(const struct pipe_memory_info *state); #endif /* TR_STATE_H */ diff --git a/lib/mesa/src/gallium/auxiliary/driver_trace/tr_screen.c 
b/lib/mesa/src/gallium/auxiliary/driver_trace/tr_screen.c index 95ce875bc..32f0bba7c 100644 --- a/lib/mesa/src/gallium/auxiliary/driver_trace/tr_screen.c +++ b/lib/mesa/src/gallium/auxiliary/driver_trace/tr_screen.c @@ -27,6 +27,7 @@ #include "util/format/u_format.h" #include "util/u_memory.h" +#include "util/hash_table.h" #include "util/simple_list.h" #include "tr_dump.h" @@ -39,6 +40,7 @@ static bool trace = false; +static struct hash_table *trace_screens; static const char * trace_screen_get_name(struct pipe_screen *_screen) @@ -262,6 +264,104 @@ trace_screen_is_format_supported(struct pipe_screen *_screen, return result; } +static void +trace_context_replace_buffer_storage(struct pipe_context *_pipe, + struct pipe_resource *dst, + struct pipe_resource *src, + unsigned num_rebinds, + uint32_t rebind_mask, + unsigned delete_buffer_id) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "replace_buffer_storage"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, dst); + trace_dump_arg(ptr, src); + trace_dump_arg(uint, num_rebinds); + trace_dump_arg(uint, rebind_mask); + trace_dump_arg(uint, delete_buffer_id); + trace_dump_call_end(); + + tr_ctx->replace_buffer_storage(pipe, dst, src, num_rebinds, rebind_mask, delete_buffer_id); +} + +static struct pipe_fence_handle * +trace_context_create_fence(struct pipe_context *_pipe, struct tc_unflushed_batch_token *token) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "create_fence"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, token); + + struct pipe_fence_handle *ret = tr_ctx->create_fence(pipe, token); + trace_dump_ret(ptr, ret); + trace_dump_call_end(); + + return ret; +} + +static bool +trace_context_is_resource_busy(struct pipe_screen *_screen, + struct pipe_resource *resource, + unsigned usage) +{ + struct trace_screen 
*tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + bool result; + + trace_dump_call_begin("pipe_screen", "is_resource_busy"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(ptr, resource); + trace_dump_arg(uint, usage); + + result = tr_scr->is_resource_busy(screen, resource, usage); + + trace_dump_ret(bool, result); + + trace_dump_call_end(); + + return result; +} + +struct pipe_context * +trace_context_create_threaded(struct pipe_screen *screen, struct pipe_context *pipe, + tc_replace_buffer_storage_func *replace_buffer, + struct threaded_context_options *options) +{ + if (!trace_screens) + return pipe; + + struct hash_entry *he = _mesa_hash_table_search(trace_screens, screen); + if (!he) + return pipe; + struct trace_screen *tr_scr = trace_screen(he->data); + + if (tr_scr->trace_tc) + return pipe; + + struct pipe_context *ctx = trace_context_create(tr_scr, pipe); + if (!ctx) + return pipe; + + struct trace_context *tr_ctx = trace_context(ctx); + tr_ctx->replace_buffer_storage = *replace_buffer; + tr_ctx->create_fence = options->create_fence; + tr_scr->is_resource_busy = options->is_resource_busy; + tr_ctx->threaded = true; + *replace_buffer = trace_context_replace_buffer_storage; + if (options->create_fence) + options->create_fence = trace_context_create_fence; + if (options->is_resource_busy) + options->is_resource_busy = trace_context_is_resource_busy; + return ctx; +} static struct pipe_context * trace_screen_context_create(struct pipe_screen *_screen, void *priv, @@ -271,19 +371,20 @@ trace_screen_context_create(struct pipe_screen *_screen, void *priv, struct pipe_screen *screen = tr_scr->screen; struct pipe_context *result; + result = screen->context_create(screen, priv, flags); + trace_dump_call_begin("pipe_screen", "context_create"); trace_dump_arg(ptr, screen); trace_dump_arg(ptr, priv); trace_dump_arg(uint, flags); - result = screen->context_create(screen, priv, flags); - trace_dump_ret(ptr, result); 
trace_dump_call_end(); - result = trace_context_create(tr_scr, result); + if (result && (tr_scr->trace_tc || result->draw_vbo != tc_draw_vbo)) + result = trace_context_create(tr_scr, result); return result; } @@ -299,7 +400,7 @@ trace_screen_flush_frontbuffer(struct pipe_screen *_screen, { struct trace_screen *tr_scr = trace_screen(_screen); struct pipe_screen *screen = tr_scr->screen; - struct pipe_context *pipe = _pipe ? trace_context(_pipe)->pipe : NULL; + struct pipe_context *pipe = _pipe ? trace_get_possibly_threaded_context(_pipe) : NULL; trace_dump_call_begin("pipe_screen", "flush_frontbuffer"); @@ -311,9 +412,9 @@ trace_screen_flush_frontbuffer(struct pipe_screen *_screen, trace_dump_arg(ptr, context_private); */ - screen->flush_frontbuffer(screen, pipe, resource, level, layer, context_private, sub_box); - trace_dump_call_end(); + + screen->flush_frontbuffer(screen, pipe, resource, level, layer, context_private, sub_box); } @@ -430,7 +531,7 @@ trace_screen_free_memory(struct pipe_screen *_screen, trace_dump_call_end(); } -static void +static bool trace_screen_resource_bind_backing(struct pipe_screen *_screen, struct pipe_resource *resource, struct pipe_memory_allocation *pmem, @@ -438,6 +539,7 @@ trace_screen_resource_bind_backing(struct pipe_screen *_screen, { struct trace_screen *tr_scr = trace_screen(_screen); struct pipe_screen *screen = tr_scr->screen; + bool result; trace_dump_call_begin("pipe_screen", "resource_bind_backing"); @@ -446,9 +548,13 @@ trace_screen_resource_bind_backing(struct pipe_screen *_screen, trace_dump_arg(ptr, pmem); trace_dump_arg(uint, offset); - screen->resource_bind_backing(screen, resource, pmem, offset); + result = screen->resource_bind_backing(screen, resource, pmem, offset); + + trace_dump_ret(bool, result); trace_dump_call_end(); + + return result; } static struct pipe_resource * @@ -504,6 +610,31 @@ trace_screen_resource_create(struct pipe_screen *_screen, } static struct pipe_resource * 
+trace_screen_resource_create_with_modifiers(struct pipe_screen *_screen, const struct pipe_resource *templat, + const uint64_t *modifiers, int modifiers_count) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + struct pipe_resource *result; + + trace_dump_call_begin("pipe_screen", "resource_create_with_modifiers"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(resource_template, templat); + trace_dump_arg_array(uint, modifiers, modifiers_count); + + result = screen->resource_create_with_modifiers(screen, templat, modifiers, modifiers_count); + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + if (result) + result->screen = _screen; + return result; +} + +static struct pipe_resource * trace_screen_resource_from_handle(struct pipe_screen *_screen, const struct pipe_resource *templ, struct winsys_handle *handle, @@ -540,12 +671,12 @@ trace_screen_resource_get_handle(struct pipe_screen *_screen, unsigned usage) { struct trace_screen *tr_screen = trace_screen(_screen); - struct trace_context *tr_pipe = _pipe ? trace_context(_pipe) : NULL; + struct pipe_context *pipe = _pipe ? trace_get_possibly_threaded_context(_pipe) : NULL; struct pipe_screen *screen = tr_screen->screen; /* TODO trace call */ - return screen->resource_get_handle(screen, tr_pipe ? tr_pipe->pipe : NULL, + return screen->resource_get_handle(screen, pipe, resource, handle, usage); } @@ -561,12 +692,12 @@ trace_screen_resource_get_param(struct pipe_screen *_screen, uint64_t *value) { struct trace_screen *tr_screen = trace_screen(_screen); - struct trace_context *tr_pipe = _pipe ? trace_context(_pipe) : NULL; + struct pipe_context *pipe = _pipe ? trace_get_possibly_threaded_context(_pipe) : NULL; struct pipe_screen *screen = tr_screen->screen; /* TODO trace call */ - return screen->resource_get_param(screen, tr_pipe ? 
tr_pipe->pipe : NULL, + return screen->resource_get_param(screen, pipe, resource, plane, layer, level, param, handle_usage, value); } @@ -704,9 +835,12 @@ trace_screen_fence_finish(struct pipe_screen *_screen, { struct trace_screen *tr_scr = trace_screen(_screen); struct pipe_screen *screen = tr_scr->screen; - struct pipe_context *ctx = _ctx ? trace_context(_ctx)->pipe : NULL; + struct pipe_context *ctx = _ctx ? trace_get_possibly_threaded_context(_ctx) : NULL; int result; + result = screen->fence_finish(screen, ctx, fence, timeout); + + trace_dump_call_begin("pipe_screen", "fence_finish"); trace_dump_arg(ptr, screen); @@ -714,8 +848,6 @@ trace_screen_fence_finish(struct pipe_screen *_screen, trace_dump_arg(ptr, fence); trace_dump_arg(uint, timeout); - result = screen->fence_finish(screen, ctx, fence, timeout); - trace_dump_ret(bool, result); trace_dump_call_end(); @@ -786,12 +918,12 @@ trace_screen_get_timestamp(struct pipe_screen *_screen) return result; } -static void -trace_screen_finalize_nir(struct pipe_screen *_screen, void *nir, bool optimize) +static char * +trace_screen_finalize_nir(struct pipe_screen *_screen, void *nir) { struct pipe_screen *screen = trace_screen(_screen)->screen; - screen->finalize_nir(screen, nir, optimize); + return screen->finalize_nir(screen, nir); } static void @@ -804,11 +936,154 @@ trace_screen_destroy(struct pipe_screen *_screen) trace_dump_arg(ptr, screen); trace_dump_call_end(); + if (trace_screens) { + struct hash_entry *he = _mesa_hash_table_search(trace_screens, screen); + if (he) { + _mesa_hash_table_remove(trace_screens, he); + if (!_mesa_hash_table_num_entries(trace_screens)) { + _mesa_hash_table_destroy(trace_screens, NULL); + trace_screens = NULL; + } + } + } + screen->destroy(screen); FREE(tr_scr); } +static void +trace_screen_query_memory_info(struct pipe_screen *_screen, struct pipe_memory_info *info) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + + 
trace_dump_call_begin("pipe_screen", "query_memory_info"); + + trace_dump_arg(ptr, screen); + + screen->query_memory_info(screen, info); + + trace_dump_ret(memory_info, info); + + trace_dump_call_end(); +} + +static void +trace_screen_query_dmabuf_modifiers(struct pipe_screen *_screen, enum pipe_format format, int max, uint64_t *modifiers, unsigned int *external_only, int *count) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + + trace_dump_call_begin("pipe_screen", "query_dmabuf_modifiers"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(format, format); + trace_dump_arg(int, max); + + screen->query_dmabuf_modifiers(screen, format, max, modifiers, external_only, count); + + if (max) + trace_dump_arg_array(uint, modifiers, *count); + else + trace_dump_arg_array(uint, modifiers, max); + trace_dump_arg_array(uint, external_only, max); + trace_dump_ret_begin(); + trace_dump_uint(*count); + trace_dump_ret_end(); + + trace_dump_call_end(); +} + +static bool +trace_screen_is_dmabuf_modifier_supported(struct pipe_screen *_screen, uint64_t modifier, enum pipe_format format, bool *external_only) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + + trace_dump_call_begin("pipe_screen", "is_dmabuf_modifier_supported"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(uint, modifier); + trace_dump_arg(format, format); + + bool ret = screen->is_dmabuf_modifier_supported(screen, modifier, format, external_only); + + trace_dump_arg_begin("external_only"); + trace_dump_bool(external_only ? 
*external_only : false); + trace_dump_arg_end(); + + trace_dump_ret(bool, ret); + + trace_dump_call_end(); + return ret; +} + +static unsigned int +trace_screen_get_dmabuf_modifier_planes(struct pipe_screen *_screen, uint64_t modifier, enum pipe_format format) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + + trace_dump_call_begin("pipe_screen", "get_dmabuf_modifier_planes"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(uint, modifier); + trace_dump_arg(format, format); + + unsigned ret = screen->get_dmabuf_modifier_planes(screen, modifier, format); + + trace_dump_ret(uint, ret); + + trace_dump_call_end(); + return ret; +} + +static struct pipe_vertex_state * +trace_screen_create_vertex_state(struct pipe_screen *_screen, + struct pipe_vertex_buffer *buffer, + const struct pipe_vertex_element *elements, + unsigned num_elements, + struct pipe_resource *indexbuf, + uint32_t full_velem_mask) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + + trace_dump_call_begin("pipe_screen", "create_vertex_state"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(ptr, buffer->buffer.resource); + trace_dump_arg(vertex_buffer, buffer); + trace_dump_arg_begin("elements"); + trace_dump_struct_array(vertex_element, elements, num_elements); + trace_dump_arg_end(); + trace_dump_arg(uint, num_elements); + trace_dump_arg(ptr, indexbuf); + trace_dump_arg(uint, full_velem_mask); + + struct pipe_vertex_state *vstate = + screen->create_vertex_state(screen, buffer, elements, num_elements, + indexbuf, full_velem_mask); + trace_dump_ret(ptr, vstate); + trace_dump_call_end(); + return vstate; +} + +static void trace_screen_vertex_state_destroy(struct pipe_screen *_screen, + struct pipe_vertex_state *state) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + + trace_dump_call_begin("pipe_screen", "vertex_state_destroy"); + 
trace_dump_arg(ptr, screen); + trace_dump_arg(ptr, state); + trace_dump_call_end(); + + screen->vertex_state_destroy(screen, state); +} + bool trace_enabled(void) { @@ -874,6 +1149,7 @@ trace_screen_create(struct pipe_screen *screen) assert(screen->context_create); tr_scr->base.context_create = trace_screen_context_create; tr_scr->base.resource_create = trace_screen_resource_create; + SCR_INIT(resource_create_with_modifiers); tr_scr->base.resource_create_unbacked = trace_screen_resource_create_unbacked; tr_scr->base.resource_bind_backing = trace_screen_resource_bind_backing; tr_scr->base.resource_from_handle = trace_screen_resource_from_handle; @@ -881,6 +1157,10 @@ trace_screen_create(struct pipe_screen *screen) tr_scr->base.free_memory = trace_screen_free_memory; tr_scr->base.map_memory = trace_screen_map_memory; tr_scr->base.unmap_memory = trace_screen_unmap_memory; + SCR_INIT(query_memory_info); + SCR_INIT(query_dmabuf_modifiers); + SCR_INIT(is_dmabuf_modifier_supported); + SCR_INIT(get_dmabuf_modifier_planes); SCR_INIT(check_resource_capability); tr_scr->base.resource_get_handle = trace_screen_resource_get_handle; SCR_INIT(resource_get_param); @@ -898,12 +1178,21 @@ trace_screen_create(struct pipe_screen *screen) SCR_INIT(get_driver_uuid); SCR_INIT(get_device_uuid); SCR_INIT(finalize_nir); + SCR_INIT(create_vertex_state); + SCR_INIT(vertex_state_destroy); + tr_scr->base.transfer_helper = screen->transfer_helper; tr_scr->screen = screen; trace_dump_ret(ptr, screen); trace_dump_call_end(); + if (!trace_screens) + trace_screens = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + _mesa_hash_table_insert(trace_screens, screen, tr_scr); + + tr_scr->trace_tc = debug_get_bool_option("GALLIUM_TRACE_TC", false); + return &tr_scr->base; error2: diff --git a/lib/mesa/src/gallium/auxiliary/driver_trace/tr_screen.h b/lib/mesa/src/gallium/auxiliary/driver_trace/tr_screen.h index 65ea4fb6a..cce41d3fc 100644 --- 
a/lib/mesa/src/gallium/auxiliary/driver_trace/tr_screen.h +++ b/lib/mesa/src/gallium/auxiliary/driver_trace/tr_screen.h @@ -31,7 +31,7 @@ #include "pipe/p_screen.h" #include "os/os_thread.h" - +#include "util/u_threaded_context.h" #ifdef __cplusplus extern "C" { @@ -51,6 +51,8 @@ struct trace_screen struct pipe_screen base; struct pipe_screen *screen; + tc_is_resource_busy is_resource_busy; + bool trace_tc; }; diff --git a/lib/mesa/src/gallium/auxiliary/driver_trace/tr_texture.c b/lib/mesa/src/gallium/auxiliary/driver_trace/tr_texture.c index d644e1b08..d9fba6d46 100644 --- a/lib/mesa/src/gallium/auxiliary/driver_trace/tr_texture.c +++ b/lib/mesa/src/gallium/auxiliary/driver_trace/tr_texture.c @@ -70,7 +70,6 @@ error: void trace_surf_destroy(struct trace_surface *tr_surf) { - trace_context_check(tr_surf->base.context); pipe_resource_reference(&tr_surf->base.texture, NULL); pipe_surface_reference(&tr_surf->surface, NULL); FREE(tr_surf); @@ -91,18 +90,21 @@ trace_transfer_create(struct trace_context *tr_ctx, if (!tr_trans) goto error; - memcpy(&tr_trans->base, transfer, sizeof(struct pipe_transfer)); + memcpy(&tr_trans->base, transfer, tr_ctx->threaded ? 
sizeof(struct threaded_transfer) : sizeof(struct pipe_transfer)); - tr_trans->base.resource = NULL; + tr_trans->base.b.resource = NULL; tr_trans->transfer = transfer; - pipe_resource_reference(&tr_trans->base.resource, res); - assert(tr_trans->base.resource == res); + pipe_resource_reference(&tr_trans->base.b.resource, res); + assert(tr_trans->base.b.resource == res); - return &tr_trans->base; + return &tr_trans->base.b; error: - tr_ctx->pipe->transfer_unmap(tr_ctx->pipe, transfer); + if (res->target == PIPE_BUFFER) + tr_ctx->pipe->buffer_unmap(tr_ctx->pipe, transfer); + else + tr_ctx->pipe->texture_unmap(tr_ctx->pipe, transfer); return NULL; } @@ -111,7 +113,7 @@ void trace_transfer_destroy(struct trace_context *tr_context, struct trace_transfer *tr_trans) { - pipe_resource_reference(&tr_trans->base.resource, NULL); + pipe_resource_reference(&tr_trans->base.b.resource, NULL); FREE(tr_trans); } diff --git a/lib/mesa/src/gallium/auxiliary/driver_trace/tr_texture.h b/lib/mesa/src/gallium/auxiliary/driver_trace/tr_texture.h index e5dfc53fd..b9caf968d 100644 --- a/lib/mesa/src/gallium/auxiliary/driver_trace/tr_texture.h +++ b/lib/mesa/src/gallium/auxiliary/driver_trace/tr_texture.h @@ -33,6 +33,7 @@ #include "pipe/p_state.h" #include "tr_screen.h" +#include "util/u_threaded_context.h" struct trace_context; @@ -56,6 +57,7 @@ struct trace_surface struct trace_sampler_view { struct pipe_sampler_view base; + unsigned refcount; struct pipe_sampler_view *sampler_view; }; @@ -63,7 +65,7 @@ struct trace_sampler_view struct trace_transfer { - struct pipe_transfer base; + struct threaded_transfer base; struct pipe_transfer *transfer; struct pipe_context *pipe; diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_const.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_const.c index 18ece7324..4f4bddf44 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_const.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_const.c @@ -42,7 +42,7 @@ #include "lp_bld_type.h" 
#include "lp_bld_const.h" #include "lp_bld_init.h" - +#include "lp_bld_limits.h" unsigned lp_mantissa(struct lp_type type) @@ -256,7 +256,7 @@ lp_build_one(struct gallivm_state *gallivm, struct lp_type type) elem_type = lp_build_elem_type(gallivm, type); - if(type.floating && type.width == 16) + if(!lp_has_fp16() && type.floating && type.width == 16) elems[0] = LLVMConstInt(elem_type, _mesa_float_to_half(1.0f), 0); else if(type.floating) elems[0] = LLVMConstReal(elem_type, 1.0); @@ -303,7 +303,7 @@ lp_build_const_elem(struct gallivm_state *gallivm, LLVMTypeRef elem_type = lp_build_elem_type(gallivm, type); LLVMValueRef elem; - if(type.floating && type.width == 16) { + if (!lp_has_fp16() && type.floating && type.width == 16) { elem = LLVMConstInt(elem_type, _mesa_float_to_half((float)val), 0); } else if(type.floating) { elem = LLVMConstReal(elem_type, val); diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_coro.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_coro.c index 28f722e93..d3d5e6dc9 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_coro.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_coro.c @@ -176,9 +176,8 @@ void lp_build_coro_declare_malloc_hooks(struct gallivm_state *gallivm) LLVMValueRef lp_build_coro_begin_alloc_mem(struct gallivm_state *gallivm, LLVMValueRef coro_id) { - LLVMValueRef do_alloc = lp_build_coro_alloc(gallivm, coro_id); LLVMTypeRef mem_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); - LLVMValueRef alloc_mem_store = lp_build_alloca(gallivm, mem_ptr_type, "coro mem"); + LLVMValueRef do_alloc = lp_build_coro_alloc(gallivm, coro_id); struct lp_build_if_state if_state_coro; lp_build_if(&if_state_coro, gallivm, do_alloc); LLVMValueRef coro_size = lp_build_coro_size(gallivm); @@ -186,14 +185,40 @@ LLVMValueRef lp_build_coro_begin_alloc_mem(struct gallivm_state *gallivm, LLVMVa assert(gallivm->coro_malloc_hook); alloc_mem = LLVMBuildCall(gallivm->builder, gallivm->coro_malloc_hook, &coro_size, 1, 
""); - - LLVMBuildStore(gallivm->builder, alloc_mem, alloc_mem_store); lp_build_endif(&if_state_coro); - alloc_mem = LLVMBuildLoad(gallivm->builder, alloc_mem_store, ""); - LLVMValueRef coro_hdl = lp_build_coro_begin(gallivm, coro_id, alloc_mem); + + LLVMValueRef phi = LLVMBuildPhi(gallivm->builder, mem_ptr_type, ""); + LLVMValueRef zero_bool = LLVMConstNull(mem_ptr_type); + LLVMAddIncoming(phi, &alloc_mem, &if_state_coro.true_block, 1); + LLVMAddIncoming(phi, &zero_bool, &if_state_coro.entry_block, 1); + + LLVMValueRef coro_hdl = lp_build_coro_begin(gallivm, coro_id, phi); return coro_hdl; } +LLVMValueRef lp_build_coro_alloc_mem_array(struct gallivm_state *gallivm, + LLVMValueRef coro_hdl_ptr, LLVMValueRef coro_idx, + LLVMValueRef coro_num_hdls) +{ + LLVMTypeRef mem_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); + LLVMValueRef alloced_ptr = LLVMBuildLoad(gallivm->builder, coro_hdl_ptr, ""); + + LLVMValueRef not_alloced = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, alloced_ptr, LLVMConstNull(mem_ptr_type), ""); + LLVMValueRef coro_size = lp_build_coro_size(gallivm); + + struct lp_build_if_state if_state_coro; + lp_build_if(&if_state_coro, gallivm, not_alloced); + + LLVMValueRef alloc_mem; + LLVMValueRef alloc_size = LLVMBuildMul(gallivm->builder, coro_num_hdls, coro_size, ""); + assert(gallivm->coro_malloc_hook); + alloc_mem = LLVMBuildCall(gallivm->builder, gallivm->coro_malloc_hook, &alloc_size, 1, ""); + LLVMBuildStore(gallivm->builder, alloc_mem, coro_hdl_ptr); + lp_build_endif(&if_state_coro); + + return LLVMBuildMul(gallivm->builder, coro_size, coro_idx, ""); +} + void lp_build_coro_free_mem(struct gallivm_state *gallivm, LLVMValueRef coro_id, LLVMValueRef coro_hdl) { LLVMValueRef alloc_mem = lp_build_coro_free(gallivm, coro_id, coro_hdl); diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_coro.h b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_coro.h index 2ffc130c9..1853217ed 100644 --- 
a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_coro.h +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_coro.h @@ -55,6 +55,10 @@ LLVMValueRef lp_build_coro_suspend(struct gallivm_state *gallivm, bool last); LLVMValueRef lp_build_coro_alloc(struct gallivm_state *gallivm, LLVMValueRef id); LLVMValueRef lp_build_coro_begin_alloc_mem(struct gallivm_state *gallivm, LLVMValueRef coro_id); + +LLVMValueRef lp_build_coro_alloc_mem_array(struct gallivm_state *gallivm, + LLVMValueRef coro_hdl_ptr, LLVMValueRef coro_idx, + LLVMValueRef coro_num_hdls); void lp_build_coro_free_mem(struct gallivm_state *gallivm, LLVMValueRef coro_id, LLVMValueRef coro_hdl); struct lp_build_coro_suspend_info { diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_debug.h index 1ea133264..a5dd7b80d 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_debug.h +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_debug.h @@ -43,8 +43,8 @@ #define GALLIVM_DEBUG_GC (1 << 4) #define GALLIVM_DEBUG_DUMP_BC (1 << 5) -#define GALLIVM_PERF_NO_BRILINEAR (1 << 0) -#define GALLIVM_PERF_NO_RHO_APPROX (1 << 1) +#define GALLIVM_PERF_BRILINEAR (1 << 0) +#define GALLIVM_PERF_RHO_APPROX (1 << 1) #define GALLIVM_PERF_NO_QUAD_LOD (1 << 2) #define GALLIVM_PERF_NO_OPT (1 << 3) #define GALLIVM_PERF_NO_AOS_SAMPLING (1 << 4) diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c index e17c7881e..497d403fa 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c @@ -2365,6 +2365,9 @@ lp_build_gather_rgtc(struct gallivm_state *gallivm, lp_build_const_int32(gallivm, 2), ""); *green_hi = LLVMBuildExtractElement(builder, elem, lp_build_const_int32(gallivm, 3), ""); + } else { + *green_lo = NULL; + *green_hi = NULL; } } else { LLVMValueRef tmp[4]; diff --git 
a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c index 38afac47d..1ce4be0ec 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c @@ -27,14 +27,17 @@ #include "lp_bld_arit.h" #include "lp_bld_bitarit.h" #include "lp_bld_const.h" +#include "lp_bld_conv.h" #include "lp_bld_gather.h" #include "lp_bld_logic.h" #include "lp_bld_quad.h" #include "lp_bld_flow.h" +#include "lp_bld_intr.h" #include "lp_bld_struct.h" #include "lp_bld_debug.h" #include "lp_bld_printf.h" #include "nir_deref.h" +#include "nir_search_helpers.h" static void visit_cf_list(struct lp_build_nir_context *bld_base, struct exec_list *list); @@ -47,7 +50,7 @@ static LLVMValueRef cast_type(struct lp_build_nir_context *bld_base, LLVMValueRe case nir_type_float: switch (bit_size) { case 16: - return LLVMBuildBitCast(builder, val, LLVMVectorType(LLVMHalfTypeInContext(bld_base->base.gallivm->context), bld_base->base.type.length), ""); + return LLVMBuildBitCast(builder, val, bld_base->half_bld.vec_type, ""); case 32: return LLVMBuildBitCast(builder, val, bld_base->base.vec_type, ""); case 64: @@ -222,6 +225,8 @@ static LLVMValueRef flt_to_bool32(struct lp_build_nir_context *bld_base, LLVMValueRef result = lp_build_cmp(flt_bld, PIPE_FUNC_NOTEQUAL, val, flt_bld->zero); if (src_bit_size == 64) result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, ""); + if (src_bit_size == 16) + result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, ""); return result; } @@ -240,6 +245,8 @@ static LLVMValueRef fcmp32(struct lp_build_nir_context *bld_base, result = lp_build_cmp(flt_bld, compare, src[0], src[1]); if (src_bit_size == 64) result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, ""); + else if (src_bit_size == 16) + result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, ""); return result; } @@ -306,6 +313,9 @@ static LLVMValueRef 
emit_b2f(struct lp_build_nir_context *bld_base, ""); result = LLVMBuildBitCast(builder, result, bld_base->base.vec_type, ""); switch (bitsize) { + case 16: + result = LLVMBuildFPTrunc(builder, result, bld_base->half_bld.vec_type, ""); + break; case 32: break; case 64: @@ -447,6 +457,43 @@ merge_16bit(struct lp_build_nir_context *bld_base, return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), ""); } +static LLVMValueRef get_signed_divisor(struct gallivm_state *gallivm, + struct lp_build_context *int_bld, + struct lp_build_context *mask_bld, + int src_bit_size, + LLVMValueRef src, LLVMValueRef divisor) +{ + LLVMBuilderRef builder = gallivm->builder; + /* However for signed divides SIGFPE can occur if the numerator is INT_MIN + and divisor is -1. */ + /* set mask if numerator == INT_MIN */ + long long min_val; + switch (src_bit_size) { + case 8: + min_val = INT8_MIN; + break; + case 16: + min_val = INT16_MIN; + break; + default: + case 32: + min_val = INT_MIN; + break; + case 64: + min_val = INT64_MIN; + break; + } + LLVMValueRef div_mask2 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src, + lp_build_const_int_vec(gallivm, int_bld->type, min_val)); + /* set another mask if divisor is - 1 */ + LLVMValueRef div_mask3 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, divisor, + lp_build_const_int_vec(gallivm, int_bld->type, -1)); + div_mask2 = LLVMBuildAnd(builder, div_mask2, div_mask3, ""); + + divisor = lp_build_select(mask_bld, div_mask2, int_bld->one, divisor); + return divisor; +} + static LLVMValueRef do_int_divide(struct lp_build_nir_context *bld_base, bool is_unsigned, unsigned src_bit_size, @@ -456,16 +503,16 @@ do_int_divide(struct lp_build_nir_context *bld_base, LLVMBuilderRef builder = gallivm->builder; struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size); struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size); + + /* avoid divide by 0. 
Converted divisor from 0 to -1 */ LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2, mask_bld->zero); + LLVMValueRef divisor = LLVMBuildOr(builder, div_mask, src2, ""); if (!is_unsigned) { - /* INT_MIN (0x80000000) / -1 (0xffffffff) causes sigfpe, seen with blender. */ - div_mask = LLVMBuildAnd(builder, div_mask, lp_build_const_int_vec(gallivm, int_bld->type, 0x7fffffff), ""); + divisor = get_signed_divisor(gallivm, int_bld, mask_bld, + src_bit_size, src, divisor); } - LLVMValueRef divisor = LLVMBuildOr(builder, - div_mask, - src2, ""); LLVMValueRef result = lp_build_div(int_bld, src, divisor); if (!is_unsigned) { @@ -485,11 +532,16 @@ do_int_mod(struct lp_build_nir_context *bld_base, struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size); - LLVMValueRef div_mask = lp_build_cmp(int_bld, PIPE_FUNC_EQUAL, src2, - int_bld->zero); + struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size); + LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2, + mask_bld->zero); LLVMValueRef divisor = LLVMBuildOr(builder, div_mask, src2, ""); + if (!is_unsigned) { + divisor = get_signed_divisor(gallivm, int_bld, mask_bld, + src_bit_size, src, divisor); + } LLVMValueRef result = lp_build_mod(int_bld, src, divisor); return LLVMBuildOr(builder, div_mask, result, ""); } @@ -502,7 +554,7 @@ do_quantize_to_f16(struct lp_build_nir_context *bld_base, LLVMBuilderRef builder = gallivm->builder; LLVMValueRef result, cond, cond2, temp; - result = LLVMBuildFPTrunc(builder, src, LLVMVectorType(LLVMHalfTypeInContext(gallivm->context), bld_base->base.type.length), ""); + result = LLVMBuildFPTrunc(builder, src, bld_base->half_bld.vec_type, ""); result = LLVMBuildFPExt(builder, result, bld_base->base.vec_type, ""); temp = lp_build_abs(get_flt_bld(bld_base, 32), result); @@ -516,13 +568,18 @@ do_quantize_to_f16(struct 
lp_build_nir_context *bld_base, } static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, - nir_op op, unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS], LLVMValueRef src[NIR_MAX_VEC_COMPONENTS]) + const nir_alu_instr *instr, + unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS], + LLVMValueRef src[NIR_MAX_VEC_COMPONENTS]) { struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef result; - enum gallivm_nan_behavior minmax_nan = bld_base->shader->info.stage == MESA_SHADER_KERNEL ? GALLIVM_NAN_RETURN_OTHER : GALLIVM_NAN_BEHAVIOR_UNDEFINED; - switch (op) { + + switch (instr->op) { + case nir_op_b2f16: + result = emit_b2f(bld_base, src[0], 16); + break; case nir_op_b2f32: result = emit_b2f(bld_base, src[0], 32); break; @@ -546,6 +603,10 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, break; case nir_op_bit_count: result = lp_build_popcount(get_int_bld(bld_base, false, src_bit_size[0]), src[0]); + if (src_bit_size[0] < 32) + result = LLVMBuildZExt(builder, result, bld_base->int_bld.vec_type, ""); + else if (src_bit_size[0] > 32) + result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, ""); break; case nir_op_bitfield_select: result = lp_build_xor(&bld_base->uint_bld, src[2], lp_build_and(&bld_base->uint_bld, src[0], lp_build_xor(&bld_base->uint_bld, src[1], src[2]))); @@ -561,7 +622,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, src[0] = LLVMBuildFPTrunc(builder, src[0], bld_base->base.vec_type, ""); result = LLVMBuildFPTrunc(builder, src[0], - LLVMVectorType(LLVMHalfTypeInContext(gallivm->context), bld_base->base.type.length), ""); + bld_base->half_bld.vec_type, ""); break; case nir_op_f2f32: if (src_bit_size[0] < 32) @@ -624,17 +685,17 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, result = lp_build_ceil(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_fcos: - result = 
lp_build_cos(&bld_base->base, src[0]); + result = lp_build_cos(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_fddx: case nir_op_fddx_coarse: case nir_op_fddx_fine: - result = lp_build_ddx(&bld_base->base, src[0]); + result = lp_build_ddx(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_fddy: case nir_op_fddy_coarse: case nir_op_fddy_fine: - result = lp_build_ddy(&bld_base->base, src[0]); + result = lp_build_ddy(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_fdiv: result = lp_build_div(get_flt_bld(bld_base, src_bit_size[0]), @@ -644,7 +705,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, result = fcmp32(bld_base, PIPE_FUNC_EQUAL, src_bit_size[0], src); break; case nir_op_fexp2: - result = lp_build_exp2(&bld_base->base, src[0]); + result = lp_build_exp2(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_ffloor: result = lp_build_floor(get_flt_bld(bld_base, src_bit_size[0]), src[0]); @@ -670,16 +731,45 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, ""); break; } + case nir_op_fisfinite32: + unreachable("Should have been lowered in nir_opt_algebraic_late."); case nir_op_flog2: - result = lp_build_log2_safe(&bld_base->base, src[0]); + result = lp_build_log2_safe(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_flt: case nir_op_flt32: result = fcmp32(bld_base, PIPE_FUNC_LESS, src_bit_size[0], src); break; - case nir_op_fmin: - result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1], minmax_nan); + case nir_op_fmax: + case nir_op_fmin: { + enum gallivm_nan_behavior minmax_nan; + int first = 0; + + /* If one of the sources is known to be a number (i.e., not NaN), then + * better code can be generated by passing that information along. 
+ */ + if (is_a_number(bld_base->range_ht, instr, 1, + 0 /* unused num_components */, + NULL /* unused swizzle */)) { + minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN; + } else if (is_a_number(bld_base->range_ht, instr, 0, + 0 /* unused num_components */, + NULL /* unused swizzle */)) { + first = 1; + minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN; + } else { + minmax_nan = GALLIVM_NAN_RETURN_OTHER; + } + + if (instr->op == nir_op_fmin) { + result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]), + src[first], src[1 - first], minmax_nan); + } else { + result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]), + src[first], src[1 - first], minmax_nan); + } break; + } case nir_op_fmod: { struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]); result = lp_build_div(flt_bld, src[0], src[1]); @@ -692,9 +782,6 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, result = lp_build_mul(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1]); break; - case nir_op_fmax: - result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1], minmax_nan); - break; case nir_op_fneu32: result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src); break; @@ -702,7 +789,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, result = lp_build_negate(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_fpow: - result = lp_build_pow(&bld_base->base, src[0], src[1]); + result = lp_build_pow(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1]); break; case nir_op_fquantize2f16: result = do_quantize_to_f16(bld_base, src[0]); @@ -711,7 +798,13 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_fround_even: - result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]); + if (src_bit_size[0] == 16) { + struct lp_build_context *bld = 
get_flt_bld(bld_base, 16); + char intrinsic[64]; + lp_format_intrinsic(intrinsic, 64, "llvm.roundeven", bld->vec_type); + result = lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, src[0]); + } else + result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_frsq: result = lp_build_rsqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]); @@ -723,7 +816,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, result = lp_build_sgn(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_fsin: - result = lp_build_sin(&bld_base->base, src[0]); + result = lp_build_sin(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_fsqrt: result = lp_build_sqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]); @@ -734,6 +827,10 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, case nir_op_i2b32: result = int_to_bool32(bld_base, src_bit_size[0], false, src[0]); break; + case nir_op_i2f16: + result = LLVMBuildSIToFP(builder, src[0], + bld_base->half_bld.vec_type, ""); + break; case nir_op_i2f32: result = lp_build_int_to_float(&bld_base->base, src[0]); break; @@ -874,6 +971,10 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, result = LLVMBuildBitCast(builder, tmp, bld_base->uint64_bld.vec_type, ""); break; } + case nir_op_u2f16: + result = LLVMBuildUIToFP(builder, src[0], + bld_base->half_bld.vec_type, ""); + break; case nir_op_u2f32: result = LLVMBuildUIToFP(builder, src[0], bld_base->base.vec_type, ""); break; @@ -970,14 +1071,14 @@ static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr case nir_op_unpack_half_2x16: src_components = 1; break; - case nir_op_cube_face_coord: - case nir_op_cube_face_index: + case nir_op_cube_face_coord_amd: + case nir_op_cube_face_index_amd: src_components = 3; break; case nir_op_fsum2: case nir_op_fsum3: case nir_op_fsum4: - src_components = nir_src_num_components(instr->src[0].src); + 
src_components = nir_op_infos[instr->op].input_sizes[0]; break; default: src_components = num_components; @@ -994,7 +1095,7 @@ static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr result[i] = cast_type(bld_base, src[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]); } } else if (instr->op == nir_op_fsum4 || instr->op == nir_op_fsum3 || instr->op == nir_op_fsum2) { - for (unsigned c = 0; c < nir_src_num_components(instr->src[0].src); c++) { + for (unsigned c = 0; c < nir_op_infos[instr->op].input_sizes[0]; c++) { LLVMValueRef temp_chan = LLVMBuildExtractValue(gallivm->builder, src[0], c, ""); temp_chan = cast_type(bld_base, temp_chan, nir_op_infos[instr->op].input_types[0], src_bit_size[0]); @@ -1012,7 +1113,7 @@ static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr src_chan[i] = src[i]; src_chan[i] = cast_type(bld_base, src_chan[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]); } - result[c] = do_alu_action(bld_base, instr->op, src_bit_size, src_chan); + result[c] = do_alu_action(bld_base, instr, src_bit_size, src_chan); result[c] = cast_type(bld_base, result[c], nir_op_infos[instr->op].output_type, nir_dest_bit_size(instr->dest.dest)); } } @@ -1026,6 +1127,7 @@ static void visit_load_const(struct lp_build_nir_context *bld_base, struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size); for (unsigned i = 0; i < instr->def.num_components; i++) result[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->def.bit_size == 32 ? 
instr->value[i].u32 : instr->value[i].u64); + memset(&result[instr->def.num_components], 0, NIR_MAX_VEC_COMPONENTS - instr->def.num_components); assign_ssa_dest(bld_base, &instr->def, result); } @@ -1240,7 +1342,7 @@ static void visit_load_ssbo(struct lp_build_nir_context *bld_base, nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]) { - LLVMValueRef idx = get_src(bld_base, instr->src[0]); + LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32); LLVMValueRef offset = get_src(bld_base, instr->src[1]); bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest), idx, offset, result); @@ -1250,7 +1352,7 @@ static void visit_store_ssbo(struct lp_build_nir_context *bld_base, nir_intrinsic_instr *instr) { LLVMValueRef val = get_src(bld_base, instr->src[0]); - LLVMValueRef idx = get_src(bld_base, instr->src[1]); + LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[1]), nir_type_uint, 32); LLVMValueRef offset = get_src(bld_base, instr->src[2]); int writemask = instr->const_index[0]; int nc = nir_src_num_components(instr->src[0]); @@ -1262,7 +1364,7 @@ static void visit_get_ssbo_size(struct lp_build_nir_context *bld_base, nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]) { - LLVMValueRef idx = get_src(bld_base, instr->src[0]); + LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32); result[0] = bld_base->get_ssbo_size(bld_base, idx); } @@ -1270,7 +1372,7 @@ static void visit_ssbo_atomic(struct lp_build_nir_context *bld_base, nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]) { - LLVMValueRef idx = get_src(bld_base, instr->src[0]); + LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32); LLVMValueRef offset = get_src(bld_base, instr->src[1]); LLVMValueRef val = get_src(bld_base, instr->src[2]); LLVMValueRef val2 = NULL; @@ -1662,13 +1764,14 @@ 
static void visit_intrinsic(struct lp_build_nir_context *bld_base, case nir_intrinsic_load_base_instance: case nir_intrinsic_load_base_vertex: case nir_intrinsic_load_first_vertex: - case nir_intrinsic_load_work_group_id: + case nir_intrinsic_load_workgroup_id: case nir_intrinsic_load_local_invocation_id: - case nir_intrinsic_load_num_work_groups: + case nir_intrinsic_load_local_invocation_index: + case nir_intrinsic_load_num_workgroups: case nir_intrinsic_load_invocation_id: case nir_intrinsic_load_front_face: case nir_intrinsic_load_draw_id: - case nir_intrinsic_load_local_group_size: + case nir_intrinsic_load_workgroup_size: case nir_intrinsic_load_work_dim: case nir_intrinsic_load_tess_coord: case nir_intrinsic_load_tess_level_outer: @@ -1888,7 +1991,7 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst LLVMBuilderRef builder = gallivm->builder; LLVMValueRef coords[5]; LLVMValueRef offsets[3] = { NULL }; - LLVMValueRef explicit_lod = NULL, projector = NULL, ms_index = NULL; + LLVMValueRef explicit_lod = NULL, ms_index = NULL; struct lp_sampler_params params; struct lp_derivatives derivs; unsigned sample_key = 0; @@ -1935,9 +2038,6 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst case nir_tex_src_sampler_deref: sampler_deref_instr = nir_src_as_deref(instr->src[i].src); break; - case nir_tex_src_projector: - projector = lp_build_rcp(&bld_base->base, cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32)); - break; case nir_tex_src_comparator: sample_key |= LP_SAMPLER_SHADOW; coords[4] = get_src(bld_base, instr->src[i].src); @@ -2038,13 +2138,6 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst coords[1] = coord_undef; } - if (projector) { - for (unsigned chan = 0; chan < instr->coord_components; ++chan) - coords[chan] = lp_build_mul(&bld_base->base, coords[chan], projector); - if (sample_key & LP_SAMPLER_SHADOW) - coords[4] = 
lp_build_mul(&bld_base->base, coords[4], projector); - } - uint32_t samp_base_index = 0, tex_base_index = 0; if (!sampler_deref_instr) { int samp_src_index = nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle); @@ -2081,8 +2174,38 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst params.texel = texel; params.lod = explicit_lod; params.ms_index = ms_index; + params.aniso_filter_table = bld_base->aniso_filter_table; bld_base->tex(bld_base, &params); + + if (nir_dest_bit_size(instr->dest) != 32) { + assert(nir_dest_bit_size(instr->dest) == 16); + LLVMTypeRef vec_type = NULL; + bool is_float = false; + switch (nir_alu_type_get_base_type(instr->dest_type)) { + case nir_type_float: + is_float = true; + break; + case nir_type_int: + vec_type = bld_base->int16_bld.vec_type; + break; + case nir_type_uint: + vec_type = bld_base->uint16_bld.vec_type; + break; + default: + unreachable("unexpected alu type"); + } + for (int i = 0; i < nir_dest_num_components(instr->dest); ++i) { + if (is_float) { + texel[i] = lp_build_float_to_half(gallivm, texel[i]); + } else { + texel[i] = LLVMBuildBitCast(builder, texel[i], bld_base->int_bld.vec_type, ""); + texel[i] = LLVMBuildTrunc(builder, texel[i], vec_type, ""); + } + } + } + assign_dest(bld_base, &instr->dest, texel); + } static void visit_ssa_undef(struct lp_build_nir_context *bld_base, @@ -2093,6 +2216,7 @@ static void visit_ssa_undef(struct lp_build_nir_context *bld_base, struct lp_build_context *undef_bld = get_int_bld(bld_base, true, instr->def.bit_size); for (unsigned i = 0; i < num_components; i++) undef[i] = LLVMGetUndef(undef_bld->vec_type); + memset(&undef[num_components], 0, NIR_MAX_VEC_COMPONENTS - num_components); assign_ssa_dest(bld_base, &instr->def, undef); } @@ -2279,6 +2403,7 @@ bool lp_build_nir_llvm( _mesa_key_pointer_equal); bld_base->vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + bld_base->range_ht = _mesa_pointer_hash_table_create(NULL);
func = (struct nir_function *)exec_list_get_head(&nir->functions); @@ -2295,6 +2420,7 @@ bool lp_build_nir_llvm( free(bld_base->ssa_defs); ralloc_free(bld_base->vars); ralloc_free(bld_base->regs); + ralloc_free(bld_base->range_ht); return true; } @@ -2305,6 +2431,7 @@ void lp_build_opt_nir(struct nir_shader *nir) static const struct nir_lower_tex_options lower_tex_options = { .lower_tg4_offsets = true, + .lower_txp = ~0u, }; NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options); NIR_PASS_V(nir, nir_lower_frexp); @@ -2313,21 +2440,36 @@ void lp_build_opt_nir(struct nir_shader *nir) NIR_PASS_V(nir, nir_lower_fp16_casts); do { progress = false; - NIR_PASS_V(nir, nir_opt_constant_folding); - NIR_PASS_V(nir, nir_opt_algebraic); - NIR_PASS_V(nir, nir_lower_pack); + NIR_PASS(progress, nir, nir_opt_constant_folding); + NIR_PASS(progress, nir, nir_opt_algebraic); + NIR_PASS(progress, nir, nir_lower_pack); - nir_lower_tex_options options = { .lower_tex_without_implicit_lod = true }; + nir_lower_tex_options options = { 0, }; NIR_PASS_V(nir, nir_lower_tex, &options); const nir_lower_subgroups_options subgroups_options = { .subgroup_size = lp_native_vector_width / 32, .ballot_bit_size = 32, + .ballot_components = 1, .lower_to_scalar = true, .lower_subgroup_masks = true, }; NIR_PASS_V(nir, nir_lower_subgroups, &subgroups_options); } while (progress); - nir_lower_bool_to_int32(nir); + + do { + progress = false; + NIR_PASS(progress, nir, nir_opt_algebraic_late); + if (progress) { + NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_dce); + NIR_PASS_V(nir, nir_opt_cse); + } + } while (progress); + + if (nir_lower_bool_to_int32(nir)) { + NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_dce); + } } diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.h b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.h index 1a92bbc03..874a5d55e 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.h +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.h @@ -49,6 
+49,7 @@ struct lp_build_nir_context struct lp_build_context int8_bld; struct lp_build_context uint16_bld; struct lp_build_context int16_bld; + struct lp_build_context half_bld; struct lp_build_context dbl_bld; struct lp_build_context uint64_bld; struct lp_build_context int64_bld; @@ -57,6 +58,11 @@ struct lp_build_nir_context struct hash_table *regs; struct hash_table *vars; + /** Value range analysis hash table used in code generation. */ + struct hash_table *range_ht; + + LLVMValueRef aniso_filter_table; + nir_shader *shader; void (*load_ubo)(struct lp_build_nir_context *bld_base, @@ -284,6 +290,8 @@ static inline struct lp_build_context *get_flt_bld(struct lp_build_nir_context * switch (op_bit_size) { case 64: return &bld_base->dbl_bld; + case 16: + return &bld_base->half_bld; default: case 32: return &bld_base->base; diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c index 05e52083b..b771b7cc7 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c @@ -300,7 +300,8 @@ emit_mask_scatter(struct lp_build_nir_soa_context *bld, if (scalar_pred) { LLVMValueRef real_val, dst_val; dst_val = LLVMBuildLoad(builder, scalar_ptr, ""); - real_val = lp_build_select(&bld->uint_elem_bld, scalar_pred, val, dst_val); + scalar_pred = LLVMBuildTrunc(builder, scalar_pred, LLVMInt1TypeInContext(gallivm->context), ""); + real_val = LLVMBuildSelect(builder, scalar_pred, val, dst_val, ""); LLVMBuildStore(builder, real_val, scalar_ptr); } else { @@ -472,7 +473,7 @@ static void emit_load_var(struct lp_build_nir_context *bld_base, break; case nir_var_shader_out: if (bld->fs_iface && bld->fs_iface->fb_fetch) { - bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.driver_location, result); + bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.location, result); return; } for (unsigned i = 0; i < num_components; 
i++) { @@ -1038,7 +1039,6 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base, struct gallivm_state *gallivm = bld_base->base.gallivm; struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; - LLVMValueRef ssbo_ptr = NULL; struct lp_build_context *uint_bld = &bld_base->uint_bld; LLVMValueRef ssbo_limit = NULL; struct lp_build_context *load_bld; @@ -1046,51 +1046,61 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base, load_bld = get_int_bld(bld_base, true, bit_size); + offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, shift_val), ""); + + /* although the index is dynamically uniform that doesn't count if exec mask isn't set, so read the one-by-one */ + + LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]; + for (unsigned c = 0; c < nc; c++) + result[c] = lp_build_alloca(gallivm, load_bld->vec_type, ""); + + LLVMValueRef exec_mask = mask_vec(bld_base); + LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); + struct lp_build_loop_state loop_state; + lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); + LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); + LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, ""); + + struct lp_build_if_state exec_ifthen; + lp_build_if(&exec_ifthen, gallivm, loop_cond); + + LLVMValueRef mem_ptr; + if (index) { - LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), "")); + LLVMValueRef ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, loop_state.counter, ""); + LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, ssbo_idx); + LLVMValueRef ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, 
ssbo_idx); ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), ""); - ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit); - - ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), "")); + mem_ptr = ssbo_ptr; } else - ssbo_ptr = bld->shared_ptr; + mem_ptr = bld->shared_ptr; - offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, shift_val), ""); for (unsigned c = 0; c < nc; c++) { - LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c)); - LLVMValueRef exec_mask = mask_vec(bld_base); - + LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), ""); + LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1); if (ssbo_limit) { - LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit); - exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, ""); + LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit); + do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, ""); } - LLVMValueRef result = lp_build_alloca(gallivm, load_bld->vec_type, ""); - struct lp_build_loop_state loop_state; - lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); - struct lp_build_if_state ifthen; - LLVMValueRef cond, temp_res; + LLVMValueRef fetch_cond, temp_res; - loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index, - loop_state.counter, ""); - - cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); - cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); + fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), ""); - lp_build_if(&ifthen, gallivm, cond); + lp_build_if(&ifthen, gallivm, 
fetch_cond); LLVMValueRef scalar; if (bit_size != 32) { - LLVMValueRef ssbo_ptr2 = LLVMBuildBitCast(builder, ssbo_ptr, LLVMPointerType(load_bld->elem_type, 0), ""); - scalar = lp_build_pointer_get(builder, ssbo_ptr2, loop_index); + LLVMValueRef mem_ptr2 = LLVMBuildBitCast(builder, mem_ptr, LLVMPointerType(load_bld->elem_type, 0), ""); + scalar = lp_build_pointer_get(builder, mem_ptr2, loop_index); } else - scalar = lp_build_pointer_get(builder, ssbo_ptr, loop_index); + scalar = lp_build_pointer_get(builder, mem_ptr, loop_index); - temp_res = LLVMBuildLoad(builder, result, ""); + temp_res = LLVMBuildLoad(builder, result[c], ""); temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, ""); - LLVMBuildStore(builder, temp_res, result); + LLVMBuildStore(builder, temp_res, result[c]); lp_build_else(&ifthen); - temp_res = LLVMBuildLoad(builder, result, ""); + temp_res = LLVMBuildLoad(builder, result[c], ""); LLVMValueRef zero; if (bit_size == 64) zero = LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0); @@ -1101,12 +1111,16 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base, else zero = lp_build_const_int32(gallivm, 0); temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, ""); - LLVMBuildStore(builder, temp_res, result); + LLVMBuildStore(builder, temp_res, result[c]); lp_build_endif(&ifthen); - lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), - NULL, LLVMIntUGE); - outval[c] = LLVMBuildLoad(gallivm->builder, result, ""); } + + lp_build_endif(&exec_ifthen); + lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), + NULL, LLVMIntUGE); + for (unsigned c = 0; c < nc; c++) + outval[c] = LLVMBuildLoad(gallivm->builder, result[c], ""); + } static void emit_store_mem(struct lp_build_nir_context *bld_base, @@ -1120,56 +1134,66 @@ static void emit_store_mem(struct lp_build_nir_context *bld_base, struct gallivm_state *gallivm = 
bld_base->base.gallivm; struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; - LLVMValueRef ssbo_ptr; + LLVMValueRef mem_ptr; struct lp_build_context *uint_bld = &bld_base->uint_bld; LLVMValueRef ssbo_limit = NULL; struct lp_build_context *store_bld; uint32_t shift_val = bit_size_to_shift_size(bit_size); store_bld = get_int_bld(bld_base, true, bit_size); + offset = lp_build_shr_imm(uint_bld, offset, shift_val); + + LLVMValueRef exec_mask = mask_vec(bld_base); + LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); + struct lp_build_loop_state loop_state; + lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); + LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); + LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, ""); + + struct lp_build_if_state exec_ifthen; + lp_build_if(&exec_ifthen, gallivm, loop_cond); + if (index) { - LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), "")); + LLVMValueRef ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, loop_state.counter, ""); + LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, ssbo_idx); + LLVMValueRef ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, ssbo_idx); ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), ""); - ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit); - ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), "")); + mem_ptr = ssbo_ptr; } else - ssbo_ptr = bld->shared_ptr; + mem_ptr = bld->shared_ptr; - offset = lp_build_shr_imm(uint_bld, offset, shift_val); for (unsigned c = 0; c < nc; 
c++) { if (!(writemask & (1u << c))) continue; - LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c)); + LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), ""); LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, ""); + LLVMValueRef do_store = lp_build_const_int32(gallivm, -1); - LLVMValueRef exec_mask = mask_vec(bld_base); if (ssbo_limit) { - LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit); - exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, ""); + LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit); + do_store = LLVMBuildAnd(builder, do_store, ssbo_oob_cmp, ""); } - struct lp_build_loop_state loop_state; - lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val, loop_state.counter, ""); value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, ""); struct lp_build_if_state ifthen; - LLVMValueRef cond; + LLVMValueRef store_cond; - loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index, - loop_state.counter, ""); - cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); - cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); - lp_build_if(&ifthen, gallivm, cond); + store_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_store, lp_build_const_int32(gallivm, 0), ""); + lp_build_if(&ifthen, gallivm, store_cond); if (bit_size != 32) { - LLVMValueRef ssbo_ptr2 = LLVMBuildBitCast(builder, ssbo_ptr, LLVMPointerType(store_bld->elem_type, 0), ""); - lp_build_pointer_set(builder, ssbo_ptr2, loop_index, value_ptr); + LLVMValueRef mem_ptr2 = LLVMBuildBitCast(builder, mem_ptr, LLVMPointerType(store_bld->elem_type, 0), ""); + lp_build_pointer_set(builder, 
mem_ptr2, loop_index, value_ptr); } else - lp_build_pointer_set(builder, ssbo_ptr, loop_index, value_ptr); + lp_build_pointer_set(builder, mem_ptr, loop_index, value_ptr); lp_build_endif(&ifthen); - lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), - NULL, LLVMIntUGE); } + + lp_build_endif(&exec_ifthen); + lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), + NULL, LLVMIntUGE); + } static void emit_atomic_mem(struct lp_build_nir_context *bld_base, @@ -1182,52 +1206,58 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base, struct gallivm_state *gallivm = bld_base->base.gallivm; struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; - LLVMValueRef ssbo_ptr; struct lp_build_context *uint_bld = &bld_base->uint_bld; LLVMValueRef ssbo_limit = NULL; uint32_t shift_val = bit_size_to_shift_size(bit_size); struct lp_build_context *atomic_bld = get_int_bld(bld_base, true, bit_size); - if (index) { - LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), "")); - ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, 2), ""); - ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit); - ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), "")); - } else - ssbo_ptr = bld->shared_ptr; offset = lp_build_shr_imm(uint_bld, offset, shift_val); LLVMValueRef atom_res = lp_build_alloca(gallivm, atomic_bld->vec_type, ""); LLVMValueRef exec_mask = mask_vec(bld_base); - if (ssbo_limit) { - LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, offset, ssbo_limit); - exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, ""); - } - + LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, 
LLVMIntNE, exec_mask, uint_bld->zero, ""); struct lp_build_loop_state loop_state; lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); + LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); + LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, ""); + + struct lp_build_if_state exec_ifthen; + lp_build_if(&exec_ifthen, gallivm, loop_cond); + + LLVMValueRef mem_ptr; + if (index) { + LLVMValueRef ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, loop_state.counter, ""); + LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, ssbo_idx); + LLVMValueRef ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, ssbo_idx); + ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), ""); + mem_ptr = ssbo_ptr; + } else + mem_ptr = bld->shared_ptr; + + LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1); + if (ssbo_limit) { + LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_offset, ssbo_limit); + do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, ""); + } LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val, loop_state.counter, ""); value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atomic_bld->elem_type, ""); - offset = LLVMBuildExtractElement(gallivm->builder, offset, - loop_state.counter, ""); - LLVMValueRef scalar_ptr; if (bit_size != 32) { - LLVMValueRef ssbo_ptr2 = LLVMBuildBitCast(builder, ssbo_ptr, LLVMPointerType(atomic_bld->elem_type, 0), ""); - scalar_ptr = LLVMBuildGEP(builder, ssbo_ptr2, &offset, 1, ""); + LLVMValueRef mem_ptr2 = LLVMBuildBitCast(builder, mem_ptr, LLVMPointerType(atomic_bld->elem_type, 0), ""); + scalar_ptr = LLVMBuildGEP(builder, mem_ptr2, &loop_offset, 1, ""); } else - scalar_ptr = LLVMBuildGEP(builder, ssbo_ptr, &offset, 1, ""); + scalar_ptr = LLVMBuildGEP(builder, 
mem_ptr, &loop_offset, 1, ""); struct lp_build_if_state ifthen; - LLVMValueRef cond, temp_res; + LLVMValueRef inner_cond, temp_res; LLVMValueRef scalar; - cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); - cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); - lp_build_if(&ifthen, gallivm, cond); + + inner_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), ""); + lp_build_if(&ifthen, gallivm, inner_cond); if (nir_op == nir_intrinsic_ssbo_atomic_comp_swap || nir_op == nir_intrinsic_shared_atomic_comp_swap) { LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2, @@ -1297,6 +1327,7 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base, LLVMBuildStore(builder, temp_res, atom_res); lp_build_endif(&ifthen); + lp_build_endif(&exec_ifthen); lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), NULL, LLVMIntUGE); *result = LLVMBuildLoad(builder, atom_res, ""); @@ -1514,7 +1545,7 @@ static void emit_sysval_intrin(struct lp_build_nir_context *bld_base, case nir_intrinsic_load_primitive_id: result[0] = bld->system_values.prim_id; break; - case nir_intrinsic_load_work_group_id: { + case nir_intrinsic_load_workgroup_id: { LLVMValueRef tmp[3]; for (unsigned i = 0; i < 3; i++) { tmp[i] = LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_id, lp_build_const_int32(gallivm, i), ""); @@ -1528,7 +1559,21 @@ static void emit_sysval_intrin(struct lp_build_nir_context *bld_base, for (unsigned i = 0; i < 3; i++) result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, i, ""); break; - case nir_intrinsic_load_num_work_groups: { + case nir_intrinsic_load_local_invocation_index: { + LLVMValueRef tmp, tmp2; + tmp = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 1), "")); + tmp2 = 
lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 0), "")); + tmp = lp_build_mul(&bld_base->uint_bld, tmp, tmp2); + tmp = lp_build_mul(&bld_base->uint_bld, tmp, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 2, "")); + + tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 0), "")); + tmp2 = lp_build_mul(&bld_base->uint_bld, tmp2, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 1, "")); + tmp = lp_build_add(&bld_base->uint_bld, tmp, tmp2); + tmp = lp_build_add(&bld_base->uint_bld, tmp, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 0, "")); + result[0] = tmp; + break; + } + case nir_intrinsic_load_num_workgroups: { LLVMValueRef tmp[3]; for (unsigned i = 0; i < 3; i++) { tmp[i] = LLVMBuildExtractElement(gallivm->builder, bld->system_values.grid_size, lp_build_const_int32(gallivm, i), ""); @@ -1552,7 +1597,7 @@ static void emit_sysval_intrin(struct lp_build_nir_context *bld_base, break; default: break; - case nir_intrinsic_load_local_group_size: + case nir_intrinsic_load_workgroup_size: for (unsigned i = 0; i < 3; i++) result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, i), "")); break; @@ -1985,36 +2030,106 @@ static void emit_reduce(struct lp_build_nir_context *bld_base, LLVMValueRef src, switch (reduction_op) { case nir_op_fmin: { LLVMValueRef flt_max = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), INFINITY) : - lp_build_const_float(gallivm, INFINITY); + (bit_size == 16 ? 
LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), INFINITY) : lp_build_const_float(gallivm, INFINITY)); store_val = LLVMBuildBitCast(builder, flt_max, int_bld->elem_type, ""); break; } case nir_op_fmax: { LLVMValueRef flt_min = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), -INFINITY) : - lp_build_const_float(gallivm, -INFINITY); + (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), -INFINITY) : lp_build_const_float(gallivm, -INFINITY)); store_val = LLVMBuildBitCast(builder, flt_min, int_bld->elem_type, ""); break; } case nir_op_fmul: { LLVMValueRef flt_one = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), 1.0) : - lp_build_const_float(gallivm, 1.0); + (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), 1.0) : lp_build_const_float(gallivm, 1.0)); store_val = LLVMBuildBitCast(builder, flt_one, int_bld->elem_type, ""); break; } case nir_op_umin: - store_val = lp_build_const_int32(gallivm, UINT_MAX); + switch (bit_size) { + case 8: + store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), UINT8_MAX, 0); + break; + case 16: + store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), UINT16_MAX, 0); + break; + case 32: + default: + store_val = lp_build_const_int32(gallivm, UINT_MAX); + break; + case 64: + store_val = lp_build_const_int64(gallivm, UINT64_MAX); + break; + } break; case nir_op_imin: - store_val = lp_build_const_int32(gallivm, INT_MAX); + switch (bit_size) { + case 8: + store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MAX, 0); + break; + case 16: + store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MAX, 0); + break; + case 32: + default: + store_val = lp_build_const_int32(gallivm, INT_MAX); + break; + case 64: + store_val = lp_build_const_int64(gallivm, INT64_MAX); + break; + } break; case nir_op_imax: - store_val = lp_build_const_int32(gallivm, INT_MIN); + switch (bit_size) { + case 8: + 
store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MIN, 0); + break; + case 16: + store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MIN, 0); + break; + case 32: + default: + store_val = lp_build_const_int32(gallivm, INT_MIN); + break; + case 64: + store_val = lp_build_const_int64(gallivm, INT64_MIN); + break; + } break; case nir_op_imul: - store_val = lp_build_const_int32(gallivm, 1); + switch (bit_size) { + case 8: + store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 1, 0); + break; + case 16: + store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 1, 0); + break; + case 32: + default: + store_val = lp_build_const_int32(gallivm, 1); + break; + case 64: + store_val = lp_build_const_int64(gallivm, 1); + break; + } break; case nir_op_iand: - store_val = lp_build_const_int32(gallivm, 0xffffffff); + switch (bit_size) { + case 8: + store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0xff, 0); + break; + case 16: + store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0xffff, 0); + break; + case 32: + default: + store_val = lp_build_const_int32(gallivm, 0xffffffff); + break; + case 64: + store_val = lp_build_const_int64(gallivm, 0xffffffffffffffffLL); + break; + } break; default: break; @@ -2105,28 +2220,27 @@ static void emit_read_invocation(struct lp_build_nir_context *bld_base, LLVMBuilderRef builder = gallivm->builder; LLVMValueRef idx; struct lp_build_context *uint_bld = get_int_bld(bld_base, true, bit_size); - if (invoc) { - idx = invoc; - idx = LLVMBuildExtractElement(gallivm->builder, idx, lp_build_const_int32(gallivm, 0), ""); - } else { - /* have to find the first active invocation */ - LLVMValueRef exec_mask = mask_vec(bld_base); - struct lp_build_loop_state loop_state; - LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, ""); - LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, ""); - 
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length)); - LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, ""); - struct lp_build_if_state ifthen; + /* have to find the first active invocation */ + LLVMValueRef exec_mask = mask_vec(bld_base); + struct lp_build_loop_state loop_state; + LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, ""); + LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, ""); + lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length)); - lp_build_if(&ifthen, gallivm, if_cond); - LLVMBuildStore(builder, loop_state.counter, res_store); - lp_build_endif(&ifthen); + LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, ""); + struct lp_build_if_state ifthen; - lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, -1), - lp_build_const_int32(gallivm, -1), LLVMIntEQ); - idx = LLVMBuildLoad(builder, res_store, ""); - } + lp_build_if(&ifthen, gallivm, if_cond); + LLVMValueRef store_val = loop_state.counter; + if (invoc) + store_val = LLVMBuildExtractElement(gallivm->builder, invoc, loop_state.counter, ""); + LLVMBuildStore(builder, store_val, res_store); + lp_build_endif(&ifthen); + + lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, -1), + lp_build_const_int32(gallivm, -1), LLVMIntEQ); + idx = LLVMBuildLoad(builder, res_store, ""); LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, src, idx, ""); @@ -2312,6 +2426,12 @@ void lp_build_nir_soa(struct gallivm_state *gallivm, lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type); } { + struct lp_type half_type; + half_type = type; + half_type.width /= 2; + lp_build_context_init(&bld.bld_base.half_bld, gallivm, half_type); + } + { struct lp_type uint64_type; uint64_type = lp_uint_type(type); 
uint64_type.width *= 2; @@ -2399,6 +2519,7 @@ void lp_build_nir_soa(struct gallivm_state *gallivm, bld.context_ptr = params->context_ptr; bld.thread_data_ptr = params->thread_data_ptr; + bld.bld_base.aniso_filter_table = params->aniso_filter_table; bld.image = params->image; bld.shared_ptr = params->shared_ptr; bld.coro = params->coro; diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.h index c608e42c1..604b3b04a 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -113,6 +113,7 @@ struct lp_sampler_params const LLVMValueRef *offsets; LLVMValueRef ms_index; LLVMValueRef lod; + LLVMValueRef aniso_filter_table; const struct lp_derivatives *derivs; LLVMValueRef *texel; }; @@ -201,10 +202,7 @@ struct lp_static_sampler_state unsigned apply_min_lod:1; /**< min_lod > 0 ? */ unsigned apply_max_lod:1; /**< max_lod < last_level ? */ unsigned seamless_cube_map:1; - - /* Hacks */ - unsigned force_nearest_s:1; - unsigned force_nearest_t:1; + unsigned aniso:1; unsigned reduction_mode:2; }; @@ -330,6 +328,13 @@ struct lp_sampler_dynamic_state LLVMValueRef context_ptr, unsigned sampler_unit); + /** Obtain maximum anisotropy */ + LLVMValueRef + (*max_aniso)(const struct lp_sampler_dynamic_state *state, + struct gallivm_state *gallivm, + LLVMValueRef context_ptr, + unsigned sampler_unit); + /** * Obtain texture cache (returns ptr to lp_build_format_cache). 
* @@ -444,6 +449,8 @@ struct lp_build_sample_context LLVMValueRef border_color_clamped; LLVMValueRef context_ptr; + + LLVMValueRef aniso_filter_table; }; /* @@ -577,6 +584,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ unsigned mip_filter, + LLVMValueRef max_aniso, LLVMValueRef *out_lod, LLVMValueRef *out_lod_ipart, LLVMValueRef *out_lod_fpart, @@ -790,6 +798,8 @@ lp_build_reduce_filter_3d(struct lp_build_context *bld, LLVMValueRef *v110, LLVMValueRef *v111, LLVMValueRef *out); + +const float *lp_build_sample_aniso_filter_table(void); #ifdef __cplusplus } #endif diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c index 875271883..ea2ec780f 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c @@ -408,7 +408,9 @@ lp_build_swizzle_aos(struct lp_build_context *bld, switch (swizzles[i]) { default: assert(0); +#if defined(NDEBUG) || defined(DEBUG) FALLTHROUGH; +#endif case PIPE_SWIZZLE_X: case PIPE_SWIZZLE_Y: case PIPE_SWIZZLE_Z: diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_type.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_type.c index da139a838..a261ae981 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_type.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_type.c @@ -31,7 +31,7 @@ #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_init.h" - +#include "lp_bld_limits.h" LLVMTypeRef lp_build_elem_type(struct gallivm_state *gallivm, struct lp_type type) @@ -39,7 +39,7 @@ lp_build_elem_type(struct gallivm_state *gallivm, struct lp_type type) if (type.floating) { switch(type.width) { case 16: - return LLVMIntTypeInContext(gallivm->context, 16); + return lp_has_fp16() ? 
LLVMHalfTypeInContext(gallivm->context) : LLVMInt16TypeInContext(gallivm->context); break; case 32: return LLVMFloatTypeInContext(gallivm->context); @@ -89,7 +89,7 @@ lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type) if (type.floating) { switch(type.width) { case 16: - if(elem_kind != LLVMIntegerTypeKind) + if(elem_kind != (lp_has_fp16() ? LLVMHalfTypeKind : LLVMIntegerTypeKind)) return FALSE; break; case 32: @@ -259,6 +259,8 @@ lp_sizeof_llvm_type(LLVMTypeRef t) return 8 * sizeof(float); case LLVMDoubleTypeKind: return 8 * sizeof(double); + case LLVMHalfTypeKind: + return 8 * sizeof(uint16_t); case LLVMVectorTypeKind: { LLVMTypeRef elem = LLVMGetElementType(t); @@ -291,6 +293,8 @@ lp_typekind_name(LLVMTypeKind t) return "LLVMVoidTypeKind"; case LLVMFloatTypeKind: return "LLVMFloatTypeKind"; + case LLVMHalfTypeKind: + return "LLVMHalfTypeKind"; case LLVMDoubleTypeKind: return "LLVMDoubleTypeKind"; case LLVMX86_FP80TypeKind: diff --git a/lib/mesa/src/gallium/auxiliary/hud/font.c b/lib/mesa/src/gallium/auxiliary/hud/font.c index c7f8aef0d..a372410b1 100644 --- a/lib/mesa/src/gallium/auxiliary/hud/font.c +++ b/lib/mesa/src/gallium/auxiliary/hud/font.c @@ -417,8 +417,8 @@ util_font_create_fixed_8x13(struct pipe_context *pipe, return FALSE; } - map = pipe_transfer_map(pipe, tex, 0, 0, PIPE_MAP_WRITE, 0, 0, - tex->width0, tex->height0, &transfer); + map = pipe_texture_map(pipe, tex, 0, 0, PIPE_MAP_WRITE, 0, 0, + tex->width0, tex->height0, &transfer); if (!map) { pipe_resource_reference(&tex, NULL); return FALSE; @@ -432,7 +432,7 @@ util_font_create_fixed_8x13(struct pipe_context *pipe, transfer->stride, i); } - pipe_transfer_unmap(pipe, transfer); + pipe_texture_unmap(pipe, transfer); pipe_resource_reference(&out_font->texture, NULL); out_font->texture = tex; diff --git a/lib/mesa/src/gallium/auxiliary/indices/u_indices.h b/lib/mesa/src/gallium/auxiliary/indices/u_indices.h index f160fcbc6..3c57f9c5d 100644 --- 
a/lib/mesa/src/gallium/auxiliary/indices/u_indices.h +++ b/lib/mesa/src/gallium/auxiliary/indices/u_indices.h @@ -82,6 +82,18 @@ enum indices_mode { void u_index_init( void ); +/* returns the primitive type resulting from index translation */ +enum pipe_prim_type +u_index_prim_type_convert(unsigned hw_mask, enum pipe_prim_type prim, bool pv_matches); + +static inline unsigned +u_index_size_convert(unsigned index_size) +{ + return (index_size == 4) ? 4 : 2; +} + +unsigned +u_index_count_converted_indices(unsigned hw_mask, bool pv_matches, enum pipe_prim_type prim, unsigned nr); /** * For indexed drawing, this function determines what kind of primitive diff --git a/lib/mesa/src/gallium/auxiliary/meson.build b/lib/mesa/src/gallium/auxiliary/meson.build index a30e44186..7682357ed 100644 --- a/lib/mesa/src/gallium/auxiliary/meson.build +++ b/lib/mesa/src/gallium/auxiliary/meson.build @@ -60,7 +60,6 @@ files_libgallium = files( 'draw/draw_pt_decompose.h', 'draw/draw_pt_emit.c', 'draw/draw_pt_fetch.c', - 'draw/draw_pt_fetch_emit.c', 'draw/draw_pt_fetch_shade_emit.c', 'draw/draw_pt_fetch_shade_pipeline.c', 'draw/draw_pt.h', @@ -239,14 +238,10 @@ files_libgallium = files( 'util/u_cache.h', 'util/u_compute.c', 'util/u_compute.h', - 'util/u_debug_describe.c', - 'util/u_debug_describe.h', 'util/u_debug_flush.c', 'util/u_debug_flush.h', 'util/u_debug_image.c', 'util/u_debug_image.h', - 'util/u_debug_refcnt.c', - 'util/u_debug_refcnt.h', 'util/u_dirty_flags.h', 'util/u_dirty_surfaces.h', 'util/u_dl.c', @@ -255,19 +250,18 @@ files_libgallium = files( 'util/u_draw.h', 'util/u_draw_quad.c', 'util/u_draw_quad.h', + 'util/u_driconf.c', + 'util/u_driconf.h', 'util/u_dual_blend.h', 'util/u_dump_defines.c', 'util/u_dump.h', 'util/u_dump_state.c', - 'util/u_fifo.h', 'util/u_framebuffer.c', 'util/u_framebuffer.h', 'util/u_gen_mipmap.c', 'util/u_gen_mipmap.h', 'util/u_handle_table.c', 'util/u_handle_table.h', - 'util/u_hash_table.c', - 'util/u_hash_table.h', 'util/u_helpers.c', 
'util/u_helpers.h', 'util/u_index_modify.c', @@ -314,9 +308,6 @@ files_libgallium = files( 'util/u_texture.h', 'util/u_tile.c', 'util/u_tile.h', - 'util/u_trace.c', - 'util/u_trace.h', - 'util/u_trace_priv.h', 'util/u_transfer.c', 'util/u_transfer.h', 'util/u_transfer_helper.c', @@ -324,10 +315,14 @@ files_libgallium = files( 'util/u_threaded_context.c', 'util/u_threaded_context.h', 'util/u_threaded_context_calls.h', + 'util/u_trace_gallium.c', + 'util/u_trace_gallium.h', 'util/u_upload_mgr.c', 'util/u_upload_mgr.h', 'util/u_vbuf.c', 'util/u_vbuf.h', + 'util/u_vertex_state_cache.c', + 'util/u_vertex_state_cache.h', 'util/u_video.h', 'util/u_viewport.h', 'nir/tgsi_to_nir.c', @@ -483,15 +478,13 @@ if with_dri2 and with_platform_x11 endif endif -u_trace_py = files('util/u_trace.py') - files_libgallium += custom_target( 'u_tracepoints.c', input: 'util/u_tracepoints.py', output: 'u_tracepoints.c', command: [ prog_python, '@INPUT@', - '-p', join_paths(meson.source_root(), 'src/gallium/auxiliary/util/'), + '-p', join_paths(meson.source_root(), 'src/util/perf/'), '-C', '@OUTPUT@', ], depend_files: u_trace_py, @@ -503,7 +496,7 @@ files_u_tracepoints = custom_target( output: 'u_tracepoints.h', command: [ prog_python, '@INPUT@', - '-p', join_paths(meson.source_root(), 'src/gallium/auxiliary/util/'), + '-p', join_paths(meson.source_root(), 'src/util/perf/'), '-H', '@OUTPUT@', ], depend_files: u_trace_py, diff --git a/lib/mesa/src/gallium/auxiliary/nir/nir_draw_helpers.c b/lib/mesa/src/gallium/auxiliary/nir/nir_draw_helpers.c index b5706631f..5122ce401 100644 --- a/lib/mesa/src/gallium/auxiliary/nir/nir_draw_helpers.c +++ b/lib/mesa/src/gallium/auxiliary/nir/nir_draw_helpers.c @@ -167,7 +167,7 @@ nir_lower_aaline_block(nir_block *block, nir_variable *var = nir_intrinsic_get_var(intrin, 0); if (var->data.mode != nir_var_shader_out) continue; - if (var->data.location != FRAG_RESULT_COLOR) + if (var->data.location < FRAG_RESULT_DATA0 && var->data.location != FRAG_RESULT_COLOR) 
continue; nir_ssa_def *out_input = intrin->src[1].ssa; @@ -262,7 +262,7 @@ nir_lower_aapoint_block(nir_block *block, nir_variable *var = nir_intrinsic_get_var(intrin, 0); if (var->data.mode != nir_var_shader_out) continue; - if (var->data.location != FRAG_RESULT_COLOR) + if (var->data.location < FRAG_RESULT_DATA0 && var->data.location != FRAG_RESULT_COLOR) continue; nir_ssa_def *out_input = intrin->src[1].ssa; diff --git a/lib/mesa/src/gallium/auxiliary/nir/nir_to_tgsi.c b/lib/mesa/src/gallium/auxiliary/nir/nir_to_tgsi.c index 3c73d342c..b016b07bf 100644 --- a/lib/mesa/src/gallium/auxiliary/nir/nir_to_tgsi.c +++ b/lib/mesa/src/gallium/auxiliary/nir/nir_to_tgsi.c @@ -31,6 +31,7 @@ #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_ureg.h" #include "util/debug.h" +#include "util/u_math.h" #include "util/u_memory.h" struct ntt_compile { @@ -53,7 +54,7 @@ struct ntt_compile { /* TGSI temps for our NIR SSA and register values. */ struct ureg_dst *reg_temp; - struct ureg_dst *ssa_temp; + struct ureg_src *ssa_temp; nir_instr_liveness *liveness; @@ -65,11 +66,34 @@ struct ntt_compile { struct ureg_src *input_index_map; uint64_t centroid_inputs; + uint32_t first_ubo; + struct ureg_src images[PIPE_MAX_SHADER_IMAGES]; }; static void ntt_emit_cf_list(struct ntt_compile *c, struct exec_list *list); +/** + * Interprets a nir_load_const used as a NIR src as a uint. + * + * For non-native-integers drivers, nir_load_const_instrs used by an integer ALU + * instruction (or in a phi-web used by an integer ALU instruction) were + * converted to floats and the ALU instruction swapped to the float equivalent. + * However, this means that integer load_consts used by intrinsics (which don't + * normally get that conversion) may have been reformatted to be floats. Given + * that all of our intrinsic nir_src_as_uint() calls are expected to be small, + * we can just look and see if they look like floats and convert them back to + * ints. 
+ */ +static uint32_t +ntt_src_as_uint(struct ntt_compile *c, nir_src src) +{ + uint32_t val = nir_src_as_uint(src); + if (!c->native_integers && val >= fui(1.0)) + val = (uint32_t)uif(val); + return val; +} + static unsigned ntt_64bit_write_mask(unsigned write_mask) { @@ -163,7 +187,7 @@ ntt_tgsi_var_usage_mask(const struct nir_variable *var) } static struct ureg_dst -ntt_store_output_decl(struct ntt_compile *c, nir_intrinsic_instr *instr, uint32_t *frac) +ntt_output_decl(struct ntt_compile *c, nir_intrinsic_instr *instr, uint32_t *frac) { nir_io_semantics semantics = nir_intrinsic_io_semantics(instr); int base = nir_intrinsic_base(instr); @@ -172,9 +196,6 @@ ntt_store_output_decl(struct ntt_compile *c, nir_intrinsic_instr *instr, uint32_ struct ureg_dst out; if (c->s->info.stage == MESA_SHADER_FRAGMENT) { - if (semantics.location == FRAG_RESULT_COLOR) - ureg_property(c->ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1); - unsigned semantic_name, semantic_index; tgsi_get_gl_frag_result_semantic(semantics.location, &semantic_name, &semantic_index); @@ -225,7 +246,11 @@ ntt_store_output_decl(struct ntt_compile *c, nir_intrinsic_instr *instr, uint32_ invariant); } - unsigned write_mask = nir_intrinsic_write_mask(instr); + unsigned write_mask; + if (nir_intrinsic_has_write_mask(instr)) + write_mask = nir_intrinsic_write_mask(instr); + else + write_mask = ((1 << instr->num_components) - 1) << *frac; if (is_64) { write_mask = ntt_64bit_write_mask(write_mask); @@ -274,8 +299,8 @@ ntt_try_store_in_tgsi_output(struct ntt_compile *c, struct ureg_dst *dst, } uint32_t frac; - *dst = ntt_store_output_decl(c, intr, &frac); - dst->Index += nir_src_as_uint(intr->src[1]); + *dst = ntt_output_decl(c, intr, &frac); + dst->Index += ntt_src_as_uint(c, intr->src[1]); return frac == 0; } @@ -338,15 +363,14 @@ ntt_setup_inputs(struct ntt_compile *c) uint32_t usage_mask = ntt_tgsi_var_usage_mask(var); - decl = ureg_DECL_fs_input_cyl_centroid_layout(c->ureg, - semantic_name, - 
semantic_index, - interpolation, - 0, - sample_loc, - var->data.driver_location, - usage_mask, - array_id, array_len); + decl = ureg_DECL_fs_input_centroid_layout(c->ureg, + semantic_name, + semantic_index, + interpolation, + sample_loc, + var->data.driver_location, + usage_mask, + array_id, array_len); if (semantic_name == TGSI_SEMANTIC_FACE) { struct ureg_dst temp = ureg_DECL_temporary(c->ureg); @@ -362,49 +386,173 @@ ntt_setup_inputs(struct ntt_compile *c) } } +static int +ntt_sort_by_location(const nir_variable *a, const nir_variable *b) +{ + return a->data.location - b->data.location; +} + +/** + * Workaround for virglrenderer requiring that TGSI FS output color variables + * are declared in order. Besides, it's a lot nicer to read the TGSI this way. + */ static void -ntt_setup_uniforms(struct ntt_compile *c) +ntt_setup_outputs(struct ntt_compile *c) { - struct pipe_screen *screen = c->screen; - bool packed = screen->get_param(screen, PIPE_CAP_PACKED_UNIFORMS); + if (c->s->info.stage != MESA_SHADER_FRAGMENT) + return; + nir_sort_variables_with_modes(c->s, ntt_sort_by_location, nir_var_shader_out); + + nir_foreach_shader_out_variable(var, c->s) { + if (var->data.location == FRAG_RESULT_COLOR) + ureg_property(c->ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1); + + unsigned semantic_name, semantic_index; + tgsi_get_gl_frag_result_semantic(var->data.location, + &semantic_name, &semantic_index); + + (void)ureg_DECL_output(c->ureg, semantic_name, semantic_index); + } +} + +static enum tgsi_texture_type +tgsi_texture_type_from_sampler_dim(enum glsl_sampler_dim dim, bool is_array, bool is_shadow) +{ + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + if (is_shadow) + return is_array ? TGSI_TEXTURE_SHADOW1D_ARRAY : TGSI_TEXTURE_SHADOW1D; + else + return is_array ? TGSI_TEXTURE_1D_ARRAY : TGSI_TEXTURE_1D; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_EXTERNAL: + if (is_shadow) + return is_array ? 
TGSI_TEXTURE_SHADOW2D_ARRAY : TGSI_TEXTURE_SHADOW2D; + else + return is_array ? TGSI_TEXTURE_2D_ARRAY : TGSI_TEXTURE_2D; + case GLSL_SAMPLER_DIM_3D: + return TGSI_TEXTURE_3D; + case GLSL_SAMPLER_DIM_CUBE: + if (is_shadow) + return is_array ? TGSI_TEXTURE_SHADOWCUBE_ARRAY : TGSI_TEXTURE_SHADOWCUBE; + else + return is_array ? TGSI_TEXTURE_CUBE_ARRAY : TGSI_TEXTURE_CUBE; + case GLSL_SAMPLER_DIM_RECT: + if (is_shadow) + return TGSI_TEXTURE_SHADOWRECT; + else + return TGSI_TEXTURE_RECT; + case GLSL_SAMPLER_DIM_MS: + return is_array ? TGSI_TEXTURE_2D_ARRAY_MSAA : TGSI_TEXTURE_2D_MSAA; + case GLSL_SAMPLER_DIM_BUF: + return TGSI_TEXTURE_BUFFER; + default: + unreachable("unknown sampler dim"); + } +} + +static enum tgsi_return_type +tgsi_return_type_from_base_type(enum glsl_base_type type) +{ + switch (type) { + case GLSL_TYPE_INT: + return TGSI_RETURN_TYPE_SINT; + case GLSL_TYPE_UINT: + return TGSI_RETURN_TYPE_UINT; + case GLSL_TYPE_FLOAT: + return TGSI_RETURN_TYPE_FLOAT; + default: + unreachable("unexpected texture type"); + } +} + +static void +ntt_setup_uniforms(struct ntt_compile *c) +{ nir_foreach_uniform_variable(var, c->s) { - if (glsl_type_is_image(var->type)) { - c->images[var->data.binding] = ureg_DECL_image(c->ureg, - var->data.binding, - TGSI_TEXTURE_2D, - var->data.image.format, - !var->data.read_only, - false); - } else { - unsigned size; - if (packed) { - size = DIV_ROUND_UP(glsl_count_dword_slots(var->type, - var->data.bindless), 4); - } else { - size = glsl_count_vec4_slots(var->type, false, var->data.bindless); - } + int image_count = glsl_type_get_image_count(var->type); - for (unsigned i = 0; i < size; i++) - ureg_DECL_constant(c->ureg, var->data.driver_location + i); + if (glsl_type_is_sampler(glsl_without_array(var->type))) { + /* Don't use this size for the check for samplers -- arrays of structs + * containing samplers should be ignored, and just the separate lowered + * sampler uniform decl used. 
+ */ + int size = glsl_type_get_sampler_count(var->type); + + const struct glsl_type *stype = glsl_without_array(var->type); + enum tgsi_texture_type target = tgsi_texture_type_from_sampler_dim(glsl_get_sampler_dim(stype), + glsl_sampler_type_is_array(stype), + glsl_sampler_type_is_shadow(stype)); + enum tgsi_return_type ret_type = tgsi_return_type_from_base_type(glsl_get_sampler_result_type(stype)); + for (int i = 0; i < size; i++) { + ureg_DECL_sampler_view(c->ureg, var->data.binding + i, + target, ret_type, ret_type, ret_type, ret_type); + ureg_DECL_sampler(c->ureg, var->data.binding + i); + } + } else if (image_count) { + const struct glsl_type *itype = glsl_without_array(var->type); + enum tgsi_texture_type tex_type = + tgsi_texture_type_from_sampler_dim(glsl_get_sampler_dim(itype), + glsl_sampler_type_is_array(itype), false); + + for (int i = 0; i < image_count; i++) { + c->images[var->data.binding] = ureg_DECL_image(c->ureg, + var->data.binding + i, + tex_type, + var->data.image.format, + !(var->data.access & ACCESS_NON_WRITEABLE), + false); + } + } else if (glsl_contains_atomic(var->type)) { + uint32_t offset = var->data.offset / 4; + uint32_t size = glsl_atomic_size(var->type) / 4; + ureg_DECL_hw_atomic(c->ureg, offset, offset + size - 1, var->data.binding, 0); } + + /* lower_uniforms_to_ubo lowered non-sampler uniforms to UBOs, so CB0 + * size declaration happens with other UBOs below. 
+ */ } + c->first_ubo = ~0; + + unsigned ubo_sizes[PIPE_MAX_CONSTANT_BUFFERS] = {0}; nir_foreach_variable_with_modes(var, c->s, nir_var_mem_ubo) { - ureg_DECL_constant2D(c->ureg, 0, 0, var->data.driver_location); + int ubo = var->data.driver_location; + if (ubo == -1) + continue; + + if (!(ubo == 0 && c->s->info.first_ubo_is_default_ubo)) + c->first_ubo = MIN2(c->first_ubo, ubo); + + unsigned size = glsl_get_explicit_size(var->interface_type, false); + + int array_size = 1; + if (glsl_type_is_interface(glsl_without_array(var->type))) + array_size = MAX2(1, glsl_array_size(var->type)); + for (int i = 0; i < array_size; i++) { + /* Even if multiple NIR variables are in the same uniform block, their + * explicit size is the size of the block. + */ + if (ubo_sizes[ubo + i]) + assert(ubo_sizes[ubo + i] == size); + + ubo_sizes[ubo + i] = size; + } + } + + for (int i = 0; i < ARRAY_SIZE(ubo_sizes); i++) { + if (ubo_sizes[i]) + ureg_DECL_constant2D(c->ureg, 0, DIV_ROUND_UP(ubo_sizes[i], 16) - 1, i); } - nir_foreach_variable_with_modes(var, c->s, nir_var_mem_ssbo) { + for (int i = 0; i < c->s->info.num_ssbos; i++) { /* XXX: nv50 uses the atomic flag to set caching for (lowered) atomic * counters */ bool atomic = false; - ureg_DECL_buffer(c->ureg, var->data.binding, atomic); - } - - for (int i = 0; i < PIPE_MAX_SAMPLERS; i++) { - if (BITSET_TEST(c->s->info.textures_used, i)) - ureg_DECL_sampler(c->ureg, i); + ureg_DECL_buffer(c->ureg, i, atomic); } } @@ -438,22 +586,32 @@ ntt_setup_registers(struct ntt_compile *c, struct exec_list *list) static struct ureg_src ntt_get_load_const_src(struct ntt_compile *c, nir_load_const_instr *instr) { - uint32_t values[4]; int num_components = instr->def.num_components; - if (instr->def.bit_size == 32) { + if (!c->native_integers) { + float values[4]; + assert(instr->def.bit_size == 32); for (int i = 0; i < num_components; i++) - values[i] = instr->value[i].u32; + values[i] = uif(instr->value[i].u32); + + return ureg_DECL_immediate(c->ureg, 
values, num_components); } else { - assert(num_components <= 2); - for (int i = 0; i < num_components; i++) { - values[i * 2 + 0] = instr->value[i].u64 & 0xffffffff; - values[i * 2 + 1] = instr->value[i].u64 >> 32; + uint32_t values[4]; + + if (instr->def.bit_size == 32) { + for (int i = 0; i < num_components; i++) + values[i] = instr->value[i].u32; + } else { + assert(num_components <= 2); + for (int i = 0; i < num_components; i++) { + values[i * 2 + 0] = instr->value[i].u64 & 0xffffffff; + values[i * 2 + 1] = instr->value[i].u64 >> 32; + } + num_components *= 2; } - num_components *= 2; - } - return ureg_DECL_immediate_uint(c->ureg, values, num_components); + return ureg_DECL_immediate_uint(c->ureg, values, num_components); + } } static struct ureg_src @@ -509,7 +667,7 @@ ntt_get_src(struct ntt_compile *c, nir_src src) if (src.ssa->parent_instr->type == nir_instr_type_load_const) return ntt_get_load_const_src(c, nir_instr_as_load_const(src.ssa->parent_instr)); - return ureg_src(c->ssa_temp[src.ssa->index]); + return c->ssa_temp[src.ssa->index]; } else { nir_register *reg = src.reg.reg; struct ureg_dst reg_temp = c->reg_temp[reg->index]; @@ -575,7 +733,7 @@ ntt_swizzle_for_write_mask(struct ureg_src src, uint32_t write_mask) (write_mask & TGSI_WRITEMASK_W) ? 
TGSI_SWIZZLE_W : first_chan); } -static struct ureg_dst * +static struct ureg_dst ntt_get_ssa_def_decl(struct ntt_compile *c, nir_ssa_def *ssa) { uint32_t writemask = BITSET_MASK(ssa->num_components); @@ -586,24 +744,24 @@ ntt_get_ssa_def_decl(struct ntt_compile *c, nir_ssa_def *ssa) if (!ntt_try_store_in_tgsi_output(c, &dst, &ssa->uses, &ssa->if_uses)) dst = ureg_DECL_temporary(c->ureg); - c->ssa_temp[ssa->index] = ureg_writemask(dst, writemask); + c->ssa_temp[ssa->index] = ntt_swizzle_for_write_mask(ureg_src(dst), writemask); - return &c->ssa_temp[ssa->index]; + return ureg_writemask(dst, writemask); } -static struct ureg_dst * +static struct ureg_dst ntt_get_dest_decl(struct ntt_compile *c, nir_dest *dest) { if (dest->is_ssa) return ntt_get_ssa_def_decl(c, &dest->ssa); else - return &c->reg_temp[dest->reg.reg->index]; + return c->reg_temp[dest->reg.reg->index]; } static struct ureg_dst ntt_get_dest(struct ntt_compile *c, nir_dest *dest) { - struct ureg_dst dst = *ntt_get_dest_decl(c, dest); + struct ureg_dst dst = ntt_get_dest_decl(c, dest); if (!dest->is_ssa) { dst.Index += dest->reg.base_offset; @@ -623,22 +781,18 @@ ntt_get_dest(struct ntt_compile *c, nir_dest *dest) static void ntt_store_def(struct ntt_compile *c, nir_ssa_def *def, struct ureg_src src) { - if (!src.Negate && !src.Absolute && !src.Indirect && !src.DimIndirect && - src.SwizzleX == TGSI_SWIZZLE_X && - (src.SwizzleY == TGSI_SWIZZLE_Y || def->num_components < 2) && - (src.SwizzleZ == TGSI_SWIZZLE_Z || def->num_components < 3) && - (src.SwizzleW == TGSI_SWIZZLE_W || def->num_components < 4)) { + if (!src.Indirect && !src.DimIndirect) { switch (src.File) { case TGSI_FILE_IMMEDIATE: case TGSI_FILE_INPUT: case TGSI_FILE_CONSTANT: case TGSI_FILE_SYSTEM_VALUE: - c->ssa_temp[def->index] = ureg_dst(src); + c->ssa_temp[def->index] = src; return; } } - ureg_MOV(c->ureg, *ntt_get_ssa_def_decl(c, def), src); + ureg_MOV(c->ureg, ntt_get_ssa_def_decl(c, def), src); } static void @@ -1012,10 +1166,12 @@ 
ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr) /* NIR is src0 != 0 ? src1 : src2. * TGSI is src0 < 0 ? src1 : src2. * - * However, fcsel so far as I can find only appears on - * bools-as-floats (1.0 or 0.0), so we can negate it for the TGSI op. + * However, fcsel so far as I can find only appears on bools-as-floats + * (1.0 or 0.0), so we can just negate it for the TGSI op. It's + * important to not have an abs here, as i915g has to make extra + * instructions to do the abs. */ - ureg_CMP(c->ureg, dst, ureg_negate(ureg_abs(src[0])), src[1], src[2]); + ureg_CMP(c->ureg, dst, ureg_negate(src[0]), src[1], src[2]); break; /* It would be nice if we could get this left as scalar in NIR, since @@ -1097,7 +1253,7 @@ ntt_ureg_src_indirect(struct ntt_compile *c, struct ureg_src usrc, nir_src src) { if (nir_src_is_const(src)) { - usrc.Index += nir_src_as_uint(src); + usrc.Index += ntt_src_as_uint(c, src); return usrc; } else { return ureg_src_indirect(usrc, ntt_reladdr(c, ntt_get_src(c, src))); @@ -1109,7 +1265,7 @@ ntt_ureg_dst_indirect(struct ntt_compile *c, struct ureg_dst dst, nir_src src) { if (nir_src_is_const(src)) { - dst.Index += nir_src_as_uint(src); + dst.Index += ntt_src_as_uint(c, src); return dst; } else { return ureg_dst_indirect(dst, ntt_reladdr(c, ntt_get_src(c, src))); @@ -1121,7 +1277,7 @@ ntt_ureg_src_dimension_indirect(struct ntt_compile *c, struct ureg_src usrc, nir_src src) { if (nir_src_is_const(src)) { - return ureg_src_dimension(usrc, nir_src_as_uint(src)); + return ureg_src_dimension(usrc, ntt_src_as_uint(c, src)); } else { @@ -1136,7 +1292,7 @@ ntt_ureg_dst_dimension_indirect(struct ntt_compile *c, struct ureg_dst udst, nir_src src) { if (nir_src_is_const(src)) { - return ureg_dst_dimension(udst, nir_src_as_uint(src)); + return ureg_dst_dimension(udst, ntt_src_as_uint(c, src)); } else { return ureg_dst_dimension_indirect(udst, ntt_reladdr(c, ntt_get_src(c, src)), @@ -1165,7 +1321,25 @@ ntt_emit_load_ubo(struct ntt_compile *c, 
nir_intrinsic_instr *instr) struct ureg_src src = ureg_src_register(TGSI_FILE_CONSTANT, 0); - src = ntt_ureg_src_dimension_indirect(c, src, instr->src[0]); + struct ureg_dst addr_temp = ureg_dst_undef(); + + if (nir_src_is_const(instr->src[0])) { + src = ureg_src_dimension(src, ntt_src_as_uint(c, instr->src[0])); + } else { + /* virglrenderer requires that indirect UBO references have the UBO + * array's base index in the Index field, not added to the indrect + * address. + * + * Many nir intrinsics have a base address const value for the start of + * their array indirection, but load_ubo doesn't. We fake it by + * subtracting it off here. + */ + addr_temp = ureg_DECL_temporary(c->ureg); + ureg_UADD(c->ureg, addr_temp, ntt_get_src(c, instr->src[0]), ureg_imm1i(c->ureg, -c->first_ubo)); + src = ureg_src_dimension_indirect(src, + ntt_reladdr(c, ureg_src(addr_temp)), + c->first_ubo); + } if (instr->intrinsic == nir_intrinsic_load_ubo_vec4) { /* !PIPE_CAP_LOAD_CONSTBUF: Just emit it as a vec4 reference to the const @@ -1173,7 +1347,7 @@ ntt_emit_load_ubo(struct ntt_compile *c, nir_intrinsic_instr *instr) */ if (nir_src_is_const(instr->src[1])) { - src.Index += nir_src_as_uint(instr->src[1]); + src.Index += ntt_src_as_uint(c, instr->src[1]); } else { src = ureg_src_indirect(src, ntt_reladdr(c, ntt_get_src(c, instr->src[1]))); } @@ -1203,6 +1377,8 @@ ntt_emit_load_ubo(struct ntt_compile *c, nir_intrinsic_instr *instr) 0 /* format: unused */ ); } + + ureg_release_temporary(c->ureg, addr_temp); } static unsigned @@ -1227,12 +1403,14 @@ ntt_emit_mem(struct ntt_compile *c, nir_intrinsic_instr *instr, { bool is_store = (instr->intrinsic == nir_intrinsic_store_ssbo || instr->intrinsic == nir_intrinsic_store_shared); - bool is_load = (instr->intrinsic == nir_intrinsic_load_ssbo || + bool is_load = (instr->intrinsic == nir_intrinsic_atomic_counter_read || + instr->intrinsic == nir_intrinsic_load_ssbo || instr->intrinsic == nir_intrinsic_load_shared); unsigned opcode; struct 
ureg_src src[4]; int num_src = 0; int nir_src; + struct ureg_dst addr_temp = ureg_dst_undef(); struct ureg_src memory; switch (mode) { @@ -1245,6 +1423,21 @@ ntt_emit_mem(struct ntt_compile *c, nir_intrinsic_instr *instr, memory = ureg_src_register(TGSI_FILE_MEMORY, 0); nir_src = 0; break; + case nir_var_uniform: { /* HW atomic buffers */ + memory = ureg_src_register(TGSI_FILE_HW_ATOMIC, 0); + /* ntt_ureg_src_indirect, except dividing by 4 */ + if (nir_src_is_const(instr->src[0])) { + memory.Index += nir_src_as_uint(instr->src[0]) / 4; + } else { + addr_temp = ureg_DECL_temporary(c->ureg); + ureg_USHR(c->ureg, addr_temp, ntt_get_src(c, instr->src[0]), ureg_imm1i(c->ureg, 2)); + memory = ureg_src_indirect(memory, ntt_reladdr(c, ureg_src(addr_temp))); + } + memory = ureg_src_dimension(memory, nir_intrinsic_base(instr)); + nir_src = 0; + break; + } + default: unreachable("unknown memory type"); } @@ -1256,13 +1449,26 @@ ntt_emit_mem(struct ntt_compile *c, nir_intrinsic_instr *instr, src[num_src++] = memory; if (instr->intrinsic != nir_intrinsic_get_ssbo_size) { src[num_src++] = ntt_get_src(c, instr->src[nir_src++]); /* offset */ - if (!is_load) - src[num_src++] = ntt_get_src(c, instr->src[nir_src++]); /* value */ + switch (instr->intrinsic) { + case nir_intrinsic_atomic_counter_inc: + src[num_src++] = ureg_imm1i(c->ureg, 1); + break; + case nir_intrinsic_atomic_counter_post_dec: + src[num_src++] = ureg_imm1i(c->ureg, -1); + break; + default: + if (!is_load) + src[num_src++] = ntt_get_src(c, instr->src[nir_src++]); /* value */ + break; + } } } switch (instr->intrinsic) { + case nir_intrinsic_atomic_counter_add: + case nir_intrinsic_atomic_counter_inc: + case nir_intrinsic_atomic_counter_post_dec: case nir_intrinsic_ssbo_atomic_add: case nir_intrinsic_shared_atomic_add: opcode = TGSI_OPCODE_ATOMUADD; @@ -1271,10 +1477,12 @@ ntt_emit_mem(struct ntt_compile *c, nir_intrinsic_instr *instr, case nir_intrinsic_shared_atomic_fadd: opcode = TGSI_OPCODE_ATOMFADD; break; + case 
nir_intrinsic_atomic_counter_min: case nir_intrinsic_ssbo_atomic_imin: case nir_intrinsic_shared_atomic_imin: opcode = TGSI_OPCODE_ATOMIMIN; break; + case nir_intrinsic_atomic_counter_max: case nir_intrinsic_ssbo_atomic_imax: case nir_intrinsic_shared_atomic_imax: opcode = TGSI_OPCODE_ATOMIMAX; @@ -1287,27 +1495,33 @@ ntt_emit_mem(struct ntt_compile *c, nir_intrinsic_instr *instr, case nir_intrinsic_shared_atomic_umax: opcode = TGSI_OPCODE_ATOMUMAX; break; + case nir_intrinsic_atomic_counter_and: case nir_intrinsic_ssbo_atomic_and: case nir_intrinsic_shared_atomic_and: opcode = TGSI_OPCODE_ATOMAND; break; + case nir_intrinsic_atomic_counter_or: case nir_intrinsic_ssbo_atomic_or: case nir_intrinsic_shared_atomic_or: opcode = TGSI_OPCODE_ATOMOR; break; + case nir_intrinsic_atomic_counter_xor: case nir_intrinsic_ssbo_atomic_xor: case nir_intrinsic_shared_atomic_xor: opcode = TGSI_OPCODE_ATOMXOR; break; + case nir_intrinsic_atomic_counter_exchange: case nir_intrinsic_ssbo_atomic_exchange: case nir_intrinsic_shared_atomic_exchange: opcode = TGSI_OPCODE_ATOMXCHG; break; + case nir_intrinsic_atomic_counter_comp_swap: case nir_intrinsic_ssbo_atomic_comp_swap: case nir_intrinsic_shared_atomic_comp_swap: opcode = TGSI_OPCODE_ATOMCAS; src[num_src++] = ntt_get_src(c, instr->src[nir_src++]); break; + case nir_intrinsic_atomic_counter_read: case nir_intrinsic_load_ssbo: case nir_intrinsic_load_shared: opcode = TGSI_OPCODE_LOAD; @@ -1347,27 +1561,8 @@ ntt_emit_mem(struct ntt_compile *c, nir_intrinsic_instr *instr, qualifier, TGSI_TEXTURE_BUFFER, 0 /* format: unused */); -} -static enum tgsi_texture_type -tgsi_target_from_sampler_dim(enum glsl_sampler_dim dim, bool is_array) -{ - switch (dim) { - case GLSL_SAMPLER_DIM_1D: - return is_array ? TGSI_TEXTURE_1D_ARRAY : TGSI_TEXTURE_1D; - case GLSL_SAMPLER_DIM_2D: - return is_array ? TGSI_TEXTURE_2D_ARRAY : TGSI_TEXTURE_2D; - case GLSL_SAMPLER_DIM_3D: - return TGSI_TEXTURE_3D; - case GLSL_SAMPLER_DIM_CUBE: - return is_array ? 
TGSI_TEXTURE_CUBE_ARRAY : TGSI_TEXTURE_CUBE; - case GLSL_SAMPLER_DIM_RECT: - return TGSI_TEXTURE_RECT; - case GLSL_SAMPLER_DIM_BUF: - return TGSI_TEXTURE_BUFFER; - default: - unreachable("unknown sampler dim"); - } + ureg_release_temporary(c->ureg, addr_temp); } static void @@ -1381,7 +1576,7 @@ ntt_emit_image_load_store(struct ntt_compile *c, nir_intrinsic_instr *instr) struct ureg_dst temp = ureg_dst_undef(); - enum tgsi_texture_type target = tgsi_target_from_sampler_dim(dim, is_array); + enum tgsi_texture_type target = tgsi_texture_type_from_sampler_dim(dim, is_array, false); struct ureg_src resource = ntt_ureg_src_indirect(c, ureg_src_register(TGSI_FILE_IMAGE, 0), @@ -1530,6 +1725,10 @@ ntt_emit_load_input(struct ntt_compile *c, nir_intrinsic_instr *instr) switch (bary_instr->intrinsic) { case nir_intrinsic_load_barycentric_pixel: + case nir_intrinsic_load_barycentric_sample: + /* For these, we know that the barycentric load matches the + * interpolation on the input declaration, so we can use it directly. + */ ntt_store(c, &instr->dest, input); break; @@ -1547,9 +1746,9 @@ ntt_emit_load_input(struct ntt_compile *c, nir_intrinsic_instr *instr) break; case nir_intrinsic_load_barycentric_at_sample: + /* We stored the sample in the fake "bary" dest. 
*/ ureg_INTERP_SAMPLE(c->ureg, ntt_get_dest(c, &instr->dest), input, - ureg_imm1u(c->ureg, - nir_src_as_uint(bary_instr->src[0]))); + ntt_get_src(c, instr->src[0])); break; case nir_intrinsic_load_barycentric_at_offset: @@ -1583,7 +1782,7 @@ ntt_emit_store_output(struct ntt_compile *c, nir_intrinsic_instr *instr) } uint32_t frac; - struct ureg_dst out = ntt_store_output_decl(c, instr, &frac); + struct ureg_dst out = ntt_output_decl(c, instr, &frac); if (instr->intrinsic == nir_intrinsic_store_per_vertex_output) { out = ntt_ureg_dst_indirect(c, out, instr->src[2]); @@ -1605,6 +1804,29 @@ ntt_emit_store_output(struct ntt_compile *c, nir_intrinsic_instr *instr) } static void +ntt_emit_load_output(struct ntt_compile *c, nir_intrinsic_instr *instr) +{ + /* ntt_try_store_in_tgsi_output() optimization is not valid if load_output + * is present. + */ + assert(c->s->info.stage != MESA_SHADER_VERTEX && + c->s->info.stage != MESA_SHADER_FRAGMENT); + + uint32_t frac; + struct ureg_dst out = ntt_output_decl(c, instr, &frac); + + if (instr->intrinsic == nir_intrinsic_load_per_vertex_output) { + out = ntt_ureg_dst_indirect(c, out, instr->src[1]); + out = ntt_ureg_dst_dimension_indirect(c, out, instr->src[0]); + } else { + out = ntt_ureg_dst_indirect(c, out, instr->src[0]); + } + + ureg_MOV(c->ureg, ntt_get_dest(c, &instr->dest), ureg_src(out)); + ntt_reladdr_dst_put(c, out); +} + +static void ntt_emit_load_sysval(struct ntt_compile *c, nir_intrinsic_instr *instr) { gl_system_value sysval = nir_system_value_from_intrinsic(instr->intrinsic); @@ -1618,6 +1840,23 @@ ntt_emit_load_sysval(struct ntt_compile *c, nir_intrinsic_instr *instr) uint32_t write_mask = BITSET_MASK(nir_dest_num_components(instr->dest)); sv = ntt_swizzle_for_write_mask(sv, write_mask); + /* TGSI and NIR define these intrinsics as always loading ints, but they can + * still appear on hardware with non-native-integers fragment shaders using + * the draw path (i915g). 
In that case, having called nir_lower_int_to_float + * means that we actually want floats instead. + */ + if (!c->native_integers) { + switch (instr->intrinsic) { + case nir_intrinsic_load_vertex_id: + case nir_intrinsic_load_instance_id: + ureg_U2F(c->ureg, ntt_get_dest(c, &instr->dest), sv); + return; + + default: + break; + } + } + ntt_store(c, &instr->dest, sv); } @@ -1642,6 +1881,7 @@ ntt_emit_intrinsic(struct ntt_compile *c, nir_intrinsic_instr *instr) case nir_intrinsic_load_point_coord: case nir_intrinsic_load_front_face: case nir_intrinsic_load_sample_id: + case nir_intrinsic_load_sample_pos: case nir_intrinsic_load_sample_mask_in: case nir_intrinsic_load_helper_invocation: case nir_intrinsic_load_tess_coord: @@ -1650,9 +1890,9 @@ ntt_emit_intrinsic(struct ntt_compile *c, nir_intrinsic_instr *instr) case nir_intrinsic_load_tess_level_outer: case nir_intrinsic_load_tess_level_inner: case nir_intrinsic_load_local_invocation_id: - case nir_intrinsic_load_work_group_id: - case nir_intrinsic_load_num_work_groups: - case nir_intrinsic_load_local_group_size: + case nir_intrinsic_load_workgroup_id: + case nir_intrinsic_load_num_workgroups: + case nir_intrinsic_load_workgroup_size: case nir_intrinsic_load_subgroup_size: case nir_intrinsic_load_subgroup_invocation: case nir_intrinsic_load_subgroup_eq_mask: @@ -1673,6 +1913,11 @@ ntt_emit_intrinsic(struct ntt_compile *c, nir_intrinsic_instr *instr) ntt_emit_store_output(c, instr); break; + case nir_intrinsic_load_output: + case nir_intrinsic_load_per_vertex_output: + ntt_emit_load_output(c, instr); + break; + case nir_intrinsic_discard: ureg_KILL(c->ureg); break; @@ -1725,6 +1970,23 @@ ntt_emit_intrinsic(struct ntt_compile *c, nir_intrinsic_instr *instr) ntt_emit_mem(c, instr, nir_var_mem_shared); break; + case nir_intrinsic_atomic_counter_read: + case nir_intrinsic_atomic_counter_add: + case nir_intrinsic_atomic_counter_inc: + case nir_intrinsic_atomic_counter_post_dec: + case nir_intrinsic_atomic_counter_min: + 
case nir_intrinsic_atomic_counter_max: + case nir_intrinsic_atomic_counter_and: + case nir_intrinsic_atomic_counter_or: + case nir_intrinsic_atomic_counter_xor: + case nir_intrinsic_atomic_counter_exchange: + case nir_intrinsic_atomic_counter_comp_swap: + ntt_emit_mem(c, instr, nir_var_uniform); + break; + case nir_intrinsic_atomic_counter_pre_dec: + unreachable("Should be lowered by ntt_lower_atomic_pre_dec()"); + break; + case nir_intrinsic_image_load: case nir_intrinsic_image_store: case nir_intrinsic_image_size: @@ -1743,6 +2005,7 @@ ntt_emit_intrinsic(struct ntt_compile *c, nir_intrinsic_instr *instr) break; case nir_intrinsic_control_barrier: + case nir_intrinsic_memory_barrier_tcs_patch: ureg_BARRIER(c->ureg); break; @@ -1788,14 +2051,14 @@ ntt_emit_intrinsic(struct ntt_compile *c, nir_intrinsic_instr *instr) break; /* In TGSI we don't actually generate the barycentric coords, and emit - * interp intrinsics later. However, we do need to store the _at_offset - * argument so that we can use it at that point. + * interp intrinsics later. However, we do need to store the + * load_barycentric_at_* argument so that we can use it at that point. 
*/ case nir_intrinsic_load_barycentric_pixel: case nir_intrinsic_load_barycentric_centroid: - case nir_intrinsic_load_barycentric_at_sample: + case nir_intrinsic_load_barycentric_sample: break; - + case nir_intrinsic_load_barycentric_at_sample: case nir_intrinsic_load_barycentric_at_offset: ntt_store(c, &instr->dest, ntt_get_src(c, instr->src[0])); break; @@ -1811,8 +2074,6 @@ ntt_emit_intrinsic(struct ntt_compile *c, nir_intrinsic_instr *instr) struct ntt_tex_operand_state { struct ureg_src srcs[4]; unsigned i; - unsigned chan; - bool is_temp[4]; }; static void @@ -1825,51 +2086,14 @@ ntt_push_tex_arg(struct ntt_compile *c, if (tex_src < 0) return; - struct ureg_src src = ntt_get_src(c, instr->src[tex_src].src); - int num_components = nir_tex_instr_src_size(instr, tex_src); - - /* Find which src in the tex args we'll fit in. */ - if (s->chan + num_components > 4) { - s->chan = 0; - s->i++; - } - - /* Would need to fix up swizzling up to the writemask channel here. */ - assert(num_components == 1 || s->chan == 0); - if (num_components == 1) - src = ureg_scalar(src, 0); - - if (ureg_src_is_undef(s->srcs[s->i])) { - /* First emit of a tex operand's components, no need for a mov. */ - s->srcs[s->i] = src; - } else { - /* Otherwise, we need to have a temporary for all the components that go - * in this operand. 
- */ - if (!s->is_temp[s->i]) { - struct ureg_src prev_src = s->srcs[s->i]; - s->srcs[s->i] = ureg_src(ureg_DECL_temporary(c->ureg)); - s->is_temp[s->i] = true; - - ureg_MOV(c->ureg, - ureg_writemask(ureg_dst(s->srcs[s->i]), - BITFIELD_MASK(s->chan)), prev_src); - } - - ureg_MOV(c->ureg, - ureg_writemask(ureg_dst(s->srcs[s->i]), - BITFIELD_RANGE(s->chan, num_components)), - src); - } - - s->chan += num_components; + s->srcs[s->i++] = ntt_get_src(c, instr->src[tex_src].src); } static void ntt_emit_texture(struct ntt_compile *c, nir_tex_instr *instr) { struct ureg_dst dst = ntt_get_dest(c, &instr->dest); - unsigned target; + enum tgsi_texture_type target = tgsi_texture_type_from_sampler_dim(instr->sampler_dim, instr->is_array, instr->is_shadow); unsigned tex_opcode; struct ureg_src sampler = ureg_DECL_sampler(c->ureg, instr->sampler_index); @@ -1881,7 +2105,11 @@ ntt_emit_texture(struct ntt_compile *c, nir_tex_instr *instr) switch (instr->op) { case nir_texop_tex: - tex_opcode = TGSI_OPCODE_TEX; + if (nir_tex_instr_src_size(instr, nir_tex_instr_src_index(instr, nir_tex_src_backend1)) > + MAX2(instr->coord_components, 2) + instr->is_shadow) + tex_opcode = TGSI_OPCODE_TXP; + else + tex_opcode = TGSI_OPCODE_TEX; break; case nir_texop_txf: case nir_texop_txf_ms: @@ -1891,7 +2119,7 @@ ntt_emit_texture(struct ntt_compile *c, nir_tex_instr *instr) int lod_src = nir_tex_instr_src_index(instr, nir_tex_src_lod); if (lod_src >= 0 && nir_src_is_const(instr->src[lod_src].src) && - nir_src_as_uint(instr->src[lod_src].src) == 0) { + ntt_src_as_uint(c, instr->src[lod_src].src) == 0) { tex_opcode = TGSI_OPCODE_TXF_LZ; } } @@ -1925,92 +2153,16 @@ ntt_emit_texture(struct ntt_compile *c, nir_tex_instr *instr) } struct ntt_tex_operand_state s = { .i = 0 }; - ntt_push_tex_arg(c, instr, nir_tex_src_coord, &s); - /* We always have at least two slots for the coordinate, even on 1D. 
*/ - s.chan = MAX2(s.chan, 2); - - ntt_push_tex_arg(c, instr, nir_tex_src_comparator, &s); - s.chan = MAX2(s.chan, 3); + ntt_push_tex_arg(c, instr, nir_tex_src_backend1, &s); + ntt_push_tex_arg(c, instr, nir_tex_src_backend2, &s); - ntt_push_tex_arg(c, instr, nir_tex_src_bias, &s); - if (tex_opcode != TGSI_OPCODE_TXF_LZ) + /* non-coord arg for TXQ */ + if (tex_opcode == TGSI_OPCODE_TXQ) { ntt_push_tex_arg(c, instr, nir_tex_src_lod, &s); - - /* End of packed src setup, everything that follows gets its own operand. */ - if (s.chan) - s.i++; - - switch (instr->sampler_dim) { - case GLSL_SAMPLER_DIM_1D: - if (instr->is_array) { - if (instr->is_shadow) { - target = TGSI_TEXTURE_SHADOW1D_ARRAY; - } else { - target = TGSI_TEXTURE_1D_ARRAY; - } - } else { - if (instr->is_shadow) { - target = TGSI_TEXTURE_SHADOW1D; - } else { - target = TGSI_TEXTURE_1D; - } - } - break; - case GLSL_SAMPLER_DIM_2D: - case GLSL_SAMPLER_DIM_EXTERNAL: - if (instr->is_array) { - if (instr->is_shadow) { - target = TGSI_TEXTURE_SHADOW2D_ARRAY; - } else { - target = TGSI_TEXTURE_2D_ARRAY; - } - } else { - if (instr->is_shadow) { - target = TGSI_TEXTURE_SHADOW2D; - } else { - target = TGSI_TEXTURE_2D; - } - } - break; - case GLSL_SAMPLER_DIM_MS: - if (instr->is_array) { - target = TGSI_TEXTURE_2D_ARRAY_MSAA; - } else { - target = TGSI_TEXTURE_2D_ARRAY; - } - break; - case GLSL_SAMPLER_DIM_3D: - assert(!instr->is_shadow); - target = TGSI_TEXTURE_3D; - break; - case GLSL_SAMPLER_DIM_RECT: - if (instr->is_shadow) { - target = TGSI_TEXTURE_SHADOWRECT; - } else { - target = TGSI_TEXTURE_RECT; - } - break; - case GLSL_SAMPLER_DIM_CUBE: - if (instr->is_array) { - if (instr->is_shadow) { - target = TGSI_TEXTURE_SHADOWCUBE_ARRAY; - } else { - target = TGSI_TEXTURE_CUBE_ARRAY; - } - } else { - if (instr->is_shadow) { - target = TGSI_TEXTURE_SHADOWCUBE; - } else { - target = TGSI_TEXTURE_CUBE; - } - } - break; - case GLSL_SAMPLER_DIM_BUF: - target = TGSI_TEXTURE_BUFFER; - break; - default: - fprintf(stderr, 
"Unknown sampler dimensions: %d\n", instr->sampler_dim); - abort(); + /* virglrenderer mistakenly looks at .w instead of .x, so make sure it's + * scalar + */ + s.srcs[s.i - 1] = ureg_scalar(s.srcs[s.i - 1], 0); } if (s.i > 1) { @@ -2090,11 +2242,6 @@ ntt_emit_texture(struct ntt_compile *c, nir_tex_instr *instr) ureg_MOV(c->ureg, dst, ureg_scalar(ureg_src(tex_dst), 3)); ureg_release_temporary(c->ureg, tex_dst); } - - for (int i = 0; i < s.i; i++) { - if (s.is_temp[i]) - ureg_release_temporary(c->ureg, ureg_dst(s.srcs[i])); - } } static void @@ -2209,7 +2356,7 @@ ntt_free_ssa_temp_by_index(struct ntt_compile *c, int index) if (c->ssa_temp[index].File != TGSI_FILE_TEMPORARY) return; - ureg_release_temporary(c->ureg, c->ssa_temp[index]); + ureg_release_temporary(c->ureg, ureg_dst(c->ssa_temp[index])); memset(&c->ssa_temp[index], 0, sizeof(c->ssa_temp[index])); } @@ -2294,7 +2441,7 @@ ntt_emit_impl(struct ntt_compile *c, nir_function_impl *impl) c->impl = impl; c->liveness = nir_live_ssa_defs_per_instr(impl); - c->ssa_temp = rzalloc_array(c, struct ureg_dst, impl->ssa_alloc); + c->ssa_temp = rzalloc_array(c, struct ureg_src, impl->ssa_alloc); c->reg_temp = rzalloc_array(c, struct ureg_dst, impl->reg_alloc); ntt_setup_registers(c, &impl->registers); @@ -2402,7 +2549,6 @@ static void ntt_optimize_nir(struct nir_shader *s, struct pipe_screen *screen) { bool progress; - nir_variable_mode no_indirects_mask = ntt_no_indirects_mask(s, screen); unsigned pipe_stage = pipe_shader_type_from_mesa(s->info.stage); unsigned control_flow_depth = screen->get_shader_param(screen, pipe_stage, @@ -2436,7 +2582,7 @@ ntt_optimize_nir(struct nir_shader *s, struct pipe_screen *screen) NIR_PASS(progress, s, nir_opt_trivial_continues); NIR_PASS(progress, s, nir_opt_vectorize, ntt_should_vectorize_instr, NULL); NIR_PASS(progress, s, nir_opt_undef); - NIR_PASS(progress, s, nir_opt_loop_unroll, no_indirects_mask); + NIR_PASS(progress, s, nir_opt_loop_unroll); } while (progress); } @@ -2649,6 
+2795,110 @@ nir_to_tgsi_lower_64bit_to_vec2(nir_shader *s) NULL); } +struct ntt_lower_tex_state { + nir_ssa_def *channels[8]; + unsigned i; +}; + +static void +nir_to_tgsi_lower_tex_instr_arg(nir_builder *b, + nir_tex_instr *instr, + nir_tex_src_type tex_src_type, + struct ntt_lower_tex_state *s) +{ + int tex_src = nir_tex_instr_src_index(instr, tex_src_type); + if (tex_src < 0) + return; + + assert(instr->src[tex_src].src.is_ssa); + + nir_ssa_def *def = instr->src[tex_src].src.ssa; + for (int i = 0; i < def->num_components; i++) { + s->channels[s->i++] = nir_channel(b, def, i); + } + + nir_tex_instr_remove_src(instr, tex_src); +} + +/** + * Merges together a vec4 of tex coordinate/compare/bias/lod into a backend tex + * src. This lets NIR handle the coalescing of the vec4 rather than trying to + * manage it on our own, and may lead to more vectorization. + */ +static bool +nir_to_tgsi_lower_tex_instr(nir_builder *b, nir_instr *instr, void *data) +{ + if (instr->type != nir_instr_type_tex) + return false; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + + if (nir_tex_instr_src_index(tex, nir_tex_src_coord) < 0) + return false; + + /* NIR after lower_tex will have LOD set to 0 for tex ops that wanted + * implicit lod in shader stages that don't have quad-based derivatives. + * TGSI doesn't want that, it requires that the backend do implict LOD 0 for + * those stages. + */ + if (!nir_shader_supports_implicit_lod(b->shader) && tex->op == nir_texop_txl) { + int lod_index = nir_tex_instr_src_index(tex, nir_tex_src_lod); + nir_src *lod_src = &tex->src[lod_index].src; + if (nir_src_is_const(*lod_src) && nir_src_as_uint(*lod_src) == 0) { + nir_tex_instr_remove_src(tex, lod_index); + tex->op = nir_texop_tex; + } + } + + b->cursor = nir_before_instr(instr); + + struct ntt_lower_tex_state s = {0}; + + nir_to_tgsi_lower_tex_instr_arg(b, tex, nir_tex_src_coord, &s); + /* We always have at least two slots for the coordinate, even on 1D. 
*/ + s.i = MAX2(s.i, 2); + + nir_to_tgsi_lower_tex_instr_arg(b, tex, nir_tex_src_comparator, &s); + s.i = MAX2(s.i, 3); + + nir_to_tgsi_lower_tex_instr_arg(b, tex, nir_tex_src_bias, &s); + + /* XXX: LZ */ + nir_to_tgsi_lower_tex_instr_arg(b, tex, nir_tex_src_lod, &s); + nir_to_tgsi_lower_tex_instr_arg(b, tex, nir_tex_src_projector, &s); + nir_to_tgsi_lower_tex_instr_arg(b, tex, nir_tex_src_ms_index, &s); + + /* No need to pack undefs in unused channels of the tex instr */ + while (!s.channels[s.i - 1]) + s.i--; + + /* Instead of putting undefs in the unused slots of the vecs, just put in + * another used channel. Otherwise, we'll get unnecessary moves into + * registers. + */ + assert(s.channels[0] != NULL); + for (int i = 1; i < s.i; i++) { + if (!s.channels[i]) + s.channels[i] = s.channels[0]; + } + + nir_tex_instr_add_src(tex, nir_tex_src_backend1, nir_src_for_ssa(nir_vec(b, s.channels, MIN2(s.i, 4)))); + if (s.i > 4) + nir_tex_instr_add_src(tex, nir_tex_src_backend2, nir_src_for_ssa(nir_vec(b, &s.channels[4], s.i - 4))); + + return true; +} + +static bool +nir_to_tgsi_lower_tex(nir_shader *s) +{ + return nir_shader_instructions_pass(s, + nir_to_tgsi_lower_tex_instr, + nir_metadata_block_index | + nir_metadata_dominance, + NULL); +} + static void ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s) { @@ -2657,20 +2907,27 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s) !screen->get_shader_param(screen, pipe_shader_type_from_mesa(s->info.stage), PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED); + nir_variable_mode no_indirects_mask = ntt_no_indirects_mask(s, screen); + if (!options->lower_extract_byte || !options->lower_extract_word || + !options->lower_insert_byte || + !options->lower_insert_word || !options->lower_fdph || !options->lower_flrp64 || !options->lower_fmod || !options->lower_rotate || !options->lower_uniforms_to_ubo || !options->lower_vector_cmp || - options->lower_fsqrt != lower_fsqrt) { + options->lower_fsqrt != 
lower_fsqrt || + options->force_indirect_unrolling != no_indirects_mask) { nir_shader_compiler_options *new_options = ralloc(s, nir_shader_compiler_options); *new_options = *s->options; new_options->lower_extract_byte = true; new_options->lower_extract_word = true; + new_options->lower_insert_byte = true; + new_options->lower_insert_word = true; new_options->lower_fdph = true; new_options->lower_flrp64 = true; new_options->lower_fmod = true; @@ -2678,11 +2935,116 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s) new_options->lower_uniforms_to_ubo = true, new_options->lower_vector_cmp = true; new_options->lower_fsqrt = lower_fsqrt; + new_options->force_indirect_unrolling = no_indirects_mask; s->options = new_options; } } +static bool +ntt_lower_atomic_pre_dec_filter(const nir_instr *instr, const void *_data) +{ + return (instr->type == nir_instr_type_intrinsic && + nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_atomic_counter_pre_dec); +} + +static nir_ssa_def * +ntt_lower_atomic_pre_dec_lower(nir_builder *b, nir_instr *instr, void *_data) +{ + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + + nir_ssa_def *old_result = &intr->dest.ssa; + intr->intrinsic = nir_intrinsic_atomic_counter_post_dec; + + return nir_iadd_imm(b, old_result, -1); +} + +static bool +ntt_lower_atomic_pre_dec(nir_shader *s) +{ + return nir_shader_lower_instructions(s, + ntt_lower_atomic_pre_dec_filter, + ntt_lower_atomic_pre_dec_lower, NULL); +} + +/* Lowers texture projectors if we can't do them as TGSI_OPCODE_TXP. 
*/ +static void +nir_to_tgsi_lower_txp(nir_shader *s) +{ + nir_lower_tex_options lower_tex_options = { + .lower_txp = 0, + }; + + nir_foreach_block(block, nir_shader_get_entrypoint(s)) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_tex) + continue; + nir_tex_instr *tex = nir_instr_as_tex(instr); + + if (nir_tex_instr_src_index(tex, nir_tex_src_projector) < 0) + continue; + + bool has_compare = nir_tex_instr_src_index(tex, nir_tex_src_comparator) >= 0; + bool has_lod = nir_tex_instr_src_index(tex, nir_tex_src_lod) >= 0 || s->info.stage != MESA_SHADER_FRAGMENT; + bool has_offset = nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0; + + /* We can do TXP for any tex (not txg) where we can fit all the + * coordinates and comparator and projector in one vec4 without any + * other modifiers to add on. + * + * nir_lower_tex() only handles the lowering on a sampler-dim basis, so + * if we get any funny projectors then we just blow them all away. + */ + if (tex->op != nir_texop_tex || has_lod || has_offset || (tex->coord_components >= 3 && has_compare)) + lower_tex_options.lower_txp |= 1 << tex->sampler_dim; + } + } + + /* nir_lower_tex must be run even if no options are set, because we need the + * LOD to be set for query_levels and for non-fragment shaders. 
+ */ + NIR_PASS_V(s, nir_lower_tex, &lower_tex_options); +} + +static bool +nir_lower_primid_sysval_to_input_filter(const nir_instr *instr, const void *_data) +{ + return (instr->type == nir_instr_type_intrinsic && + nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_primitive_id); +} + +static nir_ssa_def * +nir_lower_primid_sysval_to_input_lower(nir_builder *b, nir_instr *instr, void *data) +{ + nir_variable *var = *(nir_variable **)data; + if (!var) { + var = nir_variable_create(b->shader, nir_var_shader_in, glsl_uint_type(), "gl_PrimitiveID"); + var->data.location = VARYING_SLOT_PRIMITIVE_ID; + b->shader->info.inputs_read |= VARYING_BIT_PRIMITIVE_ID; + var->data.driver_location = b->shader->num_outputs++; + + *(nir_variable **)data = var; + } + + nir_io_semantics semantics = { + .location = var->data.location, + .num_slots = 1 + }; + return nir_load_input(b, 1, 32, nir_imm_int(b, 0), + .base = var->data.driver_location, + .io_semantics = semantics); +} + +static bool +nir_lower_primid_sysval_to_input(nir_shader *s) +{ + nir_variable *input = NULL; + + return nir_shader_lower_instructions(s, + nir_lower_primid_sysval_to_input_filter, + nir_lower_primid_sysval_to_input_lower, &input); +} + /** * Translates the NIR shader to TGSI. * @@ -2709,12 +3071,18 @@ nir_to_tgsi(struct nir_shader *s, type_size, (nir_lower_io_options)0); NIR_PASS_V(s, nir_lower_regs_to_ssa); - const nir_lower_tex_options lower_tex_options = { - /* XXX: We could skip lowering of TXP for TEX with <=3 coord_compoennts. - */ - .lower_txp = ~0, - }; - NIR_PASS_V(s, nir_lower_tex, &lower_tex_options); + nir_to_tgsi_lower_txp(s); + NIR_PASS_V(s, nir_to_tgsi_lower_tex); + + /* While TGSI can represent PRIMID as either an input or a system value, + * glsl-to-tgsi had the GS (not TCS or TES) primid as an input, and drivers + * depend on that. 
+ */ + if (s->info.stage == MESA_SHADER_GEOMETRY) + NIR_PASS_V(s, nir_lower_primid_sysval_to_input); + + if (s->info.num_abos) + NIR_PASS_V(s, ntt_lower_atomic_pre_dec); if (!original_options->lower_uniforms_to_ubo) { NIR_PASS_V(s, nir_lower_uniforms_to_ubo, @@ -2754,6 +3122,9 @@ nir_to_tgsi(struct nir_shader *s, } else { NIR_PASS_V(s, nir_lower_int_to_float); NIR_PASS_V(s, nir_lower_bool_to_float); + /* bool_to_float generates MOVs for b2f32 that we want to clean up. */ + NIR_PASS_V(s, nir_copy_prop); + NIR_PASS_V(s, nir_opt_dce); } /* Only lower 32-bit floats. The only other modifier type officially @@ -2789,6 +3160,7 @@ nir_to_tgsi(struct nir_shader *s, ureg_setup_shader_info(c->ureg, &s->info); ntt_setup_inputs(c); + ntt_setup_outputs(c); ntt_setup_uniforms(c); if (s->info.stage == MESA_SHADER_FRAGMENT) { @@ -2835,6 +3207,8 @@ static const nir_shader_compiler_options nir_to_tgsi_compiler_options = { .fuse_ffma64 = true, .lower_extract_byte = true, .lower_extract_word = true, + .lower_insert_byte = true, + .lower_insert_word = true, .lower_fdph = true, .lower_flrp64 = true, .lower_fmod = true, diff --git a/lib/mesa/src/gallium/auxiliary/nir/nir_to_tgsi_info.c b/lib/mesa/src/gallium/auxiliary/nir/nir_to_tgsi_info.c index 65fc8d2d6..3bb5f1f8b 100644 --- a/lib/mesa/src/gallium/auxiliary/nir/nir_to_tgsi_info.c +++ b/lib/mesa/src/gallium/auxiliary/nir/nir_to_tgsi_info.c @@ -220,21 +220,21 @@ static void scan_instruction(const struct nir_shader *nir, case nir_intrinsic_load_invocation_id: info->uses_invocationid = true; break; - case nir_intrinsic_load_num_work_groups: + case nir_intrinsic_load_num_workgroups: info->uses_grid_size = true; break; - case nir_intrinsic_load_local_group_size: + case nir_intrinsic_load_workgroup_size: /* The block size is translated to IMM with a fixed block size. 
*/ if (info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0) info->uses_block_size = true; break; case nir_intrinsic_load_local_invocation_id: - case nir_intrinsic_load_work_group_id: { + case nir_intrinsic_load_workgroup_id: { unsigned mask = nir_ssa_def_components_read(&intr->dest.ssa); while (mask) { unsigned i = u_bit_scan(&mask); - if (intr->intrinsic == nir_intrinsic_load_work_group_id) + if (intr->intrinsic == nir_intrinsic_load_workgroup_id) info->uses_block_id[i] = true; else info->uses_thread_id[i] = true; @@ -487,9 +487,9 @@ void nir_tgsi_scan_shader(const struct nir_shader *nir, } if (gl_shader_stage_is_compute(nir->info.stage)) { - info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] = nir->info.cs.local_size[0]; - info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] = nir->info.cs.local_size[1]; - info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH] = nir->info.cs.local_size[2]; + info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] = nir->info.workgroup_size[0]; + info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] = nir->info.workgroup_size[1]; + info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH] = nir->info.workgroup_size[2]; } i = 0; @@ -498,7 +498,7 @@ void nir_tgsi_scan_shader(const struct nir_shader *nir, unsigned semantic_name, semantic_index; const struct glsl_type *type = variable->type; - if (nir_is_per_vertex_io(variable, nir->info.stage)) { + if (nir_is_arrayed_io(variable, nir->info.stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -598,7 +598,7 @@ void nir_tgsi_scan_shader(const struct nir_shader *nir, i = variable->data.driver_location; const struct glsl_type *type = variable->type; - if (nir_is_per_vertex_io(variable, nir->info.stage)) { + if (nir_is_arrayed_io(variable, nir->info.stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } diff --git a/lib/mesa/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h 
b/lib/mesa/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h index 8e7294629..3b630f776 100644 --- a/lib/mesa/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h +++ b/lib/mesa/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h @@ -31,11 +31,12 @@ DRI_CONF_SECTION_DEBUG DRI_CONF_GLSL_CORRECT_DERIVATIVES_AFTER_DISCARD(false) DRI_CONF_GLSL_IGNORE_WRITE_TO_READONLY_VAR(false) DRI_CONF_ALLOW_DRAW_OUT_OF_ORDER(false) - DRI_CONF_ALLOW_INCORRECT_PRIMITIVE_ID(false) DRI_CONF_FORCE_COMPAT_PROFILE(false) DRI_CONF_FORCE_GL_NAMES_REUSE(false) DRI_CONF_TRANSCODE_ETC(false) + DRI_CONF_TRANSCODE_ASTC(false) DRI_CONF_FORCE_GL_VENDOR() + DRI_CONF_FORCE_GL_RENDERER() DRI_CONF_OVERRIDE_VRAM_SIZE() DRI_CONF_GLX_EXTENSION_OVERRIDE() DRI_CONF_INDIRECT_GL_EXTENSION_OVERRIDE() @@ -47,7 +48,7 @@ DRI_CONF_SECTION_MISCELLANEOUS DRI_CONF_ALWAYS_HAVE_DEPTH_BUFFER(false) DRI_CONF_GLSL_ZERO_INIT(false) DRI_CONF_VS_POSITION_ALWAYS_INVARIANT(false) + DRI_CONF_VS_POSITION_ALWAYS_PRECISE(false) DRI_CONF_ALLOW_RGB10_CONFIGS(true) - DRI_CONF_ALLOW_FP16_CONFIGS(false) DRI_CONF_FORCE_INTEGER_TEX_NEAREST(false) DRI_CONF_SECTION_END diff --git a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_buffer.h index 8c0545505..7e3e8a4b1 100644 --- a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -64,13 +64,13 @@ enum pb_usage_flags { PB_USAGE_CPU_WRITE = (1 << 1), PB_USAGE_GPU_READ = (1 << 2), PB_USAGE_GPU_WRITE = (1 << 3), - PB_USAGE_DONTBLOCK = (1 << 9), - PB_USAGE_UNSYNCHRONIZED = (1 << 10), + PB_USAGE_DONTBLOCK = (1 << 4), + PB_USAGE_UNSYNCHRONIZED = (1 << 5), /* Persistent mappings may remain across a flush. Note that contrary * to OpenGL persistent maps, there is no requirement at the pipebuffer * api level to explicitly enforce coherency by barriers or range flushes. 
*/ - PB_USAGE_PERSISTENT = (1 << 13) + PB_USAGE_PERSISTENT = (1 << 8) }; /* For error checking elsewhere */ @@ -288,7 +288,7 @@ pb_reference_with_winsys(void *winsys, * the requested or not. */ static inline boolean -pb_check_alignment(pb_size requested, pb_size provided) +pb_check_alignment(uint32_t requested, uint32_t provided) { if (!requested) return TRUE; @@ -310,16 +310,6 @@ pb_check_usage(unsigned requested, unsigned provided) return (requested & provided) == requested ? TRUE : FALSE; } - -/** - * Malloc-based buffer to store data that can't be used by the graphics - * hardware. - */ -struct pb_buffer * -pb_malloc_buffer_create(pb_size size, - const struct pb_desc *desc); - - #ifdef __cplusplus } #endif diff --git a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 4595e1a40..d48f79c23 100644 --- a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -232,7 +232,7 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, } assert(pipe_is_referenced(&buf->buffer->reference)); - assert(pb_check_alignment(desc->alignment, 1ull << buf->buffer->alignment_log2)); + assert(pb_check_alignment(desc->alignment, 1u << buf->buffer->alignment_log2)); assert(buf->buffer->size >= size); pipe_reference_init(&buf->base.reference, 1); diff --git a/lib/mesa/src/gallium/auxiliary/postprocess/pp_colors.c b/lib/mesa/src/gallium/auxiliary/postprocess/pp_colors.c index f319ebb22..e7ce77758 100644 --- a/lib/mesa/src/gallium/auxiliary/postprocess/pp_colors.c +++ b/lib/mesa/src/gallium/auxiliary/postprocess/pp_colors.c @@ -47,7 +47,7 @@ pp_nocolor(struct pp_queue_t *ppq, struct pipe_resource *in, pp_filter_misc_state(p); cso_set_samplers(p->cso, PIPE_SHADER_FRAGMENT, 1, samplers); - pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 1, 0, &p->view); + pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 1, 0, false, &p->view); 
cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][0]); cso_set_fragment_shader_handle(p->cso, ppq->shaders[n][1]); diff --git a/lib/mesa/src/gallium/auxiliary/postprocess/pp_mlaa.c b/lib/mesa/src/gallium/auxiliary/postprocess/pp_mlaa.c index 2bc2ac873..102e71f48 100644 --- a/lib/mesa/src/gallium/auxiliary/postprocess/pp_mlaa.c +++ b/lib/mesa/src/gallium/auxiliary/postprocess/pp_mlaa.c @@ -134,7 +134,7 @@ pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct pipe_resource *in, const struct pipe_sampler_state *samplers[] = {&p->sampler_point}; cso_set_samplers(p->cso, PIPE_SHADER_FRAGMENT, 1, samplers); } - pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 1, 0, &p->view); + pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 1, 0, false, &p->view); cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][1]); /* offsetvs */ cso_set_fragment_shader_handle(p->cso, ppq->shaders[n][2]); @@ -166,7 +166,7 @@ pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct pipe_resource *in, } arr[0] = p->view; - pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 3, 0, arr); + pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 3, 0, false, arr); cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][0]); /* passvs */ cso_set_fragment_shader_handle(p->cso, ppq->shaders[n][3]); @@ -198,7 +198,7 @@ pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct pipe_resource *in, } arr[1] = p->view; - pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 2, 0, arr); + pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 2, 0, false, arr); cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][1]); /* offsetvs */ cso_set_fragment_shader_handle(p->cso, ppq->shaders[n][4]); diff --git a/lib/mesa/src/gallium/auxiliary/postprocess/pp_run.c b/lib/mesa/src/gallium/auxiliary/postprocess/pp_run.c index 3615f348c..93e0fa7b7 100644 --- a/lib/mesa/src/gallium/auxiliary/postprocess/pp_run.c +++ b/lib/mesa/src/gallium/auxiliary/postprocess/pp_run.c @@ -184,14 +184,11 @@ pp_run(struct pp_queue_t *ppq, struct 
pipe_resource *in, } /* restore state we changed */ - cso_restore_state(cso); - - /* Unbind resources that we have bound. */ - struct pipe_context *pipe = ppq->p->pipe; - pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, false, NULL); - pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, false, NULL); - pipe->set_vertex_buffers(pipe, 0, 0, 1, false, NULL); - pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 0, 3, NULL); + cso_restore_state(cso, CSO_UNBIND_FS_SAMPLERVIEWS | + CSO_UNBIND_FS_IMAGE0 | + CSO_UNBIND_VS_CONSTANTS | + CSO_UNBIND_FS_CONSTANTS | + CSO_UNBIND_VERTEX_BUFFER0); /* restore states not restored by cso */ if (ppq->p->st) { diff --git a/lib/mesa/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/lib/mesa/src/gallium/auxiliary/target-helpers/inline_sw_helper.h index 76eda8467..4fb74993d 100644 --- a/lib/mesa/src/gallium/auxiliary/target-helpers/inline_sw_helper.h +++ b/lib/mesa/src/gallium/auxiliary/target-helpers/inline_sw_helper.h @@ -33,6 +33,10 @@ #include "d3d12/d3d12_public.h" #endif +#ifdef GALLIUM_ASAHI +#include "asahi/agx_public.h" +#endif + static inline struct pipe_screen * sw_screen_create_named(struct sw_winsys *winsys, const char *driver) { @@ -71,30 +75,38 @@ sw_screen_create_named(struct sw_winsys *winsys, const char *driver) screen = d3d12_create_dxcore_screen(winsys, NULL); #endif +#if defined(GALLIUM_ASAHI) + if (screen == NULL && strcmp(driver, "asahi") == 0) + screen = agx_screen_create(winsys); +#endif + return screen ? debug_screen_wrap(screen) : NULL; } static inline struct pipe_screen * -sw_screen_create(struct sw_winsys *winsys) +sw_screen_create_vk(struct sw_winsys *winsys, bool sw_vk) { UNUSED bool only_sw = env_var_as_boolean("LIBGL_ALWAYS_SOFTWARE", false); const char *drivers[] = { - debug_get_option("GALLIUM_DRIVER", ""), + (sw_vk ? "" : debug_get_option("GALLIUM_DRIVER", "")), #if defined(GALLIUM_D3D12) - only_sw ? "" : "d3d12", + (sw_vk || only_sw) ? 
"" : "d3d12", +#endif +#if defined(GALLIUM_ASAHI) + (sw_vk || only_sw) ? "" : "asahi", #endif #if defined(GALLIUM_LLVMPIPE) "llvmpipe", #endif #if defined(GALLIUM_SOFTPIPE) - "softpipe", + (sw_vk ? "" : "softpipe"), #endif #if defined(GALLIUM_SWR) - "swr", + (sw_vk ? "" : "swr"), #endif #if defined(GALLIUM_ZINK) - only_sw ? "" : "zink", + (sw_vk || only_sw) ? "" : "zink", #endif }; @@ -109,4 +121,9 @@ sw_screen_create(struct sw_winsys *winsys) return NULL; } +static inline struct pipe_screen * +sw_screen_create(struct sw_winsys *winsys) +{ + return sw_screen_create_vk(winsys, false); +} #endif diff --git a/lib/mesa/src/gallium/auxiliary/target-helpers/sw_helper.h b/lib/mesa/src/gallium/auxiliary/target-helpers/sw_helper.h index 88a5086d2..059ae2d44 100644 --- a/lib/mesa/src/gallium/auxiliary/target-helpers/sw_helper.h +++ b/lib/mesa/src/gallium/auxiliary/target-helpers/sw_helper.h @@ -21,6 +21,10 @@ #include "d3d12/d3d12_public.h" #endif +#ifdef GALLIUM_ASAHI +#include "asahi/agx_public.h" +#endif + #ifdef GALLIUM_SOFTPIPE #include "softpipe/sp_public.h" #endif @@ -76,30 +80,37 @@ sw_screen_create_named(struct sw_winsys *winsys, const char *driver) screen = d3d12_create_dxcore_screen(winsys, NULL); #endif +#if defined(GALLIUM_ASAHI) + if (screen == NULL && strcmp(driver, "asahi") == 0) + screen = agx_screen_create(winsys); +#endif + return screen; } - struct pipe_screen * -sw_screen_create(struct sw_winsys *winsys) +sw_screen_create_vk(struct sw_winsys *winsys, bool sw_vk) { UNUSED bool only_sw = env_var_as_boolean("LIBGL_ALWAYS_SOFTWARE", false); const char *drivers[] = { - debug_get_option("GALLIUM_DRIVER", ""), + (sw_vk ? "" : debug_get_option("GALLIUM_DRIVER", "")), #if defined(GALLIUM_D3D12) - only_sw ? "" : "d3d12", + (sw_vk || only_sw) ? "" : "d3d12", +#endif +#if defined(GALLIUM_ASAHI) + (sw_vk || only_sw) ? "" : "asahi", #endif #if defined(GALLIUM_LLVMPIPE) "llvmpipe", #endif #if defined(GALLIUM_SOFTPIPE) - "softpipe", + sw_vk ? 
"" : "softpipe", #endif #if defined(GALLIUM_SWR) - "swr", + sw_vk ? "" : "swr", #endif #if defined(GALLIUM_ZINK) - only_sw ? "" : "zink", + (sw_vk || only_sw) ? "" : "zink", #endif }; @@ -114,4 +125,9 @@ sw_screen_create(struct sw_winsys *winsys) return NULL; } +struct pipe_screen * +sw_screen_create(struct sw_winsys *winsys) +{ + return sw_screen_create_vk(winsys, false); +} #endif diff --git a/lib/mesa/src/gallium/auxiliary/target-helpers/sw_helper_public.h b/lib/mesa/src/gallium/auxiliary/target-helpers/sw_helper_public.h index 12b301b6a..499813cca 100644 --- a/lib/mesa/src/gallium/auxiliary/target-helpers/sw_helper_public.h +++ b/lib/mesa/src/gallium/auxiliary/target-helpers/sw_helper_public.h @@ -5,6 +5,9 @@ struct pipe_screen; struct sw_winsys; struct pipe_screen * +sw_screen_create_vk(struct sw_winsys *winsys, bool sw_vk); + +struct pipe_screen * sw_screen_create(struct sw_winsys *winsys); #endif /* _SW_HELPER_PUBLIC_H */ diff --git a/lib/mesa/src/gallium/auxiliary/translate/translate.h b/lib/mesa/src/gallium/auxiliary/translate/translate.h index d77561aa7..b70d90b09 100644 --- a/lib/mesa/src/gallium/auxiliary/translate/translate.h +++ b/lib/mesa/src/gallium/auxiliary/translate/translate.h @@ -45,12 +45,19 @@ #include "pipe/p_state.h" /** - * Translate has to work on one more attribute because - * the draw module has to be able to pass the vertex - * position even if the fragment shader already consumes - * PIPE_MAX_ATTRIBS inputs. + * Translate has to work on two more attributes because + * the draw module has to be able to pass a few fixed + * function vertex shader outputs even if the fragment + * shader already consumes PIPE_MAX_ATTRIBS inputs. 
+ * + * These vertex shader outputs include: + * - position + * - bcolor (up to two) + * - point-size + * - viewport index + * - layer */ -#define TRANSLATE_MAX_ATTRIBS (PIPE_MAX_ATTRIBS + 1) +#define TRANSLATE_MAX_ATTRIBS (PIPE_MAX_ATTRIBS + 6) enum translate_element_type { TRANSLATE_ELEMENT_NORMAL, @@ -132,6 +139,7 @@ boolean translate_is_output_format_supported(enum pipe_format format); static inline int translate_keysize( const struct translate_key *key ) { + assert(key->nr_elements <= TRANSLATE_MAX_ATTRIBS); return 2 * sizeof(int) + key->nr_elements * sizeof(struct translate_element); } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_box.h b/lib/mesa/src/gallium/auxiliary/util/u_box.h index 764bf5037..c39e13964 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_box.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_box.h @@ -3,6 +3,7 @@ #include "pipe/p_state.h" #include "util/u_math.h" +#include "util/format/u_format.h" static inline void u_box_1d(unsigned x, unsigned w, struct pipe_box *box) @@ -239,4 +240,22 @@ u_box_minify_3d(struct pipe_box *dst, dst->depth = MAX2(src->depth >> l, 1); } +/* Converts a box specified in pixels to an equivalent box specified + * in blocks, where the boxes represent a region-of-interest of an image with + * the given format. This is trivial (a copy) for uncompressed formats. 
+ */ +static inline void +u_box_pixels_to_blocks(struct pipe_box *blocks, + const struct pipe_box *pixels, enum pipe_format format) +{ + u_box_3d( + pixels->x / util_format_get_blockwidth(format), + pixels->y / util_format_get_blockheight(format), + pixels->z, + DIV_ROUND_UP(pixels->width, util_format_get_blockwidth(format)), + DIV_ROUND_UP(pixels->height, util_format_get_blockheight(format)), + pixels->depth, + blocks); +} + #endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_compute.c b/lib/mesa/src/gallium/auxiliary/util/u_compute.c index 79755abaf..8d4d871b2 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_compute.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_compute.c @@ -76,7 +76,7 @@ static void *blit_compute_shader(struct pipe_context *ctx) } void util_compute_blit(struct pipe_context *ctx, struct pipe_blit_info *blit_info, - void **compute_state) + void **compute_state, bool half_texel_offset) { if (blit_info->src.box.width == 0 || blit_info->src.box.height == 0 || blit_info->dst.box.width == 0 || blit_info->dst.box.height == 0) @@ -91,9 +91,10 @@ void util_compute_blit(struct pipe_context *ctx, struct pipe_blit_info *blit_inf float x_scale = blit_info->src.box.width / (float)blit_info->dst.box.width; float y_scale = blit_info->src.box.height / (float)blit_info->dst.box.height; float z_scale = blit_info->src.box.depth / (float)blit_info->dst.box.depth; + float offset = half_texel_offset ? 
0.5 : 0.0; - unsigned data[] = {u_bitcast_f2u(blit_info->src.box.x / (float)src->width0), - u_bitcast_f2u(blit_info->src.box.y / (float)src->height0), + unsigned data[] = {u_bitcast_f2u((blit_info->src.box.x + offset) / (float)src->width0), + u_bitcast_f2u((blit_info->src.box.y + offset) / (float)src->height0), u_bitcast_f2u(blit_info->src.box.z), u_bitcast_f2u(0), u_bitcast_f2u(x_scale / src->width0), @@ -138,7 +139,7 @@ void util_compute_blit(struct pipe_context *ctx, struct pipe_blit_info *blit_inf u_sampler_view_default_template(&src_templ, src, src->format); src_templ.format = util_format_linear(blit_info->src.format); src_view = ctx->create_sampler_view(ctx, src, &src_templ); - ctx->set_sampler_views(ctx, PIPE_SHADER_COMPUTE, 0, 1, 0, &src_view); + ctx->set_sampler_views(ctx, PIPE_SHADER_COMPUTE, 0, 1, 0, false, &src_view); if (!*compute_state) *compute_state = blit_compute_shader(ctx); @@ -159,7 +160,7 @@ void util_compute_blit(struct pipe_context *ctx, struct pipe_blit_info *blit_inf ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 0, 1, NULL); ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, false, NULL); - ctx->set_sampler_views(ctx, PIPE_SHADER_COMPUTE, 0, 0, 1, NULL); + ctx->set_sampler_views(ctx, PIPE_SHADER_COMPUTE, 0, 0, 1, false, NULL); pipe_sampler_view_reference(&src_view, NULL); ctx->delete_sampler_state(ctx, sampler_state_p); ctx->bind_compute_state(ctx, NULL); diff --git a/lib/mesa/src/gallium/auxiliary/util/u_compute.h b/lib/mesa/src/gallium/auxiliary/util/u_compute.h index 8c2866af8..4a6c66e0e 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_compute.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_compute.h @@ -36,7 +36,7 @@ extern "C" { #endif void util_compute_blit(struct pipe_context *ctx, struct pipe_blit_info *blit_info, - void **compute_state); + void **compute_state, bool half_texel_offset); #ifdef __cplusplus } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_debug_image.c b/lib/mesa/src/gallium/auxiliary/util/u_debug_image.c 
index 91bfa10af..fd0513f65 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_debug_image.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_debug_image.c @@ -113,10 +113,10 @@ debug_dump_surface(struct pipe_context *pipe, */ texture = surface->texture; - data = pipe_transfer_map(pipe, texture, surface->u.tex.level, - surface->u.tex.first_layer, - PIPE_MAP_READ, - 0, 0, surface->width, surface->height, &transfer); + data = pipe_texture_map(pipe, texture, surface->u.tex.level, + surface->u.tex.first_layer, + PIPE_MAP_READ, + 0, 0, surface->width, surface->height, &transfer); if (!data) return; @@ -128,7 +128,7 @@ debug_dump_surface(struct pipe_context *pipe, transfer->stride, data); - pipe->transfer_unmap(pipe, transfer); + pipe->texture_unmap(pipe, transfer); } @@ -192,13 +192,13 @@ debug_dump_surface_bmp(struct pipe_context *pipe, struct pipe_resource *texture = surface->texture; void *ptr; - ptr = pipe_transfer_map(pipe, texture, surface->u.tex.level, - surface->u.tex.first_layer, PIPE_MAP_READ, - 0, 0, surface->width, surface->height, &transfer); + ptr = pipe_texture_map(pipe, texture, surface->u.tex.level, + surface->u.tex.first_layer, PIPE_MAP_READ, + 0, 0, surface->width, surface->height, &transfer); debug_dump_transfer_bmp(pipe, filename, transfer, ptr); - pipe->transfer_unmap(pipe, transfer); + pipe->texture_unmap(pipe, transfer); } void diff --git a/lib/mesa/src/gallium/auxiliary/util/u_driconf.c b/lib/mesa/src/gallium/auxiliary/util/u_driconf.c new file mode 100644 index 000000000..8ace84747 --- /dev/null +++ b/lib/mesa/src/gallium/auxiliary/util/u_driconf.c @@ -0,0 +1,69 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the 
Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "u_driconf.h" + +void +u_driconf_fill_st_options(struct st_config_options *options, + const struct driOptionCache *optionCache) +{ +#define query_option_impl(option, type) \ + options->option = driQueryOption##type(optionCache, #option) +#define query_bool_option(option) query_option_impl(option, b) +#define query_int_option(option) query_option_impl(option, i) +#define query_string_option(option) \ + do { \ + char *option = driQueryOptionstr(optionCache, #option); \ + if (*option) \ + options->option = strdup(option); \ + } while (0) + + query_bool_option(disable_blend_func_extended); + query_bool_option(disable_arb_gpu_shader5); + query_bool_option(disable_glsl_line_continuations); + query_bool_option(force_glsl_extensions_warn); + query_int_option(force_glsl_version); + query_bool_option(allow_extra_pp_tokens); + query_bool_option(allow_glsl_extension_directive_midshader); + query_bool_option(allow_glsl_120_subset_in_110); + query_bool_option(allow_glsl_builtin_const_expression); + query_bool_option(allow_glsl_relaxed_es); + query_bool_option(allow_glsl_builtin_variable_redeclaration); + query_bool_option(allow_higher_compat_version); + 
query_bool_option(glsl_ignore_write_to_readonly_var); + query_bool_option(glsl_zero_init); + query_bool_option(force_integer_tex_nearest); + query_bool_option(vs_position_always_invariant); + query_bool_option(vs_position_always_precise); + query_bool_option(force_glsl_abs_sqrt); + query_bool_option(allow_glsl_cross_stage_interpolation_mismatch); + query_bool_option(allow_draw_out_of_order); + query_bool_option(ignore_map_unsynchronized); + query_bool_option(force_gl_names_reuse); + query_bool_option(transcode_etc); + query_bool_option(transcode_astc); + query_string_option(force_gl_vendor); + query_string_option(force_gl_renderer); + + driComputeOptionsSha1(optionCache, options->config_options_sha1); +} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_driconf.h b/lib/mesa/src/gallium/auxiliary/util/u_driconf.h new file mode 100644 index 000000000..00eead301 --- /dev/null +++ b/lib/mesa/src/gallium/auxiliary/util/u_driconf.h @@ -0,0 +1,42 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef U_DRICONF_H_ +#define U_DRICONF_H_ + +#include "util/xmlconfig.h" +#include "frontend/api.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void +u_driconf_fill_st_options(struct st_config_options *options, + const struct driOptionCache *optionCache); + +#ifdef __cplusplus +} +#endif + +#endif /* U_DRICONF_H_ */ diff --git a/lib/mesa/src/gallium/auxiliary/util/u_prim.c b/lib/mesa/src/gallium/auxiliary/util/u_prim.c index cbd48e26a..a84d0e71e 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_prim.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_prim.c @@ -21,12 +21,25 @@ */ #include "u_prim.h" +#include "pipe/p_state.h" /** Return string name of given primitive type */ const char * u_prim_name(enum pipe_prim_type prim) { +#if defined(__GNUC__) + /* Check that the enum is packed: */ + STATIC_ASSERT(sizeof(enum pipe_prim_type) == 1); +#endif + + /* Draw merging in u_threaded_context requires that sizeof(mode) == 1. 
*/ + struct pipe_draw_info info; + STATIC_ASSERT(sizeof(info.mode) == 1); + + struct pipe_draw_vertex_state_info dvs_info; + STATIC_ASSERT(sizeof(dvs_info.mode) == 1); + static const struct debug_named_value names[] = { DEBUG_NAMED_VALUE(PIPE_PRIM_POINTS), DEBUG_NAMED_VALUE(PIPE_PRIM_LINES), diff --git a/lib/mesa/src/gallium/auxiliary/util/u_prim.h b/lib/mesa/src/gallium/auxiliary/util/u_prim.h index b9d4a9e80..1fbb2f5b5 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_prim.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_prim.h @@ -201,12 +201,16 @@ u_vertices_per_prim(enum pipe_prim_type primitive) case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: return 6; + case PIPE_PRIM_QUADS: + case PIPE_PRIM_QUAD_STRIP: + /* these won't be seen from geometry shaders + but prim assembly might for prim id. */ + return 4; + /* following primitives should never be used * with geometry shaders abd their size is * undefined */ case PIPE_PRIM_POLYGON: - case PIPE_PRIM_QUADS: - case PIPE_PRIM_QUAD_STRIP: default: debug_printf("Unrecognized geometry shader primitive"); return 3; diff --git a/lib/mesa/src/gallium/auxiliary/util/u_screen.c b/lib/mesa/src/gallium/auxiliary/util/u_screen.c index 6e6aadbfd..eba554600 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_screen.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_screen.c @@ -77,6 +77,7 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen, case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: case PIPE_CAP_DEPTH_CLIP_DISABLE: case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE: + case PIPE_CAP_DEPTH_CLAMP_ENABLE: case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: @@ -87,6 +88,10 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen, case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND: return 0; + case PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART: + case PIPE_CAP_SUPPORTED_PRIM_MODES: + return BITFIELD_MASK(PIPE_PRIM_MAX); + case PIPE_CAP_MIN_TEXEL_OFFSET: /* GL 3.x minimum value. 
*/ return -8; @@ -269,7 +274,6 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen, case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: case PIPE_CAP_CULL_DISTANCE: - case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES: case PIPE_CAP_TGSI_VOTE: case PIPE_CAP_MAX_WINDOW_RECTANGLES: /* Enables EXT_window_rectangles */ case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED: @@ -287,6 +291,7 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen, return 4; /* GLES 2.0 minimum value */ case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: + case PIPE_CAP_PREFER_BACK_BUFFER_REUSE: return 1; case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS: @@ -460,11 +465,16 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen, return 0; case PIPE_CAP_SAMPLER_REDUCTION_MINMAX: + case PIPE_CAP_SAMPLER_REDUCTION_MINMAX_ARB: return 0; case PIPE_CAP_ALLOW_DYNAMIC_VAO_FASTPATH: return 1; + case PIPE_CAP_EMULATE_NONFIXED_PRIMITIVE_RESTART: + case PIPE_CAP_DRAW_VERTEX_STATE: + return 0; + default: unreachable("bad PIPE_CAP_*"); } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_sse.h b/lib/mesa/src/gallium/auxiliary/util/u_sse.h index cae4138ba..e372d3b6b 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_sse.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_sse.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2008 VMware, Inc. + * Copyright 2008-2021 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -38,6 +38,8 @@ #define U_SSE_H_ #include "pipe/p_config.h" +#include "pipe/p_compiler.h" +#include "util/u_debug.h" #if defined(PIPE_ARCH_SSE) @@ -296,6 +298,408 @@ transpose2_64_2_32(const __m128i * restrict a01, #define SCALAR_EPI32(m, i) _mm_shuffle_epi32((m), _MM_SHUFFLE(i,i,i,i)) +/* + * Implements (1-w)*a + w*b = a - wa + wb = w(b-a) + a + * ((b-a)*w >> 8) + a + * The math behind negative sub results (logic shift/mask) is tricky. 
+ * + * w -- weight values + * a -- src0 values + * b -- src1 values + */ +static ALWAYS_INLINE __m128i +util_sse2_lerp_epi16(__m128i w, __m128i a, __m128i b) +{ + __m128i res; + + res = _mm_sub_epi16(b, a); + res = _mm_mullo_epi16(res, w); + res = _mm_srli_epi16(res, 8); + /* use add_epi8 instead of add_epi16 so no need to mask off upper bits */ + res = _mm_add_epi8(res, a); + + return res; +} + + +/* Apply premultiplied-alpha blending on two pixels simultaneously. + * All parameters are packed as 8.8 fixed point values in __m128i SSE + * registers, with the upper 8 bits all zero. + * + * a -- src alpha values + * d -- dst color values + * s -- src color values + */ +static inline __m128i +util_sse2_premul_blend_epi16( __m128i a, __m128i d, __m128i s) +{ + __m128i da, d_sub_da, tmp; + tmp = _mm_mullo_epi16(d, a); + da = _mm_srli_epi16(tmp, 8); + d_sub_da = _mm_sub_epi16(d, da); + + return _mm_add_epi16(s, d_sub_da); +} + + +/* Apply premultiplied-alpha blending on four pixels in packed BGRA + * format (one/inv_src_alpha blend mode). 
+ * + * src -- four pixels (bgra8 format) + * dst -- four destination pixels (bgra8) + * return -- blended pixels (bgra8) + */ +static ALWAYS_INLINE __m128i +util_sse2_blend_premul_4(const __m128i src, + const __m128i dst) +{ + + __m128i al, ah, dl, dh, sl, sh, rl, rh; + __m128i zero = _mm_setzero_si128(); + + /* Blend first two pixels: + */ + sl = _mm_unpacklo_epi8(src, zero); + dl = _mm_unpacklo_epi8(dst, zero); + + al = _mm_shufflehi_epi16(sl, 0xff); + al = _mm_shufflelo_epi16(al, 0xff); + + rl = util_sse2_premul_blend_epi16(al, dl, sl); + + /* Blend second two pixels: + */ + sh = _mm_unpackhi_epi8(src, zero); + dh = _mm_unpackhi_epi8(dst, zero); + + ah = _mm_shufflehi_epi16(sh, 0xff); + ah = _mm_shufflelo_epi16(ah, 0xff); + + rh = util_sse2_premul_blend_epi16(ah, dh, sh); + + /* Pack the results down to four bgra8 pixels: + */ + return _mm_packus_epi16(rl, rh); +} + + +/* Apply src-alpha blending on four pixels in packed BGRA + * format (srcalpha/inv_src_alpha blend mode). + * + * src -- four pixels (bgra8 format) + * dst -- four destination pixels (bgra8) + * return -- blended pixels (bgra8) + */ +static ALWAYS_INLINE __m128i +util_sse2_blend_srcalpha_4(const __m128i src, + const __m128i dst) +{ + + __m128i al, ah, dl, dh, sl, sh, rl, rh; + __m128i zero = _mm_setzero_si128(); + + /* Blend first two pixels: + */ + sl = _mm_unpacklo_epi8(src, zero); + dl = _mm_unpacklo_epi8(dst, zero); + + al = _mm_shufflehi_epi16(sl, 0xff); + al = _mm_shufflelo_epi16(al, 0xff); + + rl = util_sse2_lerp_epi16(al, dl, sl); + + /* Blend second two pixels: + */ + sh = _mm_unpackhi_epi8(src, zero); + dh = _mm_unpackhi_epi8(dst, zero); + + ah = _mm_shufflehi_epi16(sh, 0xff); + ah = _mm_shufflelo_epi16(ah, 0xff); + + rh = util_sse2_lerp_epi16(ah, dh, sh); + + /* Pack the results down to four bgra8 pixels: + */ + return _mm_packus_epi16(rl, rh); +} + + +/** + * premultiplies src with constant alpha then + * does one/inv_src_alpha blend. 
+ * + * src 16xi8 (normalized) + * dst 16xi8 (normalized) + * cst_alpha (constant alpha (u8 value)) + */ +static ALWAYS_INLINE __m128i +util_sse2_blend_premul_src_4(const __m128i src, + const __m128i dst, + const unsigned cst_alpha) +{ + + __m128i srca, d, s, rl, rh; + __m128i zero = _mm_setzero_si128(); + __m128i cst_alpha_vec = _mm_set1_epi16(cst_alpha); + + /* Blend first two pixels: + */ + s = _mm_unpacklo_epi8(src, zero); + s = _mm_mullo_epi16(s, cst_alpha_vec); + /* the shift will cause some precision loss */ + s = _mm_srli_epi16(s, 8); + + srca = _mm_shufflehi_epi16(s, 0xff); + srca = _mm_shufflelo_epi16(srca, 0xff); + + d = _mm_unpacklo_epi8(dst, zero); + rl = util_sse2_premul_blend_epi16(srca, d, s); + + /* Blend second two pixels: + */ + s = _mm_unpackhi_epi8(src, zero); + s = _mm_mullo_epi16(s, cst_alpha_vec); + /* the shift will cause some precision loss */ + s = _mm_srli_epi16(s, 8); + + srca = _mm_shufflehi_epi16(s, 0xff); + srca = _mm_shufflelo_epi16(srca, 0xff); + + d = _mm_unpackhi_epi8(dst, zero); + rh = util_sse2_premul_blend_epi16(srca, d, s); + + /* Pack the results down to four bgra8 pixels: + */ + return _mm_packus_epi16(rl, rh); +} + + +/** + * Linear interpolation with SSE2. + * + * dst, src0, src1 are 16 x i8 vectors, with [0..255] normalized values. + * + * weight_lo and weight_hi should be a 8 x i16 vectors, in 8.8 fixed point + * format, for the low and high components. + * We'd want to pass these as values but MSVC limitation forces us to pass these + * as pointers since it will complain if more than 3 __m128 are passed by value. 
+ */ +static ALWAYS_INLINE __m128i +util_sse2_lerp_epi8_fixed88(__m128i src0, __m128i src1, + const __m128i * restrict weight_lo, + const __m128i * restrict weight_hi) +{ + const __m128i zero = _mm_setzero_si128(); + + __m128i src0_lo = _mm_unpacklo_epi8(src0, zero); + __m128i src0_hi = _mm_unpackhi_epi8(src0, zero); + + __m128i src1_lo = _mm_unpacklo_epi8(src1, zero); + __m128i src1_hi = _mm_unpackhi_epi8(src1, zero); + + __m128i dst_lo; + __m128i dst_hi; + + dst_lo = util_sse2_lerp_epi16(*weight_lo, src0_lo, src1_lo); + dst_hi = util_sse2_lerp_epi16(*weight_hi, src0_hi, src1_hi); + + return _mm_packus_epi16(dst_lo, dst_hi); +} + + +/** + * Linear interpolation with SSE2. + * + * dst, src0, src1 are 16 x i8 vectors, with [0..255] normalized values. + * + * weight should be a 16 x i8 vector, in 0.8 fixed point values. + */ +static ALWAYS_INLINE __m128i +util_sse2_lerp_epi8_fixed08(__m128i src0, __m128i src1, + __m128i weight) +{ + const __m128i zero = _mm_setzero_si128(); + __m128i weight_lo = _mm_unpacklo_epi8(weight, zero); + __m128i weight_hi = _mm_unpackhi_epi8(weight, zero); + + return util_sse2_lerp_epi8_fixed88(src0, src1, + &weight_lo, &weight_hi); +} + + +/** + * Linear interpolation with SSE2. + * + * dst, src0, src1, and weight are 16 x i8 vectors, with [0..255] normalized + * values. + */ +static ALWAYS_INLINE __m128i +util_sse2_lerp_unorm8(__m128i src0, __m128i src1, + __m128i weight) +{ + const __m128i zero = _mm_setzero_si128(); + __m128i weight_lo = _mm_unpacklo_epi8(weight, zero); + __m128i weight_hi = _mm_unpackhi_epi8(weight, zero); + +#if 0 + /* + * Rescale from [0..255] to [0..256]. + */ + weight_lo = _mm_add_epi16(weight_lo, _mm_srli_epi16(weight_lo, 7)); + weight_hi = _mm_add_epi16(weight_hi, _mm_srli_epi16(weight_hi, 7)); +#endif + + return util_sse2_lerp_epi8_fixed88(src0, src1, + &weight_lo, &weight_hi); +} + + +/** + * Linear interpolation with SSE2. 
+ * + * dst, src0, src1, src2, src3 are 16 x i8 vectors, with [0..255] normalized + * values. + * + * ws_lo, ws_hi, wt_lo, wt_hi should be a 8 x i16 vectors, in 8.8 fixed point + * format, for the low and high components. + * We'd want to pass these as values but MSVC limitation forces us to pass these + * as pointers since it will complain if more than 3 __m128 are passed by value. + * + * This uses ws_lo, ws_hi to interpolate between src0 and src1, as well as to + * interpolate between src2 and src3, then uses wt_lo and wt_hi to interpolate + * between the resulting vectors. + */ +static ALWAYS_INLINE __m128i +util_sse2_lerp_2d_epi8_fixed88(__m128i src0, __m128i src1, + const __m128i * restrict src2, + const __m128i * restrict src3, + const __m128i * restrict ws_lo, + const __m128i * restrict ws_hi, + const __m128i * restrict wt_lo, + const __m128i * restrict wt_hi) +{ + const __m128i zero = _mm_setzero_si128(); + + __m128i src0_lo = _mm_unpacklo_epi8(src0, zero); + __m128i src0_hi = _mm_unpackhi_epi8(src0, zero); + + __m128i src1_lo = _mm_unpacklo_epi8(src1, zero); + __m128i src1_hi = _mm_unpackhi_epi8(src1, zero); + + __m128i src2_lo = _mm_unpacklo_epi8(*src2, zero); + __m128i src2_hi = _mm_unpackhi_epi8(*src2, zero); + + __m128i src3_lo = _mm_unpacklo_epi8(*src3, zero); + __m128i src3_hi = _mm_unpackhi_epi8(*src3, zero); + + __m128i dst_lo, dst01_lo, dst23_lo; + __m128i dst_hi, dst01_hi, dst23_hi; + + dst01_lo = util_sse2_lerp_epi16(*ws_lo, src0_lo, src1_lo); + dst01_hi = util_sse2_lerp_epi16(*ws_hi, src0_hi, src1_hi); + dst23_lo = util_sse2_lerp_epi16(*ws_lo, src2_lo, src3_lo); + dst23_hi = util_sse2_lerp_epi16(*ws_hi, src2_hi, src3_hi); + + dst_lo = util_sse2_lerp_epi16(*wt_lo, dst01_lo, dst23_lo); + dst_hi = util_sse2_lerp_epi16(*wt_hi, dst01_hi, dst23_hi); + + return _mm_packus_epi16(dst_lo, dst_hi); +} + +/** + * Stretch a row of pixels using linear filter. 
+ * + * Uses Bresenham's line algorithm using 16.16 fixed point representation for + * the error term. + * + * @param dst_width destination width in pixels + * @param src_x start x0 in 16.16 fixed point format + * @param src_xstep step in 16.16. fixed point format + * + * @return final src_x value (i.e., src_x + dst_width*src_xstep) + */ +static ALWAYS_INLINE int32_t +util_sse2_stretch_row_8unorm(__m128i * restrict dst, + int32_t dst_width, + const uint32_t * restrict src, + int32_t src_x, + int32_t src_xstep) +{ + int16_t error0, error1, error2, error3; + __m128i error_lo, error_hi, error_step; + + assert(dst_width >= 0); + assert(dst_width % 4 == 0); + + error0 = src_x; + error1 = error0 + src_xstep; + error2 = error1 + src_xstep; + error3 = error2 + src_xstep; + + error_lo = _mm_setr_epi16(error0, error0, error0, error0, + error1, error1, error1, error1); + error_hi = _mm_setr_epi16(error2, error2, error2, error2, + error3, error3, error3, error3); + error_step = _mm_set1_epi16(src_xstep << 2); + + dst_width >>= 2; + while (dst_width) { + uint16_t src_x0; + uint16_t src_x1; + uint16_t src_x2; + uint16_t src_x3; + __m128i src0, src1; + __m128i weight_lo, weight_hi; + + /* + * It is faster to re-compute the coordinates in the scalar integer unit here, + * than to fetch the values from the SIMD integer unit. + */ + + src_x0 = src_x >> 16; + src_x += src_xstep; + src_x1 = src_x >> 16; + src_x += src_xstep; + src_x2 = src_x >> 16; + src_x += src_xstep; + src_x3 = src_x >> 16; + src_x += src_xstep; + + /* + * Fetch pairs of pixels 64bit at a time, and then swizzle them inplace. 
+ */ + + { + __m128i src_00_10 = _mm_loadl_epi64((const __m128i *)&src[src_x0]); + __m128i src_01_11 = _mm_loadl_epi64((const __m128i *)&src[src_x1]); + __m128i src_02_12 = _mm_loadl_epi64((const __m128i *)&src[src_x2]); + __m128i src_03_13 = _mm_loadl_epi64((const __m128i *)&src[src_x3]); + + __m128i src_00_01_10_11 = _mm_unpacklo_epi32(src_00_10, src_01_11); + __m128i src_02_03_12_13 = _mm_unpacklo_epi32(src_02_12, src_03_13); + + src0 = _mm_unpacklo_epi64(src_00_01_10_11, src_02_03_12_13); + src1 = _mm_unpackhi_epi64(src_00_01_10_11, src_02_03_12_13); + } + + weight_lo = _mm_srli_epi16(error_lo, 8); + weight_hi = _mm_srli_epi16(error_hi, 8); + + *dst = util_sse2_lerp_epi8_fixed88(src0, src1, + &weight_lo, &weight_hi); + + error_lo = _mm_add_epi16(error_lo, error_step); + error_hi = _mm_add_epi16(error_hi, error_step); + + ++dst; + --dst_width; + } + + return src_x; +} + + + #endif /* PIPE_ARCH_SSE */ #endif /* U_SSE_H_ */ diff --git a/lib/mesa/src/gallium/auxiliary/util/u_trace_gallium.c b/lib/mesa/src/gallium/auxiliary/util/u_trace_gallium.c new file mode 100644 index 000000000..3e9a254a4 --- /dev/null +++ b/lib/mesa/src/gallium/auxiliary/util/u_trace_gallium.c @@ -0,0 +1,96 @@ +/* + * Copyright © 2020 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "u_trace_gallium.h" +#include "u_inlines.h" +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" + +#include "u_tracepoints.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static void * +u_trace_pipe_create_ts_buffer(struct u_trace_context *utctx, uint32_t size) +{ + struct pipe_context *ctx = utctx->pctx; + + struct pipe_resource tmpl = { + .target = PIPE_BUFFER, + .format = PIPE_FORMAT_R8_UNORM, + .bind = PIPE_BIND_QUERY_BUFFER | PIPE_BIND_LINEAR, + .width0 = size, + .height0 = 1, + .depth0 = 1, + .array_size = 1, + }; + + return ctx->screen->resource_create(ctx->screen, &tmpl); +} + +static void +u_trace_pipe_delete_ts_buffer(struct u_trace_context *utctx, void *timestamps) +{ + struct pipe_resource *buffer = timestamps; + pipe_resource_reference(&buffer, NULL); +} + +void +u_trace_pipe_context_init(struct u_trace_context *utctx, + struct pipe_context *pctx, + u_trace_record_ts record_timestamp, + u_trace_read_ts read_timestamp, + u_trace_delete_flush_data delete_flush_data) +{ + u_trace_context_init(utctx, pctx, + u_trace_pipe_create_ts_buffer, + u_trace_pipe_delete_ts_buffer, + record_timestamp, + read_timestamp, + delete_flush_data); +} + +inline void +trace_framebuffer_state(struct u_trace *ut, void *cs, const struct pipe_framebuffer_state *pfb) +{ + if (likely(!ut->enabled)) + return; + + trace_framebuffer(ut, cs, pfb); + + for (unsigned i = 0; i < pfb->nr_cbufs; i++) { + if (pfb->cbufs[i]) { + trace_surface(ut, cs, 
pfb->cbufs[i]); + } + } + if (pfb->zsbuf) { + trace_surface(ut, cs, pfb->zsbuf); + } +} + +#ifdef __cplusplus +} +#endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_trace_gallium.h b/lib/mesa/src/gallium/auxiliary/util/u_trace_gallium.h new file mode 100644 index 000000000..e37e3e663 --- /dev/null +++ b/lib/mesa/src/gallium/auxiliary/util/u_trace_gallium.h @@ -0,0 +1,57 @@ +/* + * Copyright © 2020 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _U_TRACE_GALLIUM_H +#define _U_TRACE_GALLIUM_H + +#include "util/perf/u_trace.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Gallium specific u_trace helpers */ + +struct pipe_context; +struct pipe_framebuffer_state; + +void +u_trace_pipe_context_init(struct u_trace_context *utctx, + struct pipe_context *pctx, + u_trace_record_ts record_timestamp, + u_trace_read_ts read_timestamp, + u_trace_delete_flush_data delete_flush_data); + +/* + * In some cases it is useful to have composite tracepoints like this, + * to log more complex data structures. + */ + +void +trace_framebuffer_state(struct u_trace *ut, void *cs, const struct pipe_framebuffer_state *pfb); + +#ifdef __cplusplus +} +#endif + +#endif /* _U_TRACE_GALLIUM_H */ diff --git a/lib/mesa/src/gallium/auxiliary/util/u_tracepoints.py b/lib/mesa/src/gallium/auxiliary/util/u_tracepoints.py index f8a70d05c..30aaab9df 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_tracepoints.py +++ b/lib/mesa/src/gallium/auxiliary/util/u_tracepoints.py @@ -37,6 +37,8 @@ sys.path.insert(0, args.import_path) from u_trace import Header from u_trace import Tracepoint +from u_trace import TracepointArg as Arg +from u_trace import TracepointArgStruct as ArgStruct from u_trace import utrace_generate # @@ -47,11 +49,11 @@ Header('pipe/p_state.h') Header('util/format/u_format.h') Tracepoint('surface', - args=[['const struct pipe_surface *', 'psurf']], - tp_struct=[['uint16_t', 'width', 'psurf->width'], - ['uint16_t', 'height', 'psurf->height'], - ['uint8_t', 'nr_samples', 'psurf->nr_samples'], - ['const char *', 'format', 'util_format_short_name(psurf->format)']], + args=[ArgStruct(type='const struct pipe_surface *', var='psurf')], + tp_struct=[Arg(type='uint16_t', name='width', var='psurf->width', c_format='%u'), + Arg(type='uint16_t', name='height', var='psurf->height', c_format='%u'), + Arg(type='uint8_t', name='nr_samples', var='psurf->nr_samples', c_format='%u'), + Arg(type='const char *', name='format', 
var='util_format_short_name(psurf->format)', c_format='%s')], tp_print=['%ux%u@%u, fmt=%s', '__entry->width', '__entry->height', @@ -61,12 +63,12 @@ Tracepoint('surface', # Note: called internally from trace_framebuffer_state() Tracepoint('framebuffer', - args=[['const struct pipe_framebuffer_state *', 'pfb']], - tp_struct=[['uint16_t', 'width', 'pfb->width'], - ['uint16_t', 'height', 'pfb->height'], - ['uint8_t', 'layers', 'pfb->layers'], - ['uint8_t', 'samples', 'pfb->samples'], - ['uint8_t', 'nr_cbufs', 'pfb->nr_cbufs']], + args=[ArgStruct(type='const struct pipe_framebuffer_state *', var='pfb')], + tp_struct=[Arg(type='uint16_t', name='width', var='pfb->width', c_format='%u'), + Arg(type='uint16_t', name='height', var='pfb->height', c_format='%u'), + Arg(type='uint8_t', name='layers', var='pfb->layers', c_format='%u'), + Arg(type='uint8_t', name='samples', var='pfb->samples', c_format='%u'), + Arg(type='uint8_t', name='nr_cbufs', var='pfb->nr_cbufs', c_format='%u')], tp_print=['%ux%ux%u@%u, nr_cbufs: %u', '__entry->width', '__entry->height', @@ -76,17 +78,17 @@ Tracepoint('framebuffer', ) Tracepoint('grid_info', - args=[['const struct pipe_grid_info *', 'pgrid']], - tp_struct=[['uint8_t', 'work_dim', 'pgrid->work_dim'], - ['uint16_t', 'block_x', 'pgrid->block[0]'], - ['uint16_t', 'block_y', 'pgrid->block[1]'], - ['uint16_t', 'block_z', 'pgrid->block[2]'], - ['uint16_t', 'grid_x', 'pgrid->grid[0]'], - ['uint16_t', 'grid_y', 'pgrid->grid[1]'], - ['uint16_t', 'grid_z', 'pgrid->grid[2]']], + args=[ArgStruct(type='const struct pipe_grid_info *', var='pgrid')], + tp_struct=[Arg(type='uint8_t', name='work_dim', var='pgrid->work_dim', c_format='%u'), + Arg(type='uint16_t', name='block_x', var='pgrid->block[0]', c_format='%u'), + Arg(type='uint16_t', name='block_y', var='pgrid->block[1]', c_format='%u'), + Arg(type='uint16_t', name='block_z', var='pgrid->block[2]', c_format='%u'), + Arg(type='uint16_t', name='grid_x', var='pgrid->grid[0]', c_format='%u'), + 
Arg(type='uint16_t', name='grid_y', var='pgrid->grid[1]', c_format='%u'), + Arg(type='uint16_t', name='grid_z', var='pgrid->grid[2]', c_format='%u')], tp_print=['work_dim=%u, block=%ux%ux%u, grid=%ux%ux%u', '__entry->work_dim', '__entry->block_x', '__entry->block_y', '__entry->block_z', '__entry->grid_x', '__entry->grid_y', '__entry->grid_z'], ) -utrace_generate(cpath=args.src, hpath=args.hdr) +utrace_generate(cpath=args.src, hpath=args.hdr, ctx_param='struct pipe_context *pctx') diff --git a/lib/mesa/src/gallium/auxiliary/util/u_transfer.c b/lib/mesa/src/gallium/auxiliary/util/u_transfer.c index 84b80d400..80576ddf1 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_transfer.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_transfer.c @@ -31,12 +31,12 @@ void u_default_buffer_subdata(struct pipe_context *pipe, u_box_1d(offset, size, &box); - map = pipe->transfer_map(pipe, resource, 0, usage, &box, &transfer); + map = pipe->buffer_map(pipe, resource, 0, usage, &box, &transfer); if (!map) return; memcpy(map, data, size); - pipe_transfer_unmap(pipe, transfer); + pipe_buffer_unmap(pipe, transfer); } void u_default_texture_subdata(struct pipe_context *pipe, @@ -60,7 +60,7 @@ void u_default_texture_subdata(struct pipe_context *pipe, /* texture_subdata implicitly discards the rewritten buffer range */ usage |= PIPE_MAP_DISCARD_RANGE; - map = pipe->transfer_map(pipe, + map = pipe->texture_map(pipe, resource, level, usage, @@ -81,19 +81,9 @@ void u_default_texture_subdata(struct pipe_context *pipe, layer_stride, /* bytes */ 0, 0, 0); - pipe_transfer_unmap(pipe, transfer); + pipe_texture_unmap(pipe, transfer); } - -bool u_default_resource_get_handle(UNUSED struct pipe_screen *screen, - UNUSED struct pipe_resource *resource, - UNUSED struct winsys_handle *handle) -{ - return FALSE; -} - - - void u_default_transfer_flush_region(UNUSED struct pipe_context *pipe, UNUSED struct pipe_transfer *transfer, UNUSED const struct pipe_box *box) @@ -101,59 +91,3 @@ void 
u_default_transfer_flush_region(UNUSED struct pipe_context *pipe, /* This is a no-op implementation, nothing to do. */ } - -void u_default_transfer_unmap(UNUSED struct pipe_context *pipe, - UNUSED struct pipe_transfer *transfer) -{ -} - - -static inline struct u_resource * -u_resource( struct pipe_resource *res ) -{ - return (struct u_resource *)res; -} - -bool u_resource_get_handle_vtbl(struct pipe_screen *screen, - UNUSED struct pipe_context *ctx, - struct pipe_resource *resource, - struct winsys_handle *handle, - UNUSED unsigned usage) -{ - struct u_resource *ur = u_resource(resource); - return ur->vtbl->resource_get_handle(screen, resource, handle); -} - -void u_resource_destroy_vtbl(struct pipe_screen *screen, - struct pipe_resource *resource) -{ - struct u_resource *ur = u_resource(resource); - ur->vtbl->resource_destroy(screen, resource); -} - -void *u_transfer_map_vtbl(struct pipe_context *context, - struct pipe_resource *resource, - unsigned level, - unsigned usage, - const struct pipe_box *box, - struct pipe_transfer **transfer) -{ - struct u_resource *ur = u_resource(resource); - return ur->vtbl->transfer_map(context, resource, level, usage, box, - transfer); -} - -void u_transfer_flush_region_vtbl( struct pipe_context *pipe, - struct pipe_transfer *transfer, - const struct pipe_box *box) -{ - struct u_resource *ur = u_resource(transfer->resource); - ur->vtbl->transfer_flush_region(pipe, transfer, box); -} - -void u_transfer_unmap_vtbl( struct pipe_context *pipe, - struct pipe_transfer *transfer ) -{ - struct u_resource *ur = u_resource(transfer->resource); - ur->vtbl->transfer_unmap(pipe, transfer); -} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_transfer_helper.c b/lib/mesa/src/gallium/auxiliary/util/u_transfer_helper.c index 47898e0bd..d1e8d123a 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_transfer_helper.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_transfer_helper.c @@ -213,7 +213,7 @@ transfer_map_msaa(struct pipe_context *pctx, 
map_box.x = 0; map_box.y = 0; - void *ss_map = pctx->transfer_map(pctx, trans->ss, 0, usage, &map_box, + void *ss_map = pctx->texture_map(pctx, trans->ss, 0, usage, &map_box, &trans->trans); if (!ss_map) { free(trans); @@ -505,7 +505,7 @@ u_transfer_helper_transfer_unmap(struct pipe_context *pctx, * so don't call helper->vtbl->transfer_unmap() directly */ if (trans->ss) { - pctx->transfer_unmap(pctx, trans->trans); + pctx->texture_unmap(pctx, trans->trans); pipe_resource_reference(&trans->ss, NULL); } else { helper->vtbl->transfer_unmap(pctx, trans->trans); diff --git a/lib/mesa/src/gallium/auxiliary/util/u_vertex_state_cache.c b/lib/mesa/src/gallium/auxiliary/util/u_vertex_state_cache.c new file mode 100644 index 000000000..f98a1071a --- /dev/null +++ b/lib/mesa/src/gallium/auxiliary/util/u_vertex_state_cache.c @@ -0,0 +1,134 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "util/u_vertex_state_cache.h" +#include "util/u_inlines.h" +#include "util/hash_table.h" +#include "util/set.h" + +static uint32_t key_hash(const void *key) +{ + const struct pipe_vertex_state *state = key; + + return _mesa_hash_data(&state->input, sizeof(state->input)); +} + +static bool key_equals(const void *a, const void *b) +{ + const struct pipe_vertex_state *sa = a; + const struct pipe_vertex_state *sb = b; + + return !memcmp(&sa->input, &sb->input, sizeof(sa->input)); +} + +void +util_vertex_state_cache_init(struct util_vertex_state_cache *cache, + pipe_create_vertex_state_func create, + pipe_vertex_state_destroy_func destroy) +{ + simple_mtx_init(&cache->lock, mtx_plain); + cache->set = _mesa_set_create(NULL, key_hash, key_equals); + cache->create = create; + cache->destroy = destroy; +} + +void +util_vertex_state_cache_deinit(struct util_vertex_state_cache *cache) +{ + if (cache->set) { + set_foreach(cache->set, entry) { + fprintf(stderr, "mesa: vertex state cache should be empty\n"); + assert(!"vertex state cache should be empty"); + } + + _mesa_set_destroy(cache->set, NULL); + simple_mtx_destroy(&cache->lock); + } +} + +struct pipe_vertex_state * +util_vertex_state_cache_get(struct pipe_screen *screen, + struct pipe_vertex_buffer *buffer, + const struct pipe_vertex_element *elements, + unsigned num_elements, + struct pipe_resource *indexbuf, + uint32_t full_velem_mask, + struct util_vertex_state_cache *cache) +{ + struct pipe_vertex_state key; + + memset(&key, 0, sizeof(key)); + key.input.indexbuf = indexbuf; + key.input.vbuffer.stride = buffer->stride; + assert(!buffer->is_user_buffer); + key.input.vbuffer.buffer_offset = buffer->buffer_offset; + 
key.input.vbuffer.buffer = buffer->buffer; + key.input.num_elements = num_elements; + for (unsigned i = 0; i < num_elements; i++) + key.input.elements[i] = elements[i]; + key.input.full_velem_mask = full_velem_mask; + + uint32_t hash = key_hash(&key); + + /* Find the state in the live cache. */ + simple_mtx_lock(&cache->lock); + struct set_entry *entry = _mesa_set_search_pre_hashed(cache->set, hash, &key); + struct pipe_vertex_state *state = entry ? (void*)entry->key : NULL; + + /* Return if the state already exists. */ + if (state) { + /* Increase the refcount. */ + p_atomic_inc(&state->reference.count); + assert(state->reference.count >= 1); + simple_mtx_unlock(&cache->lock); + return state; + } + + state = cache->create(screen, buffer, elements, num_elements, indexbuf, + full_velem_mask); + if (state) { + assert(key_hash(state) == hash); + _mesa_set_add_pre_hashed(cache->set, hash, state); + } + + simple_mtx_unlock(&cache->lock); + return state; +} + +void +util_vertex_state_destroy(struct pipe_screen *screen, + struct util_vertex_state_cache *cache, + struct pipe_vertex_state *state) +{ + simple_mtx_lock(&cache->lock); + /* There could have been a thread race and the cache might have returned + * the vertex state being destroyed. Check the reference count and do + * nothing if it's positive. + */ + if (p_atomic_read(&state->reference.count) <= 0) { + _mesa_set_remove_key(cache->set, state); + cache->destroy(screen, state); + } + simple_mtx_unlock(&cache->lock); +} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_vertex_state_cache.h b/lib/mesa/src/gallium/auxiliary/util/u_vertex_state_cache.h new file mode 100644 index 000000000..902e91e43 --- /dev/null +++ b/lib/mesa/src/gallium/auxiliary/util/u_vertex_state_cache.h @@ -0,0 +1,67 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* This deduplicates pipe_vertex_state CSOs to enable draw merging in + * u_threaded_context because the draw merging is possible only if different + * display lists use the same pipe_vertex_state CSO. 
+ */ + +#ifndef U_VERTEX_STATE_CACHE_H +#define U_VERTEX_STATE_CACHE_H + +#include "util/simple_mtx.h" +#include "pipe/p_screen.h" +#include "pipe/p_state.h" + +struct util_vertex_state_cache { + simple_mtx_t lock; + struct set *set; + + pipe_create_vertex_state_func create; + pipe_vertex_state_destroy_func destroy; +}; + +void +util_vertex_state_cache_init(struct util_vertex_state_cache *cache, + pipe_create_vertex_state_func create, + pipe_vertex_state_destroy_func destroy); + +void +util_vertex_state_cache_deinit(struct util_vertex_state_cache *cache); + +struct pipe_vertex_state * +util_vertex_state_cache_get(struct pipe_screen *screen, + struct pipe_vertex_buffer *buffer, + const struct pipe_vertex_element *elements, + unsigned num_elements, + struct pipe_resource *indexbuf, + uint32_t full_velem_mask, + struct util_vertex_state_cache *cache); + +void +util_vertex_state_destroy(struct pipe_screen *screen, + struct util_vertex_state_cache *cache, + struct pipe_vertex_state *state); + +#endif diff --git a/lib/mesa/src/gallium/auxiliary/vl/vl_compositor_cs.c b/lib/mesa/src/gallium/auxiliary/vl/vl_compositor_cs.c index 813aa1489..ad2175213 100644 --- a/lib/mesa/src/gallium/auxiliary/vl/vl_compositor_cs.c +++ b/lib/mesa/src/gallium/auxiliary/vl/vl_compositor_cs.c @@ -727,7 +727,7 @@ draw_layers(struct vl_compositor *c, c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0, num_sampler_views, layer->samplers); c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_COMPUTE, 0, - num_sampler_views, 0, samplers); + num_sampler_views, 0, false, samplers); cs_launch(c, layer->cs, &(drawn.area)); @@ -735,7 +735,7 @@ draw_layers(struct vl_compositor *c, c->pipe->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 0, 1, NULL); c->pipe->set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, false, NULL); c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_FRAGMENT, 0, 0, - num_sampler_views, NULL); + num_sampler_views, false, NULL); c->pipe->bind_compute_state(c->pipe, NULL); 
c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0, num_sampler_views, NULL); diff --git a/lib/mesa/src/gallium/auxiliary/vl/vl_compositor_gfx.c b/lib/mesa/src/gallium/auxiliary/vl/vl_compositor_gfx.c index c4eba2293..24f5625b4 100644 --- a/lib/mesa/src/gallium/auxiliary/vl/vl_compositor_gfx.c +++ b/lib/mesa/src/gallium/auxiliary/vl/vl_compositor_gfx.c @@ -665,7 +665,7 @@ draw_layers(struct vl_compositor *c, struct vl_compositor_state *s, struct u_rec c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_FRAGMENT, 0, num_sampler_views, layer->samplers); c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_FRAGMENT, 0, - num_sampler_views, 0, samplers); + num_sampler_views, 0, false, samplers); util_draw_arrays(c->pipe, PIPE_PRIM_QUADS, vb_index * 4, 4); vb_index++; diff --git a/lib/mesa/src/gallium/auxiliary/vl/vl_idct.c b/lib/mesa/src/gallium/auxiliary/vl/vl_idct.c index ccee0d488..58fd5329d 100644 --- a/lib/mesa/src/gallium/auxiliary/vl/vl_idct.c +++ b/lib/mesa/src/gallium/auxiliary/vl/vl_idct.c @@ -718,7 +718,7 @@ vl_idct_upload_matrix(struct pipe_context *pipe, float scale) if (!matrix) goto error_matrix; - f = pipe->transfer_map(pipe, matrix, 0, + f = pipe->texture_map(pipe, matrix, 0, PIPE_MAP_WRITE | PIPE_MAP_DISCARD_RANGE, &rect, &buf_transfer); @@ -732,7 +732,7 @@ vl_idct_upload_matrix(struct pipe_context *pipe, float scale) // transpose and scale f[i * pitch + j] = ((const float (*)[8])const_matrix)[j][i] * scale; - pipe->transfer_unmap(pipe, buf_transfer); + pipe->texture_unmap(pipe, buf_transfer); memset(&sv_templ, 0, sizeof(sv_templ)); u_sampler_view_default_template(&sv_templ, matrix, matrix->format); @@ -836,7 +836,7 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_ 0, 2, idct->samplers); idct->pipe->set_sampler_views(idct->pipe, PIPE_SHADER_FRAGMENT, 0, 2, 0, - buffer->sampler_views.stage[0]); + false, buffer->sampler_views.stage[0]); /* mismatch control */ idct->pipe->set_framebuffer_state(idct->pipe, 
&buffer->fb_state_mismatch); @@ -863,6 +863,6 @@ vl_idct_prepare_stage2(struct vl_idct *idct, struct vl_idct_buffer *buffer) idct->pipe->bind_sampler_states(idct->pipe, PIPE_SHADER_FRAGMENT, 0, 2, idct->samplers); idct->pipe->set_sampler_views(idct->pipe, PIPE_SHADER_FRAGMENT, - 0, 2, 0, buffer->sampler_views.stage[1]); + 0, 2, 0, false, buffer->sampler_views.stage[1]); } diff --git a/lib/mesa/src/gallium/auxiliary/vl/vl_mc.c b/lib/mesa/src/gallium/auxiliary/vl/vl_mc.c index 0b2a210cb..d331da1d5 100644 --- a/lib/mesa/src/gallium/auxiliary/vl/vl_mc.c +++ b/lib/mesa/src/gallium/auxiliary/vl/vl_mc.c @@ -622,7 +622,7 @@ vl_mc_render_ref(struct vl_mc *renderer, struct vl_mc_buffer *buffer, struct pip renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ref); renderer->pipe->set_sampler_views(renderer->pipe, PIPE_SHADER_FRAGMENT, - 0, 1, 0, &ref); + 0, 1, 0, false, &ref); renderer->pipe->bind_sampler_states(renderer->pipe, PIPE_SHADER_FRAGMENT, 0, 1, &renderer->sampler_ref); |