diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2022-02-24 02:30:08 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2022-02-24 02:30:08 +0000 |
commit | 1d35364040c0ffa99133522fa5ab3bd6131d8bf7 (patch) | |
tree | 0ea3d9ca4ad10692c6477168b67e98cb50ea6bd3 /lib/mesa/src/gallium/drivers/llvmpipe | |
parent | b24b5b9049e889ee4eb39b565bcc8d48bd45ab48 (diff) |
Merge Mesa 21.3.7
Diffstat (limited to 'lib/mesa/src/gallium/drivers/llvmpipe')
24 files changed, 1700 insertions, 668 deletions
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.sources b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.sources deleted file mode 100644 index d928ccba4..000000000 --- a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.sources +++ /dev/null @@ -1,71 +0,0 @@ -C_SOURCES := \ - lp_bld_alpha.c \ - lp_bld_alpha.h \ - lp_bld_blend_aos.c \ - lp_bld_blend.c \ - lp_bld_blend.h \ - lp_bld_blend_logicop.c \ - lp_bld_depth.c \ - lp_bld_depth.h \ - lp_bld_interp.c \ - lp_bld_interp.h \ - lp_clear.c \ - lp_clear.h \ - lp_context.c \ - lp_context.h \ - lp_debug.h \ - lp_draw_arrays.c \ - lp_fence.c \ - lp_fence.h \ - lp_flush.c \ - lp_flush.h \ - lp_jit.c \ - lp_jit.h \ - lp_limits.h \ - lp_memory.c \ - lp_memory.h \ - lp_perf.c \ - lp_perf.h \ - lp_public.h \ - lp_query.c \ - lp_query.h \ - lp_rast.c \ - lp_rast_debug.c \ - lp_rast.h \ - lp_rast_priv.h \ - lp_rast_tri.c \ - lp_rast_tri_tmp.h \ - lp_scene.c \ - lp_scene.h \ - lp_scene_queue.c \ - lp_scene_queue.h \ - lp_screen.c \ - lp_screen.h \ - lp_setup.c \ - lp_setup_context.h \ - lp_setup.h \ - lp_setup_line.c \ - lp_setup_point.c \ - lp_setup_tri.c \ - lp_setup_vbuf.c \ - lp_state_blend.c \ - lp_state_clip.c \ - lp_state_derived.c \ - lp_state_fs.c \ - lp_state_fs.h \ - lp_state_gs.c \ - lp_state.h \ - lp_state_rasterizer.c \ - lp_state_sampler.c \ - lp_state_setup.c \ - lp_state_setup.h \ - lp_state_so.c \ - lp_state_surface.c \ - lp_state_vertex.c \ - lp_state_vs.c \ - lp_surface.c \ - lp_surface.h \ - lp_tex_sample.c \ - lp_tex_sample.h \ - lp_texture.c \ - lp_texture.h diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/ci/deqp-llvmpipe-fails.txt b/lib/mesa/src/gallium/drivers/llvmpipe/ci/deqp-llvmpipe-fails.txt deleted file mode 100644 index 5c92cf1fd..000000000 --- a/lib/mesa/src/gallium/drivers/llvmpipe/ci/deqp-llvmpipe-fails.txt +++ /dev/null @@ -1,15 +0,0 @@ -dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_center,Fail -dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_corner,Fail -dEQP-GLES2.functional.clipping.point.wide_point_clip,Fail -dEQP-GLES2.functional.clipping.point.wide_point_clip_viewport_center,Fail -dEQP-GLES2.functional.clipping.point.wide_point_clip_viewport_corner,Fail -dEQP-GLES2.functional.clipping.triangle_vertex.clip_two.clip_neg_y_neg_z_and_neg_x_neg_y_pos_z,Fail -dEQP-GLES2.functional.clipping.triangle_vertex.clip_two.clip_pos_y_pos_z_and_neg_x_neg_y_neg_z,Fail -dEQP-GLES2.functional.polygon_offset.default_displacement_with_units,Fail -dEQP-GLES2.functional.polygon_offset.fixed16_displacement_with_units,Fail -dEQP-GLES2.functional.rasterization.interpolation.basic.line_loop_wide,Fail -dEQP-GLES2.functional.rasterization.interpolation.basic.line_strip_wide,Fail -dEQP-GLES2.functional.rasterization.interpolation.basic.lines_wide,Fail -dEQP-GLES2.functional.rasterization.interpolation.projected.line_loop_wide,Fail -dEQP-GLES2.functional.rasterization.interpolation.projected.line_strip_wide,Fail -dEQP-GLES2.functional.rasterization.interpolation.projected.lines_wide,Fail diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/ci/llvmpipe-replay.txt b/lib/mesa/src/gallium/drivers/llvmpipe/ci/llvmpipe-replay.txt deleted file mode 100644 index e69de29bb..000000000 --- a/lib/mesa/src/gallium/drivers/llvmpipe/ci/llvmpipe-replay.txt +++ /dev/null diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c index aaf6a80e8..35f3618e8 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c @@ -179,12 +179,13 @@ llvmpipe_create_context(struct pipe_screen *screen, void *priv, { struct llvmpipe_context *llvmpipe; + if (!llvmpipe_screen_late_init(llvmpipe_screen(screen))) + return NULL; + llvmpipe = align_malloc(sizeof(struct llvmpipe_context), 16); if (!llvmpipe) return NULL; - util_init_math(); - memset(llvmpipe, 0, sizeof *llvmpipe); make_empty_list(&llvmpipe->fs_variants_list); @@ -283,6 +284,9 @@ llvmpipe_create_context(struct pipe_screen *screen, void *priv, draw_wide_point_threshold(llvmpipe->draw, 10000.0); draw_wide_line_threshold(llvmpipe->draw, 10000.0); + /* initial state for clipping - enabled, with no guardband */ + draw_set_driver_clipping(llvmpipe->draw, FALSE, FALSE, FALSE, TRUE); + lp_reset_counters(); /* If llvmpipe_set_scissor_states() is never called, we still need to diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h index b1adba61d..c42aeca80 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h @@ -117,6 +117,8 @@ struct llvmpipe_context { /** Vertex format */ struct vertex_info vertex_info; + + uint8_t patch_vertices; /** Which vertex shader output slot contains color */ int8_t color_slot[2]; @@ -156,6 +158,9 @@ struct llvmpipe_context { unsigned nr_fs_variants; unsigned nr_fs_instrs; + boolean permit_linear_rasterizer; + boolean single_vp; + struct lp_setup_variant_list_item setup_variants_list; unsigned nr_setup_variants; diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index e8f0ae609..48d112ac0 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -52,8 +52,9 @@ */ static void llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info, + unsigned drawid_offset, const struct pipe_draw_indirect_info *indirect, - const struct pipe_draw_start_count *draws, + const struct pipe_draw_start_count_bias *draws, unsigned num_draws) { if (!indirect && (!draws[0].count || !info->instance_count)) @@ -145,7 +146,8 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info, !lp->queries_disabled); /* draw! */ - draw_vbo(draw, info, indirect, draws, num_draws); + draw_vbo(draw, info, drawid_offset, indirect, draws, num_draws, + lp->patch_vertices); /* * unmap vertex/index buffers @@ -165,6 +167,16 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info, } } + llvmpipe_cleanup_stage_sampling(lp, PIPE_SHADER_VERTEX); + llvmpipe_cleanup_stage_sampling(lp, PIPE_SHADER_GEOMETRY); + llvmpipe_cleanup_stage_sampling(lp, PIPE_SHADER_TESS_CTRL); + llvmpipe_cleanup_stage_sampling(lp, PIPE_SHADER_TESS_EVAL); + + llvmpipe_cleanup_stage_images(lp, PIPE_SHADER_VERTEX); + llvmpipe_cleanup_stage_images(lp, PIPE_SHADER_GEOMETRY); + llvmpipe_cleanup_stage_images(lp, PIPE_SHADER_TESS_CTRL); + llvmpipe_cleanup_stage_images(lp, PIPE_SHADER_TESS_EVAL); + /* * TODO: Flush only when a user vertex/index buffer is present * (or even better, modify draw module to do this diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c index 80d8d9e5c..3c763240a 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c @@ -39,6 +39,7 @@ #include "gallivm/lp_bld_debug.h" #include "gallivm/lp_bld_format.h" #include "lp_context.h" +#include "lp_screen.h" #include "lp_jit.h" static LLVMTypeRef @@ -111,7 +112,8 @@ create_jit_sampler_type(struct gallivm_state *gallivm) LLVMTypeRef elem_types[LP_JIT_SAMPLER_NUM_FIELDS]; elem_types[LP_JIT_SAMPLER_MIN_LOD] = elem_types[LP_JIT_SAMPLER_MAX_LOD] = - elem_types[LP_JIT_SAMPLER_LOD_BIAS] = LLVMFloatTypeInContext(lc); + elem_types[LP_JIT_SAMPLER_LOD_BIAS] = + elem_types[LP_JIT_SAMPLER_MAX_ANISO] = LLVMFloatTypeInContext(lc); elem_types[LP_JIT_SAMPLER_BORDER_COLOR] = LLVMArrayType(LLVMFloatTypeInContext(lc), 4); @@ -130,6 +132,9 @@ create_jit_sampler_type(struct gallivm_state *gallivm) LP_CHECK_MEMBER_OFFSET(struct lp_jit_sampler, border_color, gallivm->target, sampler_type, LP_JIT_SAMPLER_BORDER_COLOR); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_sampler, max_aniso, + gallivm->target, sampler_type, + LP_JIT_SAMPLER_MAX_ANISO); LP_CHECK_STRUCT_SIZE(struct lp_jit_sampler, gallivm->target, sampler_type); return sampler_type; @@ -185,6 +190,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp) struct gallivm_state *gallivm = lp->gallivm; LLVMContextRef lc = gallivm->context; LLVMTypeRef viewport_type, texture_type, sampler_type, image_type; + LLVMTypeRef linear_elem_type; /* struct lp_jit_viewport */ { @@ -232,6 +238,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp) elem_types[LP_JIT_CTX_U8_BLEND_COLOR] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0); elem_types[LP_JIT_CTX_F_BLEND_COLOR] = LLVMPointerType(LLVMFloatTypeInContext(lc), 0); elem_types[LP_JIT_CTX_VIEWPORTS] = LLVMPointerType(viewport_type, 0); + elem_types[LP_JIT_CTX_ANISO_FILTER_TABLE] = LLVMPointerType(LLVMFloatTypeInContext(lc), 0); elem_types[LP_JIT_CTX_SSBOS] = LLVMArrayType(LLVMPointerType(LLVMInt32TypeInContext(lc), 0), LP_MAX_TGSI_SHADER_BUFFERS); elem_types[LP_JIT_CTX_NUM_SSBOS] = @@ -281,6 +288,9 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp) LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, sample_mask, gallivm->target, context_type, LP_JIT_CTX_SAMPLE_MASK); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, aniso_filter_table, + gallivm->target, context_type, + LP_JIT_CTX_ANISO_FILTER_TABLE); LP_CHECK_STRUCT_SIZE(struct lp_jit_context, gallivm->target, context_type); @@ -306,6 +316,74 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp) lp->jit_thread_data_ptr_type = LLVMPointerType(thread_data_type, 0); } + /* + * lp_linear_elem + * + * XXX: it can be instanced only once due to the use of opaque types, and + * the fact that screen->module is also a global. + */ + { + LLVMTypeRef ret_type; + LLVMTypeRef arg_types[1]; + LLVMTypeRef func_type; + + ret_type = LLVMPointerType(LLVMVectorType(LLVMInt8TypeInContext(lc), 16), 0); + + arg_types[0] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0); + + /* lp_linear_func */ + func_type = LLVMFunctionType(ret_type, arg_types, ARRAY_SIZE(arg_types), 0); + + /* + * We actually define lp_linear_elem not as a structure but simply as a + * lp_linear_func pointer + */ + linear_elem_type = LLVMPointerType(func_type, 0); + } + + /* struct lp_jit_linear_context */ + { + LLVMTypeRef linear_elem_ptr_type = LLVMPointerType(linear_elem_type, 0); + LLVMTypeRef elem_types[LP_JIT_LINEAR_CTX_COUNT]; + LLVMTypeRef linear_context_type; + + + elem_types[LP_JIT_LINEAR_CTX_CONSTANTS] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0); + elem_types[LP_JIT_LINEAR_CTX_TEX] = + LLVMArrayType(linear_elem_ptr_type, LP_MAX_LINEAR_TEXTURES); + elem_types[LP_JIT_LINEAR_CTX_INPUTS] = + LLVMArrayType(linear_elem_ptr_type, LP_MAX_LINEAR_INPUTS); + elem_types[LP_JIT_LINEAR_CTX_COLOR0] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0); + elem_types[LP_JIT_LINEAR_CTX_BLEND_COLOR] = LLVMInt32TypeInContext(lc); + elem_types[LP_JIT_LINEAR_CTX_ALPHA_REF] = LLVMInt8TypeInContext(lc); + + linear_context_type = LLVMStructTypeInContext(lc, elem_types, + ARRAY_SIZE(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct lp_jit_linear_context, constants, + gallivm->target, linear_context_type, + LP_JIT_LINEAR_CTX_CONSTANTS); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_linear_context, tex, + gallivm->target, linear_context_type, + LP_JIT_LINEAR_CTX_TEX); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_linear_context, inputs, + gallivm->target, linear_context_type, + LP_JIT_LINEAR_CTX_INPUTS); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_linear_context, color0, + gallivm->target, linear_context_type, + LP_JIT_LINEAR_CTX_COLOR0); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_linear_context, blend_color, + gallivm->target, linear_context_type, + LP_JIT_LINEAR_CTX_BLEND_COLOR); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_linear_context, alpha_ref_value, + gallivm->target, linear_context_type, + LP_JIT_LINEAR_CTX_ALPHA_REF); + LP_CHECK_STRUCT_SIZE(struct lp_jit_linear_context, + gallivm->target, linear_context_type); + + lp->jit_linear_context_ptr_type = LLVMPointerType(linear_context_type, 0); + } + if (gallivm_debug & GALLIVM_DEBUG_IR) { char *str = LLVMPrintModuleToString(gallivm->module); fprintf(stderr, "%s", str); @@ -385,6 +463,8 @@ lp_jit_create_cs_types(struct lp_compute_shader_variant *lp) elem_types[LP_JIT_CS_CTX_KERNEL_ARGS] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0); + elem_types[LP_JIT_CS_CTX_ANISO_FILTER_TABLE] = LLVMPointerType(LLVMFloatTypeInContext(lc), 0); + cs_context_type = LLVMStructTypeInContext(lc, elem_types, ARRAY_SIZE(elem_types), 0); @@ -415,6 +495,9 @@ lp_jit_create_cs_types(struct lp_compute_shader_variant *lp) LP_CHECK_MEMBER_OFFSET(struct lp_jit_cs_context, kernel_args, gallivm->target, cs_context_type, LP_JIT_CS_CTX_KERNEL_ARGS); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_cs_context, aniso_filter_table, + gallivm->target, cs_context_type, + LP_JIT_CS_CTX_ANISO_FILTER_TABLE); LP_CHECK_STRUCT_SIZE(struct lp_jit_cs_context, gallivm->target, cs_context_type); diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c index 94a0711df..f67fbda6b 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c @@ -509,6 +509,126 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, } /** + * Directly copy pixels from a texture to the destination color buffer. + * This is a bin command called during bin processing. + */ +static void +lp_rast_blit_tile_to_dest(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_scene *scene = task->scene; + const struct lp_rast_shader_inputs *inputs = arg.shade_tile; + const struct lp_rast_state *state = task->state; + struct lp_fragment_shader_variant *variant = state->variant; + const struct lp_jit_texture *texture = &state->jit_context.textures[0]; + const uint8_t *src; + uint8_t *dst; + unsigned src_stride; + unsigned dst_stride; + struct pipe_surface *cbuf = scene->fb.cbufs[0]; + const unsigned face_slice = cbuf->u.tex.first_layer; + const unsigned level = cbuf->u.tex.level; + struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture); + int src_x, src_y; + + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + + if (inputs->disable) { + /* This command was partially binned and has been disabled */ + return; + } + + dst = llvmpipe_get_texture_image_address(lpt, face_slice, level); + + if (!dst) + return; + + dst_stride = lpt->row_stride[level]; + + src = texture->base; + src_stride = texture->row_stride[0]; + + src_x = util_iround(GET_A0(inputs)[1][0]*texture->width - 0.5f); + src_y = util_iround(GET_A0(inputs)[1][1]*texture->height - 0.5f); + + src_x = src_x + task->x; + src_y = src_y + task->y; + + if (0) { + union util_color uc; + uc.ui[0] = 0xff0000ff; + util_fill_rect(dst, + cbuf->format, + dst_stride, + task->x, + task->y, + task->width, + task->height, + &uc); + return; + } + + if (src_x >= 0 && + src_y >= 0 && + src_x + task->width <= texture->width && + src_y + task->height <= texture->height) { + + if (variant->shader->kind == LP_FS_KIND_BLIT_RGBA || + (variant->shader->kind == LP_FS_KIND_BLIT_RGB1 && + cbuf->format == PIPE_FORMAT_B8G8R8X8_UNORM)) { + util_copy_rect(dst, + cbuf->format, + dst_stride, + task->x, task->y, + task->width, task->height, + src, src_stride, + src_x, src_y); + return; + } + + if (variant->shader->kind == LP_FS_KIND_BLIT_RGB1) { + if (cbuf->format == PIPE_FORMAT_B8G8R8A8_UNORM) { + int x, y; + + dst += task->x * 4; + src += src_x * 4; + dst += task->y * dst_stride; + src += src_y * src_stride; + + for (y = 0; y < task->height; ++y) { + const uint32_t *src_row = (const uint32_t *)src; + uint32_t *dst_row = (uint32_t *)dst; + + for (x = 0; x < task->width; ++x) { + *dst_row++ = *src_row++ | 0xff000000; + } + dst += dst_stride; + src += src_stride; + } + + return; + } + } + + } + + /* + * Fall back to the jit shaders. + */ + + lp_rast_shade_tile_opaque(task, arg); +} + +static void +lp_rast_blit_tile(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + /* This kindof just works, but isn't efficient: + */ + lp_rast_blit_tile_to_dest(task, arg); +} + +/** * Begin a new occlusion query. * This is a bin command put in all bins. * Called per thread. @@ -601,8 +721,123 @@ lp_rast_tile_end(struct lp_rasterizer_task *task) task->bin = NULL; } -static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] = -{ + + + + + +/* Currently have two rendering paths only - the general case triangle + * path and the super-specialized blit/clear path. + */ +#define TRI ((LP_RAST_FLAGS_TRI <<1)-1) /* general case */ +#define RECT ((LP_RAST_FLAGS_RECT<<1)-1) /* direct rectangle rasterizer */ +#define BLIT ((LP_RAST_FLAGS_BLIT<<1)-1) /* write direct-to-dest */ + +static const unsigned +rast_flags[] = { + BLIT, /* clear color */ + TRI, /* clear zstencil */ + TRI, /* triangle_1 */ + TRI, /* triangle_2 */ + TRI, /* triangle_3 */ + TRI, /* triangle_4 */ + TRI, /* triangle_5 */ + TRI, /* triangle_6 */ + TRI, /* triangle_7 */ + TRI, /* triangle_8 */ + TRI, /* triangle_3_4 */ + TRI, /* triangle_3_16 */ + TRI, /* triangle_4_16 */ + RECT, /* shade_tile */ + RECT, /* shade_tile_opaque */ + TRI, /* begin_query */ + TRI, /* end_query */ + BLIT, /* set_state, */ + TRI, /* lp_rast_triangle_32_1 */ + TRI, /* lp_rast_triangle_32_2 */ + TRI, /* lp_rast_triangle_32_3 */ + TRI, /* lp_rast_triangle_32_4 */ + TRI, /* lp_rast_triangle_32_5 */ + TRI, /* lp_rast_triangle_32_6 */ + TRI, /* lp_rast_triangle_32_7 */ + TRI, /* lp_rast_triangle_32_8 */ + TRI, /* lp_rast_triangle_32_3_4 */ + TRI, /* lp_rast_triangle_32_3_16 */ + TRI, /* lp_rast_triangle_32_4_16 */ + TRI, /* lp_rast_triangle_ms_1 */ + TRI, /* lp_rast_triangle_ms_2 */ + TRI, /* lp_rast_triangle_ms_3 */ + TRI, /* lp_rast_triangle_ms_4 */ + TRI, /* lp_rast_triangle_ms_5 */ + TRI, /* lp_rast_triangle_ms_6 */ + TRI, /* lp_rast_triangle_ms_7 */ + TRI, /* lp_rast_triangle_ms_8 */ + TRI, /* lp_rast_triangle_ms_3_4 */ + TRI, /* lp_rast_triangle_ms_3_16 */ + TRI, /* lp_rast_triangle_ms_4_16 */ + + RECT, /* rectangle */ + BLIT, /* blit */ +}; + +/* + */ +static const lp_rast_cmd_func +dispatch_blit[] = { + lp_rast_clear_color, + NULL, /* clear_zstencil */ + NULL, /* triangle_1 */ + NULL, /* triangle_2 */ + NULL, /* triangle_3 */ + NULL, /* triangle_4 */ + NULL, /* triangle_5 */ + NULL, /* triangle_6 */ + NULL, /* triangle_7 */ + NULL, /* triangle_8 */ + NULL, /* triangle_3_4 */ + NULL, /* triangle_3_16 */ + NULL, /* triangle_4_16 */ + NULL, /* shade_tile */ + NULL, /* shade_tile_opaque */ + NULL, /* begin_query */ + NULL, /* end_query */ + lp_rast_set_state, /* set_state */ + NULL, /* lp_rast_triangle_32_1 */ + NULL, /* lp_rast_triangle_32_2 */ + NULL, /* lp_rast_triangle_32_3 */ + NULL, /* lp_rast_triangle_32_4 */ + NULL, /* lp_rast_triangle_32_5 */ + NULL, /* lp_rast_triangle_32_6 */ + NULL, /* lp_rast_triangle_32_7 */ + NULL, /* lp_rast_triangle_32_8 */ + NULL, /* lp_rast_triangle_32_3_4 */ + NULL, /* lp_rast_triangle_32_3_16 */ + NULL, /* lp_rast_triangle_32_4_16 */ + NULL, /* lp_rast_triangle_ms_1 */ + NULL, /* lp_rast_triangle_ms_2 */ + NULL, /* lp_rast_triangle_ms_3 */ + NULL, /* lp_rast_triangle_ms_4 */ + NULL, /* lp_rast_triangle_ms_5 */ + NULL, /* lp_rast_triangle_ms_6 */ + NULL, /* lp_rast_triangle_ms_7 */ + NULL, /* lp_rast_triangle_ms_8 */ + NULL, /* lp_rast_triangle_ms_3_4 */ + NULL, /* lp_rast_triangle_ms_3_16 */ + NULL, /* lp_rast_triangle_ms_4_16 */ + + NULL, /* rectangle */ + lp_rast_blit_tile_to_dest, +}; + + + +/* Triangle and general case rasterization: Use the SOA llvm shdaers, + * an active swizzled tile for each color buf, etc. Don't blit/clear + * directly to destination surface as we know there are swizzled + * operations coming. + */ +static const lp_rast_cmd_func +dispatch_tri[] = { lp_rast_clear_color, lp_rast_clear_zstencil, lp_rast_triangle_1, @@ -643,27 +878,133 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] = lp_rast_triangle_ms_3_4, lp_rast_triangle_ms_3_16, lp_rast_triangle_ms_4_16, + lp_rast_rectangle, + lp_rast_blit_tile, +}; + + +/* Debug rasterization with most fastpaths disabled. + */ +static const lp_rast_cmd_func +dispatch_tri_debug[] = +{ + lp_rast_clear_color, + lp_rast_clear_zstencil, + lp_rast_triangle_1, + lp_rast_triangle_2, + lp_rast_triangle_3, + lp_rast_triangle_4, + lp_rast_triangle_5, + lp_rast_triangle_6, + lp_rast_triangle_7, + lp_rast_triangle_8, + lp_rast_triangle_3_4, + lp_rast_triangle_3_16, + lp_rast_triangle_4_16, + lp_rast_shade_tile, + lp_rast_shade_tile, + lp_rast_begin_query, + lp_rast_end_query, + lp_rast_set_state, + lp_rast_triangle_32_1, + lp_rast_triangle_32_2, + lp_rast_triangle_32_3, + lp_rast_triangle_32_4, + lp_rast_triangle_32_5, + lp_rast_triangle_32_6, + lp_rast_triangle_32_7, + lp_rast_triangle_32_8, + lp_rast_triangle_32_3_4, + lp_rast_triangle_32_3_16, + lp_rast_triangle_32_4_16, + lp_rast_triangle_ms_1, + lp_rast_triangle_ms_2, + lp_rast_triangle_ms_3, + lp_rast_triangle_ms_4, + lp_rast_triangle_ms_5, + lp_rast_triangle_ms_6, + lp_rast_triangle_ms_7, + lp_rast_triangle_ms_8, + lp_rast_triangle_ms_3_4, + lp_rast_triangle_ms_3_16, + lp_rast_triangle_ms_4_16, + + lp_rast_rectangle, + lp_rast_shade_tile, }; +struct lp_bin_info +lp_characterize_bin(const struct cmd_bin *bin) +{ + struct cmd_block *block; + struct lp_bin_info info; + unsigned andflags = ~0; + unsigned k, j = 0; + + STATIC_ASSERT(ARRAY_SIZE(rast_flags) == LP_RAST_OP_MAX); + + for (block = bin->head; block; block = block->next) { + for (k = 0; k < block->count; k++, j++) { + andflags &= rast_flags[block->cmd[k]]; + } + } + + info.type = andflags; + info.count = j; + + return info; +} + static void -do_rasterize_bin(struct lp_rasterizer_task *task, - const struct cmd_bin *bin, - int x, int y) +blit_rasterize_bin(struct lp_rasterizer_task *task, + const struct cmd_bin *bin) { const struct cmd_block *block; unsigned k; - if (0) - lp_debug_bin(bin, x, y); + STATIC_ASSERT(ARRAY_SIZE(dispatch_blit) == LP_RAST_OP_MAX); + if (0) debug_printf("%s\n", __FUNCTION__); for (block = bin->head; block; block = block->next) { for (k = 0; k < block->count; k++) { - dispatch[block->cmd[k]]( task, block->arg[k] ); + dispatch_blit[block->cmd[k]]( task, block->arg[k] ); } } } +static void +tri_rasterize_bin(struct lp_rasterizer_task *task, + const struct cmd_bin *bin, + int x, int y) +{ + const struct cmd_block *block; + unsigned k; + + STATIC_ASSERT(ARRAY_SIZE(dispatch_tri) == LP_RAST_OP_MAX); + + for (block = bin->head; block; block = block->next) { + for (k = 0; k < block->count; k++) { + dispatch_tri[block->cmd[k]]( task, block->arg[k] ); + } + } +} + +static void +debug_rasterize_bin(struct lp_rasterizer_task *task, + const struct cmd_bin *bin) +{ + const struct cmd_block *block; + unsigned k; + + STATIC_ASSERT(ARRAY_SIZE(dispatch_tri_debug) == LP_RAST_OP_MAX); + + for (block = bin->head; block; block = block->next) { + for (k = 0; k < block->count; k++) { + dispatch_tri_debug[block->cmd[k]]( task, block->arg[k] ); + } + } +} /** @@ -676,9 +1017,20 @@ static void rasterize_bin(struct lp_rasterizer_task *task, const struct cmd_bin *bin, int x, int y ) { + struct lp_bin_info info = lp_characterize_bin(bin); + lp_rast_tile_begin( task, bin, x, y ); - do_rasterize_bin(task, bin, x, y); + if (LP_DEBUG & DEBUG_NO_FASTPATH) + debug_rasterize_bin(task, bin); + else if (info.type & LP_RAST_FLAGS_BLIT) + blit_rasterize_bin(task, bin); + else if (task->scene->permit_linear_rasterizer && + !(LP_PERF & PERF_NO_RAST_LINEAR) && + (info.type & LP_RAST_FLAGS_RECT)) + lp_linear_rasterize_bin(task, bin); + else + tri_rasterize_bin(task, bin, x, y); lp_rast_tile_end(task); @@ -686,7 +1038,9 @@ rasterize_bin(struct lp_rasterizer_task *task, /* Debug/Perf flags: */ if (bin->head->count == 1) { - if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE) + if (bin->head->cmd[0] == LP_RAST_OP_BLIT) + LP_COUNT(nr_pure_blit_64); + else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE) LP_COUNT(nr_pure_shade_opaque_64); else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE) LP_COUNT(nr_pure_shade_64); @@ -1012,7 +1366,12 @@ void lp_rast_destroy( struct lp_rasterizer *rast ) * per https://bugs.freedesktop.org/show_bug.cgi?id=76252 */ for (i = 0; i < rast->num_threads; i++) { #ifdef _WIN32 - pipe_semaphore_wait(&rast->tasks[i].work_done); + /* Threads might already be dead - Windows apparently terminates other threads when + * returning from main. + */ + DWORD exit_code = STILL_ACTIVE; + if (GetExitCodeThread(rast->threads[i], &exit_code) && exit_code == STILL_ACTIVE) + pipe_semaphore_wait(&rast->tasks[i].work_done); #else thrd_join(rast->threads[i], NULL); #endif diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h index c8154348e..c4da9cca2 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -176,6 +176,7 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task, color = task->color_tiles[buf] + pixel_offset; if (layer) { + assert(layer <= task->scene->fb_max_layer); color += layer * task->scene->cbufs[buf].layer_stride; } @@ -347,6 +348,10 @@ void lp_rast_triangle_32_3_16( struct lp_rasterizer_task *, void lp_rast_triangle_32_4_16( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); + +void lp_rast_rectangle( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + void lp_rast_triangle_ms_1( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); void lp_rast_triangle_ms_2( struct lp_rasterizer_task *, @@ -406,4 +411,13 @@ lp_rast_set_state(struct lp_rasterizer_task *task, void lp_debug_bin( const struct cmd_bin *bin, int x, int y ); +void +lp_linear_rasterize_bin(struct lp_rasterizer_task *task, + const struct cmd_bin *bin); + +void +lp_rast_linear_rect_fallback(struct lp_rasterizer_task *task, + const struct lp_rast_shader_inputs *inputs, + const struct u_rect *box); + #endif diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c index 539b84c65..49db1832e 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c @@ -68,9 +68,7 @@ lp_scene_create( struct pipe_context *pipe ) return NULL; scene->pipe = pipe; - - scene->data.head = - CALLOC_STRUCT(data_block); + scene->data.head = &scene->data.first; (void) mtx_init(&scene->mutex, mtx_plain); @@ -101,8 +99,7 @@ lp_scene_destroy(struct lp_scene *scene) { lp_fence_reference(&scene->fence, NULL); mtx_destroy(&scene->mutex); - assert(scene->data.head->next == NULL); - FREE(scene->data.head); + assert(scene->data.head == &scene->data.first); FREE(scene); } @@ -129,8 +126,8 @@ lp_scene_is_empty(struct lp_scene *scene ) /* Returns true if there has ever been a failed allocation attempt in - * this scene. Used in triangle emit to avoid having to check success - * at each bin. + * this scene. Used in triangle/rectangle emit to avoid having to + * check success at each bin. */ boolean lp_scene_is_oom(struct lp_scene *scene) @@ -155,6 +152,44 @@ lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y) } } +static void +init_scene_texture(struct lp_scene_surface *ssurf, struct pipe_surface *psurf) +{ + if (!psurf) { + ssurf->stride = 0; + ssurf->layer_stride = 0; + ssurf->sample_stride = 0; + ssurf->nr_samples = 0; + ssurf->map = NULL; + return; + } + + if (llvmpipe_resource_is_texture(psurf->texture)) { + ssurf->stride = llvmpipe_resource_stride(psurf->texture, + psurf->u.tex.level); + ssurf->layer_stride = llvmpipe_layer_stride(psurf->texture, + psurf->u.tex.level); + ssurf->sample_stride = llvmpipe_sample_stride(psurf->texture); + + ssurf->map = llvmpipe_resource_map(psurf->texture, + psurf->u.tex.level, + psurf->u.tex.first_layer, + LP_TEX_USAGE_READ_WRITE); + ssurf->format_bytes = util_format_get_blocksize(psurf->format); + ssurf->nr_samples = util_res_sample_count(psurf->texture); + } + else { + struct llvmpipe_resource *lpr = llvmpipe_resource(psurf->texture); + unsigned pixstride = util_format_get_blocksize(psurf->format); + ssurf->stride = psurf->texture->width0; + ssurf->layer_stride = 0; + ssurf->sample_stride = 0; + ssurf->nr_samples = 1; + ssurf->map = lpr->data; + ssurf->map += psurf->u.buf.first_element * pixstride; + ssurf->format_bytes = util_format_get_blocksize(psurf->format); + } +} void lp_scene_begin_rasterization(struct lp_scene *scene) @@ -166,54 +201,12 @@ lp_scene_begin_rasterization(struct lp_scene *scene) for (i = 0; i < scene->fb.nr_cbufs; i++) { struct pipe_surface *cbuf = scene->fb.cbufs[i]; - - if (!cbuf) { - scene->cbufs[i].stride = 0; - scene->cbufs[i].layer_stride = 0; - scene->cbufs[i].sample_stride = 0; - scene->cbufs[i].nr_samples = 0; - scene->cbufs[i].map = NULL; - continue; - } - - if (llvmpipe_resource_is_texture(cbuf->texture)) { - scene->cbufs[i].stride = llvmpipe_resource_stride(cbuf->texture, - cbuf->u.tex.level); - scene->cbufs[i].layer_stride = llvmpipe_layer_stride(cbuf->texture, - cbuf->u.tex.level); - scene->cbufs[i].sample_stride = llvmpipe_sample_stride(cbuf->texture); - - scene->cbufs[i].map = llvmpipe_resource_map(cbuf->texture, - cbuf->u.tex.level, - cbuf->u.tex.first_layer, - LP_TEX_USAGE_READ_WRITE); - scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format); - scene->cbufs[i].nr_samples = util_res_sample_count(cbuf->texture); - } - else { - struct llvmpipe_resource *lpr = llvmpipe_resource(cbuf->texture); - unsigned pixstride = util_format_get_blocksize(cbuf->format); - scene->cbufs[i].stride = cbuf->texture->width0; - scene->cbufs[i].layer_stride = 0; - scene->cbufs[i].sample_stride = 0; - scene->cbufs[i].nr_samples = 1; - scene->cbufs[i].map = lpr->data; - scene->cbufs[i].map += cbuf->u.buf.first_element * pixstride; - scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format); - } + init_scene_texture(&scene->cbufs[i], cbuf); } if (fb->zsbuf) { struct pipe_surface *zsbuf = scene->fb.zsbuf; - scene->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->u.tex.level); - scene->zsbuf.layer_stride = llvmpipe_layer_stride(zsbuf->texture, zsbuf->u.tex.level); - scene->zsbuf.sample_stride = llvmpipe_sample_stride(zsbuf->texture); - scene->zsbuf.nr_samples = util_res_sample_count(zsbuf->texture); - scene->zsbuf.map = llvmpipe_resource_map(zsbuf->texture, - zsbuf->u.tex.level, - zsbuf->u.tex.first_layer, - LP_TEX_USAGE_READ_WRITE); - scene->zsbuf.format_bytes = util_format_get_blocksize(zsbuf->format); + init_scene_texture(&scene->zsbuf, zsbuf); } } @@ -226,7 +219,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene) void lp_scene_end_rasterization(struct lp_scene *scene ) { - int i, j; + int i; /* Unmap color buffers */ for (i = 0; i < scene->fb.nr_cbufs; i++) { @@ -252,19 +245,7 @@ lp_scene_end_rasterization(struct lp_scene *scene ) /* Reset all command lists: */ - for (i = 0; i < scene->tiles_x; i++) { - for (j = 0; j < scene->tiles_y; j++) { - struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); - bin->head = NULL; - bin->tail = NULL; - bin->last_state = NULL; - } - } - - /* If there are any bins which weren't cleared by the loop above, - * they will be caught (on debug builds at least) by this assert: - */ - assert(lp_scene_is_empty(scene)); + memset(scene->tile, 0, sizeof scene->tile); /* Decrement texture ref counts */ @@ -282,6 +263,7 @@ lp_scene_end_rasterization(struct lp_scene *scene ) ref->resource[i]->height0, llvmpipe_resource_size(ref->resource[i])); j++; + llvmpipe_resource_unmap(ref->resource[i], 0, 0); pipe_resource_reference(&ref->resource[i], NULL); } } @@ -313,13 +295,14 @@ lp_scene_end_rasterization(struct lp_scene *scene ) struct data_block_list *list = &scene->data; struct data_block *block, *tmp; - for (block = list->head->next; block; block = tmp) { + for (block = list->head; block; block = tmp) { tmp = block->next; - FREE(block); + if (block != &list->first) + FREE(block); } + list->head = &list->first; list->head->next = NULL; - list->head->used = 0; } lp_fence_reference(&scene->fence, NULL); @@ -443,6 +426,12 @@ lp_scene_add_resource_reference(struct lp_scene *scene, memset(ref, 0, sizeof *ref); } + /* Map resource again to increment the map count. We likely use the + * already-mapped pointer in a texture of the jit context, and that pointer + * needs to stay mapped during rasterization. This map is unmap'ed when + * finalizing scene rasterization. */ + llvmpipe_resource_map(resource, 0, 0, LP_TEX_USAGE_READ); + /* Append the reference to the reference block. */ pipe_resource_reference(&ref->resource[ref->count++], resource); diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h index ba6b20139..a089e6a49 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h @@ -54,7 +54,8 @@ struct lp_rast_state; */ #define CMD_BLOCK_MAX 29 -/* Bytes per data block. +/* Bytes per data block. This effectively limits the maximum constant buffer + * size. */ #define DATA_BLOCK_SIZE (64 * 1024) @@ -119,6 +120,15 @@ struct resource_ref; struct shader_ref; +struct lp_scene_surface { + uint8_t *map; + unsigned stride; + unsigned layer_stride; + unsigned format_bytes; + unsigned sample_stride; + unsigned nr_samples; +}; + /** * All bins and bin data are contained here. * Per-bin data goes into the 'tile' bins. @@ -140,14 +150,7 @@ struct lp_scene { /* Framebuffer mappings - valid only between begin_rasterization() * and end_rasterization(). */ - struct { - uint8_t *map; - unsigned stride; - unsigned layer_stride; - unsigned format_bytes; - unsigned sample_stride; - unsigned nr_samples; - } zsbuf, cbufs[PIPE_MAX_COLOR_BUFS]; + struct lp_scene_surface zsbuf, cbufs[PIPE_MAX_COLOR_BUFS]; /* The amount of layers in the fb (minimum of all attachments) */ unsigned fb_max_layer; @@ -179,6 +182,8 @@ struct lp_scene { unsigned resource_reference_size; boolean alloc_failed; + boolean permit_linear_rasterizer; + /** * Number of active tiles in each dimension. * This basically the framebuffer size divided by tile size @@ -234,7 +239,7 @@ lp_scene_alloc( struct lp_scene *scene, unsigned size) if (LP_DEBUG & DEBUG_MEM) debug_printf("alloc %u block %u/%u tot %u/%u\n", - size, block->used, DATA_BLOCK_SIZE, + size, block->used, (unsigned)DATA_BLOCK_SIZE, scene->scene_size, LP_SCENE_MAX_SIZE); if (block->used + size > DATA_BLOCK_SIZE) { @@ -268,7 +273,7 @@ lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size, if (LP_DEBUG & DEBUG_MEM) debug_printf("alloc %u block %u/%u tot %u/%u\n", size + alignment - 1, - block->used, DATA_BLOCK_SIZE, + block->used, (unsigned)DATA_BLOCK_SIZE, scene->scene_size, LP_SCENE_MAX_SIZE); if (block->used + size + alignment - 1 > DATA_BLOCK_SIZE) { @@ -286,17 +291,6 @@ lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size, } -/* Put back data if we decide not to use it, eg. culled triangles. - */ -static inline void -lp_scene_putback_data( struct lp_scene *scene, unsigned size) -{ - struct data_block_list *list = &scene->data; - assert(list->head && list->head->used >= size); - list->head->used -= size; -} - - /** Return pointer to a particular tile's bin. */ static inline struct cmd_bin * lp_scene_get_bin(struct lp_scene *scene, unsigned x, unsigned y) diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c index c8c577623..d308319af 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c @@ -70,11 +70,15 @@ static const struct debug_named_value lp_debug_flags[] = { { "counters", DEBUG_COUNTERS, NULL }, { "scene", DEBUG_SCENE, NULL }, { "fence", DEBUG_FENCE, NULL }, + { "no_fastpath", DEBUG_NO_FASTPATH, NULL }, + { "linear", DEBUG_LINEAR, NULL }, + { "linear2", DEBUG_LINEAR2, NULL }, { "mem", DEBUG_MEM, NULL }, { "fs", DEBUG_FS, NULL }, { "cs", DEBUG_CS, NULL }, { "tgsi_ir", DEBUG_TGSI_IR, NULL }, { "cache_stats", DEBUG_CACHE_STATS, NULL }, + { "accurate_a0", DEBUG_ACCURATE_A0 }, DEBUG_NAMED_VALUE_END }; #endif @@ -89,6 +93,8 @@ static const struct debug_named_value lp_perf_flags[] = { { "no_blend", PERF_NO_BLEND, NULL }, { "no_depth", PERF_NO_DEPTH, NULL }, { "no_alphatest", PERF_NO_ALPHATEST, NULL }, + { "no_rast_linear", PERF_NO_RAST_LINEAR, NULL }, + { "no_shade", PERF_NO_SHADE, NULL }, DEBUG_NAMED_VALUE_END }; @@ -103,10 +109,8 @@ llvmpipe_get_vendor(struct pipe_screen *screen) static const char * llvmpipe_get_name(struct pipe_screen *screen) { - static char buf[100]; - snprintf(buf, sizeof(buf), "llvmpipe (LLVM " MESA_LLVM_VERSION_STRING ", %u bits)", - lp_native_vector_width ); - return buf; + struct llvmpipe_screen *lscreen = llvmpipe_screen(screen); + return lscreen->renderer_string; } @@ -117,6 +121,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_NPOT_TEXTURES: case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: + case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD: case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES: @@ -165,6 +170,8 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_DEPTH_CLIP_DISABLE: return 1; + case PIPE_CAP_DEPTH_CLAMP_ENABLE: + return 1; case PIPE_CAP_SHADER_STENCIL_EXPORT: return 1; case PIPE_CAP_TGSI_INSTANCEID: @@ -202,12 +209,11 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: case PIPE_CAP_VERTEX_COLOR_CLAMPED: return 1; + case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: case PIPE_CAP_GLSL_FEATURE_LEVEL: { struct llvmpipe_screen *lscreen = llvmpipe_screen(screen); return lscreen->use_tgsi ? 330 : 450; } - case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: - return 140; case PIPE_CAP_COMPUTE: return GALLIVM_HAVE_CORO; case PIPE_CAP_USER_VERTEX_BUFFERS: @@ -341,7 +347,12 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: case PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE: case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: + case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: return 1; +#ifdef PIPE_MEMORY_FD + case PIPE_CAP_MEMOBJ: + return 1; +#endif case PIPE_CAP_SAMPLER_REDUCTION_MINMAX: case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_TGSI_VOTE: @@ -378,11 +389,8 @@ llvmpipe_get_shader_param(struct pipe_screen *screen, else return PIPE_SHADER_IR_NIR; } - switch (param) { - default: - return gallivm_get_shader_param(param); - } - FALLTHROUGH; + + return gallivm_get_shader_param(param); case PIPE_SHADER_TESS_CTRL: case PIPE_SHADER_TESS_EVAL: /* Tessellation shader needs llvm coroutines support */ @@ -513,7 +521,7 @@ llvmpipe_get_compute_param(struct pipe_screen *_screen, case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: if (ret) { uint64_t *max_input = ret; - *max_input = 4096; + *max_input = 1576; } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: @@ -552,6 +560,18 @@ llvmpipe_get_compute_param(struct pipe_screen *_screen, return 0; } +static void +llvmpipe_get_driver_uuid(struct pipe_screen *pscreen, char *uuid) +{ + memset(uuid, 0, PIPE_UUID_SIZE); +} + +static void +llvmpipe_get_device_uuid(struct pipe_screen *pscreen, char *uuid) +{ + memset(uuid, 0, PIPE_UUID_SIZE); +} + static const struct nir_shader_compiler_options gallivm_nir_options = { .lower_scmp = true, .lower_flrp32 = true, @@ -564,9 +584,11 @@ static const struct nir_shader_compiler_options gallivm_nir_options = { .lower_ffma16 = true, .lower_ffma32 = true, .lower_ffma64 = true, + .lower_flrp16 = true, .lower_fmod = true, .lower_hadd = true, - .lower_add_sat = true, + .lower_uadd_sat = true, + .lower_iadd_sat = true, .lower_ldexp = true, .lower_pack_snorm_2x16 = true, .lower_pack_snorm_4x8 = true, @@ -581,6 +603,8 @@ static const struct nir_shader_compiler_options gallivm_nir_options = { .lower_unpack_half_2x16 = true, .lower_extract_byte = true, .lower_extract_word = true, + .lower_insert_byte = true, + .lower_insert_word = true, .lower_rotate = true, .lower_uadd_carry = true, .lower_usub_borrow = true, @@ -589,19 +613,20 @@ static const struct nir_shader_compiler_options gallivm_nir_options = { .max_unroll_iterations = 32, .use_interpolated_input_intrinsics = true, .lower_to_scalar = true, - .lower_cs_local_index_from_id = true, .lower_uniforms_to_ubo = true, .lower_vector_cmp = true, .lower_device_index_to_zero = true, + .support_16bit_alu = true, + .lower_fisnormal = true, }; -static void +static char * llvmpipe_finalize_nir(struct pipe_screen *screen, - void *nirptr, - bool optimize) + void *nirptr) { struct nir_shader *nir = (struct nir_shader *)nirptr; lp_build_opt_nir(nir); + return NULL; } static inline const void * @@ -650,7 +675,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, if (MAX2(1, sample_count) != MAX2(1, storage_sample_count)) return false; - if (bind & PIPE_BIND_RENDER_TARGET) { + if (bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SHADER_IMAGE)) { if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { /* this is a lie actually other formats COULD exist where we would fail */ if (format_desc->nr_channels < 3) @@ -674,6 +699,54 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, return false; } + if (bind & PIPE_BIND_SHADER_IMAGE) { + switch (format) { + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R16G16B16A16_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R16G16_FLOAT: + case PIPE_FORMAT_R11G11B10_FLOAT: + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R16_FLOAT: + case PIPE_FORMAT_R32G32B32A32_UINT: + case PIPE_FORMAT_R16G16B16A16_UINT: + case PIPE_FORMAT_R10G10B10A2_UINT: + case PIPE_FORMAT_R8G8B8A8_UINT: + case PIPE_FORMAT_R32G32_UINT: + case PIPE_FORMAT_R16G16_UINT: + case PIPE_FORMAT_R8G8_UINT: + case PIPE_FORMAT_R32_UINT: + case PIPE_FORMAT_R16_UINT: + case PIPE_FORMAT_R8_UINT: + case PIPE_FORMAT_R32G32B32A32_SINT: + case PIPE_FORMAT_R16G16B16A16_SINT: + case PIPE_FORMAT_R8G8B8A8_SINT: + case PIPE_FORMAT_R32G32_SINT: + case PIPE_FORMAT_R16G16_SINT: + case PIPE_FORMAT_R8G8_SINT: + case PIPE_FORMAT_R32_SINT: + case PIPE_FORMAT_R16_SINT: + case PIPE_FORMAT_R8_SINT: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R16G16_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R8_SNORM: + break; + + default: + return false; + } + } + if ((bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) && ((bind & PIPE_BIND_DISPLAY_TARGET) == 0)) { /* Disable all 3-channel formats, where channel size != 32 bits. @@ -687,6 +760,16 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, format_desc->block.bits != 96) { return false; } + + /* Disable 64-bit integer formats for RT/samplers. + * VK CTS crashes with these and they don't make much sense. + */ + int c = util_format_get_first_non_void_channel(format_desc->format); + if (c >= 0) { + if (format_desc->channel[c].pure_integer && format_desc->channel[c].size == 64) + return false; + } + } if (!(bind & PIPE_BIND_VERTEX_BUFFER) && @@ -890,6 +973,36 @@ void lp_disk_cache_insert_shader(struct llvmpipe_screen *screen, disk_cache_compute_key(screen->disk_shader_cache, ir_sha1_cache_key, 20, sha1); disk_cache_put(screen->disk_shader_cache, sha1, cache->data, cache->data_size, NULL); } + +bool +llvmpipe_screen_late_init(struct llvmpipe_screen *screen) +{ + bool ret = true; + mtx_lock(&screen->late_mutex); + + if (screen->late_init_done) + goto out; + + screen->rast = lp_rast_create(screen->num_threads); + if (!screen->rast) { + ret = false; + goto out; + } + + screen->cs_tpool = lp_cs_tpool_create(screen->num_threads); + if (!screen->cs_tpool) { + lp_rast_destroy(screen->rast); + ret = false; + goto out; + } + + lp_disk_cache_create(screen); + screen->late_init_done = true; +out: + mtx_unlock(&screen->late_mutex); + return ret; +} + /** * Create a new pipe_screen object * Note: we're not presently subclassing pipe_screen (no llvmpipe_screen). @@ -939,6 +1052,9 @@ llvmpipe_create_screen(struct sw_winsys *winsys) screen->base.get_timestamp = llvmpipe_get_timestamp; + screen->base.get_driver_uuid = llvmpipe_get_driver_uuid; + screen->base.get_device_uuid = llvmpipe_get_device_uuid; + screen->base.finalize_nir = llvmpipe_finalize_nir; screen->base.get_disk_shader_cache = lp_get_disk_shader_cache; @@ -948,28 +1064,19 @@ llvmpipe_create_screen(struct sw_winsys *winsys) screen->use_tgsi = (LP_DEBUG & DEBUG_TGSI_IR); screen->num_threads = util_get_cpu_caps()->nr_cpus > 1 ? util_get_cpu_caps()->nr_cpus : 0; #ifdef EMBEDDED_DEVICE - screen->num_threads = 0; + screen->num_threads = MIN2(screen->num_threads, 2); #endif screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads); screen->num_threads = MIN2(screen->num_threads, LP_MAX_THREADS); - screen->rast = lp_rast_create(screen->num_threads); - if (!screen->rast) { - lp_jit_screen_cleanup(screen); - FREE(screen); - return NULL; - } - (void) mtx_init(&screen->rast_mutex, mtx_plain); + lp_build_init(); /* get lp_native_vector_width initialised */ + + snprintf(screen->renderer_string, sizeof(screen->renderer_string), "llvmpipe (LLVM " MESA_LLVM_VERSION_STRING ", %u bits)", lp_native_vector_width ); - screen->cs_tpool = lp_cs_tpool_create(screen->num_threads); - if (!screen->cs_tpool) { - lp_rast_destroy(screen->rast); - lp_jit_screen_cleanup(screen); - FREE(screen); - return NULL; - } (void) mtx_init(&screen->cs_mutex, mtx_plain); + (void) mtx_init(&screen->rast_mutex, mtx_plain); + + (void) mtx_init(&screen->late_mutex, mtx_plain); - lp_disk_cache_create(screen); return &screen->base; } diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h index a790c199c..c72bf838a 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h @@ -64,6 +64,11 @@ struct llvmpipe_screen bool use_tgsi; bool allow_cl; + mtx_t late_mutex; + bool late_init_done; + + char renderer_string[100]; + struct disk_cache *disk_shader_cache; unsigned num_disk_shader_cache_hits; unsigned num_disk_shader_cache_misses; @@ -76,6 +81,7 @@ void lp_disk_cache_insert_shader(struct llvmpipe_screen *screen, struct lp_cached_code *cache, unsigned char ir_sha1_cache_key[20]); +bool llvmpipe_screen_late_init(struct llvmpipe_screen *screen); static inline struct llvmpipe_screen * llvmpipe_screen( struct pipe_screen *pipe ) diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c index 43177745a..50f3cea7b 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c @@ -39,6 +39,7 @@ #include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_pack_color.h" +#include "util/u_cpu_detect.h" #include "util/u_viewport.h" #include "draw/draw_pipe.h" #include "util/os_time.h" @@ -53,6 +54,7 @@ #include "lp_setup_context.h" #include "lp_screen.h" #include "lp_state.h" +#include "lp_jit.h" #include "frontend/sw_winsys.h" #include "draw/draw_context.h" @@ -84,6 +86,7 @@ lp_setup_get_empty_scene(struct lp_setup_context *setup) lp_scene_begin_binning(setup->scene, &setup->fb); + setup->scene->permit_linear_rasterizer = setup->permit_linear_rasterizer; } @@ -98,6 +101,20 @@ first_triangle( struct lp_setup_context *setup, setup->triangle( setup, v0, v1, v2 ); } +static boolean +first_rectangle( struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + const float (*v4)[4], + const float (*v5)[4]) +{ + assert(setup->state == SETUP_ACTIVE); + lp_setup_choose_rect( setup ); + return setup->rect( setup, v0, v1, v2, v3, v4, v5 ); +} + static void first_line( struct lp_setup_context *setup, const float (*v0)[4], @@ -117,7 +134,8 @@ first_point( struct lp_setup_context *setup, setup->point( setup, v0 ); } -void lp_setup_reset( struct lp_setup_context *setup ) +void +lp_setup_reset( struct lp_setup_context *setup ) { unsigned i; @@ -145,6 +163,7 @@ void lp_setup_reset( struct lp_setup_context *setup ) setup->line = first_line; setup->point = first_point; setup->triangle = first_triangle; + setup->rect = first_rectangle; } @@ -576,6 +595,7 @@ lp_setup_set_triangle_state( struct lp_setup_context *setup, setup->ccw_is_frontface = ccw_is_frontface; setup->cullmode = cull_mode; setup->triangle = first_triangle; + setup->rect = first_rectangle; setup->multisample = multisample; setup->pixel_offset = half_pixel_center ? 0.5f : 0.0f; setup->bottom_edge_rule = bottom_edge_rule; @@ -588,26 +608,32 @@ lp_setup_set_triangle_state( struct lp_setup_context *setup, void lp_setup_set_line_state( struct lp_setup_context *setup, - float line_width) + float line_width, + boolean line_rectangular) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); setup->line_width = line_width; + setup->rectangular_lines = line_rectangular; } void lp_setup_set_point_state( struct lp_setup_context *setup, float point_size, + boolean point_tri_clip, boolean point_size_per_vertex, uint sprite_coord_enable, - uint sprite_coord_origin) + uint sprite_coord_origin, + boolean point_quad_rasterization) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); setup->point_size = point_size; setup->sprite_coord_enable = sprite_coord_enable; setup->sprite_coord_origin = sprite_coord_origin; + setup->point_tri_clip = point_tri_clip; setup->point_size_per_vertex = point_size_per_vertex; + setup->legacy_points = !point_quad_rasterization; } void @@ -706,7 +732,11 @@ lp_setup_set_fs_images(struct lp_setup_context *setup, if (llvmpipe_resource_is_texture(res)) { uint32_t mip_offset = lp_res->mip_offsets[image->u.tex.level]; + const uint32_t bw = util_format_get_blockwidth(image->resource->format); + const uint32_t bh = util_format_get_blockheight(image->resource->format); + jit_image->width = DIV_ROUND_UP(jit_image->width, bw); + jit_image->height = DIV_ROUND_UP(jit_image->height, bh); jit_image->width = u_minify(jit_image->width, image->u.tex.level); jit_image->height = u_minify(jit_image->height, image->u.tex.level); @@ -829,6 +859,7 @@ lp_setup_set_rasterizer_discard(struct lp_setup_context *setup, setup->line = first_line; setup->point = first_point; setup->triangle = first_triangle; + setup->rect = first_rectangle; } } @@ -842,6 +873,24 @@ lp_setup_set_vertex_info(struct lp_setup_context *setup, } +void +lp_setup_set_linear_mode( struct lp_setup_context *setup, + boolean mode ) +{ + /* The linear rasterizer requires sse2 both at compile and runtime, + * in particular for the code in lp_rast_linear_fallback.c. This + * is more than ten-year-old technology, so it's a reasonable + * baseline. + */ +#if defined(PIPE_ARCH_SSE) + setup->permit_linear_rasterizer = (mode && + util_get_cpu_caps()->has_sse2); +#else + setup->permit_linear_rasterizer = FALSE; +#endif +} + + /** * Called during state validation when LP_NEW_VIEWPORT is set. */ @@ -851,6 +900,7 @@ lp_setup_set_viewports(struct lp_setup_context *setup, const struct pipe_viewport_state *viewports) { struct llvmpipe_context *lp = llvmpipe_context(setup->pipe); + float half_height, x0, y0; unsigned i; LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -859,6 +909,26 @@ lp_setup_set_viewports(struct lp_setup_context *setup, assert(viewports); /* + * Linear rasterizer path for scissor/viewport intersection. + * + * Calculate "scissor" rect from the (first) viewport. + * Just like stored scissor rects need inclusive coords. + * For rounding, assume half pixel center (d3d9 should not end up + * with fractional viewports) - quite obviously for msaa we'd need + * fractional values here (and elsewhere for the point bounding box). + * + * See: lp_setup.c::try_update_scene_state + */ + half_height = fabsf(viewports[0].scale[1]); + x0 = viewports[0].translate[0] - viewports[0].scale[0]; + y0 = viewports[0].translate[1] - half_height; + setup->vpwh.x0 = (int)(x0 + 0.5f); + setup->vpwh.x1 = (int)(viewports[0].scale[0] * 2.0f + x0 - 0.5f); + setup->vpwh.y0 = (int)(y0 + 0.5f); + setup->vpwh.y1 = (int)(half_height * 2.0f + y0 - 0.5f); + setup->dirty |= LP_SETUP_NEW_SCISSOR; + + /* * For use in lp_state_fs.c, propagate the viewport values for all viewports. */ for (i = 0; i < num_viewports; i++) { @@ -878,7 +948,7 @@ lp_setup_set_viewports(struct lp_setup_context *setup, /** - * Called during state validation when LP_NEW_SAMPLER_VIEW is set. + * Called directly by llvmpipe_set_sampler_views */ void lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, @@ -896,6 +966,12 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, for (i = 0; i < max_tex_num; i++) { struct pipe_sampler_view *view = i < num ? views[i] : NULL; + /* We are going to overwrite/unref the current texture further below. If + * set, make sure to unmap its resource to avoid leaking previous + * mapping. */ + if (setup->fs.current_tex[i]) + llvmpipe_resource_unmap(setup->fs.current_tex[i], 0, 0); + if (view) { struct pipe_resource *res = view->texture; struct llvmpipe_resource *lp_tex = llvmpipe_resource(res); @@ -1000,13 +1076,7 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, } else { /* display target texture/surface */ - /* - * XXX: Where should this be unmapped? - */ - struct llvmpipe_screen *screen = llvmpipe_screen(res->screen); - struct sw_winsys *winsys = screen->winsys; - jit_tex->base = winsys->displaytarget_map(winsys, lp_tex->dt, - PIPE_MAP_READ); + jit_tex->base = llvmpipe_resource_map(res, 0, 0, LP_TEX_USAGE_READ); jit_tex->row_stride[0] = lp_tex->row_stride[0]; jit_tex->img_stride[0] = lp_tex->img_stride[0]; jit_tex->mip_offsets[0] = 0; @@ -1028,7 +1098,6 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, setup->dirty |= LP_SETUP_NEW_FS; } - /** * Called during state validation when LP_NEW_SAMPLER is set. */ @@ -1053,6 +1122,7 @@ lp_setup_set_fragment_sampler_state(struct lp_setup_context *setup, jit_sam->min_lod = sampler->min_lod; jit_sam->max_lod = sampler->max_lod; jit_sam->lod_bias = sampler->lod_bias; + jit_sam->max_aniso = sampler->max_anisotropy; COPY_4V(jit_sam->border_color, sampler->border_color.f); } } @@ -1061,6 +1131,8 @@ lp_setup_set_fragment_sampler_state(struct lp_setup_context *setup, } + + /** * Is the given texture referenced by any scene? * Note: we have to check all scenes including any scenes currently @@ -1289,6 +1361,7 @@ try_update_scene_state( struct lp_setup_context *setup ) memcpy(&stored->jit_context, &setup->fs.current.jit_context, sizeof setup->fs.current.jit_context); + stored->jit_context.aniso_filter_table = lp_build_sample_aniso_filter_table(); stored->variant = setup->fs.current.variant; if (!lp_scene_add_frag_shader_reference(scene, @@ -1314,6 +1387,7 @@ try_update_scene_state( struct lp_setup_context *setup ) if (setup->dirty & LP_SETUP_NEW_SCISSOR) { unsigned i; + for (i = 0; i < PIPE_MAX_VIEWPORTS; ++i) { setup->draw_regions[i] = setup->framebuffer; if (setup->scissor_test) { @@ -1321,6 +1395,35 @@ try_update_scene_state( struct lp_setup_context *setup ) &setup->draw_regions[i]); } } + if (setup->permit_linear_rasterizer) { + /* NOTE: this only takes first vp into account. */ + boolean need_vp_scissoring = !!memcmp(&setup->vpwh, &setup->framebuffer, + sizeof(setup->framebuffer)); + assert(setup->viewport_index_slot < 0); + if (need_vp_scissoring) { + u_rect_possible_intersection(&setup->vpwh, + &setup->draw_regions[0]); + } + } + else if (setup->point_tri_clip) { + /* + * for d3d-style point clipping, we're going to need + * the fake vp scissor too. Hence do the intersection with vp, + * but don't indicate this. As above this will only work for first vp + * which should be ok because we instruct draw to only skip point + * clipping when there's only one viewport (this works because d3d10 + * points are always single pixel). + * (Also note that if we have permit_linear_rasterizer this will + * cause large points to always get vp scissored, regardless the + * point_tri_clip setting.) + */ + boolean need_vp_scissoring = !!memcmp(&setup->vpwh, &setup->framebuffer, + sizeof(setup->framebuffer)); + if (need_vp_scissoring) { + u_rect_possible_intersection(&setup->vpwh, + &setup->draw_regions[0]); + } + } } setup->dirty = 0; @@ -1417,7 +1520,10 @@ lp_setup_destroy( struct lp_setup_context *setup ) util_unreference_framebuffer_state(&setup->fb); for (i = 0; i < ARRAY_SIZE(setup->fs.current_tex); i++) { - pipe_resource_reference(&setup->fs.current_tex[i], NULL); + struct pipe_resource **res_ptr = &setup->fs.current_tex[i]; + if (*res_ptr) + llvmpipe_resource_unmap(*res_ptr, 0, 0); + pipe_resource_reference(res_ptr, NULL); } for (i = 0; i < ARRAY_SIZE(setup->constants); i++) { @@ -1650,4 +1756,69 @@ lp_setup_flush_and_restart(struct lp_setup_context *setup) return TRUE; } - +void +lp_setup_add_scissor_planes(const struct u_rect *scissor, + struct lp_rast_plane *plane_s, + boolean s_planes[4], bool multisample) +{ + /* + * When rasterizing scissored tris, use the intersection of the + * triangle bounding box and the scissor rect to generate the + * scissor planes. + * + * This permits us to cut off the triangle "tails" that are present + * in the intermediate recursive levels caused when two of the + * triangles edges don't diverge quickly enough to trivially reject + * exterior blocks from the triangle. + * + * It's not really clear if it's worth worrying about these tails, + * but since we generate the planes for each scissored tri, it's + * free to trim them in this case. + * + * Note that otherwise, the scissor planes only vary in 'C' value, + * and even then only on state-changes. Could alternatively store + * these planes elsewhere. + * (Or only store the c value together with a bit indicating which + * scissor edge this is, so rasterization would treat them differently + * (easier to evaluate) to ordinary planes.) + */ + int adj = multisample ? 127 : 0; + if (s_planes[0]) { + int x0 = scissor->x0 - 1; + plane_s->dcdx = ~0U << 8; + plane_s->dcdy = 0; + plane_s->c = x0 << 8; + plane_s->c += adj; + plane_s->c = -plane_s->c; /* flip sign */ + plane_s->eo = 1 << 8; + plane_s++; + } + if (s_planes[1]) { + int x1 = scissor->x1; + plane_s->dcdx = 1 << 8; + plane_s->dcdy = 0; + plane_s->c = x1 << 8; + plane_s->c += 127 + adj; + plane_s->eo = 0 << 8; + plane_s++; + } + if (s_planes[2]) { + int y0 = scissor->y0 - 1; + plane_s->dcdx = 0; + plane_s->dcdy = 1 << 8; + plane_s->c = y0 << 8; + plane_s->c += adj; + plane_s->c = -plane_s->c; /* flip sign */ + plane_s->eo = 1 << 8; + plane_s++; + } + if (s_planes[3]) { + int y1 = scissor->y1; + plane_s->dcdx = 0; + plane_s->dcdy = ~0U << 8; + plane_s->c = y1 << 8; + plane_s->c += 127 + adj; + plane_s->eo = 0; + plane_s++; + } +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h index 82fc14b5e..656a64f40 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -96,13 +96,17 @@ struct lp_setup_context struct llvmpipe_query *active_queries[LP_MAX_ACTIVE_BINNED_QUERIES]; unsigned active_binned_queries; - boolean flatshade_first; - boolean ccw_is_frontface; - boolean scissor_test; - boolean point_size_per_vertex; - boolean rasterizer_discard; - boolean multisample; - unsigned cullmode; + unsigned flatshade_first:1; + unsigned ccw_is_frontface:1; + unsigned scissor_test:1; + unsigned point_tri_clip:1; + unsigned point_size_per_vertex:1; + unsigned legacy_points:1; + unsigned rasterizer_discard:1; + unsigned permit_linear_rasterizer:1; + unsigned multisample:1; + unsigned rectangular_lines:1; + unsigned cullmode:2; /**< PIPE_FACE_x */ unsigned bottom_edge_rule; float pixel_offset; float line_width; @@ -115,6 +119,7 @@ struct lp_setup_context struct pipe_framebuffer_state fb; struct u_rect framebuffer; struct u_rect scissors[PIPE_MAX_VIEWPORTS]; + struct u_rect vpwh; struct u_rect draw_regions[PIPE_MAX_VIEWPORTS]; /* intersection of fb & scissor */ struct lp_jit_viewport viewports[PIPE_MAX_VIEWPORTS]; @@ -177,6 +182,15 @@ struct lp_setup_context const float (*v0)[4], const float (*v1)[4], const float (*v2)[4]); + + boolean + (*rect)( struct lp_setup_context *, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + const float (*v4)[4], + const float (*v5)[4]); }; static inline void @@ -193,10 +207,15 @@ scissor_planes_needed(boolean scis_planes[4], const struct u_rect *bbox, scis_planes[3] = (bbox->y1 > scissor->y1); } +void +lp_setup_add_scissor_planes(const struct u_rect *scissor, + struct lp_rast_plane *plane_s, + boolean s_planes[4], bool multisample); void lp_setup_choose_triangle( struct lp_setup_context *setup ); void lp_setup_choose_line( struct lp_setup_context *setup ); void lp_setup_choose_point( struct lp_setup_context *setup ); +void lp_setup_choose_rect( struct lp_setup_context *setup ); void lp_setup_init_vbuf(struct lp_setup_context *setup); @@ -207,6 +226,15 @@ void lp_setup_destroy( struct lp_setup_context *setup ); boolean lp_setup_flush_and_restart(struct lp_setup_context *setup); +boolean +lp_setup_whole_tile(struct lp_setup_context *setup, + const struct lp_rast_shader_inputs *inputs, + int tx, int ty); + +boolean +lp_setup_is_blit(const struct lp_setup_context *setup, + const struct lp_rast_shader_inputs *inputs); + void lp_setup_print_triangle(struct lp_setup_context *setup, const float (*v0)[4], @@ -218,6 +246,19 @@ lp_setup_print_vertex(struct lp_setup_context *setup, const char *name, const float (*v)[4]); +void +lp_rect_cw(struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + boolean frontfacing); + +void +lp_setup_triangle_ccw( struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + boolean front ); struct lp_rast_triangle * lp_setup_alloc_triangle(struct lp_scene *scene, @@ -225,6 +266,16 @@ lp_setup_alloc_triangle(struct lp_scene *scene, unsigned nr_planes, unsigned *tri_size); +struct lp_rast_rectangle * +lp_setup_alloc_rectangle(struct lp_scene *scene, + unsigned nr_inputs); + +boolean +lp_setup_analyse_triangles(struct lp_setup_context *setup, + const void *vb, + int stride, + int nr); + boolean lp_setup_bin_triangle(struct lp_setup_context *setup, struct lp_rast_triangle *tri, @@ -233,4 +284,9 @@ lp_setup_bin_triangle(struct lp_setup_context *setup, int nr_planes, unsigned scissor_index); +boolean +lp_setup_bin_rectangle(struct lp_setup_context *setup, + struct lp_rast_rectangle *rect); + + #endif diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c index 0535138df..1f812e8ea 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -357,10 +357,24 @@ try_setup_line( struct lp_setup_context *setup, info.v2 = v2; - /* X-MAJOR LINE */ - if (fabsf(dx) >= fabsf(dy)) { + if (setup->rectangular_lines) { + float scale = (setup->line_width * 0.5f) / sqrtf(area); + int tx = subpixel_snap(-dy * scale); + int ty = subpixel_snap(+dx * scale); + + x[0] = subpixel_snap(v1[0][0] - pixel_offset) - tx; + x[1] = subpixel_snap(v2[0][0] - pixel_offset) - tx; + x[2] = subpixel_snap(v2[0][0] - pixel_offset) + tx; + x[3] = subpixel_snap(v1[0][0] - pixel_offset) + tx; + + y[0] = subpixel_snap(v1[0][1] - pixel_offset) - ty; + y[1] = subpixel_snap(v2[0][1] - pixel_offset) - ty; + y[2] = subpixel_snap(v2[0][1] - pixel_offset) + ty; + y[3] = subpixel_snap(v1[0][1] - pixel_offset) + ty; + } else if (fabsf(dx) >= fabsf(dy)) { float dydx = dy / dx; + /* X-MAJOR LINE */ x1diff = v1[0][0] - floorf(v1[0][0]) - 0.5f; y1diff = v1[0][1] - floorf(v1[0][1]) - 0.5f; x2diff = v2[0][0] - floorf(v2[0][0]) - 0.5f; @@ -412,6 +426,10 @@ try_setup_line( struct lp_setup_context *setup, will_draw_start = sign(-x1diff) != sign(dx); will_draw_end = (sign(x2diff) == sign(-dx)) || x2diff==0; + /* interpolate using the preferred wide-lines formula */ + info.dx *= 1 + dydx * dydx; + info.dy = 0; + if (dx < 0) { /* if v2 is to the right of v1, swap pointers */ const float (*temp)[4] = v1; @@ -509,6 +527,10 @@ try_setup_line( struct lp_setup_context *setup, will_draw_start = sign(y1diff) == sign(dy); will_draw_end = (sign(-y2diff) == sign(dy)) || y2diff==0; + /* interpolate using the preferred wide-lines formula */ + info.dx = 0; + info.dy *= 1 + dxdy * dxdy; + if (dy > 0) { /* if v2 is on top of v1, swap pointers */ const float (*temp)[4] = v1; @@ -572,15 +594,8 @@ try_setup_line( struct lp_setup_context *setup, bbox.y1--; } - if (bbox.x1 < bbox.x0 || - bbox.y1 < bbox.y0) { - if (0) debug_printf("empty bounding box\n"); - LP_COUNT(nr_culled_tris); - return TRUE; - } - if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) { - if (0) debug_printf("offscreen\n"); + if (0) debug_printf("no intersection\n"); LP_COUNT(nr_culled_tris); return TRUE; } @@ -696,60 +711,8 @@ try_setup_line( struct lp_setup_context *setup, if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; } - - /* - * When rasterizing scissored tris, use the intersection of the - * triangle bounding box and the scissor rect to generate the - * scissor planes. - * - * This permits us to cut off the triangle "tails" that are present - * in the intermediate recursive levels caused when two of the - * triangles edges don't diverge quickly enough to trivially reject - * exterior blocks from the triangle. - * - * It's not really clear if it's worth worrying about these tails, - * but since we generate the planes for each scissored tri, it's - * free to trim them in this case. - * - * Note that otherwise, the scissor planes only vary in 'C' value, - * and even then only on state-changes. Could alternatively store - * these planes elsewhere. - * (Or only store the c value together with a bit indicating which - * scissor edge this is, so rasterization would treat them differently - * (easier to evaluate) to ordinary planes.) - */ if (nr_planes > 4) { - struct lp_rast_plane *plane_s = &plane[4]; - - if (s_planes[0]) { - plane_s->dcdx = ~0U << 8; - plane_s->dcdy = 0; - plane_s->c = (1-scissor->x0) << 8; - plane_s->eo = 1 << 8; - plane_s++; - } - if (s_planes[1]) { - plane_s->dcdx = 1 << 8; - plane_s->dcdy = 0; - plane_s->c = (scissor->x1+1) << 8; - plane_s->eo = 0 << 8; - plane_s++; - } - if (s_planes[2]) { - plane_s->dcdx = 0; - plane_s->dcdy = 1 << 8; - plane_s->c = (1-scissor->y0) << 8; - plane_s->eo = 1 << 8; - plane_s++; - } - if (s_planes[3]) { - plane_s->dcdx = 0; - plane_s->dcdy = ~0U << 8; - plane_s->c = (scissor->y1+1) << 8; - plane_s->eo = 0; - plane_s++; - } - assert(plane_s == &plane[nr_planes]); + lp_setup_add_scissor_planes(scissor, &plane[4], s_planes, setup->multisample); } return lp_setup_bin_triangle(setup, line, &bbox, &bboxpos, nr_planes, viewport_index); diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c index 696612309..6d4e42634 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -352,10 +352,8 @@ try_setup_point( struct lp_setup_context *setup, int adj = (setup->bottom_edge_rule != 0) ? 1 : 0; float pixel_offset = setup->multisample ? 0.0 : setup->pixel_offset; struct lp_scene *scene = setup->scene; - struct lp_rast_triangle *point; - unsigned bytes; struct u_rect bbox; - unsigned nr_planes = 4; + int x[2], y[2]; struct point_info info; unsigned viewport_index = 0; unsigned layer = 0; @@ -374,8 +372,7 @@ try_setup_point( struct lp_setup_context *setup, print_point(setup, v0, size); /* Bounding rectangle (in pixels) */ - if (!lp_context->rasterizer || - lp_context->rasterizer->point_quad_rasterization) { + if (!setup->legacy_points || setup->multisample) { /* * Rasterize points as quads. */ @@ -388,10 +385,14 @@ try_setup_point( struct lp_setup_context *setup, x0 = subpixel_snap(v0[0][0] - pixel_offset) - fixed_width/2; y0 = subpixel_snap(v0[0][1] - pixel_offset) - fixed_width/2; - bbox.x0 = (x0 + (FIXED_ONE-1)) >> FIXED_ORDER; - bbox.x1 = (x0 + fixed_width + (FIXED_ONE-1)) >> FIXED_ORDER; - bbox.y0 = (y0 + (FIXED_ONE-1) + adj) >> FIXED_ORDER; - bbox.y1 = (y0 + fixed_width + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + x[0] = x0; + x[1] = x0 + fixed_width; + y[0] = y0; + y[1] = y0 + fixed_width; + bbox.x0 = x[0] >> FIXED_ORDER; + bbox.x1 = (x[1] + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.y0 = (y[0] + adj) >> FIXED_ORDER; + bbox.y1 = (y[1] + (FIXED_ONE-1) + adj) >> FIXED_ORDER; /* Inclusive coordinates: */ @@ -439,6 +440,11 @@ try_setup_point( struct lp_setup_context *setup, bbox.x1 = bbox.x0 + int_width - 1; bbox.y1 = bbox.y0 + int_width - 1; } + + x[0] = (bbox.x0 - 1) << 8; + x[1] = (bbox.x1 + 1) << 8; + y[0] = (bbox.y0 - 1) << 8; + y[1] = (bbox.y1 + 1) << 8; } if (0) { @@ -452,79 +458,143 @@ try_setup_point( struct lp_setup_context *setup, } if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) { - if (0) debug_printf("offscreen\n"); + if (0) debug_printf("no intersection\n"); LP_COUNT(nr_culled_tris); return TRUE; } u_rect_find_intersection(&setup->draw_regions[viewport_index], &bbox); - point = lp_setup_alloc_triangle(scene, - key->num_inputs, - nr_planes, - &bytes); - if (!point) - return FALSE; - + /* We can't use rectangle reasterizer for non-legacy points for now. */ + if (!setup->legacy_points || setup->multisample) { + struct lp_rast_triangle *point; + struct lp_rast_plane *plane; + unsigned bytes; + unsigned nr_planes = 4; + + point = lp_setup_alloc_triangle(scene, + key->num_inputs, + nr_planes, + &bytes); + if (!point) + return FALSE; + #ifdef DEBUG - point->v[0][0] = v0[0][0]; - point->v[0][1] = v0[0][1]; + point->v[0][0] = v0[0][0]; + point->v[0][1] = v0[0][1]; #endif - LP_COUNT(nr_tris); + LP_COUNT(nr_tris); - if (draw_will_inject_frontface(lp_context->draw) && - setup->face_slot > 0) { - point->inputs.frontfacing = v0[setup->face_slot][0]; - } else { - point->inputs.frontfacing = TRUE; - } + if (draw_will_inject_frontface(lp_context->draw) && + setup->face_slot > 0) { + point->inputs.frontfacing = v0[setup->face_slot][0]; + } else { + point->inputs.frontfacing = TRUE; + } - info.v0 = v0; - info.dx01 = 0; - info.dx12 = fixed_width; - info.dy01 = fixed_width; - info.dy12 = 0; - info.a0 = GET_A0(&point->inputs); - info.dadx = GET_DADX(&point->inputs); - info.dady = GET_DADY(&point->inputs); - info.frontfacing = point->inputs.frontfacing; + info.v0 = v0; + info.dx01 = 0; + info.dx12 = fixed_width; + info.dy01 = fixed_width; + info.dy12 = 0; + info.a0 = GET_A0(&point->inputs); + info.dadx = GET_DADX(&point->inputs); + info.dady = GET_DADY(&point->inputs); + info.frontfacing = point->inputs.frontfacing; - /* Setup parameter interpolants: - */ - setup_point_coefficients(setup, &info); + /* Setup parameter interpolants: + */ + setup_point_coefficients(setup, &info); - point->inputs.disable = FALSE; - point->inputs.opaque = FALSE; - point->inputs.layer = layer; - point->inputs.viewport_index = viewport_index; - point->inputs.view_index = setup->view_index; + point->inputs.disable = FALSE; + point->inputs.is_blit = FALSE; + point->inputs.opaque = setup->fs.current.variant->opaque; + point->inputs.layer = layer; + point->inputs.viewport_index = viewport_index; + point->inputs.view_index = setup->view_index; - { - struct lp_rast_plane *plane = GET_PLANES(point); + plane = GET_PLANES(point); plane[0].dcdx = ~0U << 8; plane[0].dcdy = 0; - plane[0].c = (1-bbox.x0) << 8; + plane[0].c = -MAX2(x[0], bbox.x0 << 8); plane[0].eo = 1 << 8; plane[1].dcdx = 1 << 8; plane[1].dcdy = 0; - plane[1].c = (bbox.x1+1) << 8; + plane[1].c = MIN2(x[1], (bbox.x1 + 1) << 8); plane[1].eo = 0; plane[2].dcdx = 0; plane[2].dcdy = 1 << 8; - plane[2].c = (1-bbox.y0) << 8; + plane[2].c = -MAX2(y[0], (bbox.y0 << 8) - adj); plane[2].eo = 1 << 8; plane[3].dcdx = 0; plane[3].dcdy = ~0U << 8; - plane[3].c = (bbox.y1+1) << 8; + plane[3].c = MIN2(y[1], (bbox.y1 + 1) << 8); plane[3].eo = 0; - } - return lp_setup_bin_triangle(setup, point, &bbox, &bbox, nr_planes, viewport_index); + if (!setup->legacy_points || setup->multisample) { + /* adjust for fill-rule*/ + plane[0].c++; /* left */ + if (setup->bottom_edge_rule == 0) + plane[2].c++; /* top-left */ + else + plane[3].c++; /* bottom-left */ + } + + return lp_setup_bin_triangle(setup, point, &bbox, &bbox, nr_planes, viewport_index); + + } else { + struct lp_rast_rectangle *point; + point = lp_setup_alloc_rectangle(scene, + key->num_inputs); + if (!point) + return FALSE; +#ifdef DEBUG + point->v[0][0] = v0[0][0]; + point->v[0][1] = v0[0][1]; +#endif + + point->box.x0 = bbox.x0; + point->box.x1 = bbox.x1; + point->box.y0 = bbox.y0; + point->box.y1 = bbox.y1; + + LP_COUNT(nr_tris); + + if (draw_will_inject_frontface(lp_context->draw) && + setup->face_slot > 0) { + point->inputs.frontfacing = v0[setup->face_slot][0]; + } else { + point->inputs.frontfacing = TRUE; + } + + info.v0 = v0; + info.dx01 = 0; + info.dx12 = fixed_width; + info.dy01 = fixed_width; + info.dy12 = 0; + info.a0 = GET_A0(&point->inputs); + info.dadx = GET_DADX(&point->inputs); + info.dady = GET_DADY(&point->inputs); + info.frontfacing = point->inputs.frontfacing; + + /* Setup parameter interpolants: + */ + setup_point_coefficients(setup, &info); + + point->inputs.disable = FALSE; + point->inputs.is_blit = FALSE; + point->inputs.opaque = setup->fs.current.variant->opaque; + point->inputs.layer = layer; + point->inputs.viewport_index = viewport_index; + point->inputs.view_index = setup->view_index; + + return lp_setup_bin_rectangle(setup, point); + } } diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 4fb76dd22..347f0a61c 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -205,6 +205,7 @@ lp_rast_32_tri_tab[MAX_PLANES+1] = { LP_RAST_OP_TRIANGLE_32_8 }; + static unsigned lp_rast_ms_tri_tab[MAX_PLANES+1] = { 0, /* should be impossible */ @@ -218,56 +219,46 @@ lp_rast_ms_tri_tab[MAX_PLANES+1] = { LP_RAST_OP_MS_TRIANGLE_8 }; -/** - * The primitive covers the whole tile- shade whole tile. +/* + * Detect big primitives drawn with an alpha == 1.0. * - * \param tx, ty the tile position in tiles, not pixels + * This is used when simulating anti-aliasing primitives in shaders, e.g., + * when drawing the windows client area in Aero's flip-3d effect. */ static boolean -lp_setup_whole_tile(struct lp_setup_context *setup, - const struct lp_rast_shader_inputs *inputs, - int tx, int ty) +check_opaque(struct lp_setup_context *setup, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4]) { - struct lp_scene *scene = setup->scene; + const struct lp_fragment_shader_variant *variant = + setup->fs.current.variant; + const struct lp_tgsi_channel_info *alpha_info = &variant->shader->info.cbuf[0][3]; - LP_COUNT(nr_fully_covered_64); - - /* if variant is opaque and scissor doesn't effect the tile */ - if (inputs->opaque) { - /* Several things prevent this optimization from working: - * - For layered rendering we can't determine if this covers the same layer - * as previous rendering (or in case of clears those actually always cover - * all layers so optimization is impossible). Need to use fb_max_layer and - * not setup->layer_slot to determine this since even if there's currently - * no slot assigned previous rendering could have used one. - * - If there were any Begin/End query commands in the scene then those - * would get removed which would be very wrong. Furthermore, if queries - * were just active we also can't do the optimization since to get - * accurate query results we unfortunately need to execute the rendering - * commands. - */ - if (!scene->fb.zsbuf && scene->fb_max_layer == 0 && !scene->had_queries) { - /* - * All previous rendering will be overwritten so reset the bin. - */ - lp_scene_bin_reset( scene, tx, ty ); - } + if (variant->opaque) + return TRUE; + + if (!variant->potentially_opaque) + return FALSE; + + if (alpha_info->file == TGSI_FILE_CONSTANT) { + const float *constants = setup->fs.current.jit_context.constants[0]; + float alpha = constants[alpha_info->u.index*4 + + alpha_info->swizzle]; + return alpha == 1.0f; + } - LP_COUNT(nr_shade_opaque_64); - return lp_scene_bin_cmd_with_state( scene, tx, ty, - setup->fs.stored, - LP_RAST_OP_SHADE_TILE_OPAQUE, - lp_rast_arg_inputs(inputs) ); - } else { - LP_COUNT(nr_shade_64); - return lp_scene_bin_cmd_with_state( scene, tx, ty, - setup->fs.stored, - LP_RAST_OP_SHADE_TILE, - lp_rast_arg_inputs(inputs) ); + if (alpha_info->file == TGSI_FILE_INPUT) { + return (v1[1 + alpha_info->u.index][alpha_info->swizzle] == 1.0f && + v2[1 + alpha_info->u.index][alpha_info->swizzle] == 1.0f && + v3[1 + alpha_info->u.index][alpha_info->swizzle] == 1.0f); } + + return FALSE; } + /** * Do basic setup for triangle rasterization and determine which * framebuffer tiles are touched. Put the triangle in the scene's @@ -333,15 +324,8 @@ do_triangle_ccw(struct lp_setup_context *setup, bbox.y1 = (MAX3(position->y[0], position->y[1], position->y[2]) - 1 + adj) >> FIXED_ORDER; } - if (bbox.x1 < bbox.x0 || - bbox.y1 < bbox.y0) { - if (0) debug_printf("empty bounding box\n"); - LP_COUNT(nr_culled_tris); - return TRUE; - } - if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) { - if (0) debug_printf("offscreen\n"); + if (0) debug_printf("no intersection\n"); LP_COUNT(nr_culled_tris); return TRUE; } @@ -382,17 +366,97 @@ do_triangle_ccw(struct lp_setup_context *setup, LP_COUNT(nr_tris); + /* + * Rotate the tri such that v0 is closest to the fb origin. + * This can give more accurate a0 value (which is at fb origin) + * when calculating the interpolants. + * It can't work when there's flat shading for instance in one + * of the attributes, hence restrict this to just a single attribute + * which is what causes some test failures. + * (This does not address the problem that interpolation may be + * inaccurate if gradients are relatively steep in small tris far + * away from the origin. It does however fix the (silly) wgf11rasterizer + * Interpolator test.) + * XXX This causes problems with mipgen -EmuTexture for not yet really + * understood reasons (if the vertices would be submitted in a different + * order, we'd also generate the same "wrong" results here without + * rotation). In any case, that we generate different values if a prim + * has the vertices rotated but is otherwise the same (which is due to + * numerical issues) is not a nice property. An additional problem by + * swapping the vertices here (which is possibly worse) is that + * the same primitive coming in twice might generate different values + * (in particular for z) due to the swapping potentially not happening + * both times, if the attributes to be interpolated are different. For now, + * just restrict this to not get used with dx9 (by checking pixel offset), + * could also restrict it further to only trigger with wgf11Interpolator + * Rasterizer test (the only place which needs it, with always the same + * vertices even). + */ + if ((LP_DEBUG & DEBUG_ACCURATE_A0) && + setup->pixel_offset == 0.5f && + key->num_inputs == 1 && + (key->inputs[0].interp == LP_INTERP_LINEAR || + key->inputs[0].interp == LP_INTERP_PERSPECTIVE)) { + float dist0 = v0[0][0] * v0[0][0] + v0[0][1] * v0[0][1]; + float dist1 = v1[0][0] * v1[0][0] + v1[0][1] * v1[0][1]; + float dist2 = v2[0][0] * v2[0][0] + v2[0][1] * v2[0][1]; + if (dist0 > dist1 && dist1 < dist2) { + const float (*vt)[4]; + int x, y; + vt = v0; + v0 = v1; + v1 = v2; + v2 = vt; + x = position->x[0]; + y = position->y[0]; + position->x[0] = position->x[1]; + position->y[0] = position->y[1]; + position->x[1] = position->x[2]; + position->y[1] = position->y[2]; + position->x[2] = x; + position->y[2] = y; + + position->dx20 = position->dx01; + position->dy20 = position->dy01; + position->dx01 = position->x[0] - position->x[1]; + position->dy01 = position->y[0] - position->y[1]; + } + else if (dist0 > dist2) { + const float (*vt)[4]; + int x, y; + vt = v0; + v0 = v2; + v2 = v1; + v1 = vt; + x = position->x[0]; + y = position->y[0]; + position->x[0] = position->x[2]; + position->y[0] = position->y[2]; + position->x[2] = position->x[1]; + position->y[2] = position->y[1]; + position->x[1] = x; + position->y[1] = y; + + position->dx01 = position->dx20; + position->dy01 = position->dy20; + position->dx20 = position->x[2] - position->x[0]; + position->dy20 = position->y[2] - position->y[0]; + } + } + /* Setup parameter interpolants: */ setup->setup.variant->jit_function(v0, v1, v2, frontfacing, GET_A0(&tri->inputs), GET_DADX(&tri->inputs), - GET_DADY(&tri->inputs)); + GET_DADY(&tri->inputs), + &setup->setup.variant->key); tri->inputs.frontfacing = frontfacing; tri->inputs.disable = FALSE; - tri->inputs.opaque = setup->fs.current.variant->opaque; + tri->inputs.is_blit = FALSE; + tri->inputs.opaque = check_opaque(setup, v0, v1, v2); tri->inputs.layer = layer; tri->inputs.viewport_index = viewport_index; tri->inputs.view_index = setup->view_index; @@ -670,61 +734,8 @@ do_triangle_ccw(struct lp_setup_context *setup, plane[2].eo); } - - /* - * When rasterizing scissored tris, use the intersection of the - * triangle bounding box and the scissor rect to generate the - * scissor planes. - * - * This permits us to cut off the triangle "tails" that are present - * in the intermediate recursive levels caused when two of the - * triangles edges don't diverge quickly enough to trivially reject - * exterior blocks from the triangle. - * - * It's not really clear if it's worth worrying about these tails, - * but since we generate the planes for each scissored tri, it's - * free to trim them in this case. - * - * Note that otherwise, the scissor planes only vary in 'C' value, - * and even then only on state-changes. Could alternatively store - * these planes elsewhere. - * (Or only store the c value together with a bit indicating which - * scissor edge this is, so rasterization would treat them differently - * (easier to evaluate) to ordinary planes.) - */ if (nr_planes > 3) { - /* why not just use draw_regions */ - struct lp_rast_plane *plane_s = &plane[3]; - - if (s_planes[0]) { - plane_s->dcdx = ~0U << 8; - plane_s->dcdy = 0; - plane_s->c = (1-scissor->x0) << 8; - plane_s->eo = 1 << 8; - plane_s++; - } - if (s_planes[1]) { - plane_s->dcdx = 1 << 8; - plane_s->dcdy = 0; - plane_s->c = (scissor->x1+1) << 8; - plane_s->eo = 0 << 8; - plane_s++; - } - if (s_planes[2]) { - plane_s->dcdx = 0; - plane_s->dcdy = 1 << 8; - plane_s->c = (1-scissor->y0) << 8; - plane_s->eo = 1 << 8; - plane_s++; - } - if (s_planes[3]) { - plane_s->dcdx = 0; - plane_s->dcdy = ~0U << 8; - plane_s->c = (scissor->y1+1) << 8; - plane_s->eo = 0; - plane_s++; - } - assert(plane_s == &plane[nr_planes]); + lp_setup_add_scissor_planes(scissor, &plane[3], s_planes, setup->multisample); } return lp_setup_bin_triangle(setup, tri, &bbox, &bboxpos, nr_planes, viewport_index); @@ -912,8 +923,8 @@ lp_setup_bin_triangle(struct lp_setup_context *setup, ystep[i] = ((int64_t)plane[i].dcdy) << TILE_ORDER; } - - + tri->inputs.is_blit = lp_setup_is_blit(setup, &tri->inputs); + /* Test tile-sized blocks against the triangle. * Discard blocks fully outside the tri. If the block is fully * contained inside the tri, bin an lp_rast_shade_tile command. diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c index 04899dd9b..d30d619d8 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -173,6 +173,74 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) } +static void +check_linear_rasterizer( struct llvmpipe_context *lp ) +{ + boolean bgr8; + boolean permit_linear; + boolean single_vp; + boolean clipping_changed = FALSE; + + bgr8 = (lp->framebuffer.nr_cbufs == 1 && lp->framebuffer.cbufs[0] && + lp->framebuffer.cbufs[0]->texture->nr_samples == 1 && + lp->framebuffer.cbufs[0]->texture->target == PIPE_TEXTURE_2D && + (lp->framebuffer.cbufs[0]->format == PIPE_FORMAT_B8G8R8A8_UNORM || + lp->framebuffer.cbufs[0]->format == PIPE_FORMAT_B8G8R8X8_UNORM)); + + /* permit_linear means guardband, hence fake scissor, which we can only + * handle if there's just one vp. */ + single_vp = lp->viewport_index_slot < 0; + permit_linear = (!lp->framebuffer.zsbuf && + bgr8 && + single_vp); + + /* Tell draw that we're happy doing our own x/y clipping. + */ + if (lp->permit_linear_rasterizer != permit_linear) { + lp->permit_linear_rasterizer = permit_linear; + lp_setup_set_linear_mode(lp->setup, permit_linear); + clipping_changed = TRUE; + } + + if (lp->single_vp != single_vp) { + lp->single_vp = single_vp; + clipping_changed = TRUE; + } + + /* Disable xy clipping in linear mode. + * + * Use a guard band if we don't have zsbuf. Could enable + * guardband always - this just to be conservative. + * + * Because we have a layering violation where the draw module emits + * state changes to the driver while we're already inside a draw + * call, need to be careful about when we make calls back to the + * draw module. Hence the clipping_changed flag which is as much + * to prevent flush recursion as it is to short-circuit noop state + * changes. + */ + if (clipping_changed) { + draw_set_driver_clipping(lp->draw, + FALSE, + FALSE, + permit_linear, + single_vp); + } +} + + +/** + * Handle state changes before clears. + * Called just prior to clearing (pipe::clear()). + */ +void llvmpipe_update_derived_clear( struct llvmpipe_context *llvmpipe ) +{ + if (llvmpipe->dirty & (LP_NEW_FS | + LP_NEW_FRAMEBUFFER)) + check_linear_rasterizer(llvmpipe); +} + + /** * Handle state changes. * Called just prior to drawing anything (pipe::draw_arrays(), etc). @@ -293,6 +361,8 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) llvmpipe->viewports); } + llvmpipe_update_derived_clear(llvmpipe); + llvmpipe->dirty = 0; } diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c index 2fe01ce48..80dd95f01 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -67,6 +67,7 @@ #include "util/u_string.h" #include "util/simple_list.h" #include "util/u_dual_blend.h" +#include "util/u_upload_mgr.h" #include "util/os_time.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" @@ -451,10 +452,13 @@ static LLVMValueRef fs_interp(const struct lp_build_fs_iface *iface, } static void fs_fb_fetch(const struct lp_build_fs_iface *iface, - struct lp_build_context *bld, - unsigned cbuf, - LLVMValueRef result[4]) + struct lp_build_context *bld, + int location, + LLVMValueRef result[4]) { + assert(location >= FRAG_RESULT_DATA0 && location <= FRAG_RESULT_DATA7); + const int cbuf = location - FRAG_RESULT_DATA0; + struct lp_build_fs_llvm_iface *fs_iface = (struct lp_build_fs_llvm_iface *)iface; struct gallivm_state *gallivm = bld->gallivm; LLVMBuilderRef builder = gallivm->builder; @@ -589,7 +593,8 @@ generate_fs_loop(struct gallivm_state *gallivm, LLVMValueRef stencil_refs[2]; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; LLVMValueRef zs_samples = lp_build_const_int32(gallivm, key->zsbuf_nr_samples); - struct lp_build_for_loop_state loop_state, sample_loop_state; + LLVMValueRef z_out = NULL, s_out = NULL; + struct lp_build_for_loop_state loop_state, sample_loop_state = {0}; struct lp_build_mask_context mask; /* * TODO: figure out if simple_shader optimization is really worthwile to @@ -700,6 +705,17 @@ generate_fs_loop(struct gallivm_state *gallivm, color_store_size, "color1"); } } + if (shader->info.base.writes_z) { + z_out = lp_build_array_alloca(gallivm, + lp_build_vec_type(gallivm, type), + color_store_size, "depth"); + } + + if (shader->info.base.writes_stencil) { + s_out = lp_build_array_alloca(gallivm, + lp_build_vec_type(gallivm, type), + color_store_size, "depth"); + } lp_build_for_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0), @@ -959,6 +975,7 @@ generate_fs_loop(struct gallivm_state *gallivm, params.ssbo_ptr = ssbo_ptr; params.ssbo_sizes_ptr = num_ssbo_ptr; params.image = image; + params.aniso_filter_table = lp_jit_context_aniso_filter_table(gallivm, context_ptr); /* Build the actual shader */ if (shader->base.type == PIPE_SHADER_IR_TGSI) @@ -1048,6 +1065,33 @@ generate_fs_loop(struct gallivm_state *gallivm, LLVMBuildStore(builder, output_smask, out_sample_mask_storage); } + if (shader->info.base.writes_z) { + int pos0 = find_output_by_semantic(&shader->info.base, + TGSI_SEMANTIC_POSITION, + 0); + LLVMValueRef out = LLVMBuildLoad(builder, outputs[pos0][2], ""); + LLVMValueRef idx = loop_state.counter; + if (key->min_samples > 1) + idx = LLVMBuildAdd(builder, idx, + LLVMBuildMul(builder, sample_loop_state.counter, num_loop, ""), ""); + LLVMValueRef ptr = LLVMBuildGEP(builder, z_out, &idx, 1, ""); + LLVMBuildStore(builder, out, ptr); + } + + if (shader->info.base.writes_stencil) { + int sten_out = find_output_by_semantic(&shader->info.base, + TGSI_SEMANTIC_STENCIL, + 0); + LLVMValueRef out = LLVMBuildLoad(builder, outputs[sten_out][1], "output.s"); + LLVMValueRef idx = loop_state.counter; + if (key->min_samples > 1) + idx = LLVMBuildAdd(builder, idx, + LLVMBuildMul(builder, sample_loop_state.counter, num_loop, ""), ""); + LLVMValueRef ptr = LLVMBuildGEP(builder, s_out, &idx, 1, ""); + LLVMBuildStore(builder, out, ptr); + } + + /* Color write - per fragment sample */ for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib) { @@ -1118,14 +1162,13 @@ generate_fs_loop(struct gallivm_state *gallivm, /* Late Z test */ if (depth_mode & LATE_DEPTH_TEST) { - int pos0 = find_output_by_semantic(&shader->info.base, - TGSI_SEMANTIC_POSITION, - 0); - int s_out = find_output_by_semantic(&shader->info.base, - TGSI_SEMANTIC_STENCIL, - 0); - if (pos0 != -1 && outputs[pos0][2]) { - z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z"); + if (shader->info.base.writes_z) { + LLVMValueRef idx = loop_state.counter; + if (key->min_samples > 1) + idx = LLVMBuildAdd(builder, idx, + LLVMBuildMul(builder, sample_loop_state.counter, num_loop, ""), ""); + LLVMValueRef ptr = LLVMBuildGEP(builder, z_out, &idx, 1, ""); + z = LLVMBuildLoad(builder, ptr, "output.z"); } else { if (key->multisample) { lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter, key->multisample ? sample_loop_state.counter : NULL); @@ -1147,10 +1190,15 @@ generate_fs_loop(struct gallivm_state *gallivm, lp_build_const_vec(gallivm, type, 1.0)); } - if (s_out != -1 && outputs[s_out][1]) { + if (shader->info.base.writes_stencil) { + LLVMValueRef idx = loop_state.counter; + if (key->min_samples > 1) + idx = LLVMBuildAdd(builder, idx, + LLVMBuildMul(builder, sample_loop_state.counter, num_loop, ""), ""); + LLVMValueRef ptr = LLVMBuildGEP(builder, s_out, &idx, 1, ""); + stencil_refs[0] = LLVMBuildLoad(builder, ptr, "output.s"); /* there's only one value, and spec says to discard additional bits */ LLVMValueRef s_max_mask = lp_build_const_int_vec(gallivm, int_type, 255); - stencil_refs[0] = LLVMBuildLoad(builder, outputs[s_out][1], "output.s"); stencil_refs[0] = LLVMBuildBitCast(builder, stencil_refs[0], int_vec_type, ""); stencil_refs[0] = LLVMBuildAnd(builder, stencil_refs[0], s_max_mask, ""); stencil_refs[1] = stencil_refs[0]; @@ -1664,6 +1712,15 @@ scale_bits(struct gallivm_state *gallivm, int delta_bits = src_bits - dst_bits; if (delta_bits <= dst_bits) { + + if (dst_bits == 4) { + struct lp_type flt_type = lp_type_float_vec(32, src_type.length * 32); + + result = lp_build_unsigned_norm_to_float(gallivm, src_bits, flt_type, src); + result = lp_build_clamped_float_to_unsigned_norm(gallivm, flt_type, dst_bits, result); + return result; + } + /* * Approximate the rescaling with a single shift. * @@ -3113,7 +3170,7 @@ generate_fragment(struct llvmpipe_context *lp, } /* code generated texture sampling */ - sampler = lp_llvm_sampler_soa_create(key->samplers, key->nr_samplers); + sampler = lp_llvm_sampler_soa_create(lp_fs_variant_key_samplers(key), key->nr_samplers); image = lp_llvm_image_soa_create(lp_fs_variant_key_images(key), key->nr_images); num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */ @@ -3387,7 +3444,8 @@ dump_fs_variant_key(struct lp_fragment_shader_variant_key *key) debug_printf("blend.alpha_to_coverage is enabled\n"); } for (i = 0; i < key->nr_samplers; ++i) { - const struct lp_static_sampler_state *sampler = &key->samplers[i].sampler_state; + const struct lp_sampler_static_state *samplers = lp_fs_variant_key_samplers(key); + const struct lp_static_sampler_state *sampler = &samplers[i].sampler_state; debug_printf("sampler[%u] = \n", i); debug_printf(" .wrap = %s %s %s\n", util_str_tex_wrap(sampler->wrap_s, TRUE), @@ -3407,9 +3465,11 @@ dump_fs_variant_key(struct lp_fragment_shader_variant_key *key) debug_printf(" .apply_min_lod = %u\n", sampler->apply_min_lod); debug_printf(" .apply_max_lod = %u\n", sampler->apply_max_lod); debug_printf(" .reduction_mode = %u\n", sampler->reduction_mode); + debug_printf(" .aniso = %u\n", sampler->aniso); } for (i = 0; i < key->nr_sampler_views; ++i) { - const struct lp_static_texture_state *texture = &key->samplers[i].texture_state; + const struct lp_sampler_static_state *samplers = lp_fs_variant_key_samplers(key); + const struct lp_static_texture_state *texture = &samplers[i].texture_state; debug_printf("texture[%u] = \n", i); debug_printf(" .format = %s\n", util_format_name(texture->format)); @@ -3439,6 +3499,24 @@ dump_fs_variant_key(struct lp_fragment_shader_variant_key *key) } } +const char * +lp_debug_fs_kind(enum lp_fs_kind kind) +{ + switch(kind) { + case LP_FS_KIND_GENERAL: + return "GENERAL"; + case LP_FS_KIND_BLIT_RGBA: + return "BLIT_RGBA"; + case LP_FS_KIND_BLIT_RGB1: + return "BLIT_RGB1"; + case LP_FS_KIND_AERO_MINIFICATION: + return "AERO_MINIFICATION"; + case LP_FS_KIND_LLVM_LINEAR: + return "LLVM_LINEAR"; + default: + return "unknown"; + } +} void lp_debug_fs_variant(struct lp_fragment_shader_variant *variant) @@ -3451,6 +3529,9 @@ lp_debug_fs_variant(struct lp_fragment_shader_variant *variant) nir_print_shader(variant->shader->base.ir.nir, stderr); dump_fs_variant_key(&variant->key); debug_printf("variant->opaque = %u\n", variant->opaque); + debug_printf("variant->potentially_opaque = %u\n", variant->potentially_opaque); + debug_printf("variant->blit = %u\n", variant->blit); + debug_printf("shader->kind = %s\n", lp_debug_fs_kind(variant->shader->kind)); debug_printf("\n"); } @@ -3489,6 +3570,8 @@ generate_variant(struct llvmpipe_context *lp, struct lp_fragment_shader_variant *variant; const struct util_format_description *cbuf0_format_desc = NULL; boolean fullcolormask; + boolean no_kill; + boolean linear; char module_name[64]; unsigned char ir_sha1_cache_key[20]; struct lp_cached_code cached = { 0 }; @@ -3534,9 +3617,9 @@ generate_variant(struct llvmpipe_context *lp, fullcolormask = util_format_colormask_full(cbuf0_format_desc, key->blend.rt[0].colormask); } - variant->opaque = - !key->blend.logicop_enable && - !key->blend.rt[0].blend_enable && + /* The scissor is ignored here as only tiles inside the scissoring + * rectangle will refer to this */ + no_kill = fullcolormask && !key->stencil[0].enabled && !key->alpha.enabled && @@ -3544,13 +3627,83 @@ generate_variant(struct llvmpipe_context *lp, !key->blend.alpha_to_coverage && !key->depth.enabled && !shader->info.base.uses_kill && - !shader->info.base.writes_samplemask - ? TRUE : FALSE; + !shader->info.base.writes_samplemask; + + variant->opaque = + no_kill && + !key->blend.logicop_enable && + !key->blend.rt[0].blend_enable + ? TRUE : FALSE; + + variant->potentially_opaque = + no_kill && + !key->blend.logicop_enable && + key->blend.rt[0].blend_enable && + key->blend.rt[0].rgb_func == PIPE_BLEND_ADD && + key->blend.rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA && + key->blend.rt[0].alpha_func == key->blend.rt[0].rgb_func && + key->blend.rt[0].alpha_dst_factor == key->blend.rt[0].rgb_dst_factor && + shader->base.type == PIPE_SHADER_IR_TGSI && + /* + * FIXME: for NIR, all of the fields of info.xxx (except info.base) + * are zeros, hence shader analysis (here and elsewhere) using these + * bits cannot work and will silently fail (cbuf is the only pointer + * field, hence causing a crash). + */ + shader->info.cbuf[0][3].file != TGSI_FILE_NULL + ? TRUE : FALSE; + + /* We only care about opaque blits for now */ + if (variant->opaque && + (shader->kind == LP_FS_KIND_BLIT_RGBA || + shader->kind == LP_FS_KIND_BLIT_RGB1)) { + unsigned target, min_img_filter, mag_img_filter, min_mip_filter; + enum pipe_format texture_format; + struct lp_sampler_static_state *samp0 = lp_fs_variant_key_sampler_idx(key, 0); + assert(samp0); + texture_format = samp0->texture_state.format; + target = samp0->texture_state.target; + min_img_filter = samp0->sampler_state.min_img_filter; + mag_img_filter = samp0->sampler_state.mag_img_filter; + if (samp0->texture_state.level_zero_only) { + min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + } else { + min_mip_filter = samp0->sampler_state.min_mip_filter; + } + + if (target == PIPE_TEXTURE_2D && + min_img_filter == PIPE_TEX_FILTER_NEAREST && + mag_img_filter == PIPE_TEX_FILTER_NEAREST && + min_mip_filter == PIPE_TEX_MIPFILTER_NONE && + ((texture_format && + util_is_format_compatible(util_format_description(texture_format), + cbuf0_format_desc)) || + (shader->kind == LP_FS_KIND_BLIT_RGB1 && + (texture_format == PIPE_FORMAT_B8G8R8A8_UNORM || + texture_format == PIPE_FORMAT_B8G8R8X8_UNORM) && + (key->cbuf_format[0] == PIPE_FORMAT_B8G8R8A8_UNORM || + key->cbuf_format[0] == PIPE_FORMAT_B8G8R8X8_UNORM)))) + variant->blit = 1; + } + + + /* Whether this is a candidate for the linear path */ + linear = + !key->stencil[0].enabled && + !key->depth.enabled && + !shader->info.base.uses_kill && + !key->blend.logicop_enable && + (key->cbuf_format[0] == PIPE_FORMAT_B8G8R8A8_UNORM || + key->cbuf_format[0] == PIPE_FORMAT_B8G8R8X8_UNORM); + + memcpy(&variant->key, key, sizeof *key); if ((LP_DEBUG & DEBUG_FS) || (gallivm_debug & GALLIVM_DEBUG_IR)) { lp_debug_fs_variant(variant); } + llvmpipe_fs_variant_fastpath(variant); + lp_jit_init_types(variant); if (variant->jit_function[RAST_EDGE_TEST] == NULL) @@ -3563,6 +3716,36 @@ generate_variant(struct llvmpipe_context *lp, } } + if (linear) { + /* Currently keeping both the old fastpaths and new linear path + * active. The older code is still somewhat faster for the cases + * it covers. + * + * XXX: consider restricting this to aero-mode only. + */ + if (fullcolormask && + !key->alpha.enabled && + !key->blend.alpha_to_coverage) { + llvmpipe_fs_variant_linear_fastpath(variant); + } + + /* If the original fastpath doesn't cover this variant, try the new + * code: + */ + if (variant->jit_linear == NULL) { + if (shader->kind == LP_FS_KIND_BLIT_RGBA || + shader->kind == LP_FS_KIND_BLIT_RGB1 || + shader->kind == LP_FS_KIND_LLVM_LINEAR) { + llvmpipe_fs_variant_linear_llvm(lp, shader, variant); + } + } + } else { + if (LP_DEBUG & DEBUG_LINEAR) { + lp_debug_fs_variant(variant); + debug_printf(" ----> no linear path for this variant\n"); + } + } + /* * Compile everything */ @@ -3585,6 +3768,19 @@ generate_variant(struct llvmpipe_context *lp, variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST]; } + if (linear) { + if (variant->linear_function) { + variant->jit_linear_llvm = (lp_jit_linear_llvm_func) + gallivm_jit_function(variant->gallivm, variant->linear_function); + } + + /* + * This must be done after LLVM compilation, as it will call the JIT'ed + * code to determine active inputs. + */ + lp_linear_check_variant(variant); + } + if (needs_caching) { lp_disk_cache_insert_shader(screen, &cached, ir_sha1_cache_key); } @@ -3640,7 +3836,6 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, for (i = 0; i < shader->info.base.num_inputs; i++) { shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i]; - shader->inputs[i].cyl_wrap = shader->info.base.input_cylindrical_wrap[i]; shader->inputs[i].location = shader->info.base.input_interpolate_loc[i]; switch (shader->info.base.input_interpolate[i]) { @@ -3677,7 +3872,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, shader->inputs[i].src_index = i+1; } - if (LP_DEBUG & DEBUG_TGSI) { + if (LP_DEBUG & DEBUG_TGSI && templ->type == PIPE_SHADER_IR_TGSI) { unsigned attrib; debug_printf("llvmpipe: Create fragment shader #%u %p:\n", shader->no, (void *) shader); @@ -3695,6 +3890,12 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, debug_printf("\n"); } + /* This will put a derived copy of the tokens into shader->base.tokens */ + if (templ->type == PIPE_SHADER_IR_TGSI) + llvmpipe_fs_analyse(shader, templ->tokens); + else + shader->kind = LP_FS_KIND_GENERAL; + return shader; } @@ -3799,7 +4000,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, const struct pipe_constant_buffer *cb) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - struct pipe_resource *constants = cb ? cb->buffer : NULL; + struct pipe_constant_buffer *constants = &llvmpipe->constants[shader][index]; assert(shader < PIPE_SHADER_TYPES); assert(index < ARRAY_SIZE(llvmpipe->constants[shader])); @@ -3808,10 +4009,19 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, util_copy_constant_buffer(&llvmpipe->constants[shader][index], cb, take_ownership); - if (constants) { - if (!(constants->bind & PIPE_BIND_CONSTANT_BUFFER)) { + /* user_buffer is only valid until the next set_constant_buffer (at most, + * possibly until shader deletion), so we need to upload it now to make sure + * it doesn't get updated/freed out from under us. + */ + if (constants->user_buffer) { + u_upload_data(llvmpipe->pipe.const_uploader, 0, constants->buffer_size, 16, + constants->user_buffer, &constants->buffer_offset, + &constants->buffer); + } + if (constants->buffer) { + if (!(constants->buffer->bind & PIPE_BIND_CONSTANT_BUFFER)) { debug_printf("Illegal set constant without bind flag\n"); - constants->bind |= PIPE_BIND_CONSTANT_BUFFER; + constants->buffer->bind |= PIPE_BIND_CONSTANT_BUFFER; } } @@ -3821,20 +4031,10 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, shader == PIPE_SHADER_TESS_EVAL) { /* Pass the constants to the 'draw' module */ const unsigned size = cb ? cb->buffer_size : 0; - const ubyte *data; - if (constants) { - data = (ubyte *) llvmpipe_resource_data(constants); - } - else if (cb && cb->user_buffer) { - data = (ubyte *) cb->user_buffer; - } - else { - data = NULL; - } - - if (data) - data += cb->buffer_offset; + const ubyte *data = NULL; + if (constants->buffer) + data = (ubyte *) llvmpipe_resource_data(constants->buffer) + constants->buffer_offset; draw_set_mapped_constant_buffer(llvmpipe->draw, shader, index, data, size); @@ -3843,10 +4043,6 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, llvmpipe->cs_dirty |= LP_CSNEW_CONSTANTS; else llvmpipe->dirty |= LP_NEW_FS_CONSTANTS; - - if (cb && cb->user_buffer) { - pipe_resource_reference(&constants, NULL); - } } static void @@ -3957,7 +4153,7 @@ make_variant_key(struct llvmpipe_context *lp, key = (struct lp_fragment_shader_variant_key *)store; - memset(key, 0, offsetof(struct lp_fragment_shader_variant_key, samplers[1])); + memset(key, 0, sizeof(*key)); if (lp->framebuffer.zsbuf) { enum pipe_format zsbuf_format = lp->framebuffer.zsbuf->format; @@ -3984,10 +4180,8 @@ make_variant_key(struct llvmpipe_context *lp, /* * Propagate the depth clamp setting from the rasterizer state. - * depth_clip == 0 implies depth clamping is enabled. - * */ - key->depth_clamp = (lp->rasterizer->depth_clip_near == 0) ? 1 : 0; + key->depth_clamp = lp->rasterizer->depth_clamp; /* alpha test only applies if render buffer 0 is non-integer (or does not exist) */ if (!lp->framebuffer.nr_cbufs || @@ -4104,7 +4298,7 @@ make_variant_key(struct llvmpipe_context *lp, struct lp_sampler_static_state *fs_sampler; - fs_sampler = key->samplers; + fs_sampler = lp_fs_variant_key_samplers(key); memset(fs_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *fs_sampler); @@ -4153,11 +4347,18 @@ make_variant_key(struct llvmpipe_context *lp, &lp->images[PIPE_SHADER_FRAGMENT][i]); } } + + if (shader->kind == LP_FS_KIND_AERO_MINIFICATION) { + struct lp_sampler_static_state *samp0 = lp_fs_variant_key_sampler_idx(key, 0); + assert(samp0); + samp0->sampler_state.min_img_filter = PIPE_TEX_FILTER_NEAREST; + samp0->sampler_state.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + } + return key; } - /** * Update fragment shader state. This is called just prior to drawing * something when some fragment-related state has changed. diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 613e5286a..b5e8c31c7 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -120,6 +120,7 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe, unsigned start, unsigned num, unsigned unbind_num_trailing_slots, + bool take_ownership, struct pipe_sampler_view **views) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -150,8 +151,15 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe, if (view) llvmpipe_flush_resource(pipe, view->texture, 0, true, false, false, "sampler_view"); - pipe_sampler_view_reference(&llvmpipe->sampler_views[shader][start + i], - view); + + if (take_ownership) { + pipe_sampler_view_reference(&llvmpipe->sampler_views[shader][start + i], + NULL); + llvmpipe->sampler_views[shader][start + i] = view; + } else { + pipe_sampler_view_reference(&llvmpipe->sampler_views[shader][start + i], + view); + } } for (; i < num + unbind_num_trailing_slots; i++) { @@ -178,8 +186,12 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe, } else if (shader == PIPE_SHADER_COMPUTE) { llvmpipe->cs_dirty |= LP_CSNEW_SAMPLER_VIEW; - } else { + } + else if (shader == PIPE_SHADER_FRAGMENT) { llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW; + lp_setup_set_fragment_sampler_views(llvmpipe->setup, + llvmpipe->num_sampler_views[PIPE_SHADER_FRAGMENT], + llvmpipe->sampler_views[PIPE_SHADER_FRAGMENT]); } } @@ -341,13 +353,7 @@ prepare_shader_sampling( } else { /* display target texture/surface */ - /* - * XXX: Where should this be unmapped? - */ - struct llvmpipe_screen *screen = llvmpipe_screen(tex->screen); - struct sw_winsys *winsys = screen->winsys; - addr = winsys->displaytarget_map(winsys, lp_tex->dt, - PIPE_MAP_READ); + addr = llvmpipe_resource_map(tex, 0, 0, LP_TEX_USAGE_READ); row_stride[0] = lp_tex->row_stride[0]; img_stride[0] = lp_tex->img_stride[0]; mip_offsets[0] = 0; @@ -411,6 +417,31 @@ llvmpipe_prepare_tess_eval_sampling(struct llvmpipe_context *lp, prepare_shader_sampling(lp, num, views, PIPE_SHADER_TESS_EVAL); } +void +llvmpipe_cleanup_stage_sampling(struct llvmpipe_context *ctx, + enum pipe_shader_type stage) +{ + unsigned num, i; + struct pipe_sampler_view **views; + assert(ctx); + assert(stage < ARRAY_SIZE(ctx->num_sampler_views)); + assert(stage < ARRAY_SIZE(ctx->sampler_views)); + + num = ctx->num_sampler_views[stage]; + views = ctx->sampler_views[stage]; + + assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS); + + for (i = 0; i < num; i++) { + struct pipe_sampler_view *view = views[i]; + if (view) { + struct pipe_resource *tex = view->texture; + if (tex) + llvmpipe_resource_unmap(tex, 0, 0); + } + } +} + static void prepare_shader_images( struct llvmpipe_context *lp, @@ -438,11 +469,19 @@ prepare_shader_images( if (!img) continue; - unsigned width = u_minify(img->width0, view->u.tex.level); - unsigned height = u_minify(img->height0, view->u.tex.level); + unsigned width = img->width0; + unsigned height = img->height0; unsigned num_layers = img->depth0; unsigned num_samples = img->nr_samples; + const uint32_t bw = util_format_get_blockwidth(view->resource->format); + const uint32_t bh = util_format_get_blockheight(view->resource->format); + + width = DIV_ROUND_UP(width, bw); + height = DIV_ROUND_UP(height, bh); + width = u_minify(width, view->u.tex.level); + height = u_minify(height, view->u.tex.level); + if (!lp_img->dt) { /* regular texture - setup array of mipmap level offsets */ struct pipe_resource *res = view->resource; @@ -482,13 +521,7 @@ prepare_shader_images( } else { /* display target texture/surface */ - /* - * XXX: Where should this be unmapped? - */ - struct llvmpipe_screen *screen = llvmpipe_screen(img->screen); - struct sw_winsys *winsys = screen->winsys; - addr = winsys->displaytarget_map(winsys, lp_img->dt, - PIPE_MAP_READ); + addr = llvmpipe_resource_map(img, 0, 0, LP_TEX_USAGE_READ); row_stride = lp_img->row_stride[0]; img_stride = lp_img->img_stride[0]; sample_stride = 0; @@ -552,6 +585,30 @@ llvmpipe_prepare_tess_eval_images(struct llvmpipe_context *lp, } void +llvmpipe_cleanup_stage_images(struct llvmpipe_context *ctx, + enum pipe_shader_type stage) +{ + unsigned num, i; + struct pipe_image_view *views; + assert(ctx); + assert(stage < ARRAY_SIZE(ctx->num_images)); + assert(stage < ARRAY_SIZE(ctx->images)); + + num = ctx->num_images[stage]; + views = ctx->images[stage]; + + assert(num <= LP_MAX_TGSI_SHADER_IMAGES); + + for (i = 0; i < num; i++) { + struct pipe_image_view *view = &views[i]; + assert(view); + struct pipe_resource *img = view->resource; + if (img) + llvmpipe_resource_unmap(img, 0, 0); + } +} + +void llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe) { llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state; diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c index 2bc94d5d4..9f385a084 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c @@ -71,6 +71,7 @@ struct lp_setup_args LLVMValueRef a0; LLVMValueRef dadx; LLVMValueRef dady; + LLVMValueRef key; /* Derived: */ @@ -200,7 +201,7 @@ lp_twoside(struct gallivm_state *gallivm, } -static void +static LLVMValueRef lp_do_offset_tri(struct gallivm_state *gallivm, struct lp_setup_args *args, const struct lp_setup_variant_key *key, @@ -214,9 +215,7 @@ lp_do_offset_tri(struct gallivm_state *gallivm, struct lp_build_context int_scalar_bld; struct lp_build_context *bld = &args->bld; LLVMValueRef zoffset, mult; - LLVMValueRef z0_new, z1_new, z2_new; LLVMValueRef dzdxdzdy, dzdx, dzdy, dzxyz20, dyzzx01, dyzzx01_dzxyz20, dzx01_dyz20; - LLVMValueRef z0z1, z0z1z2; LLVMValueRef max, max_value, res12; LLVMValueRef shuffles[4]; LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context); @@ -267,8 +266,8 @@ lp_do_offset_tri(struct gallivm_state *gallivm, if (key->floating_point_depth) { /* - * bias = pgon_offset_units * 2^(exponent(max(z0, z1, z2)) - mantissa_bits) + - * MAX2(dzdx, dzdy) * pgon_offset_scale + * bias = pgon_offset_units * 2^(exponent(max(abs(z0), abs(z1), abs(z2))) - + * mantissa_bits) + MAX2(dzdx, dzdy) * pgon_offset_scale * * NOTE: Assumes IEEE float32. */ @@ -281,11 +280,14 @@ lp_do_offset_tri(struct gallivm_state *gallivm, exp_mask = lp_build_const_int32(gallivm, 0xff << 23); maxz0z1_value = lp_build_max(&flt_scalar_bld, - LLVMBuildExtractElement(b, attribv[0], twoi, ""), - LLVMBuildExtractElement(b, attribv[1], twoi, "")); + lp_build_abs(&flt_scalar_bld, + LLVMBuildExtractElement(b, attribv[0], twoi, "")), + lp_build_abs(&flt_scalar_bld, + LLVMBuildExtractElement(b, attribv[1], twoi, ""))); maxz_value = lp_build_max(&flt_scalar_bld, - LLVMBuildExtractElement(b, attribv[2], twoi, ""), + lp_build_abs(&flt_scalar_bld, + LLVMBuildExtractElement(b, attribv[2], twoi, "")), maxz0z1_value); exp = LLVMBuildBitCast(b, maxz_value, int_scalar_bld.vec_type, ""); @@ -322,34 +324,7 @@ lp_do_offset_tri(struct gallivm_state *gallivm, zoffset); } - /* yuck */ - shuffles[0] = twoi; - shuffles[1] = lp_build_const_int32(gallivm, 6); - shuffles[2] = LLVMGetUndef(shuf_type); - shuffles[3] = LLVMGetUndef(shuf_type); - z0z1 = LLVMBuildShuffleVector(b, attribv[0], attribv[1], LLVMConstVector(shuffles, 4), ""); - shuffles[0] = zeroi; - shuffles[1] = onei; - shuffles[2] = lp_build_const_int32(gallivm, 6); - shuffles[3] = LLVMGetUndef(shuf_type); - z0z1z2 = LLVMBuildShuffleVector(b, z0z1, attribv[2], LLVMConstVector(shuffles, 4), ""); - zoffset = lp_build_broadcast_scalar(bld, zoffset); - - /* clamp and do offset */ - /* - * FIXME I suspect the clamp (is that even right to always clamp to fixed - * 0.0/1.0?) should really be per fragment? - */ - z0z1z2 = lp_build_clamp(bld, LLVMBuildFAdd(b, z0z1z2, zoffset, ""), bld->zero, bld->one); - - /* insert into args->a0.z, a1.z, a2.z: - */ - z0_new = LLVMBuildExtractElement(b, z0z1z2, zeroi, ""); - z1_new = LLVMBuildExtractElement(b, z0z1z2, onei, ""); - z2_new = LLVMBuildExtractElement(b, z0z1z2, twoi, ""); - attribv[0] = LLVMBuildInsertElement(b, attribv[0], z0_new, twoi, ""); - attribv[1] = LLVMBuildInsertElement(b, attribv[1], z1_new, twoi, ""); - attribv[2] = LLVMBuildInsertElement(b, attribv[2], z2_new, twoi, ""); + return zoffset; } static void @@ -393,12 +368,12 @@ load_attribute(struct gallivm_state *gallivm, * which obviously wouldn't work)). */ static void -emit_coef4( struct gallivm_state *gallivm, +calc_coef4( struct gallivm_state *gallivm, struct lp_setup_args *args, - unsigned slot, LLVMValueRef a0, LLVMValueRef a1, - LLVMValueRef a2) + LLVMValueRef a2, + LLVMValueRef out[3]) { LLVMBuilderRef b = gallivm->builder; LLVMValueRef attr_0; @@ -430,7 +405,23 @@ emit_coef4( struct gallivm_state *gallivm, LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0"); attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0"); - store_coef(gallivm, args, slot, attr_0, dadx, dady); + out[0] = attr_0; + out[1] = dadx; + out[2] = dady; +} + +static void +emit_coef4( struct gallivm_state *gallivm, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef a0, + LLVMValueRef a1, + LLVMValueRef a2) +{ + LLVMValueRef coeffs[3]; + calc_coef4(gallivm, args, a0, a1, a2, coeffs); + store_coef(gallivm, args, slot, + coeffs[0], coeffs[1], coeffs[2]); } @@ -481,82 +472,6 @@ apply_perspective_corr( struct gallivm_state *gallivm, /** - * Apply cylindrical wrapping to vertex attributes if enabled. - * Input coordinates must be in [0, 1] range, otherwise results are undefined. - * - * @param cyl_wrap TGSI_CYLINDRICAL_WRAP_x flags - */ -static void -emit_apply_cyl_wrap(struct gallivm_state *gallivm, - struct lp_setup_args *args, - uint cyl_wrap, - LLVMValueRef attribv[3]) - -{ - LLVMBuilderRef builder = gallivm->builder; - struct lp_type type = args->bld.type; - LLVMTypeRef float_vec_type = args->bld.vec_type; - LLVMValueRef pos_half; - LLVMValueRef neg_half; - LLVMValueRef cyl_mask; - LLVMValueRef offset; - LLVMValueRef delta; - LLVMValueRef one; - - if (!cyl_wrap) - return; - - /* Constants */ - pos_half = lp_build_const_vec(gallivm, type, +0.5f); - neg_half = lp_build_const_vec(gallivm, type, -0.5f); - cyl_mask = lp_build_const_mask_aos(gallivm, type, cyl_wrap, 4); - - one = lp_build_const_vec(gallivm, type, 1.0f); - one = LLVMBuildBitCast(builder, one, lp_build_int_vec_type(gallivm, type), ""); - one = LLVMBuildAnd(builder, one, cyl_mask, ""); - - /* Edge v0 -> v1 */ - delta = LLVMBuildFSub(builder, attribv[1], attribv[0], ""); - - offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half); - offset = LLVMBuildAnd(builder, offset, one, ""); - offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); - attribv[0] = LLVMBuildFAdd(builder, attribv[0], offset, ""); - - offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half); - offset = LLVMBuildAnd(builder, offset, one, ""); - offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); - attribv[1] = LLVMBuildFAdd(builder, attribv[1], offset, ""); - - /* Edge v1 -> v2 */ - delta = LLVMBuildFSub(builder, attribv[2], attribv[1], ""); - - offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half); - offset = LLVMBuildAnd(builder, offset, one, ""); - offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); - attribv[1] = LLVMBuildFAdd(builder, attribv[1], offset, ""); - - offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half); - offset = LLVMBuildAnd(builder, offset, one, ""); - offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); - attribv[2] = LLVMBuildFAdd(builder, attribv[2], offset, ""); - - /* Edge v2 -> v0 */ - delta = LLVMBuildFSub(builder, attribv[0], attribv[2], ""); - - offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half); - offset = LLVMBuildAnd(builder, offset, one, ""); - offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); - attribv[2] = LLVMBuildFAdd(builder, attribv[2], offset, ""); - - offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half); - offset = LLVMBuildAnd(builder, offset, one, ""); - offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); - attribv[0] = LLVMBuildFAdd(builder, attribv[0], offset, ""); -} - - -/** * Compute the inputs-> dadx, dady, a0 values. */ static void @@ -584,13 +499,11 @@ emit_tri_coef( struct gallivm_state *gallivm, case LP_INTERP_LINEAR: load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs); - emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap, attribs); emit_linear_coef(gallivm, args, slot+1, attribs); break; case LP_INTERP_PERSPECTIVE: load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs); - emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap, attribs); apply_perspective_corr(gallivm, args, slot+1, attribs); emit_linear_coef(gallivm, args, slot+1, attribs); break; @@ -641,6 +554,7 @@ init_args(struct gallivm_state *gallivm, LLVMValueRef e, f, ef, ooa; LLVMValueRef shuffles[4], shuf10; LLVMValueRef attr_pos[3]; + LLVMValueRef polygon_offset; struct lp_type typef4 = lp_type_float_vec(32, 128); struct lp_build_context bld; @@ -681,7 +595,9 @@ init_args(struct gallivm_state *gallivm, /* tri offset calc shares a lot of arithmetic, do it here */ if (key->pgon_offset_scale != 0.0f || key->pgon_offset_units != 0.0f) { - lp_do_offset_tri(gallivm, args, key, ooa, dxy01, dxy20, attr_pos); + polygon_offset = lp_do_offset_tri(gallivm, args, key, ooa, dxy01, dxy20, attr_pos); + } else { + polygon_offset = lp_build_const_float(gallivm, 0.0f); } dxy20 = LLVMBuildFMul(b, dxy20, ooa, ""); @@ -696,7 +612,22 @@ init_args(struct gallivm_state *gallivm, args->x0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, zeroi); args->y0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, onei); - emit_linear_coef(gallivm, args, 0, attr_pos); + LLVMValueRef coeffs[3]; + calc_coef4(gallivm, args, + attr_pos[0], attr_pos[1], attr_pos[2], + coeffs); + + /* This is a bit sneaky: + * Because we observe that the X component of A0 is otherwise unused, + * we can overwrite it with the computed polygon-offset value, to make + * sure it's available in the fragment shader without having to change + * the interface (which is error-prone). + */ + coeffs[0] = LLVMBuildInsertElement(b, coeffs[0], polygon_offset, + lp_build_const_int32(gallivm, 0), ""); + + store_coef(gallivm, args, 0, + coeffs[0], coeffs[1], coeffs[2]); } /** @@ -713,7 +644,7 @@ generate_setup_variant(struct lp_setup_variant_key *key, char func_name[64]; LLVMTypeRef vec4f_type; LLVMTypeRef func_type; - LLVMTypeRef arg_types[7]; + LLVMTypeRef arg_types[8]; LLVMBasicBlockRef block; LLVMBuilderRef builder; int64_t t0 = 0, t1; @@ -757,6 +688,7 @@ generate_setup_variant(struct lp_setup_variant_key *key, arg_types[4] = LLVMPointerType(vec4f_type, 0); /* a0, aligned */ arg_types[5] = LLVMPointerType(vec4f_type, 0); /* dadx, aligned */ arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */ + arg_types[7] = LLVMPointerType(vec4f_type, 0); /* key (placeholder) */ func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), arg_types, ARRAY_SIZE(arg_types), 0); @@ -774,6 +706,7 @@ generate_setup_variant(struct lp_setup_variant_key *key, args.a0 = LLVMGetParam(variant->function, 4); args.dadx = LLVMGetParam(variant->function, 5); args.dady = LLVMGetParam(variant->function, 6); + args.key = LLVMGetParam(variant->function, 7); lp_build_name(args.v0, "in_v0"); lp_build_name(args.v1, "in_v1"); @@ -782,6 +715,7 @@ generate_setup_variant(struct lp_setup_variant_key *key, lp_build_name(args.a0, "out_a0"); lp_build_name(args.dadx, "out_dadx"); lp_build_name(args.dady, "out_dady"); + lp_build_name(args.key, "key"); /* * Function body @@ -864,11 +798,12 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp, key->pgon_offset_units = (float) lp->rasterizer->offset_units; } else { key->pgon_offset_units = - (float) (lp->rasterizer->offset_units * lp->mrd); + (float) (lp->rasterizer->offset_units * lp->mrd * 2); } key->pgon_offset_scale = lp->rasterizer->offset_scale; key->pgon_offset_clamp = lp->rasterizer->offset_clamp; + key->uses_constant_interp = 0; key->pad = 0; memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]); for (i = 0; i < key->num_inputs; i++) { @@ -878,8 +813,10 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp, else key->inputs[i].interp = LP_INTERP_PERSPECTIVE; } + if (key->inputs[i].interp == LP_INTERP_CONSTANT) { + key->uses_constant_interp = 1; + } } - } diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_tess.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_tess.c index b3f8e74af..28cc1258b 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_tess.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_tess.c @@ -49,7 +49,7 @@ llvmpipe_create_tcs_state(struct pipe_context *pipe, goto no_state; /* debug */ - if (LP_DEBUG & DEBUG_TGSI) { + if (LP_DEBUG & DEBUG_TGSI && templ->type == PIPE_SHADER_IR_TGSI) { debug_printf("llvmpipe: Create tess ctrl shader %p:\n", (void *)state); tgsi_dump(templ->tokens, 0); } @@ -181,6 +181,14 @@ llvmpipe_set_tess_state(struct pipe_context *pipe, draw_set_tess_state(llvmpipe->draw, default_outer_level, default_inner_level); } +static void +llvmpipe_set_patch_vertices(struct pipe_context *pipe, uint8_t patch_vertices) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + llvmpipe->patch_vertices = patch_vertices; +} + void llvmpipe_init_tess_funcs(struct llvmpipe_context *llvmpipe) { @@ -193,4 +201,5 @@ llvmpipe_init_tess_funcs(struct llvmpipe_context *llvmpipe) llvmpipe->pipe.delete_tes_state = llvmpipe_delete_tes_state; llvmpipe->pipe.set_tess_state = llvmpipe_set_tess_state; + llvmpipe->pipe.set_patch_vertices = llvmpipe_set_patch_vertices; } diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c index 9ba2b87b8..8e905b8d7 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c @@ -65,7 +65,7 @@ lp_resource_copy_ms(struct pipe_context *pipe, &dst_box, &dst_trans); if (!dst_map) { - pipe->transfer_unmap(pipe, src_trans); + pipe->texture_unmap(pipe, src_trans); return; } @@ -77,8 +77,8 @@ lp_resource_copy_ms(struct pipe_context *pipe, src_map, src_trans->stride, src_trans->layer_stride, 0, 0, 0); - pipe->transfer_unmap(pipe, dst_trans); - pipe->transfer_unmap(pipe, src_trans); + pipe->texture_unmap(pipe, dst_trans); + pipe->texture_unmap(pipe, src_trans); } } static void @@ -295,7 +295,7 @@ lp_clear_color_texture_msaa(struct pipe_context *pipe, lp_clear_color_texture_helper(dst_trans, dst_map, format, color, box->width, box->height, box->depth); } - pipe->transfer_unmap(pipe, dst_trans); + pipe->texture_unmap(pipe, dst_trans); } static void @@ -361,7 +361,7 @@ lp_clear_depth_stencil_texture_msaa(struct pipe_context *pipe, dst_trans->stride, dst_trans->layer_stride, box->width, box->height, box->depth, zstencil); - pipe->transfer_unmap(pipe, dst_trans); + pipe->texture_unmap(pipe, dst_trans); } static void @@ -456,7 +456,7 @@ llvmpipe_clear_buffer(struct pipe_context *pipe, char *dst; u_box_1d(offset, size, &box); - dst = pipe->transfer_map(pipe, + dst = pipe->buffer_map(pipe, res, 0, PIPE_MAP_WRITE, @@ -475,7 +475,7 @@ llvmpipe_clear_buffer(struct pipe_context *pipe, memcpy(&dst[i], clear_value, clear_value_size); break; } - pipe->transfer_unmap(pipe, dst_t); + pipe->buffer_unmap(pipe, dst_t); } void |