diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2021-07-22 10:50:50 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2021-07-22 10:50:50 +0000 |
commit | 9130ec005fbc78a62420643414d8354d0929ca50 (patch) | |
tree | 6762777acdd2d4eee17ef87290e80dc7afe2b73d /lib/mesa/src/gallium/drivers/llvmpipe | |
parent | ca11beabae33eb59fb981b8adf50b1d47a2a98f0 (diff) |
Merge Mesa 21.1.5
Diffstat (limited to 'lib/mesa/src/gallium/drivers/llvmpipe')
24 files changed, 2308 insertions, 717 deletions
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/SConscript b/lib/mesa/src/gallium/drivers/llvmpipe/SConscript deleted file mode 100644 index 1af686771..000000000 --- a/lib/mesa/src/gallium/drivers/llvmpipe/SConscript +++ /dev/null @@ -1,46 +0,0 @@ -from sys import executable as python_cmd -import distutils.version - -Import('*') - -if not env['llvm']: - print('warning: LLVM disabled: not building llvmpipe') - Return() - -env = env.Clone() - -env.MSVC2013Compat() - -llvmpipe = env.ConvenienceLibrary( - target = 'llvmpipe', - source = env.ParseSourceList('Makefile.sources', 'C_SOURCES') - ) - -env.Alias('llvmpipe', llvmpipe) - -env.Append(CPPPATH = [ - '../../../compiler/nir', -]) - -if not env['embedded']: - env = env.Clone() - - env.Prepend(LIBS = [llvmpipe, gallium, mesautil]) - - tests = [ - 'arit', - 'format', - 'blend', - 'conv', - 'printf', - ] - - for test in tests: - testname = 'lp_test_' + test - target = env.Program( - target = testname, - source = [testname + '.c', 'lp_test_main.c'], - ) - env.UnitTest(testname, target) - -Export('llvmpipe') diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 63c2fb5d8..dc559bc3f 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -66,6 +66,7 @@ #include "gallivm/lp_bld_pack.h" #include "lp_bld_depth.h" +#include "lp_state_fs.h" /** Used to select fields from pipe_stencil_state */ @@ -435,7 +436,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm, assert(type.length <= 16); assert(type.floating); - if(util_cpu_caps.has_sse && type.length == 4) { + if(util_get_cpu_caps()->has_sse && type.length == 4) { const char *movmskintr = "llvm.x86.sse.movmsk.ps"; const char *popcntintr = "llvm.ctpop.i32"; LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue, @@ -446,7 +447,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm, LLVMInt32TypeInContext(context), bits); 
count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), ""); } - else if(util_cpu_caps.has_avx && type.length == 8) { + else if(util_get_cpu_caps()->has_avx && type.length == 8) { const char *movmskintr = "llvm.x86.avx.movmsk.ps.256"; const char *popcntintr = "llvm.ctpop.i32"; LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue, @@ -469,7 +470,11 @@ lp_build_occlusion_count(struct gallivm_state *gallivm, countv = LLVMBuildBitCast(builder, countv, i8vntype, ""); for (i = 0; i < type.length; i++) { +#if UTIL_ARCH_LITTLE_ENDIAN shuffles[i] = lp_build_const_int32(gallivm, 4*i); +#else + shuffles[i] = lp_build_const_int32(gallivm, (4*i) + 3); +#endif } shufflev = LLVMConstVector(shuffles, type.length); @@ -599,6 +604,12 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, LLVMConstVector(shuffles, zs_type.length), ""); *s_fb = *z_fb; + if (format_desc->block.bits == 8) { + /* Extend stencil-only 8 bit values (S8_UINT) */ + *s_fb = LLVMBuildZExt(builder, *s_fb, + lp_build_int_vec_type(gallivm, z_src_type), ""); + } + if (format_desc->block.bits < z_src_type.width) { /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */ *z_fb = LLVMBuildZExt(builder, *z_fb, @@ -648,7 +659,7 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, * \param type the data type of the fragment depth/stencil values * \param format_desc description of the depth/stencil surface * \param is_1d whether this resource has only one dimension - * \param mask the alive/dead pixel mask for the quad (vector) + * \param mask_value the alive/dead pixel mask for the quad (vector) * \param z_fb z values read from fb (with padding) * \param s_fb s values read from fb (with padding) * \param loop_counter the current loop iteration @@ -662,7 +673,7 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, struct lp_type z_src_type, const struct util_format_description *format_desc, boolean is_1d, - struct lp_build_mask_context 
*mask, + LLVMValueRef mask_value, LLVMValueRef z_fb, LLVMValueRef s_fb, LLVMValueRef loop_counter, @@ -674,7 +685,6 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, struct lp_build_context z_bld; LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef mask_value = NULL; LLVMValueRef zs_dst1, zs_dst2; LLVMValueRef zs_dst_ptr1, zs_dst_ptr2; LLVMValueRef depth_offset1, depth_offset2; @@ -732,8 +742,7 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, s_value = LLVMBuildBitCast(builder, s_value, z_bld.vec_type, ""); } - if (mask) { - mask_value = lp_build_mask_value(mask); + if (mask_value) { z_value = lp_build_select(&z_bld, mask_value, z_value, z_fb); if (format_desc->block.bits > 32) { s_fb = LLVMBuildBitCast(builder, s_fb, z_bld.vec_type, ""); @@ -806,6 +815,7 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, * \param type the data type of the fragment depth/stencil values * \param format_desc description of the depth/stencil surface * \param mask the alive/dead pixel mask for the quad (vector) + * \param cov_mask coverage mask * \param stencil_refs the front/back stencil ref values (scalar) * \param z_src the incoming depth/stencil values (n 2x2 quad values, float32) * \param zs_dst the depth/stencil values in framebuffer @@ -813,11 +823,12 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, */ void lp_build_depth_stencil_test(struct gallivm_state *gallivm, - const struct pipe_depth_state *depth, + const struct lp_depth_state *depth, const struct pipe_stencil_state stencil[2], struct lp_type z_src_type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, + LLVMValueRef *cov_mask, LLVMValueRef stencil_refs[2], LLVMValueRef z_src, LLVMValueRef z_fb, @@ -837,7 +848,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, LLVMValueRef stencil_vals = NULL; LLVMValueRef z_bitmask = NULL, 
stencil_shift = NULL; LLVMValueRef z_pass = NULL, s_pass_mask = NULL; - LLVMValueRef current_mask = lp_build_mask_value(mask); + LLVMValueRef current_mask = mask ? lp_build_mask_value(mask) : *cov_mask; LLVMValueRef front_facing = NULL; boolean have_z, have_s; @@ -870,8 +881,8 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, /* Sanity checking */ { - const unsigned z_swizzle = format_desc->swizzle[0]; - const unsigned s_swizzle = format_desc->swizzle[1]; + ASSERTED const unsigned z_swizzle = format_desc->swizzle[0]; + ASSERTED const unsigned s_swizzle = format_desc->swizzle[1]; assert(z_swizzle != PIPE_SWIZZLE_NONE || s_swizzle != PIPE_SWIZZLE_NONE); @@ -1066,7 +1077,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, current_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, ""); } - if (!stencil[0].enabled) { + if (!stencil[0].enabled && mask) { /* We can potentially skip all remaining operations here, but only * if stencil is disabled because we still need to update the stencil * buffer values. Don't need to update Z buffer values. 
@@ -1141,10 +1152,21 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, *s_value = stencil_vals; } - if (s_pass_mask) - lp_build_mask_update(mask, s_pass_mask); + if (mask) { + if (s_pass_mask) + lp_build_mask_update(mask, s_pass_mask); - if (depth->enabled && stencil[0].enabled) - lp_build_mask_update(mask, z_pass); + if (depth->enabled && stencil[0].enabled) + lp_build_mask_update(mask, z_pass); + } else { + LLVMValueRef tmp_mask = *cov_mask; + if (s_pass_mask) + tmp_mask = LLVMBuildAnd(builder, tmp_mask, s_pass_mask, ""); + + /* for multisample we don't do the stencil optimisation so update always */ + if (depth->enabled) + tmp_mask = LLVMBuildAnd(builder, tmp_mask, z_pass, ""); + *cov_mask = tmp_mask; + } } diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c index c49e66914..aaf6a80e8 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c @@ -46,6 +46,7 @@ #include "lp_surface.h" #include "lp_query.h" #include "lp_setup.h" +#include "lp_screen.h" /* This is only safe if there's just one concurrent context */ #ifdef EMBEDDED_DEVICE @@ -55,7 +56,7 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) { struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); - uint i, j; + uint i; lp_print_counters(); @@ -80,21 +81,18 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) pipe_surface_reference(&llvmpipe->framebuffer.zsbuf, NULL); - for (i = 0; i < ARRAY_SIZE(llvmpipe->sampler_views[0]); i++) { - pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_FRAGMENT][i], NULL); - } - - for (i = 0; i < ARRAY_SIZE(llvmpipe->sampler_views[0]); i++) { - pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_VERTEX][i], NULL); - } - - for (i = 0; i < ARRAY_SIZE(llvmpipe->sampler_views[0]); i++) { - pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_GEOMETRY][i], NULL); - } - - for (i 
= 0; i < ARRAY_SIZE(llvmpipe->constants); i++) { - for (j = 0; j < ARRAY_SIZE(llvmpipe->constants[i]); j++) { - pipe_resource_reference(&llvmpipe->constants[i][j].buffer, NULL); + for (enum pipe_shader_type s = PIPE_SHADER_VERTEX; s < PIPE_SHADER_TYPES; s++) { + for (i = 0; i < ARRAY_SIZE(llvmpipe->sampler_views[0]); i++) { + pipe_sampler_view_reference(&llvmpipe->sampler_views[s][i], NULL); + } + for (i = 0; i < LP_MAX_TGSI_SHADER_IMAGES; i++) { + pipe_resource_reference(&llvmpipe->images[s][i].resource, NULL); + } + for (i = 0; i < LP_MAX_TGSI_SHADER_BUFFERS; i++) { + pipe_resource_reference(&llvmpipe->ssbos[s][i].buffer, NULL); + } + for (i = 0; i < ARRAY_SIZE(llvmpipe->constants[s]); i++) { + pipe_resource_reference(&llvmpipe->constants[s][i].buffer, NULL); } } @@ -134,6 +132,47 @@ llvmpipe_render_condition(struct pipe_context *pipe, llvmpipe->render_cond_cond = condition; } +static void +llvmpipe_render_condition_mem(struct pipe_context *pipe, + struct pipe_resource *buffer, + unsigned offset, + bool condition) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); + + llvmpipe->render_cond_buffer = llvmpipe_resource(buffer); + llvmpipe->render_cond_offset = offset; + llvmpipe->render_cond_cond = condition; +} + +static void +llvmpipe_texture_barrier(struct pipe_context *pipe, unsigned flags) +{ + llvmpipe_flush(pipe, NULL, __FUNCTION__); +} + +static void lp_draw_disk_cache_find_shader(void *cookie, + struct lp_cached_code *cache, + unsigned char ir_sha1_cache_key[20]) +{ + struct llvmpipe_screen *screen = cookie; + lp_disk_cache_find_shader(screen, cache, ir_sha1_cache_key); +} + +static void lp_draw_disk_cache_insert_shader(void *cookie, + struct lp_cached_code *cache, + unsigned char ir_sha1_cache_key[20]) +{ + struct llvmpipe_screen *screen = cookie; + lp_disk_cache_insert_shader(screen, cache, ir_sha1_cache_key); +} + +static enum pipe_reset_status +llvmpipe_get_device_reset_status(struct pipe_context *pipe) +{ + return PIPE_NO_RESET; +} + 
struct pipe_context * llvmpipe_create_context(struct pipe_screen *screen, void *priv, unsigned flags) @@ -162,9 +201,12 @@ llvmpipe_create_context(struct pipe_screen *screen, void *priv, llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state; llvmpipe->pipe.clear = llvmpipe_clear; llvmpipe->pipe.flush = do_flush; + llvmpipe->pipe.texture_barrier = llvmpipe_texture_barrier; llvmpipe->pipe.render_condition = llvmpipe_render_condition; + llvmpipe->pipe.render_condition_mem = llvmpipe_render_condition_mem; + llvmpipe->pipe.get_device_reset_status = llvmpipe_get_device_reset_status; llvmpipe_init_blend_funcs(llvmpipe); llvmpipe_init_clip_funcs(llvmpipe); llvmpipe_init_draw_funcs(llvmpipe); @@ -176,6 +218,7 @@ llvmpipe_create_context(struct pipe_screen *screen, void *priv, llvmpipe_init_fs_funcs(llvmpipe); llvmpipe_init_vs_funcs(llvmpipe); llvmpipe_init_gs_funcs(llvmpipe); + llvmpipe_init_tess_funcs(llvmpipe); llvmpipe_init_rasterizer_funcs(llvmpipe); llvmpipe_init_context_resource_funcs( &llvmpipe->pipe ); llvmpipe_init_surface_functions(llvmpipe); @@ -197,6 +240,13 @@ llvmpipe_create_context(struct pipe_screen *screen, void *priv, if (!llvmpipe->draw) goto fail; + draw_set_disk_cache_callbacks(llvmpipe->draw, + llvmpipe_screen(screen), + lp_draw_disk_cache_find_shader, + lp_draw_disk_cache_insert_shader); + + draw_set_constant_buffer_stride(llvmpipe->draw, lp_get_constant_buffer_stride(screen)); + /* FIXME: devise alternative to draw_texture_samplers */ llvmpipe->setup = lp_setup_create( &llvmpipe->pipe, diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h index 0e029f591..b1adba61d 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h @@ -67,12 +67,15 @@ struct llvmpipe_context { struct lp_fragment_shader *fs; struct draw_vertex_shader *vs; const struct lp_geometry_shader *gs; + const struct lp_tess_ctrl_shader *tcs; + const 
struct lp_tess_eval_shader *tes; struct lp_compute_shader *cs; const struct lp_velems_state *velems; const struct lp_so_state *so; /** Other rendering state */ unsigned sample_mask; + unsigned min_samples; struct pipe_blend_color blend_color; struct pipe_stencil_ref stencil_ref; struct pipe_clip_state clip; @@ -96,7 +99,7 @@ struct llvmpipe_context { struct draw_so_target *so_targets[PIPE_MAX_SO_BUFFERS]; int num_so_targets; - struct pipe_query_data_so_statistics so_stats; + struct pipe_query_data_so_statistics so_stats[PIPE_MAX_VERTEX_STREAMS]; struct pipe_query_data_pipeline_statistics pipeline_statistics; unsigned active_statistics_queries; @@ -167,6 +170,10 @@ struct llvmpipe_context { enum pipe_render_cond_flag render_cond_mode; boolean render_cond_cond; + /** VK render cond */ + struct llvmpipe_resource *render_cond_buffer; + unsigned render_cond_offset; + /** The LLVMContext to use for LLVM related work */ LLVMContextRef context; diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index cf81111b4..e8f0ae609 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -51,8 +51,14 @@ * the drawing to the 'draw' module. 
*/ static void -llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count *draws, + unsigned num_draws) { + if (!indirect && (!draws[0].count || !info->instance_count)) + return; + struct llvmpipe_context *lp = llvmpipe_context(pipe); struct draw_context *draw = lp->draw; const void *mapped_indices = NULL; @@ -61,8 +67,8 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (!llvmpipe_check_render_cond(lp)) return; - if (info->indirect) { - util_draw_indirect(pipe, info); + if (indirect && indirect->buffer) { + util_draw_indirect(pipe, info, indirect); return; } @@ -105,6 +111,12 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) llvmpipe_prepare_geometry_sampling(lp, lp->num_sampler_views[PIPE_SHADER_GEOMETRY], lp->sampler_views[PIPE_SHADER_GEOMETRY]); + llvmpipe_prepare_tess_ctrl_sampling(lp, + lp->num_sampler_views[PIPE_SHADER_TESS_CTRL], + lp->sampler_views[PIPE_SHADER_TESS_CTRL]); + llvmpipe_prepare_tess_eval_sampling(lp, + lp->num_sampler_views[PIPE_SHADER_TESS_EVAL], + lp->sampler_views[PIPE_SHADER_TESS_EVAL]); llvmpipe_prepare_vertex_images(lp, lp->num_images[PIPE_SHADER_VERTEX], @@ -112,6 +124,12 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) llvmpipe_prepare_geometry_images(lp, lp->num_images[PIPE_SHADER_GEOMETRY], lp->images[PIPE_SHADER_GEOMETRY]); + llvmpipe_prepare_tess_ctrl_images(lp, + lp->num_images[PIPE_SHADER_TESS_CTRL], + lp->images[PIPE_SHADER_TESS_CTRL]); + llvmpipe_prepare_tess_eval_images(lp, + lp->num_images[PIPE_SHADER_TESS_EVAL], + lp->images[PIPE_SHADER_TESS_EVAL]); if (lp->gs && lp->gs->no_tokens) { /* we have an empty geometry shader with stream output, so attach the stream output info to the current vertex shader */ @@ -127,7 +145,7 @@ 
llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) !lp->queries_disabled); /* draw! */ - draw_vbo(draw, info); + draw_vbo(draw, info, indirect, draws, num_draws); /* * unmap vertex/index buffers diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c index 00b6477f9..80d8d9e5c 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c @@ -52,6 +52,8 @@ create_jit_texture_type(struct gallivm_state *gallivm) elem_types[LP_JIT_TEXTURE_WIDTH] = elem_types[LP_JIT_TEXTURE_HEIGHT] = elem_types[LP_JIT_TEXTURE_DEPTH] = + elem_types[LP_JIT_TEXTURE_NUM_SAMPLES] = + elem_types[LP_JIT_TEXTURE_SAMPLE_STRIDE] = elem_types[LP_JIT_TEXTURE_FIRST_LEVEL] = elem_types[LP_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32TypeInContext(lc); elem_types[LP_JIT_TEXTURE_BASE] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0); @@ -90,6 +92,12 @@ create_jit_texture_type(struct gallivm_state *gallivm) LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, mip_offsets, gallivm->target, texture_type, LP_JIT_TEXTURE_MIP_OFFSETS); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, num_samples, + gallivm->target, texture_type, + LP_JIT_TEXTURE_NUM_SAMPLES); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, sample_stride, + gallivm->target, texture_type, + LP_JIT_TEXTURE_SAMPLE_STRIDE); LP_CHECK_STRUCT_SIZE(struct lp_jit_texture, gallivm->target, texture_type); return texture_type; @@ -138,7 +146,9 @@ create_jit_image_type(struct gallivm_state *gallivm) elem_types[LP_JIT_IMAGE_DEPTH] = LLVMInt32TypeInContext(lc); elem_types[LP_JIT_IMAGE_BASE] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0); elem_types[LP_JIT_IMAGE_ROW_STRIDE] = - elem_types[LP_JIT_IMAGE_IMG_STRIDE] = LLVMInt32TypeInContext(lc); + elem_types[LP_JIT_IMAGE_IMG_STRIDE] = + elem_types[LP_JIT_IMAGE_NUM_SAMPLES] = + elem_types[LP_JIT_IMAGE_SAMPLE_STRIDE] = LLVMInt32TypeInContext(lc); image_type = LLVMStructTypeInContext(lc, elem_types, 
ARRAY_SIZE(elem_types), 0); @@ -160,6 +170,12 @@ create_jit_image_type(struct gallivm_state *gallivm) LP_CHECK_MEMBER_OFFSET(struct lp_jit_image, img_stride, gallivm->target, image_type, LP_JIT_IMAGE_IMG_STRIDE); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_image, num_samples, + gallivm->target, image_type, + LP_JIT_IMAGE_NUM_SAMPLES); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_image, sample_stride, + gallivm->target, image_type, + LP_JIT_IMAGE_SAMPLE_STRIDE); return image_type; } @@ -210,6 +226,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp) elem_types[LP_JIT_CTX_IMAGES] = LLVMArrayType(image_type, PIPE_MAX_SHADER_IMAGES); elem_types[LP_JIT_CTX_ALPHA_REF] = LLVMFloatTypeInContext(lc); + elem_types[LP_JIT_CTX_SAMPLE_MASK] = elem_types[LP_JIT_CTX_STENCIL_REF_FRONT] = elem_types[LP_JIT_CTX_STENCIL_REF_BACK] = LLVMInt32TypeInContext(lc); elem_types[LP_JIT_CTX_U8_BLEND_COLOR] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0); @@ -261,6 +278,9 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp) LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, num_ssbos, gallivm->target, context_type, LP_JIT_CTX_NUM_SSBOS); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, sample_mask, + gallivm->target, context_type, + LP_JIT_CTX_SAMPLE_MASK); LP_CHECK_STRUCT_SIZE(struct lp_jit_context, gallivm->target, context_type); @@ -277,6 +297,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp) elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt64TypeInContext(lc); elem_types[LP_JIT_THREAD_DATA_INVOCATIONS] = LLVMInt64TypeInContext(lc); elem_types[LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX] = + elem_types[LP_JIT_THREAD_DATA_RASTER_STATE_VIEW_INDEX] = LLVMInt32TypeInContext(lc); thread_data_type = LLVMStructTypeInContext(lc, elem_types, diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c index ef783ea6f..6b5160667 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c +++ 
b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c @@ -33,7 +33,7 @@ #include "util/u_pack_color.h" #include "util/u_string.h" #include "util/u_thread.h" - +#include "util/u_memset.h" #include "util/os_time.h" #include "lp_scene_queue.h" @@ -56,6 +56,10 @@ const struct lp_rast_state *jit_state = NULL; const struct lp_rasterizer_task *jit_task = NULL; #endif +const float lp_sample_pos_4x[4][2] = { { 0.375, 0.125 }, + { 0.875, 0.375 }, + { 0.125, 0.625 }, + { 0.625, 0.875 } }; /** * Begin rasterizing a scene. @@ -152,18 +156,20 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, LP_DBG(DEBUG_RAST, "%s clear value (target format %d) raw 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, format, uc.ui[0], uc.ui[1], uc.ui[2], uc.ui[3]); - - util_fill_box(scene->cbufs[cbuf].map, - format, - scene->cbufs[cbuf].stride, - scene->cbufs[cbuf].layer_stride, - task->x, - task->y, - 0, - task->width, - task->height, - scene->fb_max_layer + 1, - &uc); + for (unsigned s = 0; s < scene->cbufs[cbuf].nr_samples; s++) { + void *map = (char *)scene->cbufs[cbuf].map + scene->cbufs[cbuf].sample_stride * s; + util_fill_box(map, + format, + scene->cbufs[cbuf].stride, + scene->cbufs[cbuf].layer_stride, + task->x, + task->y, + 0, + task->width, + task->height, + scene->fb_max_layer + 1, + &uc); + } /* this will increase for each rb which probably doesn't mean much */ LP_COUNT(nr_color_tile_clear); @@ -200,86 +206,89 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, if (scene->fb.zsbuf) { unsigned layer; - uint8_t *dst_layer = task->depth_tile; - block_size = util_format_get_blocksize(scene->fb.zsbuf->format); - clear_value &= clear_mask; + for (unsigned s = 0; s < scene->zsbuf.nr_samples; s++) { + uint8_t *dst_layer = task->depth_tile + (s * scene->zsbuf.sample_stride); + block_size = util_format_get_blocksize(scene->fb.zsbuf->format); - for (layer = 0; layer <= scene->fb_max_layer; layer++) { - dst = dst_layer; + clear_value &= clear_mask; - switch (block_size) { - case 1: - 
assert(clear_mask == 0xff); - memset(dst, (uint8_t) clear_value, height * width); - break; - case 2: - if (clear_mask == 0xffff) { + for (layer = 0; layer <= scene->fb_max_layer; layer++) { + dst = dst_layer; + + switch (block_size) { + case 1: + assert(clear_mask == 0xff); for (i = 0; i < height; i++) { - uint16_t *row = (uint16_t *)dst; - for (j = 0; j < width; j++) - *row++ = (uint16_t) clear_value; + uint8_t *row = (uint8_t *)dst; + memset(row, (uint8_t) clear_value, width); dst += dst_stride; } - } - else { - for (i = 0; i < height; i++) { - uint16_t *row = (uint16_t *)dst; - for (j = 0; j < width; j++) { - uint16_t tmp = ~clear_mask & *row; - *row++ = clear_value | tmp; + break; + case 2: + if (clear_mask == 0xffff) { + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst; + for (j = 0; j < width; j++) + *row++ = (uint16_t) clear_value; + dst += dst_stride; } - dst += dst_stride; } - } - break; - case 4: - if (clear_mask == 0xffffffff) { - for (i = 0; i < height; i++) { - uint32_t *row = (uint32_t *)dst; - for (j = 0; j < width; j++) - *row++ = clear_value; - dst += dst_stride; + else { + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst; + for (j = 0; j < width; j++) { + uint16_t tmp = ~clear_mask & *row; + *row++ = clear_value | tmp; + } + dst += dst_stride; + } } - } - else { - for (i = 0; i < height; i++) { - uint32_t *row = (uint32_t *)dst; - for (j = 0; j < width; j++) { - uint32_t tmp = ~clear_mask & *row; - *row++ = clear_value | tmp; + break; + case 4: + if (clear_mask == 0xffffffff) { + for (i = 0; i < height; i++) { + util_memset32(dst, clear_value, width); + dst += dst_stride; } - dst += dst_stride; } - } - break; - case 8: - clear_value64 &= clear_mask64; - if (clear_mask64 == 0xffffffffffULL) { - for (i = 0; i < height; i++) { - uint64_t *row = (uint64_t *)dst; - for (j = 0; j < width; j++) - *row++ = clear_value64; - dst += dst_stride; + else { + for (i = 0; i < height; i++) { + uint32_t *row = (uint32_t *)dst; + for 
(j = 0; j < width; j++) { + uint32_t tmp = ~clear_mask & *row; + *row++ = clear_value | tmp; + } + dst += dst_stride; + } } - } - else { - for (i = 0; i < height; i++) { - uint64_t *row = (uint64_t *)dst; - for (j = 0; j < width; j++) { - uint64_t tmp = ~clear_mask64 & *row; - *row++ = clear_value64 | tmp; + break; + case 8: + clear_value64 &= clear_mask64; + if (clear_mask64 == 0xffffffffffULL) { + for (i = 0; i < height; i++) { + util_memset64(dst, clear_value64, width); + dst += dst_stride; } - dst += dst_stride; } - } - break; + else { + for (i = 0; i < height; i++) { + uint64_t *row = (uint64_t *)dst; + for (j = 0; j < width; j++) { + uint64_t tmp = ~clear_mask64 & *row; + *row++ = clear_value64 | tmp; + } + dst += dst_stride; + } + } + break; - default: - assert(0); - break; + default: + assert(0); + break; + } + dst_layer += scene->zsbuf.layer_stride; } - dst_layer += scene->zsbuf.layer_stride; } } } @@ -321,19 +330,23 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, for (x = 0; x < task->width; x += 4) { uint8_t *color[PIPE_MAX_COLOR_BUFS]; unsigned stride[PIPE_MAX_COLOR_BUFS]; + unsigned sample_stride[PIPE_MAX_COLOR_BUFS]; uint8_t *depth = NULL; unsigned depth_stride = 0; + unsigned depth_sample_stride = 0; unsigned i; /* color buffer */ for (i = 0; i < scene->fb.nr_cbufs; i++){ if (scene->fb.cbufs[i]) { stride[i] = scene->cbufs[i].stride; + sample_stride[i] = scene->cbufs[i].sample_stride; color[i] = lp_rast_get_color_block_pointer(task, i, tile_x + x, - tile_y + y, inputs->layer); + tile_y + y, inputs->layer + inputs->view_index); } else { stride[i] = 0; + sample_stride[i] = 0; color[i] = NULL; } } @@ -341,12 +354,18 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, /* depth buffer */ if (scene->zsbuf.map) { depth = lp_rast_get_depth_block_pointer(task, tile_x + x, - tile_y + y, inputs->layer); + tile_y + y, inputs->layer + inputs->view_index); depth_stride = scene->zsbuf.stride; + depth_sample_stride = scene->zsbuf.sample_stride; } + uint64_t 
mask = 0; + for (unsigned i = 0; i < scene->fb_max_samples; i++) + mask |= (uint64_t)(0xffff) << (16 * i); + /* Propagate non-interpolated raster state. */ task->thread_data.raster_state.viewport_index = inputs->viewport_index; + task->thread_data.raster_state.view_index = inputs->view_index; /* run shader on 4x4 block */ BEGIN_JIT_CALL(state, task); @@ -358,10 +377,12 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, GET_DADY(inputs), color, depth, - 0xffff, + mask, &task->thread_data, stride, - depth_stride); + depth_stride, + sample_stride, + depth_sample_stride); END_JIT_CALL(); } } @@ -395,18 +416,20 @@ lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task, * \param y Y position of quad in window coords */ void -lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, - const struct lp_rast_shader_inputs *inputs, - unsigned x, unsigned y, - unsigned mask) +lp_rast_shade_quads_mask_sample(struct lp_rasterizer_task *task, + const struct lp_rast_shader_inputs *inputs, + unsigned x, unsigned y, + uint64_t mask) { const struct lp_rast_state *state = task->state; struct lp_fragment_shader_variant *variant = state->variant; const struct lp_scene *scene = task->scene; uint8_t *color[PIPE_MAX_COLOR_BUFS]; unsigned stride[PIPE_MAX_COLOR_BUFS]; + unsigned sample_stride[PIPE_MAX_COLOR_BUFS]; uint8_t *depth = NULL; unsigned depth_stride = 0; + unsigned depth_sample_stride = 0; unsigned i; assert(state); @@ -424,11 +447,13 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, for (i = 0; i < scene->fb.nr_cbufs; i++) { if (scene->fb.cbufs[i]) { stride[i] = scene->cbufs[i].stride; + sample_stride[i] = scene->cbufs[i].sample_stride; color[i] = lp_rast_get_color_block_pointer(task, i, x, y, - inputs->layer); + inputs->layer + inputs->view_index); } else { stride[i] = 0; + sample_stride[i] = 0; color[i] = NULL; } } @@ -436,7 +461,8 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, /* depth buffer */ if (scene->zsbuf.map) { depth_stride = 
scene->zsbuf.stride; - depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer); + depth_sample_stride = scene->zsbuf.sample_stride; + depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer + inputs->view_index); } assert(lp_check_alignment(state->jit_context.u8_blend_color, 16)); @@ -448,6 +474,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) { /* Propagate non-interpolated raster state. */ task->thread_data.raster_state.viewport_index = inputs->viewport_index; + task->thread_data.raster_state.view_index = inputs->view_index; /* run shader on 4x4 block */ BEGIN_JIT_CALL(state, task); @@ -462,12 +489,24 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, mask, &task->thread_data, stride, - depth_stride); + depth_stride, + sample_stride, + depth_sample_stride); END_JIT_CALL(); } } - +void +lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, + const struct lp_rast_shader_inputs *inputs, + unsigned x, unsigned y, + unsigned mask) +{ + uint64_t new_mask = 0; + for (unsigned i = 0; i < task->scene->fb_max_samples; i++) + new_mask |= ((uint64_t)mask) << (16 * i); + lp_rast_shade_quads_mask_sample(task, inputs, x, y, new_mask); +} /** * Begin a new occlusion query. 
@@ -588,7 +627,18 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] = lp_rast_triangle_32_8, lp_rast_triangle_32_3_4, lp_rast_triangle_32_3_16, - lp_rast_triangle_32_4_16 + lp_rast_triangle_32_4_16, + lp_rast_triangle_ms_1, + lp_rast_triangle_ms_2, + lp_rast_triangle_ms_3, + lp_rast_triangle_ms_4, + lp_rast_triangle_ms_5, + lp_rast_triangle_ms_6, + lp_rast_triangle_ms_7, + lp_rast_triangle_ms_8, + lp_rast_triangle_ms_3_4, + lp_rast_triangle_ms_3_16, + lp_rast_triangle_ms_4_16, }; diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 4b5ca8192..c8154348e 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -131,7 +131,11 @@ struct lp_rasterizer util_barrier barrier; }; - +void +lp_rast_shade_quads_mask_sample(struct lp_rasterizer_task *task, + const struct lp_rast_shader_inputs *inputs, + unsigned x, unsigned y, + uint64_t mask); void lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, const struct lp_rast_shader_inputs *inputs, @@ -230,28 +234,37 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, struct lp_fragment_shader_variant *variant = state->variant; uint8_t *color[PIPE_MAX_COLOR_BUFS]; unsigned stride[PIPE_MAX_COLOR_BUFS]; + unsigned sample_stride[PIPE_MAX_COLOR_BUFS]; uint8_t *depth = NULL; unsigned depth_stride = 0; + unsigned depth_sample_stride = 0; unsigned i; /* color buffer */ for (i = 0; i < scene->fb.nr_cbufs; i++) { if (scene->fb.cbufs[i]) { stride[i] = scene->cbufs[i].stride; + sample_stride[i] = scene->cbufs[i].sample_stride; color[i] = lp_rast_get_color_block_pointer(task, i, x, y, - inputs->layer); + inputs->layer + inputs->view_index); } else { stride[i] = 0; + sample_stride[i] = 0; color[i] = NULL; } } if (scene->zsbuf.map) { - depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer); + depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer + 
inputs->view_index); + depth_sample_stride = scene->zsbuf.sample_stride; depth_stride = scene->zsbuf.stride; } + uint64_t mask = 0; + for (unsigned i = 0; i < scene->fb_max_samples; i++) + mask |= (uint64_t)0xffff << (16 * i); + /* * The rasterizer may produce fragments outside our * allocated 4x4 blocks hence need to filter them out here. @@ -259,6 +272,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) { /* Propagate non-interpolated raster state. */ task->thread_data.raster_state.viewport_index = inputs->viewport_index; + task->thread_data.raster_state.view_index = inputs->view_index; /* run shader on 4x4 block */ BEGIN_JIT_CALL(state, task); @@ -270,10 +284,12 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, GET_DADY(inputs), color, depth, - 0xffff, + mask, &task->thread_data, stride, - depth_stride); + depth_stride, + sample_stride, + depth_sample_stride); END_JIT_CALL(); } } @@ -331,6 +347,58 @@ void lp_rast_triangle_32_3_16( struct lp_rasterizer_task *, void lp_rast_triangle_32_4_16( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); +void lp_rast_triangle_ms_1( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_ms_2( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_ms_3( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_ms_4( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_ms_5( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_ms_6( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_ms_7( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_ms_8( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void lp_rast_triangle_ms_3_4(struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void 
lp_rast_triangle_ms_3_16( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void lp_rast_triangle_ms_4_16( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void lp_rast_triangle_ms_32_1( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_ms_32_2( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_ms_32_3( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_ms_32_4( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_ms_32_5( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_ms_32_6( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_ms_32_7( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_ms_32_8( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void lp_rast_triangle_ms_32_3_4(struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void lp_rast_triangle_ms_32_3_16( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void lp_rast_triangle_ms_32_4_16( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + void lp_rast_set_state(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg); diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c index beab118ac..539b84c65 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c @@ -34,6 +34,8 @@ #include "lp_scene.h" #include "lp_fence.h" #include "lp_debug.h" +#include "lp_context.h" +#include "lp_state_fs.h" #define RESOURCE_REF_SZ 32 @@ -45,6 +47,14 @@ struct resource_ref { struct resource_ref *next; }; +#define SHADER_REF_SZ 32 +/** List of shader variant references */ +struct shader_ref { + struct lp_fragment_shader_variant *variant[SHADER_REF_SZ]; + int count; + struct shader_ref 
*next; +}; + /** * Create a new scene object. @@ -106,8 +116,8 @@ lp_scene_is_empty(struct lp_scene *scene ) { unsigned x, y; - for (y = 0; y < TILES_Y; y++) { - for (x = 0; x < TILES_X; x++) { + for (y = 0; y < scene->tiles_y; y++) { + for (x = 0; x < scene->tiles_x; x++) { const struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); if (bin->head) { return FALSE; @@ -160,6 +170,8 @@ lp_scene_begin_rasterization(struct lp_scene *scene) if (!cbuf) { scene->cbufs[i].stride = 0; scene->cbufs[i].layer_stride = 0; + scene->cbufs[i].sample_stride = 0; + scene->cbufs[i].nr_samples = 0; scene->cbufs[i].map = NULL; continue; } @@ -169,18 +181,22 @@ lp_scene_begin_rasterization(struct lp_scene *scene) cbuf->u.tex.level); scene->cbufs[i].layer_stride = llvmpipe_layer_stride(cbuf->texture, cbuf->u.tex.level); + scene->cbufs[i].sample_stride = llvmpipe_sample_stride(cbuf->texture); scene->cbufs[i].map = llvmpipe_resource_map(cbuf->texture, cbuf->u.tex.level, cbuf->u.tex.first_layer, LP_TEX_USAGE_READ_WRITE); scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format); + scene->cbufs[i].nr_samples = util_res_sample_count(cbuf->texture); } else { struct llvmpipe_resource *lpr = llvmpipe_resource(cbuf->texture); unsigned pixstride = util_format_get_blocksize(cbuf->format); scene->cbufs[i].stride = cbuf->texture->width0; scene->cbufs[i].layer_stride = 0; + scene->cbufs[i].sample_stride = 0; + scene->cbufs[i].nr_samples = 1; scene->cbufs[i].map = lpr->data; scene->cbufs[i].map += cbuf->u.buf.first_element * pixstride; scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format); @@ -191,7 +207,8 @@ lp_scene_begin_rasterization(struct lp_scene *scene) struct pipe_surface *zsbuf = scene->fb.zsbuf; scene->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->u.tex.level); scene->zsbuf.layer_stride = llvmpipe_layer_stride(zsbuf->texture, zsbuf->u.tex.level); - + scene->zsbuf.sample_stride = llvmpipe_sample_stride(zsbuf->texture); + scene->zsbuf.nr_samples = 
util_res_sample_count(zsbuf->texture); scene->zsbuf.map = llvmpipe_resource_map(zsbuf->texture, zsbuf->u.tex.level, zsbuf->u.tex.first_layer, @@ -274,6 +291,22 @@ lp_scene_end_rasterization(struct lp_scene *scene ) j, scene->resource_reference_size); } + /* Decrement shader variant ref counts + */ + { + struct shader_ref *ref; + int i, j = 0; + + for (ref = scene->frag_shaders; ref; ref = ref->next) { + for (i = 0; i < ref->count; i++) { + if (LP_DEBUG & DEBUG_SETUP) + debug_printf("shader %d: %p\n", j, (void *) ref->variant[i]); + j++; + lp_fs_variant_reference(llvmpipe_context(scene->pipe), &ref->variant[i], NULL); + } + } + } + /* Free all scene data blocks: */ { @@ -292,6 +325,7 @@ lp_scene_end_rasterization(struct lp_scene *scene ) lp_fence_reference(&scene->fence, NULL); scene->resources = NULL; + scene->frag_shaders = NULL; scene->scene_size = 0; scene->resource_reference_size = 0; @@ -428,6 +462,53 @@ lp_scene_add_resource_reference(struct lp_scene *scene, /** + * Add a reference to a fragment shader variant + */ +boolean +lp_scene_add_frag_shader_reference(struct lp_scene *scene, + struct lp_fragment_shader_variant *variant) +{ + struct shader_ref *ref, **last = &scene->frag_shaders; + int i; + + /* Look at existing resource blocks: + */ + for (ref = scene->frag_shaders; ref; ref = ref->next) { + last = &ref->next; + + /* Search for this resource: + */ + for (i = 0; i < ref->count; i++) + if (ref->variant[i] == variant) + return TRUE; + + if (ref->count < SHADER_REF_SZ) { + /* If the block is half-empty, then append the reference here. + */ + break; + } + } + + /* Create a new block if no half-empty block was found. + */ + if (!ref) { + assert(*last == NULL); + *last = lp_scene_alloc(scene, sizeof *ref); + if (*last == NULL) + return FALSE; + + ref = *last; + memset(ref, 0, sizeof *ref); + } + + /* Append the reference to the reference block. 
+ */ + lp_fs_variant_reference(llvmpipe_context(scene->pipe), &ref->variant[ref->count++], variant); + + return TRUE; +} + +/** * Does this scene have a reference to the given resource? */ boolean @@ -545,6 +626,13 @@ void lp_scene_begin_binning(struct lp_scene *scene, max_layer = MIN2(max_layer, zsbuf->u.tex.last_layer - zsbuf->u.tex.first_layer); } scene->fb_max_layer = max_layer; + scene->fb_max_samples = util_framebuffer_get_num_samples(fb); + if (scene->fb_max_samples == 4) { + for (unsigned i = 0; i < 4; i++) { + scene->fixed_sample_pos[i][0] = util_iround(lp_sample_pos_4x[i][0] * FIXED_ONE); + scene->fixed_sample_pos[i][1] = util_iround(lp_sample_pos_4x[i][1] * FIXED_ONE); + } + } } diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h index b4ed8817e..ba6b20139 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h @@ -60,7 +60,7 @@ struct lp_rast_state; /* Scene temporary storage is clamped to this size: */ -#define LP_SCENE_MAX_SIZE (9*1024*1024) +#define LP_SCENE_MAX_SIZE (36*1024*1024) /* The maximum amount of texture storage referenced by a scene is * clamped to this size: @@ -117,6 +117,8 @@ struct data_block_list { struct resource_ref; +struct shader_ref; + /** * All bins and bin data are contained here. * Per-bin data goes into the 'tile' bins. @@ -143,17 +145,28 @@ struct lp_scene { unsigned stride; unsigned layer_stride; unsigned format_bytes; + unsigned sample_stride; + unsigned nr_samples; } zsbuf, cbufs[PIPE_MAX_COLOR_BUFS]; /* The amount of layers in the fb (minimum of all attachments) */ unsigned fb_max_layer; + /* fixed point sample positions. 
*/ + int32_t fixed_sample_pos[LP_MAX_SAMPLES][2]; + + /* max samples for bound framebuffer */ + unsigned fb_max_samples; + /** the framebuffer to render the scene into */ struct pipe_framebuffer_state fb; /** list of resources referenced by the scene commands */ struct resource_ref *resources; + /** list of frag shaders referenced by the scene commands */ + struct shader_ref *frag_shaders; + /** Total memory used by the scene (in bytes). This sums all the * data blocks and counts all bins, state, resource references and * other random allocations within the scene. @@ -201,6 +214,10 @@ boolean lp_scene_add_resource_reference(struct lp_scene *scene, boolean lp_scene_is_resource_referenced(const struct lp_scene *scene, const struct pipe_resource *resource ); +boolean lp_scene_add_frag_shader_reference(struct lp_scene *scene, + struct lp_fragment_shader_variant *variant); + + /** * Allocate space for a command/data in the bin's data buffer. diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c index e951baa06..2adf8b786 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c @@ -38,7 +38,7 @@ #include "draw/draw_context.h" #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_nir.h" - +#include "util/disk_cache.h" #include "util/os_misc.h" #include "util/os_time.h" #include "lp_texture.h" @@ -52,7 +52,7 @@ #include "lp_rast.h" #include "lp_cs_tpool.h" -#include "state_tracker/sw_winsys.h" +#include "frontend/sw_winsys.h" #include "nir.h" @@ -74,7 +74,7 @@ static const struct debug_named_value lp_debug_flags[] = { { "fs", DEBUG_FS, NULL }, { "cs", DEBUG_CS, NULL }, { "tgsi_ir", DEBUG_TGSI_IR, NULL }, - { "cl", DEBUG_CL, NULL }, + { "cache_stats", DEBUG_CACHE_STATS, NULL }, DEBUG_NAMED_VALUE_END }; #endif @@ -96,7 +96,7 @@ static const struct debug_named_value lp_perf_flags[] = { static const char * llvmpipe_get_vendor(struct pipe_screen 
*screen) { - return "VMware, Inc."; + return "Mesa/X.org"; } @@ -126,16 +126,11 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: return PIPE_MAX_SO_BUFFERS; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; case PIPE_CAP_POINT_SPRITE: return 1; case PIPE_CAP_MAX_RENDER_TARGETS: return PIPE_MAX_COLOR_BUFS; case PIPE_CAP_OCCLUSION_QUERY: - return 1; - case PIPE_CAP_QUERY_TIME_ELAPSED: - return 0; case PIPE_CAP_QUERY_TIMESTAMP: return 1; case PIPE_CAP_QUERY_PIPELINE_STATISTICS: @@ -144,10 +139,8 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE: return 1; case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_TEXTURE_SHADOW_LOD: return 1; - case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: - case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: - return 0; case PIPE_CAP_MAX_TEXTURE_2D_SIZE: return 1 << (LP_MAX_TEXTURE_2D_LEVELS - 1); case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: @@ -166,10 +159,8 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: return 1; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE: - return 0; case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX: return 1; case PIPE_CAP_DEPTH_CLIP_DISABLE: return 1; @@ -178,9 +169,6 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: case PIPE_CAP_START_INSTANCE: - return 1; - case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: - return 0; case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP: @@ -194,41 +182,35 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MAX_TEXEL_OFFSET: return 31; case PIPE_CAP_CONDITIONAL_RENDER: - return 1; case PIPE_CAP_TEXTURE_BARRIER: - return 
0; + return 1; case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: return 16*4; case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: return 1024; - case PIPE_CAP_MAX_VERTEX_STREAMS: - return 1; + case PIPE_CAP_MAX_VERTEX_STREAMS: { + struct llvmpipe_screen *lscreen = llvmpipe_screen(screen); + return lscreen->use_tgsi ? 1 : 4; + } case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: return 2048; case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: - return 1; - case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: - return 0; case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: case PIPE_CAP_VERTEX_COLOR_CLAMPED: return 1; - case PIPE_CAP_GLSL_FEATURE_LEVEL: - return 330; + case PIPE_CAP_GLSL_FEATURE_LEVEL: { + struct llvmpipe_screen *lscreen = llvmpipe_screen(screen); + return lscreen->use_tgsi ? 330 : 450; + } case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: return 140; - case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: - return 0; case PIPE_CAP_COMPUTE: return GALLIVM_HAVE_CORO; case PIPE_CAP_USER_VERTEX_BUFFERS: return 1; - case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: - case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: - case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: - return 0; case PIPE_CAP_TGSI_TEXCOORD: case PIPE_CAP_DRAW_INDIRECT: return 1; @@ -237,14 +219,13 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 16; - case PIPE_CAP_TEXTURE_MULTISAMPLE: - return 0; case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: return 64; case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: return 1; + /* Adressing that many 64bpp texels fits in an i32 so this is a reasonable value */ case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: - return 65536; + return 134217728; case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: return 16; case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: @@ -253,16 +234,13 @@ 
llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return PIPE_MAX_VIEWPORTS; case PIPE_CAP_ENDIANNESS: return PIPE_ENDIAN_NATIVE; + case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: return 1; case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: return 1; case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: return 4; - case PIPE_CAP_TEXTURE_GATHER_SM5: - case PIPE_CAP_SAMPLE_SHADING: - case PIPE_CAP_TEXTURE_GATHER_OFFSETS: - return 0; case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: return 1; case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: @@ -270,8 +248,10 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_TEX_TXF_LZ: case PIPE_CAP_SAMPLER_VIEW_TARGET: return 1; - case PIPE_CAP_FAKE_SW_MSAA: - return 1; + case PIPE_CAP_FAKE_SW_MSAA: { + struct llvmpipe_screen *lscreen = llvmpipe_screen(screen); + return lscreen->use_tgsi ? 1 : 0; + } case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: case PIPE_CAP_TGSI_ARRAY_COMPONENTS: @@ -307,8 +287,6 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 0; case PIPE_CAP_CLIP_HALFZ: return 1; - case PIPE_CAP_VERTEXID_NOBASE: - return 0; case PIPE_CAP_POLYGON_OFFSET_CLAMP: case PIPE_CAP_TEXTURE_FLOAT_LINEAR: case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: @@ -322,71 +300,39 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MAX_VARYINGS: return 32; case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: - return 1; + return 16; case PIPE_CAP_QUERY_BUFFER_OBJECT: return 1; case PIPE_CAP_DRAW_PARAMETERS: return 1; + case PIPE_CAP_FBFETCH: + return 8; + case PIPE_CAP_FBFETCH_COHERENT: + return 0; case PIPE_CAP_MULTI_DRAW_INDIRECT: case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: return 1; - case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: - case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: + case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: + return 1; case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: - case 
PIPE_CAP_DEPTH_BOUNDS_TEST: - case PIPE_CAP_TGSI_TXQS: - case PIPE_CAP_FORCE_PERSAMPLE_INTERP: - case PIPE_CAP_SHAREABLE_SHADERS: - case PIPE_CAP_TGSI_PACK_HALF_FLOAT: - case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: - case PIPE_CAP_INVALIDATE_BUFFER: - case PIPE_CAP_GENERATE_MIPMAP: - case PIPE_CAP_STRING_MARKER: - case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: - case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: - case PIPE_CAP_QUERY_MEMORY_INFO: + return 32; + case PIPE_CAP_RASTERIZER_SUBPIXEL_BITS: + return 8; case PIPE_CAP_PCI_GROUP: case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: - case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: - case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES: - case PIPE_CAP_MAX_WINDOW_RECTANGLES: - case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED: - case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS: - case PIPE_CAP_TGSI_CAN_READ_OUTPUTS: - case PIPE_CAP_NATIVE_FENCE_FD: case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: - case PIPE_CAP_FBFETCH: - case PIPE_CAP_TGSI_MUL_ZERO_WINS: - case PIPE_CAP_TGSI_CLOCK: - case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE: - case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE: - case PIPE_CAP_TGSI_BALLOT: - case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: - case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: - case PIPE_CAP_POST_DEPTH_COVERAGE: - case PIPE_CAP_BINDLESS_TEXTURE: - case PIPE_CAP_NIR_SAMPLERS_AS_DEREF: - case PIPE_CAP_MEMOBJ: - case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS: - case PIPE_CAP_TILE_RASTER_ORDER: - case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES: - case PIPE_CAP_FRAMEBUFFER_MSAA_CONSTRAINTS: - case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET: - case PIPE_CAP_CONTEXT_PRIORITY_MASK: - case PIPE_CAP_FENCE_SIGNAL: - case PIPE_CAP_CONSTBUF0_FLAGS: - case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES: - case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES: - case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES: - case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES: - case 
PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE: - case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS: - case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS: return 0; + + case PIPE_CAP_SHAREABLE_SHADERS: + /* Can't expose shareable shaders because the draw shaders reference the + * draw module's state, which is per-context. + */ + return 0; + case PIPE_CAP_MAX_GS_INVOCATIONS: return 32; case PIPE_CAP_MAX_SHADER_BUFFER_SIZE: @@ -395,8 +341,14 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE: case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: return 1; + case PIPE_CAP_SAMPLER_REDUCTION_MINMAX: + case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_TGSI_VOTE: case PIPE_CAP_LOAD_CONSTBUF: + case PIPE_CAP_TEXTURE_MULTISAMPLE: + case PIPE_CAP_SAMPLE_SHADING: + case PIPE_CAP_GL_SPIRV: + case PIPE_CAP_POST_DEPTH_COVERAGE: case PIPE_CAP_PACKED_UNIFORMS: { struct llvmpipe_screen *lscreen = llvmpipe_screen(screen); return !lscreen->use_tgsi; @@ -411,14 +363,15 @@ llvmpipe_get_shader_param(struct pipe_screen *screen, enum pipe_shader_type shader, enum pipe_shader_cap param) { + struct llvmpipe_screen *lscreen = llvmpipe_screen(screen); switch(shader) { case PIPE_SHADER_COMPUTE: - if ((LP_DEBUG & DEBUG_CL) && param == PIPE_SHADER_CAP_SUPPORTED_IRS) + if ((lscreen->allow_cl) && param == PIPE_SHADER_CAP_SUPPORTED_IRS) return (1 << PIPE_SHADER_IR_TGSI) | (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_NIR_SERIALIZED); + FALLTHROUGH; case PIPE_SHADER_FRAGMENT: if (param == PIPE_SHADER_CAP_PREFERRED_IR) { - struct llvmpipe_screen *lscreen = llvmpipe_screen(screen); if (lscreen->use_tgsi) return PIPE_SHADER_IR_TGSI; else @@ -428,10 +381,16 @@ llvmpipe_get_shader_param(struct pipe_screen *screen, default: return gallivm_get_shader_param(param); } + FALLTHROUGH; + case PIPE_SHADER_TESS_CTRL: + case PIPE_SHADER_TESS_EVAL: + /* Tessellation shader needs llvm coroutines support */ + if (!GALLIVM_HAVE_CORO || lscreen->use_tgsi) + return 
0; + FALLTHROUGH; case PIPE_SHADER_VERTEX: case PIPE_SHADER_GEOMETRY: if (param == PIPE_SHADER_CAP_PREFERRED_IR) { - struct llvmpipe_screen *lscreen = llvmpipe_screen(screen); if (lscreen->use_tgsi) return PIPE_SHADER_IR_TGSI; else @@ -466,13 +425,13 @@ llvmpipe_get_paramf(struct pipe_screen *screen, enum pipe_capf param) { switch (param) { case PIPE_CAPF_MAX_LINE_WIDTH: - /* fall-through */ + FALLTHROUGH; case PIPE_CAPF_MAX_LINE_WIDTH_AA: return 255.0; /* arbitrary */ case PIPE_CAPF_MAX_POINT_WIDTH: - /* fall-through */ + FALLTHROUGH; case PIPE_CAPF_MAX_POINT_WIDTH_AA: - return 255.0; /* arbitrary */ + return LP_MAX_POINT_WIDTH; /* arbitrary */ case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: return 16.0; /* not actually signficant at this time */ case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: @@ -559,7 +518,7 @@ llvmpipe_get_compute_param(struct pipe_screen *_screen, case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: if (ret) { uint32_t *images = ret; - *images = 0; + *images = LP_MAX_TGSI_SHADER_IMAGES; } return sizeof(uint32_t); case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: @@ -599,27 +558,40 @@ static const struct nir_shader_compiler_options gallivm_nir_options = { .lower_fsat = true, .lower_bitfield_insert_to_shifts = true, .lower_bitfield_extract_to_shifts = true, - .lower_sub = true, - .lower_ffma = true, + .lower_fdot = true, + .lower_fdph = true, + .lower_ffma16 = true, + .lower_ffma32 = true, + .lower_ffma64 = true, .lower_fmod = true, .lower_hadd = true, .lower_add_sat = true, + .lower_ldexp = true, .lower_pack_snorm_2x16 = true, .lower_pack_snorm_4x8 = true, .lower_pack_unorm_2x16 = true, .lower_pack_unorm_4x8 = true, + .lower_pack_half_2x16 = true, + .lower_pack_split = true, .lower_unpack_snorm_2x16 = true, .lower_unpack_snorm_4x8 = true, .lower_unpack_unorm_2x16 = true, .lower_unpack_unorm_4x8 = true, + .lower_unpack_half_2x16 = true, .lower_extract_byte = true, .lower_extract_word = true, .lower_rotate = true, + .lower_uadd_carry = true, + .lower_usub_borrow = true, + 
.lower_mul_2x32_64 = true, .lower_ifind_msb = true, - .optimize_sample_mask_in = true, .max_unroll_iterations = 32, .use_interpolated_input_intrinsics = true, .lower_to_scalar = true, + .lower_cs_local_index_from_id = true, + .lower_uniforms_to_ubo = true, + .lower_vector_cmp = true, + .lower_device_index_to_zero = true, }; static void @@ -671,7 +643,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY); - if (sample_count > 1) + if (sample_count != 0 && sample_count != 1 && sample_count != 4) return false; if (MAX2(1, sample_count) != MAX2(1, storage_sample_count)) @@ -716,6 +688,10 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, } } + if (!(bind & PIPE_BIND_VERTEX_BUFFER) && + util_format_is_scaled(format)) + return false; + if (bind & PIPE_BIND_DISPLAY_TARGET) { if(!winsys->is_displaytarget_format_supported(winsys, bind, format)) return false; @@ -727,16 +703,10 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) return false; - - /* TODO: Support stencil-only formats */ - if (format_desc->swizzle[0] == PIPE_SWIZZLE_NONE) { - return false; - } } if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC || - format_desc->layout == UTIL_FORMAT_LAYOUT_ATC || - format_desc->layout == UTIL_FORMAT_LAYOUT_FXT1) { + format_desc->layout == UTIL_FORMAT_LAYOUT_ATC) { /* Software decoding is not hooked up. 
*/ return false; } @@ -758,6 +728,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, static void llvmpipe_flush_frontbuffer(struct pipe_screen *_screen, + struct pipe_context *_pipe, struct pipe_resource *resource, unsigned level, unsigned layer, void *context_private, @@ -786,6 +757,10 @@ llvmpipe_destroy_screen( struct pipe_screen *_screen ) lp_jit_screen_cleanup(screen); + if (LP_DEBUG & DEBUG_CACHE_STATS) + printf("disk shader cache: hits = %u, misses = %u\n", screen->num_disk_shader_cache_hits, + screen->num_disk_shader_cache_misses); + disk_cache_destroy(screen->disk_shader_cache); if(winsys->destroy) winsys->destroy(winsys); @@ -843,6 +818,77 @@ llvmpipe_get_timestamp(struct pipe_screen *_screen) return os_time_get_nano(); } +static void update_cache_sha1_cpu(struct mesa_sha1 *ctx) +{ + const struct util_cpu_caps_t *cpu_caps = util_get_cpu_caps(); + /* + * Don't need the cpu cache affinity stuff. The rest + * is contained in first 5 dwords. + */ + STATIC_ASSERT(offsetof(struct util_cpu_caps_t, num_L3_caches) == 5 * sizeof(uint32_t)); + _mesa_sha1_update(ctx, cpu_caps, 5 * sizeof(uint32_t)); +} + +static void lp_disk_cache_create(struct llvmpipe_screen *screen) +{ + struct mesa_sha1 ctx; + unsigned gallivm_perf = gallivm_get_perf_flags(); + unsigned char sha1[20]; + char cache_id[20 * 2 + 1]; + _mesa_sha1_init(&ctx); + + if (!disk_cache_get_function_identifier(lp_disk_cache_create, &ctx) || + !disk_cache_get_function_identifier(LLVMLinkInMCJIT, &ctx)) + return; + + _mesa_sha1_update(&ctx, &gallivm_perf, sizeof(gallivm_perf)); + update_cache_sha1_cpu(&ctx); + _mesa_sha1_final(&ctx, sha1); + disk_cache_format_hex_id(cache_id, sha1, 20 * 2); + + screen->disk_shader_cache = disk_cache_create("llvmpipe", cache_id, 0); +} + +static struct disk_cache *lp_get_disk_shader_cache(struct pipe_screen *_screen) +{ + struct llvmpipe_screen *screen = llvmpipe_screen(_screen); + + return screen->disk_shader_cache; +} + +void lp_disk_cache_find_shader(struct 
llvmpipe_screen *screen, + struct lp_cached_code *cache, + unsigned char ir_sha1_cache_key[20]) +{ + unsigned char sha1[CACHE_KEY_SIZE]; + + if (!screen->disk_shader_cache) + return; + disk_cache_compute_key(screen->disk_shader_cache, ir_sha1_cache_key, 20, sha1); + + size_t binary_size; + uint8_t *buffer = disk_cache_get(screen->disk_shader_cache, sha1, &binary_size); + if (!buffer) { + cache->data_size = 0; + p_atomic_inc(&screen->num_disk_shader_cache_misses); + return; + } + cache->data_size = binary_size; + cache->data = buffer; + p_atomic_inc(&screen->num_disk_shader_cache_hits); +} + +void lp_disk_cache_insert_shader(struct llvmpipe_screen *screen, + struct lp_cached_code *cache, + unsigned char ir_sha1_cache_key[20]) +{ + unsigned char sha1[CACHE_KEY_SIZE]; + + if (!screen->disk_shader_cache || !cache->data_size || cache->dont_cache) + return; + disk_cache_compute_key(screen->disk_shader_cache, ir_sha1_cache_key, 20, sha1); + disk_cache_put(screen->disk_shader_cache, sha1, cache->data, cache->data_size, NULL); +} /** * Create a new pipe_screen object * Note: we're not presently subclassing pipe_screen (no llvmpipe_screen). @@ -893,10 +939,13 @@ llvmpipe_create_screen(struct sw_winsys *winsys) screen->base.get_timestamp = llvmpipe_get_timestamp; screen->base.finalize_nir = llvmpipe_finalize_nir; + + screen->base.get_disk_shader_cache = lp_get_disk_shader_cache; llvmpipe_init_screen_resource_funcs(&screen->base); + screen->allow_cl = !!getenv("LP_CL"); screen->use_tgsi = (LP_DEBUG & DEBUG_TGSI_IR); - screen->num_threads = util_cpu_caps.nr_cpus > 1 ? util_cpu_caps.nr_cpus : 0; + screen->num_threads = util_get_cpu_caps()->nr_cpus > 1 ? 
util_get_cpu_caps()->nr_cpus : 0; #ifdef EMBEDDED_DEVICE screen->num_threads = 0; #endif @@ -920,5 +969,6 @@ llvmpipe_create_screen(struct sw_winsys *winsys) } (void) mtx_init(&screen->cs_mutex, mtx_plain); + lp_disk_cache_create(screen); return &screen->base; } diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h index 7c57f3ec1..a790c199c 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h @@ -38,7 +38,7 @@ #include "pipe/p_defines.h" #include "os/os_thread.h" #include "gallivm/lp_bld.h" - +#include "gallivm/lp_bld_misc.h" struct sw_winsys; struct lp_cs_tpool; @@ -62,9 +62,19 @@ struct llvmpipe_screen mtx_t cs_mutex; bool use_tgsi; -}; + bool allow_cl; + struct disk_cache *disk_shader_cache; + unsigned num_disk_shader_cache_hits; + unsigned num_disk_shader_cache_misses; +}; +void lp_disk_cache_find_shader(struct llvmpipe_screen *screen, + struct lp_cached_code *cache, + unsigned char ir_sha1_cache_key[20]); +void lp_disk_cache_insert_shader(struct llvmpipe_screen *screen, + struct lp_cached_code *cache, + unsigned char ir_sha1_cache_key[20]); static inline struct llvmpipe_screen * @@ -73,6 +83,10 @@ llvmpipe_screen( struct pipe_screen *pipe ) return (struct llvmpipe_screen *)pipe; } - +static inline unsigned lp_get_constant_buffer_stride(struct pipe_screen *_screen) +{ + struct llvmpipe_screen *screen = llvmpipe_screen(_screen); + return screen->use_tgsi ? 
(sizeof(float) * 4) : sizeof(float); +} #endif /* LP_SCREEN_H */ diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c index 002c8b8a2..883473919 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c @@ -53,7 +53,7 @@ #include "lp_setup_context.h" #include "lp_screen.h" #include "lp_state.h" -#include "state_tracker/sw_winsys.h" +#include "frontend/sw_winsys.h" #include "draw/draw_context.h" #include "draw/draw_vbuf.h" @@ -128,6 +128,7 @@ void lp_setup_reset( struct lp_setup_context *setup ) setup->constants[i].stored_size = 0; setup->constants[i].stored_data = NULL; } + setup->fs.stored = NULL; setup->dirty = ~0; @@ -409,23 +410,7 @@ lp_setup_try_clear_color_buffer(struct lp_setup_context *setup, LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state); - if (util_format_is_pure_integer(format)) { - /* - * We expect int/uint clear values here, though some APIs - * might disagree (but in any case util_pack_color() - * couldn't handle it)... - */ - if (util_format_is_pure_sint(format)) { - util_format_write_4i(format, color->i, 0, &uc, 0, 0, 0, 1, 1); - } - else { - assert(util_format_is_pure_uint(format)); - util_format_write_4ui(format, color->ui, 0, &uc, 0, 0, 0, 1, 1); - } - } - else { - util_pack_color(color->f, format, &uc); - } + util_pack_color_union(format, &uc, color); if (setup->state == SETUP_ACTIVE) { struct lp_scene *scene = setup->scene; @@ -456,7 +441,7 @@ lp_setup_try_clear_color_buffer(struct lp_setup_context *setup, else { /* Put ourselves into the 'pre-clear' state, specifically to try * and accumulate multiple clears to color and depth_stencil - * buffers which the app or state-tracker might issue + * buffers which the app or gallium frontend might issue * separately. 
*/ set_scene_state( setup, SETUP_CLEARED, __FUNCTION__ ); @@ -520,7 +505,7 @@ lp_setup_try_clear_zs(struct lp_setup_context *setup, else { /* Put ourselves into the 'pre-clear' state, specifically to try * and accumulate multiple clears to color and depth_stencil - * buffers which the app or state-tracker might issue + * buffers which the app or gallium frontend might issue * separately. */ set_scene_state( setup, SETUP_CLEARED, __FUNCTION__ ); @@ -583,13 +568,15 @@ lp_setup_set_triangle_state( struct lp_setup_context *setup, boolean ccw_is_frontface, boolean scissor, boolean half_pixel_center, - boolean bottom_edge_rule) + boolean bottom_edge_rule, + boolean multisample) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); setup->ccw_is_frontface = ccw_is_frontface; setup->cullmode = cull_mode; setup->triangle = first_triangle; + setup->multisample = multisample; setup->pixel_offset = half_pixel_center ? 0.5f : 0.0f; setup->bottom_edge_rule = bottom_edge_rule; @@ -638,7 +625,6 @@ lp_setup_set_fs_variant( struct lp_setup_context *setup, { LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, variant); - /* FIXME: reference count */ setup->fs.current.variant = variant; setup->dirty |= LP_SETUP_NEW_FS; @@ -656,10 +642,10 @@ lp_setup_set_fs_constants(struct lp_setup_context *setup, assert(num <= ARRAY_SIZE(setup->constants)); for (i = 0; i < num; ++i) { - util_copy_constant_buffer(&setup->constants[i].current, &buffers[i]); + util_copy_constant_buffer(&setup->constants[i].current, &buffers[i], false); } for (; i < ARRAY_SIZE(setup->constants); i++) { - util_copy_constant_buffer(&setup->constants[i].current, NULL); + util_copy_constant_buffer(&setup->constants[i].current, NULL, false); } setup->dirty |= LP_SETUP_NEW_CONSTANTS; } @@ -716,6 +702,7 @@ lp_setup_set_fs_images(struct lp_setup_context *setup, jit_image->width = res->width0; jit_image->height = res->height0; jit_image->depth = res->depth0; + jit_image->num_samples = res->nr_samples; if (llvmpipe_resource_is_texture(res)) { 
uint32_t mip_offset = lp_res->mip_offsets[image->u.tex.level]; @@ -741,6 +728,7 @@ lp_setup_set_fs_images(struct lp_setup_context *setup, jit_image->row_stride = lp_res->row_stride[image->u.tex.level]; jit_image->img_stride = lp_res->img_stride[image->u.tex.level]; + jit_image->sample_stride = lp_res->sample_stride; jit_image->base = (uint8_t *)jit_image->base + mip_offset; } else { @@ -753,7 +741,7 @@ lp_setup_set_fs_images(struct lp_setup_context *setup, for (; i < ARRAY_SIZE(setup->images); i++) { util_copy_image_view(&setup->images[i].current, NULL); } - setup->dirty |= LP_SETUP_NEW_IMAGES; + setup->dirty |= LP_SETUP_NEW_FS; } void @@ -815,6 +803,15 @@ lp_setup_set_scissors( struct lp_setup_context *setup, setup->dirty |= LP_SETUP_NEW_SCISSOR; } +void +lp_setup_set_sample_mask(struct lp_setup_context *setup, + uint32_t sample_mask) +{ + if (setup->fs.current.jit_context.sample_mask != sample_mask) { + setup->fs.current.jit_context.sample_mask = sample_mask; + setup->dirty |= LP_SETUP_NEW_FS; + } +} void lp_setup_set_flatshade_first(struct lp_setup_context *setup, @@ -938,6 +935,8 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, jit_tex->mip_offsets[0] = 0; jit_tex->row_stride[0] = 0; jit_tex->img_stride[0] = 0; + jit_tex->num_samples = 0; + jit_tex->sample_stride = 0; } else { jit_tex->width = res->width0; @@ -945,6 +944,8 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, jit_tex->depth = res->depth0; jit_tex->first_level = first_level; jit_tex->last_level = last_level; + jit_tex->num_samples = res->nr_samples; + jit_tex->sample_stride = 0; if (llvmpipe_resource_is_texture(res)) { for (j = first_level; j <= last_level; j++) { @@ -953,6 +954,8 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, jit_tex->img_stride[j] = lp_tex->img_stride[j]; } + jit_tex->sample_stride = lp_tex->sample_stride; + if (res->target == PIPE_TEXTURE_1D_ARRAY || res->target == PIPE_TEXTURE_2D_ARRAY || res->target == 
PIPE_TEXTURE_CUBE || @@ -1003,7 +1006,7 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, struct llvmpipe_screen *screen = llvmpipe_screen(res->screen); struct sw_winsys *winsys = screen->winsys; jit_tex->base = winsys->displaytarget_map(winsys, lp_tex->dt, - PIPE_TRANSFER_READ); + PIPE_MAP_READ); jit_tex->row_stride[0] = lp_tex->row_stride[0]; jit_tex->img_stride[0] = lp_tex->img_stride[0]; jit_tex->mip_offsets[0] = 0; @@ -1011,6 +1014,8 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, jit_tex->height = res->height0; jit_tex->depth = res->depth0; jit_tex->first_level = jit_tex->last_level = 0; + jit_tex->num_samples = res->nr_samples; + jit_tex->sample_stride = 0; assert(jit_tex->base); } } @@ -1177,6 +1182,12 @@ try_update_scene_state( struct lp_setup_context *setup ) setup->dirty |= LP_SETUP_NEW_FS; } + struct llvmpipe_context *llvmpipe = llvmpipe_context(setup->pipe); + if (llvmpipe->dirty & LP_NEW_FS_CONSTANTS) + lp_setup_set_fs_constants(llvmpipe->setup, + ARRAY_SIZE(llvmpipe->constants[PIPE_SHADER_FRAGMENT]), + llvmpipe->constants[PIPE_SHADER_FRAGMENT]); + if (setup->dirty & LP_SETUP_NEW_CONSTANTS) { for (i = 0; i < ARRAY_SIZE(setup->constants); ++i) { struct pipe_resource *buffer = setup->constants[i].current.buffer; @@ -1196,7 +1207,7 @@ try_update_scene_state( struct lp_setup_context *setup ) current_data = (ubyte *) setup->constants[i].current.user_buffer; } - if (current_data) { + if (current_data && current_size >= sizeof(float)) { current_data += setup->constants[i].current.buffer_offset; /* TODO: copy only the actually used constants? 
*/ @@ -1230,7 +1241,7 @@ try_update_scene_state( struct lp_setup_context *setup ) } num_constants = - DIV_ROUND_UP(setup->constants[i].stored_size, (sizeof(float) * 4)); + DIV_ROUND_UP(setup->constants[i].stored_size, lp_get_constant_buffer_stride(scene->pipe->screen)); setup->fs.current.jit_context.num_constants[i] = num_constants; setup->dirty |= LP_SETUP_NEW_FS; } @@ -1275,9 +1286,14 @@ try_update_scene_state( struct lp_setup_context *setup ) return FALSE; } - memcpy(stored, - &setup->fs.current, - sizeof setup->fs.current); + memcpy(&stored->jit_context, + &setup->fs.current.jit_context, + sizeof setup->fs.current.jit_context); + stored->variant = setup->fs.current.variant; + + if (!lp_scene_add_frag_shader_reference(scene, + setup->fs.current.variant)) + return FALSE; setup->fs.stored = stored; /* The scene now references the textures in the rasterization @@ -1504,7 +1520,6 @@ void lp_setup_begin_query(struct lp_setup_context *setup, struct llvmpipe_query *pq) { - set_scene_state(setup, SETUP_ACTIVE, "begin_query"); if (!(pq->type == PIPE_QUERY_OCCLUSION_COUNTER || diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h index 701dcadfd..82fc14b5e 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -50,7 +50,6 @@ #define LP_SETUP_NEW_SCISSOR 0x08 #define LP_SETUP_NEW_VIEWPORTS 0x10 #define LP_SETUP_NEW_SSBOS 0x20 -#define LP_SETUP_NEW_IMAGES 0x40 struct lp_setup_variant; @@ -76,6 +75,7 @@ struct lp_setup_context struct pipe_context *pipe; struct vertex_info *vertex_info; + uint view_index; uint prim; uint vertex_size; uint nr_vertices; @@ -101,6 +101,7 @@ struct lp_setup_context boolean scissor_test; boolean point_size_per_vertex; boolean rasterizer_discard; + boolean multisample; unsigned cullmode; unsigned bottom_edge_rule; float pixel_offset; diff --git 
a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c index 5e26b1e9f..0535138df 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -298,7 +298,7 @@ try_setup_line( struct lp_setup_context *setup, int nr_planes = 4; unsigned viewport_index = 0; unsigned layer = 0; - + float pixel_offset = setup->multisample ? 0.0 : setup->pixel_offset; /* linewidth should be interpreted as integer */ int fixed_width = util_iround(width) * FIXED_ONE; @@ -320,6 +320,10 @@ try_setup_line( struct lp_setup_context *setup, boolean will_draw_start; boolean will_draw_end; + if (lp_context->active_statistics_queries) { + lp_context->pipeline_statistics.c_primitives++; + } + if (0) print_line(setup, v1, v2); @@ -357,10 +361,10 @@ try_setup_line( struct lp_setup_context *setup, if (fabsf(dx) >= fabsf(dy)) { float dydx = dy / dx; - x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5; - y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5; - x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5; - y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5; + x1diff = v1[0][0] - floorf(v1[0][0]) - 0.5f; + y1diff = v1[0][1] - floorf(v1[0][1]) - 0.5f; + x2diff = v2[0][0] - floorf(v2[0][0]) - 0.5f; + y2diff = v2[0][1] - floorf(v2[0][1]) - 0.5f; if (y2diff==-0.5 && dy<0){ y2diff = 0.5; @@ -440,25 +444,25 @@ try_setup_line( struct lp_setup_context *setup, } /* x/y positions in fixed point */ - x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset); - x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset); - x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset); - x[3] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset); + x[0] = subpixel_snap(v1[0][0] + x_offset - pixel_offset); + x[1] = subpixel_snap(v2[0][0] + x_offset_end - pixel_offset); + x[2] = subpixel_snap(v2[0][0] + x_offset_end - pixel_offset); + x[3] = subpixel_snap(v1[0][0] + 
x_offset - pixel_offset); - y[0] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset) - fixed_width/2; - y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) - fixed_width/2; - y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) + fixed_width/2; - y[3] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset) + fixed_width/2; + y[0] = subpixel_snap(v1[0][1] + y_offset - pixel_offset) - fixed_width/2; + y[1] = subpixel_snap(v2[0][1] + y_offset_end - pixel_offset) - fixed_width/2; + y[2] = subpixel_snap(v2[0][1] + y_offset_end - pixel_offset) + fixed_width/2; + y[3] = subpixel_snap(v1[0][1] + y_offset - pixel_offset) + fixed_width/2; } else { const float dxdy = dx / dy; /* Y-MAJOR LINE */ - x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5; - y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5; - x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5; - y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5; + x1diff = v1[0][0] - floorf(v1[0][0]) - 0.5f; + y1diff = v1[0][1] - floorf(v1[0][1]) - 0.5f; + x2diff = v2[0][0] - floorf(v2[0][0]) - 0.5f; + y2diff = v2[0][1] - floorf(v2[0][1]) - 0.5f; if (x2diff==-0.5 && dx<0) { x2diff = 0.5; @@ -537,15 +541,15 @@ try_setup_line( struct lp_setup_context *setup, } /* x/y positions in fixed point */ - x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) - fixed_width/2; - x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) - fixed_width/2; - x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) + fixed_width/2; - x[3] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) + fixed_width/2; + x[0] = subpixel_snap(v1[0][0] + x_offset - pixel_offset) - fixed_width/2; + x[1] = subpixel_snap(v2[0][0] + x_offset_end - pixel_offset) - fixed_width/2; + x[2] = subpixel_snap(v2[0][0] + x_offset_end - pixel_offset) + fixed_width/2; + x[3] = subpixel_snap(v1[0][0] + x_offset - pixel_offset) + fixed_width/2; - y[0] = subpixel_snap(v1[0][1] + y_offset - 
setup->pixel_offset); - y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset); - y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset); - y[3] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset); + y[0] = subpixel_snap(v1[0][1] + y_offset - pixel_offset); + y[1] = subpixel_snap(v2[0][1] + y_offset_end - pixel_offset); + y[2] = subpixel_snap(v2[0][1] + y_offset_end - pixel_offset); + y[3] = subpixel_snap(v1[0][1] + y_offset - pixel_offset); } /* Bounding rectangle (in pixels) */ @@ -593,12 +597,9 @@ try_setup_line( struct lp_setup_context *setup, * Determine how many scissor planes we need, that is drop scissor * edges if the bounding box of the tri is fully inside that edge. */ - if (setup->scissor_test) { - /* why not just use draw_regions */ - scissor = &setup->scissors[viewport_index]; - scissor_planes_needed(s_planes, &bboxpos, scissor); - nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3]; - } + scissor = &setup->draw_regions[viewport_index]; + scissor_planes_needed(s_planes, &bboxpos, scissor); + nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3]; line = lp_setup_alloc_triangle(scene, key->num_inputs, @@ -616,10 +617,6 @@ try_setup_line( struct lp_setup_context *setup, LP_COUNT(nr_tris); - if (lp_context->active_statistics_queries) { - lp_context->pipeline_statistics.c_primitives++; - } - /* calculate the deltas */ plane = GET_PLANES(line); plane[0].dcdy = x[0] - x[1]; @@ -651,6 +648,7 @@ try_setup_line( struct lp_setup_context *setup, line->inputs.opaque = FALSE; line->inputs.layer = layer; line->inputs.viewport_index = viewport_index; + line->inputs.view_index = setup->view_index; /* * XXX: this code is mostly identical to the one in lp_setup_tri, except it @@ -673,7 +671,7 @@ try_setup_line( struct lp_setup_context *setup, plane[i].c++; } else if (plane[i].dcdx == 0) { - if (setup->pixel_offset == 0) { + if (setup->bottom_edge_rule == 0) { /* correct for top-left fill convention: */ if 
(plane[i].dcdy > 0) plane[i].c++; diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c index 092febdba..696612309 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -81,7 +81,7 @@ point_persp_coeff(struct lp_setup_context *setup, { /* * Fragment shader expects pre-multiplied w for LP_INTERP_PERSPECTIVE. A - * better stratergy would be to take the primitive in consideration when + * better strategy would be to take the primitive in consideration when * generating the fragment shader key, and therefore avoid the per-fragment * perspective divide. */ @@ -240,7 +240,7 @@ setup_point_coefficients( struct lp_setup_context *setup, case LP_INTERP_LINEAR: /* Sprite tex coords may use linear interpolation someday */ - /* fall-through */ + FALLTHROUGH; case LP_INTERP_PERSPECTIVE: { /* check if the sprite coord flag is set for this attribute. * If so, set it up so it up so x and y vary from 0 to 1. @@ -270,7 +270,7 @@ setup_point_coefficients( struct lp_setup_context *setup, break; } } - /* fall-through */ + FALLTHROUGH; case LP_INTERP_CONSTANT: for (i = 0; i < NUM_CHANNELS; i++) { if (usage_mask & (1 << i)) { @@ -337,17 +337,20 @@ try_setup_point( struct lp_setup_context *setup, /* x/y positions in fixed point */ const struct lp_setup_variant_key *key = &setup->setup.variant->key; const int sizeAttr = setup->psize_slot; - const float size + float size = (setup->point_size_per_vertex && sizeAttr > 0) ? v0[sizeAttr][0] : setup->point_size; + if (size > LP_MAX_POINT_WIDTH) + size = LP_MAX_POINT_WIDTH; + /* Yes this is necessary to accurately calculate bounding boxes * with the two fill-conventions we support. GL (normally) ends * up needing a bottom-left fill convention, which requires * slightly different rounding. */ int adj = (setup->bottom_edge_rule != 0) ? 1 : 0; - + float pixel_offset = setup->multisample ? 
0.0 : setup->pixel_offset; struct lp_scene *scene = setup->scene; struct lp_rast_triangle *point; unsigned bytes; @@ -382,8 +385,8 @@ try_setup_point( struct lp_setup_context *setup, */ fixed_width = MAX2(FIXED_ONE, subpixel_snap(size)); - x0 = subpixel_snap(v0[0][0] - setup->pixel_offset) - fixed_width/2; - y0 = subpixel_snap(v0[0][1] - setup->pixel_offset) - fixed_width/2; + x0 = subpixel_snap(v0[0][0] - pixel_offset) - fixed_width/2; + y0 = subpixel_snap(v0[0][1] - pixel_offset) - fixed_width/2; bbox.x0 = (x0 + (FIXED_ONE-1)) >> FIXED_ORDER; bbox.x1 = (x0 + fixed_width + (FIXED_ONE-1)) >> FIXED_ORDER; @@ -401,7 +404,7 @@ try_setup_point( struct lp_setup_context *setup, * Per OpenGL 2.1 spec, section 3.3.1, "Basic Point Rasterization". * * This type of point rasterization is only available in pre 3.0 contexts - * (or compatibilility contexts which we don't support) anyway. + * (or compatibility contexts which we don't support) anyway. */ const int x0 = subpixel_snap(v0[0][0]); @@ -444,6 +447,10 @@ try_setup_point( struct lp_setup_context *setup, bbox.x1, bbox.y1); } + if (lp_context->active_statistics_queries) { + lp_context->pipeline_statistics.c_primitives++; + } + if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) { if (0) debug_printf("offscreen\n"); LP_COUNT(nr_culled_tris); @@ -466,10 +473,6 @@ try_setup_point( struct lp_setup_context *setup, LP_COUNT(nr_tris); - if (lp_context->active_statistics_queries) { - lp_context->pipeline_statistics.c_primitives++; - } - if (draw_will_inject_frontface(lp_context->draw) && setup->face_slot > 0) { point->inputs.frontfacing = v0[setup->face_slot][0]; @@ -495,6 +498,7 @@ try_setup_point( struct lp_setup_context *setup, point->inputs.opaque = FALSE; point->inputs.layer = layer; point->inputs.viewport_index = viewport_index; + point->inputs.view_index = setup->view_index; { struct lp_rast_plane *plane = GET_PLANES(point); diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c 
b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c index d24a4b4af..4fb76dd22 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -104,8 +104,9 @@ lp_setup_alloc_triangle(struct lp_scene *scene, tri->inputs.stride = input_array_sz; { - char *a = (char *)tri; - char *b = (char *)&GET_PLANES(tri)[nr_planes]; + ASSERTED char *a = (char *)tri; + ASSERTED char *b = (char *)&GET_PLANES(tri)[nr_planes]; + assert(b - a == *tri_size); } @@ -204,7 +205,18 @@ lp_rast_32_tri_tab[MAX_PLANES+1] = { LP_RAST_OP_TRIANGLE_32_8 }; - +static unsigned +lp_rast_ms_tri_tab[MAX_PLANES+1] = { + 0, /* should be impossible */ + LP_RAST_OP_MS_TRIANGLE_1, + LP_RAST_OP_MS_TRIANGLE_2, + LP_RAST_OP_MS_TRIANGLE_3, + LP_RAST_OP_MS_TRIANGLE_4, + LP_RAST_OP_MS_TRIANGLE_5, + LP_RAST_OP_MS_TRIANGLE_6, + LP_RAST_OP_MS_TRIANGLE_7, + LP_RAST_OP_MS_TRIANGLE_8 +}; /** * The primitive covers the whole tile- shade whole tile. @@ -249,7 +261,7 @@ lp_setup_whole_tile(struct lp_setup_context *setup, } else { LP_COUNT(nr_shade_64); return lp_scene_bin_cmd_with_state( scene, tx, ty, - setup->fs.stored, + setup->fs.stored, LP_RAST_OP_SHADE_TILE, lp_rast_arg_inputs(inputs) ); } @@ -273,7 +285,7 @@ do_triangle_ccw(struct lp_setup_context *setup, const struct lp_setup_variant_key *key = &setup->setup.variant->key; struct lp_rast_triangle *tri; struct lp_rast_plane *plane; - const struct u_rect *scissor; + const struct u_rect *scissor = NULL; struct u_rect bbox, bboxpos; boolean s_planes[4]; unsigned tri_bytes; @@ -348,12 +360,9 @@ do_triangle_ccw(struct lp_setup_context *setup, * Determine how many scissor planes we need, that is drop scissor * edges if the bounding box of the tri is fully inside that edge. 
*/ - if (setup->scissor_test) { - /* why not just use draw_regions */ - scissor = &setup->scissors[viewport_index]; - scissor_planes_needed(s_planes, &bboxpos, scissor); - nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3]; - } + scissor = &setup->draw_regions[viewport_index]; + scissor_planes_needed(s_planes, &bboxpos, scissor); + nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3]; tri = lp_setup_alloc_triangle(scene, key->num_inputs, @@ -386,6 +395,7 @@ do_triangle_ccw(struct lp_setup_context *setup, tri->inputs.opaque = setup->fs.current.variant->opaque; tri->inputs.layer = layer; tri->inputs.viewport_index = viewport_index; + tri->inputs.view_index = setup->view_index; if (0) lp_dump_setup_coef(&setup->setup.variant->key, @@ -759,6 +769,8 @@ lp_setup_bin_triangle(struct lp_setup_context *setup, struct lp_scene *scene = setup->scene; struct u_rect trimmed_box = *bbox; int i; + unsigned cmd; + /* What is the largest power-of-two boundary this triangle crosses: */ int dx = floor_pot((bbox->x0 ^ bbox->x1) | @@ -808,11 +820,12 @@ lp_setup_bin_triangle(struct lp_setup_context *setup, */ assert(px + 4 <= TILE_SIZE); assert(py + 4 <= TILE_SIZE); + if (setup->multisample) + cmd = LP_RAST_OP_MS_TRIANGLE_3_4; + else + cmd = use_32bits ? LP_RAST_OP_TRIANGLE_32_3_4 : LP_RAST_OP_TRIANGLE_3_4; return lp_scene_bin_cmd_with_state( scene, ix0, iy0, - setup->fs.stored, - use_32bits ? - LP_RAST_OP_TRIANGLE_32_3_4 : - LP_RAST_OP_TRIANGLE_3_4, + setup->fs.stored, cmd, lp_rast_arg_triangle_contained(tri, px, py) ); } @@ -832,11 +845,12 @@ lp_setup_bin_triangle(struct lp_setup_context *setup, assert(px + 16 <= TILE_SIZE); assert(py + 16 <= TILE_SIZE); + if (setup->multisample) + cmd = LP_RAST_OP_MS_TRIANGLE_3_16; + else + cmd = use_32bits ? LP_RAST_OP_TRIANGLE_32_3_16 : LP_RAST_OP_TRIANGLE_3_16; return lp_scene_bin_cmd_with_state( scene, ix0, iy0, - setup->fs.stored, - use_32bits ? 
- LP_RAST_OP_TRIANGLE_32_3_16 : - LP_RAST_OP_TRIANGLE_3_16, + setup->fs.stored, cmd, lp_rast_arg_triangle_contained(tri, px, py) ); } } @@ -848,20 +862,24 @@ lp_setup_bin_triangle(struct lp_setup_context *setup, assert(px + 16 <= TILE_SIZE); assert(py + 16 <= TILE_SIZE); + if (setup->multisample) + cmd = LP_RAST_OP_MS_TRIANGLE_4_16; + else + cmd = use_32bits ? LP_RAST_OP_TRIANGLE_32_4_16 : LP_RAST_OP_TRIANGLE_4_16; return lp_scene_bin_cmd_with_state(scene, ix0, iy0, - setup->fs.stored, - use_32bits ? - LP_RAST_OP_TRIANGLE_32_4_16 : - LP_RAST_OP_TRIANGLE_4_16, + setup->fs.stored, cmd, lp_rast_arg_triangle_contained(tri, px, py)); } /* Triangle is contained in a single tile: */ + if (setup->multisample) + cmd = lp_rast_ms_tri_tab[nr_planes]; + else + cmd = use_32bits ? lp_rast_32_tri_tab[nr_planes] : lp_rast_tri_tab[nr_planes]; return lp_scene_bin_cmd_with_state( - scene, ix0, iy0, setup->fs.stored, - use_32bits ? lp_rast_32_tri_tab[nr_planes] : lp_rast_tri_tab[nr_planes], + scene, ix0, iy0, setup->fs.stored, cmd, lp_rast_arg_triangle(tri, (1<<nr_planes)-1)); } else @@ -933,12 +951,13 @@ lp_setup_bin_triangle(struct lp_setup_context *setup, */ int count = util_bitcount(partial); in = TRUE; - + + if (setup->multisample) + cmd = lp_rast_ms_tri_tab[count]; + else + cmd = use_32bits ? lp_rast_32_tri_tab[count] : lp_rast_tri_tab[count]; if (!lp_scene_bin_cmd_with_state( scene, x, y, - setup->fs.stored, - use_32bits ? - lp_rast_32_tri_tab[count] : - lp_rast_tri_tab[count], + setup->fs.stored, cmd, lp_rast_arg_triangle(tri, partial) )) goto fail; @@ -1008,6 +1027,7 @@ calc_fixed_position(struct lp_setup_context *setup, const float (*v1)[4], const float (*v2)[4]) { + float pixel_offset = setup->multisample ? 
0.0 : setup->pixel_offset; /* * The rounding may not be quite the same with PIPE_ARCH_SSE * (util_iround right now only does nearest/even on x87, @@ -1019,7 +1039,7 @@ calc_fixed_position(struct lp_setup_context *setup, __m128 vxy0xy2, vxy1xy0; __m128i vxy0xy2i, vxy1xy0i; __m128i dxdy0120, x0x2y0y2, x1x0y1y0, x0120, y0120; - __m128 pix_offset = _mm_set1_ps(setup->pixel_offset); + __m128 pix_offset = _mm_set1_ps(pixel_offset); __m128 fixed_one = _mm_set1_ps((float)FIXED_ONE); v0r = _mm_castpd_ps(_mm_load_sd((double *)v0[0])); vxy0xy2 = _mm_loadh_pi(v0r, (__m64 *)v2[0]); @@ -1045,14 +1065,14 @@ calc_fixed_position(struct lp_setup_context *setup, _mm_store_si128((__m128i *)&position->y[0], y0120); #else - position->x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset); - position->x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset); - position->x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset); + position->x[0] = subpixel_snap(v0[0][0] - pixel_offset); + position->x[1] = subpixel_snap(v1[0][0] - pixel_offset); + position->x[2] = subpixel_snap(v2[0][0] - pixel_offset); position->x[3] = 0; // should be unused - position->y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset); - position->y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset); - position->y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset); + position->y[0] = subpixel_snap(v0[0][1] - pixel_offset); + position->y[1] = subpixel_snap(v1[0][1] - pixel_offset); + position->y[2] = subpixel_snap(v2[0][1] - pixel_offset); position->y[3] = 0; // should be unused position->dx01 = position->x[0] - position->x[1]; diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c index 6f8e855e8..04899dd9b 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -178,7 +178,7 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) * Called just prior to drawing anything 
(pipe::draw_arrays(), etc). * * Hopefully this will remain quite simple, otherwise need to pull in - * something like the state tracker mechanism. + * something like the gallium frontend mechanism. */ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) { @@ -195,6 +195,8 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) if (llvmpipe->dirty & (LP_NEW_RASTERIZER | LP_NEW_FS | LP_NEW_GS | + LP_NEW_TCS | + LP_NEW_TES | LP_NEW_VS)) compute_vertex_info(llvmpipe); @@ -212,6 +214,7 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) if (llvmpipe->dirty & (LP_NEW_FS | LP_NEW_FRAMEBUFFER | LP_NEW_RASTERIZER | + LP_NEW_SAMPLE_MASK | LP_NEW_DEPTH_STENCIL_ALPHA)) { /* @@ -223,10 +226,10 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) boolean null_fs = !llvmpipe->fs || llvmpipe->fs->info.base.num_instructions <= 1; boolean discard = - (llvmpipe->sample_mask & 1) == 0 || + (llvmpipe->sample_mask) == 0 || (llvmpipe->rasterizer ? llvmpipe->rasterizer->rasterizer_discard : FALSE) || (null_fs && - !llvmpipe->depth_stencil->depth.enabled && + !llvmpipe->depth_stencil->depth_enabled && !llvmpipe->depth_stencil->stencil[0].enabled); lp_setup_set_rasterizer_discard(llvmpipe->setup, discard); } @@ -236,6 +239,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) LP_NEW_RASTERIZER)) llvmpipe_update_setup( llvmpipe ); + if (llvmpipe->dirty & LP_NEW_SAMPLE_MASK) + lp_setup_set_sample_mask(llvmpipe->setup, llvmpipe->sample_mask); + if (llvmpipe->dirty & LP_NEW_BLEND_COLOR) lp_setup_set_blend_color(llvmpipe->setup, &llvmpipe->blend_color); @@ -245,7 +251,7 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) if (llvmpipe->dirty & LP_NEW_DEPTH_STENCIL_ALPHA) { lp_setup_set_alpha_ref_value(llvmpipe->setup, - llvmpipe->depth_stencil->alpha.ref_value); + llvmpipe->depth_stencil->alpha_ref_value); lp_setup_set_stencil_ref_values(llvmpipe->setup, llvmpipe->stencil_ref.ref_value); } diff --git 
a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c index 1c81155aa..2fe01ce48 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -89,6 +89,7 @@ #include "gallivm/lp_bld_pack.h" #include "gallivm/lp_bld_format.h" #include "gallivm/lp_bld_quad.h" +#include "gallivm/lp_bld_gather.h" #include "lp_bld_alpha.h" #include "lp_bld_blend.h" @@ -105,9 +106,106 @@ #include "lp_rast.h" #include "nir/nir_to_tgsi_info.h" +#include "lp_screen.h" +#include "compiler/nir/nir_serialize.h" +#include "util/mesa-sha1.h" /** Fragment shader number (for debugging) */ static unsigned fs_no = 0; +static void +load_unswizzled_block(struct gallivm_state *gallivm, + LLVMValueRef base_ptr, + LLVMValueRef stride, + unsigned block_width, + unsigned block_height, + LLVMValueRef* dst, + struct lp_type dst_type, + unsigned dst_count, + unsigned dst_alignment, + LLVMValueRef x_offset, + LLVMValueRef y_offset, + bool fb_fetch_twiddle); +/** + * Checks if a format description is an arithmetic format + * + * A format which has irregular channel sizes such as R3_G3_B2 or R5_G6_B5. + */ +static inline boolean +is_arithmetic_format(const struct util_format_description *format_desc) +{ + boolean arith = false; + unsigned i; + + for (i = 0; i < format_desc->nr_channels; ++i) { + arith |= format_desc->channel[i].size != format_desc->channel[0].size; + arith |= (format_desc->channel[i].size % 8) != 0; + } + + return arith; +} + +/** + * Checks if this format requires special handling due to required expansion + * to floats for blending, and furthermore has "natural" packed AoS -> unpacked + * SoA conversion. 
+ */ +static inline boolean +format_expands_to_float_soa(const struct util_format_description *format_desc) +{ + if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT || + format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { + return true; + } + return false; +} + + +/** + * Retrieves the type representing the memory layout for a format + * + * e.g. RGBA16F = 4x half-float and R3G3B2 = 1x byte + */ +static inline void +lp_mem_type_from_format_desc(const struct util_format_description *format_desc, + struct lp_type* type) +{ + unsigned i; + unsigned chan; + + if (format_expands_to_float_soa(format_desc)) { + /* just make this a uint with width of block */ + type->floating = false; + type->fixed = false; + type->sign = false; + type->norm = false; + type->width = format_desc->block.bits; + type->length = 1; + return; + } + + for (i = 0; i < 4; i++) + if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) + break; + chan = i; + + memset(type, 0, sizeof(struct lp_type)); + type->floating = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT; + type->fixed = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FIXED; + type->sign = format_desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED; + type->norm = format_desc->channel[chan].normalized; + + if (is_arithmetic_format(format_desc)) { + type->width = 0; + type->length = 1; + + for (i = 0; i < format_desc->nr_channels; ++i) { + type->width += format_desc->channel[i].size; + } + } else { + type->width = format_desc->channel[chan].size; + type->length = format_desc->nr_channels; + } +} /** * Expand the relevant bits of mask_input to a n*4-dword mask for the @@ -123,7 +221,8 @@ static LLVMValueRef generate_quad_mask(struct gallivm_state *gallivm, struct lp_type fs_type, unsigned first_quad, - LLVMValueRef mask_input) /* int32 */ + unsigned sample, + LLVMValueRef mask_input) /* int64 */ { LLVMBuilderRef builder = gallivm->builder; struct lp_type mask_type; @@ -162,6 +261,11 @@ generate_quad_mask(struct 
gallivm_state *gallivm, shift = 0; } + mask_input = LLVMBuildLShr(builder, mask_input, lp_build_const_int64(gallivm, 16 * sample), ""); + mask_input = LLVMBuildTrunc(builder, mask_input, + i32t, ""); + mask_input = LLVMBuildAnd(builder, mask_input, lp_build_const_int32(gallivm, 0xffff), ""); + mask_input = LLVMBuildLShr(builder, mask_input, LLVMConstInt(i32t, shift, 0), @@ -287,6 +391,163 @@ lp_build_depth_clamp(struct gallivm_state *gallivm, return lp_build_clamp(&f32_bld, z, min_depth, max_depth); } +static void +lp_build_sample_alpha_to_coverage(struct gallivm_state *gallivm, + struct lp_type type, + unsigned coverage_samples, + LLVMValueRef num_loop, + LLVMValueRef loop_counter, + LLVMValueRef coverage_mask_store, + LLVMValueRef alpha) +{ + struct lp_build_context bld; + LLVMBuilderRef builder = gallivm->builder; + float step = 1.0 / coverage_samples; + + lp_build_context_init(&bld, gallivm, type); + for (unsigned s = 0; s < coverage_samples; s++) { + LLVMValueRef alpha_ref_value = lp_build_const_vec(gallivm, type, step * s); + LLVMValueRef test = lp_build_cmp(&bld, PIPE_FUNC_GREATER, alpha, alpha_ref_value); + + LLVMValueRef s_mask_idx = LLVMBuildMul(builder, lp_build_const_int32(gallivm, s), num_loop, ""); + s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_counter, ""); + LLVMValueRef s_mask_ptr = LLVMBuildGEP(builder, coverage_mask_store, &s_mask_idx, 1, ""); + LLVMValueRef s_mask = LLVMBuildLoad(builder, s_mask_ptr, ""); + s_mask = LLVMBuildAnd(builder, s_mask, test, ""); + LLVMBuildStore(builder, s_mask, s_mask_ptr); + } +}; + +struct lp_build_fs_llvm_iface { + struct lp_build_fs_iface base; + struct lp_build_interp_soa_context *interp; + struct lp_build_for_loop_state *loop_state; + LLVMValueRef mask_store; + LLVMValueRef sample_id; + LLVMValueRef color_ptr_ptr; + LLVMValueRef color_stride_ptr; + LLVMValueRef color_sample_stride_ptr; + const struct lp_fragment_shader_variant_key *key; +}; + +static LLVMValueRef fs_interp(const struct lp_build_fs_iface 
*iface, + struct lp_build_context *bld, + unsigned attrib, unsigned chan, + bool centroid, bool sample, + LLVMValueRef attrib_indir, + LLVMValueRef offsets[2]) +{ + struct lp_build_fs_llvm_iface *fs_iface = (struct lp_build_fs_llvm_iface *)iface; + struct lp_build_interp_soa_context *interp = fs_iface->interp; + unsigned loc = TGSI_INTERPOLATE_LOC_CENTER; + if (centroid) + loc = TGSI_INTERPOLATE_LOC_CENTROID; + if (sample) + loc = TGSI_INTERPOLATE_LOC_SAMPLE; + + return lp_build_interp_soa(interp, bld->gallivm, fs_iface->loop_state->counter, + fs_iface->mask_store, + attrib, chan, loc, attrib_indir, offsets); +} + +static void fs_fb_fetch(const struct lp_build_fs_iface *iface, + struct lp_build_context *bld, + unsigned cbuf, + LLVMValueRef result[4]) +{ + struct lp_build_fs_llvm_iface *fs_iface = (struct lp_build_fs_llvm_iface *)iface; + struct gallivm_state *gallivm = bld->gallivm; + LLVMBuilderRef builder = gallivm->builder; + const struct lp_fragment_shader_variant_key *key = fs_iface->key; + LLVMValueRef index = lp_build_const_int32(gallivm, cbuf); + LLVMValueRef color_ptr = LLVMBuildLoad(builder, LLVMBuildGEP(builder, fs_iface->color_ptr_ptr, &index, 1, ""), ""); + LLVMValueRef stride = LLVMBuildLoad(builder, LLVMBuildGEP(builder, fs_iface->color_stride_ptr, &index, 1, ""), ""); + + LLVMValueRef dst[4 * 4]; + enum pipe_format cbuf_format = key->cbuf_format[cbuf]; + const struct util_format_description* out_format_desc = util_format_description(cbuf_format); + struct lp_type dst_type; + unsigned block_size = bld->type.length; + unsigned block_height = key->resource_1d ? 
1 : 2; + unsigned block_width = block_size / block_height; + + lp_mem_type_from_format_desc(out_format_desc, &dst_type); + + struct lp_type blend_type; + memset(&blend_type, 0, sizeof blend_type); + blend_type.floating = FALSE; /* values are integers */ + blend_type.sign = FALSE; /* values are unsigned */ + blend_type.norm = TRUE; /* values are in [0,1] or [-1,1] */ + blend_type.width = 8; /* 8-bit ubyte values */ + blend_type.length = 16; /* 16 elements per vector */ + + uint32_t dst_alignment; + /* + * Compute the alignment of the destination pointer in bytes + * We fetch 1-4 pixels, if the format has pot alignment then those fetches + * are always aligned by MIN2(16, fetch_width) except for buffers (not + * 1d tex but can't distinguish here) so need to stick with per-pixel + * alignment in this case. + */ + if (key->resource_1d) { + dst_alignment = (out_format_desc->block.bits + 7)/(out_format_desc->block.width * 8); + } + else { + dst_alignment = dst_type.length * dst_type.width / 8; + } + /* Force power-of-two alignment by extracting only the least-significant-bit */ + dst_alignment = 1 << (ffs(dst_alignment) - 1); + /* + * Resource base and stride pointers are aligned to 16 bytes, so that's + * the maximum alignment we can guarantee + */ + dst_alignment = MIN2(16, dst_alignment); + + LLVMTypeRef blend_vec_type = lp_build_vec_type(gallivm, blend_type); + color_ptr = LLVMBuildBitCast(builder, color_ptr, LLVMPointerType(blend_vec_type, 0), ""); + + if (key->multisample) { + LLVMValueRef sample_stride = LLVMBuildLoad(builder, + LLVMBuildGEP(builder, fs_iface->color_sample_stride_ptr, + &index, 1, ""), ""); + LLVMValueRef sample_offset = LLVMBuildMul(builder, sample_stride, fs_iface->sample_id, ""); + color_ptr = LLVMBuildGEP(builder, color_ptr, &sample_offset, 1, ""); + } + /* fragment shader executes on 4x4 blocks. depending on vector width it can execute 2 or 4 iterations. 
+ * only move to the next row once the top row has completed 8 wide 1 iteration, 4 wide 2 iterations */ + LLVMValueRef x_offset = NULL, y_offset = NULL; + if (!key->resource_1d) { + LLVMValueRef counter = fs_iface->loop_state->counter; + + if (block_size == 4) { + x_offset = LLVMBuildShl(builder, + LLVMBuildAnd(builder, fs_iface->loop_state->counter, lp_build_const_int32(gallivm, 1), ""), + lp_build_const_int32(gallivm, 1), ""); + counter = LLVMBuildLShr(builder, fs_iface->loop_state->counter, lp_build_const_int32(gallivm, 1), ""); + } + y_offset = LLVMBuildMul(builder, counter, lp_build_const_int32(gallivm, 2), ""); + } + load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height, dst, dst_type, block_size, dst_alignment, x_offset, y_offset, true); + + for (unsigned i = 0; i < block_size; i++) { + dst[i] = LLVMBuildBitCast(builder, dst[i], LLVMInt32TypeInContext(gallivm->context), ""); + } + LLVMValueRef packed = lp_build_gather_values(gallivm, dst, block_size); + + struct lp_type texel_type = bld->type; + if (out_format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB && + out_format_desc->channel[0].pure_integer) { + if (out_format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { + texel_type = lp_type_int_vec(bld->type.width, bld->type.width * bld->type.length); + } + else if (out_format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) { + texel_type = lp_type_uint_vec(bld->type.width, bld->type.width * bld->type.length); + } + } + lp_build_unpack_rgba_soa(gallivm, out_format_desc, + texel_type, + packed, result); +} /** * Generate the fragment shader, depth/stencil test, and alpha tests. 
@@ -298,14 +559,19 @@ generate_fs_loop(struct gallivm_state *gallivm, LLVMBuilderRef builder, struct lp_type type, LLVMValueRef context_ptr, + LLVMValueRef sample_pos_array, LLVMValueRef num_loop, struct lp_build_interp_soa_context *interp, const struct lp_build_sampler_soa *sampler, const struct lp_build_image_soa *image, LLVMValueRef mask_store, LLVMValueRef (*out_color)[4], - LLVMValueRef depth_ptr, + LLVMValueRef depth_base_ptr, LLVMValueRef depth_stride, + LLVMValueRef depth_sample_stride, + LLVMValueRef color_ptr_ptr, + LLVMValueRef color_stride_ptr, + LLVMValueRef color_sample_stride_ptr, LLVMValueRef facing, LLVMValueRef thread_data_ptr) { @@ -313,15 +579,17 @@ generate_fs_loop(struct gallivm_state *gallivm, const struct tgsi_token *tokens = shader->base.tokens; struct lp_type int_type = lp_int_type(type); LLVMTypeRef vec_type, int_vec_type; - LLVMValueRef mask_ptr, mask_val; + LLVMValueRef mask_ptr = NULL, mask_val = NULL; LLVMValueRef consts_ptr, num_consts_ptr; LLVMValueRef ssbo_ptr, num_ssbo_ptr; LLVMValueRef z; LLVMValueRef z_value, s_value; LLVMValueRef z_fb, s_fb; + LLVMValueRef depth_ptr; LLVMValueRef stencil_refs[2]; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; - struct lp_build_for_loop_state loop_state; + LLVMValueRef zs_samples = lp_build_const_int32(gallivm, key->zsbuf_nr_samples); + struct lp_build_for_loop_state loop_state, sample_loop_state; struct lp_build_mask_context mask; /* * TODO: figure out if simple_shader optimization is really worthwile to @@ -333,6 +601,7 @@ generate_fs_loop(struct gallivm_state *gallivm, shader->info.base.num_instructions < 8) && 0; const boolean dual_source_blend = key->blend.rt[0].blend_enable && util_blend_state_is_dual(&key->blend, 0); + const bool post_depth_coverage = shader->info.base.properties[TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE]; unsigned attrib; unsigned chan; unsigned cbuf; @@ -345,7 +614,8 @@ generate_fs_loop(struct gallivm_state *gallivm, /* truncate then sign extend. 
*/ system_values.front_facing = LLVMBuildTrunc(gallivm->builder, facing, LLVMInt1TypeInContext(gallivm->context), ""); system_values.front_facing = LLVMBuildSExt(gallivm->builder, system_values.front_facing, LLVMInt32TypeInContext(gallivm->context), ""); - + system_values.view_index = lp_jit_thread_data_raster_state_view_index(gallivm, + thread_data_ptr); if (key->depth.enabled || key->stencil[0].enabled) { @@ -406,24 +676,19 @@ generate_fs_loop(struct gallivm_state *gallivm, ssbo_ptr = lp_jit_context_ssbos(gallivm, context_ptr); num_ssbo_ptr = lp_jit_context_num_ssbos(gallivm, context_ptr); - lp_build_for_loop_begin(&loop_state, gallivm, - lp_build_const_int32(gallivm, 0), - LLVMIntULT, - num_loop, - lp_build_const_int32(gallivm, 1)); - - mask_ptr = LLVMBuildGEP(builder, mask_store, - &loop_state.counter, 1, "mask_ptr"); - mask_val = LLVMBuildLoad(builder, mask_ptr, ""); - memset(outputs, 0, sizeof outputs); + /* Allocate color storage for each fragment sample */ + LLVMValueRef color_store_size = num_loop; + if (key->min_samples > 1) + color_store_size = LLVMBuildMul(builder, num_loop, lp_build_const_int32(gallivm, key->min_samples), ""); + for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { out_color[cbuf][chan] = lp_build_array_alloca(gallivm, lp_build_vec_type(gallivm, type), - num_loop, "color"); + color_store_size, "color"); } } if (dual_source_blend) { @@ -432,10 +697,41 @@ generate_fs_loop(struct gallivm_state *gallivm, out_color[1][chan] = lp_build_array_alloca(gallivm, lp_build_vec_type(gallivm, type), - num_loop, "color1"); + color_store_size, "color1"); } } + lp_build_for_loop_begin(&loop_state, gallivm, + lp_build_const_int32(gallivm, 0), + LLVMIntULT, + num_loop, + lp_build_const_int32(gallivm, 1)); + + LLVMValueRef sample_mask_in; + if (key->multisample) { + sample_mask_in = lp_build_const_int_vec(gallivm, type, 0); + /* create shader execution mask by combining all sample masks. 
*/ + for (unsigned s = 0; s < key->coverage_samples; s++) { + LLVMValueRef s_mask_idx = LLVMBuildMul(builder, num_loop, lp_build_const_int32(gallivm, s), ""); + s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_state.counter, ""); + LLVMValueRef s_mask = lp_build_pointer_get(builder, mask_store, s_mask_idx); + if (s == 0) + mask_val = s_mask; + else + mask_val = LLVMBuildOr(builder, s_mask, mask_val, ""); + + LLVMValueRef mask_in = LLVMBuildAnd(builder, s_mask, lp_build_const_int_vec(gallivm, type, (1ll << s)), ""); + sample_mask_in = LLVMBuildOr(builder, sample_mask_in, mask_in, ""); + } + } else { + sample_mask_in = lp_build_const_int_vec(gallivm, type, 1); + mask_ptr = LLVMBuildGEP(builder, mask_store, + &loop_state.counter, 1, "mask_ptr"); + mask_val = LLVMBuildLoad(builder, mask_ptr, ""); + + LLVMValueRef mask_in = LLVMBuildAnd(builder, mask_val, lp_build_const_int_vec(gallivm, type, 1), ""); + sample_mask_in = LLVMBuildOr(builder, sample_mask_in, mask_in, ""); + } /* 'mask' will control execution based on quad's pixel alive/killed state */ lp_build_mask_begin(&mask, gallivm, type, mask_val); @@ -443,9 +739,70 @@ generate_fs_loop(struct gallivm_state *gallivm, if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader) lp_build_mask_check(&mask); - lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter); + /* Create storage for recombining sample masks after early Z pass. 
*/ + LLVMValueRef s_mask_or = lp_build_alloca(gallivm, lp_build_int_vec_type(gallivm, type), "cov_mask_early_depth"); + LLVMBuildStore(builder, LLVMConstNull(lp_build_int_vec_type(gallivm, type)), s_mask_or); + + /* Create storage for post depth sample mask */ + LLVMValueRef post_depth_sample_mask_in = NULL; + if (post_depth_coverage) + post_depth_sample_mask_in = lp_build_alloca(gallivm, int_vec_type, "post_depth_sample_mask_in"); + + LLVMValueRef s_mask = NULL, s_mask_ptr = NULL; + LLVMValueRef z_sample_value_store = NULL, s_sample_value_store = NULL; + LLVMValueRef z_fb_store = NULL, s_fb_store = NULL; + LLVMTypeRef z_type = NULL, z_fb_type = NULL; + + /* Run early depth once per sample */ + if (key->multisample) { + + if (zs_format_desc) { + struct lp_type zs_type = lp_depth_type(zs_format_desc, type.length); + struct lp_type z_type = zs_type; + struct lp_type s_type = zs_type; + if (zs_format_desc->block.bits < type.width) + z_type.width = type.width; + if (zs_format_desc->block.bits == 8) + s_type.width = type.width; + + else if (zs_format_desc->block.bits > 32) { + z_type.width = z_type.width / 2; + s_type.width = s_type.width / 2; + s_type.floating = 0; + } + z_sample_value_store = lp_build_array_alloca(gallivm, lp_build_int_vec_type(gallivm, type), + zs_samples, "z_sample_store"); + s_sample_value_store = lp_build_array_alloca(gallivm, lp_build_int_vec_type(gallivm, type), + zs_samples, "s_sample_store"); + z_fb_store = lp_build_array_alloca(gallivm, lp_build_vec_type(gallivm, z_type), + zs_samples, "z_fb_store"); + s_fb_store = lp_build_array_alloca(gallivm, lp_build_vec_type(gallivm, s_type), + zs_samples, "s_fb_store"); + } + lp_build_for_loop_begin(&sample_loop_state, gallivm, + lp_build_const_int32(gallivm, 0), + LLVMIntULT, lp_build_const_int32(gallivm, key->coverage_samples), + lp_build_const_int32(gallivm, 1)); + + LLVMValueRef s_mask_idx = LLVMBuildMul(builder, sample_loop_state.counter, num_loop, ""); + s_mask_idx = LLVMBuildAdd(builder, 
s_mask_idx, loop_state.counter, ""); + s_mask_ptr = LLVMBuildGEP(builder, mask_store, &s_mask_idx, 1, ""); + + s_mask = LLVMBuildLoad(builder, s_mask_ptr, ""); + s_mask = LLVMBuildAnd(builder, s_mask, mask_val, ""); + } + + + /* for multisample Z needs to be interpolated at sample points for testing. */ + lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter, key->multisample ? sample_loop_state.counter : NULL); z = interp->pos[2]; + depth_ptr = depth_base_ptr; + if (key->multisample) { + LLVMValueRef sample_offset = LLVMBuildMul(builder, sample_loop_state.counter, depth_sample_stride, ""); + depth_ptr = LLVMBuildGEP(builder, depth_ptr, &sample_offset, 1, ""); + } + if (depth_mode & EARLY_DEPTH_TEST) { /* * Clamp according to ARB_depth_clamp semantics. @@ -463,12 +820,13 @@ generate_fs_loop(struct gallivm_state *gallivm, key->stencil, type, zs_format_desc, - &mask, + key->multisample ? NULL : &mask, + &s_mask, stencil_refs, z, z_fb, s_fb, facing, &z_value, &s_value, - !simple_shader); + !simple_shader && !key->multisample); if (depth_mode & EARLY_DEPTH_WRITE) { lp_build_depth_stencil_write_swizzled(gallivm, type, @@ -482,17 +840,114 @@ generate_fs_loop(struct gallivm_state *gallivm, * stencil test otherwise new stencil values may not get written if all * fragments got killed by depth/stencil test. 
*/ - if (!simple_shader && key->stencil[0].enabled) + if (!simple_shader && key->stencil[0].enabled && !key->multisample) lp_build_mask_check(&mask); + + if (key->multisample) { + z_fb_type = LLVMTypeOf(z_fb); + z_type = LLVMTypeOf(z_value); + lp_build_pointer_set(builder, z_sample_value_store, sample_loop_state.counter, LLVMBuildBitCast(builder, z_value, lp_build_int_vec_type(gallivm, type), "")); + lp_build_pointer_set(builder, s_sample_value_store, sample_loop_state.counter, LLVMBuildBitCast(builder, s_value, lp_build_int_vec_type(gallivm, type), "")); + lp_build_pointer_set(builder, z_fb_store, sample_loop_state.counter, z_fb); + lp_build_pointer_set(builder, s_fb_store, sample_loop_state.counter, s_fb); + } } - lp_build_interp_soa_update_inputs_dyn(interp, gallivm, loop_state.counter); + if (key->multisample) { + /* + * Store the post-early Z coverage mask. + * Recombine the resulting coverage masks post early Z into the fragment + * shader execution mask. + */ + LLVMValueRef tmp_s_mask_or = LLVMBuildLoad(builder, s_mask_or, ""); + tmp_s_mask_or = LLVMBuildOr(builder, tmp_s_mask_or, s_mask, ""); + LLVMBuildStore(builder, tmp_s_mask_or, s_mask_or); + + if (post_depth_coverage) { + LLVMValueRef mask_bit_idx = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), sample_loop_state.counter, ""); + LLVMValueRef post_depth_mask_in = LLVMBuildLoad(builder, post_depth_sample_mask_in, ""); + mask_bit_idx = LLVMBuildAnd(builder, s_mask, lp_build_broadcast(gallivm, int_vec_type, mask_bit_idx), ""); + post_depth_mask_in = LLVMBuildOr(builder, post_depth_mask_in, mask_bit_idx, ""); + LLVMBuildStore(builder, post_depth_mask_in, post_depth_sample_mask_in); + } + + LLVMBuildStore(builder, s_mask, s_mask_ptr); + + lp_build_for_loop_end(&sample_loop_state); + + /* recombined all the coverage masks in the shader exec mask. 
*/ + tmp_s_mask_or = LLVMBuildLoad(builder, s_mask_or, ""); + lp_build_mask_update(&mask, tmp_s_mask_or); + + if (key->min_samples == 1) { + /* for multisample Z needs to be re interpolated at pixel center */ + lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter, NULL); + z = interp->pos[2]; + lp_build_mask_update(&mask, tmp_s_mask_or); + } + } else { + if (post_depth_coverage) { + LLVMValueRef post_depth_mask_in = LLVMBuildAnd(builder, lp_build_mask_value(&mask), lp_build_const_int_vec(gallivm, type, 1), ""); + LLVMBuildStore(builder, post_depth_mask_in, post_depth_sample_mask_in); + } + } + + LLVMValueRef out_sample_mask_storage = NULL; + if (shader->info.base.writes_samplemask) { + out_sample_mask_storage = lp_build_alloca(gallivm, int_vec_type, "write_mask"); + if (key->min_samples > 1) + LLVMBuildStore(builder, LLVMConstNull(int_vec_type), out_sample_mask_storage); + } + + if (post_depth_coverage) { + system_values.sample_mask_in = LLVMBuildLoad(builder, post_depth_sample_mask_in, ""); + } + else + system_values.sample_mask_in = sample_mask_in; + if (key->multisample && key->min_samples > 1) { + lp_build_for_loop_begin(&sample_loop_state, gallivm, + lp_build_const_int32(gallivm, 0), + LLVMIntULT, + lp_build_const_int32(gallivm, key->min_samples), + lp_build_const_int32(gallivm, 1)); + + LLVMValueRef s_mask_idx = LLVMBuildMul(builder, sample_loop_state.counter, num_loop, ""); + s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_state.counter, ""); + s_mask_ptr = LLVMBuildGEP(builder, mask_store, &s_mask_idx, 1, ""); + s_mask = LLVMBuildLoad(builder, s_mask_ptr, ""); + lp_build_mask_force(&mask, s_mask); + lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter, sample_loop_state.counter); + system_values.sample_id = sample_loop_state.counter; + system_values.sample_mask_in = LLVMBuildAnd(builder, system_values.sample_mask_in, + lp_build_broadcast(gallivm, int_vec_type, + LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), 
sample_loop_state.counter, "")), ""); + } else { + system_values.sample_id = lp_build_const_int32(gallivm, 0); + + } + system_values.sample_pos = sample_pos_array; + + lp_build_interp_soa_update_inputs_dyn(interp, gallivm, loop_state.counter, mask_store, sample_loop_state.counter); + + struct lp_build_fs_llvm_iface fs_iface = { + .base.interp_fn = fs_interp, + .base.fb_fetch = fs_fb_fetch, + .interp = interp, + .loop_state = &loop_state, + .sample_id = system_values.sample_id, + .mask_store = mask_store, + .color_ptr_ptr = color_ptr_ptr, + .color_stride_ptr = color_stride_ptr, + .color_sample_stride_ptr = color_sample_stride_ptr, + .key = key, + }; struct lp_build_tgsi_params params; memset(&params, 0, sizeof(params)); params.type = type; params.mask = &mask; + params.fs_iface = &fs_iface.base; params.consts_ptr = consts_ptr; params.const_sizes_ptr = num_consts_ptr; params.system_values = &system_values; @@ -544,29 +999,121 @@ generate_fs_loop(struct gallivm_state *gallivm, if (color0 != -1 && outputs[color0][3]) { LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha"); - lp_build_alpha_to_coverage(gallivm, type, - &mask, alpha, - (depth_mode & LATE_DEPTH_TEST) != 0); + if (!key->multisample) { + lp_build_alpha_to_coverage(gallivm, type, + &mask, alpha, + (depth_mode & LATE_DEPTH_TEST) != 0); + } else { + lp_build_sample_alpha_to_coverage(gallivm, type, key->coverage_samples, num_loop, + loop_state.counter, + mask_store, alpha); + } + } + } + if (key->blend.alpha_to_one && key->multisample) { + for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib) { + unsigned cbuf = shader->info.base.output_semantic_index[attrib]; + if ((shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) && + ((cbuf < key->nr_cbufs) || (cbuf == 1 && dual_source_blend))) + if (outputs[cbuf][3]) { + LLVMBuildStore(builder, lp_build_const_vec(gallivm, type, 1.0), outputs[cbuf][3]); + } } } - if (shader->info.base.writes_samplemask) { + LLVMValueRef
output_smask = NULL; int smaski = find_output_by_semantic(&shader->info.base, TGSI_SEMANTIC_SAMPLEMASK, 0); - LLVMValueRef smask; struct lp_build_context smask_bld; lp_build_context_init(&smask_bld, gallivm, int_type); assert(smaski >= 0); - smask = LLVMBuildLoad(builder, outputs[smaski][0], "smask"); - /* - * Pixel is alive according to the first sample in the mask. - */ - smask = LLVMBuildBitCast(builder, smask, smask_bld.vec_type, ""); - smask = lp_build_and(&smask_bld, smask, smask_bld.one); - smask = lp_build_cmp(&smask_bld, PIPE_FUNC_NOTEQUAL, smask, smask_bld.zero); - lp_build_mask_update(&mask, smask); + output_smask = LLVMBuildLoad(builder, outputs[smaski][0], "smask"); + output_smask = LLVMBuildBitCast(builder, output_smask, smask_bld.vec_type, ""); + if (!key->multisample && key->no_ms_sample_mask_out) { + output_smask = lp_build_and(&smask_bld, output_smask, smask_bld.one); + output_smask = lp_build_cmp(&smask_bld, PIPE_FUNC_NOTEQUAL, output_smask, smask_bld.zero); + lp_build_mask_update(&mask, output_smask); + } + + if (key->min_samples > 1) { + /* only the bit corresponding to this sample is to be used. 
*/ + LLVMValueRef tmp_mask = LLVMBuildLoad(builder, out_sample_mask_storage, "tmp_mask"); + LLVMValueRef out_smask_idx = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), sample_loop_state.counter, ""); + LLVMValueRef smask_bit = LLVMBuildAnd(builder, output_smask, lp_build_broadcast(gallivm, int_vec_type, out_smask_idx), ""); + output_smask = LLVMBuildOr(builder, tmp_mask, smask_bit, ""); + } + + LLVMBuildStore(builder, output_smask, out_sample_mask_storage); + } + + /* Color write - per fragment sample */ + for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib) + { + unsigned cbuf = shader->info.base.output_semantic_index[attrib]; + if ((shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) && + ((cbuf < key->nr_cbufs) || (cbuf == 1 && dual_source_blend))) + { + for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + if(outputs[attrib][chan]) { + /* XXX: just initialize outputs to point at colors[] and + * skip this. + */ + LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); + LLVMValueRef color_ptr; + LLVMValueRef color_idx = loop_state.counter; + if (key->min_samples > 1) + color_idx = LLVMBuildAdd(builder, color_idx, + LLVMBuildMul(builder, sample_loop_state.counter, num_loop, ""), ""); + color_ptr = LLVMBuildGEP(builder, out_color[cbuf][chan], + &color_idx, 1, ""); + lp_build_name(out, "color%u.%c", attrib, "rgba"[chan]); + LLVMBuildStore(builder, out, color_ptr); + } + } + } + } + + if (key->multisample && key->min_samples > 1) { + LLVMBuildStore(builder, lp_build_mask_value(&mask), s_mask_ptr); + lp_build_for_loop_end(&sample_loop_state); + } + + if (key->multisample) { + /* execute depth test for each sample */ + lp_build_for_loop_begin(&sample_loop_state, gallivm, + lp_build_const_int32(gallivm, 0), + LLVMIntULT, lp_build_const_int32(gallivm, key->coverage_samples), + lp_build_const_int32(gallivm, 1)); + + /* load the per-sample coverage mask */ + LLVMValueRef s_mask_idx = LLVMBuildMul(builder, 
sample_loop_state.counter, num_loop, ""); + s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_state.counter, ""); + s_mask_ptr = LLVMBuildGEP(builder, mask_store, &s_mask_idx, 1, ""); + + /* combine the execution mask post fragment shader with the coverage mask. */ + s_mask = LLVMBuildLoad(builder, s_mask_ptr, ""); + if (key->min_samples == 1) + s_mask = LLVMBuildAnd(builder, s_mask, lp_build_mask_value(&mask), ""); + + /* if the shader writes sample mask use that */ + if (shader->info.base.writes_samplemask) { + LLVMValueRef out_smask_idx = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), sample_loop_state.counter, ""); + out_smask_idx = lp_build_broadcast(gallivm, int_vec_type, out_smask_idx); + LLVMValueRef output_smask = LLVMBuildLoad(builder, out_sample_mask_storage, ""); + LLVMValueRef smask_bit = LLVMBuildAnd(builder, output_smask, out_smask_idx, ""); + LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntNE, smask_bit, lp_build_const_int_vec(gallivm, int_type, 0), ""); + smask_bit = LLVMBuildSExt(builder, cmp, int_vec_type, ""); + + s_mask = LLVMBuildAnd(builder, s_mask, smask_bit, ""); + } + } + + depth_ptr = depth_base_ptr; + if (key->multisample) { + LLVMValueRef sample_offset = LLVMBuildMul(builder, sample_loop_state.counter, depth_sample_stride, ""); + depth_ptr = LLVMBuildGEP(builder, depth_ptr, &sample_offset, 1, ""); } /* Late Z test */ @@ -579,13 +1126,25 @@ generate_fs_loop(struct gallivm_state *gallivm, 0); if (pos0 != -1 && outputs[pos0][2]) { z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z"); + } else { + if (key->multisample) { + lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter, key->multisample ? sample_loop_state.counter : NULL); + z = interp->pos[2]; + } } + /* * Clamp according to ARB_depth_clamp semantics. 
*/ if (key->depth_clamp) { z = lp_build_depth_clamp(gallivm, builder, type, context_ptr, thread_data_ptr, z); + } else { + struct lp_build_context f32_bld; + lp_build_context_init(&f32_bld, gallivm, type); + z = lp_build_clamp(&f32_bld, z, + lp_build_const_vec(gallivm, type, 0.0), + lp_build_const_vec(gallivm, type, 1.0)); } if (s_out != -1 && outputs[s_out][1]) { @@ -607,7 +1166,8 @@ generate_fs_loop(struct gallivm_state *gallivm, key->stencil, type, zs_format_desc, - &mask, + key->multisample ? NULL : &mask, + &s_mask, stencil_refs, z, z_fb, s_fb, facing, @@ -629,46 +1189,36 @@ generate_fs_loop(struct gallivm_state *gallivm, * depth value, update from zs_value with the new mask value and * write that out. */ + if (key->multisample) { + z_value = LLVMBuildBitCast(builder, lp_build_pointer_get(builder, z_sample_value_store, sample_loop_state.counter), z_type, "");; + s_value = lp_build_pointer_get(builder, s_sample_value_store, sample_loop_state.counter); + z_fb = LLVMBuildBitCast(builder, lp_build_pointer_get(builder, z_fb_store, sample_loop_state.counter), z_fb_type, ""); + s_fb = lp_build_pointer_get(builder, s_fb_store, sample_loop_state.counter); + } lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc, key->resource_1d, - &mask, z_fb, s_fb, loop_state.counter, + key->multisample ? s_mask : lp_build_mask_value(&mask), z_fb, s_fb, loop_state.counter, depth_ptr, depth_stride, z_value, s_value); } - - /* Color write */ - for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib) - { - unsigned cbuf = shader->info.base.output_semantic_index[attrib]; - if ((shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) && - ((cbuf < key->nr_cbufs) || (cbuf == 1 && dual_source_blend))) - { - for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { - if(outputs[attrib][chan]) { - /* XXX: just initialize outputs to point at colors[] and - * skip this. 
- */ - LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); - LLVMValueRef color_ptr; - color_ptr = LLVMBuildGEP(builder, out_color[cbuf][chan], - &loop_state.counter, 1, ""); - lp_build_name(out, "color%u.%c", attrib, "rgba"[chan]); - LLVMBuildStore(builder, out, color_ptr); - } - } - } - } - if (key->occlusion_count) { LLVMValueRef counter = lp_jit_thread_data_counter(gallivm, thread_data_ptr); lp_build_name(counter, "counter"); + lp_build_occlusion_count(gallivm, type, - lp_build_mask_value(&mask), counter); + key->multisample ? s_mask : lp_build_mask_value(&mask), counter); + } + + if (key->multisample) { + /* store the sample mask for this loop */ + LLVMBuildStore(builder, s_mask, s_mask_ptr); + lp_build_for_loop_end(&sample_loop_state); } mask_val = lp_build_mask_end(&mask); - LLVMBuildStore(builder, mask_val, mask_ptr); + if (!key->multisample) + LLVMBuildStore(builder, mask_val, mask_ptr); lp_build_for_loop_end(&loop_state); } @@ -921,7 +1471,10 @@ load_unswizzled_block(struct gallivm_state *gallivm, LLVMValueRef* dst, struct lp_type dst_type, unsigned dst_count, - unsigned dst_alignment) + unsigned dst_alignment, + LLVMValueRef x_offset, + LLVMValueRef y_offset, + bool fb_fetch_twiddle) { LLVMBuilderRef builder = gallivm->builder; unsigned row_size = dst_count / block_height; @@ -934,8 +1487,28 @@ load_unswizzled_block(struct gallivm_state *gallivm, unsigned x = i % row_size; unsigned y = i / row_size; - LLVMValueRef bx = lp_build_const_int32(gallivm, x * (dst_type.width / 8) * dst_type.length); - LLVMValueRef by = LLVMBuildMul(builder, lp_build_const_int32(gallivm, y), stride, ""); + if (block_height == 2 && dst_count == 8 && fb_fetch_twiddle) { + /* remap the raw slots into the fragment shader execution mode. */ + /* this math took me way too long to work out, I'm sure it's overkill. 
*/ + x = (i & 1) + ((i >> 2) << 1); + y = (i & 2) >> 1; + } + + LLVMValueRef x_val; + if (x_offset) { + x_val = lp_build_const_int32(gallivm, x); + if (x_offset) + x_val = LLVMBuildAdd(builder, x_val, x_offset, ""); + x_val = LLVMBuildMul(builder, x_val, lp_build_const_int32(gallivm, (dst_type.width / 8) * dst_type.length), ""); + } else + x_val = lp_build_const_int32(gallivm, x * (dst_type.width / 8) * dst_type.length); + + LLVMValueRef bx = x_val; + + LLVMValueRef y_val = lp_build_const_int32(gallivm, y); + if (y_offset) + y_val = LLVMBuildAdd(builder, y_val, y_offset, ""); + LLVMValueRef by = LLVMBuildMul(builder, y_val, stride, ""); LLVMValueRef gep[2]; LLVMValueRef dst_ptr; @@ -999,89 +1572,6 @@ store_unswizzled_block(struct gallivm_state *gallivm, } -/** - * Checks if a format description is an arithmetic format - * - * A format which has irregular channel sizes such as R3_G3_B2 or R5_G6_B5. - */ -static inline boolean -is_arithmetic_format(const struct util_format_description *format_desc) -{ - boolean arith = false; - unsigned i; - - for (i = 0; i < format_desc->nr_channels; ++i) { - arith |= format_desc->channel[i].size != format_desc->channel[0].size; - arith |= (format_desc->channel[i].size % 8) != 0; - } - - return arith; -} - - -/** - * Checks if this format requires special handling due to required expansion - * to floats for blending, and furthermore has "natural" packed AoS -> unpacked - * SoA conversion. - */ -static inline boolean -format_expands_to_float_soa(const struct util_format_description *format_desc) -{ - if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT || - format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { - return true; - } - return false; -} - - -/** - * Retrieves the type representing the memory layout for a format - * - * e.g. 
RGBA16F = 4x half-float and R3G3B2 = 1x byte - */ -static inline void -lp_mem_type_from_format_desc(const struct util_format_description *format_desc, - struct lp_type* type) -{ - unsigned i; - unsigned chan; - - if (format_expands_to_float_soa(format_desc)) { - /* just make this a uint with width of block */ - type->floating = false; - type->fixed = false; - type->sign = false; - type->norm = false; - type->width = format_desc->block.bits; - type->length = 1; - return; - } - - for (i = 0; i < 4; i++) - if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) - break; - chan = i; - - memset(type, 0, sizeof(struct lp_type)); - type->floating = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT; - type->fixed = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FIXED; - type->sign = format_desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED; - type->norm = format_desc->channel[chan].normalized; - - if (is_arithmetic_format(format_desc)) { - type->width = 0; - type->length = 1; - - for (i = 0; i < format_desc->nr_channels; ++i) { - type->width += format_desc->channel[i].size; - } - } else { - type->width = format_desc->channel[chan].size; - type->length = format_desc->nr_channels; - } -} - /** * Retrieves the type for a format which is usable in the blending code. 
@@ -1590,6 +2080,7 @@ convert_from_blend_type(struct gallivm_state *gallivm, for (j = 0; j < src_fmt->nr_channels; ++j) { unsigned mask = 0; unsigned sa = src_fmt->channel[j].shift; + unsigned sz_a = src_fmt->channel[j].size; #if UTIL_ARCH_LITTLE_ENDIAN unsigned from_lsb = j; #else @@ -1618,6 +2109,10 @@ convert_from_blend_type(struct gallivm_state *gallivm, if (src_type.norm) { chans[j] = scale_bits(gallivm, blend_type.width, src_fmt->channel[j].size, chans[j], src_type); + } else if (!src_type.floating && sz_a < blend_type.width) { + LLVMValueRef mask_val = lp_build_const_int_vec(gallivm, src_type, (1UL << sz_a) - 1); + LLVMValueRef mask = LLVMBuildICmp(builder, LLVMIntUGT, chans[j], mask_val, ""); + chans[j] = LLVMBuildSelect(builder, mask, mask_val, chans[j], ""); } /* Insert bits */ @@ -1868,7 +2363,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, continue; } - /* Ensure we havn't already found all channels */ + /* Ensure we haven't already found all channels */ if (dst_channels >= out_format_desc->nr_channels) { continue; } @@ -2294,7 +2789,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, if (is_1d) { load_unswizzled_block(gallivm, color_ptr, stride, block_width, 1, - dst, ls_type, dst_count / 4, dst_alignment); + dst, ls_type, dst_count / 4, dst_alignment, NULL, NULL, false); for (i = dst_count / 4; i < dst_count; i++) { dst[i] = lp_build_undef(gallivm, ls_type); } @@ -2302,7 +2797,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, } else { load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height, - dst, ls_type, dst_count, dst_alignment); + dst, ls_type, dst_count, dst_alignment, NULL, NULL, false); } @@ -2442,7 +2937,7 @@ generate_fragment(struct llvmpipe_context *lp, struct lp_type blend_type; LLVMTypeRef fs_elem_type; LLVMTypeRef blend_vec_type; - LLVMTypeRef arg_types[13]; + LLVMTypeRef arg_types[15]; LLVMTypeRef func_type; LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context); 
LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context); @@ -2454,8 +2949,10 @@ generate_fragment(struct llvmpipe_context *lp, LLVMValueRef dady_ptr; LLVMValueRef color_ptr_ptr; LLVMValueRef stride_ptr; + LLVMValueRef color_sample_stride_ptr; LLVMValueRef depth_ptr; LLVMValueRef depth_stride; + LLVMValueRef depth_sample_stride; LLVMValueRef mask_input; LLVMValueRef thread_data_ptr; LLVMBasicBlockRef block; @@ -2463,8 +2960,8 @@ generate_fragment(struct llvmpipe_context *lp, struct lp_build_sampler_soa *sampler; struct lp_build_image_soa *image; struct lp_build_interp_soa_context interp; - LLVMValueRef fs_mask[16 / 4]; - LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4]; + LLVMValueRef fs_mask[(16 / 4) * LP_MAX_SAMPLES]; + LLVMValueRef fs_out_color[LP_MAX_SAMPLES][PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4]; LLVMValueRef function; LLVMValueRef facing; unsigned num_fs; @@ -2519,8 +3016,8 @@ generate_fragment(struct llvmpipe_context *lp, blend_vec_type = lp_build_vec_type(gallivm, blend_type); - snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s", - shader->no, variant->no, partial_mask ? "partial" : "whole"); + snprintf(func_name, sizeof(func_name), "fs_variant_%s", + partial_mask ? 
"partial" : "whole"); arg_types[0] = variant->jit_context_ptr_type; /* context */ arg_types[1] = int32_type; /* x */ @@ -2529,12 +3026,14 @@ generate_fragment(struct llvmpipe_context *lp, arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* a0 */ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dadx */ arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */ - arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ + arg_types[7] = LLVMPointerType(LLVMPointerType(int8_type, 0), 0); /* color */ arg_types[8] = LLVMPointerType(int8_type, 0); /* depth */ - arg_types[9] = int32_type; /* mask_input */ + arg_types[9] = LLVMInt64TypeInContext(gallivm->context); /* mask_input */ arg_types[10] = variant->jit_thread_data_ptr_type; /* per thread data */ arg_types[11] = LLVMPointerType(int32_type, 0); /* stride */ arg_types[12] = int32_type; /* depth_stride */ + arg_types[13] = LLVMPointerType(int32_type, 0); /* color sample strides */ + arg_types[14] = int32_type; /* depth sample stride */ func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), arg_types, ARRAY_SIZE(arg_types), 0); @@ -2551,6 +3050,9 @@ generate_fragment(struct llvmpipe_context *lp, if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS); + if (variant->gallivm->cache->data_size) + return; + context_ptr = LLVMGetParam(function, 0); x = LLVMGetParam(function, 1); y = LLVMGetParam(function, 2); @@ -2564,6 +3066,8 @@ generate_fragment(struct llvmpipe_context *lp, thread_data_ptr = LLVMGetParam(function, 10); stride_ptr = LLVMGetParam(function, 11); depth_stride = LLVMGetParam(function, 12); + color_sample_stride_ptr = LLVMGetParam(function, 13); + depth_sample_stride = LLVMGetParam(function, 14); lp_build_name(context_ptr, "context"); lp_build_name(x, "x"); @@ -2577,6 +3081,8 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_name(thread_data_ptr, "thread_data"); lp_build_name(stride_ptr, 
"stride_ptr"); lp_build_name(depth_stride, "depth_stride"); + lp_build_name(color_sample_stride_ptr, "color_sample_stride_ptr"); + lp_build_name(depth_sample_stride, "depth_sample_stride"); /* * Function body @@ -2607,8 +3113,8 @@ generate_fragment(struct llvmpipe_context *lp, } /* code generated texture sampling */ - sampler = lp_llvm_sampler_soa_create(key->samplers); - image = lp_llvm_image_soa_create(lp_fs_variant_key_images(key)); + sampler = lp_llvm_sampler_soa_create(key->samplers, key->nr_samplers); + image = lp_llvm_image_soa_create(lp_fs_variant_key_images(key), key->nr_images); num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */ /* for 1d resources only run "upper half" of stamp */ @@ -2618,8 +3124,29 @@ generate_fragment(struct llvmpipe_context *lp, { LLVMValueRef num_loop = lp_build_const_int32(gallivm, num_fs); LLVMTypeRef mask_type = lp_build_int_vec_type(gallivm, fs_type); + LLVMValueRef num_loop_samp = lp_build_const_int32(gallivm, num_fs * key->coverage_samples); LLVMValueRef mask_store = lp_build_array_alloca(gallivm, mask_type, - num_loop, "mask_store"); + num_loop_samp, "mask_store"); + + LLVMTypeRef flt_type = LLVMFloatTypeInContext(gallivm->context); + LLVMValueRef glob_sample_pos = LLVMAddGlobal(gallivm->module, LLVMArrayType(flt_type, key->coverage_samples * 2), ""); + LLVMValueRef sample_pos_array; + + if (key->multisample && key->coverage_samples == 4) { + LLVMValueRef sample_pos_arr[8]; + for (unsigned i = 0; i < 4; i++) { + sample_pos_arr[i * 2] = LLVMConstReal(flt_type, lp_sample_pos_4x[i][0]); + sample_pos_arr[i * 2 + 1] = LLVMConstReal(flt_type, lp_sample_pos_4x[i][1]); + } + sample_pos_array = LLVMConstArray(LLVMFloatTypeInContext(gallivm->context), sample_pos_arr, 8); + } else { + LLVMValueRef sample_pos_arr[2]; + sample_pos_arr[0] = LLVMConstReal(flt_type, 0.5); + sample_pos_arr[1] = LLVMConstReal(flt_type, 0.5); + sample_pos_array = LLVMConstArray(LLVMFloatTypeInContext(gallivm->context), sample_pos_arr, 2); + } + 
LLVMSetInitializer(glob_sample_pos, sample_pos_array); + LLVMValueRef color_store[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS]; boolean pixel_center_integer = shader->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER]; @@ -2634,25 +3161,53 @@ generate_fragment(struct llvmpipe_context *lp, shader->info.base.num_inputs, inputs, pixel_center_integer, + key->coverage_samples, glob_sample_pos, + num_loop, key->depth_clamp, builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, x, y); for (i = 0; i < num_fs; i++) { - LLVMValueRef mask; - LLVMValueRef indexi = lp_build_const_int32(gallivm, i); - LLVMValueRef mask_ptr = LLVMBuildGEP(builder, mask_store, - &indexi, 1, "mask_ptr"); - - if (partial_mask) { - mask = generate_quad_mask(gallivm, fs_type, - i*fs_type.length/4, mask_input); - } - else { - mask = lp_build_const_int_vec(gallivm, fs_type, ~0); + if (key->multisample) { + LLVMValueRef smask_val = LLVMBuildLoad(builder, lp_jit_context_sample_mask(gallivm, context_ptr), ""); + + /* + * For multisampling, extract the per-sample mask from the incoming 64-bit mask, + * store to the per sample mask storage. Or all of them together to generate + * the fragment shader mask. (sample shading TODO). + * Take the incoming state coverage mask into account. 
+ */ + for (unsigned s = 0; s < key->coverage_samples; s++) { + LLVMValueRef sindexi = lp_build_const_int32(gallivm, i + (s * num_fs)); + LLVMValueRef sample_mask_ptr = LLVMBuildGEP(builder, mask_store, + &sindexi, 1, "sample_mask_ptr"); + LLVMValueRef s_mask = generate_quad_mask(gallivm, fs_type, + i*fs_type.length/4, s, mask_input); + + LLVMValueRef smask_bit = LLVMBuildAnd(builder, smask_val, lp_build_const_int32(gallivm, (1 << s)), ""); + LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntNE, smask_bit, lp_build_const_int32(gallivm, 0), ""); + smask_bit = LLVMBuildSExt(builder, cmp, int32_type, ""); + smask_bit = lp_build_broadcast(gallivm, mask_type, smask_bit); + + s_mask = LLVMBuildAnd(builder, s_mask, smask_bit, ""); + LLVMBuildStore(builder, s_mask, sample_mask_ptr); + } + } else { + LLVMValueRef mask; + LLVMValueRef indexi = lp_build_const_int32(gallivm, i); + LLVMValueRef mask_ptr = LLVMBuildGEP(builder, mask_store, + &indexi, 1, "mask_ptr"); + + if (partial_mask) { + mask = generate_quad_mask(gallivm, fs_type, + i*fs_type.length/4, 0, mask_input); + } + else { + mask = lp_build_const_int_vec(gallivm, fs_type, ~0); + } + LLVMBuildStore(builder, mask, mask_ptr); } - LLVMBuildStore(builder, mask, mask_ptr); } generate_fs_loop(gallivm, @@ -2660,6 +3215,7 @@ generate_fragment(struct llvmpipe_context *lp, builder, fs_type, context_ptr, + glob_sample_pos, num_loop, &interp, sampler, @@ -2668,30 +3224,43 @@ generate_fragment(struct llvmpipe_context *lp, color_store, depth_ptr, depth_stride, + depth_sample_stride, + color_ptr_ptr, + stride_ptr, + color_sample_stride_ptr, facing, thread_data_ptr); for (i = 0; i < num_fs; i++) { - LLVMValueRef indexi = lp_build_const_int32(gallivm, i); - LLVMValueRef ptr = LLVMBuildGEP(builder, mask_store, - &indexi, 1, ""); - fs_mask[i] = LLVMBuildLoad(builder, ptr, "mask"); - /* This is fucked up need to reorganize things */ - for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { - for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { - ptr 
= LLVMBuildGEP(builder, - color_store[cbuf * !cbuf0_write_all][chan], - &indexi, 1, ""); - fs_out_color[cbuf][chan][i] = ptr; - } + LLVMValueRef ptr; + for (unsigned s = 0; s < key->coverage_samples; s++) { + int idx = (i + (s * num_fs)); + LLVMValueRef sindexi = lp_build_const_int32(gallivm, idx); + ptr = LLVMBuildGEP(builder, mask_store, &sindexi, 1, ""); + + fs_mask[idx] = LLVMBuildLoad(builder, ptr, "smask"); } - if (dual_source_blend) { - /* only support one dual source blend target hence always use output 1 */ - for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { - ptr = LLVMBuildGEP(builder, - color_store[1][chan], - &indexi, 1, ""); - fs_out_color[1][chan][i] = ptr; + + for (unsigned s = 0; s < key->min_samples; s++) { + /* This is fucked up need to reorganize things */ + int idx = s * num_fs + i; + LLVMValueRef sindexi = lp_build_const_int32(gallivm, idx); + for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + ptr = LLVMBuildGEP(builder, + color_store[cbuf * !cbuf0_write_all][chan], + &sindexi, 1, ""); + fs_out_color[s][cbuf][chan][i] = ptr; + } + } + if (dual_source_blend) { + /* only support one dual source blend target hence always use output 1 */ + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + ptr = LLVMBuildGEP(builder, + color_store[1][chan], + &sindexi, 1, ""); + fs_out_color[s][1][chan][i] = ptr; + } } } } @@ -2705,6 +3274,7 @@ generate_fragment(struct llvmpipe_context *lp, if (key->cbuf_format[cbuf] != PIPE_FORMAT_NONE) { LLVMValueRef color_ptr; LLVMValueRef stride; + LLVMValueRef sample_stride = NULL; LLVMValueRef index = lp_build_const_int32(gallivm, cbuf); boolean do_branch = ((key->depth.enabled @@ -2717,17 +3287,34 @@ generate_fragment(struct llvmpipe_context *lp, &index, 1, ""), ""); - lp_build_name(color_ptr, "color_ptr%d", cbuf); - stride = LLVMBuildLoad(builder, LLVMBuildGEP(builder, stride_ptr, &index, 1, ""), ""); - generate_unswizzled_blend(gallivm, cbuf, variant, - 
key->cbuf_format[cbuf], - num_fs, fs_type, fs_mask, fs_out_color, - context_ptr, color_ptr, stride, - partial_mask, do_branch); + if (key->cbuf_nr_samples[cbuf] > 1) + sample_stride = LLVMBuildLoad(builder, + LLVMBuildGEP(builder, color_sample_stride_ptr, + &index, 1, ""), ""); + + for (unsigned s = 0; s < key->cbuf_nr_samples[cbuf]; s++) { + unsigned mask_idx = num_fs * (key->multisample ? s : 0); + unsigned out_idx = key->min_samples == 1 ? 0 : s; + LLVMValueRef out_ptr = color_ptr;; + + if (sample_stride) { + LLVMValueRef sample_offset = LLVMBuildMul(builder, sample_stride, lp_build_const_int32(gallivm, s), ""); + out_ptr = LLVMBuildGEP(builder, out_ptr, &sample_offset, 1, ""); + } + out_ptr = LLVMBuildBitCast(builder, out_ptr, LLVMPointerType(blend_vec_type, 0), ""); + + lp_build_name(out_ptr, "color_ptr%d", cbuf); + + generate_unswizzled_blend(gallivm, cbuf, variant, + key->cbuf_format[cbuf], + num_fs, fs_type, &fs_mask[mask_idx], fs_out_color[out_idx], + context_ptr, out_ptr, stride, + partial_mask, do_branch); + } } } @@ -2747,11 +3334,18 @@ dump_fs_variant_key(struct lp_fragment_shader_variant_key *key) if (key->flatshade) { debug_printf("flatshade = 1\n"); } + if (key->multisample) { + debug_printf("multisample = 1\n"); + debug_printf("coverage samples = %d\n", key->coverage_samples); + debug_printf("min samples = %d\n", key->min_samples); + } for (i = 0; i < key->nr_cbufs; ++i) { debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i])); + debug_printf("cbuf nr_samples[%u] = %d\n", i, key->cbuf_nr_samples[i]); } if (key->depth.enabled || key->stencil[0].enabled) { debug_printf("depth.format = %s\n", util_format_name(key->zsbuf_format)); + debug_printf("depth nr_samples = %d\n", key->zsbuf_nr_samples); } if (key->depth.enabled) { debug_printf("depth.func = %s\n", util_str_func(key->depth.func, TRUE)); @@ -2812,6 +3406,7 @@ dump_fs_variant_key(struct lp_fragment_shader_variant_key *key) debug_printf(" .lod_bias_non_zero = %u\n", 
sampler->lod_bias_non_zero); debug_printf(" .apply_min_lod = %u\n", sampler->apply_min_lod); debug_printf(" .apply_max_lod = %u\n", sampler->apply_max_lod); + debug_printf(" .reduction_mode = %u\n", sampler->reduction_mode); } for (i = 0; i < key->nr_sampler_views; ++i) { const struct lp_static_texture_state *texture = &key->samplers[i].texture_state; @@ -2859,6 +3454,27 @@ lp_debug_fs_variant(struct lp_fragment_shader_variant *variant) debug_printf("\n"); } +static void +lp_fs_get_ir_cache_key(struct lp_fragment_shader_variant *variant, + unsigned char ir_sha1_cache_key[20]) +{ + struct blob blob = { 0 }; + unsigned ir_size; + void *ir_binary; + + blob_init(&blob); + nir_serialize(&blob, variant->shader->base.ir.nir, true); + ir_binary = blob.data; + ir_size = blob.size; + + struct mesa_sha1 ctx; + _mesa_sha1_init(&ctx); + _mesa_sha1_update(&ctx, &variant->key, variant->shader->variant_key_size); + _mesa_sha1_update(&ctx, ir_binary, ir_size); + _mesa_sha1_final(&ctx, ir_sha1_cache_key); + + blob_finish(&blob); +} /** * Generate a new fragment shader variant from the shader code and @@ -2869,11 +3485,14 @@ generate_variant(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, const struct lp_fragment_shader_variant_key *key) { + struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); struct lp_fragment_shader_variant *variant; const struct util_format_description *cbuf0_format_desc = NULL; boolean fullcolormask; char module_name[64]; - + unsigned char ir_sha1_cache_key[20]; + struct lp_cached_code cached = { 0 }; + bool needs_caching = false; variant = MALLOC(sizeof *variant + shader->variant_key_size - sizeof variant->key); if (!variant) return NULL; @@ -2882,18 +3501,29 @@ generate_variant(struct llvmpipe_context *lp, snprintf(module_name, sizeof(module_name), "fs%u_variant%u", shader->no, shader->variants_created); - variant->gallivm = gallivm_create(module_name, lp->context); + pipe_reference_init(&variant->reference, 1); + 
lp_fs_reference(lp, &variant->shader, shader); + + memcpy(&variant->key, key, shader->variant_key_size); + + if (shader->base.ir.nir) { + lp_fs_get_ir_cache_key(variant, ir_sha1_cache_key); + + lp_disk_cache_find_shader(screen, &cached, ir_sha1_cache_key); + if (!cached.data_size) + needs_caching = true; + } + variant->gallivm = gallivm_create(module_name, lp->context, &cached); if (!variant->gallivm) { FREE(variant); return NULL; } - variant->shader = shader; variant->list_item_global.base = variant; variant->list_item_local.base = variant; variant->no = shader->variants_created++; - memcpy(&variant->key, key, shader->variant_key_size); + /* * Determine whether we are touching all channels in the color buffer. @@ -2910,6 +3540,7 @@ generate_variant(struct llvmpipe_context *lp, fullcolormask && !key->stencil[0].enabled && !key->alpha.enabled && + !key->multisample && !key->blend.alpha_to_coverage && !key->depth.enabled && !shader->info.base.uses_kill && @@ -2954,6 +3585,10 @@ generate_variant(struct llvmpipe_context *lp, variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST]; } + if (needs_caching) { + lp_disk_cache_insert_shader(screen, &cached, ir_sha1_cache_key); + } + gallivm_free_ir(variant->gallivm); return variant; @@ -2975,6 +3610,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, if (!shader) return NULL; + pipe_reference_init(&shader->reference, 1); shader->no = fs_no++; make_empty_list(&shader->variants); @@ -3005,6 +3641,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, for (i = 0; i < shader->info.base.num_inputs; i++) { shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i]; shader->inputs[i].cyl_wrap = shader->info.base.input_cylindrical_wrap[i]; + shader->inputs[i].location = shader->info.base.input_interpolate_loc[i]; switch (shader->info.base.input_interpolate[i]) { case TGSI_INTERPOLATE_CONSTANT: @@ -3066,15 +3703,17 @@ static void llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct 
llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - - if (llvmpipe->fs == fs) + struct lp_fragment_shader *lp_fs = (struct lp_fragment_shader *)fs; + if (llvmpipe->fs == lp_fs) return; - llvmpipe->fs = (struct lp_fragment_shader *) fs; - draw_bind_fragment_shader(llvmpipe->draw, - (llvmpipe->fs ? llvmpipe->fs->draw_data : NULL)); + (lp_fs ? lp_fs->draw_data : NULL)); + lp_fs_reference(llvmpipe, &llvmpipe->fs, lp_fs); + + /* invalidate the setup link, NEW_FS will make it update */ + lp_setup_set_fs_variant(llvmpipe->setup, NULL); llvmpipe->dirty |= LP_NEW_FS; } @@ -3083,9 +3722,10 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) * Remove shader variant from two lists: the shader's variant list * and the context's variant list. */ -static void -llvmpipe_remove_shader_variant(struct llvmpipe_context *lp, - struct lp_fragment_shader_variant *variant) + +static +void llvmpipe_remove_shader_variant(struct llvmpipe_context *lp, + struct lp_fragment_shader_variant *variant) { if ((LP_DEBUG & DEBUG_FS) || (gallivm_debug & GALLIVM_DEBUG_IR)) { debug_printf("llvmpipe: del fs #%u var %u v created %u v cached %u " @@ -3096,8 +3736,6 @@ llvmpipe_remove_shader_variant(struct llvmpipe_context *lp, lp->nr_fs_variants, variant->nr_instrs, lp->nr_fs_instrs); } - gallivm_destroy(variant->gallivm); - /* remove from shader's list */ remove_from_list(&variant->list_item_local); variant->shader->variants_cached--; @@ -3106,10 +3744,32 @@ llvmpipe_remove_shader_variant(struct llvmpipe_context *lp, remove_from_list(&variant->list_item_global); lp->nr_fs_variants--; lp->nr_fs_instrs -= variant->nr_instrs; +} + +void +llvmpipe_destroy_shader_variant(struct llvmpipe_context *lp, + struct lp_fragment_shader_variant *variant) +{ + gallivm_destroy(variant->gallivm); + + lp_fs_reference(lp, &variant->shader, NULL); FREE(variant); } +void +llvmpipe_destroy_fs(struct llvmpipe_context *llvmpipe, + struct lp_fragment_shader *shader) +{ + /* Delete draw module's data */ + 
draw_delete_fragment_shader(llvmpipe->draw, shader->draw_data); + + if (shader->base.ir.nir) + ralloc_free(shader->base.ir.nir); + assert(shader->variants_cached == 0); + FREE((void *) shader->base.tokens); + FREE(shader); +} static void llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) @@ -3118,38 +3778,24 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) struct lp_fragment_shader *shader = fs; struct lp_fs_variant_list_item *li; - assert(fs != llvmpipe->fs); - - /* - * XXX: we need to flush the context until we have some sort of reference - * counting in fragment shaders as they may still be binned - * Flushing alone might not sufficient we need to wait on it too. - */ - llvmpipe_finish(pipe, __FUNCTION__); - /* Delete all the variants */ li = first_elem(&shader->variants); while(!at_end(&shader->variants, li)) { struct lp_fs_variant_list_item *next = next_elem(li); + struct lp_fragment_shader_variant *variant; + variant = li->base; llvmpipe_remove_shader_variant(llvmpipe, li->base); + lp_fs_variant_reference(llvmpipe, &variant, NULL); li = next; } - /* Delete draw module's data */ - draw_delete_fragment_shader(llvmpipe->draw, shader->draw_data); - - if (shader->base.ir.nir) - ralloc_free(shader->base.ir.nir); - assert(shader->variants_cached == 0); - FREE((void *) shader->base.tokens); - FREE(shader); + lp_fs_reference(llvmpipe, &shader, NULL); } - - static void llvmpipe_set_constant_buffer(struct pipe_context *pipe, enum pipe_shader_type shader, uint index, + bool take_ownership, const struct pipe_constant_buffer *cb) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -3159,7 +3805,8 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, assert(index < ARRAY_SIZE(llvmpipe->constants[shader])); /* note: reference counting */ - util_copy_constant_buffer(&llvmpipe->constants[shader][index], cb); + util_copy_constant_buffer(&llvmpipe->constants[shader][index], cb, + take_ownership); if (constants) { if (!(constants->bind 
& PIPE_BIND_CONSTANT_BUFFER)) { @@ -3169,7 +3816,9 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, } if (shader == PIPE_SHADER_VERTEX || - shader == PIPE_SHADER_GEOMETRY) { + shader == PIPE_SHADER_GEOMETRY || + shader == PIPE_SHADER_TESS_CTRL || + shader == PIPE_SHADER_TESS_EVAL) { /* Pass the constants to the 'draw' module */ const unsigned size = cb ? cb->buffer_size : 0; const ubyte *data; @@ -3214,7 +3863,9 @@ llvmpipe_set_shader_buffers(struct pipe_context *pipe, util_copy_shader_buffer(&llvmpipe->ssbos[shader][i], buffer); if (shader == PIPE_SHADER_VERTEX || - shader == PIPE_SHADER_GEOMETRY) { + shader == PIPE_SHADER_GEOMETRY || + shader == PIPE_SHADER_TESS_CTRL || + shader == PIPE_SHADER_TESS_EVAL) { const unsigned size = buffer ? buffer->buffer_size : 0; const ubyte *data = NULL; if (buffer && buffer->buffer) @@ -3234,7 +3885,8 @@ llvmpipe_set_shader_buffers(struct pipe_context *pipe, static void llvmpipe_set_shader_images(struct pipe_context *pipe, enum pipe_shader_type shader, unsigned start_slot, - unsigned count, const struct pipe_image_view *images) + unsigned count, unsigned unbind_num_trailing_slots, + const struct pipe_image_view *images) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); unsigned i, idx; @@ -3248,7 +3900,9 @@ llvmpipe_set_shader_images(struct pipe_context *pipe, llvmpipe->num_images[shader] = start_slot + count; if (shader == PIPE_SHADER_VERTEX || - shader == PIPE_SHADER_GEOMETRY) { + shader == PIPE_SHADER_GEOMETRY || + shader == PIPE_SHADER_TESS_CTRL || + shader == PIPE_SHADER_TESS_EVAL) { draw_set_images(llvmpipe->draw, shader, llvmpipe->images[shader], @@ -3257,6 +3911,11 @@ llvmpipe_set_shader_images(struct pipe_context *pipe, llvmpipe->cs_dirty |= LP_CSNEW_IMAGES; else llvmpipe->dirty |= LP_NEW_FS_IMAGES; + + if (unbind_num_trailing_slots) { + llvmpipe_set_shader_images(pipe, shader, start_slot + count, + unbind_num_trailing_slots, 0, NULL); + } } /** @@ -3305,10 +3964,12 @@ make_variant_key(struct 
llvmpipe_context *lp, const struct util_format_description *zsbuf_desc = util_format_description(zsbuf_format); - if (lp->depth_stencil->depth.enabled && + if (lp->depth_stencil->depth_enabled && util_format_has_depth(zsbuf_desc)) { key->zsbuf_format = zsbuf_format; - memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth); + key->depth.enabled = lp->depth_stencil->depth_enabled; + key->depth.writemask = lp->depth_stencil->depth_writemask; + key->depth.func = lp->depth_stencil->depth_func; } if (lp->depth_stencil->stencil[0].enabled && util_format_has_stencil(zsbuf_desc)) { @@ -3318,46 +3979,41 @@ make_variant_key(struct llvmpipe_context *lp, if (llvmpipe_resource_is_1d(lp->framebuffer.zsbuf->texture)) { key->resource_1d = TRUE; } + key->zsbuf_nr_samples = util_res_sample_count(lp->framebuffer.zsbuf->texture); } /* * Propagate the depth clamp setting from the rasterizer state. * depth_clip == 0 implies depth clamping is enabled. * - * When clip_halfz is enabled, then always clamp the depth values. - * - * XXX: This is incorrect for GL, but correct for d3d10 (depth - * clamp is always active in d3d10, regardless if depth clip is - * enabled or not). - * (GL has an always-on [0,1] clamp on fs depth output instead - * to ensure the depth values stay in range. Doesn't look like - * we do that, though...) */ - if (lp->rasterizer->clip_halfz) { - key->depth_clamp = 1; - } else { - key->depth_clamp = (lp->rasterizer->depth_clip_near == 0) ? 1 : 0; - } + key->depth_clamp = (lp->rasterizer->depth_clip_near == 0) ? 
1 : 0; /* alpha test only applies if render buffer 0 is non-integer (or does not exist) */ if (!lp->framebuffer.nr_cbufs || !lp->framebuffer.cbufs[0] || !util_format_is_pure_integer(lp->framebuffer.cbufs[0]->format)) { - key->alpha.enabled = lp->depth_stencil->alpha.enabled; + key->alpha.enabled = lp->depth_stencil->alpha_enabled; } if(key->alpha.enabled) - key->alpha.func = lp->depth_stencil->alpha.func; + key->alpha.func = lp->depth_stencil->alpha_func; /* alpha.ref_value is passed in jit_context */ key->flatshade = lp->rasterizer->flatshade; + key->multisample = lp->rasterizer->multisample; + key->no_ms_sample_mask_out = lp->rasterizer->no_ms_sample_mask_out; if (lp->active_occlusion_queries && !lp->queries_disabled) { key->occlusion_count = TRUE; } - if (lp->framebuffer.nr_cbufs) { - memcpy(&key->blend, lp->blend, sizeof key->blend); - } + memcpy(&key->blend, lp->blend, sizeof key->blend); + key->coverage_samples = 1; + key->min_samples = 1; + if (key->multisample) { + key->coverage_samples = util_framebuffer_get_num_samples(&lp->framebuffer); + key->min_samples = lp->min_samples == 1 ? 1 : key->coverage_samples; + } key->nr_cbufs = lp->framebuffer.nr_cbufs; if (!key->blend.independent_blend_enable) { @@ -3376,6 +4032,7 @@ make_variant_key(struct llvmpipe_context *lp, const struct util_format_description *format_desc; key->cbuf_format[i] = format; + key->cbuf_nr_samples[i] = util_res_sample_count(lp->framebuffer.cbufs[i]->texture); /* * Figure out if this is a 1d resource. 
Note that OpenGL allows crazy @@ -3435,6 +4092,7 @@ make_variant_key(struct llvmpipe_context *lp, else { /* no color buffer for this fragment output */ key->cbuf_format[i] = PIPE_FORMAT_NONE; + key->cbuf_nr_samples[i] = 0; blend_rt->colormask = 0x0; blend_rt->blend_enable = 0; } @@ -3551,8 +4209,6 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) if (variants_to_cull || lp->nr_fs_instrs >= LP_MAX_SHADER_INSTRUCTIONS) { - struct pipe_context *pipe = &lp->pipe; - if (gallivm_debug & GALLIVM_DEBUG_PERF) { debug_printf("Evicting FS: %u fs variants,\t%u total variants," "\t%u instrs,\t%u instrs/variant\n", @@ -3562,13 +4218,6 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) } /* - * XXX: we need to flush the context until we have some sort of - * reference counting in fragment shaders as they may still be binned - * Flushing alone might not be sufficient we need to wait on it too. - */ - llvmpipe_finish(pipe, __FUNCTION__); - - /* * We need to re-check lp->nr_fs_variants because an arbitrarliy large * number of shader variants (potentially all of them) could be * pending for destruction on flush. 
@@ -3583,6 +4232,8 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) assert(item); assert(item->base); llvmpipe_remove_shader_variant(lp, item->base); + struct lp_fragment_shader_variant *variant = item->base; + lp_fs_variant_reference(lp, &variant, NULL); } } diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 0428b5cd2..613e5286a 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -38,7 +38,8 @@ #include "lp_screen.h" #include "lp_state.h" #include "lp_debug.h" -#include "state_tracker/sw_winsys.h" +#include "frontend/sw_winsys.h" +#include "lp_flush.h" static void * @@ -81,7 +82,11 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe, /* set the new samplers */ for (i = 0; i < num; i++) { - llvmpipe->samplers[shader][start + i] = samplers[i]; + void *sampler = NULL; + + if (samplers && samplers[i]) + sampler = samplers[i]; + llvmpipe->samplers[shader][start + i] = sampler; } /* find highest non-null samplers[] entry */ @@ -92,7 +97,10 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe, llvmpipe->num_samplers[shader] = j; } - if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) { + if (shader == PIPE_SHADER_VERTEX || + shader == PIPE_SHADER_GEOMETRY || + shader == PIPE_SHADER_TESS_CTRL || + shader == PIPE_SHADER_TESS_EVAL) { draw_set_samplers(llvmpipe->draw, shader, llvmpipe->samplers[shader], @@ -111,6 +119,7 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe, enum pipe_shader_type shader, unsigned start, unsigned num, + unsigned unbind_num_trailing_slots, struct pipe_sampler_view **views) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -125,17 +134,29 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe, /* set the new sampler views */ for (i = 0; i < num; i++) { + struct pipe_sampler_view *view = NULL; + + if (views && views[i]) + view = 
views[i]; /* * Warn if someone tries to set a view created in a different context * (which is why we need the hack above in the first place). * An assert would be better but st/mesa relies on it... */ - if (views[i] && views[i]->context != pipe) { + if (view && view->context != pipe) { debug_printf("Illegal setting of sampler_view %d created in another " "context\n", i); } + + if (view) + llvmpipe_flush_resource(pipe, view->texture, 0, true, false, false, "sampler_view"); pipe_sampler_view_reference(&llvmpipe->sampler_views[shader][start + i], - views[i]); + view); + } + + for (; i < num + unbind_num_trailing_slots; i++) { + pipe_sampler_view_reference(&llvmpipe->sampler_views[shader][start + i], + NULL); } /* find highest non-null sampler_views[] entry */ @@ -146,7 +167,10 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe, llvmpipe->num_sampler_views[shader] = j; } - if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) { + if (shader == PIPE_SHADER_VERTEX || + shader == PIPE_SHADER_GEOMETRY || + shader == PIPE_SHADER_TESS_CTRL || + shader == PIPE_SHADER_TESS_EVAL) { draw_set_sampler_views(llvmpipe->draw, shader, llvmpipe->sampler_views[shader], @@ -185,7 +209,7 @@ llvmpipe_create_sampler_view(struct pipe_context *pipe, #ifdef DEBUG /* * This is possibly too lenient, but the primary reason is just - * to catch state trackers which forget to initialize this, so + * to catch gallium frontends which forget to initialize this, so * it only catches clearly impossible view targets. 
*/ if (view->target != texture->target) { @@ -262,6 +286,8 @@ prepare_shader_sampling( unsigned num_layers = tex->depth0; unsigned first_level = 0; unsigned last_level = 0; + unsigned sample_stride = 0; + unsigned num_samples = tex->nr_samples; if (!lp_tex->dt) { /* regular texture - setup array of mipmap level offsets */ @@ -275,6 +301,8 @@ prepare_shader_sampling( assert(last_level <= res->last_level); addr = lp_tex->tex_data; + sample_stride = lp_tex->sample_stride; + for (j = first_level; j <= last_level; j++) { mip_offsets[j] = lp_tex->mip_offsets[j]; row_stride[j] = lp_tex->row_stride[j]; @@ -319,7 +347,7 @@ prepare_shader_sampling( struct llvmpipe_screen *screen = llvmpipe_screen(tex->screen); struct sw_winsys *winsys = screen->winsys; addr = winsys->displaytarget_map(winsys, lp_tex->dt, - PIPE_TRANSFER_READ); + PIPE_MAP_READ); row_stride[0] = lp_tex->row_stride[0]; img_stride[0] = lp_tex->img_stride[0]; mip_offsets[0] = 0; @@ -330,6 +358,7 @@ prepare_shader_sampling( i, width0, tex->height0, num_layers, first_level, last_level, + num_samples, sample_stride, addr, row_stride, img_stride, mip_offsets); } @@ -360,6 +389,28 @@ llvmpipe_prepare_geometry_sampling(struct llvmpipe_context *lp, prepare_shader_sampling(lp, num, views, PIPE_SHADER_GEOMETRY); } +/** + * Called whenever we're about to draw (no dirty flag, FIXME?). + */ +void +llvmpipe_prepare_tess_ctrl_sampling(struct llvmpipe_context *lp, + unsigned num, + struct pipe_sampler_view **views) +{ + prepare_shader_sampling(lp, num, views, PIPE_SHADER_TESS_CTRL); +} + +/** + * Called whenever we're about to draw (no dirty flag, FIXME?). 
+ */ +void +llvmpipe_prepare_tess_eval_sampling(struct llvmpipe_context *lp, + unsigned num, + struct pipe_sampler_view **views) +{ + prepare_shader_sampling(lp, num, views, PIPE_SHADER_TESS_EVAL); +} + static void prepare_shader_images( struct llvmpipe_context *lp, @@ -371,6 +422,7 @@ prepare_shader_images( unsigned i; uint32_t row_stride; uint32_t img_stride; + uint32_t sample_stride; const void *addr; assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS); @@ -389,6 +441,7 @@ prepare_shader_images( unsigned width = u_minify(img->width0, view->u.tex.level); unsigned height = u_minify(img->height0, view->u.tex.level); unsigned num_layers = img->depth0; + unsigned num_samples = img->nr_samples; if (!lp_img->dt) { /* regular texture - setup array of mipmap level offsets */ @@ -410,6 +463,7 @@ prepare_shader_images( row_stride = lp_img->row_stride[view->u.tex.level]; img_stride = lp_img->img_stride[view->u.tex.level]; + sample_stride = lp_img->sample_stride; addr = (uint8_t *)addr + mip_offset; } else { @@ -418,6 +472,7 @@ prepare_shader_images( /* probably don't really need to fill that out */ row_stride = 0; img_stride = 0; + sample_stride = 0; /* everything specified in number of elements here. 
*/ width = view->u.buf.size / view_blocksize; @@ -433,9 +488,10 @@ prepare_shader_images( struct llvmpipe_screen *screen = llvmpipe_screen(img->screen); struct sw_winsys *winsys = screen->winsys; addr = winsys->displaytarget_map(winsys, lp_img->dt, - PIPE_TRANSFER_READ); + PIPE_MAP_READ); row_stride = lp_img->row_stride[0]; img_stride = lp_img->img_stride[0]; + sample_stride = 0; assert(addr); } draw_set_mapped_image(lp->draw, @@ -443,7 +499,8 @@ prepare_shader_images( i, width, height, num_layers, addr, - row_stride, img_stride); + row_stride, img_stride, + num_samples, sample_stride); } } } @@ -472,6 +529,28 @@ llvmpipe_prepare_geometry_images(struct llvmpipe_context *lp, prepare_shader_images(lp, num, views, PIPE_SHADER_GEOMETRY); } +/** + * Called whenever we're about to draw (no dirty flag, FIXME?). + */ +void +llvmpipe_prepare_tess_ctrl_images(struct llvmpipe_context *lp, + unsigned num, + struct pipe_image_view *views) +{ + prepare_shader_images(lp, num, views, PIPE_SHADER_TESS_CTRL); +} + +/** + * Called whenever we're about to draw (no dirty flag, FIXME?). + */ +void +llvmpipe_prepare_tess_eval_images(struct llvmpipe_context *lp, + unsigned num, + struct pipe_image_view *views) +{ + prepare_shader_images(lp, num, views, PIPE_SHADER_TESS_EVAL); +} + void llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe) { diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c index 915e21db5..2bc94d5d4 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c @@ -481,7 +481,7 @@ apply_perspective_corr( struct gallivm_state *gallivm, /** - * Applys cylindrical wrapping to vertex attributes if enabled. + * Apply cylindrical wrapping to vertex attributes if enabled. * Input coordinates must be in [0, 1] range, otherwise results are undefined. 
* * @param cyl_wrap TGSI_CYLINDRICAL_WRAP_x flags @@ -652,7 +652,7 @@ init_args(struct gallivm_state *gallivm, load_attribute(gallivm, args, key, 0, attr_pos); pixel_center = lp_build_const_vec(gallivm, typef4, - key->pixel_center_half ? 0.5 : 0.0); + (!key->multisample && key->pixel_center_half) ? 0.5 : 0.0); /* * xy are first two elems in v0a/v1a/v2a but just use vec4 arit @@ -730,7 +730,7 @@ generate_setup_variant(struct lp_setup_variant_key *key, snprintf(func_name, sizeof(func_name), "setup_variant_%u", variant->no); - variant->gallivm = gallivm = gallivm_create(func_name, lp->context); + variant->gallivm = gallivm = gallivm_create(func_name, lp->context, NULL); if (!variant->gallivm) { goto fail; } @@ -843,6 +843,7 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp, key->num_inputs = fs->info.base.num_inputs; key->flatshade_first = lp->rasterizer->flatshade_first; key->pixel_center_half = lp->rasterizer->half_pixel_center; + key->multisample = lp->rasterizer->multisample; key->twoside = lp->rasterizer->light_twoside; key->size = Offset(struct lp_setup_variant_key, inputs[key->num_inputs]); diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_tess.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_tess.c new file mode 100644 index 000000000..b3f8e74af --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_tess.c @@ -0,0 +1,196 @@ +/************************************************************************** + * + * Copyright 2019 Red Hat. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **************************************************************************/ + +#include "lp_context.h" +#include "lp_state.h" +#include "lp_texture.h" +#include "lp_debug.h" + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "draw/draw_context.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_scan.h" +#include "tgsi/tgsi_parse.h" + + +static void * +llvmpipe_create_tcs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + struct lp_tess_ctrl_shader *state; + + state = CALLOC_STRUCT(lp_tess_ctrl_shader); + if (!state) + goto no_state; + + /* debug */ + if (LP_DEBUG & DEBUG_TGSI) { + debug_printf("llvmpipe: Create tess ctrl shader %p:\n", (void *)state); + tgsi_dump(templ->tokens, 0); + } + + /* copy stream output info */ + state->no_tokens = !templ->tokens; + memcpy(&state->stream_output, &templ->stream_output, sizeof state->stream_output); + + if (templ->tokens || templ->type == PIPE_SHADER_IR_NIR) { + state->dtcs = draw_create_tess_ctrl_shader(llvmpipe->draw, templ); + if (state->dtcs == NULL) { + goto no_dgs; + } + } + + return state; + +no_dgs: + FREE( state ); +no_state: + return NULL; +} + + +static void +llvmpipe_bind_tcs_state(struct pipe_context *pipe, void *tcs) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + llvmpipe->tcs = (struct lp_tess_ctrl_shader *)tcs; + + draw_bind_tess_ctrl_shader(llvmpipe->draw, + (llvmpipe->tcs ? 
llvmpipe->tcs->dtcs : NULL)); + + llvmpipe->dirty |= LP_NEW_TCS; +} + + +static void +llvmpipe_delete_tcs_state(struct pipe_context *pipe, void *tcs) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + struct lp_tess_ctrl_shader *state = + (struct lp_tess_ctrl_shader *)tcs; + + if (!state) { + return; + } + + draw_delete_tess_ctrl_shader(llvmpipe->draw, state->dtcs); + FREE(state); +} + + +static void * +llvmpipe_create_tes_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + struct lp_tess_eval_shader *state; + + state = CALLOC_STRUCT(lp_tess_eval_shader); + if (!state) + goto no_state; + + /* debug */ + if (LP_DEBUG & DEBUG_TGSI) { + debug_printf("llvmpipe: Create tess eval shader %p:\n", (void *)state); + tgsi_dump(templ->tokens, 0); + } + + /* copy stream output info */ + state->no_tokens = !templ->tokens; + memcpy(&state->stream_output, &templ->stream_output, sizeof state->stream_output); + + if (templ->tokens || templ->type == PIPE_SHADER_IR_NIR) { + state->dtes = draw_create_tess_eval_shader(llvmpipe->draw, templ); + if (state->dtes == NULL) { + goto no_dgs; + } + } + + return state; + +no_dgs: + FREE( state ); +no_state: + return NULL; +} + + +static void +llvmpipe_bind_tes_state(struct pipe_context *pipe, void *tes) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + llvmpipe->tes = (struct lp_tess_eval_shader *)tes; + + draw_bind_tess_eval_shader(llvmpipe->draw, + (llvmpipe->tes ? 
llvmpipe->tes->dtes : NULL)); + + llvmpipe->dirty |= LP_NEW_TES; +} + + +static void +llvmpipe_delete_tes_state(struct pipe_context *pipe, void *tes) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + struct lp_tess_eval_shader *state = + (struct lp_tess_eval_shader *)tes; + + if (!state) { + return; + } + + draw_delete_tess_eval_shader(llvmpipe->draw, state->dtes); + FREE(state); +} + +static void +llvmpipe_set_tess_state(struct pipe_context *pipe, + const float default_outer_level[4], + const float default_inner_level[2]) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + draw_set_tess_state(llvmpipe->draw, default_outer_level, default_inner_level); +} + +void +llvmpipe_init_tess_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_tcs_state = llvmpipe_create_tcs_state; + llvmpipe->pipe.bind_tcs_state = llvmpipe_bind_tcs_state; + llvmpipe->pipe.delete_tcs_state = llvmpipe_delete_tcs_state; + + llvmpipe->pipe.create_tes_state = llvmpipe_create_tes_state; + llvmpipe->pipe.bind_tes_state = llvmpipe_bind_tes_state; + llvmpipe->pipe.delete_tes_state = llvmpipe_delete_tes_state; + + llvmpipe->pipe.set_tess_state = llvmpipe_set_tess_state; +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vertex.c index 702ecf96a..ee87bed5d 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vertex.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vertex.c @@ -77,6 +77,8 @@ llvmpipe_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) static void llvmpipe_set_vertex_buffers(struct pipe_context *pipe, unsigned start_slot, unsigned count, + unsigned unbind_num_trailing_slots, + bool take_ownership, const struct pipe_vertex_buffer *buffers) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -85,11 +87,14 @@ llvmpipe_set_vertex_buffers(struct pipe_context *pipe, util_set_vertex_buffers_count(llvmpipe->vertex_buffer, 
&llvmpipe->num_vertex_buffers, - buffers, start_slot, count); + buffers, start_slot, count, + unbind_num_trailing_slots, + take_ownership); llvmpipe->dirty |= LP_NEW_VERTEX; - draw_set_vertex_buffers(llvmpipe->draw, start_slot, count, buffers); + draw_set_vertex_buffers(llvmpipe->draw, start_slot, count, + unbind_num_trailing_slots, buffers); } diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c index 953b26e8c..9ba2b87b8 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c @@ -27,14 +27,60 @@ #include "util/u_rect.h" #include "util/u_surface.h" +#include "util/u_memset.h" #include "lp_context.h" #include "lp_flush.h" #include "lp_limits.h" #include "lp_surface.h" #include "lp_texture.h" #include "lp_query.h" +#include "lp_rast.h" +static void +lp_resource_copy_ms(struct pipe_context *pipe, + struct pipe_resource *dst, unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box) +{ + struct pipe_box dst_box = *src_box; + enum pipe_format src_format; + dst_box.x = dstx; + dst_box.y = dsty; + dst_box.z = dstz; + + src_format = src->format; + for (unsigned i = 0; i < src->nr_samples; i++) { + struct pipe_transfer *src_trans, *dst_trans; + const uint8_t *src_map = llvmpipe_transfer_map_ms(pipe, + src, 0, PIPE_MAP_READ, i, + src_box, + &src_trans); + if (!src_map) + return; + + uint8_t *dst_map = llvmpipe_transfer_map_ms(pipe, + dst, 0, PIPE_MAP_WRITE, i, + &dst_box, + &dst_trans); + if (!dst_map) { + pipe->transfer_unmap(pipe, src_trans); + return; + } + + util_copy_box(dst_map, + src_format, + dst_trans->stride, dst_trans->layer_stride, + 0, 0, 0, + src_box->width, src_box->height, src_box->depth, + src_map, + src_trans->stride, src_trans->layer_stride, + 0, 0, 0); + pipe->transfer_unmap(pipe, dst_trans); + pipe->transfer_unmap(pipe, src_trans); + } 
+} static void lp_resource_copy(struct pipe_context *pipe, struct pipe_resource *dst, unsigned dst_level, @@ -56,6 +102,12 @@ lp_resource_copy(struct pipe_context *pipe, FALSE, /* do_not_block */ "blit src"); + if (dst->nr_samples > 1 && + dst->nr_samples == src->nr_samples) { + lp_resource_copy_ms(pipe, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); + return; + } util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box); } @@ -70,14 +122,6 @@ static void lp_blit(struct pipe_context *pipe, if (blit_info->render_condition_enable && !llvmpipe_check_render_cond(lp)) return; - if (info.src.resource->nr_samples > 1 && - info.dst.resource->nr_samples <= 1 && - !util_format_is_depth_or_stencil(info.src.resource->format) && - !util_format_is_pure_integer(info.src.resource->format)) { - debug_printf("llvmpipe: color resolve unimplemented\n"); - return; - } - if (util_try_blit_via_copy_region(pipe, &info)) { return; /* done */ } @@ -89,6 +133,15 @@ static void lp_blit(struct pipe_context *pipe, return; } + /* for 32-bit unorm depth, avoid the conversions to float and back, + which can introduce accuracy errors. 
*/ + if (blit_info->src.format == PIPE_FORMAT_Z32_UNORM && + blit_info->dst.format == PIPE_FORMAT_Z32_UNORM && info.filter == PIPE_TEX_FILTER_NEAREST) { + info.src.format = PIPE_FORMAT_R32_UINT; + info.dst.format = PIPE_FORMAT_R32_UINT; + info.mask = PIPE_MASK_R; + } + /* XXX turn off occlusion and streamout queries */ util_blitter_save_vertex_buffer_slot(lp->blitter, lp->vertex_buffer); @@ -102,9 +155,11 @@ static void lp_blit(struct pipe_context *pipe, util_blitter_save_scissor(lp->blitter, &lp->scissors[0]); util_blitter_save_fragment_shader(lp->blitter, lp->fs); util_blitter_save_blend(lp->blitter, (void*)lp->blend); + util_blitter_save_tessctrl_shader(lp->blitter, (void*)lp->tcs); + util_blitter_save_tesseval_shader(lp->blitter, (void*)lp->tes); util_blitter_save_depth_stencil_alpha(lp->blitter, (void*)lp->depth_stencil); util_blitter_save_stencil_ref(lp->blitter, &lp->stencil_ref); - /*util_blitter_save_sample_mask(sp->blitter, lp->sample_mask);*/ + util_blitter_save_sample_mask(lp->blitter, lp->sample_mask); util_blitter_save_framebuffer(lp->blitter, &lp->framebuffer); util_blitter_save_fragment_sampler_states(lp->blitter, lp->num_samplers[PIPE_SHADER_FRAGMENT], @@ -185,6 +240,64 @@ llvmpipe_surface_destroy(struct pipe_context *pipe, } + +static void +llvmpipe_get_sample_position(struct pipe_context *pipe, + unsigned sample_count, + unsigned sample_index, + float *out_value) +{ + switch (sample_count) { + case 4: + out_value[0] = lp_sample_pos_4x[sample_index][0]; + out_value[1] = lp_sample_pos_4x[sample_index][1]; + break; + default: + break; + } +} + +static void +lp_clear_color_texture_helper(struct pipe_transfer *dst_trans, + ubyte *dst_map, + enum pipe_format format, + const union pipe_color_union *color, + unsigned width, unsigned height, unsigned depth) +{ + union util_color uc; + + assert(dst_trans->stride > 0); + + util_pack_color_union(format, &uc, color); + + util_fill_box(dst_map, format, + dst_trans->stride, dst_trans->layer_stride, + 0, 0, 0, 
width, height, depth, &uc); +} + +static void +lp_clear_color_texture_msaa(struct pipe_context *pipe, + struct pipe_resource *texture, + enum pipe_format format, + const union pipe_color_union *color, + unsigned sample, + const struct pipe_box *box) +{ + struct pipe_transfer *dst_trans; + ubyte *dst_map; + + dst_map = llvmpipe_transfer_map_ms(pipe, texture, 0, PIPE_MAP_WRITE, + sample, box, &dst_trans); + if (!dst_map) + return; + + if (dst_trans->stride > 0) { + lp_clear_color_texture_helper(dst_trans, dst_map, format, color, + box->width, box->height, box->depth); + } + pipe->transfer_unmap(pipe, dst_trans); +} + static void llvmpipe_clear_render_target(struct pipe_context *pipe, struct pipe_surface *dst, @@ -198,12 +311,60 @@ llvmpipe_clear_render_target(struct pipe_context *pipe, if (render_condition_enabled && !llvmpipe_check_render_cond(llvmpipe)) return; - util_clear_render_target(pipe, dst, color, - dstx, dsty, width, height); + if (dst->texture->nr_samples > 1) { + struct pipe_box box; + u_box_2d(dstx, dsty, width, height, &box); + if (dst->texture->target != PIPE_BUFFER) { + box.z = dst->u.tex.first_layer; + box.depth = dst->u.tex.last_layer - dst->u.tex.first_layer + 1; + } + for (unsigned s = 0; s < util_res_sample_count(dst->texture); s++) { + lp_clear_color_texture_msaa(pipe, dst->texture, dst->format, + color, s, &box); + } + } else + util_clear_render_target(pipe, dst, color, + dstx, dsty, width, height); } static void +lp_clear_depth_stencil_texture_msaa(struct pipe_context *pipe, + struct pipe_resource *texture, + enum pipe_format format, + unsigned clear_flags, + uint64_t zstencil, unsigned sample, + const struct pipe_box *box) +{ + struct pipe_transfer *dst_trans; + ubyte *dst_map; + boolean need_rmw = FALSE; + + if ((clear_flags & PIPE_CLEAR_DEPTHSTENCIL) && + ((clear_flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) && + util_format_is_depth_and_stencil(format)) + need_rmw = TRUE; + + dst_map = llvmpipe_transfer_map_ms(pipe, + 
texture, + 0, + (need_rmw ? PIPE_MAP_READ_WRITE : + PIPE_MAP_WRITE), + sample, box, &dst_trans); + assert(dst_map); + if (!dst_map) + return; + + assert(dst_trans->stride > 0); + + util_fill_zs_box(dst_map, format, need_rmw, clear_flags, + dst_trans->stride, dst_trans->layer_stride, + box->width, box->height, box->depth, zstencil); + + pipe->transfer_unmap(pipe, dst_trans); +} + +static void llvmpipe_clear_depth_stencil(struct pipe_context *pipe, struct pipe_surface *dst, unsigned clear_flags, @@ -218,11 +379,104 @@ llvmpipe_clear_depth_stencil(struct pipe_context *pipe, if (render_condition_enabled && !llvmpipe_check_render_cond(llvmpipe)) return; - util_clear_depth_stencil(pipe, dst, clear_flags, - depth, stencil, - dstx, dsty, width, height); + if (dst->texture->nr_samples > 1) { + uint64_t zstencil = util_pack64_z_stencil(dst->format, depth, stencil); + struct pipe_box box; + u_box_2d(dstx, dsty, width, height, &box); + if (dst->texture->target != PIPE_BUFFER) { + box.z = dst->u.tex.first_layer; + box.depth = dst->u.tex.last_layer - dst->u.tex.first_layer + 1; + } + for (unsigned s = 0; s < util_res_sample_count(dst->texture); s++) + lp_clear_depth_stencil_texture_msaa(pipe, dst->texture, + dst->format, clear_flags, + zstencil, s, &box); + } else + util_clear_depth_stencil(pipe, dst, clear_flags, + depth, stencil, + dstx, dsty, width, height); } +static void +llvmpipe_clear_texture(struct pipe_context *pipe, + struct pipe_resource *tex, + unsigned level, + const struct pipe_box *box, + const void *data) +{ + const struct util_format_description *desc = + util_format_description(tex->format); + if (tex->nr_samples <= 1) { + util_clear_texture(pipe, tex, level, box, data); + return; + } + union pipe_color_union color; + + if (util_format_is_depth_or_stencil(tex->format)) { + unsigned clear = 0; + float depth = 0.0f; + uint8_t stencil = 0; + uint64_t zstencil; + + if (util_format_has_depth(desc)) { + clear |= PIPE_CLEAR_DEPTH; + 
util_format_unpack_z_float(tex->format, &depth, data, 1); + } + + if (util_format_has_stencil(desc)) { + clear |= PIPE_CLEAR_STENCIL; + util_format_unpack_s_8uint(tex->format, &stencil, data, 1); + } + + zstencil = util_pack64_z_stencil(tex->format, depth, stencil); + + for (unsigned s = 0; s < util_res_sample_count(tex); s++) + lp_clear_depth_stencil_texture_msaa(pipe, tex, tex->format, clear, zstencil, + s, box); + } else { + util_format_unpack_rgba(tex->format, color.ui, data, 1); + + for (unsigned s = 0; s < util_res_sample_count(tex); s++) { + lp_clear_color_texture_msaa(pipe, tex, tex->format, &color, s, + box); + } + } +} + +static void +llvmpipe_clear_buffer(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned offset, + unsigned size, + const void *clear_value, + int clear_value_size) +{ + struct pipe_transfer *dst_t; + struct pipe_box box; + char *dst; + u_box_1d(offset, size, &box); + + dst = pipe->transfer_map(pipe, + res, + 0, + PIPE_MAP_WRITE, + &box, + &dst_t); + + switch (clear_value_size) { + case 1: + memset(dst, *(uint8_t *)clear_value, size); + break; + case 4: + util_memset32(dst, *(uint32_t *)clear_value, size / 4); + break; + default: + for (unsigned i = 0; i < size; i += clear_value_size) + memcpy(&dst[i], clear_value, clear_value_size); + break; + } + pipe->transfer_unmap(pipe, dst_t); +} void llvmpipe_init_surface_functions(struct llvmpipe_context *lp) @@ -232,8 +486,10 @@ llvmpipe_init_surface_functions(struct llvmpipe_context *lp) lp->pipe.create_surface = llvmpipe_create_surface; lp->pipe.surface_destroy = llvmpipe_surface_destroy; /* These are not actually functions dealing with surfaces */ - lp->pipe.clear_texture = util_clear_texture; + lp->pipe.clear_texture = llvmpipe_clear_texture; + lp->pipe.clear_buffer = llvmpipe_clear_buffer; lp->pipe.resource_copy_region = lp_resource_copy; lp->pipe.blit = lp_blit; lp->pipe.flush_resource = lp_flush_resource; + lp->pipe.get_sample_position = llvmpipe_get_sample_position; } |