summary | refs | log | tree | commit | diff
path: root/lib/mesa/src/gallium/drivers/llvmpipe
diff options
context:
space:
mode:
author Jonathan Gray <jsg@cvs.openbsd.org> 2022-02-24 02:30:08 +0000
committer Jonathan Gray <jsg@cvs.openbsd.org> 2022-02-24 02:30:08 +0000
commit 1d35364040c0ffa99133522fa5ab3bd6131d8bf7 (patch)
tree 0ea3d9ca4ad10692c6477168b67e98cb50ea6bd3 /lib/mesa/src/gallium/drivers/llvmpipe
parent b24b5b9049e889ee4eb39b565bcc8d48bd45ab48 (diff)
Merge Mesa 21.3.7
Diffstat (limited to 'lib/mesa/src/gallium/drivers/llvmpipe')
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/Makefile.sources | 71
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/ci/deqp-llvmpipe-fails.txt | 15
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/ci/llvmpipe-replay.txt | 0
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c | 8
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h | 5
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c | 16
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c | 85
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c | 381
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h | 14
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c | 125
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h | 38
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c | 175
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h | 6
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c | 199
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h | 70
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c | 89
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c | 172
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c | 223
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c | 70
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c | 303
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c | 95
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c | 183
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_state_tess.c | 11
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c | 14
24 files changed, 1700 insertions, 668 deletions
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.sources b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.sources
deleted file mode 100644
index d928ccba4..000000000
--- a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.sources
+++ /dev/null
@@ -1,71 +0,0 @@
-C_SOURCES := \
- lp_bld_alpha.c \
- lp_bld_alpha.h \
- lp_bld_blend_aos.c \
- lp_bld_blend.c \
- lp_bld_blend.h \
- lp_bld_blend_logicop.c \
- lp_bld_depth.c \
- lp_bld_depth.h \
- lp_bld_interp.c \
- lp_bld_interp.h \
- lp_clear.c \
- lp_clear.h \
- lp_context.c \
- lp_context.h \
- lp_debug.h \
- lp_draw_arrays.c \
- lp_fence.c \
- lp_fence.h \
- lp_flush.c \
- lp_flush.h \
- lp_jit.c \
- lp_jit.h \
- lp_limits.h \
- lp_memory.c \
- lp_memory.h \
- lp_perf.c \
- lp_perf.h \
- lp_public.h \
- lp_query.c \
- lp_query.h \
- lp_rast.c \
- lp_rast_debug.c \
- lp_rast.h \
- lp_rast_priv.h \
- lp_rast_tri.c \
- lp_rast_tri_tmp.h \
- lp_scene.c \
- lp_scene.h \
- lp_scene_queue.c \
- lp_scene_queue.h \
- lp_screen.c \
- lp_screen.h \
- lp_setup.c \
- lp_setup_context.h \
- lp_setup.h \
- lp_setup_line.c \
- lp_setup_point.c \
- lp_setup_tri.c \
- lp_setup_vbuf.c \
- lp_state_blend.c \
- lp_state_clip.c \
- lp_state_derived.c \
- lp_state_fs.c \
- lp_state_fs.h \
- lp_state_gs.c \
- lp_state.h \
- lp_state_rasterizer.c \
- lp_state_sampler.c \
- lp_state_setup.c \
- lp_state_setup.h \
- lp_state_so.c \
- lp_state_surface.c \
- lp_state_vertex.c \
- lp_state_vs.c \
- lp_surface.c \
- lp_surface.h \
- lp_tex_sample.c \
- lp_tex_sample.h \
- lp_texture.c \
- lp_texture.h
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/ci/deqp-llvmpipe-fails.txt b/lib/mesa/src/gallium/drivers/llvmpipe/ci/deqp-llvmpipe-fails.txt
deleted file mode 100644
index 5c92cf1fd..000000000
--- a/lib/mesa/src/gallium/drivers/llvmpipe/ci/deqp-llvmpipe-fails.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_center,Fail
-dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_corner,Fail
-dEQP-GLES2.functional.clipping.point.wide_point_clip,Fail
-dEQP-GLES2.functional.clipping.point.wide_point_clip_viewport_center,Fail
-dEQP-GLES2.functional.clipping.point.wide_point_clip_viewport_corner,Fail
-dEQP-GLES2.functional.clipping.triangle_vertex.clip_two.clip_neg_y_neg_z_and_neg_x_neg_y_pos_z,Fail
-dEQP-GLES2.functional.clipping.triangle_vertex.clip_two.clip_pos_y_pos_z_and_neg_x_neg_y_neg_z,Fail
-dEQP-GLES2.functional.polygon_offset.default_displacement_with_units,Fail
-dEQP-GLES2.functional.polygon_offset.fixed16_displacement_with_units,Fail
-dEQP-GLES2.functional.rasterization.interpolation.basic.line_loop_wide,Fail
-dEQP-GLES2.functional.rasterization.interpolation.basic.line_strip_wide,Fail
-dEQP-GLES2.functional.rasterization.interpolation.basic.lines_wide,Fail
-dEQP-GLES2.functional.rasterization.interpolation.projected.line_loop_wide,Fail
-dEQP-GLES2.functional.rasterization.interpolation.projected.line_strip_wide,Fail
-dEQP-GLES2.functional.rasterization.interpolation.projected.lines_wide,Fail
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/ci/llvmpipe-replay.txt b/lib/mesa/src/gallium/drivers/llvmpipe/ci/llvmpipe-replay.txt
deleted file mode 100644
index e69de29bb..000000000
--- a/lib/mesa/src/gallium/drivers/llvmpipe/ci/llvmpipe-replay.txt
+++ /dev/null
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c
index aaf6a80e8..35f3618e8 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c
@@ -179,12 +179,13 @@ llvmpipe_create_context(struct pipe_screen *screen, void *priv,
{
struct llvmpipe_context *llvmpipe;
+ if (!llvmpipe_screen_late_init(llvmpipe_screen(screen)))
+ return NULL;
+
llvmpipe = align_malloc(sizeof(struct llvmpipe_context), 16);
if (!llvmpipe)
return NULL;
- util_init_math();
-
memset(llvmpipe, 0, sizeof *llvmpipe);
make_empty_list(&llvmpipe->fs_variants_list);
@@ -283,6 +284,9 @@ llvmpipe_create_context(struct pipe_screen *screen, void *priv,
draw_wide_point_threshold(llvmpipe->draw, 10000.0);
draw_wide_line_threshold(llvmpipe->draw, 10000.0);
+ /* initial state for clipping - enabled, with no guardband */
+ draw_set_driver_clipping(llvmpipe->draw, FALSE, FALSE, FALSE, TRUE);
+
lp_reset_counters();
/* If llvmpipe_set_scissor_states() is never called, we still need to
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h
index b1adba61d..c42aeca80 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h
@@ -117,6 +117,8 @@ struct llvmpipe_context {
/** Vertex format */
struct vertex_info vertex_info;
+
+ uint8_t patch_vertices;
/** Which vertex shader output slot contains color */
int8_t color_slot[2];
@@ -156,6 +158,9 @@ struct llvmpipe_context {
unsigned nr_fs_variants;
unsigned nr_fs_instrs;
+ boolean permit_linear_rasterizer;
+ boolean single_vp;
+
struct lp_setup_variant_list_item setup_variants_list;
unsigned nr_setup_variants;
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
index e8f0ae609..48d112ac0 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
@@ -52,8 +52,9 @@
*/
static void
llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info,
+ unsigned drawid_offset,
const struct pipe_draw_indirect_info *indirect,
- const struct pipe_draw_start_count *draws,
+ const struct pipe_draw_start_count_bias *draws,
unsigned num_draws)
{
if (!indirect && (!draws[0].count || !info->instance_count))
@@ -145,7 +146,8 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info,
!lp->queries_disabled);
/* draw! */
- draw_vbo(draw, info, indirect, draws, num_draws);
+ draw_vbo(draw, info, drawid_offset, indirect, draws, num_draws,
+ lp->patch_vertices);
/*
* unmap vertex/index buffers
@@ -165,6 +167,16 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info,
}
}
+ llvmpipe_cleanup_stage_sampling(lp, PIPE_SHADER_VERTEX);
+ llvmpipe_cleanup_stage_sampling(lp, PIPE_SHADER_GEOMETRY);
+ llvmpipe_cleanup_stage_sampling(lp, PIPE_SHADER_TESS_CTRL);
+ llvmpipe_cleanup_stage_sampling(lp, PIPE_SHADER_TESS_EVAL);
+
+ llvmpipe_cleanup_stage_images(lp, PIPE_SHADER_VERTEX);
+ llvmpipe_cleanup_stage_images(lp, PIPE_SHADER_GEOMETRY);
+ llvmpipe_cleanup_stage_images(lp, PIPE_SHADER_TESS_CTRL);
+ llvmpipe_cleanup_stage_images(lp, PIPE_SHADER_TESS_EVAL);
+
/*
* TODO: Flush only when a user vertex/index buffer is present
* (or even better, modify draw module to do this
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c
index 80d8d9e5c..3c763240a 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -39,6 +39,7 @@
#include "gallivm/lp_bld_debug.h"
#include "gallivm/lp_bld_format.h"
#include "lp_context.h"
+#include "lp_screen.h"
#include "lp_jit.h"
static LLVMTypeRef
@@ -111,7 +112,8 @@ create_jit_sampler_type(struct gallivm_state *gallivm)
LLVMTypeRef elem_types[LP_JIT_SAMPLER_NUM_FIELDS];
elem_types[LP_JIT_SAMPLER_MIN_LOD] =
elem_types[LP_JIT_SAMPLER_MAX_LOD] =
- elem_types[LP_JIT_SAMPLER_LOD_BIAS] = LLVMFloatTypeInContext(lc);
+ elem_types[LP_JIT_SAMPLER_LOD_BIAS] =
+ elem_types[LP_JIT_SAMPLER_MAX_ANISO] = LLVMFloatTypeInContext(lc);
elem_types[LP_JIT_SAMPLER_BORDER_COLOR] =
LLVMArrayType(LLVMFloatTypeInContext(lc), 4);
@@ -130,6 +132,9 @@ create_jit_sampler_type(struct gallivm_state *gallivm)
LP_CHECK_MEMBER_OFFSET(struct lp_jit_sampler, border_color,
gallivm->target, sampler_type,
LP_JIT_SAMPLER_BORDER_COLOR);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_sampler, max_aniso,
+ gallivm->target, sampler_type,
+ LP_JIT_SAMPLER_MAX_ANISO);
LP_CHECK_STRUCT_SIZE(struct lp_jit_sampler,
gallivm->target, sampler_type);
return sampler_type;
@@ -185,6 +190,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
struct gallivm_state *gallivm = lp->gallivm;
LLVMContextRef lc = gallivm->context;
LLVMTypeRef viewport_type, texture_type, sampler_type, image_type;
+ LLVMTypeRef linear_elem_type;
/* struct lp_jit_viewport */
{
@@ -232,6 +238,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
elem_types[LP_JIT_CTX_U8_BLEND_COLOR] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0);
elem_types[LP_JIT_CTX_F_BLEND_COLOR] = LLVMPointerType(LLVMFloatTypeInContext(lc), 0);
elem_types[LP_JIT_CTX_VIEWPORTS] = LLVMPointerType(viewport_type, 0);
+ elem_types[LP_JIT_CTX_ANISO_FILTER_TABLE] = LLVMPointerType(LLVMFloatTypeInContext(lc), 0);
elem_types[LP_JIT_CTX_SSBOS] =
LLVMArrayType(LLVMPointerType(LLVMInt32TypeInContext(lc), 0), LP_MAX_TGSI_SHADER_BUFFERS);
elem_types[LP_JIT_CTX_NUM_SSBOS] =
@@ -281,6 +288,9 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, sample_mask,
gallivm->target, context_type,
LP_JIT_CTX_SAMPLE_MASK);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, aniso_filter_table,
+ gallivm->target, context_type,
+ LP_JIT_CTX_ANISO_FILTER_TABLE);
LP_CHECK_STRUCT_SIZE(struct lp_jit_context,
gallivm->target, context_type);
@@ -306,6 +316,74 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
lp->jit_thread_data_ptr_type = LLVMPointerType(thread_data_type, 0);
}
+ /*
+ * lp_linear_elem
+ *
+ * XXX: it can be instanced only once due to the use of opaque types, and
+ * the fact that screen->module is also a global.
+ */
+ {
+ LLVMTypeRef ret_type;
+ LLVMTypeRef arg_types[1];
+ LLVMTypeRef func_type;
+
+ ret_type = LLVMPointerType(LLVMVectorType(LLVMInt8TypeInContext(lc), 16), 0);
+
+ arg_types[0] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0);
+
+ /* lp_linear_func */
+ func_type = LLVMFunctionType(ret_type, arg_types, ARRAY_SIZE(arg_types), 0);
+
+ /*
+ * We actually define lp_linear_elem not as a structure but simply as a
+ * lp_linear_func pointer
+ */
+ linear_elem_type = LLVMPointerType(func_type, 0);
+ }
+
+ /* struct lp_jit_linear_context */
+ {
+ LLVMTypeRef linear_elem_ptr_type = LLVMPointerType(linear_elem_type, 0);
+ LLVMTypeRef elem_types[LP_JIT_LINEAR_CTX_COUNT];
+ LLVMTypeRef linear_context_type;
+
+
+ elem_types[LP_JIT_LINEAR_CTX_CONSTANTS] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0);
+ elem_types[LP_JIT_LINEAR_CTX_TEX] =
+ LLVMArrayType(linear_elem_ptr_type, LP_MAX_LINEAR_TEXTURES);
+ elem_types[LP_JIT_LINEAR_CTX_INPUTS] =
+ LLVMArrayType(linear_elem_ptr_type, LP_MAX_LINEAR_INPUTS);
+ elem_types[LP_JIT_LINEAR_CTX_COLOR0] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0);
+ elem_types[LP_JIT_LINEAR_CTX_BLEND_COLOR] = LLVMInt32TypeInContext(lc);
+ elem_types[LP_JIT_LINEAR_CTX_ALPHA_REF] = LLVMInt8TypeInContext(lc);
+
+ linear_context_type = LLVMStructTypeInContext(lc, elem_types,
+ ARRAY_SIZE(elem_types), 0);
+
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_linear_context, constants,
+ gallivm->target, linear_context_type,
+ LP_JIT_LINEAR_CTX_CONSTANTS);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_linear_context, tex,
+ gallivm->target, linear_context_type,
+ LP_JIT_LINEAR_CTX_TEX);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_linear_context, inputs,
+ gallivm->target, linear_context_type,
+ LP_JIT_LINEAR_CTX_INPUTS);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_linear_context, color0,
+ gallivm->target, linear_context_type,
+ LP_JIT_LINEAR_CTX_COLOR0);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_linear_context, blend_color,
+ gallivm->target, linear_context_type,
+ LP_JIT_LINEAR_CTX_BLEND_COLOR);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_linear_context, alpha_ref_value,
+ gallivm->target, linear_context_type,
+ LP_JIT_LINEAR_CTX_ALPHA_REF);
+ LP_CHECK_STRUCT_SIZE(struct lp_jit_linear_context,
+ gallivm->target, linear_context_type);
+
+ lp->jit_linear_context_ptr_type = LLVMPointerType(linear_context_type, 0);
+ }
+
if (gallivm_debug & GALLIVM_DEBUG_IR) {
char *str = LLVMPrintModuleToString(gallivm->module);
fprintf(stderr, "%s", str);
@@ -385,6 +463,8 @@ lp_jit_create_cs_types(struct lp_compute_shader_variant *lp)
elem_types[LP_JIT_CS_CTX_KERNEL_ARGS] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0);
+ elem_types[LP_JIT_CS_CTX_ANISO_FILTER_TABLE] = LLVMPointerType(LLVMFloatTypeInContext(lc), 0);
+
cs_context_type = LLVMStructTypeInContext(lc, elem_types,
ARRAY_SIZE(elem_types), 0);
@@ -415,6 +495,9 @@ lp_jit_create_cs_types(struct lp_compute_shader_variant *lp)
LP_CHECK_MEMBER_OFFSET(struct lp_jit_cs_context, kernel_args,
gallivm->target, cs_context_type,
LP_JIT_CS_CTX_KERNEL_ARGS);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_cs_context, aniso_filter_table,
+ gallivm->target, cs_context_type,
+ LP_JIT_CS_CTX_ANISO_FILTER_TABLE);
LP_CHECK_STRUCT_SIZE(struct lp_jit_cs_context,
gallivm->target, cs_context_type);
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c
index 94a0711df..f67fbda6b 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -509,6 +509,126 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
}
/**
+ * Directly copy pixels from a texture to the destination color buffer.
+ * This is a bin command called during bin processing.
+ */
+static void
+lp_rast_blit_tile_to_dest(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ const struct lp_scene *scene = task->scene;
+ const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
+ const struct lp_rast_state *state = task->state;
+ struct lp_fragment_shader_variant *variant = state->variant;
+ const struct lp_jit_texture *texture = &state->jit_context.textures[0];
+ const uint8_t *src;
+ uint8_t *dst;
+ unsigned src_stride;
+ unsigned dst_stride;
+ struct pipe_surface *cbuf = scene->fb.cbufs[0];
+ const unsigned face_slice = cbuf->u.tex.first_layer;
+ const unsigned level = cbuf->u.tex.level;
+ struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture);
+ int src_x, src_y;
+
+ LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
+
+ if (inputs->disable) {
+ /* This command was partially binned and has been disabled */
+ return;
+ }
+
+ dst = llvmpipe_get_texture_image_address(lpt, face_slice, level);
+
+ if (!dst)
+ return;
+
+ dst_stride = lpt->row_stride[level];
+
+ src = texture->base;
+ src_stride = texture->row_stride[0];
+
+ src_x = util_iround(GET_A0(inputs)[1][0]*texture->width - 0.5f);
+ src_y = util_iround(GET_A0(inputs)[1][1]*texture->height - 0.5f);
+
+ src_x = src_x + task->x;
+ src_y = src_y + task->y;
+
+ if (0) {
+ union util_color uc;
+ uc.ui[0] = 0xff0000ff;
+ util_fill_rect(dst,
+ cbuf->format,
+ dst_stride,
+ task->x,
+ task->y,
+ task->width,
+ task->height,
+ &uc);
+ return;
+ }
+
+ if (src_x >= 0 &&
+ src_y >= 0 &&
+ src_x + task->width <= texture->width &&
+ src_y + task->height <= texture->height) {
+
+ if (variant->shader->kind == LP_FS_KIND_BLIT_RGBA ||
+ (variant->shader->kind == LP_FS_KIND_BLIT_RGB1 &&
+ cbuf->format == PIPE_FORMAT_B8G8R8X8_UNORM)) {
+ util_copy_rect(dst,
+ cbuf->format,
+ dst_stride,
+ task->x, task->y,
+ task->width, task->height,
+ src, src_stride,
+ src_x, src_y);
+ return;
+ }
+
+ if (variant->shader->kind == LP_FS_KIND_BLIT_RGB1) {
+ if (cbuf->format == PIPE_FORMAT_B8G8R8A8_UNORM) {
+ int x, y;
+
+ dst += task->x * 4;
+ src += src_x * 4;
+ dst += task->y * dst_stride;
+ src += src_y * src_stride;
+
+ for (y = 0; y < task->height; ++y) {
+ const uint32_t *src_row = (const uint32_t *)src;
+ uint32_t *dst_row = (uint32_t *)dst;
+
+ for (x = 0; x < task->width; ++x) {
+ *dst_row++ = *src_row++ | 0xff000000;
+ }
+ dst += dst_stride;
+ src += src_stride;
+ }
+
+ return;
+ }
+ }
+
+ }
+
+ /*
+ * Fall back to the jit shaders.
+ */
+
+ lp_rast_shade_tile_opaque(task, arg);
+}
+
+static void
+lp_rast_blit_tile(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ /* This kind of just works, but isn't efficient:
+ */
+ lp_rast_blit_tile_to_dest(task, arg);
+}
+
+/**
* Begin a new occlusion query.
* This is a bin command put in all bins.
* Called per thread.
@@ -601,8 +721,123 @@ lp_rast_tile_end(struct lp_rasterizer_task *task)
task->bin = NULL;
}
-static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] =
-{
+
+
+
+
+
+/* Currently have three rendering paths - the general case triangle
+ * path, the direct rectangle path and the specialized blit/clear path.
+ */
+#define TRI ((LP_RAST_FLAGS_TRI <<1)-1) /* general case */
+#define RECT ((LP_RAST_FLAGS_RECT<<1)-1) /* direct rectangle rasterizer */
+#define BLIT ((LP_RAST_FLAGS_BLIT<<1)-1) /* write direct-to-dest */
+
+static const unsigned
+rast_flags[] = {
+ BLIT, /* clear color */
+ TRI, /* clear zstencil */
+ TRI, /* triangle_1 */
+ TRI, /* triangle_2 */
+ TRI, /* triangle_3 */
+ TRI, /* triangle_4 */
+ TRI, /* triangle_5 */
+ TRI, /* triangle_6 */
+ TRI, /* triangle_7 */
+ TRI, /* triangle_8 */
+ TRI, /* triangle_3_4 */
+ TRI, /* triangle_3_16 */
+ TRI, /* triangle_4_16 */
+ RECT, /* shade_tile */
+ RECT, /* shade_tile_opaque */
+ TRI, /* begin_query */
+ TRI, /* end_query */
+ BLIT, /* set_state, */
+ TRI, /* lp_rast_triangle_32_1 */
+ TRI, /* lp_rast_triangle_32_2 */
+ TRI, /* lp_rast_triangle_32_3 */
+ TRI, /* lp_rast_triangle_32_4 */
+ TRI, /* lp_rast_triangle_32_5 */
+ TRI, /* lp_rast_triangle_32_6 */
+ TRI, /* lp_rast_triangle_32_7 */
+ TRI, /* lp_rast_triangle_32_8 */
+ TRI, /* lp_rast_triangle_32_3_4 */
+ TRI, /* lp_rast_triangle_32_3_16 */
+ TRI, /* lp_rast_triangle_32_4_16 */
+ TRI, /* lp_rast_triangle_ms_1 */
+ TRI, /* lp_rast_triangle_ms_2 */
+ TRI, /* lp_rast_triangle_ms_3 */
+ TRI, /* lp_rast_triangle_ms_4 */
+ TRI, /* lp_rast_triangle_ms_5 */
+ TRI, /* lp_rast_triangle_ms_6 */
+ TRI, /* lp_rast_triangle_ms_7 */
+ TRI, /* lp_rast_triangle_ms_8 */
+ TRI, /* lp_rast_triangle_ms_3_4 */
+ TRI, /* lp_rast_triangle_ms_3_16 */
+ TRI, /* lp_rast_triangle_ms_4_16 */
+
+ RECT, /* rectangle */
+ BLIT, /* blit */
+};
+
+/*
+ */
+static const lp_rast_cmd_func
+dispatch_blit[] = {
+ lp_rast_clear_color,
+ NULL, /* clear_zstencil */
+ NULL, /* triangle_1 */
+ NULL, /* triangle_2 */
+ NULL, /* triangle_3 */
+ NULL, /* triangle_4 */
+ NULL, /* triangle_5 */
+ NULL, /* triangle_6 */
+ NULL, /* triangle_7 */
+ NULL, /* triangle_8 */
+ NULL, /* triangle_3_4 */
+ NULL, /* triangle_3_16 */
+ NULL, /* triangle_4_16 */
+ NULL, /* shade_tile */
+ NULL, /* shade_tile_opaque */
+ NULL, /* begin_query */
+ NULL, /* end_query */
+ lp_rast_set_state, /* set_state */
+ NULL, /* lp_rast_triangle_32_1 */
+ NULL, /* lp_rast_triangle_32_2 */
+ NULL, /* lp_rast_triangle_32_3 */
+ NULL, /* lp_rast_triangle_32_4 */
+ NULL, /* lp_rast_triangle_32_5 */
+ NULL, /* lp_rast_triangle_32_6 */
+ NULL, /* lp_rast_triangle_32_7 */
+ NULL, /* lp_rast_triangle_32_8 */
+ NULL, /* lp_rast_triangle_32_3_4 */
+ NULL, /* lp_rast_triangle_32_3_16 */
+ NULL, /* lp_rast_triangle_32_4_16 */
+ NULL, /* lp_rast_triangle_ms_1 */
+ NULL, /* lp_rast_triangle_ms_2 */
+ NULL, /* lp_rast_triangle_ms_3 */
+ NULL, /* lp_rast_triangle_ms_4 */
+ NULL, /* lp_rast_triangle_ms_5 */
+ NULL, /* lp_rast_triangle_ms_6 */
+ NULL, /* lp_rast_triangle_ms_7 */
+ NULL, /* lp_rast_triangle_ms_8 */
+ NULL, /* lp_rast_triangle_ms_3_4 */
+ NULL, /* lp_rast_triangle_ms_3_16 */
+ NULL, /* lp_rast_triangle_ms_4_16 */
+
+ NULL, /* rectangle */
+ lp_rast_blit_tile_to_dest,
+};
+
+
+
+/* Triangle and general case rasterization: Use the SOA llvm shaders,
+ * an active swizzled tile for each color buf, etc. Don't blit/clear
+ * directly to destination surface as we know there are swizzled
+ * operations coming.
+ */
+static const lp_rast_cmd_func
+dispatch_tri[] = {
lp_rast_clear_color,
lp_rast_clear_zstencil,
lp_rast_triangle_1,
@@ -643,27 +878,133 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] =
lp_rast_triangle_ms_3_4,
lp_rast_triangle_ms_3_16,
lp_rast_triangle_ms_4_16,
+ lp_rast_rectangle,
+ lp_rast_blit_tile,
+};
+
+
+/* Debug rasterization with most fastpaths disabled.
+ */
+static const lp_rast_cmd_func
+dispatch_tri_debug[] =
+{
+ lp_rast_clear_color,
+ lp_rast_clear_zstencil,
+ lp_rast_triangle_1,
+ lp_rast_triangle_2,
+ lp_rast_triangle_3,
+ lp_rast_triangle_4,
+ lp_rast_triangle_5,
+ lp_rast_triangle_6,
+ lp_rast_triangle_7,
+ lp_rast_triangle_8,
+ lp_rast_triangle_3_4,
+ lp_rast_triangle_3_16,
+ lp_rast_triangle_4_16,
+ lp_rast_shade_tile,
+ lp_rast_shade_tile,
+ lp_rast_begin_query,
+ lp_rast_end_query,
+ lp_rast_set_state,
+ lp_rast_triangle_32_1,
+ lp_rast_triangle_32_2,
+ lp_rast_triangle_32_3,
+ lp_rast_triangle_32_4,
+ lp_rast_triangle_32_5,
+ lp_rast_triangle_32_6,
+ lp_rast_triangle_32_7,
+ lp_rast_triangle_32_8,
+ lp_rast_triangle_32_3_4,
+ lp_rast_triangle_32_3_16,
+ lp_rast_triangle_32_4_16,
+ lp_rast_triangle_ms_1,
+ lp_rast_triangle_ms_2,
+ lp_rast_triangle_ms_3,
+ lp_rast_triangle_ms_4,
+ lp_rast_triangle_ms_5,
+ lp_rast_triangle_ms_6,
+ lp_rast_triangle_ms_7,
+ lp_rast_triangle_ms_8,
+ lp_rast_triangle_ms_3_4,
+ lp_rast_triangle_ms_3_16,
+ lp_rast_triangle_ms_4_16,
+
+ lp_rast_rectangle,
+ lp_rast_shade_tile,
};
+struct lp_bin_info
+lp_characterize_bin(const struct cmd_bin *bin)
+{
+ struct cmd_block *block;
+ struct lp_bin_info info;
+ unsigned andflags = ~0;
+ unsigned k, j = 0;
+
+ STATIC_ASSERT(ARRAY_SIZE(rast_flags) == LP_RAST_OP_MAX);
+
+ for (block = bin->head; block; block = block->next) {
+ for (k = 0; k < block->count; k++, j++) {
+ andflags &= rast_flags[block->cmd[k]];
+ }
+ }
+
+ info.type = andflags;
+ info.count = j;
+
+ return info;
+}
+
static void
-do_rasterize_bin(struct lp_rasterizer_task *task,
- const struct cmd_bin *bin,
- int x, int y)
+blit_rasterize_bin(struct lp_rasterizer_task *task,
+ const struct cmd_bin *bin)
{
const struct cmd_block *block;
unsigned k;
- if (0)
- lp_debug_bin(bin, x, y);
+ STATIC_ASSERT(ARRAY_SIZE(dispatch_blit) == LP_RAST_OP_MAX);
+ if (0) debug_printf("%s\n", __FUNCTION__);
for (block = bin->head; block; block = block->next) {
for (k = 0; k < block->count; k++) {
- dispatch[block->cmd[k]]( task, block->arg[k] );
+ dispatch_blit[block->cmd[k]]( task, block->arg[k] );
}
}
}
+static void
+tri_rasterize_bin(struct lp_rasterizer_task *task,
+ const struct cmd_bin *bin,
+ int x, int y)
+{
+ const struct cmd_block *block;
+ unsigned k;
+
+ STATIC_ASSERT(ARRAY_SIZE(dispatch_tri) == LP_RAST_OP_MAX);
+
+ for (block = bin->head; block; block = block->next) {
+ for (k = 0; k < block->count; k++) {
+ dispatch_tri[block->cmd[k]]( task, block->arg[k] );
+ }
+ }
+}
+
+static void
+debug_rasterize_bin(struct lp_rasterizer_task *task,
+ const struct cmd_bin *bin)
+{
+ const struct cmd_block *block;
+ unsigned k;
+
+ STATIC_ASSERT(ARRAY_SIZE(dispatch_tri_debug) == LP_RAST_OP_MAX);
+
+ for (block = bin->head; block; block = block->next) {
+ for (k = 0; k < block->count; k++) {
+ dispatch_tri_debug[block->cmd[k]]( task, block->arg[k] );
+ }
+ }
+}
/**
@@ -676,9 +1017,20 @@ static void
rasterize_bin(struct lp_rasterizer_task *task,
const struct cmd_bin *bin, int x, int y )
{
+ struct lp_bin_info info = lp_characterize_bin(bin);
+
lp_rast_tile_begin( task, bin, x, y );
- do_rasterize_bin(task, bin, x, y);
+ if (LP_DEBUG & DEBUG_NO_FASTPATH)
+ debug_rasterize_bin(task, bin);
+ else if (info.type & LP_RAST_FLAGS_BLIT)
+ blit_rasterize_bin(task, bin);
+ else if (task->scene->permit_linear_rasterizer &&
+ !(LP_PERF & PERF_NO_RAST_LINEAR) &&
+ (info.type & LP_RAST_FLAGS_RECT))
+ lp_linear_rasterize_bin(task, bin);
+ else
+ tri_rasterize_bin(task, bin, x, y);
lp_rast_tile_end(task);
@@ -686,7 +1038,9 @@ rasterize_bin(struct lp_rasterizer_task *task,
/* Debug/Perf flags:
*/
if (bin->head->count == 1) {
- if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE)
+ if (bin->head->cmd[0] == LP_RAST_OP_BLIT)
+ LP_COUNT(nr_pure_blit_64);
+ else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE)
LP_COUNT(nr_pure_shade_opaque_64);
else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE)
LP_COUNT(nr_pure_shade_64);
@@ -1012,7 +1366,12 @@ void lp_rast_destroy( struct lp_rasterizer *rast )
* per https://bugs.freedesktop.org/show_bug.cgi?id=76252 */
for (i = 0; i < rast->num_threads; i++) {
#ifdef _WIN32
- pipe_semaphore_wait(&rast->tasks[i].work_done);
+ /* Threads might already be dead - Windows apparently terminates other threads when
+ * returning from main.
+ */
+ DWORD exit_code = STILL_ACTIVE;
+ if (GetExitCodeThread(rast->threads[i], &exit_code) && exit_code == STILL_ACTIVE)
+ pipe_semaphore_wait(&rast->tasks[i].work_done);
#else
thrd_join(rast->threads[i], NULL);
#endif
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index c8154348e..c4da9cca2 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -176,6 +176,7 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
color = task->color_tiles[buf] + pixel_offset;
if (layer) {
+ assert(layer <= task->scene->fb_max_layer);
color += layer * task->scene->cbufs[buf].layer_stride;
}
@@ -347,6 +348,10 @@ void lp_rast_triangle_32_3_16( struct lp_rasterizer_task *,
void lp_rast_triangle_32_4_16( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
+
+void lp_rast_rectangle( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
void lp_rast_triangle_ms_1( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
void lp_rast_triangle_ms_2( struct lp_rasterizer_task *,
@@ -406,4 +411,13 @@ lp_rast_set_state(struct lp_rasterizer_task *task,
void
lp_debug_bin( const struct cmd_bin *bin, int x, int y );
+void
+lp_linear_rasterize_bin(struct lp_rasterizer_task *task,
+ const struct cmd_bin *bin);
+
+void
+lp_rast_linear_rect_fallback(struct lp_rasterizer_task *task,
+ const struct lp_rast_shader_inputs *inputs,
+ const struct u_rect *box);
+
#endif
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c
index 539b84c65..49db1832e 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -68,9 +68,7 @@ lp_scene_create( struct pipe_context *pipe )
return NULL;
scene->pipe = pipe;
-
- scene->data.head =
- CALLOC_STRUCT(data_block);
+ scene->data.head = &scene->data.first;
(void) mtx_init(&scene->mutex, mtx_plain);
@@ -101,8 +99,7 @@ lp_scene_destroy(struct lp_scene *scene)
{
lp_fence_reference(&scene->fence, NULL);
mtx_destroy(&scene->mutex);
- assert(scene->data.head->next == NULL);
- FREE(scene->data.head);
+ assert(scene->data.head == &scene->data.first);
FREE(scene);
}
@@ -129,8 +126,8 @@ lp_scene_is_empty(struct lp_scene *scene )
/* Returns true if there has ever been a failed allocation attempt in
- * this scene. Used in triangle emit to avoid having to check success
- * at each bin.
+ * this scene. Used in triangle/rectangle emit to avoid having to
+ * check success at each bin.
*/
boolean
lp_scene_is_oom(struct lp_scene *scene)
@@ -155,6 +152,44 @@ lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y)
}
}
+static void
+init_scene_texture(struct lp_scene_surface *ssurf, struct pipe_surface *psurf)
+{
+ if (!psurf) {
+ ssurf->stride = 0;
+ ssurf->layer_stride = 0;
+ ssurf->sample_stride = 0;
+ ssurf->nr_samples = 0;
+ ssurf->map = NULL;
+ return;
+ }
+
+ if (llvmpipe_resource_is_texture(psurf->texture)) {
+ ssurf->stride = llvmpipe_resource_stride(psurf->texture,
+ psurf->u.tex.level);
+ ssurf->layer_stride = llvmpipe_layer_stride(psurf->texture,
+ psurf->u.tex.level);
+ ssurf->sample_stride = llvmpipe_sample_stride(psurf->texture);
+
+ ssurf->map = llvmpipe_resource_map(psurf->texture,
+ psurf->u.tex.level,
+ psurf->u.tex.first_layer,
+ LP_TEX_USAGE_READ_WRITE);
+ ssurf->format_bytes = util_format_get_blocksize(psurf->format);
+ ssurf->nr_samples = util_res_sample_count(psurf->texture);
+ }
+ else {
+ struct llvmpipe_resource *lpr = llvmpipe_resource(psurf->texture);
+ unsigned pixstride = util_format_get_blocksize(psurf->format);
+ ssurf->stride = psurf->texture->width0;
+ ssurf->layer_stride = 0;
+ ssurf->sample_stride = 0;
+ ssurf->nr_samples = 1;
+ ssurf->map = lpr->data;
+ ssurf->map += psurf->u.buf.first_element * pixstride;
+ ssurf->format_bytes = util_format_get_blocksize(psurf->format);
+ }
+}
void
lp_scene_begin_rasterization(struct lp_scene *scene)
@@ -166,54 +201,12 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
for (i = 0; i < scene->fb.nr_cbufs; i++) {
struct pipe_surface *cbuf = scene->fb.cbufs[i];
-
- if (!cbuf) {
- scene->cbufs[i].stride = 0;
- scene->cbufs[i].layer_stride = 0;
- scene->cbufs[i].sample_stride = 0;
- scene->cbufs[i].nr_samples = 0;
- scene->cbufs[i].map = NULL;
- continue;
- }
-
- if (llvmpipe_resource_is_texture(cbuf->texture)) {
- scene->cbufs[i].stride = llvmpipe_resource_stride(cbuf->texture,
- cbuf->u.tex.level);
- scene->cbufs[i].layer_stride = llvmpipe_layer_stride(cbuf->texture,
- cbuf->u.tex.level);
- scene->cbufs[i].sample_stride = llvmpipe_sample_stride(cbuf->texture);
-
- scene->cbufs[i].map = llvmpipe_resource_map(cbuf->texture,
- cbuf->u.tex.level,
- cbuf->u.tex.first_layer,
- LP_TEX_USAGE_READ_WRITE);
- scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format);
- scene->cbufs[i].nr_samples = util_res_sample_count(cbuf->texture);
- }
- else {
- struct llvmpipe_resource *lpr = llvmpipe_resource(cbuf->texture);
- unsigned pixstride = util_format_get_blocksize(cbuf->format);
- scene->cbufs[i].stride = cbuf->texture->width0;
- scene->cbufs[i].layer_stride = 0;
- scene->cbufs[i].sample_stride = 0;
- scene->cbufs[i].nr_samples = 1;
- scene->cbufs[i].map = lpr->data;
- scene->cbufs[i].map += cbuf->u.buf.first_element * pixstride;
- scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format);
- }
+ init_scene_texture(&scene->cbufs[i], cbuf);
}
if (fb->zsbuf) {
struct pipe_surface *zsbuf = scene->fb.zsbuf;
- scene->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->u.tex.level);
- scene->zsbuf.layer_stride = llvmpipe_layer_stride(zsbuf->texture, zsbuf->u.tex.level);
- scene->zsbuf.sample_stride = llvmpipe_sample_stride(zsbuf->texture);
- scene->zsbuf.nr_samples = util_res_sample_count(zsbuf->texture);
- scene->zsbuf.map = llvmpipe_resource_map(zsbuf->texture,
- zsbuf->u.tex.level,
- zsbuf->u.tex.first_layer,
- LP_TEX_USAGE_READ_WRITE);
- scene->zsbuf.format_bytes = util_format_get_blocksize(zsbuf->format);
+ init_scene_texture(&scene->zsbuf, zsbuf);
}
}
@@ -226,7 +219,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
void
lp_scene_end_rasterization(struct lp_scene *scene )
{
- int i, j;
+ int i;
/* Unmap color buffers */
for (i = 0; i < scene->fb.nr_cbufs; i++) {
@@ -252,19 +245,7 @@ lp_scene_end_rasterization(struct lp_scene *scene )
/* Reset all command lists:
*/
- for (i = 0; i < scene->tiles_x; i++) {
- for (j = 0; j < scene->tiles_y; j++) {
- struct cmd_bin *bin = lp_scene_get_bin(scene, i, j);
- bin->head = NULL;
- bin->tail = NULL;
- bin->last_state = NULL;
- }
- }
-
- /* If there are any bins which weren't cleared by the loop above,
- * they will be caught (on debug builds at least) by this assert:
- */
- assert(lp_scene_is_empty(scene));
+ memset(scene->tile, 0, sizeof scene->tile);
/* Decrement texture ref counts
*/
@@ -282,6 +263,7 @@ lp_scene_end_rasterization(struct lp_scene *scene )
ref->resource[i]->height0,
llvmpipe_resource_size(ref->resource[i]));
j++;
+ llvmpipe_resource_unmap(ref->resource[i], 0, 0);
pipe_resource_reference(&ref->resource[i], NULL);
}
}
@@ -313,13 +295,14 @@ lp_scene_end_rasterization(struct lp_scene *scene )
struct data_block_list *list = &scene->data;
struct data_block *block, *tmp;
- for (block = list->head->next; block; block = tmp) {
+ for (block = list->head; block; block = tmp) {
tmp = block->next;
- FREE(block);
+ if (block != &list->first)
+ FREE(block);
}
+ list->head = &list->first;
list->head->next = NULL;
- list->head->used = 0;
}
lp_fence_reference(&scene->fence, NULL);
@@ -443,6 +426,12 @@ lp_scene_add_resource_reference(struct lp_scene *scene,
memset(ref, 0, sizeof *ref);
}
+ /* Map resource again to increment the map count. We likely use the
+ * already-mapped pointer in a texture of the jit context, and that pointer
+ * needs to stay mapped during rasterization. This mapping is released when
+ * scene rasterization is finalized. */
+ llvmpipe_resource_map(resource, 0, 0, LP_TEX_USAGE_READ);
+
/* Append the reference to the reference block.
*/
pipe_resource_reference(&ref->resource[ref->count++], resource);
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h
index ba6b20139..a089e6a49 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h
@@ -54,7 +54,8 @@ struct lp_rast_state;
*/
#define CMD_BLOCK_MAX 29
-/* Bytes per data block.
+/* Bytes per data block. This effectively limits the maximum constant buffer
+ * size.
*/
#define DATA_BLOCK_SIZE (64 * 1024)
@@ -119,6 +120,15 @@ struct resource_ref;
struct shader_ref;
+struct lp_scene_surface {
+ uint8_t *map;
+ unsigned stride;
+ unsigned layer_stride;
+ unsigned format_bytes;
+ unsigned sample_stride;
+ unsigned nr_samples;
+};
+
/**
* All bins and bin data are contained here.
* Per-bin data goes into the 'tile' bins.
@@ -140,14 +150,7 @@ struct lp_scene {
/* Framebuffer mappings - valid only between begin_rasterization()
* and end_rasterization().
*/
- struct {
- uint8_t *map;
- unsigned stride;
- unsigned layer_stride;
- unsigned format_bytes;
- unsigned sample_stride;
- unsigned nr_samples;
- } zsbuf, cbufs[PIPE_MAX_COLOR_BUFS];
+ struct lp_scene_surface zsbuf, cbufs[PIPE_MAX_COLOR_BUFS];
/* The amount of layers in the fb (minimum of all attachments) */
unsigned fb_max_layer;
@@ -179,6 +182,8 @@ struct lp_scene {
unsigned resource_reference_size;
boolean alloc_failed;
+ boolean permit_linear_rasterizer;
+
/**
* Number of active tiles in each dimension.
* This basically the framebuffer size divided by tile size
@@ -234,7 +239,7 @@ lp_scene_alloc( struct lp_scene *scene, unsigned size)
if (LP_DEBUG & DEBUG_MEM)
debug_printf("alloc %u block %u/%u tot %u/%u\n",
- size, block->used, DATA_BLOCK_SIZE,
+ size, block->used, (unsigned)DATA_BLOCK_SIZE,
scene->scene_size, LP_SCENE_MAX_SIZE);
if (block->used + size > DATA_BLOCK_SIZE) {
@@ -268,7 +273,7 @@ lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size,
if (LP_DEBUG & DEBUG_MEM)
debug_printf("alloc %u block %u/%u tot %u/%u\n",
size + alignment - 1,
- block->used, DATA_BLOCK_SIZE,
+ block->used, (unsigned)DATA_BLOCK_SIZE,
scene->scene_size, LP_SCENE_MAX_SIZE);
if (block->used + size + alignment - 1 > DATA_BLOCK_SIZE) {
@@ -286,17 +291,6 @@ lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size,
}
-/* Put back data if we decide not to use it, eg. culled triangles.
- */
-static inline void
-lp_scene_putback_data( struct lp_scene *scene, unsigned size)
-{
- struct data_block_list *list = &scene->data;
- assert(list->head && list->head->used >= size);
- list->head->used -= size;
-}
-
-
/** Return pointer to a particular tile's bin. */
static inline struct cmd_bin *
lp_scene_get_bin(struct lp_scene *scene, unsigned x, unsigned y)
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c
index c8c577623..d308319af 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -70,11 +70,15 @@ static const struct debug_named_value lp_debug_flags[] = {
{ "counters", DEBUG_COUNTERS, NULL },
{ "scene", DEBUG_SCENE, NULL },
{ "fence", DEBUG_FENCE, NULL },
+ { "no_fastpath", DEBUG_NO_FASTPATH, NULL },
+ { "linear", DEBUG_LINEAR, NULL },
+ { "linear2", DEBUG_LINEAR2, NULL },
{ "mem", DEBUG_MEM, NULL },
{ "fs", DEBUG_FS, NULL },
{ "cs", DEBUG_CS, NULL },
{ "tgsi_ir", DEBUG_TGSI_IR, NULL },
{ "cache_stats", DEBUG_CACHE_STATS, NULL },
+ { "accurate_a0", DEBUG_ACCURATE_A0 },
DEBUG_NAMED_VALUE_END
};
#endif
@@ -89,6 +93,8 @@ static const struct debug_named_value lp_perf_flags[] = {
{ "no_blend", PERF_NO_BLEND, NULL },
{ "no_depth", PERF_NO_DEPTH, NULL },
{ "no_alphatest", PERF_NO_ALPHATEST, NULL },
+ { "no_rast_linear", PERF_NO_RAST_LINEAR, NULL },
+ { "no_shade", PERF_NO_SHADE, NULL },
DEBUG_NAMED_VALUE_END
};
@@ -103,10 +109,8 @@ llvmpipe_get_vendor(struct pipe_screen *screen)
static const char *
llvmpipe_get_name(struct pipe_screen *screen)
{
- static char buf[100];
- snprintf(buf, sizeof(buf), "llvmpipe (LLVM " MESA_LLVM_VERSION_STRING ", %u bits)",
- lp_native_vector_width );
- return buf;
+ struct llvmpipe_screen *lscreen = llvmpipe_screen(screen);
+ return lscreen->renderer_string;
}
@@ -117,6 +121,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_NPOT_TEXTURES:
case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
+ case PIPE_CAP_ANISOTROPIC_FILTER:
return 1;
case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
@@ -165,6 +170,8 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
return 1;
case PIPE_CAP_DEPTH_CLIP_DISABLE:
return 1;
+ case PIPE_CAP_DEPTH_CLAMP_ENABLE:
+ return 1;
case PIPE_CAP_SHADER_STENCIL_EXPORT:
return 1;
case PIPE_CAP_TGSI_INSTANCEID:
@@ -202,12 +209,11 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
return 1;
+ case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
case PIPE_CAP_GLSL_FEATURE_LEVEL: {
struct llvmpipe_screen *lscreen = llvmpipe_screen(screen);
return lscreen->use_tgsi ? 330 : 450;
}
- case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
- return 140;
case PIPE_CAP_COMPUTE:
return GALLIVM_HAVE_CORO;
case PIPE_CAP_USER_VERTEX_BUFFERS:
@@ -341,7 +347,12 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
case PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
return 1;
+#ifdef PIPE_MEMORY_FD
+ case PIPE_CAP_MEMOBJ:
+ return 1;
+#endif
case PIPE_CAP_SAMPLER_REDUCTION_MINMAX:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_TGSI_VOTE:
@@ -378,11 +389,8 @@ llvmpipe_get_shader_param(struct pipe_screen *screen,
else
return PIPE_SHADER_IR_NIR;
}
- switch (param) {
- default:
- return gallivm_get_shader_param(param);
- }
- FALLTHROUGH;
+
+ return gallivm_get_shader_param(param);
case PIPE_SHADER_TESS_CTRL:
case PIPE_SHADER_TESS_EVAL:
/* Tessellation shader needs llvm coroutines support */
@@ -513,7 +521,7 @@ llvmpipe_get_compute_param(struct pipe_screen *_screen,
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
if (ret) {
uint64_t *max_input = ret;
- *max_input = 4096;
+ *max_input = 1576;
}
return sizeof(uint64_t);
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
@@ -552,6 +560,18 @@ llvmpipe_get_compute_param(struct pipe_screen *_screen,
return 0;
}
+static void
+llvmpipe_get_driver_uuid(struct pipe_screen *pscreen, char *uuid)
+{
+ memset(uuid, 0, PIPE_UUID_SIZE);
+}
+
+static void
+llvmpipe_get_device_uuid(struct pipe_screen *pscreen, char *uuid)
+{
+ memset(uuid, 0, PIPE_UUID_SIZE);
+}
+
static const struct nir_shader_compiler_options gallivm_nir_options = {
.lower_scmp = true,
.lower_flrp32 = true,
@@ -564,9 +584,11 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
.lower_ffma16 = true,
.lower_ffma32 = true,
.lower_ffma64 = true,
+ .lower_flrp16 = true,
.lower_fmod = true,
.lower_hadd = true,
- .lower_add_sat = true,
+ .lower_uadd_sat = true,
+ .lower_iadd_sat = true,
.lower_ldexp = true,
.lower_pack_snorm_2x16 = true,
.lower_pack_snorm_4x8 = true,
@@ -581,6 +603,8 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
.lower_unpack_half_2x16 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
.lower_rotate = true,
.lower_uadd_carry = true,
.lower_usub_borrow = true,
@@ -589,19 +613,20 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
.max_unroll_iterations = 32,
.use_interpolated_input_intrinsics = true,
.lower_to_scalar = true,
- .lower_cs_local_index_from_id = true,
.lower_uniforms_to_ubo = true,
.lower_vector_cmp = true,
.lower_device_index_to_zero = true,
+ .support_16bit_alu = true,
+ .lower_fisnormal = true,
};
-static void
+static char *
llvmpipe_finalize_nir(struct pipe_screen *screen,
- void *nirptr,
- bool optimize)
+ void *nirptr)
{
struct nir_shader *nir = (struct nir_shader *)nirptr;
lp_build_opt_nir(nir);
+ return NULL;
}
static inline const void *
@@ -650,7 +675,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
return false;
- if (bind & PIPE_BIND_RENDER_TARGET) {
+ if (bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SHADER_IMAGE)) {
if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
/* this is a lie actually other formats COULD exist where we would fail */
if (format_desc->nr_channels < 3)
@@ -674,6 +699,54 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
return false;
}
+ if (bind & PIPE_BIND_SHADER_IMAGE) {
+ switch (format) {
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ case PIPE_FORMAT_R16G16B16A16_FLOAT:
+ case PIPE_FORMAT_R32G32_FLOAT:
+ case PIPE_FORMAT_R16G16_FLOAT:
+ case PIPE_FORMAT_R11G11B10_FLOAT:
+ case PIPE_FORMAT_R32_FLOAT:
+ case PIPE_FORMAT_R16_FLOAT:
+ case PIPE_FORMAT_R32G32B32A32_UINT:
+ case PIPE_FORMAT_R16G16B16A16_UINT:
+ case PIPE_FORMAT_R10G10B10A2_UINT:
+ case PIPE_FORMAT_R8G8B8A8_UINT:
+ case PIPE_FORMAT_R32G32_UINT:
+ case PIPE_FORMAT_R16G16_UINT:
+ case PIPE_FORMAT_R8G8_UINT:
+ case PIPE_FORMAT_R32_UINT:
+ case PIPE_FORMAT_R16_UINT:
+ case PIPE_FORMAT_R8_UINT:
+ case PIPE_FORMAT_R32G32B32A32_SINT:
+ case PIPE_FORMAT_R16G16B16A16_SINT:
+ case PIPE_FORMAT_R8G8B8A8_SINT:
+ case PIPE_FORMAT_R32G32_SINT:
+ case PIPE_FORMAT_R16G16_SINT:
+ case PIPE_FORMAT_R8G8_SINT:
+ case PIPE_FORMAT_R32_SINT:
+ case PIPE_FORMAT_R16_SINT:
+ case PIPE_FORMAT_R8_SINT:
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ case PIPE_FORMAT_R10G10B10A2_UNORM:
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ case PIPE_FORMAT_R16G16_UNORM:
+ case PIPE_FORMAT_R8G8_UNORM:
+ case PIPE_FORMAT_R16_UNORM:
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ case PIPE_FORMAT_R16G16_SNORM:
+ case PIPE_FORMAT_R8G8_SNORM:
+ case PIPE_FORMAT_R16_SNORM:
+ case PIPE_FORMAT_R8_SNORM:
+ break;
+
+ default:
+ return false;
+ }
+ }
+
if ((bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) &&
((bind & PIPE_BIND_DISPLAY_TARGET) == 0)) {
/* Disable all 3-channel formats, where channel size != 32 bits.
@@ -687,6 +760,16 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
format_desc->block.bits != 96) {
return false;
}
+
+ /* Disable 64-bit integer formats for RT/samplers.
+ * VK CTS crashes with these and they don't make much sense.
+ */
+ int c = util_format_get_first_non_void_channel(format_desc->format);
+ if (c >= 0) {
+ if (format_desc->channel[c].pure_integer && format_desc->channel[c].size == 64)
+ return false;
+ }
+
}
if (!(bind & PIPE_BIND_VERTEX_BUFFER) &&
@@ -890,6 +973,36 @@ void lp_disk_cache_insert_shader(struct llvmpipe_screen *screen,
disk_cache_compute_key(screen->disk_shader_cache, ir_sha1_cache_key, 20, sha1);
disk_cache_put(screen->disk_shader_cache, sha1, cache->data, cache->data_size, NULL);
}
+
+bool
+llvmpipe_screen_late_init(struct llvmpipe_screen *screen)
+{
+ bool ret = true;
+ mtx_lock(&screen->late_mutex);
+
+ if (screen->late_init_done)
+ goto out;
+
+ screen->rast = lp_rast_create(screen->num_threads);
+ if (!screen->rast) {
+ ret = false;
+ goto out;
+ }
+
+ screen->cs_tpool = lp_cs_tpool_create(screen->num_threads);
+ if (!screen->cs_tpool) {
+ lp_rast_destroy(screen->rast);
+ ret = false;
+ goto out;
+ }
+
+ lp_disk_cache_create(screen);
+ screen->late_init_done = true;
+out:
+ mtx_unlock(&screen->late_mutex);
+ return ret;
+}
+
/**
* Create a new pipe_screen object
* Note: we're not presently subclassing pipe_screen (no llvmpipe_screen).
@@ -939,6 +1052,9 @@ llvmpipe_create_screen(struct sw_winsys *winsys)
screen->base.get_timestamp = llvmpipe_get_timestamp;
+ screen->base.get_driver_uuid = llvmpipe_get_driver_uuid;
+ screen->base.get_device_uuid = llvmpipe_get_device_uuid;
+
screen->base.finalize_nir = llvmpipe_finalize_nir;
screen->base.get_disk_shader_cache = lp_get_disk_shader_cache;
@@ -948,28 +1064,19 @@ llvmpipe_create_screen(struct sw_winsys *winsys)
screen->use_tgsi = (LP_DEBUG & DEBUG_TGSI_IR);
screen->num_threads = util_get_cpu_caps()->nr_cpus > 1 ? util_get_cpu_caps()->nr_cpus : 0;
#ifdef EMBEDDED_DEVICE
- screen->num_threads = 0;
+ screen->num_threads = MIN2(screen->num_threads, 2);
#endif
screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads);
screen->num_threads = MIN2(screen->num_threads, LP_MAX_THREADS);
- screen->rast = lp_rast_create(screen->num_threads);
- if (!screen->rast) {
- lp_jit_screen_cleanup(screen);
- FREE(screen);
- return NULL;
- }
- (void) mtx_init(&screen->rast_mutex, mtx_plain);
+ lp_build_init(); /* get lp_native_vector_width initialised */
+
+ snprintf(screen->renderer_string, sizeof(screen->renderer_string), "llvmpipe (LLVM " MESA_LLVM_VERSION_STRING ", %u bits)", lp_native_vector_width );
- screen->cs_tpool = lp_cs_tpool_create(screen->num_threads);
- if (!screen->cs_tpool) {
- lp_rast_destroy(screen->rast);
- lp_jit_screen_cleanup(screen);
- FREE(screen);
- return NULL;
- }
(void) mtx_init(&screen->cs_mutex, mtx_plain);
+ (void) mtx_init(&screen->rast_mutex, mtx_plain);
+
+ (void) mtx_init(&screen->late_mutex, mtx_plain);
- lp_disk_cache_create(screen);
return &screen->base;
}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h
index a790c199c..c72bf838a 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h
@@ -64,6 +64,11 @@ struct llvmpipe_screen
bool use_tgsi;
bool allow_cl;
+ mtx_t late_mutex;
+ bool late_init_done;
+
+ char renderer_string[100];
+
struct disk_cache *disk_shader_cache;
unsigned num_disk_shader_cache_hits;
unsigned num_disk_shader_cache_misses;
@@ -76,6 +81,7 @@ void lp_disk_cache_insert_shader(struct llvmpipe_screen *screen,
struct lp_cached_code *cache,
unsigned char ir_sha1_cache_key[20]);
+bool llvmpipe_screen_late_init(struct llvmpipe_screen *screen);
static inline struct llvmpipe_screen *
llvmpipe_screen( struct pipe_screen *pipe )
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c
index 43177745a..50f3cea7b 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -39,6 +39,7 @@
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
+#include "util/u_cpu_detect.h"
#include "util/u_viewport.h"
#include "draw/draw_pipe.h"
#include "util/os_time.h"
@@ -53,6 +54,7 @@
#include "lp_setup_context.h"
#include "lp_screen.h"
#include "lp_state.h"
+#include "lp_jit.h"
#include "frontend/sw_winsys.h"
#include "draw/draw_context.h"
@@ -84,6 +86,7 @@ lp_setup_get_empty_scene(struct lp_setup_context *setup)
lp_scene_begin_binning(setup->scene, &setup->fb);
+ setup->scene->permit_linear_rasterizer = setup->permit_linear_rasterizer;
}
@@ -98,6 +101,20 @@ first_triangle( struct lp_setup_context *setup,
setup->triangle( setup, v0, v1, v2 );
}
+static boolean
+first_rectangle( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4],
+ const float (*v3)[4],
+ const float (*v4)[4],
+ const float (*v5)[4])
+{
+ assert(setup->state == SETUP_ACTIVE);
+ lp_setup_choose_rect( setup );
+ return setup->rect( setup, v0, v1, v2, v3, v4, v5 );
+}
+
static void
first_line( struct lp_setup_context *setup,
const float (*v0)[4],
@@ -117,7 +134,8 @@ first_point( struct lp_setup_context *setup,
setup->point( setup, v0 );
}
-void lp_setup_reset( struct lp_setup_context *setup )
+void
+lp_setup_reset( struct lp_setup_context *setup )
{
unsigned i;
@@ -145,6 +163,7 @@ void lp_setup_reset( struct lp_setup_context *setup )
setup->line = first_line;
setup->point = first_point;
setup->triangle = first_triangle;
+ setup->rect = first_rectangle;
}
@@ -576,6 +595,7 @@ lp_setup_set_triangle_state( struct lp_setup_context *setup,
setup->ccw_is_frontface = ccw_is_frontface;
setup->cullmode = cull_mode;
setup->triangle = first_triangle;
+ setup->rect = first_rectangle;
setup->multisample = multisample;
setup->pixel_offset = half_pixel_center ? 0.5f : 0.0f;
setup->bottom_edge_rule = bottom_edge_rule;
@@ -588,26 +608,32 @@ lp_setup_set_triangle_state( struct lp_setup_context *setup,
void
lp_setup_set_line_state( struct lp_setup_context *setup,
- float line_width)
+ float line_width,
+ boolean line_rectangular)
{
LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
setup->line_width = line_width;
+ setup->rectangular_lines = line_rectangular;
}
void
lp_setup_set_point_state( struct lp_setup_context *setup,
float point_size,
+ boolean point_tri_clip,
boolean point_size_per_vertex,
uint sprite_coord_enable,
- uint sprite_coord_origin)
+ uint sprite_coord_origin,
+ boolean point_quad_rasterization)
{
LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
setup->point_size = point_size;
setup->sprite_coord_enable = sprite_coord_enable;
setup->sprite_coord_origin = sprite_coord_origin;
+ setup->point_tri_clip = point_tri_clip;
setup->point_size_per_vertex = point_size_per_vertex;
+ setup->legacy_points = !point_quad_rasterization;
}
void
@@ -706,7 +732,11 @@ lp_setup_set_fs_images(struct lp_setup_context *setup,
if (llvmpipe_resource_is_texture(res)) {
uint32_t mip_offset = lp_res->mip_offsets[image->u.tex.level];
+ const uint32_t bw = util_format_get_blockwidth(image->resource->format);
+ const uint32_t bh = util_format_get_blockheight(image->resource->format);
+ jit_image->width = DIV_ROUND_UP(jit_image->width, bw);
+ jit_image->height = DIV_ROUND_UP(jit_image->height, bh);
jit_image->width = u_minify(jit_image->width, image->u.tex.level);
jit_image->height = u_minify(jit_image->height, image->u.tex.level);
@@ -829,6 +859,7 @@ lp_setup_set_rasterizer_discard(struct lp_setup_context *setup,
setup->line = first_line;
setup->point = first_point;
setup->triangle = first_triangle;
+ setup->rect = first_rectangle;
}
}
@@ -842,6 +873,24 @@ lp_setup_set_vertex_info(struct lp_setup_context *setup,
}
+void
+lp_setup_set_linear_mode( struct lp_setup_context *setup,
+ boolean mode )
+{
+ /* The linear rasterizer requires sse2 both at compile and runtime,
+ * in particular for the code in lp_rast_linear_fallback.c. This
+ * is more than ten-year-old technology, so it's a reasonable
+ * baseline.
+ */
+#if defined(PIPE_ARCH_SSE)
+ setup->permit_linear_rasterizer = (mode &&
+ util_get_cpu_caps()->has_sse2);
+#else
+ setup->permit_linear_rasterizer = FALSE;
+#endif
+}
+
+
/**
* Called during state validation when LP_NEW_VIEWPORT is set.
*/
@@ -851,6 +900,7 @@ lp_setup_set_viewports(struct lp_setup_context *setup,
const struct pipe_viewport_state *viewports)
{
struct llvmpipe_context *lp = llvmpipe_context(setup->pipe);
+ float half_height, x0, y0;
unsigned i;
LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
@@ -859,6 +909,26 @@ lp_setup_set_viewports(struct lp_setup_context *setup,
assert(viewports);
/*
+ * Linear rasterizer path for scissor/viewport intersection.
+ *
+ * Calculate "scissor" rect from the (first) viewport.
+ * Just like the stored scissor rects, this needs inclusive coords.
+ * For rounding, assume half pixel center (d3d9 should not end up
+ * with fractional viewports) - quite obviously for msaa we'd need
+ * fractional values here (and elsewhere for the point bounding box).
+ *
+ * See: lp_setup.c::try_update_scene_state
+ */
+ half_height = fabsf(viewports[0].scale[1]);
+ x0 = viewports[0].translate[0] - viewports[0].scale[0];
+ y0 = viewports[0].translate[1] - half_height;
+ setup->vpwh.x0 = (int)(x0 + 0.5f);
+ setup->vpwh.x1 = (int)(viewports[0].scale[0] * 2.0f + x0 - 0.5f);
+ setup->vpwh.y0 = (int)(y0 + 0.5f);
+ setup->vpwh.y1 = (int)(half_height * 2.0f + y0 - 0.5f);
+ setup->dirty |= LP_SETUP_NEW_SCISSOR;
+
+ /*
* For use in lp_state_fs.c, propagate the viewport values for all viewports.
*/
for (i = 0; i < num_viewports; i++) {
@@ -878,7 +948,7 @@ lp_setup_set_viewports(struct lp_setup_context *setup,
/**
- * Called during state validation when LP_NEW_SAMPLER_VIEW is set.
+ * Called directly by llvmpipe_set_sampler_views
*/
void
lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
@@ -896,6 +966,12 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
for (i = 0; i < max_tex_num; i++) {
struct pipe_sampler_view *view = i < num ? views[i] : NULL;
+ /* We are going to overwrite/unref the current texture further below. If
+ * set, make sure to unmap its resource to avoid leaking the previous
+ * mapping. */
+ if (setup->fs.current_tex[i])
+ llvmpipe_resource_unmap(setup->fs.current_tex[i], 0, 0);
+
if (view) {
struct pipe_resource *res = view->texture;
struct llvmpipe_resource *lp_tex = llvmpipe_resource(res);
@@ -1000,13 +1076,7 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
}
else {
/* display target texture/surface */
- /*
- * XXX: Where should this be unmapped?
- */
- struct llvmpipe_screen *screen = llvmpipe_screen(res->screen);
- struct sw_winsys *winsys = screen->winsys;
- jit_tex->base = winsys->displaytarget_map(winsys, lp_tex->dt,
- PIPE_MAP_READ);
+ jit_tex->base = llvmpipe_resource_map(res, 0, 0, LP_TEX_USAGE_READ);
jit_tex->row_stride[0] = lp_tex->row_stride[0];
jit_tex->img_stride[0] = lp_tex->img_stride[0];
jit_tex->mip_offsets[0] = 0;
@@ -1028,7 +1098,6 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
setup->dirty |= LP_SETUP_NEW_FS;
}
-
/**
* Called during state validation when LP_NEW_SAMPLER is set.
*/
@@ -1053,6 +1122,7 @@ lp_setup_set_fragment_sampler_state(struct lp_setup_context *setup,
jit_sam->min_lod = sampler->min_lod;
jit_sam->max_lod = sampler->max_lod;
jit_sam->lod_bias = sampler->lod_bias;
+ jit_sam->max_aniso = sampler->max_anisotropy;
COPY_4V(jit_sam->border_color, sampler->border_color.f);
}
}
@@ -1061,6 +1131,8 @@ lp_setup_set_fragment_sampler_state(struct lp_setup_context *setup,
}
+
+
/**
* Is the given texture referenced by any scene?
* Note: we have to check all scenes including any scenes currently
@@ -1289,6 +1361,7 @@ try_update_scene_state( struct lp_setup_context *setup )
memcpy(&stored->jit_context,
&setup->fs.current.jit_context,
sizeof setup->fs.current.jit_context);
+ stored->jit_context.aniso_filter_table = lp_build_sample_aniso_filter_table();
stored->variant = setup->fs.current.variant;
if (!lp_scene_add_frag_shader_reference(scene,
@@ -1314,6 +1387,7 @@ try_update_scene_state( struct lp_setup_context *setup )
if (setup->dirty & LP_SETUP_NEW_SCISSOR) {
unsigned i;
+
for (i = 0; i < PIPE_MAX_VIEWPORTS; ++i) {
setup->draw_regions[i] = setup->framebuffer;
if (setup->scissor_test) {
@@ -1321,6 +1395,35 @@ try_update_scene_state( struct lp_setup_context *setup )
&setup->draw_regions[i]);
}
}
+ if (setup->permit_linear_rasterizer) {
+ /* NOTE: this only takes first vp into account. */
+ boolean need_vp_scissoring = !!memcmp(&setup->vpwh, &setup->framebuffer,
+ sizeof(setup->framebuffer));
+ assert(setup->viewport_index_slot < 0);
+ if (need_vp_scissoring) {
+ u_rect_possible_intersection(&setup->vpwh,
+ &setup->draw_regions[0]);
+ }
+ }
+ else if (setup->point_tri_clip) {
+ /*
+ * for d3d-style point clipping, we're going to need
+ * the fake vp scissor too. Hence do the intersection with vp,
+ * but don't indicate this. As above this will only work for first vp
+ * which should be ok because we instruct draw to only skip point
+ * clipping when there's only one viewport (this works because d3d10
+ * points are always single pixel).
+ * (Also note that if we have permit_linear_rasterizer this will
+ * cause large points to always get vp scissored, regardless of the
+ * point_tri_clip setting.)
+ */
+ boolean need_vp_scissoring = !!memcmp(&setup->vpwh, &setup->framebuffer,
+ sizeof(setup->framebuffer));
+ if (need_vp_scissoring) {
+ u_rect_possible_intersection(&setup->vpwh,
+ &setup->draw_regions[0]);
+ }
+ }
}
setup->dirty = 0;
@@ -1417,7 +1520,10 @@ lp_setup_destroy( struct lp_setup_context *setup )
util_unreference_framebuffer_state(&setup->fb);
for (i = 0; i < ARRAY_SIZE(setup->fs.current_tex); i++) {
- pipe_resource_reference(&setup->fs.current_tex[i], NULL);
+ struct pipe_resource **res_ptr = &setup->fs.current_tex[i];
+ if (*res_ptr)
+ llvmpipe_resource_unmap(*res_ptr, 0, 0);
+ pipe_resource_reference(res_ptr, NULL);
}
for (i = 0; i < ARRAY_SIZE(setup->constants); i++) {
@@ -1650,4 +1756,69 @@ lp_setup_flush_and_restart(struct lp_setup_context *setup)
return TRUE;
}
-
+void
+lp_setup_add_scissor_planes(const struct u_rect *scissor,
+ struct lp_rast_plane *plane_s,
+ boolean s_planes[4], bool multisample)
+{
+ /*
+ * When rasterizing scissored tris, use the intersection of the
+ * triangle bounding box and the scissor rect to generate the
+ * scissor planes.
+ *
+ * This permits us to cut off the triangle "tails" that are present
+ * in the intermediate recursive levels caused when two of the
+ * triangle's edges don't diverge quickly enough to trivially reject
+ * exterior blocks from the triangle.
+ *
+ * It's not really clear if it's worth worrying about these tails,
+ * but since we generate the planes for each scissored tri, it's
+ * free to trim them in this case.
+ *
+ * Note that otherwise, the scissor planes only vary in 'C' value,
+ * and even then only on state-changes. Could alternatively store
+ * these planes elsewhere.
+ * (Or only store the c value together with a bit indicating which
+ * scissor edge this is, so rasterization would treat them differently
+ * (easier to evaluate) to ordinary planes.)
+ */
+ int adj = multisample ? 127 : 0;
+ if (s_planes[0]) {
+ int x0 = scissor->x0 - 1;
+ plane_s->dcdx = ~0U << 8;
+ plane_s->dcdy = 0;
+ plane_s->c = x0 << 8;
+ plane_s->c += adj;
+ plane_s->c = -plane_s->c; /* flip sign */
+ plane_s->eo = 1 << 8;
+ plane_s++;
+ }
+ if (s_planes[1]) {
+ int x1 = scissor->x1;
+ plane_s->dcdx = 1 << 8;
+ plane_s->dcdy = 0;
+ plane_s->c = x1 << 8;
+ plane_s->c += 127 + adj;
+ plane_s->eo = 0 << 8;
+ plane_s++;
+ }
+ if (s_planes[2]) {
+ int y0 = scissor->y0 - 1;
+ plane_s->dcdx = 0;
+ plane_s->dcdy = 1 << 8;
+ plane_s->c = y0 << 8;
+ plane_s->c += adj;
+ plane_s->c = -plane_s->c; /* flip sign */
+ plane_s->eo = 1 << 8;
+ plane_s++;
+ }
+ if (s_planes[3]) {
+ int y1 = scissor->y1;
+ plane_s->dcdx = 0;
+ plane_s->dcdy = ~0U << 8;
+ plane_s->c = y1 << 8;
+ plane_s->c += 127 + adj;
+ plane_s->eo = 0;
+ plane_s++;
+ }
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h
index 82fc14b5e..656a64f40 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -96,13 +96,17 @@ struct lp_setup_context
struct llvmpipe_query *active_queries[LP_MAX_ACTIVE_BINNED_QUERIES];
unsigned active_binned_queries;
- boolean flatshade_first;
- boolean ccw_is_frontface;
- boolean scissor_test;
- boolean point_size_per_vertex;
- boolean rasterizer_discard;
- boolean multisample;
- unsigned cullmode;
+ unsigned flatshade_first:1;
+ unsigned ccw_is_frontface:1;
+ unsigned scissor_test:1;
+ unsigned point_tri_clip:1;
+ unsigned point_size_per_vertex:1;
+ unsigned legacy_points:1;
+ unsigned rasterizer_discard:1;
+ unsigned permit_linear_rasterizer:1;
+ unsigned multisample:1;
+ unsigned rectangular_lines:1;
+ unsigned cullmode:2; /**< PIPE_FACE_x */
unsigned bottom_edge_rule;
float pixel_offset;
float line_width;
@@ -115,6 +119,7 @@ struct lp_setup_context
struct pipe_framebuffer_state fb;
struct u_rect framebuffer;
struct u_rect scissors[PIPE_MAX_VIEWPORTS];
+ struct u_rect vpwh;
struct u_rect draw_regions[PIPE_MAX_VIEWPORTS]; /* intersection of fb & scissor */
struct lp_jit_viewport viewports[PIPE_MAX_VIEWPORTS];
@@ -177,6 +182,15 @@ struct lp_setup_context
const float (*v0)[4],
const float (*v1)[4],
const float (*v2)[4]);
+
+ boolean
+ (*rect)( struct lp_setup_context *,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4],
+ const float (*v3)[4],
+ const float (*v4)[4],
+ const float (*v5)[4]);
};
static inline void
@@ -193,10 +207,15 @@ scissor_planes_needed(boolean scis_planes[4], const struct u_rect *bbox,
scis_planes[3] = (bbox->y1 > scissor->y1);
}
+void
+lp_setup_add_scissor_planes(const struct u_rect *scissor,
+ struct lp_rast_plane *plane_s,
+ boolean s_planes[4], bool multisample);
void lp_setup_choose_triangle( struct lp_setup_context *setup );
void lp_setup_choose_line( struct lp_setup_context *setup );
void lp_setup_choose_point( struct lp_setup_context *setup );
+void lp_setup_choose_rect( struct lp_setup_context *setup );
void lp_setup_init_vbuf(struct lp_setup_context *setup);
@@ -207,6 +226,15 @@ void lp_setup_destroy( struct lp_setup_context *setup );
boolean lp_setup_flush_and_restart(struct lp_setup_context *setup);
+boolean
+lp_setup_whole_tile(struct lp_setup_context *setup,
+ const struct lp_rast_shader_inputs *inputs,
+ int tx, int ty);
+
+boolean
+lp_setup_is_blit(const struct lp_setup_context *setup,
+ const struct lp_rast_shader_inputs *inputs);
+
void
lp_setup_print_triangle(struct lp_setup_context *setup,
const float (*v0)[4],
@@ -218,6 +246,19 @@ lp_setup_print_vertex(struct lp_setup_context *setup,
const char *name,
const float (*v)[4]);
+void
+lp_rect_cw(struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4],
+ boolean frontfacing);
+
+void
+lp_setup_triangle_ccw( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4],
+ boolean front );
struct lp_rast_triangle *
lp_setup_alloc_triangle(struct lp_scene *scene,
@@ -225,6 +266,16 @@ lp_setup_alloc_triangle(struct lp_scene *scene,
unsigned nr_planes,
unsigned *tri_size);
+struct lp_rast_rectangle *
+lp_setup_alloc_rectangle(struct lp_scene *scene,
+ unsigned nr_inputs);
+
+boolean
+lp_setup_analyse_triangles(struct lp_setup_context *setup,
+ const void *vb,
+ int stride,
+ int nr);
+
boolean
lp_setup_bin_triangle(struct lp_setup_context *setup,
struct lp_rast_triangle *tri,
@@ -233,4 +284,9 @@ lp_setup_bin_triangle(struct lp_setup_context *setup,
int nr_planes,
unsigned scissor_index);
+boolean
+lp_setup_bin_rectangle(struct lp_setup_context *setup,
+ struct lp_rast_rectangle *rect);
+
+
#endif
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c
index 0535138df..1f812e8ea 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -357,10 +357,24 @@ try_setup_line( struct lp_setup_context *setup,
info.v2 = v2;
- /* X-MAJOR LINE */
- if (fabsf(dx) >= fabsf(dy)) {
+ if (setup->rectangular_lines) {
+ float scale = (setup->line_width * 0.5f) / sqrtf(area);
+ int tx = subpixel_snap(-dy * scale);
+ int ty = subpixel_snap(+dx * scale);
+
+ x[0] = subpixel_snap(v1[0][0] - pixel_offset) - tx;
+ x[1] = subpixel_snap(v2[0][0] - pixel_offset) - tx;
+ x[2] = subpixel_snap(v2[0][0] - pixel_offset) + tx;
+ x[3] = subpixel_snap(v1[0][0] - pixel_offset) + tx;
+
+ y[0] = subpixel_snap(v1[0][1] - pixel_offset) - ty;
+ y[1] = subpixel_snap(v2[0][1] - pixel_offset) - ty;
+ y[2] = subpixel_snap(v2[0][1] - pixel_offset) + ty;
+ y[3] = subpixel_snap(v1[0][1] - pixel_offset) + ty;
+ } else if (fabsf(dx) >= fabsf(dy)) {
float dydx = dy / dx;
+ /* X-MAJOR LINE */
x1diff = v1[0][0] - floorf(v1[0][0]) - 0.5f;
y1diff = v1[0][1] - floorf(v1[0][1]) - 0.5f;
x2diff = v2[0][0] - floorf(v2[0][0]) - 0.5f;
@@ -412,6 +426,10 @@ try_setup_line( struct lp_setup_context *setup,
will_draw_start = sign(-x1diff) != sign(dx);
will_draw_end = (sign(x2diff) == sign(-dx)) || x2diff==0;
+ /* interpolate using the preferred wide-lines formula */
+ info.dx *= 1 + dydx * dydx;
+ info.dy = 0;
+
if (dx < 0) {
/* if v2 is to the right of v1, swap pointers */
const float (*temp)[4] = v1;
@@ -509,6 +527,10 @@ try_setup_line( struct lp_setup_context *setup,
will_draw_start = sign(y1diff) == sign(dy);
will_draw_end = (sign(-y2diff) == sign(dy)) || y2diff==0;
+ /* interpolate using the preferred wide-lines formula */
+ info.dx = 0;
+ info.dy *= 1 + dxdy * dxdy;
+
if (dy > 0) {
/* if v2 is on top of v1, swap pointers */
const float (*temp)[4] = v1;
@@ -572,15 +594,8 @@ try_setup_line( struct lp_setup_context *setup,
bbox.y1--;
}
- if (bbox.x1 < bbox.x0 ||
- bbox.y1 < bbox.y0) {
- if (0) debug_printf("empty bounding box\n");
- LP_COUNT(nr_culled_tris);
- return TRUE;
- }
-
if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) {
- if (0) debug_printf("offscreen\n");
+ if (0) debug_printf("no intersection\n");
LP_COUNT(nr_culled_tris);
return TRUE;
}
@@ -696,60 +711,8 @@ try_setup_line( struct lp_setup_context *setup,
if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy;
}
-
- /*
- * When rasterizing scissored tris, use the intersection of the
- * triangle bounding box and the scissor rect to generate the
- * scissor planes.
- *
- * This permits us to cut off the triangle "tails" that are present
- * in the intermediate recursive levels caused when two of the
- * triangles edges don't diverge quickly enough to trivially reject
- * exterior blocks from the triangle.
- *
- * It's not really clear if it's worth worrying about these tails,
- * but since we generate the planes for each scissored tri, it's
- * free to trim them in this case.
- *
- * Note that otherwise, the scissor planes only vary in 'C' value,
- * and even then only on state-changes. Could alternatively store
- * these planes elsewhere.
- * (Or only store the c value together with a bit indicating which
- * scissor edge this is, so rasterization would treat them differently
- * (easier to evaluate) to ordinary planes.)
- */
if (nr_planes > 4) {
- struct lp_rast_plane *plane_s = &plane[4];
-
- if (s_planes[0]) {
- plane_s->dcdx = ~0U << 8;
- plane_s->dcdy = 0;
- plane_s->c = (1-scissor->x0) << 8;
- plane_s->eo = 1 << 8;
- plane_s++;
- }
- if (s_planes[1]) {
- plane_s->dcdx = 1 << 8;
- plane_s->dcdy = 0;
- plane_s->c = (scissor->x1+1) << 8;
- plane_s->eo = 0 << 8;
- plane_s++;
- }
- if (s_planes[2]) {
- plane_s->dcdx = 0;
- plane_s->dcdy = 1 << 8;
- plane_s->c = (1-scissor->y0) << 8;
- plane_s->eo = 1 << 8;
- plane_s++;
- }
- if (s_planes[3]) {
- plane_s->dcdx = 0;
- plane_s->dcdy = ~0U << 8;
- plane_s->c = (scissor->y1+1) << 8;
- plane_s->eo = 0;
- plane_s++;
- }
- assert(plane_s == &plane[nr_planes]);
+ lp_setup_add_scissor_planes(scissor, &plane[4], s_planes, setup->multisample);
}
return lp_setup_bin_triangle(setup, line, &bbox, &bboxpos, nr_planes, viewport_index);
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c
index 696612309..6d4e42634 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c
@@ -352,10 +352,8 @@ try_setup_point( struct lp_setup_context *setup,
int adj = (setup->bottom_edge_rule != 0) ? 1 : 0;
float pixel_offset = setup->multisample ? 0.0 : setup->pixel_offset;
struct lp_scene *scene = setup->scene;
- struct lp_rast_triangle *point;
- unsigned bytes;
struct u_rect bbox;
- unsigned nr_planes = 4;
+ int x[2], y[2];
struct point_info info;
unsigned viewport_index = 0;
unsigned layer = 0;
@@ -374,8 +372,7 @@ try_setup_point( struct lp_setup_context *setup,
print_point(setup, v0, size);
/* Bounding rectangle (in pixels) */
- if (!lp_context->rasterizer ||
- lp_context->rasterizer->point_quad_rasterization) {
+ if (!setup->legacy_points || setup->multisample) {
/*
* Rasterize points as quads.
*/
@@ -388,10 +385,14 @@ try_setup_point( struct lp_setup_context *setup,
x0 = subpixel_snap(v0[0][0] - pixel_offset) - fixed_width/2;
y0 = subpixel_snap(v0[0][1] - pixel_offset) - fixed_width/2;
- bbox.x0 = (x0 + (FIXED_ONE-1)) >> FIXED_ORDER;
- bbox.x1 = (x0 + fixed_width + (FIXED_ONE-1)) >> FIXED_ORDER;
- bbox.y0 = (y0 + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
- bbox.y1 = (y0 + fixed_width + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+ x[0] = x0;
+ x[1] = x0 + fixed_width;
+ y[0] = y0;
+ y[1] = y0 + fixed_width;
+ bbox.x0 = x[0] >> FIXED_ORDER;
+ bbox.x1 = (x[1] + (FIXED_ONE-1)) >> FIXED_ORDER;
+ bbox.y0 = (y[0] + adj) >> FIXED_ORDER;
+ bbox.y1 = (y[1] + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
/* Inclusive coordinates:
*/
@@ -439,6 +440,11 @@ try_setup_point( struct lp_setup_context *setup,
bbox.x1 = bbox.x0 + int_width - 1;
bbox.y1 = bbox.y0 + int_width - 1;
}
+
+ x[0] = (bbox.x0 - 1) << 8;
+ x[1] = (bbox.x1 + 1) << 8;
+ y[0] = (bbox.y0 - 1) << 8;
+ y[1] = (bbox.y1 + 1) << 8;
}
if (0) {
@@ -452,79 +458,143 @@ try_setup_point( struct lp_setup_context *setup,
}
if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) {
- if (0) debug_printf("offscreen\n");
+ if (0) debug_printf("no intersection\n");
LP_COUNT(nr_culled_tris);
return TRUE;
}
u_rect_find_intersection(&setup->draw_regions[viewport_index], &bbox);
- point = lp_setup_alloc_triangle(scene,
- key->num_inputs,
- nr_planes,
- &bytes);
- if (!point)
- return FALSE;
-
+ /* We can't use rectangle rasterizer for non-legacy points for now. */
+ if (!setup->legacy_points || setup->multisample) {
+ struct lp_rast_triangle *point;
+ struct lp_rast_plane *plane;
+ unsigned bytes;
+ unsigned nr_planes = 4;
+
+ point = lp_setup_alloc_triangle(scene,
+ key->num_inputs,
+ nr_planes,
+ &bytes);
+ if (!point)
+ return FALSE;
+
#ifdef DEBUG
- point->v[0][0] = v0[0][0];
- point->v[0][1] = v0[0][1];
+ point->v[0][0] = v0[0][0];
+ point->v[0][1] = v0[0][1];
#endif
- LP_COUNT(nr_tris);
+ LP_COUNT(nr_tris);
- if (draw_will_inject_frontface(lp_context->draw) &&
- setup->face_slot > 0) {
- point->inputs.frontfacing = v0[setup->face_slot][0];
- } else {
- point->inputs.frontfacing = TRUE;
- }
+ if (draw_will_inject_frontface(lp_context->draw) &&
+ setup->face_slot > 0) {
+ point->inputs.frontfacing = v0[setup->face_slot][0];
+ } else {
+ point->inputs.frontfacing = TRUE;
+ }
- info.v0 = v0;
- info.dx01 = 0;
- info.dx12 = fixed_width;
- info.dy01 = fixed_width;
- info.dy12 = 0;
- info.a0 = GET_A0(&point->inputs);
- info.dadx = GET_DADX(&point->inputs);
- info.dady = GET_DADY(&point->inputs);
- info.frontfacing = point->inputs.frontfacing;
+ info.v0 = v0;
+ info.dx01 = 0;
+ info.dx12 = fixed_width;
+ info.dy01 = fixed_width;
+ info.dy12 = 0;
+ info.a0 = GET_A0(&point->inputs);
+ info.dadx = GET_DADX(&point->inputs);
+ info.dady = GET_DADY(&point->inputs);
+ info.frontfacing = point->inputs.frontfacing;
- /* Setup parameter interpolants:
- */
- setup_point_coefficients(setup, &info);
+ /* Setup parameter interpolants:
+ */
+ setup_point_coefficients(setup, &info);
- point->inputs.disable = FALSE;
- point->inputs.opaque = FALSE;
- point->inputs.layer = layer;
- point->inputs.viewport_index = viewport_index;
- point->inputs.view_index = setup->view_index;
+ point->inputs.disable = FALSE;
+ point->inputs.is_blit = FALSE;
+ point->inputs.opaque = setup->fs.current.variant->opaque;
+ point->inputs.layer = layer;
+ point->inputs.viewport_index = viewport_index;
+ point->inputs.view_index = setup->view_index;
- {
- struct lp_rast_plane *plane = GET_PLANES(point);
+ plane = GET_PLANES(point);
plane[0].dcdx = ~0U << 8;
plane[0].dcdy = 0;
- plane[0].c = (1-bbox.x0) << 8;
+ plane[0].c = -MAX2(x[0], bbox.x0 << 8);
plane[0].eo = 1 << 8;
plane[1].dcdx = 1 << 8;
plane[1].dcdy = 0;
- plane[1].c = (bbox.x1+1) << 8;
+ plane[1].c = MIN2(x[1], (bbox.x1 + 1) << 8);
plane[1].eo = 0;
plane[2].dcdx = 0;
plane[2].dcdy = 1 << 8;
- plane[2].c = (1-bbox.y0) << 8;
+ plane[2].c = -MAX2(y[0], (bbox.y0 << 8) - adj);
plane[2].eo = 1 << 8;
plane[3].dcdx = 0;
plane[3].dcdy = ~0U << 8;
- plane[3].c = (bbox.y1+1) << 8;
+ plane[3].c = MIN2(y[1], (bbox.y1 + 1) << 8);
plane[3].eo = 0;
- }
- return lp_setup_bin_triangle(setup, point, &bbox, &bbox, nr_planes, viewport_index);
+ if (!setup->legacy_points || setup->multisample) {
+ /* adjust for fill-rule*/
+ plane[0].c++; /* left */
+ if (setup->bottom_edge_rule == 0)
+ plane[2].c++; /* top-left */
+ else
+ plane[3].c++; /* bottom-left */
+ }
+
+ return lp_setup_bin_triangle(setup, point, &bbox, &bbox, nr_planes, viewport_index);
+
+ } else {
+ struct lp_rast_rectangle *point;
+ point = lp_setup_alloc_rectangle(scene,
+ key->num_inputs);
+ if (!point)
+ return FALSE;
+#ifdef DEBUG
+ point->v[0][0] = v0[0][0];
+ point->v[0][1] = v0[0][1];
+#endif
+
+ point->box.x0 = bbox.x0;
+ point->box.x1 = bbox.x1;
+ point->box.y0 = bbox.y0;
+ point->box.y1 = bbox.y1;
+
+ LP_COUNT(nr_tris);
+
+ if (draw_will_inject_frontface(lp_context->draw) &&
+ setup->face_slot > 0) {
+ point->inputs.frontfacing = v0[setup->face_slot][0];
+ } else {
+ point->inputs.frontfacing = TRUE;
+ }
+
+ info.v0 = v0;
+ info.dx01 = 0;
+ info.dx12 = fixed_width;
+ info.dy01 = fixed_width;
+ info.dy12 = 0;
+ info.a0 = GET_A0(&point->inputs);
+ info.dadx = GET_DADX(&point->inputs);
+ info.dady = GET_DADY(&point->inputs);
+ info.frontfacing = point->inputs.frontfacing;
+
+ /* Setup parameter interpolants:
+ */
+ setup_point_coefficients(setup, &info);
+
+ point->inputs.disable = FALSE;
+ point->inputs.is_blit = FALSE;
+ point->inputs.opaque = setup->fs.current.variant->opaque;
+ point->inputs.layer = layer;
+ point->inputs.viewport_index = viewport_index;
+ point->inputs.view_index = setup->view_index;
+
+ return lp_setup_bin_rectangle(setup, point);
+ }
}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 4fb76dd22..347f0a61c 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -205,6 +205,7 @@ lp_rast_32_tri_tab[MAX_PLANES+1] = {
LP_RAST_OP_TRIANGLE_32_8
};
+
static unsigned
lp_rast_ms_tri_tab[MAX_PLANES+1] = {
0, /* should be impossible */
@@ -218,56 +219,46 @@ lp_rast_ms_tri_tab[MAX_PLANES+1] = {
LP_RAST_OP_MS_TRIANGLE_8
};
-/**
- * The primitive covers the whole tile- shade whole tile.
+/*
+ * Detect big primitives drawn with an alpha == 1.0.
*
- * \param tx, ty the tile position in tiles, not pixels
+ * This is used when simulating anti-aliasing primitives in shaders, e.g.,
+ * when drawing the windows client area in Aero's flip-3d effect.
+ *
+ * Returns TRUE immediately if the current fragment shader variant is
+ * already marked opaque, and FALSE if it is not even potentially_opaque.
+ * Otherwise the source of channel [3] of cbuf[0] (alpha_info) decides:
+ * - TGSI_FILE_CONSTANT: the value read from constants[0] must be
+ * exactly 1.0f;
+ * - TGSI_FILE_INPUT: the matching attribute channel of all three
+ * vertices must be exactly 1.0f;
+ * - anything else: FALSE.
+ *
+ * NOTE(review): the "1 + index" used for vertex attributes presumably
+ * skips the position stored in slot 0 - confirm against the vertex layout.
*/
static boolean
-lp_setup_whole_tile(struct lp_setup_context *setup,
- const struct lp_rast_shader_inputs *inputs,
- int tx, int ty)
+check_opaque(struct lp_setup_context *setup,
+ const float (*v1)[4],
+ const float (*v2)[4],
+ const float (*v3)[4])
{
- struct lp_scene *scene = setup->scene;
+ const struct lp_fragment_shader_variant *variant =
+ setup->fs.current.variant;
+ const struct lp_tgsi_channel_info *alpha_info = &variant->shader->info.cbuf[0][3];
- LP_COUNT(nr_fully_covered_64);
-
- /* if variant is opaque and scissor doesn't effect the tile */
- if (inputs->opaque) {
- /* Several things prevent this optimization from working:
- * - For layered rendering we can't determine if this covers the same layer
- * as previous rendering (or in case of clears those actually always cover
- * all layers so optimization is impossible). Need to use fb_max_layer and
- * not setup->layer_slot to determine this since even if there's currently
- * no slot assigned previous rendering could have used one.
- * - If there were any Begin/End query commands in the scene then those
- * would get removed which would be very wrong. Furthermore, if queries
- * were just active we also can't do the optimization since to get
- * accurate query results we unfortunately need to execute the rendering
- * commands.
- */
- if (!scene->fb.zsbuf && scene->fb_max_layer == 0 && !scene->had_queries) {
- /*
- * All previous rendering will be overwritten so reset the bin.
- */
- lp_scene_bin_reset( scene, tx, ty );
- }
+ if (variant->opaque)
+ return TRUE;
+
+ if (!variant->potentially_opaque)
+ return FALSE;
+
+ if (alpha_info->file == TGSI_FILE_CONSTANT) {
+ const float *constants = setup->fs.current.jit_context.constants[0];
+ float alpha = constants[alpha_info->u.index*4 +
+ alpha_info->swizzle];
+ return alpha == 1.0f;
+ }
- LP_COUNT(nr_shade_opaque_64);
- return lp_scene_bin_cmd_with_state( scene, tx, ty,
- setup->fs.stored,
- LP_RAST_OP_SHADE_TILE_OPAQUE,
- lp_rast_arg_inputs(inputs) );
- } else {
- LP_COUNT(nr_shade_64);
- return lp_scene_bin_cmd_with_state( scene, tx, ty,
- setup->fs.stored,
- LP_RAST_OP_SHADE_TILE,
- lp_rast_arg_inputs(inputs) );
+ if (alpha_info->file == TGSI_FILE_INPUT) {
+ return (v1[1 + alpha_info->u.index][alpha_info->swizzle] == 1.0f &&
+ v2[1 + alpha_info->u.index][alpha_info->swizzle] == 1.0f &&
+ v3[1 + alpha_info->u.index][alpha_info->swizzle] == 1.0f);
}
+
+ return FALSE;
}
+
/**
* Do basic setup for triangle rasterization and determine which
* framebuffer tiles are touched. Put the triangle in the scene's
@@ -333,15 +324,8 @@ do_triangle_ccw(struct lp_setup_context *setup,
bbox.y1 = (MAX3(position->y[0], position->y[1], position->y[2]) - 1 + adj) >> FIXED_ORDER;
}
- if (bbox.x1 < bbox.x0 ||
- bbox.y1 < bbox.y0) {
- if (0) debug_printf("empty bounding box\n");
- LP_COUNT(nr_culled_tris);
- return TRUE;
- }
-
if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) {
- if (0) debug_printf("offscreen\n");
+ if (0) debug_printf("no intersection\n");
LP_COUNT(nr_culled_tris);
return TRUE;
}
@@ -382,17 +366,97 @@ do_triangle_ccw(struct lp_setup_context *setup,
LP_COUNT(nr_tris);
+ /*
+ * Rotate the tri such that v0 is closest to the fb origin.
+ * This can give more accurate a0 value (which is at fb origin)
+ * when calculating the interpolants.
+ * It can't work when there's flat shading for instance in one
+ * of the attributes, hence restrict this to just a single attribute
+ * which is what causes some test failures.
+ * (This does not address the problem that interpolation may be
+ * inaccurate if gradients are relatively steep in small tris far
+ * away from the origin. It does however fix the (silly) wgf11rasterizer
+ * Interpolator test.)
+ * XXX This causes problems with mipgen -EmuTexture for not yet really
+ * understood reasons (if the vertices would be submitted in a different
+ * order, we'd also generate the same "wrong" results here without
+ * rotation). In any case, that we generate different values if a prim
+ * has the vertices rotated but is otherwise the same (which is due to
+ * numerical issues) is not a nice property. An additional problem by
+ * swapping the vertices here (which is possibly worse) is that
+ * the same primitive coming in twice might generate different values
+ * (in particular for z) due to the swapping potentially not happening
+ * both times, if the attributes to be interpolated are different. For now,
+ * just restrict this to not get used with dx9 (by checking pixel offset),
+ * could also restrict it further to only trigger with wgf11Interpolator
+ * Rasterizer test (the only place which needs it, with always the same
+ * vertices even).
+ */
+ if ((LP_DEBUG & DEBUG_ACCURATE_A0) &&
+ setup->pixel_offset == 0.5f &&
+ key->num_inputs == 1 &&
+ (key->inputs[0].interp == LP_INTERP_LINEAR ||
+ key->inputs[0].interp == LP_INTERP_PERSPECTIVE)) {
+ float dist0 = v0[0][0] * v0[0][0] + v0[0][1] * v0[0][1];
+ float dist1 = v1[0][0] * v1[0][0] + v1[0][1] * v1[0][1];
+ float dist2 = v2[0][0] * v2[0][0] + v2[0][1] * v2[0][1];
+ if (dist0 > dist1 && dist1 < dist2) {
+ const float (*vt)[4];
+ int x, y;
+ vt = v0;
+ v0 = v1;
+ v1 = v2;
+ v2 = vt;
+ x = position->x[0];
+ y = position->y[0];
+ position->x[0] = position->x[1];
+ position->y[0] = position->y[1];
+ position->x[1] = position->x[2];
+ position->y[1] = position->y[2];
+ position->x[2] = x;
+ position->y[2] = y;
+
+ position->dx20 = position->dx01;
+ position->dy20 = position->dy01;
+ position->dx01 = position->x[0] - position->x[1];
+ position->dy01 = position->y[0] - position->y[1];
+ }
+ else if (dist0 > dist2) {
+ const float (*vt)[4];
+ int x, y;
+ vt = v0;
+ v0 = v2;
+ v2 = v1;
+ v1 = vt;
+ x = position->x[0];
+ y = position->y[0];
+ position->x[0] = position->x[2];
+ position->y[0] = position->y[2];
+ position->x[2] = position->x[1];
+ position->y[2] = position->y[1];
+ position->x[1] = x;
+ position->y[1] = y;
+
+ position->dx01 = position->dx20;
+ position->dy01 = position->dy20;
+ position->dx20 = position->x[2] - position->x[0];
+ position->dy20 = position->y[2] - position->y[0];
+ }
+ }
+
/* Setup parameter interpolants:
*/
setup->setup.variant->jit_function(v0, v1, v2,
frontfacing,
GET_A0(&tri->inputs),
GET_DADX(&tri->inputs),
- GET_DADY(&tri->inputs));
+ GET_DADY(&tri->inputs),
+ &setup->setup.variant->key);
tri->inputs.frontfacing = frontfacing;
tri->inputs.disable = FALSE;
- tri->inputs.opaque = setup->fs.current.variant->opaque;
+ tri->inputs.is_blit = FALSE;
+ tri->inputs.opaque = check_opaque(setup, v0, v1, v2);
tri->inputs.layer = layer;
tri->inputs.viewport_index = viewport_index;
tri->inputs.view_index = setup->view_index;
@@ -670,61 +734,8 @@ do_triangle_ccw(struct lp_setup_context *setup,
plane[2].eo);
}
-
- /*
- * When rasterizing scissored tris, use the intersection of the
- * triangle bounding box and the scissor rect to generate the
- * scissor planes.
- *
- * This permits us to cut off the triangle "tails" that are present
- * in the intermediate recursive levels caused when two of the
- * triangles edges don't diverge quickly enough to trivially reject
- * exterior blocks from the triangle.
- *
- * It's not really clear if it's worth worrying about these tails,
- * but since we generate the planes for each scissored tri, it's
- * free to trim them in this case.
- *
- * Note that otherwise, the scissor planes only vary in 'C' value,
- * and even then only on state-changes. Could alternatively store
- * these planes elsewhere.
- * (Or only store the c value together with a bit indicating which
- * scissor edge this is, so rasterization would treat them differently
- * (easier to evaluate) to ordinary planes.)
- */
if (nr_planes > 3) {
- /* why not just use draw_regions */
- struct lp_rast_plane *plane_s = &plane[3];
-
- if (s_planes[0]) {
- plane_s->dcdx = ~0U << 8;
- plane_s->dcdy = 0;
- plane_s->c = (1-scissor->x0) << 8;
- plane_s->eo = 1 << 8;
- plane_s++;
- }
- if (s_planes[1]) {
- plane_s->dcdx = 1 << 8;
- plane_s->dcdy = 0;
- plane_s->c = (scissor->x1+1) << 8;
- plane_s->eo = 0 << 8;
- plane_s++;
- }
- if (s_planes[2]) {
- plane_s->dcdx = 0;
- plane_s->dcdy = 1 << 8;
- plane_s->c = (1-scissor->y0) << 8;
- plane_s->eo = 1 << 8;
- plane_s++;
- }
- if (s_planes[3]) {
- plane_s->dcdx = 0;
- plane_s->dcdy = ~0U << 8;
- plane_s->c = (scissor->y1+1) << 8;
- plane_s->eo = 0;
- plane_s++;
- }
- assert(plane_s == &plane[nr_planes]);
+ lp_setup_add_scissor_planes(scissor, &plane[3], s_planes, setup->multisample);
}
return lp_setup_bin_triangle(setup, tri, &bbox, &bboxpos, nr_planes, viewport_index);
@@ -912,8 +923,8 @@ lp_setup_bin_triangle(struct lp_setup_context *setup,
ystep[i] = ((int64_t)plane[i].dcdy) << TILE_ORDER;
}
-
-
+ tri->inputs.is_blit = lp_setup_is_blit(setup, &tri->inputs);
+
/* Test tile-sized blocks against the triangle.
* Discard blocks fully outside the tri. If the block is fully
* contained inside the tri, bin an lp_rast_shade_tile command.
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c
index 04899dd9b..d30d619d8 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -173,6 +173,74 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
}
+/**
+ * Decide whether the linear rasterizer may be used and keep the setup and
+ * draw modules in sync with that decision.
+ *
+ * Linear mode is permitted only when there is exactly one colour buffer,
+ * it is a PIPE_TEXTURE_2D with nr_samples == 1 in B8G8R8A8_UNORM or
+ * B8G8R8X8_UNORM format, no zsbuf is bound, and no viewport index slot is
+ * in use (viewport_index_slot < 0).
+ *
+ * Updates lp->permit_linear_rasterizer and lp->single_vp. Calls
+ * lp_setup_set_linear_mode() when the former changes, and
+ * draw_set_driver_clipping() when either of them changes.
+ */
+static void
+check_linear_rasterizer( struct llvmpipe_context *lp )
+{
+ boolean bgr8;
+ boolean permit_linear;
+ boolean single_vp;
+ boolean clipping_changed = FALSE;
+
+ bgr8 = (lp->framebuffer.nr_cbufs == 1 && lp->framebuffer.cbufs[0] &&
+ lp->framebuffer.cbufs[0]->texture->nr_samples == 1 &&
+ lp->framebuffer.cbufs[0]->texture->target == PIPE_TEXTURE_2D &&
+ (lp->framebuffer.cbufs[0]->format == PIPE_FORMAT_B8G8R8A8_UNORM ||
+ lp->framebuffer.cbufs[0]->format == PIPE_FORMAT_B8G8R8X8_UNORM));
+
+ /* permit_linear means guardband, hence fake scissor, which we can only
+ * handle if there's just one vp. */
+ single_vp = lp->viewport_index_slot < 0;
+ permit_linear = (!lp->framebuffer.zsbuf &&
+ bgr8 &&
+ single_vp);
+
+ /* Tell draw that we're happy doing our own x/y clipping.
+ */
+ if (lp->permit_linear_rasterizer != permit_linear) {
+ lp->permit_linear_rasterizer = permit_linear;
+ lp_setup_set_linear_mode(lp->setup, permit_linear);
+ clipping_changed = TRUE;
+ }
+
+ if (lp->single_vp != single_vp) {
+ lp->single_vp = single_vp;
+ clipping_changed = TRUE;
+ }
+
+ /* Disable xy clipping in linear mode.
+ *
+ * Use a guard band if we don't have zsbuf. Could enable
+ * guardband always - this is just to be conservative.
+ *
+ * Because we have a layering violation where the draw module emits
+ * state changes to the driver while we're already inside a draw
+ * call, need to be careful about when we make calls back to the
+ * draw module. Hence the clipping_changed flag which is as much
+ * to prevent flush recursion as it is to short-circuit noop state
+ * changes.
+ *
+ * NOTE(review): the four booleans below are presumably
+ * bypass_clip_xy, bypass_clip_z, guard_band_xy and bypass_clip_points,
+ * in that order - confirm against the draw module's declaration.
+ */
+ if (clipping_changed) {
+ draw_set_driver_clipping(lp->draw,
+ FALSE,
+ FALSE,
+ permit_linear,
+ single_vp);
+ }
+}
+
+
+/**
+ * Handle state changes before clears.
+ * Called just prior to clearing (pipe::clear()).
+ *
+ * Re-evaluates the linear rasterizer decision via
+ * check_linear_rasterizer(), but only when the LP_NEW_FS or
+ * LP_NEW_FRAMEBUFFER dirty bit is set. The dirty bits themselves are
+ * left untouched here.
+ */
+void llvmpipe_update_derived_clear( struct llvmpipe_context *llvmpipe )
+{
+ if (llvmpipe->dirty & (LP_NEW_FS |
+ LP_NEW_FRAMEBUFFER))
+ check_linear_rasterizer(llvmpipe);
+}
+
+
/**
* Handle state changes.
* Called just prior to drawing anything (pipe::draw_arrays(), etc).
@@ -293,6 +361,8 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
llvmpipe->viewports);
}
+ llvmpipe_update_derived_clear(llvmpipe);
+
llvmpipe->dirty = 0;
}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 2fe01ce48..80dd95f01 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -67,6 +67,7 @@
#include "util/u_string.h"
#include "util/simple_list.h"
#include "util/u_dual_blend.h"
+#include "util/u_upload_mgr.h"
#include "util/os_time.h"
#include "pipe/p_shader_tokens.h"
#include "draw/draw_context.h"
@@ -451,10 +452,13 @@ static LLVMValueRef fs_interp(const struct lp_build_fs_iface *iface,
}
static void fs_fb_fetch(const struct lp_build_fs_iface *iface,
- struct lp_build_context *bld,
- unsigned cbuf,
- LLVMValueRef result[4])
+ struct lp_build_context *bld,
+ int location,
+ LLVMValueRef result[4])
{
+ assert(location >= FRAG_RESULT_DATA0 && location <= FRAG_RESULT_DATA7);
+ const int cbuf = location - FRAG_RESULT_DATA0;
+
struct lp_build_fs_llvm_iface *fs_iface = (struct lp_build_fs_llvm_iface *)iface;
struct gallivm_state *gallivm = bld->gallivm;
LLVMBuilderRef builder = gallivm->builder;
@@ -589,7 +593,8 @@ generate_fs_loop(struct gallivm_state *gallivm,
LLVMValueRef stencil_refs[2];
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
LLVMValueRef zs_samples = lp_build_const_int32(gallivm, key->zsbuf_nr_samples);
- struct lp_build_for_loop_state loop_state, sample_loop_state;
+ LLVMValueRef z_out = NULL, s_out = NULL;
+ struct lp_build_for_loop_state loop_state, sample_loop_state = {0};
struct lp_build_mask_context mask;
/*
* TODO: figure out if simple_shader optimization is really worthwhile to
@@ -700,6 +705,17 @@ generate_fs_loop(struct gallivm_state *gallivm,
color_store_size, "color1");
}
}
+ if (shader->info.base.writes_z) {
+ z_out = lp_build_array_alloca(gallivm,
+ lp_build_vec_type(gallivm, type),
+ color_store_size, "depth");
+ }
+
+ if (shader->info.base.writes_stencil) {
+ s_out = lp_build_array_alloca(gallivm,
+ lp_build_vec_type(gallivm, type),
+ color_store_size, "depth");
+ }
lp_build_for_loop_begin(&loop_state, gallivm,
lp_build_const_int32(gallivm, 0),
@@ -959,6 +975,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
params.ssbo_ptr = ssbo_ptr;
params.ssbo_sizes_ptr = num_ssbo_ptr;
params.image = image;
+ params.aniso_filter_table = lp_jit_context_aniso_filter_table(gallivm, context_ptr);
/* Build the actual shader */
if (shader->base.type == PIPE_SHADER_IR_TGSI)
@@ -1048,6 +1065,33 @@ generate_fs_loop(struct gallivm_state *gallivm,
LLVMBuildStore(builder, output_smask, out_sample_mask_storage);
}
+ if (shader->info.base.writes_z) {
+ int pos0 = find_output_by_semantic(&shader->info.base,
+ TGSI_SEMANTIC_POSITION,
+ 0);
+ LLVMValueRef out = LLVMBuildLoad(builder, outputs[pos0][2], "");
+ LLVMValueRef idx = loop_state.counter;
+ if (key->min_samples > 1)
+ idx = LLVMBuildAdd(builder, idx,
+ LLVMBuildMul(builder, sample_loop_state.counter, num_loop, ""), "");
+ LLVMValueRef ptr = LLVMBuildGEP(builder, z_out, &idx, 1, "");
+ LLVMBuildStore(builder, out, ptr);
+ }
+
+ if (shader->info.base.writes_stencil) {
+ int sten_out = find_output_by_semantic(&shader->info.base,
+ TGSI_SEMANTIC_STENCIL,
+ 0);
+ LLVMValueRef out = LLVMBuildLoad(builder, outputs[sten_out][1], "output.s");
+ LLVMValueRef idx = loop_state.counter;
+ if (key->min_samples > 1)
+ idx = LLVMBuildAdd(builder, idx,
+ LLVMBuildMul(builder, sample_loop_state.counter, num_loop, ""), "");
+ LLVMValueRef ptr = LLVMBuildGEP(builder, s_out, &idx, 1, "");
+ LLVMBuildStore(builder, out, ptr);
+ }
+
+
/* Color write - per fragment sample */
for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib)
{
@@ -1118,14 +1162,13 @@ generate_fs_loop(struct gallivm_state *gallivm,
/* Late Z test */
if (depth_mode & LATE_DEPTH_TEST) {
- int pos0 = find_output_by_semantic(&shader->info.base,
- TGSI_SEMANTIC_POSITION,
- 0);
- int s_out = find_output_by_semantic(&shader->info.base,
- TGSI_SEMANTIC_STENCIL,
- 0);
- if (pos0 != -1 && outputs[pos0][2]) {
- z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
+ if (shader->info.base.writes_z) {
+ LLVMValueRef idx = loop_state.counter;
+ if (key->min_samples > 1)
+ idx = LLVMBuildAdd(builder, idx,
+ LLVMBuildMul(builder, sample_loop_state.counter, num_loop, ""), "");
+ LLVMValueRef ptr = LLVMBuildGEP(builder, z_out, &idx, 1, "");
+ z = LLVMBuildLoad(builder, ptr, "output.z");
} else {
if (key->multisample) {
lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter, key->multisample ? sample_loop_state.counter : NULL);
@@ -1147,10 +1190,15 @@ generate_fs_loop(struct gallivm_state *gallivm,
lp_build_const_vec(gallivm, type, 1.0));
}
- if (s_out != -1 && outputs[s_out][1]) {
+ if (shader->info.base.writes_stencil) {
+ LLVMValueRef idx = loop_state.counter;
+ if (key->min_samples > 1)
+ idx = LLVMBuildAdd(builder, idx,
+ LLVMBuildMul(builder, sample_loop_state.counter, num_loop, ""), "");
+ LLVMValueRef ptr = LLVMBuildGEP(builder, s_out, &idx, 1, "");
+ stencil_refs[0] = LLVMBuildLoad(builder, ptr, "output.s");
/* there's only one value, and spec says to discard additional bits */
LLVMValueRef s_max_mask = lp_build_const_int_vec(gallivm, int_type, 255);
- stencil_refs[0] = LLVMBuildLoad(builder, outputs[s_out][1], "output.s");
stencil_refs[0] = LLVMBuildBitCast(builder, stencil_refs[0], int_vec_type, "");
stencil_refs[0] = LLVMBuildAnd(builder, stencil_refs[0], s_max_mask, "");
stencil_refs[1] = stencil_refs[0];
@@ -1664,6 +1712,15 @@ scale_bits(struct gallivm_state *gallivm,
int delta_bits = src_bits - dst_bits;
if (delta_bits <= dst_bits) {
+
+ if (dst_bits == 4) {
+ struct lp_type flt_type = lp_type_float_vec(32, src_type.length * 32);
+
+ result = lp_build_unsigned_norm_to_float(gallivm, src_bits, flt_type, src);
+ result = lp_build_clamped_float_to_unsigned_norm(gallivm, flt_type, dst_bits, result);
+ return result;
+ }
+
/*
* Approximate the rescaling with a single shift.
*
@@ -3113,7 +3170,7 @@ generate_fragment(struct llvmpipe_context *lp,
}
/* code generated texture sampling */
- sampler = lp_llvm_sampler_soa_create(key->samplers, key->nr_samplers);
+ sampler = lp_llvm_sampler_soa_create(lp_fs_variant_key_samplers(key), key->nr_samplers);
image = lp_llvm_image_soa_create(lp_fs_variant_key_images(key), key->nr_images);
num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */
@@ -3387,7 +3444,8 @@ dump_fs_variant_key(struct lp_fragment_shader_variant_key *key)
debug_printf("blend.alpha_to_coverage is enabled\n");
}
for (i = 0; i < key->nr_samplers; ++i) {
- const struct lp_static_sampler_state *sampler = &key->samplers[i].sampler_state;
+ const struct lp_sampler_static_state *samplers = lp_fs_variant_key_samplers(key);
+ const struct lp_static_sampler_state *sampler = &samplers[i].sampler_state;
debug_printf("sampler[%u] = \n", i);
debug_printf(" .wrap = %s %s %s\n",
util_str_tex_wrap(sampler->wrap_s, TRUE),
@@ -3407,9 +3465,11 @@ dump_fs_variant_key(struct lp_fragment_shader_variant_key *key)
debug_printf(" .apply_min_lod = %u\n", sampler->apply_min_lod);
debug_printf(" .apply_max_lod = %u\n", sampler->apply_max_lod);
debug_printf(" .reduction_mode = %u\n", sampler->reduction_mode);
+ debug_printf(" .aniso = %u\n", sampler->aniso);
}
for (i = 0; i < key->nr_sampler_views; ++i) {
- const struct lp_static_texture_state *texture = &key->samplers[i].texture_state;
+ const struct lp_sampler_static_state *samplers = lp_fs_variant_key_samplers(key);
+ const struct lp_static_texture_state *texture = &samplers[i].texture_state;
debug_printf("texture[%u] = \n", i);
debug_printf(" .format = %s\n",
util_format_name(texture->format));
@@ -3439,6 +3499,24 @@ dump_fs_variant_key(struct lp_fragment_shader_variant_key *key)
}
}
+const char *
+lp_debug_fs_kind(enum lp_fs_kind kind)
+{
+ switch(kind) {
+ case LP_FS_KIND_GENERAL:
+ return "GENERAL";
+ case LP_FS_KIND_BLIT_RGBA:
+ return "BLIT_RGBA";
+ case LP_FS_KIND_BLIT_RGB1:
+ return "BLIT_RGB1";
+ case LP_FS_KIND_AERO_MINIFICATION:
+ return "AERO_MINIFICATION";
+ case LP_FS_KIND_LLVM_LINEAR:
+ return "LLVM_LINEAR";
+ default:
+ return "unknown";
+ }
+}
void
lp_debug_fs_variant(struct lp_fragment_shader_variant *variant)
@@ -3451,6 +3529,9 @@ lp_debug_fs_variant(struct lp_fragment_shader_variant *variant)
nir_print_shader(variant->shader->base.ir.nir, stderr);
dump_fs_variant_key(&variant->key);
debug_printf("variant->opaque = %u\n", variant->opaque);
+ debug_printf("variant->potentially_opaque = %u\n", variant->potentially_opaque);
+ debug_printf("variant->blit = %u\n", variant->blit);
+ debug_printf("shader->kind = %s\n", lp_debug_fs_kind(variant->shader->kind));
debug_printf("\n");
}
@@ -3489,6 +3570,8 @@ generate_variant(struct llvmpipe_context *lp,
struct lp_fragment_shader_variant *variant;
const struct util_format_description *cbuf0_format_desc = NULL;
boolean fullcolormask;
+ boolean no_kill;
+ boolean linear;
char module_name[64];
unsigned char ir_sha1_cache_key[20];
struct lp_cached_code cached = { 0 };
@@ -3534,9 +3617,9 @@ generate_variant(struct llvmpipe_context *lp,
fullcolormask = util_format_colormask_full(cbuf0_format_desc, key->blend.rt[0].colormask);
}
- variant->opaque =
- !key->blend.logicop_enable &&
- !key->blend.rt[0].blend_enable &&
+ /* The scissor is ignored here as only tiles inside the scissoring
+ * rectangle will refer to this */
+ no_kill =
fullcolormask &&
!key->stencil[0].enabled &&
!key->alpha.enabled &&
@@ -3544,13 +3627,83 @@ generate_variant(struct llvmpipe_context *lp,
!key->blend.alpha_to_coverage &&
!key->depth.enabled &&
!shader->info.base.uses_kill &&
- !shader->info.base.writes_samplemask
- ? TRUE : FALSE;
+ !shader->info.base.writes_samplemask;
+
+ variant->opaque =
+ no_kill &&
+ !key->blend.logicop_enable &&
+ !key->blend.rt[0].blend_enable
+ ? TRUE : FALSE;
+
+ variant->potentially_opaque =
+ no_kill &&
+ !key->blend.logicop_enable &&
+ key->blend.rt[0].blend_enable &&
+ key->blend.rt[0].rgb_func == PIPE_BLEND_ADD &&
+ key->blend.rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA &&
+ key->blend.rt[0].alpha_func == key->blend.rt[0].rgb_func &&
+ key->blend.rt[0].alpha_dst_factor == key->blend.rt[0].rgb_dst_factor &&
+ shader->base.type == PIPE_SHADER_IR_TGSI &&
+ /*
+ * FIXME: for NIR, all of the fields of info.xxx (except info.base)
+ * are zeros, hence shader analysis (here and elsewhere) using these
+ * bits cannot work and will silently fail (cbuf is the only pointer
+ * field, hence causing a crash).
+ */
+ shader->info.cbuf[0][3].file != TGSI_FILE_NULL
+ ? TRUE : FALSE;
+
+ /* We only care about opaque blits for now */
+ if (variant->opaque &&
+ (shader->kind == LP_FS_KIND_BLIT_RGBA ||
+ shader->kind == LP_FS_KIND_BLIT_RGB1)) {
+ unsigned target, min_img_filter, mag_img_filter, min_mip_filter;
+ enum pipe_format texture_format;
+ struct lp_sampler_static_state *samp0 = lp_fs_variant_key_sampler_idx(key, 0);
+ assert(samp0);
+ texture_format = samp0->texture_state.format;
+ target = samp0->texture_state.target;
+ min_img_filter = samp0->sampler_state.min_img_filter;
+ mag_img_filter = samp0->sampler_state.mag_img_filter;
+ if (samp0->texture_state.level_zero_only) {
+ min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+ } else {
+ min_mip_filter = samp0->sampler_state.min_mip_filter;
+ }
+
+ if (target == PIPE_TEXTURE_2D &&
+ min_img_filter == PIPE_TEX_FILTER_NEAREST &&
+ mag_img_filter == PIPE_TEX_FILTER_NEAREST &&
+ min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
+ ((texture_format &&
+ util_is_format_compatible(util_format_description(texture_format),
+ cbuf0_format_desc)) ||
+ (shader->kind == LP_FS_KIND_BLIT_RGB1 &&
+ (texture_format == PIPE_FORMAT_B8G8R8A8_UNORM ||
+ texture_format == PIPE_FORMAT_B8G8R8X8_UNORM) &&
+ (key->cbuf_format[0] == PIPE_FORMAT_B8G8R8A8_UNORM ||
+ key->cbuf_format[0] == PIPE_FORMAT_B8G8R8X8_UNORM))))
+ variant->blit = 1;
+ }
+
+
+ /* Whether this is a candidate for the linear path */
+ linear =
+ !key->stencil[0].enabled &&
+ !key->depth.enabled &&
+ !shader->info.base.uses_kill &&
+ !key->blend.logicop_enable &&
+ (key->cbuf_format[0] == PIPE_FORMAT_B8G8R8A8_UNORM ||
+ key->cbuf_format[0] == PIPE_FORMAT_B8G8R8X8_UNORM);
+
+ memcpy(&variant->key, key, sizeof *key);
if ((LP_DEBUG & DEBUG_FS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
lp_debug_fs_variant(variant);
}
+ llvmpipe_fs_variant_fastpath(variant);
+
lp_jit_init_types(variant);
if (variant->jit_function[RAST_EDGE_TEST] == NULL)
@@ -3563,6 +3716,36 @@ generate_variant(struct llvmpipe_context *lp,
}
}
+ if (linear) {
+ /* Currently keeping both the old fastpaths and new linear path
+ * active. The older code is still somewhat faster for the cases
+ * it covers.
+ *
+ * XXX: consider restricting this to aero-mode only.
+ */
+ if (fullcolormask &&
+ !key->alpha.enabled &&
+ !key->blend.alpha_to_coverage) {
+ llvmpipe_fs_variant_linear_fastpath(variant);
+ }
+
+ /* If the original fastpath doesn't cover this variant, try the new
+ * code:
+ */
+ if (variant->jit_linear == NULL) {
+ if (shader->kind == LP_FS_KIND_BLIT_RGBA ||
+ shader->kind == LP_FS_KIND_BLIT_RGB1 ||
+ shader->kind == LP_FS_KIND_LLVM_LINEAR) {
+ llvmpipe_fs_variant_linear_llvm(lp, shader, variant);
+ }
+ }
+ } else {
+ if (LP_DEBUG & DEBUG_LINEAR) {
+ lp_debug_fs_variant(variant);
+ debug_printf(" ----> no linear path for this variant\n");
+ }
+ }
+
/*
* Compile everything
*/
@@ -3585,6 +3768,19 @@ generate_variant(struct llvmpipe_context *lp,
variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST];
}
+ if (linear) {
+ if (variant->linear_function) {
+ variant->jit_linear_llvm = (lp_jit_linear_llvm_func)
+ gallivm_jit_function(variant->gallivm, variant->linear_function);
+ }
+
+ /*
+ * This must be done after LLVM compilation, as it will call the JIT'ed
+ * code to determine active inputs.
+ */
+ lp_linear_check_variant(variant);
+ }
+
if (needs_caching) {
lp_disk_cache_insert_shader(screen, &cached, ir_sha1_cache_key);
}
@@ -3640,7 +3836,6 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
for (i = 0; i < shader->info.base.num_inputs; i++) {
shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i];
- shader->inputs[i].cyl_wrap = shader->info.base.input_cylindrical_wrap[i];
shader->inputs[i].location = shader->info.base.input_interpolate_loc[i];
switch (shader->info.base.input_interpolate[i]) {
@@ -3677,7 +3872,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
shader->inputs[i].src_index = i+1;
}
- if (LP_DEBUG & DEBUG_TGSI) {
+ if (LP_DEBUG & DEBUG_TGSI && templ->type == PIPE_SHADER_IR_TGSI) {
unsigned attrib;
debug_printf("llvmpipe: Create fragment shader #%u %p:\n",
shader->no, (void *) shader);
@@ -3695,6 +3890,12 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
debug_printf("\n");
}
+ /* This will put a derived copy of the tokens into shader->base.tokens */
+ if (templ->type == PIPE_SHADER_IR_TGSI)
+ llvmpipe_fs_analyse(shader, templ->tokens);
+ else
+ shader->kind = LP_FS_KIND_GENERAL;
+
return shader;
}
@@ -3799,7 +4000,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
const struct pipe_constant_buffer *cb)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
- struct pipe_resource *constants = cb ? cb->buffer : NULL;
+ struct pipe_constant_buffer *constants = &llvmpipe->constants[shader][index];
assert(shader < PIPE_SHADER_TYPES);
assert(index < ARRAY_SIZE(llvmpipe->constants[shader]));
@@ -3808,10 +4009,19 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
util_copy_constant_buffer(&llvmpipe->constants[shader][index], cb,
take_ownership);
- if (constants) {
- if (!(constants->bind & PIPE_BIND_CONSTANT_BUFFER)) {
+ /* user_buffer is only valid until the next set_constant_buffer (at most,
+ * possibly until shader deletion), so we need to upload it now to make sure
+ * it doesn't get updated/freed out from under us.
+ */
+ if (constants->user_buffer) {
+ u_upload_data(llvmpipe->pipe.const_uploader, 0, constants->buffer_size, 16,
+ constants->user_buffer, &constants->buffer_offset,
+ &constants->buffer);
+ }
+ if (constants->buffer) {
+ if (!(constants->buffer->bind & PIPE_BIND_CONSTANT_BUFFER)) {
debug_printf("Illegal set constant without bind flag\n");
- constants->bind |= PIPE_BIND_CONSTANT_BUFFER;
+ constants->buffer->bind |= PIPE_BIND_CONSTANT_BUFFER;
}
}
@@ -3821,20 +4031,10 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
shader == PIPE_SHADER_TESS_EVAL) {
/* Pass the constants to the 'draw' module */
const unsigned size = cb ? cb->buffer_size : 0;
- const ubyte *data;
- if (constants) {
- data = (ubyte *) llvmpipe_resource_data(constants);
- }
- else if (cb && cb->user_buffer) {
- data = (ubyte *) cb->user_buffer;
- }
- else {
- data = NULL;
- }
-
- if (data)
- data += cb->buffer_offset;
+ const ubyte *data = NULL;
+ if (constants->buffer)
+ data = (ubyte *) llvmpipe_resource_data(constants->buffer) + constants->buffer_offset;
draw_set_mapped_constant_buffer(llvmpipe->draw, shader,
index, data, size);
@@ -3843,10 +4043,6 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
llvmpipe->cs_dirty |= LP_CSNEW_CONSTANTS;
else
llvmpipe->dirty |= LP_NEW_FS_CONSTANTS;
-
- if (cb && cb->user_buffer) {
- pipe_resource_reference(&constants, NULL);
- }
}
static void
@@ -3957,7 +4153,7 @@ make_variant_key(struct llvmpipe_context *lp,
key = (struct lp_fragment_shader_variant_key *)store;
- memset(key, 0, offsetof(struct lp_fragment_shader_variant_key, samplers[1]));
+ memset(key, 0, sizeof(*key));
if (lp->framebuffer.zsbuf) {
enum pipe_format zsbuf_format = lp->framebuffer.zsbuf->format;
@@ -3984,10 +4180,8 @@ make_variant_key(struct llvmpipe_context *lp,
/*
* Propagate the depth clamp setting from the rasterizer state.
- * depth_clip == 0 implies depth clamping is enabled.
- *
*/
- key->depth_clamp = (lp->rasterizer->depth_clip_near == 0) ? 1 : 0;
+ key->depth_clamp = lp->rasterizer->depth_clamp;
/* alpha test only applies if render buffer 0 is non-integer (or does not exist) */
if (!lp->framebuffer.nr_cbufs ||
@@ -4104,7 +4298,7 @@ make_variant_key(struct llvmpipe_context *lp,
struct lp_sampler_static_state *fs_sampler;
- fs_sampler = key->samplers;
+ fs_sampler = lp_fs_variant_key_samplers(key);
memset(fs_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *fs_sampler);
@@ -4153,11 +4347,18 @@ make_variant_key(struct llvmpipe_context *lp,
&lp->images[PIPE_SHADER_FRAGMENT][i]);
}
}
+
+ if (shader->kind == LP_FS_KIND_AERO_MINIFICATION) {
+ struct lp_sampler_static_state *samp0 = lp_fs_variant_key_sampler_idx(key, 0);
+ assert(samp0);
+ samp0->sampler_state.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+ samp0->sampler_state.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+ }
+
return key;
}
-
/**
* Update fragment shader state. This is called just prior to drawing
* something when some fragment-related state has changed.
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index 613e5286a..b5e8c31c7 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -120,6 +120,7 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe,
unsigned start,
unsigned num,
unsigned unbind_num_trailing_slots,
+ bool take_ownership,
struct pipe_sampler_view **views)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
@@ -150,8 +151,15 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe,
if (view)
llvmpipe_flush_resource(pipe, view->texture, 0, true, false, false, "sampler_view");
- pipe_sampler_view_reference(&llvmpipe->sampler_views[shader][start + i],
- view);
+
+ if (take_ownership) {
+ pipe_sampler_view_reference(&llvmpipe->sampler_views[shader][start + i],
+ NULL);
+ llvmpipe->sampler_views[shader][start + i] = view;
+ } else {
+ pipe_sampler_view_reference(&llvmpipe->sampler_views[shader][start + i],
+ view);
+ }
}
for (; i < num + unbind_num_trailing_slots; i++) {
@@ -178,8 +186,12 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe,
}
else if (shader == PIPE_SHADER_COMPUTE) {
llvmpipe->cs_dirty |= LP_CSNEW_SAMPLER_VIEW;
- } else {
+ }
+ else if (shader == PIPE_SHADER_FRAGMENT) {
llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW;
+ lp_setup_set_fragment_sampler_views(llvmpipe->setup,
+ llvmpipe->num_sampler_views[PIPE_SHADER_FRAGMENT],
+ llvmpipe->sampler_views[PIPE_SHADER_FRAGMENT]);
}
}
@@ -341,13 +353,7 @@ prepare_shader_sampling(
}
else {
/* display target texture/surface */
- /*
- * XXX: Where should this be unmapped?
- */
- struct llvmpipe_screen *screen = llvmpipe_screen(tex->screen);
- struct sw_winsys *winsys = screen->winsys;
- addr = winsys->displaytarget_map(winsys, lp_tex->dt,
- PIPE_MAP_READ);
+ addr = llvmpipe_resource_map(tex, 0, 0, LP_TEX_USAGE_READ);
row_stride[0] = lp_tex->row_stride[0];
img_stride[0] = lp_tex->img_stride[0];
mip_offsets[0] = 0;
@@ -411,6 +417,31 @@ llvmpipe_prepare_tess_eval_sampling(struct llvmpipe_context *lp,
prepare_shader_sampling(lp, num, views, PIPE_SHADER_TESS_EVAL);
}
+void
+llvmpipe_cleanup_stage_sampling(struct llvmpipe_context *ctx,
+ enum pipe_shader_type stage)
+{
+ unsigned num, i;
+ struct pipe_sampler_view **views;
+ assert(ctx);
+ assert(stage < ARRAY_SIZE(ctx->num_sampler_views));
+ assert(stage < ARRAY_SIZE(ctx->sampler_views));
+
+ num = ctx->num_sampler_views[stage];
+ views = ctx->sampler_views[stage];
+
+ assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
+
+ for (i = 0; i < num; i++) {
+ struct pipe_sampler_view *view = views[i];
+ if (view) {
+ struct pipe_resource *tex = view->texture;
+ if (tex)
+ llvmpipe_resource_unmap(tex, 0, 0);
+ }
+ }
+}
+
static void
prepare_shader_images(
struct llvmpipe_context *lp,
@@ -438,11 +469,19 @@ prepare_shader_images(
if (!img)
continue;
- unsigned width = u_minify(img->width0, view->u.tex.level);
- unsigned height = u_minify(img->height0, view->u.tex.level);
+ unsigned width = img->width0;
+ unsigned height = img->height0;
unsigned num_layers = img->depth0;
unsigned num_samples = img->nr_samples;
+ const uint32_t bw = util_format_get_blockwidth(view->resource->format);
+ const uint32_t bh = util_format_get_blockheight(view->resource->format);
+
+ width = DIV_ROUND_UP(width, bw);
+ height = DIV_ROUND_UP(height, bh);
+ width = u_minify(width, view->u.tex.level);
+ height = u_minify(height, view->u.tex.level);
+
if (!lp_img->dt) {
/* regular texture - setup array of mipmap level offsets */
struct pipe_resource *res = view->resource;
@@ -482,13 +521,7 @@ prepare_shader_images(
}
else {
/* display target texture/surface */
- /*
- * XXX: Where should this be unmapped?
- */
- struct llvmpipe_screen *screen = llvmpipe_screen(img->screen);
- struct sw_winsys *winsys = screen->winsys;
- addr = winsys->displaytarget_map(winsys, lp_img->dt,
- PIPE_MAP_READ);
+ addr = llvmpipe_resource_map(img, 0, 0, LP_TEX_USAGE_READ);
row_stride = lp_img->row_stride[0];
img_stride = lp_img->img_stride[0];
sample_stride = 0;
@@ -552,6 +585,30 @@ llvmpipe_prepare_tess_eval_images(struct llvmpipe_context *lp,
}
void
+llvmpipe_cleanup_stage_images(struct llvmpipe_context *ctx,
+ enum pipe_shader_type stage)
+{
+ unsigned num, i;
+ struct pipe_image_view *views;
+ assert(ctx);
+ assert(stage < ARRAY_SIZE(ctx->num_images));
+ assert(stage < ARRAY_SIZE(ctx->images));
+
+ num = ctx->num_images[stage];
+ views = ctx->images[stage];
+
+ assert(num <= LP_MAX_TGSI_SHADER_IMAGES);
+
+ for (i = 0; i < num; i++) {
+ struct pipe_image_view *view = &views[i];
+ assert(view);
+ struct pipe_resource *img = view->resource;
+ if (img)
+ llvmpipe_resource_unmap(img, 0, 0);
+ }
+}
+
+void
llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe)
{
llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state;
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c
index 2bc94d5d4..9f385a084 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c
@@ -71,6 +71,7 @@ struct lp_setup_args
LLVMValueRef a0;
LLVMValueRef dadx;
LLVMValueRef dady;
+ LLVMValueRef key;
/* Derived:
*/
@@ -200,7 +201,7 @@ lp_twoside(struct gallivm_state *gallivm,
}
-static void
+static LLVMValueRef
lp_do_offset_tri(struct gallivm_state *gallivm,
struct lp_setup_args *args,
const struct lp_setup_variant_key *key,
@@ -214,9 +215,7 @@ lp_do_offset_tri(struct gallivm_state *gallivm,
struct lp_build_context int_scalar_bld;
struct lp_build_context *bld = &args->bld;
LLVMValueRef zoffset, mult;
- LLVMValueRef z0_new, z1_new, z2_new;
LLVMValueRef dzdxdzdy, dzdx, dzdy, dzxyz20, dyzzx01, dyzzx01_dzxyz20, dzx01_dyz20;
- LLVMValueRef z0z1, z0z1z2;
LLVMValueRef max, max_value, res12;
LLVMValueRef shuffles[4];
LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
@@ -267,8 +266,8 @@ lp_do_offset_tri(struct gallivm_state *gallivm,
if (key->floating_point_depth) {
/*
- * bias = pgon_offset_units * 2^(exponent(max(z0, z1, z2)) - mantissa_bits) +
- * MAX2(dzdx, dzdy) * pgon_offset_scale
+ * bias = pgon_offset_units * 2^(exponent(max(abs(z0), abs(z1), abs(z2))) -
+ * mantissa_bits) + MAX2(dzdx, dzdy) * pgon_offset_scale
*
* NOTE: Assumes IEEE float32.
*/
@@ -281,11 +280,14 @@ lp_do_offset_tri(struct gallivm_state *gallivm,
exp_mask = lp_build_const_int32(gallivm, 0xff << 23);
maxz0z1_value = lp_build_max(&flt_scalar_bld,
- LLVMBuildExtractElement(b, attribv[0], twoi, ""),
- LLVMBuildExtractElement(b, attribv[1], twoi, ""));
+ lp_build_abs(&flt_scalar_bld,
+ LLVMBuildExtractElement(b, attribv[0], twoi, "")),
+ lp_build_abs(&flt_scalar_bld,
+ LLVMBuildExtractElement(b, attribv[1], twoi, "")));
maxz_value = lp_build_max(&flt_scalar_bld,
- LLVMBuildExtractElement(b, attribv[2], twoi, ""),
+ lp_build_abs(&flt_scalar_bld,
+ LLVMBuildExtractElement(b, attribv[2], twoi, "")),
maxz0z1_value);
exp = LLVMBuildBitCast(b, maxz_value, int_scalar_bld.vec_type, "");
@@ -322,34 +324,7 @@ lp_do_offset_tri(struct gallivm_state *gallivm,
zoffset);
}
- /* yuck */
- shuffles[0] = twoi;
- shuffles[1] = lp_build_const_int32(gallivm, 6);
- shuffles[2] = LLVMGetUndef(shuf_type);
- shuffles[3] = LLVMGetUndef(shuf_type);
- z0z1 = LLVMBuildShuffleVector(b, attribv[0], attribv[1], LLVMConstVector(shuffles, 4), "");
- shuffles[0] = zeroi;
- shuffles[1] = onei;
- shuffles[2] = lp_build_const_int32(gallivm, 6);
- shuffles[3] = LLVMGetUndef(shuf_type);
- z0z1z2 = LLVMBuildShuffleVector(b, z0z1, attribv[2], LLVMConstVector(shuffles, 4), "");
- zoffset = lp_build_broadcast_scalar(bld, zoffset);
-
- /* clamp and do offset */
- /*
- * FIXME I suspect the clamp (is that even right to always clamp to fixed
- * 0.0/1.0?) should really be per fragment?
- */
- z0z1z2 = lp_build_clamp(bld, LLVMBuildFAdd(b, z0z1z2, zoffset, ""), bld->zero, bld->one);
-
- /* insert into args->a0.z, a1.z, a2.z:
- */
- z0_new = LLVMBuildExtractElement(b, z0z1z2, zeroi, "");
- z1_new = LLVMBuildExtractElement(b, z0z1z2, onei, "");
- z2_new = LLVMBuildExtractElement(b, z0z1z2, twoi, "");
- attribv[0] = LLVMBuildInsertElement(b, attribv[0], z0_new, twoi, "");
- attribv[1] = LLVMBuildInsertElement(b, attribv[1], z1_new, twoi, "");
- attribv[2] = LLVMBuildInsertElement(b, attribv[2], z2_new, twoi, "");
+ return zoffset;
}
static void
@@ -393,12 +368,12 @@ load_attribute(struct gallivm_state *gallivm,
* which obviously wouldn't work)).
*/
static void
-emit_coef4( struct gallivm_state *gallivm,
+calc_coef4( struct gallivm_state *gallivm,
struct lp_setup_args *args,
- unsigned slot,
LLVMValueRef a0,
LLVMValueRef a1,
- LLVMValueRef a2)
+ LLVMValueRef a2,
+ LLVMValueRef out[3])
{
LLVMBuilderRef b = gallivm->builder;
LLVMValueRef attr_0;
@@ -430,7 +405,23 @@ emit_coef4( struct gallivm_state *gallivm,
LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0");
attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0");
- store_coef(gallivm, args, slot, attr_0, dadx, dady);
+ out[0] = attr_0;
+ out[1] = dadx;
+ out[2] = dady;
+}
+
+static void
+emit_coef4( struct gallivm_state *gallivm,
+ struct lp_setup_args *args,
+ unsigned slot,
+ LLVMValueRef a0,
+ LLVMValueRef a1,
+ LLVMValueRef a2)
+{
+ LLVMValueRef coeffs[3];
+ calc_coef4(gallivm, args, a0, a1, a2, coeffs);
+ store_coef(gallivm, args, slot,
+ coeffs[0], coeffs[1], coeffs[2]);
}
@@ -481,82 +472,6 @@ apply_perspective_corr( struct gallivm_state *gallivm,
/**
- * Apply cylindrical wrapping to vertex attributes if enabled.
- * Input coordinates must be in [0, 1] range, otherwise results are undefined.
- *
- * @param cyl_wrap TGSI_CYLINDRICAL_WRAP_x flags
- */
-static void
-emit_apply_cyl_wrap(struct gallivm_state *gallivm,
- struct lp_setup_args *args,
- uint cyl_wrap,
- LLVMValueRef attribv[3])
-
-{
- LLVMBuilderRef builder = gallivm->builder;
- struct lp_type type = args->bld.type;
- LLVMTypeRef float_vec_type = args->bld.vec_type;
- LLVMValueRef pos_half;
- LLVMValueRef neg_half;
- LLVMValueRef cyl_mask;
- LLVMValueRef offset;
- LLVMValueRef delta;
- LLVMValueRef one;
-
- if (!cyl_wrap)
- return;
-
- /* Constants */
- pos_half = lp_build_const_vec(gallivm, type, +0.5f);
- neg_half = lp_build_const_vec(gallivm, type, -0.5f);
- cyl_mask = lp_build_const_mask_aos(gallivm, type, cyl_wrap, 4);
-
- one = lp_build_const_vec(gallivm, type, 1.0f);
- one = LLVMBuildBitCast(builder, one, lp_build_int_vec_type(gallivm, type), "");
- one = LLVMBuildAnd(builder, one, cyl_mask, "");
-
- /* Edge v0 -> v1 */
- delta = LLVMBuildFSub(builder, attribv[1], attribv[0], "");
-
- offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half);
- offset = LLVMBuildAnd(builder, offset, one, "");
- offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
- attribv[0] = LLVMBuildFAdd(builder, attribv[0], offset, "");
-
- offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half);
- offset = LLVMBuildAnd(builder, offset, one, "");
- offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
- attribv[1] = LLVMBuildFAdd(builder, attribv[1], offset, "");
-
- /* Edge v1 -> v2 */
- delta = LLVMBuildFSub(builder, attribv[2], attribv[1], "");
-
- offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half);
- offset = LLVMBuildAnd(builder, offset, one, "");
- offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
- attribv[1] = LLVMBuildFAdd(builder, attribv[1], offset, "");
-
- offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half);
- offset = LLVMBuildAnd(builder, offset, one, "");
- offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
- attribv[2] = LLVMBuildFAdd(builder, attribv[2], offset, "");
-
- /* Edge v2 -> v0 */
- delta = LLVMBuildFSub(builder, attribv[0], attribv[2], "");
-
- offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half);
- offset = LLVMBuildAnd(builder, offset, one, "");
- offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
- attribv[2] = LLVMBuildFAdd(builder, attribv[2], offset, "");
-
- offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half);
- offset = LLVMBuildAnd(builder, offset, one, "");
- offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
- attribv[0] = LLVMBuildFAdd(builder, attribv[0], offset, "");
-}
-
-
-/**
* Compute the inputs-> dadx, dady, a0 values.
*/
static void
@@ -584,13 +499,11 @@ emit_tri_coef( struct gallivm_state *gallivm,
case LP_INTERP_LINEAR:
load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
- emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap, attribs);
emit_linear_coef(gallivm, args, slot+1, attribs);
break;
case LP_INTERP_PERSPECTIVE:
load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
- emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap, attribs);
apply_perspective_corr(gallivm, args, slot+1, attribs);
emit_linear_coef(gallivm, args, slot+1, attribs);
break;
@@ -641,6 +554,7 @@ init_args(struct gallivm_state *gallivm,
LLVMValueRef e, f, ef, ooa;
LLVMValueRef shuffles[4], shuf10;
LLVMValueRef attr_pos[3];
+ LLVMValueRef polygon_offset;
struct lp_type typef4 = lp_type_float_vec(32, 128);
struct lp_build_context bld;
@@ -681,7 +595,9 @@ init_args(struct gallivm_state *gallivm,
/* tri offset calc shares a lot of arithmetic, do it here */
if (key->pgon_offset_scale != 0.0f || key->pgon_offset_units != 0.0f) {
- lp_do_offset_tri(gallivm, args, key, ooa, dxy01, dxy20, attr_pos);
+ polygon_offset = lp_do_offset_tri(gallivm, args, key, ooa, dxy01, dxy20, attr_pos);
+ } else {
+ polygon_offset = lp_build_const_float(gallivm, 0.0f);
}
dxy20 = LLVMBuildFMul(b, dxy20, ooa, "");
@@ -696,7 +612,22 @@ init_args(struct gallivm_state *gallivm,
args->x0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, zeroi);
args->y0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, onei);
- emit_linear_coef(gallivm, args, 0, attr_pos);
+ LLVMValueRef coeffs[3];
+ calc_coef4(gallivm, args,
+ attr_pos[0], attr_pos[1], attr_pos[2],
+ coeffs);
+
+ /* This is a bit sneaky:
+ * Because we observe that the X component of A0 is otherwise unused,
+ * we can overwrite it with the computed polygon-offset value, to make
+ * sure it's available in the fragment shader without having to change
+ * the interface (which is error-prone).
+ */
+ coeffs[0] = LLVMBuildInsertElement(b, coeffs[0], polygon_offset,
+ lp_build_const_int32(gallivm, 0), "");
+
+ store_coef(gallivm, args, 0,
+ coeffs[0], coeffs[1], coeffs[2]);
}
/**
@@ -713,7 +644,7 @@ generate_setup_variant(struct lp_setup_variant_key *key,
char func_name[64];
LLVMTypeRef vec4f_type;
LLVMTypeRef func_type;
- LLVMTypeRef arg_types[7];
+ LLVMTypeRef arg_types[8];
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
int64_t t0 = 0, t1;
@@ -757,6 +688,7 @@ generate_setup_variant(struct lp_setup_variant_key *key,
arg_types[4] = LLVMPointerType(vec4f_type, 0); /* a0, aligned */
arg_types[5] = LLVMPointerType(vec4f_type, 0); /* dadx, aligned */
arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */
+ arg_types[7] = LLVMPointerType(vec4f_type, 0); /* key (placeholder) */
func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
arg_types, ARRAY_SIZE(arg_types), 0);
@@ -774,6 +706,7 @@ generate_setup_variant(struct lp_setup_variant_key *key,
args.a0 = LLVMGetParam(variant->function, 4);
args.dadx = LLVMGetParam(variant->function, 5);
args.dady = LLVMGetParam(variant->function, 6);
+ args.key = LLVMGetParam(variant->function, 7);
lp_build_name(args.v0, "in_v0");
lp_build_name(args.v1, "in_v1");
@@ -782,6 +715,7 @@ generate_setup_variant(struct lp_setup_variant_key *key,
lp_build_name(args.a0, "out_a0");
lp_build_name(args.dadx, "out_dadx");
lp_build_name(args.dady, "out_dady");
+ lp_build_name(args.key, "key");
/*
* Function body
@@ -864,11 +798,12 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp,
key->pgon_offset_units = (float) lp->rasterizer->offset_units;
} else {
key->pgon_offset_units =
- (float) (lp->rasterizer->offset_units * lp->mrd);
+ (float) (lp->rasterizer->offset_units * lp->mrd * 2);
}
key->pgon_offset_scale = lp->rasterizer->offset_scale;
key->pgon_offset_clamp = lp->rasterizer->offset_clamp;
+ key->uses_constant_interp = 0;
key->pad = 0;
memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]);
for (i = 0; i < key->num_inputs; i++) {
@@ -878,8 +813,10 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp,
else
key->inputs[i].interp = LP_INTERP_PERSPECTIVE;
}
+ if (key->inputs[i].interp == LP_INTERP_CONSTANT) {
+ key->uses_constant_interp = 1;
+ }
}
-
}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_tess.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_tess.c
index b3f8e74af..28cc1258b 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_tess.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_tess.c
@@ -49,7 +49,7 @@ llvmpipe_create_tcs_state(struct pipe_context *pipe,
goto no_state;
/* debug */
- if (LP_DEBUG & DEBUG_TGSI) {
+ if (LP_DEBUG & DEBUG_TGSI && templ->type == PIPE_SHADER_IR_TGSI) {
debug_printf("llvmpipe: Create tess ctrl shader %p:\n", (void *)state);
tgsi_dump(templ->tokens, 0);
}
@@ -181,6 +181,14 @@ llvmpipe_set_tess_state(struct pipe_context *pipe,
draw_set_tess_state(llvmpipe->draw, default_outer_level, default_inner_level);
}
+static void
+llvmpipe_set_patch_vertices(struct pipe_context *pipe, uint8_t patch_vertices)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ llvmpipe->patch_vertices = patch_vertices;
+}
+
void
llvmpipe_init_tess_funcs(struct llvmpipe_context *llvmpipe)
{
@@ -193,4 +201,5 @@ llvmpipe_init_tess_funcs(struct llvmpipe_context *llvmpipe)
llvmpipe->pipe.delete_tes_state = llvmpipe_delete_tes_state;
llvmpipe->pipe.set_tess_state = llvmpipe_set_tess_state;
+ llvmpipe->pipe.set_patch_vertices = llvmpipe_set_patch_vertices;
}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c
index 9ba2b87b8..8e905b8d7 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c
@@ -65,7 +65,7 @@ lp_resource_copy_ms(struct pipe_context *pipe,
&dst_box,
&dst_trans);
if (!dst_map) {
- pipe->transfer_unmap(pipe, src_trans);
+ pipe->texture_unmap(pipe, src_trans);
return;
}
@@ -77,8 +77,8 @@ lp_resource_copy_ms(struct pipe_context *pipe,
src_map,
src_trans->stride, src_trans->layer_stride,
0, 0, 0);
- pipe->transfer_unmap(pipe, dst_trans);
- pipe->transfer_unmap(pipe, src_trans);
+ pipe->texture_unmap(pipe, dst_trans);
+ pipe->texture_unmap(pipe, src_trans);
}
}
static void
@@ -295,7 +295,7 @@ lp_clear_color_texture_msaa(struct pipe_context *pipe,
lp_clear_color_texture_helper(dst_trans, dst_map, format, color,
box->width, box->height, box->depth);
}
- pipe->transfer_unmap(pipe, dst_trans);
+ pipe->texture_unmap(pipe, dst_trans);
}
static void
@@ -361,7 +361,7 @@ lp_clear_depth_stencil_texture_msaa(struct pipe_context *pipe,
dst_trans->stride, dst_trans->layer_stride,
box->width, box->height, box->depth, zstencil);
- pipe->transfer_unmap(pipe, dst_trans);
+ pipe->texture_unmap(pipe, dst_trans);
}
static void
@@ -456,7 +456,7 @@ llvmpipe_clear_buffer(struct pipe_context *pipe,
char *dst;
u_box_1d(offset, size, &box);
- dst = pipe->transfer_map(pipe,
+ dst = pipe->buffer_map(pipe,
res,
0,
PIPE_MAP_WRITE,
@@ -475,7 +475,7 @@ llvmpipe_clear_buffer(struct pipe_context *pipe,
memcpy(&dst[i], clear_value, clear_value_size);
break;
}
- pipe->transfer_unmap(pipe, dst_t);
+ pipe->buffer_unmap(pipe, dst_t);
}
void