diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2021-07-22 10:13:23 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2021-07-22 10:13:23 +0000 |
commit | 00b90445c923b6ed38dea0bff30f24b861da9f7d (patch) | |
tree | 01947d712ed2d4dc76016f3cf472112e9fefc40f /lib | |
parent | 2d31b2f567de560104130314fb3437969061582c (diff) |
Import Mesa 21.1.5
Diffstat (limited to 'lib')
-rw-r--r-- | lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c | 1073 |
1 files changed, 435 insertions, 638 deletions
diff --git a/lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c b/lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c index f9c13954d..9388485b4 100644 --- a/lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c +++ b/lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c @@ -31,6 +31,101 @@ #include "genxml/gen_macros.h" #include "genxml/genX_pack.h" +#include "common/intel_guardband.h" + +#if GFX_VER == 8 +void +gfx8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + uint32_t count = cmd_buffer->state.gfx.dynamic.viewport.count; + const VkViewport *viewports = + cmd_buffer->state.gfx.dynamic.viewport.viewports; + struct anv_state sf_clip_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64); + + for (uint32_t i = 0; i < count; i++) { + const VkViewport *vp = &viewports[i]; + + /* The gfx7 state struct has just the matrix and guardband fields, the + * gfx8 struct adds the min/max viewport fields. */ + struct GENX(SF_CLIP_VIEWPORT) sfv = { + .ViewportMatrixElementm00 = vp->width / 2, + .ViewportMatrixElementm11 = vp->height / 2, + .ViewportMatrixElementm22 = vp->maxDepth - vp->minDepth, + .ViewportMatrixElementm30 = vp->x + vp->width / 2, + .ViewportMatrixElementm31 = vp->y + vp->height / 2, + .ViewportMatrixElementm32 = vp->minDepth, + .XMinClipGuardband = -1.0f, + .XMaxClipGuardband = 1.0f, + .YMinClipGuardband = -1.0f, + .YMaxClipGuardband = 1.0f, + .XMinViewPort = vp->x, + .XMaxViewPort = vp->x + vp->width - 1, + .YMinViewPort = MIN2(vp->y, vp->y + vp->height), + .YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1, + }; + + if (fb) { + /* We can only calculate a "real" guardband clip if we know the + * framebuffer at the time we emit the packet. Otherwise, we have + * fall back to a worst-case guardband of [-1, 1]. + */ + intel_calculate_guardband_size(fb->width, fb->height, + sfv.ViewportMatrixElementm00, + sfv.ViewportMatrixElementm11, + sfv.ViewportMatrixElementm30, + sfv.ViewportMatrixElementm31, + &sfv.XMinClipGuardband, + &sfv.XMaxClipGuardband, + &sfv.YMinClipGuardband, + &sfv.YMaxClipGuardband); + } + + GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, &sfv); + } + + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) { + clip.SFClipViewportPointer = sf_clip_state.offset; + } +} + +void +gfx8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer, + bool depth_clamp_enable) +{ + uint32_t count = cmd_buffer->state.gfx.dynamic.viewport.count; + const VkViewport *viewports = + cmd_buffer->state.gfx.dynamic.viewport.viewports; + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); + + for (uint32_t i = 0; i < count; i++) { + const VkViewport *vp = &viewports[i]; + + /* From the Vulkan spec: + * + * "It is valid for minDepth to be greater than or equal to + * maxDepth." + */ + float min_depth = MIN2(vp->minDepth, vp->maxDepth); + float max_depth = MAX2(vp->minDepth, vp->maxDepth); + + struct GENX(CC_VIEWPORT) cc_viewport = { + .MinimumDepth = depth_clamp_enable ? min_depth : 0.0f, + .MaximumDepth = depth_clamp_enable ? max_depth : 1.0f, + }; + + GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport); + } + + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) { + cc.CCViewportPointer = cc_state.offset; + } +} +#endif void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable) @@ -55,9 +150,7 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable) pc.RenderTargetCacheFlushEnable = true; #if GFX_VER >= 12 pc.TileCacheFlushEnable = true; -#endif -#if INTEL_NEEDS_WA_1409600907 /* Wa_1409600907: "PIPE_CONTROL with Depth Stall Enable bit must * be set with any PIPE_CONTROL with Depth Flush Enable bit set. */ @@ -76,7 +169,20 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable) lri.DataDWord = cache_mode; } -#endif /* GFX_VER == 9 */ +#elif GFX_VER == 8 + + uint32_t cache_mode; + anv_pack_struct(&cache_mode, GENX(CACHE_MODE_1), + .NPPMAFixEnable = enable, + .NPEarlyZFailsDisable = enable, + .NPPMAFixEnableMask = true, + .NPEarlyZFailsDisableMask = true); + anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) { + lri.RegisterOffset = GENX(CACHE_MODE_1_num); + lri.DataDWord = cache_mode; + } + +#endif /* GFX_VER == 8 */ /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache * Flush bits is often necessary. We do it regardless because it's easier. @@ -96,8 +202,96 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable) } UNUSED static bool -want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer, - const struct vk_depth_stencil_state *ds) +want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer) +{ + assert(GFX_VER == 8); + + /* From the Broadwell PRM Vol. 2c CACHE_MODE_1::NP_PMA_FIX_ENABLE: + * + * SW must set this bit in order to enable this fix when following + * expression is TRUE. + * + * 3DSTATE_WM::ForceThreadDispatch != 1 && + * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) && + * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) && + * (3DSTATE_DEPTH_BUFFER::HIZ Enable) && + * !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) && + * (3DSTATE_PS_EXTRA::PixelShaderValid) && + * !(3DSTATE_WM_HZ_OP::DepthBufferClear || + * 3DSTATE_WM_HZ_OP::DepthBufferResolve || + * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable || + * 3DSTATE_WM_HZ_OP::StencilBufferClear) && + * (3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable) && + * (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels || + * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget || + * 3DSTATE_PS_BLEND::AlphaToCoverageEnable || + * 3DSTATE_PS_BLEND::AlphaTestEnable || + * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) && + * 3DSTATE_WM::ForceKillPix != ForceOff && + * ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable && + * 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) || + * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable && + * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE && + * 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) || + * (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF)) + */ + + /* These are always true: + * 3DSTATE_WM::ForceThreadDispatch != 1 && + * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) + */ + + /* We only enable the PMA fix if we know for certain that HiZ is enabled. + * If we don't know whether HiZ is enabled or not, we disable the PMA fix + * and there is no harm. + * + * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) && + * 3DSTATE_DEPTH_BUFFER::HIZ Enable + */ + if (!cmd_buffer->state.hiz_enabled) + return false; + + /* 3DSTATE_PS_EXTRA::PixelShaderValid */ + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; + if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) + return false; + + /* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) */ + const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); + if (wm_prog_data->early_fragment_tests) + return false; + + /* We never use anv_pipeline for HiZ ops so this is trivially true: + * !(3DSTATE_WM_HZ_OP::DepthBufferClear || + * 3DSTATE_WM_HZ_OP::DepthBufferResolve || + * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable || + * 3DSTATE_WM_HZ_OP::StencilBufferClear) + */ + + /* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable */ + if (!pipeline->depth_test_enable) + return false; + + /* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels || + * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget || + * 3DSTATE_PS_BLEND::AlphaToCoverageEnable || + * 3DSTATE_PS_BLEND::AlphaTestEnable || + * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) && + * 3DSTATE_WM::ForceKillPix != ForceOff && + * ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable && + * 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) || + * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable && + * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE && + * 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) || + * (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF)) + */ + return (pipeline->kill_pixel && (pipeline->writes_depth || + pipeline->writes_stencil)) || + wm_prog_data->computed_depth_mode != PSCDEPTH_OFF; +} + +UNUSED static bool +want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer) { if (GFX_VER > 9) return false; @@ -159,10 +353,13 @@ want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer, if (!cmd_buffer->state.hiz_enabled) return false; - /* We can't possibly know if HiZ is enabled without the depth attachment */ - ASSERTED const struct anv_image_view *d_iview = - cmd_buffer->state.gfx.depth_att.iview; - assert(d_iview && d_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ); + /* We can't possibly know if HiZ is enabled without the framebuffer */ + assert(cmd_buffer->state.framebuffer); + + /* HiZ is enabled so we had better have a depth buffer with HiZ */ + const struct anv_image_view *ds_iview = + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); + assert(ds_iview && ds_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ); /* 3DSTATE_PS_EXTRA::PixelShaderValid */ struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; @@ -184,13 +381,19 @@ want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer, /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE && * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable */ - const bool stc_test_en = ds->stencil.test_enable; + const bool stc_test_en = + (ds_iview->image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + pipeline->stencil_test_enable; /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE && * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable && * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE) */ - const bool stc_write_en = ds->stencil.write_enable; + const bool stc_write_en = + (ds_iview->image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + (cmd_buffer->state.gfx.dynamic.stencil_write_mask.front || + cmd_buffer->state.gfx.dynamic.stencil_write_mask.back) && + pipeline->writes_stencil; /* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */ const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil; @@ -211,346 +414,43 @@ want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer, wm_prog_data->computed_depth_mode != PSCDEPTH_OFF; } -static UNUSED bool -geom_or_tess_prim_id_used(struct anv_graphics_pipeline *pipeline) -{ - const struct brw_tcs_prog_data *tcs_prog_data = - anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL) ? - get_tcs_prog_data(pipeline) : NULL; - const struct brw_tes_prog_data *tes_prog_data = - anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL) ? - get_tes_prog_data(pipeline) : NULL; - const struct brw_gs_prog_data *gs_prog_data = - anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY) ? - get_gs_prog_data(pipeline) : NULL; - - return (tcs_prog_data && tcs_prog_data->include_primitive_id) || - (tes_prog_data && tes_prog_data->include_primitive_id) || - (gs_prog_data && gs_prog_data->include_primitive_id); -} - -static void -genX(cmd_emit_te)(struct anv_cmd_buffer *cmd_buffer) -{ - const struct vk_dynamic_graphics_state *dyn = - &cmd_buffer->vk.dynamic_graphics_state; - struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - const struct brw_tes_prog_data *tes_prog_data = get_tes_prog_data(pipeline); - - if (!tes_prog_data || - !anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TE), te); - return; - } - - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TE), te) { - te.Partitioning = tes_prog_data->partitioning; - te.TEDomain = tes_prog_data->domain; - te.TEEnable = true; - te.MaximumTessellationFactorOdd = 63.0; - te.MaximumTessellationFactorNotOdd = 64.0; -#if GFX_VERx10 >= 125 - if (intel_needs_workaround(cmd_buffer->device->info, 22012785325)) - te.TessellationDistributionMode = TEDMODE_RR_STRICT; - else - te.TessellationDistributionMode = TEDMODE_RR_FREE; - - if (intel_needs_workaround(cmd_buffer->device->info, 14015297576)) { - /* Wa_14015297576: - * - * Disable Tessellation Distribution when primitive Id is enabled. - */ - if (pipeline->primitive_id_override || - geom_or_tess_prim_id_used(pipeline)) - te.TessellationDistributionMode = TEDMODE_OFF; - } - - te.TessellationDistributionLevel = TEDLEVEL_PATCH; - /* 64_TRIANGLES */ - te.SmallPatchThreshold = 3; - /* 1K_TRIANGLES */ - te.TargetBlockSize = 8; - /* 1K_TRIANGLES */ - te.LocalBOPAccumulatorThreshold = 1; -#endif - if (dyn->ts.domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) { - te.OutputTopology = tes_prog_data->output_topology; - } else { - /* When the origin is upper-left, we have to flip the winding order */ - if (tes_prog_data->output_topology == OUTPUT_TRI_CCW) { - te.OutputTopology = OUTPUT_TRI_CW; - } else if (tes_prog_data->output_topology == OUTPUT_TRI_CW) { - te.OutputTopology = OUTPUT_TRI_CCW; - } else { - te.OutputTopology = tes_prog_data->output_topology; - } - } - } -} - -static void -genX(cmd_emit_sample_mask)(struct anv_cmd_buffer *cmd_buffer) -{ - const struct vk_dynamic_graphics_state *dyn = - &cmd_buffer->vk.dynamic_graphics_state; - - /* From the Vulkan 1.0 spec: - * If pSampleMask is NULL, it is treated as if the mask has all bits - * enabled, i.e. no coverage is removed from fragments. - * - * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits. - */ - uint32_t sample_mask = 0xffff; - - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SAMPLE_MASK), sm) { - sm.SampleMask = dyn->ms.sample_mask & sample_mask; - } -} - -#if GFX_VER >= 12 -static uint32_t -get_cps_state_offset(struct anv_device *device, bool cps_enabled, - const struct vk_fragment_shading_rate_state *fsr) -{ - if (!cps_enabled) - return device->cps_states.offset; - - uint32_t offset; - static const uint32_t size_index[] = { - [1] = 0, - [2] = 1, - [4] = 2, - }; - -#if GFX_VERx10 >= 125 - offset = - 1 + /* skip disabled */ - fsr->combiner_ops[0] * 5 * 3 * 3 + - fsr->combiner_ops[1] * 3 * 3 + - size_index[fsr->fragment_size.width] * 3 + - size_index[fsr->fragment_size.height]; -#else - offset = - 1 + /* skip disabled */ - size_index[fsr->fragment_size.width] * 3 + - size_index[fsr->fragment_size.height]; -#endif - - offset *= MAX_VIEWPORTS * GENX(CPS_STATE_length) * 4; - - return device->cps_states.offset + offset; -} -#endif /* GFX_VER >= 12 */ - -#if GFX_VER >= 11 -static void -genX(emit_shading_rate)(struct anv_batch *batch, - const struct anv_graphics_pipeline *pipeline, - const struct vk_fragment_shading_rate_state *fsr) -{ - const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); - const bool cps_enable = wm_prog_data && - brw_wm_prog_data_is_coarse(wm_prog_data, 0); - -#if GFX_VER == 11 - anv_batch_emit(batch, GENX(3DSTATE_CPS), cps) { - cps.CoarsePixelShadingMode = cps_enable ? CPS_MODE_CONSTANT : CPS_MODE_NONE; - if (cps_enable) { - cps.MinCPSizeX = fsr->fragment_size.width; - cps.MinCPSizeY = fsr->fragment_size.height; - } - } -#elif GFX_VER >= 12 - /* TODO: we can optimize this flush in the following cases: - * - * In the case where the last geometry shader emits a value that is not - * constant, we can avoid this stall because we can synchronize the - * pixel shader internally with - * 3DSTATE_PS::EnablePSDependencyOnCPsizeChange. - * - * If we know that the previous pipeline and the current one are using - * the same fragment shading rate. - */ - anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) { -#if GFX_VERx10 >= 125 - pc.PSSStallSyncEnable = true; -#else - pc.PSDSyncEnable = true; -#endif - } - - anv_batch_emit(batch, GENX(3DSTATE_CPS_POINTERS), cps) { - struct anv_device *device = pipeline->base.device; - - cps.CoarsePixelShadingStateArrayPointer = - get_cps_state_offset(device, cps_enable, fsr); - } -#endif -} -#endif /* GFX_VER >= 11 */ - -const uint32_t genX(vk_to_intel_blend)[] = { - [VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO, - [VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE, - [VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, - [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, - [VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR, - [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR, - [VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, - [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, - [VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA, - [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, - [VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, - [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR, - [VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, - [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA, - [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, - [VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, - [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, - [VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, - [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, -}; - -static const uint32_t genX(vk_to_intel_blend_op)[] = { - [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, - [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, - [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, - [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, - [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, -}; - void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) { struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - const struct vk_dynamic_graphics_state *dyn = - &cmd_buffer->vk.dynamic_graphics_state; - - if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI)) { - const uint32_t ve_count = - pipeline->vs_input_elements + pipeline->svgs_count; - const uint32_t num_dwords = 1 + 2 * MAX2(1, ve_count); - uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, - GENX(3DSTATE_VERTEX_ELEMENTS)); - - if (p) { - if (ve_count == 0) { - memcpy(p + 1, cmd_buffer->device->empty_vs_input, - sizeof(cmd_buffer->device->empty_vs_input)); - } else if (ve_count == pipeline->vertex_input_elems) { - /* MESA_VK_DYNAMIC_VI is not dynamic for this pipeline, so - * everything is in pipeline->vertex_input_data and we can just - * memcpy - */ - memcpy(p + 1, pipeline->vertex_input_data, 4 * 2 * ve_count); - } else { - /* Use dyn->vi to emit the dynamic VERTEX_ELEMENT_STATE input. */ - genX(emit_vertex_input)(&cmd_buffer->batch, p + 1, - pipeline, dyn->vi); - /* Then append the VERTEX_ELEMENT_STATE for the draw parameters */ - memcpy(p + 1 + 2 * pipeline->vs_input_elements, - pipeline->vertex_input_data, - 4 * 2 * pipeline->vertex_input_elems); - } - } - } + struct anv_dynamic_state *d = &cmd_buffer->state.gfx.dynamic; - if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN)) { - genX(cmd_emit_te)(cmd_buffer); - } - -#if GFX_VER >= 11 - if (cmd_buffer->device->vk.enabled_extensions.KHR_fragment_shading_rate && - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR)) - genX(emit_shading_rate)(&cmd_buffer->batch, pipeline, &dyn->fsr); -#endif /* GFX_VER >= 11 */ - - if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX)) { + if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) { uint32_t sf_dw[GENX(3DSTATE_SF_length)]; struct GENX(3DSTATE_SF) sf = { GENX(3DSTATE_SF_header), }; - - ANV_SETUP_PROVOKING_VERTEX(sf, dyn->rs.provoking_vertex); - - sf.LineWidth = dyn->rs.line.width, - +#if GFX_VER == 8 + if (cmd_buffer->device->info.is_cherryview) { + sf.CHVLineWidth = d->line_width; + } else { + sf.LineWidth = d->line_width; + } +#else + sf.LineWidth = d->line_width, +#endif GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf); anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gfx8.sf); } - if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_POLYGON_MODE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_MODE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLIP_ENABLE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE)) { - /* Take dynamic primitive topology in to account with - * 3DSTATE_RASTER::APIMode - * 3DSTATE_RASTER::DXMultisampleRasterizationEnable - * 3DSTATE_RASTER::AntialiasingEnable - */ - uint32_t api_mode = 0; - bool msaa_raster_enable = false; - - VkLineRasterizationModeEXT line_mode = - anv_line_rasterization_mode(dyn->rs.line.mode, - pipeline->rasterization_samples); - - VkPolygonMode dynamic_raster_mode = - genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline, - dyn->rs.polygon_mode, - dyn->ia.primitive_topology); - - genX(rasterization_mode)(dynamic_raster_mode, - line_mode, dyn->rs.line.width, - &api_mode, &msaa_raster_enable); - - /* From the Browadwell PRM, Volume 2, documentation for - * 3DSTATE_RASTER, "Antialiasing Enable": - * - * "This field must be disabled if any of the render targets - * have integer (UINT or SINT) surface format." - * - * Additionally internal documentation for Gfx12+ states: - * - * "This bit MUST not be set when NUM_MULTISAMPLES > 1 OR - * FORCED_SAMPLE_COUNT > 1." - */ - bool aa_enable = - anv_rasterization_aa_mode(dynamic_raster_mode, line_mode) && - !cmd_buffer->state.gfx.has_uint_rt && - !(GFX_VER >= 12 && cmd_buffer->state.gfx.samples > 1); - - bool depth_clip_enable = - vk_rasterization_state_depth_clip_enable(&dyn->rs); - + if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS | + ANV_CMD_DIRTY_DYNAMIC_CULL_MODE | + ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE)) { uint32_t raster_dw[GENX(3DSTATE_RASTER_length)]; struct GENX(3DSTATE_RASTER) raster = { GENX(3DSTATE_RASTER_header), - .APIMode = api_mode, - .DXMultisampleRasterizationEnable = msaa_raster_enable, - .AntialiasingEnable = aa_enable, - .CullMode = genX(vk_to_intel_cullmode)[dyn->rs.cull_mode], - .FrontWinding = genX(vk_to_intel_front_face)[dyn->rs.front_face], - .GlobalDepthOffsetEnableSolid = dyn->rs.depth_bias.enable, - .GlobalDepthOffsetEnableWireframe = dyn->rs.depth_bias.enable, - .GlobalDepthOffsetEnablePoint = dyn->rs.depth_bias.enable, - .GlobalDepthOffsetConstant = dyn->rs.depth_bias.constant, - .GlobalDepthOffsetScale = dyn->rs.depth_bias.slope, - .GlobalDepthOffsetClamp = dyn->rs.depth_bias.clamp, - .FrontFaceFillMode = genX(vk_to_intel_fillmode)[dyn->rs.polygon_mode], - .BackFaceFillMode = genX(vk_to_intel_fillmode)[dyn->rs.polygon_mode], - .ViewportZFarClipTestEnable = depth_clip_enable, - .ViewportZNearClipTestEnable = depth_clip_enable, + .GlobalDepthOffsetConstant = d->depth_bias.bias, + .GlobalDepthOffsetScale = d->depth_bias.slope, + .GlobalDepthOffsetClamp = d->depth_bias.clamp, + .CullMode = genX(vk_to_gen_cullmode)[d->cull_mode], + .FrontWinding = genX(vk_to_gen_front_face)[d->front_face], }; GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster); anv_batch_emit_merge(&cmd_buffer->batch, raster_dw, @@ -562,16 +462,86 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) * across different state packets for gfx8 and gfx9. We handle that by * using a big old #if switch here. */ - if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) { +#if GFX_VER == 8 + if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { struct anv_state cc_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, GENX(COLOR_CALC_STATE_length) * 4, 64); struct GENX(COLOR_CALC_STATE) cc = { - .BlendConstantColorRed = dyn->cb.blend_constants[0], - .BlendConstantColorGreen = dyn->cb.blend_constants[1], - .BlendConstantColorBlue = dyn->cb.blend_constants[2], - .BlendConstantColorAlpha = dyn->cb.blend_constants[3], + .BlendConstantColorRed = d->blend_constants[0], + .BlendConstantColorGreen = d->blend_constants[1], + .BlendConstantColorBlue = d->blend_constants[2], + .BlendConstantColorAlpha = d->blend_constants[3], + .StencilReferenceValue = d->stencil_reference.front & 0xff, + .BackfaceStencilReferenceValue = d->stencil_reference.back & 0xff, + }; + GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) { + ccp.ColorCalcStatePointer = cc_state.offset; + ccp.ColorCalcStatePointerValid = true; + } + } + + if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_RENDER_TARGETS | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | + ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | + ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE | + ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP)) { + uint32_t wm_depth_stencil_dw[GENX(3DSTATE_WM_DEPTH_STENCIL_length)]; + + struct GENX(3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil) = { + GENX(3DSTATE_WM_DEPTH_STENCIL_header), + + .StencilTestMask = d->stencil_compare_mask.front & 0xff, + .StencilWriteMask = d->stencil_write_mask.front & 0xff, + + .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff, + + .StencilBufferWriteEnable = + (d->stencil_write_mask.front || d->stencil_write_mask.back) && + d->stencil_test_enable, + + .DepthTestEnable = d->depth_test_enable, + .DepthBufferWriteEnable = d->depth_test_enable && d->depth_write_enable, + .DepthTestFunction = genX(vk_to_gen_compare_op)[d->depth_compare_op], + .StencilTestEnable = d->stencil_test_enable, + .StencilFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.fail_op], + .StencilPassDepthPassOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.pass_op], + .StencilPassDepthFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.depth_fail_op], + .StencilTestFunction = genX(vk_to_gen_compare_op)[d->stencil_op.front.compare_op], + .BackfaceStencilFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.fail_op], + .BackfaceStencilPassDepthPassOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.pass_op], + .BackfaceStencilPassDepthFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.depth_fail_op], + .BackfaceStencilTestFunction = genX(vk_to_gen_compare_op)[d->stencil_op.back.compare_op], + }; + GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, wm_depth_stencil_dw, + &wm_depth_stencil); + + anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw, + pipeline->gfx8.wm_depth_stencil); + + genX(cmd_buffer_enable_pma_fix)(cmd_buffer, + want_depth_pma_fix(cmd_buffer)); + } +#else + if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) { + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + GENX(COLOR_CALC_STATE_length) * 4, + 64); + struct GENX(COLOR_CALC_STATE) cc = { + .BlendConstantColorRed = d->blend_constants[0], + .BlendConstantColorGreen = d->blend_constants[1], + .BlendConstantColorBlue = d->blend_constants[2], + .BlendConstantColorAlpha = d->blend_constants[3], }; GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); @@ -581,333 +551,160 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) } } - if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_MASK))) - genX(cmd_emit_sample_mask)(cmd_buffer); - - if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE | - ANV_CMD_DIRTY_RENDER_TARGETS)) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE)) { - VkImageAspectFlags ds_aspects = 0; - if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED) - ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; - if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED) - ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; - - struct vk_depth_stencil_state opt_ds = dyn->ds; - vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true); - - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) { - ds.DoubleSidedStencilEnable = true; - - ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff; - ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff; - - ds.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff; - ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff; - - ds.StencilReferenceValue = opt_ds.stencil.front.reference & 0xff; - ds.BackfaceStencilReferenceValue = opt_ds.stencil.back.reference & 0xff; - - ds.DepthTestEnable = opt_ds.depth.test_enable; - ds.DepthBufferWriteEnable = opt_ds.depth.write_enable; - ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op]; - ds.StencilTestEnable = opt_ds.stencil.test_enable; - ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable; - ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail]; - ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass]; - ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail]; - ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare]; - ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail]; - ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass]; - ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail]; - ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare]; - } + if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_RENDER_TARGETS | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE | + ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | + ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE | + ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP)) { + uint32_t dwords[GENX(3DSTATE_WM_DEPTH_STENCIL_length)]; + struct GENX(3DSTATE_WM_DEPTH_STENCIL) wm_depth_stencil = { + GENX(3DSTATE_WM_DEPTH_STENCIL_header), + + .StencilTestMask = d->stencil_compare_mask.front & 0xff, + .StencilWriteMask = d->stencil_write_mask.front & 0xff, + + .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff, + + .StencilReferenceValue = d->stencil_reference.front & 0xff, + .BackfaceStencilReferenceValue = d->stencil_reference.back & 0xff, + + .StencilBufferWriteEnable = + (d->stencil_write_mask.front || d->stencil_write_mask.back) && + d->stencil_test_enable, + + .DepthTestEnable = d->depth_test_enable, + .DepthBufferWriteEnable = d->depth_test_enable && d->depth_write_enable, + .DepthTestFunction = genX(vk_to_gen_compare_op)[d->depth_compare_op], + .StencilTestEnable = d->stencil_test_enable, + .StencilFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.fail_op], + .StencilPassDepthPassOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.pass_op], + .StencilPassDepthFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.depth_fail_op], + .StencilTestFunction = genX(vk_to_gen_compare_op)[d->stencil_op.front.compare_op], + .BackfaceStencilFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.fail_op], + .BackfaceStencilPassDepthPassOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.pass_op], + .BackfaceStencilPassDepthFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.depth_fail_op], + .BackfaceStencilTestFunction = genX(vk_to_gen_compare_op)[d->stencil_op.back.compare_op], - const bool pma = want_stencil_pma_fix(cmd_buffer, &opt_ds); - genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma); + }; + GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dwords, &wm_depth_stencil); + + anv_batch_emit_merge(&cmd_buffer->batch, dwords, + pipeline->gfx9.wm_depth_stencil); + + genX(cmd_buffer_enable_pma_fix)(cmd_buffer, + want_stencil_pma_fix(cmd_buffer)); } +#endif #if GFX_VER >= 12 - if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS)) { + if(cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS | + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE)) { anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BOUNDS), db) { - db.DepthBoundsTestEnable = dyn->ds.depth.bounds_test.enable; - db.DepthBoundsTestMinValue = dyn->ds.depth.bounds_test.min; - db.DepthBoundsTestMaxValue = dyn->ds.depth.bounds_test.max; + db.DepthBoundsTestValueModifyDisable = false; + db.DepthBoundsTestEnableModifyDisable = false; + db.DepthBoundsTestEnable = d->depth_bounds_test_enable; + db.DepthBoundsTestMinValue = d->depth_bounds.min; + db.DepthBoundsTestMaxValue = d->depth_bounds.max; } } #endif - if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE)) { + if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) { anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) { - ls.LineStipplePattern = dyn->rs.line.stipple.pattern; + ls.LineStipplePattern = d->line_stipple.pattern; ls.LineStippleInverseRepeatCount = - 1.0f / MAX2(1, dyn->rs.line.stipple.factor); - ls.LineStippleRepeatCount = dyn->rs.line.stipple.factor; + 1.0f / MAX2(1, d->line_stipple.factor); + ls.LineStippleRepeatCount = d->line_stipple.factor; } } - if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_RESTART_INDEX) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) { + if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_INDEX_BUFFER)) { anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) { -#if GFX_VERx10 >= 125 - vf.GeometryDistributionEnable = true; -#endif - vf.IndexedDrawCutIndexEnable = dyn->ia.primitive_restart_enable; - vf.CutIndex = cmd_buffer->state.gfx.restart_index; + vf.IndexedDrawCutIndexEnable = pipeline->primitive_restart; + vf.CutIndex = cmd_buffer->state.restart_index; } } - if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_INDEX_BUFFER) { - struct anv_buffer *buffer = cmd_buffer->state.gfx.index_buffer; - uint32_t offset = cmd_buffer->state.gfx.index_offset; - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) { - ib.IndexFormat = cmd_buffer->state.gfx.index_type; - ib.MOCS = anv_mocs(cmd_buffer->device, - buffer->address.bo, - ISL_SURF_USAGE_INDEX_BUFFER_BIT); -#if GFX_VER >= 12 - ib.L3BypassDisable = true; -#endif - ib.BufferStartingAddress = anv_address_add(buffer->address, offset); - ib.BufferSize = vk_buffer_range(&buffer->vk, offset, - VK_WHOLE_SIZE); - } - } + if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY)) { + uint32_t topology; + if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) + topology = pipeline->topology; + else + topology = genX(vk_to_gen_primitive_type)[d->primitive_topology]; -#if GFX_VERx10 >= 125 - if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VFG), vfg) { - /* If 3DSTATE_TE: TE Enable == 1 then RR_STRICT else RR_FREE*/ - vfg.DistributionMode = - anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL) ? RR_STRICT : - RR_FREE; - vfg.DistributionGranularity = BatchLevelGranularity; - /* Wa_14014890652 */ - if (intel_device_info_is_dg2(cmd_buffer->device->info)) - vfg.GranularityThresholdDisable = 1; - vfg.ListCutIndexEnable = dyn->ia.primitive_restart_enable; - /* 192 vertices for TRILIST_ADJ */ - vfg.ListNBatchSizeScale = 0; - /* Batch size of 384 vertices */ - vfg.List3BatchSizeScale = 2; - /* Batch size of 128 vertices */ - vfg.List2BatchSizeScale = 1; - /* Batch size of 128 vertices */ - vfg.List1BatchSizeScale = 2; - /* Batch size of 256 vertices for STRIP topologies */ - vfg.StripBatchSizeScale = 3; - /* 192 control points for PATCHLIST_3 */ - vfg.PatchBatchSizeScale = 1; - /* 192 control points for PATCHLIST_3 */ - vfg.PatchBatchSizeMultiplier = 31; + cmd_buffer->state.gfx.primitive_topology = topology; + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) { + vft.PrimitiveTopologyType = topology; } } -#endif - if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations && - (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE))) { + if (cmd_buffer->device->vk.enabled_extensions.EXT_sample_locations && + cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) { genX(emit_sample_pattern)(&cmd_buffer->batch, - dyn->ms.sample_locations_enable ? - dyn->ms.sample_locations : NULL); + cmd_buffer->state.gfx.dynamic.sample_locations.samples, + cmd_buffer->state.gfx.dynamic.sample_locations.locations); } - if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE_ENABLE)) { - /* 3DSTATE_WM in the hope we can avoid spawning fragment shaders - * threads. - */ - uint32_t wm_dwords[GENX(3DSTATE_WM_length)]; - struct GENX(3DSTATE_WM) wm = { - GENX(3DSTATE_WM_header), - - .ForceThreadDispatchEnable = anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) && - (pipeline->force_fragment_thread_dispatch || - anv_cmd_buffer_all_color_write_masked(cmd_buffer)) ? - ForceON : 0, - .LineStippleEnable = dyn->rs.line.stipple.enable, - }; - GENX(3DSTATE_WM_pack)(NULL, wm_dwords, &wm); + cmd_buffer->state.gfx.dirty = 0; +} - anv_batch_emit_merge(&cmd_buffer->batch, wm_dwords, pipeline->gfx8.wm); +static uint32_t vk_to_gen_index_type(VkIndexType type) +{ + switch (type) { + case VK_INDEX_TYPE_UINT8_EXT: + return INDEX_BYTE; + case VK_INDEX_TYPE_UINT16: + return INDEX_WORD; + case VK_INDEX_TYPE_UINT32: + return INDEX_DWORD; + default: + unreachable("invalid index type"); } +} - if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_WRITE_MASKS) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ENABLES) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS)) { - const uint8_t color_writes = dyn->cb.color_write_enables; - const struct anv_cmd_graphics_state *state = &cmd_buffer->state.gfx; - const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); - bool has_writeable_rt = - anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) && - (color_writes & ((1u << state->color_att_count) - 1)) != 0; - - uint32_t num_dwords = GENX(BLEND_STATE_length) + - GENX(BLEND_STATE_ENTRY_length) * MAX_RTS; - struct anv_state blend_states = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - num_dwords * 4, - 64); - - uint32_t *dws = blend_states.map; - - struct GENX(BLEND_STATE) blend_state = { - .AlphaToCoverageEnable = dyn->ms.alpha_to_coverage_enable, - .AlphaToOneEnable = dyn->ms.alpha_to_one_enable, - }; - - /* Jump to blend entries. */ - dws += GENX(BLEND_STATE_length); - - struct GENX(BLEND_STATE_ENTRY) bs0 = { 0 }; - - for (uint32_t i = 0; i < MAX_RTS; i++) { - /* Disable anything above the current number of color attachments. */ - bool write_disabled = i >= cmd_buffer->state.gfx.color_att_count || - (color_writes & BITFIELD_BIT(i)) == 0; - struct GENX(BLEND_STATE_ENTRY) entry = { - .WriteDisableAlpha = write_disabled || - (dyn->cb.attachments[i].write_mask & - VK_COLOR_COMPONENT_A_BIT) == 0, - .WriteDisableRed = write_disabled || - (dyn->cb.attachments[i].write_mask & - VK_COLOR_COMPONENT_R_BIT) == 0, - .WriteDisableGreen = write_disabled || - (dyn->cb.attachments[i].write_mask & - VK_COLOR_COMPONENT_G_BIT) == 0, - .WriteDisableBlue = write_disabled || - (dyn->cb.attachments[i].write_mask & - VK_COLOR_COMPONENT_B_BIT) == 0, - /* Vulkan specification 1.2.168, VkLogicOp: - * - * "Logical operations are controlled by the logicOpEnable and - * logicOp members of VkPipelineColorBlendStateCreateInfo. If - * logicOpEnable is VK_TRUE, then a logical operation selected - * by logicOp is applied between each color attachment and the - * fragment’s corresponding output value, and blending of all - * attachments is treated as if it were disabled." - * - * From the Broadwell PRM Volume 2d: Command Reference: - * Structures: BLEND_STATE_ENTRY: - * - * "Enabling LogicOp and Color Buffer Blending at the same time - * is UNDEFINED" - */ - .LogicOpFunction = genX(vk_to_intel_logic_op)[dyn->cb.logic_op], - .LogicOpEnable = dyn->cb.logic_op_enable, - .ColorBufferBlendEnable = - !dyn->cb.logic_op_enable && dyn->cb.attachments[i].blend_enable, - - .ColorClampRange = COLORCLAMP_RTFORMAT, - .PreBlendColorClampEnable = true, - .PostBlendColorClampEnable = true, - }; - - /* Setup blend equation. */ - entry.SourceBlendFactor = - genX(vk_to_intel_blend)[dyn->cb.attachments[i].src_color_blend_factor]; - entry.DestinationBlendFactor = - genX(vk_to_intel_blend)[dyn->cb.attachments[i].dst_color_blend_factor]; - entry.ColorBlendFunction = - genX(vk_to_intel_blend_op)[dyn->cb.attachments[i].color_blend_op]; - entry.SourceAlphaBlendFactor = - genX(vk_to_intel_blend)[dyn->cb.attachments[i].src_alpha_blend_factor]; - entry.DestinationAlphaBlendFactor = - genX(vk_to_intel_blend)[dyn->cb.attachments[i].dst_alpha_blend_factor]; - entry.AlphaBlendFunction = - genX(vk_to_intel_blend_op)[dyn->cb.attachments[i].alpha_blend_op]; - - if (dyn->cb.attachments[i].src_color_blend_factor != - dyn->cb.attachments[i].src_alpha_blend_factor || - dyn->cb.attachments[i].dst_color_blend_factor != - dyn->cb.attachments[i].dst_alpha_blend_factor || - dyn->cb.attachments[i].color_blend_op != - dyn->cb.attachments[i].alpha_blend_op) { - blend_state.IndependentAlphaBlendEnable = true; - } - - /* The Dual Source Blending documentation says: - * - * "If SRC1 is included in a src/dst blend factor and - * a DualSource RT Write message is not used, results - * are UNDEFINED. (This reflects the same restriction in DX APIs, - * where undefined results are produced if “o1” is not written - * by a PS – there are no default values defined)." - * - * There is no way to gracefully fix this undefined situation - * so we just disable the blending to prevent possible issues. - */ - if (wm_prog_data && !wm_prog_data->dual_src_blend && - anv_is_dual_src_blend_equation(&dyn->cb.attachments[i])) { - entry.ColorBufferBlendEnable = false; - } - - /* Our hardware applies the blend factor prior to the blend function - * regardless of what function is used. Technically, this means the - * hardware can do MORE than GL or Vulkan specify. However, it also - * means that, for MIN and MAX, we have to stomp the blend factor to - * ONE to make it a no-op. - */ - if (dyn->cb.attachments[i].color_blend_op == VK_BLEND_OP_MIN || - dyn->cb.attachments[i].color_blend_op == VK_BLEND_OP_MAX) { - entry.SourceBlendFactor = BLENDFACTOR_ONE; - entry.DestinationBlendFactor = BLENDFACTOR_ONE; - } - if (dyn->cb.attachments[i].alpha_blend_op == VK_BLEND_OP_MIN || - dyn->cb.attachments[i].alpha_blend_op == VK_BLEND_OP_MAX) { - entry.SourceAlphaBlendFactor = BLENDFACTOR_ONE; - entry.DestinationAlphaBlendFactor = BLENDFACTOR_ONE; - } - - GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry); - - if (i == 0) - bs0 = entry; - - dws += GENX(BLEND_STATE_ENTRY_length); - } - - /* Generate blend state after entries. */ - GENX(BLEND_STATE_pack)(NULL, blend_states.map, &blend_state); - - /* 3DSTATE_PS_BLEND to be consistent with the rest of the - * BLEND_STATE_ENTRY. - */ - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_PS_BLEND), blend) { - blend.HasWriteableRT = has_writeable_rt, - blend.ColorBufferBlendEnable = bs0.ColorBufferBlendEnable; - blend.SourceAlphaBlendFactor = bs0.SourceAlphaBlendFactor; - blend.DestinationAlphaBlendFactor = bs0.DestinationAlphaBlendFactor; - blend.SourceBlendFactor = bs0.SourceBlendFactor; - blend.DestinationBlendFactor = bs0.DestinationBlendFactor; - blend.AlphaTestEnable = false; - blend.IndependentAlphaBlendEnable = blend_state.IndependentAlphaBlendEnable; - blend.AlphaToCoverageEnable = dyn->ms.alpha_to_coverage_enable; - } +static uint32_t restart_index_for_type(VkIndexType type) +{ + switch (type) { + case VK_INDEX_TYPE_UINT8_EXT: + return UINT8_MAX; + case VK_INDEX_TYPE_UINT16: + return UINT16_MAX; + case VK_INDEX_TYPE_UINT32: + return UINT32_MAX; + default: + unreachable("invalid index type"); + } +} - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) { - bsp.BlendStatePointer = blend_states.offset; - bsp.BlendStatePointerValid = true; - } +void genX(CmdBindIndexBuffer)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + cmd_buffer->state.restart_index = restart_index_for_type(indexType); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) { + ib.IndexFormat = vk_to_gen_index_type(indexType); + ib.MOCS = anv_mocs(cmd_buffer->device, + buffer->address.bo, + ISL_SURF_USAGE_INDEX_BUFFER_BIT); + ib.BufferStartingAddress = anv_address_add(buffer->address, offset); + ib.BufferSize = buffer->size - offset; } - /* When we're done, there is no more dirty gfx state. */ - vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state); - cmd_buffer->state.gfx.dirty = 0; + cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; } |