From 500e30a5d7582b5570d25d9b471c9bc7d653eab0 Mon Sep 17 00:00:00 2001
From: Jonathan Gray
Date: Thu, 6 Apr 2023 10:23:47 +0000
Subject: Merge Mesa 22.3.7

---
 lib/mesa/src/intel/blorp/blorp_clear.c      | 11 ++++
 lib/mesa/src/intel/compiler/brw_fs_nir.cpp  | 29 ++++++++-
 lib/mesa/src/intel/vulkan/anv_batch_chain.c |  4 +-
 lib/mesa/src/intel/vulkan/anv_device.c      |  4 +-
 lib/mesa/src/intel/vulkan/anv_image.c       |  8 ++-
 lib/mesa/src/intel/vulkan/anv_private.h     | 18 ++++--
 lib/mesa/src/intel/vulkan/genX_cmd_buffer.c | 96 ++++++++++++++++++-----------
 lib/mesa/src/intel/vulkan/genX_pipeline.c   | 72 ----------------------
 8 files changed, 118 insertions(+), 124 deletions(-)

(limited to 'lib/mesa/src/intel')

diff --git a/lib/mesa/src/intel/blorp/blorp_clear.c b/lib/mesa/src/intel/blorp/blorp_clear.c
index 0e7185c94..aef660923 100644
--- a/lib/mesa/src/intel/blorp/blorp_clear.c
+++ b/lib/mesa/src/intel/blorp/blorp_clear.c
@@ -607,6 +607,17 @@ blorp_clear(struct blorp_batch *batch,
    if (batch->blorp->isl_dev->info->ver < 6)
       use_simd16_replicated_data = false;
 
+   /* From the BSpec: 47719 Replicate Data:
+    *
+    *    "Replicate Data Render Target Write message should not be used
+    *     on all projects TGL+."
+    *
+    * See 14017879046, 14017880152 for additional information.
+    */
+   if (batch->blorp->isl_dev->info->ver >= 12 &&
+       format == ISL_FORMAT_R10G10B10_FLOAT_A2_UNORM)
+      use_simd16_replicated_data = false;
+
    if (compute)
       use_simd16_replicated_data = false;
 
diff --git a/lib/mesa/src/intel/compiler/brw_fs_nir.cpp b/lib/mesa/src/intel/compiler/brw_fs_nir.cpp
index 769077473..9065fd39d 100644
--- a/lib/mesa/src/intel/compiler/brw_fs_nir.cpp
+++ b/lib/mesa/src/intel/compiler/brw_fs_nir.cpp
@@ -4608,6 +4608,15 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
       assert(fence_regs_count <= ARRAY_SIZE(fence_regs));
 
+      /* Be conservative in Gen11+ and always stall in a fence.  Since
+       * there are two different fences, and shader might want to
+       * synchronize between them.
+       *
+       * TODO: Use scope and visibility information for the barriers from NIR
+       * to make a better decision on whether we need to stall.
+       */
+      bool force_stall = devinfo->ver >= 11;
+
       /* There are four cases where we want to insert a stall:
        *
        * 1. If we're a nir_intrinsic_end_invocation_interlock.  This is
@@ -4623,10 +4632,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
        *    scheduling barrier to keep the compiler from moving things
        *    around in an invalid way.
        *
-       * 4. On platforms with LSC.
+       * 4. On Gen11+ and platforms with LSC, we have multiple fence types,
+       *    without further information about the fence, we need to force a
+       *    stall.
        */
       if (instr->intrinsic == nir_intrinsic_end_invocation_interlock ||
-          fence_regs_count != 1 || devinfo->has_lsc) {
+          fence_regs_count != 1 || devinfo->has_lsc || force_stall) {
          ubld.exec_all().group(1, 0).emit(
             FS_OPCODE_SCHEDULING_FENCE, ubld.null_reg_ud(),
             fence_regs, fence_regs_count);
@@ -5441,10 +5452,22 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
    case nir_intrinsic_read_invocation: {
       const fs_reg value = get_nir_src(instr->src[0]);
       const fs_reg invocation = get_nir_src(instr->src[1]);
       fs_reg tmp = bld.vgrf(value.type);
 
+      /* When for some reason the subgroup_size picked by NIR is larger than
+       * the dispatch size picked by the backend (this could happen in RT,
+       * FS), bound the invocation to the dispatch size.
+       */
+      fs_reg bound_invocation;
+      if (bld.dispatch_width() < bld.shader->nir->info.subgroup_size) {
+         bound_invocation = bld.vgrf(BRW_REGISTER_TYPE_UD);
+         bld.AND(bound_invocation, invocation, brw_imm_ud(dispatch_width - 1));
+      } else {
+         bound_invocation = invocation;
+      }
+
       bld.exec_all().emit(SHADER_OPCODE_BROADCAST, tmp, value,
-                          bld.emit_uniformize(invocation));
+                          bld.emit_uniformize(bound_invocation));
 
       bld.MOV(retype(dest, value.type), fs_reg(component(tmp, 0)));
       break;
diff --git a/lib/mesa/src/intel/vulkan/anv_batch_chain.c b/lib/mesa/src/intel/vulkan/anv_batch_chain.c
index 3fd06fd69..cf890bd6a 100644
--- a/lib/mesa/src/intel/vulkan/anv_batch_chain.c
+++ b/lib/mesa/src/intel/vulkan/anv_batch_chain.c
@@ -1991,14 +1991,14 @@ anv_queue_submit(struct vk_queue *vk_queue,
       return VK_SUCCESS;
    }
 
-   uint64_t start_ts = intel_ds_begin_submit(queue->ds);
+   uint64_t start_ts = intel_ds_begin_submit(&queue->ds);
 
    pthread_mutex_lock(&device->mutex);
    result = anv_queue_submit_locked(queue, submit);
    /* Take submission ID under lock */
    pthread_mutex_unlock(&device->mutex);
 
-   intel_ds_end_submit(queue->ds, start_ts);
+   intel_ds_end_submit(&queue->ds, start_ts);
 
    return result;
 }
diff --git a/lib/mesa/src/intel/vulkan/anv_device.c b/lib/mesa/src/intel/vulkan/anv_device.c
index 65d1201cb..486936146 100644
--- a/lib/mesa/src/intel/vulkan/anv_device.c
+++ b/lib/mesa/src/intel/vulkan/anv_device.c
@@ -3575,9 +3575,7 @@ VkResult anv_CreateDevice(
     * to zero and they have a valid descriptor.
     */
    device->null_surface_state =
-      anv_state_pool_alloc(device->info->verx10 >= 125 ?
-                           &device->scratch_surface_state_pool :
-                           &device->internal_surface_state_pool,
+      anv_state_pool_alloc(&device->bindless_surface_state_pool,
                            device->isl_dev.ss.size,
                            device->isl_dev.ss.align);
    isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
diff --git a/lib/mesa/src/intel/vulkan/anv_image.c b/lib/mesa/src/intel/vulkan/anv_image.c
index 13d622d31..8bc06aedc 100644
--- a/lib/mesa/src/intel/vulkan/anv_image.c
+++ b/lib/mesa/src/intel/vulkan/anv_image.c
@@ -2245,7 +2245,8 @@ anv_layout_to_aux_state(const struct intel_device_info * const devinfo,
 
    case ISL_AUX_USAGE_CCS_D:
       /* We only support clear in exactly one state */
-      if (layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
+      if (layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL ||
+          layout == VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL) {
          assert(aux_supported);
          assert(clear_supported);
          return ISL_AUX_STATE_PARTIAL_CLEAR;
@@ -2389,7 +2390,8 @@ anv_layout_to_fast_clear_type(const struct intel_device_info * const devinfo,
    case ISL_AUX_STATE_COMPRESSED_CLEAR:
       if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
          return ANV_FAST_CLEAR_DEFAULT_VALUE;
-      } else if (layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
+      } else if (layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL ||
+                 layout == VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL) {
          /* The image might not support non zero fast clears when mutable.
           */
          if (!image->planes[plane].can_non_zero_fast_clear)
            return ANV_FAST_CLEAR_DEFAULT_VALUE;
@@ -2569,7 +2571,7 @@ anv_image_fill_surface_state(struct anv_device *device,
        */
       assert(surface->isl.samples == 1);
       assert(view.levels == 1);
-      assert(view.array_len == 1);
+      assert(surface->isl.dim == ISL_SURF_DIM_3D || view.array_len == 1);
 
       ASSERTED bool ok =
          isl_surf_get_uncompressed_surf(&device->isl_dev, isl_surf, &view,
diff --git a/lib/mesa/src/intel/vulkan/anv_private.h b/lib/mesa/src/intel/vulkan/anv_private.h
index 01e1a46cd..b8204b539 100644
--- a/lib/mesa/src/intel/vulkan/anv_private.h
+++ b/lib/mesa/src/intel/vulkan/anv_private.h
@@ -1102,7 +1102,7 @@ struct anv_queue {
    /** Synchronization object for debug purposes (DEBUG_SYNC) */
    struct vk_sync *sync;
 
-   struct intel_ds_queue * ds;
+   struct intel_ds_queue ds;
 };
 
 struct nir_xfb_info;
@@ -2287,14 +2287,25 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
             pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
          }
          break;
-      case VK_ACCESS_2_SHADER_READ_BIT:
       case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT:
       case VK_ACCESS_2_TRANSFER_READ_BIT:
+      case VK_ACCESS_2_SHADER_SAMPLED_READ_BIT:
         /* Transitioning a buffer to be read through the sampler, so
          * invalidate the texture cache, we don't want any stale data.
          */
         pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
         break;
+      case VK_ACCESS_2_SHADER_READ_BIT:
+         /* Same as VK_ACCESS_2_UNIFORM_READ_BIT and
+          * VK_ACCESS_2_SHADER_SAMPLED_READ_BIT cases above
+          */
+         pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT |
+                      ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
+         if (!device->physical->compiler->indirect_ubos_use_sampler) {
+            pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
+            pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
+         }
+         break;
       case VK_ACCESS_2_MEMORY_READ_BIT:
         /* Transitioning a buffer for generic read, invalidate all the
          * caches.
          */
@@ -2333,6 +2344,7 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
          */
         pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
         break;
+      case VK_ACCESS_2_SHADER_STORAGE_READ_BIT:
       default:
          break; /* Nothing to do */
       }
@@ -2463,7 +2475,6 @@ anv_gfx8_9_vb_cache_range_needs_workaround(struct anv_vb_cache_range *bound,
       return false;
    }
 
-   assert(vb_address.bo);
    bound->start = intel_48b_address(anv_address_physical(vb_address));
    bound->end = bound->start + vb_size;
    assert(bound->end > bound->start); /* No overflow */
@@ -3077,7 +3088,6 @@ struct anv_graphics_pipeline {
          uint32_t                                  sf[4];
          uint32_t                                  raster[5];
          uint32_t                                  wm[2];
-         uint32_t                                  blend_state[1 + MAX_RTS * 2];
         uint32_t                                  streamout_state[5];
       } gfx8;
    };
diff --git a/lib/mesa/src/intel/vulkan/genX_cmd_buffer.c b/lib/mesa/src/intel/vulkan/genX_cmd_buffer.c
index 6e4ecbe59..08db4d14d 100644
--- a/lib/mesa/src/intel/vulkan/genX_cmd_buffer.c
+++ b/lib/mesa/src/intel/vulkan/genX_cmd_buffer.c
@@ -2501,7 +2501,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
             assert(desc_idx < MAX_PUSH_DESCRIPTORS);
 
             if (shader->push_desc_info.fully_promoted_ubo_descriptors & BITFIELD_BIT(desc_idx)) {
-               surface_state = cmd_buffer->device->null_surface_state;
+               surface_state = anv_bindless_state_for_binding_table(
+                  cmd_buffer->device->null_surface_state);
                break;
             }
          }
@@ -2531,7 +2532,9 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                anv_bindless_state_for_binding_table(sstate.state);
             assert(surface_state.alloc_size);
          } else {
-            surface_state = cmd_buffer->device->null_surface_state;
+            surface_state =
+               anv_bindless_state_for_binding_table(
+                  cmd_buffer->device->null_surface_state);
          }
          break;
       }
@@ -2561,7 +2564,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                          "corresponding SPIR-V format enum.");
             }
          } else {
-            surface_state = cmd_buffer->device->null_surface_state;
+            surface_state = anv_bindless_state_for_binding_table(
+               cmd_buffer->device->null_surface_state);
          }
          break;
       }
@@ -2572,7 +2576,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
             surface_state = desc->set_buffer_view->surface_state;
             assert(surface_state.alloc_size);
          } else {
-            surface_state = cmd_buffer->device->null_surface_state;
+            surface_state = anv_bindless_state_for_binding_table(
+               cmd_buffer->device->null_surface_state);
          }
          break;
 
@@ -2582,7 +2587,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                            desc->buffer_view->surface_state);
             assert(surface_state.alloc_size);
          } else {
-            surface_state = cmd_buffer->device->null_surface_state;
+            surface_state = anv_bindless_state_for_binding_table(
+               cmd_buffer->device->null_surface_state);
          }
         break;
 
@@ -2619,7 +2625,9 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                           format, ISL_SWIZZLE_IDENTITY,
                                           usage, address, range, 1);
          } else {
-            surface_state = cmd_buffer->device->null_surface_state;
+            surface_state =
+               anv_bindless_state_for_binding_table(
+                  cmd_buffer->device->null_surface_state);
          }
          break;
       }
@@ -2632,7 +2640,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                            : desc->buffer_view->storage_surface_state);
             assert(surface_state.alloc_size);
          } else {
-            surface_state = cmd_buffer->device->null_surface_state;
+            surface_state = anv_bindless_state_for_binding_table(
+               cmd_buffer->device->null_surface_state);
          }
         break;
 
@@ -3929,7 +3938,7 @@ update_dirty_vbs_for_gfx8_vb_flush(struct anv_cmd_buffer *cmd_buffer,
       vb_used |= 1ull << ANV_DRAWID_VB_INDEX;
 
    genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(cmd_buffer,
-                                                       access_type == RANDOM,
+                                                       access_type,
                                                        vb_used);
 #endif
 }
@@ -4058,13 +4067,14 @@ void genX(CmdDrawMultiEXT)(
          prim.StartInstanceLocation = firstInstance;
          prim.BaseVertexLocation = 0;
       }
+   }
 
 #if GFX_VERx10 == 125
-      genX(emit_dummy_post_sync_op)(cmd_buffer, draw->vertexCount);
+   genX(emit_dummy_post_sync_op)(cmd_buffer,
+                                 drawCount == 0 ? 0 :
+                                 pVertexInfo[drawCount - 1].vertexCount);
 #endif
-   }
-
    update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL);
 
    trace_intel_end_draw_multi(&cmd_buffer->trace, count);
 
@@ -4233,12 +4243,15 @@ void genX(CmdDrawMultiIndexedEXT)(
             prim.StartInstanceLocation = firstInstance;
             prim.BaseVertexLocation = draw->vertexOffset;
          }
-#if GFX_VERx10 == 125
-         genX(emit_dummy_post_sync_op)(cmd_buffer, draw->indexCount);
-#endif
       }
    }
 
+#if GFX_VERx10 == 125
+   genX(emit_dummy_post_sync_op)(cmd_buffer,
+                                 drawCount == 0 ? 0 :
+                                 pIndexInfo[drawCount - 1].indexCount);
+#endif
+
    update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, RANDOM);
 
    trace_intel_end_draw_indexed_multi(&cmd_buffer->trace, count);
@@ -6006,28 +6019,6 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
     */
    if (pipeline == GPGPU)
       anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), t);
-
-   if (pipeline == _3D) {
-      /* There is a mid-object preemption workaround which requires you to
-       * re-emit MEDIA_VFE_STATE after switching from GPGPU to 3D. However,
-       * even without preemption, we have issues with geometry flickering when
-       * GPGPU and 3D are back-to-back and this seems to fix it. We don't
-       * really know why.
-       */
-      anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_VFE_STATE), vfe) {
-         vfe.MaximumNumberofThreads =
-            devinfo->max_cs_threads * devinfo->subslice_total - 1;
-         vfe.NumberofURBEntries = 2;
-         vfe.URBEntryAllocationSize = 2;
-      }
-
-      /* We just emitted a dummy MEDIA_VFE_STATE so now that packet is
-       * invalid. Set the compute pipeline to dirty to force a re-emit of the
-       * pipeline in case we get back-to-back dispatch calls with the same
-       * pipeline and a PIPELINE_SELECT in between.
-       */
-      cmd_buffer->state.compute.pipeline_dirty = true;
-   }
 #endif
 
    /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
@@ -6056,6 +6047,37 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
                              "flush and invalidate for PIPELINE_SELECT");
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
 
+#if GFX_VER == 9
+   if (pipeline == _3D) {
+      /* There is a mid-object preemption workaround which requires you to
+       * re-emit MEDIA_VFE_STATE after switching from GPGPU to 3D. However,
+       * even without preemption, we have issues with geometry flickering when
+       * GPGPU and 3D are back-to-back and this seems to fix it. We don't
+       * really know why.
+       *
+       * Also, from the Sky Lake PRM Vol 2a, MEDIA_VFE_STATE:
+       *
+       *    "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
+       *     the only bits that are changed are scoreboard related ..."
+       *
+       * This is satisfied by applying pre-PIPELINE_SELECT pipe flushes above.
+       */
+      anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_VFE_STATE), vfe) {
+         vfe.MaximumNumberofThreads =
+            devinfo->max_cs_threads * devinfo->subslice_total - 1;
+         vfe.NumberofURBEntries = 2;
+         vfe.URBEntryAllocationSize = 2;
+      }
+
+      /* We just emitted a dummy MEDIA_VFE_STATE so now that packet is
+       * invalid. Set the compute pipeline to dirty to force a re-emit of the
+       * pipeline in case we get back-to-back dispatch calls with the same
+       * pipeline and a PIPELINE_SELECT in between.
+       */
+      cmd_buffer->state.compute.pipeline_dirty = true;
+   }
+#endif
+
    anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), ps) {
       ps.MaskBits = GFX_VER >= 12 ? 0x13 : 3;
       ps.MediaSamplerDOPClockGateEnable = GFX_VER >= 12;
@@ -7539,7 +7561,7 @@ genX(batch_emit_dummy_post_sync_op)(struct anv_batch *batch,
        primitive_topology == _3DPRIM_POINTLIST_BF ||
        primitive_topology == _3DPRIM_LINESTRIP_CONT ||
        primitive_topology == _3DPRIM_LINESTRIP_BF ||
-       primitive_topology == _3DPRIM_LINESTRIP_CONT_BF) ||
+       primitive_topology == _3DPRIM_LINESTRIP_CONT_BF) &&
       (vertex_count == 1 || vertex_count == 2)) {
      anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
         pc.PostSyncOperation = WriteImmediateData;
diff --git a/lib/mesa/src/intel/vulkan/genX_pipeline.c b/lib/mesa/src/intel/vulkan/genX_pipeline.c
index 1261a31d3..3dcc8b64e 100644
--- a/lib/mesa/src/intel/vulkan/genX_pipeline.c
+++ b/lib/mesa/src/intel/vulkan/genX_pipeline.c
@@ -792,77 +792,6 @@ const uint32_t genX(vk_to_intel_primitive_type)[] = {
    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY]  = _3DPRIM_TRISTRIP_ADJ,
 };
 
-static inline uint32_t *
-write_disabled_blend(uint32_t *state)
-{
-   struct GENX(BLEND_STATE_ENTRY) entry = {
-      .WriteDisableAlpha = true,
-      .WriteDisableRed = true,
-      .WriteDisableGreen = true,
-      .WriteDisableBlue = true,
-   };
-   GENX(BLEND_STATE_ENTRY_pack)(NULL, state, &entry);
-   return state + GENX(BLEND_STATE_ENTRY_length);
-}
-
-static void
-emit_cb_state(struct anv_graphics_pipeline *pipeline,
-              const struct vk_color_blend_state *cb,
-              const struct vk_multisample_state *ms)
-{
-   uint32_t surface_count = 0;
-   struct anv_pipeline_bind_map *map;
-   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
-      map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
-      surface_count = map->surface_count;
-   }
-
-   uint32_t *state_pos = pipeline->gfx8.blend_state;
-
-   state_pos += GENX(BLEND_STATE_length);
-   for (unsigned i = 0; i < surface_count; i++) {
-      struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];
-
-      /* All color attachments are at the beginning of the binding table */
-      if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
-         break;
-
-      /* We can have at most 8 attachments */
-      assert(i < MAX_RTS);
-
-      if (cb == NULL || binding->index >= cb->attachment_count) {
-         state_pos = write_disabled_blend(state_pos);
-         continue;
-      }
-
-      struct GENX(BLEND_STATE_ENTRY) entry = {
-         /* Vulkan specification 1.2.168, VkLogicOp:
-          *
-          *   "Logical operations are controlled by the logicOpEnable and
-          *    logicOp members of VkPipelineColorBlendStateCreateInfo. If
-          *    logicOpEnable is VK_TRUE, then a logical operation selected by
-          *    logicOp is applied between each color attachment and the
-          *    fragment’s corresponding output value, and blending of all
-          *    attachments is treated as if it were disabled."
-          *
-          * From the Broadwell PRM Volume 2d: Command Reference: Structures:
-          * BLEND_STATE_ENTRY:
-          *
-          *   "Enabling LogicOp and Color Buffer Blending at the same time is
-          *    UNDEFINED"
-          *
-          * Above is handled during emit since these states are dynamic.
-          */
-         .ColorClampRange = COLORCLAMP_RTFORMAT,
-         .PreBlendColorClampEnable = true,
-         .PostBlendColorClampEnable = true,
-      };
-
-      GENX(BLEND_STATE_ENTRY_pack)(NULL, state_pos, &entry);
-      state_pos += GENX(BLEND_STATE_ENTRY_length);
-   }
-}
-
 static void
 emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
                   const struct vk_input_assembly_state *ia,
@@ -1812,7 +1741,6 @@ genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
    emit_rs_state(pipeline, state->ia, state->rs, state->ms, state->rp,
                  urb_deref_block_size);
    emit_ms_state(pipeline, state->ms);
-   emit_cb_state(pipeline, state->cb, state->ms);
    compute_kill_pixel(pipeline, state->ms, state->rp);
 
    emit_3dstate_clip(pipeline, state->ia, state->vp, state->rs);
--
cgit v1.2.3