From d74844ad65da665ff38b2ad60a2fec6da3894557 Mon Sep 17 00:00:00 2001 From: Jonathan Gray Date: Tue, 29 Jan 2019 11:02:31 +0000 Subject: Import Mesa 18.3.2 --- lib/mesa/src/gallium/drivers/swr/meson.build | 272 ++++-------- .../swr/rasterizer/archrast/events_private.proto | 75 ++-- .../drivers/swr/rasterizer/codegen/meson.build | 35 +- .../swr/rasterizer/jitter/builder_gfx_mem.cpp | 198 ++------- .../swr/rasterizer/jitter/builder_gfx_mem.h | 40 +- .../drivers/swr/rasterizer/jitter/builder_mem.cpp | 62 +-- .../drivers/swr/rasterizer/jitter/builder_mem.h | 53 +-- .../rasterizer/jitter/functionpasses/lower_x86.cpp | 454 +++++++-------------- .../swr/rasterizer/jitter/functionpasses/passes.h | 1 - .../drivers/swr/rasterizer/jitter/jit_pch.hpp | 35 +- .../drivers/swr/rasterizer/jitter/meson.build | 12 +- .../drivers/swr/rasterizer/memory/InitMemory.cpp | 11 - .../drivers/swr/rasterizer/memory/InitMemory.h | 50 --- 13 files changed, 384 insertions(+), 914 deletions(-) (limited to 'lib/mesa/src/gallium') diff --git a/lib/mesa/src/gallium/drivers/swr/meson.build b/lib/mesa/src/gallium/drivers/swr/meson.build index ac712d804..b95c8bc1b 100644 --- a/lib/mesa/src/gallium/drivers/swr/meson.build +++ b/lib/mesa/src/gallium/drivers/swr/meson.build @@ -1,4 +1,4 @@ -# Copyright © 2017-2020 Intel Corporation +# Copyright © 2017-2018 Intel Corporation # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -82,9 +82,7 @@ files_swr_mesa = files( 'rasterizer/jitter/streamout_jit.cpp', 'rasterizer/jitter/streamout_jit.h', 'rasterizer/jitter/shader_lib/DebugOutput.cpp', - 'rasterizer/jitter/shader_lib/Scatter.cpp', 'rasterizer/jitter/functionpasses/lower_x86.cpp', - 'rasterizer/memory/SurfaceState.h' ) files_swr_arch = files( @@ -129,8 +127,6 @@ files_swr_arch = files( 'rasterizer/core/state.h', 'rasterizer/core/state_funcs.h', 'rasterizer/core/tessellator.h', - 'rasterizer/core/tessellator.hpp', - 'rasterizer/core/tessellator.cpp', 'rasterizer/core/threads.cpp', 'rasterizer/core/threads.h', 'rasterizer/core/tilemgr.cpp', @@ -157,12 +153,10 @@ files_swr_arch = files( 'rasterizer/memory/tilingtraits.h', 'rasterizer/memory/InitMemory.h', 'rasterizer/memory/InitMemory.cpp', - 'rasterizer/memory/SurfaceState.h' ) swr_context_files = files('swr_context.h') swr_state_files = files('rasterizer/core/state.h') -swr_surf_state_files = files('rasterizer/memory/SurfaceState.h') swr_event_proto_files = files('rasterizer/archrast/events.proto') swr_event_pproto_files = files('rasterizer/archrast/events_private.proto') swr_gen_backend_files = files('rasterizer/codegen/templates/gen_backend.cpp') @@ -187,201 +181,124 @@ swr_incs = include_directories( 'rasterizer/archrast', 'rasterizer', ) -swr_cpp_args = [] +swr_cpp_args = [cpp_vis_args] if cpp.has_argument('-fno-strict-aliasing') swr_cpp_args += '-fno-strict-aliasing' endif -if cpp.has_argument('-Wno-aligned-new') - swr_cpp_args += '-Wno-aligned-new' -endif - swr_arch_libs = [] -swr_defines = [] +swr_arch_defines = [] swr_avx_args = cpp.first_supported_argument( '-target-cpu=sandybridge', '-mavx', '-march=core-avx', '-tp=sandybridge', - '/arch:AVX', + prefix : ''' + #if !defined(__AVX__) + # error + #endif ''', ) if swr_avx_args == [] error('Cannot find AVX support for swr. (these are required for SWR an all architectures.)') endif - -shared_swr = get_option('shared-swr') -if not shared_swr - if with_swr_arches.length() > 1 - error('When SWR is linked statically only one architecture is allowed.') - endif - swr_defines += '-DHAVE_SWR_BUILTIN' +if with_swr_arches.contains('avx') + swr_arch_defines += '-DHAVE_SWR_AVX' + swr_arch_libs += shared_library( + 'swrAVX', + [files_swr_common, files_swr_arch], + cpp_args : [swr_cpp_args, swr_avx_args, '-DKNOB_ARCH=KNOB_ARCH_AVX'], + link_args : [ld_args_gc_sections], + include_directories : [swr_incs], + dependencies : [dep_thread, dep_llvm], + version : '0.0.0', + install : true, + ) endif -if with_swr_arches.contains('skx') - swr_skx_args = cpp.first_supported_argument( - '-march=skylake-avx512', '-target-cpu=x86-skylake', '-xCORE-AVX512', +if with_swr_arches.contains('avx2') + swr_avx2_args = cpp.first_supported_argument( + '-target-cpu=haswell', '-march=core-avx2', '-tp=haswell', + prefix : ''' + #if !defined(__AVX2__) + # error + #endif ''', ) - if swr_skx_args == [] - error('Cannot find SKX support for swr.') + if swr_avx2_args == [] + if cpp.has_argument(['-mavx2', '-mfma', '-mbmi2', '-mf16c'], + prefix : ''' + #if !defined(__AVX2__) + # error + #endif ''') + swr_avx2_args = ['-mavx2', '-mfma', '-mbmi2', '-mf16c'] + else + error('Cannot find AVX2 support for swr.') + endif endif - swr_defines += '-DHAVE_SWR_SKX' - if shared_swr - swr_arch_libs += shared_library( - 'swrSKX', - [files_swr_common, files_swr_arch], - cpp_args : [ - cpp_msvc_compat_args, swr_cpp_args, swr_skx_args, - '-DKNOB_ARCH=KNOB_ARCH_AVX512', - ], - gnu_symbol_visibility : 'hidden', - link_args : [ld_args_gc_sections], - include_directories : [swr_incs], - dependencies : [dep_thread, dep_llvm], - version : '0.0.0', - soversion : host_machine.system() == 'windows' ? '' : '0', - install : true, - name_prefix : host_machine.system() == 'windows' ? '' : 'lib', - ) - else - swr_arch_libs += static_library( - 'swrSKX', - [files_swr_common, files_swr_arch], - cpp_args : [ - cpp_msvc_compat_args, swr_cpp_args, swr_skx_args, - '-DKNOB_ARCH=KNOB_ARCH_AVX512', - ], - gnu_symbol_visibility : 'hidden', - link_args : [ld_args_gc_sections], - include_directories : [swr_incs], - dependencies : [dep_thread, dep_llvm], - ) - endif + swr_arch_defines += '-DHAVE_SWR_AVX2' + swr_arch_libs += shared_library( + 'swrAVX2', + [files_swr_common, files_swr_arch], + cpp_args : [swr_cpp_args, swr_avx2_args, '-DKNOB_ARCH=KNOB_ARCH_AVX2'], + link_args : [ld_args_gc_sections], + include_directories : [swr_incs], + dependencies : [dep_thread, dep_llvm], + version : '0.0.0', + install : true, + ) endif if with_swr_arches.contains('knl') swr_knl_args = cpp.first_supported_argument( - '-march=knl', '-target-cpu=mic-knl', '-xMIC-AVX512', + '-target-cpu=mic-knl', '-march=knl', '-xMIC-AVX512', + prefix : ''' + #if !defined(__AVX512F__) || !defined(__AVX512ER__) + # error + #endif ''', ) if swr_knl_args == [] error('Cannot find KNL support for swr.') endif - swr_defines += '-DHAVE_SWR_KNL' - if shared_swr - swr_arch_libs += shared_library( - 'swrKNL', - [files_swr_common, files_swr_arch], - cpp_args : [ - cpp_msvc_compat_args, swr_cpp_args, swr_knl_args, - '-DKNOB_ARCH=KNOB_ARCH_AVX512', '-DSIMD_ARCH_KNIGHTS', - ], - gnu_symbol_visibility : 'hidden', - link_args : [ld_args_gc_sections], - include_directories : [swr_incs], - dependencies : [dep_thread, dep_llvm], - version : '0.0.0', - soversion : host_machine.system() == 'windows' ? '' : '0', - install : true, - name_prefix : host_machine.system() == 'windows' ? '' : 'lib', - ) - else - swr_arch_libs += static_library( - 'swrKNL', - [files_swr_common, files_swr_arch], - cpp_args : [ - cpp_msvc_compat_args, swr_cpp_args, swr_knl_args, - '-DKNOB_ARCH=KNOB_ARCH_AVX512', '-DSIMD_ARCH_KNIGHTS', - ], - gnu_symbol_visibility : 'hidden', - link_args : [ld_args_gc_sections], - include_directories : [swr_incs], - dependencies : [dep_thread, dep_llvm], - ) - endif + swr_arch_defines += '-DHAVE_SWR_KNL' + swr_arch_libs += shared_library( + 'swrKNL', + [files_swr_common, files_swr_arch], + cpp_args : [ + swr_cpp_args, swr_knl_args, '-DKNOB_ARCH=KNOB_ARCH_AVX512', + '-DKNOB_ARCH_KNIGHTS', + ], + link_args : [ld_args_gc_sections], + include_directories : [swr_incs], + dependencies : [dep_thread, dep_llvm], + version : '0.0.0', + install : true, + ) endif - -if with_swr_arches.contains('avx2') - swr_avx2_args = cpp.first_supported_argument( - '-target-cpu=haswell', '-march=core-avx2', '-tp=haswell', '/arch:AVX2', +if with_swr_arches.contains('skx') + swr_skx_args = cpp.first_supported_argument( + '-target-cpu=x86-skylake', '-march=skylake-avx512', '-xCORE-AVX512', + prefix : ''' + #if !defined(__AVX512F__) || !defined(__AVX512BW__) + # error + #endif ''', ) - if swr_avx2_args == [] - if cpp.has_argument(['-mavx2', '-mfma', '-mbmi2', '-mf16c']) - swr_avx2_args = ['-mavx2', '-mfma', '-mbmi2', '-mf16c'] - else - error('Cannot find AVX2 support for swr.') - endif - endif - - swr_defines += '-DHAVE_SWR_AVX2' - if shared_swr - swr_arch_libs += shared_library( - 'swrAVX2', - [files_swr_common, files_swr_arch], - cpp_args : [ - cpp_msvc_compat_args, swr_cpp_args, swr_avx2_args, - '-DKNOB_ARCH=KNOB_ARCH_AVX2', - ], - gnu_symbol_visibility : 'hidden', - link_args : [ld_args_gc_sections], - include_directories : [swr_incs], - dependencies : [dep_thread, dep_llvm], - version : '0.0.0', - soversion : host_machine.system() == 'windows' ? '' : '0', - install : true, - name_prefix : host_machine.system() == 'windows' ? '' : 'lib', - ) - else - swr_arch_libs += static_library( - 'swrAVX2', - [files_swr_common, files_swr_arch], - cpp_args : [ - cpp_msvc_compat_args, swr_cpp_args, swr_avx2_args, - '-DKNOB_ARCH=KNOB_ARCH_AVX2', - ], - gnu_symbol_visibility : 'hidden', - link_args : [ld_args_gc_sections], - include_directories : [swr_incs], - dependencies : [dep_thread, dep_llvm], - ) + if swr_skx_args == [] + error('Cannot find SKX support for swr.') endif -endif -if with_swr_arches.contains('avx') - swr_defines += '-DHAVE_SWR_AVX' - if shared_swr - swr_arch_libs += shared_library( - 'swrAVX', - [files_swr_common, files_swr_arch], - cpp_args : [ - cpp_msvc_compat_args, swr_cpp_args, swr_avx_args, - '-DKNOB_ARCH=KNOB_ARCH_AVX', - ], - gnu_symbol_visibility : 'hidden', - link_args : [ld_args_gc_sections], - include_directories : [swr_incs], - dependencies : [dep_thread, dep_llvm], - version : '0.0.0', - soversion : host_machine.system() == 'windows' ? '' : '0', - install : true, - name_prefix : host_machine.system() == 'windows' ? '' : 'lib', - ) - else - swr_arch_libs += static_library( - 'swrAVX', - [files_swr_common, files_swr_arch], - cpp_args : [ - cpp_msvc_compat_args, swr_cpp_args, swr_avx_args, - '-DKNOB_ARCH=KNOB_ARCH_AVX', - ], - gnu_symbol_visibility : 'hidden', - link_args : [ld_args_gc_sections], - include_directories : [swr_incs], - dependencies : [dep_thread, dep_llvm], - ) - endif + swr_arch_defines += '-DHAVE_SWR_SKX' + swr_arch_libs += shared_library( + 'swrSKX', + [files_swr_common, files_swr_arch], + cpp_args : [swr_cpp_args, swr_skx_args, '-DKNOB_ARCH=KNOB_ARCH_AVX512'], + link_args : [ld_args_gc_sections], + include_directories : [swr_incs], + dependencies : [dep_thread, dep_llvm], + version : '0.0.0', + install : true, + ) endif - if swr_arch_libs == [] error('SWR configured, but no SWR architectures configured') endif @@ -391,21 +308,12 @@ libmesaswr = static_library( 'mesaswr', [files_swr_mesa, files_swr_common, gen_knobs_h, gen_knobs_cpp, gen_builder_hpp, gen_builder_meta_hpp, gen_builder_intrin_hpp], - cpp_args : [ - cpp_msvc_compat_args, swr_cpp_args, swr_avx_args, - swr_defines, - ], - gnu_symbol_visibility : 'hidden', - include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, swr_incs], - dependencies : [dep_llvm, idep_mesautil], + cpp_args : [cpp_vis_args, swr_cpp_args, swr_avx_args, swr_arch_defines], + include_directories : [inc_common, swr_incs], + dependencies : dep_llvm, ) -link_libs = [libmesaswr] -if not shared_swr - link_libs += swr_arch_libs -endif - driver_swr = declare_dependency( compile_args : '-DGALLIUM_SWR', - link_with : link_libs + link_with : libmesaswr, ) diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto b/lib/mesa/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto index b57d5c428..f5cfb470a 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto @@ -24,99 +24,84 @@ # ArchRast is to not pollute the Rasty code with lots of calculations, etc. that # are needed to compute per draw statistics, etc. -event PipelineStats::EarlyDepthStencilInfoSingleSample +event EarlyDepthStencilInfoSingleSample { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event PipelineStats::EarlyDepthStencilInfoSampleRate +event EarlyDepthStencilInfoSampleRate { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event PipelineStats::EarlyDepthStencilInfoNullPS +event EarlyDepthStencilInfoNullPS { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event PipelineStats::LateDepthStencilInfoSingleSample +event LateDepthStencilInfoSingleSample { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event PipelineStats::LateDepthStencilInfoSampleRate +event LateDepthStencilInfoSampleRate { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event PipelineStats::LateDepthStencilInfoNullPS +event LateDepthStencilInfoNullPS { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event PipelineStats::EarlyDepthInfoPixelRate +event EarlyDepthInfoPixelRate { uint64_t depthPassCount; uint64_t activeLanes; }; -event PipelineStats::LateDepthInfoPixelRate +event LateDepthInfoPixelRate { uint64_t depthPassCount; uint64_t activeLanes; }; -event PipelineStats::BackendDrawEndEvent +event BackendDrawEndEvent { uint32_t drawId; }; -event PipelineStats::FrontendDrawEndEvent +event FrontendDrawEndEvent { uint32_t drawId; }; -event Memory::MemoryAccessEvent -{ - uint32_t drawId; - uint64_t tsc; - uint64_t ptr; - uint32_t size; - uint8_t isRead; - uint8_t client; -}; - -event Memory::MemoryStatsEndEvent -{ - uint32_t drawId; -}; - -event PipelineStats::TessPrimCount +event TessPrimCount { uint64_t primCount; }; -event PipelineStats::RasterTileCount +event RasterTileCount { uint32_t drawId; uint64_t rasterTiles; }; -event PipelineStats::GSPrimInfo +event GSPrimInfo { uint64_t inputPrimCount; uint64_t primGeneratedCount; @@ -128,14 +113,14 @@ event PipelineStats::GSPrimInfo // Trivial reject is numInvocations - pop_cnt32(validMask) // Trivial accept is validMask & ~clipMask // Must clip count is pop_cnt32(clipMask) -event PipelineStats::ClipInfoEvent +event ClipInfoEvent { uint32_t numInvocations; uint32_t validMask; uint32_t clipMask; }; -event PipelineStats::CullInfoEvent +event CullInfoEvent { uint32_t drawId; uint64_t degeneratePrimMask; @@ -143,14 +128,14 @@ event PipelineStats::CullInfoEvent uint32_t validMask; }; -event PipelineStats::AlphaInfoEvent +event AlphaInfoEvent { uint32_t drawId; uint32_t alphaTestEnable; uint32_t alphaBlendEnable; }; -event PipelineStats::DrawInstancedEvent +event DrawInstancedEvent { uint32_t drawId; uint32_t topology; @@ -165,7 +150,7 @@ event PipelineStats::DrawInstancedEvent uint32_t splitId; // Split draw count or id. }; -event PipelineStats::DrawIndexedInstancedEvent +event DrawIndexedInstancedEvent { uint32_t drawId; uint32_t topology; @@ -181,32 +166,32 @@ event PipelineStats::DrawIndexedInstancedEvent uint32_t splitId; // Split draw count or id. }; -event ShaderStats::VSStats +event VSStats { - HANDLE hStats; // SWR_SHADER_STATS + uint32_t numInstExecuted; }; -event ShaderStats::HSStats +event HSStats { - HANDLE hStats; // SWR_SHADER_STATS + uint32_t numInstExecuted; }; -event ShaderStats::DSStats +event DSStats { - HANDLE hStats; // SWR_SHADER_STATS + uint32_t numInstExecuted; }; -event ShaderStats::GSStats +event GSStats { - HANDLE hStats; // SWR_SHADER_STATS + uint32_t numInstExecuted; }; -event ShaderStats::PSStats +event PSStats { - HANDLE hStats; // SWR_SHADER_STATS + uint32_t numInstExecuted; }; -event ShaderStats::CSStats +event CSStats { - HANDLE hStats; // SWR_SHADER_STATS + uint32_t numInstExecuted; }; \ No newline at end of file diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/meson.build b/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/meson.build index daf79ed4c..282751760 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/meson.build +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/meson.build @@ -48,8 +48,7 @@ files_swr_common += [ ] foreach x : [[swr_context_files, 'gen_swr_context_llvm.h'], - [swr_state_files, 'gen_state_llvm.h'], - [swr_surf_state_files, 'gen_surf_state_llvm.h']] + [swr_state_files, 'gen_state_llvm.h']] files_swr_common += custom_target( x[1], input : ['gen_llvm_types.py', x[0]], @@ -62,16 +61,26 @@ foreach x : [[swr_context_files, 'gen_swr_context_llvm.h'], ) endforeach -ar_output_filenames = ['gen_ar_event.hpp', 'gen_ar_event.cpp', 'gen_ar_eventhandler.hpp', 'gen_ar_eventhandlerfile.hpp'] -ar_template_filenames = [] -foreach fname : ar_output_filenames - ar_template_filenames += join_paths('templates', fname) +foreach x : [['gen_ar_event.hpp', '--gen_event_hpp'], + ['gen_ar_event.cpp', '--gen_event_cpp'], + ['gen_ar_eventhandler.hpp', '--gen_eventhandler_h'], + ['gen_ar_eventhandlerfile.hpp', '--gen_eventhandlerfile_h']] + files_swr_common += custom_target( + x[0], + input : ['gen_archrast.py', swr_event_proto_files, swr_event_pproto_files], + output : x[0], + command : [ + prog_python, + '@INPUT0@', + '--proto', '@INPUT1@', + '--proto_private', '@INPUT2@', + '--output', '@OUTPUT@', + x[1], + ], + depend_files : files( + join_paths('templates', x[0]), + 'gen_common.py', + ), + ) endforeach -files_swr_common += custom_target( - 'gen_archrast', - input : ['gen_archrast.py', swr_event_proto_files, swr_event_pproto_files], - output : ar_output_filenames, - command : [prog_python, '@INPUT0@', '--proto', '@INPUT1@', '@INPUT2@', '--output-dir', meson.current_build_dir()], - depend_files : files('gen_common.py', ar_template_filenames) -) diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp index b67ffbfa7..c68f3b9a6 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp @@ -32,6 +32,7 @@ #include "common/rdtsc_buckets.h" #include "builder_gfx_mem.h" + namespace SwrJit { using namespace llvm; @@ -41,9 +42,7 @@ namespace SwrJit mpTranslationFuncTy = nullptr; mpfnTranslateGfxAddressForRead = nullptr; mpfnTranslateGfxAddressForWrite = nullptr; - mpfnTrackMemAccess = nullptr; mpParamSimDC = nullptr; - mpWorkerData = nullptr; } @@ -51,12 +50,13 @@ namespace SwrJit { } - void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage) + void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage) { - SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT::MEM_CLIENT_INTERNAL), + SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT_INTERNAL), "Internal memory should not be gfxptr_t."); } + ////////////////////////////////////////////////////////////////////////// /// @brief Generate a masked gather operation in LLVM IR. If not /// supported on the underlying platform, emulate it with loads @@ -70,7 +70,7 @@ namespace SwrJit Value* vIndices, Value* vMask, uint8_t scale, - MEM_CLIENT usage) + JIT_MEM_CLIENT usage) { // address may be coming in as 64bit int now so get the pointer if (pBase->getType() == mInt64Ty) @@ -95,7 +95,7 @@ namespace SwrJit Value* vIndices, Value* vMask, uint8_t scale, - MEM_CLIENT usage) + JIT_MEM_CLIENT usage) { // address may be coming in as 64bit int now so get the pointer @@ -109,7 +109,7 @@ namespace SwrJit } void BuilderGfxMem::SCATTERPS( - Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage) + Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, JIT_MEM_CLIENT usage) { // address may be coming in as 64bit int now so get the pointer @@ -118,95 +118,46 @@ namespace SwrJit pDst = INT_TO_PTR(pDst, PointerType::get(mInt8Ty, 0)); } - Builder::SCATTERPS(pDst, BITCAST(vSrc, mSimdFP32Ty), vOffsets, vMask, usage); + Builder::SCATTERPS(pDst, vSrc, vOffsets, vMask, usage); } + Value* BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset) { return ADD(base, offset); } - Value* BuilderGfxMem::GEP(Value* Ptr, Value* Idx, Type* Ty, bool isReadOnly, const Twine& Name) + Value* BuilderGfxMem::GEP(Value* Ptr, Value* Idx, Type* Ty, const Twine& Name) { - bool xlate = (Ptr->getType() == mInt64Ty); - if (xlate) - { - Ptr = INT_TO_PTR(Ptr, Ty); - Ptr = Builder::GEP(Ptr, Idx, nullptr, isReadOnly, Name); - Ptr = PTR_TO_INT(Ptr, mInt64Ty); - if (isReadOnly) - { - Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); - } - else - { - Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForWrite); - } - } - else - { - Ptr = Builder::GEP(Ptr, Idx, nullptr, isReadOnly, Name); - } - return Ptr; + Ptr = TranslationHelper(Ptr, Ty); + return Builder::GEP(Ptr, Idx, nullptr, Name); } Value* BuilderGfxMem::GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name) { - bool xlate = (Ptr->getType() == mInt64Ty); - if (xlate) - { - Ptr = INT_TO_PTR(Ptr, Ty); - Ptr = Builder::GEP(Ty, Ptr, Idx, Name); - Ptr = PTR_TO_INT(Ptr, mInt64Ty); - Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); - } - else - { - Ptr = Builder::GEP(Ty, Ptr, Idx, Name); - } - return Ptr; + Ptr = TranslationHelper(Ptr, Ty); + return Builder::GEP(Ty, Ptr, Idx, Name); } Value* BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list& indexList, Type* Ty) { - bool xlate = (Ptr->getType() == mInt64Ty); - if (xlate) - { - Ptr = INT_TO_PTR(Ptr, Ty); - Ptr = Builder::GEP(Ptr, indexList); - Ptr = PTR_TO_INT(Ptr, mInt64Ty); - Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); - } - else - { - Ptr = Builder::GEP(Ptr, indexList); - } - return Ptr; + Ptr = TranslationHelper(Ptr, Ty); + return Builder::GEP(Ptr, indexList); } Value* BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list& indexList, Type* Ty) { - bool xlate = (Ptr->getType() == mInt64Ty); - if (xlate) - { - Ptr = INT_TO_PTR(Ptr, Ty); - Ptr = Builder::GEP(Ptr, indexList); - Ptr = PTR_TO_INT(Ptr, mInt64Ty); - Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); - } - else - { - Ptr = Builder::GEP(Ptr, indexList); - } - return Ptr; + Ptr = TranslationHelper(Ptr, Ty); + return Builder::GEP(Ptr, indexList); } - Value* BuilderGfxMem::TranslationHelper(Value* Ptr, Type* Ty, Value* pfnTranslateGfxAddress) + Value* BuilderGfxMem::TranslationHelper(Value* Ptr, Type* Ty) { SWR_ASSERT(!(Ptr->getType() == mInt64Ty && Ty == nullptr), "Access of GFX pointers must have non-null type specified."); + // address may be coming in as 64bit int now so get the pointer if (Ptr->getType() == mInt64Ty) { @@ -216,78 +167,29 @@ namespace SwrJit return Ptr; } - void BuilderGfxMem::TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead) - { -#if defined(KNOB_ENABLE_AR) - if (!KNOB_AR_ENABLE_MEMORY_EVENTS) - { - return; - } - - Value* tmpPtr; - // convert actual pointers to int64. - uint32_t size = 0; - - if (Ptr->getType() == mInt64Ty) - { - DataLayout dataLayout(JM()->mpCurrentModule); - size = (uint32_t)dataLayout.getTypeAllocSize(Ty); - - tmpPtr = Ptr; - } - else - { - DataLayout dataLayout(JM()->mpCurrentModule); - size = (uint32_t)dataLayout.getTypeAllocSize(Ptr->getType()); - - tmpPtr = PTR_TO_INT(Ptr, mInt64Ty); - } - - // There are some shader compile setups where there's no translation functions set up. - // This would be a situation where the accesses are to internal rasterizer memory and won't - // be logged. - // TODO: we may wish to revisit this for URB reads/writes, though. - if (mpfnTrackMemAccess) - { - SWR_ASSERT(mpWorkerData != nullptr); - CALL(mpfnTrackMemAccess, - {mpParamSimDC, - mpWorkerData, - tmpPtr, - C((uint32_t)size), - C((uint8_t)isRead), - C((uint32_t)usage)}); - } -#endif - - return; - } - - LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, MEM_CLIENT usage) + LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage) { AssertGFXMemoryParams(Ptr, usage); - TrackerHelper(Ptr, Ty, usage, true); - Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); + Ptr = TranslationHelper(Ptr, Ty); return Builder::LOAD(Ptr, Name); } - LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, MEM_CLIENT usage) + LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage) { AssertGFXMemoryParams(Ptr, usage); - TrackerHelper(Ptr, Ty, usage, true); - Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); + Ptr = TranslationHelper(Ptr, Ty); return Builder::LOAD(Ptr, Name); } + LoadInst* BuilderGfxMem::LOAD( - Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, MEM_CLIENT usage) + Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage) { AssertGFXMemoryParams(Ptr, usage); - TrackerHelper(Ptr, Ty, usage, true); - Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); + Ptr = TranslationHelper(Ptr, Ty); return Builder::LOAD(Ptr, isVolatile, Name); } @@ -295,7 +197,7 @@ namespace SwrJit const std::initializer_list& offset, const llvm::Twine& name, Type* Ty, - MEM_CLIENT usage) + JIT_MEM_CLIENT usage) { AssertGFXMemoryParams(BasePtr, usage); @@ -320,59 +222,25 @@ namespace SwrJit return LOAD(BasePtr, name, Ty, usage); } + CallInst* BuilderGfxMem::MASKED_LOAD(Value* Ptr, unsigned Align, Value* Mask, Value* PassThru, const Twine& Name, Type* Ty, - MEM_CLIENT usage) + JIT_MEM_CLIENT usage) { AssertGFXMemoryParams(Ptr, usage); - TrackerHelper(Ptr, Ty, usage, true); - Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); + Ptr = TranslationHelper(Ptr, Ty); return Builder::MASKED_LOAD(Ptr, Align, Mask, PassThru, Name, Ty, usage); } - StoreInst* - BuilderGfxMem::STORE(Value* Val, Value* Ptr, bool isVolatile, Type* Ty, MEM_CLIENT usage) - { - AssertGFXMemoryParams(Ptr, usage); - TrackerHelper(Ptr, Ty, usage, false); - - Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); - return Builder::STORE(Val, Ptr, isVolatile, Ty, usage); - } - - StoreInst* BuilderGfxMem::STORE(Value* Val, - Value* BasePtr, - const std::initializer_list& offset, - Type* Ty, - MEM_CLIENT usage) - { - AssertGFXMemoryParams(BasePtr, usage); - TrackerHelper(BasePtr, Ty, usage, false); - - BasePtr = TranslationHelper(BasePtr, Ty, mpfnTranslateGfxAddressForRead); - return Builder::STORE(Val, BasePtr, offset, Ty, usage); - } - - CallInst* BuilderGfxMem::MASKED_STORE( - Value* Val, Value* Ptr, unsigned Align, Value* Mask, Type* Ty, MEM_CLIENT usage) - { - AssertGFXMemoryParams(Ptr, usage); - - TrackerHelper(Ptr, Ty, usage, false); - - Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); - return Builder::MASKED_STORE(Val, Ptr, Align, Mask, Ty, usage); - } - Value* BuilderGfxMem::TranslateGfxAddressForRead(Value* xpGfxAddress, Type* PtrTy, const Twine& Name, - MEM_CLIENT /* usage */) + JIT_MEM_CLIENT /* usage */) { if (PtrTy == nullptr) { @@ -384,7 +252,7 @@ namespace SwrJit Value* BuilderGfxMem::TranslateGfxAddressForWrite(Value* xpGfxAddress, Type* PtrTy, const Twine& Name, - MEM_CLIENT /* usage */) + JIT_MEM_CLIENT /* usage */) { if (PtrTy == nullptr) { diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h index c361959b7..aefbbef9f 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h @@ -41,7 +41,7 @@ namespace SwrJit BuilderGfxMem(JitManager* pJitMgr); virtual ~BuilderGfxMem() {} - virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, bool isReadOnly = true, const Twine& Name = ""); + virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = ""); virtual Value* GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name = ""); virtual Value* GEP(Value* Ptr, const std::initializer_list& indexList, Type* Ty = nullptr); @@ -51,21 +51,22 @@ namespace SwrJit virtual LoadInst* LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, - MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* Ptr, const Twine& Name = "", Type* Ty = nullptr, - MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* Ptr, bool isVolatile, const Twine& Name = "", Type* Ty = nullptr, - MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* BasePtr, const std::initializer_list& offset, const llvm::Twine& Name = "", Type* Ty = nullptr, - MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + virtual CallInst* MASKED_LOAD(Value* Ptr, unsigned Align, @@ -73,64 +74,57 @@ namespace SwrJit Value* PassThru = nullptr, const Twine& Name = "", Type* Ty = nullptr, - MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); - - virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); - - virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); - - virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); virtual Value* GATHERPS(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, - MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); virtual Value* GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, - MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); virtual void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, - MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + Value* TranslateGfxAddressForRead(Value* xpGfxAddress, Type* PtrTy = nullptr, const Twine& Name = "", - MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); Value* TranslateGfxAddressForWrite(Value* xpGfxAddress, Type* PtrTy = nullptr, const Twine& Name = "", - MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); - + JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + + protected: - void AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage); + void AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage); virtual void NotifyPrivateContextSet(); virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset); - Value* TranslationHelper(Value* Ptr, Type* Ty, Value* pfnTranslateGfxAddress); - void TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead); + Value* TranslationHelper(Value* Ptr, Type* Ty); FunctionType* GetTranslationFunctionType() { return mpTranslationFuncTy; } Value* GetTranslationFunctionForRead() { return mpfnTranslateGfxAddressForRead; } Value* GetTranslationFunctionForWrite() { return mpfnTranslateGfxAddressForWrite; } Value* GetParamSimDC() { return mpParamSimDC; } - Value* mpWorkerData; private: FunctionType* mpTranslationFuncTy; Value* mpfnTranslateGfxAddressForRead; Value* mpfnTranslateGfxAddressForWrite; Value* mpParamSimDC; - Value* mpfnTrackMemAccess; }; } // namespace SwrJit diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp index b5eb0a782..94489f1c7 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp @@ -29,19 +29,20 @@ ******************************************************************************/ #include "jit_pch.hpp" #include "builder.h" +#include "common/rdtsc_buckets.h" #include namespace SwrJit { - void Builder::AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage) + void Builder::AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage) { SWR_ASSERT( ptr->getType() != mInt64Ty, "Address appears to be GFX access. Requires translation through BuilderGfxMem."); } - Value* Builder::GEP(Value* Ptr, Value* Idx, Type* Ty, bool isReadOnly, const Twine& Name) + Value* Builder::GEP(Value* Ptr, Value* Idx, Type* Ty, const Twine& Name) { return IRB()->CreateGEP(Ptr, Idx, Name); } @@ -93,26 +94,26 @@ namespace SwrJit return IN_BOUNDS_GEP(ptr, indices); } - LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, MEM_CLIENT usage) + LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage) { AssertMemoryUsageParams(Ptr, usage); return IRB()->CreateLoad(Ptr, Name); } - LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, MEM_CLIENT usage) + LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage) { AssertMemoryUsageParams(Ptr, usage); return IRB()->CreateLoad(Ptr, Name); } - LoadInst* Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, MEM_CLIENT usage) + LoadInst* Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, JIT_MEM_CLIENT usage) { AssertMemoryUsageParams(Ptr, usage); return IRB()->CreateLoad(Ty, Ptr, Name); } LoadInst* - Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, MEM_CLIENT usage) + Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage) { AssertMemoryUsageParams(Ptr, usage); return IRB()->CreateLoad(Ptr, isVolatile, Name); @@ -122,7 +123,7 @@ namespace SwrJit const std::initializer_list& indices, const llvm::Twine& name, Type* Ty, - MEM_CLIENT usage) + JIT_MEM_CLIENT usage) { std::vector valIndices; for (auto i : indices) @@ -141,7 +142,7 @@ namespace SwrJit } StoreInst* - Builder::STORE(Value* val, Value* basePtr, const std::initializer_list& indices, Type* Ty, MEM_CLIENT usage) + Builder::STORE(Value* val, Value* basePtr, const std::initializer_list& indices) { std::vector valIndices; for (auto i : indices) @@ -186,7 +187,7 @@ namespace SwrJit Value* vIndices, Value* vMask, uint8_t scale, - MEM_CLIENT usage) + JIT_MEM_CLIENT usage) { AssertMemoryUsageParams(pBase, usage); @@ -206,7 +207,7 @@ namespace SwrJit Value* vIndices, Value* vMask, uint8_t scale, - MEM_CLIENT usage) + JIT_MEM_CLIENT usage) { AssertMemoryUsageParams(pBase, usage); @@ -234,12 +235,7 @@ namespace SwrJit /// @param pVecPassthru - SIMD wide vector of values to load when lane is inactive Value* Builder::GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* pVecPassthru) { - return MASKED_GATHER(pVecSrcPtr, AlignType(4), pVecMask, pVecPassthru); - } - - void Builder::SCATTER_PTR(Value* pVecDstPtr, Value* pVecSrc, Value* pVecMask) - { - MASKED_SCATTER(pVecSrc, pVecDstPtr, AlignType(4), pVecMask); + return MASKED_GATHER(pVecSrcPtr, 4, pVecMask, pVecPassthru); } void Builder::Gather4(const SWR_FORMAT format, @@ -248,7 +244,7 @@ namespace SwrJit Value* mask, Value* vGatherComponents[], bool bPackedOutput, - MEM_CLIENT usage) + JIT_MEM_CLIENT usage) { const SWR_FORMAT_INFO& info = GetFormatInfo(format); if (info.type[0] == SWR_TYPE_FLOAT && info.bpc[0] == 32) @@ -267,7 +263,7 @@ namespace SwrJit Value* vMask, Value* vGatherComponents[], bool bPackedOutput, - MEM_CLIENT usage) + JIT_MEM_CLIENT usage) { switch (info.bpp / info.numComps) { @@ -341,7 +337,7 @@ namespace SwrJit Value* vMask, Value* vGatherComponents[], bool bPackedOutput, - MEM_CLIENT usage) + JIT_MEM_CLIENT usage) { switch (info.bpp / info.numComps) { @@ -427,8 +423,8 @@ namespace SwrJit bool bPackedOutput) { // cast types - Type* vGatherTy = getVectorType(IntegerType::getInt32Ty(JM()->mContext), mVWidth); - Type* v32x8Ty = getVectorType(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits + Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth); + Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits // input could either be float or int vector; do shuffle work in int vGatherInput[0] = BITCAST(vGatherInput[0], mSimdInt32Ty); @@ -436,7 +432,7 @@ namespace SwrJit if (bPackedOutput) { - Type* v128bitTy = getVectorType(IntegerType::getIntNTy(JM()->mContext, 128), + Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits // shuffle mask @@ -532,12 +528,12 @@ namespace SwrJit bool bPackedOutput) { // cast types - Type* vGatherTy = getVectorType(IntegerType::getInt32Ty(JM()->mContext), mVWidth); - Type* v32x8Ty = getVectorType(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits + Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth); + Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits if (bPackedOutput) { - Type* v128Ty = getVectorType(IntegerType::getIntNTy(JM()->mContext, 128), + Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits // shuffle mask Value* vConstMask = C({0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, @@ -632,7 +628,6 @@ namespace SwrJit break; } - assert(vConstMask && "Invalid info.numComps value"); vGatherOutput[swizzleIndex] = BITCAST(PSHUFB(BITCAST(vGatherInput, v32x8Ty), vConstMask), vGatherTy); // after pshufb for x channel @@ -649,16 +644,9 @@ namespace SwrJit /// @param vOffsets - vector of byte offsets from pDst /// @param vMask - mask of valid lanes void Builder::SCATTERPS( - Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage) + Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, JIT_MEM_CLIENT usage) { AssertMemoryUsageParams(pDst, usage); -#if LLVM_VERSION_MAJOR >= 11 - SWR_ASSERT(cast(vSrc->getType())->getElementType()->isFloatTy()); -#else - SWR_ASSERT(vSrc->getType()->getVectorElementType()->isFloatTy()); -#endif - VSCATTERPS(pDst, vMask, vOffsets, vSrc, C(1)); - return; /* Scatter algorithm @@ -670,10 +658,6 @@ namespace SwrJit */ - /* - - // Reference implementation kept around for reference - BasicBlock* pCurBB = IRB()->GetInsertBlock(); Function* pFunc = pCurBB->getParent(); Type* pSrcTy = vSrc->getType()->getVectorElementType(); @@ -761,7 +745,5 @@ namespace SwrJit // Move builder to beginning of post loop IRB()->SetInsertPoint(pPostLoop, pPostLoop->begin()); - - */ } } // namespace SwrJit diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h index 429d5779a..15def96cb 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h @@ -30,22 +30,20 @@ #pragma once public: -enum class MEM_CLIENT +typedef enum _JIT_MEM_CLIENT { MEM_CLIENT_INTERNAL, GFX_MEM_CLIENT_FETCH, GFX_MEM_CLIENT_SAMPLER, GFX_MEM_CLIENT_SHADER, - GFX_MEM_CLIENT_STREAMOUT, - GFX_MEM_CLIENT_URB -}; +} JIT_MEM_CLIENT; protected: virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset); -void AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage); +void AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage); public: -virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, bool isReadOnly = true, const Twine& Name = ""); +virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = ""); virtual Value* GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name = ""); virtual Value* GEP(Value* ptr, const std::initializer_list& indexList, Type* Ty = nullptr); virtual Value* @@ -58,23 +56,23 @@ Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list& indexList) Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list& indexList); virtual LoadInst* - LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* Ptr, const Twine& Name = "", Type* Ty = nullptr, - MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); virtual LoadInst* - LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* Ptr, bool isVolatile, const Twine& Name = "", Type* Ty = nullptr, - MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* BasePtr, const std::initializer_list& offset, const llvm::Twine& Name = "", Type* Ty = nullptr, - MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); virtual CallInst* MASKED_LOAD(Value* Ptr, unsigned Align, @@ -82,24 +80,14 @@ virtual CallInst* MASKED_LOAD(Value* Ptr, Value* PassThru = nullptr, const Twine& Name = "", Type* Ty = nullptr, - MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL) + JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL) { - return IRB()->CreateMaskedLoad(Ptr, AlignType(Align), Mask, PassThru, Name); + return IRB()->CreateMaskedLoad(Ptr, Align, Mask, PassThru, Name); } -virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL) -{ - return IRB()->CreateStore(Val, Ptr, isVolatile); -} - -virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); - -virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL) -{ - return IRB()->CreateMaskedStore(Val, Ptr, AlignType(Align), Mask); -} - -LoadInst* LOADV(Value* BasePtr, const std::initializer_list& offset, const llvm::Twine& name = ""); +LoadInst* + LOADV(Value* BasePtr, const std::initializer_list& offset, const llvm::Twine& name = ""); +StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list& offset); StoreInst* STOREV(Value* Val, Value* BasePtr, const std::initializer_list& offset); Value* MEM_ADD(Value* i32Incr, @@ -113,14 +101,14 @@ void Gather4(const SWR_FORMAT format, Value* mask, Value* vGatherComponents[], bool bPackedOutput, - MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); virtual Value* GATHERPS(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, - MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); void GATHER4PS(const SWR_FORMAT_INFO& info, Value* pSrcBase, @@ -128,14 +116,14 @@ void GATHER4PS(const SWR_FORMAT_INFO& info, Value* mask, Value* vGatherComponents[], bool bPackedOutput, - MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); virtual Value* GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, - MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); void GATHER4DD(const SWR_FORMAT_INFO& info, Value* pSrcBase, @@ -143,18 +131,17 @@ void GATHER4DD(const SWR_FORMAT_INFO& info, Value* mask, Value* vGatherComponents[], bool bPackedOutput, - MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); Value* GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1); Value* GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* pVecPassthru); -void SCATTER_PTR(Value* pVecDstPtr, Value* pVecSrc, Value* pVecMask); virtual void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, - MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); void Shuffle8bpcGather4(const SWR_FORMAT_INFO& info, Value* vGatherInput, diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp index 61c6b57b3..c34959d35 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp @@ -32,15 +32,11 @@ #include "passes.h" #include "JitManager.h" -#include "common/simdlib.hpp" - #include -extern "C" void ScatterPS_256(uint8_t*, SIMD256::Integer, SIMD256::Float, uint8_t, uint32_t); - namespace llvm { - // forward declare the initializer + // foward declare the initializer void initializeLowerX86Pass(PassRegistry&); } // namespace llvm @@ -68,26 +64,22 @@ namespace SwrJit struct X86Intrinsic { - IntrinsicID intrin[NUM_WIDTHS]; + Intrinsic::ID intrin[NUM_WIDTHS]; EmuFunc emuFunc; }; // Map of intrinsics that haven't been moved to the new mechanism yet. If used, these get the // previous behavior of mapping directly to avx/avx2 intrinsics. - using intrinsicMap_t = std::map; - static intrinsicMap_t& getIntrinsicMap() { - static std::map intrinsicMap = { - {"meta.intrinsic.BEXTR_32", Intrinsic::x86_bmi_bextr_32}, - {"meta.intrinsic.VPSHUFB", Intrinsic::x86_avx2_pshuf_b}, - {"meta.intrinsic.VCVTPS2PH", Intrinsic::x86_vcvtps2ph_256}, - {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256}, - {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256}, - {"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d}, - {"meta.intrinsic.PDEP32", Intrinsic::x86_bmi_pdep_32}, - {"meta.intrinsic.RDTSC", Intrinsic::x86_rdtsc} - }; - return intrinsicMap; - } + static std::map intrinsicMap = { + {"meta.intrinsic.BEXTR_32", Intrinsic::x86_bmi_bextr_32}, + {"meta.intrinsic.VPSHUFB", Intrinsic::x86_avx2_pshuf_b}, + {"meta.intrinsic.VCVTPS2PH", Intrinsic::x86_vcvtps2ph_256}, + {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256}, + {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256}, + {"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d}, + {"meta.intrinsic.PDEP32", Intrinsic::x86_bmi_pdep_32}, + {"meta.intrinsic.RDTSC", Intrinsic::x86_rdtsc}, + }; // Forward decls Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst); @@ -96,8 +88,6 @@ namespace SwrJit Instruction* VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst); Instruction* - VSCATTER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst); - Instruction* VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst); Instruction* VHSUB_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst); @@ -112,75 +102,88 @@ namespace SwrJit static Intrinsic::ID DOUBLE = (Intrinsic::ID)-1; - using intrinsicMapAdvanced_t = std::vector>; - - static intrinsicMapAdvanced_t& getIntrinsicMapAdvanced() - { - // clang-format off - static intrinsicMapAdvanced_t intrinsicMapAdvanced = { - // 256 wide 512 wide - { - // AVX - {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}}, - {"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}}, - {"meta.intrinsic.VPERMD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}}, - {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, - {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, - {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, - {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}}, - {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, Intrinsic::not_intrinsic}, NO_EMU}}, - {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}}, - {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}}, - }, - { - // AVX2 - {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}}, - {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx2_permps, Intrinsic::not_intrinsic}, VPERM_EMU}}, - {"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx2_permd, Intrinsic::not_intrinsic}, VPERM_EMU}}, - {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, - {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, - {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, - {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}}, - {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, DOUBLE}, NO_EMU}}, - {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}}, - {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}}, - }, - { - // AVX512 - {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx512_rcp14_ps_256, Intrinsic::x86_avx512_rcp14_ps_512}, NO_EMU}}, - #if LLVM_VERSION_MAJOR < 7 - {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx512_mask_permvar_sf_256, Intrinsic::x86_avx512_mask_permvar_sf_512}, NO_EMU}}, - {"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx512_mask_permvar_si_256, Intrinsic::x86_avx512_mask_permvar_si_512}, NO_EMU}}, - #else - {"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}}, - {"meta.intrinsic.VPERMD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}}, - #endif - {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, - {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, - {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, - {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}}, - #if LLVM_VERSION_MAJOR < 7 - {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx512_mask_cvtpd2ps_256, Intrinsic::x86_avx512_mask_cvtpd2ps_512}, NO_EMU}}, - #else - {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VCONVERT_EMU}}, - #endif - {"meta.intrinsic.VROUND", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VROUND_EMU}}, - {"meta.intrinsic.VHSUBPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VHSUB_EMU}} - }}; - // clang-format on - return intrinsicMapAdvanced; - } - - static uint32_t getBitWidth(VectorType *pVTy) - { -#if LLVM_VERSION_MAJOR >= 12 - return cast(pVTy)->getNumElements() * pVTy->getElementType()->getPrimitiveSizeInBits(); -#elif LLVM_VERSION_MAJOR >= 11 - return pVTy->getNumElements() * pVTy->getElementType()->getPrimitiveSizeInBits(); + static std::map intrinsicMap2[] = { + // 256 wide 512 wide + { + // AVX + {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}}, + {"meta.intrinsic.VPERMPS", + {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}}, + {"meta.intrinsic.VPERMD", + {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}}, + {"meta.intrinsic.VGATHERPD", + {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, + {"meta.intrinsic.VGATHERPS", + {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, + {"meta.intrinsic.VGATHERDD", + {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, + {"meta.intrinsic.VCVTPD2PS", + {{Intrinsic::x86_avx_cvt_pd2_ps_256, Intrinsic::not_intrinsic}, NO_EMU}}, + {"meta.intrinsic.VCVTPH2PS", + {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}}, + {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}}, + {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}}, + }, + { + // AVX2 + {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}}, + {"meta.intrinsic.VPERMPS", + {{Intrinsic::x86_avx2_permps, Intrinsic::not_intrinsic}, VPERM_EMU}}, + {"meta.intrinsic.VPERMD", + {{Intrinsic::x86_avx2_permd, Intrinsic::not_intrinsic}, VPERM_EMU}}, + {"meta.intrinsic.VGATHERPD", + {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, + {"meta.intrinsic.VGATHERPS", + {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, + {"meta.intrinsic.VGATHERDD", + {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, + {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, DOUBLE}, NO_EMU}}, + {"meta.intrinsic.VCVTPH2PS", + {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}}, + {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}}, + {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}}, + }, + { + // AVX512 + {"meta.intrinsic.VRCPPS", + {{Intrinsic::x86_avx512_rcp14_ps_256, Intrinsic::x86_avx512_rcp14_ps_512}, NO_EMU}}, +#if LLVM_VERSION_MAJOR < 7 + {"meta.intrinsic.VPERMPS", + {{Intrinsic::x86_avx512_mask_permvar_sf_256, + Intrinsic::x86_avx512_mask_permvar_sf_512}, + NO_EMU}}, + {"meta.intrinsic.VPERMD", + {{Intrinsic::x86_avx512_mask_permvar_si_256, + Intrinsic::x86_avx512_mask_permvar_si_512}, + NO_EMU}}, #else - return pVTy->getBitWidth(); + {"meta.intrinsic.VPERMPS", + {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}}, + {"meta.intrinsic.VPERMD", + {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}}, #endif - } + {"meta.intrinsic.VGATHERPD", + {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, + {"meta.intrinsic.VGATHERPS", + {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, + {"meta.intrinsic.VGATHERDD", + {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, +#if LLVM_VERSION_MAJOR < 7 + {"meta.intrinsic.VCVTPD2PS", + {{Intrinsic::x86_avx512_mask_cvtpd2ps_256, Intrinsic::x86_avx512_mask_cvtpd2ps_512}, + NO_EMU}}, +#else + {"meta.intrinsic.VCVTPD2PS", + {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VCONVERT_EMU}}, +#endif + {"meta.intrinsic.VCVTPH2PS", + {{Intrinsic::x86_avx512_mask_vcvtph2ps_256, Intrinsic::x86_avx512_mask_vcvtph2ps_512}, + NO_EMU}}, + {"meta.intrinsic.VROUND", + {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VROUND_EMU}}, + {"meta.intrinsic.VHSUBPS", + {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VHSUB_EMU}}, + }}; struct LowerX86 : public FunctionPass { @@ -206,31 +209,6 @@ namespace SwrJit SWR_ASSERT(false, "Unsupported AVX architecture."); mTarget = AVX; } - - // Setup scatter function for 256 wide - uint32_t curWidth = B->mVWidth; - B->SetTargetWidth(8); - std::vector args = { - B->mInt8PtrTy, // pBase - B->mSimdInt32Ty, // vIndices - B->mSimdFP32Ty, // vSrc - B->mInt8Ty, // mask - B->mInt32Ty // scale - }; - - FunctionType* pfnScatterTy = FunctionType::get(B->mVoidTy, args, false); - mPfnScatter256 = cast( -#if LLVM_VERSION_MAJOR >= 9 - B->JM()->mpCurrentModule->getOrInsertFunction("ScatterPS_256", pfnScatterTy).getCallee()); -#else - B->JM()->mpCurrentModule->getOrInsertFunction("ScatterPS_256", pfnScatterTy)); -#endif - if (sys::DynamicLibrary::SearchForAddressOfSymbol("ScatterPS_256") == nullptr) - { - sys::DynamicLibrary::AddSymbol("ScatterPS_256", (void*)&ScatterPS_256); - } - - B->SetTargetWidth(curWidth); } // Try to decipher the vector type of the instruction. This does not work properly @@ -242,16 +220,13 @@ namespace SwrJit TargetWidth* pWidth, Type** pTy) { - assert(pCallInst); Type* pVecTy = pCallInst->getType(); // Check for intrinsic specific types // VCVTPD2PS type comes from src, not dst if (intrinName.equals("meta.intrinsic.VCVTPD2PS")) { - Value* pOp = pCallInst->getOperand(0); - assert(pOp); - pVecTy = pOp->getType(); + pVecTy = pCallInst->getOperand(0)->getType(); } if (!pVecTy->isVectorTy()) @@ -267,7 +242,7 @@ namespace SwrJit } SWR_ASSERT(pVecTy->isVectorTy(), "Couldn't determine vector size"); - uint32_t width = getBitWidth(cast(pVecTy)); + uint32_t width = cast(pVecTy)->getBitWidth(); switch (width) { case 256: @@ -299,7 +274,7 @@ namespace SwrJit SWR_ASSERT(false, "Unhandled vector width type %d\n", width); } - return ConstantVector::getNullValue(getVectorType(pTy, numElem)); + return ConstantVector::getNullValue(VectorType::get(pTy, numElem)); } Value* GetMask(TargetWidth width) @@ -322,28 +297,20 @@ namespace SwrJit // Convert mask to x86 mask Value* VectorMask(Value* vi1Mask) { -#if LLVM_VERSION_MAJOR >= 12 - uint32_t numElem = cast(vi1Mask->getType())->getNumElements(); -#elif LLVM_VERSION_MAJOR >= 11 - uint32_t numElem = cast(vi1Mask->getType())->getNumElements(); -#else uint32_t numElem = vi1Mask->getType()->getVectorNumElements(); -#endif - return B->S_EXT(vi1Mask, getVectorType(B->mInt32Ty, numElem)); + return B->S_EXT(vi1Mask, VectorType::get(B->mInt32Ty, numElem)); } Instruction* ProcessIntrinsicAdvanced(CallInst* pCallInst) { - Function* pFunc = pCallInst->getCalledFunction(); - assert(pFunc); - - auto& intrinsic = getIntrinsicMapAdvanced()[mTarget][pFunc->getName().str()]; + Function* pFunc = pCallInst->getCalledFunction(); + auto& intrinsic = intrinsicMap2[mTarget][pFunc->getName()]; TargetWidth vecWidth; Type* pElemTy; GetRequestedWidthAndType(pCallInst, pFunc->getName(), &vecWidth, &pElemTy); // Check if there is a native intrinsic for this instruction - IntrinsicID id = intrinsic.intrin[vecWidth]; + Intrinsic::ID id = intrinsic.intrin[vecWidth]; if (id == DOUBLE) { // Double pump the next smaller SIMD intrinsic @@ -396,19 +363,18 @@ namespace SwrJit Instruction* ProcessIntrinsic(CallInst* pCallInst) { Function* pFunc = pCallInst->getCalledFunction(); - assert(pFunc); // Forward to the advanced support if found - if (getIntrinsicMapAdvanced()[mTarget].find(pFunc->getName().str()) != getIntrinsicMapAdvanced()[mTarget].end()) + if (intrinsicMap2[mTarget].find(pFunc->getName()) != intrinsicMap2[mTarget].end()) { return ProcessIntrinsicAdvanced(pCallInst); } - SWR_ASSERT(getIntrinsicMap().find(pFunc->getName().str()) != getIntrinsicMap().end(), + SWR_ASSERT(intrinsicMap.find(pFunc->getName()) != intrinsicMap.end(), "Unimplemented intrinsic %s.", - pFunc->getName().str().c_str()); + pFunc->getName()); - Intrinsic::ID x86Intrinsic = getIntrinsicMap()[pFunc->getName().str()]; + Intrinsic::ID x86Intrinsic = intrinsicMap[pFunc->getName()]; Function* pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, x86Intrinsic); @@ -421,44 +387,28 @@ namespace SwrJit } ////////////////////////////////////////////////////////////////////////// - /// @brief LLVM function pass run method. + /// @brief LLVM funtion pass run method. /// @param f- The function we're working on with this pass. virtual bool runOnFunction(Function& F) { std::vector toRemove; - std::vector bbs; - - // Make temp copy of the basic blocks and instructions, as the intrinsic - // replacement code might invalidate the iterators - for (auto& b : F.getBasicBlockList()) - { - bbs.push_back(&b); - } - for (auto* BB : bbs) + for (auto& BB : F.getBasicBlockList()) { - std::vector insts; - for (auto& i : BB->getInstList()) + for (auto& I : BB.getInstList()) { - insts.push_back(&i); - } - - for (auto* I : insts) - { - if (CallInst* pCallInst = dyn_cast(I)) + if (CallInst* pCallInst = dyn_cast(&I)) { Function* pFunc = pCallInst->getCalledFunction(); if (pFunc) { if (pFunc->getName().startswith("meta.intrinsic")) { - B->IRB()->SetInsertPoint(I); + B->IRB()->SetInsertPoint(&I); Instruction* pReplace = ProcessIntrinsic(pCallInst); + SWR_ASSERT(pReplace); toRemove.push_back(pCallInst); - if (pReplace) - { - pCallInst->replaceAllUsesWith(pReplace); - } + pCallInst->replaceAllUsesWith(pReplace); } } } @@ -478,9 +428,10 @@ namespace SwrJit virtual void getAnalysisUsage(AnalysisUsage& AU) const {} JitManager* JM() { return B->JM(); } - Builder* B; - TargetArch mTarget; - Function* mPfnScatter256; + + Builder* B; + + TargetArch mTarget; static char ID; ///< Needed by LLVM to generate ID for FunctionPass. }; @@ -513,14 +464,7 @@ namespace SwrJit else { v32Result = UndefValue::get(v32A->getType()); -#if LLVM_VERSION_MAJOR >= 12 - uint32_t numElem = cast(v32A->getType())->getNumElements(); -#elif LLVM_VERSION_MAJOR >= 11 - uint32_t numElem = cast(v32A->getType())->getNumElements(); -#else - uint32_t numElem = v32A->getType()->getVectorNumElements(); -#endif - for (uint32_t l = 0; l < numElem; ++l) + for (uint32_t l = 0; l < v32A->getType()->getVectorNumElements(); ++l) { auto i32Index = B->VEXTRACT(vi32Index, B->C(l)); auto val = B->VEXTRACT(v32A, i32Index); @@ -540,22 +484,11 @@ namespace SwrJit auto vi1Mask = pCallInst->getArgOperand(3); auto i8Scale = pCallInst->getArgOperand(4); - pBase = B->POINTER_CAST(pBase, PointerType::get(B->mInt8Ty, 0)); -#if LLVM_VERSION_MAJOR >= 11 -#if LLVM_VERSION_MAJOR >= 12 - FixedVectorType* pVectorType = cast(vSrc->getType()); -#else - VectorType* pVectorType = cast(vSrc->getType()); -#endif - uint32_t numElem = pVectorType->getNumElements(); - auto srcTy = pVectorType->getElementType(); -#else - uint32_t numElem = vSrc->getType()->getVectorNumElements(); - auto srcTy = vSrc->getType()->getVectorElementType(); -#endif - auto i32Scale = B->Z_EXT(i8Scale, B->mInt32Ty); - - Value* v32Gather = nullptr; + pBase = B->POINTER_CAST(pBase, PointerType::get(B->mInt8Ty, 0)); + uint32_t numElem = vSrc->getType()->getVectorNumElements(); + auto i32Scale = B->Z_EXT(i8Scale, B->mInt32Ty); + auto srcTy = vSrc->getType()->getVectorElementType(); + Value* v32Gather; if (arch == AVX) { // Full emulation for AVX @@ -565,13 +498,7 @@ namespace SwrJit B->STORE(vSrc, pTmp); v32Gather = UndefValue::get(vSrc->getType()); -#if LLVM_VERSION_MAJOR <= 10 auto vi32Scale = ConstantVector::getSplat(numElem, cast(i32Scale)); -#elif LLVM_VERSION_MAJOR == 11 - auto vi32Scale = ConstantVector::getSplat(ElementCount(numElem, false), cast(i32Scale)); -#else - auto vi32Scale = ConstantVector::getSplat(ElementCount::get(numElem, false), cast(i32Scale)); -#endif auto vi32Offsets = B->MUL(vi32Indices, vi32Scale); for (uint32_t i = 0; i < numElem; ++i) @@ -590,7 +517,7 @@ namespace SwrJit } else if (arch == AVX2 || (arch == AVX512 && width == W256)) { - Function* pX86IntrinFunc = nullptr; + Function* pX86IntrinFunc; if (srcTy == B->mFP32Ty) { pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, @@ -619,23 +546,12 @@ namespace SwrJit else if (width == W512) { // Double pump 4-wide for 64bit elements -#if LLVM_VERSION_MAJOR >= 12 - if (cast(vSrc->getType())->getElementType() == B->mDoubleTy) -#elif LLVM_VERSION_MAJOR >= 11 - if (cast(vSrc->getType())->getElementType() == B->mDoubleTy) -#else if (vSrc->getType()->getVectorElementType() == B->mDoubleTy) -#endif { auto v64Mask = pThis->VectorMask(vi1Mask); -#if LLVM_VERSION_MAJOR >= 12 - uint32_t numElem = cast(v64Mask->getType())->getNumElements(); -#elif LLVM_VERSION_MAJOR >= 11 - uint32_t numElem = cast(v64Mask->getType())->getNumElements(); -#else - uint32_t numElem = v64Mask->getType()->getVectorNumElements(); -#endif - v64Mask = B->S_EXT(v64Mask, getVectorType(B->mInt64Ty, numElem)); + v64Mask = B->S_EXT( + v64Mask, + VectorType::get(B->mInt64Ty, v64Mask->getType()->getVectorNumElements())); v64Mask = B->BITCAST(v64Mask, vSrc->getType()); Value* src0 = B->VSHUFFLE(vSrc, vSrc, B->C({0, 1, 2, 3})); @@ -647,30 +563,23 @@ namespace SwrJit Value* mask0 = B->VSHUFFLE(v64Mask, v64Mask, B->C({0, 1, 2, 3})); Value* mask1 = B->VSHUFFLE(v64Mask, v64Mask, B->C({4, 5, 6, 7})); -#if LLVM_VERSION_MAJOR >= 12 - uint32_t numElemSrc0 = cast(src0->getType())->getNumElements(); - uint32_t numElemMask0 = cast(mask0->getType())->getNumElements(); - uint32_t numElemSrc1 = cast(src1->getType())->getNumElements(); - uint32_t numElemMask1 = cast(mask1->getType())->getNumElements(); -#elif LLVM_VERSION_MAJOR >= 11 - uint32_t numElemSrc0 = cast(src0->getType())->getNumElements(); - uint32_t numElemMask0 = cast(mask0->getType())->getNumElements(); - uint32_t numElemSrc1 = cast(src1->getType())->getNumElements(); - uint32_t numElemMask1 = cast(mask1->getType())->getNumElements(); -#else - uint32_t numElemSrc0 = src0->getType()->getVectorNumElements(); - uint32_t numElemMask0 = mask0->getType()->getVectorNumElements(); - uint32_t numElemSrc1 = src1->getType()->getVectorNumElements(); - uint32_t numElemMask1 = mask1->getType()->getVectorNumElements(); -#endif - src0 = B->BITCAST(src0, getVectorType(B->mInt64Ty, numElemSrc0)); - mask0 = B->BITCAST(mask0, getVectorType(B->mInt64Ty, numElemMask0)); + src0 = B->BITCAST( + src0, + VectorType::get(B->mInt64Ty, src0->getType()->getVectorNumElements())); + mask0 = B->BITCAST( + mask0, + VectorType::get(B->mInt64Ty, mask0->getType()->getVectorNumElements())); Value* gather0 = B->CALL(pX86IntrinFunc, {src0, pBase, indices0, mask0, i8Scale}); - src1 = B->BITCAST(src1, getVectorType(B->mInt64Ty, numElemSrc1)); - mask1 = B->BITCAST(mask1, getVectorType(B->mInt64Ty, numElemMask1)); + src1 = B->BITCAST( + src1, + VectorType::get(B->mInt64Ty, src1->getType()->getVectorNumElements())); + mask1 = B->BITCAST( + mask1, + VectorType::get(B->mInt64Ty, mask1->getType()->getVectorNumElements())); Value* gather1 = B->CALL(pX86IntrinFunc, {src1, pBase, indices1, mask1, i8Scale}); + v32Gather = B->VSHUFFLE(gather0, gather1, B->C({0, 1, 2, 3, 4, 5, 6, 7})); v32Gather = B->BITCAST(v32Gather, vSrc->getType()); } @@ -699,8 +608,8 @@ namespace SwrJit } else if (arch == AVX512) { - Value* iMask = nullptr; - Function* pX86IntrinFunc = nullptr; + Value* iMask; + Function* pX86IntrinFunc; if (srcTy == B->mFP32Ty) { pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, @@ -730,69 +639,6 @@ namespace SwrJit return cast(v32Gather); } - Instruction* - VSCATTER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst) - { - Builder* B = pThis->B; - auto pBase = pCallInst->getArgOperand(0); - auto vi1Mask = pCallInst->getArgOperand(1); - auto vi32Indices = pCallInst->getArgOperand(2); - auto v32Src = pCallInst->getArgOperand(3); - auto i32Scale = pCallInst->getArgOperand(4); - - if (arch != AVX512) - { - // Call into C function to do the scatter. This has significantly better compile perf - // compared to jitting scatter loops for every scatter - if (width == W256) - { - auto mask = B->BITCAST(vi1Mask, B->mInt8Ty); - B->CALL(pThis->mPfnScatter256, {pBase, vi32Indices, v32Src, mask, i32Scale}); - } - else - { - // Need to break up 512 wide scatter to two 256 wide - auto maskLo = B->VSHUFFLE(vi1Mask, vi1Mask, B->C({0, 1, 2, 3, 4, 5, 6, 7})); - auto indicesLo = - B->VSHUFFLE(vi32Indices, vi32Indices, B->C({0, 1, 2, 3, 4, 5, 6, 7})); - auto srcLo = B->VSHUFFLE(v32Src, v32Src, B->C({0, 1, 2, 3, 4, 5, 6, 7})); - - auto mask = B->BITCAST(maskLo, B->mInt8Ty); - B->CALL(pThis->mPfnScatter256, {pBase, indicesLo, srcLo, mask, i32Scale}); - - auto maskHi = B->VSHUFFLE(vi1Mask, vi1Mask, B->C({8, 9, 10, 11, 12, 13, 14, 15})); - auto indicesHi = - B->VSHUFFLE(vi32Indices, vi32Indices, B->C({8, 9, 10, 11, 12, 13, 14, 15})); - auto srcHi = B->VSHUFFLE(v32Src, v32Src, B->C({8, 9, 10, 11, 12, 13, 14, 15})); - - mask = B->BITCAST(maskHi, B->mInt8Ty); - B->CALL(pThis->mPfnScatter256, {pBase, indicesHi, srcHi, mask, i32Scale}); - } - return nullptr; - } - - Value* iMask; - Function* pX86IntrinFunc; - if (width == W256) - { - // No direct intrinsic supported in llvm to scatter 8 elem with 32bit indices, but we - // can use the scatter of 8 elements with 64bit indices - pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, - Intrinsic::x86_avx512_scatter_qps_512); - - auto vi32IndicesExt = B->Z_EXT(vi32Indices, B->mSimdInt64Ty); - iMask = B->BITCAST(vi1Mask, B->mInt8Ty); - B->CALL(pX86IntrinFunc, {pBase, iMask, vi32IndicesExt, v32Src, i32Scale}); - } - else if (width == W512) - { - pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, - Intrinsic::x86_avx512_scatter_dps_512); - iMask = B->BITCAST(vi1Mask, B->mInt16Ty); - B->CALL(pX86IntrinFunc, {pBase, iMask, vi32Indices, v32Src, i32Scale}); - } - return nullptr; - } // No support for vroundps in avx512 (it is available in kncni), so emulate with avx // instructions @@ -803,9 +649,7 @@ namespace SwrJit auto B = pThis->B; auto vf32Src = pCallInst->getOperand(0); - assert(vf32Src); auto i8Round = pCallInst->getOperand(1); - assert(i8Round); auto pfnFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx_round_ps_256); @@ -910,18 +754,10 @@ namespace SwrJit auto argType = arg.get()->getType(); if (argType->isVectorTy()) { -#if LLVM_VERSION_MAJOR >= 12 - uint32_t vecWidth = cast(argType)->getNumElements(); - auto elemTy = cast(argType)->getElementType(); -#elif LLVM_VERSION_MAJOR >= 11 - uint32_t vecWidth = cast(argType)->getNumElements(); - auto elemTy = cast(argType)->getElementType(); -#else uint32_t vecWidth = argType->getVectorNumElements(); - auto elemTy = argType->getVectorElementType(); -#endif Value* lanes = B->CInc(i * vecWidth / 2, vecWidth / 2); - Value* argToPush = B->VSHUFFLE(arg.get(), B->VUNDEF(elemTy, vecWidth), lanes); + Value* argToPush = B->VSHUFFLE( + arg.get(), B->VUNDEF(argType->getVectorElementType(), vecWidth), lanes); args.push_back(argToPush); } else @@ -935,16 +771,8 @@ namespace SwrJit if (result[0]->getType()->isVectorTy()) { assert(result[1]->getType()->isVectorTy()); -#if LLVM_VERSION_MAJOR >= 12 - vecWidth = cast(result[0]->getType())->getNumElements() + - cast(result[1]->getType())->getNumElements(); -#elif LLVM_VERSION_MAJOR >= 11 - vecWidth = cast(result[0]->getType())->getNumElements() + - cast(result[1]->getType())->getNumElements(); -#else vecWidth = result[0]->getType()->getVectorNumElements() + result[1]->getType()->getVectorNumElements(); -#endif } else { diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h index e0bb75cde..d3c732af0 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h @@ -25,7 +25,6 @@ * @brief Include file for llvm passes * ******************************************************************************/ -#pragma once #include "JitManager.h" #include "builder.h" diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp index e54e23fc9..47f717bfc 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp @@ -1,5 +1,5 @@ /**************************************************************************** - * Copyright (C) 2017-2020 Intel Corporation. All Rights Reserved. + * Copyright (C) 2017-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -30,17 +30,13 @@ #pragma once -#if defined(_MSC_VER) +#if defined(_WIN32) #pragma warning(disable : 4146 4244 4267 4800 4996) #endif -#include - -#if LLVM_VERSION_MAJOR < 7 // llvm 3.7+ reuses "DEBUG" as an enum value #pragma push_macro("DEBUG") #undef DEBUG -#endif #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" @@ -49,11 +45,10 @@ #include "llvm/IR/Type.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" -#if LLVM_VERSION_MAJOR >= 10 -#include "llvm/IR/IntrinsicsX86.h" -#endif #include "llvm/ExecutionEngine/ObjectCache.h" +#include "llvm/Config/llvm-config.h" + #include "llvm/IR/Verifier.h" #include "llvm/ExecutionEngine/MCJIT.h" #include "llvm/Support/FileSystem.h" @@ -138,29 +133,7 @@ static inline llvm::AttributeSet GetFuncAttribSet(llvm::LLVMContext& ctx, } #endif -#if LLVM_VERSION_MAJOR >= 11 -static inline llvm::VectorType* getVectorType(llvm::Type *ElementType, unsigned NumElements) -{ - return llvm::VectorType::get(ElementType, NumElements, false); -} -#else -static inline llvm::VectorType* getVectorType(llvm::Type *ElementType, unsigned NumElements) -{ - return llvm::VectorType::get(ElementType, NumElements); -} -#endif - -#if LLVM_VERSION_MAJOR < 7 #pragma pop_macro("DEBUG") -#endif - -#if LLVM_VERSION_MAJOR > 10 - typedef unsigned IntrinsicID; - typedef llvm::Align AlignType; -#else - typedef llvm::Intrinsic::ID IntrinsicID; - typedef unsigned AlignType; -#endif #include #include diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/meson.build b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/meson.build index 295dc2fcc..aced4a1b7 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/meson.build +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/meson.build @@ -18,17 +18,15 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -if dep_llvm.type_name() == 'internal' - _irbuilder_h = subproject('llvm').get_variable('irbuilder_h') -else - _llvm_includedir = dep_llvm.get_variable(configtool : 'includedir', cmake : 'LLVM_INCLUDE_DIR') - _irbuilder_h = join_paths(_llvm_includedir, 'llvm', 'IR', 'IRBuilder.h') -endif gen_builder_hpp = custom_target( 'gen_builder.hpp', input : [ - swr_gen_llvm_ir_macros_py, _irbuilder_h, + swr_gen_llvm_ir_macros_py, + join_paths( + dep_llvm.get_configtool_variable('includedir'), 'llvm', 'IR', + 'IRBuilder.h' + ) ], output : 'gen_builder.hpp', command : [ diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp index 3a19bbac7..bff96e17f 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp @@ -25,11 +25,7 @@ * @brief Provide access to tiles table initialization functions * ******************************************************************************/ - #include "memory/InitMemory.h" -#include "memory/LoadTile.h" -#include "memory/StoreTile.h" -#include "InitMemory.h" void InitSimLoadTilesTable(); void InitSimStoreTilesTable(); @@ -41,10 +37,3 @@ void InitTilesTable() InitSimStoreTilesTable(); InitSimClearTilesTable(); } - - -void SwrGetTileIterface(SWR_TILE_INTERFACE &out_funcs) -{ - out_funcs.pfnSwrLoadHotTile = SwrLoadHotTile; - out_funcs.pfnSwrStoreHotTileToSurface = SwrStoreHotTileToSurface; -} \ No newline at end of file diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h index a3ed7b3cb..14cca6ab3 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h @@ -25,59 +25,9 @@ * @brief Provide access to tiles table initialization functions * ******************************************************************************/ - -#pragma once - #include "common/os.h" -#include "memory/SurfaceState.h" - -////////////////////////////////////////////////////////////////////////// -/// @brief Loads a full hottile from a render surface -/// @param hPrivateContext - Handle to private DC -/// @param dstFormat - Format for hot tile. -/// @param renderTargetIndex - Index to src render target -/// @param x, y - Coordinates to raster tile. -/// @param pDstHotTile - Pointer to Hot Tile -SWR_FUNC(void, - SwrLoadHotTile, - HANDLE hWorkerPrivateData, - const SWR_SURFACE_STATE* pSrcSurface, - BucketManager* pBucketManager, - SWR_FORMAT dstFormat, - SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, - uint32_t x, - uint32_t y, - uint32_t renderTargetArrayIndex, - uint8_t* pDstHotTile); - -////////////////////////////////////////////////////////////////////////// -/// @brief Deswizzles and stores a full hottile to a render surface -/// @param hPrivateContext - Handle to private DC -/// @param srcFormat - Format for hot tile. -/// @param renderTargetIndex - Index to destination render target -/// @param x, y - Coordinates to raster tile. -/// @param pSrcHotTile - Pointer to Hot Tile -SWR_FUNC(void, - SwrStoreHotTileToSurface, - HANDLE hWorkerPrivateData, - SWR_SURFACE_STATE* pDstSurface, - BucketManager* pBucketManager, - SWR_FORMAT srcFormat, - SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, - uint32_t x, - uint32_t y, - uint32_t renderTargetArrayIndex, - uint8_t* pSrcHotTile); - -struct SWR_TILE_INTERFACE { - PFNSwrLoadHotTile pfnSwrLoadHotTile; - PFNSwrStoreHotTileToSurface pfnSwrStoreHotTileToSurface; -}; extern "C" { SWR_VISIBLE void SWR_API InitTilesTable(); - - typedef void(SWR_API* PFNSwrGetTileInterface)(SWR_TILE_INTERFACE& out_funcs); - SWR_VISIBLE void SWR_API SwrGetTileIterface(SWR_TILE_INTERFACE &out_funcs); } -- cgit v1.2.3