diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2020-01-22 02:13:18 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2020-01-22 02:13:18 +0000 |
commit | fdcc03929065b5bf5dd93553db219ea3e05c8c34 (patch) | |
tree | ca90dc8d9e89febdcd4160956c1b8ec098a4efc9 /lib/mesa/src/gallium/drivers/swr | |
parent | 3c9de4a7e13712b5696750bbd59a18c848742022 (diff) |
Import Mesa 19.2.8
Diffstat (limited to 'lib/mesa/src/gallium/drivers/swr')
23 files changed, 899 insertions, 273 deletions
diff --git a/lib/mesa/src/gallium/drivers/swr/.clang-format b/lib/mesa/src/gallium/drivers/swr/.clang-format new file mode 100644 index 000000000..0ec65a5de --- /dev/null +++ b/lib/mesa/src/gallium/drivers/swr/.clang-format @@ -0,0 +1,64 @@ +--- +Language: Cpp +AccessModifierOffset: -3 +AlignAfterOpenBracket: true +AlignEscapedNewlinesLeft: false +AlignOperands: false +AlignTrailingComments: false +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AlwaysBreakAfterDefinitionReturnType: true +AlwaysBreakTemplateDeclarations: false +AlwaysBreakBeforeMultilineStrings: false +BreakBeforeBinaryOperators: NonAssignment +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: true +BinPackParameters: false +BinPackArguments: false +ColumnLimit: 78 +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 3 +DerivePointerAlignment: false +ExperimentalAutoDetectBinPacking: false +IndentCaseLabels: false +IndentWrappedFunctionNames: false +IndentFunctionDeclarationAfterType: false +MaxEmptyLinesToKeep: 2 +KeepEmptyLinesAtTheStartOfBlocks: true +NamespaceIndentation: Inner +ObjCBlockIndentWidth: 3 +ObjCSpaceAfterProperty: true +ObjCSpaceBeforeProtocolList: true +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakString: 1000 +PenaltyBreakFirstLessLess: 120 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 0 +PointerAlignment: Right +SpacesBeforeTrailingComments: 1 +Cpp11BracedListStyle: true +Standard: Cpp11 +IndentWidth: 3 +TabWidth: 8 +UseTab: Never +BreakBeforeBraces: Linux +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpacesInAngles: false +SpaceInEmptyParentheses: false +SpacesInCStyleCastParentheses: false +SpaceAfterCStyleCast: false +SpacesInContainerLiterals: true +SpaceBeforeAssignmentOperators: true +ContinuationIndentWidth: 3 +CommentPragmas: '^ IWYU pragma:' +ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +SpaceBeforeParens: ControlStatements +DisableFormat: false +... + diff --git a/lib/mesa/src/gallium/drivers/swr/meson.build b/lib/mesa/src/gallium/drivers/swr/meson.build index 3f32d031e..658747563 100644 --- a/lib/mesa/src/gallium/drivers/swr/meson.build +++ b/lib/mesa/src/gallium/drivers/swr/meson.build @@ -82,7 +82,9 @@ files_swr_mesa = files( 'rasterizer/jitter/streamout_jit.cpp', 'rasterizer/jitter/streamout_jit.h', 'rasterizer/jitter/shader_lib/DebugOutput.cpp', + 'rasterizer/jitter/shader_lib/Scatter.cpp', 'rasterizer/jitter/functionpasses/lower_x86.cpp', + 'rasterizer/memory/SurfaceState.h' ) files_swr_arch = files( @@ -153,10 +155,12 @@ files_swr_arch = files( 'rasterizer/memory/tilingtraits.h', 'rasterizer/memory/InitMemory.h', 'rasterizer/memory/InitMemory.cpp', + 'rasterizer/memory/SurfaceState.h' ) swr_context_files = files('swr_context.h') swr_state_files = files('rasterizer/core/state.h') +swr_surf_state_files = files('rasterizer/memory/SurfaceState.h') swr_event_proto_files = files('rasterizer/archrast/events.proto') swr_event_pproto_files = files('rasterizer/archrast/events_private.proto') swr_gen_backend_files = files('rasterizer/codegen/templates/gen_backend.cpp') diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/.dir-locals.el b/lib/mesa/src/gallium/drivers/swr/rasterizer/.dir-locals.el new file mode 100644 index 000000000..2b04c18a9 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/.dir-locals.el @@ -0,0 +1,8 @@ +((prog-mode + (c-basic-offset . 4) + (c-file-style . "k&r") + (fill-column . 78) + (indent-tabs-mode . nil) + (show-trailing-whitespace . t) + ) + ) diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/_clang-format b/lib/mesa/src/gallium/drivers/swr/rasterizer/_clang-format new file mode 100644 index 000000000..ed4b9b409 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/_clang-format @@ -0,0 +1,114 @@ +--- +Language: Cpp +# BasedOnStyle: LLVM +AccessModifierOffset: -4 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: true +AlignConsecutiveDeclarations: true +AlignEscapedNewlines: Left +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Inline +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: true +BinPackArguments: false +BinPackParameters: false +BraceWrapping: + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: true + AfterStruct: true + AfterUnion: true + #AfterExternBlock: false + BeforeCatch: true + BeforeElse: true + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Custom +BreakBeforeInheritanceComma: false +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: AfterColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 100 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +#IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + - Regex: '.*' + Priority: 1 +IncludeIsMainRegex: '(Test)?$' +IndentCaseLabels: false +#IndentPPDirectives: AfterHash +IndentWidth: 4 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: All +ObjCBlockIndentWidth: 4 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Left +#RawStringFormats: +# - Delimiter: pb +# Language: TextProto +# BasedOnStyle: google +ReflowComments: true +SortIncludes: false +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 4 +UseTab: Never +... diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto b/lib/mesa/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto index f5cfb470a..b57d5c428 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto @@ -24,84 +24,99 @@ # ArchRast is to not pollute the Rasty code with lots of calculations, etc. that # are needed to compute per draw statistics, etc. -event EarlyDepthStencilInfoSingleSample +event PipelineStats::EarlyDepthStencilInfoSingleSample { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event EarlyDepthStencilInfoSampleRate +event PipelineStats::EarlyDepthStencilInfoSampleRate { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event EarlyDepthStencilInfoNullPS +event PipelineStats::EarlyDepthStencilInfoNullPS { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event LateDepthStencilInfoSingleSample +event PipelineStats::LateDepthStencilInfoSingleSample { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event LateDepthStencilInfoSampleRate +event PipelineStats::LateDepthStencilInfoSampleRate { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event LateDepthStencilInfoNullPS +event PipelineStats::LateDepthStencilInfoNullPS { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event EarlyDepthInfoPixelRate +event PipelineStats::EarlyDepthInfoPixelRate { uint64_t depthPassCount; uint64_t activeLanes; }; -event LateDepthInfoPixelRate +event PipelineStats::LateDepthInfoPixelRate { uint64_t depthPassCount; uint64_t activeLanes; }; -event BackendDrawEndEvent +event PipelineStats::BackendDrawEndEvent { uint32_t drawId; }; -event FrontendDrawEndEvent +event PipelineStats::FrontendDrawEndEvent { uint32_t drawId; }; -event TessPrimCount +event Memory::MemoryAccessEvent +{ + uint32_t drawId; + uint64_t tsc; + uint64_t ptr; + uint32_t size; + uint8_t isRead; + uint8_t client; +}; + +event Memory::MemoryStatsEndEvent +{ + uint32_t drawId; +}; + +event PipelineStats::TessPrimCount { uint64_t primCount; }; -event RasterTileCount +event PipelineStats::RasterTileCount { uint32_t drawId; uint64_t rasterTiles; }; -event GSPrimInfo +event PipelineStats::GSPrimInfo { uint64_t inputPrimCount; uint64_t primGeneratedCount; @@ -113,14 +128,14 @@ event GSPrimInfo // Trivial reject is numInvocations - pop_cnt32(validMask) // Trivial accept is validMask & ~clipMask // Must clip count is pop_cnt32(clipMask) -event ClipInfoEvent +event PipelineStats::ClipInfoEvent { uint32_t numInvocations; uint32_t validMask; uint32_t clipMask; }; -event CullInfoEvent +event PipelineStats::CullInfoEvent { uint32_t drawId; uint64_t degeneratePrimMask; @@ -128,14 +143,14 @@ event CullInfoEvent uint32_t validMask; }; -event AlphaInfoEvent +event PipelineStats::AlphaInfoEvent { uint32_t drawId; uint32_t alphaTestEnable; uint32_t alphaBlendEnable; }; -event DrawInstancedEvent +event PipelineStats::DrawInstancedEvent { uint32_t drawId; uint32_t topology; @@ -150,7 +165,7 @@ event DrawInstancedEvent uint32_t splitId; // Split draw count or id. }; -event DrawIndexedInstancedEvent +event PipelineStats::DrawIndexedInstancedEvent { uint32_t drawId; uint32_t topology; @@ -166,32 +181,32 @@ event DrawIndexedInstancedEvent uint32_t splitId; // Split draw count or id. }; -event VSStats +event ShaderStats::VSStats { - uint32_t numInstExecuted; + HANDLE hStats; // SWR_SHADER_STATS }; -event HSStats +event ShaderStats::HSStats { - uint32_t numInstExecuted; + HANDLE hStats; // SWR_SHADER_STATS }; -event DSStats +event ShaderStats::DSStats { - uint32_t numInstExecuted; + HANDLE hStats; // SWR_SHADER_STATS }; -event GSStats +event ShaderStats::GSStats { - uint32_t numInstExecuted; + HANDLE hStats; // SWR_SHADER_STATS }; -event PSStats +event ShaderStats::PSStats { - uint32_t numInstExecuted; + HANDLE hStats; // SWR_SHADER_STATS }; -event CSStats +event ShaderStats::CSStats { - uint32_t numInstExecuted; + HANDLE hStats; // SWR_SHADER_STATS };
\ No newline at end of file diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/meson.build b/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/meson.build index 282751760..daf79ed4c 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/meson.build +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/meson.build @@ -48,7 +48,8 @@ files_swr_common += [ ] foreach x : [[swr_context_files, 'gen_swr_context_llvm.h'], - [swr_state_files, 'gen_state_llvm.h']] + [swr_state_files, 'gen_state_llvm.h'], + [swr_surf_state_files, 'gen_surf_state_llvm.h']] files_swr_common += custom_target( x[1], input : ['gen_llvm_types.py', x[0]], @@ -61,26 +62,16 @@ foreach x : [[swr_context_files, 'gen_swr_context_llvm.h'], ) endforeach -foreach x : [['gen_ar_event.hpp', '--gen_event_hpp'], - ['gen_ar_event.cpp', '--gen_event_cpp'], - ['gen_ar_eventhandler.hpp', '--gen_eventhandler_h'], - ['gen_ar_eventhandlerfile.hpp', '--gen_eventhandlerfile_h']] - files_swr_common += custom_target( - x[0], - input : ['gen_archrast.py', swr_event_proto_files, swr_event_pproto_files], - output : x[0], - command : [ - prog_python, - '@INPUT0@', - '--proto', '@INPUT1@', - '--proto_private', '@INPUT2@', - '--output', '@OUTPUT@', - x[1], - ], - depend_files : files( - join_paths('templates', x[0]), - 'gen_common.py', - ), - ) +ar_output_filenames = ['gen_ar_event.hpp', 'gen_ar_event.cpp', 'gen_ar_eventhandler.hpp', 'gen_ar_eventhandlerfile.hpp'] +ar_template_filenames = [] +foreach fname : ar_output_filenames + ar_template_filenames += join_paths('templates', fname) endforeach +files_swr_common += custom_target( + 'gen_archrast', + input : ['gen_archrast.py', swr_event_proto_files, swr_event_pproto_files], + output : ar_output_filenames, + command : [prog_python, '@INPUT0@', '--proto', '@INPUT1@', '@INPUT2@', '--output-dir', meson.current_build_dir()], + depend_files : files('gen_common.py', ar_template_filenames) +) diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h index 71dbdacfd..8b88a1170 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h @@ -72,7 +72,7 @@ private: T m_Value; }; -#define DEFINE_KNOB(_name, _type, _default) \\ +#define DEFINE_KNOB(_name, _type) \\ struct Knob_##_name : Knob<_type> \\ @@ -80,7 +80,11 @@ private: static const char* Name() { return "KNOB_" #_name; } \\ - static _type DefaultValue() { return (_default); } \\ + static _type DefaultValue() { return (m_default); } \\ + + private: \\ + + static _type m_default; \\ } _name; @@ -105,11 +109,7 @@ struct GlobalKnobs % endfor % endif // - % if knob[1]['type'] == 'std::string': - DEFINE_KNOB(${knob[0]}, ${knob[1]['type']}, "${repr(knob[1]['default'])[1:-1]}"); - % else: - DEFINE_KNOB(${knob[0]}, ${knob[1]['type']}, ${knob[1]['default']}); - % endif + DEFINE_KNOB(${knob[0]}, ${knob[1]['type']}); % endfor diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/isa.hpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/isa.hpp index aea5740bb..41af0055f 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/isa.hpp +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/isa.hpp @@ -38,8 +38,10 @@ #include <intrin.h> #else #include <string.h> +#if !defined(__cpuid) #include <cpuid.h> #endif +#endif class InstructionSet { diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h index bbc9538b8..b00cbf63e 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h @@ -48,7 +48,17 @@ extern THREAD UINT tlsThreadId; class BucketManager { public: - BucketManager() {} + + uint32_t mCurrentFrame; + std::vector<uint32_t> mBucketMap; + bool mBucketsInitialized; + std::string mBucketMgrName; + + + BucketManager(std::string name) : mCurrentFrame(0), mBucketsInitialized(false), mBucketMgrName(name) + { + mBucketMap.clear(); + } ~BucketManager(); // removes all registered thread data diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp index e858a7d59..6329b2ec9 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp @@ -92,9 +92,3 @@ BUCKET_DESC gCoreBuckets[] = { static_assert(NumBuckets == (sizeof(gCoreBuckets) / sizeof(gCoreBuckets[0])), "RDTSC Bucket enum and description table size mismatched."); -/// @todo bucketmanager and mapping should probably be a part of the SWR context -std::vector<uint32_t> gBucketMap; -BucketManager gBucketMgr; - -uint32_t gCurrentFrame = 0; -bool gBucketsInitialized = false; diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h index dc20e5be9..0228275bd 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h @@ -100,90 +100,86 @@ enum CORE_BUCKETS NumBuckets }; -void rdtscReset(); -void rdtscInit(int threadId); -void rdtscStart(uint32_t bucketId); -void rdtscStop(uint32_t bucketId, uint32_t count, uint64_t drawId); -void rdtscEvent(uint32_t bucketId, uint32_t count1, uint32_t count2); -void rdtscEndFrame(); +void rdtscReset(BucketManager* pBucketMgr); +void rdtscInit(BucketManager* pBucketMgr, int threadId); +void rdtscStart(BucketManager* pBucketMgr, uint32_t bucketId); +void rdtscStop(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count, uint64_t drawId); +void rdtscEvent(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count1, uint32_t count2); +void rdtscEndFrame(BucketManager* pBucketMgr); #ifdef KNOB_ENABLE_RDTSC -#define RDTSC_RESET() rdtscReset() -#define RDTSC_INIT(threadId) rdtscInit(threadId) -#define RDTSC_START(bucket) rdtscStart(bucket) -#define RDTSC_STOP(bucket, count, draw) rdtscStop(bucket, count, draw) -#define RDTSC_EVENT(bucket, count1, count2) rdtscEvent(bucket, count1, count2) -#define RDTSC_ENDFRAME() rdtscEndFrame() +#define RDTSC_RESET(pBucketMgr) rdtscReset(pBucketMgr) +#define RDTSC_INIT(pBucketMgr, threadId) rdtscInit(pBucketMgr,threadId) +#define RDTSC_START(pBucketMgr, bucket) rdtscStart(pBucketMgr, bucket) +#define RDTSC_STOP(pBucketMgr, bucket, count, draw) rdtscStop(pBucketMgr, bucket, count, draw) +#define RDTSC_EVENT(pBucketMgr, bucket, count1, count2) rdtscEvent(pBucketMgr, bucket, count1, count2) +#define RDTSC_ENDFRAME(pBucketMgr) rdtscEndFrame(pBucketMgr) #else -#define RDTSC_RESET() -#define RDTSC_INIT(threadId) -#define RDTSC_START(bucket) -#define RDTSC_STOP(bucket, count, draw) -#define RDTSC_EVENT(bucket, count1, count2) -#define RDTSC_ENDFRAME() +#define RDTSC_RESET(pBucketMgr) +#define RDTSC_INIT(pBucketMgr, threadId) +#define RDTSC_START(pBucketMgr, bucket) +#define RDTSC_STOP(pBucketMgr, bucket, count, draw) +#define RDTSC_EVENT(pBucketMgr, bucket, count1, count2) +#define RDTSC_ENDFRAME(pBucketMgr) #endif -extern std::vector<uint32_t> gBucketMap; -extern BucketManager gBucketMgr; extern BUCKET_DESC gCoreBuckets[]; -extern uint32_t gCurrentFrame; -extern bool gBucketsInitialized; -INLINE void rdtscReset() +INLINE void rdtscReset(BucketManager *pBucketMgr) { - gCurrentFrame = 0; - gBucketMgr.ClearThreads(); + pBucketMgr->mCurrentFrame = 0; + pBucketMgr->ClearThreads(); } -INLINE void rdtscInit(int threadId) +INLINE void rdtscInit(BucketManager* pBucketMgr, int threadId) { // register all the buckets once - if (!gBucketsInitialized && (threadId == 0)) + if (!pBucketMgr->mBucketsInitialized && (threadId == 0)) { - gBucketMap.resize(NumBuckets); + pBucketMgr->mBucketMap.resize(NumBuckets); for (uint32_t i = 0; i < NumBuckets; ++i) { - gBucketMap[i] = gBucketMgr.RegisterBucket(gCoreBuckets[i]); + pBucketMgr->mBucketMap[i] = pBucketMgr->RegisterBucket(gCoreBuckets[i]); } - gBucketsInitialized = true; + pBucketMgr->mBucketsInitialized = true; } std::string name = threadId == 0 ? "API" : "WORKER"; - gBucketMgr.RegisterThread(name); + pBucketMgr->RegisterThread(name); } -INLINE void rdtscStart(uint32_t bucketId) +INLINE void rdtscStart(BucketManager* pBucketMgr, uint32_t bucketId) { - uint32_t id = gBucketMap[bucketId]; - gBucketMgr.StartBucket(id); + uint32_t id = pBucketMgr->mBucketMap[bucketId]; + pBucketMgr->StartBucket(id); } -INLINE void rdtscStop(uint32_t bucketId, uint32_t count, uint64_t drawId) +INLINE void rdtscStop(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count, uint64_t drawId) { - uint32_t id = gBucketMap[bucketId]; - gBucketMgr.StopBucket(id); + uint32_t id = pBucketMgr->mBucketMap[bucketId]; + pBucketMgr->StopBucket(id); } -INLINE void rdtscEvent(uint32_t bucketId, uint32_t count1, uint32_t count2) +INLINE void rdtscEvent(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count1, uint32_t count2) { - uint32_t id = gBucketMap[bucketId]; - gBucketMgr.AddEvent(id, count1); + uint32_t id = pBucketMgr->mBucketMap[bucketId]; + pBucketMgr->AddEvent(id, count1); } -INLINE void rdtscEndFrame() +INLINE void rdtscEndFrame(BucketManager* pBucketMgr) { - gCurrentFrame++; + pBucketMgr->mCurrentFrame++; - if (gCurrentFrame == KNOB_BUCKETS_START_FRAME && + if (pBucketMgr->mCurrentFrame == KNOB_BUCKETS_START_FRAME && KNOB_BUCKETS_START_FRAME < KNOB_BUCKETS_END_FRAME) { - gBucketMgr.StartCapture(); + pBucketMgr->StartCapture(); } - if (gCurrentFrame == KNOB_BUCKETS_END_FRAME && + if (pBucketMgr->mCurrentFrame == KNOB_BUCKETS_END_FRAME && KNOB_BUCKETS_START_FRAME < KNOB_BUCKETS_END_FRAME) { - gBucketMgr.StopCapture(); - gBucketMgr.PrintReport("rdtsc.txt"); + pBucketMgr->StopCapture(); + pBucketMgr->PrintReport("rdtsc.txt"); } } diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/threads.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/threads.h index d0f4b30dc..3072bbc83 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/threads.h +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/threads.h @@ -45,7 +45,8 @@ struct THREAD_DATA uint32_t numaId; // NUMA node id uint32_t coreId; // Core id uint32_t htId; // Hyperthread id - uint32_t workerId; + uint32_t workerId; // index of worker in total thread data + void* clipperData; // pointer to hang clipper-private data on SWR_CONTEXT* pContext; bool forceBindProcGroup; // Only useful when MAX_WORKER_THREADS is set. }; diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp index 19eec7e99..b67ffbfa7 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp @@ -32,7 +32,6 @@ #include "common/rdtsc_buckets.h" #include "builder_gfx_mem.h" - namespace SwrJit { using namespace llvm; @@ -42,7 +41,9 @@ namespace SwrJit mpTranslationFuncTy = nullptr; mpfnTranslateGfxAddressForRead = nullptr; mpfnTranslateGfxAddressForWrite = nullptr; + mpfnTrackMemAccess = nullptr; mpParamSimDC = nullptr; + mpWorkerData = nullptr; } @@ -50,13 +51,12 @@ namespace SwrJit { } - void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage) + void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage) { - SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL), + SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT::MEM_CLIENT_INTERNAL), "Internal memory should not be gfxptr_t."); } - ////////////////////////////////////////////////////////////////////////// /// @brief Generate a masked gather operation in LLVM IR. If not /// supported on the underlying platform, emulate it with loads @@ -70,7 +70,7 @@ namespace SwrJit Value* vIndices, Value* vMask, uint8_t scale, - JIT_MEM_CLIENT usage) + MEM_CLIENT usage) { // address may be coming in as 64bit int now so get the pointer if (pBase->getType() == mInt64Ty) @@ -95,7 +95,7 @@ namespace SwrJit Value* vIndices, Value* vMask, uint8_t scale, - JIT_MEM_CLIENT usage) + MEM_CLIENT usage) { // address may be coming in as 64bit int now so get the pointer @@ -109,7 +109,7 @@ namespace SwrJit } void BuilderGfxMem::SCATTERPS( - Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, JIT_MEM_CLIENT usage) + Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage) { // address may be coming in as 64bit int now so get the pointer @@ -118,46 +118,95 @@ namespace SwrJit pDst = INT_TO_PTR(pDst, PointerType::get(mInt8Ty, 0)); } - Builder::SCATTERPS(pDst, vSrc, vOffsets, vMask, usage); + Builder::SCATTERPS(pDst, BITCAST(vSrc, mSimdFP32Ty), vOffsets, vMask, usage); } - Value* BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset) { return ADD(base, offset); } - Value* BuilderGfxMem::GEP(Value* Ptr, Value* Idx, Type* Ty, const Twine& Name) + Value* BuilderGfxMem::GEP(Value* Ptr, Value* Idx, Type* Ty, bool isReadOnly, const Twine& Name) { - Ptr = TranslationHelper(Ptr, Ty); - return Builder::GEP(Ptr, Idx, nullptr, Name); + bool xlate = (Ptr->getType() == mInt64Ty); + if (xlate) + { + Ptr = INT_TO_PTR(Ptr, Ty); + Ptr = Builder::GEP(Ptr, Idx, nullptr, isReadOnly, Name); + Ptr = PTR_TO_INT(Ptr, mInt64Ty); + if (isReadOnly) + { + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); + } + else + { + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForWrite); + } + } + else + { + Ptr = Builder::GEP(Ptr, Idx, nullptr, isReadOnly, Name); + } + return Ptr; } Value* BuilderGfxMem::GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name) { - Ptr = TranslationHelper(Ptr, Ty); - return Builder::GEP(Ty, Ptr, Idx, Name); + bool xlate = (Ptr->getType() == mInt64Ty); + if (xlate) + { + Ptr = INT_TO_PTR(Ptr, Ty); + Ptr = Builder::GEP(Ty, Ptr, Idx, Name); + Ptr = PTR_TO_INT(Ptr, mInt64Ty); + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); + } + else + { + Ptr = Builder::GEP(Ty, Ptr, Idx, Name); + } + return Ptr; } Value* BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<Value*>& indexList, Type* Ty) { - Ptr = TranslationHelper(Ptr, Ty); - return Builder::GEP(Ptr, indexList); + bool xlate = (Ptr->getType() == mInt64Ty); + if (xlate) + { + Ptr = INT_TO_PTR(Ptr, Ty); + Ptr = Builder::GEP(Ptr, indexList); + Ptr = PTR_TO_INT(Ptr, mInt64Ty); + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); + } + else + { + Ptr = Builder::GEP(Ptr, indexList); + } + return Ptr; } Value* BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty) { - Ptr = TranslationHelper(Ptr, Ty); - return Builder::GEP(Ptr, indexList); + bool xlate = (Ptr->getType() == mInt64Ty); + if (xlate) + { + Ptr = INT_TO_PTR(Ptr, Ty); + Ptr = Builder::GEP(Ptr, indexList); + Ptr = PTR_TO_INT(Ptr, mInt64Ty); + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); + } + else + { + Ptr = Builder::GEP(Ptr, indexList); + } + return Ptr; } - Value* BuilderGfxMem::TranslationHelper(Value* Ptr, Type* Ty) + Value* BuilderGfxMem::TranslationHelper(Value* Ptr, Type* Ty, Value* pfnTranslateGfxAddress) { SWR_ASSERT(!(Ptr->getType() == mInt64Ty && Ty == nullptr), "Access of GFX pointers must have non-null type specified."); - // address may be coming in as 64bit int now so get the pointer if (Ptr->getType() == mInt64Ty) { @@ -167,29 +216,78 @@ namespace SwrJit return Ptr; } - LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage) + void BuilderGfxMem::TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead) + { +#if defined(KNOB_ENABLE_AR) + if (!KNOB_AR_ENABLE_MEMORY_EVENTS) + { + return; + } + + Value* tmpPtr; + // convert actual pointers to int64. + uint32_t size = 0; + + if (Ptr->getType() == mInt64Ty) + { + DataLayout dataLayout(JM()->mpCurrentModule); + size = (uint32_t)dataLayout.getTypeAllocSize(Ty); + + tmpPtr = Ptr; + } + else + { + DataLayout dataLayout(JM()->mpCurrentModule); + size = (uint32_t)dataLayout.getTypeAllocSize(Ptr->getType()); + + tmpPtr = PTR_TO_INT(Ptr, mInt64Ty); + } + + // There are some shader compile setups where there's no translation functions set up. + // This would be a situation where the accesses are to internal rasterizer memory and won't + // be logged. + // TODO: we may wish to revisit this for URB reads/writes, though. + if (mpfnTrackMemAccess) + { + SWR_ASSERT(mpWorkerData != nullptr); + CALL(mpfnTrackMemAccess, + {mpParamSimDC, + mpWorkerData, + tmpPtr, + C((uint32_t)size), + C((uint8_t)isRead), + C((uint32_t)usage)}); + } +#endif + + return; + } + + LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, MEM_CLIENT usage) { AssertGFXMemoryParams(Ptr, usage); + TrackerHelper(Ptr, Ty, usage, true); - Ptr = TranslationHelper(Ptr, Ty); + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); return Builder::LOAD(Ptr, Name); } - LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage) + LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, MEM_CLIENT usage) { AssertGFXMemoryParams(Ptr, usage); + TrackerHelper(Ptr, Ty, usage, true); - Ptr = TranslationHelper(Ptr, Ty); + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); return Builder::LOAD(Ptr, Name); } - LoadInst* BuilderGfxMem::LOAD( - Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage) + Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, MEM_CLIENT usage) { AssertGFXMemoryParams(Ptr, usage); + TrackerHelper(Ptr, Ty, usage, true); - Ptr = TranslationHelper(Ptr, Ty); + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); return Builder::LOAD(Ptr, isVolatile, Name); } @@ -197,7 +295,7 @@ namespace SwrJit const std::initializer_list<uint32_t>& offset, const llvm::Twine& name, Type* Ty, - JIT_MEM_CLIENT usage) + MEM_CLIENT usage) { AssertGFXMemoryParams(BasePtr, usage); @@ -222,25 +320,59 @@ namespace SwrJit return LOAD(BasePtr, name, Ty, usage); } - CallInst* BuilderGfxMem::MASKED_LOAD(Value* Ptr, unsigned Align, Value* Mask, Value* PassThru, const Twine& Name, Type* Ty, - JIT_MEM_CLIENT usage) + MEM_CLIENT usage) { AssertGFXMemoryParams(Ptr, usage); + TrackerHelper(Ptr, Ty, usage, true); - Ptr = TranslationHelper(Ptr, Ty); + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); return Builder::MASKED_LOAD(Ptr, Align, Mask, PassThru, Name, Ty, usage); } + StoreInst* + BuilderGfxMem::STORE(Value* Val, Value* Ptr, bool isVolatile, Type* Ty, MEM_CLIENT usage) + { + AssertGFXMemoryParams(Ptr, usage); + TrackerHelper(Ptr, Ty, usage, false); + + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); + return Builder::STORE(Val, Ptr, isVolatile, Ty, usage); + } + + StoreInst* BuilderGfxMem::STORE(Value* Val, + Value* BasePtr, + const std::initializer_list<uint32_t>& offset, + Type* Ty, + MEM_CLIENT usage) + { + AssertGFXMemoryParams(BasePtr, usage); + TrackerHelper(BasePtr, Ty, usage, false); + + BasePtr = TranslationHelper(BasePtr, Ty, mpfnTranslateGfxAddressForRead); + return Builder::STORE(Val, BasePtr, offset, Ty, usage); + } + + CallInst* BuilderGfxMem::MASKED_STORE( + Value* Val, Value* Ptr, unsigned Align, Value* Mask, Type* Ty, MEM_CLIENT usage) + { + AssertGFXMemoryParams(Ptr, usage); + + TrackerHelper(Ptr, Ty, usage, false); + + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); + return Builder::MASKED_STORE(Val, Ptr, Align, Mask, Ty, usage); + } + Value* BuilderGfxMem::TranslateGfxAddressForRead(Value* xpGfxAddress, Type* PtrTy, const Twine& Name, - JIT_MEM_CLIENT /* usage */) + MEM_CLIENT /* usage */) { if (PtrTy == nullptr) { @@ -252,7 +384,7 @@ namespace SwrJit Value* BuilderGfxMem::TranslateGfxAddressForWrite(Value* xpGfxAddress, Type* PtrTy, const Twine& Name, - JIT_MEM_CLIENT /* usage */) + MEM_CLIENT /* usage */) { if (PtrTy == nullptr) { diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h index 4cf062536..b1f662414 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h @@ -41,7 +41,7 @@ namespace SwrJit BuilderGfxMem(JitManager* pJitMgr); virtual ~BuilderGfxMem() {} - virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = ""); + virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, bool isReadOnly = true, const Twine& Name = ""); virtual Value* GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name = ""); virtual Value* GEP(Value* Ptr, const std::initializer_list<Value*>& indexList, Type* Ty = nullptr); @@ -51,22 +51,21 @@ namespace SwrJit virtual LoadInst* LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* Ptr, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* Ptr, bool isVolatile, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* BasePtr, const std::initializer_list<uint32_t>& offset, const llvm::Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); - + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual CallInst* MASKED_LOAD(Value* Ptr, unsigned Align, @@ -74,57 +73,65 @@ namespace SwrJit Value* PassThru = nullptr, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + + virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + + virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + + virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual Value* GATHERPS(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual Value* GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); - + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); Value* TranslateGfxAddressForRead(Value* xpGfxAddress, Type* PtrTy = nullptr, const Twine& Name = "", - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); Value* TranslateGfxAddressForWrite(Value* xpGfxAddress, Type* PtrTy = nullptr, const Twine& Name = "", - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); - - + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + protected: - void AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage); + void AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage); virtual void NotifyPrivateContextSet(); virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset); - Value* TranslationHelper(Value* Ptr, Type* Ty); + Value* TranslationHelper(Value* Ptr, Type* Ty, Value* pfnTranslateGfxAddress); + void TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead); FunctionType* GetTranslationFunctionType() { return mpTranslationFuncTy; } Value* GetTranslationFunctionForRead() { return mpfnTranslateGfxAddressForRead; } Value* GetTranslationFunctionForWrite() { return mpfnTranslateGfxAddressForWrite; } Value* GetParamSimDC() { return mpParamSimDC; } + Value* mpWorkerData; private: FunctionType* mpTranslationFuncTy; Value* mpfnTranslateGfxAddressForRead; Value* mpfnTranslateGfxAddressForWrite; Value* mpParamSimDC; + FunctionType* mpTrackMemAccessFuncTy; + Value* mpfnTrackMemAccess; }; } // namespace SwrJit diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp index a9d649092..2d8240187 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp @@ -34,14 +34,14 @@ namespace SwrJit { - void Builder::AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage) + void Builder::AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage) { SWR_ASSERT( ptr->getType() != mInt64Ty, "Address appears to be GFX access. Requires translation through BuilderGfxMem."); } - Value* Builder::GEP(Value* Ptr, Value* Idx, Type* Ty, const Twine& Name) + Value* Builder::GEP(Value* Ptr, Value* Idx, Type* Ty, bool isReadOnly, const Twine& Name) { return IRB()->CreateGEP(Ptr, Idx, Name); } @@ -93,26 +93,26 @@ namespace SwrJit return IN_BOUNDS_GEP(ptr, indices); } - LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage) + LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, MEM_CLIENT usage) { AssertMemoryUsageParams(Ptr, usage); return IRB()->CreateLoad(Ptr, Name); } - LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage) + LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, MEM_CLIENT usage) { AssertMemoryUsageParams(Ptr, usage); return IRB()->CreateLoad(Ptr, Name); } - LoadInst* Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, JIT_MEM_CLIENT usage) + LoadInst* Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, MEM_CLIENT usage) { AssertMemoryUsageParams(Ptr, usage); return IRB()->CreateLoad(Ty, Ptr, Name); } LoadInst* - Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage) + Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, MEM_CLIENT usage) { AssertMemoryUsageParams(Ptr, usage); return IRB()->CreateLoad(Ptr, isVolatile, Name); @@ -122,7 +122,7 @@ namespace SwrJit const std::initializer_list<uint32_t>& indices, const llvm::Twine& name, Type* Ty, - JIT_MEM_CLIENT usage) + MEM_CLIENT usage) { std::vector<Value*> valIndices; for (auto i : indices) @@ -141,7 +141,7 @@ namespace SwrJit } StoreInst* - Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices) + Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices, Type* Ty, MEM_CLIENT usage) { std::vector<Value*> valIndices; for (auto i : indices) @@ -186,7 +186,7 @@ namespace SwrJit Value* vIndices, Value* vMask, uint8_t scale, - JIT_MEM_CLIENT usage) + MEM_CLIENT usage) { AssertMemoryUsageParams(pBase, usage); @@ -206,7 +206,7 @@ namespace SwrJit Value* vIndices, Value* vMask, uint8_t scale, - JIT_MEM_CLIENT usage) + MEM_CLIENT usage) { AssertMemoryUsageParams(pBase, usage); @@ -237,13 +237,18 @@ namespace SwrJit return MASKED_GATHER(pVecSrcPtr, 4, pVecMask, pVecPassthru); } + void Builder::SCATTER_PTR(Value* pVecDstPtr, Value* pVecSrc, Value* pVecMask) + { + MASKED_SCATTER(pVecSrc, pVecDstPtr, 4, pVecMask); + } + void Builder::Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets, Value* mask, Value* vGatherComponents[], bool bPackedOutput, - JIT_MEM_CLIENT usage) + MEM_CLIENT usage) { const SWR_FORMAT_INFO& info = GetFormatInfo(format); if (info.type[0] == SWR_TYPE_FLOAT && info.bpc[0] == 32) @@ -262,7 +267,7 @@ namespace SwrJit Value* vMask, Value* vGatherComponents[], bool bPackedOutput, - JIT_MEM_CLIENT usage) + MEM_CLIENT usage) { switch (info.bpp / info.numComps) { @@ -336,7 +341,7 @@ namespace SwrJit Value* vMask, Value* vGatherComponents[], bool bPackedOutput, - JIT_MEM_CLIENT usage) + MEM_CLIENT usage) { switch (info.bpp / info.numComps) { @@ -643,9 +648,16 @@ namespace SwrJit /// @param vOffsets - vector of byte offsets from pDst /// @param vMask - mask of valid lanes void Builder::SCATTERPS( - Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, JIT_MEM_CLIENT usage) + Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage) { AssertMemoryUsageParams(pDst, usage); +// if (vSrc->getType() != mSimdFP32Ty) +// { +// vSrc = BITCAST(vSrc, mSimdFP32Ty); +// } + SWR_ASSERT(vSrc->getType()->getVectorElementType()->isFloatTy()); + VSCATTERPS(pDst, vMask, vOffsets, vSrc, C(1)); + return; /* Scatter algorithm @@ -657,6 +669,10 @@ namespace SwrJit */ + /* + + // Reference implementation kept around for reference + BasicBlock* pCurBB = IRB()->GetInsertBlock(); Function* pFunc = pCurBB->getParent(); Type* pSrcTy = vSrc->getType()->getVectorElementType(); @@ -744,5 +760,7 @@ namespace SwrJit // Move builder to beginning of post loop IRB()->SetInsertPoint(pPostLoop, pPostLoop->begin()); + + */ } } // namespace SwrJit diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h index c533984da..fe4c5dd38 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h @@ -30,20 +30,22 @@ #pragma once public: -enum class JIT_MEM_CLIENT +enum class MEM_CLIENT { MEM_CLIENT_INTERNAL, GFX_MEM_CLIENT_FETCH, GFX_MEM_CLIENT_SAMPLER, GFX_MEM_CLIENT_SHADER, + GFX_MEM_CLIENT_STREAMOUT, + GFX_MEM_CLIENT_URB }; protected: virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset); -void AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage); +void AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage); public: -virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = ""); +virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, bool isReadOnly = true, const Twine& Name = ""); virtual Value* GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name = ""); virtual Value* GEP(Value* ptr, const std::initializer_list<Value*>& indexList, Type* Ty = nullptr); virtual Value* @@ -56,23 +58,23 @@ Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*>& indexList) Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t>& indexList); virtual LoadInst* - LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* Ptr, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* - LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* Ptr, bool isVolatile, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* BasePtr, const std::initializer_list<uint32_t>& offset, const llvm::Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual CallInst* MASKED_LOAD(Value* Ptr, unsigned Align, @@ -80,14 +82,24 @@ virtual CallInst* MASKED_LOAD(Value* Ptr, Value* PassThru = nullptr, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL) + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL) { return IRB()->CreateMaskedLoad(Ptr, Align, Mask, PassThru, Name); } -LoadInst* - LOADV(Value* BasePtr, const std::initializer_list<Value*>& offset, const llvm::Twine& name = ""); -StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset); +virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL) +{ + return IRB()->CreateStore(Val, Ptr, isVolatile); +} + +virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); + +virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL) +{ + return IRB()->CreateMaskedStore(Val, Ptr, Align, Mask); +} + +LoadInst* LOADV(Value* BasePtr, const std::initializer_list<Value*>& offset, const llvm::Twine& name = ""); StoreInst* STOREV(Value* Val, Value* BasePtr, const std::initializer_list<Value*>& offset); Value* MEM_ADD(Value* i32Incr, @@ -101,14 +113,14 @@ void Gather4(const SWR_FORMAT format, Value* mask, Value* vGatherComponents[], bool bPackedOutput, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual Value* GATHERPS(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); void GATHER4PS(const SWR_FORMAT_INFO& info, Value* pSrcBase, @@ -116,14 +128,14 @@ void GATHER4PS(const SWR_FORMAT_INFO& info, Value* mask, Value* vGatherComponents[], bool bPackedOutput, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual Value* GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); void GATHER4DD(const SWR_FORMAT_INFO& info, Value* pSrcBase, @@ -131,17 +143,18 @@ void GATHER4DD(const SWR_FORMAT_INFO& info, Value* mask, Value* vGatherComponents[], bool bPackedOutput, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); Value* GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1); Value* GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* pVecPassthru); +void SCATTER_PTR(Value* pVecDstPtr, Value* pVecSrc, Value* pVecMask); virtual void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, - JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); + MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); void Shuffle8bpcGather4(const SWR_FORMAT_INFO& info, Value* vGatherInput, diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp index c34959d35..d00a8963d 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp @@ -32,8 +32,12 @@ #include "passes.h" #include "JitManager.h" +#include "common/simdlib.hpp" + #include <unordered_map> +extern "C" void ScatterPS_256(uint8_t*, SIMD256::Integer, SIMD256::Float, uint8_t, uint32_t); + namespace llvm { // foward declare the initializer @@ -88,6 +92,8 @@ namespace SwrJit Instruction* VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst); Instruction* + VSCATTER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst); + Instruction* VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst); Instruction* VHSUB_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst); @@ -102,88 +108,61 @@ namespace SwrJit static Intrinsic::ID DOUBLE = (Intrinsic::ID)-1; + // clang-format off static std::map<std::string, X86Intrinsic> intrinsicMap2[] = { - // 256 wide 512 wide + // 256 wide 512 wide { // AVX - {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}}, - {"meta.intrinsic.VPERMPS", - {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}}, - {"meta.intrinsic.VPERMD", - {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}}, - {"meta.intrinsic.VGATHERPD", - {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, - {"meta.intrinsic.VGATHERPS", - {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, - {"meta.intrinsic.VGATHERDD", - {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, - {"meta.intrinsic.VCVTPD2PS", - {{Intrinsic::x86_avx_cvt_pd2_ps_256, Intrinsic::not_intrinsic}, NO_EMU}}, - {"meta.intrinsic.VCVTPH2PS", - {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}}, - {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}}, - {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}}, + {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}}, + {"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}}, + {"meta.intrinsic.VPERMD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}}, + {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, + {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, + {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, + {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}}, + {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, Intrinsic::not_intrinsic}, NO_EMU}}, + {"meta.intrinsic.VCVTPH2PS", {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}}, + {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}}, + {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}}, }, { // AVX2 - {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}}, - {"meta.intrinsic.VPERMPS", - {{Intrinsic::x86_avx2_permps, Intrinsic::not_intrinsic}, VPERM_EMU}}, - {"meta.intrinsic.VPERMD", - {{Intrinsic::x86_avx2_permd, Intrinsic::not_intrinsic}, VPERM_EMU}}, - {"meta.intrinsic.VGATHERPD", - {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, - {"meta.intrinsic.VGATHERPS", - {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, - {"meta.intrinsic.VGATHERDD", - {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, - {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, DOUBLE}, NO_EMU}}, - {"meta.intrinsic.VCVTPH2PS", - {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}}, - {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}}, - {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}}, + {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}}, + {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx2_permps, Intrinsic::not_intrinsic}, VPERM_EMU}}, + {"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx2_permd, Intrinsic::not_intrinsic}, VPERM_EMU}}, + {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, + {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, + {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, + {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}}, + {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, DOUBLE}, NO_EMU}}, + {"meta.intrinsic.VCVTPH2PS", {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}}, + {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}}, + {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}}, }, { // AVX512 - {"meta.intrinsic.VRCPPS", - {{Intrinsic::x86_avx512_rcp14_ps_256, Intrinsic::x86_avx512_rcp14_ps_512}, NO_EMU}}, + {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx512_rcp14_ps_256, Intrinsic::x86_avx512_rcp14_ps_512}, NO_EMU}}, #if LLVM_VERSION_MAJOR < 7 - {"meta.intrinsic.VPERMPS", - {{Intrinsic::x86_avx512_mask_permvar_sf_256, - Intrinsic::x86_avx512_mask_permvar_sf_512}, - NO_EMU}}, - {"meta.intrinsic.VPERMD", - {{Intrinsic::x86_avx512_mask_permvar_si_256, - Intrinsic::x86_avx512_mask_permvar_si_512}, - NO_EMU}}, + {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx512_mask_permvar_sf_256, Intrinsic::x86_avx512_mask_permvar_sf_512}, NO_EMU}}, + {"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx512_mask_permvar_si_256, Intrinsic::x86_avx512_mask_permvar_si_512}, NO_EMU}}, #else - {"meta.intrinsic.VPERMPS", - {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}}, - {"meta.intrinsic.VPERMD", - {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}}, + {"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}}, + {"meta.intrinsic.VPERMD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}}, #endif - {"meta.intrinsic.VGATHERPD", - {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, - {"meta.intrinsic.VGATHERPS", - {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, - {"meta.intrinsic.VGATHERDD", - {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, + {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, + {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, + {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, + {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}}, #if LLVM_VERSION_MAJOR < 7 - {"meta.intrinsic.VCVTPD2PS", - {{Intrinsic::x86_avx512_mask_cvtpd2ps_256, Intrinsic::x86_avx512_mask_cvtpd2ps_512}, - NO_EMU}}, + {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx512_mask_cvtpd2ps_256, Intrinsic::x86_avx512_mask_cvtpd2ps_512}, NO_EMU}}, #else - {"meta.intrinsic.VCVTPD2PS", - {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VCONVERT_EMU}}, + {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VCONVERT_EMU}}, #endif - {"meta.intrinsic.VCVTPH2PS", - {{Intrinsic::x86_avx512_mask_vcvtph2ps_256, Intrinsic::x86_avx512_mask_vcvtph2ps_512}, - NO_EMU}}, - {"meta.intrinsic.VROUND", - {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VROUND_EMU}}, - {"meta.intrinsic.VHSUBPS", - {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VHSUB_EMU}}, + {"meta.intrinsic.VCVTPH2PS", {{Intrinsic::x86_avx512_mask_vcvtph2ps_256, Intrinsic::x86_avx512_mask_vcvtph2ps_512}, NO_EMU}}, + {"meta.intrinsic.VROUND", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VROUND_EMU}}, + {"meta.intrinsic.VHSUBPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VHSUB_EMU}}, }}; + // clang-format on struct LowerX86 : public FunctionPass { @@ -209,6 +188,31 @@ namespace SwrJit SWR_ASSERT(false, "Unsupported AVX architecture."); mTarget = AVX; } + + // Setup scatter function for 256 wide + uint32_t curWidth = B->mVWidth; + B->SetTargetWidth(8); + std::vector<Type*> args = { + B->mInt8PtrTy, // pBase + B->mSimdInt32Ty, // vIndices + B->mSimdFP32Ty, // vSrc + B->mInt8Ty, // mask + B->mInt32Ty // scale + }; + + FunctionType* pfnScatterTy = FunctionType::get(B->mVoidTy, args, false); + mPfnScatter256 = cast<Function>( +#if LLVM_VERSION_MAJOR >= 9 + B->JM()->mpCurrentModule->getOrInsertFunction("ScatterPS_256", pfnScatterTy).getCallee()); +#else + B->JM()->mpCurrentModule->getOrInsertFunction("ScatterPS_256", pfnScatterTy)); +#endif + if (sys::DynamicLibrary::SearchForAddressOfSymbol("ScatterPS_256") == nullptr) + { + sys::DynamicLibrary::AddSymbol("ScatterPS_256", (void*)&ScatterPS_256); + } + + B->SetTargetWidth(curWidth); } // Try to decipher the vector type of the instruction. This does not work properly @@ -392,23 +396,39 @@ namespace SwrJit virtual bool runOnFunction(Function& F) { std::vector<Instruction*> toRemove; + std::vector<BasicBlock*> bbs; + + // Make temp copy of the basic blocks and instructions, as the intrinsic + // replacement code might invalidate the iterators + for (auto& b : F.getBasicBlockList()) + { + bbs.push_back(&b); + } - for (auto& BB : F.getBasicBlockList()) + for (auto* BB : bbs) { - for (auto& I : BB.getInstList()) + std::vector<Instruction*> insts; + for (auto& i : BB->getInstList()) + { + insts.push_back(&i); + } + + for (auto* I : insts) { - if (CallInst* pCallInst = dyn_cast<CallInst>(&I)) + if (CallInst* pCallInst = dyn_cast<CallInst>(I)) { Function* pFunc = pCallInst->getCalledFunction(); if (pFunc) { if (pFunc->getName().startswith("meta.intrinsic")) { - B->IRB()->SetInsertPoint(&I); + B->IRB()->SetInsertPoint(I); Instruction* pReplace = ProcessIntrinsic(pCallInst); - SWR_ASSERT(pReplace); toRemove.push_back(pCallInst); - pCallInst->replaceAllUsesWith(pReplace); + if (pReplace) + { + pCallInst->replaceAllUsesWith(pReplace); + } } } } @@ -428,10 +448,9 @@ namespace SwrJit virtual void getAnalysisUsage(AnalysisUsage& AU) const {} JitManager* JM() { return B->JM(); } - - Builder* B; - - TargetArch mTarget; + Builder* B; + TargetArch mTarget; + Function* mPfnScatter256; static char ID; ///< Needed by LLVM to generate ID for FunctionPass. }; @@ -639,6 +658,69 @@ namespace SwrJit return cast<Instruction>(v32Gather); } + Instruction* + VSCATTER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst) + { + Builder* B = pThis->B; + auto pBase = pCallInst->getArgOperand(0); + auto vi1Mask = pCallInst->getArgOperand(1); + auto vi32Indices = pCallInst->getArgOperand(2); + auto v32Src = pCallInst->getArgOperand(3); + auto i32Scale = pCallInst->getArgOperand(4); + + if (arch != AVX512) + { + // Call into C function to do the scatter. This has significantly better compile perf + // compared to jitting scatter loops for every scatter + if (width == W256) + { + auto mask = B->BITCAST(vi1Mask, B->mInt8Ty); + B->CALL(pThis->mPfnScatter256, {pBase, vi32Indices, v32Src, mask, i32Scale}); + } + else + { + // Need to break up 512 wide scatter to two 256 wide + auto maskLo = B->VSHUFFLE(vi1Mask, vi1Mask, B->C({0, 1, 2, 3, 4, 5, 6, 7})); + auto indicesLo = + B->VSHUFFLE(vi32Indices, vi32Indices, B->C({0, 1, 2, 3, 4, 5, 6, 7})); + auto srcLo = B->VSHUFFLE(v32Src, v32Src, B->C({0, 1, 2, 3, 4, 5, 6, 7})); + + auto mask = B->BITCAST(maskLo, B->mInt8Ty); + B->CALL(pThis->mPfnScatter256, {pBase, indicesLo, srcLo, mask, i32Scale}); + + auto maskHi = B->VSHUFFLE(vi1Mask, vi1Mask, B->C({8, 9, 10, 11, 12, 13, 14, 15})); + auto indicesHi = + B->VSHUFFLE(vi32Indices, vi32Indices, B->C({8, 9, 10, 11, 12, 13, 14, 15})); + auto srcHi = B->VSHUFFLE(v32Src, v32Src, B->C({8, 9, 10, 11, 12, 13, 14, 15})); + + mask = B->BITCAST(maskHi, B->mInt8Ty); + B->CALL(pThis->mPfnScatter256, {pBase, indicesHi, srcHi, mask, i32Scale}); + } + return nullptr; + } + + Value* iMask; + Function* pX86IntrinFunc; + if (width == W256) + { + // No direct intrinsic supported in llvm to scatter 8 elem with 32bit indices, but we + // can use the scatter of 8 elements with 64bit indices + pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, + Intrinsic::x86_avx512_scatter_qps_512); + + auto vi32IndicesExt = B->Z_EXT(vi32Indices, B->mSimdInt64Ty); + iMask = B->BITCAST(vi1Mask, B->mInt8Ty); + B->CALL(pX86IntrinFunc, {pBase, iMask, vi32IndicesExt, v32Src, i32Scale}); + } + else if (width == W512) + { + pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, + Intrinsic::x86_avx512_scatter_dps_512); + iMask = B->BITCAST(vi1Mask, B->mInt16Ty); + B->CALL(pX86IntrinFunc, {pBase, iMask, vi32Indices, v32Src, i32Scale}); + } + return nullptr; + } // No support for vroundps in avx512 (it is available in kncni), so emulate with avx // instructions diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h index d3c732af0..e0bb75cde 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h @@ -25,6 +25,7 @@ * @brief Include file for llvm passes * ******************************************************************************/ +#pragma once #include "JitManager.h" #include "builder.h" diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/shader_lib/Scatter.cpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/shader_lib/Scatter.cpp new file mode 100644 index 000000000..de8115404 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/shader_lib/Scatter.cpp @@ -0,0 +1,49 @@ +/**************************************************************************** + * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * @file Scatter.cpp + * + * @brief Shader support library implementation for scatter emulation + * + * Notes: + * + ******************************************************************************/ +#include <stdarg.h> +#include "common/os.h" +#include "common/simdlib.hpp" + +extern "C" void ScatterPS_256(uint8_t* pBase, SIMD256::Integer vIndices, SIMD256::Float vSrc, uint8_t mask, uint32_t scale) +{ + OSALIGN(float, 32) src[8]; + OSALIGN(uint32_t, 32) indices[8]; + + SIMD256::store_ps(src, vSrc); + SIMD256::store_si((SIMD256::Integer*)indices, vIndices); + + DWORD index; + while (_BitScanForward(&index, mask)) + { + mask &= ~(1 << index); + + *(float*)(pBase + indices[index] * scale) = src[index]; + } +} diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp index bff96e17f..3a19bbac7 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp @@ -25,7 +25,11 @@ * @brief Provide access to tiles table initialization functions * ******************************************************************************/ + #include "memory/InitMemory.h" +#include "memory/LoadTile.h" +#include "memory/StoreTile.h" +#include "InitMemory.h" void InitSimLoadTilesTable(); void InitSimStoreTilesTable(); @@ -37,3 +41,10 @@ void InitTilesTable() InitSimStoreTilesTable(); InitSimClearTilesTable(); } + + +void SwrGetTileIterface(SWR_TILE_INTERFACE &out_funcs) +{ + out_funcs.pfnSwrLoadHotTile = SwrLoadHotTile; + out_funcs.pfnSwrStoreHotTileToSurface = SwrStoreHotTileToSurface; +}
\ No newline at end of file diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h index 14cca6ab3..6838ddc70 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h @@ -25,9 +25,57 @@ * @brief Provide access to tiles table initialization functions * ******************************************************************************/ + +#pragma once + #include "common/os.h" +#include "memory/SurfaceState.h" + +////////////////////////////////////////////////////////////////////////// +/// @brief Loads a full hottile from a render surface +/// @param hPrivateContext - Handle to private DC +/// @param dstFormat - Format for hot tile. +/// @param renderTargetIndex - Index to src render target +/// @param x, y - Coordinates to raster tile. +/// @param pDstHotTile - Pointer to Hot Tile +SWR_FUNC(void, + SwrLoadHotTile, + HANDLE hWorkerPrivateData, + const SWR_SURFACE_STATE* pSrcSurface, + SWR_FORMAT dstFormat, + SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, + uint32_t x, + uint32_t y, + uint32_t renderTargetArrayIndex, + uint8_t* pDstHotTile); + +////////////////////////////////////////////////////////////////////////// +/// @brief Deswizzles and stores a full hottile to a render surface +/// @param hPrivateContext - Handle to private DC +/// @param srcFormat - Format for hot tile. +/// @param renderTargetIndex - Index to destination render target +/// @param x, y - Coordinates to raster tile. +/// @param pSrcHotTile - Pointer to Hot Tile +SWR_FUNC(void, + SwrStoreHotTileToSurface, + HANDLE hWorkerPrivateData, + SWR_SURFACE_STATE* pDstSurface, + SWR_FORMAT srcFormat, + SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, + uint32_t x, + uint32_t y, + uint32_t renderTargetArrayIndex, + uint8_t* pSrcHotTile); + +struct SWR_TILE_INTERFACE { + PFNSwrLoadHotTile pfnSwrLoadHotTile; + PFNSwrStoreHotTileToSurface pfnSwrStoreHotTileToSurface; +}; extern "C" { SWR_VISIBLE void SWR_API InitTilesTable(); + + typedef void(SWR_API* PFNSwrGetTileInterface)(SWR_TILE_INTERFACE& out_funcs); + SWR_VISIBLE void SWR_API SwrGetTileIterface(SWR_TILE_INTERFACE &out_funcs); } diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/SurfaceState.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/SurfaceState.h new file mode 100644 index 000000000..6b1b78eee --- /dev/null +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/SurfaceState.h @@ -0,0 +1,66 @@ +/**************************************************************************** +* Copyright (C) 2014-2019 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +* +* @file SurfaceState.h +* +* @brief Common definitions for surface state +* +******************************************************************************/ +#pragma once + +#include "core/state.h" + +////////////////////////////////////////////////////////////////////////// +/// SWR_SURFACE_STATE +////////////////////////////////////////////////////////////////////////// +struct SWR_SURFACE_STATE +{ + gfxptr_t xpBaseAddress; + SWR_SURFACE_TYPE type; // @llvm_enum + SWR_FORMAT format; // @llvm_enum + uint32_t width; + uint32_t height; + uint32_t depth; + uint32_t numSamples; + uint32_t samplePattern; + uint32_t pitch; + uint32_t qpitch; + uint32_t minLod; // for sampled surfaces, the most detailed LOD that can be accessed by sampler + uint32_t maxLod; // for sampled surfaces, the max LOD that can be accessed + float resourceMinLod; // for sampled surfaces, the most detailed fractional mip that can be + // accessed by sampler + uint32_t lod; // for render targets, the lod being rendered to + uint32_t arrayIndex; // for render targets, the array index being rendered to for arrayed surfaces + SWR_TILE_MODE tileMode; // @llvm_enum + uint32_t halign; + uint32_t valign; + uint32_t xOffset; + uint32_t yOffset; + + uint32_t lodOffsets[2][15]; // lod offsets for sampled surfaces + + gfxptr_t xpAuxBaseAddress; // Used for compression, append/consume counter, etc. + SWR_AUX_MODE auxMode; // @llvm_enum + + + bool bInterleavedSamples; // are MSAA samples stored interleaved or planar +};
\ No newline at end of file diff --git a/lib/mesa/src/gallium/drivers/swr/swr_query.h b/lib/mesa/src/gallium/drivers/swr/swr_query.h index 1c736e4e1..d838dc859 100644 --- a/lib/mesa/src/gallium/drivers/swr/swr_query.h +++ b/lib/mesa/src/gallium/drivers/swr/swr_query.h @@ -44,5 +44,5 @@ OSALIGNLINE(struct) swr_query { extern void swr_query_init(struct pipe_context *pipe); -extern boolean swr_check_render_cond(struct pipe_context *pipe); +extern bool swr_check_render_cond(struct pipe_context *pipe); #endif |