summary | refs | log | tree | commit | diff
path: root/lib/mesa/src/gallium/drivers/swr
diff options
context:
space:
mode:
author: Jonathan Gray <jsg@cvs.openbsd.org> 2020-01-22 02:13:18 +0000
committer: Jonathan Gray <jsg@cvs.openbsd.org> 2020-01-22 02:13:18 +0000
commit: fdcc03929065b5bf5dd93553db219ea3e05c8c34 (patch)
tree: ca90dc8d9e89febdcd4160956c1b8ec098a4efc9 /lib/mesa/src/gallium/drivers/swr
parent: 3c9de4a7e13712b5696750bbd59a18c848742022 (diff)
Import Mesa 19.2.8
Diffstat (limited to 'lib/mesa/src/gallium/drivers/swr')
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/.clang-format 64
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/meson.build 4
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/.dir-locals.el 8
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/_clang-format 114
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto 75
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/meson.build 35
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h 14
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/common/isa.hpp 2
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h 12
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp 6
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h 90
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/core/threads.h 3
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp 198
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h 41
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp 46
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h 49
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp 234
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h 1
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/shader_lib/Scatter.cpp 49
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp 11
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h 48
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/rasterizer/memory/SurfaceState.h 66
-rw-r--r-- lib/mesa/src/gallium/drivers/swr/swr_query.h 2
23 files changed, 899 insertions, 273 deletions
diff --git a/lib/mesa/src/gallium/drivers/swr/.clang-format b/lib/mesa/src/gallium/drivers/swr/.clang-format
new file mode 100644
index 000000000..0ec65a5de
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/swr/.clang-format
@@ -0,0 +1,64 @@
+---
+Language: Cpp
+AccessModifierOffset: -3
+AlignAfterOpenBracket: true
+AlignEscapedNewlinesLeft: false
+AlignOperands: false
+AlignTrailingComments: false
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: All
+AlwaysBreakAfterDefinitionReturnType: true
+AlwaysBreakTemplateDeclarations: false
+AlwaysBreakBeforeMultilineStrings: false
+BreakBeforeBinaryOperators: NonAssignment
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: true
+BinPackParameters: false
+BinPackArguments: false
+ColumnLimit: 78
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 3
+DerivePointerAlignment: false
+ExperimentalAutoDetectBinPacking: false
+IndentCaseLabels: false
+IndentWrappedFunctionNames: false
+IndentFunctionDeclarationAfterType: false
+MaxEmptyLinesToKeep: 2
+KeepEmptyLinesAtTheStartOfBlocks: true
+NamespaceIndentation: Inner
+ObjCBlockIndentWidth: 3
+ObjCSpaceAfterProperty: true
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakString: 1000
+PenaltyBreakFirstLessLess: 120
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 0
+PointerAlignment: Right
+SpacesBeforeTrailingComments: 1
+Cpp11BracedListStyle: true
+Standard: Cpp11
+IndentWidth: 3
+TabWidth: 8
+UseTab: Never
+BreakBeforeBraces: Linux
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+SpacesInAngles: false
+SpaceInEmptyParentheses: false
+SpacesInCStyleCastParentheses: false
+SpaceAfterCStyleCast: false
+SpacesInContainerLiterals: true
+SpaceBeforeAssignmentOperators: true
+ContinuationIndentWidth: 3
+CommentPragmas: '^ IWYU pragma:'
+ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
+SpaceBeforeParens: ControlStatements
+DisableFormat: false
+...
+
diff --git a/lib/mesa/src/gallium/drivers/swr/meson.build b/lib/mesa/src/gallium/drivers/swr/meson.build
index 3f32d031e..658747563 100644
--- a/lib/mesa/src/gallium/drivers/swr/meson.build
+++ b/lib/mesa/src/gallium/drivers/swr/meson.build
@@ -82,7 +82,9 @@ files_swr_mesa = files(
'rasterizer/jitter/streamout_jit.cpp',
'rasterizer/jitter/streamout_jit.h',
'rasterizer/jitter/shader_lib/DebugOutput.cpp',
+ 'rasterizer/jitter/shader_lib/Scatter.cpp',
'rasterizer/jitter/functionpasses/lower_x86.cpp',
+ 'rasterizer/memory/SurfaceState.h'
)
files_swr_arch = files(
@@ -153,10 +155,12 @@ files_swr_arch = files(
'rasterizer/memory/tilingtraits.h',
'rasterizer/memory/InitMemory.h',
'rasterizer/memory/InitMemory.cpp',
+ 'rasterizer/memory/SurfaceState.h'
)
swr_context_files = files('swr_context.h')
swr_state_files = files('rasterizer/core/state.h')
+swr_surf_state_files = files('rasterizer/memory/SurfaceState.h')
swr_event_proto_files = files('rasterizer/archrast/events.proto')
swr_event_pproto_files = files('rasterizer/archrast/events_private.proto')
swr_gen_backend_files = files('rasterizer/codegen/templates/gen_backend.cpp')
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/.dir-locals.el b/lib/mesa/src/gallium/drivers/swr/rasterizer/.dir-locals.el
new file mode 100644
index 000000000..2b04c18a9
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/.dir-locals.el
@@ -0,0 +1,8 @@
+((prog-mode
+ (c-basic-offset . 4)
+ (c-file-style . "k&r")
+ (fill-column . 78)
+ (indent-tabs-mode . nil)
+ (show-trailing-whitespace . t)
+ )
+ )
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/_clang-format b/lib/mesa/src/gallium/drivers/swr/rasterizer/_clang-format
new file mode 100644
index 000000000..ed4b9b409
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/_clang-format
@@ -0,0 +1,114 @@
+---
+Language: Cpp
+# BasedOnStyle: LLVM
+AccessModifierOffset: -4
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: true
+AlignConsecutiveDeclarations: true
+AlignEscapedNewlines: Left
+AlignOperands: true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Inline
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: false
+BinPackParameters: false
+BraceWrapping:
+ AfterClass: true
+ AfterControlStatement: true
+ AfterEnum: true
+ AfterFunction: true
+ AfterNamespace: true
+ AfterObjCDeclaration: true
+ AfterStruct: true
+ AfterUnion: true
+ #AfterExternBlock: false
+ BeforeCatch: true
+ BeforeElse: true
+ IndentBraces: false
+ SplitEmptyFunction: true
+ SplitEmptyRecord: true
+ SplitEmptyNamespace: true
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Custom
+BreakBeforeInheritanceComma: false
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BreakConstructorInitializers: AfterColon
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: true
+ColumnLimit: 100
+CommentPragmas: '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
+ForEachMacros:
+ - foreach
+ - Q_FOREACH
+ - BOOST_FOREACH
+#IncludeBlocks: Preserve
+IncludeCategories:
+ - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
+ Priority: 2
+ - Regex: '^(<|"(gtest|gmock|isl|json)/)'
+ Priority: 3
+ - Regex: '.*'
+ Priority: 1
+IncludeIsMainRegex: '(Test)?$'
+IndentCaseLabels: false
+#IndentPPDirectives: AfterHash
+IndentWidth: 4
+IndentWrappedFunctionNames: false
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: All
+ObjCBlockIndentWidth: 4
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 60
+PointerAlignment: Left
+#RawStringFormats:
+# - Delimiter: pb
+# Language: TextProto
+# BasedOnStyle: google
+ReflowComments: true
+SortIncludes: false
+SortUsingDeclarations: true
+SpaceAfterCStyleCast: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp11
+TabWidth: 4
+UseTab: Never
+...
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto b/lib/mesa/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
index f5cfb470a..b57d5c428 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
@@ -24,84 +24,99 @@
# ArchRast is to not pollute the Rasty code with lots of calculations, etc. that
# are needed to compute per draw statistics, etc.
-event EarlyDepthStencilInfoSingleSample
+event PipelineStats::EarlyDepthStencilInfoSingleSample
{
uint64_t depthPassMask;
uint64_t stencilPassMask;
uint64_t coverageMask;
};
-event EarlyDepthStencilInfoSampleRate
+event PipelineStats::EarlyDepthStencilInfoSampleRate
{
uint64_t depthPassMask;
uint64_t stencilPassMask;
uint64_t coverageMask;
};
-event EarlyDepthStencilInfoNullPS
+event PipelineStats::EarlyDepthStencilInfoNullPS
{
uint64_t depthPassMask;
uint64_t stencilPassMask;
uint64_t coverageMask;
};
-event LateDepthStencilInfoSingleSample
+event PipelineStats::LateDepthStencilInfoSingleSample
{
uint64_t depthPassMask;
uint64_t stencilPassMask;
uint64_t coverageMask;
};
-event LateDepthStencilInfoSampleRate
+event PipelineStats::LateDepthStencilInfoSampleRate
{
uint64_t depthPassMask;
uint64_t stencilPassMask;
uint64_t coverageMask;
};
-event LateDepthStencilInfoNullPS
+event PipelineStats::LateDepthStencilInfoNullPS
{
uint64_t depthPassMask;
uint64_t stencilPassMask;
uint64_t coverageMask;
};
-event EarlyDepthInfoPixelRate
+event PipelineStats::EarlyDepthInfoPixelRate
{
uint64_t depthPassCount;
uint64_t activeLanes;
};
-event LateDepthInfoPixelRate
+event PipelineStats::LateDepthInfoPixelRate
{
uint64_t depthPassCount;
uint64_t activeLanes;
};
-event BackendDrawEndEvent
+event PipelineStats::BackendDrawEndEvent
{
uint32_t drawId;
};
-event FrontendDrawEndEvent
+event PipelineStats::FrontendDrawEndEvent
{
uint32_t drawId;
};
-event TessPrimCount
+event Memory::MemoryAccessEvent
+{
+ uint32_t drawId;
+ uint64_t tsc;
+ uint64_t ptr;
+ uint32_t size;
+ uint8_t isRead;
+ uint8_t client;
+};
+
+event Memory::MemoryStatsEndEvent
+{
+ uint32_t drawId;
+};
+
+event PipelineStats::TessPrimCount
{
uint64_t primCount;
};
-event RasterTileCount
+event PipelineStats::RasterTileCount
{
uint32_t drawId;
uint64_t rasterTiles;
};
-event GSPrimInfo
+event PipelineStats::GSPrimInfo
{
uint64_t inputPrimCount;
uint64_t primGeneratedCount;
@@ -113,14 +128,14 @@ event GSPrimInfo
// Trivial reject is numInvocations - pop_cnt32(validMask)
// Trivial accept is validMask & ~clipMask
// Must clip count is pop_cnt32(clipMask)
-event ClipInfoEvent
+event PipelineStats::ClipInfoEvent
{
uint32_t numInvocations;
uint32_t validMask;
uint32_t clipMask;
};
-event CullInfoEvent
+event PipelineStats::CullInfoEvent
{
uint32_t drawId;
uint64_t degeneratePrimMask;
@@ -128,14 +143,14 @@ event CullInfoEvent
uint32_t validMask;
};
-event AlphaInfoEvent
+event PipelineStats::AlphaInfoEvent
{
uint32_t drawId;
uint32_t alphaTestEnable;
uint32_t alphaBlendEnable;
};
-event DrawInstancedEvent
+event PipelineStats::DrawInstancedEvent
{
uint32_t drawId;
uint32_t topology;
@@ -150,7 +165,7 @@ event DrawInstancedEvent
uint32_t splitId; // Split draw count or id.
};
-event DrawIndexedInstancedEvent
+event PipelineStats::DrawIndexedInstancedEvent
{
uint32_t drawId;
uint32_t topology;
@@ -166,32 +181,32 @@ event DrawIndexedInstancedEvent
uint32_t splitId; // Split draw count or id.
};
-event VSStats
+event ShaderStats::VSStats
{
- uint32_t numInstExecuted;
+ HANDLE hStats; // SWR_SHADER_STATS
};
-event HSStats
+event ShaderStats::HSStats
{
- uint32_t numInstExecuted;
+ HANDLE hStats; // SWR_SHADER_STATS
};
-event DSStats
+event ShaderStats::DSStats
{
- uint32_t numInstExecuted;
+ HANDLE hStats; // SWR_SHADER_STATS
};
-event GSStats
+event ShaderStats::GSStats
{
- uint32_t numInstExecuted;
+ HANDLE hStats; // SWR_SHADER_STATS
};
-event PSStats
+event ShaderStats::PSStats
{
- uint32_t numInstExecuted;
+ HANDLE hStats; // SWR_SHADER_STATS
};
-event CSStats
+event ShaderStats::CSStats
{
- uint32_t numInstExecuted;
+ HANDLE hStats; // SWR_SHADER_STATS
};
\ No newline at end of file
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/meson.build b/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/meson.build
index 282751760..daf79ed4c 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/meson.build
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/meson.build
@@ -48,7 +48,8 @@ files_swr_common += [
]
foreach x : [[swr_context_files, 'gen_swr_context_llvm.h'],
- [swr_state_files, 'gen_state_llvm.h']]
+ [swr_state_files, 'gen_state_llvm.h'],
+ [swr_surf_state_files, 'gen_surf_state_llvm.h']]
files_swr_common += custom_target(
x[1],
input : ['gen_llvm_types.py', x[0]],
@@ -61,26 +62,16 @@ foreach x : [[swr_context_files, 'gen_swr_context_llvm.h'],
)
endforeach
-foreach x : [['gen_ar_event.hpp', '--gen_event_hpp'],
- ['gen_ar_event.cpp', '--gen_event_cpp'],
- ['gen_ar_eventhandler.hpp', '--gen_eventhandler_h'],
- ['gen_ar_eventhandlerfile.hpp', '--gen_eventhandlerfile_h']]
- files_swr_common += custom_target(
- x[0],
- input : ['gen_archrast.py', swr_event_proto_files, swr_event_pproto_files],
- output : x[0],
- command : [
- prog_python,
- '@INPUT0@',
- '--proto', '@INPUT1@',
- '--proto_private', '@INPUT2@',
- '--output', '@OUTPUT@',
- x[1],
- ],
- depend_files : files(
- join_paths('templates', x[0]),
- 'gen_common.py',
- ),
- )
+ar_output_filenames = ['gen_ar_event.hpp', 'gen_ar_event.cpp', 'gen_ar_eventhandler.hpp', 'gen_ar_eventhandlerfile.hpp']
+ar_template_filenames = []
+foreach fname : ar_output_filenames
+ ar_template_filenames += join_paths('templates', fname)
endforeach
+files_swr_common += custom_target(
+ 'gen_archrast',
+ input : ['gen_archrast.py', swr_event_proto_files, swr_event_pproto_files],
+ output : ar_output_filenames,
+ command : [prog_python, '@INPUT0@', '--proto', '@INPUT1@', '@INPUT2@', '--output-dir', meson.current_build_dir()],
+ depend_files : files('gen_common.py', ar_template_filenames)
+)
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h
index 71dbdacfd..8b88a1170 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h
@@ -72,7 +72,7 @@ private:
T m_Value;
};
-#define DEFINE_KNOB(_name, _type, _default) \\
+#define DEFINE_KNOB(_name, _type) \\
struct Knob_##_name : Knob<_type> \\
@@ -80,7 +80,11 @@ private:
static const char* Name() { return "KNOB_" #_name; } \\
- static _type DefaultValue() { return (_default); } \\
+ static _type DefaultValue() { return (m_default); } \\
+
+ private: \\
+
+ static _type m_default; \\
} _name;
@@ -105,11 +109,7 @@ struct GlobalKnobs
% endfor
% endif
//
- % if knob[1]['type'] == 'std::string':
- DEFINE_KNOB(${knob[0]}, ${knob[1]['type']}, "${repr(knob[1]['default'])[1:-1]}");
- % else:
- DEFINE_KNOB(${knob[0]}, ${knob[1]['type']}, ${knob[1]['default']});
- % endif
+ DEFINE_KNOB(${knob[0]}, ${knob[1]['type']});
% endfor
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/isa.hpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/isa.hpp
index aea5740bb..41af0055f 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/isa.hpp
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/isa.hpp
@@ -38,8 +38,10 @@
#include <intrin.h>
#else
#include <string.h>
+#if !defined(__cpuid)
#include <cpuid.h>
#endif
+#endif
class InstructionSet
{
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h
index bbc9538b8..b00cbf63e 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h
@@ -48,7 +48,17 @@ extern THREAD UINT tlsThreadId;
class BucketManager
{
public:
- BucketManager() {}
+
+ uint32_t mCurrentFrame;
+ std::vector<uint32_t> mBucketMap;
+ bool mBucketsInitialized;
+ std::string mBucketMgrName;
+
+
+ BucketManager(std::string name) : mCurrentFrame(0), mBucketsInitialized(false), mBucketMgrName(name)
+ {
+ mBucketMap.clear();
+ }
~BucketManager();
// removes all registered thread data
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp
index e858a7d59..6329b2ec9 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp
@@ -92,9 +92,3 @@ BUCKET_DESC gCoreBuckets[] = {
static_assert(NumBuckets == (sizeof(gCoreBuckets) / sizeof(gCoreBuckets[0])),
"RDTSC Bucket enum and description table size mismatched.");
-/// @todo bucketmanager and mapping should probably be a part of the SWR context
-std::vector<uint32_t> gBucketMap;
-BucketManager gBucketMgr;
-
-uint32_t gCurrentFrame = 0;
-bool gBucketsInitialized = false;
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h
index dc20e5be9..0228275bd 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h
@@ -100,90 +100,86 @@ enum CORE_BUCKETS
NumBuckets
};
-void rdtscReset();
-void rdtscInit(int threadId);
-void rdtscStart(uint32_t bucketId);
-void rdtscStop(uint32_t bucketId, uint32_t count, uint64_t drawId);
-void rdtscEvent(uint32_t bucketId, uint32_t count1, uint32_t count2);
-void rdtscEndFrame();
+void rdtscReset(BucketManager* pBucketMgr);
+void rdtscInit(BucketManager* pBucketMgr, int threadId);
+void rdtscStart(BucketManager* pBucketMgr, uint32_t bucketId);
+void rdtscStop(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count, uint64_t drawId);
+void rdtscEvent(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count1, uint32_t count2);
+void rdtscEndFrame(BucketManager* pBucketMgr);
#ifdef KNOB_ENABLE_RDTSC
-#define RDTSC_RESET() rdtscReset()
-#define RDTSC_INIT(threadId) rdtscInit(threadId)
-#define RDTSC_START(bucket) rdtscStart(bucket)
-#define RDTSC_STOP(bucket, count, draw) rdtscStop(bucket, count, draw)
-#define RDTSC_EVENT(bucket, count1, count2) rdtscEvent(bucket, count1, count2)
-#define RDTSC_ENDFRAME() rdtscEndFrame()
+#define RDTSC_RESET(pBucketMgr) rdtscReset(pBucketMgr)
+#define RDTSC_INIT(pBucketMgr, threadId) rdtscInit(pBucketMgr,threadId)
+#define RDTSC_START(pBucketMgr, bucket) rdtscStart(pBucketMgr, bucket)
+#define RDTSC_STOP(pBucketMgr, bucket, count, draw) rdtscStop(pBucketMgr, bucket, count, draw)
+#define RDTSC_EVENT(pBucketMgr, bucket, count1, count2) rdtscEvent(pBucketMgr, bucket, count1, count2)
+#define RDTSC_ENDFRAME(pBucketMgr) rdtscEndFrame(pBucketMgr)
#else
-#define RDTSC_RESET()
-#define RDTSC_INIT(threadId)
-#define RDTSC_START(bucket)
-#define RDTSC_STOP(bucket, count, draw)
-#define RDTSC_EVENT(bucket, count1, count2)
-#define RDTSC_ENDFRAME()
+#define RDTSC_RESET(pBucketMgr)
+#define RDTSC_INIT(pBucketMgr, threadId)
+#define RDTSC_START(pBucketMgr, bucket)
+#define RDTSC_STOP(pBucketMgr, bucket, count, draw)
+#define RDTSC_EVENT(pBucketMgr, bucket, count1, count2)
+#define RDTSC_ENDFRAME(pBucketMgr)
#endif
-extern std::vector<uint32_t> gBucketMap;
-extern BucketManager gBucketMgr;
extern BUCKET_DESC gCoreBuckets[];
-extern uint32_t gCurrentFrame;
-extern bool gBucketsInitialized;
-INLINE void rdtscReset()
+INLINE void rdtscReset(BucketManager *pBucketMgr)
{
- gCurrentFrame = 0;
- gBucketMgr.ClearThreads();
+ pBucketMgr->mCurrentFrame = 0;
+ pBucketMgr->ClearThreads();
}
-INLINE void rdtscInit(int threadId)
+INLINE void rdtscInit(BucketManager* pBucketMgr, int threadId)
{
// register all the buckets once
- if (!gBucketsInitialized && (threadId == 0))
+ if (!pBucketMgr->mBucketsInitialized && (threadId == 0))
{
- gBucketMap.resize(NumBuckets);
+ pBucketMgr->mBucketMap.resize(NumBuckets);
for (uint32_t i = 0; i < NumBuckets; ++i)
{
- gBucketMap[i] = gBucketMgr.RegisterBucket(gCoreBuckets[i]);
+ pBucketMgr->mBucketMap[i] = pBucketMgr->RegisterBucket(gCoreBuckets[i]);
}
- gBucketsInitialized = true;
+ pBucketMgr->mBucketsInitialized = true;
}
std::string name = threadId == 0 ? "API" : "WORKER";
- gBucketMgr.RegisterThread(name);
+ pBucketMgr->RegisterThread(name);
}
-INLINE void rdtscStart(uint32_t bucketId)
+INLINE void rdtscStart(BucketManager* pBucketMgr, uint32_t bucketId)
{
- uint32_t id = gBucketMap[bucketId];
- gBucketMgr.StartBucket(id);
+ uint32_t id = pBucketMgr->mBucketMap[bucketId];
+ pBucketMgr->StartBucket(id);
}
-INLINE void rdtscStop(uint32_t bucketId, uint32_t count, uint64_t drawId)
+INLINE void rdtscStop(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count, uint64_t drawId)
{
- uint32_t id = gBucketMap[bucketId];
- gBucketMgr.StopBucket(id);
+ uint32_t id = pBucketMgr->mBucketMap[bucketId];
+ pBucketMgr->StopBucket(id);
}
-INLINE void rdtscEvent(uint32_t bucketId, uint32_t count1, uint32_t count2)
+INLINE void rdtscEvent(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count1, uint32_t count2)
{
- uint32_t id = gBucketMap[bucketId];
- gBucketMgr.AddEvent(id, count1);
+ uint32_t id = pBucketMgr->mBucketMap[bucketId];
+ pBucketMgr->AddEvent(id, count1);
}
-INLINE void rdtscEndFrame()
+INLINE void rdtscEndFrame(BucketManager* pBucketMgr)
{
- gCurrentFrame++;
+ pBucketMgr->mCurrentFrame++;
- if (gCurrentFrame == KNOB_BUCKETS_START_FRAME &&
+ if (pBucketMgr->mCurrentFrame == KNOB_BUCKETS_START_FRAME &&
KNOB_BUCKETS_START_FRAME < KNOB_BUCKETS_END_FRAME)
{
- gBucketMgr.StartCapture();
+ pBucketMgr->StartCapture();
}
- if (gCurrentFrame == KNOB_BUCKETS_END_FRAME &&
+ if (pBucketMgr->mCurrentFrame == KNOB_BUCKETS_END_FRAME &&
KNOB_BUCKETS_START_FRAME < KNOB_BUCKETS_END_FRAME)
{
- gBucketMgr.StopCapture();
- gBucketMgr.PrintReport("rdtsc.txt");
+ pBucketMgr->StopCapture();
+ pBucketMgr->PrintReport("rdtsc.txt");
}
}
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/threads.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/threads.h
index d0f4b30dc..3072bbc83 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/threads.h
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/threads.h
@@ -45,7 +45,8 @@ struct THREAD_DATA
uint32_t numaId; // NUMA node id
uint32_t coreId; // Core id
uint32_t htId; // Hyperthread id
- uint32_t workerId;
+ uint32_t workerId; // index of worker in total thread data
+ void* clipperData; // pointer to hang clipper-private data on
SWR_CONTEXT* pContext;
bool forceBindProcGroup; // Only useful when MAX_WORKER_THREADS is set.
};
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
index 19eec7e99..b67ffbfa7 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
@@ -32,7 +32,6 @@
#include "common/rdtsc_buckets.h"
#include "builder_gfx_mem.h"
-
namespace SwrJit
{
using namespace llvm;
@@ -42,7 +41,9 @@ namespace SwrJit
mpTranslationFuncTy = nullptr;
mpfnTranslateGfxAddressForRead = nullptr;
mpfnTranslateGfxAddressForWrite = nullptr;
+ mpfnTrackMemAccess = nullptr;
mpParamSimDC = nullptr;
+ mpWorkerData = nullptr;
}
@@ -50,13 +51,12 @@ namespace SwrJit
{
}
- void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage)
+ void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage)
{
- SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL),
+ SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT::MEM_CLIENT_INTERNAL),
"Internal memory should not be gfxptr_t.");
}
-
//////////////////////////////////////////////////////////////////////////
/// @brief Generate a masked gather operation in LLVM IR. If not
/// supported on the underlying platform, emulate it with loads
@@ -70,7 +70,7 @@ namespace SwrJit
Value* vIndices,
Value* vMask,
uint8_t scale,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
// address may be coming in as 64bit int now so get the pointer
if (pBase->getType() == mInt64Ty)
@@ -95,7 +95,7 @@ namespace SwrJit
Value* vIndices,
Value* vMask,
uint8_t scale,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
// address may be coming in as 64bit int now so get the pointer
@@ -109,7 +109,7 @@ namespace SwrJit
}
void BuilderGfxMem::SCATTERPS(
- Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, JIT_MEM_CLIENT usage)
+ Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage)
{
// address may be coming in as 64bit int now so get the pointer
@@ -118,46 +118,95 @@ namespace SwrJit
pDst = INT_TO_PTR(pDst, PointerType::get(mInt8Ty, 0));
}
- Builder::SCATTERPS(pDst, vSrc, vOffsets, vMask, usage);
+ Builder::SCATTERPS(pDst, BITCAST(vSrc, mSimdFP32Ty), vOffsets, vMask, usage);
}
-
Value* BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset)
{
return ADD(base, offset);
}
- Value* BuilderGfxMem::GEP(Value* Ptr, Value* Idx, Type* Ty, const Twine& Name)
+ Value* BuilderGfxMem::GEP(Value* Ptr, Value* Idx, Type* Ty, bool isReadOnly, const Twine& Name)
{
- Ptr = TranslationHelper(Ptr, Ty);
- return Builder::GEP(Ptr, Idx, nullptr, Name);
+ bool xlate = (Ptr->getType() == mInt64Ty);
+ if (xlate)
+ {
+ Ptr = INT_TO_PTR(Ptr, Ty);
+ Ptr = Builder::GEP(Ptr, Idx, nullptr, isReadOnly, Name);
+ Ptr = PTR_TO_INT(Ptr, mInt64Ty);
+ if (isReadOnly)
+ {
+ Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
+ }
+ else
+ {
+ Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForWrite);
+ }
+ }
+ else
+ {
+ Ptr = Builder::GEP(Ptr, Idx, nullptr, isReadOnly, Name);
+ }
+ return Ptr;
}
Value* BuilderGfxMem::GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name)
{
- Ptr = TranslationHelper(Ptr, Ty);
- return Builder::GEP(Ty, Ptr, Idx, Name);
+ bool xlate = (Ptr->getType() == mInt64Ty);
+ if (xlate)
+ {
+ Ptr = INT_TO_PTR(Ptr, Ty);
+ Ptr = Builder::GEP(Ty, Ptr, Idx, Name);
+ Ptr = PTR_TO_INT(Ptr, mInt64Ty);
+ Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
+ }
+ else
+ {
+ Ptr = Builder::GEP(Ty, Ptr, Idx, Name);
+ }
+ return Ptr;
}
Value* BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<Value*>& indexList, Type* Ty)
{
- Ptr = TranslationHelper(Ptr, Ty);
- return Builder::GEP(Ptr, indexList);
+ bool xlate = (Ptr->getType() == mInt64Ty);
+ if (xlate)
+ {
+ Ptr = INT_TO_PTR(Ptr, Ty);
+ Ptr = Builder::GEP(Ptr, indexList);
+ Ptr = PTR_TO_INT(Ptr, mInt64Ty);
+ Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
+ }
+ else
+ {
+ Ptr = Builder::GEP(Ptr, indexList);
+ }
+ return Ptr;
}
Value*
BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty)
{
- Ptr = TranslationHelper(Ptr, Ty);
- return Builder::GEP(Ptr, indexList);
+ bool xlate = (Ptr->getType() == mInt64Ty);
+ if (xlate)
+ {
+ Ptr = INT_TO_PTR(Ptr, Ty);
+ Ptr = Builder::GEP(Ptr, indexList);
+ Ptr = PTR_TO_INT(Ptr, mInt64Ty);
+ Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
+ }
+ else
+ {
+ Ptr = Builder::GEP(Ptr, indexList);
+ }
+ return Ptr;
}
- Value* BuilderGfxMem::TranslationHelper(Value* Ptr, Type* Ty)
+ Value* BuilderGfxMem::TranslationHelper(Value* Ptr, Type* Ty, Value* pfnTranslateGfxAddress)
{
SWR_ASSERT(!(Ptr->getType() == mInt64Ty && Ty == nullptr),
"Access of GFX pointers must have non-null type specified.");
-
// address may be coming in as 64bit int now so get the pointer
if (Ptr->getType() == mInt64Ty)
{
@@ -167,29 +216,78 @@ namespace SwrJit
return Ptr;
}
- LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage)
+ void BuilderGfxMem::TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead)
+ {
+#if defined(KNOB_ENABLE_AR)
+ if (!KNOB_AR_ENABLE_MEMORY_EVENTS)
+ {
+ return;
+ }
+
+ Value* tmpPtr;
+ // convert actual pointers to int64.
+ uint32_t size = 0;
+
+ if (Ptr->getType() == mInt64Ty)
+ {
+ DataLayout dataLayout(JM()->mpCurrentModule);
+ size = (uint32_t)dataLayout.getTypeAllocSize(Ty);
+
+ tmpPtr = Ptr;
+ }
+ else
+ {
+ DataLayout dataLayout(JM()->mpCurrentModule);
+ size = (uint32_t)dataLayout.getTypeAllocSize(Ptr->getType());
+
+ tmpPtr = PTR_TO_INT(Ptr, mInt64Ty);
+ }
+
+ // There are some shader compile setups where there's no translation functions set up.
+ // This would be a situation where the accesses are to internal rasterizer memory and won't
+ // be logged.
+ // TODO: we may wish to revisit this for URB reads/writes, though.
+ if (mpfnTrackMemAccess)
+ {
+ SWR_ASSERT(mpWorkerData != nullptr);
+ CALL(mpfnTrackMemAccess,
+ {mpParamSimDC,
+ mpWorkerData,
+ tmpPtr,
+ C((uint32_t)size),
+ C((uint8_t)isRead),
+ C((uint32_t)usage)});
+ }
+#endif
+
+ return;
+ }
+
+ LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
+ TrackerHelper(Ptr, Ty, usage, true);
- Ptr = TranslationHelper(Ptr, Ty);
+ Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
return Builder::LOAD(Ptr, Name);
}
- LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
+ LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
+ TrackerHelper(Ptr, Ty, usage, true);
- Ptr = TranslationHelper(Ptr, Ty);
+ Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
return Builder::LOAD(Ptr, Name);
}
-
LoadInst* BuilderGfxMem::LOAD(
- Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
+ Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
+ TrackerHelper(Ptr, Ty, usage, true);
- Ptr = TranslationHelper(Ptr, Ty);
+ Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
return Builder::LOAD(Ptr, isVolatile, Name);
}
@@ -197,7 +295,7 @@ namespace SwrJit
const std::initializer_list<uint32_t>& offset,
const llvm::Twine& name,
Type* Ty,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
AssertGFXMemoryParams(BasePtr, usage);
@@ -222,25 +320,59 @@ namespace SwrJit
return LOAD(BasePtr, name, Ty, usage);
}
-
CallInst* BuilderGfxMem::MASKED_LOAD(Value* Ptr,
unsigned Align,
Value* Mask,
Value* PassThru,
const Twine& Name,
Type* Ty,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
+ TrackerHelper(Ptr, Ty, usage, true);
- Ptr = TranslationHelper(Ptr, Ty);
+ Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
return Builder::MASKED_LOAD(Ptr, Align, Mask, PassThru, Name, Ty, usage);
}
+ /// @brief Store Val through a graphics-memory pointer.
+ /// Validates the pointer/usage pair, reports the access to the memory tracker as a
+ /// write, translates the address and then defers to the base Builder store.
+ /// @param Val - value to store
+ /// @param Ptr - destination address, translated before the store is emitted
+ /// @param isVolatile - forwarded unchanged to Builder::STORE
+ /// @param Ty - pointer type handed to the tracker and the translation helper
+ /// @param usage - memory client performing the access
+ StoreInst*
+ BuilderGfxMem::STORE(Value* Val, Value* Ptr, bool isVolatile, Type* Ty, MEM_CLIENT usage)
+ {
+     AssertGFXMemoryParams(Ptr, usage);
+     TrackerHelper(Ptr, Ty, usage, false); // isRead == false: logged as a write
+
+     // A store must resolve its address with the write translator, not the read one.
+     Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForWrite);
+     return Builder::STORE(Val, Ptr, isVolatile, Ty, usage);
+ }
+
+ /// @brief Store Val at a constant-offset location relative to a graphics-memory base.
+ /// Validates the base pointer/usage pair, reports the access to the memory tracker as
+ /// a write, translates the base address and then defers to the base Builder store,
+ /// which receives the offset list unchanged.
+ /// @param Val - value to store
+ /// @param BasePtr - base address, translated before the store is emitted
+ /// @param offset - constant indices forwarded to Builder::STORE
+ /// @param Ty - pointer type handed to the tracker and the translation helper
+ /// @param usage - memory client performing the access
+ StoreInst* BuilderGfxMem::STORE(Value* Val,
+                                 Value* BasePtr,
+                                 const std::initializer_list<uint32_t>& offset,
+                                 Type* Ty,
+                                 MEM_CLIENT usage)
+ {
+     AssertGFXMemoryParams(BasePtr, usage);
+     TrackerHelper(BasePtr, Ty, usage, false); // isRead == false: logged as a write
+
+     // A store must resolve its address with the write translator, not the read one.
+     BasePtr = TranslationHelper(BasePtr, Ty, mpfnTranslateGfxAddressForWrite);
+     return Builder::STORE(Val, BasePtr, offset, Ty, usage);
+ }
+
+ /// @brief Masked store of Val through a graphics-memory pointer.
+ /// Validates the pointer/usage pair, reports the access to the memory tracker as a
+ /// write, translates the address and then defers to the base Builder masked store.
+ /// @param Val - value to store
+ /// @param Ptr - destination address, translated before the store is emitted
+ /// @param Align - forwarded unchanged to Builder::MASKED_STORE
+ /// @param Mask - forwarded unchanged to Builder::MASKED_STORE
+ /// @param Ty - pointer type handed to the tracker and the translation helper
+ /// @param usage - memory client performing the access
+ CallInst* BuilderGfxMem::MASKED_STORE(
+     Value* Val, Value* Ptr, unsigned Align, Value* Mask, Type* Ty, MEM_CLIENT usage)
+ {
+     AssertGFXMemoryParams(Ptr, usage);
+
+     TrackerHelper(Ptr, Ty, usage, false); // isRead == false: logged as a write
+
+     // A store must resolve its address with the write translator, not the read one.
+     Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForWrite);
+     return Builder::MASKED_STORE(Val, Ptr, Align, Mask, Ty, usage);
+ }
+
Value* BuilderGfxMem::TranslateGfxAddressForRead(Value* xpGfxAddress,
Type* PtrTy,
const Twine& Name,
- JIT_MEM_CLIENT /* usage */)
+ MEM_CLIENT /* usage */)
{
if (PtrTy == nullptr)
{
@@ -252,7 +384,7 @@ namespace SwrJit
Value* BuilderGfxMem::TranslateGfxAddressForWrite(Value* xpGfxAddress,
Type* PtrTy,
const Twine& Name,
- JIT_MEM_CLIENT /* usage */)
+ MEM_CLIENT /* usage */)
{
if (PtrTy == nullptr)
{
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
index 4cf062536..b1f662414 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
@@ -41,7 +41,7 @@ namespace SwrJit
BuilderGfxMem(JitManager* pJitMgr);
virtual ~BuilderGfxMem() {}
- virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = "");
+ virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, bool isReadOnly = true, const Twine& Name = "");
virtual Value* GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name = "");
virtual Value*
GEP(Value* Ptr, const std::initializer_list<Value*>& indexList, Type* Ty = nullptr);
@@ -51,22 +51,21 @@ namespace SwrJit
virtual LoadInst* LOAD(Value* Ptr,
const char* Name,
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* Ptr,
const Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* Ptr,
bool isVolatile,
const Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* BasePtr,
const std::initializer_list<uint32_t>& offset,
const llvm::Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual CallInst* MASKED_LOAD(Value* Ptr,
unsigned Align,
@@ -74,57 +73,65 @@ namespace SwrJit
Value* PassThru = nullptr,
const Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+
+ virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+
+ virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+
+ virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual Value* GATHERPS(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual Value* GATHERDD(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual void SCATTERPS(Value* pDst,
Value* vSrc,
Value* vOffsets,
Value* vMask,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
Value* TranslateGfxAddressForRead(Value* xpGfxAddress,
Type* PtrTy = nullptr,
const Twine& Name = "",
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
Value* TranslateGfxAddressForWrite(Value* xpGfxAddress,
Type* PtrTy = nullptr,
const Twine& Name = "",
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
-
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+
protected:
- void AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage);
+ void AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage);
virtual void NotifyPrivateContextSet();
virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset);
- Value* TranslationHelper(Value* Ptr, Type* Ty);
+ Value* TranslationHelper(Value* Ptr, Type* Ty, Value* pfnTranslateGfxAddress);
+ void TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead);
FunctionType* GetTranslationFunctionType() { return mpTranslationFuncTy; }
Value* GetTranslationFunctionForRead() { return mpfnTranslateGfxAddressForRead; }
Value* GetTranslationFunctionForWrite() { return mpfnTranslateGfxAddressForWrite; }
Value* GetParamSimDC() { return mpParamSimDC; }
+ Value* mpWorkerData;
private:
FunctionType* mpTranslationFuncTy;
Value* mpfnTranslateGfxAddressForRead;
Value* mpfnTranslateGfxAddressForWrite;
Value* mpParamSimDC;
+ FunctionType* mpTrackMemAccessFuncTy;
+ Value* mpfnTrackMemAccess;
};
} // namespace SwrJit
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index a9d649092..2d8240187 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -34,14 +34,14 @@
namespace SwrJit
{
- void Builder::AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage)
+ void Builder::AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage)
{
SWR_ASSERT(
ptr->getType() != mInt64Ty,
"Address appears to be GFX access. Requires translation through BuilderGfxMem.");
}
- Value* Builder::GEP(Value* Ptr, Value* Idx, Type* Ty, const Twine& Name)
+ Value* Builder::GEP(Value* Ptr, Value* Idx, Type* Ty, bool isReadOnly, const Twine& Name)
{
return IRB()->CreateGEP(Ptr, Idx, Name);
}
@@ -93,26 +93,26 @@ namespace SwrJit
return IN_BOUNDS_GEP(ptr, indices);
}
- LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, Name);
}
- LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, Name);
}
- LoadInst* Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, JIT_MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ty, Ptr, Name);
}
LoadInst*
- Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
+ Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, isVolatile, Name);
@@ -122,7 +122,7 @@ namespace SwrJit
const std::initializer_list<uint32_t>& indices,
const llvm::Twine& name,
Type* Ty,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
std::vector<Value*> valIndices;
for (auto i : indices)
@@ -141,7 +141,7 @@ namespace SwrJit
}
StoreInst*
- Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices)
+ Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices, Type* Ty, MEM_CLIENT usage)
{
std::vector<Value*> valIndices;
for (auto i : indices)
@@ -186,7 +186,7 @@ namespace SwrJit
Value* vIndices,
Value* vMask,
uint8_t scale,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
AssertMemoryUsageParams(pBase, usage);
@@ -206,7 +206,7 @@ namespace SwrJit
Value* vIndices,
Value* vMask,
uint8_t scale,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
AssertMemoryUsageParams(pBase, usage);
@@ -237,13 +237,18 @@ namespace SwrJit
return MASKED_GATHER(pVecSrcPtr, 4, pVecMask, pVecPassthru);
}
+ void Builder::SCATTER_PTR(Value* pVecDstPtr, Value* pVecSrc, Value* pVecMask)
+ { // Thin wrapper over MASKED_SCATTER; note the source vector is passed before the pointer vector.
+ MASKED_SCATTER(pVecSrc, pVecDstPtr, 4, pVecMask); // 4 is the same constant GATHER_PTR hands to MASKED_GATHER; presumably the alignment -- TODO confirm
+ }
+
void Builder::Gather4(const SWR_FORMAT format,
Value* pSrcBase,
Value* byteOffsets,
Value* mask,
Value* vGatherComponents[],
bool bPackedOutput,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
const SWR_FORMAT_INFO& info = GetFormatInfo(format);
if (info.type[0] == SWR_TYPE_FLOAT && info.bpc[0] == 32)
@@ -262,7 +267,7 @@ namespace SwrJit
Value* vMask,
Value* vGatherComponents[],
bool bPackedOutput,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
switch (info.bpp / info.numComps)
{
@@ -336,7 +341,7 @@ namespace SwrJit
Value* vMask,
Value* vGatherComponents[],
bool bPackedOutput,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
switch (info.bpp / info.numComps)
{
@@ -643,9 +648,16 @@ namespace SwrJit
/// @param vOffsets - vector of byte offsets from pDst
/// @param vMask - mask of valid lanes
void Builder::SCATTERPS(
- Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, JIT_MEM_CLIENT usage)
+ Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage)
{
AssertMemoryUsageParams(pDst, usage);
+// if (vSrc->getType() != mSimdFP32Ty)
+// {
+// vSrc = BITCAST(vSrc, mSimdFP32Ty);
+// }
+ SWR_ASSERT(vSrc->getType()->getVectorElementType()->isFloatTy());
+ VSCATTERPS(pDst, vMask, vOffsets, vSrc, C(1));
+ return;
/* Scatter algorithm
@@ -657,6 +669,10 @@ namespace SwrJit
*/
+ /*
+
+ // Reference implementation kept around for reference
+
BasicBlock* pCurBB = IRB()->GetInsertBlock();
Function* pFunc = pCurBB->getParent();
Type* pSrcTy = vSrc->getType()->getVectorElementType();
@@ -744,5 +760,7 @@ namespace SwrJit
// Move builder to beginning of post loop
IRB()->SetInsertPoint(pPostLoop, pPostLoop->begin());
+
+ */
}
} // namespace SwrJit
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
index c533984da..fe4c5dd38 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
@@ -30,20 +30,22 @@
#pragma once
public:
-enum class JIT_MEM_CLIENT
+enum class MEM_CLIENT
{
MEM_CLIENT_INTERNAL,
GFX_MEM_CLIENT_FETCH,
GFX_MEM_CLIENT_SAMPLER,
GFX_MEM_CLIENT_SHADER,
+ GFX_MEM_CLIENT_STREAMOUT,
+ GFX_MEM_CLIENT_URB
};
protected:
virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset);
-void AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage);
+void AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage);
public:
-virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = "");
+virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, bool isReadOnly = true, const Twine& Name = "");
virtual Value* GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name = "");
virtual Value* GEP(Value* ptr, const std::initializer_list<Value*>& indexList, Type* Ty = nullptr);
virtual Value*
@@ -56,23 +58,23 @@ Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*>& indexList)
Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t>& indexList);
virtual LoadInst*
- LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* Ptr,
const Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst*
- LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* Ptr,
bool isVolatile,
const Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* BasePtr,
const std::initializer_list<uint32_t>& offset,
const llvm::Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual CallInst* MASKED_LOAD(Value* Ptr,
unsigned Align,
@@ -80,14 +82,24 @@ virtual CallInst* MASKED_LOAD(Value* Ptr,
Value* PassThru = nullptr,
const Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL)
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL)
{
return IRB()->CreateMaskedLoad(Ptr, Align, Mask, PassThru, Name);
}
-LoadInst*
- LOADV(Value* BasePtr, const std::initializer_list<Value*>& offset, const llvm::Twine& name = "");
-StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset);
+virtual StoreInst* STORE(Value*     Val,
+                         Value*     Ptr,
+                         bool       isVolatile = false,
+                         Type*      Ty         = nullptr,
+                         MEM_CLIENT usage      = MEM_CLIENT::MEM_CLIENT_INTERNAL)
+{
+    // Base builder: emit the store directly. Ty and usage are accepted only so that
+    // overrides share this signature; they are not consulted here.
+    StoreInst* pStore = IRB()->CreateStore(Val, Ptr, isVolatile);
+    return pStore;
+}
+
+virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+
+virtual CallInst* MASKED_STORE(Value*     Val,
+                               Value*     Ptr,
+                               unsigned   Align,
+                               Value*     Mask,
+                               Type*      Ty    = nullptr,
+                               MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL)
+{
+    // Base builder: emit the masked store directly. Ty and usage are accepted only so
+    // that overrides share this signature; they are not consulted here.
+    CallInst* pMaskedStore = IRB()->CreateMaskedStore(Val, Ptr, Align, Mask);
+    return pMaskedStore;
+}
+
+LoadInst* LOADV(Value* BasePtr, const std::initializer_list<Value*>& offset, const llvm::Twine& name = "");
StoreInst* STOREV(Value* Val, Value* BasePtr, const std::initializer_list<Value*>& offset);
Value* MEM_ADD(Value* i32Incr,
@@ -101,14 +113,14 @@ void Gather4(const SWR_FORMAT format,
Value* mask,
Value* vGatherComponents[],
bool bPackedOutput,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual Value* GATHERPS(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
void GATHER4PS(const SWR_FORMAT_INFO& info,
Value* pSrcBase,
@@ -116,14 +128,14 @@ void GATHER4PS(const SWR_FORMAT_INFO& info,
Value* mask,
Value* vGatherComponents[],
bool bPackedOutput,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual Value* GATHERDD(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
void GATHER4DD(const SWR_FORMAT_INFO& info,
Value* pSrcBase,
@@ -131,17 +143,18 @@ void GATHER4DD(const SWR_FORMAT_INFO& info,
Value* mask,
Value* vGatherComponents[],
bool bPackedOutput,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
Value* GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
Value* GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* pVecPassthru);
+void SCATTER_PTR(Value* pVecDstPtr, Value* pVecSrc, Value* pVecMask);
virtual void SCATTERPS(Value* pDst,
Value* vSrc,
Value* vOffsets,
Value* vMask,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
void Shuffle8bpcGather4(const SWR_FORMAT_INFO& info,
Value* vGatherInput,
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
index c34959d35..d00a8963d 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
@@ -32,8 +32,12 @@
#include "passes.h"
#include "JitManager.h"
+#include "common/simdlib.hpp"
+
#include <unordered_map>
+extern "C" void ScatterPS_256(uint8_t*, SIMD256::Integer, SIMD256::Float, uint8_t, uint32_t);
+
namespace llvm
{
// forward declare the initializer
@@ -88,6 +92,8 @@ namespace SwrJit
Instruction*
VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
Instruction*
+ VSCATTER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
+ Instruction*
VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
Instruction*
VHSUB_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
@@ -102,88 +108,61 @@ namespace SwrJit
static Intrinsic::ID DOUBLE = (Intrinsic::ID)-1;
+ // clang-format off
static std::map<std::string, X86Intrinsic> intrinsicMap2[] = {
- // 256 wide 512 wide
+ // 256 wide 512 wide
{
// AVX
- {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VPERMPS",
- {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VPERMD",
- {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VGATHERPD",
- {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERPS",
- {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERDD",
- {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VCVTPD2PS",
- {{Intrinsic::x86_avx_cvt_pd2_ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
- {"meta.intrinsic.VCVTPH2PS",
- {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
- {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VPERMD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}},
+ {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
+ {"meta.intrinsic.VCVTPH2PS", {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
+ {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
},
{
// AVX2
- {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VPERMPS",
- {{Intrinsic::x86_avx2_permps, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VPERMD",
- {{Intrinsic::x86_avx2_permd, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VGATHERPD",
- {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERPS",
- {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERDD",
- {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VCVTPH2PS",
- {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
- {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx2_permps, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx2_permd, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}},
+ {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VCVTPH2PS", {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
+ {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
},
{
// AVX512
- {"meta.intrinsic.VRCPPS",
- {{Intrinsic::x86_avx512_rcp14_ps_256, Intrinsic::x86_avx512_rcp14_ps_512}, NO_EMU}},
+ {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx512_rcp14_ps_256, Intrinsic::x86_avx512_rcp14_ps_512}, NO_EMU}},
#if LLVM_VERSION_MAJOR < 7
- {"meta.intrinsic.VPERMPS",
- {{Intrinsic::x86_avx512_mask_permvar_sf_256,
- Intrinsic::x86_avx512_mask_permvar_sf_512},
- NO_EMU}},
- {"meta.intrinsic.VPERMD",
- {{Intrinsic::x86_avx512_mask_permvar_si_256,
- Intrinsic::x86_avx512_mask_permvar_si_512},
- NO_EMU}},
+ {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx512_mask_permvar_sf_256, Intrinsic::x86_avx512_mask_permvar_sf_512}, NO_EMU}},
+ {"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx512_mask_permvar_si_256, Intrinsic::x86_avx512_mask_permvar_si_512}, NO_EMU}},
#else
- {"meta.intrinsic.VPERMPS",
- {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VPERMD",
- {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VPERMD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
#endif
- {"meta.intrinsic.VGATHERPD",
- {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERPS",
- {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERDD",
- {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}},
#if LLVM_VERSION_MAJOR < 7
- {"meta.intrinsic.VCVTPD2PS",
- {{Intrinsic::x86_avx512_mask_cvtpd2ps_256, Intrinsic::x86_avx512_mask_cvtpd2ps_512},
- NO_EMU}},
+ {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx512_mask_cvtpd2ps_256, Intrinsic::x86_avx512_mask_cvtpd2ps_512}, NO_EMU}},
#else
- {"meta.intrinsic.VCVTPD2PS",
- {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VCONVERT_EMU}},
+ {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VCONVERT_EMU}},
#endif
- {"meta.intrinsic.VCVTPH2PS",
- {{Intrinsic::x86_avx512_mask_vcvtph2ps_256, Intrinsic::x86_avx512_mask_vcvtph2ps_512},
- NO_EMU}},
- {"meta.intrinsic.VROUND",
- {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VROUND_EMU}},
- {"meta.intrinsic.VHSUBPS",
- {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VHSUB_EMU}},
+ {"meta.intrinsic.VCVTPH2PS", {{Intrinsic::x86_avx512_mask_vcvtph2ps_256, Intrinsic::x86_avx512_mask_vcvtph2ps_512}, NO_EMU}},
+ {"meta.intrinsic.VROUND", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VROUND_EMU}},
+ {"meta.intrinsic.VHSUBPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VHSUB_EMU}},
}};
+ // clang-format on
struct LowerX86 : public FunctionPass
{
@@ -209,6 +188,31 @@ namespace SwrJit
SWR_ASSERT(false, "Unsupported AVX architecture.");
mTarget = AVX;
}
+
+ // Setup scatter function for 256 wide
+ uint32_t curWidth = B->mVWidth;
+ B->SetTargetWidth(8);
+ std::vector<Type*> args = {
+ B->mInt8PtrTy, // pBase
+ B->mSimdInt32Ty, // vIndices
+ B->mSimdFP32Ty, // vSrc
+ B->mInt8Ty, // mask
+ B->mInt32Ty // scale
+ };
+
+ FunctionType* pfnScatterTy = FunctionType::get(B->mVoidTy, args, false);
+ mPfnScatter256 = cast<Function>(
+#if LLVM_VERSION_MAJOR >= 9
+ B->JM()->mpCurrentModule->getOrInsertFunction("ScatterPS_256", pfnScatterTy).getCallee());
+#else
+ B->JM()->mpCurrentModule->getOrInsertFunction("ScatterPS_256", pfnScatterTy));
+#endif
+ if (sys::DynamicLibrary::SearchForAddressOfSymbol("ScatterPS_256") == nullptr)
+ {
+ sys::DynamicLibrary::AddSymbol("ScatterPS_256", (void*)&ScatterPS_256);
+ }
+
+ B->SetTargetWidth(curWidth);
}
// Try to decipher the vector type of the instruction. This does not work properly
@@ -392,23 +396,39 @@ namespace SwrJit
virtual bool runOnFunction(Function& F)
{
std::vector<Instruction*> toRemove;
+ std::vector<BasicBlock*> bbs;
+
+ // Make temp copy of the basic blocks and instructions, as the intrinsic
+ // replacement code might invalidate the iterators
+ for (auto& b : F.getBasicBlockList())
+ {
+ bbs.push_back(&b);
+ }
- for (auto& BB : F.getBasicBlockList())
+ for (auto* BB : bbs)
{
- for (auto& I : BB.getInstList())
+ std::vector<Instruction*> insts;
+ for (auto& i : BB->getInstList())
+ {
+ insts.push_back(&i);
+ }
+
+ for (auto* I : insts)
{
- if (CallInst* pCallInst = dyn_cast<CallInst>(&I))
+ if (CallInst* pCallInst = dyn_cast<CallInst>(I))
{
Function* pFunc = pCallInst->getCalledFunction();
if (pFunc)
{
if (pFunc->getName().startswith("meta.intrinsic"))
{
- B->IRB()->SetInsertPoint(&I);
+ B->IRB()->SetInsertPoint(I);
Instruction* pReplace = ProcessIntrinsic(pCallInst);
- SWR_ASSERT(pReplace);
toRemove.push_back(pCallInst);
- pCallInst->replaceAllUsesWith(pReplace);
+ if (pReplace)
+ {
+ pCallInst->replaceAllUsesWith(pReplace);
+ }
}
}
}
@@ -428,10 +448,9 @@ namespace SwrJit
virtual void getAnalysisUsage(AnalysisUsage& AU) const {}
JitManager* JM() { return B->JM(); }
-
- Builder* B;
-
- TargetArch mTarget;
+ Builder* B;
+ TargetArch mTarget;
+ Function* mPfnScatter256;
static char ID; ///< Needed by LLVM to generate ID for FunctionPass.
};
@@ -639,6 +658,69 @@ namespace SwrJit
return cast<Instruction>(v32Gather);
}
+ // Lowers a meta.intrinsic.VSCATTERPS call. On AVX512 the scatter is emitted with the
+ // native x86 scatter intrinsics; on every other target it becomes one or two calls to
+ // the 256 wide C helper held in pThis->mPfnScatter256.
+ // Always returns nullptr: no value is produced to substitute for the original call.
+ Instruction*
+ VSCATTER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
+ {
+     Builder* B = pThis->B;
+
+     // Operands of the intrinsic call, in argument order.
+     auto pDstBase  = pCallInst->getArgOperand(0);
+     auto vLaneMask = pCallInst->getArgOperand(1);
+     auto vOffsets  = pCallInst->getArgOperand(2);
+     auto vValues   = pCallInst->getArgOperand(3);
+     auto scale     = pCallInst->getArgOperand(4);
+
+     if (arch == AVX512)
+     {
+         if (width == W256)
+         {
+             // llvm exposes no 8 element scatter taking 32bit indices, so widen the
+             // indices to 64bit and use the 8 element scatter with 64bit indices.
+             Function* pScatterQps = Intrinsic::getDeclaration(
+                 B->JM()->mpCurrentModule, Intrinsic::x86_avx512_scatter_qps_512);
+
+             auto   vOffsets64 = B->Z_EXT(vOffsets, B->mSimdInt64Ty);
+             Value* mask8      = B->BITCAST(vLaneMask, B->mInt8Ty);
+             B->CALL(pScatterQps, {pDstBase, mask8, vOffsets64, vValues, scale});
+         }
+         else if (width == W512)
+         {
+             Function* pScatterDps = Intrinsic::getDeclaration(
+                 B->JM()->mpCurrentModule, Intrinsic::x86_avx512_scatter_dps_512);
+
+             Value* mask16 = B->BITCAST(vLaneMask, B->mInt16Ty);
+             B->CALL(pScatterDps, {pDstBase, mask16, vOffsets, vValues, scale});
+         }
+         return nullptr;
+     }
+
+     // Not AVX512: call into the C scatter function. This compiles much faster than
+     // jitting a scatter loop for every scatter.
+     auto emitScatter256 = [&](Value* vMask8, Value* vOffsets8, Value* vValues8) {
+         auto packedMask = B->BITCAST(vMask8, B->mInt8Ty);
+         B->CALL(pThis->mPfnScatter256, {pDstBase, vOffsets8, vValues8, packedMask, scale});
+     };
+
+     if (width == W256)
+     {
+         emitScatter256(vLaneMask, vOffsets, vValues);
+         return nullptr;
+     }
+
+     // A 512 wide scatter is split into two 256 wide scatters: lanes 0-7, then 8-15.
+     auto lowerLanes = B->C({0, 1, 2, 3, 4, 5, 6, 7});
+     auto maskLo     = B->VSHUFFLE(vLaneMask, vLaneMask, lowerLanes);
+     auto offsetsLo  = B->VSHUFFLE(vOffsets, vOffsets, lowerLanes);
+     auto valuesLo   = B->VSHUFFLE(vValues, vValues, lowerLanes);
+     emitScatter256(maskLo, offsetsLo, valuesLo);
+
+     auto upperLanes = B->C({8, 9, 10, 11, 12, 13, 14, 15});
+     auto maskHi     = B->VSHUFFLE(vLaneMask, vLaneMask, upperLanes);
+     auto offsetsHi  = B->VSHUFFLE(vOffsets, vOffsets, upperLanes);
+     auto valuesHi   = B->VSHUFFLE(vValues, vValues, upperLanes);
+     emitScatter256(maskHi, offsetsHi, valuesHi);
+
+     return nullptr;
+ }
// No support for vroundps in avx512 (it is available in kncni), so emulate with avx
// instructions
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h
index d3c732af0..e0bb75cde 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h
@@ -25,6 +25,7 @@
* @brief Include file for llvm passes
*
******************************************************************************/
+#pragma once
#include "JitManager.h"
#include "builder.h"
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/shader_lib/Scatter.cpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/shader_lib/Scatter.cpp
new file mode 100644
index 000000000..de8115404
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/shader_lib/Scatter.cpp
@@ -0,0 +1,49 @@
+/****************************************************************************
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file Scatter.cpp
+ *
+ * @brief Shader support library implementation for scatter emulation
+ *
+ * Notes:
+ *
+ ******************************************************************************/
+#include <stdarg.h>
+#include "common/os.h"
+#include "common/simdlib.hpp"
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief Scalar emulation of a masked 8-wide float scatter.
+///        For every bit set in mask, stores the matching lane of vSrc to
+///        pBase + index * scale.
+/// @param pBase - Base address the byte offsets are applied to.
+/// @param vIndices - Eight 32-bit indices, read here as unsigned values.
+/// @param vSrc - Eight float values to store.
+/// @param mask - Bit i enables the store for lane i.
+/// @param scale - Multiplier applied to each index to form a byte offset.
+extern "C" void ScatterPS_256(uint8_t* pBase, SIMD256::Integer vIndices, SIMD256::Float vSrc, uint8_t mask, uint32_t scale)
+{
+    OSALIGN(float, 32) src[8];
+    OSALIGN(uint32_t, 32) indices[8];
+
+    // Copy both vectors into local arrays so lanes can be addressed individually.
+    SIMD256::store_ps(src, vSrc);
+    SIMD256::store_si((SIMD256::Integer*)indices, vIndices);
+
+    DWORD index;
+    while (_BitScanForward(&index, mask))
+    {
+        // Clear the lane just found so the next scan reports a different bit.
+        mask &= ~(1 << index);
+
+        // Widen before multiplying: a 32-bit index * scale product wraps at 4GB and
+        // would place the store at the wrong address.
+        const uint64_t byteOffset = static_cast<uint64_t>(indices[index]) * scale;
+        *(float*)(pBase + byteOffset) = src[index];
+    }
+}
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp
index bff96e17f..3a19bbac7 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp
@@ -25,7 +25,11 @@
* @brief Provide access to tiles table initialization functions
*
******************************************************************************/
+
#include "memory/InitMemory.h"
+#include "memory/LoadTile.h"
+#include "memory/StoreTile.h"
+#include "InitMemory.h"
void InitSimLoadTilesTable();
void InitSimStoreTilesTable();
@@ -37,3 +41,10 @@ void InitTilesTable()
InitSimStoreTilesTable();
InitSimClearTilesTable();
}
+
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief Fills the tile interface table with the SwrLoadHotTile and
+///        SwrStoreHotTileToSurface entry points.
+/// @param out_funcs - Table that receives the function pointers.
+void SwrGetTileIterface(SWR_TILE_INTERFACE &out_funcs)
+{
+    out_funcs.pfnSwrStoreHotTileToSurface = SwrStoreHotTileToSurface;
+    out_funcs.pfnSwrLoadHotTile           = SwrLoadHotTile;
+} \ No newline at end of file
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h
index 14cca6ab3..6838ddc70 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h
@@ -25,9 +25,57 @@
* @brief Provide access to tiles table initialization functions
*
******************************************************************************/
+
+#pragma once
+
#include "common/os.h"
+#include "memory/SurfaceState.h"
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief Loads a full hottile from a render surface
+/// @param hWorkerPrivateData - Opaque handle; presumably per-worker private
+///        data (confirm against the implementation)
+/// @param pSrcSurface - Surface state of the surface to load from (read-only)
+/// @param dstFormat - Format for hot tile.
+/// @param renderTargetIndex - Index to src render target
+/// @param x, y - Coordinates to raster tile.
+/// @param renderTargetArrayIndex - Array index within the render target;
+///        presumably the array slice to load from (confirm against callers)
+/// @param pDstHotTile - Pointer to Hot Tile
+SWR_FUNC(void,
+ SwrLoadHotTile,
+ HANDLE hWorkerPrivateData,
+ const SWR_SURFACE_STATE* pSrcSurface,
+ SWR_FORMAT dstFormat,
+ SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+ uint32_t x,
+ uint32_t y,
+ uint32_t renderTargetArrayIndex,
+ uint8_t* pDstHotTile);
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief Deswizzles and stores a full hottile to a render surface
+/// @param hWorkerPrivateData - Opaque handle; presumably per-worker private
+///        data (confirm against the implementation)
+/// @param pDstSurface - Surface state of the surface to store to (passed
+///        non-const, unlike the load path)
+/// @param srcFormat - Format for hot tile.
+/// @param renderTargetIndex - Index to destination render target
+/// @param x, y - Coordinates to raster tile.
+/// @param renderTargetArrayIndex - Array index within the render target;
+///        presumably the array slice to store to (confirm against callers)
+/// @param pSrcHotTile - Pointer to Hot Tile
+SWR_FUNC(void,
+ SwrStoreHotTileToSurface,
+ HANDLE hWorkerPrivateData,
+ SWR_SURFACE_STATE* pDstSurface,
+ SWR_FORMAT srcFormat,
+ SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+ uint32_t x,
+ uint32_t y,
+ uint32_t renderTargetArrayIndex,
+ uint8_t* pSrcHotTile);
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief Table of hot tile load/store function pointers, filled in by
+///        SwrGetTileIterface().
+struct SWR_TILE_INTERFACE {
+ PFNSwrLoadHotTile pfnSwrLoadHotTile; // set to SwrLoadHotTile by SwrGetTileIterface
+ PFNSwrStoreHotTileToSurface pfnSwrStoreHotTileToSurface; // set to SwrStoreHotTileToSurface by SwrGetTileIterface
+};
extern "C"
{
+ /// Initializes the tiles tables.
SWR_VISIBLE void SWR_API InitTilesTable();
+
+ /// Function pointer type with the same signature as SwrGetTileIterface.
+ /// NOTE(review): this typedef spells "Interface" while the exported function
+ /// below is spelled "Iterface"; renaming the function would change the
+ /// exported symbol, so confirm all users before touching either name.
+ typedef void(SWR_API* PFNSwrGetTileInterface)(SWR_TILE_INTERFACE& out_funcs);
+ /// Fills out_funcs with the hot tile load/store entry points.
+ SWR_VISIBLE void SWR_API SwrGetTileIterface(SWR_TILE_INTERFACE &out_funcs);
}
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/SurfaceState.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/SurfaceState.h
new file mode 100644
index 000000000..6b1b78eee
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/SurfaceState.h
@@ -0,0 +1,66 @@
+/****************************************************************************
+* Copyright (C) 2014-2019 Intel Corporation. All Rights Reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
+*
+* @file SurfaceState.h
+*
+* @brief Common definitions for surface state
+*
+******************************************************************************/
+#pragma once
+
+#include "core/state.h"
+
+//////////////////////////////////////////////////////////////////////////
+/// SWR_SURFACE_STATE
+/// @brief Description of a surface. Per the field comments below, the same
+///        structure serves sampled surfaces and render targets.
+/// NOTE(review): the "@llvm_enum" markers look like annotations read by a
+/// code generator; keep them and the field order intact unless confirmed
+/// otherwise.
+//////////////////////////////////////////////////////////////////////////
+struct SWR_SURFACE_STATE
+{
+ gfxptr_t xpBaseAddress; // presumably the address of the surface data - confirm
+ SWR_SURFACE_TYPE type; // @llvm_enum
+ SWR_FORMAT format; // @llvm_enum
+ uint32_t width;
+ uint32_t height;
+ uint32_t depth;
+ uint32_t numSamples;
+ uint32_t samplePattern;
+ uint32_t pitch;
+ uint32_t qpitch;
+ uint32_t minLod; // for sampled surfaces, the most detailed LOD that can be accessed by sampler
+ uint32_t maxLod; // for sampled surfaces, the max LOD that can be accessed
+ float resourceMinLod; // for sampled surfaces, the most detailed fractional mip that can be
+ // accessed by sampler
+ uint32_t lod; // for render targets, the lod being rendered to
+ uint32_t arrayIndex; // for render targets, the array index being rendered to for arrayed surfaces
+ SWR_TILE_MODE tileMode; // @llvm_enum
+ uint32_t halign;
+ uint32_t valign;
+ uint32_t xOffset;
+ uint32_t yOffset;
+
+ uint32_t lodOffsets[2][15]; // lod offsets for sampled surfaces
+
+ gfxptr_t xpAuxBaseAddress; // Used for compression, append/consume counter, etc.
+ SWR_AUX_MODE auxMode; // @llvm_enum
+
+
+ bool bInterleavedSamples; // are MSAA samples stored interleaved or planar
+}; \ No newline at end of file
diff --git a/lib/mesa/src/gallium/drivers/swr/swr_query.h b/lib/mesa/src/gallium/drivers/swr/swr_query.h
index 1c736e4e1..d838dc859 100644
--- a/lib/mesa/src/gallium/drivers/swr/swr_query.h
+++ b/lib/mesa/src/gallium/drivers/swr/swr_query.h
@@ -44,5 +44,5 @@ OSALIGNLINE(struct) swr_query {
extern void swr_query_init(struct pipe_context *pipe);
-extern boolean swr_check_render_cond(struct pipe_context *pipe);
+extern bool swr_check_render_cond(struct pipe_context *pipe);
#endif