summaryrefslogtreecommitdiff
path: root/lib/mesa/src/gallium/drivers/swr
diff options
context:
space:
mode:
Diffstat (limited to 'lib/mesa/src/gallium/drivers/swr')
-rw-r--r--lib/mesa/src/gallium/drivers/swr/meson.build272
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto75
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/meson.build35
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp198
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h40
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp62
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h53
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp454
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h1
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp35
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/meson.build12
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp11
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h50
13 files changed, 384 insertions, 914 deletions
diff --git a/lib/mesa/src/gallium/drivers/swr/meson.build b/lib/mesa/src/gallium/drivers/swr/meson.build
index ac712d804..b95c8bc1b 100644
--- a/lib/mesa/src/gallium/drivers/swr/meson.build
+++ b/lib/mesa/src/gallium/drivers/swr/meson.build
@@ -1,4 +1,4 @@
-# Copyright © 2017-2020 Intel Corporation
+# Copyright © 2017-2018 Intel Corporation
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -82,9 +82,7 @@ files_swr_mesa = files(
'rasterizer/jitter/streamout_jit.cpp',
'rasterizer/jitter/streamout_jit.h',
'rasterizer/jitter/shader_lib/DebugOutput.cpp',
- 'rasterizer/jitter/shader_lib/Scatter.cpp',
'rasterizer/jitter/functionpasses/lower_x86.cpp',
- 'rasterizer/memory/SurfaceState.h'
)
files_swr_arch = files(
@@ -129,8 +127,6 @@ files_swr_arch = files(
'rasterizer/core/state.h',
'rasterizer/core/state_funcs.h',
'rasterizer/core/tessellator.h',
- 'rasterizer/core/tessellator.hpp',
- 'rasterizer/core/tessellator.cpp',
'rasterizer/core/threads.cpp',
'rasterizer/core/threads.h',
'rasterizer/core/tilemgr.cpp',
@@ -157,12 +153,10 @@ files_swr_arch = files(
'rasterizer/memory/tilingtraits.h',
'rasterizer/memory/InitMemory.h',
'rasterizer/memory/InitMemory.cpp',
- 'rasterizer/memory/SurfaceState.h'
)
swr_context_files = files('swr_context.h')
swr_state_files = files('rasterizer/core/state.h')
-swr_surf_state_files = files('rasterizer/memory/SurfaceState.h')
swr_event_proto_files = files('rasterizer/archrast/events.proto')
swr_event_pproto_files = files('rasterizer/archrast/events_private.proto')
swr_gen_backend_files = files('rasterizer/codegen/templates/gen_backend.cpp')
@@ -187,201 +181,124 @@ swr_incs = include_directories(
'rasterizer/archrast', 'rasterizer',
)
-swr_cpp_args = []
+swr_cpp_args = [cpp_vis_args]
if cpp.has_argument('-fno-strict-aliasing')
swr_cpp_args += '-fno-strict-aliasing'
endif
-if cpp.has_argument('-Wno-aligned-new')
- swr_cpp_args += '-Wno-aligned-new'
-endif
-
swr_arch_libs = []
-swr_defines = []
+swr_arch_defines = []
swr_avx_args = cpp.first_supported_argument(
'-target-cpu=sandybridge', '-mavx', '-march=core-avx', '-tp=sandybridge',
- '/arch:AVX',
+ prefix : '''
+ #if !defined(__AVX__)
+ # error
+ #endif ''',
)
if swr_avx_args == []
error('Cannot find AVX support for swr. (these are required for SWR an all architectures.)')
endif
-
-shared_swr = get_option('shared-swr')
-if not shared_swr
- if with_swr_arches.length() > 1
- error('When SWR is linked statically only one architecture is allowed.')
- endif
- swr_defines += '-DHAVE_SWR_BUILTIN'
+if with_swr_arches.contains('avx')
+ swr_arch_defines += '-DHAVE_SWR_AVX'
+ swr_arch_libs += shared_library(
+ 'swrAVX',
+ [files_swr_common, files_swr_arch],
+ cpp_args : [swr_cpp_args, swr_avx_args, '-DKNOB_ARCH=KNOB_ARCH_AVX'],
+ link_args : [ld_args_gc_sections],
+ include_directories : [swr_incs],
+ dependencies : [dep_thread, dep_llvm],
+ version : '0.0.0',
+ install : true,
+ )
endif
-if with_swr_arches.contains('skx')
- swr_skx_args = cpp.first_supported_argument(
- '-march=skylake-avx512', '-target-cpu=x86-skylake', '-xCORE-AVX512',
+if with_swr_arches.contains('avx2')
+ swr_avx2_args = cpp.first_supported_argument(
+ '-target-cpu=haswell', '-march=core-avx2', '-tp=haswell',
+ prefix : '''
+ #if !defined(__AVX2__)
+ # error
+ #endif ''',
)
- if swr_skx_args == []
- error('Cannot find SKX support for swr.')
+ if swr_avx2_args == []
+ if cpp.has_argument(['-mavx2', '-mfma', '-mbmi2', '-mf16c'],
+ prefix : '''
+ #if !defined(__AVX2__)
+ # error
+ #endif ''')
+ swr_avx2_args = ['-mavx2', '-mfma', '-mbmi2', '-mf16c']
+ else
+ error('Cannot find AVX2 support for swr.')
+ endif
endif
- swr_defines += '-DHAVE_SWR_SKX'
- if shared_swr
- swr_arch_libs += shared_library(
- 'swrSKX',
- [files_swr_common, files_swr_arch],
- cpp_args : [
- cpp_msvc_compat_args, swr_cpp_args, swr_skx_args,
- '-DKNOB_ARCH=KNOB_ARCH_AVX512',
- ],
- gnu_symbol_visibility : 'hidden',
- link_args : [ld_args_gc_sections],
- include_directories : [swr_incs],
- dependencies : [dep_thread, dep_llvm],
- version : '0.0.0',
- soversion : host_machine.system() == 'windows' ? '' : '0',
- install : true,
- name_prefix : host_machine.system() == 'windows' ? '' : 'lib',
- )
- else
- swr_arch_libs += static_library(
- 'swrSKX',
- [files_swr_common, files_swr_arch],
- cpp_args : [
- cpp_msvc_compat_args, swr_cpp_args, swr_skx_args,
- '-DKNOB_ARCH=KNOB_ARCH_AVX512',
- ],
- gnu_symbol_visibility : 'hidden',
- link_args : [ld_args_gc_sections],
- include_directories : [swr_incs],
- dependencies : [dep_thread, dep_llvm],
- )
- endif
+ swr_arch_defines += '-DHAVE_SWR_AVX2'
+ swr_arch_libs += shared_library(
+ 'swrAVX2',
+ [files_swr_common, files_swr_arch],
+ cpp_args : [swr_cpp_args, swr_avx2_args, '-DKNOB_ARCH=KNOB_ARCH_AVX2'],
+ link_args : [ld_args_gc_sections],
+ include_directories : [swr_incs],
+ dependencies : [dep_thread, dep_llvm],
+ version : '0.0.0',
+ install : true,
+ )
endif
if with_swr_arches.contains('knl')
swr_knl_args = cpp.first_supported_argument(
- '-march=knl', '-target-cpu=mic-knl', '-xMIC-AVX512',
+ '-target-cpu=mic-knl', '-march=knl', '-xMIC-AVX512',
+ prefix : '''
+ #if !defined(__AVX512F__) || !defined(__AVX512ER__)
+ # error
+ #endif ''',
)
if swr_knl_args == []
error('Cannot find KNL support for swr.')
endif
- swr_defines += '-DHAVE_SWR_KNL'
- if shared_swr
- swr_arch_libs += shared_library(
- 'swrKNL',
- [files_swr_common, files_swr_arch],
- cpp_args : [
- cpp_msvc_compat_args, swr_cpp_args, swr_knl_args,
- '-DKNOB_ARCH=KNOB_ARCH_AVX512', '-DSIMD_ARCH_KNIGHTS',
- ],
- gnu_symbol_visibility : 'hidden',
- link_args : [ld_args_gc_sections],
- include_directories : [swr_incs],
- dependencies : [dep_thread, dep_llvm],
- version : '0.0.0',
- soversion : host_machine.system() == 'windows' ? '' : '0',
- install : true,
- name_prefix : host_machine.system() == 'windows' ? '' : 'lib',
- )
- else
- swr_arch_libs += static_library(
- 'swrKNL',
- [files_swr_common, files_swr_arch],
- cpp_args : [
- cpp_msvc_compat_args, swr_cpp_args, swr_knl_args,
- '-DKNOB_ARCH=KNOB_ARCH_AVX512', '-DSIMD_ARCH_KNIGHTS',
- ],
- gnu_symbol_visibility : 'hidden',
- link_args : [ld_args_gc_sections],
- include_directories : [swr_incs],
- dependencies : [dep_thread, dep_llvm],
- )
- endif
+ swr_arch_defines += '-DHAVE_SWR_KNL'
+ swr_arch_libs += shared_library(
+ 'swrKNL',
+ [files_swr_common, files_swr_arch],
+ cpp_args : [
+ swr_cpp_args, swr_knl_args, '-DKNOB_ARCH=KNOB_ARCH_AVX512',
+ '-DKNOB_ARCH_KNIGHTS',
+ ],
+ link_args : [ld_args_gc_sections],
+ include_directories : [swr_incs],
+ dependencies : [dep_thread, dep_llvm],
+ version : '0.0.0',
+ install : true,
+ )
endif
-
-if with_swr_arches.contains('avx2')
- swr_avx2_args = cpp.first_supported_argument(
- '-target-cpu=haswell', '-march=core-avx2', '-tp=haswell', '/arch:AVX2',
+if with_swr_arches.contains('skx')
+ swr_skx_args = cpp.first_supported_argument(
+ '-target-cpu=x86-skylake', '-march=skylake-avx512', '-xCORE-AVX512',
+ prefix : '''
+ #if !defined(__AVX512F__) || !defined(__AVX512BW__)
+ # error
+ #endif ''',
)
- if swr_avx2_args == []
- if cpp.has_argument(['-mavx2', '-mfma', '-mbmi2', '-mf16c'])
- swr_avx2_args = ['-mavx2', '-mfma', '-mbmi2', '-mf16c']
- else
- error('Cannot find AVX2 support for swr.')
- endif
- endif
-
- swr_defines += '-DHAVE_SWR_AVX2'
- if shared_swr
- swr_arch_libs += shared_library(
- 'swrAVX2',
- [files_swr_common, files_swr_arch],
- cpp_args : [
- cpp_msvc_compat_args, swr_cpp_args, swr_avx2_args,
- '-DKNOB_ARCH=KNOB_ARCH_AVX2',
- ],
- gnu_symbol_visibility : 'hidden',
- link_args : [ld_args_gc_sections],
- include_directories : [swr_incs],
- dependencies : [dep_thread, dep_llvm],
- version : '0.0.0',
- soversion : host_machine.system() == 'windows' ? '' : '0',
- install : true,
- name_prefix : host_machine.system() == 'windows' ? '' : 'lib',
- )
- else
- swr_arch_libs += static_library(
- 'swrAVX2',
- [files_swr_common, files_swr_arch],
- cpp_args : [
- cpp_msvc_compat_args, swr_cpp_args, swr_avx2_args,
- '-DKNOB_ARCH=KNOB_ARCH_AVX2',
- ],
- gnu_symbol_visibility : 'hidden',
- link_args : [ld_args_gc_sections],
- include_directories : [swr_incs],
- dependencies : [dep_thread, dep_llvm],
- )
+ if swr_skx_args == []
+ error('Cannot find SKX support for swr.')
endif
-endif
-if with_swr_arches.contains('avx')
- swr_defines += '-DHAVE_SWR_AVX'
- if shared_swr
- swr_arch_libs += shared_library(
- 'swrAVX',
- [files_swr_common, files_swr_arch],
- cpp_args : [
- cpp_msvc_compat_args, swr_cpp_args, swr_avx_args,
- '-DKNOB_ARCH=KNOB_ARCH_AVX',
- ],
- gnu_symbol_visibility : 'hidden',
- link_args : [ld_args_gc_sections],
- include_directories : [swr_incs],
- dependencies : [dep_thread, dep_llvm],
- version : '0.0.0',
- soversion : host_machine.system() == 'windows' ? '' : '0',
- install : true,
- name_prefix : host_machine.system() == 'windows' ? '' : 'lib',
- )
- else
- swr_arch_libs += static_library(
- 'swrAVX',
- [files_swr_common, files_swr_arch],
- cpp_args : [
- cpp_msvc_compat_args, swr_cpp_args, swr_avx_args,
- '-DKNOB_ARCH=KNOB_ARCH_AVX',
- ],
- gnu_symbol_visibility : 'hidden',
- link_args : [ld_args_gc_sections],
- include_directories : [swr_incs],
- dependencies : [dep_thread, dep_llvm],
- )
- endif
+ swr_arch_defines += '-DHAVE_SWR_SKX'
+ swr_arch_libs += shared_library(
+ 'swrSKX',
+ [files_swr_common, files_swr_arch],
+ cpp_args : [swr_cpp_args, swr_skx_args, '-DKNOB_ARCH=KNOB_ARCH_AVX512'],
+ link_args : [ld_args_gc_sections],
+ include_directories : [swr_incs],
+ dependencies : [dep_thread, dep_llvm],
+ version : '0.0.0',
+ install : true,
+ )
endif
-
if swr_arch_libs == []
error('SWR configured, but no SWR architectures configured')
endif
@@ -391,21 +308,12 @@ libmesaswr = static_library(
'mesaswr',
[files_swr_mesa, files_swr_common, gen_knobs_h, gen_knobs_cpp,
gen_builder_hpp, gen_builder_meta_hpp, gen_builder_intrin_hpp],
- cpp_args : [
- cpp_msvc_compat_args, swr_cpp_args, swr_avx_args,
- swr_defines,
- ],
- gnu_symbol_visibility : 'hidden',
- include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, swr_incs],
- dependencies : [dep_llvm, idep_mesautil],
+ cpp_args : [cpp_vis_args, swr_cpp_args, swr_avx_args, swr_arch_defines],
+ include_directories : [inc_common, swr_incs],
+ dependencies : dep_llvm,
)
-link_libs = [libmesaswr]
-if not shared_swr
- link_libs += swr_arch_libs
-endif
-
driver_swr = declare_dependency(
compile_args : '-DGALLIUM_SWR',
- link_with : link_libs
+ link_with : libmesaswr,
)
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto b/lib/mesa/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
index b57d5c428..f5cfb470a 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
@@ -24,99 +24,84 @@
# ArchRast is to not pollute the Rasty code with lots of calculations, etc. that
# are needed to compute per draw statistics, etc.
-event PipelineStats::EarlyDepthStencilInfoSingleSample
+event EarlyDepthStencilInfoSingleSample
{
uint64_t depthPassMask;
uint64_t stencilPassMask;
uint64_t coverageMask;
};
-event PipelineStats::EarlyDepthStencilInfoSampleRate
+event EarlyDepthStencilInfoSampleRate
{
uint64_t depthPassMask;
uint64_t stencilPassMask;
uint64_t coverageMask;
};
-event PipelineStats::EarlyDepthStencilInfoNullPS
+event EarlyDepthStencilInfoNullPS
{
uint64_t depthPassMask;
uint64_t stencilPassMask;
uint64_t coverageMask;
};
-event PipelineStats::LateDepthStencilInfoSingleSample
+event LateDepthStencilInfoSingleSample
{
uint64_t depthPassMask;
uint64_t stencilPassMask;
uint64_t coverageMask;
};
-event PipelineStats::LateDepthStencilInfoSampleRate
+event LateDepthStencilInfoSampleRate
{
uint64_t depthPassMask;
uint64_t stencilPassMask;
uint64_t coverageMask;
};
-event PipelineStats::LateDepthStencilInfoNullPS
+event LateDepthStencilInfoNullPS
{
uint64_t depthPassMask;
uint64_t stencilPassMask;
uint64_t coverageMask;
};
-event PipelineStats::EarlyDepthInfoPixelRate
+event EarlyDepthInfoPixelRate
{
uint64_t depthPassCount;
uint64_t activeLanes;
};
-event PipelineStats::LateDepthInfoPixelRate
+event LateDepthInfoPixelRate
{
uint64_t depthPassCount;
uint64_t activeLanes;
};
-event PipelineStats::BackendDrawEndEvent
+event BackendDrawEndEvent
{
uint32_t drawId;
};
-event PipelineStats::FrontendDrawEndEvent
+event FrontendDrawEndEvent
{
uint32_t drawId;
};
-event Memory::MemoryAccessEvent
-{
- uint32_t drawId;
- uint64_t tsc;
- uint64_t ptr;
- uint32_t size;
- uint8_t isRead;
- uint8_t client;
-};
-
-event Memory::MemoryStatsEndEvent
-{
- uint32_t drawId;
-};
-
-event PipelineStats::TessPrimCount
+event TessPrimCount
{
uint64_t primCount;
};
-event PipelineStats::RasterTileCount
+event RasterTileCount
{
uint32_t drawId;
uint64_t rasterTiles;
};
-event PipelineStats::GSPrimInfo
+event GSPrimInfo
{
uint64_t inputPrimCount;
uint64_t primGeneratedCount;
@@ -128,14 +113,14 @@ event PipelineStats::GSPrimInfo
// Trivial reject is numInvocations - pop_cnt32(validMask)
// Trivial accept is validMask & ~clipMask
// Must clip count is pop_cnt32(clipMask)
-event PipelineStats::ClipInfoEvent
+event ClipInfoEvent
{
uint32_t numInvocations;
uint32_t validMask;
uint32_t clipMask;
};
-event PipelineStats::CullInfoEvent
+event CullInfoEvent
{
uint32_t drawId;
uint64_t degeneratePrimMask;
@@ -143,14 +128,14 @@ event PipelineStats::CullInfoEvent
uint32_t validMask;
};
-event PipelineStats::AlphaInfoEvent
+event AlphaInfoEvent
{
uint32_t drawId;
uint32_t alphaTestEnable;
uint32_t alphaBlendEnable;
};
-event PipelineStats::DrawInstancedEvent
+event DrawInstancedEvent
{
uint32_t drawId;
uint32_t topology;
@@ -165,7 +150,7 @@ event PipelineStats::DrawInstancedEvent
uint32_t splitId; // Split draw count or id.
};
-event PipelineStats::DrawIndexedInstancedEvent
+event DrawIndexedInstancedEvent
{
uint32_t drawId;
uint32_t topology;
@@ -181,32 +166,32 @@ event PipelineStats::DrawIndexedInstancedEvent
uint32_t splitId; // Split draw count or id.
};
-event ShaderStats::VSStats
+event VSStats
{
- HANDLE hStats; // SWR_SHADER_STATS
+ uint32_t numInstExecuted;
};
-event ShaderStats::HSStats
+event HSStats
{
- HANDLE hStats; // SWR_SHADER_STATS
+ uint32_t numInstExecuted;
};
-event ShaderStats::DSStats
+event DSStats
{
- HANDLE hStats; // SWR_SHADER_STATS
+ uint32_t numInstExecuted;
};
-event ShaderStats::GSStats
+event GSStats
{
- HANDLE hStats; // SWR_SHADER_STATS
+ uint32_t numInstExecuted;
};
-event ShaderStats::PSStats
+event PSStats
{
- HANDLE hStats; // SWR_SHADER_STATS
+ uint32_t numInstExecuted;
};
-event ShaderStats::CSStats
+event CSStats
{
- HANDLE hStats; // SWR_SHADER_STATS
+ uint32_t numInstExecuted;
}; \ No newline at end of file
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/meson.build b/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/meson.build
index daf79ed4c..282751760 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/meson.build
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/meson.build
@@ -48,8 +48,7 @@ files_swr_common += [
]
foreach x : [[swr_context_files, 'gen_swr_context_llvm.h'],
- [swr_state_files, 'gen_state_llvm.h'],
- [swr_surf_state_files, 'gen_surf_state_llvm.h']]
+ [swr_state_files, 'gen_state_llvm.h']]
files_swr_common += custom_target(
x[1],
input : ['gen_llvm_types.py', x[0]],
@@ -62,16 +61,26 @@ foreach x : [[swr_context_files, 'gen_swr_context_llvm.h'],
)
endforeach
-ar_output_filenames = ['gen_ar_event.hpp', 'gen_ar_event.cpp', 'gen_ar_eventhandler.hpp', 'gen_ar_eventhandlerfile.hpp']
-ar_template_filenames = []
-foreach fname : ar_output_filenames
- ar_template_filenames += join_paths('templates', fname)
+foreach x : [['gen_ar_event.hpp', '--gen_event_hpp'],
+ ['gen_ar_event.cpp', '--gen_event_cpp'],
+ ['gen_ar_eventhandler.hpp', '--gen_eventhandler_h'],
+ ['gen_ar_eventhandlerfile.hpp', '--gen_eventhandlerfile_h']]
+ files_swr_common += custom_target(
+ x[0],
+ input : ['gen_archrast.py', swr_event_proto_files, swr_event_pproto_files],
+ output : x[0],
+ command : [
+ prog_python,
+ '@INPUT0@',
+ '--proto', '@INPUT1@',
+ '--proto_private', '@INPUT2@',
+ '--output', '@OUTPUT@',
+ x[1],
+ ],
+ depend_files : files(
+ join_paths('templates', x[0]),
+ 'gen_common.py',
+ ),
+ )
endforeach
-files_swr_common += custom_target(
- 'gen_archrast',
- input : ['gen_archrast.py', swr_event_proto_files, swr_event_pproto_files],
- output : ar_output_filenames,
- command : [prog_python, '@INPUT0@', '--proto', '@INPUT1@', '@INPUT2@', '--output-dir', meson.current_build_dir()],
- depend_files : files('gen_common.py', ar_template_filenames)
-)
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
index b67ffbfa7..c68f3b9a6 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
@@ -32,6 +32,7 @@
#include "common/rdtsc_buckets.h"
#include "builder_gfx_mem.h"
+
namespace SwrJit
{
using namespace llvm;
@@ -41,9 +42,7 @@ namespace SwrJit
mpTranslationFuncTy = nullptr;
mpfnTranslateGfxAddressForRead = nullptr;
mpfnTranslateGfxAddressForWrite = nullptr;
- mpfnTrackMemAccess = nullptr;
mpParamSimDC = nullptr;
- mpWorkerData = nullptr;
}
@@ -51,12 +50,13 @@ namespace SwrJit
{
}
- void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage)
+ void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage)
{
- SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT::MEM_CLIENT_INTERNAL),
+ SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT_INTERNAL),
"Internal memory should not be gfxptr_t.");
}
+
//////////////////////////////////////////////////////////////////////////
/// @brief Generate a masked gather operation in LLVM IR. If not
/// supported on the underlying platform, emulate it with loads
@@ -70,7 +70,7 @@ namespace SwrJit
Value* vIndices,
Value* vMask,
uint8_t scale,
- MEM_CLIENT usage)
+ JIT_MEM_CLIENT usage)
{
// address may be coming in as 64bit int now so get the pointer
if (pBase->getType() == mInt64Ty)
@@ -95,7 +95,7 @@ namespace SwrJit
Value* vIndices,
Value* vMask,
uint8_t scale,
- MEM_CLIENT usage)
+ JIT_MEM_CLIENT usage)
{
// address may be coming in as 64bit int now so get the pointer
@@ -109,7 +109,7 @@ namespace SwrJit
}
void BuilderGfxMem::SCATTERPS(
- Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage)
+ Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, JIT_MEM_CLIENT usage)
{
// address may be coming in as 64bit int now so get the pointer
@@ -118,95 +118,46 @@ namespace SwrJit
pDst = INT_TO_PTR(pDst, PointerType::get(mInt8Ty, 0));
}
- Builder::SCATTERPS(pDst, BITCAST(vSrc, mSimdFP32Ty), vOffsets, vMask, usage);
+ Builder::SCATTERPS(pDst, vSrc, vOffsets, vMask, usage);
}
+
Value* BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset)
{
return ADD(base, offset);
}
- Value* BuilderGfxMem::GEP(Value* Ptr, Value* Idx, Type* Ty, bool isReadOnly, const Twine& Name)
+ Value* BuilderGfxMem::GEP(Value* Ptr, Value* Idx, Type* Ty, const Twine& Name)
{
- bool xlate = (Ptr->getType() == mInt64Ty);
- if (xlate)
- {
- Ptr = INT_TO_PTR(Ptr, Ty);
- Ptr = Builder::GEP(Ptr, Idx, nullptr, isReadOnly, Name);
- Ptr = PTR_TO_INT(Ptr, mInt64Ty);
- if (isReadOnly)
- {
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
- }
- else
- {
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForWrite);
- }
- }
- else
- {
- Ptr = Builder::GEP(Ptr, Idx, nullptr, isReadOnly, Name);
- }
- return Ptr;
+ Ptr = TranslationHelper(Ptr, Ty);
+ return Builder::GEP(Ptr, Idx, nullptr, Name);
}
Value* BuilderGfxMem::GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name)
{
- bool xlate = (Ptr->getType() == mInt64Ty);
- if (xlate)
- {
- Ptr = INT_TO_PTR(Ptr, Ty);
- Ptr = Builder::GEP(Ty, Ptr, Idx, Name);
- Ptr = PTR_TO_INT(Ptr, mInt64Ty);
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
- }
- else
- {
- Ptr = Builder::GEP(Ty, Ptr, Idx, Name);
- }
- return Ptr;
+ Ptr = TranslationHelper(Ptr, Ty);
+ return Builder::GEP(Ty, Ptr, Idx, Name);
}
Value* BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<Value*>& indexList, Type* Ty)
{
- bool xlate = (Ptr->getType() == mInt64Ty);
- if (xlate)
- {
- Ptr = INT_TO_PTR(Ptr, Ty);
- Ptr = Builder::GEP(Ptr, indexList);
- Ptr = PTR_TO_INT(Ptr, mInt64Ty);
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
- }
- else
- {
- Ptr = Builder::GEP(Ptr, indexList);
- }
- return Ptr;
+ Ptr = TranslationHelper(Ptr, Ty);
+ return Builder::GEP(Ptr, indexList);
}
Value*
BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty)
{
- bool xlate = (Ptr->getType() == mInt64Ty);
- if (xlate)
- {
- Ptr = INT_TO_PTR(Ptr, Ty);
- Ptr = Builder::GEP(Ptr, indexList);
- Ptr = PTR_TO_INT(Ptr, mInt64Ty);
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
- }
- else
- {
- Ptr = Builder::GEP(Ptr, indexList);
- }
- return Ptr;
+ Ptr = TranslationHelper(Ptr, Ty);
+ return Builder::GEP(Ptr, indexList);
}
- Value* BuilderGfxMem::TranslationHelper(Value* Ptr, Type* Ty, Value* pfnTranslateGfxAddress)
+ Value* BuilderGfxMem::TranslationHelper(Value* Ptr, Type* Ty)
{
SWR_ASSERT(!(Ptr->getType() == mInt64Ty && Ty == nullptr),
"Access of GFX pointers must have non-null type specified.");
+
// address may be coming in as 64bit int now so get the pointer
if (Ptr->getType() == mInt64Ty)
{
@@ -216,78 +167,29 @@ namespace SwrJit
return Ptr;
}
- void BuilderGfxMem::TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead)
- {
-#if defined(KNOB_ENABLE_AR)
- if (!KNOB_AR_ENABLE_MEMORY_EVENTS)
- {
- return;
- }
-
- Value* tmpPtr;
- // convert actual pointers to int64.
- uint32_t size = 0;
-
- if (Ptr->getType() == mInt64Ty)
- {
- DataLayout dataLayout(JM()->mpCurrentModule);
- size = (uint32_t)dataLayout.getTypeAllocSize(Ty);
-
- tmpPtr = Ptr;
- }
- else
- {
- DataLayout dataLayout(JM()->mpCurrentModule);
- size = (uint32_t)dataLayout.getTypeAllocSize(Ptr->getType());
-
- tmpPtr = PTR_TO_INT(Ptr, mInt64Ty);
- }
-
- // There are some shader compile setups where there's no translation functions set up.
- // This would be a situation where the accesses are to internal rasterizer memory and won't
- // be logged.
- // TODO: we may wish to revisit this for URB reads/writes, though.
- if (mpfnTrackMemAccess)
- {
- SWR_ASSERT(mpWorkerData != nullptr);
- CALL(mpfnTrackMemAccess,
- {mpParamSimDC,
- mpWorkerData,
- tmpPtr,
- C((uint32_t)size),
- C((uint8_t)isRead),
- C((uint32_t)usage)});
- }
-#endif
-
- return;
- }
-
- LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, MEM_CLIENT usage)
+ LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
- TrackerHelper(Ptr, Ty, usage, true);
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
+ Ptr = TranslationHelper(Ptr, Ty);
return Builder::LOAD(Ptr, Name);
}
- LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, MEM_CLIENT usage)
+ LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
- TrackerHelper(Ptr, Ty, usage, true);
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
+ Ptr = TranslationHelper(Ptr, Ty);
return Builder::LOAD(Ptr, Name);
}
+
LoadInst* BuilderGfxMem::LOAD(
- Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, MEM_CLIENT usage)
+ Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
- TrackerHelper(Ptr, Ty, usage, true);
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
+ Ptr = TranslationHelper(Ptr, Ty);
return Builder::LOAD(Ptr, isVolatile, Name);
}
@@ -295,7 +197,7 @@ namespace SwrJit
const std::initializer_list<uint32_t>& offset,
const llvm::Twine& name,
Type* Ty,
- MEM_CLIENT usage)
+ JIT_MEM_CLIENT usage)
{
AssertGFXMemoryParams(BasePtr, usage);
@@ -320,59 +222,25 @@ namespace SwrJit
return LOAD(BasePtr, name, Ty, usage);
}
+
CallInst* BuilderGfxMem::MASKED_LOAD(Value* Ptr,
unsigned Align,
Value* Mask,
Value* PassThru,
const Twine& Name,
Type* Ty,
- MEM_CLIENT usage)
+ JIT_MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
- TrackerHelper(Ptr, Ty, usage, true);
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
+ Ptr = TranslationHelper(Ptr, Ty);
return Builder::MASKED_LOAD(Ptr, Align, Mask, PassThru, Name, Ty, usage);
}
- StoreInst*
- BuilderGfxMem::STORE(Value* Val, Value* Ptr, bool isVolatile, Type* Ty, MEM_CLIENT usage)
- {
- AssertGFXMemoryParams(Ptr, usage);
- TrackerHelper(Ptr, Ty, usage, false);
-
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
- return Builder::STORE(Val, Ptr, isVolatile, Ty, usage);
- }
-
- StoreInst* BuilderGfxMem::STORE(Value* Val,
- Value* BasePtr,
- const std::initializer_list<uint32_t>& offset,
- Type* Ty,
- MEM_CLIENT usage)
- {
- AssertGFXMemoryParams(BasePtr, usage);
- TrackerHelper(BasePtr, Ty, usage, false);
-
- BasePtr = TranslationHelper(BasePtr, Ty, mpfnTranslateGfxAddressForRead);
- return Builder::STORE(Val, BasePtr, offset, Ty, usage);
- }
-
- CallInst* BuilderGfxMem::MASKED_STORE(
- Value* Val, Value* Ptr, unsigned Align, Value* Mask, Type* Ty, MEM_CLIENT usage)
- {
- AssertGFXMemoryParams(Ptr, usage);
-
- TrackerHelper(Ptr, Ty, usage, false);
-
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
- return Builder::MASKED_STORE(Val, Ptr, Align, Mask, Ty, usage);
- }
-
Value* BuilderGfxMem::TranslateGfxAddressForRead(Value* xpGfxAddress,
Type* PtrTy,
const Twine& Name,
- MEM_CLIENT /* usage */)
+ JIT_MEM_CLIENT /* usage */)
{
if (PtrTy == nullptr)
{
@@ -384,7 +252,7 @@ namespace SwrJit
Value* BuilderGfxMem::TranslateGfxAddressForWrite(Value* xpGfxAddress,
Type* PtrTy,
const Twine& Name,
- MEM_CLIENT /* usage */)
+ JIT_MEM_CLIENT /* usage */)
{
if (PtrTy == nullptr)
{
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
index c361959b7..aefbbef9f 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
@@ -41,7 +41,7 @@ namespace SwrJit
BuilderGfxMem(JitManager* pJitMgr);
virtual ~BuilderGfxMem() {}
- virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, bool isReadOnly = true, const Twine& Name = "");
+ virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = "");
virtual Value* GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name = "");
virtual Value*
GEP(Value* Ptr, const std::initializer_list<Value*>& indexList, Type* Ty = nullptr);
@@ -51,21 +51,22 @@ namespace SwrJit
virtual LoadInst* LOAD(Value* Ptr,
const char* Name,
Type* Ty = nullptr,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* Ptr,
const Twine& Name = "",
Type* Ty = nullptr,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* Ptr,
bool isVolatile,
const Twine& Name = "",
Type* Ty = nullptr,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* BasePtr,
const std::initializer_list<uint32_t>& offset,
const llvm::Twine& Name = "",
Type* Ty = nullptr,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+
virtual CallInst* MASKED_LOAD(Value* Ptr,
unsigned Align,
@@ -73,64 +74,57 @@ namespace SwrJit
Value* PassThru = nullptr,
const Twine& Name = "",
Type* Ty = nullptr,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
- virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
- virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
- virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual Value* GATHERPS(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual Value* GATHERDD(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual void SCATTERPS(Value* pDst,
Value* vSrc,
Value* vOffsets,
Value* vMask,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+
Value* TranslateGfxAddressForRead(Value* xpGfxAddress,
Type* PtrTy = nullptr,
const Twine& Name = "",
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
Value* TranslateGfxAddressForWrite(Value* xpGfxAddress,
Type* PtrTy = nullptr,
const Twine& Name = "",
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+
+
protected:
- void AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage);
+ void AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage);
virtual void NotifyPrivateContextSet();
virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset);
- Value* TranslationHelper(Value* Ptr, Type* Ty, Value* pfnTranslateGfxAddress);
- void TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead);
+ Value* TranslationHelper(Value* Ptr, Type* Ty);
FunctionType* GetTranslationFunctionType() { return mpTranslationFuncTy; }
Value* GetTranslationFunctionForRead() { return mpfnTranslateGfxAddressForRead; }
Value* GetTranslationFunctionForWrite() { return mpfnTranslateGfxAddressForWrite; }
Value* GetParamSimDC() { return mpParamSimDC; }
- Value* mpWorkerData;
private:
FunctionType* mpTranslationFuncTy;
Value* mpfnTranslateGfxAddressForRead;
Value* mpfnTranslateGfxAddressForWrite;
Value* mpParamSimDC;
- Value* mpfnTrackMemAccess;
};
} // namespace SwrJit
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index b5eb0a782..94489f1c7 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -29,19 +29,20 @@
******************************************************************************/
#include "jit_pch.hpp"
#include "builder.h"
+#include "common/rdtsc_buckets.h"
#include <cstdarg>
namespace SwrJit
{
- void Builder::AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage)
+ void Builder::AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage)
{
SWR_ASSERT(
ptr->getType() != mInt64Ty,
"Address appears to be GFX access. Requires translation through BuilderGfxMem.");
}
- Value* Builder::GEP(Value* Ptr, Value* Idx, Type* Ty, bool isReadOnly, const Twine& Name)
+ Value* Builder::GEP(Value* Ptr, Value* Idx, Type* Ty, const Twine& Name)
{
return IRB()->CreateGEP(Ptr, Idx, Name);
}
@@ -93,26 +94,26 @@ namespace SwrJit
return IN_BOUNDS_GEP(ptr, indices);
}
- LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, Name);
}
- LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, Name);
}
- LoadInst* Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, JIT_MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ty, Ptr, Name);
}
LoadInst*
- Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, MEM_CLIENT usage)
+ Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, isVolatile, Name);
@@ -122,7 +123,7 @@ namespace SwrJit
const std::initializer_list<uint32_t>& indices,
const llvm::Twine& name,
Type* Ty,
- MEM_CLIENT usage)
+ JIT_MEM_CLIENT usage)
{
std::vector<Value*> valIndices;
for (auto i : indices)
@@ -141,7 +142,7 @@ namespace SwrJit
}
StoreInst*
- Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices, Type* Ty, MEM_CLIENT usage)
+ Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices)
{
std::vector<Value*> valIndices;
for (auto i : indices)
@@ -186,7 +187,7 @@ namespace SwrJit
Value* vIndices,
Value* vMask,
uint8_t scale,
- MEM_CLIENT usage)
+ JIT_MEM_CLIENT usage)
{
AssertMemoryUsageParams(pBase, usage);
@@ -206,7 +207,7 @@ namespace SwrJit
Value* vIndices,
Value* vMask,
uint8_t scale,
- MEM_CLIENT usage)
+ JIT_MEM_CLIENT usage)
{
AssertMemoryUsageParams(pBase, usage);
@@ -234,12 +235,7 @@ namespace SwrJit
/// @param pVecPassthru - SIMD wide vector of values to load when lane is inactive
Value* Builder::GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* pVecPassthru)
{
- return MASKED_GATHER(pVecSrcPtr, AlignType(4), pVecMask, pVecPassthru);
- }
-
- void Builder::SCATTER_PTR(Value* pVecDstPtr, Value* pVecSrc, Value* pVecMask)
- {
- MASKED_SCATTER(pVecSrc, pVecDstPtr, AlignType(4), pVecMask);
+ return MASKED_GATHER(pVecSrcPtr, 4, pVecMask, pVecPassthru);
}
void Builder::Gather4(const SWR_FORMAT format,
@@ -248,7 +244,7 @@ namespace SwrJit
Value* mask,
Value* vGatherComponents[],
bool bPackedOutput,
- MEM_CLIENT usage)
+ JIT_MEM_CLIENT usage)
{
const SWR_FORMAT_INFO& info = GetFormatInfo(format);
if (info.type[0] == SWR_TYPE_FLOAT && info.bpc[0] == 32)
@@ -267,7 +263,7 @@ namespace SwrJit
Value* vMask,
Value* vGatherComponents[],
bool bPackedOutput,
- MEM_CLIENT usage)
+ JIT_MEM_CLIENT usage)
{
switch (info.bpp / info.numComps)
{
@@ -341,7 +337,7 @@ namespace SwrJit
Value* vMask,
Value* vGatherComponents[],
bool bPackedOutput,
- MEM_CLIENT usage)
+ JIT_MEM_CLIENT usage)
{
switch (info.bpp / info.numComps)
{
@@ -427,8 +423,8 @@ namespace SwrJit
bool bPackedOutput)
{
// cast types
- Type* vGatherTy = getVectorType(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
- Type* v32x8Ty = getVectorType(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
+ Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
+ Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
// input could either be float or int vector; do shuffle work in int
vGatherInput[0] = BITCAST(vGatherInput[0], mSimdInt32Ty);
@@ -436,7 +432,7 @@ namespace SwrJit
if (bPackedOutput)
{
- Type* v128bitTy = getVectorType(IntegerType::getIntNTy(JM()->mContext, 128),
+ Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128),
mVWidth / 4); // vwidth is units of 32 bits
// shuffle mask
@@ -532,12 +528,12 @@ namespace SwrJit
bool bPackedOutput)
{
// cast types
- Type* vGatherTy = getVectorType(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
- Type* v32x8Ty = getVectorType(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
+ Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
+ Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
if (bPackedOutput)
{
- Type* v128Ty = getVectorType(IntegerType::getIntNTy(JM()->mContext, 128),
+ Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128),
mVWidth / 4); // vwidth is units of 32 bits
// shuffle mask
Value* vConstMask = C<char>({0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
@@ -632,7 +628,6 @@ namespace SwrJit
break;
}
- assert(vConstMask && "Invalid info.numComps value");
vGatherOutput[swizzleIndex] =
BITCAST(PSHUFB(BITCAST(vGatherInput, v32x8Ty), vConstMask), vGatherTy);
// after pshufb for x channel
@@ -649,16 +644,9 @@ namespace SwrJit
/// @param vOffsets - vector of byte offsets from pDst
/// @param vMask - mask of valid lanes
void Builder::SCATTERPS(
- Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage)
+ Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, JIT_MEM_CLIENT usage)
{
AssertMemoryUsageParams(pDst, usage);
-#if LLVM_VERSION_MAJOR >= 11
- SWR_ASSERT(cast<VectorType>(vSrc->getType())->getElementType()->isFloatTy());
-#else
- SWR_ASSERT(vSrc->getType()->getVectorElementType()->isFloatTy());
-#endif
- VSCATTERPS(pDst, vMask, vOffsets, vSrc, C(1));
- return;
/* Scatter algorithm
@@ -670,10 +658,6 @@ namespace SwrJit
*/
- /*
-
- // Reference implementation kept around for reference
-
BasicBlock* pCurBB = IRB()->GetInsertBlock();
Function* pFunc = pCurBB->getParent();
Type* pSrcTy = vSrc->getType()->getVectorElementType();
@@ -761,7 +745,5 @@ namespace SwrJit
// Move builder to beginning of post loop
IRB()->SetInsertPoint(pPostLoop, pPostLoop->begin());
-
- */
}
} // namespace SwrJit
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
index 429d5779a..15def96cb 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
@@ -30,22 +30,20 @@
#pragma once
public:
-enum class MEM_CLIENT
+typedef enum _JIT_MEM_CLIENT
{
MEM_CLIENT_INTERNAL,
GFX_MEM_CLIENT_FETCH,
GFX_MEM_CLIENT_SAMPLER,
GFX_MEM_CLIENT_SHADER,
- GFX_MEM_CLIENT_STREAMOUT,
- GFX_MEM_CLIENT_URB
-};
+} JIT_MEM_CLIENT;
protected:
virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset);
-void AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage);
+void AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage);
public:
-virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, bool isReadOnly = true, const Twine& Name = "");
+virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = "");
virtual Value* GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name = "");
virtual Value* GEP(Value* ptr, const std::initializer_list<Value*>& indexList, Type* Ty = nullptr);
virtual Value*
@@ -58,23 +56,23 @@ Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*>& indexList)
Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t>& indexList);
virtual LoadInst*
- LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* Ptr,
const Twine& Name = "",
Type* Ty = nullptr,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual LoadInst*
- LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* Ptr,
bool isVolatile,
const Twine& Name = "",
Type* Ty = nullptr,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* BasePtr,
const std::initializer_list<uint32_t>& offset,
const llvm::Twine& Name = "",
Type* Ty = nullptr,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual CallInst* MASKED_LOAD(Value* Ptr,
unsigned Align,
@@ -82,24 +80,14 @@ virtual CallInst* MASKED_LOAD(Value* Ptr,
Value* PassThru = nullptr,
const Twine& Name = "",
Type* Ty = nullptr,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL)
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL)
{
- return IRB()->CreateMaskedLoad(Ptr, AlignType(Align), Mask, PassThru, Name);
+ return IRB()->CreateMaskedLoad(Ptr, Align, Mask, PassThru, Name);
}
-virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL)
-{
- return IRB()->CreateStore(Val, Ptr, isVolatile);
-}
-
-virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
-virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL)
-{
- return IRB()->CreateMaskedStore(Val, Ptr, AlignType(Align), Mask);
-}
-
-LoadInst* LOADV(Value* BasePtr, const std::initializer_list<Value*>& offset, const llvm::Twine& name = "");
+LoadInst*
+ LOADV(Value* BasePtr, const std::initializer_list<Value*>& offset, const llvm::Twine& name = "");
+StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset);
StoreInst* STOREV(Value* Val, Value* BasePtr, const std::initializer_list<Value*>& offset);
Value* MEM_ADD(Value* i32Incr,
@@ -113,14 +101,14 @@ void Gather4(const SWR_FORMAT format,
Value* mask,
Value* vGatherComponents[],
bool bPackedOutput,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual Value* GATHERPS(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
void GATHER4PS(const SWR_FORMAT_INFO& info,
Value* pSrcBase,
@@ -128,14 +116,14 @@ void GATHER4PS(const SWR_FORMAT_INFO& info,
Value* mask,
Value* vGatherComponents[],
bool bPackedOutput,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual Value* GATHERDD(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
void GATHER4DD(const SWR_FORMAT_INFO& info,
Value* pSrcBase,
@@ -143,18 +131,17 @@ void GATHER4DD(const SWR_FORMAT_INFO& info,
Value* mask,
Value* vGatherComponents[],
bool bPackedOutput,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
Value* GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
Value* GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* pVecPassthru);
-void SCATTER_PTR(Value* pVecDstPtr, Value* pVecSrc, Value* pVecMask);
virtual void SCATTERPS(Value* pDst,
Value* vSrc,
Value* vOffsets,
Value* vMask,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
void Shuffle8bpcGather4(const SWR_FORMAT_INFO& info,
Value* vGatherInput,
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
index 61c6b57b3..c34959d35 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
@@ -32,15 +32,11 @@
#include "passes.h"
#include "JitManager.h"
-#include "common/simdlib.hpp"
-
#include <unordered_map>
-extern "C" void ScatterPS_256(uint8_t*, SIMD256::Integer, SIMD256::Float, uint8_t, uint32_t);
-
namespace llvm
{
- // forward declare the initializer
+ // foward declare the initializer
void initializeLowerX86Pass(PassRegistry&);
} // namespace llvm
@@ -68,26 +64,22 @@ namespace SwrJit
struct X86Intrinsic
{
- IntrinsicID intrin[NUM_WIDTHS];
+ Intrinsic::ID intrin[NUM_WIDTHS];
EmuFunc emuFunc;
};
// Map of intrinsics that haven't been moved to the new mechanism yet. If used, these get the
// previous behavior of mapping directly to avx/avx2 intrinsics.
- using intrinsicMap_t = std::map<std::string, IntrinsicID>;
- static intrinsicMap_t& getIntrinsicMap() {
- static std::map<std::string, IntrinsicID> intrinsicMap = {
- {"meta.intrinsic.BEXTR_32", Intrinsic::x86_bmi_bextr_32},
- {"meta.intrinsic.VPSHUFB", Intrinsic::x86_avx2_pshuf_b},
- {"meta.intrinsic.VCVTPS2PH", Intrinsic::x86_vcvtps2ph_256},
- {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
- {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256},
- {"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d},
- {"meta.intrinsic.PDEP32", Intrinsic::x86_bmi_pdep_32},
- {"meta.intrinsic.RDTSC", Intrinsic::x86_rdtsc}
- };
- return intrinsicMap;
- }
+ static std::map<std::string, Intrinsic::ID> intrinsicMap = {
+ {"meta.intrinsic.BEXTR_32", Intrinsic::x86_bmi_bextr_32},
+ {"meta.intrinsic.VPSHUFB", Intrinsic::x86_avx2_pshuf_b},
+ {"meta.intrinsic.VCVTPS2PH", Intrinsic::x86_vcvtps2ph_256},
+ {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
+ {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256},
+ {"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d},
+ {"meta.intrinsic.PDEP32", Intrinsic::x86_bmi_pdep_32},
+ {"meta.intrinsic.RDTSC", Intrinsic::x86_rdtsc},
+ };
// Forward decls
Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
@@ -96,8 +88,6 @@ namespace SwrJit
Instruction*
VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
Instruction*
- VSCATTER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
- Instruction*
VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
Instruction*
VHSUB_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
@@ -112,75 +102,88 @@ namespace SwrJit
static Intrinsic::ID DOUBLE = (Intrinsic::ID)-1;
- using intrinsicMapAdvanced_t = std::vector<std::map<std::string, X86Intrinsic>>;
-
- static intrinsicMapAdvanced_t& getIntrinsicMapAdvanced()
- {
- // clang-format off
- static intrinsicMapAdvanced_t intrinsicMapAdvanced = {
- // 256 wide 512 wide
- {
- // AVX
- {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VPERMD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}},
- {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
- {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
- },
- {
- // AVX2
- {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx2_permps, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx2_permd, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}},
- {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
- },
- {
- // AVX512
- {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx512_rcp14_ps_256, Intrinsic::x86_avx512_rcp14_ps_512}, NO_EMU}},
- #if LLVM_VERSION_MAJOR < 7
- {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx512_mask_permvar_sf_256, Intrinsic::x86_avx512_mask_permvar_sf_512}, NO_EMU}},
- {"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx512_mask_permvar_si_256, Intrinsic::x86_avx512_mask_permvar_si_512}, NO_EMU}},
- #else
- {"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VPERMD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
- #endif
- {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}},
- #if LLVM_VERSION_MAJOR < 7
- {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx512_mask_cvtpd2ps_256, Intrinsic::x86_avx512_mask_cvtpd2ps_512}, NO_EMU}},
- #else
- {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VCONVERT_EMU}},
- #endif
- {"meta.intrinsic.VROUND", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VROUND_EMU}},
- {"meta.intrinsic.VHSUBPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VHSUB_EMU}}
- }};
- // clang-format on
- return intrinsicMapAdvanced;
- }
-
- static uint32_t getBitWidth(VectorType *pVTy)
- {
-#if LLVM_VERSION_MAJOR >= 12
- return cast<FixedVectorType>(pVTy)->getNumElements() * pVTy->getElementType()->getPrimitiveSizeInBits();
-#elif LLVM_VERSION_MAJOR >= 11
- return pVTy->getNumElements() * pVTy->getElementType()->getPrimitiveSizeInBits();
+ static std::map<std::string, X86Intrinsic> intrinsicMap2[] = {
+ // 256 wide 512 wide
+ {
+ // AVX
+ {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VPERMPS",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VPERMD",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VGATHERPD",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERPS",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERDD",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VCVTPD2PS",
+ {{Intrinsic::x86_avx_cvt_pd2_ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
+ {"meta.intrinsic.VCVTPH2PS",
+ {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
+ {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
+ },
+ {
+ // AVX2
+ {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VPERMPS",
+ {{Intrinsic::x86_avx2_permps, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VPERMD",
+ {{Intrinsic::x86_avx2_permd, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VGATHERPD",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERPS",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERDD",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VCVTPH2PS",
+ {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
+ {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
+ },
+ {
+ // AVX512
+ {"meta.intrinsic.VRCPPS",
+ {{Intrinsic::x86_avx512_rcp14_ps_256, Intrinsic::x86_avx512_rcp14_ps_512}, NO_EMU}},
+#if LLVM_VERSION_MAJOR < 7
+ {"meta.intrinsic.VPERMPS",
+ {{Intrinsic::x86_avx512_mask_permvar_sf_256,
+ Intrinsic::x86_avx512_mask_permvar_sf_512},
+ NO_EMU}},
+ {"meta.intrinsic.VPERMD",
+ {{Intrinsic::x86_avx512_mask_permvar_si_256,
+ Intrinsic::x86_avx512_mask_permvar_si_512},
+ NO_EMU}},
#else
- return pVTy->getBitWidth();
+ {"meta.intrinsic.VPERMPS",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VPERMD",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
#endif
- }
+ {"meta.intrinsic.VGATHERPD",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERPS",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERDD",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+#if LLVM_VERSION_MAJOR < 7
+ {"meta.intrinsic.VCVTPD2PS",
+ {{Intrinsic::x86_avx512_mask_cvtpd2ps_256, Intrinsic::x86_avx512_mask_cvtpd2ps_512},
+ NO_EMU}},
+#else
+ {"meta.intrinsic.VCVTPD2PS",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VCONVERT_EMU}},
+#endif
+ {"meta.intrinsic.VCVTPH2PS",
+ {{Intrinsic::x86_avx512_mask_vcvtph2ps_256, Intrinsic::x86_avx512_mask_vcvtph2ps_512},
+ NO_EMU}},
+ {"meta.intrinsic.VROUND",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VROUND_EMU}},
+ {"meta.intrinsic.VHSUBPS",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VHSUB_EMU}},
+ }};
struct LowerX86 : public FunctionPass
{
@@ -206,31 +209,6 @@ namespace SwrJit
SWR_ASSERT(false, "Unsupported AVX architecture.");
mTarget = AVX;
}
-
- // Setup scatter function for 256 wide
- uint32_t curWidth = B->mVWidth;
- B->SetTargetWidth(8);
- std::vector<Type*> args = {
- B->mInt8PtrTy, // pBase
- B->mSimdInt32Ty, // vIndices
- B->mSimdFP32Ty, // vSrc
- B->mInt8Ty, // mask
- B->mInt32Ty // scale
- };
-
- FunctionType* pfnScatterTy = FunctionType::get(B->mVoidTy, args, false);
- mPfnScatter256 = cast<Function>(
-#if LLVM_VERSION_MAJOR >= 9
- B->JM()->mpCurrentModule->getOrInsertFunction("ScatterPS_256", pfnScatterTy).getCallee());
-#else
- B->JM()->mpCurrentModule->getOrInsertFunction("ScatterPS_256", pfnScatterTy));
-#endif
- if (sys::DynamicLibrary::SearchForAddressOfSymbol("ScatterPS_256") == nullptr)
- {
- sys::DynamicLibrary::AddSymbol("ScatterPS_256", (void*)&ScatterPS_256);
- }
-
- B->SetTargetWidth(curWidth);
}
// Try to decipher the vector type of the instruction. This does not work properly
@@ -242,16 +220,13 @@ namespace SwrJit
TargetWidth* pWidth,
Type** pTy)
{
- assert(pCallInst);
Type* pVecTy = pCallInst->getType();
// Check for intrinsic specific types
// VCVTPD2PS type comes from src, not dst
if (intrinName.equals("meta.intrinsic.VCVTPD2PS"))
{
- Value* pOp = pCallInst->getOperand(0);
- assert(pOp);
- pVecTy = pOp->getType();
+ pVecTy = pCallInst->getOperand(0)->getType();
}
if (!pVecTy->isVectorTy())
@@ -267,7 +242,7 @@ namespace SwrJit
}
SWR_ASSERT(pVecTy->isVectorTy(), "Couldn't determine vector size");
- uint32_t width = getBitWidth(cast<VectorType>(pVecTy));
+ uint32_t width = cast<VectorType>(pVecTy)->getBitWidth();
switch (width)
{
case 256:
@@ -299,7 +274,7 @@ namespace SwrJit
SWR_ASSERT(false, "Unhandled vector width type %d\n", width);
}
- return ConstantVector::getNullValue(getVectorType(pTy, numElem));
+ return ConstantVector::getNullValue(VectorType::get(pTy, numElem));
}
Value* GetMask(TargetWidth width)
@@ -322,28 +297,20 @@ namespace SwrJit
// Convert <N x i1> mask to <N x i32> x86 mask
Value* VectorMask(Value* vi1Mask)
{
-#if LLVM_VERSION_MAJOR >= 12
- uint32_t numElem = cast<FixedVectorType>(vi1Mask->getType())->getNumElements();
-#elif LLVM_VERSION_MAJOR >= 11
- uint32_t numElem = cast<VectorType>(vi1Mask->getType())->getNumElements();
-#else
uint32_t numElem = vi1Mask->getType()->getVectorNumElements();
-#endif
- return B->S_EXT(vi1Mask, getVectorType(B->mInt32Ty, numElem));
+ return B->S_EXT(vi1Mask, VectorType::get(B->mInt32Ty, numElem));
}
Instruction* ProcessIntrinsicAdvanced(CallInst* pCallInst)
{
- Function* pFunc = pCallInst->getCalledFunction();
- assert(pFunc);
-
- auto& intrinsic = getIntrinsicMapAdvanced()[mTarget][pFunc->getName().str()];
+ Function* pFunc = pCallInst->getCalledFunction();
+ auto& intrinsic = intrinsicMap2[mTarget][pFunc->getName()];
TargetWidth vecWidth;
Type* pElemTy;
GetRequestedWidthAndType(pCallInst, pFunc->getName(), &vecWidth, &pElemTy);
// Check if there is a native intrinsic for this instruction
- IntrinsicID id = intrinsic.intrin[vecWidth];
+ Intrinsic::ID id = intrinsic.intrin[vecWidth];
if (id == DOUBLE)
{
// Double pump the next smaller SIMD intrinsic
@@ -396,19 +363,18 @@ namespace SwrJit
Instruction* ProcessIntrinsic(CallInst* pCallInst)
{
Function* pFunc = pCallInst->getCalledFunction();
- assert(pFunc);
// Forward to the advanced support if found
- if (getIntrinsicMapAdvanced()[mTarget].find(pFunc->getName().str()) != getIntrinsicMapAdvanced()[mTarget].end())
+ if (intrinsicMap2[mTarget].find(pFunc->getName()) != intrinsicMap2[mTarget].end())
{
return ProcessIntrinsicAdvanced(pCallInst);
}
- SWR_ASSERT(getIntrinsicMap().find(pFunc->getName().str()) != getIntrinsicMap().end(),
+ SWR_ASSERT(intrinsicMap.find(pFunc->getName()) != intrinsicMap.end(),
"Unimplemented intrinsic %s.",
- pFunc->getName().str().c_str());
+ pFunc->getName());
- Intrinsic::ID x86Intrinsic = getIntrinsicMap()[pFunc->getName().str()];
+ Intrinsic::ID x86Intrinsic = intrinsicMap[pFunc->getName()];
Function* pX86IntrinFunc =
Intrinsic::getDeclaration(B->JM()->mpCurrentModule, x86Intrinsic);
@@ -421,44 +387,28 @@ namespace SwrJit
}
//////////////////////////////////////////////////////////////////////////
- /// @brief LLVM function pass run method.
+ /// @brief LLVM funtion pass run method.
/// @param f- The function we're working on with this pass.
virtual bool runOnFunction(Function& F)
{
std::vector<Instruction*> toRemove;
- std::vector<BasicBlock*> bbs;
-
- // Make temp copy of the basic blocks and instructions, as the intrinsic
- // replacement code might invalidate the iterators
- for (auto& b : F.getBasicBlockList())
- {
- bbs.push_back(&b);
- }
- for (auto* BB : bbs)
+ for (auto& BB : F.getBasicBlockList())
{
- std::vector<Instruction*> insts;
- for (auto& i : BB->getInstList())
+ for (auto& I : BB.getInstList())
{
- insts.push_back(&i);
- }
-
- for (auto* I : insts)
- {
- if (CallInst* pCallInst = dyn_cast<CallInst>(I))
+ if (CallInst* pCallInst = dyn_cast<CallInst>(&I))
{
Function* pFunc = pCallInst->getCalledFunction();
if (pFunc)
{
if (pFunc->getName().startswith("meta.intrinsic"))
{
- B->IRB()->SetInsertPoint(I);
+ B->IRB()->SetInsertPoint(&I);
Instruction* pReplace = ProcessIntrinsic(pCallInst);
+ SWR_ASSERT(pReplace);
toRemove.push_back(pCallInst);
- if (pReplace)
- {
- pCallInst->replaceAllUsesWith(pReplace);
- }
+ pCallInst->replaceAllUsesWith(pReplace);
}
}
}
@@ -478,9 +428,10 @@ namespace SwrJit
virtual void getAnalysisUsage(AnalysisUsage& AU) const {}
JitManager* JM() { return B->JM(); }
- Builder* B;
- TargetArch mTarget;
- Function* mPfnScatter256;
+
+ Builder* B;
+
+ TargetArch mTarget;
static char ID; ///< Needed by LLVM to generate ID for FunctionPass.
};
@@ -513,14 +464,7 @@ namespace SwrJit
else
{
v32Result = UndefValue::get(v32A->getType());
-#if LLVM_VERSION_MAJOR >= 12
- uint32_t numElem = cast<FixedVectorType>(v32A->getType())->getNumElements();
-#elif LLVM_VERSION_MAJOR >= 11
- uint32_t numElem = cast<VectorType>(v32A->getType())->getNumElements();
-#else
- uint32_t numElem = v32A->getType()->getVectorNumElements();
-#endif
- for (uint32_t l = 0; l < numElem; ++l)
+ for (uint32_t l = 0; l < v32A->getType()->getVectorNumElements(); ++l)
{
auto i32Index = B->VEXTRACT(vi32Index, B->C(l));
auto val = B->VEXTRACT(v32A, i32Index);
@@ -540,22 +484,11 @@ namespace SwrJit
auto vi1Mask = pCallInst->getArgOperand(3);
auto i8Scale = pCallInst->getArgOperand(4);
- pBase = B->POINTER_CAST(pBase, PointerType::get(B->mInt8Ty, 0));
-#if LLVM_VERSION_MAJOR >= 11
-#if LLVM_VERSION_MAJOR >= 12
- FixedVectorType* pVectorType = cast<FixedVectorType>(vSrc->getType());
-#else
- VectorType* pVectorType = cast<VectorType>(vSrc->getType());
-#endif
- uint32_t numElem = pVectorType->getNumElements();
- auto srcTy = pVectorType->getElementType();
-#else
- uint32_t numElem = vSrc->getType()->getVectorNumElements();
- auto srcTy = vSrc->getType()->getVectorElementType();
-#endif
- auto i32Scale = B->Z_EXT(i8Scale, B->mInt32Ty);
-
- Value* v32Gather = nullptr;
+ pBase = B->POINTER_CAST(pBase, PointerType::get(B->mInt8Ty, 0));
+ uint32_t numElem = vSrc->getType()->getVectorNumElements();
+ auto i32Scale = B->Z_EXT(i8Scale, B->mInt32Ty);
+ auto srcTy = vSrc->getType()->getVectorElementType();
+ Value* v32Gather;
if (arch == AVX)
{
// Full emulation for AVX
@@ -565,13 +498,7 @@ namespace SwrJit
B->STORE(vSrc, pTmp);
v32Gather = UndefValue::get(vSrc->getType());
-#if LLVM_VERSION_MAJOR <= 10
auto vi32Scale = ConstantVector::getSplat(numElem, cast<ConstantInt>(i32Scale));
-#elif LLVM_VERSION_MAJOR == 11
- auto vi32Scale = ConstantVector::getSplat(ElementCount(numElem, false), cast<ConstantInt>(i32Scale));
-#else
- auto vi32Scale = ConstantVector::getSplat(ElementCount::get(numElem, false), cast<ConstantInt>(i32Scale));
-#endif
auto vi32Offsets = B->MUL(vi32Indices, vi32Scale);
for (uint32_t i = 0; i < numElem; ++i)
@@ -590,7 +517,7 @@ namespace SwrJit
}
else if (arch == AVX2 || (arch == AVX512 && width == W256))
{
- Function* pX86IntrinFunc = nullptr;
+ Function* pX86IntrinFunc;
if (srcTy == B->mFP32Ty)
{
pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
@@ -619,23 +546,12 @@ namespace SwrJit
else if (width == W512)
{
// Double pump 4-wide for 64bit elements
-#if LLVM_VERSION_MAJOR >= 12
- if (cast<FixedVectorType>(vSrc->getType())->getElementType() == B->mDoubleTy)
-#elif LLVM_VERSION_MAJOR >= 11
- if (cast<VectorType>(vSrc->getType())->getElementType() == B->mDoubleTy)
-#else
if (vSrc->getType()->getVectorElementType() == B->mDoubleTy)
-#endif
{
auto v64Mask = pThis->VectorMask(vi1Mask);
-#if LLVM_VERSION_MAJOR >= 12
- uint32_t numElem = cast<FixedVectorType>(v64Mask->getType())->getNumElements();
-#elif LLVM_VERSION_MAJOR >= 11
- uint32_t numElem = cast<VectorType>(v64Mask->getType())->getNumElements();
-#else
- uint32_t numElem = v64Mask->getType()->getVectorNumElements();
-#endif
- v64Mask = B->S_EXT(v64Mask, getVectorType(B->mInt64Ty, numElem));
+ v64Mask = B->S_EXT(
+ v64Mask,
+ VectorType::get(B->mInt64Ty, v64Mask->getType()->getVectorNumElements()));
v64Mask = B->BITCAST(v64Mask, vSrc->getType());
Value* src0 = B->VSHUFFLE(vSrc, vSrc, B->C({0, 1, 2, 3}));
@@ -647,30 +563,23 @@ namespace SwrJit
Value* mask0 = B->VSHUFFLE(v64Mask, v64Mask, B->C({0, 1, 2, 3}));
Value* mask1 = B->VSHUFFLE(v64Mask, v64Mask, B->C({4, 5, 6, 7}));
-#if LLVM_VERSION_MAJOR >= 12
- uint32_t numElemSrc0 = cast<FixedVectorType>(src0->getType())->getNumElements();
- uint32_t numElemMask0 = cast<FixedVectorType>(mask0->getType())->getNumElements();
- uint32_t numElemSrc1 = cast<FixedVectorType>(src1->getType())->getNumElements();
- uint32_t numElemMask1 = cast<FixedVectorType>(mask1->getType())->getNumElements();
-#elif LLVM_VERSION_MAJOR >= 11
- uint32_t numElemSrc0 = cast<VectorType>(src0->getType())->getNumElements();
- uint32_t numElemMask0 = cast<VectorType>(mask0->getType())->getNumElements();
- uint32_t numElemSrc1 = cast<VectorType>(src1->getType())->getNumElements();
- uint32_t numElemMask1 = cast<VectorType>(mask1->getType())->getNumElements();
-#else
- uint32_t numElemSrc0 = src0->getType()->getVectorNumElements();
- uint32_t numElemMask0 = mask0->getType()->getVectorNumElements();
- uint32_t numElemSrc1 = src1->getType()->getVectorNumElements();
- uint32_t numElemMask1 = mask1->getType()->getVectorNumElements();
-#endif
- src0 = B->BITCAST(src0, getVectorType(B->mInt64Ty, numElemSrc0));
- mask0 = B->BITCAST(mask0, getVectorType(B->mInt64Ty, numElemMask0));
+ src0 = B->BITCAST(
+ src0,
+ VectorType::get(B->mInt64Ty, src0->getType()->getVectorNumElements()));
+ mask0 = B->BITCAST(
+ mask0,
+ VectorType::get(B->mInt64Ty, mask0->getType()->getVectorNumElements()));
Value* gather0 =
B->CALL(pX86IntrinFunc, {src0, pBase, indices0, mask0, i8Scale});
- src1 = B->BITCAST(src1, getVectorType(B->mInt64Ty, numElemSrc1));
- mask1 = B->BITCAST(mask1, getVectorType(B->mInt64Ty, numElemMask1));
+ src1 = B->BITCAST(
+ src1,
+ VectorType::get(B->mInt64Ty, src1->getType()->getVectorNumElements()));
+ mask1 = B->BITCAST(
+ mask1,
+ VectorType::get(B->mInt64Ty, mask1->getType()->getVectorNumElements()));
Value* gather1 =
B->CALL(pX86IntrinFunc, {src1, pBase, indices1, mask1, i8Scale});
+
v32Gather = B->VSHUFFLE(gather0, gather1, B->C({0, 1, 2, 3, 4, 5, 6, 7}));
v32Gather = B->BITCAST(v32Gather, vSrc->getType());
}
@@ -699,8 +608,8 @@ namespace SwrJit
}
else if (arch == AVX512)
{
- Value* iMask = nullptr;
- Function* pX86IntrinFunc = nullptr;
+ Value* iMask;
+ Function* pX86IntrinFunc;
if (srcTy == B->mFP32Ty)
{
pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
@@ -730,69 +639,6 @@ namespace SwrJit
return cast<Instruction>(v32Gather);
}
- Instruction*
- VSCATTER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
- {
- Builder* B = pThis->B;
- auto pBase = pCallInst->getArgOperand(0);
- auto vi1Mask = pCallInst->getArgOperand(1);
- auto vi32Indices = pCallInst->getArgOperand(2);
- auto v32Src = pCallInst->getArgOperand(3);
- auto i32Scale = pCallInst->getArgOperand(4);
-
- if (arch != AVX512)
- {
- // Call into C function to do the scatter. This has significantly better compile perf
- // compared to jitting scatter loops for every scatter
- if (width == W256)
- {
- auto mask = B->BITCAST(vi1Mask, B->mInt8Ty);
- B->CALL(pThis->mPfnScatter256, {pBase, vi32Indices, v32Src, mask, i32Scale});
- }
- else
- {
- // Need to break up 512 wide scatter to two 256 wide
- auto maskLo = B->VSHUFFLE(vi1Mask, vi1Mask, B->C({0, 1, 2, 3, 4, 5, 6, 7}));
- auto indicesLo =
- B->VSHUFFLE(vi32Indices, vi32Indices, B->C({0, 1, 2, 3, 4, 5, 6, 7}));
- auto srcLo = B->VSHUFFLE(v32Src, v32Src, B->C({0, 1, 2, 3, 4, 5, 6, 7}));
-
- auto mask = B->BITCAST(maskLo, B->mInt8Ty);
- B->CALL(pThis->mPfnScatter256, {pBase, indicesLo, srcLo, mask, i32Scale});
-
- auto maskHi = B->VSHUFFLE(vi1Mask, vi1Mask, B->C({8, 9, 10, 11, 12, 13, 14, 15}));
- auto indicesHi =
- B->VSHUFFLE(vi32Indices, vi32Indices, B->C({8, 9, 10, 11, 12, 13, 14, 15}));
- auto srcHi = B->VSHUFFLE(v32Src, v32Src, B->C({8, 9, 10, 11, 12, 13, 14, 15}));
-
- mask = B->BITCAST(maskHi, B->mInt8Ty);
- B->CALL(pThis->mPfnScatter256, {pBase, indicesHi, srcHi, mask, i32Scale});
- }
- return nullptr;
- }
-
- Value* iMask;
- Function* pX86IntrinFunc;
- if (width == W256)
- {
- // No direct intrinsic supported in llvm to scatter 8 elem with 32bit indices, but we
- // can use the scatter of 8 elements with 64bit indices
- pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
- Intrinsic::x86_avx512_scatter_qps_512);
-
- auto vi32IndicesExt = B->Z_EXT(vi32Indices, B->mSimdInt64Ty);
- iMask = B->BITCAST(vi1Mask, B->mInt8Ty);
- B->CALL(pX86IntrinFunc, {pBase, iMask, vi32IndicesExt, v32Src, i32Scale});
- }
- else if (width == W512)
- {
- pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
- Intrinsic::x86_avx512_scatter_dps_512);
- iMask = B->BITCAST(vi1Mask, B->mInt16Ty);
- B->CALL(pX86IntrinFunc, {pBase, iMask, vi32Indices, v32Src, i32Scale});
- }
- return nullptr;
- }
// No support for vroundps in avx512 (it is available in kncni), so emulate with avx
// instructions
@@ -803,9 +649,7 @@ namespace SwrJit
auto B = pThis->B;
auto vf32Src = pCallInst->getOperand(0);
- assert(vf32Src);
auto i8Round = pCallInst->getOperand(1);
- assert(i8Round);
auto pfnFunc =
Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx_round_ps_256);
@@ -910,18 +754,10 @@ namespace SwrJit
auto argType = arg.get()->getType();
if (argType->isVectorTy())
{
-#if LLVM_VERSION_MAJOR >= 12
- uint32_t vecWidth = cast<FixedVectorType>(argType)->getNumElements();
- auto elemTy = cast<FixedVectorType>(argType)->getElementType();
-#elif LLVM_VERSION_MAJOR >= 11
- uint32_t vecWidth = cast<VectorType>(argType)->getNumElements();
- auto elemTy = cast<VectorType>(argType)->getElementType();
-#else
uint32_t vecWidth = argType->getVectorNumElements();
- auto elemTy = argType->getVectorElementType();
-#endif
Value* lanes = B->CInc<int>(i * vecWidth / 2, vecWidth / 2);
- Value* argToPush = B->VSHUFFLE(arg.get(), B->VUNDEF(elemTy, vecWidth), lanes);
+ Value* argToPush = B->VSHUFFLE(
+ arg.get(), B->VUNDEF(argType->getVectorElementType(), vecWidth), lanes);
args.push_back(argToPush);
}
else
@@ -935,16 +771,8 @@ namespace SwrJit
if (result[0]->getType()->isVectorTy())
{
assert(result[1]->getType()->isVectorTy());
-#if LLVM_VERSION_MAJOR >= 12
- vecWidth = cast<FixedVectorType>(result[0]->getType())->getNumElements() +
- cast<FixedVectorType>(result[1]->getType())->getNumElements();
-#elif LLVM_VERSION_MAJOR >= 11
- vecWidth = cast<VectorType>(result[0]->getType())->getNumElements() +
- cast<VectorType>(result[1]->getType())->getNumElements();
-#else
vecWidth = result[0]->getType()->getVectorNumElements() +
result[1]->getType()->getVectorNumElements();
-#endif
}
else
{
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h
index e0bb75cde..d3c732af0 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h
@@ -25,7 +25,6 @@
* @brief Include file for llvm passes
*
******************************************************************************/
-#pragma once
#include "JitManager.h"
#include "builder.h"
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp
index e54e23fc9..47f717bfc 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp
@@ -1,5 +1,5 @@
/****************************************************************************
- * Copyright (C) 2017-2020 Intel Corporation. All Rights Reserved.
+ * Copyright (C) 2017-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -30,17 +30,13 @@
#pragma once
-#if defined(_MSC_VER)
+#if defined(_WIN32)
#pragma warning(disable : 4146 4244 4267 4800 4996)
#endif
-#include <llvm/Config/llvm-config.h>
-
-#if LLVM_VERSION_MAJOR < 7
// llvm 3.7+ reuses "DEBUG" as an enum value
#pragma push_macro("DEBUG")
#undef DEBUG
-#endif
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
@@ -49,11 +45,10 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
-#if LLVM_VERSION_MAJOR >= 10
-#include "llvm/IR/IntrinsicsX86.h"
-#endif
#include "llvm/ExecutionEngine/ObjectCache.h"
+#include "llvm/Config/llvm-config.h"
+
#include "llvm/IR/Verifier.h"
#include "llvm/ExecutionEngine/MCJIT.h"
#include "llvm/Support/FileSystem.h"
@@ -138,29 +133,7 @@ static inline llvm::AttributeSet GetFuncAttribSet(llvm::LLVMContext& ctx,
}
#endif
-#if LLVM_VERSION_MAJOR >= 11
-static inline llvm::VectorType* getVectorType(llvm::Type *ElementType, unsigned NumElements)
-{
- return llvm::VectorType::get(ElementType, NumElements, false);
-}
-#else
-static inline llvm::VectorType* getVectorType(llvm::Type *ElementType, unsigned NumElements)
-{
- return llvm::VectorType::get(ElementType, NumElements);
-}
-#endif
-
-#if LLVM_VERSION_MAJOR < 7
#pragma pop_macro("DEBUG")
-#endif
-
-#if LLVM_VERSION_MAJOR > 10
- typedef unsigned IntrinsicID;
- typedef llvm::Align AlignType;
-#else
- typedef llvm::Intrinsic::ID IntrinsicID;
- typedef unsigned AlignType;
-#endif
#include <deque>
#include <list>
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/meson.build b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/meson.build
index 295dc2fcc..aced4a1b7 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/meson.build
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/jitter/meson.build
@@ -18,17 +18,15 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
-if dep_llvm.type_name() == 'internal'
- _irbuilder_h = subproject('llvm').get_variable('irbuilder_h')
-else
- _llvm_includedir = dep_llvm.get_variable(configtool : 'includedir', cmake : 'LLVM_INCLUDE_DIR')
- _irbuilder_h = join_paths(_llvm_includedir, 'llvm', 'IR', 'IRBuilder.h')
-endif
gen_builder_hpp = custom_target(
'gen_builder.hpp',
input : [
- swr_gen_llvm_ir_macros_py, _irbuilder_h,
+ swr_gen_llvm_ir_macros_py,
+ join_paths(
+ dep_llvm.get_configtool_variable('includedir'), 'llvm', 'IR',
+ 'IRBuilder.h'
+ )
],
output : 'gen_builder.hpp',
command : [
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp b/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp
index 3a19bbac7..bff96e17f 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp
@@ -25,11 +25,7 @@
* @brief Provide access to tiles table initialization functions
*
******************************************************************************/
-
#include "memory/InitMemory.h"
-#include "memory/LoadTile.h"
-#include "memory/StoreTile.h"
-#include "InitMemory.h"
void InitSimLoadTilesTable();
void InitSimStoreTilesTable();
@@ -41,10 +37,3 @@ void InitTilesTable()
InitSimStoreTilesTable();
InitSimClearTilesTable();
}
-
-
-void SwrGetTileIterface(SWR_TILE_INTERFACE &out_funcs)
-{
- out_funcs.pfnSwrLoadHotTile = SwrLoadHotTile;
- out_funcs.pfnSwrStoreHotTileToSurface = SwrStoreHotTileToSurface;
-} \ No newline at end of file
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h
index a3ed7b3cb..14cca6ab3 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h
@@ -25,59 +25,9 @@
* @brief Provide access to tiles table initialization functions
*
******************************************************************************/
-
-#pragma once
-
#include "common/os.h"
-#include "memory/SurfaceState.h"
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Loads a full hottile from a render surface
-/// @param hPrivateContext - Handle to private DC
-/// @param dstFormat - Format for hot tile.
-/// @param renderTargetIndex - Index to src render target
-/// @param x, y - Coordinates to raster tile.
-/// @param pDstHotTile - Pointer to Hot Tile
-SWR_FUNC(void,
- SwrLoadHotTile,
- HANDLE hWorkerPrivateData,
- const SWR_SURFACE_STATE* pSrcSurface,
- BucketManager* pBucketManager,
- SWR_FORMAT dstFormat,
- SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
- uint32_t x,
- uint32_t y,
- uint32_t renderTargetArrayIndex,
- uint8_t* pDstHotTile);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Deswizzles and stores a full hottile to a render surface
-/// @param hPrivateContext - Handle to private DC
-/// @param srcFormat - Format for hot tile.
-/// @param renderTargetIndex - Index to destination render target
-/// @param x, y - Coordinates to raster tile.
-/// @param pSrcHotTile - Pointer to Hot Tile
-SWR_FUNC(void,
- SwrStoreHotTileToSurface,
- HANDLE hWorkerPrivateData,
- SWR_SURFACE_STATE* pDstSurface,
- BucketManager* pBucketManager,
- SWR_FORMAT srcFormat,
- SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
- uint32_t x,
- uint32_t y,
- uint32_t renderTargetArrayIndex,
- uint8_t* pSrcHotTile);
-
-struct SWR_TILE_INTERFACE {
- PFNSwrLoadHotTile pfnSwrLoadHotTile;
- PFNSwrStoreHotTileToSurface pfnSwrStoreHotTileToSurface;
-};
extern "C"
{
SWR_VISIBLE void SWR_API InitTilesTable();
-
- typedef void(SWR_API* PFNSwrGetTileInterface)(SWR_TILE_INTERFACE& out_funcs);
- SWR_VISIBLE void SWR_API SwrGetTileIterface(SWR_TILE_INTERFACE &out_funcs);
}