summary refs log tree commit diff
path: root/lib/mesa/src/amd/llvm/ac_llvm_helper.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/mesa/src/amd/llvm/ac_llvm_helper.cpp')
-rw-r--r-- lib/mesa/src/amd/llvm/ac_llvm_helper.cpp 522
1 files changed, 288 insertions, 234 deletions
diff --git a/lib/mesa/src/amd/llvm/ac_llvm_helper.cpp b/lib/mesa/src/amd/llvm/ac_llvm_helper.cpp
index f5383344d..5d065279a 100644
--- a/lib/mesa/src/amd/llvm/ac_llvm_helper.cpp
+++ b/lib/mesa/src/amd/llvm/ac_llvm_helper.cpp
@@ -1,324 +1,378 @@
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
+ * SPDX-License-Identifier: MIT
*/
+#include <llvm-c/Core.h>
+#include <llvm/Analysis/TargetLibraryInfo.h>
+#include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/LegacyPassManager.h>
+#include <llvm/IR/Verifier.h>
+#include <llvm/Target/TargetMachine.h>
+#include <llvm/MC/MCSubtargetInfo.h>
+#include <llvm/Support/CommandLine.h>
+#include <llvm/Transforms/IPO.h>
+#include <llvm/Transforms/Scalar.h>
+#include <llvm/Transforms/Utils.h>
+#include <llvm/CodeGen/Passes.h>
+#include <llvm/Transforms/IPO/AlwaysInliner.h>
+#include <llvm/Transforms/InstCombine/InstCombine.h>
+#include <llvm/Transforms/IPO/SCCP.h>
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+
#include <cstring>
+/* DO NOT REORDER THE HEADERS
+ * The LLVM headers need to all be included before any Mesa header,
+ * as they use the `restrict` keyword in ways that are incompatible
+ * with our #define in include/c99_compat.h
+ */
+
#include "ac_binary.h"
#include "ac_llvm_util.h"
#include "ac_llvm_build.h"
-
#include "util/macros.h"
-#include <llvm-c/Core.h>
-#include <llvm/Target/TargetMachine.h>
-#include <llvm/IR/IRBuilder.h>
-#include <llvm/Analysis/TargetLibraryInfo.h>
-#include <llvm/Transforms/IPO.h>
+using namespace llvm;
-#include <llvm/IR/LegacyPassManager.h>
+class RunAtExitForStaticDestructors : public SDNode
+{
+public:
+ /* getSDVTList (protected) calls getValueTypeList (private), which contains static variables. */
+ RunAtExitForStaticDestructors(): SDNode(0, 0, DebugLoc(), getSDVTList(MVT::Other))
+ {
+ }
+};
-void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
+void ac_llvm_run_atexit_for_destructors(void)
{
- llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
- A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
+ /* LLVM >= 16 registers static variable destructors on the first compile, which gcc
+ * implements by calling atexit there. Before that, u_queue registers its atexit
+ * handler to kill all threads. Since exit() runs atexit handlers in the reverse order,
+ * the LLVM destructors are called first while shader compiler threads may still be
+ * running, which crashes in LLVM in SelectionDAG.cpp.
+ *
+ * The solution is to run the code that declares the LLVM static variables first,
+ * so that atexit for LLVM is registered first and u_queue is registered after that,
+ * which ensures that all u_queue threads are terminated before LLVM destructors are
+ * called.
+ *
+ * This just executes the code that declares static variables.
+ */
+ RunAtExitForStaticDestructors();
}
-bool ac_is_sgpr_param(LLVMValueRef arg)
+bool ac_is_llvm_processor_supported(LLVMTargetMachineRef tm, const char *processor)
+{
+ TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
+ return TM->getMCSubtargetInfo()->isCPUStringValid(processor);
+}
+
+void ac_reset_llvm_all_options_occurrences()
+{
+ cl::ResetAllOptionOccurrences();
+}
+
+void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
{
- llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
- llvm::AttributeList AS = A->getParent()->getAttributes();
- unsigned ArgNo = A->getArgNo();
- return AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
+ Argument *A = unwrap<Argument>(val);
+ A->addAttr(Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
}
-LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
+void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
{
- return LLVMGetCalledValue(call);
+ Argument *A = unwrap<Argument>(val);
+ A->addAttr(Attribute::getWithAlignment(A->getContext(), Align(bytes)));
}
-bool ac_llvm_is_function(LLVMValueRef v)
+bool ac_is_sgpr_param(LLVMValueRef arg)
{
- return LLVMGetValueKind(v) == LLVMFunctionValueKind;
+ Argument *A = unwrap<Argument>(arg);
+ AttributeList AS = A->getParent()->getAttributes();
+ unsigned ArgNo = A->getArgNo();
+ return AS.hasParamAttr(ArgNo, Attribute::InReg);
}
LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
{
- llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
+ TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
- llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
- llvm::unwrap(module)->setDataLayout(TM->createDataLayout());
+ unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
+ unwrap(module)->setDataLayout(TM->createDataLayout());
return module;
}
-LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
- enum ac_float_mode float_mode)
+LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode)
{
- LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
-
- llvm::FastMathFlags flags;
+ LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
- switch (float_mode) {
- case AC_FLOAT_MODE_DEFAULT:
- case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
- break;
+ FastMathFlags flags;
- case AC_FLOAT_MODE_DEFAULT_OPENGL:
- /* Allow optimizations to treat the sign of a zero argument or
- * result as insignificant.
- */
- flags.setNoSignedZeros(); /* nsz */
+ switch (float_mode) {
+ case AC_FLOAT_MODE_DEFAULT:
+ case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
+ break;
- /* Allow optimizations to use the reciprocal of an argument
- * rather than perform division.
- */
- flags.setAllowReciprocal(); /* arcp */
+ case AC_FLOAT_MODE_DEFAULT_OPENGL:
+ /* Allow optimizations to treat the sign of a zero argument or
+ * result as insignificant.
+ */
+ flags.setNoSignedZeros(); /* nsz */
- /* Allow floating-point contraction (e.g. fusing a multiply
- * followed by an addition into a fused multiply-and-add).
- */
- flags.setAllowContract(); /* contract */
+ /* Allow optimizations to use the reciprocal of an argument
+ * rather than perform division.
+ */
+ flags.setAllowReciprocal(); /* arcp */
- llvm::unwrap(builder)->setFastMathFlags(flags);
- break;
- }
+ unwrap(builder)->setFastMathFlags(flags);
+ break;
+ }
- return builder;
+ return builder;
}
-/* Return the original state of inexact math. */
-bool ac_disable_inexact_math(LLVMBuilderRef builder)
+void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
{
- auto *b = llvm::unwrap(builder);
- llvm::FastMathFlags flags = b->getFastMathFlags();
-
- if (!flags.allowContract())
- return false;
-
- flags.setAllowContract(false);
- b->setFastMathFlags(flags);
- return true;
+ if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
+ auto *b = unwrap(ctx->builder);
+ FastMathFlags flags = b->getFastMathFlags();
+
+ /* This disables the optimization of (x + 0), which is used
+ * to convert negative zero to positive zero.
+ */
+ flags.setNoSignedZeros(false);
+ b->setFastMathFlags(flags);
+ }
}
-void ac_restore_inexact_math(LLVMBuilderRef builder, bool value)
+void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
{
- auto *b = llvm::unwrap(builder);
- llvm::FastMathFlags flags = b->getFastMathFlags();
-
- if (flags.allowContract() == value)
- return;
+ if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
+ auto *b = unwrap(ctx->builder);
+ FastMathFlags flags = b->getFastMathFlags();
- flags.setAllowContract(value);
- b->setFastMathFlags(flags);
+ flags.setNoSignedZeros();
+ b->setFastMathFlags(flags);
+ }
}
-LLVMTargetLibraryInfoRef
-ac_create_target_library_info(const char *triple)
+LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple)
{
- return reinterpret_cast<LLVMTargetLibraryInfoRef>(new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
+ return reinterpret_cast<LLVMTargetLibraryInfoRef>(
+ new TargetLibraryInfoImpl(Triple(triple)));
}
-void
-ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
+void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
{
- delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
+ delete reinterpret_cast<TargetLibraryInfoImpl *>(library_info);
}
/* Implementation of raw_pwrite_stream that works on malloc()ed memory for
* better compatibility with C code. */
-struct raw_memory_ostream : public llvm::raw_pwrite_stream {
- char *buffer;
- size_t written;
- size_t bufsize;
-
- raw_memory_ostream()
- {
- buffer = NULL;
- written = 0;
- bufsize = 0;
- SetUnbuffered();
- }
-
- ~raw_memory_ostream()
- {
- free(buffer);
- }
-
- void clear()
- {
- written = 0;
- }
-
- void take(char *&out_buffer, size_t &out_size)
- {
- out_buffer = buffer;
- out_size = written;
- buffer = NULL;
- written = 0;
- bufsize = 0;
- }
-
- void flush() = delete;
-
- void write_impl(const char *ptr, size_t size) override
- {
- if (unlikely(written + size < written))
- abort();
- if (written + size > bufsize) {
- bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
- buffer = (char *)realloc(buffer, bufsize);
- if (!buffer) {
- fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
- abort();
- }
- }
- memcpy(buffer + written, ptr, size);
- written += size;
- }
-
- void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
- {
- assert(offset == (size_t)offset &&
- offset + size >= offset && offset + size <= written);
- memcpy(buffer + offset, ptr, size);
- }
-
- uint64_t current_pos() const override
- {
- return written;
- }
+struct raw_memory_ostream : public raw_pwrite_stream {
+ char *buffer;
+ size_t written;
+ size_t bufsize;
+
+ raw_memory_ostream()
+ {
+ buffer = NULL;
+ written = 0;
+ bufsize = 0;
+ SetUnbuffered();
+ }
+
+ ~raw_memory_ostream()
+ {
+ free(buffer);
+ }
+
+ void clear()
+ {
+ written = 0;
+ }
+
+ void take(char *&out_buffer, size_t &out_size)
+ {
+ out_buffer = buffer;
+ out_size = written;
+ buffer = NULL;
+ written = 0;
+ bufsize = 0;
+ }
+
+ void flush() = delete;
+
+ void write_impl(const char *ptr, size_t size) override
+ {
+ if (unlikely(written + size < written))
+ abort();
+ if (written + size > bufsize) {
+ bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
+ buffer = (char *)realloc(buffer, bufsize);
+ if (!buffer) {
+ fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
+ abort();
+ }
+ }
+ memcpy(buffer + written, ptr, size);
+ written += size;
+ }
+
+ void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
+ {
+ assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written);
+ memcpy(buffer + offset, ptr, size);
+ }
+
+ uint64_t current_pos() const override
+ {
+ return written;
+ }
};
/* The LLVM compiler is represented as a pass manager containing passes for
* optimizations, instruction selection, and code generation.
*/
struct ac_compiler_passes {
- raw_memory_ostream ostream; /* ELF shader binary stream */
- llvm::legacy::PassManager passmgr; /* list of passes */
+ raw_memory_ostream ostream; /* ELF shader binary stream */
+ legacy::PassManager passmgr; /* list of passes */
};
struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
{
- struct ac_compiler_passes *p = new ac_compiler_passes();
- if (!p)
- return NULL;
+ struct ac_compiler_passes *p = new ac_compiler_passes();
+ if (!p)
+ return NULL;
- llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
+ TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
- if (TM->addPassesToEmitFile(p->passmgr, p->ostream,
- nullptr,
-#if LLVM_VERSION_MAJOR >= 10
- llvm::CGFT_ObjectFile)) {
+ if (TM->addPassesToEmitFile(p->passmgr, p->ostream, nullptr,
+#if LLVM_VERSION_MAJOR >= 18
+ CodeGenFileType::ObjectFile)) {
#else
- llvm::TargetMachine::CGFT_ObjectFile)) {
+ CGFT_ObjectFile)) {
#endif
- fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
- delete p;
- return NULL;
- }
- return p;
+ fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
+ delete p;
+ return NULL;
+ }
+ return p;
}
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
{
- delete p;
+ delete p;
}
/* This returns false on failure. */
bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
- char **pelf_buffer, size_t *pelf_size)
+ char **pelf_buffer, size_t *pelf_size)
{
- p->passmgr.run(*llvm::unwrap(module));
- p->ostream.take(*pelf_buffer, *pelf_size);
- return true;
+ p->passmgr.run(*unwrap(module));
+ p->ostream.take(*pelf_buffer, *pelf_size);
+ return true;
}
-void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
+LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
+ bool check_ir)
{
- llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
-}
-
-void ac_enable_global_isel(LLVMTargetMachineRef tm)
-{
- reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true);
+ LLVMPassManagerRef passmgr = LLVMCreatePassManager();
+ if (!passmgr)
+ return NULL;
+
+ if (target_library_info)
+ LLVMAddTargetLibraryInfo(target_library_info, passmgr);
+
+ if (check_ir)
+ unwrap(passmgr)->add(createVerifierPass());
+
+ unwrap(passmgr)->add(createAlwaysInlinerLegacyPass());
+
+ /* Normally, the pass manager runs all passes on one function before
+ * moving onto another. Adding a barrier no-op pass forces the pass
+ * manager to run the inliner on all functions first, which makes sure
+ * that the following passes are only run on the remaining non-inline
+ * function, so it removes useless work done on dead inline functions.
+ */
+ unwrap(passmgr)->add(createBarrierNoopPass());
+
+ #if LLVM_VERSION_MAJOR >= 16
+ unwrap(passmgr)->add(createSROAPass(true));
+ #else
+ unwrap(passmgr)->add(createSROAPass());
+ #endif
+ /* TODO: restore IPSCCP */
+ unwrap(passmgr)->add(createLICMPass());
+ unwrap(passmgr)->add(createCFGSimplificationPass());
+ /* This is recommended by the instruction combining pass. */
+ unwrap(passmgr)->add(createEarlyCSEPass(true));
+ unwrap(passmgr)->add(createInstructionCombiningPass());
+ return passmgr;
}
LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
- LLVMValueRef ptr, LLVMValueRef val,
- const char *sync_scope) {
- llvm::AtomicRMWInst::BinOp binop;
- switch (op) {
- case LLVMAtomicRMWBinOpXchg:
- binop = llvm::AtomicRMWInst::Xchg;
- break;
- case LLVMAtomicRMWBinOpAdd:
- binop = llvm::AtomicRMWInst::Add;
- break;
- case LLVMAtomicRMWBinOpSub:
- binop = llvm::AtomicRMWInst::Sub;
- break;
- case LLVMAtomicRMWBinOpAnd:
- binop = llvm::AtomicRMWInst::And;
- break;
- case LLVMAtomicRMWBinOpNand:
- binop = llvm::AtomicRMWInst::Nand;
- break;
- case LLVMAtomicRMWBinOpOr:
- binop = llvm::AtomicRMWInst::Or;
- break;
- case LLVMAtomicRMWBinOpXor:
- binop = llvm::AtomicRMWInst::Xor;
- break;
- case LLVMAtomicRMWBinOpMax:
- binop = llvm::AtomicRMWInst::Max;
- break;
- case LLVMAtomicRMWBinOpMin:
- binop = llvm::AtomicRMWInst::Min;
- break;
- case LLVMAtomicRMWBinOpUMax:
- binop = llvm::AtomicRMWInst::UMax;
- break;
- case LLVMAtomicRMWBinOpUMin:
- binop = llvm::AtomicRMWInst::UMin;
- break;
- default:
- unreachable(!"invalid LLVMAtomicRMWBinOp");
- break;
- }
- unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
- return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicRMW(
- binop, llvm::unwrap(ptr), llvm::unwrap(val),
- llvm::AtomicOrdering::SequentiallyConsistent, SSID));
+ LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope)
+{
+ AtomicRMWInst::BinOp binop;
+ switch (op) {
+ case LLVMAtomicRMWBinOpXchg:
+ binop = AtomicRMWInst::Xchg;
+ break;
+ case LLVMAtomicRMWBinOpAdd:
+ binop = AtomicRMWInst::Add;
+ break;
+ case LLVMAtomicRMWBinOpSub:
+ binop = AtomicRMWInst::Sub;
+ break;
+ case LLVMAtomicRMWBinOpAnd:
+ binop = AtomicRMWInst::And;
+ break;
+ case LLVMAtomicRMWBinOpNand:
+ binop = AtomicRMWInst::Nand;
+ break;
+ case LLVMAtomicRMWBinOpOr:
+ binop = AtomicRMWInst::Or;
+ break;
+ case LLVMAtomicRMWBinOpXor:
+ binop = AtomicRMWInst::Xor;
+ break;
+ case LLVMAtomicRMWBinOpMax:
+ binop = AtomicRMWInst::Max;
+ break;
+ case LLVMAtomicRMWBinOpMin:
+ binop = AtomicRMWInst::Min;
+ break;
+ case LLVMAtomicRMWBinOpUMax:
+ binop = AtomicRMWInst::UMax;
+ break;
+ case LLVMAtomicRMWBinOpUMin:
+ binop = AtomicRMWInst::UMin;
+ break;
+ case LLVMAtomicRMWBinOpFAdd:
+ binop = AtomicRMWInst::FAdd;
+ break;
+ default:
+ unreachable("invalid LLVMAtomicRMWBinOp");
+ break;
+ }
+ unsigned SSID = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
+ return wrap(unwrap(ctx->builder)
+ ->CreateAtomicRMW(binop, unwrap(ptr), unwrap(val),
+ MaybeAlign(0),
+ AtomicOrdering::SequentiallyConsistent, SSID));
}
LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
- LLVMValueRef cmp, LLVMValueRef val,
- const char *sync_scope) {
- unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
- return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicCmpXchg(
- llvm::unwrap(ptr), llvm::unwrap(cmp), llvm::unwrap(val),
- llvm::AtomicOrdering::SequentiallyConsistent,
- llvm::AtomicOrdering::SequentiallyConsistent, SSID));
+ LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope)
+{
+ unsigned SSID = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
+ return wrap(unwrap(ctx->builder)
+ ->CreateAtomicCmpXchg(unwrap(ptr), unwrap(cmp),
+ unwrap(val),
+ MaybeAlign(0),
+ AtomicOrdering::SequentiallyConsistent,
+ AtomicOrdering::SequentiallyConsistent, SSID));
}