diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2024-07-25 09:01:56 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2024-07-25 09:01:56 +0000 |
commit | 80c41f8d6d1ed41a36326ab488200d1d78f3beac (patch) | |
tree | 601fd97516746ea4276e25ccbb02df5b5d6ba1df | |
parent | 38999c29df0ece14e1acc178f7e3dd6bc4b468f2 (diff) |
fix: ac/llvm: LLVM 18: remove useless passes, partially removed upstream
From Kai Wasserbaech
bc7e363f8e1a26342e6fd7241c1f0ebb722338d6 in mainline Mesa
robert@ reports this is required to build with llvm 18
-rw-r--r-- | lib/mesa/src/amd/llvm/ac_llvm_helper.cpp | 522 |
1 files changed, 288 insertions, 234 deletions
diff --git a/lib/mesa/src/amd/llvm/ac_llvm_helper.cpp b/lib/mesa/src/amd/llvm/ac_llvm_helper.cpp index f5383344d..5d065279a 100644 --- a/lib/mesa/src/amd/llvm/ac_llvm_helper.cpp +++ b/lib/mesa/src/amd/llvm/ac_llvm_helper.cpp @@ -1,324 +1,378 @@ /* * Copyright 2014 Advanced Micro Devices, Inc. * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * + * SPDX-License-Identifier: MIT */ +#include <llvm-c/Core.h> +#include <llvm/Analysis/TargetLibraryInfo.h> +#include <llvm/IR/IRBuilder.h> +#include <llvm/IR/LegacyPassManager.h> +#include <llvm/IR/Verifier.h> +#include <llvm/Target/TargetMachine.h> +#include <llvm/MC/MCSubtargetInfo.h> +#include <llvm/Support/CommandLine.h> +#include <llvm/Transforms/IPO.h> +#include <llvm/Transforms/Scalar.h> +#include <llvm/Transforms/Utils.h> +#include <llvm/CodeGen/Passes.h> +#include <llvm/Transforms/IPO/AlwaysInliner.h> +#include <llvm/Transforms/InstCombine/InstCombine.h> +#include <llvm/Transforms/IPO/SCCP.h> +#include "llvm/CodeGen/SelectionDAGNodes.h" + #include <cstring> +/* DO NOT REORDER THE HEADERS + * The LLVM headers need to all be included before any Mesa header, + * as they use the `restrict` keyword in ways that are incompatible + * with our #define in include/c99_compat.h + */ + #include "ac_binary.h" #include "ac_llvm_util.h" #include "ac_llvm_build.h" - #include "util/macros.h" -#include <llvm-c/Core.h> -#include <llvm/Target/TargetMachine.h> -#include <llvm/IR/IRBuilder.h> -#include <llvm/Analysis/TargetLibraryInfo.h> -#include <llvm/Transforms/IPO.h> +using namespace llvm; -#include <llvm/IR/LegacyPassManager.h> +class RunAtExitForStaticDestructors : public SDNode +{ +public: + /* getSDVTList (protected) calls getValueTypeList (private), which contains static variables. */ + RunAtExitForStaticDestructors(): SDNode(0, 0, DebugLoc(), getSDVTList(MVT::Other)) + { + } +}; -void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes) +void ac_llvm_run_atexit_for_destructors(void) { - llvm::Argument *A = llvm::unwrap<llvm::Argument>(val); - A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes)); + /* LLVM >= 16 registers static variable destructors on the first compile, which gcc + * implements by calling atexit there. Before that, u_queue registers its atexit + * handler to kill all threads. Since exit() runs atexit handlers in the reverse order, + * the LLVM destructors are called first while shader compiler threads may still be + * running, which crashes in LLVM in SelectionDAG.cpp. + * + * The solution is to run the code that declares the LLVM static variables first, + * so that atexit for LLVM is registered first and u_queue is registered after that, + * which ensures that all u_queue threads are terminated before LLVM destructors are + * called. + * + * This just executes the code that declares static variables. + */ + RunAtExitForStaticDestructors(); } -bool ac_is_sgpr_param(LLVMValueRef arg) +bool ac_is_llvm_processor_supported(LLVMTargetMachineRef tm, const char *processor) +{ + TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm); + return TM->getMCSubtargetInfo()->isCPUStringValid(processor); +} + +void ac_reset_llvm_all_options_occurrences() +{ + cl::ResetAllOptionOccurrences(); +} + +void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes) { - llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg); - llvm::AttributeList AS = A->getParent()->getAttributes(); - unsigned ArgNo = A->getArgNo(); - return AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg); + Argument *A = unwrap<Argument>(val); + A->addAttr(Attribute::getWithDereferenceableBytes(A->getContext(), bytes)); } -LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call) +void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes) { - return LLVMGetCalledValue(call); + Argument *A = unwrap<Argument>(val); + A->addAttr(Attribute::getWithAlignment(A->getContext(), Align(bytes))); } -bool ac_llvm_is_function(LLVMValueRef v) +bool ac_is_sgpr_param(LLVMValueRef arg) { - return LLVMGetValueKind(v) == LLVMFunctionValueKind; + Argument *A = unwrap<Argument>(arg); + AttributeList AS = A->getParent()->getAttributes(); + unsigned ArgNo = A->getArgNo(); + return AS.hasParamAttr(ArgNo, Attribute::InReg); } LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx) { - llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm); + TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm); LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx); - llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple()); - llvm::unwrap(module)->setDataLayout(TM->createDataLayout()); + unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple()); + unwrap(module)->setDataLayout(TM->createDataLayout()); return module; } -LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, - enum ac_float_mode float_mode) +LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode) { - LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx); - - llvm::FastMathFlags flags; + LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx); - switch (float_mode) { - case AC_FLOAT_MODE_DEFAULT: - case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO: - break; + FastMathFlags flags; - case AC_FLOAT_MODE_DEFAULT_OPENGL: - /* Allow optimizations to treat the sign of a zero argument or - * result as insignificant. - */ - flags.setNoSignedZeros(); /* nsz */ + switch (float_mode) { + case AC_FLOAT_MODE_DEFAULT: + case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO: + break; - /* Allow optimizations to use the reciprocal of an argument - * rather than perform division. - */ - flags.setAllowReciprocal(); /* arcp */ + case AC_FLOAT_MODE_DEFAULT_OPENGL: + /* Allow optimizations to treat the sign of a zero argument or + * result as insignificant. + */ + flags.setNoSignedZeros(); /* nsz */ - /* Allow floating-point contraction (e.g. fusing a multiply - * followed by an addition into a fused multiply-and-add). - */ - flags.setAllowContract(); /* contract */ + /* Allow optimizations to use the reciprocal of an argument + * rather than perform division. + */ + flags.setAllowReciprocal(); /* arcp */ - llvm::unwrap(builder)->setFastMathFlags(flags); - break; - } + unwrap(builder)->setFastMathFlags(flags); + break; + } - return builder; + return builder; } -/* Return the original state of inexact math. */ -bool ac_disable_inexact_math(LLVMBuilderRef builder) +void ac_enable_signed_zeros(struct ac_llvm_context *ctx) { - auto *b = llvm::unwrap(builder); - llvm::FastMathFlags flags = b->getFastMathFlags(); - - if (!flags.allowContract()) - return false; - - flags.setAllowContract(false); - b->setFastMathFlags(flags); - return true; + if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) { + auto *b = unwrap(ctx->builder); + FastMathFlags flags = b->getFastMathFlags(); + + /* This disables the optimization of (x + 0), which is used + * to convert negative zero to positive zero. + */ + flags.setNoSignedZeros(false); + b->setFastMathFlags(flags); + } } -void ac_restore_inexact_math(LLVMBuilderRef builder, bool value) +void ac_disable_signed_zeros(struct ac_llvm_context *ctx) { - auto *b = llvm::unwrap(builder); - llvm::FastMathFlags flags = b->getFastMathFlags(); - - if (flags.allowContract() == value) - return; + if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) { + auto *b = unwrap(ctx->builder); + FastMathFlags flags = b->getFastMathFlags(); - flags.setAllowContract(value); - b->setFastMathFlags(flags); + flags.setNoSignedZeros(); + b->setFastMathFlags(flags); + } } -LLVMTargetLibraryInfoRef -ac_create_target_library_info(const char *triple) +LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple) { - return reinterpret_cast<LLVMTargetLibraryInfoRef>(new llvm::TargetLibraryInfoImpl(llvm::Triple(triple))); + return reinterpret_cast<LLVMTargetLibraryInfoRef>( + new TargetLibraryInfoImpl(Triple(triple))); } -void -ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info) +void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info) { - delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info); + delete reinterpret_cast<TargetLibraryInfoImpl *>(library_info); } /* Implementation of raw_pwrite_stream that works on malloc()ed memory for * better compatibility with C code. */ -struct raw_memory_ostream : public llvm::raw_pwrite_stream { - char *buffer; - size_t written; - size_t bufsize; - - raw_memory_ostream() - { - buffer = NULL; - written = 0; - bufsize = 0; - SetUnbuffered(); - } - - ~raw_memory_ostream() - { - free(buffer); - } - - void clear() - { - written = 0; - } - - void take(char *&out_buffer, size_t &out_size) - { - out_buffer = buffer; - out_size = written; - buffer = NULL; - written = 0; - bufsize = 0; - } - - void flush() = delete; - - void write_impl(const char *ptr, size_t size) override - { - if (unlikely(written + size < written)) - abort(); - if (written + size > bufsize) { - bufsize = MAX3(1024, written + size, bufsize / 3 * 4); - buffer = (char *)realloc(buffer, bufsize); - if (!buffer) { - fprintf(stderr, "amd: out of memory allocating ELF buffer\n"); - abort(); - } - } - memcpy(buffer + written, ptr, size); - written += size; - } - - void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override - { - assert(offset == (size_t)offset && - offset + size >= offset && offset + size <= written); - memcpy(buffer + offset, ptr, size); - } - - uint64_t current_pos() const override - { - return written; - } +struct raw_memory_ostream : public raw_pwrite_stream { + char *buffer; + size_t written; + size_t bufsize; + + raw_memory_ostream() + { + buffer = NULL; + written = 0; + bufsize = 0; + SetUnbuffered(); + } + + ~raw_memory_ostream() + { + free(buffer); + } + + void clear() + { + written = 0; + } + + void take(char *&out_buffer, size_t &out_size) + { + out_buffer = buffer; + out_size = written; + buffer = NULL; + written = 0; + bufsize = 0; + } + + void flush() = delete; + + void write_impl(const char *ptr, size_t size) override + { + if (unlikely(written + size < written)) + abort(); + if (written + size > bufsize) { + bufsize = MAX3(1024, written + size, bufsize / 3 * 4); + buffer = (char *)realloc(buffer, bufsize); + if (!buffer) { + fprintf(stderr, "amd: out of memory allocating ELF buffer\n"); + abort(); + } + } + memcpy(buffer + written, ptr, size); + written += size; + } + + void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override + { + assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written); + memcpy(buffer + offset, ptr, size); + } + + uint64_t current_pos() const override + { + return written; + } }; /* The LLVM compiler is represented as a pass manager containing passes for * optimizations, instruction selection, and code generation. */ struct ac_compiler_passes { - raw_memory_ostream ostream; /* ELF shader binary stream */ - llvm::legacy::PassManager passmgr; /* list of passes */ + raw_memory_ostream ostream; /* ELF shader binary stream */ + legacy::PassManager passmgr; /* list of passes */ }; struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm) { - struct ac_compiler_passes *p = new ac_compiler_passes(); - if (!p) - return NULL; + struct ac_compiler_passes *p = new ac_compiler_passes(); + if (!p) + return NULL; - llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm); + TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm); - if (TM->addPassesToEmitFile(p->passmgr, p->ostream, - nullptr, -#if LLVM_VERSION_MAJOR >= 10 - llvm::CGFT_ObjectFile)) { + if (TM->addPassesToEmitFile(p->passmgr, p->ostream, nullptr, +#if LLVM_VERSION_MAJOR >= 18 + CodeGenFileType::ObjectFile)) { #else - llvm::TargetMachine::CGFT_ObjectFile)) { + CGFT_ObjectFile)) { #endif - fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n"); - delete p; - return NULL; - } - return p; + fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n"); + delete p; + return NULL; + } + return p; } void ac_destroy_llvm_passes(struct ac_compiler_passes *p) { - delete p; + delete p; } /* This returns false on failure. */ bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module, - char **pelf_buffer, size_t *pelf_size) + char **pelf_buffer, size_t *pelf_size) { - p->passmgr.run(*llvm::unwrap(module)); - p->ostream.take(*pelf_buffer, *pelf_size); - return true; + p->passmgr.run(*unwrap(module)); + p->ostream.take(*pelf_buffer, *pelf_size); + return true; } -void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr) +LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info, + bool check_ir) { - llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass()); -} - -void ac_enable_global_isel(LLVMTargetMachineRef tm) -{ - reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true); + LLVMPassManagerRef passmgr = LLVMCreatePassManager(); + if (!passmgr) + return NULL; + + if (target_library_info) + LLVMAddTargetLibraryInfo(target_library_info, passmgr); + + if (check_ir) + unwrap(passmgr)->add(createVerifierPass()); + + unwrap(passmgr)->add(createAlwaysInlinerLegacyPass()); + + /* Normally, the pass manager runs all passes on one function before + * moving onto another. Adding a barrier no-op pass forces the pass + * manager to run the inliner on all functions first, which makes sure + * that the following passes are only run on the remaining non-inline + * function, so it removes useless work done on dead inline functions. + */ + unwrap(passmgr)->add(createBarrierNoopPass()); + + #if LLVM_VERSION_MAJOR >= 16 + unwrap(passmgr)->add(createSROAPass(true)); + #else + unwrap(passmgr)->add(createSROAPass()); + #endif + /* TODO: restore IPSCCP */ + unwrap(passmgr)->add(createLICMPass()); + unwrap(passmgr)->add(createCFGSimplificationPass()); + /* This is recommended by the instruction combining pass. */ + unwrap(passmgr)->add(createEarlyCSEPass(true)); + unwrap(passmgr)->add(createInstructionCombiningPass()); + return passmgr; } LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op, - LLVMValueRef ptr, LLVMValueRef val, - const char *sync_scope) { - llvm::AtomicRMWInst::BinOp binop; - switch (op) { - case LLVMAtomicRMWBinOpXchg: - binop = llvm::AtomicRMWInst::Xchg; - break; - case LLVMAtomicRMWBinOpAdd: - binop = llvm::AtomicRMWInst::Add; - break; - case LLVMAtomicRMWBinOpSub: - binop = llvm::AtomicRMWInst::Sub; - break; - case LLVMAtomicRMWBinOpAnd: - binop = llvm::AtomicRMWInst::And; - break; - case LLVMAtomicRMWBinOpNand: - binop = llvm::AtomicRMWInst::Nand; - break; - case LLVMAtomicRMWBinOpOr: - binop = llvm::AtomicRMWInst::Or; - break; - case LLVMAtomicRMWBinOpXor: - binop = llvm::AtomicRMWInst::Xor; - break; - case LLVMAtomicRMWBinOpMax: - binop = llvm::AtomicRMWInst::Max; - break; - case LLVMAtomicRMWBinOpMin: - binop = llvm::AtomicRMWInst::Min; - break; - case LLVMAtomicRMWBinOpUMax: - binop = llvm::AtomicRMWInst::UMax; - break; - case LLVMAtomicRMWBinOpUMin: - binop = llvm::AtomicRMWInst::UMin; - break; - default: - unreachable(!"invalid LLVMAtomicRMWBinOp"); - break; - } - unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope); - return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicRMW( - binop, llvm::unwrap(ptr), llvm::unwrap(val), - llvm::AtomicOrdering::SequentiallyConsistent, SSID)); + LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope) +{ + AtomicRMWInst::BinOp binop; + switch (op) { + case LLVMAtomicRMWBinOpXchg: + binop = AtomicRMWInst::Xchg; + break; + case LLVMAtomicRMWBinOpAdd: + binop = AtomicRMWInst::Add; + break; + case LLVMAtomicRMWBinOpSub: + binop = AtomicRMWInst::Sub; + break; + case LLVMAtomicRMWBinOpAnd: + binop = AtomicRMWInst::And; + break; + case LLVMAtomicRMWBinOpNand: + binop = AtomicRMWInst::Nand; + break; + case LLVMAtomicRMWBinOpOr: + binop = AtomicRMWInst::Or; + break; + case LLVMAtomicRMWBinOpXor: + binop = AtomicRMWInst::Xor; + break; + case LLVMAtomicRMWBinOpMax: + binop = AtomicRMWInst::Max; + break; + case LLVMAtomicRMWBinOpMin: + binop = AtomicRMWInst::Min; + break; + case LLVMAtomicRMWBinOpUMax: + binop = AtomicRMWInst::UMax; + break; + case LLVMAtomicRMWBinOpUMin: + binop = AtomicRMWInst::UMin; + break; + case LLVMAtomicRMWBinOpFAdd: + binop = AtomicRMWInst::FAdd; + break; + default: + unreachable("invalid LLVMAtomicRMWBinOp"); + break; + } + unsigned SSID = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope); + return wrap(unwrap(ctx->builder) + ->CreateAtomicRMW(binop, unwrap(ptr), unwrap(val), + MaybeAlign(0), + AtomicOrdering::SequentiallyConsistent, SSID)); } LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr, - LLVMValueRef cmp, LLVMValueRef val, - const char *sync_scope) { - unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope); - return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicCmpXchg( - llvm::unwrap(ptr), llvm::unwrap(cmp), llvm::unwrap(val), - llvm::AtomicOrdering::SequentiallyConsistent, - llvm::AtomicOrdering::SequentiallyConsistent, SSID)); + LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope) +{ + unsigned SSID = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope); + return wrap(unwrap(ctx->builder) + ->CreateAtomicCmpXchg(unwrap(ptr), unwrap(cmp), + unwrap(val), + MaybeAlign(0), + AtomicOrdering::SequentiallyConsistent, + AtomicOrdering::SequentiallyConsistent, SSID)); } |