diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2020-08-26 05:27:17 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2020-08-26 05:27:17 +0000 |
commit | 1bf474f2c641b83cebc2f7280bf5605fc439869b (patch) | |
tree | cbd7eb5e697988c9476037327295f52963ab4a93 | |
parent | 1b01e2b650298ea611382f6d42ef0b97ed3ade30 (diff) |
Import Mesa 20.1.6
-rw-r--r-- | lib/mesa/src/amd/llvm/ac_llvm_helper.cpp | 527 |
1 files changed, 234 insertions, 293 deletions
diff --git a/lib/mesa/src/amd/llvm/ac_llvm_helper.cpp b/lib/mesa/src/amd/llvm/ac_llvm_helper.cpp index 40a4399e7..f5383344d 100644 --- a/lib/mesa/src/amd/llvm/ac_llvm_helper.cpp +++ b/lib/mesa/src/amd/llvm/ac_llvm_helper.cpp @@ -1,383 +1,324 @@ /* * Copyright 2014 Advanced Micro Devices, Inc. * - * SPDX-License-Identifier: MIT + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * */ -#include <llvm-c/Core.h> -#include <llvm/Analysis/TargetLibraryInfo.h> -#include <llvm/IR/IRBuilder.h> -#include <llvm/IR/LegacyPassManager.h> -#include <llvm/IR/Verifier.h> -#include <llvm/Target/TargetMachine.h> -#include <llvm/MC/MCSubtargetInfo.h> -#include <llvm/Support/CommandLine.h> -#include <llvm/Transforms/IPO.h> -#include <llvm/Transforms/Scalar.h> -#include <llvm/Transforms/Utils.h> -#include <llvm/CodeGen/Passes.h> -#include <llvm/Transforms/IPO/AlwaysInliner.h> -#include <llvm/Transforms/InstCombine/InstCombine.h> -#include <llvm/Transforms/IPO/SCCP.h> -#include "llvm/CodeGen/SelectionDAGNodes.h" - #include <cstring> -/* DO NOT REORDER THE HEADERS - * The LLVM headers need to all be included before any Mesa header, - * as they use the `restrict` keyword in ways that are incompatible - * with our #define in include/c99_compat.h - */ - #include "ac_binary.h" #include "ac_llvm_util.h" #include "ac_llvm_build.h" -#include "util/macros.h" -using namespace llvm; - -class RunAtExitForStaticDestructors : public SDNode -{ -public: - /* getSDVTList (protected) calls getValueTypeList (private), which contains static variables. */ - RunAtExitForStaticDestructors(): SDNode(0, 0, DebugLoc(), getSDVTList(MVT::Other)) - { - } -}; +#include "util/macros.h" -void ac_llvm_run_atexit_for_destructors(void) -{ - /* LLVM >= 16 registers static variable destructors on the first compile, which gcc - * implements by calling atexit there. Before that, u_queue registers its atexit - * handler to kill all threads. Since exit() runs atexit handlers in the reverse order, - * the LLVM destructors are called first while shader compiler threads may still be - * running, which crashes in LLVM in SelectionDAG.cpp. - * - * The solution is to run the code that declares the LLVM static variables first, - * so that atexit for LLVM is registered first and u_queue is registered after that, - * which ensures that all u_queue threads are terminated before LLVM destructors are - * called. - * - * This just executes the code that declares static variables. - */ - RunAtExitForStaticDestructors(); -} +#include <llvm-c/Core.h> +#include <llvm/Target/TargetMachine.h> +#include <llvm/IR/IRBuilder.h> +#include <llvm/Analysis/TargetLibraryInfo.h> +#include <llvm/Transforms/IPO.h> -bool ac_is_llvm_processor_supported(LLVMTargetMachineRef tm, const char *processor) -{ - TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm); - return TM->getMCSubtargetInfo()->isCPUStringValid(processor); -} +#include <llvm/IR/LegacyPassManager.h> -void ac_reset_llvm_all_options_occurrences() +void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes) { - cl::ResetAllOptionOccurrences(); + llvm::Argument *A = llvm::unwrap<llvm::Argument>(val); + A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes)); } -void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes) +bool ac_is_sgpr_param(LLVMValueRef arg) { - Argument *A = unwrap<Argument>(val); - A->addAttr(Attribute::getWithDereferenceableBytes(A->getContext(), bytes)); + llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg); + llvm::AttributeList AS = A->getParent()->getAttributes(); + unsigned ArgNo = A->getArgNo(); + return AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg); } -void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes) +LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call) { - Argument *A = unwrap<Argument>(val); - A->addAttr(Attribute::getWithAlignment(A->getContext(), Align(bytes))); + return LLVMGetCalledValue(call); } -bool ac_is_sgpr_param(LLVMValueRef arg) +bool ac_llvm_is_function(LLVMValueRef v) { - Argument *A = unwrap<Argument>(arg); - AttributeList AS = A->getParent()->getAttributes(); - unsigned ArgNo = A->getArgNo(); - return AS.hasParamAttr(ArgNo, Attribute::InReg); + return LLVMGetValueKind(v) == LLVMFunctionValueKind; } LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx) { - TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm); + llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm); LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx); - unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple()); - unwrap(module)->setDataLayout(TM->createDataLayout()); + llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple()); + llvm::unwrap(module)->setDataLayout(TM->createDataLayout()); return module; } -LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode) +LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, + enum ac_float_mode float_mode) { - LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx); + LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx); + + llvm::FastMathFlags flags; - FastMathFlags flags; + switch (float_mode) { + case AC_FLOAT_MODE_DEFAULT: + case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO: + break; - switch (float_mode) { - case AC_FLOAT_MODE_DEFAULT: - case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO: - break; + case AC_FLOAT_MODE_DEFAULT_OPENGL: + /* Allow optimizations to treat the sign of a zero argument or + * result as insignificant. + */ + flags.setNoSignedZeros(); /* nsz */ - case AC_FLOAT_MODE_DEFAULT_OPENGL: - /* Allow optimizations to treat the sign of a zero argument or - * result as insignificant. - */ - flags.setNoSignedZeros(); /* nsz */ + /* Allow optimizations to use the reciprocal of an argument + * rather than perform division. + */ + flags.setAllowReciprocal(); /* arcp */ - /* Allow optimizations to use the reciprocal of an argument - * rather than perform division. - */ - flags.setAllowReciprocal(); /* arcp */ + /* Allow floating-point contraction (e.g. fusing a multiply + * followed by an addition into a fused multiply-and-add). + */ + flags.setAllowContract(); /* contract */ - unwrap(builder)->setFastMathFlags(flags); - break; - } + llvm::unwrap(builder)->setFastMathFlags(flags); + break; + } - return builder; + return builder; } -void ac_enable_signed_zeros(struct ac_llvm_context *ctx) +/* Return the original state of inexact math. */ +bool ac_disable_inexact_math(LLVMBuilderRef builder) { - if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) { - auto *b = unwrap(ctx->builder); - FastMathFlags flags = b->getFastMathFlags(); - - /* This disables the optimization of (x + 0), which is used - * to convert negative zero to positive zero. - */ - flags.setNoSignedZeros(false); - b->setFastMathFlags(flags); - } + auto *b = llvm::unwrap(builder); + llvm::FastMathFlags flags = b->getFastMathFlags(); + + if (!flags.allowContract()) + return false; + + flags.setAllowContract(false); + b->setFastMathFlags(flags); + return true; } -void ac_disable_signed_zeros(struct ac_llvm_context *ctx) +void ac_restore_inexact_math(LLVMBuilderRef builder, bool value) { - if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) { - auto *b = unwrap(ctx->builder); - FastMathFlags flags = b->getFastMathFlags(); + auto *b = llvm::unwrap(builder); + llvm::FastMathFlags flags = b->getFastMathFlags(); + + if (flags.allowContract() == value) + return; - flags.setNoSignedZeros(); - b->setFastMathFlags(flags); - } + flags.setAllowContract(value); + b->setFastMathFlags(flags); } -LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple) +LLVMTargetLibraryInfoRef +ac_create_target_library_info(const char *triple) { - return reinterpret_cast<LLVMTargetLibraryInfoRef>( - new TargetLibraryInfoImpl(Triple(triple))); + return reinterpret_cast<LLVMTargetLibraryInfoRef>(new llvm::TargetLibraryInfoImpl(llvm::Triple(triple))); } -void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info) +void +ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info) { - delete reinterpret_cast<TargetLibraryInfoImpl *>(library_info); + delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info); } /* Implementation of raw_pwrite_stream that works on malloc()ed memory for * better compatibility with C code. */ -struct raw_memory_ostream : public raw_pwrite_stream { - char *buffer; - size_t written; - size_t bufsize; - - raw_memory_ostream() - { - buffer = NULL; - written = 0; - bufsize = 0; - SetUnbuffered(); - } - - ~raw_memory_ostream() - { - free(buffer); - } - - void clear() - { - written = 0; - } - - void take(char *&out_buffer, size_t &out_size) - { - out_buffer = buffer; - out_size = written; - buffer = NULL; - written = 0; - bufsize = 0; - } - - void flush() = delete; - - void write_impl(const char *ptr, size_t size) override - { - if (unlikely(written + size < written)) - abort(); - if (written + size > bufsize) { - bufsize = MAX3(1024, written + size, bufsize / 3 * 4); - buffer = (char *)realloc(buffer, bufsize); - if (!buffer) { - fprintf(stderr, "amd: out of memory allocating ELF buffer\n"); - abort(); - } - } - memcpy(buffer + written, ptr, size); - written += size; - } - - void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override - { - assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written); - memcpy(buffer + offset, ptr, size); - } - - uint64_t current_pos() const override - { - return written; - } +struct raw_memory_ostream : public llvm::raw_pwrite_stream { + char *buffer; + size_t written; + size_t bufsize; + + raw_memory_ostream() + { + buffer = NULL; + written = 0; + bufsize = 0; + SetUnbuffered(); + } + + ~raw_memory_ostream() + { + free(buffer); + } + + void clear() + { + written = 0; + } + + void take(char *&out_buffer, size_t &out_size) + { + out_buffer = buffer; + out_size = written; + buffer = NULL; + written = 0; + bufsize = 0; + } + + void flush() = delete; + + void write_impl(const char *ptr, size_t size) override + { + if (unlikely(written + size < written)) + abort(); + if (written + size > bufsize) { + bufsize = MAX3(1024, written + size, bufsize / 3 * 4); + buffer = (char *)realloc(buffer, bufsize); + if (!buffer) { + fprintf(stderr, "amd: out of memory allocating ELF buffer\n"); + abort(); + } + } + memcpy(buffer + written, ptr, size); + written += size; + } + + void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override + { + assert(offset == (size_t)offset && + offset + size >= offset && offset + size <= written); + memcpy(buffer + offset, ptr, size); + } + + uint64_t current_pos() const override + { + return written; + } }; /* The LLVM compiler is represented as a pass manager containing passes for * optimizations, instruction selection, and code generation. */ struct ac_compiler_passes { - raw_memory_ostream ostream; /* ELF shader binary stream */ - legacy::PassManager passmgr; /* list of passes */ + raw_memory_ostream ostream; /* ELF shader binary stream */ + llvm::legacy::PassManager passmgr; /* list of passes */ }; struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm) { - struct ac_compiler_passes *p = new ac_compiler_passes(); - if (!p) - return NULL; + struct ac_compiler_passes *p = new ac_compiler_passes(); + if (!p) + return NULL; - TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm); + llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm); - if (TM->addPassesToEmitFile(p->passmgr, p->ostream, nullptr, -#if LLVM_VERSION_MAJOR >= 18 - CodeGenFileType::ObjectFile)) { + if (TM->addPassesToEmitFile(p->passmgr, p->ostream, + nullptr, +#if LLVM_VERSION_MAJOR >= 10 + llvm::CGFT_ObjectFile)) { #else - CGFT_ObjectFile)) { + llvm::TargetMachine::CGFT_ObjectFile)) { #endif - fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n"); - delete p; - return NULL; - } - return p; + fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n"); + delete p; + return NULL; + } + return p; } void ac_destroy_llvm_passes(struct ac_compiler_passes *p) { - delete p; + delete p; } /* This returns false on failure. */ bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module, - char **pelf_buffer, size_t *pelf_size) + char **pelf_buffer, size_t *pelf_size) { - p->passmgr.run(*unwrap(module)); - p->ostream.take(*pelf_buffer, *pelf_size); - return true; + p->passmgr.run(*llvm::unwrap(module)); + p->ostream.take(*pelf_buffer, *pelf_size); + return true; } -LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info, - bool check_ir) +void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr) { - LLVMPassManagerRef passmgr = LLVMCreatePassManager(); - if (!passmgr) - return NULL; - - if (target_library_info) - LLVMAddTargetLibraryInfo(target_library_info, passmgr); - - if (check_ir) - unwrap(passmgr)->add(createVerifierPass()); - - unwrap(passmgr)->add(createAlwaysInlinerLegacyPass()); - - /* Normally, the pass manager runs all passes on one function before - * moving onto another. Adding a barrier no-op pass forces the pass - * manager to run the inliner on all functions first, which makes sure - * that the following passes are only run on the remaining non-inline - * function, so it removes useless work done on dead inline functions. - */ - unwrap(passmgr)->add(createBarrierNoopPass()); - - /* This pass eliminates all loads and stores on alloca'd pointers. */ - unwrap(passmgr)->add(createPromoteMemoryToRegisterPass()); - #if LLVM_VERSION_MAJOR >= 16 - unwrap(passmgr)->add(createSROAPass(true)); - #else - unwrap(passmgr)->add(createSROAPass()); - #endif - /* TODO: restore IPSCCP */ - if (LLVM_VERSION_MAJOR >= 16) - unwrap(passmgr)->add(createLoopSinkPass()); - /* TODO: restore IPSCCP */ - unwrap(passmgr)->add(createLICMPass()); - unwrap(passmgr)->add(createCFGSimplificationPass()); - /* This is recommended by the instruction combining pass. */ - unwrap(passmgr)->add(createEarlyCSEPass(true)); - unwrap(passmgr)->add(createInstructionCombiningPass()); - return passmgr; + llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass()); } -LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op, - LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope) +void ac_enable_global_isel(LLVMTargetMachineRef tm) { - AtomicRMWInst::BinOp binop; - switch (op) { - case LLVMAtomicRMWBinOpXchg: - binop = AtomicRMWInst::Xchg; - break; - case LLVMAtomicRMWBinOpAdd: - binop = AtomicRMWInst::Add; - break; - case LLVMAtomicRMWBinOpSub: - binop = AtomicRMWInst::Sub; - break; - case LLVMAtomicRMWBinOpAnd: - binop = AtomicRMWInst::And; - break; - case LLVMAtomicRMWBinOpNand: - binop = AtomicRMWInst::Nand; - break; - case LLVMAtomicRMWBinOpOr: - binop = AtomicRMWInst::Or; - break; - case LLVMAtomicRMWBinOpXor: - binop = AtomicRMWInst::Xor; - break; - case LLVMAtomicRMWBinOpMax: - binop = AtomicRMWInst::Max; - break; - case LLVMAtomicRMWBinOpMin: - binop = AtomicRMWInst::Min; - break; - case LLVMAtomicRMWBinOpUMax: - binop = AtomicRMWInst::UMax; - break; - case LLVMAtomicRMWBinOpUMin: - binop = AtomicRMWInst::UMin; - break; - case LLVMAtomicRMWBinOpFAdd: - binop = AtomicRMWInst::FAdd; - break; - default: - unreachable("invalid LLVMAtomicRMWBinOp"); - break; - } - unsigned SSID = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope); - return wrap(unwrap(ctx->builder) - ->CreateAtomicRMW(binop, unwrap(ptr), unwrap(val), - MaybeAlign(0), - AtomicOrdering::SequentiallyConsistent, SSID)); + reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true); +} + +LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op, + LLVMValueRef ptr, LLVMValueRef val, + const char *sync_scope) { + llvm::AtomicRMWInst::BinOp binop; + switch (op) { + case LLVMAtomicRMWBinOpXchg: + binop = llvm::AtomicRMWInst::Xchg; + break; + case LLVMAtomicRMWBinOpAdd: + binop = llvm::AtomicRMWInst::Add; + break; + case LLVMAtomicRMWBinOpSub: + binop = llvm::AtomicRMWInst::Sub; + break; + case LLVMAtomicRMWBinOpAnd: + binop = llvm::AtomicRMWInst::And; + break; + case LLVMAtomicRMWBinOpNand: + binop = llvm::AtomicRMWInst::Nand; + break; + case LLVMAtomicRMWBinOpOr: + binop = llvm::AtomicRMWInst::Or; + break; + case LLVMAtomicRMWBinOpXor: + binop = llvm::AtomicRMWInst::Xor; + break; + case LLVMAtomicRMWBinOpMax: + binop = llvm::AtomicRMWInst::Max; + break; + case LLVMAtomicRMWBinOpMin: + binop = llvm::AtomicRMWInst::Min; + break; + case LLVMAtomicRMWBinOpUMax: + binop = llvm::AtomicRMWInst::UMax; + break; + case LLVMAtomicRMWBinOpUMin: + binop = llvm::AtomicRMWInst::UMin; + break; + default: + unreachable(!"invalid LLVMAtomicRMWBinOp"); + break; + } + unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope); + return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicRMW( + binop, llvm::unwrap(ptr), llvm::unwrap(val), + llvm::AtomicOrdering::SequentiallyConsistent, SSID)); } LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr, - LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope) -{ - unsigned SSID = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope); - return wrap(unwrap(ctx->builder) - ->CreateAtomicCmpXchg(unwrap(ptr), unwrap(cmp), - unwrap(val), - MaybeAlign(0), - AtomicOrdering::SequentiallyConsistent, - AtomicOrdering::SequentiallyConsistent, SSID)); + LLVMValueRef cmp, LLVMValueRef val, + const char *sync_scope) { + unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope); + return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicCmpXchg( + llvm::unwrap(ptr), llvm::unwrap(cmp), llvm::unwrap(val), + llvm::AtomicOrdering::SequentiallyConsistent, + llvm::AtomicOrdering::SequentiallyConsistent, SSID)); } |