author     gkoehler <gkoehler@cvs.openbsd.org>    2020-03-04 18:44:39 +0000
committer  gkoehler <gkoehler@cvs.openbsd.org>    2020-03-04 18:44:39 +0000
commit     380b8eb56c5db89611078ccb6db5bd42588e73cb (patch)
tree       77ce8a2f276e861529e05bb9114f5294cc495ea5 /gnu/llvm
parent     f95356d4aa6421f8b327d38074bf92c296570399 (diff)
Switch powerpc clang to -msvr4-struct-return, like gcc.
Add these options from gcc to clang:
-maix-struct-return # return all structs in memory
-msvr4-struct-return # return small structs in r3/r4
ok mortimer@ jca@ deraadt@
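
For a concrete picture of the two conventions, here is a minimal C example (hypothetical, not part of the commit) of an aggregate small enough to qualify for register return on 32-bit PowerPC:

/* Under -msvr4-struct-return, an aggregate of 8 bytes or less is
 * returned in registers: the low-addressed word in r3, the
 * high-addressed word in r4.  Under -maix-struct-return, the caller
 * instead passes a hidden pointer and the struct comes back in memory. */
struct pair { int lo; int hi; };

struct pair make_pair(int lo, int hi) {
    struct pair p = { lo, hi };
    return p;   /* r3/r4 vs. hidden-pointer return, per the flag */
}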
Diffstat (limited to 'gnu/llvm')
-rw-r--r--  gnu/llvm/tools/clang/include/clang/Driver/Options.td        |    6
-rw-r--r--  gnu/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp             | 3061
-rw-r--r--  gnu/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp        |   13
-rw-r--r--  gnu/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp    |   13
4 files changed, 2541 insertions, 552 deletions
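
The substance of the change lives in the PPC32 ABI code in TargetInfo.cpp below, with CompilerInvocation.cpp mapping the driver flags onto a struct-return convention in CodeGenOptions. As a rough sketch of the decision those flags control (assumed shape only: the class name is hypothetical and the committed code models the r3/r4 return differently), an ABIInfo subclass can key small-aggregate returns off that option:

// Sketch only: PPC32SketchABIInfo is an illustrative name for a
// DefaultABIInfo subclass, not the committed class.
ABIArgInfo PPC32SketchABIInfo::classifyReturnType(QualType RetTy) const {
  if (isAggregateTypeForABI(RetTy)) {
    uint64_t Bits = getContext().getTypeSize(RetTy);
    // -msvr4-struct-return: aggregates of up to 8 bytes come back in
    // r3/r4, modeled here as a direct integer return.
    if (getCodeGenOpts().getStructReturnConvention() ==
            CodeGenOptions::SRCK_InRegs &&
        Bits > 0 && Bits <= 64)
      return ABIArgInfo::getDirect(
          llvm::IntegerType::get(getVMContext(), Bits));
    // -maix-struct-return (and the indirect default): return all
    // aggregates in memory through a hidden sret pointer.
    return getNaturalAlignIndirect(RetTy);
  }
  return DefaultABIInfo::classifyReturnType(RetTy);
}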
diff --git a/gnu/llvm/tools/clang/include/clang/Driver/Options.td b/gnu/llvm/tools/clang/include/clang/Driver/Options.td index 8fd8cb67cbf..7fcbc76b6eb 100644 --- a/gnu/llvm/tools/clang/include/clang/Driver/Options.td +++ b/gnu/llvm/tools/clang/include/clang/Driver/Options.td @@ -2238,6 +2238,12 @@ def mlongcall: Flag<["-"], "mlongcall">, Group<m_ppc_Features_Group>; def mno_longcall : Flag<["-"], "mno-longcall">, Group<m_ppc_Features_Group>; +def maix_struct_return : Flag<["-"], "maix-struct-return">, + Group<m_Group>, Flags<[CC1Option]>, + HelpText<"Return all structs in memory (PPC32 only)">; +def msvr4_struct_return : Flag<["-"], "msvr4-struct-return">, + Group<m_Group>, Flags<[CC1Option]>, + HelpText<"Return small structs in registers (PPC32 only)">; def mvx : Flag<["-"], "mvx">, Group<m_Group>; def mno_vx : Flag<["-"], "mno-vx">, Group<m_Group>; diff --git a/gnu/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp b/gnu/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp index 04edc0c852d..f54decf5106 100644 --- a/gnu/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp +++ b/gnu/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp @@ -14,14 +14,18 @@ #include "TargetInfo.h" #include "ABIInfo.h" +#include "CGBlocks.h" #include "CGCXXABI.h" #include "CGValue.h" #include "CodeGenFunction.h" #include "clang/AST/RecordLayout.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "clang/Frontend/CodeGenOptions.h" +#include "clang/CodeGen/SwiftCallingConv.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Type.h" #include "llvm/Support/raw_ostream.h" @@ -30,6 +34,31 @@ using namespace clang; using namespace CodeGen; +// Helper for coercing an aggregate argument or return value into an integer +// array of the same size (including padding) and alignment. This alternate +// coercion happens only for the RenderScript ABI and can be removed after +// runtimes that rely on it are no longer supported. +// +// RenderScript assumes that the size of the argument / return value in the IR +// is the same as the size of the corresponding qualified type. This helper +// coerces the aggregate type into an array of the same size (including +// padding). This coercion is used in lieu of expansion of struct members or +// other canonical coercions that return a coerced-type of larger size. +// +// Ty - The argument / return value type +// Context - The associated ASTContext +// LLVMContext - The associated LLVMContext +static ABIArgInfo coerceToIntArray(QualType Ty, + ASTContext &Context, + llvm::LLVMContext &LLVMContext) { + // Alignment and Size are measured in bits. + const uint64_t Size = Context.getTypeSize(Ty); + const uint64_t Alignment = Context.getTypeAlign(Ty); + llvm::Type *IntType = llvm::Type::getIntNTy(LLVMContext, Alignment); + const uint64_t NumElements = (Size + Alignment - 1) / Alignment; + return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements)); +} + static void AssignToArrayRange(CodeGen::CGBuilderTy &Builder, llvm::Value *Array, llvm::Value *Value, @@ -68,11 +97,54 @@ Address ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, ABIInfo::~ABIInfo() {} +/// Does the given lowering require more than the given number of +/// registers when expanded? +/// +/// This is intended to be the basis of a reasonable basic implementation +/// of should{Pass,Return}IndirectlyForSwift. 
+/// +/// For most targets, a limit of four total registers is reasonable; this +/// limits the amount of code required in order to move around the value +/// in case it wasn't produced immediately prior to the call by the caller +/// (or wasn't produced in exactly the right registers) or isn't used +/// immediately within the callee. But some targets may need to further +/// limit the register count due to an inability to support that many +/// return registers. +static bool occupiesMoreThan(CodeGenTypes &cgt, + ArrayRef<llvm::Type*> scalarTypes, + unsigned maxAllRegisters) { + unsigned intCount = 0, fpCount = 0; + for (llvm::Type *type : scalarTypes) { + if (type->isPointerTy()) { + intCount++; + } else if (auto intTy = dyn_cast<llvm::IntegerType>(type)) { + auto ptrWidth = cgt.getTarget().getPointerWidth(0); + intCount += (intTy->getBitWidth() + ptrWidth - 1) / ptrWidth; + } else { + assert(type->isVectorTy() || type->isFloatingPointTy()); + fpCount++; + } + } + + return (intCount + fpCount > maxAllRegisters); +} + +bool SwiftABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize, + llvm::Type *eltTy, + unsigned numElts) const { + // The default implementation of this assumes that the target guarantees + // 128-bit SIMD support but nothing more. + return (vectorSize.getQuantity() > 8 && vectorSize.getQuantity() <= 16); +} + static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT, CGCXXABI &CXXABI) { const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl()); - if (!RD) + if (!RD) { + if (!RT->getDecl()->canPassInRegisters()) + return CGCXXABI::RAA_Indirect; return CGCXXABI::RAA_Default; + } return CXXABI.getRecordArgABI(RD); } @@ -84,6 +156,20 @@ static CGCXXABI::RecordArgABI getRecordArgABI(QualType T, return getRecordArgABI(RT, CXXABI); } +static bool classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI, + const ABIInfo &Info) { + QualType Ty = FI.getReturnType(); + + if (const auto *RT = Ty->getAs<RecordType>()) + if (!isa<CXXRecordDecl>(RT->getDecl()) && + !RT->getDecl()->canPassInRegisters()) { + FI.getReturnInfo() = Info.getNaturalAlignIndirect(Ty); + return true; + } + + return CXXABI.classifyReturnType(FI); +} + /// Pass transparent unions as if they were the type of the first element. Sema /// should ensure that all elements of the union have the same "machine type". 
static QualType useFirstFieldIfTransparentUnion(QualType Ty) { @@ -117,6 +203,12 @@ const TargetInfo &ABIInfo::getTarget() const { return CGT.getTarget(); } +const CodeGenOptions &ABIInfo::getCodeGenOpts() const { + return CGT.getCodeGenOpts(); +} + +bool ABIInfo::isAndroid() const { return getTarget().getTriple().isAndroid(); } + bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { return false; } @@ -126,11 +218,7 @@ bool ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, return false; } -bool ABIInfo::shouldSignExtUnsignedType(QualType Ty) const { - return false; -} - -void ABIArgInfo::dump() const { +LLVM_DUMP_METHOD void ABIArgInfo::dump() const { raw_ostream &OS = llvm::errs(); OS << "(ABIArgInfo Kind="; switch (TheKind) { @@ -158,6 +246,10 @@ void ABIArgInfo::dump() const { case Expand: OS << "Expand"; break; + case CoerceAndExpand: + OS << "CoerceAndExpand Type="; + getCoerceAndExpandType()->print(OS); + break; } OS << ")\n"; } @@ -213,11 +305,11 @@ static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF, Addr = Address(emitRoundPointerUpToAlignment(CGF, Ptr, DirectAlign), DirectAlign); } else { - Addr = Address(Ptr, SlotSize); + Addr = Address(Ptr, SlotSize); } // Advance the pointer past the argument, then store that back. - CharUnits FullDirectSize = DirectSize.RoundUpToAlignment(SlotSize); + CharUnits FullDirectSize = DirectSize.alignTo(SlotSize); llvm::Value *NextPtr = CGF.Builder.CreateConstInBoundsByteGEP(Addr.getPointer(), FullDirectSize, "argp.next"); @@ -225,7 +317,8 @@ static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF, // If the argument is smaller than a slot, and this is a big-endian // target, the argument will be right-adjusted in its slot. - if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian()) { + if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian() && + !DirectTy->isStructTy()) { Addr = CGF.Builder.CreateConstInBoundsByteGEP(Addr, SlotSize - DirectSize); } @@ -276,7 +369,7 @@ static Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr, } return Addr; - + } static Address emitMergePHI(CodeGenFunction &CGF, @@ -324,6 +417,57 @@ TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib, Opt += Lib; } +unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const { + // OpenCL kernels are called via an explicit runtime API with arguments + // set with clSetKernelArg(), not as normal sub-functions. + // Return SPIR_KERNEL by default as the kernel calling convention to + // ensure the fingerprint is fixed such way that each OpenCL argument + // gets one matching argument in the produced kernel function argument + // list to enable feasible implementation of clSetKernelArg() with + // aggregates etc. In case we would use the default C calling conv here, + // clSetKernelArg() might break depending on the target-specific + // conventions; different targets might split structs passed as values + // to multiple function arguments etc. + return llvm::CallingConv::SPIR_KERNEL; +} + +llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM, + llvm::PointerType *T, QualType QT) const { + return llvm::ConstantPointerNull::get(T); +} + +LangAS TargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const { + assert(!CGM.getLangOpts().OpenCL && + !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) && + "Address space agnostic languages only"); + return D ? 
D->getType().getAddressSpace() : LangAS::Default; +} + +llvm::Value *TargetCodeGenInfo::performAddrSpaceCast( + CodeGen::CodeGenFunction &CGF, llvm::Value *Src, LangAS SrcAddr, + LangAS DestAddr, llvm::Type *DestTy, bool isNonNull) const { + // Since target may map different address spaces in AST to the same address + // space, an address space conversion may end up as a bitcast. + if (auto *C = dyn_cast<llvm::Constant>(Src)) + return performAddrSpaceCast(CGF.CGM, C, SrcAddr, DestAddr, DestTy); + return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src, DestTy); +} + +llvm::Constant * +TargetCodeGenInfo::performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *Src, + LangAS SrcAddr, LangAS DestAddr, + llvm::Type *DestTy) const { + // Since target may map different address spaces in AST to the same address + // space, an address space conversion may end up as a bitcast. + return llvm::ConstantExpr::getPointerCast(Src, DestTy); +} + +llvm::SyncScope::ID +TargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, llvm::LLVMContext &C) const { + return C.getOrInsertSyncScopeID(""); /* default sync scope */ +} + static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays); /// isEmptyField - Return true iff a the field is "empty", that is it @@ -364,7 +508,7 @@ static bool isEmptyField(ASTContext &Context, const FieldDecl *FD, static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) { const RecordType *RT = T->getAs<RecordType>(); if (!RT) - return 0; + return false; const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return false; @@ -456,72 +600,6 @@ static const Type *isSingleElementStruct(QualType T, ASTContext &Context) { return Found; } -static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { - // Treat complex types as the element type. - if (const ComplexType *CTy = Ty->getAs<ComplexType>()) - Ty = CTy->getElementType(); - - // Check for a type which we know has a simple scalar argument-passing - // convention without any padding. (We're specifically looking for 32 - // and 64-bit integer and integer-equivalents, float, and double.) - if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() && - !Ty->isEnumeralType() && !Ty->isBlockPointerType()) - return false; - - uint64_t Size = Context.getTypeSize(Ty); - return Size == 32 || Size == 64; -} - -/// canExpandIndirectArgument - Test whether an argument type which is to be -/// passed indirectly (on the stack) would have the equivalent layout if it was -/// expanded into separate arguments. If so, we prefer to do the latter to avoid -/// inhibiting optimizations. -/// -// FIXME: This predicate is missing many cases, currently it just follows -// llvm-gcc (checks that all fields are 32-bit or 64-bit primitive types). We -// should probably make this smarter, or better yet make the LLVM backend -// capable of handling it. -static bool canExpandIndirectArgument(QualType Ty, ASTContext &Context) { - // We can only expand structure types. - const RecordType *RT = Ty->getAs<RecordType>(); - if (!RT) - return false; - - // We can only expand (C) structures. - // - // FIXME: This needs to be generalized to handle classes as well. - const RecordDecl *RD = RT->getDecl(); - if (!RD->isStruct()) - return false; - - // We try to expand CLike CXXRecordDecl. 
- if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { - if (!CXXRD->isCLike()) - return false; - } - - uint64_t Size = 0; - - for (const auto *FD : RD->fields()) { - if (!is32Or64BitBasicType(FD->getType(), Context)) - return false; - - // FIXME: Reject bit-fields wholesale; there are two problems, we don't know - // how to expand them yet, and the predicate for telling if a bitfield still - // counts as "basic" is more complicated than what we were doing previously. - if (FD->isBitField()) - return false; - - Size += Context.getTypeSize(FD->getType()); - } - - // Make sure there are not any holes in the struct. - if (Size != Context.getTypeSize(Ty)) - return false; - - return true; -} - namespace { Address EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr, QualType Ty, const ABIArgInfo &AI) { @@ -538,10 +616,10 @@ Address EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr, QualType Ty, if (AI.isIndirect()) { assert(!AI.getPaddingType() && - "Unepxected PaddingType seen in arginfo in generic VAArg emitter!"); + "Unexpected PaddingType seen in arginfo in generic VAArg emitter!"); assert( !AI.getIndirectRealign() && - "Unepxected IndirectRealign seen in arginfo in generic VAArg emitter!"); + "Unexpected IndirectRealign seen in arginfo in generic VAArg emitter!"); auto TyInfo = CGF.getContext().getTypeInfoInChars(Ty); CharUnits TyAlignForABI = TyInfo.second; @@ -556,13 +634,13 @@ Address EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr, QualType Ty, "Unexpected ArgInfo Kind in generic VAArg emitter!"); assert(!AI.getInReg() && - "Unepxected InReg seen in arginfo in generic VAArg emitter!"); + "Unexpected InReg seen in arginfo in generic VAArg emitter!"); assert(!AI.getPaddingType() && - "Unepxected PaddingType seen in arginfo in generic VAArg emitter!"); + "Unexpected PaddingType seen in arginfo in generic VAArg emitter!"); assert(!AI.getDirectOffset() && - "Unepxected DirectOffset seen in arginfo in generic VAArg emitter!"); + "Unexpected DirectOffset seen in arginfo in generic VAArg emitter!"); assert(!AI.getCoerceToType() && - "Unepxected CoerceToType seen in arginfo in generic VAArg emitter!"); + "Unexpected CoerceToType seen in arginfo in generic VAArg emitter!"); Address Temp = CGF.CreateMemTemp(Ty, "varet"); Val = CGF.Builder.CreateVAArg(VAListAddr.getPointer(), CGF.ConvertType(Ty)); @@ -617,8 +695,8 @@ ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const { if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); - return (Ty->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); } ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const { @@ -632,8 +710,8 @@ ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const { if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return (RetTy->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); } //===----------------------------------------------------------------------===// @@ -642,17 +720,19 @@ ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const { // This is a very simple ABI that relies a lot on DefaultABIInfo. 
//===----------------------------------------------------------------------===// -class WebAssemblyABIInfo final : public DefaultABIInfo { +class WebAssemblyABIInfo final : public SwiftABIInfo { + DefaultABIInfo defaultInfo; + public: explicit WebAssemblyABIInfo(CodeGen::CodeGenTypes &CGT) - : DefaultABIInfo(CGT) {} + : SwiftABIInfo(CGT), defaultInfo(CGT) {} private: ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType Ty) const; // DefaultABIInfo's classifyReturnType and classifyArgumentType are - // non-virtual, but computeInfo and EmitVAArg is virtual, so we + // non-virtual, but computeInfo and EmitVAArg are virtual, so we // overload them. void computeInfo(CGFunctionInfo &FI) const override { if (!getCXXABI().classifyReturnType(FI)) @@ -660,15 +740,52 @@ private: for (auto &Arg : FI.arguments()) Arg.info = classifyArgumentType(Arg.type); } + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + + bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); + } + + bool isSwiftErrorInRegister() const override { + return false; + } }; class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo { public: explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(new WebAssemblyABIInfo(CGT)) {} + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override { + TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); + if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) { + if (const auto *Attr = FD->getAttr<WebAssemblyImportModuleAttr>()) { + llvm::Function *Fn = cast<llvm::Function>(GV); + llvm::AttrBuilder B; + B.addAttribute("wasm-import-module", Attr->getImportModule()); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + } + if (const auto *Attr = FD->getAttr<WebAssemblyImportNameAttr>()) { + llvm::Function *Fn = cast<llvm::Function>(GV); + llvm::AttrBuilder B; + B.addAttribute("wasm-import-name", Attr->getImportName()); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + } + } + + if (auto *FD = dyn_cast_or_null<FunctionDecl>(D)) { + llvm::Function *Fn = cast<llvm::Function>(GV); + if (!FD->doesThisDeclarationHaveABody() && !FD->hasPrototype()) + Fn->addFnAttr("no-prototype"); + } + } }; -/// \brief Classify argument of given type \p Ty. +/// Classify argument of given type \p Ty. ABIArgInfo WebAssemblyABIInfo::classifyArgumentType(QualType Ty) const { Ty = useFirstFieldIfTransparentUnion(Ty); @@ -688,7 +805,7 @@ ABIArgInfo WebAssemblyABIInfo::classifyArgumentType(QualType Ty) const { } // Otherwise just do the default thing. - return DefaultABIInfo::classifyArgumentType(Ty); + return defaultInfo.classifyArgumentType(Ty); } ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const { @@ -708,7 +825,15 @@ ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const { } // Otherwise just do the default thing. 
- return DefaultABIInfo::classifyReturnType(RetTy); + return defaultInfo.classifyReturnType(RetTy); +} + +Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect=*/ false, + getContext().getTypeInfoInChars(Ty), + CharUnits::fromQuantity(4), + /*AllowHigherAlign=*/ true); } //===----------------------------------------------------------------------===// @@ -755,7 +880,7 @@ Address PNaClABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect()); } -/// \brief Classify argument of given type \p Ty. +/// Classify argument of given type \p Ty. ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty) const { if (isAggregateTypeForABI(Ty)) { if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) @@ -769,8 +894,8 @@ ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty) const { return ABIArgInfo::getDirect(); } - return (Ty->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); } ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const { @@ -785,8 +910,8 @@ ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const { if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return (RetTy->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); } /// IsX86_MMXType - Return true if this is an MMX type. @@ -800,7 +925,10 @@ bool IsX86_MMXType(llvm::Type *IRType) { static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, StringRef Constraint, llvm::Type* Ty) { - if ((Constraint == "y" || Constraint == "&y") && Ty->isVectorTy()) { + bool IsMMXCons = llvm::StringSwitch<bool>(Constraint) + .Cases("y", "&y", "^Ym", true) + .Default(false); + if (IsMMXCons && Ty->isVectorTy()) { if (cast<llvm::VectorType>(Ty)->getBitWidth() != 64) { // Invalid MMX constraint return nullptr; @@ -817,8 +945,14 @@ static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, /// X86_VectorCall calling convention. Shared between x86_32 and x86_64. static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) { if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { - if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) + if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) { + if (BT->getKind() == BuiltinType::LongDouble) { + if (&Context.getTargetInfo().getLongDoubleFormat() == + &llvm::APFloat::x87DoubleExtended()) + return false; + } return true; + } } else if (const VectorType *VT = Ty->getAs<VectorType>()) { // vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX // registers specially. @@ -835,11 +969,19 @@ static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) { return NumMembers <= 4; } +/// Returns a Homogeneous Vector Aggregate ABIArgInfo, used in X86. 
+static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) { + auto AI = ABIArgInfo::getDirect(T); + AI.setInReg(true); + AI.setCanBeFlattened(false); + return AI; +} + //===----------------------------------------------------------------------===// // X86-32 ABI Implementation //===----------------------------------------------------------------------===// -/// \brief Similar to llvm::CCState, but for Clang. +/// Similar to llvm::CCState, but for Clang. struct CCState { CCState(unsigned CC) : CC(CC), FreeRegs(0), FreeSSERegs(0) {} @@ -848,8 +990,13 @@ struct CCState { unsigned FreeSSERegs; }; +enum { + // Vectorcall only allows the first 6 parameters to be passed in registers. + VectorcallMaxParamNumAsReg = 6 +}; + /// X86_32ABIInfo - The X86-32 ABI information. -class X86_32ABIInfo : public ABIInfo { +class X86_32ABIInfo : public SwiftABIInfo { enum Class { Integer, Float @@ -887,13 +1034,14 @@ class X86_32ABIInfo : public ABIInfo { ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const; - /// \brief Return the alignment to use for the given type on the stack. + /// Return the alignment to use for the given type on the stack. unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const; Class classify(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const; ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; - /// \brief Updates the number of available free registers, returns + + /// Updates the number of available free registers, returns /// true if any registers were allocated. bool updateFreeRegs(QualType Ty, CCState &State) const; @@ -901,13 +1049,17 @@ class X86_32ABIInfo : public ABIInfo { bool &NeedsPadding) const; bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const; - /// \brief Rewrite the function info so that all memory arguments use + bool canExpandIndirectArgument(QualType Ty) const; + + /// Rewrite the function info so that all memory arguments use /// inalloca. void rewriteWithInAlloca(CGFunctionInfo &FI) const; void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields, CharUnits &StackOffset, ABIArgInfo &Info, QualType Type) const; + void computeVectorCallArgs(CGFunctionInfo &FI, CCState &State, + bool &UsedInAlloca) const; public: @@ -918,12 +1070,26 @@ public: X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, unsigned NumRegisterParameters, bool SoftFloatABI) - : ABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI), - IsRetSmallStructInRegABI(RetSmallStructInRegABI), + : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI), + IsRetSmallStructInRegABI(RetSmallStructInRegABI), IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI), IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()), DefaultNumRegisterParameters(NumRegisterParameters) {} + + bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, + bool asReturnValue) const override { + // LLVM's x86-32 lowering currently only assigns up to three + // integer registers and three fp registers. Oddly, it'll use up to + // four vector registers for vectors, but those can overlap with the + // scalar registers. + return occupiesMoreThan(CGT, scalars, /*total*/ 3); + } + + bool isSwiftErrorInRegister() const override { + // x86-32 lowering does not support passing swifterror in a register. 
+ return false; + } }; class X86_32TargetCodeGenInfo : public TargetCodeGenInfo { @@ -968,10 +1134,15 @@ public: getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override { unsigned Sig = (0xeb << 0) | // jmp rel8 (0x06 << 8) | // .+0x08 - ('F' << 16) | - ('T' << 24); + ('v' << 16) | + ('2' << 24); return llvm::ConstantInt::get(CGM.Int32Ty, Sig); } + + StringRef getARCRetainAutoreleasedReturnValueMarker() const override { + return "movl\t%ebp, %ebp" + "\t\t// marker for objc_retainAutoreleaseReturnValue"; + } }; } @@ -1106,6 +1277,88 @@ bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty, return true; } +static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { + // Treat complex types as the element type. + if (const ComplexType *CTy = Ty->getAs<ComplexType>()) + Ty = CTy->getElementType(); + + // Check for a type which we know has a simple scalar argument-passing + // convention without any padding. (We're specifically looking for 32 + // and 64-bit integer and integer-equivalents, float, and double.) + if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() && + !Ty->isEnumeralType() && !Ty->isBlockPointerType()) + return false; + + uint64_t Size = Context.getTypeSize(Ty); + return Size == 32 || Size == 64; +} + +static bool addFieldSizes(ASTContext &Context, const RecordDecl *RD, + uint64_t &Size) { + for (const auto *FD : RD->fields()) { + // Scalar arguments on the stack get 4 byte alignment on x86. If the + // argument is smaller than 32-bits, expanding the struct will create + // alignment padding. + if (!is32Or64BitBasicType(FD->getType(), Context)) + return false; + + // FIXME: Reject bit-fields wholesale; there are two problems, we don't know + // how to expand them yet, and the predicate for telling if a bitfield still + // counts as "basic" is more complicated than what we were doing previously. + if (FD->isBitField()) + return false; + + Size += Context.getTypeSize(FD->getType()); + } + return true; +} + +static bool addBaseAndFieldSizes(ASTContext &Context, const CXXRecordDecl *RD, + uint64_t &Size) { + // Don't do this if there are any non-empty bases. + for (const CXXBaseSpecifier &Base : RD->bases()) { + if (!addBaseAndFieldSizes(Context, Base.getType()->getAsCXXRecordDecl(), + Size)) + return false; + } + if (!addFieldSizes(Context, RD, Size)) + return false; + return true; +} + +/// Test whether an argument type which is to be passed indirectly (on the +/// stack) would have the equivalent layout if it was expanded into separate +/// arguments. If so, we prefer to do the latter to avoid inhibiting +/// optimizations. +bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const { + // We can only expand structure types. + const RecordType *RT = Ty->getAs<RecordType>(); + if (!RT) + return false; + const RecordDecl *RD = RT->getDecl(); + uint64_t Size = 0; + if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { + if (!IsWin32StructABI) { + // On non-Windows, we have to conservatively match our old bitcode + // prototypes in order to be ABI-compatible at the bitcode level. + if (!CXXRD->isCLike()) + return false; + } else { + // Don't do this for dynamic classes. + if (CXXRD->isDynamicClass()) + return false; + } + if (!addBaseAndFieldSizes(getContext(), CXXRD, Size)) + return false; + } else { + if (!addFieldSizes(getContext(), RD, Size)) + return false; + } + + // We can do this if there was no alignment padding. 
+ return Size == getContext().getTypeSize(Ty); +} + ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState &State) const { // If the return value is indirect, then the hidden argument is consuming one // integer register. @@ -1124,7 +1377,8 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, const Type *Base = nullptr; uint64_t NumElts = 0; - if (State.CC == llvm::CallingConv::X86_VectorCall && + if ((State.CC == llvm::CallingConv::X86_VectorCall || + State.CC == llvm::CallingConv::X86_RegCall) && isHomogeneousAggregate(RetTy, Base, NumElts)) { // The LLVM struct type for such an aggregate should lower properly. return ABIArgInfo::getDirect(); @@ -1166,6 +1420,10 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, if (!IsRetSmallStructInRegABI && !RetTy->isAnyComplexType()) return getIndirectReturnResult(RetTy, State); + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), RetTy, true)) + return ABIArgInfo::getIgnore(); + // Small structures which are register sized are generally returned // in a register. if (shouldReturnTypeInRegister(RetTy, getContext())) { @@ -1193,8 +1451,8 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return (RetTy->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); } static bool isSSEVectorType(ASTContext &Context, QualType Ty) { @@ -1315,9 +1573,15 @@ bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const { return true; } -bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State, +bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg, bool &NeedsPadding) const { + // On Windows, aggregates other than HFAs are never passed in registers, and + // they do not consume register slots. Homogenous floating-point aggregates + // (HFAs) have already been dealt with at this point. + if (IsWin32StructABI && isAggregateTypeForABI(Ty)) + return false; + NeedsPadding = false; InReg = !IsMCUABI; @@ -1328,7 +1592,8 @@ bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State, return true; if (State.CC == llvm::CallingConv::X86_FastCall || - State.CC == llvm::CallingConv::X86_VectorCall) { + State.CC == llvm::CallingConv::X86_VectorCall || + State.CC == llvm::CallingConv::X86_RegCall) { if (getContext().getTypeSize(Ty) <= 32 && State.FreeRegs) NeedsPadding = true; @@ -1346,11 +1611,12 @@ bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const { return false; if (State.CC == llvm::CallingConv::X86_FastCall || - State.CC == llvm::CallingConv::X86_VectorCall) { + State.CC == llvm::CallingConv::X86_VectorCall || + State.CC == llvm::CallingConv::X86_RegCall) { if (getContext().getTypeSize(Ty) > 32) return false; - return (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() || + return (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() || Ty->isReferenceType()); } @@ -1375,12 +1641,13 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, } } - // vectorcall adds the concept of a homogenous vector aggregate, similar + // Regcall uses the concept of a homogenous vector aggregate, similar // to other targets. 
const Type *Base = nullptr; uint64_t NumElts = 0; - if (State.CC == llvm::CallingConv::X86_VectorCall && + if (State.CC == llvm::CallingConv::X86_RegCall && isHomogeneousAggregate(Ty, Base, NumElts)) { + if (State.FreeSSERegs >= NumElts) { State.FreeSSERegs -= NumElts; if (Ty->isBuiltinType() || Ty->isVectorType()) @@ -1391,23 +1658,19 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, } if (isAggregateTypeForABI(Ty)) { - if (RT) { - // Structs are always byval on win32, regardless of what they contain. - if (IsWin32StructABI) - return getIndirectResult(Ty, true, State); + // Structures with flexible arrays are always indirect. + // FIXME: This should not be byval! + if (RT && RT->getDecl()->hasFlexibleArrayMember()) + return getIndirectResult(Ty, true, State); - // Structures with flexible arrays are always indirect. - if (RT->getDecl()->hasFlexibleArrayMember()) - return getIndirectResult(Ty, true, State); - } - - // Ignore empty structs/unions. - if (isEmptyRecord(getContext(), Ty, true)) + // Ignore empty structs/unions on non-Windows. + if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); llvm::LLVMContext &LLVMContext = getVMContext(); llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); - bool NeedsPadding, InReg; + bool NeedsPadding = false; + bool InReg; if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) { unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32); @@ -1425,12 +1688,12 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, // optimizations. // Don't do this for the MCU if there are still free integer registers // (see X86_64 ABI for full explanation). - if (getContext().getTypeSize(Ty) <= 4*32 && - canExpandIndirectArgument(Ty, getContext()) && - (!IsMCUABI || State.FreeRegs == 0)) + if (getContext().getTypeSize(Ty) <= 4 * 32 && + (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty)) return ABIArgInfo::getExpandWithPadding( State.CC == llvm::CallingConv::X86_FastCall || - State.CC == llvm::CallingConv::X86_VectorCall, + State.CC == llvm::CallingConv::X86_VectorCall || + State.CC == llvm::CallingConv::X86_RegCall, PaddingType); return getIndirectResult(Ty, true, State); @@ -1461,8 +1724,8 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, if (Ty->isPromotableIntegerType()) { if (InReg) - return ABIArgInfo::getExtendInReg(); - return ABIArgInfo::getExtend(); + return ABIArgInfo::getExtendInReg(Ty); + return ABIArgInfo::getExtend(Ty); } if (InReg) @@ -1470,6 +1733,58 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, return ABIArgInfo::getDirect(); } +void X86_32ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, CCState &State, + bool &UsedInAlloca) const { + // Vectorcall x86 works subtly different than in x64, so the format is + // a bit different than the x64 version. First, all vector types (not HVAs) + // are assigned, with the first 6 ending up in the YMM0-5 or XMM0-5 registers. + // This differs from the x64 implementation, where the first 6 by INDEX get + // registers. + // After that, integers AND HVAs are assigned Left to Right in the same pass. + // Integers are passed as ECX/EDX if one is available (in order). HVAs will + // first take up the remaining YMM/XMM registers. If insufficient registers + // remain but an integer register (ECX/EDX) is available, it will be passed + // in that, else, on the stack. 
+ for (auto &I : FI.arguments()) { + // First pass do all the vector types. + const Type *Base = nullptr; + uint64_t NumElts = 0; + const QualType& Ty = I.type; + if ((Ty->isVectorType() || Ty->isBuiltinType()) && + isHomogeneousAggregate(Ty, Base, NumElts)) { + if (State.FreeSSERegs >= NumElts) { + State.FreeSSERegs -= NumElts; + I.info = ABIArgInfo::getDirect(); + } else { + I.info = classifyArgumentType(Ty, State); + } + UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); + } + } + + for (auto &I : FI.arguments()) { + // Second pass, do the rest! + const Type *Base = nullptr; + uint64_t NumElts = 0; + const QualType& Ty = I.type; + bool IsHva = isHomogeneousAggregate(Ty, Base, NumElts); + + if (IsHva && !Ty->isVectorType() && !Ty->isBuiltinType()) { + // Assign true HVAs (non vector/native FP types). + if (State.FreeSSERegs >= NumElts) { + State.FreeSSERegs -= NumElts; + I.info = getDirectX86Hva(); + } else { + I.info = getIndirectResult(Ty, /*ByVal=*/false, State); + } + } else if (!IsHva) { + // Assign all Non-HVAs, so this will exclude Vector/FP args. + I.info = classifyArgumentType(Ty, State); + UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); + } + } +} + void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { CCState State(FI.getCallingConvention()); if (IsMCUABI) @@ -1481,10 +1796,13 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { State.FreeSSERegs = 6; } else if (FI.getHasRegParm()) State.FreeRegs = FI.getRegParm(); - else + else if (State.CC == llvm::CallingConv::X86_RegCall) { + State.FreeRegs = 5; + State.FreeSSERegs = 8; + } else State.FreeRegs = DefaultNumRegisterParameters; - if (!getCXXABI().classifyReturnType(FI)) { + if (!::classifyReturnType(getCXXABI(), FI, *this)) { FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State); } else if (FI.getReturnInfo().isIndirect()) { // The C++ ABI is not aware of register usage, so we have to check if the @@ -1501,9 +1819,14 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { ++State.FreeRegs; bool UsedInAlloca = false; - for (auto &I : FI.arguments()) { - I.info = classifyArgumentType(I.type, State); - UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); + if (State.CC == llvm::CallingConv::X86_VectorCall) { + computeVectorCallArgs(FI, State, UsedInAlloca); + } else { + // If not vectorcall, revert to normal behavior. + for (auto &I : FI.arguments()) { + I.info = classifyArgumentType(I.type, State); + UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); + } } // If we needed to use inalloca for any argument, do a second pass and rewrite @@ -1526,7 +1849,7 @@ X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields, // Insert padding bytes to respect alignment. CharUnits FieldEnd = StackOffset; - StackOffset = FieldEnd.RoundUpToAlignment(FieldAlign); + StackOffset = FieldEnd.alignTo(FieldAlign); if (StackOffset != FieldEnd) { CharUnits NumBytes = StackOffset - FieldEnd; llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext()); @@ -1547,10 +1870,14 @@ static bool isArgInAlloca(const ABIArgInfo &Info) { return false; case ABIArgInfo::Direct: case ABIArgInfo::Extend: - case ABIArgInfo::Expand: if (Info.getInReg()) return false; return true; + case ABIArgInfo::Expand: + case ABIArgInfo::CoerceAndExpand: + // These are aggregate types which are never passed in registers when + // inalloca is involved. 
+ return true; } llvm_unreachable("invalid enum"); } @@ -1637,7 +1964,6 @@ bool X86_32TargetCodeGenInfo::isStructReturnInRegABI( case llvm::Triple::DragonFly: case llvm::Triple::FreeBSD: case llvm::Triple::OpenBSD: - case llvm::Triple::Bitrig: case llvm::Triple::Win32: return true; default: @@ -1645,21 +1971,18 @@ bool X86_32TargetCodeGenInfo::isStructReturnInRegABI( } } -void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const { +void X86_32TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { + if (GV->isDeclaration()) + return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { - // Get the LLVM function. llvm::Function *Fn = cast<llvm::Function>(GV); - - // Now add the 'alignstack' attribute with a value of 16. - llvm::AttrBuilder B; - B.addStackAlignmentAttr(16); - Fn->addAttributes(llvm::AttributeSet::FunctionIndex, - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - B)); + Fn->addFnAttr("stackrealign"); + } + if (FD->hasAttr<AnyX86InterruptAttr>()) { + llvm::Function *Fn = cast<llvm::Function>(GV); + Fn->setCallingConv(llvm::CallingConv::X86_INTR); } } } @@ -1727,7 +2050,7 @@ static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) { } /// X86_64ABIInfo - The X86_64 ABI information. -class X86_64ABIInfo : public ABIInfo { +class X86_64ABIInfo : public SwiftABIInfo { enum Class { Integer = 0, SSE, @@ -1814,12 +2137,16 @@ class X86_64ABIInfo : public ABIInfo { ABIArgInfo classifyReturnType(QualType RetTy) const; - ABIArgInfo classifyArgumentType(QualType Ty, - unsigned freeIntRegs, - unsigned &neededInt, - unsigned &neededSSE, + ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs, + unsigned &neededInt, unsigned &neededSSE, bool isNamedArg) const; + ABIArgInfo classifyRegCallStructType(QualType Ty, unsigned &NeededInt, + unsigned &NeededSSE) const; + + ABIArgInfo classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt, + unsigned &NeededSSE) const; + bool IsIllegalVectorType(QualType Ty) const; /// The 0.98 ABI revision clarified a lot of ambiguities, @@ -1831,6 +2158,22 @@ class X86_64ABIInfo : public ABIInfo { return !getTarget().getTriple().isOSDarwin(); } + /// GCC classifies <1 x long long> as SSE but some platform ABIs choose to + /// classify it as INTEGER (for compatibility with older clang compilers). + bool classifyIntegerMMXAsSSE() const { + // Clang <= 3.8 did not do this. + if (getContext().getLangOpts().getClangABICompat() <= + LangOptions::ClangABI::Ver3_8) + return false; + + const llvm::Triple &Triple = getTarget().getTriple(); + if (Triple.isOSDarwin() || Triple.getOS() == llvm::Triple::PS4) + return false; + if (Triple.isOSFreeBSD() && Triple.getOSMajorVersion() >= 10) + return false; + return true; + } + X86AVXABILevel AVXLevel; // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on // 64-bit hardware. 
@@ -1838,7 +2181,7 @@ class X86_64ABIInfo : public ABIInfo { public: X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) : - ABIInfo(CGT), AVXLevel(AVXLevel), + SwiftABIInfo(CGT), AVXLevel(AVXLevel), Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) { } @@ -1865,13 +2208,21 @@ public: bool has64BitPointers() const { return Has64BitPointers; } + + bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); + } + bool isSwiftErrorInRegister() const override { + return true; + } }; /// WinX86_64ABIInfo - The Windows X86_64 ABI information. -class WinX86_64ABIInfo : public ABIInfo { +class WinX86_64ABIInfo : public SwiftABIInfo { public: WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT) - : ABIInfo(CGT), + : SwiftABIInfo(CGT), IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {} void computeInfo(CGFunctionInfo &FI) const override; @@ -1890,11 +2241,24 @@ public: return isX86VectorCallAggregateSmallEnough(NumMembers); } -private: - ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, - bool IsReturnType) const; + bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type *> scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); + } - bool IsMingw64; + bool isSwiftErrorInRegister() const override { + return true; + } + +private: + ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType, + bool IsVectorCall, bool IsRegCall) const; + ABIArgInfo reclassifyHvaArgType(QualType Ty, unsigned &FreeSSERegs, + const ABIArgInfo ¤t) const; + void computeVectorCallArgs(CGFunctionInfo &FI, unsigned FreeSSERegs, + bool IsVectorCall, bool IsRegCall) const; + + bool IsMingw64; }; class X86_64TargetCodeGenInfo : public TargetCodeGenInfo { @@ -1953,19 +2317,28 @@ public: llvm::Constant * getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override { - unsigned Sig; - if (getABIInfo().has64BitPointers()) - Sig = (0xeb << 0) | // jmp rel8 - (0x0a << 8) | // .+0x0c - ('F' << 16) | - ('T' << 24); - else - Sig = (0xeb << 0) | // jmp rel8 - (0x06 << 8) | // .+0x08 - ('F' << 16) | - ('T' << 24); + unsigned Sig = (0xeb << 0) | // jmp rel8 + (0x06 << 8) | // .+0x08 + ('v' << 16) | + ('2' << 24); return llvm::ConstantInt::get(CGM.Int32Ty, Sig); } + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override { + if (GV->isDeclaration()) + return; + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { + if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { + llvm::Function *Fn = cast<llvm::Function>(GV); + Fn->addFnAttr("stackrealign"); + } + if (FD->hasAttr<AnyX86InterruptAttr>()) { + llvm::Function *Fn = cast<llvm::Function>(GV); + Fn->setCallingConv(llvm::CallingConv::X86_INTR); + } + } + } }; class PS4TargetCodeGenInfo : public X86_64TargetCodeGenInfo { @@ -1991,7 +2364,7 @@ static std::string qualifyWindowsLibrary(llvm::StringRef Lib) { bool Quote = (Lib.find(" ") != StringRef::npos); std::string ArgStr = Quote ? "\"" : ""; ArgStr += Lib; - if (!Lib.endswith_lower(".lib")) + if (!Lib.endswith_lower(".lib") && !Lib.endswith_lower(".a")) ArgStr += ".lib"; ArgStr += Quote ? 
"\"" : ""; return ArgStr; @@ -2021,25 +2394,24 @@ public: } }; -static void addStackProbeSizeTargetAttribute(const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) { - if (D && isa<FunctionDecl>(D)) { - if (CGM.getCodeGenOpts().StackProbeSize != 4096) { - llvm::Function *Fn = cast<llvm::Function>(GV); +static void addStackProbeTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) { + if (llvm::Function *Fn = dyn_cast_or_null<llvm::Function>(GV)) { + if (CGM.getCodeGenOpts().StackProbeSize != 4096) Fn->addFnAttr("stack-probe-size", llvm::utostr(CGM.getCodeGenOpts().StackProbeSize)); - } + if (CGM.getCodeGenOpts().NoStackArgProbe) + Fn->addFnAttr("no-stack-arg-probe"); } } -void WinX86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const { +void WinX86_32TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); - - addStackProbeSizeTargetAttribute(D, GV, CGM); + if (GV->isDeclaration()) + return; + addStackProbeTargetAttributes(D, GV, CGM); } class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo { @@ -2078,12 +2450,23 @@ public: } }; -void WinX86_64TargetCodeGenInfo::setTargetAttributes(const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const { +void WinX86_64TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); + if (GV->isDeclaration()) + return; + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { + if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { + llvm::Function *Fn = cast<llvm::Function>(GV); + Fn->addFnAttr("stackrealign"); + } + if (FD->hasAttr<AnyX86InterruptAttr>()) { + llvm::Function *Fn = cast<llvm::Function>(GV); + Fn->setCallingConv(llvm::CallingConv::X86_INTR); + } + } - addStackProbeSizeTargetAttribute(D, GV, CGM); + addStackProbeTargetAttributes(D, GV, CGM); } } @@ -2189,13 +2572,13 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Current = SSE; } else if (k == BuiltinType::LongDouble) { const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); - if (LDF == &llvm::APFloat::IEEEquad) { + if (LDF == &llvm::APFloat::IEEEquad()) { Lo = SSE; Hi = SSEUp; - } else if (LDF == &llvm::APFloat::x87DoubleExtended) { + } else if (LDF == &llvm::APFloat::x87DoubleExtended()) { Lo = X87; Hi = X87Up; - } else if (LDF == &llvm::APFloat::IEEEdouble) { + } else if (LDF == &llvm::APFloat::IEEEdouble()) { Current = SSE; } else llvm_unreachable("unexpected long double representation!"); @@ -2255,15 +2638,20 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, if (EB_Lo != EB_Hi) Hi = Lo; } else if (Size == 64) { + QualType ElementType = VT->getElementType(); + // gcc passes <1 x double> in memory. :( - if (VT->getElementType()->isSpecificBuiltinType(BuiltinType::Double)) + if (ElementType->isSpecificBuiltinType(BuiltinType::Double)) return; - // gcc passes <1 x long long> as INTEGER. - if (VT->getElementType()->isSpecificBuiltinType(BuiltinType::LongLong) || - VT->getElementType()->isSpecificBuiltinType(BuiltinType::ULongLong) || - VT->getElementType()->isSpecificBuiltinType(BuiltinType::Long) || - VT->getElementType()->isSpecificBuiltinType(BuiltinType::ULong)) + // gcc passes <1 x long long> as SSE but clang used to unconditionally + // pass them as integer. 
For platforms where clang is the de facto + // platform compiler, we must continue to use integer. + if (!classifyIntegerMMXAsSSE() && + (ElementType->isSpecificBuiltinType(BuiltinType::LongLong) || + ElementType->isSpecificBuiltinType(BuiltinType::ULongLong) || + ElementType->isSpecificBuiltinType(BuiltinType::Long) || + ElementType->isSpecificBuiltinType(BuiltinType::ULong))) Current = Integer; else Current = SSE; @@ -2309,11 +2697,11 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Lo = Hi = SSE; } else if (ET == getContext().LongDoubleTy) { const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); - if (LDF == &llvm::APFloat::IEEEquad) + if (LDF == &llvm::APFloat::IEEEquad()) Current = Memory; - else if (LDF == &llvm::APFloat::x87DoubleExtended) + else if (LDF == &llvm::APFloat::x87DoubleExtended()) Current = ComplexX87; - else if (LDF == &llvm::APFloat::IEEEdouble) + else if (LDF == &llvm::APFloat::IEEEdouble()) Lo = Hi = SSE; else llvm_unreachable("unexpected long double representation!"); @@ -2335,8 +2723,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, uint64_t Size = getContext().getTypeSize(Ty); // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger - // than four eightbytes, ..., it has class MEMORY. - if (Size > 256) + // than eight eightbytes, ..., it has class MEMORY. + if (Size > 512) return; // AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned @@ -2355,7 +2743,9 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, // The only case a 256-bit wide vector could be used is when the array // contains a single 256-bit element. Since Lo and Hi logic isn't extended // to work for sizes wider than 128, early check and fallback to memory. - if (Size > 128 && EltSize != 256) + // + if (Size > 128 && + (Size != EltSize || Size > getNativeVectorSizeForAVXABI(AVXLevel))) return; for (uint64_t i=0, Offset=OffsetBase; i<ArraySize; ++i, Offset += EltSize) { @@ -2376,8 +2766,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, uint64_t Size = getContext().getTypeSize(Ty); // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger - // than four eightbytes, ..., it has class MEMORY. - if (Size > 256) + // than eight eightbytes, ..., it has class MEMORY. + if (Size > 512) return; // AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial @@ -2430,6 +2820,10 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); bool BitField = i->isBitField(); + // Ignore padding bit-fields. + if (BitField && i->isUnnamedBitfield()) + continue; + // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than // four eightbytes, or it contains unaligned fields, it has class MEMORY. // @@ -2437,7 +2831,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, // contains a single 256-bit element. Since Lo and Hi logic isn't extended // to work for sizes wider than 128, early check and fallback to memory. // - if (Size > 128 && getContext().getTypeSize(i->getType()) != 256) { + if (Size > 128 && (Size != getContext().getTypeSize(i->getType()) || + Size > getNativeVectorSizeForAVXABI(AVXLevel))) { Lo = Memory; postMerge(Size, Lo, Hi); return; @@ -2461,10 +2856,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, // structure to be passed in memory even if unaligned, and // therefore they can straddle an eightbyte. if (BitField) { - // Ignore padding bit-fields. 
- if (i->isUnnamedBitfield()) - continue; - + assert(!i->isUnnamedBitfield()); uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); uint64_t Size = i->getBitWidthValue(getContext()); @@ -2499,8 +2891,8 @@ ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const { if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); - return (Ty->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); } return getNaturalAlignIndirect(Ty); @@ -2532,8 +2924,8 @@ ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty, if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); - return (Ty->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); } if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) @@ -2592,7 +2984,7 @@ llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const { // We couldn't find the preferred IR vector type for 'Ty'. uint64_t Size = getContext().getTypeSize(Ty); - assert((Size == 128 || Size == 256) && "Invalid type found!"); + assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!"); // Return a LLVM IR vector type based on the size of 'Ty'. return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()), @@ -2827,7 +3219,7 @@ GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi, // the second element at offset 8. Check for this: unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo); unsigned HiAlign = TD.getABITypeAlignment(Hi); - unsigned HiStart = llvm::RoundUpToAlignment(LoSize, HiAlign); + unsigned HiStart = llvm::alignTo(LoSize, HiAlign); assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!"); // To handle this, we have to increase the size of the low part so that the @@ -2849,8 +3241,7 @@ GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi, } } - llvm::StructType *Result = llvm::StructType::get(Lo, Hi, nullptr); - + llvm::StructType *Result = llvm::StructType::get(Lo, Hi); // Verify that the second element is at an 8-byte offset. 
assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 && @@ -2903,7 +3294,7 @@ classifyReturnType(QualType RetTy) const { if (RetTy->isIntegralOrEnumerationType() && RetTy->isPromotableIntegerType()) - return ABIArgInfo::getExtend(); + return ABIArgInfo::getExtend(RetTy); } break; @@ -2925,8 +3316,7 @@ classifyReturnType(QualType RetTy) const { case ComplexX87: assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification."); ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()), - llvm::Type::getX86_FP80Ty(getVMContext()), - nullptr); + llvm::Type::getX86_FP80Ty(getVMContext())); break; } @@ -3049,7 +3439,7 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType( if (Ty->isIntegralOrEnumerationType() && Ty->isPromotableIntegerType()) - return ABIArgInfo::getExtend(); + return ABIArgInfo::getExtend(Ty); } break; @@ -3116,22 +3506,113 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType( return ABIArgInfo::getDirect(ResType); } +ABIArgInfo +X86_64ABIInfo::classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt, + unsigned &NeededSSE) const { + auto RT = Ty->getAs<RecordType>(); + assert(RT && "classifyRegCallStructType only valid with struct types"); + + if (RT->getDecl()->hasFlexibleArrayMember()) + return getIndirectReturnResult(Ty); + + // Sum up bases + if (auto CXXRD = dyn_cast<CXXRecordDecl>(RT->getDecl())) { + if (CXXRD->isDynamicClass()) { + NeededInt = NeededSSE = 0; + return getIndirectReturnResult(Ty); + } + + for (const auto &I : CXXRD->bases()) + if (classifyRegCallStructTypeImpl(I.getType(), NeededInt, NeededSSE) + .isIndirect()) { + NeededInt = NeededSSE = 0; + return getIndirectReturnResult(Ty); + } + } + + // Sum up members + for (const auto *FD : RT->getDecl()->fields()) { + if (FD->getType()->isRecordType() && !FD->getType()->isUnionType()) { + if (classifyRegCallStructTypeImpl(FD->getType(), NeededInt, NeededSSE) + .isIndirect()) { + NeededInt = NeededSSE = 0; + return getIndirectReturnResult(Ty); + } + } else { + unsigned LocalNeededInt, LocalNeededSSE; + if (classifyArgumentType(FD->getType(), UINT_MAX, LocalNeededInt, + LocalNeededSSE, true) + .isIndirect()) { + NeededInt = NeededSSE = 0; + return getIndirectReturnResult(Ty); + } + NeededInt += LocalNeededInt; + NeededSSE += LocalNeededSSE; + } + } + + return ABIArgInfo::getDirect(); +} + +ABIArgInfo X86_64ABIInfo::classifyRegCallStructType(QualType Ty, + unsigned &NeededInt, + unsigned &NeededSSE) const { + + NeededInt = 0; + NeededSSE = 0; + + return classifyRegCallStructTypeImpl(Ty, NeededInt, NeededSSE); +} + void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + const unsigned CallingConv = FI.getCallingConvention(); + // It is possible to force Win64 calling convention on any x86_64 target by + // using __attribute__((ms_abi)). In such case to correctly emit Win64 + // compatible code delegate this call to WinX86_64ABIInfo::computeInfo. + if (CallingConv == llvm::CallingConv::Win64) { + WinX86_64ABIInfo Win64ABIInfo(CGT); + Win64ABIInfo.computeInfo(FI); + return; + } + + bool IsRegCall = CallingConv == llvm::CallingConv::X86_RegCall; // Keep track of the number of assigned registers. - unsigned freeIntRegs = 6, freeSSERegs = 8; + unsigned FreeIntRegs = IsRegCall ? 11 : 6; + unsigned FreeSSERegs = IsRegCall ? 
16 : 8; + unsigned NeededInt, NeededSSE; + + if (!::classifyReturnType(getCXXABI(), FI, *this)) { + if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() && + !FI.getReturnType()->getTypePtr()->isUnionType()) { + FI.getReturnInfo() = + classifyRegCallStructType(FI.getReturnType(), NeededInt, NeededSSE); + if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { + FreeIntRegs -= NeededInt; + FreeSSERegs -= NeededSSE; + } else { + FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); + } + } else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>()) { + // Complex Long Double Type is passed in Memory when Regcall + // calling convention is used. + const ComplexType *CT = FI.getReturnType()->getAs<ComplexType>(); + if (getContext().getCanonicalType(CT->getElementType()) == + getContext().LongDoubleTy) + FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); + } else + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + } // If the return value is indirect, then the hidden argument is consuming one // integer register. if (FI.getReturnInfo().isIndirect()) - --freeIntRegs; + --FreeIntRegs; // The chain argument effectively gives us another free register. if (FI.isChainCall()) - ++freeIntRegs; + ++FreeIntRegs; unsigned NumRequiredArgs = FI.getNumRequiredArgs(); // AMD64-ABI 3.2.3p3: Once arguments are classified, the registers @@ -3141,19 +3622,21 @@ void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { it != ie; ++it, ++ArgNo) { bool IsNamedArg = ArgNo < NumRequiredArgs; - unsigned neededInt, neededSSE; - it->info = classifyArgumentType(it->type, freeIntRegs, neededInt, - neededSSE, IsNamedArg); + if (IsRegCall && it->type->isStructureOrClassType()) + it->info = classifyRegCallStructType(it->type, NeededInt, NeededSSE); + else + it->info = classifyArgumentType(it->type, FreeIntRegs, NeededInt, + NeededSSE, IsNamedArg); // AMD64-ABI 3.2.3p3: If there are no registers available for any // eightbyte of an argument, the whole argument is passed on the // stack. If registers have already been assigned for some // eightbytes of such an argument, the assignments get reverted. - if (freeIntRegs >= neededInt && freeSSERegs >= neededSSE) { - freeIntRegs -= neededInt; - freeSSERegs -= neededSSE; + if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { + FreeIntRegs -= NeededInt; + FreeSSERegs -= NeededSSE; } else { - it->info = getIndirectResult(it->type, freeIntRegs); + it->info = getIndirectResult(it->type, FreeIntRegs); } } } @@ -3295,15 +3778,17 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr; // Copy the first element. - llvm::Value *V = - CGF.Builder.CreateDefaultAlignedLoad( - CGF.Builder.CreateBitCast(RegLoAddr, PTyLo)); + // FIXME: Our choice of alignment here and below is probably pessimistic. + llvm::Value *V = CGF.Builder.CreateAlignedLoad( + TyLo, CGF.Builder.CreateBitCast(RegLoAddr, PTyLo), + CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(TyLo))); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0, CharUnits::Zero())); // Copy the second element. 
- V = CGF.Builder.CreateDefaultAlignedLoad( - CGF.Builder.CreateBitCast(RegHiAddr, PTyHi)); + V = CGF.Builder.CreateAlignedLoad( + TyHi, CGF.Builder.CreateBitCast(RegHiAddr, PTyHi), + CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(TyHi))); CharUnits Offset = CharUnits::fromQuantity( getDataLayout().getStructLayout(ST)->getElementOffset(1)); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1, Offset)); @@ -3327,7 +3812,7 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false); RegAddr = Tmp; } - + } else if (neededSSE == 1) { RegAddr = Address(CGF.Builder.CreateGEP(RegSaveArea, fp_offset), CharUnits::fromQuantity(16)); @@ -3345,17 +3830,18 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, Address RegAddrHi = CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo, CharUnits::fromQuantity(16)); - llvm::Type *DoubleTy = CGF.DoubleTy; - llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy, nullptr); + llvm::Type *ST = AI.canHaveCoerceToType() + ? AI.getCoerceToType() + : llvm::StructType::get(CGF.DoubleTy, CGF.DoubleTy); llvm::Value *V; Address Tmp = CGF.CreateMemTemp(Ty); Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST); - V = CGF.Builder.CreateLoad( - CGF.Builder.CreateElementBitCast(RegAddrLo, DoubleTy)); + V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast( + RegAddrLo, ST->getStructElementType(0))); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0, CharUnits::Zero())); - V = CGF.Builder.CreateLoad( - CGF.Builder.CreateElementBitCast(RegAddrHi, DoubleTy)); + V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast( + RegAddrHi, ST->getStructElementType(1))); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1, CharUnits::fromQuantity(8))); @@ -3398,8 +3884,24 @@ Address X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, /*allowHigherAlign*/ false); } +ABIArgInfo +WinX86_64ABIInfo::reclassifyHvaArgType(QualType Ty, unsigned &FreeSSERegs, + const ABIArgInfo ¤t) const { + // Assumes vectorCall calling convention. + const Type *Base = nullptr; + uint64_t NumElts = 0; + + if (!Ty->isBuiltinType() && !Ty->isVectorType() && + isHomogeneousAggregate(Ty, Base, NumElts) && FreeSSERegs >= NumElts) { + FreeSSERegs -= NumElts; + return getDirectX86Hva(); + } + return current; +} + ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, - bool IsReturnType) const { + bool IsReturnType, bool IsVectorCall, + bool IsRegCall) const { if (Ty->isVoidType()) return ABIArgInfo::getIgnore(); @@ -3423,21 +3925,34 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, } - // vectorcall adds the concept of a homogenous vector aggregate, similar to - // other targets. const Type *Base = nullptr; uint64_t NumElts = 0; - if (FreeSSERegs && isHomogeneousAggregate(Ty, Base, NumElts)) { - if (FreeSSERegs >= NumElts) { - FreeSSERegs -= NumElts; - if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType()) + // vectorcall adds the concept of a homogenous vector aggregate, similar to + // other targets. 
+ if ((IsVectorCall || IsRegCall) && + isHomogeneousAggregate(Ty, Base, NumElts)) { + if (IsRegCall) { + if (FreeSSERegs >= NumElts) { + FreeSSERegs -= NumElts; + if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType()) + return ABIArgInfo::getDirect(); + return ABIArgInfo::getExpand(); + } + return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + } else if (IsVectorCall) { + if (FreeSSERegs >= NumElts && + (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())) { + FreeSSERegs -= NumElts; return ABIArgInfo::getDirect(); - return ABIArgInfo::getExpand(); + } else if (IsReturnType) { + return ABIArgInfo::getExpand(); + } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) { + // HVAs are delayed and reclassified in the 2nd step. + return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + } } - return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); } - if (Ty->isMemberPointerType()) { // If the member pointer is represented by an LLVM int or ptr, pass it // directly. @@ -3456,41 +3971,117 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width)); } - // Bool type is always extended to the ABI, other builtin types are not - // extended. - const BuiltinType *BT = Ty->getAs<BuiltinType>(); - if (BT && BT->getKind() == BuiltinType::Bool) - return ABIArgInfo::getExtend(); + if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { + switch (BT->getKind()) { + case BuiltinType::Bool: + // Bool type is always extended to the ABI, other builtin types are not + // extended. + return ABIArgInfo::getExtend(Ty); - // Mingw64 GCC uses the old 80 bit extended precision floating point unit. It - // passes them indirectly through memory. - if (IsMingw64 && BT && BT->getKind() == BuiltinType::LongDouble) { - const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); - if (LDF == &llvm::APFloat::x87DoubleExtended) - return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + case BuiltinType::LongDouble: + // Mingw64 GCC uses the old 80 bit extended precision floating point + // unit. It passes them indirectly through memory. + if (IsMingw64) { + const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); + if (LDF == &llvm::APFloat::x87DoubleExtended()) + return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + } + break; + + case BuiltinType::Int128: + case BuiltinType::UInt128: + // If it's a parameter type, the normal ABI rule is that arguments larger + // than 8 bytes are passed indirectly. GCC follows it. We follow it too, + // even though it isn't particularly efficient. + if (!IsReturnType) + return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + + // Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that. + // Clang matches them for compatibility. + return ABIArgInfo::getDirect( + llvm::VectorType::get(llvm::Type::getInt64Ty(getVMContext()), 2)); + + default: + break; + } } return ABIArgInfo::getDirect(); } +void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, + unsigned FreeSSERegs, + bool IsVectorCall, + bool IsRegCall) const { + unsigned Count = 0; + for (auto &I : FI.arguments()) { + // Vectorcall in x64 only permits the first 6 arguments to be passed + // as XMM/YMM registers. + if (Count < VectorcallMaxParamNumAsReg) + I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall); + else { + // Since these cannot be passed in registers, pretend no registers + // are left. 
+ unsigned ZeroSSERegsAvail = 0; + I.info = classify(I.type, /*FreeSSERegs=*/ZeroSSERegsAvail, false, + IsVectorCall, IsRegCall); + } + ++Count; + } + + for (auto &I : FI.arguments()) { + I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info); + } +} + void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { bool IsVectorCall = FI.getCallingConvention() == llvm::CallingConv::X86_VectorCall; + bool IsRegCall = FI.getCallingConvention() == llvm::CallingConv::X86_RegCall; + + unsigned FreeSSERegs = 0; + if (IsVectorCall) { + // We can use up to 4 SSE return registers with vectorcall. + FreeSSERegs = 4; + } else if (IsRegCall) { + // RegCall gives us 16 SSE registers. + FreeSSERegs = 16; + } - // We can use up to 4 SSE return registers with vectorcall. - unsigned FreeSSERegs = IsVectorCall ? 4 : 0; if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true); + FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true, + IsVectorCall, IsRegCall); + + if (IsVectorCall) { + // We can use up to 6 SSE register parameters with vectorcall. + FreeSSERegs = 6; + } else if (IsRegCall) { + // RegCall gives us 16 SSE registers, we can reuse the return registers. + FreeSSERegs = 16; + } + + if (IsVectorCall) { + computeVectorCallArgs(FI, FreeSSERegs, IsVectorCall, IsRegCall); + } else { + for (auto &I : FI.arguments()) + I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall); + } - // We can use up to 6 SSE register parameters with vectorcall. - FreeSSERegs = IsVectorCall ? 6 : 0; - for (auto &I : FI.arguments()) - I.info = classify(I.type, FreeSSERegs, false); } Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { - return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, + + bool IsIndirect = false; + + // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is + // not 1, 2, 4, or 8 bytes, must be passed by reference." + if (isAggregateTypeForABI(Ty) || Ty->isMemberPointerType()) { + uint64_t Width = getContext().getTypeSize(Ty); + IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width); + } + + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, CGF.getContext().getTypeInfoInChars(Ty), CharUnits::fromQuantity(8), /*allowHigherAlign*/ false); @@ -3500,10 +4091,25 @@ Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, namespace { /// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information. 
class PPC32_SVR4_ABIInfo : public DefaultABIInfo { -bool IsSoftFloatABI; + bool IsSoftFloatABI; + bool IsRetSmallStructInRegABI; + + CharUnits getParamTypeAlignment(QualType Ty) const; + public: - PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI) - : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI) {} + PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI, + bool RetSmallStructInRegABI) + : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI), + IsRetSmallStructInRegABI(RetSmallStructInRegABI) {} + + ABIArgInfo classifyReturnType(QualType RetTy) const; + + void computeInfo(CGFunctionInfo &FI) const override { + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type); + } Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; @@ -3511,8 +4117,13 @@ public: class PPC32TargetCodeGenInfo : public TargetCodeGenInfo { public: - PPC32TargetCodeGenInfo(CodeGenTypes &CGT, bool SoftFloatABI) - : TargetCodeGenInfo(new PPC32_SVR4_ABIInfo(CGT, SoftFloatABI)) {} + PPC32TargetCodeGenInfo(CodeGenTypes &CGT, bool SoftFloatABI, + bool RetSmallStructInRegABI) + : TargetCodeGenInfo(new PPC32_SVR4_ABIInfo(CGT, SoftFloatABI, + RetSmallStructInRegABI)) {} + + static bool isStructReturnInRegABI(const llvm::Triple &Triple, + const CodeGenOptions &Opts); int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. @@ -3522,13 +4133,74 @@ public: bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; }; +} + +CharUnits PPC32_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const { + // Complex types are passed just like their elements + if (const ComplexType *CTy = Ty->getAs<ComplexType>()) + Ty = CTy->getElementType(); + + if (Ty->isVectorType()) + return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16 + : 4); + + // For single-element float/vector structs, we consider the whole type + // to have the same alignment requirements as its single element. + const Type *AlignTy = nullptr; + if (const Type *EltType = isSingleElementStruct(Ty, getContext())) { + const BuiltinType *BT = EltType->getAs<BuiltinType>(); + if ((EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) || + (BT && BT->isFloatingPoint())) + AlignTy = EltType; + } + if (AlignTy) + return CharUnits::fromQuantity(AlignTy->isVectorType() ? 16 : 4); + return CharUnits::fromQuantity(4); +} + +ABIArgInfo PPC32_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { + uint64_t Size; + + // -msvr4-struct-return puts small aggregates in GPR3 and GPR4. + if (isAggregateTypeForABI(RetTy) && IsRetSmallStructInRegABI && + (Size = getContext().getTypeSize(RetTy)) <= 64) { + // System V ABI (1995), page 3-22, specified: + // > A structure or union whose size is less than or equal to 8 bytes + // > shall be returned in r3 and r4, as if it were first stored in the + // > 8-byte aligned memory area and then the low addressed word were + // > loaded into r3 and the high-addressed word into r4. Bits beyond + // > the last member of the structure or union are not defined. + // + // GCC for big-endian PPC32 inserts the pad before the first member, + // not "beyond the last member" of the struct. To stay compatible + // with GCC, we coerce the struct to an integer of the same size. + // LLVM will extend it and return i32 in r3, or i64 in r3:r4. 
+ if (Size == 0) + return ABIArgInfo::getIgnore(); + else { + llvm::Type *CoerceTy = llvm::Type::getIntNTy(getVMContext(), Size); + return ABIArgInfo::getDirect(CoerceTy); + } + } + + return DefaultABIInfo::classifyReturnType(RetTy); } // TODO: this implementation is now likely redundant with // DefaultABIInfo::EmitVAArg. Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, QualType Ty) const { + if (getTarget().getTriple().isOSDarwin()) { + auto TI = getContext().getTypeInfoInChars(Ty); + TI.second = getParamTypeAlignment(Ty); + + CharUnits SlotSize = CharUnits::fromQuantity(4); + return emitVoidPtrVAArg(CGF, VAList, Ty, + classifyArgumentType(Ty).isIndirect(), TI, SlotSize, + /*AllowHigherAlign=*/true); + } + const unsigned OverflowLimit = 8; if (const ComplexType *CTy = Ty->getAs<ComplexType>()) { // TODO: Implement this. For now ignore. @@ -3601,7 +4273,7 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, } // Get the address of the saved value by scaling the number of - // registers we've used by the number of + // registers we've used by the number of CharUnits RegSize = CharUnits::fromQuantity((isInt || IsSoftFloatABI) ? 4 : 8); llvm::Value *RegOffset = Builder.CreateMul(NumRegs, Builder.getInt8(RegSize.getQuantity())); @@ -3612,7 +4284,7 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, // Increase the used-register count. NumRegs = - Builder.CreateAdd(NumRegs, + Builder.CreateAdd(NumRegs, Builder.getInt8((isI64 || (isF64 && IsSoftFloatABI)) ? 2 : 1)); Builder.CreateStore(NumRegs, NumRegsAddr); @@ -3632,7 +4304,7 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, CharUnits Size; if (!isIndirect) { auto TypeInfo = CGF.getContext().getTypeInfoInChars(Ty); - Size = TypeInfo.first.RoundUpToAlignment(OverflowAreaAlign); + Size = TypeInfo.first.alignTo(OverflowAreaAlign); } else { Size = CGF.getPointerSize(); } @@ -3648,7 +4320,7 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, OverflowArea = Address(emitRoundPointerUpToAlignment(CGF, Ptr, Align), Align); } - + MemAddr = Builder.CreateElementBitCast(OverflowArea, DirectTy); // Increase the overflow area. @@ -3672,6 +4344,25 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, return Result; } +bool PPC32TargetCodeGenInfo::isStructReturnInRegABI( + const llvm::Triple &Triple, const CodeGenOptions &Opts) { + assert(Triple.getArch() == llvm::Triple::ppc); + + switch (Opts.getStructReturnConvention()) { + case CodeGenOptions::SRCK_Default: + break; + case CodeGenOptions::SRCK_OnStack: // -maix-struct-return + return false; + case CodeGenOptions::SRCK_InRegs: // -msvr4-struct-return + return true; + } + + if (Triple.isOSBinFormatELF() && !Triple.isOSLinux()) + return true; + + return false; +} + bool PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { @@ -3717,7 +4408,7 @@ PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, namespace { /// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information. -class PPC64_SVR4_ABIInfo : public ABIInfo { +class PPC64_SVR4_ABIInfo : public SwiftABIInfo { public: enum ABIKind { ELFv1 = 0, @@ -3728,6 +4419,7 @@ private: static const unsigned GPRBits = 64; ABIKind Kind; bool HasQPX; + bool IsSoftFloatABI; // A vector of float or double will be promoted to <4 x f32> or <4 x f64> and // will be passed in a QPX register. 
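Aside (not part of the diff): the PPC32 classifyReturnType() hunk above is where the new convention takes effect — a small aggregate is coerced to an integer of its own size, and LLVM then returns that integer in r3 (i32) or the r3:r4 pair (i64). A minimal sketch of the user-visible effect on a ppc32 ELF target; the type and function names below are illustrative only, not taken from this diff:

    /* sketch only -- hypothetical example */
    struct Pair { short lo, hi; };   /* 4 bytes: fits in one GPR */

    struct Pair make_pair(void) {
        struct Pair p = { 1, 2 };
        /* -msvr4-struct-return: the struct is coerced to i32 and the
           caller receives it in r3.
           -maix-struct-return: the caller instead passes a hidden
           pointer and the struct is stored to memory. */
        return p;
    }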
@@ -3758,8 +4450,10 @@ private: } public: - PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX) - : ABIInfo(CGT), Kind(Kind), HasQPX(HasQPX) {} + PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX, + bool SoftFloatABI) + : SwiftABIInfo(CGT), Kind(Kind), HasQPX(HasQPX), + IsSoftFloatABI(SoftFloatABI) {} bool isPromotableTypeForABI(QualType Ty) const; CharUnits getParamTypeAlignment(QualType Ty) const; @@ -3801,14 +4495,25 @@ public: Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; + + bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); + } + + bool isSwiftErrorInRegister() const override { + return false; + } }; class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo { public: PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT, - PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX) - : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX)) {} + PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX, + bool SoftFloatABI) + : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX, + SoftFloatABI)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. @@ -3966,7 +4671,7 @@ bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base, // For compatibility with GCC, ignore empty bitfields in C++ mode. if (getContext().getLangOpts().CPlusPlus && - FD->isBitField() && FD->getBitWidthValue(getContext()) == 0) + FD->isZeroLengthBitField(getContext())) continue; uint64_t FldMembers; @@ -3999,8 +4704,19 @@ bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base, // agree in both total size and mode (float vs. vector) are // treated as being equivalent here. const Type *TyPtr = Ty.getTypePtr(); - if (!Base) + if (!Base) { Base = TyPtr; + // If it's a non-power-of-2 vector, its size is already a power-of-2, + // so make sure to widen it explicitly. + if (const VectorType *VT = Base->getAs<VectorType>()) { + QualType EltTy = VT->getElementType(); + unsigned NumElements = + getContext().getTypeSize(VT) / getContext().getTypeSize(EltTy); + Base = getContext() + .getVectorType(EltTy, NumElements, VT->getVectorKind()) + .getTypePtr(); + } + } if (Base->isVectorType() != TyPtr->isVectorType() || getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr)) @@ -4015,8 +4731,13 @@ bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { if (BT->getKind() == BuiltinType::Float || BT->getKind() == BuiltinType::Double || - BT->getKind() == BuiltinType::LongDouble) + BT->getKind() == BuiltinType::LongDouble || + (getContext().getTargetInfo().hasFloat128Type() && + (BT->getKind() == BuiltinType::Float128))) { + if (IsSoftFloatABI) + return false; return true; + } } if (const VectorType *VT = Ty->getAs<VectorType>()) { if (getContext().getTypeSize(VT) == 128 || IsQPXVectorTy(Ty)) @@ -4027,10 +4748,13 @@ bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateSmallEnough( const Type *Base, uint64_t Members) const { - // Vector types require one register, floating point types require one - // or two registers depending on their size. + // Vector and fp128 types require one register, other floating point types + // require one or two registers depending on their size. uint32_t NumRegs = - Base->isVectorType() ? 
1 : (getContext().getTypeSize(Base) + 63) / 64; + ((getContext().getTargetInfo().hasFloat128Type() && + Base->isFloat128Type()) || + Base->isVectorType()) ? 1 + : (getContext().getTypeSize(Base) + 63) / 64; // Homogeneous Aggregates may occupy at most 8 registers. return Members * NumRegs <= 8; @@ -4083,13 +4807,13 @@ PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const { // Types up to 8 bytes are passed as integer type (which will be // properly aligned in the argument save area doubleword). if (Bits <= GPRBits) - CoerceTy = llvm::IntegerType::get(getVMContext(), - llvm::RoundUpToAlignment(Bits, 8)); + CoerceTy = + llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8)); // Larger types are passed as arrays, with the base type selected // according to the required alignment in the save area. else { uint64_t RegBits = ABIAlign * 8; - uint64_t NumRegs = llvm::RoundUpToAlignment(Bits, RegBits) / RegBits; + uint64_t NumRegs = llvm::alignTo(Bits, RegBits) / RegBits; llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), RegBits); CoerceTy = llvm::ArrayType::get(RegTy, NumRegs); } @@ -4103,8 +4827,8 @@ PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const { /*Realign=*/TyAlign > ABIAlign); } - return (isPromotableTypeForABI(Ty) ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); } ABIArgInfo @@ -4147,10 +4871,10 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { llvm::Type *CoerceTy; if (Bits > GPRBits) { CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits); - CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy, nullptr); + CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy); } else - CoerceTy = llvm::IntegerType::get(getVMContext(), - llvm::RoundUpToAlignment(Bits, 8)); + CoerceTy = + llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8)); return ABIArgInfo::getDirect(CoerceTy); } @@ -4158,8 +4882,8 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { return getNaturalAlignIndirect(RetTy); } - return (isPromotableTypeForABI(RetTy) ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (isPromotableTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); } // Based on ARMABIInfo::EmitVAArg, adjusted for 64-bit machine. 
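Aside (not part of the diff): several hunks above swap llvm::RoundUpToAlignment() for llvm::alignTo(); both perform the same round-up-to-a-multiple operation, the function was simply renamed upstream. A self-contained sketch of the arithmetic the PPC64 coercion code relies on (the helper below is illustrative, not LLVM's implementation):

    // sketch only: mirrors the coercion-size math used above
    #include <cassert>
    #include <cstdint>

    static uint64_t alignTo(uint64_t Value, uint64_t Align) {
        return (Value + Align - 1) / Align * Align;  // round up to multiple
    }

    int main() {
        // A 96-bit aggregate with 8-byte (64-bit) ABI alignment occupies
        // alignTo(96, 64) / 64 == 2 doublewords, so it is passed as the
        // array type [2 x i64].
        assert(alignTo(96, 64) / 64 == 2);
        // Types up to 8 bytes are passed as a single integer whose bit
        // width is the size rounded up to a whole byte, e.g. i48 for a
        // 42-bit aggregate.
        assert(alignTo(42, 8) == 48);
        return 0;
    }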
@@ -4231,14 +4955,17 @@ PPC64_initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, // 32-63: fp0-31, the 8-byte floating-point registers AssignToArrayRange(Builder, Address, Eight8, 32, 63); - // 64-76 are various 4-byte special-purpose registers: + // 64-67 are various 8-byte special-purpose registers: // 64: mq // 65: lr // 66: ctr // 67: ap + AssignToArrayRange(Builder, Address, Eight8, 64, 67); + + // 68-76 are various 4-byte special-purpose registers: // 68-75 cr0-7 // 76: xer - AssignToArrayRange(Builder, Address, Four8, 64, 76); + AssignToArrayRange(Builder, Address, Four8, 68, 76); // 77-108: v0-31, the 16-byte vector registers AssignToArrayRange(Builder, Address, Sixteen8, 77, 108); @@ -4248,7 +4975,10 @@ PPC64_initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, // 111: spe_acc // 112: spefscr // 113: sfp - AssignToArrayRange(Builder, Address, Four8, 109, 113); + // 114: tfhar + // 115: tfiar + // 116: texasr + AssignToArrayRange(Builder, Address, Eight8, 109, 116); return false; } @@ -4274,18 +5004,20 @@ PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, namespace { -class AArch64ABIInfo : public ABIInfo { +class AArch64ABIInfo : public SwiftABIInfo { public: enum ABIKind { AAPCS = 0, - DarwinPCS + DarwinPCS, + Win64 }; private: ABIKind Kind; public: - AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind) : ABIInfo(CGT), Kind(Kind) {} + AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind) + : SwiftABIInfo(CGT), Kind(Kind) {} private: ABIKind getABIKind() const { return Kind; } @@ -4300,7 +5032,7 @@ private: bool isIllegalVectorType(QualType Ty) const; void computeInfo(CGFunctionInfo &FI) const override { - if (!getCXXABI().classifyReturnType(FI)) + if (!::classifyReturnType(getCXXABI(), FI, *this)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &it : FI.arguments()) @@ -4315,9 +5047,24 @@ private: Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override { - return isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF) - : EmitAAPCSVAArg(VAListAddr, Ty, CGF); + return Kind == Win64 ? EmitMSVAArg(CGF, VAListAddr, Ty) + : isDarwinPCS() ? 
EmitDarwinVAArg(VAListAddr, Ty, CGF) + : EmitAAPCSVAArg(VAListAddr, Ty, CGF); + } + + Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + + bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); } + bool isSwiftErrorInRegister() const override { + return true; + } + + bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy, + unsigned elts) const override; }; class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { @@ -4326,7 +5073,7 @@ public: : TargetCodeGenInfo(new AArch64ABIInfo(CGT, Kind)) {} StringRef getARCRetainAutoreleasedReturnValueMarker() const override { - return "mov\tfp, fp\t\t; marker for objc_retainAutoreleaseReturnValue"; + return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue"; } int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { @@ -4334,7 +5081,59 @@ public: } bool doesReturnSlotInterfereWithArgs() const override { return false; } + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override { + const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); + if (!FD) + return; + llvm::Function *Fn = cast<llvm::Function>(GV); + + auto Kind = CGM.getCodeGenOpts().getSignReturnAddress(); + if (Kind != CodeGenOptions::SignReturnAddressScope::None) { + Fn->addFnAttr("sign-return-address", + Kind == CodeGenOptions::SignReturnAddressScope::All + ? "all" + : "non-leaf"); + + auto Key = CGM.getCodeGenOpts().getSignReturnAddressKey(); + Fn->addFnAttr("sign-return-address-key", + Key == CodeGenOptions::SignReturnAddressKeyValue::AKey + ? "a_key" + : "b_key"); + } + + if (CGM.getCodeGenOpts().BranchTargetEnforcement) + Fn->addFnAttr("branch-target-enforcement"); + } +}; + +class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo { +public: + WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind K) + : AArch64TargetCodeGenInfo(CGT, K) {} + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override; + + void getDependentLibraryOption(llvm::StringRef Lib, + llvm::SmallString<24> &Opt) const override { + Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib); + } + + void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, + llvm::SmallString<32> &Opt) const override { + Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; + } }; + +void WindowsAArch64TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { + AArch64TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); + if (GV->isDeclaration()) + return; + addStackProbeTargetAttributes(D, GV, CGM); +} } ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { @@ -4343,6 +5142,11 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { // Handle illegal vector types here. 
if (isIllegalVectorType(Ty)) { uint64_t Size = getContext().getTypeSize(Ty); + // Android promotes <2 x i8> to i16, not i32 + if (isAndroid() && (Size <= 16)) { + llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext()); + return ABIArgInfo::getDirect(ResType); + } if (Size <= 32) { llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); @@ -4366,7 +5170,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { Ty = EnumTy->getDecl()->getIntegerType(); return (Ty->isPromotableIntegerType() && isDarwinPCS() - ? ABIArgInfo::getExtend() + ? ABIArgInfo::getExtend(Ty) : ABIArgInfo::getDirect()); } @@ -4379,10 +5183,16 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { // Empty records are always ignored on Darwin, but actually passed in C++ mode // elsewhere for GNU compatibility. - if (isEmptyRecord(getContext(), Ty, true)) { + uint64_t Size = getContext().getTypeSize(Ty); + bool IsEmpty = isEmptyRecord(getContext(), Ty, true); + if (IsEmpty || Size == 0) { if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS()) return ABIArgInfo::getIgnore(); + // GNU C mode. The only argument that gets ignored is an empty one with size + // 0. + if (IsEmpty && Size == 0) + return ABIArgInfo::getIgnore(); return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); } @@ -4395,10 +5205,20 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { } // Aggregates <= 16 bytes are passed directly in registers or on the stack. - uint64_t Size = getContext().getTypeSize(Ty); if (Size <= 128) { - unsigned Alignment = getContext().getTypeAlign(Ty); - Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes + // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of + // same size and alignment. + if (getTarget().isRenderScriptTarget()) { + return coerceToIntArray(Ty, getContext(), getVMContext()); + } + unsigned Alignment; + if (Kind == AArch64ABIInfo::AAPCS) { + Alignment = getContext().getTypeUnadjustedAlign(Ty); + Alignment = Alignment < 128 ? 64 : 128; + } else { + Alignment = getContext().getTypeAlign(Ty); + } + Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. // For aggregates with 16-byte alignment, we use i128. @@ -4426,11 +5246,12 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { RetTy = EnumTy->getDecl()->getIntegerType(); return (RetTy->isPromotableIntegerType() && isDarwinPCS() - ? ABIArgInfo::getExtend() + ? ABIArgInfo::getExtend(RetTy) : ABIArgInfo::getDirect()); } - if (isEmptyRecord(getContext(), RetTy, true)) + uint64_t Size = getContext().getTypeSize(RetTy); + if (isEmptyRecord(getContext(), RetTy, true) || Size == 0) return ABIArgInfo::getIgnore(); const Type *Base = nullptr; @@ -4440,10 +5261,14 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { return ABIArgInfo::getDirect(); // Aggregates <= 16 bytes are returned directly in registers or on the stack. - uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 128) { + // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of + // same size and alignment. 
+ if (getTarget().isRenderScriptTarget()) { + return coerceToIntArray(RetTy, getContext(), getVMContext()); + } unsigned Alignment = getContext().getTypeAlign(RetTy); - Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes + Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. // For aggregates with 16-byte alignment, we use i128. @@ -4463,14 +5288,25 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const { // Check whether VT is legal. unsigned NumElements = VT->getNumElements(); uint64_t Size = getContext().getTypeSize(VT); - // NumElements should be power of 2 between 1 and 16. - if ((NumElements & (NumElements - 1)) != 0 || NumElements > 16) + // NumElements should be power of 2. + if (!llvm::isPowerOf2_32(NumElements)) return true; return Size != 64 && (Size != 128 || NumElements == 1); } return false; } +bool AArch64ABIInfo::isLegalVectorTypeForSwift(CharUnits totalSize, + llvm::Type *eltTy, + unsigned elts) const { + if (!llvm::isPowerOf2_32(elts)) + return false; + if (totalSize.getQuantity() != 8 && + (totalSize.getQuantity() != 16 || elts == 1)) + return false; + return true; +} + bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for AAPCS64 must have base types of a floating // point type or a short-vector type. This is the same as the 32-bit ABI, @@ -4543,7 +5379,7 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs"); reg_top_index = 1; // field number for __gr_top reg_top_offset = CharUnits::fromQuantity(8); - RegSize = llvm::RoundUpToAlignment(RegSize, 8); + RegSize = llvm::alignTo(RegSize, 8); } else { // 4 is the field number of __vr_offs. 
reg_offs_p = @@ -4713,7 +5549,7 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, if (IsIndirect) StackSize = StackSlotSize; else - StackSize = TyInfo.first.RoundUpToAlignment(StackSlotSize); + StackSize = TyInfo.first.alignTo(StackSlotSize); llvm::Value *StackSizeC = CGF.Builder.getSize(StackSize); llvm::Value *NewStack = @@ -4781,13 +5617,21 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty, TyInfo, SlotSize, /*AllowHigherAlign*/ true); } +Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, + CGF.getContext().getTypeInfoInChars(Ty), + CharUnits::fromQuantity(8), + /*allowHigherAlign*/ false); +} + //===----------------------------------------------------------------------===// // ARM ABI Implementation //===----------------------------------------------------------------------===// namespace { -class ARMABIInfo : public ABIInfo { +class ARMABIInfo : public SwiftABIInfo { public: enum ABIKind { APCS = 0, @@ -4800,7 +5644,8 @@ private: ABIKind Kind; public: - ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind) { + ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) + : SwiftABIInfo(CGT), Kind(_Kind) { setCCs(); } @@ -4811,6 +5656,8 @@ public: case llvm::Triple::EABIHF: case llvm::Triple::GNUEABI: case llvm::Triple::GNUEABIHF: + case llvm::Triple::MuslEABI: + case llvm::Triple::MuslEABIHF: return true; default: return false; @@ -4821,22 +5668,21 @@ public: switch (getTarget().getTriple().getEnvironment()) { case llvm::Triple::EABIHF: case llvm::Triple::GNUEABIHF: + case llvm::Triple::MuslEABIHF: return true; default: return false; } } - bool isAndroid() const { - return (getTarget().getTriple().getEnvironment() == - llvm::Triple::Android); - } - ABIKind getABIKind() const { return Kind; } private: ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic) const; ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic) const; + ABIArgInfo classifyHomogeneousAggregate(QualType Ty, const Type *Base, + uint64_t Members) const; + ABIArgInfo coerceIllegalVector(QualType Ty) const; bool isIllegalVectorType(QualType Ty) const; bool isHomogeneousAggregateBaseType(QualType Ty) const override; @@ -4851,6 +5697,16 @@ private: llvm::CallingConv::ID getLLVMDefaultCC() const; llvm::CallingConv::ID getABIDefaultCC() const; void setCCs(); + + bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); + } + bool isSwiftErrorInRegister() const override { + return true; + } + bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy, + unsigned elts) const override; }; class ARMTargetCodeGenInfo : public TargetCodeGenInfo { @@ -4867,7 +5723,7 @@ public: } StringRef getARCRetainAutoreleasedReturnValueMarker() const override { - return "mov\tr7, r7\t\t@ marker for objc_retainAutoreleaseReturnValue"; + return "mov\tr7, r7\t\t// marker for objc_retainAutoreleaseReturnValue"; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, @@ -4886,6 +5742,8 @@ public: void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { + if (GV->isDeclaration()) + return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; @@ -4917,10 +5775,7 @@ public: // the backend to perform a realignment as part of the function prologue. 
llvm::AttrBuilder B; B.addStackAlignmentAttr(8); - Fn->addAttributes(llvm::AttributeSet::FunctionIndex, - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - B)); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); } }; @@ -4931,17 +5786,29 @@ public: void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; + + void getDependentLibraryOption(llvm::StringRef Lib, + llvm::SmallString<24> &Opt) const override { + Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib); + } + + void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, + llvm::SmallString<32> &Opt) const override { + Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; + } }; void WindowsARMTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM); - addStackProbeSizeTargetAttribute(D, GV, CGM); + if (GV->isDeclaration()) + return; + addStackProbeTargetAttributes(D, GV, CGM); } } void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { - if (!getCXXABI().classifyReturnType(FI)) + if (!::classifyReturnType(getCXXABI(), FI, *this)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic()); @@ -4960,7 +5827,7 @@ void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { /// Return the default calling convention that LLVM will use. llvm::CallingConv::ID ARMABIInfo::getLLVMDefaultCC() const { // The default calling convention that LLVM will infer. - if (isEABIHF() || getTarget().getTriple().isWatchOS()) + if (isEABIHF() || getTarget().getTriple().isWatchABI()) return llvm::CallingConv::ARM_AAPCS_VFP; else if (isEABI()) return llvm::CallingConv::ARM_AAPCS; @@ -4988,21 +5855,41 @@ void ARMABIInfo::setCCs() { llvm::CallingConv::ID abiCC = getABIDefaultCC(); if (abiCC != getLLVMDefaultCC()) RuntimeCC = abiCC; +} - // AAPCS apparently requires runtime support functions to be soft-float, but - // that's almost certainly for historic reasons (Thumb1 not supporting VFP - // most likely). It's more convenient for AAPCS16_VFP to be hard-float. - switch (getABIKind()) { - case APCS: - case AAPCS16_VFP: - if (abiCC != getLLVMDefaultCC()) - BuiltinCC = abiCC; - break; - case AAPCS: - case AAPCS_VFP: - BuiltinCC = llvm::CallingConv::ARM_AAPCS; - break; +ABIArgInfo ARMABIInfo::coerceIllegalVector(QualType Ty) const { + uint64_t Size = getContext().getTypeSize(Ty); + if (Size <= 32) { + llvm::Type *ResType = + llvm::Type::getInt32Ty(getVMContext()); + return ABIArgInfo::getDirect(ResType); + } + if (Size == 64 || Size == 128) { + llvm::Type *ResType = llvm::VectorType::get( + llvm::Type::getInt32Ty(getVMContext()), Size / 32); + return ABIArgInfo::getDirect(ResType); + } + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); +} + +ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty, + const Type *Base, + uint64_t Members) const { + assert(Base && "Base class should be set for homogeneous aggregate"); + // Base can be a floating-point or a vector. 
+ if (const VectorType *VT = Base->getAs<VectorType>()) { + // FP16 vectors should be converted to integer vectors + if (!getTarget().hasLegalHalfType() && + (VT->getElementType()->isFloat16Type() || + VT->getElementType()->isHalfType())) { + uint64_t Size = getContext().getTypeSize(VT); + llvm::Type *NewVecTy = llvm::VectorType::get( + llvm::Type::getInt32Ty(getVMContext()), Size / 32); + llvm::Type *Ty = llvm::ArrayType::get(NewVecTy, Members); + return ABIArgInfo::getDirect(Ty, 0, nullptr, false); + } } + return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); } ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, @@ -5019,30 +5906,14 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, Ty = useFirstFieldIfTransparentUnion(Ty); // Handle illegal vector types here. - if (isIllegalVectorType(Ty)) { - uint64_t Size = getContext().getTypeSize(Ty); - if (Size <= 32) { - llvm::Type *ResType = - llvm::Type::getInt32Ty(getVMContext()); - return ABIArgInfo::getDirect(ResType); - } - if (Size == 64) { - llvm::Type *ResType = llvm::VectorType::get( - llvm::Type::getInt32Ty(getVMContext()), 2); - return ABIArgInfo::getDirect(ResType); - } - if (Size == 128) { - llvm::Type *ResType = llvm::VectorType::get( - llvm::Type::getInt32Ty(getVMContext()), 4); - return ABIArgInfo::getDirect(ResType); - } - return getNaturalAlignIndirect(Ty, /*ByVal=*/false); - } - - // __fp16 gets passed as if it were an int or float, but with the top 16 bits - // unspecified. This is not done for OpenCL as it handles the half type - // natively, and does not need to interwork with AAPCS code. - if (Ty->isHalfType() && !getContext().getLangOpts().OpenCL) { + if (isIllegalVectorType(Ty)) + return coerceIllegalVector(Ty); + + // _Float16 and __fp16 get passed as if it were an int or float, but with + // the top 16 bits unspecified. This is not done for OpenCL as it handles the + // half type natively, and does not need to interwork with AAPCS code. + if ((Ty->isFloat16Type() || Ty->isHalfType()) && + !getContext().getLangOpts().NativeHalfArgsAndReturns) { llvm::Type *ResType = IsEffectivelyAAPCS_VFP ? llvm::Type::getFloatTy(getVMContext()) : llvm::Type::getInt32Ty(getVMContext()); @@ -5055,7 +5926,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, Ty = EnumTy->getDecl()->getIntegerType(); } - return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend() + return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) : ABIArgInfo::getDirect()); } @@ -5072,11 +5943,8 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, // into VFP registers. const Type *Base = nullptr; uint64_t Members = 0; - if (isHomogeneousAggregate(Ty, Base, Members)) { - assert(Base && "Base class should be set for homogeneous aggregate"); - // Base can be a floating-point or a vector. - return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); - } + if (isHomogeneousAggregate(Ty, Base, Members)) + return classifyHomogeneousAggregate(Ty, Base, Members); } else if (getABIKind() == ARMABIInfo::AAPCS16_VFP) { // WatchOS does have homogeneous aggregates. Note that we intentionally use // this convention even for a variadic function: the backend will use GPRs @@ -5105,11 +5973,14 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, // most 8-byte. We realign the indirect argument if type alignment is bigger // than ABI alignment. 
uint64_t ABIAlign = 4; - uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8; + uint64_t TyAlign; if (getABIKind() == ARMABIInfo::AAPCS_VFP || - getABIKind() == ARMABIInfo::AAPCS) + getABIKind() == ARMABIInfo::AAPCS) { + TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity(); ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8); - + } else { + TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity(); + } if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) { assert(getABIKind() != ARMABIInfo::AAPCS16_VFP && "unexpected byval"); return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign), @@ -5117,12 +5988,18 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, /*Realign=*/TyAlign > ABIAlign); } + // On RenderScript, coerce Aggregates <= 64 bytes to an integer array of + // same size and alignment. + if (getTarget().isRenderScriptTarget()) { + return coerceToIntArray(Ty, getContext(), getVMContext()); + } + // Otherwise, pass by coercing to a structure of the appropriate size. llvm::Type* ElemTy; unsigned SizeRegs; // FIXME: Try to match the types of the arguments more accurately where // we can. - if (getContext().getTypeAlign(Ty) <= 32) { + if (TyAlign <= 4) { ElemTy = llvm::Type::getInt32Ty(getVMContext()); SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32; } else { @@ -5226,15 +6103,22 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); - // Large vector types should be returned via memory. - if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) { - return getNaturalAlignIndirect(RetTy); - } - - // __fp16 gets returned as if it were an int or float, but with the top 16 - // bits unspecified. This is not done for OpenCL as it handles the half type - // natively, and does not need to interwork with AAPCS code. - if (RetTy->isHalfType() && !getContext().getLangOpts().OpenCL) { + if (const VectorType *VT = RetTy->getAs<VectorType>()) { + // Large vector types should be returned via memory. + if (getContext().getTypeSize(RetTy) > 128) + return getNaturalAlignIndirect(RetTy); + // FP16 vectors should be converted to integer vectors + if (!getTarget().hasLegalHalfType() && + (VT->getElementType()->isFloat16Type() || + VT->getElementType()->isHalfType())) + return coerceIllegalVector(RetTy); + } + + // _Float16 and __fp16 get returned as if it were an int or float, but with + // the top 16 bits unspecified. This is not done for OpenCL as it handles the + // half type natively, and does not need to interwork with AAPCS code. + if ((RetTy->isFloat16Type() || RetTy->isHalfType()) && + !getContext().getLangOpts().NativeHalfArgsAndReturns) { llvm::Type *ResType = IsEffectivelyAAPCS_VFP ? llvm::Type::getFloatTy(getVMContext()) : llvm::Type::getInt32Ty(getVMContext()); @@ -5246,7 +6130,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend() + return RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy) : ABIArgInfo::getDirect(); } @@ -5287,17 +6171,19 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, if (IsEffectivelyAAPCS_VFP) { const Type *Base = nullptr; uint64_t Members = 0; - if (isHomogeneousAggregate(RetTy, Base, Members)) { - assert(Base && "Base class should be set for homogeneous aggregate"); - // Homogeneous Aggregates are returned directly. 
- return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); - } + if (isHomogeneousAggregate(RetTy, Base, Members)) + return classifyHomogeneousAggregate(RetTy, Base, Members); } // Aggregates <= 4 bytes are returned in r0; other aggregates // are returned indirectly. uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 32) { + // On RenderScript, coerce Aggregates <= 4 bytes to an integer array of + // same size and alignment. + if (getTarget().isRenderScriptTarget()) { + return coerceToIntArray(RetTy, getContext(), getVMContext()); + } if (getDataLayout().isBigEndian()) // Return in 32 bit integer integer type (as if loaded by LDR, AAPCS 5.4) return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); @@ -5311,7 +6197,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, } else if (Size <= 128 && getABIKind() == AAPCS16_VFP) { llvm::Type *Int32Ty = llvm::Type::getInt32Ty(getVMContext()); llvm::Type *CoerceTy = - llvm::ArrayType::get(Int32Ty, llvm::RoundUpToAlignment(Size, 32) / 32); + llvm::ArrayType::get(Int32Ty, llvm::alignTo(Size, 32) / 32); return ABIArgInfo::getDirect(CoerceTy); } @@ -5321,6 +6207,13 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, /// isIllegalVector - check whether Ty is an illegal vector type. bool ARMABIInfo::isIllegalVectorType(QualType Ty) const { if (const VectorType *VT = Ty->getAs<VectorType> ()) { + // On targets that don't support FP16, FP16 is expanded into float, and we + // don't want the ABI to depend on whether or not FP16 is supported in + // hardware. Thus return false to coerce FP16 vectors into integer vectors. + if (!getTarget().hasLegalHalfType() && + (VT->getElementType()->isFloat16Type() || + VT->getElementType()->isHalfType())) + return true; if (isAndroid()) { // Android shipped using Clang 3.1, which supported a slightly different // vector ABI. The primary differences were that 3-element vector types @@ -5345,6 +6238,20 @@ bool ARMABIInfo::isIllegalVectorType(QualType Ty) const { return false; } +bool ARMABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize, + llvm::Type *eltTy, + unsigned numElts) const { + if (!llvm::isPowerOf2_32(numElts)) + return false; + unsigned size = getDataLayout().getTypeStoreSizeInBits(eltTy); + if (size > 64) + return false; + if (vectorSize.getQuantity() != 8 && + (vectorSize.getQuantity() != 16 || numElts == 1)) + return false; + return true; +} + bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for AAPCS-VFP must have base types of float, // double, or 64-bit or 128-bit vectors. @@ -5440,6 +6347,8 @@ public: void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; + bool shouldEmitStaticExternCAliases() const override; + private: // Adds a NamedMDNode with F, Name, and Operand as operands, and adds the // resulting MDNode to the nvvm.annotations MDNode. @@ -5458,8 +6367,8 @@ ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const { if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return (RetTy->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (RetTy->isPromotableIntegerType() ? 
ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); } ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const { @@ -5471,8 +6380,8 @@ ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const { if (isAggregateTypeForABI(Ty)) return getNaturalAlignIndirect(Ty, /* byval */ true); - return (Ty->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); } void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const { @@ -5493,9 +6402,10 @@ Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, llvm_unreachable("NVPTX does not support varargs"); } -void NVPTXTargetCodeGenInfo:: -setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const{ +void NVPTXTargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + if (GV->isDeclaration()) + return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; @@ -5559,6 +6469,10 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::Function *F, StringRef Name, // Append metadata to nvvm.annotations MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); } + +bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { + return false; +} } //===----------------------------------------------------------------------===// @@ -5567,12 +6481,12 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::Function *F, StringRef Name, namespace { -class SystemZABIInfo : public ABIInfo { +class SystemZABIInfo : public SwiftABIInfo { bool HasVector; public: SystemZABIInfo(CodeGenTypes &CGT, bool HV) - : ABIInfo(CGT), HasVector(HV) {} + : SwiftABIInfo(CGT), HasVector(HV) {} bool isPromotableIntegerType(QualType Ty) const; bool isCompoundType(QualType Ty) const; @@ -5592,6 +6506,14 @@ public: Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; + + bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); + } + bool isSwiftErrorInRegister() const override { + return false; + } }; class SystemZTargetCodeGenInfo : public TargetCodeGenInfo { @@ -5673,7 +6595,7 @@ QualType SystemZABIInfo::GetSingleElementType(QualType Ty) const { // Unlike isSingleElementStruct(), empty structure and array fields // do count. So do anonymous bitfields that aren't zero-sized. if (getContext().getLangOpts().CPlusPlus && - FD->isBitField() && FD->getBitWidthValue(getContext()) == 0) + FD->isZeroLengthBitField(getContext())) continue; // Unlike isSingleElementStruct(), arrays do not count. @@ -5857,8 +6779,8 @@ ABIArgInfo SystemZABIInfo::classifyReturnType(QualType RetTy) const { return ABIArgInfo::getDirect(); if (isCompoundType(RetTy) || getContext().getTypeSize(RetTy) > 64) return getNaturalAlignIndirect(RetTy); - return (isPromotableIntegerType(RetTy) ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (isPromotableIntegerType(RetTy) ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); } ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const { @@ -5868,7 +6790,7 @@ ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const { // Integers and enums are extended to full register width. if (isPromotableIntegerType(Ty)) - return ABIArgInfo::getExtend(); + return ABIArgInfo::getExtend(Ty); // Handle vector types and vector-like structure types. 
Note that // as opposed to float-like structure types, we do not allow any @@ -5927,25 +6849,24 @@ public: } -void MSP430TargetCodeGenInfo::setTargetAttributes(const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const { +void MSP430TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + if (GV->isDeclaration()) + return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { - if (const MSP430InterruptAttr *attr = FD->getAttr<MSP430InterruptAttr>()) { - // Handle 'interrupt' attribute: - llvm::Function *F = cast<llvm::Function>(GV); + const auto *InterruptAttr = FD->getAttr<MSP430InterruptAttr>(); + if (!InterruptAttr) + return; - // Step 1: Set ISR calling convention. - F->setCallingConv(llvm::CallingConv::MSP430_INTR); + // Handle 'interrupt' attribute: + llvm::Function *F = cast<llvm::Function>(GV); - // Step 2: Add attributes goodness. - F->addFnAttr(llvm::Attribute::NoInline); + // Step 1: Set ISR calling convention. + F->setCallingConv(llvm::CallingConv::MSP430_INTR); - // Step 3: Emit ISR vector alias. - unsigned Num = attr->getNumber() / 2; - llvm::GlobalAlias::create(llvm::Function::ExternalLinkage, - "__isr_" + Twine(Num), F); - } + // Step 2: Add attributes goodness. + F->addFnAttr(llvm::Attribute::NoInline); + F->addFnAttr("interrupt", llvm::utostr(InterruptAttr->getNumber())); } } @@ -5973,7 +6894,7 @@ public: void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; - bool shouldSignExtUnsignedType(QualType Ty) const override; + ABIArgInfo extendType(QualType Ty) const; }; class MIPSTargetCodeGenInfo : public TargetCodeGenInfo { @@ -5992,6 +6913,16 @@ public: const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; llvm::Function *Fn = cast<llvm::Function>(GV); + + if (FD->hasAttr<MipsLongCallAttr>()) + Fn->addFnAttr("long-call"); + else if (FD->hasAttr<MipsShortCallAttr>()) + Fn->addFnAttr("short-call"); + + // Other attributes do not have a meaning for declarations. + if (GV->isDeclaration()) + return; + if (FD->hasAttr<Mips16Attr>()) { Fn->addFnAttr("mips16"); } @@ -5999,6 +6930,11 @@ public: Fn->addFnAttr("nomips16"); } + if (FD->hasAttr<MicroMipsAttr>()) + Fn->addFnAttr("micromips"); + else if (FD->hasAttr<NoMicroMipsAttr>()) + Fn->addFnAttr("nomicromips"); + const MipsInterruptAttr *Attr = FD->getAttr<MipsInterruptAttr>(); if (!Attr) return; @@ -6121,8 +7057,8 @@ MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const { Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes), (uint64_t)StackAlignInBytes); - unsigned CurrOffset = llvm::RoundUpToAlignment(Offset, Align); - Offset = CurrOffset + llvm::RoundUpToAlignment(TySize, Align * 8) / 8; + unsigned CurrOffset = llvm::alignTo(Offset, Align); + Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8; if (isAggregateTypeForABI(Ty) || Ty->isVectorType()) { // Ignore empty aggregates. @@ -6150,7 +7086,7 @@ MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const { // All integral types are promoted to the GPR width. if (Ty->isIntegralOrEnumerationType()) - return ABIArgInfo::getExtend(); + return extendType(Ty); return ABIArgInfo::getDirect( nullptr, 0, IsO32 ? 
nullptr : getPaddingType(OrigOffset, CurrOffset)); @@ -6232,8 +7168,14 @@ ABIArgInfo MipsABIInfo::classifyReturnType(QualType RetTy) const { if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return (RetTy->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + if (RetTy->isPromotableIntegerType()) + return ABIArgInfo::getExtend(RetTy); + + if ((RetTy->isUnsignedIntegerOrEnumerationType() || + RetTy->isSignedIntegerOrEnumerationType()) && Size == 32 && !IsO32) + return ABIArgInfo::getSignExtend(RetTy); + + return ABIArgInfo::getDirect(); } void MipsABIInfo::computeInfo(CGFunctionInfo &FI) const { @@ -6299,14 +7241,14 @@ Address MipsABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, return Addr; } -bool MipsABIInfo::shouldSignExtUnsignedType(QualType Ty) const { +ABIArgInfo MipsABIInfo::extendType(QualType Ty) const { int TySize = getContext().getTypeSize(Ty); // MIPS64 ABI requires unsigned 32 bit integers to be sign extended. if (Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) - return true; + return ABIArgInfo::getSignExtend(Ty); - return false; + return ABIArgInfo::getExtend(Ty); } bool @@ -6338,6 +7280,33 @@ MIPSTargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, } //===----------------------------------------------------------------------===// +// AVR ABI Implementation. +//===----------------------------------------------------------------------===// + +namespace { +class AVRTargetCodeGenInfo : public TargetCodeGenInfo { +public: + AVRTargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(new DefaultABIInfo(CGT)) { } + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override { + if (GV->isDeclaration()) + return; + const auto *FD = dyn_cast_or_null<FunctionDecl>(D); + if (!FD) return; + auto *Fn = cast<llvm::Function>(GV); + + if (FD->getAttr<AVRInterruptAttr>()) + Fn->addFnAttr("interrupt"); + + if (FD->getAttr<AVRSignalAttr>()) + Fn->addFnAttr("signal"); + } +}; +} + +//===----------------------------------------------------------------------===// // TCE ABI Implementation (see http://tce.cs.tut.fi). Uses mostly the defaults. // Currently subclassed only to implement custom OpenCL C function attribute // handling. @@ -6356,6 +7325,8 @@ public: void TCETargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + if (GV->isDeclaration()) + return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; @@ -6447,17 +7418,17 @@ ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty) const { if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); - return (Ty->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); } + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + // Ignore empty records. 
if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) - return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); - uint64_t Size = getContext().getTypeSize(Ty); if (Size > 64) return getNaturalAlignIndirect(Ty, /*ByVal=*/true); @@ -6485,8 +7456,8 @@ ABIArgInfo HexagonABIInfo::classifyReturnType(QualType RetTy) const { if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return (RetTy->isPromotableIntegerType() ? - ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); + return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); } if (isEmptyRecord(getContext(), RetTy, true)) @@ -6519,44 +7490,549 @@ Address HexagonABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, } //===----------------------------------------------------------------------===// +// Lanai ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { +class LanaiABIInfo : public DefaultABIInfo { +public: + LanaiABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + + bool shouldUseInReg(QualType Ty, CCState &State) const; + + void computeInfo(CGFunctionInfo &FI) const override { + CCState State(FI.getCallingConvention()); + // Lanai uses 4 registers to pass arguments unless the function has the + // regparm attribute set. + if (FI.getHasRegParm()) { + State.FreeRegs = FI.getRegParm(); + } else { + State.FreeRegs = 4; + } + + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type, State); + } + + ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const; + ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; +}; +} // end anonymous namespace + +bool LanaiABIInfo::shouldUseInReg(QualType Ty, CCState &State) const { + unsigned Size = getContext().getTypeSize(Ty); + unsigned SizeInRegs = llvm::alignTo(Size, 32U) / 32U; + + if (SizeInRegs == 0) + return false; + + if (SizeInRegs > State.FreeRegs) { + State.FreeRegs = 0; + return false; + } + + State.FreeRegs -= SizeInRegs; + + return true; +} + +ABIArgInfo LanaiABIInfo::getIndirectResult(QualType Ty, bool ByVal, + CCState &State) const { + if (!ByVal) { + if (State.FreeRegs) { + --State.FreeRegs; // Non-byval indirects just use one pointer. + return getNaturalAlignIndirectInReg(Ty); + } + return getNaturalAlignIndirect(Ty, false); + } + + // Compute the byval alignment. + const unsigned MinABIStackAlignInBytes = 4; + unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; + return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true, + /*Realign=*/TypeAlign > + MinABIStackAlignInBytes); +} + +ABIArgInfo LanaiABIInfo::classifyArgumentType(QualType Ty, + CCState &State) const { + // Check with the C++ ABI first. + const RecordType *RT = Ty->getAs<RecordType>(); + if (RT) { + CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); + if (RAA == CGCXXABI::RAA_Indirect) { + return getIndirectResult(Ty, /*ByVal=*/false, State); + } else if (RAA == CGCXXABI::RAA_DirectInMemory) { + return getNaturalAlignIndirect(Ty, /*ByRef=*/true); + } + } + + if (isAggregateTypeForABI(Ty)) { + // Structures with flexible arrays are always indirect. 
+ if (RT && RT->getDecl()->hasFlexibleArrayMember()) + return getIndirectResult(Ty, /*ByVal=*/true, State); + + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + llvm::LLVMContext &LLVMContext = getVMContext(); + unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; + if (SizeInRegs <= State.FreeRegs) { + llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); + SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32); + llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); + State.FreeRegs -= SizeInRegs; + return ABIArgInfo::getDirectInReg(Result); + } else { + State.FreeRegs = 0; + } + return getIndirectResult(Ty, true, State); + } + + // Treat an enum type as its underlying type. + if (const auto *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + bool InReg = shouldUseInReg(Ty, State); + if (Ty->isPromotableIntegerType()) { + if (InReg) + return ABIArgInfo::getDirectInReg(); + return ABIArgInfo::getExtend(Ty); + } + if (InReg) + return ABIArgInfo::getDirectInReg(); + return ABIArgInfo::getDirect(); +} + +namespace { +class LanaiTargetCodeGenInfo : public TargetCodeGenInfo { +public: + LanaiTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) + : TargetCodeGenInfo(new LanaiABIInfo(CGT)) {} +}; +} + +//===----------------------------------------------------------------------===// // AMDGPU ABI Implementation //===----------------------------------------------------------------------===// namespace { +class AMDGPUABIInfo final : public DefaultABIInfo { +private: + static const unsigned MaxNumRegsForArgsRet = 16; + + unsigned numRegsForType(QualType Ty) const; + + bool isHomogeneousAggregateBaseType(QualType Ty) const override; + bool isHomogeneousAggregateSmallEnough(const Type *Base, + uint64_t Members) const override; + +public: + explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) : + DefaultABIInfo(CGT) {} + + ABIArgInfo classifyReturnType(QualType RetTy) const; + ABIArgInfo classifyKernelArgumentType(QualType Ty) const; + ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const; + + void computeInfo(CGFunctionInfo &FI) const override; +}; + +bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { + return true; +} + +bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough( + const Type *Base, uint64_t Members) const { + uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32; + + // Homogeneous Aggregates may occupy at most 16 registers. + return Members * NumRegs <= MaxNumRegsForArgsRet; +} + +/// Estimate number of registers the type will use when passed in registers. +unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const { + unsigned NumRegs = 0; + + if (const VectorType *VT = Ty->getAs<VectorType>()) { + // Compute from the number of elements. The reported size is based on the + // in-memory size, which includes the padding 4th element for 3-vectors. + QualType EltTy = VT->getElementType(); + unsigned EltSize = getContext().getTypeSize(EltTy); + + // 16-bit element vectors should be passed as packed. 
+ if (EltSize == 16) + return (VT->getNumElements() + 1) / 2; + + unsigned EltNumRegs = (EltSize + 31) / 32; + return EltNumRegs * VT->getNumElements(); + } + + if (const RecordType *RT = Ty->getAs<RecordType>()) { + const RecordDecl *RD = RT->getDecl(); + assert(!RD->hasFlexibleArrayMember()); + + for (const FieldDecl *Field : RD->fields()) { + QualType FieldTy = Field->getType(); + NumRegs += numRegsForType(FieldTy); + } + + return NumRegs; + } + + return (getContext().getTypeSize(Ty) + 31) / 32; +} + +void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const { + llvm::CallingConv::ID CC = FI.getCallingConvention(); + + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + + unsigned NumRegsLeft = MaxNumRegsForArgsRet; + for (auto &Arg : FI.arguments()) { + if (CC == llvm::CallingConv::AMDGPU_KERNEL) { + Arg.info = classifyKernelArgumentType(Arg.type); + } else { + Arg.info = classifyArgumentType(Arg.type, NumRegsLeft); + } + } +} + +ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const { + if (isAggregateTypeForABI(RetTy)) { + // Records with non-trivial destructors/copy-constructors should not be + // returned by value. + if (!getRecordArgABI(RetTy, getCXXABI())) { + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), RetTy, true)) + return ABIArgInfo::getIgnore(); + + // Lower single-element structs to just return a regular value. + if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext())) + return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + + if (const RecordType *RT = RetTy->getAs<RecordType>()) { + const RecordDecl *RD = RT->getDecl(); + if (RD->hasFlexibleArrayMember()) + return DefaultABIInfo::classifyReturnType(RetTy); + } + + // Pack aggregates <= 4 bytes into single VGPR or pair. + uint64_t Size = getContext().getTypeSize(RetTy); + if (Size <= 16) + return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); + + if (Size <= 32) + return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); + + if (Size <= 64) { + llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); + return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); + } + + if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet) + return ABIArgInfo::getDirect(); + } + } + + // Otherwise just do the default thing. + return DefaultABIInfo::classifyReturnType(RetTy); +} + +/// For kernels all parameters are really passed in a special buffer. It doesn't +/// make sense to pass anything byval, so everything must be direct. +ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + + // TODO: Can we omit empty structs? + + // Coerce single element structs to its element. + if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) + return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + + // If we set CanBeFlattened to true, CodeGen will expand the struct to its + // individual elements, which confuses the Clover OpenCL backend; therefore we + // have to set it to false here. Other args of getDirect() are just defaults. 
+ return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); +} + +ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, + unsigned &NumRegsLeft) const { + assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow"); + + Ty = useFirstFieldIfTransparentUnion(Ty); + + if (isAggregateTypeForABI(Ty)) { + // Records with non-trivial destructors/copy-constructors should not be + // passed by value. + if (auto RAA = getRecordArgABI(Ty, getCXXABI())) + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + // Lower single-element structs to just pass a regular value. TODO: We + // could do reasonable-size multiple-element structs too, using getExpand(), + // though watch out for things like bitfields. + if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) + return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + + if (const RecordType *RT = Ty->getAs<RecordType>()) { + const RecordDecl *RD = RT->getDecl(); + if (RD->hasFlexibleArrayMember()) + return DefaultABIInfo::classifyArgumentType(Ty); + } + + // Pack aggregates <= 8 bytes into single VGPR or pair. + uint64_t Size = getContext().getTypeSize(Ty); + if (Size <= 64) { + unsigned NumRegs = (Size + 31) / 32; + NumRegsLeft -= std::min(NumRegsLeft, NumRegs); + + if (Size <= 16) + return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); + + if (Size <= 32) + return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); + + // XXX: Should this be i64 instead, and should the limit increase? + llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); + return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); + } + + if (NumRegsLeft > 0) { + unsigned NumRegs = numRegsForType(Ty); + if (NumRegsLeft >= NumRegs) { + NumRegsLeft -= NumRegs; + return ABIArgInfo::getDirect(); + } + } + } + + // Otherwise just do the default thing. 
+ ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty); + if (!ArgInfo.isIndirect()) { + unsigned NumRegs = numRegsForType(Ty); + NumRegsLeft -= std::min(NumRegs, NumRegsLeft); + } + + return ArgInfo; +} + class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { public: AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} + : TargetCodeGenInfo(new AMDGPUABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; + unsigned getOpenCLKernelCallingConv() const override; + + llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM, + llvm::PointerType *T, QualType QT) const override; + + LangAS getASTAllocaAddressSpace() const override { + return getLangASFromTargetAS( + getABIInfo().getDataLayout().getAllocaAddrSpace()); + } + LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const override; + llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S, + llvm::LLVMContext &C) const override; + llvm::Function * + createEnqueuedBlockKernel(CodeGenFunction &CGF, + llvm::Function *BlockInvokeFunc, + llvm::Value *BlockLiteral) const override; + bool shouldEmitStaticExternCAliases() const override; + void setCUDAKernelCallingConvention(const FunctionType *&FT) const override; }; - } void AMDGPUTargetCodeGenInfo::setTargetAttributes( - const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const { + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + if (GV->isDeclaration()) + return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; - if (const auto Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) { - llvm::Function *F = cast<llvm::Function>(GV); - uint32_t NumVGPR = Attr->getNumVGPR(); - if (NumVGPR != 0) - F->addFnAttr("amdgpu_num_vgpr", llvm::utostr(NumVGPR)); + llvm::Function *F = cast<llvm::Function>(GV); + + const auto *ReqdWGS = M.getLangOpts().OpenCL ? + FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr; + + if (M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>() && + (M.getTriple().getOS() == llvm::Triple::AMDHSA)) + F->addFnAttr("amdgpu-implicitarg-num-bytes", "48"); + + const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>(); + if (ReqdWGS || FlatWGS) { + unsigned Min = FlatWGS ? FlatWGS->getMin() : 0; + unsigned Max = FlatWGS ? 
FlatWGS->getMax() : 0; + if (ReqdWGS && Min == 0 && Max == 0) + Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim(); + + if (Min != 0) { + assert(Min <= Max && "Min must be less than or equal Max"); + + std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max); + F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); + } else + assert(Max == 0 && "Max must be zero"); } - if (const auto Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) { - llvm::Function *F = cast<llvm::Function>(GV); + if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) { + unsigned Min = Attr->getMin(); + unsigned Max = Attr->getMax(); + + if (Min != 0) { + assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max"); + + std::string AttrVal = llvm::utostr(Min); + if (Max != 0) + AttrVal = AttrVal + "," + llvm::utostr(Max); + F->addFnAttr("amdgpu-waves-per-eu", AttrVal); + } else + assert(Max == 0 && "Max must be zero"); + } + + if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) { unsigned NumSGPR = Attr->getNumSGPR(); + if (NumSGPR != 0) - F->addFnAttr("amdgpu_num_sgpr", llvm::utostr(NumSGPR)); + F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR)); + } + + if (const auto *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) { + uint32_t NumVGPR = Attr->getNumVGPR(); + + if (NumVGPR != 0) + F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); + } +} + +unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const { + return llvm::CallingConv::AMDGPU_KERNEL; +} + +// Currently LLVM assumes null pointers always have value 0, +// which results in incorrectly transformed IR. Therefore, instead of +// emitting null pointers in private and local address spaces, a null +// pointer in generic address space is emitted which is casted to a +// pointer in local or private address space. 
+llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer( + const CodeGen::CodeGenModule &CGM, llvm::PointerType *PT, + QualType QT) const { + if (CGM.getContext().getTargetNullPointerValue(QT) == 0) + return llvm::ConstantPointerNull::get(PT); + + auto &Ctx = CGM.getContext(); + auto NPT = llvm::PointerType::get(PT->getElementType(), + Ctx.getTargetAddressSpace(LangAS::opencl_generic)); + return llvm::ConstantExpr::getAddrSpaceCast( + llvm::ConstantPointerNull::get(NPT), PT); +} + +LangAS +AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const { + assert(!CGM.getLangOpts().OpenCL && + !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) && + "Address space agnostic languages only"); + LangAS DefaultGlobalAS = getLangASFromTargetAS( + CGM.getContext().getTargetAddressSpace(LangAS::opencl_global)); + if (!D) + return DefaultGlobalAS; + + LangAS AddrSpace = D->getType().getAddressSpace(); + assert(AddrSpace == LangAS::Default || isTargetAddressSpace(AddrSpace)); + if (AddrSpace != LangAS::Default) + return AddrSpace; + + if (CGM.isTypeConstant(D->getType(), false)) { + if (auto ConstAS = CGM.getTarget().getConstantAddressSpace()) + return ConstAS.getValue(); + } + return DefaultGlobalAS; +} + +llvm::SyncScope::ID +AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, + llvm::LLVMContext &C) const { + StringRef Name; + switch (S) { + case SyncScope::OpenCLWorkGroup: + Name = "workgroup"; + break; + case SyncScope::OpenCLDevice: + Name = "agent"; + break; + case SyncScope::OpenCLAllSVMDevices: + Name = ""; + break; + case SyncScope::OpenCLSubGroup: + Name = "subgroup"; + } + return C.getOrInsertSyncScopeID(Name); +} + +bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { + return false; +} + +void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention( + const FunctionType *&FT) const { + FT = getABIInfo().getContext().adjustFunctionType( + FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel)); +} + +//===----------------------------------------------------------------------===// +// SPARC v8 ABI Implementation. +// Based on the SPARC Compliance Definition version 2.4.1. +// +// Ensures that complex values are passed in registers. +// +namespace { +class SparcV8ABIInfo : public DefaultABIInfo { +public: + SparcV8ABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + +private: + ABIArgInfo classifyReturnType(QualType RetTy) const; + void computeInfo(CGFunctionInfo &FI) const override; +}; +} // end anonymous namespace + + +ABIArgInfo +SparcV8ABIInfo::classifyReturnType(QualType Ty) const { + if (Ty->isAnyComplexType()) { + return ABIArgInfo::getDirect(); + } + else { + return DefaultABIInfo::classifyReturnType(Ty); } } +void SparcV8ABIInfo::computeInfo(CGFunctionInfo &FI) const { + + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &Arg : FI.arguments()) + Arg.info = classifyArgumentType(Arg.type); +} + +namespace { +class SparcV8TargetCodeGenInfo : public TargetCodeGenInfo { +public: + SparcV8TargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(new SparcV8ABIInfo(CGT)) {} +}; +} // end anonymous namespace //===----------------------------------------------------------------------===// // SPARC v9 ABI Implementation. @@ -6623,7 +8099,7 @@ private: return; // Finish the current 64-bit word. 
- uint64_t Aligned = llvm::RoundUpToAlignment(Size, 64); + uint64_t Aligned = llvm::alignTo(Size, 64); if (Aligned > Size && Aligned <= ToSize) { Elems.push_back(llvm::IntegerType::get(Context, Aligned - Size)); Size = Aligned; @@ -6721,7 +8197,7 @@ SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const { // Integer types smaller than a register are extended. if (Size < 64 && Ty->isIntegerType()) - return ABIArgInfo::getExtend(); + return ABIArgInfo::getExtend(Ty); // Other non-aggregates go in registers. if (!isAggregateTypeForABI(Ty)) @@ -6740,7 +8216,7 @@ SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const { CoerceBuilder CB(getVMContext(), getDataLayout()); CB.addStruct(0, StrTy); - CB.pad(llvm::RoundUpToAlignment(CB.DL.getTypeSizeInBits(StrTy), 64)); + CB.pad(llvm::alignTo(CB.DL.getTypeSizeInBits(StrTy), 64)); // Try to use the original type for coercion. llvm::Type *CoerceTy = CB.isUsableType(StrTy) ? StrTy : CB.getType(); @@ -6770,6 +8246,7 @@ Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, CharUnits Stride; switch (AI.getKind()) { case ABIArgInfo::Expand: + case ABIArgInfo::CoerceAndExpand: case ABIArgInfo::InAlloca: llvm_unreachable("Unsupported ABI kind for va_arg"); @@ -6782,7 +8259,7 @@ Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, case ABIArgInfo::Direct: { auto AllocSize = getDataLayout().getTypeAllocSize(AI.getCoerceToType()); - Stride = CharUnits::fromQuantity(AllocSize).RoundUpToAlignment(SlotSize); + Stride = CharUnits::fromQuantity(AllocSize).alignTo(SlotSize); ArgAddr = Addr; break; } @@ -6861,6 +8338,137 @@ SparcV9TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, return false; } +// ARC ABI implementation. +namespace { + +class ARCABIInfo : public DefaultABIInfo { +public: + using DefaultABIInfo::DefaultABIInfo; + +private: + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + + void updateState(const ABIArgInfo &Info, QualType Ty, CCState &State) const { + if (!State.FreeRegs) + return; + if (Info.isIndirect() && Info.getInReg()) + State.FreeRegs--; + else if (Info.isDirect() && Info.getInReg()) { + unsigned sz = (getContext().getTypeSize(Ty) + 31) / 32; + if (sz < State.FreeRegs) + State.FreeRegs -= sz; + else + State.FreeRegs = 0; + } + } + + void computeInfo(CGFunctionInfo &FI) const override { + CCState State(FI.getCallingConvention()); + // ARC uses 8 registers to pass arguments. + State.FreeRegs = 8; + + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + updateState(FI.getReturnInfo(), FI.getReturnType(), State); + for (auto &I : FI.arguments()) { + I.info = classifyArgumentType(I.type, State.FreeRegs); + updateState(I.info, I.type, State); + } + } + + ABIArgInfo getIndirectByRef(QualType Ty, bool HasFreeRegs) const; + ABIArgInfo getIndirectByValue(QualType Ty) const; + ABIArgInfo classifyArgumentType(QualType Ty, uint8_t FreeRegs) const; + ABIArgInfo classifyReturnType(QualType RetTy) const; +}; + +class ARCTargetCodeGenInfo : public TargetCodeGenInfo { +public: + ARCTargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(new ARCABIInfo(CGT)) {} +}; + + +ABIArgInfo ARCABIInfo::getIndirectByRef(QualType Ty, bool HasFreeRegs) const { + return HasFreeRegs ? getNaturalAlignIndirectInReg(Ty) : + getNaturalAlignIndirect(Ty, false); +} + +ABIArgInfo ARCABIInfo::getIndirectByValue(QualType Ty) const { + // Compute the byval alignment. 
+ const unsigned MinABIStackAlignInBytes = 4; + unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; + return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true, + TypeAlign > MinABIStackAlignInBytes); +} + +Address ARCABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, + getContext().getTypeInfoInChars(Ty), + CharUnits::fromQuantity(4), true); +} + +ABIArgInfo ARCABIInfo::classifyArgumentType(QualType Ty, + uint8_t FreeRegs) const { + // Handle the generic C++ ABI. + const RecordType *RT = Ty->getAs<RecordType>(); + if (RT) { + CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); + if (RAA == CGCXXABI::RAA_Indirect) + return getIndirectByRef(Ty, FreeRegs > 0); + + if (RAA == CGCXXABI::RAA_DirectInMemory) + return getIndirectByValue(Ty); + } + + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + auto SizeInRegs = llvm::alignTo(getContext().getTypeSize(Ty), 32) / 32; + + if (isAggregateTypeForABI(Ty)) { + // Structures with flexible arrays are always indirect. + if (RT && RT->getDecl()->hasFlexibleArrayMember()) + return getIndirectByValue(Ty); + + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + llvm::LLVMContext &LLVMContext = getVMContext(); + + llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); + SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32); + llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); + + return FreeRegs >= SizeInRegs ? + ABIArgInfo::getDirectInReg(Result) : + ABIArgInfo::getDirect(Result, 0, nullptr, false); + } + + return Ty->isPromotableIntegerType() ? + (FreeRegs >= SizeInRegs ? ABIArgInfo::getExtendInReg(Ty) : + ABIArgInfo::getExtend(Ty)) : + (FreeRegs >= SizeInRegs ? ABIArgInfo::getDirectInReg() : + ABIArgInfo::getDirect()); +} + +ABIArgInfo ARCABIInfo::classifyReturnType(QualType RetTy) const { + if (RetTy->isAnyComplexType()) + return ABIArgInfo::getDirectInReg(); + + // Arguments of size > 4 registers are indirect. + auto RetSize = llvm::alignTo(getContext().getTypeSize(RetTy), 32) / 32; + if (RetSize > 4) + return getIndirectByRef(RetTy, /*HasFreeRegs*/ true); + + return DefaultABIInfo::classifyReturnType(RetTy); +} + +} // End anonymous namespace. 
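The Lanai and ARC classifiers above share a pattern worth making concrete: an aggregate that still fits in the remaining argument registers is coerced to a struct of 32-bit integers and passed direct (in registers); when the registers run out, Lanai switches to an indirect (byval) pass while ARC keeps the coerced form without the inreg marker. A minimal sketch of what that means at the source level (hypothetical example, not part of the patch):

    // Illustrative only: an 8-byte aggregate occupies two 32-bit registers
    // (SizeInRegs == 2), so with at least two free registers the
    // classifyArgumentType() logic above yields getDirectInReg() with a
    // coerced type of { i32, i32 }.
    struct Point { int x, y; };

    int manhattan(Point p) {   // roughly: define i32 @manhattan({ i32, i32 })
      return p.x + p.y;        // when FreeRegs >= 2
    }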
//===----------------------------------------------------------------------===// // XCore ABI Implementation @@ -6953,7 +8561,7 @@ class FieldEncoding { std::string Enc; public: FieldEncoding(bool b, SmallStringEnc &e) : HasName(b), Enc(e.c_str()) {} - StringRef str() {return Enc.c_str();} + StringRef str() { return Enc; } bool operator<(const FieldEncoding &rhs) const { if (HasName != rhs.HasName) return HasName; return Enc < rhs.Enc; @@ -7000,6 +8608,7 @@ Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, CharUnits ArgSize = CharUnits::Zero(); switch (AI.getKind()) { case ABIArgInfo::Expand: + case ABIArgInfo::CoerceAndExpand: case ABIArgInfo::InAlloca: llvm_unreachable("Unsupported ABI kind for va_arg"); case ABIArgInfo::Ignore: @@ -7011,7 +8620,7 @@ Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, Val = Builder.CreateBitCast(AP, ArgPtrTy); ArgSize = CharUnits::fromQuantity( getDataLayout().getTypeAllocSize(AI.getCoerceToType())); - ArgSize = ArgSize.RoundUpToAlignment(SlotSize); + ArgSize = ArgSize.alignTo(SlotSize); break; case ABIArgInfo::Indirect: Val = Builder.CreateElementBitCast(AP, ArgPtrTy); @@ -7118,7 +8727,7 @@ StringRef TypeStringCache::lookupStr(const IdentifierInfo *ID) { E.State = IncompleteUsed; ++IncompleteUsedCount; } - return E.Str.c_str(); + return E.Str; } /// The XCore ABI includes a type information section that communicates symbol @@ -7142,15 +8751,41 @@ void XCoreTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV, SmallStringEnc Enc; if (getTypeString(Enc, D, CGM, TSC)) { llvm::LLVMContext &Ctx = CGM.getModule().getContext(); - llvm::SmallVector<llvm::Metadata *, 2> MDVals; - MDVals.push_back(llvm::ConstantAsMetadata::get(GV)); - MDVals.push_back(llvm::MDString::get(Ctx, Enc.str())); + llvm::Metadata *MDVals[] = {llvm::ConstantAsMetadata::get(GV), + llvm::MDString::get(Ctx, Enc.str())}; llvm::NamedMDNode *MD = CGM.getModule().getOrInsertNamedMetadata("xcore.typestrings"); MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); } } +//===----------------------------------------------------------------------===// +// SPIR ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { +class SPIRTargetCodeGenInfo : public TargetCodeGenInfo { +public: + SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) + : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} + unsigned getOpenCLKernelCallingConv() const override; +}; + +} // End anonymous namespace. + +namespace clang { +namespace CodeGen { +void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) { + DefaultABIInfo SPIRABI(CGM.getTypes()); + SPIRABI.computeInfo(FI); +} +} +} + +unsigned SPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const { + return llvm::CallingConv::SPIR_KERNEL; +} + static bool appendType(SmallStringEnc &Enc, QualType QType, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC); @@ -7223,7 +8858,7 @@ static bool appendRecordType(SmallStringEnc &Enc, const RecordType *RT, // The ABI requires unions to be sorted but not structures. // See FieldEncoding::operator< for sort algorithm. if (RT->isUnionType()) - std::sort(FE.begin(), FE.end()); + llvm::sort(FE); // We can now complete the TypeString. 
unsigned E = FE.size(); for (unsigned I = 0; I != E; ++I) { @@ -7267,7 +8902,7 @@ static bool appendEnumType(SmallStringEnc &Enc, const EnumType *ET, EnumEnc += '}'; FE.push_back(FieldEncoding(!I->getName().empty(), EnumEnc)); } - std::sort(FE.begin(), FE.end()); + llvm::sort(FE); unsigned E = FE.size(); for (unsigned I = 0; I != E; ++I) { if (I) @@ -7482,118 +9117,334 @@ static bool getTypeString(SmallStringEnc &Enc, const Decl *D, return false; } - //===----------------------------------------------------------------------===// -// Driver code +// RISCV ABI Implementation //===----------------------------------------------------------------------===// -const llvm::Triple &CodeGenModule::getTriple() const { - return getTarget().getTriple(); +namespace { +class RISCVABIInfo : public DefaultABIInfo { +private: + unsigned XLen; // Size of the integer ('x') registers in bits. + static const int NumArgGPRs = 8; + +public: + RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen) + : DefaultABIInfo(CGT), XLen(XLen) {} + + // DefaultABIInfo's classifyReturnType and classifyArgumentType are + // non-virtual, but computeInfo is virtual, so we overload it. + void computeInfo(CGFunctionInfo &FI) const override; + + ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, + int &ArgGPRsLeft) const; + ABIArgInfo classifyReturnType(QualType RetTy) const; + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + + ABIArgInfo extendType(QualType Ty) const; +}; +} // end anonymous namespace + +void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const { + QualType RetTy = FI.getReturnType(); + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(RetTy); + + // IsRetIndirect is true if classifyArgumentType indicated the value should + // be passed indirect or if the type size is greater than 2*xlen. e.g. fp128 + // is passed direct in LLVM IR, relying on the backend lowering code to + // rewrite the argument list and pass indirectly on RV32. + bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect || + getContext().getTypeSize(RetTy) > (2 * XLen); + + // We must track the number of GPRs used in order to conform to the RISC-V + // ABI, as integer scalars passed in registers should have signext/zeroext + // when promoted, but are anyext if passed on the stack. As GPR usage is + // different for variadic arguments, we must also track whether we are + // examining a vararg or not. + int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; + int NumFixedArgs = FI.getNumRequiredArgs(); + + int ArgNum = 0; + for (auto &ArgInfo : FI.arguments()) { + bool IsFixed = ArgNum < NumFixedArgs; + ArgInfo.info = classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft); + ArgNum++; + } +} + +ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, + int &ArgGPRsLeft) const { + assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow"); + Ty = useFirstFieldIfTransparentUnion(Ty); + + // Structures with either a non-trivial destructor or a non-trivial + // copy constructor are always passed indirectly. + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { + if (ArgGPRsLeft) + ArgGPRsLeft -= 1; + return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == + CGCXXABI::RAA_DirectInMemory); + } + + // Ignore empty structs/unions. 
+ if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + uint64_t Size = getContext().getTypeSize(Ty); + uint64_t NeededAlign = getContext().getTypeAlign(Ty); + bool MustUseStack = false; + // Determine the number of GPRs needed to pass the current argument + // according to the ABI. 2*XLen-aligned varargs are passed in "aligned" + // register pairs, so may consume 3 registers. + int NeededArgGPRs = 1; + if (!IsFixed && NeededAlign == 2 * XLen) + NeededArgGPRs = 2 + (ArgGPRsLeft % 2); + else if (Size > XLen && Size <= 2 * XLen) + NeededArgGPRs = 2; + + if (NeededArgGPRs > ArgGPRsLeft) { + MustUseStack = true; + NeededArgGPRs = ArgGPRsLeft; + } + + ArgGPRsLeft -= NeededArgGPRs; + + if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) { + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + // All integral types are promoted to XLen width, unless passed on the + // stack. + if (Size < XLen && Ty->isIntegralOrEnumerationType() && !MustUseStack) { + return extendType(Ty); + } + + return ABIArgInfo::getDirect(); + } + + // Aggregates which are <= 2*XLen will be passed in registers if possible, + // so coerce to integers. + if (Size <= 2 * XLen) { + unsigned Alignment = getContext().getTypeAlign(Ty); + + // Use a single XLen int if possible, 2*XLen if 2*XLen alignment is + // required, and a 2-element XLen array if only XLen alignment is required. + if (Size <= XLen) { + return ABIArgInfo::getDirect( + llvm::IntegerType::get(getVMContext(), XLen)); + } else if (Alignment == 2 * XLen) { + return ABIArgInfo::getDirect( + llvm::IntegerType::get(getVMContext(), 2 * XLen)); + } else { + return ABIArgInfo::getDirect(llvm::ArrayType::get( + llvm::IntegerType::get(getVMContext(), XLen), 2)); + } + } + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); +} + +ABIArgInfo RISCVABIInfo::classifyReturnType(QualType RetTy) const { + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + + int ArgGPRsLeft = 2; + + // The rules for return and argument types are the same, so defer to + // classifyArgumentType. + return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft); +} + +Address RISCVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + CharUnits SlotSize = CharUnits::fromQuantity(XLen / 8); + + // Empty records are ignored for parameter passing purposes. + if (isEmptyRecord(getContext(), Ty, true)) { + Address Addr(CGF.Builder.CreateLoad(VAListAddr), SlotSize); + Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); + return Addr; + } + + std::pair<CharUnits, CharUnits> SizeAndAlign = + getContext().getTypeInfoInChars(Ty); + + // Arguments bigger than 2*Xlen bytes are passed indirectly. + bool IsIndirect = SizeAndAlign.first > 2 * SlotSize; + + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, SizeAndAlign, + SlotSize, /*AllowHigherAlign=*/true); +} + +ABIArgInfo RISCVABIInfo::extendType(QualType Ty) const { + int TySize = getContext().getTypeSize(Ty); + // RV64 ABI requires unsigned 32 bit integers to be sign extended. 
+ if (XLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) + return ABIArgInfo::getSignExtend(Ty); + return ABIArgInfo::getExtend(Ty); } +namespace { +class RISCVTargetCodeGenInfo : public TargetCodeGenInfo { +public: + RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen) + : TargetCodeGenInfo(new RISCVABIInfo(CGT, XLen)) {} + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override { + const auto *FD = dyn_cast_or_null<FunctionDecl>(D); + if (!FD) return; + + const auto *Attr = FD->getAttr<RISCVInterruptAttr>(); + if (!Attr) + return; + + const char *Kind; + switch (Attr->getInterrupt()) { + case RISCVInterruptAttr::user: Kind = "user"; break; + case RISCVInterruptAttr::supervisor: Kind = "supervisor"; break; + case RISCVInterruptAttr::machine: Kind = "machine"; break; + } + + auto *Fn = cast<llvm::Function>(GV); + + Fn->addFnAttr("interrupt", Kind); + } +}; +} // namespace + +//===----------------------------------------------------------------------===// +// Driver code +//===----------------------------------------------------------------------===// + bool CodeGenModule::supportsCOMDAT() const { - return !getTriple().isOSBinFormatMachO(); + return getTriple().supportsCOMDAT(); } const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { if (TheTargetCodeGenInfo) return *TheTargetCodeGenInfo; + // Helper to set the unique_ptr while still keeping the return value. + auto SetCGInfo = [&](TargetCodeGenInfo *P) -> const TargetCodeGenInfo & { + this->TheTargetCodeGenInfo.reset(P); + return *P; + }; + const llvm::Triple &Triple = getTarget().getTriple(); switch (Triple.getArch()) { default: - return *(TheTargetCodeGenInfo = new DefaultTargetCodeGenInfo(Types)); + return SetCGInfo(new DefaultTargetCodeGenInfo(Types)); case llvm::Triple::le32: - return *(TheTargetCodeGenInfo = new PNaClTargetCodeGenInfo(Types)); + return SetCGInfo(new PNaClTargetCodeGenInfo(Types)); case llvm::Triple::mips: case llvm::Triple::mipsel: if (Triple.getOS() == llvm::Triple::NaCl) - return *(TheTargetCodeGenInfo = new PNaClTargetCodeGenInfo(Types)); - return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, true)); + return SetCGInfo(new PNaClTargetCodeGenInfo(Types)); + return SetCGInfo(new MIPSTargetCodeGenInfo(Types, true)); case llvm::Triple::mips64: case llvm::Triple::mips64el: - return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, false)); + return SetCGInfo(new MIPSTargetCodeGenInfo(Types, false)); + + case llvm::Triple::avr: + return SetCGInfo(new AVRTargetCodeGenInfo(Types)); case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: { AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS; if (getTarget().getABI() == "darwinpcs") Kind = AArch64ABIInfo::DarwinPCS; + else if (Triple.isOSWindows()) + return SetCGInfo( + new WindowsAArch64TargetCodeGenInfo(Types, AArch64ABIInfo::Win64)); - return *(TheTargetCodeGenInfo = new AArch64TargetCodeGenInfo(Types, Kind)); + return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind)); } case llvm::Triple::wasm32: case llvm::Triple::wasm64: - return *(TheTargetCodeGenInfo = new WebAssemblyTargetCodeGenInfo(Types)); + return SetCGInfo(new WebAssemblyTargetCodeGenInfo(Types)); case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: - case llvm::Triple::thumbeb: - { - if (Triple.getOS() == llvm::Triple::Win32) { - TheTargetCodeGenInfo = - new WindowsARMTargetCodeGenInfo(Types, ARMABIInfo::AAPCS_VFP); - return *TheTargetCodeGenInfo; - } + case 
llvm::Triple::thumbeb: { + if (Triple.getOS() == llvm::Triple::Win32) { + return SetCGInfo( + new WindowsARMTargetCodeGenInfo(Types, ARMABIInfo::AAPCS_VFP)); + } - ARMABIInfo::ABIKind Kind = ARMABIInfo::AAPCS; - StringRef ABIStr = getTarget().getABI(); - if (ABIStr == "apcs-gnu") - Kind = ARMABIInfo::APCS; - else if (ABIStr == "aapcs16") - Kind = ARMABIInfo::AAPCS16_VFP; - else if (CodeGenOpts.FloatABI == "hard" || - (CodeGenOpts.FloatABI != "soft" && - Triple.getEnvironment() == llvm::Triple::GNUEABIHF)) - Kind = ARMABIInfo::AAPCS_VFP; + ARMABIInfo::ABIKind Kind = ARMABIInfo::AAPCS; + StringRef ABIStr = getTarget().getABI(); + if (ABIStr == "apcs-gnu") + Kind = ARMABIInfo::APCS; + else if (ABIStr == "aapcs16") + Kind = ARMABIInfo::AAPCS16_VFP; + else if (CodeGenOpts.FloatABI == "hard" || + (CodeGenOpts.FloatABI != "soft" && + (Triple.getEnvironment() == llvm::Triple::GNUEABIHF || + Triple.getEnvironment() == llvm::Triple::MuslEABIHF || + Triple.getEnvironment() == llvm::Triple::EABIHF))) + Kind = ARMABIInfo::AAPCS_VFP; - return *(TheTargetCodeGenInfo = new ARMTargetCodeGenInfo(Types, Kind)); - } + return SetCGInfo(new ARMTargetCodeGenInfo(Types, Kind)); + } - case llvm::Triple::ppc: - return *(TheTargetCodeGenInfo = - new PPC32TargetCodeGenInfo(Types, CodeGenOpts.FloatABI == "soft")); + case llvm::Triple::ppc: { + bool RetSmallStructInRegABI = + PPC32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts); + return SetCGInfo( + new PPC32TargetCodeGenInfo(Types, CodeGenOpts.FloatABI == "soft", + RetSmallStructInRegABI)); + } case llvm::Triple::ppc64: if (Triple.isOSBinFormatELF()) { PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1; if (getTarget().getABI() == "elfv2") Kind = PPC64_SVR4_ABIInfo::ELFv2; bool HasQPX = getTarget().getABI() == "elfv1-qpx"; + bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; - return *(TheTargetCodeGenInfo = - new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX)); + return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX, + IsSoftFloat)); } else - return *(TheTargetCodeGenInfo = new PPC64TargetCodeGenInfo(Types)); + return SetCGInfo(new PPC64TargetCodeGenInfo(Types)); case llvm::Triple::ppc64le: { assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!"); PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv2; if (getTarget().getABI() == "elfv1" || getTarget().getABI() == "elfv1-qpx") Kind = PPC64_SVR4_ABIInfo::ELFv1; bool HasQPX = getTarget().getABI() == "elfv1-qpx"; + bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; - return *(TheTargetCodeGenInfo = - new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX)); + return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX, + IsSoftFloat)); } case llvm::Triple::nvptx: case llvm::Triple::nvptx64: - return *(TheTargetCodeGenInfo = new NVPTXTargetCodeGenInfo(Types)); + return SetCGInfo(new NVPTXTargetCodeGenInfo(Types)); case llvm::Triple::msp430: - return *(TheTargetCodeGenInfo = new MSP430TargetCodeGenInfo(Types)); + return SetCGInfo(new MSP430TargetCodeGenInfo(Types)); + + case llvm::Triple::riscv32: + return SetCGInfo(new RISCVTargetCodeGenInfo(Types, 32)); + case llvm::Triple::riscv64: + return SetCGInfo(new RISCVTargetCodeGenInfo(Types, 64)); case llvm::Triple::systemz: { bool HasVector = getTarget().getABI() == "vector"; - return *(TheTargetCodeGenInfo = new SystemZTargetCodeGenInfo(Types, - HasVector)); + return SetCGInfo(new SystemZTargetCodeGenInfo(Types, HasVector)); } case llvm::Triple::tce: - return *(TheTargetCodeGenInfo = new 
TCETargetCodeGenInfo(Types)); + case llvm::Triple::tcele: + return SetCGInfo(new TCETargetCodeGenInfo(Types)); case llvm::Triple::x86: { bool IsDarwinVectorABI = Triple.isOSDarwin(); @@ -7602,44 +9453,156 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { bool IsWin32FloatStructABI = Triple.isOSWindows() && !Triple.isOSCygMing(); if (Triple.getOS() == llvm::Triple::Win32) { - return *(TheTargetCodeGenInfo = new WinX86_32TargetCodeGenInfo( - Types, IsDarwinVectorABI, RetSmallStructInRegABI, - IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters)); + return SetCGInfo(new WinX86_32TargetCodeGenInfo( + Types, IsDarwinVectorABI, RetSmallStructInRegABI, + IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters)); } else { - return *(TheTargetCodeGenInfo = new X86_32TargetCodeGenInfo( - Types, IsDarwinVectorABI, RetSmallStructInRegABI, - IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters, - CodeGenOpts.FloatABI == "soft")); + return SetCGInfo(new X86_32TargetCodeGenInfo( + Types, IsDarwinVectorABI, RetSmallStructInRegABI, + IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters, + CodeGenOpts.FloatABI == "soft")); } } case llvm::Triple::x86_64: { StringRef ABI = getTarget().getABI(); - X86AVXABILevel AVXLevel = (ABI == "avx512" ? X86AVXABILevel::AVX512 : - ABI == "avx" ? X86AVXABILevel::AVX : - X86AVXABILevel::None); + X86AVXABILevel AVXLevel = + (ABI == "avx512" + ? X86AVXABILevel::AVX512 + : ABI == "avx" ? X86AVXABILevel::AVX : X86AVXABILevel::None); switch (Triple.getOS()) { case llvm::Triple::Win32: - return *(TheTargetCodeGenInfo = - new WinX86_64TargetCodeGenInfo(Types, AVXLevel)); + return SetCGInfo(new WinX86_64TargetCodeGenInfo(Types, AVXLevel)); case llvm::Triple::PS4: - return *(TheTargetCodeGenInfo = - new PS4TargetCodeGenInfo(Types, AVXLevel)); + return SetCGInfo(new PS4TargetCodeGenInfo(Types, AVXLevel)); default: - return *(TheTargetCodeGenInfo = - new X86_64TargetCodeGenInfo(Types, AVXLevel)); + return SetCGInfo(new X86_64TargetCodeGenInfo(Types, AVXLevel)); } } case llvm::Triple::hexagon: - return *(TheTargetCodeGenInfo = new HexagonTargetCodeGenInfo(Types)); + return SetCGInfo(new HexagonTargetCodeGenInfo(Types)); + case llvm::Triple::lanai: + return SetCGInfo(new LanaiTargetCodeGenInfo(Types)); case llvm::Triple::r600: - return *(TheTargetCodeGenInfo = new AMDGPUTargetCodeGenInfo(Types)); + return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types)); case llvm::Triple::amdgcn: - return *(TheTargetCodeGenInfo = new AMDGPUTargetCodeGenInfo(Types)); + return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types)); + case llvm::Triple::sparc: + return SetCGInfo(new SparcV8TargetCodeGenInfo(Types)); case llvm::Triple::sparcv9: - return *(TheTargetCodeGenInfo = new SparcV9TargetCodeGenInfo(Types)); + return SetCGInfo(new SparcV9TargetCodeGenInfo(Types)); case llvm::Triple::xcore: - return *(TheTargetCodeGenInfo = new XCoreTargetCodeGenInfo(Types)); + return SetCGInfo(new XCoreTargetCodeGenInfo(Types)); + case llvm::Triple::arc: + return SetCGInfo(new ARCTargetCodeGenInfo(Types)); + case llvm::Triple::spir: + case llvm::Triple::spir64: + return SetCGInfo(new SPIRTargetCodeGenInfo(Types)); } } + +/// Create an OpenCL kernel for an enqueued block. +/// +/// The kernel has the same function type as the block invoke function. Its +/// name is the name of the block invoke function postfixed with "_kernel". +/// It simply calls the block invoke function then returns. 
+llvm::Function * +TargetCodeGenInfo::createEnqueuedBlockKernel(CodeGenFunction &CGF, + llvm::Function *Invoke, + llvm::Value *BlockLiteral) const { + auto *InvokeFT = Invoke->getFunctionType(); + llvm::SmallVector<llvm::Type *, 2> ArgTys; + for (auto &P : InvokeFT->params()) + ArgTys.push_back(P); + auto &C = CGF.getLLVMContext(); + std::string Name = Invoke->getName().str() + "_kernel"; + auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false); + auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name, + &CGF.CGM.getModule()); + auto IP = CGF.Builder.saveIP(); + auto *BB = llvm::BasicBlock::Create(C, "entry", F); + auto &Builder = CGF.Builder; + Builder.SetInsertPoint(BB); + llvm::SmallVector<llvm::Value *, 2> Args; + for (auto &A : F->args()) + Args.push_back(&A); + Builder.CreateCall(Invoke, Args); + Builder.CreateRetVoid(); + Builder.restoreIP(IP); + return F; +} + +/// Create an OpenCL kernel for an enqueued block. +/// +/// The type of the first argument (the block literal) is the struct type +/// of the block literal instead of a pointer type. The first argument +/// (block literal) is passed directly by value to the kernel. The kernel +/// allocates the same type of struct on stack and stores the block literal +/// to it and passes its pointer to the block invoke function. The kernel +/// has "enqueued-block" function attribute and kernel argument metadata. +llvm::Function *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel( + CodeGenFunction &CGF, llvm::Function *Invoke, + llvm::Value *BlockLiteral) const { + auto &Builder = CGF.Builder; + auto &C = CGF.getLLVMContext(); + + auto *BlockTy = BlockLiteral->getType()->getPointerElementType(); + auto *InvokeFT = Invoke->getFunctionType(); + llvm::SmallVector<llvm::Type *, 2> ArgTys; + llvm::SmallVector<llvm::Metadata *, 8> AddressQuals; + llvm::SmallVector<llvm::Metadata *, 8> AccessQuals; + llvm::SmallVector<llvm::Metadata *, 8> ArgTypeNames; + llvm::SmallVector<llvm::Metadata *, 8> ArgBaseTypeNames; + llvm::SmallVector<llvm::Metadata *, 8> ArgTypeQuals; + llvm::SmallVector<llvm::Metadata *, 8> ArgNames; + + ArgTys.push_back(BlockTy); + ArgTypeNames.push_back(llvm::MDString::get(C, "__block_literal")); + AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0))); + ArgBaseTypeNames.push_back(llvm::MDString::get(C, "__block_literal")); + ArgTypeQuals.push_back(llvm::MDString::get(C, "")); + AccessQuals.push_back(llvm::MDString::get(C, "none")); + ArgNames.push_back(llvm::MDString::get(C, "block_literal")); + for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) { + ArgTys.push_back(InvokeFT->getParamType(I)); + ArgTypeNames.push_back(llvm::MDString::get(C, "void*")); + AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3))); + AccessQuals.push_back(llvm::MDString::get(C, "none")); + ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*")); + ArgTypeQuals.push_back(llvm::MDString::get(C, "")); + ArgNames.push_back( + llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str())); + } + std::string Name = Invoke->getName().str() + "_kernel"; + auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false); + auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name, + &CGF.CGM.getModule()); + F->addFnAttr("enqueued-block"); + auto IP = CGF.Builder.saveIP(); + auto *BB = llvm::BasicBlock::Create(C, "entry", F); + Builder.SetInsertPoint(BB); + unsigned BlockAlign = 
CGF.CGM.getDataLayout().getPrefTypeAlignment(BlockTy);
+  auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr);
+  BlockPtr->setAlignment(BlockAlign);
+  Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);
+  auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
+  llvm::SmallVector<llvm::Value *, 2> Args;
+  Args.push_back(Cast);
+  for (auto I = F->arg_begin() + 1, E = F->arg_end(); I != E; ++I)
+    Args.push_back(I);
+  Builder.CreateCall(Invoke, Args);
+  Builder.CreateRetVoid();
+  Builder.restoreIP(IP);
+
+  F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals));
+  F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals));
+  F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames));
+  F->setMetadata("kernel_arg_base_type",
+                 llvm::MDNode::get(C, ArgBaseTypeNames));
+  F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals));
+  if (CGF.CGM.getCodeGenOpts().EmitOpenCLArgMetadata)
+    F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames));
+
+  return F;
+}
diff --git a/gnu/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp b/gnu/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp
index 760820d2ce0..f155c409f9a 100644
--- a/gnu/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/gnu/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3870,6 +3870,19 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back(A->getValue());
   }
 
+  if (Arg *A = Args.getLastArg(options::OPT_maix_struct_return,
+                               options::OPT_msvr4_struct_return)) {
+    if (TC.getArch() != llvm::Triple::ppc) {
+      D.Diag(diag::err_drv_unsupported_opt_for_target)
+          << A->getSpelling() << RawTriple.str();
+    } else if (A->getOption().matches(options::OPT_maix_struct_return)) {
+      CmdArgs.push_back("-maix-struct-return");
+    } else {
+      assert(A->getOption().matches(options::OPT_msvr4_struct_return));
+      CmdArgs.push_back("-msvr4-struct-return");
+    }
+  }
+
   if (Arg *A = Args.getLastArg(options::OPT_fpcc_struct_return,
                                options::OPT_freg_struct_return)) {
     if (TC.getArch() != llvm::Triple::x86) {
diff --git a/gnu/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp b/gnu/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp
index a7c9b352584..4509e4ff35a 100644
--- a/gnu/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/gnu/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1199,11 +1199,18 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
       Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val;
   }
 
-  if (Arg *A = Args.getLastArg(OPT_fpcc_struct_return, OPT_freg_struct_return)) {
-    if (A->getOption().matches(OPT_fpcc_struct_return)) {
+  // X86_32 has -fpcc-struct-return and -freg-struct-return.
+  // PPC32 has -maix-struct-return and -msvr4-struct-return.
+  if (Arg *A =
+          Args.getLastArg(OPT_fpcc_struct_return, OPT_freg_struct_return,
+                          OPT_maix_struct_return, OPT_msvr4_struct_return)) {
+    const Option &O = A->getOption();
+    if (O.matches(OPT_fpcc_struct_return) ||
+        O.matches(OPT_maix_struct_return)) {
       Opts.setStructReturnConvention(CodeGenOptions::SRCK_OnStack);
     } else {
-      assert(A->getOption().matches(OPT_freg_struct_return));
+      assert(O.matches(OPT_freg_struct_return) ||
+             O.matches(OPT_msvr4_struct_return));
       Opts.setStructReturnConvention(CodeGenOptions::SRCK_InRegs);
     }
   }
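Taken together, the driver hunk forwards the two new flags (rejecting them for non-PowerPC targets) and the frontend hunk maps them onto the existing struct-return convention: -maix-struct-return selects SRCK_OnStack and -msvr4-struct-return selects SRCK_InRegs. A minimal sketch of the user-visible difference (the target triple and IR shapes are illustrative, not taken from the patch):

    /* sret.c -- compare the two PPC32 struct-return conventions, e.g.:
     *   clang -target powerpc-unknown-openbsd -msvr4-struct-return -S -emit-llvm sret.c
     *   clang -target powerpc-unknown-openbsd -maix-struct-return -S -emit-llvm sret.c
     */
    struct pair { int a, b; };            /* 8 bytes: "small" for SVR4 */

    struct pair make_pair(int a, int b) {
      struct pair p = { a, b };
      return p; /* svr4: returned in registers (roughly define i64 @make_pair);
                   aix: returned through a hidden sret pointer in memory */
    }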