diff options
author | Patrick Wildt <patrick@cvs.openbsd.org> | 2020-08-03 14:32:30 +0000 |
---|---|---|
committer | Patrick Wildt <patrick@cvs.openbsd.org> | 2020-08-03 14:32:30 +0000 |
commit | 62bd9971468f5f1112094115afe598c4ef33e18e (patch) | |
tree | d756ee9dda0093fe1346026043d622f350505a1a | |
parent | dc27ca34090c6cf8ef00ce9e77af3f541ca04793 (diff) |
Import LLVM 10.0.0 release including clang, lld and lldb.
ok hackroom
tested by plenty
-rw-r--r-- | gnu/llvm/lld/ELF/InputFiles.cpp | 666 |
1 files changed, 195 insertions, 471 deletions
diff --git a/gnu/llvm/lld/ELF/InputFiles.cpp b/gnu/llvm/lld/ELF/InputFiles.cpp index d5b9efbe18f..43978cd66c6 100644 --- a/gnu/llvm/lld/ELF/InputFiles.cpp +++ b/gnu/llvm/lld/ELF/InputFiles.cpp @@ -27,7 +27,6 @@ #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Path.h" -#include "llvm/Support/RISCVAttributeParser.h" #include "llvm/Support/TarWriter.h" #include "llvm/Support/raw_ostream.h" @@ -37,35 +36,33 @@ using namespace llvm::object; using namespace llvm::sys; using namespace llvm::sys::fs; using namespace llvm::support::endian; -using namespace lld; -using namespace lld::elf; - -bool InputFile::isInGroup; -uint32_t InputFile::nextGroupId; - -std::vector<ArchiveFile *> elf::archiveFiles; -std::vector<BinaryFile *> elf::binaryFiles; -std::vector<BitcodeFile *> elf::bitcodeFiles; -std::vector<LazyObjFile *> elf::lazyObjFiles; -std::vector<InputFile *> elf::objectFiles; -std::vector<SharedFile *> elf::sharedFiles; - -std::unique_ptr<TarWriter> elf::tar; +namespace lld { // Returns "<internal>", "foo.a(bar.o)" or "baz.o". -std::string lld::toString(const InputFile *f) { +std::string toString(const elf::InputFile *f) { if (!f) return "<internal>"; if (f->toStringCache.empty()) { if (f->archiveName.empty()) - f->toStringCache = std::string(f->getName()); + f->toStringCache = f->getName(); else f->toStringCache = (f->archiveName + "(" + f->getName() + ")").str(); } return f->toStringCache; } +namespace elf { +bool InputFile::isInGroup; +uint32_t InputFile::nextGroupId; +std::vector<BinaryFile *> binaryFiles; +std::vector<BitcodeFile *> bitcodeFiles; +std::vector<LazyObjFile *> lazyObjFiles; +std::vector<InputFile *> objectFiles; +std::vector<SharedFile *> sharedFiles; + +std::unique_ptr<TarWriter> tar; + static ELFKind getELFKind(MemoryBufferRef mb, StringRef archiveName) { unsigned char size; unsigned char endian; @@ -104,19 +101,15 @@ InputFile::InputFile(Kind k, MemoryBufferRef m) ++nextGroupId; } -Optional<MemoryBufferRef> elf::readFile(StringRef path) { - llvm::TimeTraceScope timeScope("Load input files", path); - +Optional<MemoryBufferRef> readFile(StringRef path) { // The --chroot option changes our virtual root directory. // This is useful when you are dealing with files created by --reproduce. if (!config->chroot.empty() && path.startswith("/")) path = saver.save(config->chroot + path); log(path); - config->dependencyFiles.insert(llvm::CachedHashString(path)); - auto mbOrErr = MemoryBuffer::getFile(path, /*IsText=*/false, - /*RequiresNullTerminator=*/false); + auto mbOrErr = MemoryBuffer::getFile(path, -1, false); if (auto ec = mbOrErr.getError()) { error("cannot open " + path + ": " + ec.message()); return None; @@ -145,10 +138,8 @@ static bool isCompatible(InputFile *file) { return true; } - StringRef target = - !config->bfdname.empty() ? config->bfdname : config->emulation; - if (!target.empty()) { - error(toString(file) + " is incompatible with " + target); + if (!config->emulation.empty()) { + error(toString(file) + " is incompatible with " + config->emulation); return false; } @@ -157,11 +148,8 @@ static bool isCompatible(InputFile *file) { existing = objectFiles[0]; else if (!sharedFiles.empty()) existing = sharedFiles[0]; - else if (!bitcodeFiles.empty()) - existing = bitcodeFiles[0]; else - llvm_unreachable("Must have -m, OUTPUT_FORMAT or existing input file to " - "determine target emulation"); + existing = bitcodeFiles[0]; error(toString(file) + " is incompatible with " + toString(existing)); return false; @@ -180,7 +168,6 @@ template <class ELFT> static void doParseFile(InputFile *file) { // .a file if (auto *f = dyn_cast<ArchiveFile>(file)) { - archiveFiles.push_back(f); f->parse(); return; } @@ -214,7 +201,7 @@ template <class ELFT> static void doParseFile(InputFile *file) { } // Add symbols in File to the symbol table. -void elf::parseFile(InputFile *file) { +void parseFile(InputFile *file) { switch (config->ekind) { case ELF32LEKind: doParseFile<ELF32LE>(file); @@ -235,7 +222,7 @@ void elf::parseFile(InputFile *file) { // Concatenates arguments to construct a string representing an error location. static std::string createFileLineMsg(StringRef path, unsigned line) { - std::string filename = std::string(path::filename(path)); + std::string filename = path::filename(path); std::string lineno = ":" + std::to_string(line); if (filename == path) return filename + lineno; @@ -256,7 +243,7 @@ static std::string getSrcMsgAux(ObjFile<ELFT> &file, const Symbol &sym, return createFileLineMsg(fileLine->first, fileLine->second); // File.sourceFile contains STT_FILE symbol, and that is a last resort. - return std::string(file.sourceFile); + return file.sourceFile; } std::string InputFile::getSrcMsg(const Symbol &sym, InputSectionBase &sec, @@ -277,27 +264,9 @@ std::string InputFile::getSrcMsg(const Symbol &sym, InputSectionBase &sec, } } -StringRef InputFile::getNameForScript() const { - if (archiveName.empty()) - return getName(); - - if (nameForScriptCache.empty()) - nameForScriptCache = (archiveName + Twine(':') + getName()).str(); - - return nameForScriptCache; -} - -template <class ELFT> DWARFCache *ObjFile<ELFT>::getDwarf() { - llvm::call_once(initDwarf, [this]() { - dwarf = std::make_unique<DWARFCache>(std::make_unique<DWARFContext>( - std::make_unique<LLDDwarfObj<ELFT>>(this), "", - [&](Error err) { warn(getName() + ": " + toString(std::move(err))); }, - [&](Error warning) { - warn(getName() + ": " + toString(std::move(warning))); - })); - }); - - return dwarf.get(); +template <class ELFT> void ObjFile<ELFT>::initializeDwarf() { + dwarf = make<DWARFCache>(std::make_unique<DWARFContext>( + std::make_unique<LLDDwarfObj<ELFT>>(this))); } // Returns the pair of file name and line number describing location of data @@ -305,7 +274,9 @@ template <class ELFT> DWARFCache *ObjFile<ELFT>::getDwarf() { template <class ELFT> Optional<std::pair<std::string, unsigned>> ObjFile<ELFT>::getVariableLoc(StringRef name) { - return getDwarf()->getVariableLoc(name); + llvm::call_once(initDwarfLine, [this]() { initializeDwarf(); }); + + return dwarf->getVariableLoc(name); } // Returns source line information for a given offset @@ -313,6 +284,8 @@ ObjFile<ELFT>::getVariableLoc(StringRef name) { template <class ELFT> Optional<DILineInfo> ObjFile<ELFT>::getDILineInfo(InputSectionBase *s, uint64_t offset) { + llvm::call_once(initDwarfLine, [this]() { initializeDwarf(); }); + // Detect SectionIndex for specified section. uint64_t sectionIndex = object::SectionedAddress::UndefSection; ArrayRef<InputSectionBase *> sections = s->file->getSections(); @@ -323,7 +296,9 @@ Optional<DILineInfo> ObjFile<ELFT>::getDILineInfo(InputSectionBase *s, } } - return getDwarf()->getDILineInfo(offset, sectionIndex); + // Use fake address calculated by adding section file offset and offset in + // section. See comments for ObjectInfo class. + return dwarf->getDILineInfo(s->getOffsetInFile() + offset, sectionIndex); } ELFFileBase::ELFFileBase(Kind k, MemoryBufferRef mb) : InputFile(k, mb) { @@ -361,9 +336,9 @@ template <class ELFT> void ELFFileBase::init() { // Initialize trivial attributes. const ELFFile<ELFT> &obj = getObj<ELFT>(); - emachine = obj.getHeader().e_machine; - osabi = obj.getHeader().e_ident[llvm::ELF::EI_OSABI]; - abiVersion = obj.getHeader().e_ident[llvm::ELF::EI_ABIVERSION]; + emachine = obj.getHeader()->e_machine; + osabi = obj.getHeader()->e_ident[llvm::ELF::EI_OSABI]; + abiVersion = obj.getHeader()->e_ident[llvm::ELF::EI_ABIVERSION]; ArrayRef<Elf_Shdr> sections = CHECK(obj.sections(), this); @@ -391,7 +366,7 @@ template <class ELFT> void ELFFileBase::init() { template <class ELFT> uint32_t ObjFile<ELFT>::getSectionIndex(const Elf_Sym &sym) const { return CHECK( - this->getObj().getSectionIndex(sym, getELFSyms<ELFT>(), shndxTable), + this->getObj().getSectionIndex(&sym, getELFSyms<ELFT>(), shndxTable), this); } @@ -442,9 +417,6 @@ StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> sections, template <class ELFT> bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &sec, StringRef name) { - if (!(sec.sh_flags & SHF_MERGE)) - return false; - // On a regular link we don't merge sections if -O0 (default is -O1). This // sometimes makes the linker significantly faster, although the output will // be bigger. @@ -480,7 +452,10 @@ bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &sec, StringRef name) { Twine(sec.sh_size) + ") must be a multiple of sh_entsize (" + Twine(entSize) + ")"); - if (sec.sh_flags & SHF_WRITE) + uint64_t flags = sec.sh_flags; + if (!(flags & SHF_MERGE)) + return false; + if (flags & SHF_WRITE) fatal(toString(this) + ":(" + name + "): writable SHF_MERGE section is not supported"); @@ -578,7 +553,8 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) { const Elf_Shdr &sec = objSections[i]; if (sec.sh_type == ELF::SHT_LLVM_CALL_GRAPH_PROFILE) - cgProfileSectionIndex = i; + cgProfile = + check(obj.template getSectionContentsAsArray<Elf_CGProfile>(&sec)); // SHF_EXCLUDE'ed sections are discarded by the linker. However, // if -r is given, we'll let the final link discard such sections. @@ -592,12 +568,8 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) { if (sec.sh_link != 0) this->addrsigSec = &sec; else if (config->icf == ICFLevel::Safe) - warn(toString(this) + - ": --icf=safe conservatively ignores " - "SHT_LLVM_ADDRSIG [index " + - Twine(i) + - "] with sh_link=0 " - "(likely created using objcopy or ld -r)"); + warn(toString(this) + ": --icf=safe is incompatible with object " + "files created using objcopy or ld -r"); } this->sections[i] = &InputSection::discarded; continue; @@ -609,20 +581,27 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) { StringRef signature = getShtGroupSignature(objSections, sec); this->sections[i] = &InputSection::discarded; + ArrayRef<Elf_Word> entries = - CHECK(obj.template getSectionContentsAsArray<Elf_Word>(sec), this); + CHECK(obj.template getSectionContentsAsArray<Elf_Word>(&sec), this); if (entries.empty()) fatal(toString(this) + ": empty SHT_GROUP"); - Elf_Word flag = entries[0]; - if (flag && flag != GRP_COMDAT) + // The first word of a SHT_GROUP section contains flags. Currently, + // the standard defines only "GRP_COMDAT" flag for the COMDAT group. + // An group with the empty flag doesn't define anything; such sections + // are just skipped. + if (entries[0] == 0) + continue; + + if (entries[0] != GRP_COMDAT) fatal(toString(this) + ": unsupported SHT_GROUP format"); - bool keepGroup = - (flag & GRP_COMDAT) == 0 || ignoreComdats || + bool isNew = + ignoreComdats || symtab->comdatGroups.try_emplace(CachedHashStringRef(signature), this) .second; - if (keepGroup) { + if (isNew) { if (config->relocatable) this->sections[i] = createInputSection(sec); selectedGroups.push_back(entries); @@ -643,8 +622,6 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) { break; case SHT_SYMTAB: case SHT_STRTAB: - case SHT_REL: - case SHT_RELA: case SHT_NULL: break; default: @@ -652,34 +629,22 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) { } } - // We have a second loop. It is used to: - // 1) handle SHF_LINK_ORDER sections. - // 2) create SHT_REL[A] sections. In some cases the section header index of a - // relocation section may be smaller than that of the relocated section. In - // such cases, the relocation section would attempt to reference a target - // section that has not yet been created. For simplicity, delay creation of - // relocation sections until now. + // This block handles SHF_LINK_ORDER. for (size_t i = 0, e = objSections.size(); i < e; ++i) { if (this->sections[i] == &InputSection::discarded) continue; const Elf_Shdr &sec = objSections[i]; - - if (sec.sh_type == SHT_REL || sec.sh_type == SHT_RELA) - this->sections[i] = createInputSection(sec); - - // A SHF_LINK_ORDER section with sh_link=0 is handled as if it did not have - // the flag. - if (!(sec.sh_flags & SHF_LINK_ORDER) || !sec.sh_link) + if (!(sec.sh_flags & SHF_LINK_ORDER)) continue; + // .ARM.exidx sections have a reverse dependency on the InputSection they + // have a SHF_LINK_ORDER dependency, this is identified by the sh_link. InputSectionBase *linkSec = nullptr; if (sec.sh_link < this->sections.size()) linkSec = this->sections[sec.sh_link]; if (!linkSec) fatal(toString(this) + ": invalid sh_link index: " + Twine(sec.sh_link)); - // A SHF_LINK_ORDER section is discarded if its linked-to section is - // discarded. InputSection *isec = cast<InputSection>(this->sections[i]); linkSec->dependentSections.push_back(isec); if (!isa<InputSection>(linkSec)) @@ -697,9 +662,7 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) { // the input objects have been compiled. static void updateARMVFPArgs(const ARMAttributeParser &attributes, const InputFile *f) { - Optional<unsigned> attr = - attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args); - if (!attr.hasValue()) + if (!attributes.hasAttribute(ARMBuildAttrs::ABI_VFP_args)) // If an ABI tag isn't present then it is implicitly given the value of 0 // which maps to ARMBuildAttrs::BaseAAPCS. However many assembler files, // including some in glibc that don't use FP args (and should have value 3) @@ -707,7 +670,7 @@ static void updateARMVFPArgs(const ARMAttributeParser &attributes, // as a clash. return; - unsigned vfpArgs = attr.getValue(); + unsigned vfpArgs = attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args); ARMVFPArgKind arg; switch (vfpArgs) { case ARMBuildAttrs::BaseAAPCS: @@ -744,11 +707,9 @@ static void updateARMVFPArgs(const ARMAttributeParser &attributes, // is compiled with an architecture that supports these features then lld is // permitted to use them. static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) { - Optional<unsigned> attr = - attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); - if (!attr.hasValue()) + if (!attributes.hasAttribute(ARMBuildAttrs::CPU_arch)) return; - auto arch = attr.getValue(); + auto arch = attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); switch (arch) { case ARMBuildAttrs::Pre_v4: case ARMBuildAttrs::v4: @@ -790,21 +751,20 @@ static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) { // of zero or more type-length-value fields. We want to find a field of a // certain type. It seems a bit too much to just store a 32-bit value, perhaps // the ABI is unnecessarily complicated. -template <class ELFT> static uint32_t readAndFeatures(const InputSection &sec) { +template <class ELFT> +static uint32_t readAndFeatures(ObjFile<ELFT> *obj, ArrayRef<uint8_t> data) { using Elf_Nhdr = typename ELFT::Nhdr; using Elf_Note = typename ELFT::Note; uint32_t featuresSet = 0; - ArrayRef<uint8_t> data = sec.data(); - auto reportFatal = [&](const uint8_t *place, const char *msg) { - fatal(toString(sec.file) + ":(" + sec.name + "+0x" + - Twine::utohexstr(place - sec.data().data()) + "): " + msg); - }; while (!data.empty()) { // Read one NOTE record. + if (data.size() < sizeof(Elf_Nhdr)) + fatal(toString(obj) + ": .note.gnu.property: section too short"); + auto *nhdr = reinterpret_cast<const Elf_Nhdr *>(data.data()); - if (data.size() < sizeof(Elf_Nhdr) || data.size() < nhdr->getSize()) - reportFatal(data.data(), "data is too short"); + if (data.size() < nhdr->getSize()) + fatal(toString(obj) + ": .note.gnu.property: section too short"); Elf_Note note(*nhdr); if (nhdr->n_type != NT_GNU_PROPERTY_TYPE_0 || note.getName() != "GNU") { @@ -819,26 +779,25 @@ template <class ELFT> static uint32_t readAndFeatures(const InputSection &sec) { // Read a body of a NOTE record, which consists of type-length-value fields. ArrayRef<uint8_t> desc = note.getDesc(); while (!desc.empty()) { - const uint8_t *place = desc.data(); if (desc.size() < 8) - reportFatal(place, "program property is too short"); - uint32_t type = read32<ELFT::TargetEndianness>(desc.data()); - uint32_t size = read32<ELFT::TargetEndianness>(desc.data() + 4); - desc = desc.slice(8); - if (desc.size() < size) - reportFatal(place, "program property is too short"); + fatal(toString(obj) + ": .note.gnu.property: section too short"); + + uint32_t type = read32le(desc.data()); + uint32_t size = read32le(desc.data() + 4); if (type == featureAndType) { // We found a FEATURE_1_AND field. There may be more than one of these // in a .note.gnu.property section, for a relocatable object we // accumulate the bits set. - if (size < 4) - reportFatal(place, "FEATURE_1_AND entry is too short"); - featuresSet |= read32<ELFT::TargetEndianness>(desc.data()); + featuresSet |= read32le(desc.data() + 8); } - // Padding is present in the note descriptor, if necessary. - desc = desc.slice(alignTo<(ELFT::Is64Bits ? 8 : 4)>(size)); + // On 64-bit, a payload may be followed by a 4-byte padding to make its + // size a multiple of 8. + if (ELFT::Is64Bits) + size = alignTo(size, 8); + + desc = desc.slice(size + 8); // +8 for Type and Size } // Go to next NOTE record to look for more FEATURE_1_AND descriptions. @@ -877,58 +836,30 @@ template <class ELFT> InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &sec) { StringRef name = getSectionName(sec); - if (config->emachine == EM_ARM && sec.sh_type == SHT_ARM_ATTRIBUTES) { + switch (sec.sh_type) { + case SHT_ARM_ATTRIBUTES: { + if (config->emachine != EM_ARM) + break; ARMAttributeParser attributes; - ArrayRef<uint8_t> contents = check(this->getObj().getSectionContents(sec)); - if (Error e = attributes.parse(contents, config->ekind == ELF32LEKind - ? support::little - : support::big)) { - auto *isec = make<InputSection>(*this, sec, name); - warn(toString(isec) + ": " + llvm::toString(std::move(e))); - } else { - updateSupportedARMFeatures(attributes); - updateARMVFPArgs(attributes, this); - - // FIXME: Retain the first attribute section we see. The eglibc ARM - // dynamic loaders require the presence of an attribute section for dlopen - // to work. In a full implementation we would merge all attribute - // sections. - if (in.attributes == nullptr) { - in.attributes = make<InputSection>(*this, sec, name); - return in.attributes; - } - return &InputSection::discarded; - } - } - - if (config->emachine == EM_RISCV && sec.sh_type == SHT_RISCV_ATTRIBUTES) { - RISCVAttributeParser attributes; - ArrayRef<uint8_t> contents = check(this->getObj().getSectionContents(sec)); - if (Error e = attributes.parse(contents, support::little)) { - auto *isec = make<InputSection>(*this, sec, name); - warn(toString(isec) + ": " + llvm::toString(std::move(e))); - } else { - // FIXME: Validate arch tag contains C if and only if EF_RISCV_RVC is - // present. - - // FIXME: Retain the first attribute section we see. Tools such as - // llvm-objdump make use of the attribute section to determine which - // standard extensions to enable. In a full implementation we would merge - // all attribute sections. - if (in.attributes == nullptr) { - in.attributes = make<InputSection>(*this, sec, name); - return in.attributes; - } - return &InputSection::discarded; + ArrayRef<uint8_t> contents = check(this->getObj().getSectionContents(&sec)); + attributes.Parse(contents, /*isLittle*/ config->ekind == ELF32LEKind); + updateSupportedARMFeatures(attributes); + updateARMVFPArgs(attributes, this); + + // FIXME: Retain the first attribute section we see. The eglibc ARM + // dynamic loaders require the presence of an attribute section for dlopen + // to work. In a full implementation we would merge all attribute sections. + if (in.armAttributes == nullptr) { + in.armAttributes = make<InputSection>(*this, sec, name); + return in.armAttributes; } + return &InputSection::discarded; } - - switch (sec.sh_type) { case SHT_LLVM_DEPENDENT_LIBRARIES: { if (config->relocatable) break; ArrayRef<char> data = - CHECK(this->getObj().template getSectionContentsAsArray<char>(sec), this); + CHECK(this->getObj().template getSectionContentsAsArray<char>(&sec), this); if (!data.empty() && data.back() != '\0') { error(toString(this) + ": corrupted dependent libraries section (unterminated string): " + @@ -963,34 +894,48 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &sec) { this->sections[sec.sh_info] = target; } + // This section contains relocation information. + // If -r is given, we do not interpret or apply relocation + // but just copy relocation sections to output. + if (config->relocatable) { + InputSection *relocSec = make<InputSection>(*this, sec, name); + // We want to add a dependency to target, similar like we do for + // -emit-relocs below. This is useful for the case when linker script + // contains the "/DISCARD/". It is perhaps uncommon to use a script with + // -r, but we faced it in the Linux kernel and have to handle such case + // and not to crash. + target->dependentSections.push_back(relocSec); + return relocSec; + } + if (target->firstRelocation) fatal(toString(this) + ": multiple relocation sections to one section are not supported"); if (sec.sh_type == SHT_RELA) { - ArrayRef<Elf_Rela> rels = CHECK(getObj().relas(sec), this); + ArrayRef<Elf_Rela> rels = CHECK(getObj().relas(&sec), this); target->firstRelocation = rels.begin(); target->numRelocations = rels.size(); target->areRelocsRela = true; } else { - ArrayRef<Elf_Rel> rels = CHECK(getObj().rels(sec), this); + ArrayRef<Elf_Rel> rels = CHECK(getObj().rels(&sec), this); target->firstRelocation = rels.begin(); target->numRelocations = rels.size(); target->areRelocsRela = false; } assert(isUInt<31>(target->numRelocations)); - // Relocation sections are usually removed from the output, so return - // `nullptr` for the normal case. However, if -r or --emit-relocs is - // specified, we need to copy them to the output. (Some post link analysis - // tools specify --emit-relocs to obtain the information.) - if (!config->relocatable && !config->emitRelocs) - return nullptr; - InputSection *relocSec = make<InputSection>(*this, sec, name); - // If the relocated section is discarded (due to /DISCARD/ or - // --gc-sections), the relocation section should be discarded as well. - target->dependentSections.push_back(relocSec); - return relocSec; + // Relocation sections processed by the linker are usually removed + // from the output, so returning `nullptr` for the normal case. + // However, if -emit-relocs is given, we need to leave them in the output. + // (Some post link analysis tools need this information.) + if (config->emitRelocs) { + InputSection *relocSec = make<InputSection>(*this, sec, name); + // We will not emit relocation section if target was discarded. + target->dependentSections.push_back(relocSec); + return relocSec; + } + return nullptr; } } @@ -1019,7 +964,8 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &sec) { // .note.gnu.property containing a single AND'ed bitmap, we discard an input // file's .note.gnu.property section. if (name == ".note.gnu.property") { - this->andFeatures = readAndFeatures<ELFT>(InputSection(*this, sec, name)); + ArrayRef<uint8_t> contents = check(this->getObj().getSectionContents(&sec)); + this->andFeatures = readAndFeatures(this, contents); return &InputSection::discarded; } @@ -1074,7 +1020,7 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &sec) { template <class ELFT> StringRef ObjFile<ELFT>::getSectionName(const Elf_Shdr &sec) { - return CHECK(getObj().getSectionName(sec, sectionStringTable), this); + return CHECK(getObj().getSectionName(&sec, sectionStringTable), this); } // Initialize this->Symbols. this->Symbols is a parallel array as @@ -1083,72 +1029,55 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() { ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); this->symbols.resize(eSyms.size()); - // Fill in InputFile::symbols. Some entries have been initialized + // Our symbol table may have already been partially initialized // because of LazyObjFile. + for (size_t i = 0, end = eSyms.size(); i != end; ++i) + if (!this->symbols[i] && eSyms[i].getBinding() != STB_LOCAL) + this->symbols[i] = + symtab->insert(CHECK(eSyms[i].getName(this->stringTable), this)); + + // Fill this->Symbols. A symbol is either local or global. for (size_t i = 0, end = eSyms.size(); i != end; ++i) { - if (this->symbols[i]) - continue; const Elf_Sym &eSym = eSyms[i]; + + // Read symbol attributes. uint32_t secIdx = getSectionIndex(eSym); if (secIdx >= this->sections.size()) fatal(toString(this) + ": invalid section index: " + Twine(secIdx)); - if (eSym.getBinding() != STB_LOCAL) { - if (i < firstGlobal) - error(toString(this) + ": non-local symbol (" + Twine(i) + - ") found at index < .symtab's sh_info (" + Twine(firstGlobal) + - ")"); - this->symbols[i] = - symtab->insert(CHECK(eSyms[i].getName(this->stringTable), this)); - continue; - } - - // Handle local symbols. Local symbols are not added to the symbol - // table because they are not visible from other object files. We - // allocate symbol instances and add their pointers to symbols. - if (i >= firstGlobal) - errorOrWarn(toString(this) + ": STB_LOCAL symbol (" + Twine(i) + - ") found at index >= .symtab's sh_info (" + - Twine(firstGlobal) + ")"); InputSectionBase *sec = this->sections[secIdx]; - uint8_t type = eSym.getType(); - if (type == STT_FILE) - sourceFile = CHECK(eSym.getName(this->stringTable), this); - if (this->stringTable.size() <= eSym.st_name) - fatal(toString(this) + ": invalid symbol name offset"); - StringRefZ name = this->stringTable.data() + eSym.st_name; - - if (eSym.st_shndx == SHN_UNDEF) - this->symbols[i] = - make<Undefined>(this, name, STB_LOCAL, eSym.st_other, type); - else if (sec == &InputSection::discarded) - this->symbols[i] = - make<Undefined>(this, name, STB_LOCAL, eSym.st_other, type, - /*discardedSecIdx=*/secIdx); - else - this->symbols[i] = make<Defined>(this, name, STB_LOCAL, eSym.st_other, - type, eSym.st_value, eSym.st_size, sec); - } - - // Symbol resolution of non-local symbols. - SmallVector<unsigned, 32> undefineds; - for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { - const Elf_Sym &eSym = eSyms[i]; uint8_t binding = eSym.getBinding(); - if (binding == STB_LOCAL) - continue; // Errored above. - - uint32_t secIdx = getSectionIndex(eSym); - InputSectionBase *sec = this->sections[secIdx]; uint8_t stOther = eSym.st_other; uint8_t type = eSym.getType(); uint64_t value = eSym.st_value; uint64_t size = eSym.st_size; StringRefZ name = this->stringTable.data() + eSym.st_name; + // Handle local symbols. Local symbols are not added to the symbol + // table because they are not visible from other object files. We + // allocate symbol instances and add their pointers to Symbols. + if (binding == STB_LOCAL) { + if (eSym.getType() == STT_FILE) + sourceFile = CHECK(eSym.getName(this->stringTable), this); + + if (this->stringTable.size() <= eSym.st_name) + fatal(toString(this) + ": invalid symbol name offset"); + + if (eSym.st_shndx == SHN_UNDEF) + this->symbols[i] = make<Undefined>(this, name, binding, stOther, type); + else if (sec == &InputSection::discarded) + this->symbols[i] = make<Undefined>(this, name, binding, stOther, type, + /*DiscardedSecIdx=*/secIdx); + else + this->symbols[i] = + make<Defined>(this, name, binding, stOther, type, value, size, sec); + continue; + } + // Handle global undefined symbols. if (eSym.st_shndx == SHN_UNDEF) { - undefineds.push_back(i); + this->symbols[i]->resolve(Undefined{this, name, binding, stOther, type}); + this->symbols[i]->referenced = true; continue; } @@ -1168,20 +1097,8 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() { // COMDAT member sections, and if a comdat group is discarded, some // defined symbol in a .eh_frame becomes dangling symbols. if (sec == &InputSection::discarded) { - Undefined und{this, name, binding, stOther, type, secIdx}; - Symbol *sym = this->symbols[i]; - // !ArchiveFile::parsed or LazyObjFile::fetched means that the file - // containing this object has not finished processing, i.e. this symbol is - // a result of a lazy symbol fetch. We should demote the lazy symbol to an - // Undefined so that any relocations outside of the group to it will - // trigger a discarded section error. - if ((sym->symbolKind == Symbol::LazyArchiveKind && - !cast<ArchiveFile>(sym->file)->parsed) || - (sym->symbolKind == Symbol::LazyObjectKind && - cast<LazyObjFile>(sym->file)->fetched)) - sym->replace(und); - else - sym->resolve(und); + this->symbols[i]->resolve( + Undefined{this, name, binding, stOther, type, secIdx}); continue; } @@ -1195,20 +1112,6 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() { fatal(toString(this) + ": unexpected binding: " + Twine((int)binding)); } - - // Undefined symbols (excluding those defined relative to non-prevailing - // sections) can trigger recursive fetch. Process defined symbols first so - // that the relative order between a defined symbol and an undefined symbol - // does not change the symbol resolution behavior. In addition, a set of - // interconnected symbols will all be resolved to the same file, instead of - // being resolved to different files. - for (unsigned i : undefineds) { - const Elf_Sym &eSym = eSyms[i]; - StringRefZ name = this->stringTable.data() + eSym.st_name; - this->symbols[i]->resolve(Undefined{this, name, eSym.getBinding(), - eSym.st_other, eSym.getType()}); - this->symbols[i]->referenced = true; - } } ArchiveFile::ArchiveFile(std::unique_ptr<Archive> &&file) @@ -1218,10 +1121,6 @@ ArchiveFile::ArchiveFile(std::unique_ptr<Archive> &&file) void ArchiveFile::parse() { for (const Archive::Symbol &sym : file->symbols()) symtab->addSymbol(LazyArchive{*this, sym}); - - // Inform a future invocation of ObjFile<ELFT>::initializeSymbols() that this - // archive has been processed. - parsed = true; } // Returns a buffer pointing to a member file containing a given symbol. @@ -1243,107 +1142,12 @@ void ArchiveFile::fetch(const Archive::Symbol &sym) { if (tar && c.getParent()->isThin()) tar->append(relativeToRoot(CHECK(c.getFullName(), this)), mb.getBuffer()); - InputFile *file = createObjectFile(mb, getName(), c.getChildOffset()); + InputFile *file = createObjectFile( + mb, getName(), c.getParent()->isThin() ? 0 : c.getChildOffset()); file->groupId = groupId; parseFile(file); } -// The handling of tentative definitions (COMMON symbols) in archives is murky. -// A tentative definition will be promoted to a global definition if there are -// no non-tentative definitions to dominate it. When we hold a tentative -// definition to a symbol and are inspecting archive members for inclusion -// there are 2 ways we can proceed: -// -// 1) Consider the tentative definition a 'real' definition (ie promotion from -// tentative to real definition has already happened) and not inspect -// archive members for Global/Weak definitions to replace the tentative -// definition. An archive member would only be included if it satisfies some -// other undefined symbol. This is the behavior Gold uses. -// -// 2) Consider the tentative definition as still undefined (ie the promotion to -// a real definition happens only after all symbol resolution is done). -// The linker searches archive members for STB_GLOBAL definitions to -// replace the tentative definition with. This is the behavior used by -// GNU ld. -// -// The second behavior is inherited from SysVR4, which based it on the FORTRAN -// COMMON BLOCK model. This behavior is needed for proper initialization in old -// (pre F90) FORTRAN code that is packaged into an archive. -// -// The following functions search archive members for definitions to replace -// tentative definitions (implementing behavior 2). -static bool isBitcodeNonCommonDef(MemoryBufferRef mb, StringRef symName, - StringRef archiveName) { - IRSymtabFile symtabFile = check(readIRSymtab(mb)); - for (const irsymtab::Reader::SymbolRef &sym : - symtabFile.TheReader.symbols()) { - if (sym.isGlobal() && sym.getName() == symName) - return !sym.isUndefined() && !sym.isWeak() && !sym.isCommon(); - } - return false; -} - -template <class ELFT> -static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName, - StringRef archiveName) { - ObjFile<ELFT> *obj = make<ObjFile<ELFT>>(mb, archiveName); - StringRef stringtable = obj->getStringTable(); - - for (auto sym : obj->template getGlobalELFSyms<ELFT>()) { - Expected<StringRef> name = sym.getName(stringtable); - if (name && name.get() == symName) - return sym.isDefined() && sym.getBinding() == STB_GLOBAL && - !sym.isCommon(); - } - return false; -} - -static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName, - StringRef archiveName) { - switch (getELFKind(mb, archiveName)) { - case ELF32LEKind: - return isNonCommonDef<ELF32LE>(mb, symName, archiveName); - case ELF32BEKind: - return isNonCommonDef<ELF32BE>(mb, symName, archiveName); - case ELF64LEKind: - return isNonCommonDef<ELF64LE>(mb, symName, archiveName); - case ELF64BEKind: - return isNonCommonDef<ELF64BE>(mb, symName, archiveName); - default: - llvm_unreachable("getELFKind"); - } -} - -bool ArchiveFile::shouldFetchForCommon(const Archive::Symbol &sym) { - Archive::Child c = - CHECK(sym.getMember(), toString(this) + - ": could not get the member for symbol " + - toELFString(sym)); - MemoryBufferRef mb = - CHECK(c.getMemoryBufferRef(), - toString(this) + - ": could not get the buffer for the member defining symbol " + - toELFString(sym)); - - if (isBitcode(mb)) - return isBitcodeNonCommonDef(mb, sym.getName(), getName()); - - return isNonCommonDef(mb, sym.getName(), getName()); -} - -size_t ArchiveFile::getMemberCount() const { - size_t count = 0; - Error err = Error::success(); - for (const Archive::Child &c : file->children(err)) { - (void)c; - ++count; - } - // This function is used by --print-archive-stats=, where an error does not - // really matter. - consumeError(std::move(err)); - return count; -} - unsigned SharedFile::vernauxNum; // Parse the version definitions in the object file if present, and return a @@ -1375,40 +1179,6 @@ static std::vector<const void *> parseVerdefs(const uint8_t *base, return verdefs; } -// Parse SHT_GNU_verneed to properly set the name of a versioned undefined -// symbol. We detect fatal issues which would cause vulnerabilities, but do not -// implement sophisticated error checking like in llvm-readobj because the value -// of such diagnostics is low. -template <typename ELFT> -std::vector<uint32_t> SharedFile::parseVerneed(const ELFFile<ELFT> &obj, - const typename ELFT::Shdr *sec) { - if (!sec) - return {}; - std::vector<uint32_t> verneeds; - ArrayRef<uint8_t> data = CHECK(obj.getSectionContents(*sec), this); - const uint8_t *verneedBuf = data.begin(); - for (unsigned i = 0; i != sec->sh_info; ++i) { - if (verneedBuf + sizeof(typename ELFT::Verneed) > data.end()) - fatal(toString(this) + " has an invalid Verneed"); - auto *vn = reinterpret_cast<const typename ELFT::Verneed *>(verneedBuf); - const uint8_t *vernauxBuf = verneedBuf + vn->vn_aux; - for (unsigned j = 0; j != vn->vn_cnt; ++j) { - if (vernauxBuf + sizeof(typename ELFT::Vernaux) > data.end()) - fatal(toString(this) + " has an invalid Vernaux"); - auto *aux = reinterpret_cast<const typename ELFT::Vernaux *>(vernauxBuf); - if (aux->vna_name >= this->stringTable.size()) - fatal(toString(this) + " has a Vernaux with an invalid vna_name"); - uint16_t version = aux->vna_other & VERSYM_VERSION; - if (version >= verneeds.size()) - verneeds.resize(version + 1); - verneeds[version] = aux->vna_name; - vernauxBuf += aux->vna_next; - } - verneedBuf += vn->vn_next; - } - return verneeds; -} - // We do not usually care about alignments of data in shared object // files because the loader takes care of it. However, if we promote a // DSO symbol to point to .bss due to copy relocation, we need to keep @@ -1452,7 +1222,6 @@ template <class ELFT> void SharedFile::parse() { const Elf_Shdr *versymSec = nullptr; const Elf_Shdr *verdefSec = nullptr; - const Elf_Shdr *verneedSec = nullptr; // Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d. for (const Elf_Shdr &sec : sections) { @@ -1461,7 +1230,7 @@ template <class ELFT> void SharedFile::parse() { continue; case SHT_DYNAMIC: dynamicTags = - CHECK(obj.template getSectionContentsAsArray<Elf_Dyn>(sec), this); + CHECK(obj.template getSectionContentsAsArray<Elf_Dyn>(&sec), this); break; case SHT_GNU_versym: versymSec = &sec; @@ -1469,9 +1238,6 @@ template <class ELFT> void SharedFile::parse() { case SHT_GNU_verdef: verdefSec = &sec; break; - case SHT_GNU_verneed: - verneedSec = &sec; - break; } } @@ -1511,16 +1277,15 @@ template <class ELFT> void SharedFile::parse() { sharedFiles.push_back(this); verdefs = parseVerdefs<ELFT>(obj.base(), verdefSec); - std::vector<uint32_t> verneeds = parseVerneed<ELFT>(obj, verneedSec); // Parse ".gnu.version" section which is a parallel array for the symbol // table. If a given file doesn't have a ".gnu.version" section, we use // VER_NDX_GLOBAL. size_t size = numELFSyms - firstGlobal; - std::vector<uint16_t> versyms(size, VER_NDX_GLOBAL); + std::vector<uint32_t> versyms(size, VER_NDX_GLOBAL); if (versymSec) { ArrayRef<Elf_Versym> versym = - CHECK(obj.template getSectionContentsAsArray<Elf_Versym>(*versymSec), + CHECK(obj.template getSectionContentsAsArray<Elf_Versym>(versymSec), this) .slice(firstGlobal); for (size_t i = 0; i < size; ++i) @@ -1548,34 +1313,17 @@ template <class ELFT> void SharedFile::parse() { continue; } - uint16_t idx = versyms[i] & ~VERSYM_HIDDEN; if (sym.isUndefined()) { - // For unversioned undefined symbols, VER_NDX_GLOBAL makes more sense but - // as of binutils 2.34, GNU ld produces VER_NDX_LOCAL. - if (idx != VER_NDX_LOCAL && idx != VER_NDX_GLOBAL) { - if (idx >= verneeds.size()) { - error("corrupt input file: version need index " + Twine(idx) + - " for symbol " + name + " is out of bounds\n>>> defined in " + - toString(this)); - continue; - } - StringRef verName = this->stringTable.data() + verneeds[idx]; - versionedNameBuffer.clear(); - name = - saver.save((name + "@" + verName).toStringRef(versionedNameBuffer)); - } Symbol *s = symtab->addSymbol( Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()}); s->exportDynamic = true; - if (s->isUndefined() && !s->isWeak() && - config->unresolvedSymbolsInShlib != UnresolvedPolicy::Ignore) - requiredSymbols.push_back(s); continue; } // MIPS BFD linker puts _gp_disp symbol into DSO files and incorrectly // assigns VER_NDX_LOCAL to this section global symbol. Here is a // workaround for this bug. + uint32_t idx = versyms[i] & ~VERSYM_HIDDEN; if (config->emachine == EM_MIPS && idx == VER_NDX_LOCAL && name == "_gp_disp") continue; @@ -1616,10 +1364,9 @@ static ELFKind getBitcodeELFKind(const Triple &t) { return t.isArch64Bit() ? ELF64BEKind : ELF32BEKind; } -static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) { +static uint8_t getBitcodeMachineKind(StringRef path, const Triple &t) { switch (t.getArch()) { case Triple::aarch64: - case Triple::aarch64_be: return EM_AARCH64; case Triple::amdgcn: case Triple::r600: @@ -1637,7 +1384,6 @@ static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) { case Triple::msp430: return EM_MSP430; case Triple::ppc: - case Triple::ppcle: return EM_PPC; case Triple::ppc64: case Triple::ppc64le: @@ -1656,23 +1402,10 @@ static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) { } } -static uint8_t getOsAbi(const Triple &t) { - switch (t.getOS()) { - case Triple::AMDHSA: - return ELF::ELFOSABI_AMDGPU_HSA; - case Triple::AMDPAL: - return ELF::ELFOSABI_AMDGPU_PAL; - case Triple::Mesa3D: - return ELF::ELFOSABI_AMDGPU_MESA3D; - default: - return ELF::ELFOSABI_NONE; - } -} - BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName, uint64_t offsetInArchive) : InputFile(BitcodeKind, mb) { - this->archiveName = std::string(archiveName); + this->archiveName = archiveName; std::string path = mb.getBufferIdentifier().str(); if (config->thinLTOIndexOnly) @@ -1684,11 +1417,10 @@ BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName, // into consideration at LTO time (which very likely causes undefined // symbols later in the link stage). So we append file offset to make // filename unique. - StringRef name = - archiveName.empty() - ? saver.save(path) - : saver.save(archiveName + "(" + path::filename(path) + " at " + - utostr(offsetInArchive) + ")"); + StringRef name = archiveName.empty() + ? saver.save(path) + : saver.save(archiveName + "(" + path + " at " + + utostr(offsetInArchive) + ")"); MemoryBufferRef mbref(mb.getBuffer(), name); obj = CHECK(lto::InputFile::create(mbref), this); @@ -1696,7 +1428,6 @@ BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName, Triple t(obj->getTargetTriple()); ekind = getBitcodeELFKind(t); emachine = getBitcodeMachineKind(mb.getBufferIdentifier(), t); - osabi = getOsAbi(t); } static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) { @@ -1744,12 +1475,9 @@ static Symbol *createBitcodeSymbol(const std::vector<bool> &keptComdats, template <class ELFT> void BitcodeFile::parse() { std::vector<bool> keptComdats; - for (std::pair<StringRef, Comdat::SelectionKind> s : obj->getComdatTable()) { + for (StringRef s : obj->getComdatTable()) keptComdats.push_back( - s.second == Comdat::NoDeduplicate || - symtab->comdatGroups.try_emplace(CachedHashStringRef(s.first), this) - .second); - } + symtab->comdatGroups.try_emplace(CachedHashStringRef(s), this).second); for (const lto::InputFile::Symbol &objSym : obj->symbols()) symbols.push_back(createBitcodeSymbol<ELFT>(keptComdats, objSym, *this)); @@ -1781,8 +1509,8 @@ void BinaryFile::parse() { STV_DEFAULT, STT_OBJECT, data.size(), 0, nullptr}); } -InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName, - uint64_t offsetInArchive) { +InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName, + uint64_t offsetInArchive) { if (isBitcode(mb)) return make<BitcodeFile>(mb, archiveName, offsetInArchive); @@ -1801,13 +1529,14 @@ InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName, } void LazyObjFile::fetch() { - if (fetched) + if (mb.getBuffer().empty()) return; - fetched = true; InputFile *file = createObjectFile(mb, archiveName, offsetInArchive); file->groupId = groupId; + mb = {}; + // Copy symbol vector so that the new InputFile doesn't have to // insert the same defined symbols to the symbol table again. file->symbols = std::move(symbols); @@ -1864,29 +1593,21 @@ template <class ELFT> void LazyObjFile::parse() { continue; sym->resolve(LazyObject{*this, sym->getName()}); - // If fetched, stop iterating because this->symbols has been transferred - // to the instantiated ObjFile. - if (fetched) + // MemoryBuffer is emptied if this file is instantiated as ObjFile. + if (mb.getBuffer().empty()) return; } return; } } -bool LazyObjFile::shouldFetchForCommon(const StringRef &name) { - if (isBitcode(mb)) - return isBitcodeNonCommonDef(mb, name, archiveName); - - return isNonCommonDef(mb, name, archiveName); -} - -std::string elf::replaceThinLTOSuffix(StringRef path) { +std::string replaceThinLTOSuffix(StringRef path) { StringRef suffix = config->thinLTOObjectSuffixReplace.first; StringRef repl = config->thinLTOObjectSuffixReplace.second; if (path.consume_back(suffix)) return (path + repl).str(); - return std::string(path); + return path; } template void BitcodeFile::parse<ELF32LE>(); @@ -1899,12 +1620,15 @@ template void LazyObjFile::parse<ELF32BE>(); template void LazyObjFile::parse<ELF64LE>(); template void LazyObjFile::parse<ELF64BE>(); -template class elf::ObjFile<ELF32LE>; -template class elf::ObjFile<ELF32BE>; -template class elf::ObjFile<ELF64LE>; -template class elf::ObjFile<ELF64BE>; +template class ObjFile<ELF32LE>; +template class ObjFile<ELF32BE>; +template class ObjFile<ELF64LE>; +template class ObjFile<ELF64BE>; template void SharedFile::parse<ELF32LE>(); template void SharedFile::parse<ELF32BE>(); template void SharedFile::parse<ELF64LE>(); template void SharedFile::parse<ELF64BE>(); + +} // namespace elf +} // namespace lld |