diff options
author | Patrick Wildt <patrick@cvs.openbsd.org> | 2017-01-14 19:56:11 +0000 |
---|---|---|
committer | Patrick Wildt <patrick@cvs.openbsd.org> | 2017-01-14 19:56:11 +0000 |
commit | ca82c85029ae0befb17bc14a4faa9f3d51dd72b3 (patch) | |
tree | 2df9dea922feef454abe6d1499112a4abc270079 /gnu/llvm/tools/obj2yaml | |
parent | 04c0d479b956b5e4f4e20ce989b95443aa03da0b (diff) |
Import LLVM 3.9.1 including clang and lld.
Diffstat (limited to 'gnu/llvm/tools/obj2yaml')
-rw-r--r-- | gnu/llvm/tools/obj2yaml/CMakeLists.txt | 7 | ||||
-rw-r--r-- | gnu/llvm/tools/obj2yaml/Error.cpp | 17 | ||||
-rw-r--r-- | gnu/llvm/tools/obj2yaml/Error.h | 21 | ||||
-rw-r--r-- | gnu/llvm/tools/obj2yaml/coff2yaml.cpp | 14 | ||||
-rw-r--r-- | gnu/llvm/tools/obj2yaml/elf2yaml.cpp | 24 | ||||
-rw-r--r-- | gnu/llvm/tools/obj2yaml/macho2yaml.cpp | 527 | ||||
-rw-r--r-- | gnu/llvm/tools/obj2yaml/obj2yaml.cpp | 12 | ||||
-rw-r--r-- | gnu/llvm/tools/obj2yaml/obj2yaml.h | 2 |
8 files changed, 601 insertions, 23 deletions
diff --git a/gnu/llvm/tools/obj2yaml/CMakeLists.txt b/gnu/llvm/tools/obj2yaml/CMakeLists.txt index 3cdac5c7487..9b895525060 100644 --- a/gnu/llvm/tools/obj2yaml/CMakeLists.txt +++ b/gnu/llvm/tools/obj2yaml/CMakeLists.txt @@ -1,8 +1,13 @@ set(LLVM_LINK_COMPONENTS Object + ObjectYAML Support ) add_llvm_tool(obj2yaml - obj2yaml.cpp coff2yaml.cpp elf2yaml.cpp Error.cpp + obj2yaml.cpp + coff2yaml.cpp + elf2yaml.cpp + macho2yaml.cpp + Error.cpp ) diff --git a/gnu/llvm/tools/obj2yaml/Error.cpp b/gnu/llvm/tools/obj2yaml/Error.cpp index abef8af58cb..9d1af680a73 100644 --- a/gnu/llvm/tools/obj2yaml/Error.cpp +++ b/gnu/llvm/tools/obj2yaml/Error.cpp @@ -13,6 +13,9 @@ using namespace llvm; namespace { +// FIXME: This class is only here to support the transition to llvm::Error. It +// will be removed once this transition is complete. Clients should prefer to +// deal with the Error value directly, rather than converting to error_code. class _obj2yaml_error_category : public std::error_category { public: const char *name() const LLVM_NOEXCEPT override; @@ -34,14 +37,26 @@ std::string _obj2yaml_error_category::message(int ev) const { return "Unrecognized file type."; case obj2yaml_error::unsupported_obj_file_format: return "Unsupported object file format."; + case obj2yaml_error::not_implemented: + return "Feature not yet implemented."; } llvm_unreachable("An enumerator of obj2yaml_error does not have a message " "defined."); } namespace llvm { - const std::error_category &obj2yaml_category() { + +const std::error_category &obj2yaml_category() { static _obj2yaml_error_category o; return o; } + +char Obj2YamlError::ID = 0; + +void Obj2YamlError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; } + +std::error_code Obj2YamlError::convertToErrorCode() const { + return std::error_code(static_cast<int>(Code), obj2yaml_category()); +} + } // namespace llvm diff --git a/gnu/llvm/tools/obj2yaml/Error.h b/gnu/llvm/tools/obj2yaml/Error.h index 982f59e236c..f5111f257ce 100644 --- a/gnu/llvm/tools/obj2yaml/Error.h +++ b/gnu/llvm/tools/obj2yaml/Error.h @@ -10,6 +10,8 @@ #ifndef LLVM_TOOLS_OBJ2YAML_ERROR_H #define LLVM_TOOLS_OBJ2YAML_ERROR_H +#include "llvm/Support/Error.h" + #include <system_error> namespace llvm { @@ -19,13 +21,30 @@ enum class obj2yaml_error { success = 0, file_not_found, unrecognized_file_format, - unsupported_obj_file_format + unsupported_obj_file_format, + not_implemented }; inline std::error_code make_error_code(obj2yaml_error e) { return std::error_code(static_cast<int>(e), obj2yaml_category()); } +class Obj2YamlError : public ErrorInfo<Obj2YamlError> { +public: + static char ID; + Obj2YamlError(obj2yaml_error C) : Code(C) {} + Obj2YamlError(std::string ErrMsg) : ErrMsg(std::move(ErrMsg)) {} + Obj2YamlError(obj2yaml_error C, std::string ErrMsg) + : ErrMsg(std::move(ErrMsg)), Code(C) {} + void log(raw_ostream &OS) const override; + const std::string &getErrorMessage() const { return ErrMsg; } + std::error_code convertToErrorCode() const override; + +private: + std::string ErrMsg; + obj2yaml_error Code; +}; + } // namespace llvm namespace std { diff --git a/gnu/llvm/tools/obj2yaml/coff2yaml.cpp b/gnu/llvm/tools/obj2yaml/coff2yaml.cpp index f675bfe4e61..c734601ede7 100644 --- a/gnu/llvm/tools/obj2yaml/coff2yaml.cpp +++ b/gnu/llvm/tools/obj2yaml/coff2yaml.cpp @@ -9,7 +9,7 @@ #include "obj2yaml.h" #include "llvm/Object/COFF.h" -#include "llvm/Object/COFFYAML.h" +#include "llvm/ObjectYAML/COFFYAML.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/YAMLTraits.h" @@ -109,6 +109,7 @@ void COFFDumper::dumpSections(unsigned NumSections) { NewYAMLSection.Header.VirtualAddress = ObjSection.getAddress(); NewYAMLSection.Header.VirtualSize = COFFSection->VirtualSize; NewYAMLSection.Alignment = ObjSection.getAlignment(); + assert(NewYAMLSection.Alignment <= 8192); ArrayRef<uint8_t> sectionData; if (!ObjSection.isBSS()) @@ -120,9 +121,14 @@ void COFFDumper::dumpSections(unsigned NumSections) { const object::coff_relocation *reloc = Obj.getCOFFRelocation(Reloc); COFFYAML::Relocation Rel; object::symbol_iterator Sym = Reloc.getSymbol(); - ErrorOr<StringRef> SymbolNameOrErr = Sym->getName(); - if (std::error_code EC = SymbolNameOrErr.getError()) - report_fatal_error(EC.message()); + Expected<StringRef> SymbolNameOrErr = Sym->getName(); + if (!SymbolNameOrErr) { + std::string Buf; + raw_string_ostream OS(Buf); + logAllUnhandledErrors(SymbolNameOrErr.takeError(), OS, ""); + OS.flush(); + report_fatal_error(Buf); + } Rel.SymbolName = *SymbolNameOrErr; Rel.VirtualAddress = reloc->VirtualAddress; Rel.Type = reloc->Type; diff --git a/gnu/llvm/tools/obj2yaml/elf2yaml.cpp b/gnu/llvm/tools/obj2yaml/elf2yaml.cpp index f2b01380734..782832d5457 100644 --- a/gnu/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/gnu/llvm/tools/obj2yaml/elf2yaml.cpp @@ -11,7 +11,7 @@ #include "obj2yaml.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Object/ELFObjectFile.h" -#include "llvm/Object/ELFYAML.h" +#include "llvm/ObjectYAML/ELFYAML.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/YAMLTraits.h" @@ -179,10 +179,10 @@ ELFDumper<ELFT>::dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab, S.Size = Sym->st_size; S.Other = Sym->st_other; - ErrorOr<StringRef> NameOrErr = Sym->getName(StrTable); - if (std::error_code EC = NameOrErr.getError()) - return EC; - S.Name = NameOrErr.get(); + Expected<StringRef> SymbolNameOrErr = Sym->getName(StrTable); + if (!SymbolNameOrErr) + return errorToErrorCode(SymbolNameOrErr.takeError()); + S.Name = SymbolNameOrErr.get(); ErrorOr<const Elf_Shdr *> ShdrOrErr = Obj.getSection(Sym, SymTab, ShndxTable); if (std::error_code EC = ShdrOrErr.getError()) @@ -191,7 +191,7 @@ ELFDumper<ELFT>::dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab, if (!Shdr) return obj2yaml_error::success; - NameOrErr = Obj.getSectionName(Shdr); + ErrorOr<StringRef> NameOrErr = Obj.getSectionName(Shdr); if (std::error_code EC = NameOrErr.getError()) return EC; S.Section = NameOrErr.get(); @@ -217,9 +217,9 @@ std::error_code ELFDumper<ELFT>::dumpRelocation(const RelT *Rel, return EC; StringRef StrTab = *StrTabOrErr; - ErrorOr<StringRef> NameOrErr = Sym->getName(StrTab); - if (std::error_code EC = NameOrErr.getError()) - return EC; + Expected<StringRef> NameOrErr = Sym->getName(StrTab); + if (!NameOrErr) + return errorToErrorCode(NameOrErr.takeError()); R.Symbol = NameOrErr.get(); return obj2yaml_error::success; @@ -368,9 +368,9 @@ ErrorOr<ELFYAML::Group *> ELFDumper<ELFT>::dumpGroup(const Elf_Shdr *Shdr) { auto sectionContents = Obj.getSectionContents(Shdr); if (std::error_code ec = sectionContents.getError()) return ec; - ErrorOr<StringRef> symbolName = symbol->getName(StrTab); - if (std::error_code EC = symbolName.getError()) - return EC; + Expected<StringRef> symbolName = symbol->getName(StrTab); + if (!symbolName) + return errorToErrorCode(symbolName.takeError()); S->Info = *symbolName; const Elf_Word *groupMembers = reinterpret_cast<const Elf_Word *>(sectionContents->data()); diff --git a/gnu/llvm/tools/obj2yaml/macho2yaml.cpp b/gnu/llvm/tools/obj2yaml/macho2yaml.cpp new file mode 100644 index 00000000000..c9a1385f173 --- /dev/null +++ b/gnu/llvm/tools/obj2yaml/macho2yaml.cpp @@ -0,0 +1,527 @@ +//===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Error.h" +#include "obj2yaml.h" +#include "llvm/Object/MachOUniversal.h" +#include "llvm/ObjectYAML/ObjectYAML.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LEB128.h" + +#include <string.h> // for memcpy + +using namespace llvm; + +class MachODumper { + + template <typename StructType> + const char *processLoadCommandData( + MachOYAML::LoadCommand &LC, + const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd); + + const object::MachOObjectFile &Obj; + void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y); + void dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y); + void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y); + void dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y); + void dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> &BindOpcodes, + ArrayRef<uint8_t> OpcodeBuffer, bool Lazy = false); + void dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y); + void dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y); + +public: + MachODumper(const object::MachOObjectFile &O) : Obj(O) {} + Expected<std::unique_ptr<MachOYAML::Object>> dump(); +}; + +#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ + case MachO::LCName: \ + memcpy((void *) & (LC.Data.LCStruct##_data), LoadCmd.Ptr, \ + sizeof(MachO::LCStruct)); \ + if (Obj.isLittleEndian() != sys::IsLittleEndianHost) \ + MachO::swapStruct(LC.Data.LCStruct##_data); \ + EndPtr = processLoadCommandData<MachO::LCStruct>(LC, LoadCmd); \ + break; + +template <typename SectionType> +MachOYAML::Section constructSectionCommon(SectionType Sec) { + MachOYAML::Section TempSec; + memcpy(reinterpret_cast<void *>(&TempSec.sectname[0]), &Sec.sectname[0], 16); + memcpy(reinterpret_cast<void *>(&TempSec.segname[0]), &Sec.segname[0], 16); + TempSec.addr = Sec.addr; + TempSec.size = Sec.size; + TempSec.offset = Sec.offset; + TempSec.align = Sec.align; + TempSec.reloff = Sec.reloff; + TempSec.nreloc = Sec.nreloc; + TempSec.flags = Sec.flags; + TempSec.reserved1 = Sec.reserved1; + TempSec.reserved2 = Sec.reserved2; + TempSec.reserved3 = 0; + return TempSec; +} + +template <typename SectionType> +MachOYAML::Section constructSection(SectionType Sec); + +template <> MachOYAML::Section constructSection(MachO::section Sec) { + MachOYAML::Section TempSec = constructSectionCommon(Sec); + TempSec.reserved3 = 0; + return TempSec; +} + +template <> MachOYAML::Section constructSection(MachO::section_64 Sec) { + MachOYAML::Section TempSec = constructSectionCommon(Sec); + TempSec.reserved3 = Sec.reserved3; + return TempSec; +} + +template <typename SectionType, typename SegmentType> +const char * +extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, + std::vector<MachOYAML::Section> &Sections, + bool IsLittleEndian) { + auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize; + const SectionType *Curr = + reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType)); + for (; reinterpret_cast<const void *>(Curr) < End; Curr++) { + if (IsLittleEndian != sys::IsLittleEndianHost) { + SectionType Sec; + memcpy((void *)&Sec, Curr, sizeof(SectionType)); + MachO::swapStruct(Sec); + Sections.push_back(constructSection(Sec)); + } else { + Sections.push_back(constructSection(*Curr)); + } + } + return reinterpret_cast<const char *>(Curr); +} + +template <typename StructType> +const char *MachODumper::processLoadCommandData( + MachOYAML::LoadCommand &LC, + const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { + return LoadCmd.Ptr + sizeof(StructType); +} + +template <> +const char *MachODumper::processLoadCommandData<MachO::segment_command>( + MachOYAML::LoadCommand &LC, + const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { + return extractSections<MachO::section, MachO::segment_command>( + LoadCmd, LC.Sections, Obj.isLittleEndian()); +} + +template <> +const char *MachODumper::processLoadCommandData<MachO::segment_command_64>( + MachOYAML::LoadCommand &LC, + const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { + return extractSections<MachO::section_64, MachO::segment_command_64>( + LoadCmd, LC.Sections, Obj.isLittleEndian()); +} + +template <typename StructType> +const char * +readString(MachOYAML::LoadCommand &LC, + const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { + auto Start = LoadCmd.Ptr + sizeof(StructType); + auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType); + auto Size = strnlen(Start, MaxSize); + LC.PayloadString = StringRef(Start, Size).str(); + return Start + Size; +} + +template <> +const char *MachODumper::processLoadCommandData<MachO::dylib_command>( + MachOYAML::LoadCommand &LC, + const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { + return readString<MachO::dylib_command>(LC, LoadCmd); +} + +template <> +const char *MachODumper::processLoadCommandData<MachO::dylinker_command>( + MachOYAML::LoadCommand &LC, + const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { + return readString<MachO::dylinker_command>(LC, LoadCmd); +} + +template <> +const char *MachODumper::processLoadCommandData<MachO::rpath_command>( + MachOYAML::LoadCommand &LC, + const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { + return readString<MachO::rpath_command>(LC, LoadCmd); +} + +Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() { + auto Y = make_unique<MachOYAML::Object>(); + dumpHeader(Y); + dumpLoadCommands(Y); + dumpLinkEdit(Y); + return std::move(Y); +} + +void MachODumper::dumpHeader(std::unique_ptr<MachOYAML::Object> &Y) { + Y->Header.magic = Obj.getHeader().magic; + Y->Header.cputype = Obj.getHeader().cputype; + Y->Header.cpusubtype = Obj.getHeader().cpusubtype; + Y->Header.filetype = Obj.getHeader().filetype; + Y->Header.ncmds = Obj.getHeader().ncmds; + Y->Header.sizeofcmds = Obj.getHeader().sizeofcmds; + Y->Header.flags = Obj.getHeader().flags; + Y->Header.reserved = 0; +} + +void MachODumper::dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y) { + for (auto LoadCmd : Obj.load_commands()) { + MachOYAML::LoadCommand LC; + const char *EndPtr = LoadCmd.Ptr; + switch (LoadCmd.C.cmd) { + default: + memcpy((void *)&(LC.Data.load_command_data), LoadCmd.Ptr, + sizeof(MachO::load_command)); + if (Obj.isLittleEndian() != sys::IsLittleEndianHost) + MachO::swapStruct(LC.Data.load_command_data); + EndPtr = processLoadCommandData<MachO::load_command>(LC, LoadCmd); + break; +#include "llvm/Support/MachO.def" + } + auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr); + if (!std::all_of(EndPtr, &EndPtr[RemainingBytes], + [](const char C) { return C == 0; })) { + LC.PayloadBytes.insert(LC.PayloadBytes.end(), EndPtr, + &EndPtr[RemainingBytes]); + RemainingBytes = 0; + } + LC.ZeroPadBytes = RemainingBytes; + Y->LoadCommands.push_back(std::move(LC)); + } +} + +void MachODumper::dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y) { + dumpRebaseOpcodes(Y); + dumpBindOpcodes(Y->LinkEdit.BindOpcodes, Obj.getDyldInfoBindOpcodes()); + dumpBindOpcodes(Y->LinkEdit.WeakBindOpcodes, + Obj.getDyldInfoWeakBindOpcodes()); + dumpBindOpcodes(Y->LinkEdit.LazyBindOpcodes, Obj.getDyldInfoLazyBindOpcodes(), + true); + dumpExportTrie(Y); + dumpSymbols(Y); +} + +void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) { + MachOYAML::LinkEditData &LEData = Y->LinkEdit; + + auto RebaseOpcodes = Obj.getDyldInfoRebaseOpcodes(); + for (auto OpCode = RebaseOpcodes.begin(); OpCode != RebaseOpcodes.end(); + ++OpCode) { + MachOYAML::RebaseOpcode RebaseOp; + RebaseOp.Opcode = + static_cast<MachO::RebaseOpcode>(*OpCode & MachO::REBASE_OPCODE_MASK); + RebaseOp.Imm = *OpCode & MachO::REBASE_IMMEDIATE_MASK; + + unsigned Count; + uint64_t ULEB = 0; + + switch (RebaseOp.Opcode) { + case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: + + ULEB = decodeULEB128(OpCode + 1, &Count); + RebaseOp.ExtraData.push_back(ULEB); + OpCode += Count; + // Intentionally no break here -- This opcode has two ULEB values + case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: + case MachO::REBASE_OPCODE_ADD_ADDR_ULEB: + case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES: + case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: + + ULEB = decodeULEB128(OpCode + 1, &Count); + RebaseOp.ExtraData.push_back(ULEB); + OpCode += Count; + break; + default: + break; + } + + LEData.RebaseOpcodes.push_back(RebaseOp); + + if (RebaseOp.Opcode == MachO::REBASE_OPCODE_DONE) + break; + } +} + +StringRef ReadStringRef(const uint8_t *Start) { + const uint8_t *Itr = Start; + for (; *Itr; ++Itr) + ; + return StringRef(reinterpret_cast<const char *>(Start), Itr - Start); +} + +void MachODumper::dumpBindOpcodes( + std::vector<MachOYAML::BindOpcode> &BindOpcodes, + ArrayRef<uint8_t> OpcodeBuffer, bool Lazy) { + for (auto OpCode = OpcodeBuffer.begin(); OpCode != OpcodeBuffer.end(); + ++OpCode) { + MachOYAML::BindOpcode BindOp; + BindOp.Opcode = + static_cast<MachO::BindOpcode>(*OpCode & MachO::BIND_OPCODE_MASK); + BindOp.Imm = *OpCode & MachO::BIND_IMMEDIATE_MASK; + + unsigned Count; + uint64_t ULEB = 0; + int64_t SLEB = 0; + + switch (BindOp.Opcode) { + case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: + ULEB = decodeULEB128(OpCode + 1, &Count); + BindOp.ULEBExtraData.push_back(ULEB); + OpCode += Count; + // Intentionally no break here -- this opcode has two ULEB values + + case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: + case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: + case MachO::BIND_OPCODE_ADD_ADDR_ULEB: + case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: + ULEB = decodeULEB128(OpCode + 1, &Count); + BindOp.ULEBExtraData.push_back(ULEB); + OpCode += Count; + break; + + case MachO::BIND_OPCODE_SET_ADDEND_SLEB: + SLEB = decodeSLEB128(OpCode + 1, &Count); + BindOp.SLEBExtraData.push_back(SLEB); + OpCode += Count; + break; + + case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: + BindOp.Symbol = ReadStringRef(OpCode + 1); + OpCode += BindOp.Symbol.size() + 1; + break; + default: + break; + } + + BindOpcodes.push_back(BindOp); + + // Lazy bindings have DONE opcodes between operations, so we need to keep + // processing after a DONE. + if (!Lazy && BindOp.Opcode == MachO::BIND_OPCODE_DONE) + break; + } +} + +/*! + * /brief processes a node from the export trie, and its children. + * + * To my knowledge there is no documentation of the encoded format of this data + * other than in the heads of the Apple linker engineers. To that end hopefully + * this comment and the implementation below can serve to light the way for + * anyone crazy enough to come down this path in the future. + * + * This function reads and preserves the trie structure of the export trie. To + * my knowledge there is no code anywhere else that reads the data and preserves + * the Trie. LD64 (sources available at opensource.apple.com) has a similar + * implementation that parses the export trie into a vector. That code as well + * as LLVM's libObject MachO implementation were the basis for this. + * + * The export trie is an encoded trie. The node serialization is a bit awkward. + * The below pseudo-code is the best description I've come up with for it. + * + * struct SerializedNode { + * ULEB128 TerminalSize; + * struct TerminalData { <-- This is only present if TerminalSize > 0 + * ULEB128 Flags; + * ULEB128 Address; <-- Present if (! Flags & REEXPORT ) + * ULEB128 Other; <-- Present if ( Flags & REEXPORT || + * Flags & STUB_AND_RESOLVER ) + * char[] ImportName; <-- Present if ( Flags & REEXPORT ) + * } + * uint8_t ChildrenCount; + * Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount]; + * SerializedNode Children[ChildrenCount] + * } + * + * Terminal nodes are nodes that represent actual exports. They can appear + * anywhere in the tree other than at the root; they do not need to be leaf + * nodes. When reading the data out of the trie this routine reads it in-order, + * but it puts the child names and offsets directly into the child nodes. This + * results in looping over the children twice during serialization and + * de-serialization, but it makes the YAML representation more human readable. + * + * Below is an example of the graph from a "Hello World" executable: + * + * ------- + * | '' | + * ------- + * | + * ------- + * | '_' | + * ------- + * | + * |----------------------------------------| + * | | + * ------------------------ --------------------- + * | '_mh_execute_header' | | 'main' | + * | Flags: 0x00000000 | | Flags: 0x00000000 | + * | Addr: 0x00000000 | | Addr: 0x00001160 | + * ------------------------ --------------------- + * + * This graph represents the trie for the exports "__mh_execute_header" and + * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are + * terminal. +*/ + +const uint8_t *processExportNode(const uint8_t *CurrPtr, + const uint8_t *const End, + MachOYAML::ExportEntry &Entry) { + if (CurrPtr >= End) + return CurrPtr; + unsigned Count = 0; + Entry.TerminalSize = decodeULEB128(CurrPtr, &Count); + CurrPtr += Count; + if (Entry.TerminalSize != 0) { + Entry.Flags = decodeULEB128(CurrPtr, &Count); + CurrPtr += Count; + if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) { + Entry.Address = 0; + Entry.Other = decodeULEB128(CurrPtr, &Count); + CurrPtr += Count; + Entry.ImportName = std::string(reinterpret_cast<const char *>(CurrPtr)); + } else { + Entry.Address = decodeULEB128(CurrPtr, &Count); + CurrPtr += Count; + if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { + Entry.Other = decodeULEB128(CurrPtr, &Count); + CurrPtr += Count; + } else + Entry.Other = 0; + } + } + uint8_t childrenCount = *CurrPtr++; + if (childrenCount == 0) + return CurrPtr; + + Entry.Children.insert(Entry.Children.begin(), (size_t)childrenCount, + MachOYAML::ExportEntry()); + for (auto &Child : Entry.Children) { + Child.Name = std::string(reinterpret_cast<const char *>(CurrPtr)); + CurrPtr += Child.Name.length() + 1; + Child.NodeOffset = decodeULEB128(CurrPtr, &Count); + CurrPtr += Count; + } + for (auto &Child : Entry.Children) { + CurrPtr = processExportNode(CurrPtr, End, Child); + } + return CurrPtr; +} + +void MachODumper::dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y) { + MachOYAML::LinkEditData &LEData = Y->LinkEdit; + auto ExportsTrie = Obj.getDyldInfoExportsTrie(); + processExportNode(ExportsTrie.begin(), ExportsTrie.end(), LEData.ExportTrie); +} + +template <typename nlist_t> +MachOYAML::NListEntry constructNameList(const nlist_t &nlist) { + MachOYAML::NListEntry NL; + NL.n_strx = nlist.n_strx; + NL.n_type = nlist.n_type; + NL.n_sect = nlist.n_sect; + NL.n_desc = nlist.n_desc; + NL.n_value = nlist.n_value; + return NL; +} + +void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) { + MachOYAML::LinkEditData &LEData = Y->LinkEdit; + + for (auto Symbol : Obj.symbols()) { + MachOYAML::NListEntry NLE = + Obj.is64Bit() ? constructNameList<MachO::nlist_64>( + *reinterpret_cast<const MachO::nlist_64 *>( + Symbol.getRawDataRefImpl().p)) + : constructNameList<MachO::nlist>( + *reinterpret_cast<const MachO::nlist *>( + Symbol.getRawDataRefImpl().p)); + LEData.NameList.push_back(NLE); + } + + StringRef RemainingTable = Obj.getStringTableData(); + while (RemainingTable.size() > 0) { + auto SymbolPair = RemainingTable.split('\0'); + RemainingTable = SymbolPair.second; + if (SymbolPair.first.empty()) + break; + LEData.StringTable.push_back(SymbolPair.first); + } +} + +Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) { + MachODumper Dumper(Obj); + Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump(); + if (!YAML) + return YAML.takeError(); + + yaml::YamlObjectFile YAMLFile; + YAMLFile.MachO = std::move(YAML.get()); + + yaml::Output Yout(Out); + Yout << YAMLFile; + return Error::success(); +} + +Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) { + yaml::YamlObjectFile YAMLFile; + YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary()); + MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO; + YAML.Header.magic = Obj.getMagic(); + YAML.Header.nfat_arch = Obj.getNumberOfObjects(); + + for (auto Slice : Obj.objects()) { + MachOYAML::FatArch arch; + arch.cputype = Slice.getCPUType(); + arch.cpusubtype = Slice.getCPUSubType(); + arch.offset = Slice.getOffset(); + arch.size = Slice.getSize(); + arch.align = Slice.getAlign(); + arch.reserved = Slice.getReserved(); + YAML.FatArchs.push_back(arch); + + auto SliceObj = Slice.getAsObjectFile(); + if (!SliceObj) + return SliceObj.takeError(); + + MachODumper Dumper(*SliceObj.get()); + Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump(); + if (!YAMLObj) + return YAMLObj.takeError(); + YAML.Slices.push_back(*YAMLObj.get()); + } + + yaml::Output Yout(Out); + Yout << YAML; + return Error::success(); +} + +std::error_code macho2yaml(raw_ostream &Out, const object::Binary &Binary) { + if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(&Binary)) { + if (auto Err = macho2yaml(Out, *MachOObj)) { + return errorToErrorCode(std::move(Err)); + } + return obj2yaml_error::success; + } + + if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Binary)) { + if (auto Err = macho2yaml(Out, *MachOObj)) { + return errorToErrorCode(std::move(Err)); + } + return obj2yaml_error::success; + } + + return obj2yaml_error::unsupported_obj_file_format; +} diff --git a/gnu/llvm/tools/obj2yaml/obj2yaml.cpp b/gnu/llvm/tools/obj2yaml/obj2yaml.cpp index ee6284da6e4..3f9373ee17e 100644 --- a/gnu/llvm/tools/obj2yaml/obj2yaml.cpp +++ b/gnu/llvm/tools/obj2yaml/obj2yaml.cpp @@ -29,11 +29,15 @@ static std::error_code dumpObject(const ObjectFile &Obj) { } static std::error_code dumpInput(StringRef File) { - ErrorOr<OwningBinary<Binary>> BinaryOrErr = createBinary(File); - if (std::error_code EC = BinaryOrErr.getError()) - return EC; + Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(File); + if (!BinaryOrErr) + return errorToErrorCode(BinaryOrErr.takeError()); Binary &Binary = *BinaryOrErr.get().getBinary(); + // Universal MachO is not a subclass of ObjectFile, so it needs to be handled + // here with the other binary types. + if (Binary.isMachO() || Binary.isMachOUniversalBinary()) + return macho2yaml(outs(), Binary); // TODO: If this is an archive, then burst it and dump each entry if (ObjectFile *Obj = dyn_cast<ObjectFile>(&Binary)) return dumpObject(*Obj); @@ -46,7 +50,7 @@ cl::opt<std::string> InputFilename(cl::Positional, cl::desc("<input file>"), int main(int argc, char *argv[]) { cl::ParseCommandLineOptions(argc, argv); - sys::PrintStackTraceOnErrorSignal(); + sys::PrintStackTraceOnErrorSignal(argv[0]); PrettyStackTraceProgram X(argc, argv); llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. diff --git a/gnu/llvm/tools/obj2yaml/obj2yaml.h b/gnu/llvm/tools/obj2yaml/obj2yaml.h index 643ab7bc434..28c74751c0d 100644 --- a/gnu/llvm/tools/obj2yaml/obj2yaml.h +++ b/gnu/llvm/tools/obj2yaml/obj2yaml.h @@ -21,5 +21,7 @@ std::error_code coff2yaml(llvm::raw_ostream &Out, const llvm::object::COFFObjectFile &Obj); std::error_code elf2yaml(llvm::raw_ostream &Out, const llvm::object::ObjectFile &Obj); +std::error_code macho2yaml(llvm::raw_ostream &Out, + const llvm::object::Binary &Obj); #endif |