504 lines
18 KiB
C++
504 lines
18 KiB
C++
//===-- gsymutil.cpp - GSYM dumping and creation utility for llvm ---------===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/ADT/STLExtras.h"
|
|
#include "llvm/ADT/StringSet.h"
|
|
#include "llvm/ADT/Triple.h"
|
|
#include "llvm/DebugInfo/DIContext.h"
|
|
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
|
|
#include "llvm/Object/Archive.h"
|
|
#include "llvm/Object/ELFObjectFile.h"
|
|
#include "llvm/Object/MachOUniversal.h"
|
|
#include "llvm/Object/ObjectFile.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Support/Format.h"
|
|
#include "llvm/Support/ManagedStatic.h"
|
|
#include "llvm/Support/MemoryBuffer.h"
|
|
#include "llvm/Support/Path.h"
|
|
#include "llvm/Support/PrettyStackTrace.h"
|
|
#include "llvm/Support/Regex.h"
|
|
#include "llvm/Support/Signals.h"
|
|
#include "llvm/Support/TargetSelect.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
#include <algorithm>
|
|
#include <cstring>
|
|
#include <inttypes.h>
|
|
#include <map>
|
|
#include <string>
|
|
#include <system_error>
|
|
#include <vector>
|
|
|
|
#include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
|
|
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
|
|
#include "llvm/DebugInfo/GSYM/GsymCreator.h"
|
|
#include "llvm/DebugInfo/GSYM/GsymReader.h"
|
|
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
|
|
#include "llvm/DebugInfo/GSYM/LookupResult.h"
|
|
#include "llvm/DebugInfo/GSYM/ObjectFileTransformer.h"
|
|
|
|
using namespace llvm;
|
|
using namespace gsym;
|
|
using namespace object;
|
|
|
|
/// Command line options.
/// @{
|
|
|
|
namespace {
|
|
using namespace cl;
|
|
|
|
OptionCategory GeneralOptions("Options");
|
|
OptionCategory ConversionOptions("Conversion Options");
|
|
OptionCategory LookupOptions("Lookup Options");
|
|
|
|
static opt<bool> Help("h", desc("Alias for -help"), Hidden,
|
|
cat(GeneralOptions));
|
|
|
|
static opt<bool> Verbose("verbose",
|
|
desc("Enable verbose logging and encoding details."),
|
|
cat(GeneralOptions));
|
|
|
|
static list<std::string> InputFilenames(Positional, desc("<input GSYM files>"),
|
|
ZeroOrMore, cat(GeneralOptions));
|
|
|
|
static opt<std::string>
|
|
ConvertFilename("convert", cl::init(""),
|
|
cl::desc("Convert the specified file to the GSYM format.\n"
|
|
"Supported files include ELF and mach-o files "
|
|
"that will have their debug info (DWARF) and "
|
|
"symbol table converted."),
|
|
cl::value_desc("path"), cat(ConversionOptions));
|
|
|
|
static list<std::string>
|
|
ArchFilters("arch",
|
|
desc("Process debug information for the specified CPU "
|
|
"architecture only.\nArchitectures may be specified by "
|
|
"name or by number.\nThis option can be specified "
|
|
"multiple times, once for each desired architecture."),
|
|
cl::value_desc("arch"), cat(ConversionOptions));
|
|
|
|
static opt<std::string>
|
|
OutputFilename("out-file", cl::init(""),
|
|
cl::desc("Specify the path where the converted GSYM file "
|
|
"will be saved.\nWhen not specified, a '.gsym' "
|
|
"extension will be appended to the file name "
|
|
"specified in the --convert option."),
|
|
cl::value_desc("path"), cat(ConversionOptions));
|
|
static alias OutputFilenameAlias("o", desc("Alias for -out-file."),
|
|
aliasopt(OutputFilename),
|
|
cat(ConversionOptions));
|
|
|
|
static opt<bool> Verify("verify",
|
|
desc("Verify the generated GSYM file against the "
|
|
"information in the file that was converted."),
|
|
cat(ConversionOptions));
|
|
|
|
static opt<unsigned>
|
|
NumThreads("num-threads",
|
|
desc("Specify the maximum number (n) of simultaneous threads "
|
|
"to use when converting files to GSYM.\nDefaults to the "
|
|
"number of cores on the current machine."),
|
|
cl::value_desc("n"), cat(ConversionOptions));
|
|
|
|
static list<uint64_t> LookupAddresses("address",
|
|
desc("Lookup an address in a GSYM file"),
|
|
cl::value_desc("addr"),
|
|
cat(LookupOptions));
|
|
|
|
|
|
|
|
} // namespace
|
|
/// @}
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Report a failed llvm::Error and terminate the program.
///
/// Does nothing when \p Err holds success. Otherwise writes
/// "<Prefix>: <error>" followed by a newline to errs(), consumes the error
/// and calls exit(1).
static void error(StringRef Prefix, llvm::Error Err) {
  if (Err) {
    errs() << Prefix << ": " << Err << "\n";
    consumeError(std::move(Err));
    exit(1);
  }
}
|
|
|
|
/// Report a failed std::error_code and terminate the program.
///
/// Does nothing when \p EC signals success. Otherwise writes
/// "<Prefix>: <message>" followed by a newline to errs() and calls exit(1).
static void error(StringRef Prefix, std::error_code EC) {
  if (EC) {
    errs() << Prefix << ": " << EC.message() << "\n";
    exit(1);
  }
}
|
|
|
|
|
|
/// If the input path is a .dSYM bundle (as created by the dsymutil tool),
|
|
/// replace it with individual entries for each of the object files inside the
|
|
/// bundle otherwise return the input path.
|
|
static std::vector<std::string> expandBundle(const std::string &InputPath) {
|
|
std::vector<std::string> BundlePaths;
|
|
SmallString<256> BundlePath(InputPath);
|
|
// Manually open up the bundle to avoid introducing additional dependencies.
|
|
if (sys::fs::is_directory(BundlePath) &&
|
|
sys::path::extension(BundlePath) == ".dSYM") {
|
|
std::error_code EC;
|
|
sys::path::append(BundlePath, "Contents", "Resources", "DWARF");
|
|
for (sys::fs::directory_iterator Dir(BundlePath, EC), DirEnd;
|
|
Dir != DirEnd && !EC; Dir.increment(EC)) {
|
|
const std::string &Path = Dir->path();
|
|
sys::fs::file_status Status;
|
|
EC = sys::fs::status(Path, Status);
|
|
error(Path, EC);
|
|
switch (Status.type()) {
|
|
case sys::fs::file_type::regular_file:
|
|
case sys::fs::file_type::symlink_file:
|
|
case sys::fs::file_type::type_unknown:
|
|
BundlePaths.push_back(Path);
|
|
break;
|
|
default: /*ignore*/;
|
|
}
|
|
}
|
|
error(BundlePath, EC);
|
|
}
|
|
if (!BundlePaths.size())
|
|
BundlePaths.push_back(InputPath);
|
|
return BundlePaths;
|
|
}
|
|
|
|
/// Return the "cputype" field of the mach-o header of \p MachO, reading the
/// 64-bit header when the file is 64-bit and the 32-bit header otherwise.
static uint32_t getCPUType(MachOObjectFile &MachO) {
  return MachO.is64Bit() ? MachO.getHeader64().cputype
                         : MachO.getHeader().cputype;
}
|
|
|
|
/// Return true if the object file has not been filtered by an --arch option.
|
|
static bool filterArch(MachOObjectFile &Obj) {
|
|
if (ArchFilters.empty())
|
|
return true;
|
|
|
|
Triple ObjTriple(Obj.getArchTriple());
|
|
StringRef ObjArch = ObjTriple.getArchName();
|
|
|
|
for (auto Arch : ArchFilters) {
|
|
// Match name.
|
|
if (Arch == ObjArch)
|
|
return true;
|
|
|
|
// Match architecture number.
|
|
unsigned Value;
|
|
if (!StringRef(Arch).getAsInteger(0, Value))
|
|
if (Value == getCPUType(Obj))
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/// Determine the virtual address that is considered the base address of an ELF
|
|
/// object file.
|
|
///
|
|
/// The base address of an ELF file is the the "p_vaddr" of the first program
|
|
/// header whose "p_type" is PT_LOAD.
|
|
///
|
|
/// \param ELFFile An ELF object file we will search.
|
|
///
|
|
/// \returns A valid image base address if we are able to extract one.
|
|
template <class ELFT>
|
|
static llvm::Optional<uint64_t>
|
|
getImageBaseAddress(const object::ELFFile<ELFT> &ELFFile) {
|
|
auto PhdrRangeOrErr = ELFFile.program_headers();
|
|
if (!PhdrRangeOrErr) {
|
|
consumeError(PhdrRangeOrErr.takeError());
|
|
return llvm::None;
|
|
}
|
|
for (const typename ELFT::Phdr &Phdr : *PhdrRangeOrErr)
|
|
if (Phdr.p_type == ELF::PT_LOAD)
|
|
return (uint64_t)Phdr.p_vaddr;
|
|
return llvm::None;
|
|
}
|
|
|
|
/// Determine the virtual address that is considered the base address of mach-o
|
|
/// object file.
|
|
///
|
|
/// The base address of a mach-o file is the vmaddr of the "__TEXT" segment.
|
|
///
|
|
/// \param MachO A mach-o object file we will search.
|
|
///
|
|
/// \returns A valid image base address if we are able to extract one.
|
|
static llvm::Optional<uint64_t>
|
|
getImageBaseAddress(const object::MachOObjectFile *MachO) {
|
|
for (const auto &Command : MachO->load_commands()) {
|
|
if (Command.C.cmd == MachO::LC_SEGMENT) {
|
|
MachO::segment_command SLC = MachO->getSegmentLoadCommand(Command);
|
|
StringRef SegName = SLC.segname;
|
|
if (SegName == "__TEXT")
|
|
return SLC.vmaddr;
|
|
} else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
|
|
MachO::segment_command_64 SLC = MachO->getSegment64LoadCommand(Command);
|
|
StringRef SegName = SLC.segname;
|
|
if (SegName == "__TEXT")
|
|
return SLC.vmaddr;
|
|
}
|
|
}
|
|
return llvm::None;
|
|
}
|
|
|
|
/// Determine the virtual address that is considered the base address of an
|
|
/// object file.
|
|
///
|
|
/// Since GSYM files are used for symbolication, many clients will need to
|
|
/// easily adjust addresses they find in stack traces so the lookups happen
|
|
/// on unslid addresses from the original object file. If the base address of
|
|
/// a GSYM file is set to the base address of the image, then this address
|
|
/// adjusting is much easier.
|
|
///
|
|
/// \param Obj An object file we will search.
|
|
///
|
|
/// \returns A valid image base address if we are able to extract one.
|
|
static llvm::Optional<uint64_t> getImageBaseAddress(object::ObjectFile &Obj) {
|
|
if (const auto *MachO = dyn_cast<object::MachOObjectFile>(&Obj))
|
|
return getImageBaseAddress(MachO);
|
|
else if (const auto *ELFObj = dyn_cast<object::ELF32LEObjectFile>(&Obj))
|
|
return getImageBaseAddress(ELFObj->getELFFile());
|
|
else if (const auto *ELFObj = dyn_cast<object::ELF32BEObjectFile>(&Obj))
|
|
return getImageBaseAddress(ELFObj->getELFFile());
|
|
else if (const auto *ELFObj = dyn_cast<object::ELF64LEObjectFile>(&Obj))
|
|
return getImageBaseAddress(ELFObj->getELFFile());
|
|
else if (const auto *ELFObj = dyn_cast<object::ELF64BEObjectFile>(&Obj))
|
|
return getImageBaseAddress(ELFObj->getELFFile());
|
|
return llvm::None;
|
|
}
|
|
|
|
|
|
/// Convert one object file to a GSYM file saved at \p OutFile.
///
/// The steps are, in order: set the GSYM base address when one can be
/// determined, collect the non-empty text section ranges, convert the DWARF,
/// convert the object file's symbol table, finalize, save, and optionally
/// verify (when --verify is given).
///
/// \param Obj     The object file to convert.
/// \param OutFile Path the GSYM data is saved to.
///
/// \returns Error::success(), or the first error produced by one of the
/// steps above (including failure to create a DWARF context).
static llvm::Error handleObjectFile(ObjectFile &Obj,
                                    const std::string &OutFile) {
  // Use --num-threads when given, otherwise what the hardware reports.
  unsigned ThreadCount = NumThreads;
  if (ThreadCount == 0)
    ThreadCount = std::thread::hardware_concurrency();
  raw_ostream &Log = outs();

  GsymCreator Creator;

  // If the base address of the object file can be determined, record it as
  // the GSYM base address. This eases symbolication since clients can slide
  // the GSYM lookup addresses by using the load bias of the shared library.
  if (auto BaseAddr = getImageBaseAddress(Obj))
    Creator.setBaseAddress(*BaseAddr);

  // Collect the address ranges of all non-empty sections that contain
  // instructions. See the documentation of SetValidTextRanges() for details.
  AddressRanges CodeRanges;
  for (const object::SectionRef &Section : Obj.sections()) {
    if (Section.isText()) {
      const uint64_t Size = Section.getSize();
      if (Size != 0) {
        const uint64_t Start = Section.getAddress();
        CodeRanges.insert(AddressRange(Start, Start + Size));
      }
    }
  }

  // Make sure there is DWARF to convert first.
  std::unique_ptr<DWARFContext> DwarfCtx = DWARFContext::create(Obj);
  if (!DwarfCtx)
    return createStringError(std::errc::invalid_argument,
                             "unable to create DWARF context");
  logAllUnhandledErrors(DwarfCtx->loadRegisterInfo(Obj), Log,
                        "DwarfTransformer: ");

  // Make a DWARF transformer object and populate the ranges of the code
  // so we don't end up adding invalid functions to GSYM data.
  DwarfTransformer Transformer(*DwarfCtx, Log, Creator);
  if (!CodeRanges.empty())
    Creator.SetValidTextRanges(CodeRanges);

  // Convert all DWARF to GSYM.
  if (auto E = Transformer.convert(ThreadCount))
    return E;

  // Get the UUID and convert symbol table to GSYM.
  if (auto E = ObjectFileTransformer::convert(Obj, Log, Creator))
    return E;

  // Finalize the GSYM to make it ready to save to disk. This will remove
  // duplicate FunctionInfo entries where we might have found an entry from
  // debug info and also a symbol table entry from the object file.
  if (auto E = Creator.finalize(Log))
    return E;

  // Save the GSYM file to disk using the byte order of the object file.
  const support::endianness ByteOrder =
      Obj.makeTriple().isLittleEndian() ? support::little : support::big;
  if (auto E = Creator.save(OutFile.c_str(), ByteOrder))
    return E;

  // Verify the DWARF if requested. This will ensure all the info in the DWARF
  // can be looked up in the GSYM and that all lookups get matching data.
  if (!Verify)
    return Error::success();
  return Transformer.verify(OutFile);
}
|
|
|
|
/// Convert the binary held in \p Buffer to one or more GSYM files.
///
/// A plain object file is converted to \p OutFile. For a mach-o universal
/// binary, every contained architecture that passes filterArch() is
/// converted; when more than one architecture is converted, ".<arch>" is
/// appended to \p OutFile for each of them so the outputs do not overwrite
/// one another.
///
/// Failure to parse the buffer, failure to open a slice of a universal
/// binary, and an empty set of matching architectures are reported through
/// error(), which terminates the program.
///
/// \param Filename Name used as the prefix of error messages.
/// \param Buffer   Contents of the file to convert.
/// \param OutFile  Path (or path stem, see above) of the GSYM output.
///
/// \returns Error::success(), or the first error from handleObjectFile().
static llvm::Error handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
                                const std::string &OutFile) {
  Expected<std::unique_ptr<Binary>> BinOrErr = object::createBinary(Buffer);
  // Report through the llvm::Error overload of error() so the specific
  // diagnostic is printed instead of a generic error_code message.
  if (!BinOrErr)
    error(Filename, BinOrErr.takeError());

  if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get())) {
    const Triple ObjTriple(Obj->makeTriple());
    const auto ArchName = ObjTriple.getArchName();
    outs() << "Output file (" << ArchName << "): " << OutFile << "\n";
    if (auto Err = handleObjectFile(*Obj, OutFile))
      return Err;
  } else if (auto *Fat = dyn_cast<MachOUniversalBinary>(BinOrErr->get())) {
    // Iterate over all contained architectures and filter out any that were
    // not specified with the "--arch <arch>" option. If the --arch option was
    // not specified on the command line, we will process all architectures.
    std::vector<std::unique_ptr<MachOObjectFile>> FilterObjs;
    for (auto &ObjForArch : Fat->objects()) {
      auto MachOOrErr = ObjForArch.getAsObjectFile();
      if (!MachOOrErr)
        error(Filename, MachOOrErr.takeError());
      if (filterArch(**MachOOrErr))
        FilterObjs.push_back(std::move(*MachOOrErr));
    }
    if (FilterObjs.empty())
      error(Filename, createStringError(std::errc::invalid_argument,
                                        "no matching architectures found"));

    // Now handle each architecture we need to convert.
    const bool MultipleArchs = FilterObjs.size() > 1;
    for (const auto &Obj : FilterObjs) {
      const Triple ObjTriple(Obj->getArchTriple());
      const auto ArchName = ObjTriple.getArchName();
      std::string ArchOutFile(OutFile);
      // If we are only handling a single architecture, then we will use the
      // normal output file. If we are handling multiple architectures append
      // the architecture name to the end of the out file path so that we
      // don't overwrite the previous architecture's gsym file.
      if (MultipleArchs) {
        ArchOutFile += '.';
        ArchOutFile += ArchName.str();
      }
      outs() << "Output file (" << ArchName << "): " << ArchOutFile << "\n";
      if (auto Err = handleObjectFile(*Obj, ArchOutFile))
        return Err;
    }
  }
  // NOTE(review): any other kind of Binary falls through to here and reports
  // success without converting anything -- confirm this is intended.
  return Error::success();
}
|
|
|
|
/// Read \p Filename via MemoryBuffer::getFileOrSTDIN() and convert its
/// contents to GSYM through handleBuffer().
///
/// A failure to read the input is reported through error(), which terminates
/// the program.
///
/// \param Filename Path of the file to convert.
/// \param OutFile  Output path forwarded to handleBuffer().
///
/// \returns The result of handleBuffer().
static llvm::Error handleFileConversionToGSYM(StringRef Filename,
                                              const std::string &OutFile) {
  auto ContentsOrErr = MemoryBuffer::getFileOrSTDIN(Filename);
  error(Filename, ContentsOrErr.getError());
  const std::unique_ptr<MemoryBuffer> Contents = std::move(*ContentsOrErr);
  return handleBuffer(Filename, *Contents, OutFile);
}
|
|
|
|
/// Convert the file named by the --convert option to GSYM.
///
/// The output path is the value of --out-file, or the --convert path with
/// ".gsym" appended when --out-file is empty. The input is first expanded
/// with expandBundle() and every resulting file is converted in turn.
///
/// \param OS Stream the "Input file: ..." line is written to.
///
/// \returns Error::success(), or the first error returned by
/// handleFileConversionToGSYM().
static llvm::Error convertFileToGSYM(raw_ostream &OS) {
  std::string OutFile = OutputFilename;
  if (OutFile.empty()) {
    OutFile = ConvertFilename;
    OutFile += ".gsym";
  }

  OS << "Input file: " << ConvertFilename << "\n";

  // Expand any .dSYM bundles to the individual object files contained therein.
  const std::vector<std::string> Inputs = expandBundle(ConvertFilename);
  for (const std::string &Input : Inputs)
    if (auto Err = handleFileConversionToGSYM(Input, OutFile))
      return Err;
  return Error::success();
}
|
|
|
|
int main(int argc, char const *argv[]) {
|
|
// Print a stack trace if we signal out.
|
|
sys::PrintStackTraceOnErrorSignal(argv[0]);
|
|
PrettyStackTraceProgram X(argc, argv);
|
|
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
|
|
|
|
llvm::InitializeAllTargets();
|
|
|
|
const char *Overview =
|
|
"A tool for dumping, searching and creating GSYM files.\n\n"
|
|
"Specify one or more GSYM paths as arguments to dump all of the "
|
|
"information in each GSYM file.\n"
|
|
"Specify a single GSYM file along with one or more --lookup options to "
|
|
"lookup addresses within that GSYM file.\n"
|
|
"Use the --convert option to specify a file with option --out-file "
|
|
"option to convert to GSYM format.\n";
|
|
HideUnrelatedOptions(
|
|
{&GeneralOptions, &ConversionOptions, &LookupOptions});
|
|
cl::ParseCommandLineOptions(argc, argv, Overview);
|
|
|
|
if (Help) {
|
|
PrintHelpMessage(/*Hidden =*/false, /*Categorized =*/true);
|
|
return 0;
|
|
}
|
|
|
|
raw_ostream &OS = outs();
|
|
|
|
if (!ConvertFilename.empty()) {
|
|
// Convert DWARF to GSYM
|
|
if (!InputFilenames.empty()) {
|
|
OS << "error: no input files can be specified when using the --convert "
|
|
"option.\n";
|
|
return 1;
|
|
}
|
|
// Call error() if we have an error and it will exit with a status of 1
|
|
if (auto Err = convertFileToGSYM(OS))
|
|
error("DWARF conversion failed: ", std::move(Err));
|
|
return 0;
|
|
}
|
|
|
|
// Dump or access data inside GSYM files
|
|
for (const auto &GSYMPath : InputFilenames) {
|
|
auto Gsym = GsymReader::openFile(GSYMPath);
|
|
if (!Gsym)
|
|
error(GSYMPath, Gsym.takeError());
|
|
|
|
if (LookupAddresses.empty()) {
|
|
Gsym->dump(outs());
|
|
continue;
|
|
}
|
|
|
|
// Lookup an address in a GSYM file and print any matches.
|
|
OS << "Looking up addresses in \"" << GSYMPath << "\":\n";
|
|
for (auto Addr: LookupAddresses) {
|
|
if (auto Result = Gsym->lookup(Addr)) {
|
|
// If verbose is enabled dump the full function info for the address.
|
|
if (Verbose) {
|
|
if (auto FI = Gsym->getFunctionInfo(Addr)) {
|
|
OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
|
|
Gsym->dump(OS, *FI);
|
|
OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
|
|
}
|
|
}
|
|
OS << Result.get();
|
|
} else {
|
|
if (Verbose)
|
|
OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
|
|
OS << HEX64(Addr) << ": ";
|
|
logAllUnhandledErrors(Result.takeError(), OS, "error: ");
|
|
}
|
|
if (Verbose)
|
|
OS << "\n";
|
|
}
|
|
}
|
|
return EXIT_SUCCESS;
|
|
}
|