399 lines
16 KiB
C++
399 lines
16 KiB
C++
//===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "MachOLayoutBuilder.h"
|
|
#include "llvm/Support/Alignment.h"
|
|
#include "llvm/Support/Errc.h"
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
|
|
namespace llvm {
|
|
namespace objcopy {
|
|
namespace macho {
|
|
|
|
// Selects the StringTableBuilder kind for the given object.
//
// Relocatable objects (file type MH_OBJECT) get the plain MachO/MachO64 kind;
// every other file type gets the corresponding "Linked" kind. Is64Bit chooses
// between the 32-bit and the 64-bit variant.
StringTableBuilder::Kind
MachOLayoutBuilder::getStringTableBuilderKind(const Object &O, bool Is64Bit) {
  const bool IsRelocatable =
      O.Header.FileType == MachO::HeaderFileType::MH_OBJECT;
  if (Is64Bit)
    return IsRelocatable ? StringTableBuilder::MachO64
                         : StringTableBuilder::MachO64Linked;
  return IsRelocatable ? StringTableBuilder::MachO
                       : StringTableBuilder::MachOLinked;
}
|
|
|
|
// Returns the total size in bytes of all load commands of the object.
//
// Segment commands are sized as the segment command struct plus one section
// struct per section. Every other command known to MachO.def is sized as its
// struct plus the length of its payload.
uint32_t MachOLayoutBuilder::computeSizeOfCmds() const {
  uint32_t TotalSize = 0;
  for (const LoadCommand &Cmd : O.LoadCommands) {
    const auto CmdKind = Cmd.MachOLoadCommand.load_command_data.cmd;

    // Segments are handled up front so that the generic switch below (which
    // also has cases for them) is never reached for a segment command.
    if (CmdKind == MachO::LC_SEGMENT) {
      TotalSize += sizeof(MachO::segment_command) +
                   sizeof(MachO::section) * Cmd.Sections.size();
      continue;
    }
    if (CmdKind == MachO::LC_SEGMENT_64) {
      TotalSize += sizeof(MachO::segment_command_64) +
                   sizeof(MachO::section_64) * Cmd.Sections.size();
      continue;
    }

    // One case per load command listed in MachO.def.
    switch (CmdKind) {
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
  case MachO::LCName:                                                          \
    TotalSize += sizeof(MachO::LCStruct) + Cmd.Payload.size();                 \
    break;
#include "llvm/BinaryFormat/MachO.def"
#undef HANDLE_LOAD_COMMAND
    }
  }

  return TotalSize;
}
|
|
|
|
void MachOLayoutBuilder::constructStringTable() {
|
|
for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
|
|
StrTableBuilder.add(Sym->Name);
|
|
StrTableBuilder.finalize();
|
|
}
|
|
|
|
void MachOLayoutBuilder::updateSymbolIndexes() {
|
|
uint32_t Index = 0;
|
|
for (auto &Symbol : O.SymTable.Symbols)
|
|
Symbol->Index = Index++;
|
|
}
|
|
|
|
// Updates the index and the number of local/external/undefined symbols.
|
|
void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) {
|
|
assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB);
|
|
// Make sure that nlist entries in the symbol table are sorted by the those
|
|
// types. The order is: local < defined external < undefined external.
|
|
assert(llvm::is_sorted(O.SymTable.Symbols,
|
|
[](const std::unique_ptr<SymbolEntry> &A,
|
|
const std::unique_ptr<SymbolEntry> &B) {
|
|
bool AL = A->isLocalSymbol(),
|
|
BL = B->isLocalSymbol();
|
|
if (AL != BL)
|
|
return AL;
|
|
return !AL && !A->isUndefinedSymbol() &&
|
|
B->isUndefinedSymbol();
|
|
}) &&
|
|
"Symbols are not sorted by their types.");
|
|
|
|
uint32_t NumLocalSymbols = 0;
|
|
auto Iter = O.SymTable.Symbols.begin();
|
|
auto End = O.SymTable.Symbols.end();
|
|
for (; Iter != End; ++Iter) {
|
|
if ((*Iter)->isExternalSymbol())
|
|
break;
|
|
|
|
++NumLocalSymbols;
|
|
}
|
|
|
|
uint32_t NumExtDefSymbols = 0;
|
|
for (; Iter != End; ++Iter) {
|
|
if ((*Iter)->isUndefinedSymbol())
|
|
break;
|
|
|
|
++NumExtDefSymbols;
|
|
}
|
|
|
|
MLC.dysymtab_command_data.ilocalsym = 0;
|
|
MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
|
|
MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
|
|
MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
|
|
MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
|
|
MLC.dysymtab_command_data.nundefsym =
|
|
O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
|
|
}
|
|
|
|
// Recomputes and updates offset and size fields in load commands and sections
|
|
// since they could be modified.
|
|
uint64_t MachOLayoutBuilder::layoutSegments() {
|
|
auto HeaderSize =
|
|
Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
|
|
const bool IsObjectFile =
|
|
O.Header.FileType == MachO::HeaderFileType::MH_OBJECT;
|
|
uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0;
|
|
for (LoadCommand &LC : O.LoadCommands) {
|
|
auto &MLC = LC.MachOLoadCommand;
|
|
StringRef Segname;
|
|
uint64_t SegmentVmAddr;
|
|
uint64_t SegmentVmSize;
|
|
switch (MLC.load_command_data.cmd) {
|
|
case MachO::LC_SEGMENT:
|
|
SegmentVmAddr = MLC.segment_command_data.vmaddr;
|
|
SegmentVmSize = MLC.segment_command_data.vmsize;
|
|
Segname = StringRef(MLC.segment_command_data.segname,
|
|
strnlen(MLC.segment_command_data.segname,
|
|
sizeof(MLC.segment_command_data.segname)));
|
|
break;
|
|
case MachO::LC_SEGMENT_64:
|
|
SegmentVmAddr = MLC.segment_command_64_data.vmaddr;
|
|
SegmentVmSize = MLC.segment_command_64_data.vmsize;
|
|
Segname = StringRef(MLC.segment_command_64_data.segname,
|
|
strnlen(MLC.segment_command_64_data.segname,
|
|
sizeof(MLC.segment_command_64_data.segname)));
|
|
break;
|
|
default:
|
|
continue;
|
|
}
|
|
|
|
if (Segname == "__LINKEDIT") {
|
|
// We update the __LINKEDIT segment later (in layoutTail).
|
|
assert(LC.Sections.empty() && "__LINKEDIT segment has sections");
|
|
LinkEditLoadCommand = &MLC;
|
|
continue;
|
|
}
|
|
|
|
// Update file offsets and sizes of sections.
|
|
uint64_t SegOffset = Offset;
|
|
uint64_t SegFileSize = 0;
|
|
uint64_t VMSize = 0;
|
|
for (std::unique_ptr<Section> &Sec : LC.Sections) {
|
|
assert(SegmentVmAddr <= Sec->Addr &&
|
|
"Section's address cannot be smaller than Segment's one");
|
|
uint32_t SectOffset = Sec->Addr - SegmentVmAddr;
|
|
if (IsObjectFile) {
|
|
if (!Sec->hasValidOffset()) {
|
|
Sec->Offset = 0;
|
|
} else {
|
|
uint64_t PaddingSize =
|
|
offsetToAlignment(SegFileSize, Align(1ull << Sec->Align));
|
|
Sec->Offset = SegOffset + SegFileSize + PaddingSize;
|
|
Sec->Size = Sec->Content.size();
|
|
SegFileSize += PaddingSize + Sec->Size;
|
|
}
|
|
} else {
|
|
if (!Sec->hasValidOffset()) {
|
|
Sec->Offset = 0;
|
|
} else {
|
|
Sec->Offset = SegOffset + SectOffset;
|
|
Sec->Size = Sec->Content.size();
|
|
SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size);
|
|
}
|
|
}
|
|
VMSize = std::max(VMSize, SectOffset + Sec->Size);
|
|
}
|
|
|
|
if (IsObjectFile) {
|
|
Offset += SegFileSize;
|
|
} else {
|
|
Offset = alignTo(Offset + SegFileSize, PageSize);
|
|
SegFileSize = alignTo(SegFileSize, PageSize);
|
|
// Use the original vmsize if the segment is __PAGEZERO.
|
|
VMSize =
|
|
Segname == "__PAGEZERO" ? SegmentVmSize : alignTo(VMSize, PageSize);
|
|
}
|
|
|
|
switch (MLC.load_command_data.cmd) {
|
|
case MachO::LC_SEGMENT:
|
|
MLC.segment_command_data.cmdsize =
|
|
sizeof(MachO::segment_command) +
|
|
sizeof(MachO::section) * LC.Sections.size();
|
|
MLC.segment_command_data.nsects = LC.Sections.size();
|
|
MLC.segment_command_data.fileoff = SegOffset;
|
|
MLC.segment_command_data.vmsize = VMSize;
|
|
MLC.segment_command_data.filesize = SegFileSize;
|
|
break;
|
|
case MachO::LC_SEGMENT_64:
|
|
MLC.segment_command_64_data.cmdsize =
|
|
sizeof(MachO::segment_command_64) +
|
|
sizeof(MachO::section_64) * LC.Sections.size();
|
|
MLC.segment_command_64_data.nsects = LC.Sections.size();
|
|
MLC.segment_command_64_data.fileoff = SegOffset;
|
|
MLC.segment_command_64_data.vmsize = VMSize;
|
|
MLC.segment_command_64_data.filesize = SegFileSize;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return Offset;
|
|
}
|
|
|
|
uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) {
|
|
for (LoadCommand &LC : O.LoadCommands)
|
|
for (std::unique_ptr<Section> &Sec : LC.Sections) {
|
|
Sec->RelOff = Sec->Relocations.empty() ? 0 : Offset;
|
|
Sec->NReloc = Sec->Relocations.size();
|
|
Offset += sizeof(MachO::any_relocation_info) * Sec->NReloc;
|
|
}
|
|
|
|
return Offset;
|
|
}
|
|
|
|
// Lays out the __LINKEDIT contents starting at Offset and updates every load
// command that refers to them.
//
// Returns an error if an LC_DYSYMTAB command has a non-zero ntoc, nmodtab,
// nextrefsyms, nlocrel or nextrel, or if a load command that is not handled
// below is encountered; otherwise returns success.
Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
  // When laying out an executable or dynamic library whose only segment is
  // __LINKEDIT, Offset can still be zero here: for such files the file offsets
  // in LC_SEGMENT start at zero by convention (unlike relocatable object
  // files). The tail must nevertheless begin after the header and the load
  // commands.
  const uint64_t HeaderSize =
      Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
  const uint64_t EndOfLoadCommands = HeaderSize + O.Header.SizeOfCmds;
  assert((O.Header.FileType != MachO::HeaderFileType::MH_OBJECT ||
          Offset >= EndOfLoadCommands) &&
         "Incorrect tail offset");
  Offset = std::max(Offset, EndOfLoadCommands);

  // The order of LINKEDIT elements is as follows:
  // rebase info, binding info, weak binding info, lazy binding info, export
  // trie, function starts, data-in-code, symbol table, indirect symbol table,
  // symbol table strings, code signature.
  const uint64_t NListSize =
      Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);

  // Hands out consecutive regions: returns where the region starts and moves
  // the cursor past it.
  uint64_t Cursor = Offset;
  auto Reserve = [&Cursor](uint64_t Size) {
    const uint64_t Start = Cursor;
    Cursor += Size;
    return Start;
  };

  const uint64_t StartOfLinkEdit = Cursor;
  const uint64_t StartOfRebaseInfo = Reserve(O.Rebases.Opcodes.size());
  const uint64_t StartOfBindingInfo = Reserve(O.Binds.Opcodes.size());
  const uint64_t StartOfWeakBindingInfo = Reserve(O.WeakBinds.Opcodes.size());
  const uint64_t StartOfLazyBindingInfo = Reserve(O.LazyBinds.Opcodes.size());
  const uint64_t StartOfExportTrie = Reserve(O.Exports.Trie.size());
  const uint64_t StartOfFunctionStarts = Reserve(O.FunctionStarts.Data.size());
  const uint64_t StartOfDataInCode = Reserve(O.DataInCode.Data.size());
  const uint64_t StartOfSymbols =
      Reserve(NListSize * O.SymTable.Symbols.size());
  const uint64_t StartOfIndirectSymbols =
      Reserve(sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
  const uint64_t StartOfSymbolStrings = Reserve(StrTableBuilder.getSize());

  // The code signature is placed on a 16-byte boundary, but only when the
  // object has a code signature command.
  uint64_t StartOfCodeSignature = Cursor;
  if (O.CodeSignatureCommandIndex)
    StartOfCodeSignature = alignTo(StartOfCodeSignature, 16);
  const uint64_t LinkEditSize =
      (StartOfCodeSignature + O.CodeSignature.Data.size()) - StartOfLinkEdit;

  // Now we have determined the layout of the contents of the __LINKEDIT
  // segment. Update its load command.
  if (LinkEditLoadCommand) {
    MachO::macho_load_command &LinkEdit = *LinkEditLoadCommand;
    const auto LinkEditCmd = LinkEdit.load_command_data.cmd;
    if (LinkEditCmd == MachO::LC_SEGMENT) {
      auto &Seg = LinkEdit.segment_command_data;
      Seg.cmdsize = sizeof(MachO::segment_command);
      Seg.fileoff = StartOfLinkEdit;
      Seg.vmsize = alignTo(LinkEditSize, PageSize);
      Seg.filesize = LinkEditSize;
    } else if (LinkEditCmd == MachO::LC_SEGMENT_64) {
      auto &Seg = LinkEdit.segment_command_64_data;
      Seg.cmdsize = sizeof(MachO::segment_command_64);
      Seg.fileoff = StartOfLinkEdit;
      Seg.vmsize = alignTo(LinkEditSize, PageSize);
      Seg.filesize = LinkEditSize;
    }
  }

  for (LoadCommand &LC : O.LoadCommands) {
    auto &MLC = LC.MachOLoadCommand;
    auto cmd = MLC.load_command_data.cmd;
    switch (cmd) {
    case MachO::LC_CODE_SIGNATURE: {
      auto &Data = MLC.linkedit_data_command_data;
      Data.dataoff = StartOfCodeSignature;
      Data.datasize = O.CodeSignature.Data.size();
      break;
    }
    case MachO::LC_SYMTAB: {
      auto &SymTab = MLC.symtab_command_data;
      SymTab.symoff = StartOfSymbols;
      SymTab.nsyms = O.SymTable.Symbols.size();
      SymTab.stroff = StartOfSymbolStrings;
      SymTab.strsize = StrTableBuilder.getSize();
      break;
    }
    case MachO::LC_DYSYMTAB: {
      auto &DySymTab = MLC.dysymtab_command_data;
      // These tables are not rewritten, so refuse inputs that use them.
      const bool UsesUnsupportedTables =
          DySymTab.ntoc != 0 || DySymTab.nmodtab != 0 ||
          DySymTab.nextrefsyms != 0 || DySymTab.nlocrel != 0 ||
          DySymTab.nextrel != 0;
      if (UsesUnsupportedTables)
        return createStringError(llvm::errc::not_supported,
                                 "shared library is not yet supported");

      // The indirect symbol fields are left as they were when the table is
      // empty.
      if (!O.IndirectSymTable.Symbols.empty()) {
        DySymTab.indirectsymoff = StartOfIndirectSymbols;
        DySymTab.nindirectsyms = O.IndirectSymTable.Symbols.size();
      }

      updateDySymTab(MLC);
      break;
    }
    case MachO::LC_DATA_IN_CODE: {
      auto &Data = MLC.linkedit_data_command_data;
      Data.dataoff = StartOfDataInCode;
      Data.datasize = O.DataInCode.Data.size();
      break;
    }
    case MachO::LC_FUNCTION_STARTS: {
      auto &Data = MLC.linkedit_data_command_data;
      Data.dataoff = StartOfFunctionStarts;
      Data.datasize = O.FunctionStarts.Data.size();
      break;
    }
    case MachO::LC_DYLD_INFO:
    case MachO::LC_DYLD_INFO_ONLY: {
      // An empty table is recorded with offset 0 rather than its would-be
      // position.
      auto &DyldInfo = MLC.dyld_info_command_data;
      DyldInfo.rebase_off = O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo;
      DyldInfo.rebase_size = O.Rebases.Opcodes.size();
      DyldInfo.bind_off = O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo;
      DyldInfo.bind_size = O.Binds.Opcodes.size();
      DyldInfo.weak_bind_off =
          O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo;
      DyldInfo.weak_bind_size = O.WeakBinds.Opcodes.size();
      DyldInfo.lazy_bind_off =
          O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo;
      DyldInfo.lazy_bind_size = O.LazyBinds.Opcodes.size();
      DyldInfo.export_off = O.Exports.Trie.empty() ? 0 : StartOfExportTrie;
      DyldInfo.export_size = O.Exports.Trie.size();
      break;
    }
    // Note that LC_ENCRYPTION_INFO.cryptoff, despite its name and the comment
    // in <mach-o/loader.h>, is not an offset in the binary file; it is a
    // relative virtual address. At the moment modification of the __TEXT
    // segment of executables isn't supported anyway (e.g. data in code entries
    // are not recalculated). Moreover, in general
    // LC_ENCRYPTION_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because
    // without making additional assumptions (e.g. that the entire __TEXT
    // segment should be encrypted) we do not know how to recalculate the
    // boundaries of the encrypted part. For now just copy over these load
    // commands until we encounter a real world usecase where
    // LC_ENCRYPTION_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted.
    case MachO::LC_ENCRYPTION_INFO:
    case MachO::LC_ENCRYPTION_INFO_64:
    case MachO::LC_LOAD_DYLINKER:
    case MachO::LC_MAIN:
    case MachO::LC_RPATH:
    case MachO::LC_SEGMENT:
    case MachO::LC_SEGMENT_64:
    case MachO::LC_VERSION_MIN_MACOSX:
    case MachO::LC_VERSION_MIN_IPHONEOS:
    case MachO::LC_VERSION_MIN_TVOS:
    case MachO::LC_VERSION_MIN_WATCHOS:
    case MachO::LC_BUILD_VERSION:
    case MachO::LC_ID_DYLIB:
    case MachO::LC_LOAD_DYLIB:
    case MachO::LC_LOAD_WEAK_DYLIB:
    case MachO::LC_UUID:
    case MachO::LC_SOURCE_VERSION:
      // Nothing to update.
      break;
    default:
      // Abort if it's unsupported in order to prevent corrupting the object.
      return createStringError(llvm::errc::not_supported,
                               "unsupported load command (cmd=0x%x)", cmd);
    }
  }

  return Error::success();
}
|
|
|
|
// Entry point: recomputes the whole file layout of the object.
//
// Refreshes the header's command count and total command size, builds the
// string table, renumbers the symbols, then lays out segments, relocations and
// finally the tail. Returns whatever layoutTail reports.
Error MachOLayoutBuilder::layout() {
  // The header fields must be current before segments are laid out, because
  // layoutSegments reads SizeOfCmds.
  O.Header.NCmds = O.LoadCommands.size();
  O.Header.SizeOfCmds = computeSizeOfCmds();

  constructStringTable();
  updateSymbolIndexes();

  // Each stage starts where the previous one ended.
  const uint64_t EndOfSegments = layoutSegments();
  const uint64_t EndOfRelocations = layoutRelocations(EndOfSegments);
  return layoutTail(EndOfRelocations);
}
|
|
|
|
} // end namespace macho
|
|
} // end namespace objcopy
|
|
} // end namespace llvm
|