//===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Windows-specific. // A parser for the module-definition file (.def file). // // The format of module-definition files are described in this document: // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx // //===----------------------------------------------------------------------===// #include "llvm/Object/COFFModuleDefinition.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Object/COFF.h" #include "llvm/Object/COFFImportFile.h" #include "llvm/Object/Error.h" #include "llvm/Support/Error.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" using namespace llvm::COFF; using namespace llvm; namespace llvm { namespace object { enum Kind { Unknown, Eof, Identifier, Comma, Equal, EqualEqual, KwBase, KwConstant, KwData, KwExports, KwHeapsize, KwLibrary, KwName, KwNoname, KwPrivate, KwStacksize, KwVersion, }; struct Token { explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {} Kind K; StringRef Value; }; static bool isDecorated(StringRef Sym, bool MingwDef) { // In def files, the symbols can either be listed decorated or undecorated. // // - For cdecl symbols, only the undecorated form is allowed. // - For fastcall and vectorcall symbols, both fully decorated or // undecorated forms can be present. // - For stdcall symbols in non-MinGW environments, the decorated form is // fully decorated with leading underscore and trailing stack argument // size - like "_Func@0". // - In MinGW def files, a decorated stdcall symbol does not include the // leading underscore though, like "Func@0". // This function controls whether a leading underscore should be added to // the given symbol name or not. For MinGW, treat a stdcall symbol name such // as "Func@0" as undecorated, i.e. a leading underscore must be added. // For non-MinGW, look for '@' in the whole string and consider "_Func@0" // as decorated, i.e. don't add any more leading underscores. // We can't check for a leading underscore here, since function names // themselves can start with an underscore, while a second one still needs // to be added. return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") || (!MingwDef && Sym.contains('@')); } static Error createError(const Twine &Err) { return make_error(StringRef(Err.str()), object_error::parse_failed); } class Lexer { public: Lexer(StringRef S) : Buf(S) {} Token lex() { Buf = Buf.trim(); if (Buf.empty()) return Token(Eof); switch (Buf[0]) { case '\0': return Token(Eof); case ';': { size_t End = Buf.find('\n'); Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); return lex(); } case '=': Buf = Buf.drop_front(); if (Buf.startswith("=")) { Buf = Buf.drop_front(); return Token(EqualEqual, "=="); } return Token(Equal, "="); case ',': Buf = Buf.drop_front(); return Token(Comma, ","); case '"': { StringRef S; std::tie(S, Buf) = Buf.substr(1).split('"'); return Token(Identifier, S); } default: { size_t End = Buf.find_first_of("=,;\r\n \t\v"); StringRef Word = Buf.substr(0, End); Kind K = llvm::StringSwitch(Word) .Case("BASE", KwBase) .Case("CONSTANT", KwConstant) .Case("DATA", KwData) .Case("EXPORTS", KwExports) .Case("HEAPSIZE", KwHeapsize) .Case("LIBRARY", KwLibrary) .Case("NAME", KwName) .Case("NONAME", KwNoname) .Case("PRIVATE", KwPrivate) .Case("STACKSIZE", KwStacksize) .Case("VERSION", KwVersion) .Default(Identifier); Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); return Token(K, Word); } } } private: StringRef Buf; }; class Parser { public: explicit Parser(StringRef S, MachineTypes M, bool B) : Lex(S), Machine(M), MingwDef(B) {} Expected parse() { do { if (Error Err = parseOne()) return std::move(Err); } while (Tok.K != Eof); return Info; } private: void read() { if (Stack.empty()) { Tok = Lex.lex(); return; } Tok = Stack.back(); Stack.pop_back(); } Error readAsInt(uint64_t *I) { read(); if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I)) return createError("integer expected"); return Error::success(); } Error expect(Kind Expected, StringRef Msg) { read(); if (Tok.K != Expected) return createError(Msg); return Error::success(); } void unget() { Stack.push_back(Tok); } Error parseOne() { read(); switch (Tok.K) { case Eof: return Error::success(); case KwExports: for (;;) { read(); if (Tok.K != Identifier) { unget(); return Error::success(); } if (Error Err = parseExport()) return Err; } case KwHeapsize: return parseNumbers(&Info.HeapReserve, &Info.HeapCommit); case KwStacksize: return parseNumbers(&Info.StackReserve, &Info.StackCommit); case KwLibrary: case KwName: { bool IsDll = Tok.K == KwLibrary; // Check before parseName. std::string Name; if (Error Err = parseName(&Name, &Info.ImageBase)) return Err; Info.ImportName = Name; // Set the output file, but don't override /out if it was already passed. if (Info.OutputFile.empty()) { Info.OutputFile = Name; // Append the appropriate file extension if not already present. if (!sys::path::has_extension(Name)) Info.OutputFile += IsDll ? ".dll" : ".exe"; } return Error::success(); } case KwVersion: return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion); default: return createError("unknown directive: " + Tok.Value); } } Error parseExport() { COFFShortExport E; E.Name = std::string(Tok.Value); read(); if (Tok.K == Equal) { read(); if (Tok.K != Identifier) return createError("identifier expected, but got " + Tok.Value); E.ExtName = E.Name; E.Name = std::string(Tok.Value); } else { unget(); } if (Machine == IMAGE_FILE_MACHINE_I386) { if (!isDecorated(E.Name, MingwDef)) E.Name = (std::string("_").append(E.Name)); if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef)) E.ExtName = (std::string("_").append(E.ExtName)); } for (;;) { read(); if (Tok.K == Identifier && Tok.Value[0] == '@') { if (Tok.Value == "@") { // "foo @ 10" read(); Tok.Value.getAsInteger(10, E.Ordinal); } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) { // "foo \n @bar" - Not an ordinal modifier at all, but the next // export (fastcall decorated) - complete the current one. unget(); Info.Exports.push_back(E); return Error::success(); } // "foo @10" read(); if (Tok.K == KwNoname) { E.Noname = true; } else { unget(); } continue; } if (Tok.K == KwData) { E.Data = true; continue; } if (Tok.K == KwConstant) { E.Constant = true; continue; } if (Tok.K == KwPrivate) { E.Private = true; continue; } if (Tok.K == EqualEqual) { read(); E.AliasTarget = std::string(Tok.Value); if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef)) E.AliasTarget = std::string("_").append(E.AliasTarget); continue; } unget(); Info.Exports.push_back(E); return Error::success(); } } // HEAPSIZE/STACKSIZE reserve[,commit] Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) { if (Error Err = readAsInt(Reserve)) return Err; read(); if (Tok.K != Comma) { unget(); Commit = nullptr; return Error::success(); } if (Error Err = readAsInt(Commit)) return Err; return Error::success(); } // NAME outputPath [BASE=address] Error parseName(std::string *Out, uint64_t *Baseaddr) { read(); if (Tok.K == Identifier) { *Out = std::string(Tok.Value); } else { *Out = ""; unget(); return Error::success(); } read(); if (Tok.K == KwBase) { if (Error Err = expect(Equal, "'=' expected")) return Err; if (Error Err = readAsInt(Baseaddr)) return Err; } else { unget(); *Baseaddr = 0; } return Error::success(); } // VERSION major[.minor] Error parseVersion(uint32_t *Major, uint32_t *Minor) { read(); if (Tok.K != Identifier) return createError("identifier expected, but got " + Tok.Value); StringRef V1, V2; std::tie(V1, V2) = Tok.Value.split('.'); if (V1.getAsInteger(10, *Major)) return createError("integer expected, but got " + Tok.Value); if (V2.empty()) *Minor = 0; else if (V2.getAsInteger(10, *Minor)) return createError("integer expected, but got " + Tok.Value); return Error::success(); } Lexer Lex; Token Tok; std::vector Stack; MachineTypes Machine; COFFModuleDefinition Info; bool MingwDef; }; Expected parseCOFFModuleDefinition(MemoryBufferRef MB, MachineTypes Machine, bool MingwDef) { return Parser(MB.getBuffer(), Machine, MingwDef).parse(); } } // namespace object } // namespace llvm