//===-- llvm-mc-assemble-fuzzer.cpp - Fuzzer for the MC layer -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // //===----------------------------------------------------------------------===// #include "llvm-c/Target.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCTargetOptionsCommandFlags.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; static cl::opt TripleName("triple", cl::desc("Target triple to assemble for, " "see -version for available targets")); static cl::opt MCPU("mcpu", cl::desc("Target a specific cpu type (-mcpu=help for details)"), cl::value_desc("cpu-name"), cl::init("")); // This is useful for variable-length instruction sets. static cl::opt InsnLimit( "insn-limit", cl::desc("Limit the number of instructions to process (0 for no limit)"), cl::value_desc("count"), cl::init(0)); static cl::list MAttrs("mattr", cl::CommaSeparated, cl::desc("Target specific attributes (-mattr=help for details)"), cl::value_desc("a1,+a2,-a3,...")); // The feature string derived from -mattr's values. std::string FeaturesStr; static cl::list FuzzerArgs("fuzzer-args", cl::Positional, cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore, cl::PositionalEatsArgs); static std::vector ModifiedArgv; enum OutputFileType { OFT_Null, OFT_AssemblyFile, OFT_ObjectFile }; static cl::opt FileType("filetype", cl::init(OFT_AssemblyFile), cl::desc("Choose an output file type:"), cl::values( clEnumValN(OFT_AssemblyFile, "asm", "Emit an assembly ('.s') file"), clEnumValN(OFT_Null, "null", "Don't emit anything (for timing purposes)"), clEnumValN(OFT_ObjectFile, "obj", "Emit a native object ('.o') file"))); class LLVMFuzzerInputBuffer : public MemoryBuffer { public: LLVMFuzzerInputBuffer(const uint8_t *data_, size_t size_) : Data(reinterpret_cast(data_)), Size(size_) { init(Data, Data+Size, false); } virtual BufferKind getBufferKind() const { return MemoryBuffer_Malloc; // it's not disk-backed so I think that's // the intent ... though AFAIK it // probably came from an mmap or sbrk } private: const char *Data; size_t Size; }; static int AssembleInput(const char *ProgName, const Target *TheTarget, SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str, MCAsmInfo &MAI, MCSubtargetInfo &STI, MCInstrInfo &MCII, MCTargetOptions &MCOptions) { static const bool NoInitialTextSection = false; std::unique_ptr Parser( createMCAsmParser(SrcMgr, Ctx, Str, MAI)); std::unique_ptr TAP( TheTarget->createMCAsmParser(STI, *Parser, MCII, MCOptions)); if (!TAP) { errs() << ProgName << ": error: this target '" << TripleName << "', does not support assembly parsing.\n"; abort(); } Parser->setTargetParser(*TAP); return Parser->Run(NoInitialTextSection); } int AssembleOneInput(const uint8_t *Data, size_t Size) { const bool ShowInst = false; const bool AsmVerbose = false; const bool UseDwarfDirectory = true; Triple TheTriple(Triple::normalize(TripleName)); SourceMgr SrcMgr; std::unique_ptr BufferPtr(new LLVMFuzzerInputBuffer(Data, Size)); // Tell SrcMgr about this buffer, which is what the parser will pick up. SrcMgr.AddNewSourceBuffer(std::move(BufferPtr), SMLoc()); static const std::vector NoIncludeDirs; SrcMgr.setIncludeDirs(NoIncludeDirs); static std::string ArchName; std::string Error; const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple, Error); if (!TheTarget) { errs() << "error: this target '" << TheTriple.normalize() << "/" << ArchName << "', was not found: '" << Error << "'\n"; abort(); } std::unique_ptr MRI(TheTarget->createMCRegInfo(TripleName)); if (!MRI) { errs() << "Unable to create target register info!"; abort(); } MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags(); std::unique_ptr MAI( TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); if (!MAI) { errs() << "Unable to create target asm info!"; abort(); } MCObjectFileInfo MOFI; MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr); static const bool UsePIC = false; MOFI.InitMCObjectFileInfo(TheTriple, UsePIC, Ctx); const unsigned OutputAsmVariant = 0; std::unique_ptr MCII(TheTarget->createMCInstrInfo()); MCInstPrinter *IP = TheTarget->createMCInstPrinter(Triple(TripleName), OutputAsmVariant, *MAI, *MCII, *MRI); if (!IP) { errs() << "error: unable to create instruction printer for target triple '" << TheTriple.normalize() << "' with assembly variant " << OutputAsmVariant << ".\n"; abort(); } const char *ProgName = "llvm-mc-fuzzer"; std::unique_ptr STI( TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr)); std::unique_ptr CE = nullptr; std::unique_ptr MAB = nullptr; std::string OutputString; raw_string_ostream Out(OutputString); auto FOut = std::make_unique(Out); std::unique_ptr Str; if (FileType == OFT_AssemblyFile) { Str.reset(TheTarget->createAsmStreamer(Ctx, std::move(FOut), AsmVerbose, UseDwarfDirectory, IP, std::move(CE), std::move(MAB), ShowInst)); } else { assert(FileType == OFT_ObjectFile && "Invalid file type!"); std::error_code EC; const std::string OutputFilename = "-"; auto Out = std::make_unique(OutputFilename, EC, sys::fs::OF_None); if (EC) { errs() << EC.message() << '\n'; abort(); } // Don't waste memory on names of temp labels. Ctx.setUseNamesOnTempLabels(false); std::unique_ptr BOS; raw_pwrite_stream *OS = &Out->os(); if (!Out->os().supportsSeeking()) { BOS = std::make_unique(Out->os()); OS = BOS.get(); } MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx); MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions); Str.reset(TheTarget->createMCObjectStreamer( TheTriple, Ctx, std::unique_ptr(MAB), MAB->createObjectWriter(*OS), std::unique_ptr(CE), *STI, MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ false)); } const int Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI, *MCII, MCOptions); (void) Res; return 0; } extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { return AssembleOneInput(Data, Size); } extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc, char ***argv) { // The command line is unusual compared to other fuzzers due to the need to // specify the target. Options like -triple, -mcpu, and -mattr work like // their counterparts in llvm-mc, while -fuzzer-args collects options for the // fuzzer itself. // // Examples: // // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to // 4-bytes each and use the contents of ./corpus as the test corpus: // llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \ // -fuzzer-args -max_len=4 -runs=100000 ./corpus // // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA // feature enabled using up to 64-byte inputs: // llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \ // -disassemble -fuzzer-args ./corpus // // If your aim is to find instructions that are not tested, then it is // advisable to constrain the maximum input size to a single instruction // using -max_len as in the first example. This results in a test corpus of // individual instructions that test unique paths. Without this constraint, // there will be considerable redundancy in the corpus. char **OriginalArgv = *argv; LLVMInitializeAllTargetInfos(); LLVMInitializeAllTargetMCs(); LLVMInitializeAllAsmParsers(); cl::ParseCommandLineOptions(*argc, OriginalArgv); // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that // the driver can parse its arguments. // // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs. // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a // non-const buffer to avoid the need to clean up when the fuzzer terminates. ModifiedArgv.push_back(OriginalArgv[0]); for (const auto &FuzzerArg : FuzzerArgs) { for (int i = 1; i < *argc; ++i) { if (FuzzerArg == OriginalArgv[i]) ModifiedArgv.push_back(OriginalArgv[i]); } } *argc = ModifiedArgv.size(); *argv = ModifiedArgv.data(); // Package up features to be passed to target/subtarget // We have to pass it via a global since the callback doesn't // permit any user data. if (MAttrs.size()) { SubtargetFeatures Features; for (unsigned i = 0; i != MAttrs.size(); ++i) Features.AddFeature(MAttrs[i]); FeaturesStr = Features.getString(); } if (TripleName.empty()) TripleName = sys::getDefaultTargetTriple(); return 0; }