summaryrefslogtreecommitdiff
path: root/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp
diff options
context:
space:
mode:
authorBrian Cain <bcain@codeaurora.org>2017-02-27 06:22:17 +0000
committerBrian Cain <bcain@codeaurora.org>2017-02-27 06:22:17 +0000
commitf54e7aac328ef32810194f3d41aa766a306492a7 (patch)
tree25e966e3baa32ff6f6f941ed685930d2d50ea2e9 /tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp
parent34270516e12b2f9e25ff63442d51b3720f2811e6 (diff)
llvm-mc-fuzzer: add support for assembly
This creates an llvm-mc-disassemble-fuzzer from the existing llvm-mc-fuzzer and finishing the assemble support in llvm-mc-assemble-fuzzer. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@296323 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp')
-rw-r--r--tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp313
1 files changed, 313 insertions, 0 deletions
diff --git a/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp b/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp
new file mode 100644
index 00000000000..0344d8cd8c9
--- /dev/null
+++ b/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp
@@ -0,0 +1,313 @@
+//===--- llvm-mc-fuzzer.cpp - Fuzzer for the MC layer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "FuzzerInterface.h"
+#include "llvm-c/Target.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/ToolOutputFile.h"
+
+using namespace llvm;
+
+static cl::opt<std::string>
+ TripleName("triple", cl::desc("Target triple to assemble for, "
+ "see -version for available targets"));
+
+static cl::opt<std::string>
+ MCPU("mcpu",
+ cl::desc("Target a specific cpu type (-mcpu=help for details)"),
+ cl::value_desc("cpu-name"), cl::init(""));
+
+// This is useful for variable-length instruction sets.
+static cl::opt<unsigned> InsnLimit(
+ "insn-limit",
+ cl::desc("Limit the number of instructions to process (0 for no limit)"),
+ cl::value_desc("count"), cl::init(0));
+
+static cl::list<std::string>
+ MAttrs("mattr", cl::CommaSeparated,
+ cl::desc("Target specific attributes (-mattr=help for details)"),
+ cl::value_desc("a1,+a2,-a3,..."));
+// The feature string derived from -mattr's values.
+std::string FeaturesStr;
+
+static cl::list<std::string>
+ FuzzerArgs("fuzzer-args", cl::Positional,
+ cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore,
+ cl::PositionalEatsArgs);
+static std::vector<char *> ModifiedArgv;
+
+enum OutputFileType {
+ OFT_Null,
+ OFT_AssemblyFile,
+ OFT_ObjectFile
+};
+static cl::opt<OutputFileType>
+FileType("filetype", cl::init(OFT_AssemblyFile),
+ cl::desc("Choose an output file type:"),
+ cl::values(
+ clEnumValN(OFT_AssemblyFile, "asm",
+ "Emit an assembly ('.s') file"),
+ clEnumValN(OFT_Null, "null",
+ "Don't emit anything (for timing purposes)"),
+ clEnumValN(OFT_ObjectFile, "obj",
+ "Emit a native object ('.o') file")));
+
+
+class LLVMFuzzerInputBuffer : public MemoryBuffer
+{
+ public:
+ LLVMFuzzerInputBuffer(const uint8_t *data_, size_t size_)
+ : Data(reinterpret_cast<const char *>(data_)),
+ Size(size_) {
+ init(Data, Data+Size, false);
+ }
+
+
+ virtual BufferKind getBufferKind() const {
+ return MemoryBuffer_Malloc; // it's not disk-backed so I think that's
+ // the intent ... though AFAIK it
+ // probably came from an mmap or sbrk
+ }
+
+ private:
+ const char *Data;
+ size_t Size;
+};
+
+static int AssembleInput(const char *ProgName, const Target *TheTarget,
+ SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str,
+ MCAsmInfo &MAI, MCSubtargetInfo &STI,
+ MCInstrInfo &MCII, MCTargetOptions &MCOptions) {
+ static const bool NoInitialTextSection = false;
+
+ std::unique_ptr<MCAsmParser> Parser(
+ createMCAsmParser(SrcMgr, Ctx, Str, MAI));
+
+ std::unique_ptr<MCTargetAsmParser> TAP(
+ TheTarget->createMCAsmParser(STI, *Parser, MCII, MCOptions));
+
+ if (!TAP) {
+ errs() << ProgName
+ << ": error: this target '" << TripleName
+ << "', does not support assembly parsing.\n";
+ abort();
+ }
+
+ Parser->setTargetParser(*TAP);
+
+ return Parser->Run(NoInitialTextSection);
+}
+
+
+int AssembleOneInput(const uint8_t *Data, size_t Size) {
+ const bool ShowInst = false;
+ const bool AsmVerbose = false;
+ const bool UseDwarfDirectory = true;
+
+ Triple TheTriple(Triple::normalize(TripleName));
+
+ SourceMgr SrcMgr;
+
+ std::unique_ptr<MemoryBuffer> BufferPtr(new LLVMFuzzerInputBuffer(Data, Size));
+
+ // Tell SrcMgr about this buffer, which is what the parser will pick up.
+ SrcMgr.AddNewSourceBuffer(std::move(BufferPtr), SMLoc());
+
+ static const std::vector<std::string> NoIncludeDirs;
+ SrcMgr.setIncludeDirs(NoIncludeDirs);
+
+ static std::string ArchName;
+ std::string Error;
+ const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple,
+ Error);
+ if (!TheTarget) {
+ errs() << "error: this target '" << TheTriple.normalize()
+ << "/" << ArchName << "', was not found: '" << Error << "'\n";
+
+ abort();
+ }
+
+ std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
+ if (!MRI) {
+ errs() << "Unable to create target register info!";
+ abort();
+ }
+
+ std::unique_ptr<MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, TripleName));
+ if (!MAI) {
+ errs() << "Unable to create target asm info!";
+ abort();
+ }
+
+
+ MCObjectFileInfo MOFI;
+ MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr);
+
+ static const bool UsePIC = false;
+ static const CodeModel::Model CMModel = CodeModel::Default;
+ MOFI.InitMCObjectFileInfo(TheTriple, UsePIC, CMModel, Ctx);
+
+ const unsigned OutputAsmVariant = 0;
+ std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
+ MCInstPrinter *IP = TheTarget->createMCInstPrinter(Triple(TripleName), OutputAsmVariant,
+ *MAI, *MCII, *MRI);
+ if (!IP) {
+ errs()
+ << "error: unable to create instruction printer for target triple '"
+ << TheTriple.normalize() << "' with assembly variant "
+ << OutputAsmVariant << ".\n";
+
+ abort();
+ }
+
+ const char *ProgName = "llvm-mc-fuzzer";
+ std::unique_ptr<MCSubtargetInfo> STI(
+ TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr));
+ MCCodeEmitter *CE = nullptr;
+ MCAsmBackend *MAB = nullptr;
+
+ MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags();
+
+ std::string OutputString;
+ raw_string_ostream Out(OutputString);
+ auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);
+
+ std::unique_ptr<MCStreamer> Str;
+
+ if (FileType == OFT_AssemblyFile) {
+ Str.reset(TheTarget->createAsmStreamer(
+ Ctx, std::move(FOut), AsmVerbose,
+ UseDwarfDirectory, IP, CE, MAB, ShowInst));
+ } else {
+ assert(FileType == OFT_ObjectFile && "Invalid file type!");
+
+ std::error_code EC;
+ const std::string OutputFilename = "-";
+ auto Out = llvm::make_unique<tool_output_file>(OutputFilename, EC,
+ sys::fs::F_None);
+ if (EC) {
+ errs() << EC.message() << '\n';
+ abort();
+ }
+
+ // Don't waste memory on names of temp labels.
+ Ctx.setUseNamesOnTempLabels(false);
+
+ std::unique_ptr<buffer_ostream> BOS;
+ raw_pwrite_stream *OS = &Out->os();
+ if (!Out->os().supportsSeeking()) {
+ BOS = make_unique<buffer_ostream>(Out->os());
+ OS = BOS.get();
+ }
+
+ MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
+ MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*MRI, TripleName, MCPU,
+ MCOptions);
+ Str.reset(TheTarget->createMCObjectStreamer(
+ TheTriple, Ctx, *MAB, *OS, CE, *STI, MCOptions.MCRelaxAll,
+ MCOptions.MCIncrementalLinkerCompatible,
+ /*DWARFMustBeAtTheEnd*/ false));
+ }
+ const int Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI,
+ *MCII, MCOptions);
+
+ (void) Res;
+
+ return 0;
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+ return AssembleOneInput(Data, Size);
+}
+
+int LLVMFuzzerInitialize(int *argc, char ***argv) {
+ // The command line is unusual compared to other fuzzers due to the need to
+ // specify the target. Options like -triple, -mcpu, and -mattr work like
+ // their counterparts in llvm-mc, while -fuzzer-args collects options for the
+ // fuzzer itself.
+ //
+ // Examples:
+ //
+ // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to
+ // 4-bytes each and use the contents of ./corpus as the test corpus:
+ // llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \
+ // -fuzzer-args -max_len=4 -runs=100000 ./corpus
+ //
+ // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA
+ // feature enabled using up to 64-byte inputs:
+ // llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \
+ // -disassemble -fuzzer-args ./corpus
+ //
+ // If your aim is to find instructions that are not tested, then it is
+ // advisable to constrain the maximum input size to a single instruction
+ // using -max_len as in the first example. This results in a test corpus of
+ // individual instructions that test unique paths. Without this constraint,
+ // there will be considerable redundancy in the corpus.
+
+ char **OriginalArgv = *argv;
+
+ LLVMInitializeAllTargetInfos();
+ LLVMInitializeAllTargetMCs();
+ LLVMInitializeAllAsmParsers();
+
+ cl::ParseCommandLineOptions(*argc, OriginalArgv);
+
+ // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that
+ // the driver can parse its arguments.
+ //
+ // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
+ // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
+ // non-const buffer to avoid the need to clean up when the fuzzer terminates.
+ ModifiedArgv.push_back(OriginalArgv[0]);
+ for (const auto &FuzzerArg : FuzzerArgs) {
+ for (int i = 1; i < *argc; ++i) {
+ if (FuzzerArg == OriginalArgv[i])
+ ModifiedArgv.push_back(OriginalArgv[i]);
+ }
+ }
+ *argc = ModifiedArgv.size();
+ *argv = ModifiedArgv.data();
+
+ // Package up features to be passed to target/subtarget
+ // We have to pass it via a global since the callback doesn't
+ // permit any user data.
+ if (MAttrs.size()) {
+ SubtargetFeatures Features;
+ for (unsigned i = 0; i != MAttrs.size(); ++i)
+ Features.AddFeature(MAttrs[i]);
+ FeaturesStr = Features.getString();
+ }
+
+ if (TripleName.empty())
+ TripleName = sys::getDefaultTargetTriple();
+
+ return 0;
+}