summaryrefslogtreecommitdiff
path: root/lib/fuzzer/dataflow/DataFlow.cpp
blob: a79c796ac456c9f0ca06315fd517dec65233a052 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
/*===- DataFlow.cpp - a standalone DataFlow tracer                  -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// An experimental data-flow tracer for fuzz targets.
// It is based on DFSan and SanitizerCoverage.
// https://clang.llvm.org/docs/DataFlowSanitizer.html
// https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow
//
// It executes the fuzz target on the given input while monitoring the
// data flow for every instrumented comparison instruction.
//
// The output shows which functions depend on which bytes of the input.
//
// Build:
//   1. Compile this file with -fsanitize=dataflow
//   2. Build the fuzz target with -g -fsanitize=dataflow
//       -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp
//   3. Link those together with -fsanitize=dataflow
//
//  -fsanitize-coverage=trace-cmp inserts callbacks around every comparison
//  instruction, DFSan modifies the calls to pass the data flow labels.
//  The callbacks update the data flow label for the current function.
//  See e.g. __dfsw___sanitizer_cov_trace_cmp1 below.
//
//  -fsanitize-coverage=trace-pc-guard,pc-table,func instruments function
//  entries so that the comparison callback knows that current function.
//
//
// Run:
//   # Collect data flow for INPUT_FILE, write to OUTPUT_FILE (default: stdout)
//   ./a.out INPUT_FILE [OUTPUT_FILE]
//
//   # Print all instrumented functions. llvm-symbolizer must be present in PATH
//   ./a.out
//
// Example output:
// ===============
//  F0 11111111111111
//  F1 10000000000000
//  ===============
// "FN xxxxxxxxxx": tells what bytes of the input does the function N depend on.
//    The byte string is LEN+1 bytes. The last byte is set if the function
//    depends on the input length.
//===----------------------------------------------------------------------===*/

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>

#include <execinfo.h>  // backtrace_symbols_fd

#include <sanitizer/dfsan_interface.h>

extern "C" {
extern int LLVMFuzzerTestOneInput(const unsigned char *Data, size_t Size);
__attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv);
} // extern "C"

static size_t InputLen;
static size_t NumFuncs;
static const uintptr_t *FuncsBeg;
static __thread size_t CurrentFunc;
static dfsan_label *FuncLabels;  // Array of NumFuncs elements.
static char *PrintableStringForLabel;  // InputLen + 2 bytes.
static bool LabelSeen[1 << 8 * sizeof(dfsan_label)];

// Prints all instrumented functions.
static int PrintFunctions() {
  // We don't have the symbolizer integrated with dfsan yet.
  // So use backtrace_symbols_fd and pipe it through llvm-symbolizer.
  // TODO(kcc): this is pretty ugly and may break in lots of ways.
  //      We'll need to make a proper in-process symbolizer work with DFSan.
  FILE *Pipe = popen("sed 's/(+/ /g; s/).*//g' "
                     "| llvm-symbolizer "
                     "| grep 'dfs\\$' "
                     "| sed 's/dfs\\$//g'", "w");
  for (size_t I = 0; I < NumFuncs; I++) {
    uintptr_t PC = FuncsBeg[I * 2];
    void *const Buf[1] = {(void*)PC};
    backtrace_symbols_fd(Buf, 1, fileno(Pipe));
  }
  pclose(Pipe);
  return 0;
}

extern "C"
void SetBytesForLabel(dfsan_label L, char *Bytes) {
  if (LabelSeen[L])
    return;
  LabelSeen[L] = true;
  assert(L);
  if (L <= InputLen + 1) {
    Bytes[L - 1] = '1';
  } else {
    auto *DLI = dfsan_get_label_info(L);
    SetBytesForLabel(DLI->l1, Bytes);
    SetBytesForLabel(DLI->l2, Bytes);
  }
}

static char *GetPrintableStringForLabel(dfsan_label L) {
  memset(PrintableStringForLabel, '0', InputLen + 1);
  PrintableStringForLabel[InputLen + 1] = 0;
  memset(LabelSeen, 0, sizeof(LabelSeen));
  SetBytesForLabel(L, PrintableStringForLabel);
  return PrintableStringForLabel;
}

static void PrintDataFlow(FILE *Out) {
  for (size_t I = 0; I < NumFuncs; I++)
    if (FuncLabels[I])
      fprintf(Out, "F%zd %s\n", I, GetPrintableStringForLabel(FuncLabels[I]));
}

int main(int argc, char **argv) {
  if (LLVMFuzzerInitialize)
    LLVMFuzzerInitialize(&argc, &argv);
  if (argc == 1)
    return PrintFunctions();
  assert(argc == 4 || argc == 5);
  size_t Beg = atoi(argv[1]);
  size_t End = atoi(argv[2]);
  assert(Beg < End);

  const char *Input = argv[3];
  fprintf(stderr, "INFO: reading '%s'\n", Input);
  FILE *In = fopen(Input, "r");
  assert(In);
  fseek(In, 0, SEEK_END);
  InputLen = ftell(In);
  fseek(In, 0, SEEK_SET);
  unsigned char *Buf = (unsigned char*)malloc(InputLen);
  size_t NumBytesRead = fread(Buf, 1, InputLen, In);
  assert(NumBytesRead == InputLen);
  PrintableStringForLabel = (char*)malloc(InputLen + 2);
  fclose(In);

  fprintf(stderr, "INFO: running '%s'\n", Input);
  for (size_t I = 1; I <= InputLen; I++) {
    dfsan_label L = dfsan_create_label("", nullptr);
    assert(L == I);
    size_t Idx = I - 1;
    if (Idx >= Beg && Idx < End)
      dfsan_set_label(L, Buf + Idx, 1);
  }
  dfsan_label SizeL = dfsan_create_label("", nullptr);
  assert(SizeL == InputLen + 1);
  dfsan_set_label(SizeL, &InputLen, sizeof(InputLen));

  LLVMFuzzerTestOneInput(Buf, InputLen);
  free(Buf);

  bool OutIsStdout = argc == 4;
  fprintf(stderr, "INFO: writing dataflow to %s\n",
          OutIsStdout ? "<stdout>" : argv[4]);
  FILE *Out = OutIsStdout ? stdout : fopen(argv[4], "w");
  PrintDataFlow(Out);
  if (!OutIsStdout) fclose(Out);
}

extern "C" {

void __sanitizer_cov_trace_pc_guard_init(uint32_t *start,
                                         uint32_t *stop) {
  assert(NumFuncs == 0 && "This tool does not support DSOs");
  assert(start < stop && "The code is not instrumented for coverage");
  if (start == stop || *start) return;  // Initialize only once.
  for (uint32_t *x = start; x < stop; x++)
    *x = ++NumFuncs;  // The first index is 1.
  FuncLabels = (dfsan_label*)calloc(NumFuncs, sizeof(dfsan_label));
  fprintf(stderr, "INFO: %zd instrumented function(s) observed\n", NumFuncs);
}

void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
                              const uintptr_t *pcs_end) {
  assert(NumFuncs == (pcs_end - pcs_beg) / 2);
  FuncsBeg = pcs_beg;
}

void __sanitizer_cov_trace_pc_indir(uint64_t x){}  // unused.

void __sanitizer_cov_trace_pc_guard(uint32_t *guard){
  uint32_t FuncNum = *guard - 1;  // Guards start from 1.
  assert(FuncNum < NumFuncs);
  CurrentFunc = FuncNum;
}

void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases,
                                         dfsan_label L1, dfsan_label UnusedL) {
  assert(CurrentFunc < NumFuncs);
  FuncLabels[CurrentFunc] = dfsan_union(FuncLabels[CurrentFunc], L1);
}

#define HOOK(Name, Type)                                                       \
  void Name(Type Arg1, Type Arg2, dfsan_label L1, dfsan_label L2) {            \
    assert(CurrentFunc < NumFuncs);                                            \
    FuncLabels[CurrentFunc] =                                                  \
        dfsan_union(FuncLabels[CurrentFunc], dfsan_union(L1, L2));             \
  }

HOOK(__dfsw___sanitizer_cov_trace_const_cmp1, uint8_t)
HOOK(__dfsw___sanitizer_cov_trace_const_cmp2, uint16_t)
HOOK(__dfsw___sanitizer_cov_trace_const_cmp4, uint32_t)
HOOK(__dfsw___sanitizer_cov_trace_const_cmp8, uint64_t)
HOOK(__dfsw___sanitizer_cov_trace_cmp1, uint8_t)
HOOK(__dfsw___sanitizer_cov_trace_cmp2, uint16_t)
HOOK(__dfsw___sanitizer_cov_trace_cmp4, uint32_t)
HOOK(__dfsw___sanitizer_cov_trace_cmp8, uint64_t)

} // extern "C"