summaryrefslogtreecommitdiff
path: root/lib/dfsan/dfsan_custom.cc
diff options
context:
space:
mode:
authorLorenzo Martignoni <martignlo@google.com>2014-10-08 10:01:42 +0000
committerLorenzo Martignoni <martignlo@google.com>2014-10-08 10:01:42 +0000
commit9b4c0afb60230885211abec02091c6dac94f178c (patch)
tree307c322ab5d73a8b0d371ad8ba3d50436f3d7cfa /lib/dfsan/dfsan_custom.cc
parent5c960cbcb98b4d10570bcada4ad19791cf4f7142 (diff)
Custom wrappers for DFSanitizing sprintf & snprintf.
Differential Revision: http://reviews.llvm.org/D5561 git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@219293 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/dfsan/dfsan_custom.cc')
-rw-r--r--lib/dfsan/dfsan_custom.cc281
1 files changed, 281 insertions, 0 deletions
diff --git a/lib/dfsan/dfsan_custom.cc b/lib/dfsan/dfsan_custom.cc
index d06a00321..ffd8b000b 100644
--- a/lib/dfsan/dfsan_custom.cc
+++ b/lib/dfsan/dfsan_custom.cc
@@ -12,12 +12,14 @@
// This file defines the custom functions listed in done_abilist.txt.
//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_internal_defs.h"
#include "sanitizer_common/sanitizer_linux.h"
#include "dfsan/dfsan.h"
#include <arpa/inet.h>
+#include <assert.h>
#include <ctype.h>
#include <dlfcn.h>
#include <link.h>
@@ -26,6 +28,8 @@
#include <pwd.h>
#include <sched.h>
#include <signal.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -839,4 +843,281 @@ __dfsw_write(int fd, const void *buf, size_t count,
*ret_label = 0;
return write(fd, buf, count);
}
+
+// Type used to extract a dfsan_label with va_arg(): labels are read from the va_list as this type and then converted to dfsan_label.
+typedef int dfsan_label_va;  // NOTE(review): presumably 'int' because dfsan_label is narrower than int and is promoted when passed through '...' -- confirm
+
+// A chunk of data representing the output of formatting either a run of
+// literal text or a single format directive (one Chunk per tokenized piece).
+struct Chunk {
+ // Address in the output buffer where this chunk's formatted text begins
+ const char *ptr;
+ // Number of output bytes attributed to this chunk
+ size_t size;
+
+ // Kind of DFSan label to apply (depends on the format directive)
+ enum {
+ // Literal text (or '%%'): no argument and thus no label to consume
+ NONE = 0,
+ // '%n' directive: its label is consumed but nothing is propagated
+ IGNORED,
+ // '%s' directive: labels are copied from the string argument's bytes
+ STRING,
+ // Any other directive: the argument's label covers the whole chunk
+ NUMERIC,
+ } label_type;
+
+ // Value of the '%s' argument when label_type == STRING; nullptr otherwise
+ const char *arg;
+};
+
+// Formats a single chunk of the format string into the output buffer.
+//
+// The output is stored in 'str' starting from offset 'off'. The chunk to
+// format is the first 'format_size' bytes of 'format'; since the chunk is
+// usually followed by more format text, it is copied into a private
+// NUL-terminated buffer first. The variadic arguments are the values consumed
+// by that chunk. If 'has_size' is true, 'size' is the total capacity of 'str'
+// and at most 'size - off' bytes are written (nothing is written once 'off'
+// has reached 'size'); otherwise the output is unbounded, as with sprintf.
+//
+// Returns the return value of the vsnprintf/vsprintf call used to format the
+// chunk, or -1 if the temporary copy of the chunk cannot be allocated.
+static int format_chunk(char *str, size_t off, bool has_size, size_t size,
+                        const char *format, size_t format_size, ...) {
+  char *chunk_format = (char *) malloc(format_size + 1);
+  // Explicit check rather than assert(): assert() is compiled out when NDEBUG
+  // is defined, which would turn an allocation failure into a NULL
+  // dereference. A negative result is what the caller already treats as a
+  // formatting error.
+  if (!chunk_format) {
+    return -1;
+  }
+  internal_memcpy(chunk_format, format, format_size);
+  chunk_format[format_size] = '\0';
+
+  va_list ap;
+  va_start(ap, format_size);
+  int r = 0;
+  if (has_size) {
+    // Once 'off' is at or past 'size' there is no room left; a zero bound
+    // makes vsnprintf only compute the length the chunk would have had.
+    r = vsnprintf(str + off, off < size ? size - off : 0, chunk_format, ap);
+  } else {
+    r = vsprintf(str + off, chunk_format, ap);
+  }
+  va_end(ap);
+
+  free(chunk_format);
+  return r;
+}
+
+// Formats the input and propagates the input labels to the output. The output
+// is stored in 'str'. If 'has_size' is true, 'size' bounds the number of
+// output bytes. 'format' and 'ap' are the format string and the list of
+// arguments for formatting. Returns the return value vsnprintf would return
+// (the sum of the lengths of all chunks), or the negative value reported by
+// the first chunk that fails to format; in that error case the function
+// returns immediately, without labeling any output and without storing the
+// return label.
+//
+// The function tokenizes the format string into chunks, each representing
+// either a constant string or a single format directive (e.g., '%.3f'), and
+// formats each chunk independently into the output string. This makes it
+// possible to tell which bytes of the output string depend on which argument,
+// and thus to propagate labels more precisely.
+//
+// Layout of 'ap': the arguments consumed by the format directives, followed
+// by the label of 'str', the label of 'size' (only if 'has_size'), the label
+// of 'format', one label per directive argument, and finally a pointer to the
+// label of the return value.
+//
+// Limitation: a '*' width or precision is not recognized (it falls into the
+// default case below), so the int argument it requires is never fetched.
+static int format_buffer(char *str, bool has_size, size_t size,
+                         const char *format, va_list ap) {
+  InternalMmapVector<Chunk> chunks(8);
+  size_t off = 0;
+
+  while (*format) {
+    chunks.push_back(Chunk());
+    Chunk& chunk = chunks.back();
+    chunk.ptr = str + off;
+    chunk.arg = nullptr;
+
+    int status = 0;
+
+    if (*format != '%') {
+      // Ordinary character. Consume all the characters until a '%' or the end
+      // of the string.
+      size_t format_size = 0;
+      for (; *format && *format != '%'; ++format, ++format_size) {}
+      status = format_chunk(str, off, has_size, size, format - format_size,
+                            format_size);
+      chunk.label_type = Chunk::NONE;
+    } else {
+      // Conversion directive. Consume all the characters until a conversion
+      // specifier or the end of the string.
+      bool end_format = false;
+      // Inside the loop 'format' points at the conversion specifier and
+      // 'format_size' is its distance from the leading '%', so the directive
+      // spans 'format_size + 1' bytes starting at 'format - format_size'.
+#define FORMAT_CHUNK(t)                                                  \
+      format_chunk(str, off, has_size, size, format - format_size,       \
+                   format_size + 1, va_arg(ap, t))
+
+      // Note the pre-increment in the condition: once a specifier has been
+      // handled, 'format' is advanced past it before 'end_format' is tested.
+      for (size_t format_size = 1; *++format && !end_format; ++format_size) {
+        switch (*format) {
+          case 'd':
+          case 'i':
+          case 'o':
+          case 'u':
+          case 'x':
+          case 'X':
+            switch (*(format - 1)) {
+              case 'h':
+                // Also covers the 'hh' case (since the size of the arg is still
+                // an int).
+                status = FORMAT_CHUNK(int);
+                break;
+              case 'l':
+                if (format_size >= 2 && *(format - 2) == 'l') {
+                  status = FORMAT_CHUNK(long long int);
+                } else {
+                  status = FORMAT_CHUNK(long int);
+                }
+                break;
+              case 'q':
+                status = FORMAT_CHUNK(long long int);
+                break;
+              case 'j':
+                status = FORMAT_CHUNK(intmax_t);
+                break;
+              case 'z':
+                status = FORMAT_CHUNK(size_t);
+                break;
+              case 't':
+                // The 't' length modifier denotes a ptrdiff_t argument.
+                status = FORMAT_CHUNK(ptrdiff_t);
+                break;
+              default:
+                status = FORMAT_CHUNK(int);
+            }
+            chunk.label_type = Chunk::NUMERIC;
+            end_format = true;
+            break;
+
+          case 'a':
+          case 'A':
+          case 'e':
+          case 'E':
+          case 'f':
+          case 'F':
+          case 'g':
+          case 'G':
+            if (*(format - 1) == 'L') {
+              status = FORMAT_CHUNK(long double);
+            } else {
+              status = FORMAT_CHUNK(double);
+            }
+            chunk.label_type = Chunk::NUMERIC;
+            end_format = true;
+            break;
+
+          case 'c':
+            status = FORMAT_CHUNK(int);
+            chunk.label_type = Chunk::NUMERIC;
+            end_format = true;
+            break;
+
+          case 's':
+            // Keep the pointer: the labels of the string's bytes are copied
+            // to the output once all the chunks have been formatted.
+            chunk.arg = va_arg(ap, char *);
+            status =
+                format_chunk(str, off, has_size, size,
+                             format - format_size, format_size + 1,
+                             chunk.arg);
+            chunk.label_type = Chunk::STRING;
+            end_format = true;
+            break;
+
+          case 'p':
+            status = FORMAT_CHUNK(void *);
+            chunk.label_type = Chunk::NUMERIC;
+            end_format = true;
+            break;
+
+          case 'n':
+            // Produces no output; stores the number of bytes formatted so far.
+            *(va_arg(ap, int *)) = (int)off;
+            chunk.label_type = Chunk::IGNORED;
+            end_format = true;
+            break;
+
+          case '%':
+            status = format_chunk(str, off, has_size, size,
+                                  format - format_size, format_size + 1);
+            chunk.label_type = Chunk::NONE;
+            end_format = true;
+            break;
+
+          default:
+            // Flag, width, precision or length-modifier character: keep
+            // scanning for the conversion specifier.
+            break;
+        }
+      }
+#undef FORMAT_CHUNK
+    }
+
+    if (status < 0) {
+      return status;
+    }
+
+    // A return value of {v,}snprintf of size or more means that the output was
+    // truncated.
+    if (has_size) {
+      if (off < size) {
+        size_t ustatus = (size_t) status;
+        size_t avail = size - off;
+        // When truncated, only 'avail - 1' characters of the chunk are stored
+        // (the last available byte holds the terminating NUL), so that is the
+        // range to label; 'status' itself counts bytes that were never
+        // written.
+        chunk.size = ustatus >= avail ? avail - 1 : ustatus;
+      } else {
+        chunk.size = 0;
+      }
+    } else {
+      chunk.size = status;
+    }
+    // 'off' keeps tracking the untruncated length so that the final return
+    // value matches what vsnprintf would report.
+    off += status;
+  }
+
+  // Consume the labels of the output buffer, (optional) size, and format
+  // string.
+  //
+  // TODO(martignlo): Decide how to combine labels (e.g., whether to ignore or
+  // not the label of the format string).
+  va_arg(ap, dfsan_label_va);
+  if (has_size) {
+    va_arg(ap, dfsan_label_va);
+  }
+  va_arg(ap, dfsan_label_va);
+
+  // Label each output chunk according to the label supplied as argument to the
+  // function. We need to go through all the chunks and arguments even if the
+  // string was only partially printed ({v,}snprintf case).
+  for (size_t i = 0; i < chunks.size(); ++i) {
+    const Chunk& chunk = chunks[i];
+
+    switch (chunk.label_type) {
+      case Chunk::NONE:
+        dfsan_set_label(0, (void*) chunk.ptr, chunk.size);
+        break;
+      case Chunk::IGNORED:
+        va_arg(ap, dfsan_label_va);
+        dfsan_set_label(0, (void*) chunk.ptr, chunk.size);
+        break;
+      case Chunk::NUMERIC: {
+        dfsan_label label = va_arg(ap, dfsan_label_va);
+        dfsan_set_label(label, (void*) chunk.ptr, chunk.size);
+        break;
+      }
+      case Chunk::STRING: {
+        // Consume the label of the pointer to the string
+        va_arg(ap, dfsan_label_va);
+        if (chunk.arg == nullptr) {
+          // Nothing to copy labels from; whatever was printed for a NULL
+          // string does not derive from labeled data.
+          dfsan_set_label(0, (void*) chunk.ptr, chunk.size);
+          break;
+        }
+        // Never copy more labels than bytes were emitted for this chunk (a
+        // precision or a truncated snprintf can emit fewer bytes than the
+        // string holds), and never read past the end of the string.
+        // NOTE(review): with a field width the emitted text includes padding,
+        // whose position is not accounted for here.
+        size_t copied = strnlen(chunk.arg, chunk.size);
+        internal_memcpy(shadow_for((void *) chunk.ptr),
+                        shadow_for((void *) chunk.arg),
+                        sizeof(dfsan_label) * copied);
+        break;
+      }
+    }
+  }
+
+  // The return value itself is reported as unlabeled.
+  dfsan_label *ret_label_ptr = va_arg(ap, dfsan_label *);
+  *ret_label_ptr = 0;
+
+  // Number of bytes written in total.
+  return off;
+}
+
+// DFSan custom wrapper for sprintf(). Forwards to format_buffer() with no
+// size bound. The variadic list is consumed by format_buffer(), which reads
+// the directive arguments first, then the labels, and last a pointer to the
+// label of the return value.
+SANITIZER_INTERFACE_ATTRIBUTE
+int __dfsw_sprintf(char *str, const char *format, ...) {
+  va_list args;
+  va_start(args, format);
+  // has_size == false: the 'size' argument (0) is not used as a bound.
+  const int written = format_buffer(str, false, 0, format, args);
+  va_end(args);
+  return written;
+}
+
+// DFSan custom wrapper for snprintf(). Forwards to format_buffer() with
+// 'size' as the bound on the number of output bytes. The variadic list is
+// consumed by format_buffer(), which reads the directive arguments first,
+// then the labels (including one for 'size'), and last a pointer to the label
+// of the return value.
+SANITIZER_INTERFACE_ATTRIBUTE
+int __dfsw_snprintf(char *str, size_t size, const char *format, ...) {
+  va_list args;
+  va_start(args, format);
+  // has_size == true: output is limited to 'size' bytes.
+  const int written = format_buffer(str, true, size, format, args);
+  va_end(args);
+  return written;
+}
}