summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/msan/msan.cc9
-rw-r--r--lib/msan/msan.h7
-rw-r--r--lib/msan/msan_interceptors.cc20
-rw-r--r--lib/msan/msan_interface_internal.h4
-rw-r--r--lib/msan/msan_linux.cc39
-rw-r--r--lib/msan/tests/CMakeLists.txt17
-rw-r--r--lib/msan/tests/msan_loadable.cc45
-rw-r--r--lib/msan/tests/msan_test.cc49
-rw-r--r--lib/msandr/msandr.cc91
9 files changed, 262 insertions, 19 deletions
diff --git a/lib/msan/msan.cc b/lib/msan/msan.cc
index cfa074182..96f99d4a8 100644
--- a/lib/msan/msan.cc
+++ b/lib/msan/msan.cc
@@ -59,6 +59,7 @@ static THREADLOCAL struct {
} __msan_stack_bounds;
static THREADLOCAL bool is_in_symbolizer;
+static THREADLOCAL bool is_in_loader;
extern "C" const int __msan_track_origins;
int __msan_get_track_origins() {
@@ -87,6 +88,14 @@ void EnterSymbolizer() { is_in_symbolizer = true; }
void ExitSymbolizer() { is_in_symbolizer = false; }
bool IsInSymbolizer() { return is_in_symbolizer; }
+void EnterLoader() { is_in_loader = true; }  // Sets the calling thread's in-loader flag.
+void ExitLoader() { is_in_loader = false; }  // Clears the calling thread's in-loader flag.
+
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE
+bool __msan_is_in_loader() { return is_in_loader; }  // Exported read of the thread-local flag.
+}
+
static Flags msan_flags;
Flags *flags() {
diff --git a/lib/msan/msan.h b/lib/msan/msan.h
index fae1ad58a..123dd36bf 100644
--- a/lib/msan/msan.h
+++ b/lib/msan/msan.h
@@ -26,6 +26,8 @@
#define MEM_IS_SHADOW(mem) ((uptr)mem >= 0x200000000000ULL && \
(uptr)mem <= 0x400000000000ULL)
+struct link_map; // Opaque type returned by dlopen().
+
const int kMsanParamTlsSizeInWords = 100;
const int kMsanRetvalTlsSizeInWords = 100;
@@ -55,6 +57,9 @@ struct SymbolizerScope {
~SymbolizerScope() { ExitSymbolizer(); }
};
+void EnterLoader();
+void ExitLoader();
+
void MsanDie();
void PrintWarning(uptr pc, uptr bp);
void PrintWarningWithOrigin(uptr pc, uptr bp, u32 origin);
@@ -66,6 +71,8 @@ void ReportUMR(StackTrace *stack, u32 origin);
void ReportExpectedUMRNotFound(StackTrace *stack);
void ReportAtExitStatistics();
+void UnpoisonMappedDSO(struct link_map *map);
+
#define GET_MALLOC_STACK_TRACE \
StackTrace stack; \
stack.size = 0; \
diff --git a/lib/msan/msan_interceptors.cc b/lib/msan/msan_interceptors.cc
index a6f25e8aa..f81c8de89 100644
--- a/lib/msan/msan_interceptors.cc
+++ b/lib/msan/msan_interceptors.cc
@@ -762,6 +762,25 @@ INTERCEPTOR(int, dladdr, void *addr, dlinfo *info) {
return res;
}
+// dlopen() ultimately calls mmap() down inside the loader, which generally
+// doesn't participate in dynamic symbol resolution. Therefore we won't
+// intercept its calls to mmap, and we have to hook it here. The loader
+// initializes the module before returning, so without the dynamic component, we
+// won't be able to clear the shadow before the initializers. Fixing this would
+// require putting our own initializer first to clear the shadow.
+//
+// Returns exactly what the real dlopen() returned, including NULL on failure.
+INTERCEPTOR(void *, dlopen, const char *filename, int flag) {
+  ENSURE_MSAN_INITED();
+  // Bracket the real call so __msan_is_in_loader() reports true while the
+  // loader is running on this thread.
+  EnterLoader();
+  link_map *map = (link_map *)REAL(dlopen)(filename, flag);
+  ExitLoader();
+  // dlopen() returns NULL when the load fails; there is no link_map to walk in
+  // that case, so only unpoison when a module was actually mapped.
+  if (map && !__msan_has_dynamic_component()) {
+    // If msandr didn't clear the shadow before the initializers ran, we do it
+    // ourselves afterwards.
+    UnpoisonMappedDSO(map);
+  }
+  return (void *)map;
+}
+
INTERCEPTOR(int, getrusage, int who, void *usage) {
ENSURE_MSAN_INITED();
int res = REAL(getrusage)(who, usage);
@@ -973,6 +992,7 @@ void InitializeInterceptors() {
INTERCEPT_FUNCTION(recvfrom);
INTERCEPT_FUNCTION(recvmsg);
INTERCEPT_FUNCTION(dladdr);
+ INTERCEPT_FUNCTION(dlopen);
INTERCEPT_FUNCTION(getrusage);
inited = 1;
}
diff --git a/lib/msan/msan_interface_internal.h b/lib/msan/msan_interface_internal.h
index 905c5b793..e1cd13c3f 100644
--- a/lib/msan/msan_interface_internal.h
+++ b/lib/msan/msan_interface_internal.h
@@ -104,6 +104,10 @@ int __msan_get_retval_tls_offset();
SANITIZER_INTERFACE_ATTRIBUTE
int __msan_get_param_tls_offset();
+// For intercepting mmap from ld.so in msandr.
+SANITIZER_INTERFACE_ATTRIBUTE
+bool __msan_is_in_loader();
+
// For testing.
SANITIZER_INTERFACE_ATTRIBUTE
u32 __msan_get_umr_origin();
diff --git a/lib/msan/msan_linux.cc b/lib/msan/msan_linux.cc
index 0b08b7def..64aa35b0b 100644
--- a/lib/msan/msan_linux.cc
+++ b/lib/msan/msan_linux.cc
@@ -16,6 +16,9 @@
#include "msan.h"
+#include <algorithm>
+#include <elf.h>
+#include <link.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
@@ -87,6 +90,42 @@ static void MsanAtExit(void) {
void InstallAtExitHandler() {
atexit(MsanAtExit);
}
+
+// Unpoisons the shadow for every PT_LOAD segment of the module described by
+// |map|. The program headers are located through the ELF header read at
+// map->l_addr, and each segment is widened to page boundaries.
+// Does nothing when |map| is NULL (e.g. the result of a failed dlopen()).
+//
+// NOTE(review): this treats l_addr as the address of the mapped ELF header.
+// glibc describes l_addr as the difference between file and memory addresses,
+// which matches only when the lowest PT_LOAD p_vaddr is zero (typical for
+// DSOs) -- confirm behavior for prelinked objects.
+void UnpoisonMappedDSO(link_map *map) {
+  if (!map)
+    return;
+
+  typedef ElfW(Phdr) Elf_Phdr;
+  typedef ElfW(Ehdr) Elf_Ehdr;
+  char *base = (char *)map->l_addr;
+  Elf_Ehdr *ehdr = (Elf_Ehdr *)base;
+  char *phdrs = base + ehdr->e_phoff;
+  char *phdrs_end = phdrs + ehdr->e_phnum * ehdr->e_phentsize;
+
+  // Find the segment with the minimum base so we can "relocate" the p_vaddr
+  // fields. Typically ET_DYN objects (DSOs) have base of zero and ET_EXEC
+  // objects have a non-zero base.
+  uptr preferred_base = ~0ULL;
+  for (char *iter = phdrs; iter != phdrs_end; iter += ehdr->e_phentsize) {
+    Elf_Phdr *phdr = (Elf_Phdr *)iter;
+    if (phdr->p_type == PT_LOAD)
+      preferred_base = std::min(preferred_base, (uptr)phdr->p_vaddr);
+  }
+
+  // Compute the delta from the real base to get a relocation delta.
+  ptrdiff_t delta = (uptr)base - preferred_base;
+  // The page size does not change between segments; look it up once.
+  const uptr page_size = GetPageSizeCached();
+  // Now we can figure out what the loader really mapped.
+  for (char *iter = phdrs; iter != phdrs_end; iter += ehdr->e_phentsize) {
+    Elf_Phdr *phdr = (Elf_Phdr *)iter;
+    if (phdr->p_type != PT_LOAD)
+      continue;
+    uptr seg_start = phdr->p_vaddr + delta;
+    uptr seg_end = seg_start + phdr->p_memsz;
+    // None of these values are aligned. We consider the ragged edges of the
+    // load command as defined, since they are mapped from the file.
+    seg_start = RoundDownTo(seg_start, page_size);
+    seg_end = RoundUpTo(seg_end, page_size);
+    __msan_unpoison((void *)seg_start, seg_end - seg_start);
+  }
+}
+
} // namespace __msan
#endif // __linux__
diff --git a/lib/msan/tests/CMakeLists.txt b/lib/msan/tests/CMakeLists.txt
index 7067c4578..813aad02a 100644
--- a/lib/msan/tests/CMakeLists.txt
+++ b/lib/msan/tests/CMakeLists.txt
@@ -32,6 +32,7 @@ set(MSAN_LIBCXX_LINK_FLAGS
# Unittest sources and build flags.
set(MSAN_UNITTEST_SOURCE msan_test.cc)
+set(MSAN_LOADABLE_SOURCE msan_loadable.cc)
set(MSAN_UNITTEST_HEADERS
msandr_test_so.h
../../../include/sanitizer/msan_interface.h
@@ -65,6 +66,10 @@ set(MSAN_UNITTEST_LINK_FLAGS
# FIXME: we build libcxx without cxxabi and need libstdc++ to provide it.
-lstdc++
)
+set(MSAN_LOADABLE_LINK_FLAGS
+ -fsanitize=memory
+ -shared
+)
# Compile source for the given architecture, using compiler
# options in ${ARGN}, and add it to the object list.
@@ -96,6 +101,7 @@ macro(add_msan_test test_suite test_name arch)
add_compiler_rt_test(${test_suite} ${test_name}
OBJECTS ${ARGN}
DEPS ${MSAN_RUNTIME_LIBRARIES} ${ARGN}
+ ${MSAN_LOADABLE_SO}
LINK_FLAGS ${MSAN_UNITTEST_LINK_FLAGS}
${TARGET_LINK_FLAGS}
"-Wl,-rpath=${CMAKE_CURRENT_BINARY_DIR}")
@@ -130,11 +136,22 @@ macro(add_msan_tests_for_arch arch)
msan_compile(MSAN_INST_TEST_OBJECTS ${MSAN_UNITTEST_SOURCE} ${arch}
${MSAN_UNITTEST_INSTRUMENTED_CFLAGS})
+ # Instrumented loadable module objects.
+ set(MSAN_INST_LOADABLE_OBJECTS)
+ msan_compile(MSAN_INST_LOADABLE_OBJECTS ${MSAN_LOADABLE_SOURCE} ${arch}
+ ${MSAN_UNITTEST_INSTRUMENTED_CFLAGS})
+
# Uninstrumented shared object for MSanDR tests.
set(MSANDR_TEST_OBJECTS)
msan_compile(MSANDR_TEST_OBJECTS ${MSANDR_UNITTEST_SOURCE} ${arch}
${MSAN_UNITTEST_COMMON_CFLAGS})
+ # Instrumented loadable library tests.
+ set(MSAN_LOADABLE_SO)
+ msan_link_shared(MSAN_LOADABLE_SO "libmsan_loadable" ${arch}
+ OBJECTS ${MSAN_INST_LOADABLE_OBJECTS}
+ DEPS ${MSAN_INST_LOADABLE_OBJECTS} ${MSAN_RUNTIME_LIBRARIES})
+
# Uninstrumented shared library tests.
set(MSANDR_TEST_SO)
msan_link_shared(MSANDR_TEST_SO "libmsandr_test" ${arch}
diff --git a/lib/msan/tests/msan_loadable.cc b/lib/msan/tests/msan_loadable.cc
new file mode 100644
index 000000000..db3bf4898
--- /dev/null
+++ b/lib/msan/tests/msan_loadable.cc
@@ -0,0 +1,45 @@
+//===-- msan_loadable.cc --------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// MemorySanitizer unit tests.
+//===----------------------------------------------------------------------===//
+
+#include "msan/msan_interface_internal.h"
+#include <stdlib.h>
+
+static void *dso_global;
+
+// No name mangling, so the symbols below can be looked up by their plain names.
+extern "C" {
+
+__attribute__((constructor))
+void loadable_module_init(void) {
+ if (!__msan_has_dynamic_component())  // Only exercised when the dynamic component is present.
+ return;
+ // The real test is that this comparison must not trigger an
+ // uninitialized-value report while the module is being initialized.
+ if (dso_global == NULL)
+ dso_global = malloc(4);
+}
+
+__attribute__((destructor))
+void loadable_module_fini(void) {
+ if (!__msan_has_dynamic_component())  // Mirrors the guard in loadable_module_init().
+ return;
+ free(dso_global);
+ // *Don't* overwrite it with NULL! That would unpoison it, but our test
+ // relies on reloading at the same address and keeping the poison.
+}
+
+void **get_dso_global() {
+ return &dso_global;  // Address of the file-static global, so callers can inspect or poison it.
+}
+
+}
diff --git a/lib/msan/tests/msan_test.cc b/lib/msan/tests/msan_test.cc
index b30a8dffd..c1040d5c2 100644
--- a/lib/msan/tests/msan_test.cc
+++ b/lib/msan/tests/msan_test.cc
@@ -1288,6 +1288,55 @@ TEST(MemorySanitizer, dladdr) {
EXPECT_NOT_POISONED((unsigned long)info.dli_saddr);
}
+#ifdef __GLIBC__
+extern "C" {
+ extern void *__libc_stack_end;
+}
+
+// Returns the word that follows the one __libc_stack_end points at, viewed as
+// a char** argument vector.
+// NOTE(review): presumably __libc_stack_end points at argc, with argv starting
+// one word later -- confirm against the glibc startup stack layout.
+static char **GetArgv(void) {
+  uintptr_t *argc_slot = (uintptr_t *)__libc_stack_end;
+  char **argv = (char **)(argc_slot + 1);
+  return argv;
+}
+
+#else // __GLIBC__
+# error "TODO: port this"
+#endif
+
+TEST(MemorySanitizer, dlopen) {
+ // Compute the path to our loadable DSO. We assume it's in the same
+ // directory. Only use string routines that we intercept so far to do this.
+ char **argv = GetArgv();
+ const char *basename = "libmsan_loadable.x86_64.so";  // NOTE(review): architecture suffix is hard-coded.
+ size_t path_max = strlen(argv[0]) + 1 + strlen(basename) + 1;  // Upper bound: all of argv[0] + '/' + basename + NUL.
+ char *path = new char[path_max];
+ char *last_slash = strrchr(argv[0], '/');
+ assert(last_slash);  // NOTE(review): requires argv[0] to contain a '/'.
+ snprintf(path, path_max, "%.*s/%s", int(last_slash - argv[0]),
+ argv[0], basename);
+
+ // We need to clear shadow for globals when doing dlopen. In order to test
+ // this, we have to poison the shadow for the DSO before we load it. In
+ // general this is difficult, but the loader tends to reload things in the
+ // same place, so we open, close, and then reopen. The global should always
+ // start out clean after dlopen.
+ for (int i = 0; i < 2; i++) {  // Each pass: load, expect clean, poison, unload.
+ void *lib = dlopen(path, RTLD_LAZY);
+ if (lib == NULL) {
+ printf("dlerror: %s\n", dlerror());
+ assert(lib != NULL);
+ }
+ void **(*get_dso_global)() = (void **(*)())dlsym(lib, "get_dso_global");
+ assert(get_dso_global);
+ void **dso_global = get_dso_global();
+ EXPECT_NOT_POISONED(*dso_global);
+ __msan_poison(dso_global, sizeof(*dso_global));  // Leave poison behind for the next pass to find if dlopen fails to clear it.
+ EXPECT_POISONED(*dso_global);
+ dlclose(lib);
+ }
+
+ delete[] path;
+}
+
TEST(MemorySanitizer, scanf) {
const char *input = "42 hello";
int* d = new int;
diff --git a/lib/msandr/msandr.cc b/lib/msandr/msandr.cc
index 235a1eddd..fee9834de 100644
--- a/lib/msandr/msandr.cc
+++ b/lib/msandr/msandr.cc
@@ -37,6 +37,7 @@
#include <drsyscall.h>
#include <sys/mman.h>
+#include <sys/syscall.h> /* for SYS_mmap */
#include <algorithm>
#include <string>
@@ -103,6 +104,17 @@ ModuleData::ModuleData(const module_data_t *info)
int(*__msan_get_retval_tls_offset)();
int(*__msan_get_param_tls_offset)();
+void (*__msan_unpoison)(void *base, size_t size);
+bool (*__msan_is_in_loader)();
+
+// Resolves the exported symbol |name| in the module |app| and returns it.
+// When the symbol is missing, prints which symbol and module were involved and
+// then runs CHECK on the null result.
+static generic_func_t LookupCallback(module_data_t *app, const char *name) {
+  generic_func_t callback = dr_get_proc_address(app->handle, name);
+  if (callback != NULL)
+    return callback;
+
+  dr_printf("Couldn't find `%s` in %s\n", name, app->full_path);
+  CHECK(callback);
+  return callback;
+}
void InitializeMSanCallbacks() {
module_data_t *app = dr_lookup_module_by_name(dr_get_application_name());
@@ -113,25 +125,18 @@ void InitializeMSanCallbacks() {
}
g_app_path = app->full_path;
- const char *callback_name = "__msan_get_retval_tls_offset";
- __msan_get_retval_tls_offset =
- (int(*)()) dr_get_proc_address(app->handle, callback_name);
- if (__msan_get_retval_tls_offset == NULL) {
- dr_printf("Couldn't find `%s` in %s\n", callback_name, app->full_path);
- CHECK(__msan_get_retval_tls_offset);
- }
+ __msan_get_retval_tls_offset = (int (*)())
+ LookupCallback(app, "__msan_get_retval_tls_offset");
+ __msan_get_param_tls_offset = (int (*)())
+ LookupCallback(app, "__msan_get_param_tls_offset");
+ __msan_unpoison = (void(*)(void *, size_t))
+ LookupCallback(app, "__msan_unpoison");
+ __msan_is_in_loader = (bool (*)())
+ LookupCallback(app, "__msan_is_in_loader");
- callback_name = "__msan_get_param_tls_offset";
- __msan_get_param_tls_offset =
- (int(*)()) dr_get_proc_address(app->handle, callback_name);
- if (__msan_get_param_tls_offset == NULL) {
- dr_printf("Couldn't find `%s` in %s\n", callback_name, app->full_path);
- CHECK(__msan_get_param_tls_offset);
- }
+ dr_free_module_data(app);
}
-#define MEM_TO_SHADOW(mem) ((mem) & ~0x400000000000ULL)
-
// FIXME: Handle absolute addresses and PC-relative addresses.
// FIXME: Handle TLS accesses via FS or GS. DR assumes all other segments have
// a zero base anyway.
@@ -520,7 +525,7 @@ bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) {
if (arg->pre)
return true;
- if (arg->mode != DRSYS_PARAM_OUT)
+ if (!TESTANY(DRSYS_PARAM_OUT, arg->mode))
return true;
size_t sz = arg->size;
@@ -538,8 +543,19 @@ bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) {
(unsigned long long)(sz & 0xFFFFFFFF));
}
- void *p = (void *)MEM_TO_SHADOW((ptr_uint_t) arg->start_addr);
- memset(p, 0, sz);
+ if (VERBOSITY > 0) {
+ drmf_status_t res;
+ drsys_syscall_t *syscall = (drsys_syscall_t *)user_data;
+ const char *name;
+ res = drsys_syscall_name(syscall, &name);
+ dr_printf("drsyscall: syscall '%s' arg %d wrote range [%p, %p)\n",
+ name, arg->ordinal, arg->start_addr,
+ (char *)arg->start_addr + sz);
+ }
+
+ // We don't switch to the app context because __msan_unpoison() doesn't need
+ // TLS segments.
+ __msan_unpoison(arg->start_addr, sz);
return true; /* keep going */
}
@@ -576,6 +592,19 @@ bool event_pre_syscall(void *drcontext, int sysnum) {
return true;
}
+// Returns the result of the application's __msan_is_in_loader() callback.
+// The callback is invoked with the app state active: if DR state is current
+// on entry, it is swapped out for the call and restored afterwards.
+static bool IsInLoader(void *drcontext) {
+  // TODO: This segment swap is inefficient. DR should just let us query the
+  // app segment base, which it has. Alternatively, if we disable
+  // -mangle_app_seg, then we won't need the swap.
+  if (dr_using_app_state(drcontext))
+    return __msan_is_in_loader();
+
+  dr_switch_to_app_state(drcontext);
+  bool in_loader = __msan_is_in_loader();
+  dr_switch_to_dr_state(drcontext);
+  return in_loader;
+}
+
void event_post_syscall(void *drcontext, int sysnum) {
drsys_syscall_t *syscall;
drsys_sysnum_t sysnum_full;
@@ -598,6 +627,30 @@ void event_post_syscall(void *drcontext, int sysnum) {
drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, (void *)syscall);
CHECK(res == DRMF_SUCCESS);
}
+
+ // Our normal mmap interceptor can't intercept calls from the loader itself.
+ // This means we don't clear the shadow for calls to dlopen. For now, we
+ // solve this by intercepting mmap from ld.so here, but ideally we'd have a
+ // solution that doesn't rely on msandr.
+ //
+ // Be careful not to intercept maps done by the msan rtl. Otherwise we end up
+ // unpoisoning vast regions of memory and OOMing.
+ // TODO: __msan_unpoison() could "flush" large regions of memory like tsan
+ // does instead of doing a large memset. However, we need the memory to be
+ // zeroed, whereas tsan does not, so plain madvise is not enough.
+ if (success && (sysnum == SYS_mmap IF_NOT_X64(|| sysnum == SYS_mmap2))) {
+ if (IsInLoader(drcontext)) {
+ app_pc base = (app_pc)dr_syscall_get_result(drcontext);
+ ptr_uint_t size;
+ drmf_status_t res = drsys_pre_syscall_arg(drcontext, 1, &size);
+ CHECK(res == DRMF_SUCCESS);
+ if (VERBOSITY > 0)
+ dr_printf("unpoisoning for dlopen: [%p-%p]\n", base, base + size);
+ // We don't switch to the app context because __msan_unpoison() doesn't
+ // need TLS segments.
+ __msan_unpoison(base, size);
+ }
+ }
}
} // namespace