summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author Reid Kleckner <reid@kleckner.net> 2013-03-11 18:07:42 +0000
committer Reid Kleckner <reid@kleckner.net> 2013-03-11 18:07:42 +0000
commit 0f92deb81207c80481ff0257fbaba640fe669633 (patch)
tree d607657bc3c761a28ef7798195339f04fc8d837b /lib
parent ce700979f644c790c2d9d80f5cc2a1ada0380284 (diff)
[msan] intercept dlopen and clear shadow for it
Summary: The loader does not call mmap() through the PLT because it has to bootstrap the process before libc is present. Hooking dlopen() isn't enough either because the loader runs module initializers before returning, and they could run arbitrary msan instrumented code. If msandr is present, then we can intercept the mmaps from dlopen at the syscall layer and clear the shadow there. If msandr is missing, we clear the shadow after dlopen() and hope any initializers are trivial. Reviewers: eugenis CC: kcc, llvm-commits Differential Revision: http://llvm-reviews.chandlerc.com/D509 git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@176818 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- lib/msan/msan.cc 9
-rw-r--r-- lib/msan/msan.h 7
-rw-r--r-- lib/msan/msan_interceptors.cc 20
-rw-r--r-- lib/msan/msan_interface_internal.h 4
-rw-r--r-- lib/msan/msan_linux.cc 39
-rw-r--r-- lib/msan/tests/CMakeLists.txt 17
-rw-r--r-- lib/msan/tests/msan_loadable.cc 45
-rw-r--r-- lib/msan/tests/msan_test.cc 49
-rw-r--r-- lib/msandr/msandr.cc 91
9 files changed, 262 insertions, 19 deletions
diff --git a/lib/msan/msan.cc b/lib/msan/msan.cc
index cfa074182..96f99d4a8 100644
--- a/lib/msan/msan.cc
+++ b/lib/msan/msan.cc
@@ -59,6 +59,7 @@ static THREADLOCAL struct {
} __msan_stack_bounds;
static THREADLOCAL bool is_in_symbolizer;
+static THREADLOCAL bool is_in_loader;
extern "C" const int __msan_track_origins;
int __msan_get_track_origins() {
@@ -87,6 +88,14 @@ void EnterSymbolizer() { is_in_symbolizer = true; }
void ExitSymbolizer() { is_in_symbolizer = false; }
bool IsInSymbolizer() { return is_in_symbolizer; }
+// Set / clear the thread-local is_in_loader flag around a call into the
+// dynamic loader.
+void EnterLoader() {
+  is_in_loader = true;
+}
+
+void ExitLoader() {
+  is_in_loader = false;
+}
+
+extern "C" {
+// Exported accessor returning the calling thread's is_in_loader flag.
+SANITIZER_INTERFACE_ATTRIBUTE
+bool __msan_is_in_loader() {
+  return is_in_loader;
+}
+}  // extern "C"
+
static Flags msan_flags;
Flags *flags() {
diff --git a/lib/msan/msan.h b/lib/msan/msan.h
index fae1ad58a..123dd36bf 100644
--- a/lib/msan/msan.h
+++ b/lib/msan/msan.h
@@ -26,6 +26,8 @@
#define MEM_IS_SHADOW(mem) ((uptr)mem >= 0x200000000000ULL && \
(uptr)mem <= 0x400000000000ULL)
+struct link_map; // Opaque type returned by dlopen().
+
const int kMsanParamTlsSizeInWords = 100;
const int kMsanRetvalTlsSizeInWords = 100;
@@ -55,6 +57,9 @@ struct SymbolizerScope {
~SymbolizerScope() { ExitSymbolizer(); }
};
+void EnterLoader();
+void ExitLoader();
+
void MsanDie();
void PrintWarning(uptr pc, uptr bp);
void PrintWarningWithOrigin(uptr pc, uptr bp, u32 origin);
@@ -66,6 +71,8 @@ void ReportUMR(StackTrace *stack, u32 origin);
void ReportExpectedUMRNotFound(StackTrace *stack);
void ReportAtExitStatistics();
+void UnpoisonMappedDSO(struct link_map *map);
+
#define GET_MALLOC_STACK_TRACE \
StackTrace stack; \
stack.size = 0; \
diff --git a/lib/msan/msan_interceptors.cc b/lib/msan/msan_interceptors.cc
index a6f25e8aa..f81c8de89 100644
--- a/lib/msan/msan_interceptors.cc
+++ b/lib/msan/msan_interceptors.cc
@@ -762,6 +762,25 @@ INTERCEPTOR(int, dladdr, void *addr, dlinfo *info) {
return res;
}
+// dlopen() ultimately calls mmap() down inside the loader, which generally
+// doesn't participate in dynamic symbol resolution. Therefore we won't
+// intercept its calls to mmap, and we have to hook dlopen() here. The loader
+// runs the module's initializers before returning, so without the dynamic
+// component we won't be able to clear the shadow before the initializers.
+// Fixing this would require putting our own initializer first to clear the
+// shadow.
+//
+// Returns exactly what the real dlopen() returned, including a null handle
+// when loading failed.
+INTERCEPTOR(void *, dlopen, const char *filename, int flag) {
+  ENSURE_MSAN_INITED();
+  EnterLoader();
+  link_map *map = (link_map *)REAL(dlopen)(filename, flag);
+  ExitLoader();
+  // A failed dlopen() yields a null handle: nothing was mapped, and
+  // UnpoisonMappedDSO() dereferences its argument, so skip it in that case.
+  if (map && !__msan_has_dynamic_component()) {
+    // If msandr didn't clear the shadow before the initializers ran, we do it
+    // ourselves afterwards.
+    UnpoisonMappedDSO(map);
+  }
+  return (void *)map;
+}
+
INTERCEPTOR(int, getrusage, int who, void *usage) {
ENSURE_MSAN_INITED();
int res = REAL(getrusage)(who, usage);
@@ -973,6 +992,7 @@ void InitializeInterceptors() {
INTERCEPT_FUNCTION(recvfrom);
INTERCEPT_FUNCTION(recvmsg);
INTERCEPT_FUNCTION(dladdr);
+ INTERCEPT_FUNCTION(dlopen);
INTERCEPT_FUNCTION(getrusage);
inited = 1;
}
diff --git a/lib/msan/msan_interface_internal.h b/lib/msan/msan_interface_internal.h
index 905c5b793..e1cd13c3f 100644
--- a/lib/msan/msan_interface_internal.h
+++ b/lib/msan/msan_interface_internal.h
@@ -104,6 +104,10 @@ int __msan_get_retval_tls_offset();
SANITIZER_INTERFACE_ATTRIBUTE
int __msan_get_param_tls_offset();
+// For intercepting mmap from ld.so in msandr.
+SANITIZER_INTERFACE_ATTRIBUTE
+bool __msan_is_in_loader();
+
// For testing.
SANITIZER_INTERFACE_ATTRIBUTE
u32 __msan_get_umr_origin();
diff --git a/lib/msan/msan_linux.cc b/lib/msan/msan_linux.cc
index 0b08b7def..64aa35b0b 100644
--- a/lib/msan/msan_linux.cc
+++ b/lib/msan/msan_linux.cc
@@ -16,6 +16,9 @@
#include "msan.h"
+#include <algorithm>
+#include <elf.h>
+#include <link.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
@@ -87,6 +90,42 @@ static void MsanAtExit(void) {
void InstallAtExitHandler() {
atexit(MsanAtExit);
}
+
+// Unpoisons (via __msan_unpoison) every PT_LOAD segment of the object
+// described by |map|, with each segment widened to page boundaries.
+// Does nothing when |map| is null, so callers may pass the result of a failed
+// dlopen() straight through.
+// NOTE(review): the ELF header is read at map->l_addr, i.e. this assumes
+// l_addr is the address at which the object's ELF header is mapped -- confirm
+// for objects whose lowest PT_LOAD p_vaddr is non-zero.
+void UnpoisonMappedDSO(link_map *map) {
+  if (!map)
+    return;
+
+  typedef ElfW(Phdr) Elf_Phdr;
+  typedef ElfW(Ehdr) Elf_Ehdr;
+  char *base = (char *)map->l_addr;
+  Elf_Ehdr *ehdr = (Elf_Ehdr *)base;
+  char *phdrs = base + ehdr->e_phoff;
+  char *phdrs_end = phdrs + ehdr->e_phnum * ehdr->e_phentsize;
+
+  // Find the segment with the minimum base so we can "relocate" the p_vaddr
+  // fields. Typically ET_DYN objects (DSOs) have base of zero and ET_EXEC
+  // objects have a non-zero base.
+  uptr preferred_base = ~0ULL;
+  for (char *iter = phdrs; iter != phdrs_end; iter += ehdr->e_phentsize) {
+    Elf_Phdr *phdr = (Elf_Phdr *)iter;
+    if (phdr->p_type == PT_LOAD)
+      preferred_base = std::min(preferred_base, (uptr)phdr->p_vaddr);
+  }
+
+  // Compute the delta from the real base to get a relocation delta.
+  ptrdiff_t delta = (uptr)base - preferred_base;
+  // The page size does not change between segments; look it up once.
+  const uptr page_size = GetPageSizeCached();
+  // Now we can figure out what the loader really mapped.
+  for (char *iter = phdrs; iter != phdrs_end; iter += ehdr->e_phentsize) {
+    Elf_Phdr *phdr = (Elf_Phdr *)iter;
+    if (phdr->p_type != PT_LOAD)
+      continue;
+    uptr seg_start = phdr->p_vaddr + delta;
+    uptr seg_end = seg_start + phdr->p_memsz;
+    // None of these values are aligned. We consider the ragged edges of the
+    // load command as defined, since they are mapped from the file.
+    seg_start = RoundDownTo(seg_start, page_size);
+    seg_end = RoundUpTo(seg_end, page_size);
+    __msan_unpoison((void *)seg_start, seg_end - seg_start);
+  }
+}
+
} // namespace __msan
#endif // __linux__
diff --git a/lib/msan/tests/CMakeLists.txt b/lib/msan/tests/CMakeLists.txt
index 7067c4578..813aad02a 100644
--- a/lib/msan/tests/CMakeLists.txt
+++ b/lib/msan/tests/CMakeLists.txt
@@ -32,6 +32,7 @@ set(MSAN_LIBCXX_LINK_FLAGS
# Unittest sources and build flags.
set(MSAN_UNITTEST_SOURCE msan_test.cc)
+set(MSAN_LOADABLE_SOURCE msan_loadable.cc)
set(MSAN_UNITTEST_HEADERS
msandr_test_so.h
../../../include/sanitizer/msan_interface.h
@@ -65,6 +66,10 @@ set(MSAN_UNITTEST_LINK_FLAGS
# FIXME: we build libcxx without cxxabi and need libstdc++ to provide it.
-lstdc++
)
+set(MSAN_LOADABLE_LINK_FLAGS
+ -fsanitize=memory
+ -shared
+)
# Compile source for the given architecture, using compiler
# options in ${ARGN}, and add it to the object list.
@@ -96,6 +101,7 @@ macro(add_msan_test test_suite test_name arch)
add_compiler_rt_test(${test_suite} ${test_name}
OBJECTS ${ARGN}
DEPS ${MSAN_RUNTIME_LIBRARIES} ${ARGN}
+ ${MSAN_LOADABLE_SO}
LINK_FLAGS ${MSAN_UNITTEST_LINK_FLAGS}
${TARGET_LINK_FLAGS}
"-Wl,-rpath=${CMAKE_CURRENT_BINARY_DIR}")
@@ -130,11 +136,22 @@ macro(add_msan_tests_for_arch arch)
msan_compile(MSAN_INST_TEST_OBJECTS ${MSAN_UNITTEST_SOURCE} ${arch}
${MSAN_UNITTEST_INSTRUMENTED_CFLAGS})
+ # Instrumented loadable module objects.
+ set(MSAN_INST_LOADABLE_OBJECTS)
+ msan_compile(MSAN_INST_LOADABLE_OBJECTS ${MSAN_LOADABLE_SOURCE} ${arch}
+ ${MSAN_UNITTEST_INSTRUMENTED_CFLAGS})
+
# Uninstrumented shared object for MSanDR tests.
set(MSANDR_TEST_OBJECTS)
msan_compile(MSANDR_TEST_OBJECTS ${MSANDR_UNITTEST_SOURCE} ${arch}
${MSAN_UNITTEST_COMMON_CFLAGS})
+ # Instrumented loadable library tests.
+ set(MSAN_LOADABLE_SO)
+ msan_link_shared(MSAN_LOADABLE_SO "libmsan_loadable" ${arch}
+ OBJECTS ${MSAN_INST_LOADABLE_OBJECTS}
+ DEPS ${MSAN_INST_LOADABLE_OBJECTS} ${MSAN_RUNTIME_LIBRARIES})
+
# Uninstrumented shared library tests.
set(MSANDR_TEST_SO)
msan_link_shared(MSANDR_TEST_SO "libmsandr_test" ${arch}
diff --git a/lib/msan/tests/msan_loadable.cc b/lib/msan/tests/msan_loadable.cc
new file mode 100644
index 000000000..db3bf4898
--- /dev/null
+++ b/lib/msan/tests/msan_loadable.cc
@@ -0,0 +1,45 @@
+//===-- msan_loadable.cc --------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// MemorySanitizer unit tests.
+//===----------------------------------------------------------------------===//
+
+#include "msan/msan_interface_internal.h"
+#include <stdlib.h>
+
+static void *dso_global;
+
+// No name mangling: the test looks up get_dso_global by plain name via dlsym().
+extern "C" {
+
+__attribute__((constructor))
+void loadable_module_init(void) {
+ if (!__msan_has_dynamic_component())
+ return;
+ // The real test: this compare must not be reported as an uninitialized read.
+ if (dso_global == NULL)
+ dso_global = malloc(4);
+}
+
+__attribute__((destructor))
+void loadable_module_fini(void) {
+ if (!__msan_has_dynamic_component())
+ return;
+ free(dso_global);
+ // *Don't* overwrite it with NULL! That would unpoison it, but our test
+ // relies on reloading at the same address and keeping the poison.
+}
+
+void **get_dso_global() {
+ return &dso_global;
+}
+
+}
diff --git a/lib/msan/tests/msan_test.cc b/lib/msan/tests/msan_test.cc
index b30a8dffd..c1040d5c2 100644
--- a/lib/msan/tests/msan_test.cc
+++ b/lib/msan/tests/msan_test.cc
@@ -1288,6 +1288,55 @@ TEST(MemorySanitizer, dladdr) {
EXPECT_NOT_POISONED((unsigned long)info.dli_saddr);
}
+#ifndef __GLIBC__
+# error "TODO: port this"
+#endif
+
+// Provided by glibc; used below to locate the process's argument vector.
+extern "C" void *__libc_stack_end;
+
+// Returns the address one pointer-sized word past __libc_stack_end, viewed as
+// argv. (Presumably the word at __libc_stack_end itself holds argc; this
+// layout is glibc-specific -- TODO confirm when porting.)
+static char **GetArgv(void) {
+  uintptr_t *first_word = (uintptr_t *)__libc_stack_end;
+  uintptr_t *argv_start = first_word + 1;
+  return (char **)argv_start;
+}
+
+TEST(MemorySanitizer, dlopen) {
+ // Build the path to our loadable DSO, assumed to sit in the same directory as
+ // argv[0]. Only use string routines that we intercept so far to do this.
+ char **argv = GetArgv();
+ const char *basename = "libmsan_loadable.x86_64.so";
+ size_t path_max = strlen(argv[0]) + 1 + strlen(basename) + 1;
+ char *path = new char[path_max];
+ char *last_slash = strrchr(argv[0], '/');
+ assert(last_slash);
+ snprintf(path, path_max, "%.*s/%s", int(last_slash - argv[0]),
+ argv[0], basename);
+
+ // We need to clear shadow for globals when doing dlopen. In order to test
+ // this, we have to poison the shadow for the DSO before we load it. In
+ // general this is difficult, but the loader tends to reload things in the
+ // same place, so we open, poison the global, close, and then reopen. The
+ // global should always start out clean after dlopen.
+ for (int i = 0; i < 2; i++) {
+ void *lib = dlopen(path, RTLD_LAZY);
+ if (lib == NULL) {
+ printf("dlerror: %s\n", dlerror());
+ assert(lib != NULL);
+ }
+ void **(*get_dso_global)() = (void **(*)())dlsym(lib, "get_dso_global");
+ assert(get_dso_global);
+ void **dso_global = get_dso_global();
+ EXPECT_NOT_POISONED(*dso_global);
+ __msan_poison(dso_global, sizeof(*dso_global));
+ EXPECT_POISONED(*dso_global);
+ dlclose(lib);
+ }
+
+ delete[] path;
+}
+
TEST(MemorySanitizer, scanf) {
const char *input = "42 hello";
int* d = new int;
diff --git a/lib/msandr/msandr.cc b/lib/msandr/msandr.cc
index 235a1eddd..fee9834de 100644
--- a/lib/msandr/msandr.cc
+++ b/lib/msandr/msandr.cc
@@ -37,6 +37,7 @@
#include <drsyscall.h>
#include <sys/mman.h>
+#include <sys/syscall.h> /* for SYS_mmap */
#include <algorithm>
#include <string>
@@ -103,6 +104,17 @@ ModuleData::ModuleData(const module_data_t *info)
int(*__msan_get_retval_tls_offset)();
int(*__msan_get_param_tls_offset)();
+void (*__msan_unpoison)(void *base, size_t size);
+bool (*__msan_is_in_loader)();
+
+// Resolves the exported symbol |name| in the module |app| and returns its
+// address. When the lookup yields null, prints a diagnostic naming the symbol
+// and the module path, then runs CHECK on the null result.
+static generic_func_t LookupCallback(module_data_t *app, const char *name) {
+  generic_func_t fn = dr_get_proc_address(app->handle, name);
+  if (fn != NULL)
+    return fn;
+
+  dr_printf("Couldn't find `%s` in %s\n", name, app->full_path);
+  CHECK(fn);
+  return fn;
+}
void InitializeMSanCallbacks() {
module_data_t *app = dr_lookup_module_by_name(dr_get_application_name());
@@ -113,25 +125,18 @@ void InitializeMSanCallbacks() {
}
g_app_path = app->full_path;
- const char *callback_name = "__msan_get_retval_tls_offset";
- __msan_get_retval_tls_offset =
- (int(*)()) dr_get_proc_address(app->handle, callback_name);
- if (__msan_get_retval_tls_offset == NULL) {
- dr_printf("Couldn't find `%s` in %s\n", callback_name, app->full_path);
- CHECK(__msan_get_retval_tls_offset);
- }
+ __msan_get_retval_tls_offset = (int (*)())
+ LookupCallback(app, "__msan_get_retval_tls_offset");
+ __msan_get_param_tls_offset = (int (*)())
+ LookupCallback(app, "__msan_get_param_tls_offset");
+ __msan_unpoison = (void(*)(void *, size_t))
+ LookupCallback(app, "__msan_unpoison");
+ __msan_is_in_loader = (bool (*)())
+ LookupCallback(app, "__msan_is_in_loader");
- callback_name = "__msan_get_param_tls_offset";
- __msan_get_param_tls_offset =
- (int(*)()) dr_get_proc_address(app->handle, callback_name);
- if (__msan_get_param_tls_offset == NULL) {
- dr_printf("Couldn't find `%s` in %s\n", callback_name, app->full_path);
- CHECK(__msan_get_param_tls_offset);
- }
+ dr_free_module_data(app);
}
-#define MEM_TO_SHADOW(mem) ((mem) & ~0x400000000000ULL)
-
// FIXME: Handle absolute addresses and PC-relative addresses.
// FIXME: Handle TLS accesses via FS or GS. DR assumes all other segments have
// a zero base anyway.
@@ -520,7 +525,7 @@ bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) {
if (arg->pre)
return true;
- if (arg->mode != DRSYS_PARAM_OUT)
+ if (!TESTANY(DRSYS_PARAM_OUT, arg->mode))
return true;
size_t sz = arg->size;
@@ -538,8 +543,19 @@ bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) {
(unsigned long long)(sz & 0xFFFFFFFF));
}
- void *p = (void *)MEM_TO_SHADOW((ptr_uint_t) arg->start_addr);
- memset(p, 0, sz);
+ if (VERBOSITY > 0) {
+ drmf_status_t res;
+ drsys_syscall_t *syscall = (drsys_syscall_t *)user_data;
+ const char *name;
+ res = drsys_syscall_name(syscall, &name);
+ dr_printf("drsyscall: syscall '%s' arg %d wrote range [%p, %p)\n",
+ name, arg->ordinal, arg->start_addr,
+ (char *)arg->start_addr + sz);
+ }
+
+ // We don't switch to the app context because __msan_unpoison() doesn't need
+ // TLS segments.
+ __msan_unpoison(arg->start_addr, sz);
return true; /* keep going */
}
@@ -576,6 +592,19 @@ bool event_pre_syscall(void *drcontext, int sysnum) {
return true;
}
+// Returns the result of the __msan_is_in_loader() callback, making sure the
+// callback runs with the app state active: when DR state is active on entry we
+// switch to app state for the call and switch back to DR state afterwards.
+// TODO: This segment swap is inefficient. DR should just let us query the
+// app segment base, which it has. Alternatively, if we disable
+// -mangle_app_seg, then we won't need the swap.
+static bool IsInLoader(void *drcontext) {
+  if (dr_using_app_state(drcontext))
+    return __msan_is_in_loader();
+
+  dr_switch_to_app_state(drcontext);
+  const bool in_loader = __msan_is_in_loader();
+  dr_switch_to_dr_state(drcontext);
+  return in_loader;
+}
+
void event_post_syscall(void *drcontext, int sysnum) {
drsys_syscall_t *syscall;
drsys_sysnum_t sysnum_full;
@@ -598,6 +627,30 @@ void event_post_syscall(void *drcontext, int sysnum) {
drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, (void *)syscall);
CHECK(res == DRMF_SUCCESS);
}
+
+ // Our normal mmap interceptor can't intercept calls from the loader itself.
+ // This means we don't clear the shadow for calls to dlopen. For now, we
+ // solve this by intercepting mmap from ld.so here, but ideally we'd have a
+ // solution that doesn't rely on msandr.
+ //
+ // Be careful not to intercept maps done by the msan rtl. Otherwise we end up
+ // unpoisoning vast regions of memory and OOMing.
+ // TODO: __msan_unpoison() could "flush" large regions of memory like tsan
+ // does instead of doing a large memset. However, we need the memory to be
+ // zeroed, whereas tsan does not, so plain madvise is not enough.
+ if (success && (sysnum == SYS_mmap IF_NOT_X64(|| sysnum == SYS_mmap2))) {
+ if (IsInLoader(drcontext)) {
+ app_pc base = (app_pc)dr_syscall_get_result(drcontext);
+ ptr_uint_t size;
+ drmf_status_t res = drsys_pre_syscall_arg(drcontext, 1, &size);
+ CHECK(res == DRMF_SUCCESS);
+ if (VERBOSITY > 0)
+ dr_printf("unpoisoning for dlopen: [%p-%p]\n", base, base + size);
+ // We don't switch to the app context because __msan_unpoison() doesn't
+ // need TLS segments.
+ __msan_unpoison(base, size);
+ }
+ }
}
} // namespace