summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDean Michael Berris <dberris@google.com>2016-12-19 03:21:26 +0000
committerDean Michael Berris <dberris@google.com>2016-12-19 03:21:26 +0000
commit15ff979902c3ef8f2872f2d3967712897d8abb11 (patch)
tree7b43b93a635de774f433f72b9deedbfec8d12a52
parent140ee892452f4dc56ed45596fc605a20f9c71260 (diff)
[XRay] [compiler-rt] Move machine-dependent code into machine-dependent files.
Summary: Include the necessary headers while there. Reviewers: dberris Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D25360 git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@290077 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/xray/xray_AArch64.cc14
-rw-r--r--lib/xray/xray_arm.cc14
-rw-r--r--lib/xray/xray_emulate_tsc.h23
-rw-r--r--lib/xray/xray_inmemory_log.cc185
-rw-r--r--lib/xray/xray_interface_internal.h2
-rw-r--r--lib/xray/xray_x86_64.cc61
-rw-r--r--lib/xray/xray_x86_64.h16
7 files changed, 176 insertions, 139 deletions
diff --git a/lib/xray/xray_AArch64.cc b/lib/xray/xray_AArch64.cc
index c2d33a200..8f24610da 100644
--- a/lib/xray/xray_AArch64.cc
+++ b/lib/xray/xray_AArch64.cc
@@ -14,12 +14,26 @@
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_common.h"
#include "xray_defs.h"
+#include "xray_emulate_tsc.h"
#include "xray_interface_internal.h"
#include <atomic>
#include <cassert>
namespace __xray {
+uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT {
+ // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
+ // not have a constant frequency like TSC on x86[_64]; it may go faster or
+ // slower depending on CPU's turbo or power saving modes. Furthermore, to
+ // read from CP15 on ARM a kernel modification or a driver is needed.
+ // We can not require this from users of compiler-rt.
+ // So on ARM we use clock_gettime(2) which gives the result in nanoseconds.
+ // To get the measurements per second, we scale this by the number of
+ // nanoseconds per second, pretending that the TSC frequency is 1GHz and
+ // one TSC tick is 1 nanosecond.
+ return NanosecondsPerSecond;
+}
+
// The machine codes for some instructions used in runtime patching.
enum class PatchOpcodes : uint32_t {
PO_StpX0X30SP_m16e = 0xA9BF7BE0, // STP X0, X30, [SP, #-16]!
diff --git a/lib/xray/xray_arm.cc b/lib/xray/xray_arm.cc
index 9ce8451be..d89322e83 100644
--- a/lib/xray/xray_arm.cc
+++ b/lib/xray/xray_arm.cc
@@ -14,12 +14,26 @@
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_common.h"
#include "xray_defs.h"
+#include "xray_emulate_tsc.h"
#include "xray_interface_internal.h"
#include <atomic>
#include <cassert>
namespace __xray {
+uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT {
+ // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
+ // not have a constant frequency like TSC on x86[_64]; it may go faster or
+ // slower depending on CPU's turbo or power saving modes. Furthermore, to
+ // read from CP15 on ARM a kernel modification or a driver is needed.
+ // We can not require this from users of compiler-rt.
+ // So on ARM we use clock_gettime(2) which gives the result in nanoseconds.
+ // To get the measurements per second, we scale this by the number of
+ // nanoseconds per second, pretending that the TSC frequency is 1GHz and
+ // one TSC tick is 1 nanosecond.
+ return NanosecondsPerSecond;
+}
+
// The machine codes for some instructions used in runtime patching.
enum class PatchOpcodes : uint32_t {
PO_PushR0Lr = 0xE92D4001, // PUSH {r0, lr}
diff --git a/lib/xray/xray_emulate_tsc.h b/lib/xray/xray_emulate_tsc.h
new file mode 100644
index 000000000..14a7993bc
--- /dev/null
+++ b/lib/xray/xray_emulate_tsc.h
@@ -0,0 +1,23 @@
+#pragma once
+#include <time.h>
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "xray_defs.h"
+
+namespace __xray {
+
+static constexpr uint64_t NanosecondsPerSecond = 1000ULL * 1000 * 1000;
+
+ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+ timespec TS;
+ int result = clock_gettime(CLOCK_REALTIME, &TS);
+ if (result != 0) {
+ Report("clock_gettime(2) returned %d, errno=%d.", result, int(errno));
+ TS.tv_sec = 0;
+ TS.tv_nsec = 0;
+ }
+ CPU = 0;
+ return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
+}
+
+}
diff --git a/lib/xray/xray_inmemory_log.cc b/lib/xray/xray_inmemory_log.cc
index a93972df3..fc1cfd7d3 100644
--- a/lib/xray/xray_inmemory_log.cc
+++ b/lib/xray/xray_inmemory_log.cc
@@ -26,12 +26,12 @@
#include <unistd.h>
#if defined(__x86_64__)
-#include <x86intrin.h>
+#include "xray_x86_64.h"
#elif defined(__arm__) || defined(__aarch64__)
-static const int64_t NanosecondsPerSecond = 1000LL * 1000 * 1000;
+#include "xray_emulate_tsc.h"
#else
#error "Unsupported CPU Architecture"
-#endif /* CPU architecture */
+#endif /* Architecture-specific inline intrinsics */
#include "sanitizer_common/sanitizer_libc.h"
#include "xray/xray_records.h"
@@ -71,52 +71,6 @@ static void retryingWriteAll(int Fd, char *Begin,
}
}
-#if defined(__x86_64__)
-static std::pair<ssize_t, bool>
-retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
- auto BytesToRead = std::distance(Begin, End);
- ssize_t BytesRead;
- ssize_t TotalBytesRead = 0;
- while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
- if (BytesRead == -1) {
- if (errno == EINTR)
- continue;
- Report("Read error; errno = %d\n", errno);
- return std::make_pair(TotalBytesRead, false);
- }
-
- TotalBytesRead += BytesRead;
- BytesToRead -= BytesRead;
- Begin += BytesRead;
- }
- return std::make_pair(TotalBytesRead, true);
-}
-
-static bool readValueFromFile(const char *Filename,
- long long *Value) XRAY_NEVER_INSTRUMENT {
- int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
- if (Fd == -1)
- return false;
- static constexpr size_t BufSize = 256;
- char Line[BufSize] = {};
- ssize_t BytesRead;
- bool Success;
- std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
- if (!Success)
- return false;
- close(Fd);
- char *End = nullptr;
- long long Tmp = internal_simple_strtoll(Line, &End, 10);
- bool Result = false;
- if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
- *Value = Tmp;
- Result = true;
- }
- return Result;
-}
-
-#endif /* CPU architecture */
-
class ThreadExitFlusher {
int Fd;
XRayRecord *Start;
@@ -151,6 +105,47 @@ void PrintToStdErr(const char *Buffer) XRAY_NEVER_INSTRUMENT {
fprintf(stderr, "%s", Buffer);
}
+static int __xray_OpenLogFile() XRAY_NEVER_INSTRUMENT {
+ // FIXME: Figure out how to make this less stderr-dependent.
+ SetPrintfAndReportCallback(PrintToStdErr);
+ // Open a temporary file once for the log.
+ static char TmpFilename[256] = {};
+ static char TmpWildcardPattern[] = "XXXXXX";
+ auto E = internal_strncat(TmpFilename, flags()->xray_logfile_base,
+ sizeof(TmpFilename) - 10);
+ if (static_cast<size_t>((E + 6) - TmpFilename) >
+ (sizeof(TmpFilename) - 1)) {
+ Report("XRay log file base too long: %s\n", flags()->xray_logfile_base);
+ return -1;
+ }
+ internal_strncat(TmpFilename, TmpWildcardPattern,
+ sizeof(TmpWildcardPattern) - 1);
+ int Fd = mkstemp(TmpFilename);
+ if (Fd == -1) {
+ Report("XRay: Failed opening temporary file '%s'; not logging events.\n",
+ TmpFilename);
+ return -1;
+ }
+ if (Verbosity())
+ fprintf(stderr, "XRay: Log file in '%s'\n", TmpFilename);
+
+ // Since we're here, we get to write the header. We set it up so that the
+ // header will only be written once, at the start, and let the threads
+ // logging do writes which just append.
+ XRayFileHeader Header;
+ Header.Version = 1;
+ Header.Type = FileTypes::NAIVE_LOG;
+ Header.CycleFrequency = __xray::cycleFrequency();
+
+ // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
+ // before setting the values in the header.
+ Header.ConstantTSC = 1;
+ Header.NonstopTSC = 1;
+ retryingWriteAll(Fd, reinterpret_cast<char *>(&Header),
+ reinterpret_cast<char *>(&Header) + sizeof(Header));
+ return Fd;
+}
+
void __xray_InMemoryRawLog(int32_t FuncId,
XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
using Buffer =
@@ -158,75 +153,7 @@ void __xray_InMemoryRawLog(int32_t FuncId,
static constexpr size_t BuffLen = 1024;
thread_local static Buffer InMemoryBuffer[BuffLen] = {};
thread_local static size_t Offset = 0;
- static int Fd = [] {
- // FIXME: Figure out how to make this less stderr-dependent.
- SetPrintfAndReportCallback(PrintToStdErr);
- // Open a temporary file once for the log.
- static char TmpFilename[256] = {};
- static char TmpWildcardPattern[] = "XXXXXX";
- auto E = internal_strncat(TmpFilename, flags()->xray_logfile_base,
- sizeof(TmpFilename) - 10);
- if (static_cast<size_t>((E + 6) - TmpFilename) >
- (sizeof(TmpFilename) - 1)) {
- Report("XRay log file base too long: %s\n", flags()->xray_logfile_base);
- return -1;
- }
- internal_strncat(TmpFilename, TmpWildcardPattern,
- sizeof(TmpWildcardPattern) - 1);
- int Fd = mkstemp(TmpFilename);
- if (Fd == -1) {
- Report("XRay: Failed opening temporary file '%s'; not logging events.\n",
- TmpFilename);
- return -1;
- }
- if (Verbosity())
- fprintf(stderr, "XRay: Log file in '%s'\n", TmpFilename);
-
- // Get the cycle frequency from SysFS on Linux.
- long long CPUFrequency = -1;
-#if defined(__x86_64__)
- if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
- &CPUFrequency)) {
- CPUFrequency *= 1000;
- } else if (readValueFromFile(
- "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
- &CPUFrequency)) {
- CPUFrequency *= 1000;
- } else {
- Report("Unable to determine CPU frequency for TSC accounting.\n");
- }
-#elif defined(__arm__) || defined(__aarch64__)
- // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
- // not have a constant frequency like TSC on x86(_64), it may go faster
- // or slower depending on CPU turbo or power saving mode. Furthermore,
- // to read from CP15 on ARM a kernel modification or a driver is needed.
- // We can not require this from users of compiler-rt.
- // So on ARM we use clock_gettime() which gives the result in nanoseconds.
- // To get the measurements per second, we scale this by the number of
- // nanoseconds per second, pretending that the TSC frequency is 1GHz and
- // one TSC tick is 1 nanosecond.
- CPUFrequency = NanosecondsPerSecond;
-#else
-#error "Unsupported CPU Architecture"
-#endif /* CPU architecture */
-
- // Since we're here, we get to write the header. We set it up so that the
- // header will only be written once, at the start, and let the threads
- // logging do writes which just append.
- XRayFileHeader Header;
- Header.Version = 1;
- Header.Type = FileTypes::NAIVE_LOG;
- Header.CycleFrequency =
- CPUFrequency == -1 ? 0 : static_cast<uint64_t>(CPUFrequency);
-
- // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
- // before setting the values in the header.
- Header.ConstantTSC = 1;
- Header.NonstopTSC = 1;
- retryingWriteAll(Fd, reinterpret_cast<char *>(&Header),
- reinterpret_cast<char *>(&Header) + sizeof(Header));
- return Fd;
- }();
+ static int Fd = __xray_OpenLogFile();
if (Fd == -1)
return;
thread_local __xray::ThreadExitFlusher Flusher(
@@ -237,27 +164,7 @@ void __xray_InMemoryRawLog(int32_t FuncId,
// through a pointer offset.
auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset];
R.RecordType = RecordTypes::NORMAL;
-#if defined(__x86_64__)
- {
- unsigned CPU;
- R.TSC = __rdtscp(&CPU);
- R.CPU = CPU;
- }
-#elif defined(__arm__) || defined(__aarch64__)
- {
- timespec TS;
- int result = clock_gettime(CLOCK_REALTIME, &TS);
- if (result != 0) {
- Report("clock_gettime() returned %d, errno=%d.\n", result, int(errno));
- TS.tv_sec = 0;
- TS.tv_nsec = 0;
- }
- R.TSC = TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
- R.CPU = 0;
- }
-#else
-#error "Unsupported CPU Architecture"
-#endif /* CPU architecture */
+ R.TSC = __xray::readTSC(R.CPU);
R.TId = TId;
R.Type = Type;
R.FuncId = FuncId;
diff --git a/lib/xray/xray_interface_internal.h b/lib/xray/xray_interface_internal.h
index c4f72433d..a8434a699 100644
--- a/lib/xray/xray_interface_internal.h
+++ b/lib/xray/xray_interface_internal.h
@@ -48,6 +48,8 @@ struct XRaySledMap {
size_t Entries;
};
+uint64_t cycleFrequency();
+
bool patchFunctionEntry(bool Enable, uint32_t FuncId,
const XRaySledEntry &Sled);
bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
diff --git a/lib/xray/xray_x86_64.cc b/lib/xray/xray_x86_64.cc
index 09ca34c0d..74bd9cfd9 100644
--- a/lib/xray/xray_x86_64.cc
+++ b/lib/xray/xray_x86_64.cc
@@ -3,10 +3,71 @@
#include "xray_interface_internal.h"
#include <atomic>
#include <cstdint>
+#include <fcntl.h>
#include <limits>
+#include <tuple>
+#include <unistd.h>
namespace __xray {
+static std::pair<ssize_t, bool>
+retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
+ auto BytesToRead = std::distance(Begin, End);
+ ssize_t BytesRead;
+ ssize_t TotalBytesRead = 0;
+ while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
+ if (BytesRead == -1) {
+ if (errno == EINTR)
+ continue;
+ Report("Read error; errno = %d\n", errno);
+ return std::make_pair(TotalBytesRead, false);
+ }
+
+ TotalBytesRead += BytesRead;
+ BytesToRead -= BytesRead;
+ Begin += BytesRead;
+ }
+ return std::make_pair(TotalBytesRead, true);
+}
+
+static bool readValueFromFile(const char *Filename,
+ long long *Value) XRAY_NEVER_INSTRUMENT {
+ int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
+ if (Fd == -1)
+ return false;
+ static constexpr size_t BufSize = 256;
+ char Line[BufSize] = {};
+ ssize_t BytesRead;
+ bool Success;
+ std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
+ if (!Success)
+ return false;
+ close(Fd);
+ char *End = nullptr;
+ long long Tmp = internal_simple_strtoll(Line, &End, 10);
+ bool Result = false;
+ if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
+ *Value = Tmp;
+ Result = true;
+ }
+ return Result;
+}
+
+uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT {
+ long long CPUFrequency = -1;
+ if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
+ &CPUFrequency)) {
+ CPUFrequency *= 1000;
+ } else if (readValueFromFile(
+ "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
+ &CPUFrequency)) {
+ CPUFrequency *= 1000;
+ } else {
+ Report("Unable to determine CPU frequency for TSC accounting.\n");
+ }
+ return CPUFrequency == -1 ? 0 : static_cast<uint64_t>(CPUFrequency);
+}
+
static constexpr uint8_t CallOpCode = 0xe8;
static constexpr uint16_t MovR10Seq = 0xba41;
static constexpr uint16_t Jmp9Seq = 0x09eb;
diff --git a/lib/xray/xray_x86_64.h b/lib/xray/xray_x86_64.h
new file mode 100644
index 000000000..c34e02ce7
--- /dev/null
+++ b/lib/xray/xray_x86_64.h
@@ -0,0 +1,16 @@
+#pragma once
+#include <x86intrin.h>
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "xray_defs.h"
+
+namespace __xray {
+
+ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+ unsigned LongCPU;
+ uint64_t TSC = __rdtscp(&LongCPU);
+ CPU = LongCPU;
+ return TSC;
+}
+
+}