summaryrefslogtreecommitdiff
path: root/lib/esan
diff options
context:
space:
mode:
authorDerek Bruening <bruening@google.com>2016-06-03 16:27:50 +0000
committerDerek Bruening <bruening@google.com>2016-06-03 16:27:50 +0000
commitb05f4c85dea351be7a3d529e6f98e22b6bb163d7 (patch)
tree4772602a60f8fbf0fc3f33dbed3b4a8c5389a0ab /lib/esan
parentd5bcae95c1d1b013893bd3ca6cc04e7f7f5d5736 (diff)
[esan|wset] Add 8-level working set snapshot accumulation
Summary: Adds a new option -snapshot_step controlling the frequency distribution for an 8-level series of samples using each bit of each shadow byte. Implements accumulation from each level to the next higher level at the specified frequency. Adds storage of the 8 series of samples using CircularBuffer instances. Fixes an error in the circular buffer data structure where a static object's destructor will be called too early. Prints the results out at the end in a simple manner to give us something to start with. Updates the workingset-samples test to test the new feature. Reviewers: aizatsky Subscribers: vitalybuka, zhaoqin, kcc, eugenis, llvm-commits, kubabrecka Differential Revision: http://reviews.llvm.org/D20833 git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@271683 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/esan')
-rw-r--r--lib/esan/esan_circular_buffer.h6
-rw-r--r--lib/esan/esan_flags.inc6
-rw-r--r--lib/esan/working_set.cpp97
3 files changed, 96 insertions, 13 deletions
diff --git a/lib/esan/esan_circular_buffer.h b/lib/esan/esan_circular_buffer.h
index 98891109c..9ce102d04 100644
--- a/lib/esan/esan_circular_buffer.h
+++ b/lib/esan/esan_circular_buffer.h
@@ -28,9 +28,11 @@ class CircularBuffer {
explicit CircularBuffer() {}
CircularBuffer(uptr BufferCapacity) {
initialize(BufferCapacity);
+ WasConstructed = true;
}
~CircularBuffer() {
- free();
+ if (WasConstructed) // Else caller will call free() explicitly.
+ free();
}
void initialize(uptr BufferCapacity) {
Capacity = BufferCapacity;
@@ -38,6 +40,7 @@ class CircularBuffer {
Data = (T *)MmapOrDie(Capacity * sizeof(T), "CircularBuffer");
StartIdx = 0;
Count = 0;
+ WasConstructed = false;
}
void free() {
UnmapOrDie(Data, Capacity * sizeof(T));
@@ -83,6 +86,7 @@ class CircularBuffer {
CircularBuffer(const CircularBuffer&);
void operator=(const CircularBuffer&);
+ bool WasConstructed;
T *Data;
uptr Capacity;
uptr StartIdx;
diff --git a/lib/esan/esan_flags.inc b/lib/esan/esan_flags.inc
index f8a95267f..eb9d0e5a9 100644
--- a/lib/esan/esan_flags.inc
+++ b/lib/esan/esan_flags.inc
@@ -39,3 +39,9 @@ ESAN_FLAG(bool, record_snapshots, true,
// To disable samples, turn off record_snapshots.
ESAN_FLAG(int, sample_freq, 20,
"Working set tool: sampling frequency in milliseconds.")
+
+// This controls the difference in frequency between each successive series
+// of snapshots. There are 8 in total, with series 0 using sample_freq.
+// Series N samples series N-1 every (1 << snapshot_step) instances of N-1.
+ESAN_FLAG(int, snapshot_step, 2, "Working set tool: the log of the sampling "
+ "performed for the next-higher-frequency snapshot series.")
diff --git a/lib/esan/working_set.cpp b/lib/esan/working_set.cpp
index f4ed8a17e..9e7520f49 100644
--- a/lib/esan/working_set.cpp
+++ b/lib/esan/working_set.cpp
@@ -14,6 +14,7 @@
#include "working_set.h"
#include "esan.h"
+#include "esan_circular_buffer.h"
#include "esan_flags.h"
#include "esan_shadow.h"
#include "esan_sideline.h"
@@ -24,9 +25,15 @@
// cache line has ever been accessed.
// - The lowest bit of each shadow byte indicates whether the corresponding
// cache line was accessed since the last sample.
-// - The other bits can be used either for a single working set snapshot
-// between two consecutive samples, or an aggregate working set snapshot
-// over multiple sample periods (future work).
+// - The other bits are used for working set snapshots at successively
+// lower frequencies, each bit to the left from the lowest bit stepping
+// down the frequency by 2 to the power of getFlags()->snapshot_step.
+// Thus with the default snapshot_step of 2 we have something like this:
+// Bit 0: Since last sample
+// Bit 1: Since last 2^2 samples
+// Bit 2: Since last 2^4 samples
+// Bit 3: ...
+// Bit 7: Ever accessed.
// We live with races in accessing each shadow byte.
typedef unsigned char byte;
@@ -37,6 +44,10 @@ static const u32 CacheLineSize = 64;
// See the shadow byte layout description above.
static const u32 TotalWorkingSetBitIdx = 7;
+// We accumulate to the left until we hit this bit.
+// We don't need to accumulate to the final bit as it's set on each ref
+// by the compiler instrumentation.
+static const u32 MaxAccumBitIdx = 6;
static const u32 CurWorkingSetBitIdx = 0;
static const byte ShadowAccessedVal =
(1 << TotalWorkingSetBitIdx) | (1 << CurWorkingSetBitIdx);
@@ -47,6 +58,26 @@ static SidelineThread Thread;
// may want to consider a 64-bit int.
static u32 SnapshotNum;
+// We store the wset size for each of 8 different sampling frequencies.
+static const u32 NumFreq = 8; // One for each bit of our shadow bytes.
+// We cannot let the destructors of these global objects free them, as global
+// destructors are called prior to our finalize routine; we free() explicitly.
+// These are each circular buffers, sized up front.
+CircularBuffer<u32> SizePerFreq[NumFreq];
+// We cannot rely on static initializers (they may run too late) but
+// we record the size here for clarity:
+u32 CircularBufferSizes[NumFreq] = {
+ // These are each mmap-ed so our minimum is one page.
+ 32*1024,
+ 16*1024,
+ 8*1024,
+ 4*1024,
+ 4*1024,
+ 4*1024,
+ 4*1024,
+ 4*1024,
+};
+
void processRangeAccessWorkingSet(uptr PC, uptr Addr, SIZE_T Size,
bool IsWrite) {
if (Size == 0)
@@ -95,13 +126,17 @@ static u32 countAndClearShadowValues(u32 BitIdx, uptr ShadowStart,
ByteValue << 24;
// Get word aligned start.
ShadowStart = RoundDownTo(ShadowStart, sizeof(u32));
+ bool Accum = getFlags()->record_snapshots && BitIdx < MaxAccumBitIdx;
for (u32 *Ptr = (u32 *)ShadowStart; Ptr < (u32 *)ShadowEnd; ++Ptr) {
if ((*Ptr & WordValue) != 0) {
byte *BytePtr = (byte *)Ptr;
for (u32 j = 0; j < sizeof(u32); ++j) {
if (BytePtr[j] & ByteValue) {
++WorkingSetSize;
- // TODO: Accumulate to the lower-frequency bit to the left.
+ if (Accum) {
+ // Accumulate to the lower-frequency bit to the left.
+ BytePtr[j] |= (ByteValue << 1);
+ }
}
}
// Clear this bit from every shadow byte.
@@ -134,19 +169,41 @@ static u32 computeWorkingSizeAndReset(u32 BitIdx) {
// This is invoked from a signal handler but in a sideline thread doing nothing
// else so it is a little less fragile than a typical signal handler.
static void takeSample(void *Arg) {
- // FIXME: record the size and report at process end. For now this simply
- // serves as a test of the sideline thread functionality.
- VReport(1, "%s: snapshot #%d: %u\n", SanitizerToolName, SnapshotNum,
- computeWorkingSizeAndReset(CurWorkingSetBitIdx));
- ++SnapshotNum;
+ u32 BitIdx = CurWorkingSetBitIdx;
+ u32 Freq = 1;
+ ++SnapshotNum; // Simpler to skip 0 whose mod matches everything.
+ while (BitIdx <= MaxAccumBitIdx && (SnapshotNum % Freq) == 0) {
+ u32 NumLines = computeWorkingSizeAndReset(BitIdx);
+ VReport(1, "%s: snapshot #%5d bit %d freq %4d: %8u\n", SanitizerToolName,
+ SnapshotNum, BitIdx, Freq, NumLines);
+ SizePerFreq[BitIdx].push_back(NumLines);
+ Freq = Freq << getFlags()->snapshot_step;
+ BitIdx++;
+ }
}
void initializeWorkingSet() {
CHECK(getFlags()->cache_line_size == CacheLineSize);
registerMemoryFaultHandler();
- if (getFlags()->record_snapshots)
+ if (getFlags()->record_snapshots) {
+ for (u32 i = 0; i < NumFreq; ++i)
+ SizePerFreq[i].initialize(CircularBufferSizes[i]);
Thread.launchThread(takeSample, nullptr, getFlags()->sample_freq);
+ }
+}
+
+static u32 getPeriodForPrinting(u32 MilliSec, const char *&Unit) {
+ if (MilliSec > 600000) {
+ Unit = "min";
+ return MilliSec / 60000;
+ } else if (MilliSec > 10000) {
+ Unit = "sec";
+ return MilliSec / 1000;
+ } else {
+ Unit = "ms";
+ return MilliSec;
+ }
}
static u32 getSizeForPrinting(u32 NumOfCachelines, const char *&Unit) {
@@ -167,12 +224,28 @@ static u32 getSizeForPrinting(u32 NumOfCachelines, const char *&Unit) {
}
int finalizeWorkingSet() {
- if (getFlags()->record_snapshots)
+ const char *Unit;
+ if (getFlags()->record_snapshots) {
Thread.joinThread();
+ u32 Freq = 1;
+ Report(" Total number of samples: %u\n", SnapshotNum);
+ for (u32 i = 0; i < NumFreq; ++i) {
+ u32 Time = getPeriodForPrinting(getFlags()->sample_freq*Freq, Unit);
+ Report(" Samples array #%d at period %u %s\n", i, Time, Unit);
+ // FIXME: report whether we wrapped around and thus whether we
+ // have data on the whole run or just the last N samples.
+ for (u32 j = 0; j < SizePerFreq[i].size(); ++j) {
+ u32 Size = getSizeForPrinting(SizePerFreq[i][j], Unit);
+ Report("#%4d: %8u %s (%9u cache lines)\n", j, Size, Unit,
+ SizePerFreq[i][j]);
+ }
+ Freq = Freq << getFlags()->snapshot_step;
+ SizePerFreq[i].free();
+ }
+ }
// Get the working set size for the entire execution.
u32 NumOfCachelines = computeWorkingSizeAndReset(TotalWorkingSetBitIdx);
- const char *Unit;
u32 Size = getSizeForPrinting(NumOfCachelines, Unit);
Report(" %s: the total working set size: %u %s (%u cache lines)\n",
SanitizerToolName, Size, Unit, NumOfCachelines);