author	Dmitry Vyukov <dvyukov@google.com>	2014-08-05 18:45:02 +0000
committer	Dmitry Vyukov <dvyukov@google.com>	2014-08-05 18:45:02 +0000
commit	f31ccc97cdeccd15470ff98bf8d436df72a1760e (patch)
tree	8e7a3ba0224bf49b5a616dcccd7744f39d7c9071 /lib
parent	0a71bbee3352ed2b0bec6af55d40393b02e0aa25 (diff)
tsan: allocate vector clocks using slab allocator
Vector clocks are the most actively allocated objects in the tsan runtime. The current internal allocator is not scalable enough to handle clock allocation in a scalable way (its caches are too small). This change transforms clocks into a 2-level array of 512-byte blocks. Since all blocks are the same size, they can be cached more efficiently in per-thread caches.

git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@214912 91177308-0d34-0410-b5e6-96231b3b80d8
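For orientation before the diff, here is a minimal sketch of the two-level layout this change introduces, simplified from tsan_clock.h/tsan_clock.cc. The pool/Map() toy allocator and the 42/22 bitfield split are assumptions of this sketch (tsan derives the split from kClkBits and uses DenseSlabAlloc), not part of the patch:

#include <cstdint>

typedef uint32_t u32;
typedef uint64_t u64;

struct ClockElem {            // 8 bytes; the 42/22 split is illustrative,
  u64 epoch  : 42;            // tsan derives it from kClkBits
  u64 reused : 22;
};

// A 512-byte block either holds clock elements inline (one-level case)
// or u32 indices of second-level blocks (two-level case).
struct ClockBlock {
  static const unsigned kSize = 512;
  static const unsigned kTableSize = kSize / sizeof(u32);        // 128
  static const unsigned kClockCount = kSize / sizeof(ClockElem); // 64
  union {
    u32 table[kTableSize];         // first level: second-level block indices
    ClockElem clock[kClockCount];  // or the elements themselves, if few
  };
};

// Toy slab: Map() turns a block index into a block pointer.
static ClockBlock pool[1024];
static ClockBlock *Map(u32 idx) { return &pool[idx]; }

// Mirrors SyncClock::elem(): one level for small clocks, two levels beyond.
static ClockElem &elem(ClockBlock *tab, u32 size, unsigned tid) {
  if (size <= ClockBlock::kClockCount)
    return tab->clock[tid];
  u32 idx = tab->table[tid / ClockBlock::kClockCount];
  return Map(idx)->clock[tid % ClockBlock::kClockCount];
}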
Diffstat (limited to 'lib')
-rw-r--r--  lib/sanitizer_common/sanitizer_thread_registry.cc |   5
-rw-r--r--  lib/sanitizer_common/sanitizer_thread_registry.h  |   2
-rw-r--r--  lib/tsan/rtl/tsan_clock.cc                        | 185
-rw-r--r--  lib/tsan/rtl/tsan_clock.h                         |  54
-rw-r--r--  lib/tsan/rtl/tsan_flags.cc                        |   2
-rw-r--r--  lib/tsan/rtl/tsan_rtl.h                           |   4
-rw-r--r--  lib/tsan/rtl/tsan_rtl_mutex.cc                    |  12
-rw-r--r--  lib/tsan/rtl/tsan_rtl_thread.cc                   |  16
-rw-r--r--  lib/tsan/rtl/tsan_sync.cc                         |  17
-rw-r--r--  lib/tsan/rtl/tsan_sync.h                          |   2
-rw-r--r--  lib/tsan/tests/unit/tsan_clock_test.cc            | 129
-rw-r--r--  lib/tsan/tests/unit/tsan_sync_test.cc             |   2
12 files changed, 317 insertions(+), 113 deletions(-)
diff --git a/lib/sanitizer_common/sanitizer_thread_registry.cc b/lib/sanitizer_common/sanitizer_thread_registry.cc
index ed2c6019d..fa8482de2 100644
--- a/lib/sanitizer_common/sanitizer_thread_registry.cc
+++ b/lib/sanitizer_common/sanitizer_thread_registry.cc
@@ -219,6 +219,10 @@ void ThreadRegistry::SetThreadNameByUserId(uptr user_id, const char *name) {
}
void ThreadRegistry::DetachThread(u32 tid) {
+ DetachThread(tid, 0);
+}
+
+void ThreadRegistry::DetachThread(u32 tid, void *arg) {
BlockingMutexLock l(&mtx_);
CHECK_LT(tid, n_contexts_);
ThreadContextBase *tctx = threads_[tid];
@@ -227,6 +231,7 @@ void ThreadRegistry::DetachThread(u32 tid) {
Report("%s: Detach of non-existent thread\n", SanitizerToolName);
return;
}
+ tctx->OnDetached(arg);
if (tctx->status == ThreadStatusFinished) {
tctx->SetDead();
QuarantinePush(tctx);
diff --git a/lib/sanitizer_common/sanitizer_thread_registry.h b/lib/sanitizer_common/sanitizer_thread_registry.h
index 8bb7ff384..e364c21f6 100644
--- a/lib/sanitizer_common/sanitizer_thread_registry.h
+++ b/lib/sanitizer_common/sanitizer_thread_registry.h
@@ -68,6 +68,7 @@ class ThreadContextBase {
virtual void OnStarted(void *arg) {}
virtual void OnCreated(void *arg) {}
virtual void OnReset() {}
+ virtual void OnDetached(void *arg) {}
};
typedef ThreadContextBase* (*ThreadContextFactory)(u32 tid);
@@ -111,6 +112,7 @@ class ThreadRegistry {
void SetThreadName(u32 tid, const char *name);
void SetThreadNameByUserId(uptr user_id, const char *name);
void DetachThread(u32 tid);
+ void DetachThread(u32 tid, void *arg);
void JoinThread(u32 tid, void *arg);
void FinishThread(u32 tid);
void StartThread(u32 tid, uptr os_id, void *arg);
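The registry side of the change is a small hook-threading pattern: the old DetachThread(tid) forwards to a new overload that carries an opaque arg down to a virtual OnDetached(), which tools override. A stripped-down sketch of the pattern (names follow the diff; the mutex, tid lookup, and existence checks are elided):

struct ThreadContextBase {
  virtual void OnDetached(void *arg) {}   // default: no-op, tools override
  virtual ~ThreadContextBase() {}
};

struct ThreadRegistry {
  ThreadContextBase *tctx;                // lookup by tid elided
  void DetachThread(unsigned tid) { DetachThread(tid, 0); }  // old signature
  void DetachThread(unsigned tid, void *arg) {
    // ... locking and existence checks elided ...
    tctx->OnDetached(arg);  // tsan passes the detaching ThreadState here
  }
};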
diff --git a/lib/tsan/rtl/tsan_clock.cc b/lib/tsan/rtl/tsan_clock.cc
index e140a3cb9..a46e7c7dc 100644
--- a/lib/tsan/rtl/tsan_clock.cc
+++ b/lib/tsan/rtl/tsan_clock.cc
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "tsan_clock.h"
#include "tsan_rtl.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
// SyncClock and ThreadClock implement vector clocks for sync variables
// (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
@@ -102,13 +103,13 @@ ThreadClock::ThreadClock(unsigned tid, unsigned reused)
clk_[tid_].reused = reused_;
}
-void ThreadClock::acquire(const SyncClock *src) {
+void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
DCHECK(nclk_ <= kMaxTid);
- DCHECK(src->clk_.Size() <= kMaxTid);
+ DCHECK(src->size_ <= kMaxTid);
CPP_STAT_INC(StatClockAcquire);
// Check if it's empty -> no need to do anything.
- const uptr nclk = src->clk_.Size();
+ const uptr nclk = src->size_;
if (nclk == 0) {
CPP_STAT_INC(StatClockAcquireEmpty);
return;
@@ -118,12 +119,12 @@ void ThreadClock::acquire(const SyncClock *src) {
bool acquired = false;
if (nclk > tid_) {
CPP_STAT_INC(StatClockAcquireLarge);
- if (src->clk_[tid_].reused == reused_) {
+ if (src->elem(tid_).reused == reused_) {
CPP_STAT_INC(StatClockAcquireRepeat);
for (unsigned i = 0; i < kDirtyTids; i++) {
unsigned tid = src->dirty_tids_[i];
if (tid != kInvalidTid) {
- u64 epoch = src->clk_[tid].epoch;
+ u64 epoch = src->elem(tid).epoch;
if (clk_[tid].epoch < epoch) {
clk_[tid].epoch = epoch;
acquired = true;
@@ -142,7 +143,7 @@ void ThreadClock::acquire(const SyncClock *src) {
CPP_STAT_INC(StatClockAcquireFull);
nclk_ = max(nclk_, nclk);
for (uptr i = 0; i < nclk; i++) {
- u64 epoch = src->clk_[i].epoch;
+ u64 epoch = src->elem(i).epoch;
if (clk_[i].epoch < epoch) {
clk_[i].epoch = epoch;
acquired = true;
@@ -151,7 +152,7 @@ void ThreadClock::acquire(const SyncClock *src) {
// Remember that this thread has acquired this clock.
if (nclk > tid_)
- src->clk_[tid_].reused = reused_;
+ src->elem(tid_).reused = reused_;
if (acquired) {
CPP_STAT_INC(StatClockAcquiredSomething);
@@ -159,28 +160,26 @@ void ThreadClock::acquire(const SyncClock *src) {
}
}
-void ThreadClock::release(SyncClock *dst) const {
+void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
DCHECK_LE(nclk_, kMaxTid);
- DCHECK_LE(dst->clk_.Size(), kMaxTid);
+ DCHECK_LE(dst->size_, kMaxTid);
- if (dst->clk_.Size() == 0) {
+ if (dst->size_ == 0) {
// ReleaseStore will correctly set release_store_tid_,
// which can be important for future operations.
- ReleaseStore(dst);
+ ReleaseStore(c, dst);
return;
}
CPP_STAT_INC(StatClockRelease);
// Check if we need to resize dst.
- if (dst->clk_.Size() < nclk_) {
- CPP_STAT_INC(StatClockReleaseResize);
- dst->clk_.Resize(nclk_);
- }
+ if (dst->size_ < nclk_)
+ Resize(c, dst);
// Check if we had not acquired anything from other threads
// since the last release on dst. If so, we need to update
- // only dst->clk_[tid_].
- if (dst->clk_[tid_].epoch > last_acquire_) {
+ // only dst->elem(tid_).
+ if (dst->elem(tid_).epoch > last_acquire_) {
UpdateCurrentThread(dst);
if (dst->release_store_tid_ != tid_ ||
dst->release_store_reused_ != reused_)
@@ -196,14 +195,15 @@ void ThreadClock::release(SyncClock *dst) const {
CPP_STAT_INC(StatClockReleaseAcquired);
// Update dst->clk_.
for (uptr i = 0; i < nclk_; i++) {
- dst->clk_[i].epoch = max(dst->clk_[i].epoch, clk_[i].epoch);
- dst->clk_[i].reused = 0;
+ ClockElem &ce = dst->elem(i);
+ ce.epoch = max(ce.epoch, clk_[i].epoch);
+ ce.reused = 0;
}
// Clear 'acquired' flag in the remaining elements.
- if (nclk_ < dst->clk_.Size())
+ if (nclk_ < dst->size_)
CPP_STAT_INC(StatClockReleaseClearTail);
- for (uptr i = nclk_; i < dst->clk_.Size(); i++)
- dst->clk_[i].reused = 0;
+ for (uptr i = nclk_; i < dst->size_; i++)
+ dst->elem(i).reused = 0;
for (unsigned i = 0; i < kDirtyTids; i++)
dst->dirty_tids_[i] = kInvalidTid;
dst->release_store_tid_ = kInvalidTid;
@@ -211,23 +211,21 @@ void ThreadClock::release(SyncClock *dst) const {
// If we've acquired dst, remember this fact,
// so that we don't need to acquire it on next acquire.
if (acquired)
- dst->clk_[tid_].reused = reused_;
+ dst->elem(tid_).reused = reused_;
}
-void ThreadClock::ReleaseStore(SyncClock *dst) const {
+void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
DCHECK(nclk_ <= kMaxTid);
- DCHECK(dst->clk_.Size() <= kMaxTid);
+ DCHECK(dst->size_ <= kMaxTid);
CPP_STAT_INC(StatClockStore);
// Check if we need to resize dst.
- if (dst->clk_.Size() < nclk_) {
- CPP_STAT_INC(StatClockStoreResize);
- dst->clk_.Resize(nclk_);
- }
+ if (dst->size_ < nclk_)
+ Resize(c, dst);
if (dst->release_store_tid_ == tid_ &&
dst->release_store_reused_ == reused_ &&
- dst->clk_[tid_].epoch > last_acquire_) {
+ dst->elem(tid_).epoch > last_acquire_) {
CPP_STAT_INC(StatClockStoreFast);
UpdateCurrentThread(dst);
return;
@@ -236,13 +234,17 @@ void ThreadClock::ReleaseStore(SyncClock *dst) const {
// O(N) release-store.
CPP_STAT_INC(StatClockStoreFull);
for (uptr i = 0; i < nclk_; i++) {
- dst->clk_[i].epoch = clk_[i].epoch;
- dst->clk_[i].reused = 0;
+ ClockElem &ce = dst->elem(i);
+ ce.epoch = clk_[i].epoch;
+ ce.reused = 0;
}
// Clear the tail of dst->clk_.
- if (nclk_ < dst->clk_.Size()) {
- internal_memset(&dst->clk_[nclk_], 0,
- (dst->clk_.Size() - nclk_) * sizeof(dst->clk_[0]));
+ if (nclk_ < dst->size_) {
+ for (uptr i = nclk_; i < dst->size_; i++) {
+ ClockElem &ce = dst->elem(i);
+ ce.epoch = 0;
+ ce.reused = 0;
+ }
CPP_STAT_INC(StatClockStoreTail);
}
for (unsigned i = 0; i < kDirtyTids; i++)
@@ -250,19 +252,19 @@ void ThreadClock::ReleaseStore(SyncClock *dst) const {
dst->release_store_tid_ = tid_;
dst->release_store_reused_ = reused_;
// Remember that we don't need to acquire it in the future.
- dst->clk_[tid_].reused = reused_;
+ dst->elem(tid_).reused = reused_;
}
-void ThreadClock::acq_rel(SyncClock *dst) {
+void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
CPP_STAT_INC(StatClockAcquireRelease);
- acquire(dst);
- ReleaseStore(dst);
+ acquire(c, dst);
+ ReleaseStore(c, dst);
}
// Updates only a single element related to the current thread in dst->clk_.
void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
// Update the thread's time, but preserve the 'acquired' flag.
- dst->clk_[tid_].epoch = clk_[tid_].epoch;
+ dst->elem(tid_).epoch = clk_[tid_].epoch;
for (unsigned i = 0; i < kDirtyTids; i++) {
if (dst->dirty_tids_[i] == tid_) {
@@ -277,27 +279,73 @@ void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
}
// Reset all 'acquired' flags, O(N).
CPP_STAT_INC(StatClockReleaseSlow);
- for (uptr i = 0; i < dst->clk_.Size(); i++) {
- dst->clk_[i].reused = 0;
- }
+ for (uptr i = 0; i < dst->size_; i++)
+ dst->elem(i).reused = 0;
for (unsigned i = 0; i < kDirtyTids; i++)
dst->dirty_tids_[i] = kInvalidTid;
}
// Checks whether the current thread has already acquired src.
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
- if (src->clk_[tid_].reused != reused_)
+ if (src->elem(tid_).reused != reused_)
return false;
for (unsigned i = 0; i < kDirtyTids; i++) {
unsigned tid = src->dirty_tids_[i];
if (tid != kInvalidTid) {
- if (clk_[tid].epoch < src->clk_[tid].epoch)
+ if (clk_[tid].epoch < src->elem(tid).epoch)
return false;
}
}
return true;
}
+void ThreadClock::Resize(ClockCache *c, SyncClock *dst) const {
+ CPP_STAT_INC(StatClockReleaseResize);
+ if (RoundUpTo(nclk_, ClockBlock::kClockCount) <=
+ RoundUpTo(dst->size_, ClockBlock::kClockCount)) {
+ // Growing within the same block.
+ // Memory is already allocated, just increase the size.
+ dst->size_ = nclk_;
+ return;
+ }
+ if (nclk_ <= ClockBlock::kClockCount) {
+ // Grow from 0 to one-level table.
+ CHECK_EQ(dst->size_, 0);
+ CHECK_EQ(dst->tab_, 0);
+ CHECK_EQ(dst->tab_idx_, 0);
+ dst->size_ = nclk_;
+ dst->tab_idx_ = ctx->clock_alloc.Alloc(c);
+ dst->tab_ = ctx->clock_alloc.Map(dst->tab_idx_);
+ internal_memset(dst->tab_, 0, sizeof(*dst->tab_));
+ return;
+ }
+ // Growing two-level table.
+ if (dst->size_ == 0) {
+ // Allocate first level table.
+ dst->tab_idx_ = ctx->clock_alloc.Alloc(c);
+ dst->tab_ = ctx->clock_alloc.Map(dst->tab_idx_);
+ internal_memset(dst->tab_, 0, sizeof(*dst->tab_));
+ } else if (dst->size_ <= ClockBlock::kClockCount) {
+ // Transform one-level table to two-level table.
+ u32 old = dst->tab_idx_;
+ dst->tab_idx_ = ctx->clock_alloc.Alloc(c);
+ dst->tab_ = ctx->clock_alloc.Map(dst->tab_idx_);
+ internal_memset(dst->tab_, 0, sizeof(*dst->tab_));
+ dst->tab_->table[0] = old;
+ }
+ // At this point we have first level table allocated.
+ // Add second level tables as necessary.
+ for (uptr i = RoundUpTo(dst->size_, ClockBlock::kClockCount);
+ i < nclk_; i += ClockBlock::kClockCount) {
+ u32 idx = ctx->clock_alloc.Alloc(c);
+ ClockBlock *cb = ctx->clock_alloc.Map(idx);
+ internal_memset(cb, 0, sizeof(*cb));
+ CHECK_EQ(dst->tab_->table[i/ClockBlock::kClockCount], 0);
+ dst->tab_->table[i/ClockBlock::kClockCount] = idx;
+ }
+ dst->size_ = nclk_;
+}
+
// Sets a single element in the vector clock.
// This function is called only from weird places like AcquireGlobal.
void ThreadClock::set(unsigned tid, u64 v) {
@@ -320,34 +368,59 @@ void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
tid_, reused_, last_acquire_);
}
-SyncClock::SyncClock()
- : clk_(MBlockClock) {
+SyncClock::SyncClock() {
+ tab_ = 0;
+ tab_idx_ = 0;
+ size_ = 0;
release_store_tid_ = kInvalidTid;
release_store_reused_ = 0;
for (uptr i = 0; i < kDirtyTids; i++)
dirty_tids_[i] = kInvalidTid;
}
-void SyncClock::Reset() {
- clk_.Reset();
- Zero();
+SyncClock::~SyncClock() {
+ CHECK_EQ(size_, 0);
+ CHECK_EQ(tab_, 0);
+ CHECK_EQ(tab_idx_, 0);
}
-void SyncClock::Zero() {
- clk_.Resize(0);
+void SyncClock::Reset(ClockCache *c) {
+ if (size_ == 0) {
+ // nothing
+ } else if (size_ <= ClockBlock::kClockCount) {
+ // One-level table.
+ ctx->clock_alloc.Free(c, tab_idx_);
+ } else {
+ // Two-level table.
+ for (uptr i = 0; i < size_; i += ClockBlock::kClockCount)
+ ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]);
+ ctx->clock_alloc.Free(c, tab_idx_);
+ }
+ tab_ = 0;
+ tab_idx_ = 0;
+ size_ = 0;
release_store_tid_ = kInvalidTid;
release_store_reused_ = 0;
for (uptr i = 0; i < kDirtyTids; i++)
dirty_tids_[i] = kInvalidTid;
}
+ClockElem &SyncClock::elem(unsigned tid) const {
+ DCHECK_LT(tid, size_);
+ if (size_ <= ClockBlock::kClockCount)
+ return tab_->clock[tid];
+ u32 idx = tab_->table[tid / ClockBlock::kClockCount];
+ ClockBlock *cb = ctx->clock_alloc.Map(idx);
+ return cb->clock[tid % ClockBlock::kClockCount];
+}
+
void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
printf("clock=[");
- for (uptr i = 0; i < clk_.Size(); i++)
- printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
+ for (uptr i = 0; i < size_; i++)
+ printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
printf("] reused=[");
- for (uptr i = 0; i < clk_.Size(); i++)
- printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
+ for (uptr i = 0; i < size_; i++)
+ printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
release_store_tid_, release_store_reused_,
dirty_tids_[0], dirty_tids_[1]);
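The fast path at the top of the new Resize() only compares block-rounded capacities; a tiny worked example of that check (kClockCount = 64 per the header comment; this RoundUpTo is a generic stand-in for the sanitizer helper):

#include <cstdio>

typedef unsigned long uptr;
static uptr RoundUpTo(uptr n, uptr b) { return (n + b - 1) / b * b; }

int main() {
  const uptr kClockCount = 64;
  // 10 -> 60 elements: both round to 64, the block already fits, so
  // Resize() just bumps size_.
  printf("%lu %lu\n", RoundUpTo(10, kClockCount), RoundUpTo(60, kClockCount));
  // 60 -> 100 elements: 64 vs 128, a block boundary is crossed, so new
  // blocks are allocated (and a one-level table is promoted to two levels).
  printf("%lu\n", RoundUpTo(100, kClockCount));
  return 0;
}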
diff --git a/lib/tsan/rtl/tsan_clock.h b/lib/tsan/rtl/tsan_clock.h
index f7ab69a3a..7b4de555a 100644
--- a/lib/tsan/rtl/tsan_clock.h
+++ b/lib/tsan/rtl/tsan_clock.h
@@ -14,7 +14,7 @@
#define TSAN_CLOCK_H
#include "tsan_defs.h"
-#include "tsan_vector.h"
+#include "tsan_dense_alloc.h"
namespace __tsan {
@@ -23,37 +23,64 @@ struct ClockElem {
u64 reused : 64 - kClkBits;
};
+struct ClockBlock {
+ static const uptr kSize = 512;
+ static const uptr kTableSize = kSize / sizeof(u32);
+ static const uptr kClockCount = kSize / sizeof(ClockElem);
+
+ union {
+ u32 table[kTableSize];
+ ClockElem clock[kClockCount];
+ };
+
+ ClockBlock() {
+ }
+};
+
+typedef DenseSlabAlloc<ClockBlock, 1<<16, 1<<10> ClockAlloc;
+typedef DenseSlabAllocCache ClockCache;
+
// The clock that lives in sync variables (mutexes, atomics, etc).
class SyncClock {
public:
SyncClock();
+ ~SyncClock();
uptr size() const {
- return clk_.Size();
+ return size_;
}
u64 get(unsigned tid) const {
- DCHECK_LT(tid, clk_.Size());
- return clk_[tid].epoch;
+ return elem(tid).epoch;
}
- void Reset();
- void Zero();
+ void Reset(ClockCache *c);
void DebugDump(int(*printf)(const char *s, ...));
private:
+ friend struct ThreadClock;
+ static const uptr kDirtyTids = 2;
+
unsigned release_store_tid_;
unsigned release_store_reused_;
- static const uptr kDirtyTids = 2;
unsigned dirty_tids_[kDirtyTids];
- mutable Vector<ClockElem> clk_;
- friend struct ThreadClock;
+ // tab_ contains an indirect pointer to a 512-byte block via DenseSlabAlloc.
+ // If size_ <= 64, then tab_ points to an array with 64 ClockElem's.
+ // Otherwise, tab_ points to an array with 128 u32 elements,
+ // each pointing to the second-level 512b block with 64 ClockElem's.
+ ClockBlock *tab_;
+ u32 tab_idx_;
+ u32 size_;
+
+ ClockElem &elem(unsigned tid) const;
};
// The clock that lives in threads.
struct ThreadClock {
public:
+ typedef DenseSlabAllocCache Cache;
+
explicit ThreadClock(unsigned tid, unsigned reused = 0);
u64 get(unsigned tid) const {
@@ -76,10 +103,10 @@ struct ThreadClock {
return nclk_;
}
- void acquire(const SyncClock *src);
- void release(SyncClock *dst) const;
- void acq_rel(SyncClock *dst);
- void ReleaseStore(SyncClock *dst) const;
+ void acquire(ClockCache *c, const SyncClock *src);
+ void release(ClockCache *c, SyncClock *dst) const;
+ void acq_rel(ClockCache *c, SyncClock *dst);
+ void ReleaseStore(ClockCache *c, SyncClock *dst) const;
void DebugReset();
void DebugDump(int(*printf)(const char *s, ...));
@@ -94,6 +121,7 @@ struct ThreadClock {
bool IsAlreadyAcquired(const SyncClock *src) const;
void UpdateCurrentThread(SyncClock *dst) const;
+ void Resize(ClockCache *c, SyncClock *dst) const;
};
} // namespace __tsan
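The constants in ClockBlock bound the clock capacity; a quick check of the arithmetic (assumes sizeof(ClockElem) == 8, which the "64 ClockElem's / 128 u32 elements" comment above implies):

#include <cstdio>
#include <cstdint>

int main() {
  const size_t kSize = 512;                             // one slab block
  const size_t kTableSize  = kSize / sizeof(uint32_t);  // 128 index slots
  const size_t kClockCount = kSize / 8;                 // 64 elems per block
  // One-level clock: up to 64 threads in a single block.
  // Two-level clock: 128 blocks x 64 elems = 8192 threads.
  printf("%zu %zu %zu\n", kTableSize, kClockCount, kTableSize * kClockCount);
  return 0;
}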
diff --git a/lib/tsan/rtl/tsan_flags.cc b/lib/tsan/rtl/tsan_flags.cc
index 466861fa7..39b607ff6 100644
--- a/lib/tsan/rtl/tsan_flags.cc
+++ b/lib/tsan/rtl/tsan_flags.cc
@@ -107,7 +107,7 @@ void InitializeFlags(Flags *f, const char *env) {
ParseCommonFlagsFromString(f, env);
// Copy back to common flags.
- *common_flags() = *f;
+ internal_memcpy(common_flags(), f, sizeof(*common_flags()));
// Sanity check.
if (!f->report_bugs) {
diff --git a/lib/tsan/rtl/tsan_rtl.h b/lib/tsan/rtl/tsan_rtl.h
index 7beb4f524..13fd55b38 100644
--- a/lib/tsan/rtl/tsan_rtl.h
+++ b/lib/tsan/rtl/tsan_rtl.h
@@ -374,6 +374,7 @@ struct ThreadState {
DenseSlabAllocCache block_cache;
DenseSlabAllocCache sync_cache;
+ DenseSlabAllocCache clock_cache;
#ifndef TSAN_GO
u32 last_sleep_stack_id;
@@ -418,6 +419,7 @@ class ThreadContext : public ThreadContextBase {
void OnStarted(void *arg);
void OnCreated(void *arg);
void OnReset();
+ void OnDetached(void *arg);
};
struct RacyStacks {
@@ -466,6 +468,8 @@ struct Context {
InternalMmapVector<FiredSuppression> fired_suppressions;
DDetector *dd;
+ ClockAlloc clock_alloc;
+
Flags flags;
u64 stat[StatCnt];
diff --git a/lib/tsan/rtl/tsan_rtl_mutex.cc b/lib/tsan/rtl/tsan_rtl_mutex.cc
index 4cf47ecd1..9f57a1a2a 100644
--- a/lib/tsan/rtl/tsan_rtl_mutex.cc
+++ b/lib/tsan/rtl/tsan_rtl_mutex.cc
@@ -118,7 +118,7 @@ void MutexDestroy(ThreadState *thr, uptr pc, uptr addr) {
u64 mid = s->GetId();
u32 last_lock = s->last_lock;
if (!unlock_locked)
- s->Reset(); // must not reset it before the report is printed
+ s->Reset(thr); // must not reset it before the report is printed
s->mtx.Unlock();
if (unlock_locked) {
ThreadRegistryLock l(ctx->thread_registry);
@@ -136,7 +136,7 @@ void MutexDestroy(ThreadState *thr, uptr pc, uptr addr) {
if (unlock_locked) {
SyncVar *s = ctx->metamap.GetIfExistsAndLock(addr);
if (s != 0) {
- s->Reset();
+ s->Reset(thr);
s->mtx.Unlock();
}
}
@@ -429,7 +429,7 @@ void AcquireImpl(ThreadState *thr, uptr pc, SyncClock *c) {
if (thr->ignore_sync)
return;
thr->clock.set(thr->fast_state.epoch());
- thr->clock.acquire(c);
+ thr->clock.acquire(&thr->clock_cache, c);
StatInc(thr, StatSyncAcquire);
}
@@ -438,7 +438,7 @@ void ReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c) {
return;
thr->clock.set(thr->fast_state.epoch());
thr->fast_synch_epoch = thr->fast_state.epoch();
- thr->clock.release(c);
+ thr->clock.release(&thr->clock_cache, c);
StatInc(thr, StatSyncRelease);
}
@@ -447,7 +447,7 @@ void ReleaseStoreImpl(ThreadState *thr, uptr pc, SyncClock *c) {
return;
thr->clock.set(thr->fast_state.epoch());
thr->fast_synch_epoch = thr->fast_state.epoch();
- thr->clock.ReleaseStore(c);
+ thr->clock.ReleaseStore(&thr->clock_cache, c);
StatInc(thr, StatSyncRelease);
}
@@ -456,7 +456,7 @@ void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c) {
return;
thr->clock.set(thr->fast_state.epoch());
thr->fast_synch_epoch = thr->fast_state.epoch();
- thr->clock.acq_rel(c);
+ thr->clock.acq_rel(&thr->clock_cache, c);
StatInc(thr, StatSyncAcquire);
StatInc(thr, StatSyncRelease);
}
diff --git a/lib/tsan/rtl/tsan_rtl_thread.cc b/lib/tsan/rtl/tsan_rtl_thread.cc
index 94bf7548a..a08c1e048 100644
--- a/lib/tsan/rtl/tsan_rtl_thread.cc
+++ b/lib/tsan/rtl/tsan_rtl_thread.cc
@@ -36,13 +36,13 @@ ThreadContext::~ThreadContext() {
#endif
void ThreadContext::OnDead() {
- sync.Reset();
+ CHECK_EQ(sync.size(), 0);
}
void ThreadContext::OnJoined(void *arg) {
ThreadState *caller_thr = static_cast<ThreadState *>(arg);
AcquireImpl(caller_thr, 0, &sync);
- sync.Reset();
+ sync.Reset(&caller_thr->clock_cache);
}
struct OnCreatedArgs {
@@ -65,11 +65,16 @@ void ThreadContext::OnCreated(void *arg) {
}
void ThreadContext::OnReset() {
- sync.Reset();
+ CHECK_EQ(sync.size(), 0);
FlushUnneededShadowMemory(GetThreadTrace(tid), TraceSize() * sizeof(Event));
//!!! FlushUnneededShadowMemory(GetThreadTraceHeader(tid), sizeof(Trace));
}
+void ThreadContext::OnDetached(void *arg) {
+ ThreadState *thr1 = static_cast<ThreadState*>(arg);
+ sync.Reset(&thr1->clock_cache);
+}
+
struct OnStartedArgs {
ThreadState *thr;
uptr stk_addr;
@@ -113,7 +118,7 @@ void ThreadContext::OnStarted(void *arg) {
Trace *thr_trace = ThreadTrace(thr->tid);
thr_trace->headers[trace].epoch0 = epoch0;
StatInc(thr, StatSyncAcquire);
- sync.Reset();
+ sync.Reset(&thr->clock_cache);
DPrintf("#%d: ThreadStart epoch=%zu stk_addr=%zx stk_size=%zx "
"tls_addr=%zx tls_size=%zx\n",
tid, (uptr)epoch0, args->stk_addr, args->stk_size,
@@ -134,6 +139,7 @@ void ThreadContext::OnFinished() {
ctx->dd->DestroyPhysicalThread(thr->dd_pt);
ctx->dd->DestroyLogicalThread(thr->dd_lt);
}
+ ctx->clock_alloc.FlushCache(&thr->clock_cache);
ctx->metamap.OnThreadIdle(thr);
#ifndef TSAN_GO
AllocatorThreadFinish(thr);
@@ -307,7 +313,7 @@ void ThreadJoin(ThreadState *thr, uptr pc, int tid) {
void ThreadDetach(ThreadState *thr, uptr pc, int tid) {
CHECK_GT(tid, 0);
CHECK_LT(tid, kMaxTid);
- ctx->thread_registry->DetachThread(tid);
+ ctx->thread_registry->DetachThread(tid, thr);
}
void ThreadSetName(ThreadState *thr, const char *name) {
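How the plumbing above fits together: each ThreadState carries a clock_cache, clock blocks are allocated and freed through it, and OnFinished() flushes it back to the global ctx->clock_alloc. A toy model of that cache flow, not DenseSlabAlloc's real batched refill/flush:

#include <vector>

struct Cache { std::vector<unsigned> ids; };  // per-thread free list

struct CentralAlloc {
  std::vector<unsigned> global_free;
  unsigned next_id;
  CentralAlloc() : next_id(1) {}
  unsigned Alloc(Cache *c) {          // common path: no global lock needed
    if (!c->ids.empty()) {
      unsigned id = c->ids.back();
      c->ids.pop_back();
      return id;
    }
    // The real allocator refills the cache in a batch under a lock here.
    return next_id++;
  }
  void Free(Cache *c, unsigned id) { c->ids.push_back(id); }  // stays local
  void FlushCache(Cache *c) {         // thread exit: return everything
    global_free.insert(global_free.end(), c->ids.begin(), c->ids.end());
    c->ids.clear();
  }
};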
diff --git a/lib/tsan/rtl/tsan_sync.cc b/lib/tsan/rtl/tsan_sync.cc
index 10f52b489..9cc778947 100644
--- a/lib/tsan/rtl/tsan_sync.cc
+++ b/lib/tsan/rtl/tsan_sync.cc
@@ -21,7 +21,7 @@ void DDMutexInit(ThreadState *thr, uptr pc, SyncVar *s);
SyncVar::SyncVar()
: mtx(MutexTypeSyncVar, StatMtxSyncVar) {
- Reset();
+ Reset(0);
}
void SyncVar::Init(ThreadState *thr, uptr pc, uptr addr, u64 uid) {
@@ -36,7 +36,7 @@ void SyncVar::Init(ThreadState *thr, uptr pc, uptr addr, u64 uid) {
DDMutexInit(thr, pc, this);
}
-void SyncVar::Reset() {
+void SyncVar::Reset(ThreadState *thr) {
uid = 0;
creation_stack_id = 0;
owner_tid = kInvalidTid;
@@ -47,8 +47,13 @@ void SyncVar::Reset() {
is_broken = 0;
is_linker_init = 0;
- clock.Zero();
- read_clock.Reset();
+ if (thr == 0) {
+ CHECK_EQ(clock.size(), 0);
+ CHECK_EQ(read_clock.size(), 0);
+ } else {
+ clock.Reset(&thr->clock_cache);
+ read_clock.Reset(&thr->clock_cache);
+ }
}
MetaMap::MetaMap() {
@@ -93,7 +98,7 @@ void MetaMap::FreeRange(ThreadState *thr, uptr pc, uptr p, uptr sz) {
DCHECK(idx & kFlagSync);
SyncVar *s = sync_alloc_.Map(idx & ~kFlagMask);
u32 next = s->next;
- s->Reset();
+ s->Reset(thr);
sync_alloc_.Free(&thr->sync_cache, idx & ~kFlagMask);
idx = next;
} else {
@@ -143,7 +148,7 @@ SyncVar* MetaMap::GetAndLock(ThreadState *thr, uptr pc,
SyncVar * s = sync_alloc_.Map(idx & ~kFlagMask);
if (s->addr == addr) {
if (myidx != 0) {
- mys->Reset();
+ mys->Reset(thr);
sync_alloc_.Free(&thr->sync_cache, myidx);
}
if (write_lock)
diff --git a/lib/tsan/rtl/tsan_sync.h b/lib/tsan/rtl/tsan_sync.h
index 7c8682fc1..574810d8a 100644
--- a/lib/tsan/rtl/tsan_sync.h
+++ b/lib/tsan/rtl/tsan_sync.h
@@ -47,7 +47,7 @@ struct SyncVar {
SyncClock clock;
void Init(ThreadState *thr, uptr pc, uptr addr, u64 uid);
- void Reset();
+ void Reset(ThreadState *thr);
u64 GetId() const {
// 47 lsb is addr, then 14 bits is low part of uid, then 3 zero bits.
diff --git a/lib/tsan/tests/unit/tsan_clock_test.cc b/lib/tsan/tests/unit/tsan_clock_test.cc
index 49e7f3f85..a1fd2b7f6 100644
--- a/lib/tsan/tests/unit/tsan_clock_test.cc
+++ b/lib/tsan/tests/unit/tsan_clock_test.cc
@@ -17,6 +17,8 @@
namespace __tsan {
+ClockCache cache;
+
TEST(Clock, VectorBasic) {
ThreadClock clk(0);
ASSERT_EQ(clk.size(), 1U);
@@ -38,30 +40,32 @@ TEST(Clock, ChunkedBasic) {
SyncClock chunked;
ASSERT_EQ(vector.size(), 1U);
ASSERT_EQ(chunked.size(), 0U);
- vector.acquire(&chunked);
+ vector.acquire(&cache, &chunked);
ASSERT_EQ(vector.size(), 1U);
ASSERT_EQ(chunked.size(), 0U);
- vector.release(&chunked);
+ vector.release(&cache, &chunked);
ASSERT_EQ(vector.size(), 1U);
ASSERT_EQ(chunked.size(), 1U);
- vector.acq_rel(&chunked);
+ vector.acq_rel(&cache, &chunked);
ASSERT_EQ(vector.size(), 1U);
ASSERT_EQ(chunked.size(), 1U);
+ chunked.Reset(&cache);
}
TEST(Clock, AcquireRelease) {
ThreadClock vector1(100);
vector1.tick();
SyncClock chunked;
- vector1.release(&chunked);
+ vector1.release(&cache, &chunked);
ASSERT_EQ(chunked.size(), 101U);
ThreadClock vector2(0);
- vector2.acquire(&chunked);
+ vector2.acquire(&cache, &chunked);
ASSERT_EQ(vector2.size(), 101U);
ASSERT_EQ(vector2.get(0), 0U);
ASSERT_EQ(vector2.get(1), 0U);
ASSERT_EQ(vector2.get(99), 0U);
ASSERT_EQ(vector2.get(100), 1U);
+ chunked.Reset(&cache);
}
TEST(Clock, RepeatedAcquire) {
@@ -71,10 +75,12 @@ TEST(Clock, RepeatedAcquire) {
thr2.tick();
SyncClock sync;
- thr1.ReleaseStore(&sync);
+ thr1.ReleaseStore(&cache, &sync);
+
+ thr2.acquire(&cache, &sync);
+ thr2.acquire(&cache, &sync);
- thr2.acquire(&sync);
- thr2.acquire(&sync);
+ sync.Reset(&cache);
}
TEST(Clock, ManyThreads) {
@@ -83,9 +89,9 @@ TEST(Clock, ManyThreads) {
ThreadClock vector(0);
vector.tick();
vector.set(i, 1);
- vector.release(&chunked);
+ vector.release(&cache, &chunked);
ASSERT_EQ(i + 1, chunked.size());
- vector.acquire(&chunked);
+ vector.acquire(&cache, &chunked);
ASSERT_EQ(i + 1, vector.size());
}
@@ -93,10 +99,12 @@ TEST(Clock, ManyThreads) {
ASSERT_EQ(1U, chunked.get(i));
ThreadClock vector(1);
- vector.acquire(&chunked);
+ vector.acquire(&cache, &chunked);
ASSERT_EQ(100U, vector.size());
for (unsigned i = 0; i < 100; i++)
ASSERT_EQ(1U, vector.get(i));
+
+ chunked.Reset(&cache);
}
TEST(Clock, DifferentSizes) {
@@ -107,33 +115,102 @@ TEST(Clock, DifferentSizes) {
vector2.tick();
{
SyncClock chunked;
- vector1.release(&chunked);
+ vector1.release(&cache, &chunked);
ASSERT_EQ(chunked.size(), 11U);
- vector2.release(&chunked);
+ vector2.release(&cache, &chunked);
ASSERT_EQ(chunked.size(), 21U);
+ chunked.Reset(&cache);
}
{
SyncClock chunked;
- vector2.release(&chunked);
+ vector2.release(&cache, &chunked);
ASSERT_EQ(chunked.size(), 21U);
- vector1.release(&chunked);
+ vector1.release(&cache, &chunked);
ASSERT_EQ(chunked.size(), 21U);
+ chunked.Reset(&cache);
}
{
SyncClock chunked;
- vector1.release(&chunked);
- vector2.acquire(&chunked);
+ vector1.release(&cache, &chunked);
+ vector2.acquire(&cache, &chunked);
ASSERT_EQ(vector2.size(), 21U);
+ chunked.Reset(&cache);
}
{
SyncClock chunked;
- vector2.release(&chunked);
- vector1.acquire(&chunked);
+ vector2.release(&cache, &chunked);
+ vector1.acquire(&cache, &chunked);
ASSERT_EQ(vector1.size(), 21U);
+ chunked.Reset(&cache);
}
}
}
+TEST(Clock, Growth) {
+ {
+ ThreadClock vector(10);
+ vector.tick();
+ vector.set(5, 42);
+ SyncClock sync;
+ vector.release(&cache, &sync);
+ ASSERT_EQ(sync.size(), 11U);
+ ASSERT_EQ(sync.get(0), 0ULL);
+ ASSERT_EQ(sync.get(1), 0ULL);
+ ASSERT_EQ(sync.get(5), 42ULL);
+ ASSERT_EQ(sync.get(9), 0ULL);
+ ASSERT_EQ(sync.get(10), 1ULL);
+ sync.Reset(&cache);
+ }
+ {
+ ThreadClock vector1(10);
+ vector1.tick();
+ ThreadClock vector2(20);
+ vector2.tick();
+ SyncClock sync;
+ vector1.release(&cache, &sync);
+ vector2.release(&cache, &sync);
+ ASSERT_EQ(sync.size(), 21U);
+ ASSERT_EQ(sync.get(0), 0ULL);
+ ASSERT_EQ(sync.get(10), 1ULL);
+ ASSERT_EQ(sync.get(19), 0ULL);
+ ASSERT_EQ(sync.get(20), 1ULL);
+ sync.Reset(&cache);
+ }
+ {
+ ThreadClock vector(100);
+ vector.tick();
+ vector.set(5, 42);
+ vector.set(90, 84);
+ SyncClock sync;
+ vector.release(&cache, &sync);
+ ASSERT_EQ(sync.size(), 101U);
+ ASSERT_EQ(sync.get(0), 0ULL);
+ ASSERT_EQ(sync.get(1), 0ULL);
+ ASSERT_EQ(sync.get(5), 42ULL);
+ ASSERT_EQ(sync.get(60), 0ULL);
+ ASSERT_EQ(sync.get(70), 0ULL);
+ ASSERT_EQ(sync.get(90), 84ULL);
+ ASSERT_EQ(sync.get(99), 0ULL);
+ ASSERT_EQ(sync.get(100), 1ULL);
+ sync.Reset(&cache);
+ }
+ {
+ ThreadClock vector1(10);
+ vector1.tick();
+ ThreadClock vector2(100);
+ vector2.tick();
+ SyncClock sync;
+ vector1.release(&cache, &sync);
+ vector2.release(&cache, &sync);
+ ASSERT_EQ(sync.size(), 101U);
+ ASSERT_EQ(sync.get(0), 0ULL);
+ ASSERT_EQ(sync.get(10), 1ULL);
+ ASSERT_EQ(sync.get(99), 0ULL);
+ ASSERT_EQ(sync.get(100), 1ULL);
+ sync.Reset(&cache);
+ }
+}
+
const int kThreads = 4;
const int kClocks = 4;
@@ -257,31 +334,31 @@ static bool ClockFuzzer(bool printing) {
if (printing)
printf("acquire thr%d <- clk%d\n", tid, cid);
thr0[tid]->acquire(sync0[cid]);
- thr1[tid]->acquire(sync1[cid]);
+ thr1[tid]->acquire(&cache, sync1[cid]);
break;
case 1:
if (printing)
printf("release thr%d -> clk%d\n", tid, cid);
thr0[tid]->release(sync0[cid]);
- thr1[tid]->release(sync1[cid]);
+ thr1[tid]->release(&cache, sync1[cid]);
break;
case 2:
if (printing)
printf("acq_rel thr%d <> clk%d\n", tid, cid);
thr0[tid]->acq_rel(sync0[cid]);
- thr1[tid]->acq_rel(sync1[cid]);
+ thr1[tid]->acq_rel(&cache, sync1[cid]);
break;
case 3:
if (printing)
printf("rel_str thr%d >> clk%d\n", tid, cid);
thr0[tid]->ReleaseStore(sync0[cid]);
- thr1[tid]->ReleaseStore(sync1[cid]);
+ thr1[tid]->ReleaseStore(&cache, sync1[cid]);
break;
case 4:
if (printing)
printf("reset clk%d\n", cid);
sync0[cid]->Reset();
- sync1[cid]->Reset();
+ sync1[cid]->Reset(&cache);
break;
case 5:
if (printing)
@@ -331,6 +408,10 @@ static bool ClockFuzzer(bool printing) {
return false;
}
}
+
+ for (unsigned i = 0; i < kClocks; i++) {
+ sync1[i]->Reset(&cache);
+ }
return true;
}
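One discipline the updated tests encode: ~SyncClock() now CHECKs that the clock holds no storage, so any SyncClock that was ever released into must be Reset(&cache) before it leaves scope. A hypothetical test showing the shape, using the file-scope 'cache' added above:

TEST(Clock, ResetBeforeDestruction) {  // hypothetical name, pattern only
  ThreadClock thr(0);
  thr.tick();
  SyncClock sync;
  thr.release(&cache, &sync);  // sync now owns a slab block
  ASSERT_EQ(sync.size(), 1U);
  sync.Reset(&cache);          // mandatory: returns the block to the cache
}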
diff --git a/lib/tsan/tests/unit/tsan_sync_test.cc b/lib/tsan/tests/unit/tsan_sync_test.cc
index 6f36c64a5..d3616a1a4 100644
--- a/lib/tsan/tests/unit/tsan_sync_test.cc
+++ b/lib/tsan/tests/unit/tsan_sync_test.cc
@@ -114,7 +114,7 @@ TEST(MetaMap, ResetSync) {
u64 block[1] = {}; // fake malloc block
m->AllocBlock(thr, 0, (uptr)&block[0], 1 * sizeof(u64));
SyncVar *s = m->GetOrCreateAndLock(thr, 0, (uptr)&block[0], true);
- s->Reset();
+ s->Reset(thr);
s->mtx.Unlock();
uptr sz = m->FreeBlock(thr, 0, (uptr)&block[0]);
EXPECT_EQ(sz, 1 * sizeof(u64));