author    Ian Lance Taylor <iant@golang.org>  2018-01-09 01:23:08 +0000
committer Ian Lance Taylor <ian@gcc.gnu.org>  2018-01-09 01:23:08 +0000
commit    1a2f01efa63036a5104f203a4789e682c0e0915d (patch)
tree      373e15778dc8295354584e1f86915ae493b604ff /libgo/go/runtime
parent    8799df67f2dab88f9fda11739c501780a85575e2 (diff)
libgo: update to Go1.10beta1
Update the Go library to the 1.10beta1 release.

Requires a few changes to the compiler for modifications to the map
runtime code, and to handle some nowritebarrier cases in the runtime.

Reviewed-on: https://go-review.googlesource.com/86455

gotools/:
	* Makefile.am (go_cmd_vet_files): New variable.
	(go_cmd_buildid_files, go_cmd_test2json_files): New variables.
	(s-zdefaultcc): Change from constants to functions.
	(noinst_PROGRAMS): Add vet, buildid, and test2json.
	(cgo$(EXEEXT)): Link against $(LIBGOTOOL).
	(vet$(EXEEXT)): New target.
	(buildid$(EXEEXT)): New target.
	(test2json$(EXEEXT)): New target.
	(install-exec-local): Install all $(noinst_PROGRAMS).
	(uninstall-local): Uninstall all $(noinst_PROGRAMS).
	(check-go-tool): Depend on $(noinst_PROGRAMS). Copy down objabi.go.
	(check-runtime): Depend on $(noinst_PROGRAMS).
	(check-cgo-test, check-carchive-test): Likewise.
	(check-vet): New target.
	(check): Depend on check-vet. Look at cmd_vet-testlog.
	(.PHONY): Add check-vet.
	* Makefile.in: Rebuild.

From-SVN: r256365
Diffstat (limited to 'libgo/go/runtime')
-rw-r--r-- libgo/go/runtime/alg.go | 9
-rw-r--r-- libgo/go/runtime/append_test.go | 78
-rw-r--r-- libgo/go/runtime/cgo_gccgo.go | 16
-rw-r--r-- libgo/go/runtime/cgocall.go | 6
-rw-r--r-- libgo/go/runtime/cgocheck.go | 4
-rw-r--r-- libgo/go/runtime/chan.go | 74
-rw-r--r-- libgo/go/runtime/chan_test.go | 110
-rw-r--r-- libgo/go/runtime/cpuprof.go | 1
-rw-r--r-- libgo/go/runtime/cputicks.go | 2
-rw-r--r-- libgo/go/runtime/crash_cgo_test.go | 92
-rw-r--r-- libgo/go/runtime/crash_test.go | 47
-rw-r--r-- libgo/go/runtime/crash_unix_test.go | 11
-rw-r--r-- libgo/go/runtime/debug.go | 3
-rw-r--r-- libgo/go/runtime/export_test.go | 43
-rw-r--r-- libgo/go/runtime/extern.go | 4
-rw-r--r-- libgo/go/runtime/gc_test.go | 143
-rw-r--r-- libgo/go/runtime/hash32.go | 26
-rw-r--r-- libgo/go/runtime/hash64.go | 22
-rw-r--r-- libgo/go/runtime/hash_test.go | 34
-rw-r--r-- libgo/go/runtime/hashmap.go | 695
-rw-r--r-- libgo/go/runtime/hashmap_fast.go | 927
-rw-r--r-- libgo/go/runtime/heapdump.go | 11
-rw-r--r-- libgo/go/runtime/internal/atomic/atomic_test.go | 2
-rw-r--r-- libgo/go/runtime/internal/sys/sys.go | 4
-rw-r--r-- libgo/go/runtime/lock_sema.go | 8
-rw-r--r-- libgo/go/runtime/malloc.go | 82
-rw-r--r-- libgo/go/runtime/malloc_test.go | 5
-rw-r--r-- libgo/go/runtime/map_test.go | 222
-rw-r--r-- libgo/go/runtime/mbarrier.go | 58
-rw-r--r-- libgo/go/runtime/mbitmap.go | 26
-rw-r--r-- libgo/go/runtime/mcache.go | 3
-rw-r--r-- libgo/go/runtime/mem_gccgo.go | 54
-rw-r--r-- libgo/go/runtime/memmove_test.go | 9
-rw-r--r-- libgo/go/runtime/mfinal.go | 15
-rw-r--r-- libgo/go/runtime/mfinal_test.go | 21
-rw-r--r-- libgo/go/runtime/mgc.go | 322
-rw-r--r-- libgo/go/runtime/mgc_gccgo.go | 23
-rw-r--r-- libgo/go/runtime/mgclarge.go | 4
-rw-r--r-- libgo/go/runtime/mgcmark.go | 47
-rw-r--r-- libgo/go/runtime/mgcwork.go | 56
-rw-r--r-- libgo/go/runtime/mheap.go | 81
-rw-r--r-- libgo/go/runtime/mksizeclasses.go | 13
-rw-r--r-- libgo/go/runtime/mstats.go | 16
-rw-r--r-- libgo/go/runtime/mwbbuf.go | 248
-rw-r--r-- libgo/go/runtime/netpoll_kqueue.go | 19
-rw-r--r-- libgo/go/runtime/netpoll_windows.go | 2
-rw-r--r-- libgo/go/runtime/os_freebsd.go | 11
-rw-r--r-- libgo/go/runtime/os_linux.go | 71
-rw-r--r-- libgo/go/runtime/os_linux_ppc64x.go | 53
-rw-r--r-- libgo/go/runtime/os_netbsd.go | 16
-rw-r--r-- libgo/go/runtime/panic.go | 27
-rw-r--r-- libgo/go/runtime/pprof/pprof.go | 94
-rw-r--r-- libgo/go/runtime/pprof/pprof_test.go | 291
-rw-r--r-- libgo/go/runtime/pprof/proto.go | 4
-rw-r--r-- libgo/go/runtime/print.go | 18
-rw-r--r-- libgo/go/runtime/proc.go | 641
-rw-r--r-- libgo/go/runtime/proc_runtime_test.go | 2
-rw-r--r-- libgo/go/runtime/proc_test.go | 153
-rw-r--r-- libgo/go/runtime/runtime-lldb_test.go | 84
-rw-r--r-- libgo/go/runtime/runtime.go | 6
-rw-r--r-- libgo/go/runtime/runtime1.go | 11
-rw-r--r-- libgo/go/runtime/runtime2.go | 141
-rw-r--r-- libgo/go/runtime/runtime_mmap_test.go | 29
-rw-r--r-- libgo/go/runtime/runtime_test.go | 9
-rw-r--r-- libgo/go/runtime/rwmutex_test.go | 5
-rw-r--r-- libgo/go/runtime/select.go | 76
-rw-r--r-- libgo/go/runtime/sema.go | 5
-rw-r--r-- libgo/go/runtime/signal_gccgo.go | 5
-rw-r--r-- libgo/go/runtime/signal_sighandler.go | 6
-rw-r--r-- libgo/go/runtime/signal_unix.go | 95
-rw-r--r-- libgo/go/runtime/sigqueue.go | 35
-rw-r--r-- libgo/go/runtime/sizeclasses.go | 134
-rw-r--r-- libgo/go/runtime/slice.go | 42
-rw-r--r-- libgo/go/runtime/string.go | 6
-rw-r--r-- libgo/go/runtime/stubs.go | 29
-rw-r--r-- libgo/go/runtime/stubs2.go | 7
-rw-r--r-- libgo/go/runtime/testdata/testprog/gc.go | 3
-rw-r--r-- libgo/go/runtime/testdata/testprog/gettid.go | 29
-rw-r--r-- libgo/go/runtime/testdata/testprog/gettid_none.go | 15
-rw-r--r-- libgo/go/runtime/testdata/testprog/lockosthread.go | 94
-rw-r--r-- libgo/go/runtime/testdata/testprog/syscall_windows.go | 45
-rw-r--r-- libgo/go/runtime/testdata/testprogcgo/callback.go | 6
-rw-r--r-- libgo/go/runtime/testdata/testprogcgo/catchpanic.go | 46
-rw-r--r-- libgo/go/runtime/testdata/testprogcgo/cgo.go | 6
-rw-r--r-- libgo/go/runtime/testdata/testprogcgo/lockosthread.c | 13
-rw-r--r-- libgo/go/runtime/testdata/testprogcgo/lockosthread.go | 111
-rw-r--r-- libgo/go/runtime/testdata/testprogcgo/sigstack.go | 95
-rw-r--r-- libgo/go/runtime/testdata/testprogcgo/stack_windows.go | 54
-rw-r--r-- libgo/go/runtime/time.go | 256
-rw-r--r-- libgo/go/runtime/trace.go | 112
-rw-r--r-- libgo/go/runtime/trace/example_test.go | 41
-rw-r--r-- libgo/go/runtime/trace/trace.go | 37
-rw-r--r-- libgo/go/runtime/trace/trace_test.go | 57
-rw-r--r-- libgo/go/runtime/traceback_gccgo.go | 4
94 files changed, 4855 insertions, 1973 deletions
diff --git a/libgo/go/runtime/alg.go b/libgo/go/runtime/alg.go
index 174320fe85a..7c98f1bc940 100644
--- a/libgo/go/runtime/alg.go
+++ b/libgo/go/runtime/alg.go
@@ -57,18 +57,15 @@ const (
func memhash0(p unsafe.Pointer, h uintptr) uintptr {
return h
}
+
func memhash8(p unsafe.Pointer, h uintptr) uintptr {
return memhash(p, h, 1)
}
+
func memhash16(p unsafe.Pointer, h uintptr) uintptr {
return memhash(p, h, 2)
}
-func memhash32(p unsafe.Pointer, h uintptr) uintptr {
- return memhash(p, h, 4)
-}
-func memhash64(p unsafe.Pointer, h uintptr) uintptr {
- return memhash(p, h, 8)
-}
+
func memhash128(p unsafe.Pointer, h uintptr) uintptr {
return memhash(p, h, 16)
}
diff --git a/libgo/go/runtime/append_test.go b/libgo/go/runtime/append_test.go
index 6bd8f3bd951..ef1e812c0dc 100644
--- a/libgo/go/runtime/append_test.go
+++ b/libgo/go/runtime/append_test.go
@@ -18,42 +18,52 @@ func BenchmarkMakeSlice(b *testing.B) {
}
}
-func BenchmarkGrowSliceBytes(b *testing.B) {
- b.StopTimer()
- var x = make([]byte, 9)
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- _ = append([]byte(nil), x...)
- }
-}
-
-func BenchmarkGrowSliceInts(b *testing.B) {
- b.StopTimer()
- var x = make([]int, 9)
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- _ = append([]int(nil), x...)
- }
-}
-
-func BenchmarkGrowSlicePtr(b *testing.B) {
- b.StopTimer()
- var x = make([]*byte, 9)
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- _ = append([]*byte(nil), x...)
- }
-}
+type (
+ struct24 struct{ a, b, c int64 }
+ struct32 struct{ a, b, c, d int64 }
+ struct40 struct{ a, b, c, d, e int64 }
+)
-type struct24 struct{ a, b, c int64 }
+func BenchmarkGrowSlice(b *testing.B) {
+ b.Run("Byte", func(b *testing.B) {
+ x := make([]byte, 9)
+ for i := 0; i < b.N; i++ {
+ _ = append([]byte(nil), x...)
+ }
+ })
+ b.Run("Int", func(b *testing.B) {
+ x := make([]int, 9)
+ for i := 0; i < b.N; i++ {
+ _ = append([]int(nil), x...)
+ }
+ })
+ b.Run("Ptr", func(b *testing.B) {
+ x := make([]*byte, 9)
+ for i := 0; i < b.N; i++ {
+ _ = append([]*byte(nil), x...)
+ }
+ })
+ b.Run("Struct", func(b *testing.B) {
+ b.Run("24", func(b *testing.B) {
+ x := make([]struct24, 9)
+ for i := 0; i < b.N; i++ {
+ _ = append([]struct24(nil), x...)
+ }
+ })
+ b.Run("32", func(b *testing.B) {
+ x := make([]struct32, 9)
+ for i := 0; i < b.N; i++ {
+ _ = append([]struct32(nil), x...)
+ }
+ })
+ b.Run("40", func(b *testing.B) {
+ x := make([]struct40, 9)
+ for i := 0; i < b.N; i++ {
+ _ = append([]struct40(nil), x...)
+ }
+ })
-func BenchmarkGrowSliceStruct24Bytes(b *testing.B) {
- b.StopTimer()
- var x = make([]struct24, 9)
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- _ = append([]struct24(nil), x...)
- }
+ })
}
func BenchmarkAppend(b *testing.B) {
diff --git a/libgo/go/runtime/cgo_gccgo.go b/libgo/go/runtime/cgo_gccgo.go
index c3bf9552ea8..05be4964500 100644
--- a/libgo/go/runtime/cgo_gccgo.go
+++ b/libgo/go/runtime/cgo_gccgo.go
@@ -27,6 +27,13 @@ var iscgo bool
// The extra M must be created before any C/C++ code calls cgocallback.
var cgoHasExtraM bool
+// cgoAlwaysFalse is a boolean value that is always false.
+// The cgo-generated code says if cgoAlwaysFalse { cgoUse(p) }.
+// The compiler cannot see that cgoAlwaysFalse is always false,
+// so it emits the test and keeps the call, giving the desired
+// escape analysis result. The test is cheaper than the call.
+var cgoAlwaysFalse bool
+
// Cgocall prepares to call from code written in Go to code written in
// C/C++. This takes the current goroutine out of the Go scheduler, as
// though it were making a system call. Otherwise the program can
@@ -37,12 +44,11 @@ var cgoHasExtraM bool
// defer syscall.Cgocalldone()
// cfunction()
func Cgocall() {
- lockOSThread()
mp := getg().m
mp.ncgocall++
mp.ncgo++
- mp.incgo = true
entersyscall(0)
+ mp.incgo = true
}
// CgocallDone prepares to return to Go code from C/C++ code.
@@ -59,8 +65,6 @@ func CgocallDone() {
if readgstatus(gp)&^_Gscan == _Gsyscall {
exitsyscall(0)
}
-
- unlockOSThread()
}
// CgocallBack is used when calling from C/C++ code into Go code.
@@ -78,6 +82,8 @@ func CgocallBack() {
mp.dropextram = true
}
+ lockOSThread()
+
exitsyscall(0)
gp.m.incgo = false
@@ -100,6 +106,8 @@ func CgocallBack() {
// CgocallBackDone prepares to return to C/C++ code that has called
// into Go code.
func CgocallBackDone() {
+ unlockOSThread()
+
// If we are the top level Go function called from C/C++, then
// we need to release the m. But don't release it if we are
// panicing; since this is the top level, we are going to
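[Editor's illustration, not part of the patch.] The cgoAlwaysFalse comment added above describes the trick cgo-generated stubs use to force an argument to escape: guard a call to cgoUse with a condition the compiler cannot prove false. A minimal standalone sketch of that pattern follows; cgoAlwaysFalse and cgoUse here are stand-ins modeled on the runtime names in the comment, and callC is a hypothetical wrapper, not runtime code.

package main

import "fmt"

// Stand-in for runtime.cgoAlwaysFalse: never set, but the compiler
// cannot see that, so the guarded call below is kept.
var cgoAlwaysFalse bool

// Stand-in for runtime.cgoUse: exists only so the compiler treats the
// argument as escaping; it must never actually run.
func cgoUse(p interface{}) { panic("cgoUse should not be called") }

// callC mimics a cgo-generated wrapper. The dead branch costs one test
// at run time but gives the desired escape-analysis result for p.
func callC(p *int) {
	if cgoAlwaysFalse {
		cgoUse(p)
	}
	fmt.Println("would call C with", *p) // placeholder for the real C call
}

func main() {
	n := 42
	callC(&n)
}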
diff --git a/libgo/go/runtime/cgocall.go b/libgo/go/runtime/cgocall.go
index 4a416fbf6ad..9d161202dfa 100644
--- a/libgo/go/runtime/cgocall.go
+++ b/libgo/go/runtime/cgocall.go
@@ -234,10 +234,8 @@ func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) {
// No more possible pointers.
break
}
- if hbits.isPointer() {
- if cgoIsGoPointer(*(*unsafe.Pointer)(unsafe.Pointer(base + i))) {
- panic(errorString(msg))
- }
+ if hbits.isPointer() && cgoIsGoPointer(*(*unsafe.Pointer)(unsafe.Pointer(base + i))) {
+ panic(errorString(msg))
}
hbits = hbits.next()
}
diff --git a/libgo/go/runtime/cgocheck.go b/libgo/go/runtime/cgocheck.go
index 30f054b3633..b85b519460e 100644
--- a/libgo/go/runtime/cgocheck.go
+++ b/libgo/go/runtime/cgocheck.go
@@ -16,6 +16,10 @@ const cgoWriteBarrierFail = "Go pointer stored into non-Go memory"
// cgoCheckWriteBarrier is called whenever a pointer is stored into memory.
// It throws if the program is storing a Go pointer into non-Go memory.
+//
+// This is called from the write barrier, so its entire call tree must
+// be nosplit.
+//
//go:nosplit
//go:nowritebarrier
func cgoCheckWriteBarrier(dst *uintptr, src uintptr) {
diff --git a/libgo/go/runtime/chan.go b/libgo/go/runtime/chan.go
index 7bb919c41db..8db728d5430 100644
--- a/libgo/go/runtime/chan.go
+++ b/libgo/go/runtime/chan.go
@@ -64,11 +64,19 @@ type waitq struct {
}
//go:linkname reflect_makechan reflect.makechan
-func reflect_makechan(t *chantype, size int64) *hchan {
+func reflect_makechan(t *chantype, size int) *hchan {
return makechan(t, size)
}
-func makechan(t *chantype, size int64) *hchan {
+func makechan64(t *chantype, size int64) *hchan {
+ if int64(int(size)) != size {
+ panic(plainError("makechan: size out of range"))
+ }
+
+ return makechan(t, int(size))
+}
+
+func makechan(t *chantype, size int) *hchan {
elem := t.elem
// compiler checks this but be safe.
@@ -78,29 +86,33 @@ func makechan(t *chantype, size int64) *hchan {
if hchanSize%maxAlign != 0 || elem.align > maxAlign {
throw("makechan: bad alignment")
}
- if size < 0 || int64(uintptr(size)) != size || (elem.size > 0 && uintptr(size) > (_MaxMem-hchanSize)/elem.size) {
+
+ if size < 0 || uintptr(size) > maxSliceCap(elem.size) || uintptr(size)*elem.size > _MaxMem-hchanSize {
panic(plainError("makechan: size out of range"))
}
+ // Hchan does not contain pointers interesting for GC when elements stored in buf do not contain pointers.
+ // buf points into the same allocation, elemtype is persistent.
+ // SudoG's are referenced from their owning thread so they can't be collected.
+ // TODO(dvyukov,rlh): Rethink when collector can move allocated objects.
var c *hchan
- if elem.kind&kindNoPointers != 0 || size == 0 {
- // Allocate memory in one call.
- // Hchan does not contain pointers interesting for GC in this case:
- // buf points into the same allocation, elemtype is persistent.
- // SudoG's are referenced from their owning thread so they can't be collected.
- // TODO(dvyukov,rlh): Rethink when collector can move allocated objects.
+ switch {
+ case size == 0 || elem.size == 0:
+ // Queue or element size is zero.
+ c = (*hchan)(mallocgc(hchanSize, nil, true))
+ // Race detector uses this location for synchronization.
+ c.buf = unsafe.Pointer(c)
+ case elem.kind&kindNoPointers != 0:
+ // Elements do not contain pointers.
+ // Allocate hchan and buf in one call.
c = (*hchan)(mallocgc(hchanSize+uintptr(size)*elem.size, nil, true))
- if size > 0 && elem.size != 0 {
- c.buf = add(unsafe.Pointer(c), hchanSize)
- } else {
- // race detector uses this location for synchronization
- // Also prevents us from pointing beyond the allocation (see issue 9401).
- c.buf = unsafe.Pointer(c)
- }
- } else {
+ c.buf = add(unsafe.Pointer(c), hchanSize)
+ default:
+ // Elements contain pointers.
c = new(hchan)
- c.buf = newarray(elem, int(size))
+ c.buf = mallocgc(uintptr(size)*elem.size, elem, true)
}
+
c.elemsize = uint16(elem.size)
c.elemtype = elem
c.dataqsiz = uint(size)
@@ -119,7 +131,7 @@ func chanbuf(c *hchan, i uint) unsafe.Pointer {
// entry point for c <- x from compiled code
//go:nosplit
func chansend1(c *hchan, elem unsafe.Pointer) {
- chansend(c, elem, true, getcallerpc(unsafe.Pointer(&c)))
+ chansend(c, elem, true, getcallerpc())
}
/*
@@ -223,7 +235,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool {
mysg.elem = ep
mysg.waitlink = nil
mysg.g = gp
- mysg.selectdone = nil
+ mysg.isSelect = false
mysg.c = c
gp.waiting = mysg
gp.param = nil
@@ -331,7 +343,7 @@ func closechan(c *hchan) {
}
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&c))
+ callerpc := getcallerpc()
racewritepc(unsafe.Pointer(c), callerpc, funcPC(closechan))
racerelease(unsafe.Pointer(c))
}
@@ -508,7 +520,7 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool)
mysg.waitlink = nil
gp.waiting = mysg
mysg.g = gp
- mysg.selectdone = nil
+ mysg.isSelect = false
mysg.c = c
gp.param = nil
c.recvq.enqueue(mysg)
@@ -603,7 +615,7 @@ func recv(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) {
// }
//
func selectnbsend(c *hchan, elem unsafe.Pointer) (selected bool) {
- return chansend(c, elem, false, getcallerpc(unsafe.Pointer(&c)))
+ return chansend(c, elem, false, getcallerpc())
}
// compiler implements
@@ -653,7 +665,7 @@ func selectnbrecv2(elem unsafe.Pointer, received *bool, c *hchan) (selected bool
//go:linkname reflect_chansend reflect.chansend
func reflect_chansend(c *hchan, elem unsafe.Pointer, nb bool) (selected bool) {
- return chansend(c, elem, !nb, getcallerpc(unsafe.Pointer(&c)))
+ return chansend(c, elem, !nb, getcallerpc())
}
//go:linkname reflect_chanrecv reflect.chanrecv
@@ -712,10 +724,16 @@ func (q *waitq) dequeue() *sudog {
sgp.next = nil // mark as removed (see dequeueSudog)
}
- // if sgp participates in a select and is already signaled, ignore it
- if sgp.selectdone != nil {
- // claim the right to signal
- if *sgp.selectdone != 0 || !atomic.Cas(sgp.selectdone, 0, 1) {
+ // if a goroutine was put on this queue because of a
+ // select, there is a small window between the goroutine
+ // being woken up by a different case and it grabbing the
+ // channel locks. Once it has the lock
+ // it removes itself from the queue, so we won't see it after that.
+ // We use a flag in the G struct to tell us when someone
+ // else has won the race to signal this goroutine but the goroutine
+ // hasn't removed itself from the queue yet.
+ if sgp.isSelect {
+ if !atomic.Cas(&sgp.g.selectDone, 0, 1) {
continue
}
}
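[Editor's illustration, not part of the patch.] The new dequeue logic above drops the per-sudog selectdone word and instead claims the waiting goroutine with a single compare-and-swap on g.selectDone: only the case that wins the CAS may signal the goroutine, and losers skip it. A self-contained sketch of that claim pattern, using sync/atomic in place of the runtime's internal atomic package (all names here are illustrative):

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// selectDone plays the role of g.selectDone: 0 means no channel case has
// claimed the waiting goroutine yet, 1 means some case already won.
var selectDone uint32

// tryWake is what each racing case effectively does in dequeue(): the CAS
// winner signals the goroutine, everyone else leaves it alone.
func tryWake(caseName string, wg *sync.WaitGroup) {
	defer wg.Done()
	if atomic.CompareAndSwapUint32(&selectDone, 0, 1) {
		fmt.Println(caseName, "won the race and signals the goroutine")
	} else {
		fmt.Println(caseName, "lost the race and skips the goroutine")
	}
}

func main() {
	var wg sync.WaitGroup
	wg.Add(2)
	go tryWake("case c3", &wg)
	go tryWake("case c4", &wg)
	wg.Wait()
}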
diff --git a/libgo/go/runtime/chan_test.go b/libgo/go/runtime/chan_test.go
index b96af8af5d7..29fb321c926 100644
--- a/libgo/go/runtime/chan_test.go
+++ b/libgo/go/runtime/chan_test.go
@@ -5,6 +5,8 @@
package runtime_test
import (
+ "internal/testenv"
+ "math"
"runtime"
"sync"
"sync/atomic"
@@ -435,6 +437,65 @@ func TestSelectStress(t *testing.T) {
wg.Wait()
}
+func TestSelectFairness(t *testing.T) {
+ const trials = 10000
+ if runtime.GOOS == "linux" && runtime.GOARCH == "ppc64le" {
+ testenv.SkipFlaky(t, 22047)
+ }
+ c1 := make(chan byte, trials+1)
+ c2 := make(chan byte, trials+1)
+ for i := 0; i < trials+1; i++ {
+ c1 <- 1
+ c2 <- 2
+ }
+ c3 := make(chan byte)
+ c4 := make(chan byte)
+ out := make(chan byte)
+ done := make(chan byte)
+ var wg sync.WaitGroup
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ for {
+ var b byte
+ select {
+ case b = <-c3:
+ case b = <-c4:
+ case b = <-c1:
+ case b = <-c2:
+ }
+ select {
+ case out <- b:
+ case <-done:
+ return
+ }
+ }
+ }()
+ cnt1, cnt2 := 0, 0
+ for i := 0; i < trials; i++ {
+ switch b := <-out; b {
+ case 1:
+ cnt1++
+ case 2:
+ cnt2++
+ default:
+ t.Fatalf("unexpected value %d on channel", b)
+ }
+ }
+ // If the select in the goroutine is fair,
+ // cnt1 and cnt2 should be about the same value.
+ // With 10,000 trials, the expected margin of error at
+ // a confidence level of five nines is 4.4172 / (2 * Sqrt(10000)).
+ r := float64(cnt1) / trials
+ e := math.Abs(r - 0.5)
+ t.Log(cnt1, cnt2, r, e)
+ if e > 4.4172/(2*math.Sqrt(trials)) {
+ t.Errorf("unfair select: in %d trials, results were %d, %d", trials, cnt1, cnt2)
+ }
+ close(done)
+ wg.Wait()
+}
+
func TestChanSendInterface(t *testing.T) {
type mt struct{}
m := &mt{}
@@ -674,6 +735,55 @@ done:
<-ready2
}
+type struct0 struct{}
+
+func BenchmarkMakeChan(b *testing.B) {
+ b.Run("Byte", func(b *testing.B) {
+ var x chan byte
+ for i := 0; i < b.N; i++ {
+ x = make(chan byte, 8)
+ }
+ close(x)
+ })
+ b.Run("Int", func(b *testing.B) {
+ var x chan int
+ for i := 0; i < b.N; i++ {
+ x = make(chan int, 8)
+ }
+ close(x)
+ })
+ b.Run("Ptr", func(b *testing.B) {
+ var x chan *byte
+ for i := 0; i < b.N; i++ {
+ x = make(chan *byte, 8)
+ }
+ close(x)
+ })
+ b.Run("Struct", func(b *testing.B) {
+ b.Run("0", func(b *testing.B) {
+ var x chan struct0
+ for i := 0; i < b.N; i++ {
+ x = make(chan struct0, 8)
+ }
+ close(x)
+ })
+ b.Run("32", func(b *testing.B) {
+ var x chan struct32
+ for i := 0; i < b.N; i++ {
+ x = make(chan struct32, 8)
+ }
+ close(x)
+ })
+ b.Run("40", func(b *testing.B) {
+ var x chan struct40
+ for i := 0; i < b.N; i++ {
+ x = make(chan struct40, 8)
+ }
+ close(x)
+ })
+ })
+}
+
func BenchmarkChanNonblocking(b *testing.B) {
myc := make(chan int)
b.RunParallel(func(pb *testing.PB) {
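[Editor's note on the constant in TestSelectFairness above.] The test bounds |r - 0.5| by 4.4172/(2*Sqrt(10000)). As a sanity check of where that comes from: under a fair select each trial is a Bernoulli(1/2) draw, so the sample proportion r = cnt1/n has standard error 1/(2*sqrt(n)), and 4.4172 is approximately the two-sided normal quantile for a 1 - 10^-5 ("five nines") confidence level:

\[
\mathrm{SE}(r) = \sqrt{\frac{p(1-p)}{n}} = \frac{1}{2\sqrt{n}} \quad (p = \tfrac{1}{2}), \qquad
P\Bigl(|r - 0.5| > \frac{z}{2\sqrt{n}}\Bigr) \approx 10^{-5} \ \text{for}\ z \approx 4.4172 .
\]

With n = 10000 this threshold is about 0.0221, i.e. roughly a 221-trial imbalance before the test reports an unfair select.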
diff --git a/libgo/go/runtime/cpuprof.go b/libgo/go/runtime/cpuprof.go
index b031b1a5e75..91cdf2b5594 100644
--- a/libgo/go/runtime/cpuprof.go
+++ b/libgo/go/runtime/cpuprof.go
@@ -160,6 +160,7 @@ func (p *cpuProfile) addExtra() {
funcPC(_ExternalCode) + sys.PCQuantum,
}
cpuprof.log.write(nil, 0, hdr[:], lostStk[:])
+ p.lostExtra = 0
}
}
diff --git a/libgo/go/runtime/cputicks.go b/libgo/go/runtime/cputicks.go
index ee15aca24ef..7e62dc1e108 100644
--- a/libgo/go/runtime/cputicks.go
+++ b/libgo/go/runtime/cputicks.go
@@ -4,6 +4,6 @@
package runtime
-// careful: cputicks is not guaranteed to be monotonic! In particular, we have
+// careful: cputicks is not guaranteed to be monotonic! In particular, we have
// noticed drift between cpus on certain os/arch combinations. See issue 8976.
func cputicks() int64
diff --git a/libgo/go/runtime/crash_cgo_test.go b/libgo/go/runtime/crash_cgo_test.go
index b79873185cc..7e14e573bc5 100644
--- a/libgo/go/runtime/crash_cgo_test.go
+++ b/libgo/go/runtime/crash_cgo_test.go
@@ -13,6 +13,7 @@ import (
"os"
"os/exec"
"runtime"
+ "strconv"
"strings"
"testing"
"time"
@@ -113,7 +114,7 @@ func TestCgoExternalThreadSIGPROF(t *testing.T) {
t.Fatal(err)
}
- got, err := testEnv(exec.Command(exe, "CgoExternalThreadSIGPROF")).CombinedOutput()
+ got, err := testenv.CleanCmdEnv(exec.Command(exe, "CgoExternalThreadSIGPROF")).CombinedOutput()
if err != nil {
t.Fatalf("exit status: %v\n%s", err, got)
}
@@ -136,7 +137,7 @@ func TestCgoExternalThreadSignal(t *testing.T) {
t.Fatal(err)
}
- got, err := testEnv(exec.Command(exe, "CgoExternalThreadSIGPROF")).CombinedOutput()
+ got, err := testenv.CleanCmdEnv(exec.Command(exe, "CgoExternalThreadSIGPROF")).CombinedOutput()
if err != nil {
t.Fatalf("exit status: %v\n%s", err, got)
}
@@ -203,14 +204,14 @@ func TestCgoCheckBytes(t *testing.T) {
const tries = 10
var tot1, tot2 time.Duration
for i := 0; i < tries; i++ {
- cmd := testEnv(exec.Command(exe, "CgoCheckBytes"))
+ cmd := testenv.CleanCmdEnv(exec.Command(exe, "CgoCheckBytes"))
cmd.Env = append(cmd.Env, "GODEBUG=cgocheck=0", fmt.Sprintf("GO_CGOCHECKBYTES_TRY=%d", i))
start := time.Now()
cmd.Run()
d1 := time.Since(start)
- cmd = testEnv(exec.Command(exe, "CgoCheckBytes"))
+ cmd = testenv.CleanCmdEnv(exec.Command(exe, "CgoCheckBytes"))
cmd.Env = append(cmd.Env, fmt.Sprintf("GO_CGOCHECKBYTES_TRY=%d", i))
start = time.Now()
@@ -251,7 +252,7 @@ func TestCgoCCodeSIGPROF(t *testing.T) {
func TestCgoCrashTraceback(t *testing.T) {
t.Parallel()
- if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" {
+ if runtime.GOOS != "linux" || (runtime.GOARCH != "amd64" && runtime.GOARCH != "ppc64le") {
t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH)
}
if runtime.Compiler == "gccgo" {
@@ -279,7 +280,7 @@ func TestCgoTracebackContext(t *testing.T) {
func testCgoPprof(t *testing.T, buildArg, runArg string) {
t.Parallel()
- if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" {
+ if runtime.GOOS != "linux" || (runtime.GOARCH != "amd64" && runtime.GOARCH != "ppc64le") {
t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH)
}
if runtime.Compiler == "gccgo" {
@@ -292,7 +293,7 @@ func testCgoPprof(t *testing.T, buildArg, runArg string) {
t.Fatal(err)
}
- got, err := testEnv(exec.Command(exe, runArg)).CombinedOutput()
+ got, err := testenv.CleanCmdEnv(exec.Command(exe, runArg)).CombinedOutput()
if err != nil {
if testenv.Builder() == "linux-amd64-alpine" {
// See Issue 18243 and Issue 19938.
@@ -304,7 +305,7 @@ func testCgoPprof(t *testing.T, buildArg, runArg string) {
defer os.Remove(fn)
for try := 0; try < 2; try++ {
- cmd := testEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-top", "-nodecount=1"))
+ cmd := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-top", "-nodecount=1"))
// Check that pprof works both with and without explicit executable on command line.
if try == 0 {
cmd.Args = append(cmd.Args, exe, fn)
@@ -339,7 +340,7 @@ func TestCgoPprof(t *testing.T) {
}
func TestCgoPprofPIE(t *testing.T) {
- testCgoPprof(t, "-ldflags=-extldflags=-pie", "CgoPprof")
+ testCgoPprof(t, "-buildmode=pie", "CgoPprof")
}
func TestCgoPprofThread(t *testing.T) {
@@ -371,7 +372,7 @@ func TestRaceProf(t *testing.T) {
t.Fatal(err)
}
- got, err := testEnv(exec.Command(exe, "CgoRaceprof")).CombinedOutput()
+ got, err := testenv.CleanCmdEnv(exec.Command(exe, "CgoRaceprof")).CombinedOutput()
if err != nil {
t.Fatal(err)
}
@@ -400,7 +401,7 @@ func TestRaceSignal(t *testing.T) {
t.Fatal(err)
}
- got, err := testEnv(exec.Command(exe, "CgoRaceSignal")).CombinedOutput()
+ got, err := testenv.CleanCmdEnv(exec.Command(exe, "CgoRaceSignal")).CombinedOutput()
if err != nil {
t.Logf("%s\n", got)
t.Fatal(err)
@@ -423,3 +424,72 @@ func TestCgoNumGoroutine(t *testing.T) {
t.Errorf("expected %q got %v", want, got)
}
}
+
+func TestCatchPanic(t *testing.T) {
+ t.Parallel()
+ switch runtime.GOOS {
+ case "plan9", "windows":
+ t.Skipf("no signals on %s", runtime.GOOS)
+ case "darwin":
+ if runtime.GOARCH == "amd64" {
+ t.Skipf("crash() on darwin/amd64 doesn't raise SIGABRT")
+ }
+ }
+
+ testenv.MustHaveGoRun(t)
+
+ exe, err := buildTestProg(t, "testprogcgo")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ for _, early := range []bool{true, false} {
+ cmd := testenv.CleanCmdEnv(exec.Command(exe, "CgoCatchPanic"))
+ // Make sure a panic results in a crash.
+ cmd.Env = append(cmd.Env, "GOTRACEBACK=crash")
+ if early {
+ // Tell testprogcgo to install an early signal handler for SIGABRT
+ cmd.Env = append(cmd.Env, "CGOCATCHPANIC_EARLY_HANDLER=1")
+ }
+ if out, err := cmd.CombinedOutput(); err != nil {
+ t.Errorf("testprogcgo CgoCatchPanic failed: %v\n%s", err, out)
+ }
+ }
+}
+
+func TestCgoLockOSThreadExit(t *testing.T) {
+ switch runtime.GOOS {
+ case "plan9", "windows":
+ t.Skipf("no pthreads on %s", runtime.GOOS)
+ }
+ t.Parallel()
+ testLockOSThreadExit(t, "testprogcgo")
+}
+
+func TestWindowsStackMemoryCgo(t *testing.T) {
+ if runtime.GOOS != "windows" {
+ t.Skip("skipping windows specific test")
+ }
+ testenv.SkipFlaky(t, 22575)
+ o := runTestProg(t, "testprogcgo", "StackMemory")
+ stackUsage, err := strconv.Atoi(o)
+ if err != nil {
+ t.Fatalf("Failed to read stack usage: %v", err)
+ }
+ if expected, got := 100<<10, stackUsage; got > expected {
+ t.Fatalf("expected < %d bytes of memory per thread, got %d", expected, got)
+ }
+}
+
+func TestSigStackSwapping(t *testing.T) {
+ switch runtime.GOOS {
+ case "plan9", "windows":
+ t.Skip("no sigaltstack on %s", runtime.GOOS)
+ }
+ t.Parallel()
+ got := runTestProg(t, "testprogcgo", "SigStack")
+ want := "OK\n"
+ if got != want {
+ t.Errorf("expected %q got %v", want, got)
+ }
+}
diff --git a/libgo/go/runtime/crash_test.go b/libgo/go/runtime/crash_test.go
index 1cde6bf7997..8ec034835ec 100644
--- a/libgo/go/runtime/crash_test.go
+++ b/libgo/go/runtime/crash_test.go
@@ -32,25 +32,6 @@ func TestMain(m *testing.M) {
os.Exit(status)
}
-func testEnv(cmd *exec.Cmd) *exec.Cmd {
- if cmd.Env != nil {
- panic("environment already set")
- }
- for _, env := range os.Environ() {
- // Exclude GODEBUG from the environment to prevent its output
- // from breaking tests that are trying to parse other command output.
- if strings.HasPrefix(env, "GODEBUG=") {
- continue
- }
- // Exclude GOTRACEBACK for the same reason.
- if strings.HasPrefix(env, "GOTRACEBACK=") {
- continue
- }
- cmd.Env = append(cmd.Env, env)
- }
- return cmd
-}
-
var testprog struct {
sync.Mutex
dir string
@@ -62,7 +43,11 @@ type buildexe struct {
err error
}
-func runTestProg(t *testing.T, binary, name string) string {
+func runTestProg(t *testing.T, binary, name string, env ...string) string {
+ if *flagQuick {
+ t.Skip("-quick")
+ }
+
testenv.MustHaveGoBuild(t)
exe, err := buildTestProg(t, binary)
@@ -70,7 +55,11 @@ func runTestProg(t *testing.T, binary, name string) string {
t.Fatal(err)
}
- cmd := testEnv(exec.Command(exe, name))
+ cmd := testenv.CleanCmdEnv(exec.Command(exe, name))
+ cmd.Env = append(cmd.Env, env...)
+ if testing.Short() {
+ cmd.Env = append(cmd.Env, "RUNTIME_TEST_SHORT=1")
+ }
var b bytes.Buffer
cmd.Stdout = &b
cmd.Stderr = &b
@@ -111,6 +100,10 @@ func runTestProg(t *testing.T, binary, name string) string {
}
func buildTestProg(t *testing.T, binary string, flags ...string) (string, error) {
+ if *flagQuick {
+ t.Skip("-quick")
+ }
+
checkStaleRuntime(t)
testprog.Lock()
@@ -139,7 +132,7 @@ func buildTestProg(t *testing.T, binary string, flags ...string) (string, error)
exe := filepath.Join(testprog.dir, name+".exe")
cmd := exec.Command(testenv.GoToolPath(t), append([]string{"build", "-o", exe}, flags...)...)
cmd.Dir = "testdata/" + binary
- out, err := testEnv(cmd).CombinedOutput()
+ out, err := testenv.CleanCmdEnv(cmd).CombinedOutput()
if err != nil {
target.err = fmt.Errorf("building %s %v: %v\n%s", binary, flags, err, out)
testprog.target[name] = target
@@ -158,14 +151,14 @@ var (
func checkStaleRuntime(t *testing.T) {
staleRuntimeOnce.Do(func() {
// 'go run' uses the installed copy of runtime.a, which may be out of date.
- out, err := testEnv(exec.Command(testenv.GoToolPath(t), "list", "-f", "{{.Stale}}", "runtime")).CombinedOutput()
+ out, err := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "list", "-gcflags=all="+os.Getenv("GO_GCFLAGS"), "-f", "{{.Stale}}", "runtime")).CombinedOutput()
if err != nil {
staleRuntimeErr = fmt.Errorf("failed to execute 'go list': %v\n%v", err, string(out))
return
}
if string(out) != "false\n" {
t.Logf("go list -f {{.Stale}} runtime:\n%s", out)
- out, err := testEnv(exec.Command(testenv.GoToolPath(t), "list", "-f", "{{.StaleReason}}", "runtime")).CombinedOutput()
+ out, err := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "list", "-gcflags=all="+os.Getenv("GO_GCFLAGS"), "-f", "{{.StaleReason}}", "runtime")).CombinedOutput()
if err != nil {
t.Logf("go list -f {{.StaleReason}} failed: %v", err)
}
@@ -483,7 +476,7 @@ func TestMemPprof(t *testing.T) {
t.Fatal(err)
}
- got, err := testEnv(exec.Command(exe, "MemProf")).CombinedOutput()
+ got, err := testenv.CleanCmdEnv(exec.Command(exe, "MemProf")).CombinedOutput()
if err != nil {
t.Fatal(err)
}
@@ -491,7 +484,7 @@ func TestMemPprof(t *testing.T) {
defer os.Remove(fn)
for try := 0; try < 2; try++ {
- cmd := testEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-alloc_space", "-top"))
+ cmd := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-alloc_space", "-top"))
// Check that pprof works both with and without explicit executable on command line.
if try == 0 {
cmd.Args = append(cmd.Args, exe, fn)
@@ -606,7 +599,7 @@ func TestPanicRace(t *testing.T) {
const tries = 10
retry:
for i := 0; i < tries; i++ {
- got, err := testEnv(exec.Command(exe, "PanicRace")).CombinedOutput()
+ got, err := testenv.CleanCmdEnv(exec.Command(exe, "PanicRace")).CombinedOutput()
if err == nil {
t.Logf("try %d: program exited successfully, should have failed", i+1)
continue
diff --git a/libgo/go/runtime/crash_unix_test.go b/libgo/go/runtime/crash_unix_test.go
index 09c25471d10..584a6c74232 100644
--- a/libgo/go/runtime/crash_unix_test.go
+++ b/libgo/go/runtime/crash_unix_test.go
@@ -65,13 +65,13 @@ func TestCrashDumpsAllThreads(t *testing.T) {
cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "a.exe")
cmd.Dir = dir
- out, err := testEnv(cmd).CombinedOutput()
+ out, err := testenv.CleanCmdEnv(cmd).CombinedOutput()
if err != nil {
t.Fatalf("building source: %v\n%s", err, out)
}
cmd = exec.Command(filepath.Join(dir, "a.exe"))
- cmd = testEnv(cmd)
+ cmd = testenv.CleanCmdEnv(cmd)
cmd.Env = append(cmd.Env, "GOTRACEBACK=crash")
// Set GOGC=off. Because of golang.org/issue/10958, the tight
@@ -132,6 +132,7 @@ import (
"fmt"
"os"
"runtime"
+ "time"
)
func main() {
@@ -149,6 +150,8 @@ func main() {
<-c
}
+ time.Sleep(time.Millisecond)
+
// Tell our parent that all the goroutines are executing.
if _, err := os.NewFile(3, "pipe").WriteString("x"); err != nil {
fmt.Fprintf(os.Stderr, "write to pipe failed: %v\n", err)
@@ -184,7 +187,7 @@ func TestPanicSystemstack(t *testing.T) {
t.Parallel()
cmd := exec.Command(os.Args[0], "testPanicSystemstackInternal")
- cmd = testEnv(cmd)
+ cmd = testenv.CleanCmdEnv(cmd)
cmd.Env = append(cmd.Env, "GOTRACEBACK=crash")
pr, pw, err := os.Pipe()
if err != nil {
@@ -249,7 +252,7 @@ func TestSignalExitStatus(t *testing.T) {
if err != nil {
t.Fatal(err)
}
- err = testEnv(exec.Command(exe, "SignalExitStatus")).Run()
+ err = testenv.CleanCmdEnv(exec.Command(exe, "SignalExitStatus")).Run()
if err == nil {
t.Error("test program succeeded unexpectedly")
} else if ee, ok := err.(*exec.ExitError); !ok {
diff --git a/libgo/go/runtime/debug.go b/libgo/go/runtime/debug.go
index fdd73463aba..7cddd29ed0f 100644
--- a/libgo/go/runtime/debug.go
+++ b/libgo/go/runtime/debug.go
@@ -15,9 +15,6 @@ import (
// The number of logical CPUs on the local machine can be queried with NumCPU.
// This call will go away when the scheduler improves.
func GOMAXPROCS(n int) int {
- if n > _MaxGomaxprocs {
- n = _MaxGomaxprocs
- }
lock(&sched.lock)
ret := int(gomaxprocs)
unlock(&sched.lock)
diff --git a/libgo/go/runtime/export_test.go b/libgo/go/runtime/export_test.go
index 6325dcb3948..e385f14c5bc 100644
--- a/libgo/go/runtime/export_test.go
+++ b/libgo/go/runtime/export_test.go
@@ -149,12 +149,19 @@ func RunSchedLocalQueueEmptyTest(iters int) {
}
}
-var StringHash = stringHash
-var BytesHash = bytesHash
-var Int32Hash = int32Hash
-var Int64Hash = int64Hash
-var EfaceHash = efaceHash
-var IfaceHash = ifaceHash
+var (
+ StringHash = stringHash
+ BytesHash = bytesHash
+ Int32Hash = int32Hash
+ Int64Hash = int64Hash
+ MemHash = memhash
+ MemHash32 = memhash32
+ MemHash64 = memhash64
+ EfaceHash = efaceHash
+ IfaceHash = ifaceHash
+)
+
+var UseAeshash = &useAeshash
func MemclrBytes(b []byte) {
s := (*slice)(unsafe.Pointer(&b))
@@ -364,3 +371,27 @@ func (rw *RWMutex) Lock() {
func (rw *RWMutex) Unlock() {
rw.rw.unlock()
}
+
+func MapBucketsCount(m map[int]int) int {
+ h := *(**hmap)(unsafe.Pointer(&m))
+ return 1 << h.B
+}
+
+func MapBucketsPointerIsNil(m map[int]int) bool {
+ h := *(**hmap)(unsafe.Pointer(&m))
+ return h.buckets == nil
+}
+
+func LockOSCounts() (external, internal uint32) {
+ g := getg()
+ if g.m.lockedExt+g.m.lockedInt == 0 {
+ if g.lockedm != 0 {
+ panic("lockedm on non-locked goroutine")
+ }
+ } else {
+ if g.lockedm == 0 {
+ panic("nil lockedm on locked goroutine")
+ }
+ }
+ return g.m.lockedExt, g.m.lockedInt
+}
diff --git a/libgo/go/runtime/extern.go b/libgo/go/runtime/extern.go
index 6ca978980f2..36787e38b02 100644
--- a/libgo/go/runtime/extern.go
+++ b/libgo/go/runtime/extern.go
@@ -184,8 +184,8 @@ func Caller(skip int) (pc uintptr, file string, line int, ok bool)
// program counter adjustment.
func Callers(skip int, pc []uintptr) int
-// GOROOT returns the root of the Go tree.
-// It uses the GOROOT environment variable, if set,
+// GOROOT returns the root of the Go tree. It uses the
+// GOROOT environment variable, if set at process start,
// or else the root used during the Go build.
func GOROOT() string {
s := gogetenv("GOROOT")
diff --git a/libgo/go/runtime/gc_test.go b/libgo/go/runtime/gc_test.go
index f14e0d5050e..a8c52d206f3 100644
--- a/libgo/go/runtime/gc_test.go
+++ b/libgo/go/runtime/gc_test.go
@@ -10,12 +10,14 @@ import (
"reflect"
"runtime"
"runtime/debug"
+ "sync/atomic"
"testing"
"time"
"unsafe"
)
func TestGcSys(t *testing.T) {
+ t.Skip("does not test anything; https://golang.org/issue/23343")
if os.Getenv("GOGC") == "off" {
t.Skip("skipping test; GOGC=off in environment")
}
@@ -171,7 +173,7 @@ func TestPeriodicGC(t *testing.T) {
// slack if things are slow.
var numGCs uint32
const want = 2
- for i := 0; i < 20 && numGCs < want; i++ {
+ for i := 0; i < 200 && numGCs < want; i++ {
time.Sleep(5 * time.Millisecond)
// Test that periodic GC actually happened.
@@ -501,3 +503,142 @@ func BenchmarkReadMemStats(b *testing.B) {
hugeSink = nil
}
+
+func TestUserForcedGC(t *testing.T) {
+ // Test that runtime.GC() triggers a GC even if GOGC=off.
+ defer debug.SetGCPercent(debug.SetGCPercent(-1))
+
+ var ms1, ms2 runtime.MemStats
+ runtime.ReadMemStats(&ms1)
+ runtime.GC()
+ runtime.ReadMemStats(&ms2)
+ if ms1.NumGC == ms2.NumGC {
+ t.Fatalf("runtime.GC() did not trigger GC")
+ }
+ if ms1.NumForcedGC == ms2.NumForcedGC {
+ t.Fatalf("runtime.GC() was not accounted in NumForcedGC")
+ }
+}
+
+func writeBarrierBenchmark(b *testing.B, f func()) {
+ runtime.GC()
+ var ms runtime.MemStats
+ runtime.ReadMemStats(&ms)
+ //b.Logf("heap size: %d MB", ms.HeapAlloc>>20)
+
+ // Keep GC running continuously during the benchmark, which in
+ // turn keeps the write barrier on continuously.
+ var stop uint32
+ done := make(chan bool)
+ go func() {
+ for atomic.LoadUint32(&stop) == 0 {
+ runtime.GC()
+ }
+ close(done)
+ }()
+ defer func() {
+ atomic.StoreUint32(&stop, 1)
+ <-done
+ }()
+
+ b.ResetTimer()
+ f()
+ b.StopTimer()
+}
+
+func BenchmarkWriteBarrier(b *testing.B) {
+ if runtime.GOMAXPROCS(-1) < 2 {
+ // We don't want GC to take our time.
+ b.Skip("need GOMAXPROCS >= 2")
+ }
+
+ // Construct a large tree both so the GC runs for a while and
+ // so we have a data structure to manipulate the pointers of.
+ type node struct {
+ l, r *node
+ }
+ var wbRoots []*node
+ var mkTree func(level int) *node
+ mkTree = func(level int) *node {
+ if level == 0 {
+ return nil
+ }
+ n := &node{mkTree(level - 1), mkTree(level - 1)}
+ if level == 10 {
+ // Seed GC with enough early pointers so it
+ // doesn't accidentally switch to mark 2 when
+ // it only has the top of the tree.
+ wbRoots = append(wbRoots, n)
+ }
+ return n
+ }
+ const depth = 22 // 64 MB
+ root := mkTree(22)
+
+ writeBarrierBenchmark(b, func() {
+ var stack [depth]*node
+ tos := -1
+
+ // There are two write barriers per iteration, so i+=2.
+ for i := 0; i < b.N; i += 2 {
+ if tos == -1 {
+ stack[0] = root
+ tos = 0
+ }
+
+ // Perform one step of reversing the tree.
+ n := stack[tos]
+ if n.l == nil {
+ tos--
+ } else {
+ n.l, n.r = n.r, n.l
+ stack[tos] = n.l
+ stack[tos+1] = n.r
+ tos++
+ }
+
+ if i%(1<<12) == 0 {
+ // Avoid non-preemptible loops (see issue #10958).
+ runtime.Gosched()
+ }
+ }
+ })
+
+ runtime.KeepAlive(wbRoots)
+}
+
+func BenchmarkBulkWriteBarrier(b *testing.B) {
+ if runtime.GOMAXPROCS(-1) < 2 {
+ // We don't want GC to take our time.
+ b.Skip("need GOMAXPROCS >= 2")
+ }
+
+ // Construct a large set of objects we can copy around.
+ const heapSize = 64 << 20
+ type obj [16]*byte
+ ptrs := make([]*obj, heapSize/unsafe.Sizeof(obj{}))
+ for i := range ptrs {
+ ptrs[i] = new(obj)
+ }
+
+ writeBarrierBenchmark(b, func() {
+ const blockSize = 1024
+ var pos int
+ for i := 0; i < b.N; i += blockSize {
+ // Rotate block.
+ block := ptrs[pos : pos+blockSize]
+ first := block[0]
+ copy(block, block[1:])
+ block[blockSize-1] = first
+
+ pos += blockSize
+ if pos+blockSize > len(ptrs) {
+ pos = 0
+ }
+
+ runtime.Gosched()
+ }
+ })
+
+ runtime.KeepAlive(ptrs)
+}
diff --git a/libgo/go/runtime/hash32.go b/libgo/go/runtime/hash32.go
index dd2e657fe3f..401fe2857d9 100644
--- a/libgo/go/runtime/hash32.go
+++ b/libgo/go/runtime/hash32.go
@@ -86,6 +86,32 @@ tail:
return uintptr(h)
}
+func memhash32(p unsafe.Pointer, seed uintptr) uintptr {
+ h := uint32(seed + 4*hashkey[0])
+ h ^= readUnaligned32(p)
+ h = rotl_15(h*m1) * m2
+ h ^= h >> 17
+ h *= m3
+ h ^= h >> 13
+ h *= m4
+ h ^= h >> 16
+ return uintptr(h)
+}
+
+func memhash64(p unsafe.Pointer, seed uintptr) uintptr {
+ h := uint32(seed + 8*hashkey[0])
+ h ^= readUnaligned32(p)
+ h = rotl_15(h*m1) * m2
+ h ^= readUnaligned32(add(p, 4))
+ h = rotl_15(h*m1) * m2
+ h ^= h >> 17
+ h *= m3
+ h ^= h >> 13
+ h *= m4
+ h ^= h >> 16
+ return uintptr(h)
+}
+
// Note: in order to get the compiler to issue rotl instructions, we
// need to constant fold the shift amount by hand.
// TODO: convince the compiler to issue rotl instructions after inlining.
diff --git a/libgo/go/runtime/hash64.go b/libgo/go/runtime/hash64.go
index f7d4a6f2f2a..5912943a4e9 100644
--- a/libgo/go/runtime/hash64.go
+++ b/libgo/go/runtime/hash64.go
@@ -86,6 +86,28 @@ tail:
return uintptr(h)
}
+func memhash32(p unsafe.Pointer, seed uintptr) uintptr {
+ h := uint64(seed + 4*hashkey[0])
+ v := uint64(readUnaligned32(p))
+ h ^= v
+ h ^= v << 32
+ h = rotl_31(h*m1) * m2
+ h ^= h >> 29
+ h *= m3
+ h ^= h >> 32
+ return uintptr(h)
+}
+
+func memhash64(p unsafe.Pointer, seed uintptr) uintptr {
+ h := uint64(seed + 8*hashkey[0])
+ h ^= uint64(readUnaligned32(p)) | uint64(readUnaligned32(add(p, 4)))<<32
+ h = rotl_31(h*m1) * m2
+ h ^= h >> 29
+ h *= m3
+ h ^= h >> 32
+ return uintptr(h)
+}
+
// Note: in order to get the compiler to issue rotl instructions, we
// need to constant fold the shift amount by hand.
// TODO: convince the compiler to issue rotl instructions after inlining.
diff --git a/libgo/go/runtime/hash_test.go b/libgo/go/runtime/hash_test.go
index 167c49eb5f5..54c91609f60 100644
--- a/libgo/go/runtime/hash_test.go
+++ b/libgo/go/runtime/hash_test.go
@@ -14,6 +14,40 @@ import (
"unsafe"
)
+func TestMemHash32Equality(t *testing.T) {
+ if *UseAeshash {
+ t.Skip("skipping since AES hash implementation is used")
+ }
+ var b [4]byte
+ r := rand.New(rand.NewSource(1234))
+ seed := uintptr(r.Uint64())
+ for i := 0; i < 100; i++ {
+ randBytes(r, b[:])
+ got := MemHash32(unsafe.Pointer(&b), seed)
+ want := MemHash(unsafe.Pointer(&b), seed, 4)
+ if got != want {
+ t.Errorf("MemHash32(%x, %v) = %v; want %v", b, seed, got, want)
+ }
+ }
+}
+
+func TestMemHash64Equality(t *testing.T) {
+ if *UseAeshash {
+ t.Skip("skipping since AES hash implementation is used")
+ }
+ var b [8]byte
+ r := rand.New(rand.NewSource(1234))
+ seed := uintptr(r.Uint64())
+ for i := 0; i < 100; i++ {
+ randBytes(r, b[:])
+ got := MemHash64(unsafe.Pointer(&b), seed)
+ want := MemHash(unsafe.Pointer(&b), seed, 8)
+ if got != want {
+ t.Errorf("MemHash64(%x, %v) = %v; want %v", b, seed, got, want)
+ }
+ }
+}
+
// Smhasher is a torture test for hash functions.
// https://code.google.com/p/smhasher/
// This code is a port of some of the Smhasher tests to Go.
diff --git a/libgo/go/runtime/hashmap.go b/libgo/go/runtime/hashmap.go
index a3e50cd9221..a1fe49e9305 100644
--- a/libgo/go/runtime/hashmap.go
+++ b/libgo/go/runtime/hashmap.go
@@ -63,6 +63,8 @@ import (
// themselves, so that the compiler will export them.
//
//go:linkname makemap runtime.makemap
+//go:linkname makemap64 runtime.makemap64
+//go:linkname makemap_small runtime.makemap_small
//go:linkname mapaccess1 runtime.mapaccess1
//go:linkname mapaccess2 runtime.mapaccess2
//go:linkname mapaccess1_fat runtime.mapaccess1_fat
@@ -77,8 +79,10 @@ const (
bucketCntBits = 3
bucketCnt = 1 << bucketCntBits
- // Maximum average load of a bucket that triggers growth.
- loadFactor = 6.5
+ // Maximum average load of a bucket that triggers growth is 6.5.
+ // Represent as loadFactorNum/loadFactDen, to allow integer math.
+ loadFactorNum = 13
+ loadFactorDen = 2
// Maximum key or value size to keep inline (instead of mallocing per element).
// Must fit in a uint8.
@@ -137,12 +141,13 @@ type mapextra struct {
// If both key and value do not contain pointers and are inline, then we mark bucket
// type as containing no pointers. This avoids scanning such maps.
// However, bmap.overflow is a pointer. In order to keep overflow buckets
- // alive, we store pointers to all overflow buckets in hmap.overflow.
- // Overflow is used only if key and value do not contain pointers.
- // overflow[0] contains overflow buckets for hmap.buckets.
- // overflow[1] contains overflow buckets for hmap.oldbuckets.
+ // alive, we store pointers to all overflow buckets in hmap.overflow and h.map.oldoverflow.
+ // overflow and oldoverflow are only used if key and value do not contain pointers.
+ // overflow contains overflow buckets for hmap.buckets.
+ // oldoverflow contains overflow buckets for hmap.oldbuckets.
// The indirection allows to store a pointer to the slice in hiter.
- overflow [2]*[]*bmap
+ overflow *[]*bmap
+ oldoverflow *[]*bmap
// nextOverflow holds a pointer to a free overflow bucket.
nextOverflow *bmap
@@ -171,7 +176,8 @@ type hiter struct {
h *hmap
buckets unsafe.Pointer // bucket ptr at hash_iter initialization time
bptr *bmap // current bucket
- overflow [2]*[]*bmap // keeps overflow buckets alive
+ overflow *[]*bmap // keeps overflow buckets of hmap.buckets alive
+ oldoverflow *[]*bmap // keeps overflow buckets of hmap.oldbuckets alive
startBucket uintptr // bucket iteration started at
offset uint8 // intra-bucket offset to start from during iteration (should be big enough to hold bucketCnt-1)
wrapped bool // already wrapped around from end of bucket array to beginning
@@ -181,6 +187,28 @@ type hiter struct {
checkBucket uintptr
}
+// bucketShift returns 1<<b, optimized for code generation.
+func bucketShift(b uint8) uintptr {
+ if sys.GoarchAmd64|sys.GoarchAmd64p32|sys.Goarch386 != 0 {
+ b &= sys.PtrSize*8 - 1 // help x86 archs remove shift overflow checks
+ }
+ return uintptr(1) << b
+}
+
+// bucketMask returns 1<<b - 1, optimized for code generation.
+func bucketMask(b uint8) uintptr {
+ return bucketShift(b) - 1
+}
+
+// tophash calculates the tophash value for hash.
+func tophash(hash uintptr) uint8 {
+ top := uint8(hash >> (sys.PtrSize*8 - 8))
+ if top < minTopHash {
+ top += minTopHash
+ }
+ return top
+}
+
func evacuated(b *bmap) bool {
h := b.tophash[0]
return h > empty && h < minTopHash
@@ -194,6 +222,10 @@ func (b *bmap) setoverflow(t *maptype, ovf *bmap) {
*(**bmap)(add(unsafe.Pointer(b), uintptr(t.bucketsize)-sys.PtrSize)) = ovf
}
+func (b *bmap) keys() unsafe.Pointer {
+ return add(unsafe.Pointer(b), dataOffset)
+}
+
// incrnoverflow increments h.noverflow.
// noverflow counts the number of overflow buckets.
// This is used to trigger same-size map growth.
@@ -242,7 +274,7 @@ func (h *hmap) newoverflow(t *maptype, b *bmap) *bmap {
h.incrnoverflow()
if t.bucket.kind&kindNoPointers != 0 {
h.createOverflow()
- *h.extra.overflow[0] = append(*h.extra.overflow[0], ovf)
+ *h.extra.overflow = append(*h.extra.overflow, ovf)
}
b.setoverflow(t, ovf)
return ovf
@@ -252,97 +284,69 @@ func (h *hmap) createOverflow() {
if h.extra == nil {
h.extra = new(mapextra)
}
- if h.extra.overflow[0] == nil {
- h.extra.overflow[0] = new([]*bmap)
+ if h.extra.overflow == nil {
+ h.extra.overflow = new([]*bmap)
}
}
-// makemap implements a Go map creation make(map[k]v, hint)
+func makemap64(t *maptype, hint int64, h *hmap) *hmap {
+ if int64(int(hint)) != hint {
+ hint = 0
+ }
+ return makemap(t, int(hint), h)
+}
+
+// makehmap_small implements Go map creation for make(map[k]v) and
+// make(map[k]v, hint) when hint is known to be at most bucketCnt
+// at compile time and the map needs to be allocated on the heap.
+func makemap_small() *hmap {
+ h := new(hmap)
+ h.hash0 = fastrand()
+ return h
+}
+
+// makemap implements Go map creation for make(map[k]v, hint).
// If the compiler has determined that the map or the first bucket
// can be created on the stack, h and/or bucket may be non-nil.
// If h != nil, the map can be created directly in h.
-// If bucket != nil, bucket can be used as the first bucket.
-func makemap(t *maptype, hint int64, h *hmap, bucket unsafe.Pointer) *hmap {
- if sz := unsafe.Sizeof(hmap{}); sz > 48 || sz != t.hmap.size {
+// If h.buckets != nil, bucket pointed to can be used as the first bucket.
+func makemap(t *maptype, hint int, h *hmap) *hmap {
+ // The size of hmap should be 48 bytes on 64 bit
+ // and 28 bytes on 32 bit platforms.
+ if sz := unsafe.Sizeof(hmap{}); sz != 8+5*sys.PtrSize {
println("runtime: sizeof(hmap) =", sz, ", t.hmap.size =", t.hmap.size)
throw("bad hmap size")
}
- if hint < 0 || hint > int64(maxSliceCap(t.bucket.size)) {
+ if hint < 0 || hint > int(maxSliceCap(t.bucket.size)) {
hint = 0
}
- if !ismapkey(t.key) {
- throw("runtime.makemap: unsupported map key type")
- }
-
- // check compiler's and reflect's math
- if t.key.size > maxKeySize && (!t.indirectkey || t.keysize != uint8(sys.PtrSize)) ||
- t.key.size <= maxKeySize && (t.indirectkey || t.keysize != uint8(t.key.size)) {
- throw("key size wrong")
- }
- if t.elem.size > maxValueSize && (!t.indirectvalue || t.valuesize != uint8(sys.PtrSize)) ||
- t.elem.size <= maxValueSize && (t.indirectvalue || t.valuesize != uint8(t.elem.size)) {
- throw("value size wrong")
- }
-
- // invariants we depend on. We should probably check these at compile time
- // somewhere, but for now we'll do it here.
- if t.key.align > bucketCnt {
- throw("key align too big")
- }
- if t.elem.align > bucketCnt {
- throw("value align too big")
- }
- if t.key.size%uintptr(t.key.align) != 0 {
- throw("key size not a multiple of key align")
- }
- if t.elem.size%uintptr(t.elem.align) != 0 {
- throw("value size not a multiple of value align")
- }
- if bucketCnt < 8 {
- throw("bucketsize too small for proper alignment")
- }
- if dataOffset%uintptr(t.key.align) != 0 {
- throw("need padding in bucket (key)")
- }
- if dataOffset%uintptr(t.elem.align) != 0 {
- throw("need padding in bucket (value)")
+ // initialize Hmap
+ if h == nil {
+ h = (*hmap)(newobject(t.hmap))
}
+ h.hash0 = fastrand()
// find size parameter which will hold the requested # of elements
B := uint8(0)
- for ; overLoadFactor(hint, B); B++ {
+ for overLoadFactor(hint, B) {
+ B++
}
+ h.B = B
// allocate initial hash table
// if B == 0, the buckets field is allocated lazily later (in mapassign)
// If hint is large zeroing this memory could take a while.
- buckets := bucket
- var extra *mapextra
- if B != 0 {
+ if h.B != 0 {
var nextOverflow *bmap
- buckets, nextOverflow = makeBucketArray(t, B)
+ h.buckets, nextOverflow = makeBucketArray(t, h.B)
if nextOverflow != nil {
- extra = new(mapextra)
- extra.nextOverflow = nextOverflow
+ h.extra = new(mapextra)
+ h.extra.nextOverflow = nextOverflow
}
}
- // initialize Hmap
- if h == nil {
- h = (*hmap)(newobject(t.hmap))
- }
- h.count = 0
- h.B = B
- h.extra = extra
- h.flags = 0
- h.hash0 = fastrand()
- h.buckets = buckets
- h.oldbuckets = nil
- h.nevacuate = 0
- h.noverflow = 0
-
return h
}
@@ -353,7 +357,7 @@ func makemap(t *maptype, hint int64, h *hmap, bucket unsafe.Pointer) *hmap {
// hold onto it for very long.
func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil))
+ callerpc := getcallerpc()
pc := funcPC(mapaccess1)
racereadpc(unsafe.Pointer(h), callerpc, pc)
raceReadObjectPC(t.key, key, callerpc, pc)
@@ -370,7 +374,7 @@ func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
hashfn := t.key.hashfn
equalfn := t.key.equalfn
hash := hashfn(key, uintptr(h.hash0))
- m := uintptr(1)<<h.B - 1
+ m := bucketMask(h.B)
b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
if !h.sameSizeGrow() {
@@ -382,11 +386,8 @@ func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
b = oldb
}
}
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
- for {
+ top := tophash(hash)
+ for ; b != nil; b = b.overflow(t) {
for i := uintptr(0); i < bucketCnt; i++ {
if b.tophash[i] != top {
continue
@@ -403,16 +404,13 @@ func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
return v
}
}
- b = b.overflow(t)
- if b == nil {
- return unsafe.Pointer(&zeroVal[0])
- }
}
+ return unsafe.Pointer(&zeroVal[0])
}
func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool) {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil))
+ callerpc := getcallerpc()
pc := funcPC(mapaccess2)
racereadpc(unsafe.Pointer(h), callerpc, pc)
raceReadObjectPC(t.key, key, callerpc, pc)
@@ -429,7 +427,7 @@ func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool)
hashfn := t.key.hashfn
equalfn := t.key.equalfn
hash := hashfn(key, uintptr(h.hash0))
- m := uintptr(1)<<h.B - 1
+ m := bucketMask(h.B)
b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
if !h.sameSizeGrow() {
@@ -441,11 +439,8 @@ func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool)
b = oldb
}
}
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
- for {
+ top := tophash(hash)
+ for ; b != nil; b = b.overflow(t) {
for i := uintptr(0); i < bucketCnt; i++ {
if b.tophash[i] != top {
continue
@@ -462,11 +457,8 @@ func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool)
return v, true
}
}
- b = b.overflow(t)
- if b == nil {
- return unsafe.Pointer(&zeroVal[0]), false
- }
}
+ return unsafe.Pointer(&zeroVal[0]), false
}
// returns both key and value. Used by map iterator
@@ -477,7 +469,7 @@ func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe
hashfn := t.key.hashfn
equalfn := t.key.equalfn
hash := hashfn(key, uintptr(h.hash0))
- m := uintptr(1)<<h.B - 1
+ m := bucketMask(h.B)
b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
if !h.sameSizeGrow() {
@@ -489,11 +481,8 @@ func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe
b = oldb
}
}
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
- for {
+ top := tophash(hash)
+ for ; b != nil; b = b.overflow(t) {
for i := uintptr(0); i < bucketCnt; i++ {
if b.tophash[i] != top {
continue
@@ -510,11 +499,8 @@ func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe
return k, v
}
}
- b = b.overflow(t)
- if b == nil {
- return nil, nil
- }
}
+ return nil, nil
}
func mapaccess1_fat(t *maptype, h *hmap, key, zero unsafe.Pointer) unsafe.Pointer {
@@ -539,7 +525,7 @@ func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
panic(plainError("assignment to entry in nil map"))
}
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil))
+ callerpc := getcallerpc()
pc := funcPC(mapassign)
racewritepc(unsafe.Pointer(h), callerpc, pc)
raceReadObjectPC(t.key, key, callerpc, pc)
@@ -559,19 +545,16 @@ func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
h.flags |= hashWriting
if h.buckets == nil {
- h.buckets = newarray(t.bucket, 1)
+ h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
}
again:
- bucket := hash & (uintptr(1)<<h.B - 1)
+ bucket := hash & bucketMask(h.B)
if h.growing() {
growWork(t, h, bucket)
}
b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
+ top := tophash(hash)
var inserti *uint8
var insertk unsafe.Pointer
@@ -611,7 +594,7 @@ again:
// If we hit the max load factor or we have too many overflow buckets,
// and we're not already in the middle of growing, start growing.
- if !h.growing() && (overLoadFactor(int64(h.count), h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
+ if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
hashGrow(t, h)
goto again // Growing the table invalidates everything, so try again
}
@@ -651,7 +634,7 @@ done:
func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil))
+ callerpc := getcallerpc()
pc := funcPC(mapdelete)
racewritepc(unsafe.Pointer(h), callerpc, pc)
raceReadObjectPC(t.key, key, callerpc, pc)
@@ -674,16 +657,14 @@ func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) {
// in which case we have not actually done a write (delete).
h.flags |= hashWriting
- bucket := hash & (uintptr(1)<<h.B - 1)
+ bucket := hash & bucketMask(h.B)
if h.growing() {
growWork(t, h, bucket)
}
- b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
- for {
+ b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
+ top := tophash(hash)
+search:
+ for ; b != nil; b = b.overflow(t) {
for i := uintptr(0); i < bucketCnt; i++ {
if b.tophash[i] != top {
continue
@@ -696,53 +677,58 @@ func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) {
if !equalfn(key, k2) {
continue
}
+ // Only clear key if there are pointers in it.
if t.indirectkey {
*(*unsafe.Pointer)(k) = nil
- } else {
- typedmemclr(t.key, k)
+ } else if t.key.kind&kindNoPointers == 0 {
+ memclrHasPointers(k, t.key.size)
}
- v := unsafe.Pointer(uintptr(unsafe.Pointer(b)) + dataOffset + bucketCnt*uintptr(t.keysize) + i*uintptr(t.valuesize))
- if t.indirectvalue {
- *(*unsafe.Pointer)(v) = nil
- } else {
- typedmemclr(t.elem, v)
+ // Only clear value if there are pointers in it.
+ if t.indirectvalue || t.elem.kind&kindNoPointers == 0 {
+ v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
+ if t.indirectvalue {
+ *(*unsafe.Pointer)(v) = nil
+ } else {
+ memclrHasPointers(v, t.elem.size)
+ }
}
b.tophash[i] = empty
h.count--
- goto done
- }
- b = b.overflow(t)
- if b == nil {
- goto done
+ break search
}
}
-done:
if h.flags&hashWriting == 0 {
throw("concurrent map writes")
}
h.flags &^= hashWriting
}
+// mapiterinit initializes the hiter struct used for ranging over maps.
+// The hiter struct pointed to by 'it' is allocated on the stack
+// by the compiler's order pass or on the heap by reflect_mapiterinit.
+// Both need to have a zeroed hiter since the struct contains pointers.
+// Gccgo-specific: *it need not be zeroed by the compiler,
+// and it's cheaper to zero it here.
func mapiterinit(t *maptype, h *hmap, it *hiter) {
- // Clear pointer fields so garbage collector does not complain.
it.key = nil
it.value = nil
it.t = nil
it.h = nil
it.buckets = nil
it.bptr = nil
- it.overflow[0] = nil
- it.overflow[1] = nil
+ it.overflow = nil
+ it.oldoverflow = nil
+ it.wrapped = false
+ it.i = 0
+ it.checkBucket = 0
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil))
+ callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapiterinit))
}
if h == nil || h.count == 0 {
- it.key = nil
- it.value = nil
return
}
@@ -762,6 +748,7 @@ func mapiterinit(t *maptype, h *hmap, it *hiter) {
// while we are iterating.
h.createOverflow()
it.overflow = h.extra.overflow
+ it.oldoverflow = h.extra.oldoverflow
}
// decide where to start
@@ -769,16 +756,14 @@ func mapiterinit(t *maptype, h *hmap, it *hiter) {
if h.B > 31-bucketCntBits {
r += uintptr(fastrand()) << 31
}
- it.startBucket = r & (uintptr(1)<<h.B - 1)
+ it.startBucket = r & bucketMask(h.B)
it.offset = uint8(r >> h.B & (bucketCnt - 1))
// iterator state
it.bucket = it.startBucket
- it.wrapped = false
- it.bptr = nil
// Remember we have an iterator.
- // Can run concurrently with another hash_iter_init().
+ // Can run concurrently with another mapiterinit().
if old := h.flags; old&(iterator|oldIterator) != iterator|oldIterator {
atomic.Or8(&h.flags, iterator|oldIterator)
}
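
mapiterinit derives both a random starting bucket and a random slot offset within each bucket from a single random value, so iteration order is not reproducible between runs. A minimal sketch of that derivation, with math/rand standing in for the runtime's fastrand and assuming the usual 8-slot buckets:

package main

import (
	"fmt"
	"math/rand"
)

const (
	bucketCnt     = 8 // assumed: 8 key/value slots per bucket
	bucketCntBits = 3
)

func bucketMask(b uint8) uintptr { return uintptr(1)<<b - 1 }

// iterStart mirrors the startBucket/offset derivation in mapiterinit.
func iterStart(B uint8) (startBucket uintptr, offset uint8) {
	r := uintptr(rand.Uint32())
	if B > 31-bucketCntBits {
		r += uintptr(rand.Uint32()) << 31
	}
	startBucket = r & bucketMask(B)
	offset = uint8(r >> B & (bucketCnt - 1))
	return
}

func main() {
	for i := 0; i < 3; i++ {
		b, off := iterStart(5) // a map with 32 buckets
		fmt.Printf("start at bucket %d, slot offset %d\n", b, off)
	}
}
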
@@ -789,7 +774,7 @@ func mapiterinit(t *maptype, h *hmap, it *hiter) {
func mapiternext(it *hiter) {
h := it.h
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer( /* &it */ nil))
+ callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapiternext))
}
if h.flags&hashWriting != 0 {
@@ -829,7 +814,7 @@ next:
checkBucket = noCheck
}
bucket++
- if bucket == uintptr(1)<<it.B {
+ if bucket == bucketShift(it.B) {
bucket = 0
it.wrapped = true
}
@@ -837,90 +822,75 @@ next:
}
for ; i < bucketCnt; i++ {
offi := (i + it.offset) & (bucketCnt - 1)
+ if b.tophash[offi] == empty || b.tophash[offi] == evacuatedEmpty {
+ continue
+ }
k := add(unsafe.Pointer(b), dataOffset+uintptr(offi)*uintptr(t.keysize))
+ if t.indirectkey {
+ k = *((*unsafe.Pointer)(k))
+ }
v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+uintptr(offi)*uintptr(t.valuesize))
- if b.tophash[offi] != empty && b.tophash[offi] != evacuatedEmpty {
- if checkBucket != noCheck && !h.sameSizeGrow() {
- // Special case: iterator was started during a grow to a larger size
- // and the grow is not done yet. We're working on a bucket whose
- // oldbucket has not been evacuated yet. Or at least, it wasn't
- // evacuated when we started the bucket. So we're iterating
- // through the oldbucket, skipping any keys that will go
- // to the other new bucket (each oldbucket expands to two
- // buckets during a grow).
- k2 := k
- if t.indirectkey {
- k2 = *((*unsafe.Pointer)(k2))
- }
- if t.reflexivekey || equalfn(k2, k2) {
- // If the item in the oldbucket is not destined for
- // the current new bucket in the iteration, skip it.
- hash := hashfn(k2, uintptr(h.hash0))
- if hash&(uintptr(1)<<it.B-1) != checkBucket {
- continue
- }
- } else {
- // Hash isn't repeatable if k != k (NaNs). We need a
- // repeatable and randomish choice of which direction
- // to send NaNs during evacuation. We'll use the low
- // bit of tophash to decide which way NaNs go.
- // NOTE: this case is why we need two evacuate tophash
- // values, evacuatedX and evacuatedY, that differ in
- // their low bit.
- if checkBucket>>(it.B-1) != uintptr(b.tophash[offi]&1) {
- continue
- }
- }
- }
- if b.tophash[offi] != evacuatedX && b.tophash[offi] != evacuatedY {
- // this is the golden data, we can return it.
- if t.indirectkey {
- k = *((*unsafe.Pointer)(k))
- }
- it.key = k
- if t.indirectvalue {
- v = *((*unsafe.Pointer)(v))
+ if checkBucket != noCheck && !h.sameSizeGrow() {
+ // Special case: iterator was started during a grow to a larger size
+ // and the grow is not done yet. We're working on a bucket whose
+ // oldbucket has not been evacuated yet. Or at least, it wasn't
+ // evacuated when we started the bucket. So we're iterating
+ // through the oldbucket, skipping any keys that will go
+ // to the other new bucket (each oldbucket expands to two
+ // buckets during a grow).
+ if t.reflexivekey || equalfn(k, k) {
+ // If the item in the oldbucket is not destined for
+ // the current new bucket in the iteration, skip it.
+ hash := hashfn(k, uintptr(h.hash0))
+ if hash&bucketMask(it.B) != checkBucket {
+ continue
}
- it.value = v
} else {
- // The hash table has grown since the iterator was started.
- // The golden data for this key is now somewhere else.
- k2 := k
- if t.indirectkey {
- k2 = *((*unsafe.Pointer)(k2))
- }
- if t.reflexivekey || equalfn(k2, k2) {
- // Check the current hash table for the data.
- // This code handles the case where the key
- // has been deleted, updated, or deleted and reinserted.
- // NOTE: we need to regrab the key as it has potentially been
- // updated to an equal() but not identical key (e.g. +0.0 vs -0.0).
- rk, rv := mapaccessK(t, h, k2)
- if rk == nil {
- continue // key has been deleted
- }
- it.key = rk
- it.value = rv
- } else {
- // if key!=key then the entry can't be deleted or
- // updated, so we can just return it. That's lucky for
- // us because when key!=key we can't look it up
- // successfully in the current table.
- it.key = k2
- if t.indirectvalue {
- v = *((*unsafe.Pointer)(v))
- }
- it.value = v
+ // Hash isn't repeatable if k != k (NaNs). We need a
+ // repeatable and randomish choice of which direction
+ // to send NaNs during evacuation. We'll use the low
+ // bit of tophash to decide which way NaNs go.
+ // NOTE: this case is why we need two evacuate tophash
+ // values, evacuatedX and evacuatedY, that differ in
+ // their low bit.
+ if checkBucket>>(it.B-1) != uintptr(b.tophash[offi]&1) {
+ continue
}
}
- it.bucket = bucket
- if it.bptr != b { // avoid unnecessary write barrier; see issue 14921
- it.bptr = b
+ }
+ if (b.tophash[offi] != evacuatedX && b.tophash[offi] != evacuatedY) ||
+ !(t.reflexivekey || equalfn(k, k)) {
+ // This is the golden data, we can return it.
+ // OR
+ // key!=key, so the entry can't be deleted or updated, so we can just return it.
+ // That's lucky for us because when key!=key we can't look it up successfully.
+ it.key = k
+ if t.indirectvalue {
+ v = *((*unsafe.Pointer)(v))
}
- it.i = i + 1
- it.checkBucket = checkBucket
- return
+ it.value = v
+ } else {
+ // The hash table has grown since the iterator was started.
+ // The golden data for this key is now somewhere else.
+ // Check the current hash table for the data.
+ // This code handles the case where the key
+ // has been deleted, updated, or deleted and reinserted.
+ // NOTE: we need to regrab the key as it has potentially been
+ // updated to an equal() but not identical key (e.g. +0.0 vs -0.0).
+ rk, rv := mapaccessK(t, h, k)
+ if rk == nil {
+ continue // key has been deleted
+ }
+ it.key = rk
+ it.value = rv
}
+ it.bucket = bucket
+ if it.bptr != b { // avoid unnecessary write barrier; see issue 14921
+ it.bptr = b
+ }
+ it.i = i + 1
+ it.checkBucket = checkBucket
+ return
}
b = b.overflow(t)
i = 0
@@ -928,7 +898,7 @@ next:
}
func makeBucketArray(t *maptype, b uint8) (buckets unsafe.Pointer, nextOverflow *bmap) {
- base := uintptr(1 << b)
+ base := bucketShift(b)
nbuckets := base
// For small b, overflow buckets are unlikely.
// Avoid the overhead of the calculation.
@@ -936,7 +906,7 @@ func makeBucketArray(t *maptype, b uint8) (buckets unsafe.Pointer, nextOverflow
// Add on the estimated number of overflow buckets
// required to insert the median number of elements
// used with this value of b.
- nbuckets += 1 << (b - 4)
+ nbuckets += bucketShift(b - 4)
sz := t.bucket.size * nbuckets
up := roundupsize(sz)
if up != sz {
@@ -962,7 +932,7 @@ func hashGrow(t *maptype, h *hmap) {
// Otherwise, there are too many overflow buckets,
// so keep the same number of buckets and "grow" laterally.
bigger := uint8(1)
- if !overLoadFactor(int64(h.count), h.B) {
+ if !overLoadFactor(h.count+1, h.B) {
bigger = 0
h.flags |= sameSizeGrow
}
@@ -981,13 +951,13 @@ func hashGrow(t *maptype, h *hmap) {
h.nevacuate = 0
h.noverflow = 0
- if h.extra != nil && h.extra.overflow[0] != nil {
+ if h.extra != nil && h.extra.overflow != nil {
// Promote current overflow buckets to the old generation.
- if h.extra.overflow[1] != nil {
- throw("overflow is not nil")
+ if h.extra.oldoverflow != nil {
+ throw("oldoverflow is not nil")
}
- h.extra.overflow[1] = h.extra.overflow[0]
- h.extra.overflow[0] = nil
+ h.extra.oldoverflow = h.extra.overflow
+ h.extra.overflow = nil
}
if nextOverflow != nil {
if h.extra == nil {
@@ -1001,9 +971,8 @@ func hashGrow(t *maptype, h *hmap) {
}
// overLoadFactor reports whether count items placed in 1<<B buckets is over loadFactor.
-func overLoadFactor(count int64, B uint8) bool {
- // TODO: rewrite to use integer math and comparison?
- return count >= bucketCnt && float32(count) >= loadFactor*float32((uint64(1)<<B))
+func overLoadFactor(count int, B uint8) bool {
+ return count > bucketCnt && uintptr(count) > loadFactorNum*(bucketShift(B)/loadFactorDen)
}
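
The load-factor test now uses only integer arithmetic. The constants are not shown in this hunk; assuming loadFactorNum/loadFactorDen encode an average load of 6.5 entries per 8-slot bucket, the map grows once the post-insert count exceeds 13*(2^B/2). A worked example:

package main

import "fmt"

// Assumed constants: 8 slots per bucket and a target average load of
// loadFactorNum/loadFactorDen = 6.5 entries per bucket.
const (
	bucketCnt     = 8
	loadFactorNum = 13
	loadFactorDen = 2
)

func bucketShift(b uint8) uintptr { return uintptr(1) << b }

// overLoadFactor mirrors the integer-only check introduced by this change.
func overLoadFactor(count int, B uint8) bool {
	return count > bucketCnt && uintptr(count) > loadFactorNum*(bucketShift(B)/loadFactorDen)
}

func main() {
	// With B=5 there are 32 buckets, so the threshold is 13*(32/2) = 208 entries.
	fmt.Println(overLoadFactor(208, 5)) // false: exactly at the threshold, no grow yet
	fmt.Println(overLoadFactor(209, 5)) // true: the next insert triggers a grow
}
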
// tooManyOverflowBuckets reports whether noverflow buckets is too many for a map with 1<<B buckets.
@@ -1014,10 +983,11 @@ func tooManyOverflowBuckets(noverflow uint16, B uint8) bool {
// If the threshold is too high, maps that grow and shrink can hold on to lots of unused memory.
// "too many" means (approximately) as many overflow buckets as regular buckets.
// See incrnoverflow for more details.
- if B < 16 {
- return noverflow >= uint16(1)<<B
+ if B > 15 {
+ B = 15
}
- return noverflow >= 1<<15
+ // The compiler doesn't see here that B < 16; mask B to generate shorter shift code.
+ return noverflow >= uint16(1)<<(B&15)
}
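
With the rewrite, the overflow-bucket threshold is 1<<B for small maps and saturates at 1<<15 for large ones; the B&15 mask exists only to convince the compiler that the shift amount is small. A quick check of the boundary cases, mirroring the replacement code:

package main

import "fmt"

// tooManyOverflowBuckets mirrors the replacement code above.
func tooManyOverflowBuckets(noverflow uint16, B uint8) bool {
	if B > 15 {
		B = 15
	}
	return noverflow >= uint16(1)<<(B&15)
}

func main() {
	fmt.Println(tooManyOverflowBuckets(7, 3))      // false: 7 < 8
	fmt.Println(tooManyOverflowBuckets(8, 3))      // true: as many overflow buckets as regular ones
	fmt.Println(tooManyOverflowBuckets(40000, 20)) // true: cap is 1<<15 = 32768
}
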
// growing reports whether h is growing. The growth may be to the same size or bigger.
@@ -1036,7 +1006,7 @@ func (h *hmap) noldbuckets() uintptr {
if !h.sameSizeGrow() {
oldB--
}
- return uintptr(1) << oldB
+ return bucketShift(oldB)
}
// oldbucketmask provides a mask that can be applied to calculate n % noldbuckets().
@@ -1060,33 +1030,38 @@ func bucketEvacuated(t *maptype, h *hmap, bucket uintptr) bool {
return evacuated(b)
}
+// evacDst is an evacuation destination.
+type evacDst struct {
+ b *bmap // current destination bucket
+ i int // key/val index into b
+ k unsafe.Pointer // pointer to current key storage
+ v unsafe.Pointer // pointer to current value storage
+}
+
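
evacDst replaces the four parallel x*/y* locals with one cursor per destination. During a doubling grow, each old bucket splits into a low ("x") and a high ("y") new bucket according to the hash bit that newly participates in the bucket index (newbit). A self-contained illustration of that split, using made-up hash values:

package main

import "fmt"

func main() {
	const oldB = 3               // 8 old buckets before the grow
	newbit := uintptr(1) << oldB // == h.noldbuckets(): the hash bit that newly matters
	for _, hash := range []uintptr{0x12, 0x1a, 0x07, 0x0f} {
		oldbucket := hash & (newbit - 1)
		useY := uintptr(0)
		if hash&newbit != 0 {
			useY = 1 // this entry moves to the high ("y") half
		}
		fmt.Printf("hash %#04x: old bucket %d -> new bucket %d (useY=%d)\n",
			hash, oldbucket, oldbucket+useY*newbit, useY)
	}
}
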
func evacuate(t *maptype, h *hmap, oldbucket uintptr) {
b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
newbit := h.noldbuckets()
hashfn := t.key.hashfn
- equalfn := t.key.equalfn
if !evacuated(b) {
// TODO: reuse overflow buckets instead of using new ones, if there
// is no iterator using the old buckets. (If !oldIterator.)
- var (
- x, y *bmap // current low/high buckets in new map
- xi, yi int // key/val indices into x and y
- xk, yk unsafe.Pointer // pointers to current x and y key storage
- xv, yv unsafe.Pointer // pointers to current x and y value storage
- )
- x = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize)))
- xi = 0
- xk = add(unsafe.Pointer(x), dataOffset)
- xv = add(xk, bucketCnt*uintptr(t.keysize))
+ // xy contains the x and y (low and high) evacuation destinations.
+ var xy [2]evacDst
+ x := &xy[0]
+ x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize)))
+ x.k = add(unsafe.Pointer(x.b), dataOffset)
+ x.v = add(x.k, bucketCnt*uintptr(t.keysize))
+
if !h.sameSizeGrow() {
// Only calculate y pointers if we're growing bigger.
// Otherwise GC can see bad pointers.
- y = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize)))
- yi = 0
- yk = add(unsafe.Pointer(y), dataOffset)
- yv = add(yk, bucketCnt*uintptr(t.keysize))
+ y := &xy[1]
+ y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize)))
+ y.k = add(unsafe.Pointer(y.b), dataOffset)
+ y.v = add(y.k, bucketCnt*uintptr(t.keysize))
}
+
for ; b != nil; b = b.overflow(t) {
k := add(unsafe.Pointer(b), dataOffset)
v := add(k, bucketCnt*uintptr(t.keysize))
@@ -1103,122 +1078,102 @@ func evacuate(t *maptype, h *hmap, oldbucket uintptr) {
if t.indirectkey {
k2 = *((*unsafe.Pointer)(k2))
}
- useX := true
+ var useY uint8
if !h.sameSizeGrow() {
// Compute hash to make our evacuation decision (whether we need
// to send this key/value to bucket x or bucket y).
hash := hashfn(k2, uintptr(h.hash0))
- if h.flags&iterator != 0 {
- if !t.reflexivekey && !equalfn(k2, k2) {
- // If key != key (NaNs), then the hash could be (and probably
- // will be) entirely different from the old hash. Moreover,
- // it isn't reproducible. Reproducibility is required in the
- // presence of iterators, as our evacuation decision must
- // match whatever decision the iterator made.
- // Fortunately, we have the freedom to send these keys either
- // way. Also, tophash is meaningless for these kinds of keys.
- // We let the low bit of tophash drive the evacuation decision.
- // We recompute a new random tophash for the next level so
- // these keys will get evenly distributed across all buckets
- // after multiple grows.
- if top&1 != 0 {
- hash |= newbit
- } else {
- hash &^= newbit
- }
- top = uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
+ if h.flags&iterator != 0 && !t.reflexivekey && !t.key.equalfn(k2, k2) {
+ // If key != key (NaNs), then the hash could be (and probably
+ // will be) entirely different from the old hash. Moreover,
+ // it isn't reproducible. Reproducibility is required in the
+ // presence of iterators, as our evacuation decision must
+ // match whatever decision the iterator made.
+ // Fortunately, we have the freedom to send these keys either
+ // way. Also, tophash is meaningless for these kinds of keys.
+ // We let the low bit of tophash drive the evacuation decision.
+ // We recompute a new random tophash for the next level so
+ // these keys will get evenly distributed across all buckets
+ // after multiple grows.
+ useY = top & 1
+ top = tophash(hash)
+ } else {
+ if hash&newbit != 0 {
+ useY = 1
}
}
- useX = hash&newbit == 0
}
- if useX {
- b.tophash[i] = evacuatedX
- if xi == bucketCnt {
- newx := h.newoverflow(t, x)
- x = newx
- xi = 0
- xk = add(unsafe.Pointer(x), dataOffset)
- xv = add(xk, bucketCnt*uintptr(t.keysize))
- }
- x.tophash[xi] = top
- if t.indirectkey {
- *(*unsafe.Pointer)(xk) = k2 // copy pointer
- } else {
- typedmemmove(t.key, xk, k) // copy value
- }
- if t.indirectvalue {
- *(*unsafe.Pointer)(xv) = *(*unsafe.Pointer)(v)
- } else {
- typedmemmove(t.elem, xv, v)
- }
- xi++
- xk = add(xk, uintptr(t.keysize))
- xv = add(xv, uintptr(t.valuesize))
+
+ if evacuatedX+1 != evacuatedY {
+ throw("bad evacuatedN")
+ }
+
+ b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY
+ dst := &xy[useY] // evacuation destination
+
+ if dst.i == bucketCnt {
+ dst.b = h.newoverflow(t, dst.b)
+ dst.i = 0
+ dst.k = add(unsafe.Pointer(dst.b), dataOffset)
+ dst.v = add(dst.k, bucketCnt*uintptr(t.keysize))
+ }
+ dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check
+ if t.indirectkey {
+ *(*unsafe.Pointer)(dst.k) = k2 // copy pointer
} else {
- b.tophash[i] = evacuatedY
- if yi == bucketCnt {
- newy := h.newoverflow(t, y)
- y = newy
- yi = 0
- yk = add(unsafe.Pointer(y), dataOffset)
- yv = add(yk, bucketCnt*uintptr(t.keysize))
- }
- y.tophash[yi] = top
- if t.indirectkey {
- *(*unsafe.Pointer)(yk) = k2
- } else {
- typedmemmove(t.key, yk, k)
- }
- if t.indirectvalue {
- *(*unsafe.Pointer)(yv) = *(*unsafe.Pointer)(v)
- } else {
- typedmemmove(t.elem, yv, v)
- }
- yi++
- yk = add(yk, uintptr(t.keysize))
- yv = add(yv, uintptr(t.valuesize))
+ typedmemmove(t.key, dst.k, k) // copy value
}
+ if t.indirectvalue {
+ *(*unsafe.Pointer)(dst.v) = *(*unsafe.Pointer)(v)
+ } else {
+ typedmemmove(t.elem, dst.v, v)
+ }
+ dst.i++
+ // These updates might push these pointers past the end of the
+ // key or value arrays. That's ok, as we have the overflow pointer
+ // at the end of the bucket to protect against pointing past the
+ // end of the bucket.
+ dst.k = add(dst.k, uintptr(t.keysize))
+ dst.v = add(dst.v, uintptr(t.valuesize))
}
}
// Unlink the overflow buckets & clear key/value to help GC.
- if h.flags&oldIterator == 0 {
- b = (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
+ if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 {
+ b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))
// Preserve b.tophash because the evacuation
// state is maintained there.
- if t.bucket.kind&kindNoPointers == 0 {
- memclrHasPointers(add(unsafe.Pointer(b), dataOffset), uintptr(t.bucketsize)-dataOffset)
- } else {
- memclrNoHeapPointers(add(unsafe.Pointer(b), dataOffset), uintptr(t.bucketsize)-dataOffset)
- }
+ ptr := add(b, dataOffset)
+ n := uintptr(t.bucketsize) - dataOffset
+ memclrHasPointers(ptr, n)
}
}
- // Advance evacuation mark
if oldbucket == h.nevacuate {
- h.nevacuate = oldbucket + 1
- // Experiments suggest that 1024 is overkill by at least an order of magnitude.
- // Put it in there as a safeguard anyway, to ensure O(1) behavior.
- stop := h.nevacuate + 1024
- if stop > newbit {
- stop = newbit
- }
- for h.nevacuate != stop && bucketEvacuated(t, h, h.nevacuate) {
- h.nevacuate++
- }
- if h.nevacuate == newbit { // newbit == # of oldbuckets
- // Growing is all done. Free old main bucket array.
- h.oldbuckets = nil
- // Can discard old overflow buckets as well.
- // If they are still referenced by an iterator,
- // then the iterator holds a pointers to the slice.
- if h.extra != nil {
- h.extra.overflow[1] = nil
- }
- h.flags &^= sameSizeGrow
+ advanceEvacuationMark(h, t, newbit)
+ }
+}
+
+func advanceEvacuationMark(h *hmap, t *maptype, newbit uintptr) {
+ h.nevacuate++
+ // Experiments suggest that 1024 is overkill by at least an order of magnitude.
+ // Put it in there as a safeguard anyway, to ensure O(1) behavior.
+ stop := h.nevacuate + 1024
+ if stop > newbit {
+ stop = newbit
+ }
+ for h.nevacuate != stop && bucketEvacuated(t, h, h.nevacuate) {
+ h.nevacuate++
+ }
+ if h.nevacuate == newbit { // newbit == # of oldbuckets
+ // Growing is all done. Free old main bucket array.
+ h.oldbuckets = nil
+ // Can discard old overflow buckets as well.
+ // If they are still referenced by an iterator,
+	// then the iterator holds a pointer to the slice.
+ if h.extra != nil {
+ h.extra.oldoverflow = nil
}
+ h.flags &^= sameSizeGrow
}
}
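
advanceEvacuationMark factors out the incremental-growth bookkeeping: after a bucket at the mark is evacuated, it advances h.nevacuate past any buckets that are already done, scanning at most 1024 of them so a single map write stays O(1), and frees the old arrays once the mark reaches the end. A toy model of the bounded scan, with a bool slice standing in for the evacuated check:

package main

import "fmt"

// advanceMark models the bounded catch-up scan; done[] stands in for the
// runtime's bucketEvacuated check against the old bucket's tophash.
func advanceMark(nevacuate uintptr, done []bool, newbit uintptr) uintptr {
	nevacuate++
	// Scan at most 1024 buckets past the mark so one map write never does
	// unbounded work; 1024 is the same safeguard the runtime code uses.
	stop := nevacuate + 1024
	if stop > newbit {
		stop = newbit
	}
	for nevacuate != stop && done[nevacuate] {
		nevacuate++
	}
	return nevacuate
}

func main() {
	done := []bool{true, true, true, false, true, false, true, true}
	// Bucket 0 was just evacuated; the mark skips 1 and 2 (already done)
	// and stops at 3, the first bucket still waiting.
	fmt.Println(advanceMark(0, done, uintptr(len(done)))) // prints 3
}
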
@@ -1230,7 +1185,45 @@ func ismapkey(t *_type) bool {
//go:linkname reflect_makemap reflect.makemap
func reflect_makemap(t *maptype, cap int) *hmap {
- return makemap(t, int64(cap), nil, nil)
+	// Check invariants and reflect's math.
+ if sz := unsafe.Sizeof(hmap{}); sz != t.hmap.size {
+ println("runtime: sizeof(hmap) =", sz, ", t.hmap.size =", t.hmap.size)
+ throw("bad hmap size")
+ }
+ if !ismapkey(t.key) {
+ throw("runtime.reflect_makemap: unsupported map key type")
+ }
+ if t.key.size > maxKeySize && (!t.indirectkey || t.keysize != uint8(sys.PtrSize)) ||
+ t.key.size <= maxKeySize && (t.indirectkey || t.keysize != uint8(t.key.size)) {
+ throw("key size wrong")
+ }
+ if t.elem.size > maxValueSize && (!t.indirectvalue || t.valuesize != uint8(sys.PtrSize)) ||
+ t.elem.size <= maxValueSize && (t.indirectvalue || t.valuesize != uint8(t.elem.size)) {
+ throw("value size wrong")
+ }
+ if t.key.align > bucketCnt {
+ throw("key align too big")
+ }
+ if t.elem.align > bucketCnt {
+ throw("value align too big")
+ }
+ if t.key.size%uintptr(t.key.align) != 0 {
+ throw("key size not a multiple of key align")
+ }
+ if t.elem.size%uintptr(t.elem.align) != 0 {
+ throw("value size not a multiple of value align")
+ }
+ if bucketCnt < 8 {
+ throw("bucketsize too small for proper alignment")
+ }
+ if dataOffset%uintptr(t.key.align) != 0 {
+ throw("need padding in bucket (key)")
+ }
+ if dataOffset%uintptr(t.elem.align) != 0 {
+ throw("need padding in bucket (value)")
+ }
+
+ return makemap(t, cap, nil)
}
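
The extra checks in reflect_makemap mirror the layout invariants the compiler relies on: among other things, a key (or value) wider than some threshold must be stored indirectly, so the in-bucket slot is exactly one pointer. The threshold constant is not shown in this hunk; the sketch below assumes 128 bytes purely for illustration.

package main

import (
	"fmt"
	"unsafe"
)

// maxKeySize is an assumption for illustration; the real constant is defined
// elsewhere in hashmap.go and is not shown in this hunk.
const maxKeySize = 128

// bucketKeySlot reports whether a key of the given size is stored indirectly
// and how many bytes its in-bucket slot occupies, mirroring the invariant
// that reflect_makemap now re-checks (indirectkey vs. keysize).
func bucketKeySlot(keySize uintptr) (indirect bool, slot uintptr) {
	if keySize > maxKeySize {
		return true, unsafe.Sizeof(uintptr(0)) // slot holds only a pointer
	}
	return false, keySize
}

func main() {
	fmt.Println(bucketKeySlot(16))  // false 16: small keys live inline in the bucket
	fmt.Println(bucketKeySlot(256)) // true 8: big keys live behind a pointer (64-bit)
}
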
//go:linkname reflect_mapaccess reflect.mapaccess
@@ -1277,7 +1270,7 @@ func reflect_maplen(h *hmap) int {
return 0
}
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer( /* &h */ nil))
+ callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, funcPC(reflect_maplen))
}
return h.count
diff --git a/libgo/go/runtime/hashmap_fast.go b/libgo/go/runtime/hashmap_fast.go
index bec8fdac14e..e0fc9815131 100644
--- a/libgo/go/runtime/hashmap_fast.go
+++ b/libgo/go/runtime/hashmap_fast.go
@@ -11,7 +11,7 @@ import (
func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil))
+ callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast32))
}
if h == nil || h.count == 0 {
@@ -26,7 +26,7 @@ func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer {
b = (*bmap)(h.buckets)
} else {
hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
- m := uintptr(1)<<h.B - 1
+ m := bucketMask(h.B)
b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
if !h.sameSizeGrow() {
@@ -39,28 +39,19 @@ func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer {
}
}
}
- for {
- for i := uintptr(0); i < bucketCnt; i++ {
- k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4)))
- if k != key {
- continue
+ for ; b != nil; b = b.overflow(t) {
+ for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) {
+ if *(*uint32)(k) == key && b.tophash[i] != empty {
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize))
}
- x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
- if x == empty {
- continue
- }
- return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize))
- }
- b = b.overflow(t)
- if b == nil {
- return unsafe.Pointer(&zeroVal[0])
}
}
+ return unsafe.Pointer(&zeroVal[0])
}
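
The fast32/fast64 lookups are flattened into a single loop over the bucket chain, comparing the key directly and consulting tophash only to skip empty slots; b.keys() is presumably a small helper that skips past the tophash header. A self-contained analogue of that loop shape, with simplified stand-in types:

package main

import "fmt"

const bucketCnt = 8

// bucket is a simplified stand-in for the runtime's bmap layout:
// a tophash header, then the keys, then the values, then an overflow link.
type bucket struct {
	tophash  [bucketCnt]uint8 // 0 plays the role of "empty" in this sketch
	keys     [bucketCnt]uint32
	values   [bucketCnt]string
	overflow *bucket
}

// lookup has the same shape as the rewritten mapaccess*_fast32 loop:
// walk the chain, compare keys directly, use tophash only to skip empties.
func lookup(b *bucket, key uint32) (string, bool) {
	for ; b != nil; b = b.overflow {
		for i := 0; i < bucketCnt; i++ {
			if b.keys[i] == key && b.tophash[i] != 0 {
				return b.values[i], true
			}
		}
	}
	return "", false
}

func main() {
	b := &bucket{}
	b.tophash[2], b.keys[2], b.values[2] = 0x9e, 42, "answer"
	fmt.Println(lookup(b, 42)) // answer true
	fmt.Println(lookup(b, 7))  // "" false
}
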
func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil))
+ callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast32))
}
if h == nil || h.count == 0 {
@@ -75,7 +66,7 @@ func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) {
b = (*bmap)(h.buckets)
} else {
hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
- m := uintptr(1)<<h.B - 1
+ m := bucketMask(h.B)
b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
if !h.sameSizeGrow() {
@@ -88,28 +79,19 @@ func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) {
}
}
}
- for {
- for i := uintptr(0); i < bucketCnt; i++ {
- k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4)))
- if k != key {
- continue
- }
- x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
- if x == empty {
- continue
+ for ; b != nil; b = b.overflow(t) {
+ for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) {
+ if *(*uint32)(k) == key && b.tophash[i] != empty {
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)), true
}
- return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)), true
- }
- b = b.overflow(t)
- if b == nil {
- return unsafe.Pointer(&zeroVal[0]), false
}
}
+ return unsafe.Pointer(&zeroVal[0]), false
}
func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil))
+ callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast64))
}
if h == nil || h.count == 0 {
@@ -124,7 +106,7 @@ func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer {
b = (*bmap)(h.buckets)
} else {
hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
- m := uintptr(1)<<h.B - 1
+ m := bucketMask(h.B)
b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
if !h.sameSizeGrow() {
@@ -137,28 +119,19 @@ func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer {
}
}
}
- for {
- for i := uintptr(0); i < bucketCnt; i++ {
- k := *((*uint64)(add(unsafe.Pointer(b), dataOffset+i*8)))
- if k != key {
- continue
+ for ; b != nil; b = b.overflow(t) {
+ for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) {
+ if *(*uint64)(k) == key && b.tophash[i] != empty {
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize))
}
- x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
- if x == empty {
- continue
- }
- return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize))
- }
- b = b.overflow(t)
- if b == nil {
- return unsafe.Pointer(&zeroVal[0])
}
}
+ return unsafe.Pointer(&zeroVal[0])
}
func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil))
+ callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast64))
}
if h == nil || h.count == 0 {
@@ -173,7 +146,7 @@ func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) {
b = (*bmap)(h.buckets)
} else {
hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
- m := uintptr(1)<<h.B - 1
+ m := bucketMask(h.B)
b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
if !h.sameSizeGrow() {
@@ -186,28 +159,19 @@ func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) {
}
}
}
- for {
- for i := uintptr(0); i < bucketCnt; i++ {
- k := *((*uint64)(add(unsafe.Pointer(b), dataOffset+i*8)))
- if k != key {
- continue
- }
- x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
- if x == empty {
- continue
+ for ; b != nil; b = b.overflow(t) {
+ for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) {
+ if *(*uint64)(k) == key && b.tophash[i] != empty {
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)), true
}
- return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)), true
- }
- b = b.overflow(t)
- if b == nil {
- return unsafe.Pointer(&zeroVal[0]), false
}
}
+ return unsafe.Pointer(&zeroVal[0]), false
}
func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil))
+ callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_faststr))
}
if h == nil || h.count == 0 {
@@ -222,13 +186,9 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
b := (*bmap)(h.buckets)
if key.len < 32 {
// short key, doing lots of comparisons is ok
- for i := uintptr(0); i < bucketCnt; i++ {
- x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
- if x == empty {
- continue
- }
- k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize))
- if k.len != key.len {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ k := (*stringStruct)(kptr)
+ if k.len != key.len || b.tophash[i] == empty {
continue
}
if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
@@ -239,13 +199,9 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
}
// long key, try not to do more comparisons than necessary
keymaybe := uintptr(bucketCnt)
- for i := uintptr(0); i < bucketCnt; i++ {
- x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
- if x == empty {
- continue
- }
- k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize))
- if k.len != key.len {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ k := (*stringStruct)(kptr)
+ if k.len != key.len || b.tophash[i] == empty {
continue
}
if k.str == key.str {
@@ -275,7 +231,7 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
}
dohash:
hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0))
- m := uintptr(1)<<h.B - 1
+ m := bucketMask(h.B)
b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
if !h.sameSizeGrow() {
@@ -287,34 +243,24 @@ dohash:
b = oldb
}
}
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
- for {
- for i := uintptr(0); i < bucketCnt; i++ {
- x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
- if x != top {
- continue
- }
- k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize))
- if k.len != key.len {
+ top := tophash(hash)
+ for ; b != nil; b = b.overflow(t) {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ k := (*stringStruct)(kptr)
+ if k.len != key.len || b.tophash[i] != top {
continue
}
if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize))
}
}
- b = b.overflow(t)
- if b == nil {
- return unsafe.Pointer(&zeroVal[0])
- }
}
+ return unsafe.Pointer(&zeroVal[0])
}
func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil))
+ callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_faststr))
}
if h == nil || h.count == 0 {
@@ -329,13 +275,9 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) {
b := (*bmap)(h.buckets)
if key.len < 32 {
// short key, doing lots of comparisons is ok
- for i := uintptr(0); i < bucketCnt; i++ {
- x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
- if x == empty {
- continue
- }
- k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize))
- if k.len != key.len {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ k := (*stringStruct)(kptr)
+ if k.len != key.len || b.tophash[i] == empty {
continue
}
if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
@@ -346,13 +288,9 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) {
}
// long key, try not to do more comparisons than necessary
keymaybe := uintptr(bucketCnt)
- for i := uintptr(0); i < bucketCnt; i++ {
- x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
- if x == empty {
- continue
- }
- k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize))
- if k.len != key.len {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ k := (*stringStruct)(kptr)
+ if k.len != key.len || b.tophash[i] == empty {
continue
}
if k.str == key.str {
@@ -382,7 +320,7 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) {
}
dohash:
hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0))
- m := uintptr(1)<<h.B - 1
+ m := bucketMask(h.B)
b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
if c := h.oldbuckets; c != nil {
if !h.sameSizeGrow() {
@@ -394,37 +332,113 @@ dohash:
b = oldb
}
}
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
+ top := tophash(hash)
+ for ; b != nil; b = b.overflow(t) {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ k := (*stringStruct)(kptr)
+ if k.len != key.len || b.tophash[i] != top {
+ continue
+ }
+ if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true
+ }
+ }
+ }
+ return unsafe.Pointer(&zeroVal[0]), false
+}
+
+func mapassign_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer {
+ if h == nil {
+ panic(plainError("assignment to entry in nil map"))
+ }
+ if raceenabled {
+ callerpc := getcallerpc()
+ racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32))
+ }
+ if h.flags&hashWriting != 0 {
+ throw("concurrent map writes")
+ }
+ hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
+
+ // Set hashWriting after calling alg.hash for consistency with mapassign.
+ h.flags |= hashWriting
+
+ if h.buckets == nil {
+ h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
+ }
+
+again:
+ bucket := hash & bucketMask(h.B)
+ if h.growing() {
+ growWork_fast32(t, h, bucket)
}
+ b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
+
+ var insertb *bmap
+ var inserti uintptr
+ var insertk unsafe.Pointer
+
for {
for i := uintptr(0); i < bucketCnt; i++ {
- x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
- if x != top {
+ if b.tophash[i] == empty {
+ if insertb == nil {
+ inserti = i
+ insertb = b
+ }
continue
}
- k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize))
- if k.len != key.len {
+ k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4)))
+ if k != key {
continue
}
- if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
- return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true
- }
+ inserti = i
+ insertb = b
+ goto done
}
- b = b.overflow(t)
- if b == nil {
- return unsafe.Pointer(&zeroVal[0]), false
+ ovf := b.overflow(t)
+ if ovf == nil {
+ break
}
+ b = ovf
+ }
+
+ // Did not find mapping for key. Allocate new cell & add entry.
+
+ // If we hit the max load factor or we have too many overflow buckets,
+ // and we're not already in the middle of growing, start growing.
+ if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
+ hashGrow(t, h)
+ goto again // Growing the table invalidates everything, so try again
+ }
+
+ if insertb == nil {
+ // all current buckets are full, allocate a new one.
+ insertb = h.newoverflow(t, b)
+ inserti = 0 // not necessary, but avoids needlessly spilling inserti
}
+ insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks
+
+ insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4)
+ // store new key at insert position
+ *(*uint32)(insertk) = key
+
+ h.count++
+
+done:
+ val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize))
+ if h.flags&hashWriting == 0 {
+ throw("concurrent map writes")
+ }
+ h.flags &^= hashWriting
+ return val
}
-func mapassign_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer {
+func mapassign_fast32ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
if h == nil {
panic(plainError("assignment to entry in nil map"))
}
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32))
}
if h.flags&hashWriting != 0 {
@@ -436,38 +450,35 @@ func mapassign_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer {
h.flags |= hashWriting
if h.buckets == nil {
- h.buckets = newarray(t.bucket, 1)
+ h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
}
again:
- bucket := hash & (uintptr(1)<<h.B - 1)
+ bucket := hash & bucketMask(h.B)
if h.growing() {
- growWork(t, h, bucket)
+ growWork_fast32(t, h, bucket)
}
b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
- var inserti *uint8
+ var insertb *bmap
+ var inserti uintptr
var insertk unsafe.Pointer
- var val unsafe.Pointer
+
for {
for i := uintptr(0); i < bucketCnt; i++ {
- if b.tophash[i] != top {
- if b.tophash[i] == empty && inserti == nil {
- inserti = &b.tophash[i]
- insertk = add(unsafe.Pointer(b), dataOffset+i*4)
- val = add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize))
+ if b.tophash[i] == empty {
+ if insertb == nil {
+ inserti = i
+ insertb = b
}
continue
}
- k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4)))
+ k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*4)))
if k != key {
continue
}
- val = add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize))
+ inserti = i
+ insertb = b
goto done
}
ovf := b.overflow(t)
@@ -481,25 +492,26 @@ again:
// If we hit the max load factor or we have too many overflow buckets,
// and we're not already in the middle of growing, start growing.
- if !h.growing() && (overLoadFactor(int64(h.count), h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
+ if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
hashGrow(t, h)
goto again // Growing the table invalidates everything, so try again
}
- if inserti == nil {
+ if insertb == nil {
// all current buckets are full, allocate a new one.
- newb := h.newoverflow(t, b)
- inserti = &newb.tophash[0]
- insertk = add(unsafe.Pointer(newb), dataOffset)
- val = add(insertk, bucketCnt*4)
+ insertb = h.newoverflow(t, b)
+ inserti = 0 // not necessary, but avoids needlessly spilling inserti
}
+ insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks
+
+ insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4)
+ // store new key at insert position
+ *(*unsafe.Pointer)(insertk) = key
- // store new key/value at insert position
- typedmemmove(t.key, insertk, unsafe.Pointer(&key))
- *inserti = top
h.count++
done:
+ val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize))
if h.flags&hashWriting == 0 {
throw("concurrent map writes")
}
@@ -512,7 +524,7 @@ func mapassign_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer {
panic(plainError("assignment to entry in nil map"))
}
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64))
}
if h.flags&hashWriting != 0 {
@@ -524,30 +536,26 @@ func mapassign_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer {
h.flags |= hashWriting
if h.buckets == nil {
- h.buckets = newarray(t.bucket, 1)
+ h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
}
again:
- bucket := hash & (uintptr(1)<<h.B - 1)
+ bucket := hash & bucketMask(h.B)
if h.growing() {
- growWork(t, h, bucket)
+ growWork_fast64(t, h, bucket)
}
b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
- var inserti *uint8
+ var insertb *bmap
+ var inserti uintptr
var insertk unsafe.Pointer
- var val unsafe.Pointer
+
for {
for i := uintptr(0); i < bucketCnt; i++ {
- if b.tophash[i] != top {
- if b.tophash[i] == empty && inserti == nil {
- inserti = &b.tophash[i]
- insertk = add(unsafe.Pointer(b), dataOffset+i*8)
- val = add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize))
+ if b.tophash[i] == empty {
+ if insertb == nil {
+ insertb = b
+ inserti = i
}
continue
}
@@ -555,7 +563,8 @@ again:
if k != key {
continue
}
- val = add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize))
+ insertb = b
+ inserti = i
goto done
}
ovf := b.overflow(t)
@@ -569,25 +578,26 @@ again:
// If we hit the max load factor or we have too many overflow buckets,
// and we're not already in the middle of growing, start growing.
- if !h.growing() && (overLoadFactor(int64(h.count), h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
+ if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
hashGrow(t, h)
goto again // Growing the table invalidates everything, so try again
}
- if inserti == nil {
+ if insertb == nil {
// all current buckets are full, allocate a new one.
- newb := h.newoverflow(t, b)
- inserti = &newb.tophash[0]
- insertk = add(unsafe.Pointer(newb), dataOffset)
- val = add(insertk, bucketCnt*8)
+ insertb = h.newoverflow(t, b)
+ inserti = 0 // not necessary, but avoids needlessly spilling inserti
}
+ insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks
+
+ insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8)
+ // store new key at insert position
+ *(*uint64)(insertk) = key
- // store new key/value at insert position
- typedmemmove(t.key, insertk, unsafe.Pointer(&key))
- *inserti = top
h.count++
done:
+ val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize))
if h.flags&hashWriting == 0 {
throw("concurrent map writes")
}
@@ -595,48 +605,131 @@ done:
return val
}
-func mapassign_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
+func mapassign_fast64ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
if h == nil {
panic(plainError("assignment to entry in nil map"))
}
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&t))
- racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_faststr))
+ callerpc := getcallerpc()
+ racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64))
}
if h.flags&hashWriting != 0 {
throw("concurrent map writes")
}
- key := stringStructOf(&ky)
- hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0))
+ hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0))
// Set hashWriting after calling alg.hash for consistency with mapassign.
h.flags |= hashWriting
if h.buckets == nil {
- h.buckets = newarray(t.bucket, 1)
+ h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
}
again:
- bucket := hash & (uintptr(1)<<h.B - 1)
+ bucket := hash & bucketMask(h.B)
if h.growing() {
- growWork(t, h, bucket)
+ growWork_fast64(t, h, bucket)
}
b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
+
+ var insertb *bmap
+ var inserti uintptr
+ var insertk unsafe.Pointer
+
+ for {
+ for i := uintptr(0); i < bucketCnt; i++ {
+ if b.tophash[i] == empty {
+ if insertb == nil {
+ insertb = b
+ inserti = i
+ }
+ continue
+ }
+ k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*8)))
+ if k != key {
+ continue
+ }
+ insertb = b
+ inserti = i
+ goto done
+ }
+ ovf := b.overflow(t)
+ if ovf == nil {
+ break
+ }
+ b = ovf
+ }
+
+ // Did not find mapping for key. Allocate new cell & add entry.
+
+ // If we hit the max load factor or we have too many overflow buckets,
+ // and we're not already in the middle of growing, start growing.
+ if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
+ hashGrow(t, h)
+ goto again // Growing the table invalidates everything, so try again
}
- var inserti *uint8
+ if insertb == nil {
+ // all current buckets are full, allocate a new one.
+ insertb = h.newoverflow(t, b)
+ inserti = 0 // not necessary, but avoids needlessly spilling inserti
+ }
+ insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks
+
+ insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8)
+ // store new key at insert position
+ *(*unsafe.Pointer)(insertk) = key
+
+ h.count++
+
+done:
+ val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize))
+ if h.flags&hashWriting == 0 {
+ throw("concurrent map writes")
+ }
+ h.flags &^= hashWriting
+ return val
+}
+
+func mapassign_faststr(t *maptype, h *hmap, s string) unsafe.Pointer {
+ if h == nil {
+ panic(plainError("assignment to entry in nil map"))
+ }
+ if raceenabled {
+ callerpc := getcallerpc()
+ racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_faststr))
+ }
+ if h.flags&hashWriting != 0 {
+ throw("concurrent map writes")
+ }
+ key := stringStructOf(&s)
+ hash := t.key.hashfn(noescape(unsafe.Pointer(&s)), uintptr(h.hash0))
+
+ // Set hashWriting after calling alg.hash for consistency with mapassign.
+ h.flags |= hashWriting
+
+ if h.buckets == nil {
+ h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
+ }
+
+again:
+ bucket := hash & bucketMask(h.B)
+ if h.growing() {
+ growWork_faststr(t, h, bucket)
+ }
+ b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
+ top := tophash(hash)
+
+ var insertb *bmap
+ var inserti uintptr
var insertk unsafe.Pointer
- var val unsafe.Pointer
+
for {
for i := uintptr(0); i < bucketCnt; i++ {
if b.tophash[i] != top {
- if b.tophash[i] == empty && inserti == nil {
- inserti = &b.tophash[i]
- insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
- val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
+ if b.tophash[i] == empty && insertb == nil {
+ insertb = b
+ inserti = i
}
continue
}
@@ -648,7 +741,8 @@ again:
continue
}
// already have a mapping for key. Update it.
- val = add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize))
+ inserti = i
+ insertb = b
goto done
}
ovf := b.overflow(t)
@@ -662,25 +756,25 @@ again:
// If we hit the max load factor or we have too many overflow buckets,
// and we're not already in the middle of growing, start growing.
- if !h.growing() && (overLoadFactor(int64(h.count), h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
+ if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
hashGrow(t, h)
goto again // Growing the table invalidates everything, so try again
}
- if inserti == nil {
+ if insertb == nil {
// all current buckets are full, allocate a new one.
- newb := h.newoverflow(t, b)
- inserti = &newb.tophash[0]
- insertk = add(unsafe.Pointer(newb), dataOffset)
- val = add(insertk, bucketCnt*2*sys.PtrSize)
+ insertb = h.newoverflow(t, b)
+ inserti = 0 // not necessary, but avoids needlessly spilling inserti
}
+ insertb.tophash[inserti&(bucketCnt-1)] = top // mask inserti to avoid bounds checks
- // store new key/value at insert position
+ insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*2*sys.PtrSize)
+ // store new key at insert position
*((*stringStruct)(insertk)) = *key
- *inserti = top
h.count++
done:
+ val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*2*sys.PtrSize+inserti*uintptr(t.valuesize))
if h.flags&hashWriting == 0 {
throw("concurrent map writes")
}
@@ -690,7 +784,7 @@ done:
func mapdelete_fast32(t *maptype, h *hmap, key uint32) {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast32))
}
if h == nil || h.count == 0 {
@@ -705,38 +799,32 @@ func mapdelete_fast32(t *maptype, h *hmap, key uint32) {
// Set hashWriting after calling alg.hash for consistency with mapdelete
h.flags |= hashWriting
- bucket := hash & (uintptr(1)<<h.B - 1)
+ bucket := hash & bucketMask(h.B)
if h.growing() {
- growWork(t, h, bucket)
- }
- b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
+ growWork_fast32(t, h, bucket)
}
- for {
- for i := uintptr(0); i < bucketCnt; i++ {
- if b.tophash[i] != top {
+ b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
+search:
+ for ; b != nil; b = b.overflow(t) {
+ for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) {
+ if key != *(*uint32)(k) || b.tophash[i] == empty {
continue
}
- k := (*uint32)(add(unsafe.Pointer(b), dataOffset+i*4))
- if key != *k {
- continue
+ // Only clear key if there are pointers in it.
+ if t.key.kind&kindNoPointers == 0 {
+ memclrHasPointers(k, t.key.size)
+ }
+ // Only clear value if there are pointers in it.
+ if t.elem.kind&kindNoPointers == 0 {
+ v := add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize))
+ memclrHasPointers(v, t.elem.size)
}
- typedmemclr(t.key, unsafe.Pointer(k))
- v := unsafe.Pointer(uintptr(unsafe.Pointer(b)) + dataOffset + bucketCnt*4 + i*uintptr(t.valuesize))
- typedmemclr(t.elem, v)
b.tophash[i] = empty
h.count--
- goto done
- }
- b = b.overflow(t)
- if b == nil {
- goto done
+ break search
}
}
-done:
if h.flags&hashWriting == 0 {
throw("concurrent map writes")
}
@@ -745,7 +833,7 @@ done:
func mapdelete_fast64(t *maptype, h *hmap, key uint64) {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast64))
}
if h == nil || h.count == 0 {
@@ -760,38 +848,32 @@ func mapdelete_fast64(t *maptype, h *hmap, key uint64) {
// Set hashWriting after calling alg.hash for consistency with mapdelete
h.flags |= hashWriting
- bucket := hash & (uintptr(1)<<h.B - 1)
+ bucket := hash & bucketMask(h.B)
if h.growing() {
- growWork(t, h, bucket)
+ growWork_fast64(t, h, bucket)
}
- b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
- for {
- for i := uintptr(0); i < bucketCnt; i++ {
- if b.tophash[i] != top {
+ b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
+search:
+ for ; b != nil; b = b.overflow(t) {
+ for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) {
+ if key != *(*uint64)(k) || b.tophash[i] == empty {
continue
}
- k := (*uint64)(add(unsafe.Pointer(b), dataOffset+i*8))
- if key != *k {
- continue
+ // Only clear key if there are pointers in it.
+ if t.key.kind&kindNoPointers == 0 {
+ memclrHasPointers(k, t.key.size)
+ }
+ // Only clear value if there are pointers in it.
+ if t.elem.kind&kindNoPointers == 0 {
+ v := add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize))
+ memclrHasPointers(v, t.elem.size)
}
- typedmemclr(t.key, unsafe.Pointer(k))
- v := unsafe.Pointer(uintptr(unsafe.Pointer(b)) + dataOffset + bucketCnt*8 + i*uintptr(t.valuesize))
- typedmemclr(t.elem, v)
b.tophash[i] = empty
h.count--
- goto done
- }
- b = b.overflow(t)
- if b == nil {
- goto done
+ break search
}
}
-done:
if h.flags&hashWriting == 0 {
throw("concurrent map writes")
}
@@ -800,7 +882,7 @@ done:
func mapdelete_faststr(t *maptype, h *hmap, ky string) {
if raceenabled && h != nil {
- callerpc := getcallerpc(unsafe.Pointer(&t))
+ callerpc := getcallerpc()
racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_faststr))
}
if h == nil || h.count == 0 {
@@ -816,43 +898,340 @@ func mapdelete_faststr(t *maptype, h *hmap, ky string) {
// Set hashWriting after calling alg.hash for consistency with mapdelete
h.flags |= hashWriting
- bucket := hash & (uintptr(1)<<h.B - 1)
+ bucket := hash & bucketMask(h.B)
if h.growing() {
- growWork(t, h, bucket)
- }
- b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
- top := uint8(hash >> (sys.PtrSize*8 - 8))
- if top < minTopHash {
- top += minTopHash
- }
- for {
- for i := uintptr(0); i < bucketCnt; i++ {
- if b.tophash[i] != top {
- continue
- }
- k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize))
- if k.len != key.len {
+ growWork_faststr(t, h, bucket)
+ }
+ b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
+ top := tophash(hash)
+search:
+ for ; b != nil; b = b.overflow(t) {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ k := (*stringStruct)(kptr)
+ if k.len != key.len || b.tophash[i] != top {
continue
}
if k.str != key.str && !memequal(k.str, key.str, uintptr(key.len)) {
continue
}
- typedmemclr(t.key, unsafe.Pointer(k))
- v := unsafe.Pointer(uintptr(unsafe.Pointer(b)) + dataOffset + bucketCnt*2*sys.PtrSize + i*uintptr(t.valuesize))
- typedmemclr(t.elem, v)
+ // Clear key's pointer.
+ k.str = nil
+ // Only clear value if there are pointers in it.
+ if t.elem.kind&kindNoPointers == 0 {
+ v := add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize))
+ memclrHasPointers(v, t.elem.size)
+ }
b.tophash[i] = empty
h.count--
- goto done
- }
- b = b.overflow(t)
- if b == nil {
- goto done
+ break search
}
}
-done:
if h.flags&hashWriting == 0 {
throw("concurrent map writes")
}
h.flags &^= hashWriting
}
+
+func growWork_fast32(t *maptype, h *hmap, bucket uintptr) {
+ // make sure we evacuate the oldbucket corresponding
+ // to the bucket we're about to use
+ evacuate_fast32(t, h, bucket&h.oldbucketmask())
+
+ // evacuate one more oldbucket to make progress on growing
+ if h.growing() {
+ evacuate_fast32(t, h, h.nevacuate)
+ }
+}
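+
Each growWork_* helper evacuates the old bucket backing the bucket about to be written plus one more at the evacuation mark, so a grow completes after at most one pass over the old buckets even when all writes land in the same bucket. A toy model of that pacing (the mark handling is simplified relative to advanceEvacuationMark):

package main

import "fmt"

func main() {
	const nold = 8 // number of old buckets
	evacuated := make([]bool, nold)
	mark := 0 // models h.nevacuate
	write := func(oldbucket int) {
		evacuated[oldbucket] = true // evacuate the bucket we're about to use
		if mark < nold {
			evacuated[mark] = true // evacuate one more to make progress
		}
		for mark < nold && evacuated[mark] {
			mark++
		}
	}
	for i := 0; i < nold; i++ {
		write(3) // even repeated writes to one bucket drive the grow forward
	}
	fmt.Println(evacuated, "grow finished:", mark == nold)
}
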
+
+func evacuate_fast32(t *maptype, h *hmap, oldbucket uintptr) {
+ b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
+ newbit := h.noldbuckets()
+ if !evacuated(b) {
+ // TODO: reuse overflow buckets instead of using new ones, if there
+ // is no iterator using the old buckets. (If !oldIterator.)
+
+ // xy contains the x and y (low and high) evacuation destinations.
+ var xy [2]evacDst
+ x := &xy[0]
+ x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize)))
+ x.k = add(unsafe.Pointer(x.b), dataOffset)
+ x.v = add(x.k, bucketCnt*4)
+
+ if !h.sameSizeGrow() {
+ // Only calculate y pointers if we're growing bigger.
+ // Otherwise GC can see bad pointers.
+ y := &xy[1]
+ y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize)))
+ y.k = add(unsafe.Pointer(y.b), dataOffset)
+ y.v = add(y.k, bucketCnt*4)
+ }
+
+ for ; b != nil; b = b.overflow(t) {
+ k := add(unsafe.Pointer(b), dataOffset)
+ v := add(k, bucketCnt*4)
+ for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 4), add(v, uintptr(t.valuesize)) {
+ top := b.tophash[i]
+ if top == empty {
+ b.tophash[i] = evacuatedEmpty
+ continue
+ }
+ if top < minTopHash {
+ throw("bad map state")
+ }
+ var useY uint8
+ if !h.sameSizeGrow() {
+ // Compute hash to make our evacuation decision (whether we need
+ // to send this key/value to bucket x or bucket y).
+ hash := t.key.hashfn(k, uintptr(h.hash0))
+ if hash&newbit != 0 {
+ useY = 1
+ }
+ }
+
+ b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap
+ dst := &xy[useY] // evacuation destination
+
+ if dst.i == bucketCnt {
+ dst.b = h.newoverflow(t, dst.b)
+ dst.i = 0
+ dst.k = add(unsafe.Pointer(dst.b), dataOffset)
+ dst.v = add(dst.k, bucketCnt*4)
+ }
+ dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check
+
+ // Copy key.
+ if sys.PtrSize == 4 && t.key.kind&kindNoPointers == 0 && writeBarrier.enabled {
+ writebarrierptr((*uintptr)(dst.k), *(*uintptr)(k))
+ } else {
+ *(*uint32)(dst.k) = *(*uint32)(k)
+ }
+
+ typedmemmove(t.elem, dst.v, v)
+ dst.i++
+ // These updates might push these pointers past the end of the
+ // key or value arrays. That's ok, as we have the overflow pointer
+ // at the end of the bucket to protect against pointing past the
+ // end of the bucket.
+ dst.k = add(dst.k, 4)
+ dst.v = add(dst.v, uintptr(t.valuesize))
+ }
+ }
+ // Unlink the overflow buckets & clear key/value to help GC.
+ if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 {
+ b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))
+ // Preserve b.tophash because the evacuation
+ // state is maintained there.
+ ptr := add(b, dataOffset)
+ n := uintptr(t.bucketsize) - dataOffset
+ memclrHasPointers(ptr, n)
+ }
+ }
+
+ if oldbucket == h.nevacuate {
+ advanceEvacuationMark(h, t, newbit)
+ }
+}
+
+func growWork_fast64(t *maptype, h *hmap, bucket uintptr) {
+ // make sure we evacuate the oldbucket corresponding
+ // to the bucket we're about to use
+ evacuate_fast64(t, h, bucket&h.oldbucketmask())
+
+ // evacuate one more oldbucket to make progress on growing
+ if h.growing() {
+ evacuate_fast64(t, h, h.nevacuate)
+ }
+}
+
+func evacuate_fast64(t *maptype, h *hmap, oldbucket uintptr) {
+ b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
+ newbit := h.noldbuckets()
+ if !evacuated(b) {
+ // TODO: reuse overflow buckets instead of using new ones, if there
+ // is no iterator using the old buckets. (If !oldIterator.)
+
+ // xy contains the x and y (low and high) evacuation destinations.
+ var xy [2]evacDst
+ x := &xy[0]
+ x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize)))
+ x.k = add(unsafe.Pointer(x.b), dataOffset)
+ x.v = add(x.k, bucketCnt*8)
+
+ if !h.sameSizeGrow() {
+ // Only calculate y pointers if we're growing bigger.
+ // Otherwise GC can see bad pointers.
+ y := &xy[1]
+ y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize)))
+ y.k = add(unsafe.Pointer(y.b), dataOffset)
+ y.v = add(y.k, bucketCnt*8)
+ }
+
+ for ; b != nil; b = b.overflow(t) {
+ k := add(unsafe.Pointer(b), dataOffset)
+ v := add(k, bucketCnt*8)
+ for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 8), add(v, uintptr(t.valuesize)) {
+ top := b.tophash[i]
+ if top == empty {
+ b.tophash[i] = evacuatedEmpty
+ continue
+ }
+ if top < minTopHash {
+ throw("bad map state")
+ }
+ var useY uint8
+ if !h.sameSizeGrow() {
+ // Compute hash to make our evacuation decision (whether we need
+ // to send this key/value to bucket x or bucket y).
+ hash := t.key.hashfn(k, uintptr(h.hash0))
+ if hash&newbit != 0 {
+ useY = 1
+ }
+ }
+
+ b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap
+ dst := &xy[useY] // evacuation destination
+
+ if dst.i == bucketCnt {
+ dst.b = h.newoverflow(t, dst.b)
+ dst.i = 0
+ dst.k = add(unsafe.Pointer(dst.b), dataOffset)
+ dst.v = add(dst.k, bucketCnt*8)
+ }
+ dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check
+
+ // Copy key.
+ if t.key.kind&kindNoPointers == 0 && writeBarrier.enabled {
+ if sys.PtrSize == 8 {
+ writebarrierptr((*uintptr)(dst.k), *(*uintptr)(k))
+ } else {
+ // There are three ways to squeeze at least one 32 bit pointer into 64 bits.
+ // Give up and call typedmemmove.
+ typedmemmove(t.key, dst.k, k)
+ }
+ } else {
+ *(*uint64)(dst.k) = *(*uint64)(k)
+ }
+
+ typedmemmove(t.elem, dst.v, v)
+ dst.i++
+ // These updates might push these pointers past the end of the
+ // key or value arrays. That's ok, as we have the overflow pointer
+ // at the end of the bucket to protect against pointing past the
+ // end of the bucket.
+ dst.k = add(dst.k, 8)
+ dst.v = add(dst.v, uintptr(t.valuesize))
+ }
+ }
+ // Unlink the overflow buckets & clear key/value to help GC.
+ if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 {
+ b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))
+ // Preserve b.tophash because the evacuation
+ // state is maintained there.
+ ptr := add(b, dataOffset)
+ n := uintptr(t.bucketsize) - dataOffset
+ memclrHasPointers(ptr, n)
+ }
+ }
+
+ if oldbucket == h.nevacuate {
+ advanceEvacuationMark(h, t, newbit)
+ }
+}
+
+func growWork_faststr(t *maptype, h *hmap, bucket uintptr) {
+ // make sure we evacuate the oldbucket corresponding
+ // to the bucket we're about to use
+ evacuate_faststr(t, h, bucket&h.oldbucketmask())
+
+ // evacuate one more oldbucket to make progress on growing
+ if h.growing() {
+ evacuate_faststr(t, h, h.nevacuate)
+ }
+}
+
+func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) {
+ b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
+ newbit := h.noldbuckets()
+ if !evacuated(b) {
+ // TODO: reuse overflow buckets instead of using new ones, if there
+ // is no iterator using the old buckets. (If !oldIterator.)
+
+ // xy contains the x and y (low and high) evacuation destinations.
+ var xy [2]evacDst
+ x := &xy[0]
+ x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize)))
+ x.k = add(unsafe.Pointer(x.b), dataOffset)
+ x.v = add(x.k, bucketCnt*2*sys.PtrSize)
+
+ if !h.sameSizeGrow() {
+ // Only calculate y pointers if we're growing bigger.
+ // Otherwise GC can see bad pointers.
+ y := &xy[1]
+ y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize)))
+ y.k = add(unsafe.Pointer(y.b), dataOffset)
+ y.v = add(y.k, bucketCnt*2*sys.PtrSize)
+ }
+
+ for ; b != nil; b = b.overflow(t) {
+ k := add(unsafe.Pointer(b), dataOffset)
+ v := add(k, bucketCnt*2*sys.PtrSize)
+ for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 2*sys.PtrSize), add(v, uintptr(t.valuesize)) {
+ top := b.tophash[i]
+ if top == empty {
+ b.tophash[i] = evacuatedEmpty
+ continue
+ }
+ if top < minTopHash {
+ throw("bad map state")
+ }
+ var useY uint8
+ if !h.sameSizeGrow() {
+ // Compute hash to make our evacuation decision (whether we need
+ // to send this key/value to bucket x or bucket y).
+ hash := t.key.hashfn(k, uintptr(h.hash0))
+ if hash&newbit != 0 {
+ useY = 1
+ }
+ }
+
+ b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap
+ dst := &xy[useY] // evacuation destination
+
+ if dst.i == bucketCnt {
+ dst.b = h.newoverflow(t, dst.b)
+ dst.i = 0
+ dst.k = add(unsafe.Pointer(dst.b), dataOffset)
+ dst.v = add(dst.k, bucketCnt*2*sys.PtrSize)
+ }
+ dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check
+
+ // Copy key.
+ *(*string)(dst.k) = *(*string)(k)
+
+ typedmemmove(t.elem, dst.v, v)
+ dst.i++
+ // These updates might push these pointers past the end of the
+ // key or value arrays. That's ok, as we have the overflow pointer
+ // at the end of the bucket to protect against pointing past the
+ // end of the bucket.
+ dst.k = add(dst.k, 2*sys.PtrSize)
+ dst.v = add(dst.v, uintptr(t.valuesize))
+ }
+ }
+ // Unlink the overflow buckets & clear key/value to help GC.
+ if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 {
+ b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))
+ // Preserve b.tophash because the evacuation
+ // state is maintained there.
+ ptr := add(b, dataOffset)
+ n := uintptr(t.bucketsize) - dataOffset
+ memclrHasPointers(ptr, n)
+ }
+ }
+
+ if oldbucket == h.nevacuate {
+ advanceEvacuationMark(h, t, newbit)
+ }
+}
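
The three evacuate_fast* variants above all share one split rule: when the table doubles, an entry in old bucket i either stays at index i (destination "x") or moves to i+noldbuckets (destination "y"), chosen by the single hash bit newbit that the larger table starts using. A minimal standalone sketch of that decision, with an ordinary FNV hash standing in for the runtime's key hash (the names and bucket count here are illustrative only):

package main

import (
	"fmt"
	"hash/fnv"
)

// splitTarget mimics the x/y choice in evacuate_fast*: when a map grows
// from noldbuckets to 2*noldbuckets, an entry living in old bucket old
// either stays at index old (x) or moves to old+noldbuckets (y),
// depending on the hash bit selected by newbit (== noldbuckets).
func splitTarget(hash, old, noldbuckets uintptr) uintptr {
	newbit := noldbuckets
	if hash&newbit != 0 {
		return old + noldbuckets // destination y (high half)
	}
	return old // destination x (low half)
}

func main() {
	const noldbuckets = 4 // must be a power of two
	for _, key := range []string{"alpha", "beta", "gamma", "delta"} {
		h := fnv.New64a()
		h.Write([]byte(key))
		hash := uintptr(h.Sum64())
		old := hash & (noldbuckets - 1) // bucket index in the old table
		fmt.Printf("%-6s old bucket %d -> new bucket %d\n", key, old, splitTarget(hash, old, noldbuckets))
	}
}

Because only one previously unused hash bit decides the destination, iteration order stays consistent with where a lookup in the grown table would probe.
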
diff --git a/libgo/go/runtime/heapdump.go b/libgo/go/runtime/heapdump.go
index 166199b5ca3..a4b168d7313 100644
--- a/libgo/go/runtime/heapdump.go
+++ b/libgo/go/runtime/heapdump.go
@@ -200,7 +200,6 @@ func dumptype(t *_type) {
// dump an object
func dumpobj(obj unsafe.Pointer, size uintptr, bv bitvector) {
- dumpbvtypes(&bv, obj)
dumpint(tagObject)
dumpint(uint64(uintptr(obj)))
dumpmemrange(obj, size)
@@ -539,16 +538,6 @@ func dumpfields(bv bitvector) {
dumpint(fieldKindEol)
}
-// The heap dump reader needs to be able to disambiguate
-// Eface entries. So it needs to know every type that might
-// appear in such an entry. The following routine accomplishes that.
-// TODO(rsc, khr): Delete - no longer possible.
-
-// Dump all the types that appear in the type field of
-// any Eface described by this bit vector.
-func dumpbvtypes(bv *bitvector, base unsafe.Pointer) {
-}
-
func makeheapobjbv(p uintptr, size uintptr) bitvector {
// Extend the temp buffer if necessary.
nptr := size / sys.PtrSize
diff --git a/libgo/go/runtime/internal/atomic/atomic_test.go b/libgo/go/runtime/internal/atomic/atomic_test.go
index 879a82f9c82..b697aa8bd39 100644
--- a/libgo/go/runtime/internal/atomic/atomic_test.go
+++ b/libgo/go/runtime/internal/atomic/atomic_test.go
@@ -52,7 +52,7 @@ func TestXadduintptr(t *testing.T) {
// Tests that xadduintptr correctly updates 64-bit values. The place where
// we actually do so is mstats.go, functions mSysStat{Inc,Dec}.
func TestXadduintptrOnUint64(t *testing.T) {
- if sys.BigEndian != 0 {
+ if sys.BigEndian {
// On big endian architectures, we never use xadduintptr to update
// 64-bit values and hence we skip the test. (Note that functions
// mSysStat{Inc,Dec} in mstats.go have explicit checks for
diff --git a/libgo/go/runtime/internal/sys/sys.go b/libgo/go/runtime/internal/sys/sys.go
index 586a763717d..9d9ac4507f6 100644
--- a/libgo/go/runtime/internal/sys/sys.go
+++ b/libgo/go/runtime/internal/sys/sys.go
@@ -6,9 +6,9 @@
// constants used by the runtime.
package sys
-// The next line makes 'go generate' write the zgen_*.go files with
+// The next line makes 'go generate' write the zgo*.go files with
// per-OS and per-arch information, including constants
-// named goos_$GOOS and goarch_$GOARCH for every
+// named Goos$GOOS and Goarch$GOARCH for every
// known GOOS and GOARCH. The constant is 1 on the
// current system, 0 otherwise; multiplying by them is
// useful for defining GOOS- or GOARCH-specific constants.
diff --git a/libgo/go/runtime/lock_sema.go b/libgo/go/runtime/lock_sema.go
index 52a2376dc5e..d000b112f44 100644
--- a/libgo/go/runtime/lock_sema.go
+++ b/libgo/go/runtime/lock_sema.go
@@ -83,7 +83,7 @@ Loop:
// for this lock, chained through m->nextwaitm.
// Queue this M.
for {
- gp.m.nextwaitm = v &^ mutex_locked
+ gp.m.nextwaitm = muintptr(v &^ mutex_locked)
if atomic.Casuintptr(&l.key, v, uintptr(unsafe.Pointer(gp.m))|mutex_locked) {
break
}
@@ -115,8 +115,8 @@ func unlock(l *mutex) {
} else {
// Other M's are waiting for the lock.
// Dequeue an M.
- mp = (*m)(unsafe.Pointer(v &^ mutex_locked))
- if atomic.Casuintptr(&l.key, v, mp.nextwaitm) {
+ mp = muintptr(v &^ mutex_locked).ptr()
+ if atomic.Casuintptr(&l.key, v, uintptr(mp.nextwaitm)) {
// Dequeued an M. Wake it.
semawakeup(mp)
break
@@ -152,7 +152,7 @@ func notewakeup(n *note) {
case v == 0:
// Nothing was waiting. Done.
case v == mutex_locked:
- // Two notewakeups!  Not allowed.
+ // Two notewakeups! Not allowed.
throw("notewakeup - double wakeup")
default:
// Must be the waiting m. Wake it up.
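
The lock_sema.go hunks above only swap the raw uintptr for the muintptr type; the underlying trick is unchanged: l.key packs the address of a waiting M together with the mutex_locked flag in its low bit, which is always free because M pointers are word-aligned, and v &^ mutex_locked recovers the pointer. A small sketch of that low-bit tagging idea on plain uintptr values (the names here are illustrative, not the runtime's):

package main

import "fmt"

// Pointers handed to the lock are word-aligned, so the low bit of the
// address is always zero and can double as the "locked" flag.
const locked uintptr = 1

// pack stores an aligned address together with the flag in one word.
func pack(addr uintptr, flag bool) uintptr {
	if flag {
		return addr | locked
	}
	return addr
}

// unpack recovers the address and the flag from the packed word.
func unpack(v uintptr) (addr uintptr, flag bool) {
	return v &^ locked, v&locked != 0
}

func main() {
	v := pack(0x1000, true)
	addr, flag := unpack(v)
	fmt.Printf("word=%#x addr=%#x locked=%v\n", v, addr, flag)
}
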
diff --git a/libgo/go/runtime/malloc.go b/libgo/go/runtime/malloc.go
index 796cd8a7c64..88e4ba3657b 100644
--- a/libgo/go/runtime/malloc.go
+++ b/libgo/go/runtime/malloc.go
@@ -546,9 +546,8 @@ func nextFreeFast(s *mspan) gclinkptr {
}
s.allocCache >>= uint(theBit + 1)
s.freeindex = freeidx
- v := gclinkptr(result*s.elemsize + s.base())
s.allocCount++
- return v
+ return gclinkptr(result*s.elemsize + s.base())
}
}
return 0
@@ -877,6 +876,9 @@ func reflect_unsafe_New(typ *_type) unsafe.Pointer {
// newarray allocates an array of n elements of type typ.
func newarray(typ *_type, n int) unsafe.Pointer {
+ if n == 1 {
+ return mallocgc(typ.size, typ, true)
+ }
if n < 0 || uintptr(n) > maxSliceCap(typ.size) {
panic(plainError("runtime: allocation size out of range"))
}
@@ -893,11 +895,13 @@ func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
mProf_Malloc(x, size)
}
-// nextSample returns the next sampling point for heap profiling.
-// It produces a random variable with a geometric distribution and
-// mean MemProfileRate. This is done by generating a uniformly
-// distributed random number and applying the cumulative distribution
-// function for an exponential.
+// nextSample returns the next sampling point for heap profiling. The goal is
+// to sample allocations on average every MemProfileRate bytes, but with a
+// completely random distribution over the allocation timeline; this
+// corresponds to a Poisson process with parameter MemProfileRate. In Poisson
+// processes, the distance between two samples follows the exponential
+// distribution (exp(MemProfileRate)), so the best return value is a random
+// number taken from an exponential distribution whose mean is MemProfileRate.
func nextSample() int32 {
if GOOS == "plan9" {
// Plan 9 doesn't support floating point in note handler.
@@ -906,25 +910,29 @@ func nextSample() int32 {
}
}
- period := MemProfileRate
+ return fastexprand(MemProfileRate)
+}
- // make nextSample not overflow. Maximum possible step is
- // -ln(1/(1<<kRandomBitCount)) * period, approximately 20 * period.
+// fastexprand returns a random number from an exponential distribution with
+// the specified mean.
+func fastexprand(mean int) int32 {
+ // Avoid overflow. Maximum possible step is
+ // -ln(1/(1<<randomBitCount)) * mean, approximately 20 * mean.
switch {
- case period > 0x7000000:
- period = 0x7000000
- case period == 0:
+ case mean > 0x7000000:
+ mean = 0x7000000
+ case mean == 0:
return 0
}
- // Let m be the sample rate,
- // the probability distribution function is m*exp(-mx), so the CDF is
- // p = 1 - exp(-mx), so
- // q = 1 - p == exp(-mx)
- // log_e(q) = -mx
- // -log_e(q)/m = x
- // x = -log_e(q) * period
- // x = log_2(q) * (-log_e(2)) * period ; Using log_2 for efficiency
+ // Take a random sample of the exponential distribution exp(-mean*x).
+ // The probability distribution function is mean*exp(-mean*x), so the CDF is
+ // p = 1 - exp(-mean*x), so
+ // q = 1 - p == exp(-mean*x)
+ // log_e(q) = -mean*x
+ // -log_e(q)/mean = x
+ // x = -log_e(q) * mean
+ // x = log_2(q) * (-log_e(2)) * mean ; Using log_2 for efficiency
const randomBitCount = 26
q := fastrand()%(1<<randomBitCount) + 1
qlog := fastlog2(float64(q)) - randomBitCount
@@ -932,7 +940,7 @@ func nextSample() int32 {
qlog = 0
}
const minusLog2 = -0.6931471805599453 // -ln(2)
- return int32(qlog*(minusLog2*float64(period))) + 1
+ return int32(qlog*(minusLog2*float64(mean))) + 1
}
// nextSampleNoFP is similar to nextSample, but uses older,
@@ -950,7 +958,7 @@ func nextSampleNoFP() int32 {
}
type persistentAlloc struct {
- base unsafe.Pointer
+ base *notInHeap
off uintptr
}
@@ -967,17 +975,17 @@ var globalAlloc struct {
//
// Consider marking persistentalloc'd types go:notinheap.
func persistentalloc(size, align uintptr, sysStat *uint64) unsafe.Pointer {
- var p unsafe.Pointer
+ var p *notInHeap
systemstack(func() {
p = persistentalloc1(size, align, sysStat)
})
- return p
+ return unsafe.Pointer(p)
}
// Must run on system stack because stack growth can (re)invoke it.
// See issue 9174.
//go:systemstack
-func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer {
+func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap {
const (
chunk = 256 << 10
maxBlock = 64 << 10 // VM reservation granularity is 64K on windows
@@ -998,7 +1006,7 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer {
}
if size >= maxBlock {
- return sysAlloc(size, sysStat)
+ return (*notInHeap)(sysAlloc(size, sysStat))
}
mp := acquirem()
@@ -1011,7 +1019,7 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer {
}
persistent.off = round(persistent.off, align)
if persistent.off+size > chunk || persistent.base == nil {
- persistent.base = sysAlloc(chunk, &memstats.other_sys)
+ persistent.base = (*notInHeap)(sysAlloc(chunk, &memstats.other_sys))
if persistent.base == nil {
if persistent == &globalAlloc.persistentAlloc {
unlock(&globalAlloc.mutex)
@@ -1020,7 +1028,7 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer {
}
persistent.off = 0
}
- p := add(persistent.base, persistent.off)
+ p := persistent.base.add(persistent.off)
persistent.off += size
releasem(mp)
if persistent == &globalAlloc.persistentAlloc {
@@ -1033,3 +1041,19 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer {
}
return p
}
+
+// notInHeap is off-heap memory allocated by a lower-level allocator
+// like sysAlloc or persistentAlloc.
+//
+// In general, it's better to use real types marked as go:notinheap,
+// but this serves as a generic type for situations where that isn't
+// possible (like in the allocators).
+//
+// TODO: Use this as the return type of sysAlloc, persistentAlloc, etc?
+//
+//go:notinheap
+type notInHeap struct{}
+
+func (p *notInHeap) add(bytes uintptr) *notInHeap {
+ return (*notInHeap)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + bytes))
+}
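
fastexprand above inverts the exponential CDF with fastlog2 so the sampler stays cheap and allocation-free inside the runtime. Written against the standard library instead, the same derivation spelled out in its comment (q uniform in (0, 1], x = -log_e(q) * mean) looks like the sketch below; this is only an illustration of the math, not the runtime code:

package main

import (
	"fmt"
	"math"
	"math/rand"
)

// expSample draws one sample from an exponential distribution with the
// given mean by inverting the CDF p = 1 - exp(-x/mean):
// q = 1 - p is uniform in (0, 1], and x = -ln(q) * mean.
func expSample(mean float64) float64 {
	q := 1 - rand.Float64() // uniform in (0, 1]
	return -math.Log(q) * mean
}

func main() {
	const mean = 512 * 1024 // e.g. a 512 KiB sampling interval
	const n = 100000
	var sum float64
	for i := 0; i < n; i++ {
		sum += expSample(mean)
	}
	fmt.Printf("empirical mean %.0f (target %d)\n", sum/n, mean)
}

Sampling intervals drawn this way make the heap profiler a Poisson sampler: on average one sample every MemProfileRate bytes, with no periodic pattern an allocation-heavy loop could line up with.
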
diff --git a/libgo/go/runtime/malloc_test.go b/libgo/go/runtime/malloc_test.go
index 0d43cf65976..ab580f81800 100644
--- a/libgo/go/runtime/malloc_test.go
+++ b/libgo/go/runtime/malloc_test.go
@@ -48,9 +48,6 @@ func TestMemStats(t *testing.T) {
}
// Of the uint fields, HeapReleased, HeapIdle can be 0.
// PauseTotalNs can be 0 if timer resolution is poor.
- //
- // TODO: Test that GCCPUFraction is <= 0.99. This currently
- // fails on windows/386. (Issue #19319)
fields := map[string][]func(interface{}) error{
"Alloc": {nz, le(1e10)}, "TotalAlloc": {nz, le(1e11)}, "Sys": {nz, le(1e10)},
"Lookups": {nz, le(1e10)}, "Mallocs": {nz, le(1e10)}, "Frees": {nz, le(1e10)},
@@ -63,7 +60,7 @@ func TestMemStats(t *testing.T) {
"NextGC": {nz, le(1e10)}, "LastGC": {nz},
"PauseTotalNs": {le(1e11)}, "PauseNs": nil, "PauseEnd": nil,
"NumGC": {nz, le(1e9)}, "NumForcedGC": {nz, le(1e9)},
- "GCCPUFraction": nil, "EnableGC": {eq(true)}, "DebugGC": {eq(false)},
+ "GCCPUFraction": {le(0.99)}, "EnableGC": {eq(true)}, "DebugGC": {eq(false)},
"BySize": nil,
}
diff --git a/libgo/go/runtime/map_test.go b/libgo/go/runtime/map_test.go
index 37c959f8327..6d7097e07ef 100644
--- a/libgo/go/runtime/map_test.go
+++ b/libgo/go/runtime/map_test.go
@@ -249,7 +249,7 @@ func testConcurrentReadsAfterGrowth(t *testing.T, useReflect bool) {
numGrowStep := 250
numReader := 16
if testing.Short() {
- numLoop, numGrowStep = 2, 500
+ numLoop, numGrowStep = 2, 100
}
for i := 0; i < numLoop; i++ {
m := make(map[int]int, 0)
@@ -603,6 +603,142 @@ func TestIgnoreBogusMapHint(t *testing.T) {
}
}
+var mapSink map[int]int
+
+var mapBucketTests = [...]struct {
+ n int // n is the number of map elements
+ noescape int // number of expected buckets for non-escaping map
+ escape int // number of expected buckets for escaping map
+}{
+ {-(1 << 30), 1, 1},
+ {-1, 1, 1},
+ {0, 1, 1},
+ {1, 1, 1},
+ {8, 1, 1},
+ {9, 2, 2},
+ {13, 2, 2},
+ {14, 4, 4},
+ {26, 4, 4},
+}
+
+func TestMapBuckets(t *testing.T) {
+ // Test that maps of different sizes have the right number of buckets.
+ // Non-escaping maps with small buckets (like map[int]int) never
+ // have a nil bucket pointer due to starting with preallocated buckets
+ // on the stack. Escaping maps start with a non-nil bucket pointer if
+ // hint size is above bucketCnt and thereby have more than one bucket.
+ // These tests depend on bucketCnt and loadFactor* in hashmap.go.
+ t.Run("mapliteral", func(t *testing.T) {
+ for _, tt := range mapBucketTests {
+ localMap := map[int]int{}
+ // Skip test on gccgo until escape analysis is
+ // turned on.
+ if runtime.MapBucketsPointerIsNil(localMap) && runtime.Compiler != "gccgo" {
+ t.Errorf("no escape: buckets pointer is nil for non-escaping map")
+ }
+ for i := 0; i < tt.n; i++ {
+ localMap[i] = i
+ }
+ if got := runtime.MapBucketsCount(localMap); got != tt.noescape {
+ t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got)
+ }
+ escapingMap := map[int]int{}
+ if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) {
+ t.Errorf("escape: buckets pointer is nil for n=%d buckets", count)
+ }
+ for i := 0; i < tt.n; i++ {
+ escapingMap[i] = i
+ }
+ if got := runtime.MapBucketsCount(escapingMap); got != tt.escape {
+ t.Errorf("escape n=%d want %d buckets, got %d", tt.n, tt.escape, got)
+ }
+ mapSink = escapingMap
+ }
+ })
+ t.Run("nohint", func(t *testing.T) {
+ for _, tt := range mapBucketTests {
+ localMap := make(map[int]int)
+ // Skip test on gccgo until escape analysis is
+ // turned on.
+ if runtime.MapBucketsPointerIsNil(localMap) && runtime.Compiler != "gccgo" {
+ t.Errorf("no escape: buckets pointer is nil for non-escaping map")
+ }
+ for i := 0; i < tt.n; i++ {
+ localMap[i] = i
+ }
+ if got := runtime.MapBucketsCount(localMap); got != tt.noescape {
+ t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got)
+ }
+ escapingMap := make(map[int]int)
+ if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) {
+ t.Errorf("escape: buckets pointer is nil for n=%d buckets", count)
+ }
+ for i := 0; i < tt.n; i++ {
+ escapingMap[i] = i
+ }
+ if got := runtime.MapBucketsCount(escapingMap); got != tt.escape {
+ t.Errorf("escape: n=%d want %d buckets, got %d", tt.n, tt.escape, got)
+ }
+ mapSink = escapingMap
+ }
+ })
+ t.Run("makemap", func(t *testing.T) {
+ for _, tt := range mapBucketTests {
+ localMap := make(map[int]int, tt.n)
+ // Skip test on gccgo until escape analysis is
+ // turned on.
+ if runtime.MapBucketsPointerIsNil(localMap) && runtime.Compiler != "gccgo" {
+ t.Errorf("no escape: buckets pointer is nil for non-escaping map")
+ }
+ for i := 0; i < tt.n; i++ {
+ localMap[i] = i
+ }
+ if got := runtime.MapBucketsCount(localMap); got != tt.noescape {
+ t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got)
+ }
+ escapingMap := make(map[int]int, tt.n)
+ if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) {
+ t.Errorf("escape: buckets pointer is nil for n=%d buckets", count)
+ }
+ for i := 0; i < tt.n; i++ {
+ escapingMap[i] = i
+ }
+ if got := runtime.MapBucketsCount(escapingMap); got != tt.escape {
+ t.Errorf("escape: n=%d want %d buckets, got %d", tt.n, tt.escape, got)
+ }
+ mapSink = escapingMap
+ }
+ })
+ t.Run("makemap64", func(t *testing.T) {
+ for _, tt := range mapBucketTests {
+ localMap := make(map[int]int, int64(tt.n))
+ // Skip test on gccgo until escape analysis is
+ // turned on.
+ if runtime.MapBucketsPointerIsNil(localMap) && runtime.Compiler != "gccgo" {
+ t.Errorf("no escape: buckets pointer is nil for non-escaping map")
+ }
+ for i := 0; i < tt.n; i++ {
+ localMap[i] = i
+ }
+ if got := runtime.MapBucketsCount(localMap); got != tt.noescape {
+ t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got)
+ }
+ escapingMap := make(map[int]int, tt.n)
+ if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) {
+ t.Errorf("escape: buckets pointer is nil for n=%d buckets", count)
+ }
+ for i := 0; i < tt.n; i++ {
+ escapingMap[i] = i
+ }
+ if got := runtime.MapBucketsCount(escapingMap); got != tt.escape {
+ t.Errorf("escape: n=%d want %d buckets, got %d", tt.n, tt.escape, got)
+ }
+ mapSink = escapingMap
+ }
+ })
+
+}
+
func benchmarkMapPop(b *testing.B, n int) {
m := map[int]int{}
for i := 0; i < b.N; i++ {
@@ -624,15 +760,39 @@ func BenchmarkMapPop100(b *testing.B) { benchmarkMapPop(b, 100) }
func BenchmarkMapPop1000(b *testing.B) { benchmarkMapPop(b, 1000) }
func BenchmarkMapPop10000(b *testing.B) { benchmarkMapPop(b, 10000) }
+var testNonEscapingMapVariable int = 8
+
func TestNonEscapingMap(t *testing.T) {
t.Skip("does not work on gccgo without better escape analysis")
n := testing.AllocsPerRun(1000, func() {
+ m := map[int]int{}
+ m[0] = 0
+ })
+ if n != 0 {
+ t.Fatalf("mapliteral: want 0 allocs, got %v", n)
+ }
+ n = testing.AllocsPerRun(1000, func() {
m := make(map[int]int)
m[0] = 0
})
if n != 0 {
- t.Fatalf("want 0 allocs, got %v", n)
+ t.Fatalf("no hint: want 0 allocs, got %v", n)
+ }
+ n = testing.AllocsPerRun(1000, func() {
+ m := make(map[int]int, 8)
+ m[0] = 0
+ })
+ if n != 0 {
+ t.Fatalf("with small hint: want 0 allocs, got %v", n)
+ }
+ n = testing.AllocsPerRun(1000, func() {
+ m := make(map[int]int, testNonEscapingMapVariable)
+ m[0] = 0
+ })
+ if n != 0 {
+ t.Fatalf("with variable hint: want 0 allocs, got %v", n)
}
+
}
func benchmarkMapAssignInt32(b *testing.B, n int) {
@@ -643,12 +803,16 @@ func benchmarkMapAssignInt32(b *testing.B, n int) {
}
func benchmarkMapDeleteInt32(b *testing.B, n int) {
- a := make(map[int32]int)
- for i := 0; i < n*b.N; i++ {
- a[int32(i)] = i
- }
+ a := make(map[int32]int, n)
b.ResetTimer()
- for i := 0; i < n*b.N; i = i + n {
+ for i := 0; i < b.N; i++ {
+ if len(a) == 0 {
+ b.StopTimer()
+ for j := i; j < i+n; j++ {
+ a[int32(j)] = j
+ }
+ b.StartTimer()
+ }
delete(a, int32(i))
}
}
@@ -661,12 +825,16 @@ func benchmarkMapAssignInt64(b *testing.B, n int) {
}
func benchmarkMapDeleteInt64(b *testing.B, n int) {
- a := make(map[int64]int)
- for i := 0; i < n*b.N; i++ {
- a[int64(i)] = i
- }
+ a := make(map[int64]int, n)
b.ResetTimer()
- for i := 0; i < n*b.N; i = i + n {
+ for i := 0; i < b.N; i++ {
+ if len(a) == 0 {
+ b.StopTimer()
+ for j := i; j < i+n; j++ {
+ a[int64(j)] = j
+ }
+ b.StartTimer()
+ }
delete(a, int64(i))
}
}
@@ -684,17 +852,23 @@ func benchmarkMapAssignStr(b *testing.B, n int) {
}
func benchmarkMapDeleteStr(b *testing.B, n int) {
- k := make([]string, n*b.N)
- for i := 0; i < n*b.N; i++ {
- k[i] = strconv.Itoa(i)
- }
- a := make(map[string]int)
- for i := 0; i < n*b.N; i++ {
- a[k[i]] = i
+ i2s := make([]string, n)
+ for i := 0; i < n; i++ {
+ i2s[i] = strconv.Itoa(i)
}
+ a := make(map[string]int, n)
b.ResetTimer()
- for i := 0; i < n*b.N; i = i + n {
- delete(a, k[i])
+ k := 0
+ for i := 0; i < b.N; i++ {
+ if len(a) == 0 {
+ b.StopTimer()
+ for j := 0; j < n; j++ {
+ a[i2s[j]] = j
+ }
+ k = i
+ b.StartTimer()
+ }
+ delete(a, i2s[i-k])
}
}
@@ -713,7 +887,7 @@ func BenchmarkMapAssign(b *testing.B) {
}
func BenchmarkMapDelete(b *testing.B) {
- b.Run("Int32", runWith(benchmarkMapDeleteInt32, 1, 2, 4))
- b.Run("Int64", runWith(benchmarkMapDeleteInt64, 1, 2, 4))
- b.Run("Str", runWith(benchmarkMapDeleteStr, 1, 2, 4))
+ b.Run("Int32", runWith(benchmarkMapDeleteInt32, 100, 1000, 10000))
+ b.Run("Int64", runWith(benchmarkMapDeleteInt64, 100, 1000, 10000))
+ b.Run("Str", runWith(benchmarkMapDeleteStr, 100, 1000, 10000))
}
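
The expected counts in the mapBucketTests table further up follow from two constants its comment points at: buckets hold bucketCnt (8) entries, and the table doubles once the hint exceeds a load factor of 6.5 entries per bucket. A small sketch of that sizing rule as the test assumes it (this mirrors the table, not the runtime's exact overLoadFactor code):

package main

import "fmt"

const (
	bucketCnt  = 8   // entries per bucket
	loadFactor = 6.5 // average entries per bucket before the table doubles
)

// bucketsForHint returns the bucket count the test table expects makemap
// to choose for a given hint: a single bucket while the hint fits in one,
// then doubling while the hint exceeds loadFactor * buckets.
func bucketsForHint(hint int) int {
	if hint <= bucketCnt {
		return 1
	}
	b := 1
	for float64(hint) > loadFactor*float64(b) {
		b *= 2
	}
	return b
}

func main() {
	for _, n := range []int{0, 8, 9, 13, 14, 26} {
		fmt.Printf("hint %2d -> %d bucket(s)\n", n, bucketsForHint(n))
	}
}

Running it reproduces the table's 1, 1, 2, 2, 4, 4 progression, which is why the test treats a hint of 13 and 14 differently.
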
diff --git a/libgo/go/runtime/mbarrier.go b/libgo/go/runtime/mbarrier.go
index d54016f0ba9..3b8f71434b8 100644
--- a/libgo/go/runtime/mbarrier.go
+++ b/libgo/go/runtime/mbarrier.go
@@ -189,6 +189,8 @@ func gcmarkwb_m(slot *uintptr, ptr uintptr) {
func writebarrierptr_prewrite1(dst *uintptr, src uintptr) {
mp := acquirem()
if mp.inwb || mp.dying > 0 {
+ // We explicitly allow write barriers in startpanic_m,
+ // since we're going down anyway. Ignore them here.
releasem(mp)
return
}
@@ -244,6 +246,10 @@ func writebarrierptr_prewrite(dst *uintptr, src uintptr) {
// typedmemmove copies a value of type t to dst from src.
// Must be nosplit, see #16026.
+//
+// TODO: Perfect for go:nosplitrec since we can't have a safe point
+// anywhere in the bulk barrier or memmove.
+//
//go:nosplit
func typedmemmove(typ *_type, dst, src unsafe.Pointer) {
if typ.kind&kindNoPointers == 0 {
@@ -265,8 +271,8 @@ func typedmemmove(typ *_type, dst, src unsafe.Pointer) {
//go:linkname reflect_typedmemmove reflect.typedmemmove
func reflect_typedmemmove(typ *_type, dst, src unsafe.Pointer) {
if raceenabled {
- raceWriteObjectPC(typ, dst, getcallerpc(unsafe.Pointer(&typ)), funcPC(reflect_typedmemmove))
- raceReadObjectPC(typ, src, getcallerpc(unsafe.Pointer(&typ)), funcPC(reflect_typedmemmove))
+ raceWriteObjectPC(typ, dst, getcallerpc(), funcPC(reflect_typedmemmove))
+ raceReadObjectPC(typ, src, getcallerpc(), funcPC(reflect_typedmemmove))
}
if msanenabled {
msanwrite(dst, typ.size)
@@ -310,8 +316,12 @@ func typedslicecopy(typ *_type, dst, src slice) int {
dstp := dst.array
srcp := src.array
+ // The compiler emits calls to typedslicecopy before
+ // instrumentation runs, so unlike the other copying and
+ // assignment operations, it's not instrumented in the calling
+ // code and needs its own instrumentation.
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&typ))
+ callerpc := getcallerpc()
pc := funcPC(slicecopy)
racewriterangepc(dstp, uintptr(n)*typ.size, callerpc, pc)
racereadrangepc(srcp, uintptr(n)*typ.size, callerpc, pc)
@@ -329,41 +339,13 @@ func typedslicecopy(typ *_type, dst, src slice) int {
// compiler only emits calls to typedslicecopy for types with pointers,
// and growslice and reflect_typedslicecopy check for pointers
// before calling typedslicecopy.
- if !writeBarrier.needed {
- memmove(dstp, srcp, uintptr(n)*typ.size)
- return n
+ size := uintptr(n) * typ.size
+ if writeBarrier.needed {
+ bulkBarrierPreWrite(uintptr(dstp), uintptr(srcp), size)
}
-
- systemstack(func() {
- if uintptr(srcp) < uintptr(dstp) && uintptr(srcp)+uintptr(n)*typ.size > uintptr(dstp) {
- // Overlap with src before dst.
- // Copy backward, being careful not to move dstp/srcp
- // out of the array they point into.
- dstp = add(dstp, uintptr(n-1)*typ.size)
- srcp = add(srcp, uintptr(n-1)*typ.size)
- i := 0
- for {
- typedmemmove(typ, dstp, srcp)
- if i++; i >= n {
- break
- }
- dstp = add(dstp, -typ.size)
- srcp = add(srcp, -typ.size)
- }
- } else {
- // Copy forward, being careful not to move dstp/srcp
- // out of the array they point into.
- i := 0
- for {
- typedmemmove(typ, dstp, srcp)
- if i++; i >= n {
- break
- }
- dstp = add(dstp, typ.size)
- srcp = add(srcp, typ.size)
- }
- }
- })
+ // See typedmemmove for a discussion of the race between the
+ // barrier and memmove.
+ memmove(dstp, srcp, size)
return n
}
@@ -380,7 +362,7 @@ func reflect_typedslicecopy(elemType *_type, dst, src slice) int {
size := uintptr(n) * elemType.size
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&elemType))
+ callerpc := getcallerpc()
pc := funcPC(reflect_typedslicecopy)
racewriterangepc(dst.array, size, callerpc, pc)
racereadrangepc(src.array, size, callerpc, pc)
diff --git a/libgo/go/runtime/mbitmap.go b/libgo/go/runtime/mbitmap.go
index d1a58202352..a775b57b033 100644
--- a/libgo/go/runtime/mbitmap.go
+++ b/libgo/go/runtime/mbitmap.go
@@ -463,11 +463,6 @@ func heapBitsForObject(p, refBase, refOff uintptr, forStack bool) (base uintptr,
return
}
-// prefetch the bits.
-func (h heapBits) prefetch() {
- prefetchnta(uintptr(unsafe.Pointer((h.bitp))))
-}
-
// next returns the heapBits describing the next pointer-sized word in memory.
// That is, if h describes address p, h.next() describes p+ptrSize.
// Note that next does not modify h. The caller must record the result.
@@ -542,12 +537,13 @@ func (h heapBits) setCheckmarked(size uintptr) {
atomic.Or8(h.bitp, bitScan<<(heapBitsShift+h.shift))
}
-// bulkBarrierPreWrite executes writebarrierptr_prewrite1
+// bulkBarrierPreWrite executes a write barrier
// for every pointer slot in the memory range [src, src+size),
// using pointer/scalar information from [dst, dst+size).
// This executes the write barriers necessary before a memmove.
// src, dst, and size must be pointer-aligned.
// The range [dst, dst+size) must lie within a single object.
+// It does not perform the actual writes.
//
// As a special case, src == 0 indicates that this is being used for a
// memclr. bulkBarrierPreWrite will pass 0 for the src of each write
@@ -593,12 +589,15 @@ func bulkBarrierPreWrite(dst, src, size uintptr) {
return
}
+ buf := &getg().m.p.ptr().wbBuf
h := heapBitsForAddr(dst)
if src == 0 {
for i := uintptr(0); i < size; i += sys.PtrSize {
if h.isPointer() {
dstx := (*uintptr)(unsafe.Pointer(dst + i))
- writebarrierptr_prewrite1(dstx, 0)
+ if !buf.putFast(*dstx, 0) {
+ wbBufFlush(nil, 0)
+ }
}
h = h.next()
}
@@ -607,7 +606,9 @@ func bulkBarrierPreWrite(dst, src, size uintptr) {
if h.isPointer() {
dstx := (*uintptr)(unsafe.Pointer(dst + i))
srcx := (*uintptr)(unsafe.Pointer(src + i))
- writebarrierptr_prewrite1(dstx, *srcx)
+ if !buf.putFast(*dstx, *srcx) {
+ wbBufFlush(nil, 0)
+ }
}
h = h.next()
}
@@ -627,6 +628,7 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
bits = addb(bits, word/8)
mask := uint8(1) << (word % 8)
+ buf := &getg().m.p.ptr().wbBuf
for i := uintptr(0); i < size; i += sys.PtrSize {
if mask == 0 {
bits = addb(bits, 1)
@@ -640,10 +642,14 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
if *bits&mask != 0 {
dstx := (*uintptr)(unsafe.Pointer(dst + i))
if src == 0 {
- writebarrierptr_prewrite1(dstx, 0)
+ if !buf.putFast(*dstx, 0) {
+ wbBufFlush(nil, 0)
+ }
} else {
srcx := (*uintptr)(unsafe.Pointer(src + i))
- writebarrierptr_prewrite1(dstx, *srcx)
+ if !buf.putFast(*dstx, *srcx) {
+ wbBufFlush(nil, 0)
+ }
}
}
mask <<= 1
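
The mbitmap.go hunks above switch the bulk barriers from per-pointer writebarrierptr_prewrite1 calls to the new per-P write-barrier buffer: putFast appends the pair of pointer values involved in a write and reports whether the buffer still has room, and wbBufFlush drains the batch when it fills. A toy sketch of that fast-path/flush shape, with hypothetical names and a plain array standing in for the real buffer:

package main

import "fmt"

// wbBuf is a toy stand-in for the per-P write-barrier buffer: a fixed
// array of pointer words plus a cursor, with a fast "append if room" path.
type wbBuf struct {
	buf  [8]uintptr // 4 (dst, src) pairs
	next int
}

// putFast records one pair and reports whether the buffer still has room;
// when it returns false the caller must flush before the next put.
func (b *wbBuf) putFast(dst, src uintptr) bool {
	b.buf[b.next] = dst
	b.buf[b.next+1] = src
	b.next += 2
	return b.next < len(b.buf)
}

// flush hands the batched pointers to the marker (here it just reports)
// and resets the buffer.
func (b *wbBuf) flush() {
	fmt.Printf("flushing %d pointer pairs to the GC\n", b.next/2)
	b.next = 0
}

func main() {
	var b wbBuf
	for i := uintptr(1); i <= 10; i++ {
		if !b.putFast(i*0x10, i*0x100) {
			b.flush()
		}
	}
	b.flush() // drain whatever is left
}

Batching keeps the common case to a couple of stores and one comparison, which is the whole point of the wbBuf change this release pulls in.
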
diff --git a/libgo/go/runtime/mcache.go b/libgo/go/runtime/mcache.go
index 71a2f22114f..766cfd17523 100644
--- a/libgo/go/runtime/mcache.go
+++ b/libgo/go/runtime/mcache.go
@@ -96,7 +96,7 @@ func freemcache(c *mcache) {
// Gets a span that has a free object in it and assigns it
// to be the cached span for the given sizeclass. Returns this span.
-func (c *mcache) refill(spc spanClass) *mspan {
+func (c *mcache) refill(spc spanClass) {
_g_ := getg()
_g_.m.locks++
@@ -123,7 +123,6 @@ func (c *mcache) refill(spc spanClass) *mspan {
c.alloc[spc] = s
_g_.m.locks--
- return s
}
func (c *mcache) releaseAll() {
diff --git a/libgo/go/runtime/mem_gccgo.go b/libgo/go/runtime/mem_gccgo.go
index ea3e5ebab4e..a087945251f 100644
--- a/libgo/go/runtime/mem_gccgo.go
+++ b/libgo/go/runtime/mem_gccgo.go
@@ -13,9 +13,10 @@ import (
// Functions called by C code.
//go:linkname sysAlloc runtime.sysAlloc
+//go:linkname sysFree runtime.sysFree
//extern mmap
-func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uintptr) unsafe.Pointer
+func sysMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uintptr) unsafe.Pointer
//extern munmap
func munmap(addr unsafe.Pointer, length uintptr) int32
@@ -40,6 +41,14 @@ func init() {
}
}
+func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uintptr) (unsafe.Pointer, int) {
+ p := sysMmap(addr, n, prot, flags, fd, off)
+ if uintptr(p) == _MAP_FAILED {
+ return nil, errno()
+ }
+ return p, 0
+}
+
// NOTE: vec must be just 1 byte long here.
// Mincore returns ENOMEM if any of the pages are unmapped,
// but we want to know that all of the pages are unmapped.
@@ -75,31 +84,30 @@ func addrspace_free(v unsafe.Pointer, n uintptr) bool {
return true
}
-func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uintptr) unsafe.Pointer {
- p := mmap(v, n, prot, flags, fd, offset)
+func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uintptr) (unsafe.Pointer, int) {
+ p, err := mmap(v, n, prot, flags, fd, offset)
// On some systems, mmap ignores v without
// MAP_FIXED, so retry if the address space is free.
if p != v && addrspace_free(v, n) {
- if uintptr(p) != _MAP_FAILED {
+ if err == 0 {
munmap(p, n)
}
- p = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset)
+ p, err = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset)
}
- return p
+ return p, err
}
// Don't split the stack as this method may be invoked without a valid G, which
// prevents us from allocating more stack.
//go:nosplit
func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
- p := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0)
- if uintptr(p) == _MAP_FAILED {
- errval := errno()
- if errval == _EACCES {
+ p, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0)
+ if err != 0 {
+ if err == _EACCES {
print("runtime: mmap: access denied\n")
exit(2)
}
- if errval == _EAGAIN {
+ if err == _EAGAIN {
print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
exit(2)
}
@@ -225,9 +233,9 @@ func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
// if we can reserve at least 64K and check the assumption in SysMap.
// Only user-mode Linux (UML) rejects these requests.
if sys.PtrSize == 8 && uint64(n) > 1<<32 {
- p := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0)
- if p != v {
- if uintptr(p) != _MAP_FAILED {
+ p, err := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0)
+ if p != v || err != 0 {
+ if err == 0 {
munmap(p, 64<<10)
}
return nil
@@ -237,8 +245,8 @@ func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
return v
}
- p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0)
- if uintptr(p) == _MAP_FAILED {
+ p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0)
+ if err != 0 {
return nil
}
*reserved = true
@@ -259,12 +267,12 @@ func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) {
// to do this - we do not on other platforms.
flags |= _MAP_FIXED
}
- p := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, flags, mmapFD, 0)
- if uintptr(p) == _MAP_FAILED && errno() == _ENOMEM {
+ p, err := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, flags, mmapFD, 0)
+ if err == _ENOMEM {
throw("runtime: out of memory")
}
- if p != v {
- print("runtime: address space conflict: map(", v, ") = ", p, "\n")
+ if p != v || err != 0 {
+ print("runtime: address space conflict: map(", v, ") = ", p, " (err ", err, ")\n")
throw("runtime: address space conflict")
}
return
@@ -275,11 +283,11 @@ func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) {
// So always unmap first even if it is already unmapped.
munmap(v, n)
}
- p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, mmapFD, 0)
- if uintptr(p) == _MAP_FAILED && errno() == _ENOMEM {
+ p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, mmapFD, 0)
+ if err == _ENOMEM {
throw("runtime: out of memory")
}
- if p != v {
+ if p != v || err != 0 {
throw("runtime: cannot map pages in arena address space")
}
}
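
mem_gccgo.go now threads the errno out of its mmap wrapper instead of comparing every result against MAP_FAILED. Outside the runtime the same failure handling falls out of the standard syscall package, which already returns the error; a short sketch of checking the two failure modes sysAlloc reports (assumes a Unix-like system):

package main

import (
	"fmt"
	"os"
	"syscall"
)

func main() {
	const length = 1 << 20 // 1 MiB of anonymous, private memory

	mem, err := syscall.Mmap(-1, 0, length,
		syscall.PROT_READ|syscall.PROT_WRITE,
		syscall.MAP_ANON|syscall.MAP_PRIVATE)
	switch err {
	case nil:
		fmt.Println("mapped", len(mem), "bytes")
		syscall.Munmap(mem)
	case syscall.EACCES:
		fmt.Fprintln(os.Stderr, "mmap: access denied")
		os.Exit(2)
	case syscall.EAGAIN:
		fmt.Fprintln(os.Stderr, "mmap: too much locked memory (check 'ulimit -l')")
		os.Exit(2)
	default:
		fmt.Fprintln(os.Stderr, "mmap failed:", err)
		os.Exit(1)
	}
}

Returning the errno alongside the pointer, as the diff does, lets callers like sysMap distinguish ENOMEM from a genuine address-space conflict without re-reading errno after the fact.
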
diff --git a/libgo/go/runtime/memmove_test.go b/libgo/go/runtime/memmove_test.go
index 74b8753b5f7..62de604e69c 100644
--- a/libgo/go/runtime/memmove_test.go
+++ b/libgo/go/runtime/memmove_test.go
@@ -9,6 +9,7 @@ import (
"encoding/binary"
"fmt"
"internal/race"
+ "internal/testenv"
. "runtime"
"testing"
)
@@ -88,6 +89,10 @@ func TestMemmoveAlias(t *testing.T) {
}
func TestMemmoveLarge0x180000(t *testing.T) {
+ if testing.Short() && testenv.Builder() == "" {
+ t.Skip("-short")
+ }
+
t.Parallel()
if race.Enabled {
t.Skip("skipping large memmove test under race detector")
@@ -96,6 +101,10 @@ func TestMemmoveLarge0x180000(t *testing.T) {
}
func TestMemmoveOverlapLarge0x120000(t *testing.T) {
+ if testing.Short() && testenv.Builder() == "" {
+ t.Skip("-short")
+ }
+
t.Parallel()
if race.Enabled {
t.Skip("skipping large memmove test under race detector")
diff --git a/libgo/go/runtime/mfinal.go b/libgo/go/runtime/mfinal.go
index 4353ee57569..19573d8b8d3 100644
--- a/libgo/go/runtime/mfinal.go
+++ b/libgo/go/runtime/mfinal.go
@@ -419,11 +419,7 @@ func findObject(v unsafe.Pointer) (s *mspan, x unsafe.Pointer, n uintptr) {
return
}
-// Mark KeepAlive as noinline so that the current compiler will ensure
-// that the argument is alive at the point of the function call.
-// If it were inlined, it would disappear, and there would be nothing
-// keeping the argument alive. Perhaps a future compiler will recognize
-// runtime.KeepAlive specially and do something more efficient.
+// Mark KeepAlive as noinline so that it is easily detectable as an intrinsic.
//go:noinline
// KeepAlive marks its argument as currently reachable.
@@ -445,4 +441,11 @@ func findObject(v unsafe.Pointer) (s *mspan, x unsafe.Pointer, n uintptr) {
// Without the KeepAlive call, the finalizer could run at the start of
// syscall.Read, closing the file descriptor before syscall.Read makes
// the actual system call.
-func KeepAlive(interface{}) {}
+func KeepAlive(x interface{}) {
+ // Introduce a use of x that the compiler can't eliminate.
+ // This makes sure x is alive on entry. We need x to be alive
+ // on entry for "defer runtime.KeepAlive(x)"; see issue 21402.
+ if cgoAlwaysFalse {
+ println(x)
+ }
+}
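
The new KeepAlive body gives the compiler a use of x it cannot eliminate, which is what makes the "defer runtime.KeepAlive(x)" pattern from issue 21402 reliable. A short usage sketch in the spirit of the doc comment, with a finalized struct standing in for the file-descriptor example:

package main

import (
	"fmt"
	"runtime"
	"time"
)

type resource struct{ id int }

func work(r *resource) {
	// While this runs, main makes no further use of r that the compiler
	// can see; the deferred KeepAlive below is what keeps r reachable.
	fmt.Println("working with resource", r.id)
}

func main() {
	r := &resource{id: 42}
	runtime.SetFinalizer(r, func(r *resource) {
		fmt.Println("finalizer ran for resource", r.id)
	})

	// Keep r reachable until main returns, mirroring TestDeferKeepAlive.
	defer runtime.KeepAlive(r)

	work(r)
	runtime.GC()
	time.Sleep(100 * time.Millisecond)
	fmt.Println("done; the finalizer must not have run yet")
}
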
diff --git a/libgo/go/runtime/mfinal_test.go b/libgo/go/runtime/mfinal_test.go
index 38c2623bb7b..2086e42ba33 100644
--- a/libgo/go/runtime/mfinal_test.go
+++ b/libgo/go/runtime/mfinal_test.go
@@ -254,3 +254,24 @@ var (
Foo2 = &Object2{}
Foo1 = &Object1{}
)
+
+func TestDeferKeepAlive(t *testing.T) {
+ if *flagQuick {
+ t.Skip("-quick")
+ }
+
+ // See issue 21402.
+ t.Parallel()
+ type T *int // needs to be a pointer base type to avoid tinyalloc and its never-finalized behavior.
+ x := new(T)
+ finRun := false
+ runtime.SetFinalizer(x, func(x *T) {
+ finRun = true
+ })
+ defer runtime.KeepAlive(x)
+ runtime.GC()
+ time.Sleep(time.Second)
+ if finRun {
+ t.Errorf("finalizer ran prematurely")
+ }
+}
diff --git a/libgo/go/runtime/mgc.go b/libgo/go/runtime/mgc.go
index 31c4be86fe4..626f088d450 100644
--- a/libgo/go/runtime/mgc.go
+++ b/libgo/go/runtime/mgc.go
@@ -231,6 +231,24 @@ func setGCPercent(in int32) (out int32) {
// Update pacing in response to gcpercent change.
gcSetTriggerRatio(memstats.triggerRatio)
unlock(&mheap_.lock)
+
+ // If we just disabled GC, wait for any concurrent GC to
+ // finish so we always return with no GC running.
+ if in < 0 {
+ // Disable phase transitions.
+ lock(&work.sweepWaiters.lock)
+ if gcphase == _GCmark {
+ // GC is active. Wait until we reach sweeping.
+ gp := getg()
+ gp.schedlink = work.sweepWaiters.head
+ work.sweepWaiters.head.set(gp)
+ goparkunlock(&work.sweepWaiters.lock, "wait for GC cycle", traceEvGoBlock, 1)
+ } else {
+ // GC isn't active.
+ unlock(&work.sweepWaiters.lock)
+ }
+ }
+
return out
}
@@ -300,10 +318,10 @@ const (
// gcMarkWorkerFractionalMode indicates that a P is currently
// running the "fractional" mark worker. The fractional worker
- // is necessary when GOMAXPROCS*gcGoalUtilization is not an
- // integer. The fractional worker should run until it is
+ // is necessary when GOMAXPROCS*gcBackgroundUtilization is not
+ // an integer. The fractional worker should run until it is
// preempted and will be scheduled to pick up the fractional
- // part of GOMAXPROCS*gcGoalUtilization.
+ // part of GOMAXPROCS*gcBackgroundUtilization.
gcMarkWorkerFractionalMode
// gcMarkWorkerIdleMode indicates that a P is running the mark
@@ -397,23 +415,18 @@ type gcControllerState struct {
assistBytesPerWork float64
// fractionalUtilizationGoal is the fraction of wall clock
- // time that should be spent in the fractional mark worker.
- // For example, if the overall mark utilization goal is 25%
- // and GOMAXPROCS is 6, one P will be a dedicated mark worker
- // and this will be set to 0.5 so that 50% of the time some P
- // is in a fractional mark worker. This is computed at the
- // beginning of each cycle.
+ // time that should be spent in the fractional mark worker on
+ // each P that isn't running a dedicated worker.
+ //
+ // For example, if the utilization goal is 25% and there are
+ // no dedicated workers, this will be 0.25. If there goal is
+ // 25%, there is one dedicated worker, and GOMAXPROCS is 5,
+ // this will be 0.05 to make up the missing 5%.
+ //
+ // If this is zero, no fractional workers are needed.
fractionalUtilizationGoal float64
_ [sys.CacheLineSize]byte
-
- // fractionalMarkWorkersNeeded is the number of fractional
- // mark workers that need to be started. This is either 0 or
- // 1. This is potentially updated atomically at every
- // scheduling point (hence it gets its own cache line).
- fractionalMarkWorkersNeeded int64
-
- _ [sys.CacheLineSize]byte
}
// startCycle resets the GC controller's state and computes estimates
@@ -454,23 +467,33 @@ func (c *gcControllerState) startCycle() {
memstats.next_gc = memstats.heap_live + 1024*1024
}
- // Compute the total mark utilization goal and divide it among
- // dedicated and fractional workers.
- totalUtilizationGoal := float64(gomaxprocs) * gcGoalUtilization
- c.dedicatedMarkWorkersNeeded = int64(totalUtilizationGoal)
- c.fractionalUtilizationGoal = totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded)
- if c.fractionalUtilizationGoal > 0 {
- c.fractionalMarkWorkersNeeded = 1
+ // Compute the background mark utilization goal. In general,
+ // this may not come out exactly. We round the number of
+ // dedicated workers so that the utilization is closest to
+ // 25%. For small GOMAXPROCS, this would introduce too much
+ // error, so we add fractional workers in that case.
+ totalUtilizationGoal := float64(gomaxprocs) * gcBackgroundUtilization
+ c.dedicatedMarkWorkersNeeded = int64(totalUtilizationGoal + 0.5)
+ utilError := float64(c.dedicatedMarkWorkersNeeded)/totalUtilizationGoal - 1
+ const maxUtilError = 0.3
+ if utilError < -maxUtilError || utilError > maxUtilError {
+ // Rounding put us more than 30% off our goal. With
+ // gcBackgroundUtilization of 25%, this happens for
+ // GOMAXPROCS<=3 or GOMAXPROCS=6. Enable fractional
+ // workers to compensate.
+ if float64(c.dedicatedMarkWorkersNeeded) > totalUtilizationGoal {
+ // Too many dedicated workers.
+ c.dedicatedMarkWorkersNeeded--
+ }
+ c.fractionalUtilizationGoal = (totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded)) / float64(gomaxprocs)
} else {
- c.fractionalMarkWorkersNeeded = 0
+ c.fractionalUtilizationGoal = 0
}
// Clear per-P state
- for _, p := range &allp {
- if p == nil {
- break
- }
+ for _, p := range allp {
p.gcAssistTime = 0
+ p.gcFractionalMarkTime = 0
}
// Compute initial values for controls that are updated
@@ -483,7 +506,7 @@ func (c *gcControllerState) startCycle() {
work.initialHeapLive>>20, "->",
memstats.next_gc>>20, " MB)",
" workers=", c.dedicatedMarkWorkersNeeded,
- "+", c.fractionalMarkWorkersNeeded, "\n")
+ "+", c.fractionalUtilizationGoal, "\n")
}
}
@@ -496,47 +519,73 @@ func (c *gcControllerState) startCycle() {
// is when assists are enabled and the necessary statistics are
// available).
func (c *gcControllerState) revise() {
- // Compute the expected scan work remaining.
+ gcpercent := gcpercent
+ if gcpercent < 0 {
+ // If GC is disabled but we're running a forced GC,
+ // act like GOGC is huge for the below calculations.
+ gcpercent = 100000
+ }
+ live := atomic.Load64(&memstats.heap_live)
+
+ var heapGoal, scanWorkExpected int64
+ if live <= memstats.next_gc {
+ // We're under the soft goal. Pace GC to complete at
+ // next_gc assuming the heap is in steady-state.
+ heapGoal = int64(memstats.next_gc)
+
+ // Compute the expected scan work remaining.
+ //
+ // This is estimated based on the expected
+ // steady-state scannable heap. For example, with
+ // GOGC=100, only half of the scannable heap is
+ // expected to be live, so that's what we target.
+ //
+ // (This is a float calculation to avoid overflowing on
+ // 100*heap_scan.)
+ scanWorkExpected = int64(float64(memstats.heap_scan) * 100 / float64(100+gcpercent))
+ } else {
+ // We're past the soft goal. Pace GC so that in the
+ // worst case it will complete by the hard goal.
+ const maxOvershoot = 1.1
+ heapGoal = int64(float64(memstats.next_gc) * maxOvershoot)
+
+ // Compute the upper bound on the scan work remaining.
+ scanWorkExpected = int64(memstats.heap_scan)
+ }
+
+ // Compute the remaining scan work estimate.
//
// Note that we currently count allocations during GC as both
// scannable heap (heap_scan) and scan work completed
- // (scanWork), so this difference won't be changed by
- // allocations during GC.
- //
- // This particular estimate is a strict upper bound on the
- // possible remaining scan work for the current heap.
- // You might consider dividing this by 2 (or by
- // (100+GOGC)/100) to counter this over-estimation, but
- // benchmarks show that this has almost no effect on mean
- // mutator utilization, heap size, or assist time and it
- // introduces the danger of under-estimating and letting the
- // mutator outpace the garbage collector.
- scanWorkExpected := int64(memstats.heap_scan) - c.scanWork
- if scanWorkExpected < 1000 {
+ // (scanWork), so allocation will change this difference
+ // slowly in the soft regime and not at all in the hard
+ // regime.
+ scanWorkRemaining := scanWorkExpected - c.scanWork
+ if scanWorkRemaining < 1000 {
// We set a somewhat arbitrary lower bound on
// remaining scan work since if we aim a little high,
// we can miss by a little.
//
// We *do* need to enforce that this is at least 1,
// since marking is racy and double-scanning objects
- // may legitimately make the expected scan work
- // negative.
- scanWorkExpected = 1000
+ // may legitimately make the remaining scan work
+ // negative, even in the hard goal regime.
+ scanWorkRemaining = 1000
}
// Compute the heap distance remaining.
- heapDistance := int64(memstats.next_gc) - int64(atomic.Load64(&memstats.heap_live))
- if heapDistance <= 0 {
+ heapRemaining := heapGoal - int64(live)
+ if heapRemaining <= 0 {
// This shouldn't happen, but if it does, avoid
// dividing by zero or setting the assist negative.
- heapDistance = 1
+ heapRemaining = 1
}
// Compute the mutator assist ratio so by the time the mutator
// allocates the remaining heap bytes up to next_gc, it will
// have done (or stolen) the remaining amount of scan work.
- c.assistWorkPerByte = float64(scanWorkExpected) / float64(heapDistance)
- c.assistBytesPerWork = float64(heapDistance) / float64(scanWorkExpected)
+ c.assistWorkPerByte = float64(scanWorkRemaining) / float64(heapRemaining)
+ c.assistBytesPerWork = float64(heapRemaining) / float64(scanWorkRemaining)
}
// endCycle computes the trigger ratio for the next cycle.
@@ -570,7 +619,7 @@ func (c *gcControllerState) endCycle() float64 {
assistDuration := nanotime() - c.markStartTime
// Assume background mark hit its utilization goal.
- utilization := gcGoalUtilization
+ utilization := gcBackgroundUtilization
// Add assist utilization; avoid divide by zero.
if assistDuration > 0 {
utilization += float64(c.assistTime) / float64(assistDuration*int64(gomaxprocs))
@@ -689,51 +738,20 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g {
// This P is now dedicated to marking until the end of
// the concurrent mark phase.
_p_.gcMarkWorkerMode = gcMarkWorkerDedicatedMode
+ } else if c.fractionalUtilizationGoal == 0 {
+ // No need for fractional workers.
+ return nil
} else {
- if !decIfPositive(&c.fractionalMarkWorkersNeeded) {
- // No more workers are need right now.
- return nil
- }
-
- // This P has picked the token for the fractional worker.
- // Is the GC currently under or at the utilization goal?
- // If so, do more work.
- //
- // We used to check whether doing one time slice of work
- // would remain under the utilization goal, but that has the
- // effect of delaying work until the mutator has run for
- // enough time slices to pay for the work. During those time
- // slices, write barriers are enabled, so the mutator is running slower.
- // Now instead we do the work whenever we're under or at the
- // utilization work and pay for it by letting the mutator run later.
- // This doesn't change the overall utilization averages, but it
- // front loads the GC work so that the GC finishes earlier and
- // write barriers can be turned off sooner, effectively giving
- // the mutator a faster machine.
- //
- // The old, slower behavior can be restored by setting
- // gcForcePreemptNS = forcePreemptNS.
- const gcForcePreemptNS = 0
-
- // TODO(austin): We could fast path this and basically
- // eliminate contention on c.fractionalMarkWorkersNeeded by
- // precomputing the minimum time at which it's worth
- // next scheduling the fractional worker. Then Ps
- // don't have to fight in the window where we've
- // passed that deadline and no one has started the
- // worker yet.
+ // Is this P behind on the fractional utilization
+ // goal?
//
- // TODO(austin): Shorter preemption interval for mark
- // worker to improve fairness and give this
- // finer-grained control over schedule?
- now := nanotime() - gcController.markStartTime
- then := now + gcForcePreemptNS
- timeUsed := c.fractionalMarkTime + gcForcePreemptNS
- if then > 0 && float64(timeUsed)/float64(then) > c.fractionalUtilizationGoal {
- // Nope, we'd overshoot the utilization goal
- atomic.Xaddint64(&c.fractionalMarkWorkersNeeded, +1)
+ // This should be kept in sync with pollFractionalWorkerExit.
+ delta := nanotime() - gcController.markStartTime
+ if delta > 0 && float64(_p_.gcFractionalMarkTime)/float64(delta) > c.fractionalUtilizationGoal {
+ // Nope. No need to run a fractional worker.
return nil
}
+ // Run a fractional worker.
_p_.gcMarkWorkerMode = gcMarkWorkerFractionalMode
}
@@ -746,6 +764,24 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g {
return gp
}
+// pollFractionalWorkerExit returns true if a fractional mark worker
+// should self-preempt. It assumes it is called from the fractional
+// worker.
+func pollFractionalWorkerExit() bool {
+ // This should be kept in sync with the fractional worker
+ // scheduler logic in findRunnableGCWorker.
+ now := nanotime()
+ delta := now - gcController.markStartTime
+ if delta <= 0 {
+ return true
+ }
+ p := getg().m.p.ptr()
+ selfTime := p.gcFractionalMarkTime + (now - p.gcMarkWorkerStartTime)
+ // Add some slack to the utilization goal so that the
+ // fractional worker isn't behind again the instant it exits.
+ return float64(selfTime)/float64(delta) > 1.2*gcController.fractionalUtilizationGoal
+}
+
// gcSetTriggerRatio sets the trigger ratio and updates everything
// derived from it: the absolute trigger, the heap goal, mark pacing,
// and sweep pacing.
@@ -860,9 +896,22 @@ func gcSetTriggerRatio(triggerRatio float64) {
}
}
-// gcGoalUtilization is the goal CPU utilization for background
+// gcGoalUtilization is the goal CPU utilization for
// marking as a fraction of GOMAXPROCS.
-const gcGoalUtilization = 0.25
+const gcGoalUtilization = 0.30
+
+// gcBackgroundUtilization is the fixed CPU utilization for background
+// marking. It must be <= gcGoalUtilization. The difference between
+// gcGoalUtilization and gcBackgroundUtilization will be made up by
+// mark assists. The scheduler will aim to use within 50% of this
+// goal.
+//
+// Setting this to < gcGoalUtilization avoids saturating the trigger
+// feedback controller when there are no assists, which allows it to
+// better control CPU and heap growth. However, the larger the gap,
+// the more mutator assists are expected to happen, which impact
+// mutator latency.
+const gcBackgroundUtilization = 0.25
// gcCreditSlack is the amount of scan work credit that can
// accumulate locally before updating gcController.scanWork and,
@@ -1159,7 +1208,7 @@ func (t gcTrigger) test() bool {
if t.kind == gcTriggerAlways {
return true
}
- if gcphase != _GCoff || gcpercent < 0 {
+ if gcphase != _GCoff {
return false
}
switch t.kind {
@@ -1170,6 +1219,9 @@ func (t gcTrigger) test() bool {
// own write.
return memstats.heap_live >= memstats.gc_trigger
case gcTriggerTime:
+ if gcpercent < 0 {
+ return false
+ }
lastgc := int64(atomic.Load64(&memstats.last_gc_nanotime))
return lastgc != 0 && t.now-lastgc > forcegcperiod
case gcTriggerCycle:
@@ -1236,7 +1288,7 @@ func gcStart(mode gcMode, trigger gcTrigger) {
}
}
- // Ok, we're doing it!  Stop everybody else
+ // Ok, we're doing it! Stop everybody else
semacquire(&worldsema)
if trace.enabled {
@@ -1249,7 +1301,12 @@ func gcStart(mode gcMode, trigger gcTrigger) {
gcResetMarkState()
- work.stwprocs, work.maxprocs = gcprocs(), gomaxprocs
+ work.stwprocs, work.maxprocs = gomaxprocs, gomaxprocs
+ if work.stwprocs > ncpu {
+ // This is used to compute CPU time of the STW phases,
+ // so it can't be more than ncpu, even if GOMAXPROCS is.
+ work.stwprocs = ncpu
+ }
work.heap0 = atomic.Load64(&memstats.heap_live)
work.pauseNS = 0
work.mode = mode
@@ -1257,6 +1314,9 @@ func gcStart(mode gcMode, trigger gcTrigger) {
now := nanotime()
work.tSweepTerm = now
work.pauseStart = now
+ if trace.enabled {
+ traceGCSTWStart(1)
+ }
systemstack(stopTheWorldWithSema)
// Finish sweep before we start concurrent scan.
systemstack(func() {
@@ -1309,11 +1369,17 @@ func gcStart(mode gcMode, trigger gcTrigger) {
gcController.markStartTime = now
// Concurrent mark.
- systemstack(startTheWorldWithSema)
- now = nanotime()
+ systemstack(func() {
+ now = startTheWorldWithSema(trace.enabled)
+ })
work.pauseNS += now - work.pauseStart
work.tMark = now
} else {
+ if trace.enabled {
+ // Switch to mark termination STW.
+ traceGCSTWDone()
+ traceGCSTWStart(0)
+ }
t := nanotime()
work.tMark, work.tMarkTerm = t, t
work.heapGoal = work.heap0
@@ -1356,7 +1422,8 @@ top:
// TODO(austin): Should dedicated workers keep an eye on this
// and exit gcDrain promptly?
atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, -0xffffffff)
- atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, -0xffffffff)
+ prevFractionalGoal := gcController.fractionalUtilizationGoal
+ gcController.fractionalUtilizationGoal = 0
if !gcBlackenPromptly {
// Transition from mark 1 to mark 2.
@@ -1383,6 +1450,7 @@ top:
// workers have exited their loop so we can
// start new mark 2 workers.
forEachP(func(_p_ *p) {
+ wbBufFlush1(_p_)
_p_.gcw.dispose()
})
})
@@ -1399,7 +1467,7 @@ top:
// Now we can start up mark 2 workers.
atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 0xffffffff)
- atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, 0xffffffff)
+ gcController.fractionalUtilizationGoal = prevFractionalGoal
incnwait := atomic.Xadd(&work.nwait, +1)
if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
@@ -1414,6 +1482,9 @@ top:
work.tMarkTerm = now
work.pauseStart = now
getg().m.preemptoff = "gcing"
+ if trace.enabled {
+ traceGCSTWStart(0)
+ }
systemstack(stopTheWorldWithSema)
// The gcphase is _GCmark, it will transition to _GCmarktermination
// below. The important thing is that the wb remains active until
@@ -1574,7 +1645,7 @@ func gcMarkTermination(nextTriggerRatio float64) {
// so events don't leak into the wrong cycle.
mProf_NextCycle()
- systemstack(startTheWorldWithSema)
+ systemstack(func() { startTheWorldWithSema(true) })
// Flush the heap profile so we can start a new cycle next GC.
// This is relatively expensive, so we don't do it with the
@@ -1645,10 +1716,7 @@ func gcMarkTermination(nextTriggerRatio float64) {
func gcBgMarkStartWorkers() {
// Background marking is performed by per-P G's. Ensure that
// each P has a background GC G.
- for _, p := range &allp {
- if p == nil || p.status == _Pdead {
- break
- }
+ for _, p := range allp {
if p.gcBgMarkWorker == 0 {
expectSystemGoroutine()
go gcBgMarkWorker(p)
@@ -1751,6 +1819,7 @@ func gcBgMarkWorker(_p_ *p) {
}
startTime := nanotime()
+ _p_.gcMarkWorkerStartTime = startTime
decnwait := atomic.Xadd(&work.nwait, -1)
if decnwait == work.nproc {
@@ -1792,7 +1861,7 @@ func gcBgMarkWorker(_p_ *p) {
// without preemption.
gcDrain(&_p_.gcw, gcDrainNoBlock|gcDrainFlushBgCredit)
case gcMarkWorkerFractionalMode:
- gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit)
+ gcDrain(&_p_.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit)
case gcMarkWorkerIdleMode:
gcDrain(&_p_.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit)
}
@@ -1817,7 +1886,7 @@ func gcBgMarkWorker(_p_ *p) {
atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1)
case gcMarkWorkerFractionalMode:
atomic.Xaddint64(&gcController.fractionalMarkTime, duration)
- atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, 1)
+ atomic.Xaddint64(&_p_.gcFractionalMarkTime, duration)
case gcMarkWorkerIdleMode:
atomic.Xaddint64(&gcController.idleMarkTime, duration)
}
@@ -1915,10 +1984,6 @@ func gcMark(start_time int64) {
work.helperDrainBlock = true
}
- if trace.enabled {
- traceGCScanStart()
- }
-
if work.nproc > 1 {
noteclear(&work.alldone)
helpgc(int32(work.nproc))
@@ -1952,8 +2017,8 @@ func gcMark(start_time int64) {
// Double-check that all gcWork caches are empty. This should
// be ensured by mark 2 before we enter mark termination.
- for i := 0; i < int(gomaxprocs); i++ {
- gcw := &allp[i].gcw
+ for _, p := range allp {
+ gcw := &p.gcw
if !gcw.empty() {
throw("P has cached GC work at end of mark termination")
}
@@ -1962,10 +2027,6 @@ func gcMark(start_time int64) {
}
}
- if trace.enabled {
- traceGCScanDone()
- }
-
cachestats()
// Update the marked heap stat.
@@ -2093,18 +2154,19 @@ func clearpools() {
unlock(&sched.deferlock)
}
-// Timing
-
-//go:nowritebarrier
+// gchelper runs mark termination tasks on Ps other than the P
+// coordinating mark termination.
+//
+// The caller is responsible for ensuring that this has a P to run on,
+// even though it's running during STW. Because of this, it's allowed
+// to have write barriers.
+//
+//go:yeswritebarrierrec
func gchelper() {
_g_ := getg()
_g_.m.traceback = 2
gchelperstart()
- if trace.enabled {
- traceGCScanStart()
- }
-
// Parallel mark over GC roots and heap
if gcphase == _GCmarktermination {
gcw := &_g_.m.p.ptr().gcw
@@ -2116,10 +2178,6 @@ func gchelper() {
gcw.dispose()
}
- if trace.enabled {
- traceGCScanDone()
- }
-
nproc := atomic.Load(&work.nproc) // work.nproc can change right after we increment work.ndone
if atomic.Xadd(&work.ndone, +1) == nproc-1 {
notewakeup(&work.alldone)
@@ -2138,6 +2196,8 @@ func gchelperstart() {
}
}
+// Timing
+
// itoaDiv formats val/(10**dec) into buf.
func itoaDiv(buf []byte, val uint64, dec int) []byte {
i := len(buf) - 1
diff --git a/libgo/go/runtime/mgc_gccgo.go b/libgo/go/runtime/mgc_gccgo.go
index c1fa1547adc..107a70a7898 100644
--- a/libgo/go/runtime/mgc_gccgo.go
+++ b/libgo/go/runtime/mgc_gccgo.go
@@ -6,7 +6,10 @@
package runtime
-import "unsafe"
+import (
+ "runtime/internal/sys"
+ "unsafe"
+)
// gcRoot is a single GC root: a variable plus a ptrmask.
type gcRoot struct {
@@ -85,3 +88,21 @@ func checkPreempt() {
gp.scanningself = false
mcall(gopreempt_m)
}
+
+// gcWriteBarrier implements a write barrier. This is implemented in
+// assembly in the gc library, but there is no special advantage to
+// doing so with gccgo.
+//go:nosplit
+//go:nowritebarrier
+func gcWriteBarrier(dst *uintptr, src uintptr) {
+ buf := &getg().m.p.ptr().wbBuf
+ next := buf.next
+ np := next + 2*sys.PtrSize
+ buf.next = np
+ *(*uintptr)(unsafe.Pointer(next)) = src
+ *(*uintptr)(unsafe.Pointer(next + sys.PtrSize)) = *dst
+ if np >= buf.end {
+ wbBufFlush(dst, src)
+ }
+ *dst = src
+}
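
The buffered barrier above enqueues the new value and the old slot contents into the per-P buffer, flushes when the buffer fills, and only then performs the store. The following is a standalone sketch of that enqueue/flush/store ordering using a plain slice in place of the runtime's pointer-arithmetic buffer; the names (wbuf, record, flush) are illustrative and are not runtime APIs.

package main

import "fmt"

// wbuf is a toy stand-in for the per-P write barrier buffer: a
// fixed-capacity queue of pointer values that is flushed when full.
type wbuf struct {
	entries []uintptr
	cap     int
}

// record mirrors the ordering in the gccgo gcWriteBarrier sketch:
// enqueue (new, old), flush if the buffer just filled, then store.
func (b *wbuf) record(dst *uintptr, src uintptr, flush func([]uintptr)) {
	b.entries = append(b.entries, src, *dst)
	if len(b.entries) >= b.cap {
		flush(b.entries)
		b.entries = b.entries[:0]
	}
	*dst = src
}

func main() {
	b := &wbuf{cap: 4}
	var slot uintptr
	for i := uintptr(1); i <= 5; i++ {
		b.record(&slot, i, func(ps []uintptr) { fmt.Println("flush", ps) })
	}
	fmt.Println("slot =", slot, "pending =", b.entries)
}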
diff --git a/libgo/go/runtime/mgclarge.go b/libgo/go/runtime/mgclarge.go
index 757e88d1d9d..fe437bf5e84 100644
--- a/libgo/go/runtime/mgclarge.go
+++ b/libgo/go/runtime/mgclarge.go
@@ -164,11 +164,10 @@ func (root *mTreap) insert(span *mspan) {
}
}
-func (root *mTreap) removeNode(t *treapNode) *mspan {
+func (root *mTreap) removeNode(t *treapNode) {
if t.spanKey.npages != t.npagesKey {
throw("span and treap node npages do not match")
}
- result := t.spanKey
// Rotate t down to be leaf of tree for removal, respecting priorities.
for t.right != nil || t.left != nil {
@@ -192,7 +191,6 @@ func (root *mTreap) removeNode(t *treapNode) *mspan {
t.spanKey = nil
t.npagesKey = 0
mheap_.treapalloc.free(unsafe.Pointer(t))
- return result
}
// remove searches for, finds, removes from the treap, and returns the smallest
diff --git a/libgo/go/runtime/mgcmark.go b/libgo/go/runtime/mgcmark.go
index 998a830caa8..7297fcb6d1a 100644
--- a/libgo/go/runtime/mgcmark.go
+++ b/libgo/go/runtime/mgcmark.go
@@ -34,13 +34,13 @@ const (
// span base.
maxObletBytes = 128 << 10
- // idleCheckThreshold specifies how many units of work to do
- // between run queue checks in an idle worker. Assuming a scan
+ // drainCheckThreshold specifies how many units of work to do
+ // between self-preemption checks in gcDrain. Assuming a scan
// rate of 1 MB/ms, this is ~100 µs. Lower values have higher
// overhead in the scan loop (the scheduler check may perform
// a syscall, so its overhead is nontrivial). Higher values
// make the system less responsive to incoming work.
- idleCheckThreshold = 100000
+ drainCheckThreshold = 100000
)
// gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
@@ -717,6 +717,7 @@ const (
gcDrainNoBlock
gcDrainFlushBgCredit
gcDrainIdle
+ gcDrainFractional
// gcDrainBlock means neither gcDrainUntilPreempt or
// gcDrainNoBlock. It is the default, but callers should use
@@ -733,6 +734,10 @@ const (
// If flags&gcDrainIdle != 0, gcDrain returns when there is other work
// to do. This implies gcDrainNoBlock.
//
+// If flags&gcDrainFractional != 0, gcDrain self-preempts when
+// pollFractionalWorkerExit() returns true. This implies
+// gcDrainNoBlock.
+//
// If flags&gcDrainNoBlock != 0, gcDrain returns as soon as it is
// unable to get more work. Otherwise, it will block until all
// blocking calls are blocked in gcDrain.
@@ -749,14 +754,24 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
gp := getg().m.curg
preemptible := flags&gcDrainUntilPreempt != 0
- blocking := flags&(gcDrainUntilPreempt|gcDrainIdle|gcDrainNoBlock) == 0
+ blocking := flags&(gcDrainUntilPreempt|gcDrainIdle|gcDrainFractional|gcDrainNoBlock) == 0
flushBgCredit := flags&gcDrainFlushBgCredit != 0
idle := flags&gcDrainIdle != 0
initScanWork := gcw.scanWork
- // idleCheck is the scan work at which to perform the next
- // idle check with the scheduler.
- idleCheck := initScanWork + idleCheckThreshold
+
+ // checkWork is the scan work before performing the next
+ // self-preempt check.
+ checkWork := int64(1<<63 - 1)
+ var check func() bool
+ if flags&(gcDrainIdle|gcDrainFractional) != 0 {
+ checkWork = initScanWork + drainCheckThreshold
+ if idle {
+ check = pollWork
+ } else if flags&gcDrainFractional != 0 {
+ check = pollFractionalWorkerExit
+ }
+ }
// Drain root marking jobs.
if work.markrootNext < work.markrootJobs {
@@ -766,7 +781,7 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
break
}
markroot(gcw, job)
- if idle && pollWork() {
+ if check != nil && check() {
goto done
}
}
@@ -807,12 +822,12 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
gcFlushBgCredit(gcw.scanWork - initScanWork)
initScanWork = 0
}
- idleCheck -= gcw.scanWork
+ checkWork -= gcw.scanWork
gcw.scanWork = 0
- if idle && idleCheck <= 0 {
- idleCheck += idleCheckThreshold
- if pollWork() {
+ if checkWork <= 0 {
+ checkWork += drainCheckThreshold
+ if check != nil && check() {
break
}
}
@@ -1091,6 +1106,9 @@ func shade(b uintptr) {
// obj is the start of an object with mark mbits.
// If it isn't already marked, mark it and enqueue into gcw.
// base and off are for debugging only and could be removed.
+//
+// See also wbBufFlush1, which partially duplicates this logic.
+//
//go:nowritebarrierrec
func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork, objIndex uintptr, forStack bool) {
// obj should be start of allocation, and so must be at least pointer-aligned.
@@ -1249,10 +1267,7 @@ func gcmarknewobject(obj, size, scanSize uintptr) {
//
// The world must be stopped.
func gcMarkTinyAllocs() {
- for _, p := range &allp {
- if p == nil || p.status == _Pdead {
- break
- }
+ for _, p := range allp {
c := p.mcache
if c == nil || c.tiny == 0 {
continue
diff --git a/libgo/go/runtime/mgcwork.go b/libgo/go/runtime/mgcwork.go
index 461679b9343..c6634fc78ca 100644
--- a/libgo/go/runtime/mgcwork.go
+++ b/libgo/go/runtime/mgcwork.go
@@ -85,6 +85,13 @@ type gcWork struct {
scanWork int64
}
+// Most of the methods of gcWork are go:nowritebarrierrec because the
+// write barrier itself can invoke gcWork methods but the methods are
+// not generally re-entrant. Hence, if a gcWork method invoked the
+// write barrier while the gcWork was in an inconsistent state, and
+// the write barrier in turn invoked a gcWork method, it could
+// permanently corrupt the gcWork.
+
func (w *gcWork) init() {
w.wbuf1 = getempty()
wbuf2 := trygetfull()
@@ -96,7 +103,7 @@ func (w *gcWork) init() {
// put enqueues a pointer for the garbage collector to trace.
// obj must point to the beginning of a heap object or an oblet.
-//go:nowritebarrier
+//go:nowritebarrierrec
func (w *gcWork) put(obj uintptr) {
flushed := false
wbuf := w.wbuf1
@@ -129,7 +136,7 @@ func (w *gcWork) put(obj uintptr) {
// putFast does a put and returns true if it can be done quickly
// otherwise it returns false and the caller needs to call put.
-//go:nowritebarrier
+//go:nowritebarrierrec
func (w *gcWork) putFast(obj uintptr) bool {
wbuf := w.wbuf1
if wbuf == nil {
@@ -143,12 +150,45 @@ func (w *gcWork) putFast(obj uintptr) bool {
return true
}
+// putBatch performs a put on every pointer in obj. See put for
+// constraints on these pointers.
+//
+//go:nowritebarrierrec
+func (w *gcWork) putBatch(obj []uintptr) {
+ if len(obj) == 0 {
+ return
+ }
+
+ flushed := false
+ wbuf := w.wbuf1
+ if wbuf == nil {
+ w.init()
+ wbuf = w.wbuf1
+ }
+
+ for len(obj) > 0 {
+ for wbuf.nobj == len(wbuf.obj) {
+ putfull(wbuf)
+ w.wbuf1, w.wbuf2 = w.wbuf2, getempty()
+ wbuf = w.wbuf1
+ flushed = true
+ }
+ n := copy(wbuf.obj[wbuf.nobj:], obj)
+ wbuf.nobj += n
+ obj = obj[n:]
+ }
+
+ if flushed && gcphase == _GCmark {
+ gcController.enlistWorker()
+ }
+}
+
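
putBatch repeatedly copies as much of the batch as fits into the current workbuf and swaps in an empty buffer when the current one is full. The sketch below shows the same fill-and-swap loop over a plain fixed-capacity slice; fillChunks and flush are local names invented for the example.

package main

import "fmt"

// fillChunks distributes obj across a fixed-capacity buffer, handing
// each full buffer to flush, mirroring the copy loop in putBatch.
func fillChunks(obj, buf []uintptr, flush func([]uintptr)) []uintptr {
	for len(obj) > 0 {
		for len(buf) == cap(buf) {
			flush(buf)
			buf = buf[:0]
		}
		n := copy(buf[len(buf):cap(buf)], obj)
		buf = buf[:len(buf)+n]
		obj = obj[n:]
	}
	return buf
}

func main() {
	buf := make([]uintptr, 0, 4)
	buf = fillChunks([]uintptr{1, 2, 3, 4, 5, 6, 7, 8, 9}, buf, func(b []uintptr) {
		fmt.Println("flushed full buffer:", b)
	})
	fmt.Println("left in current buffer:", buf)
}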
// tryGet dequeues a pointer for the garbage collector to trace.
//
// If there are no pointers remaining in this gcWork or in the global
// queue, tryGet returns 0. Note that there may still be pointers in
// other gcWork instances or other caches.
-//go:nowritebarrier
+//go:nowritebarrierrec
func (w *gcWork) tryGet() uintptr {
wbuf := w.wbuf1
if wbuf == nil {
@@ -177,7 +217,7 @@ func (w *gcWork) tryGet() uintptr {
// tryGetFast dequeues a pointer for the garbage collector to trace
// if one is readily available. Otherwise it returns 0 and
// the caller is expected to call tryGet().
-//go:nowritebarrier
+//go:nowritebarrierrec
func (w *gcWork) tryGetFast() uintptr {
wbuf := w.wbuf1
if wbuf == nil {
@@ -194,7 +234,7 @@ func (w *gcWork) tryGetFast() uintptr {
// get dequeues a pointer for the garbage collector to trace, blocking
// if necessary to ensure all pointers from all queues and caches have
// been retrieved. get returns 0 if there are no pointers remaining.
-//go:nowritebarrier
+//go:nowritebarrierrec
func (w *gcWork) get() uintptr {
wbuf := w.wbuf1
if wbuf == nil {
@@ -228,7 +268,7 @@ func (w *gcWork) get() uintptr {
// GC can inspect them. This helps reduce the mutator's
// ability to hide pointers during the concurrent mark phase.
//
-//go:nowritebarrier
+//go:nowritebarrierrec
func (w *gcWork) dispose() {
if wbuf := w.wbuf1; wbuf != nil {
if wbuf.nobj == 0 {
@@ -262,7 +302,7 @@ func (w *gcWork) dispose() {
// balance moves some work that's cached in this gcWork back on the
// global queue.
-//go:nowritebarrier
+//go:nowritebarrierrec
func (w *gcWork) balance() {
if w.wbuf1 == nil {
return
@@ -282,7 +322,7 @@ func (w *gcWork) balance() {
}
// empty returns true if w has no mark work available.
-//go:nowritebarrier
+//go:nowritebarrierrec
func (w *gcWork) empty() bool {
return w.wbuf1 == nil || (w.wbuf1.nobj == 0 && w.wbuf2.nobj == 0)
}
diff --git a/libgo/go/runtime/mheap.go b/libgo/go/runtime/mheap.go
index 8749f971065..d971bfee4df 100644
--- a/libgo/go/runtime/mheap.go
+++ b/libgo/go/runtime/mheap.go
@@ -56,6 +56,12 @@ type mheap struct {
// Internal pages map to an arbitrary span.
// For pages that have never been allocated, spans entries are nil.
//
+ // Modifications are protected by mheap.lock. Reads can be
+ // performed without locking, but ONLY from indexes that are
+ // known to contain in-use or stack spans. This means there
+ // must not be a safe-point between establishing that an
+ // address is live and looking it up in the spans array.
+ //
// This is backed by a reserved region of the address space so
// it can grow without moving. The memory up to len(spans) is
// mapped. cap(spans) indicates the total reserved memory.
@@ -154,6 +160,8 @@ type mheap struct {
specialfinalizeralloc fixalloc // allocator for specialfinalizer*
specialprofilealloc fixalloc // allocator for specialprofile*
speciallock mutex // lock for special record allocators.
+
+ unused *specialfinalizer // never set, just here to force the specialfinalizer type into DWARF
}
var mheap_ mheap
@@ -311,6 +319,17 @@ func (s *mspan) layout() (size, n, total uintptr) {
return
}
+// recordspan adds a newly allocated span to h.allspans.
+//
+// This only happens the first time a span is allocated from
+// mheap.spanalloc (it is not called when a span is reused).
+//
+// Write barriers are disallowed here because it can be called from
+// gcWork when allocating new workbufs. However, because it's an
+// indirect call from the fixalloc initializer, the compiler can't see
+// this.
+//
+//go:nowritebarrierrec
func recordspan(vh unsafe.Pointer, p unsafe.Pointer) {
h := (*mheap)(vh)
s := (*mspan)(p)
@@ -320,8 +339,8 @@ func recordspan(vh unsafe.Pointer, p unsafe.Pointer) {
n = cap(h.allspans) * 3 / 2
}
var new []*mspan
- sp := (*slice)(unsafe.Pointer(&new))
- sp.array = sysAlloc(uintptr(n)*sys.PtrSize, &memstats.other_sys)
+ sp := (*notInHeapSlice)(unsafe.Pointer(&new))
+ sp.array = (*notInHeap)(sysAlloc(uintptr(n)*sys.PtrSize, &memstats.other_sys))
if sp.array == nil {
throw("runtime: cannot allocate memory")
}
@@ -331,12 +350,13 @@ func recordspan(vh unsafe.Pointer, p unsafe.Pointer) {
copy(new, h.allspans)
}
oldAllspans := h.allspans
- h.allspans = new
+ *(*notInHeapSlice)(unsafe.Pointer(&h.allspans)) = *(*notInHeapSlice)(unsafe.Pointer(&new))
if len(oldAllspans) != 0 {
sysFree(unsafe.Pointer(&oldAllspans[0]), uintptr(cap(oldAllspans))*unsafe.Sizeof(oldAllspans[0]), &memstats.other_sys)
}
}
- h.allspans = append(h.allspans, s)
+ h.allspans = h.allspans[:len(h.allspans)+1]
+ h.allspans[len(h.allspans)-1] = s
}
// A spanClass represents the size class and noscan-ness of a span.
@@ -854,7 +874,7 @@ HaveSpan:
// Large spans have a minimum size of 1MByte. The maximum number of large spans to support
// 1TBytes is 1 million, experimentation using random sizes indicates that the depth of
// the tree is less than 2x that of a perfectly balanced tree. 1TByte can be referenced
-// by a perfectly balanced tree with a a depth of 20. Twice that is an acceptable 40.
+// by a perfectly balanced tree with a depth of 20. Twice that is an acceptable 40.
func (h *mheap) isLargeSpan(npages uintptr) bool {
return npages >= uintptr(len(h.free))
}
@@ -1120,34 +1140,35 @@ func scavengelist(list *mSpanList, now, limit uint64) uintptr {
var sumreleased uintptr
for s := list.first; s != nil; s = s.next {
- if (now-uint64(s.unusedsince)) > limit && s.npreleased != s.npages {
- start := s.base()
- end := start + s.npages<<_PageShift
- if physPageSize > _PageSize {
- // We can only release pages in
- // physPageSize blocks, so round start
- // and end in. (Otherwise, madvise
- // will round them *out* and release
- // more memory than we want.)
- start = (start + physPageSize - 1) &^ (physPageSize - 1)
- end &^= physPageSize - 1
- if end <= start {
- // start and end don't span a
- // whole physical page.
- continue
- }
- }
- len := end - start
-
- released := len - (s.npreleased << _PageShift)
- if physPageSize > _PageSize && released == 0 {
+ if (now-uint64(s.unusedsince)) <= limit || s.npreleased == s.npages {
+ continue
+ }
+ start := s.base()
+ end := start + s.npages<<_PageShift
+ if physPageSize > _PageSize {
+ // We can only release pages in
+ // physPageSize blocks, so round start
+ // and end in. (Otherwise, madvise
+ // will round them *out* and release
+ // more memory than we want.)
+ start = (start + physPageSize - 1) &^ (physPageSize - 1)
+ end &^= physPageSize - 1
+ if end <= start {
+ // start and end don't span a
+ // whole physical page.
continue
}
- memstats.heap_released += uint64(released)
- sumreleased += released
- s.npreleased = len >> _PageShift
- sysUnused(unsafe.Pointer(start), len)
}
+ len := end - start
+
+ released := len - (s.npreleased << _PageShift)
+ if physPageSize > _PageSize && released == 0 {
+ continue
+ }
+ memstats.heap_released += uint64(released)
+ sumreleased += released
+ s.npreleased = len >> _PageShift
+ sysUnused(unsafe.Pointer(start), len)
}
return sumreleased
}
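
The restructured loop keeps the original rounding rule: when the physical page is larger than the runtime page, the span's range is rounded inward to whole physical pages before being released, and skipped when nothing whole remains. A small sketch of that rounding, assuming a hypothetical 16 KiB physical page over 8 KiB runtime pages:

package main

import "fmt"

// roundIn shrinks [start, end) to the largest contained range aligned
// to physPageSize, reporting ok=false if no whole physical page fits.
func roundIn(start, end, physPageSize uintptr) (s, e uintptr, ok bool) {
	s = (start + physPageSize - 1) &^ (physPageSize - 1) // round start up
	e = end &^ (physPageSize - 1)                        // round end down
	return s, e, e > s
}

func main() {
	const physPage = 16 << 10 // assume 16 KiB physical pages
	// A span of three 8 KiB runtime pages starting halfway into a physical page.
	start := uintptr(0x2a000)
	end := start + 3*8192
	s, e, ok := roundIn(start, end, physPage)
	fmt.Printf("release [%#x,%#x) -> [%#x,%#x) ok=%v (%d bytes)\n", start, end, s, e, ok, e-s)
}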
diff --git a/libgo/go/runtime/mksizeclasses.go b/libgo/go/runtime/mksizeclasses.go
index 0cb2b33a8cd..b146dbcd6c9 100644
--- a/libgo/go/runtime/mksizeclasses.go
+++ b/libgo/go/runtime/mksizeclasses.go
@@ -24,8 +24,8 @@
// In practice, only one of the wastes comes into play for a
// given size (sizes < 512 waste mainly on the round-up,
// sizes > 512 waste mainly on the page chopping).
-//
-// TODO(rsc): Compute max waste for any given size.
+// For really small sizes, alignment constraints force the
+// overhead higher.
package main
@@ -242,15 +242,18 @@ nextk:
}
func printComment(w io.Writer, classes []class) {
- fmt.Fprintf(w, "// %-5s %-9s %-10s %-7s %-11s\n", "class", "bytes/obj", "bytes/span", "objects", "waste bytes")
+ fmt.Fprintf(w, "// %-5s %-9s %-10s %-7s %-10s %-9s\n", "class", "bytes/obj", "bytes/span", "objects", "tail waste", "max waste")
+ prevSize := 0
for i, c := range classes {
if i == 0 {
continue
}
spanSize := c.npages * pageSize
objects := spanSize / c.size
- waste := spanSize - c.size*(spanSize/c.size)
- fmt.Fprintf(w, "// %5d %9d %10d %7d %11d\n", i, c.size, spanSize, objects, waste)
+ tailWaste := spanSize - c.size*(spanSize/c.size)
+ maxWaste := float64((c.size-prevSize-1)*objects+tailWaste) / float64(spanSize)
+ prevSize = c.size
+ fmt.Fprintf(w, "// %5d %9d %10d %7d %10d %8.2f%%\n", i, c.size, spanSize, objects, tailWaste, 100*maxWaste)
}
fmt.Fprintf(w, "\n")
}
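
The new maxWaste column estimates the worst case where every object in the span holds an allocation just one byte larger than the previous class, so each object wastes size-prevSize-1 bytes, on top of the unusable tail of the span. A worked example of that arithmetic for a 48-byte class on an 8 KiB span whose next smaller class is 32 bytes:

package main

import "fmt"

func main() {
	const (
		spanSize = 8192 // one-page span
		size     = 48   // object size of this class
		prevSize = 32   // object size of the next smaller class
	)
	objects := spanSize / size           // 170 objects per span
	tailWaste := spanSize - size*objects // 32 bytes unusable at the end
	// Worst case: every object holds a prevSize+1-byte allocation.
	maxWaste := float64((size-prevSize-1)*objects+tailWaste) / float64(spanSize)
	fmt.Printf("objects=%d tailWaste=%d maxWaste=%.2f%%\n", objects, tailWaste, 100*maxWaste)
}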
diff --git a/libgo/go/runtime/mstats.go b/libgo/go/runtime/mstats.go
index 71dc2239854..22f5195cd58 100644
--- a/libgo/go/runtime/mstats.go
+++ b/libgo/go/runtime/mstats.go
@@ -589,12 +589,13 @@ func updatememstats() {
memstats.heap_objects = memstats.nmalloc - memstats.nfree
}
+// cachestats flushes all mcache stats.
+//
+// The world must be stopped.
+//
//go:nowritebarrier
func cachestats() {
- for _, p := range &allp {
- if p == nil {
- break
- }
+ for _, p := range allp {
c := p.mcache
if c == nil {
continue
@@ -610,9 +611,6 @@ func cachestats() {
//go:nowritebarrier
func flushmcache(i int) {
p := allp[i]
- if p == nil {
- return
- }
c := p.mcache
if c == nil {
return
@@ -665,7 +663,7 @@ func purgecachedstats(c *mcache) {
// overflow errors.
//go:nosplit
func mSysStatInc(sysStat *uint64, n uintptr) {
- if sys.BigEndian != 0 {
+ if sys.BigEndian {
atomic.Xadd64(sysStat, int64(n))
return
}
@@ -679,7 +677,7 @@ func mSysStatInc(sysStat *uint64, n uintptr) {
// mSysStatInc apply.
//go:nosplit
func mSysStatDec(sysStat *uint64, n uintptr) {
- if sys.BigEndian != 0 {
+ if sys.BigEndian {
atomic.Xadd64(sysStat, -int64(n))
return
}
diff --git a/libgo/go/runtime/mwbbuf.go b/libgo/go/runtime/mwbbuf.go
new file mode 100644
index 00000000000..a060df8bc06
--- /dev/null
+++ b/libgo/go/runtime/mwbbuf.go
@@ -0,0 +1,248 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This implements the write barrier buffer. The write barrier itself
+// is gcWriteBarrier and is implemented in assembly.
+//
+// The write barrier has a fast path and a slow path. The fast path
+// simply enqueues to a per-P write barrier buffer. It's written in
+// assembly and doesn't clobber any general purpose registers, so it
+// doesn't have the usual overheads of a Go call.
+//
+// When the buffer fills up, the write barrier invokes the slow path
+// (wbBufFlush) to flush the buffer to the GC work queues. In this
+// path, since the compiler didn't spill registers, we spill *all*
+// registers and disallow any GC safe points that could observe the
+// stack frame (since we don't know the types of the spilled
+// registers).
+
+package runtime
+
+import (
+ "runtime/internal/sys"
+ "unsafe"
+)
+
+// testSmallBuf forces a small write barrier buffer to stress write
+// barrier flushing.
+const testSmallBuf = false
+
+// wbBuf is a per-P buffer of pointers queued by the write barrier.
+// This buffer is flushed to the GC workbufs when it fills up and on
+// various GC transitions.
+//
+// This is closely related to a "sequential store buffer" (SSB),
+// except that SSBs are usually used for maintaining remembered sets,
+// while this is used for marking.
+type wbBuf struct {
+ // next points to the next slot in buf. It must not be a
+ // pointer type because it can point past the end of buf and
+ // must be updated without write barriers.
+ //
+ // This is a pointer rather than an index to optimize the
+ // write barrier assembly.
+ next uintptr
+
+ // end points to just past the end of buf. It must not be a
+ // pointer type because it points past the end of buf and must
+ // be updated without write barriers.
+ end uintptr
+
+ // buf stores a series of pointers to execute write barriers
+ // on. This must be a multiple of wbBufEntryPointers because
+ // the write barrier only checks for overflow once per entry.
+ buf [wbBufEntryPointers * wbBufEntries]uintptr
+}
+
+const (
+ // wbBufEntries is the number of write barriers between
+ // flushes of the write barrier buffer.
+ //
+ // This trades latency for throughput amortization. Higher
+ // values amortize flushing overhead more, but increase the
+ // latency of flushing. Higher values also increase the cache
+ // footprint of the buffer.
+ //
+ // TODO: What is the latency cost of this? Tune this value.
+ wbBufEntries = 256
+
+ // wbBufEntryPointers is the number of pointers added to the
+ // buffer by each write barrier.
+ wbBufEntryPointers = 2
+)
+
+// reset empties b by resetting its next and end pointers.
+func (b *wbBuf) reset() {
+ start := uintptr(unsafe.Pointer(&b.buf[0]))
+ b.next = start
+ if gcBlackenPromptly || writeBarrier.cgo {
+ // Effectively disable the buffer by forcing a flush
+ // on every barrier.
+ b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers]))
+ } else if testSmallBuf {
+ // For testing, allow two barriers in the buffer. If
+ // we only did one, then barriers of non-heap pointers
+ // would be no-ops. This lets us combine a buffered
+ // barrier with a flush at a later time.
+ b.end = uintptr(unsafe.Pointer(&b.buf[2*wbBufEntryPointers]))
+ } else {
+ b.end = start + uintptr(len(b.buf))*unsafe.Sizeof(b.buf[0])
+ }
+
+ if (b.end-b.next)%(wbBufEntryPointers*unsafe.Sizeof(b.buf[0])) != 0 {
+ throw("bad write barrier buffer bounds")
+ }
+}
+
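
With the constants above, buf holds wbBufEntries*wbBufEntryPointers = 512 uintptr slots, so on a 64-bit system end lands 4 KiB past the start of the array and 256 barriers fit between flushes (only two when testSmallBuf is set). A quick check of that arithmetic:

package main

import (
	"fmt"
	"unsafe"
)

const (
	wbBufEntries       = 256 // barriers between flushes
	wbBufEntryPointers = 2   // pointers recorded per barrier
)

func main() {
	var buf [wbBufEntryPointers * wbBufEntries]uintptr
	total := uintptr(len(buf)) * unsafe.Sizeof(buf[0])
	perBarrier := uintptr(wbBufEntryPointers) * unsafe.Sizeof(buf[0])
	fmt.Printf("slots=%d bufBytes=%d bytesPerBarrier=%d barriersBeforeFlush=%d\n",
		len(buf), total, perBarrier, total/perBarrier)
}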
+// putFast adds old and new to the write barrier buffer and returns
+// false if a flush is necessary. Callers should use this as:
+//
+// buf := &getg().m.p.ptr().wbBuf
+// if !buf.putFast(old, new) {
+// wbBufFlush(...)
+// }
+//
+// The arguments to wbBufFlush depend on whether the caller is doing
+// its own cgo pointer checks. If it is, then this can be
+// wbBufFlush(nil, 0). Otherwise, it must pass the slot address and
+// new.
+//
+// Since buf is a per-P resource, the caller must ensure there are no
+// preemption points while buf is in use.
+//
+// It must be nowritebarrierrec because write barriers here would
+// corrupt the write barrier buffer. It (and everything it calls, if
+// it called anything) has to be nosplit to avoid scheduling on to a
+// different P and a different buffer.
+//
+//go:nowritebarrierrec
+//go:nosplit
+func (b *wbBuf) putFast(old, new uintptr) bool {
+ p := (*[2]uintptr)(unsafe.Pointer(b.next))
+ p[0] = old
+ p[1] = new
+ b.next += 2 * sys.PtrSize
+ return b.next != b.end
+}
+
+// wbBufFlush flushes the current P's write barrier buffer to the GC
+// workbufs. It is passed the slot and value of the write barrier that
+// caused the flush so that it can implement cgocheck.
+//
+// This must not have write barriers because it is part of the write
+// barrier implementation.
+//
+// This and everything it calls must be nosplit because 1) the stack
+// contains untyped slots from gcWriteBarrier and 2) there must not be
+// a GC safe point between the write barrier test in the caller and
+// flushing the buffer.
+//
+// TODO: A "go:nosplitrec" annotation would be perfect for this.
+//
+//go:nowritebarrierrec
+//go:nosplit
+func wbBufFlush(dst *uintptr, src uintptr) {
+ if getg().m.dying > 0 {
+ // We're going down. Not much point in write barriers
+ // and this way we can allow write barriers in the
+ // panic path.
+ return
+ }
+
+ if writeBarrier.cgo && dst != nil {
+ // This must be called from the stack that did the
+ // write. It's nosplit all the way down.
+ cgoCheckWriteBarrier(dst, src)
+ if !writeBarrier.needed {
+ // We were only called for cgocheck.
+ b := &getg().m.p.ptr().wbBuf
+ b.next = uintptr(unsafe.Pointer(&b.buf[0]))
+ return
+ }
+ }
+
+ // Switch to the system stack so we don't have to worry about
+ // the untyped stack slots or safe points.
+ systemstack(func() {
+ wbBufFlush1(getg().m.p.ptr())
+ })
+}
+
+// wbBufFlush1 flushes p's write barrier buffer to the GC work queue.
+//
+// This must not have write barriers because it is part of the write
+// barrier implementation, so this may lead to infinite loops or
+// buffer corruption.
+//
+// This must be non-preemptible because it uses the P's workbuf.
+//
+//go:nowritebarrierrec
+//go:systemstack
+func wbBufFlush1(_p_ *p) {
+ // Get the buffered pointers.
+ start := uintptr(unsafe.Pointer(&_p_.wbBuf.buf[0]))
+ n := (_p_.wbBuf.next - start) / unsafe.Sizeof(_p_.wbBuf.buf[0])
+ ptrs := _p_.wbBuf.buf[:n]
+
+ // Reset the buffer.
+ _p_.wbBuf.reset()
+
+ if useCheckmark {
+ // Slow path for checkmark mode.
+ for _, ptr := range ptrs {
+ shade(ptr)
+ }
+ return
+ }
+
+ // Mark all of the pointers in the buffer and record only the
+ // pointers we greyed. We use the buffer itself to temporarily
+ // record greyed pointers.
+ //
+ // TODO: Should scanobject/scanblock just stuff pointers into
+ // the wbBuf? Then this would become the sole greying path.
+ gcw := &_p_.gcw
+ pos := 0
+ arenaStart := mheap_.arena_start
+ for _, ptr := range ptrs {
+ if ptr < arenaStart {
+ // nil pointers are very common, especially
+ // for the "old" values. Filter out these and
+ // other "obvious" non-heap pointers ASAP.
+ //
+ // TODO: Should we filter out nils in the fast
+ // path to reduce the rate of flushes?
+ continue
+ }
+ // TODO: This doesn't use hbits, so calling
+ // heapBitsForObject seems a little silly. We could
+ // easily separate this out since heapBitsForObject
+ // just calls heapBitsForAddr(obj) to get hbits.
+ obj, _, span, objIndex := heapBitsForObject(ptr, 0, 0, false)
+ if obj == 0 {
+ continue
+ }
+ // TODO: Consider making two passes where the first
+ // just prefetches the mark bits.
+ mbits := span.markBitsForIndex(objIndex)
+ if mbits.isMarked() {
+ continue
+ }
+ mbits.setMarked()
+ if span.spanclass.noscan() {
+ gcw.bytesMarked += uint64(span.elemsize)
+ continue
+ }
+ ptrs[pos] = obj
+ pos++
+ }
+
+ // Enqueue the greyed objects.
+ gcw.putBatch(ptrs[:pos])
+ if gcphase == _GCmarktermination || gcBlackenPromptly {
+ // Ps aren't allowed to cache work during mark
+ // termination.
+ gcw.dispose()
+ }
+}
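
The flush loop reuses ptrs itself as its output: entries that are non-heap, already marked, or in noscan spans are dropped, and the survivors are compacted to the front with a write index (pos) before being handed to putBatch. A standalone sketch of that in-place filter, with a toy keep policy standing in for the heap and mark-bit checks:

package main

import "fmt"

// compactInPlace keeps only the elements for which keep returns true,
// writing survivors over the front of the same slice like the pos
// index in wbBufFlush1.
func compactInPlace(ptrs []uintptr, keep func(uintptr) bool) []uintptr {
	pos := 0
	for _, p := range ptrs {
		if !keep(p) {
			continue
		}
		ptrs[pos] = p
		pos++
	}
	return ptrs[:pos]
}

func main() {
	buffered := []uintptr{0, 0x1000, 0, 0x2000, 0x1000}
	seen := map[uintptr]bool{}
	// Toy policy: drop nil pointers and repeats, standing in for the
	// non-heap / already-marked / noscan filtering in the real code.
	grey := compactInPlace(buffered, func(p uintptr) bool {
		if p == 0 || seen[p] {
			return false
		}
		seen[p] = true
		return true
	})
	fmt.Println("enqueue for marking:", grey)
}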
diff --git a/libgo/go/runtime/netpoll_kqueue.go b/libgo/go/runtime/netpoll_kqueue.go
index 47927fe7c37..1f68effbf9d 100644
--- a/libgo/go/runtime/netpoll_kqueue.go
+++ b/libgo/go/runtime/netpoll_kqueue.go
@@ -97,10 +97,23 @@ retry:
for i := 0; i < int(n); i++ {
ev := &events[i]
var mode int32
- if ev.filter == _EVFILT_READ {
+ switch ev.filter {
+ case _EVFILT_READ:
mode += 'r'
- }
- if ev.filter == _EVFILT_WRITE {
+
+ // On some systems when the read end of a pipe
+ // is closed the write end will not get a
+ // _EVFILT_WRITE event, but will get a
+ // _EVFILT_READ event with EV_EOF set.
+ // Note that setting 'w' here just means that we
+ // will wake up a goroutine waiting to write;
+ // that goroutine will try the write again,
+ // and the appropriate thing will happen based
+ // on what that write returns (success, EPIPE, EAGAIN).
+ if ev.flags&_EV_EOF != 0 {
+ mode += 'w'
+ }
+ case _EVFILT_WRITE:
mode += 'w'
}
if mode != 0 {
diff --git a/libgo/go/runtime/netpoll_windows.go b/libgo/go/runtime/netpoll_windows.go
index 79dafb02796..134071f5e3c 100644
--- a/libgo/go/runtime/netpoll_windows.go
+++ b/libgo/go/runtime/netpoll_windows.go
@@ -47,7 +47,7 @@ func netpolldescriptor() uintptr {
func netpollopen(fd uintptr, pd *pollDesc) int32 {
if stdcall4(_CreateIoCompletionPort, fd, iocphandle, 0, 0) == 0 {
- return -int32(getlasterror())
+ return int32(getlasterror())
}
return 0
}
diff --git a/libgo/go/runtime/os_freebsd.go b/libgo/go/runtime/os_freebsd.go
index a4d2886d6af..8c3535b893b 100644
--- a/libgo/go/runtime/os_freebsd.go
+++ b/libgo/go/runtime/os_freebsd.go
@@ -16,6 +16,17 @@ type mOS struct {
//extern _umtx_op
func sys_umtx_op(addr *uint32, mode int32, val uint32, uaddr1 uintptr, ts *umtx_time) int32
+func getPageSize() uintptr {
+ mib := [2]uint32{_CTL_HW, _HW_PAGESIZE}
+ out := uint32(0)
+ nout := unsafe.Sizeof(out)
+ ret := sysctl(&mib[0], 2, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0)
+ if ret >= 0 {
+ return uintptr(out)
+ }
+ return 0
+}
+
// FreeBSD's umtx_op syscall is effectively the same as Linux's futex, and
// thus the code is largely similar. See Linux implementation
// and lock_futex.go for comments.
diff --git a/libgo/go/runtime/os_linux.go b/libgo/go/runtime/os_linux.go
index e1a6a308cf2..816327e70b8 100644
--- a/libgo/go/runtime/os_linux.go
+++ b/libgo/go/runtime/os_linux.go
@@ -106,45 +106,46 @@ func sysargs(argc int32, argv **byte) {
// now argv+n is auxv
auxv := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*sys.PtrSize))
- if sysauxv(auxv[:]) == 0 {
- // In some situations we don't get a loader-provided
- // auxv, such as when loaded as a library on Android.
- // Fall back to /proc/self/auxv.
- fd := open(&procAuxv[0], 0 /* O_RDONLY */, 0)
- if fd < 0 {
- // On Android, /proc/self/auxv might be unreadable (issue 9229), so we fallback to
- // try using mincore to detect the physical page size.
- // mincore should return EINVAL when address is not a multiple of system page size.
- const size = 256 << 10 // size of memory region to allocate
- p := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
- if uintptr(p) < 4096 {
- return
- }
- var n uintptr
- for n = 4 << 10; n < size; n <<= 1 {
- err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0])
- if err == 0 {
- physPageSize = n
- break
- }
- }
- if physPageSize == 0 {
- physPageSize = size
- }
- munmap(p, size)
+ if sysauxv(auxv[:]) != 0 {
+ return
+ }
+ // In some situations we don't get a loader-provided
+ // auxv, such as when loaded as a library on Android.
+ // Fall back to /proc/self/auxv.
+ fd := open(&procAuxv[0], 0 /* O_RDONLY */, 0)
+ if fd < 0 {
+ // On Android, /proc/self/auxv might be unreadable (issue 9229), so we fallback to
+ // try using mincore to detect the physical page size.
+ // mincore should return EINVAL when address is not a multiple of system page size.
+ const size = 256 << 10 // size of memory region to allocate
+ p, err := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
+ if err != 0 {
return
}
- var buf [128]uintptr
- n := read(fd, noescape(unsafe.Pointer(&buf[0])), int32(unsafe.Sizeof(buf)))
- closefd(fd)
- if n < 0 {
- return
+ var n uintptr
+ for n = 4 << 10; n < size; n <<= 1 {
+ err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0])
+ if err == 0 {
+ physPageSize = n
+ break
+ }
+ }
+ if physPageSize == 0 {
+ physPageSize = size
}
- // Make sure buf is terminated, even if we didn't read
- // the whole file.
- buf[len(buf)-2] = _AT_NULL
- sysauxv(buf[:])
+ munmap(p, size)
+ return
+ }
+ var buf [128]uintptr
+ n = read(fd, noescape(unsafe.Pointer(&buf[0])), int32(unsafe.Sizeof(buf)))
+ closefd(fd)
+ if n < 0 {
+ return
}
+ // Make sure buf is terminated, even if we didn't read
+ // the whole file.
+ buf[len(buf)-2] = _AT_NULL
+ sysauxv(buf[:])
}
func sysauxv(auxv []uintptr) int {
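
When neither auxv nor /proc/self/auxv is available, the code above discovers the physical page size by probing a mapped region with mincore at doubling offsets; mincore rejects addresses that are not multiples of the system page size, so the first accepted offset is the page size. The sketch below captures that doubling probe with a hypothetical aligned callback in place of the real mincore syscall.

package main

import "fmt"

// probePageSize returns the smallest power-of-two offset, starting at
// 4 KiB and capped at limit, that aligned accepts. aligned stands in
// for mincore, which fails with EINVAL on non-page-aligned addresses.
func probePageSize(limit uintptr, aligned func(off uintptr) bool) uintptr {
	for n := uintptr(4 << 10); n < limit; n <<= 1 {
		if aligned(n) {
			return n
		}
	}
	return limit
}

func main() {
	const kernelPage = 64 << 10 // pretend the kernel uses 64 KiB pages
	size := probePageSize(256<<10, func(off uintptr) bool { return off%kernelPage == 0 })
	fmt.Println("detected physical page size:", size)
}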
diff --git a/libgo/go/runtime/os_linux_ppc64x.go b/libgo/go/runtime/os_linux_ppc64x.go
index b324344493e..d27902d794d 100644
--- a/libgo/go/runtime/os_linux_ppc64x.go
+++ b/libgo/go/runtime/os_linux_ppc64x.go
@@ -7,55 +7,22 @@
package runtime
-import (
- "runtime/internal/sys"
-)
+// For go:linkname
+import _ "unsafe"
-const (
- // ISA level
- // Go currently requires POWER5 as a minimum for ppc64, so we need
- // to check for ISA 2.03 and beyond.
- _PPC_FEATURE_POWER5_PLUS = 0x00020000 // ISA 2.03 (POWER5+)
- _PPC_FEATURE_ARCH_2_05 = 0x00001000 // ISA 2.05 (POWER6)
- _PPC_FEATURE_POWER6_EXT = 0x00000200 // mffgpr/mftgpr extension (POWER6x)
- _PPC_FEATURE_ARCH_2_06 = 0x00000100 // ISA 2.06 (POWER7)
- _PPC_FEATURE2_ARCH_2_07 = 0x80000000 // ISA 2.07 (POWER8)
+// ppc64x doesn't have a 'cpuid' instruction equivalent and relies on
+// HWCAP/HWCAP2 bits for hardware capabilities.
- // Standalone capabilities
- _PPC_FEATURE_HAS_ALTIVEC = 0x10000000 // SIMD/Vector unit
- _PPC_FEATURE_HAS_VSX = 0x00000080 // Vector scalar unit
-)
-
-type facilities struct {
- _ [sys.CacheLineSize]byte
- isPOWER5x bool // ISA 2.03
- isPOWER6 bool // ISA 2.05
- isPOWER6x bool // ISA 2.05 + mffgpr/mftgpr extension
- isPOWER7 bool // ISA 2.06
- isPOWER8 bool // ISA 2.07
- hasVMX bool // Vector unit
- hasVSX bool // Vector scalar unit
- _ [sys.CacheLineSize]byte
-}
-
-// cpu can be tested at runtime in go assembler code to check for
-// a certain ISA level or hardware capability, for example:
-// ·cpu+facilities_hasVSX(SB) for checking the availability of VSX
-// or
-// ·cpu+facilities_isPOWER7(SB) for checking if the processor implements
-// ISA 2.06 instructions.
-var cpu facilities
+//go:linkname cpu_hwcap internal/cpu.ppc64x_hwcap
+//go:linkname cpu_hwcap2 internal/cpu.ppc64x_hwcap2
+var cpu_hwcap uint
+var cpu_hwcap2 uint
func archauxv(tag, val uintptr) {
switch tag {
case _AT_HWCAP:
- cpu.isPOWER5x = val&_PPC_FEATURE_POWER5_PLUS != 0
- cpu.isPOWER6 = val&_PPC_FEATURE_ARCH_2_05 != 0
- cpu.isPOWER6x = val&_PPC_FEATURE_POWER6_EXT != 0
- cpu.isPOWER7 = val&_PPC_FEATURE_ARCH_2_06 != 0
- cpu.hasVMX = val&_PPC_FEATURE_HAS_ALTIVEC != 0
- cpu.hasVSX = val&_PPC_FEATURE_HAS_VSX != 0
+ cpu_hwcap = uint(val)
case _AT_HWCAP2:
- cpu.isPOWER8 = val&_PPC_FEATURE2_ARCH_2_07 != 0
+ cpu_hwcap2 = uint(val)
}
}
diff --git a/libgo/go/runtime/os_netbsd.go b/libgo/go/runtime/os_netbsd.go
index 464ce88d9c4..81ebe7636a1 100644
--- a/libgo/go/runtime/os_netbsd.go
+++ b/libgo/go/runtime/os_netbsd.go
@@ -15,7 +15,7 @@ type mOS struct {
//go:noescape
//extern lwp_park
-func lwp_park(abstime *timespec, unpark int32, hint, unparkhint unsafe.Pointer) int32
+func lwp_park(ts int32, rel int32, abstime *timespec, unpark int32, hint, unparkhint unsafe.Pointer) int32
//go:noescape
//extern lwp_unpark
@@ -31,10 +31,9 @@ func semasleep(ns int64) int32 {
// Compute sleep deadline.
var tsp *timespec
+ var ts timespec
if ns >= 0 {
- var ts timespec
var nsec int32
- ns += nanotime()
ts.set_sec(int64(timediv(ns, 1000000000, &nsec)))
ts.set_nsec(nsec)
tsp = &ts
@@ -50,9 +49,18 @@ func semasleep(ns int64) int32 {
}
// Sleep until unparked by semawakeup or timeout.
- ret := lwp_park(tsp, 0, unsafe.Pointer(&_g_.m.mos.waitsemacount), nil)
+ ret := lwp_park(_CLOCK_MONOTONIC, _TIMER_RELTIME, tsp, 0, unsafe.Pointer(&_g_.m.waitsemacount), nil)
if ret == _ETIMEDOUT {
return -1
+ } else if ret == _EINTR && ns >= 0 {
+ // Avoid sleeping forever if we keep getting
+ // interrupted (for example by the profiling
+ // timer). It would be nicer if tsp upon return had the
+ // remaining time to sleep, but this is good enough.
+ var nsec int32
+ ns /= 2
+ ts.set_sec(timediv(ns, 1000000000, &nsec))
+ ts.set_nsec(nsec)
}
}
}
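
The EINTR branch above re-arms the timeout with half the original duration on every interruption, so a stream of signals (for example from the profiling timer) cannot postpone the wakeup indefinitely, at the cost of only approximating the remaining time. A small sketch of that back-off with a hypothetical sleep that is always interrupted:

package main

import "fmt"

// sleepWithRetry retries an interruptible sleep, halving the requested
// duration after each interruption like the ns /= 2 in semasleep.
// sleep reports whether it was interrupted before the timeout expired.
func sleepWithRetry(ns int64, sleep func(ns int64) bool) (requested int64) {
	for ns > 0 {
		requested += ns
		if !sleep(ns) {
			break // the timeout expired normally
		}
		ns /= 2 // interrupted: retry with half the time
	}
	return requested
}

func main() {
	// Even if every sleep is interrupted, the retries terminate and the
	// total requested time stays under twice the original duration.
	total := sleepWithRetry(1000000000, func(int64) bool { return true })
	fmt.Println("total requested ns:", total)
}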
diff --git a/libgo/go/runtime/panic.go b/libgo/go/runtime/panic.go
index c39a58d0c4b..5cc325f3954 100644
--- a/libgo/go/runtime/panic.go
+++ b/libgo/go/runtime/panic.go
@@ -170,7 +170,18 @@ func freedefer(d *_defer) {
unlock(&sched.deferlock)
})
}
- *d = _defer{}
+
+ // These lines used to be simply `*d = _defer{}` but that
+ // started causing a nosplit stack overflow via typedmemmove.
+ d.link = nil
+ d.frame = nil
+ d.panicStack = nil
+ d._panic = nil
+ d.pfn = 0
+ d.arg = nil
+ d.retaddr = 0
+ d.makefunccanrecover = false
+
pp.deferpool = append(pp.deferpool, d)
}
@@ -327,7 +338,7 @@ func unwindStack() {
// Goexit terminates the goroutine that calls it. No other goroutine is affected.
// Goexit runs all deferred calls before terminating the goroutine. Because Goexit
-// is not panic, however, any recover calls in those deferred functions will return nil.
+// is not a panic, any recover calls in those deferred functions will return nil.
//
// Calling Goexit from the main goroutine terminates that goroutine
// without func main returning. Since func main has not returned,
@@ -599,7 +610,7 @@ func canrecover(retaddr uintptr) bool {
// caller starts with "runtime.", then we are permitted to
// call recover.
var locs [16]location
- if callers(2, locs[:2]) < 2 {
+ if callers(1, locs[:2]) < 2 {
return false
}
@@ -619,7 +630,7 @@ func canrecover(retaddr uintptr) bool {
// reflect.makeFuncStub or reflect.ffi_callback called by FFI
// functions. Then we check the caller of that function.
- n := callers(3, locs[:])
+ n := callers(2, locs[:])
foundFFICallback := false
i := 0
for ; i < n; i++ {
@@ -822,6 +833,12 @@ var panicking uint32
// so that two concurrent panics don't overlap their output.
var paniclk mutex
+// startpanic implements unrecoverable panic.
+//
+// It can have write barriers because the write barrier explicitly
+// ignores writes once dying > 0.
+//
+//go:yeswritebarrierrec
func startpanic() {
_g_ := getg()
// Uncomment when mheap_ is in Go.
@@ -860,7 +877,7 @@ func startpanic() {
exit(4)
fallthrough
default:
- // Can't even print! Just exit.
+ // Can't even print! Just exit.
exit(5)
}
}
diff --git a/libgo/go/runtime/pprof/pprof.go b/libgo/go/runtime/pprof/pprof.go
index a57b69dca35..8a562e2ce8b 100644
--- a/libgo/go/runtime/pprof/pprof.go
+++ b/libgo/go/runtime/pprof/pprof.go
@@ -18,7 +18,7 @@
// To add equivalent profiling support to a standalone program, add
// code like the following to your main function:
//
-// var cpuprofile = flag.String("cpuprofile", "", "write cpu profile `file`")
+// var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`")
// var memprofile = flag.String("memprofile", "", "write memory profile to `file`")
//
// func main() {
@@ -319,7 +319,15 @@ func (p *Profile) WriteTo(w io.Writer, debug int) error {
p.mu.Unlock()
// Map order is non-deterministic; make output deterministic.
- sort.Sort(stackProfile(all))
+ sort.Slice(all, func(i, j int) bool {
+ t, u := all[i], all[j]
+ for k := 0; k < len(t) && k < len(u); k++ {
+ if t[k] != u[k] {
+ return t[k] < u[k]
+ }
+ }
+ return len(t) < len(u)
+ })
return printCountProfile(w, debug, p.name, stackProfile(all))
}
@@ -328,16 +336,6 @@ type stackProfile [][]uintptr
func (x stackProfile) Len() int { return len(x) }
func (x stackProfile) Stack(i int) []uintptr { return x[i] }
-func (x stackProfile) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
-func (x stackProfile) Less(i, j int) bool {
- t, u := x[i], x[j]
- for k := 0; k < len(t) && k < len(u); k++ {
- if t[k] != u[k] {
- return t[k] < u[k]
- }
- }
- return len(t) < len(u)
-}
// A countProfile is a set of stack traces to be printed as counts
// grouped by stack trace. There are multiple implementations:
@@ -348,6 +346,41 @@ type countProfile interface {
Stack(i int) []uintptr
}
+// printCountCycleProfile outputs block profile records (for block or mutex profiles)
+// as the pprof-proto format output. Translations from cycle count to time duration
+// are done because the proto expects count and time (nanoseconds) instead of count
+// and the number of cycles for block and contention profiles.
+func printCountCycleProfile(w io.Writer, countName, cycleName string, records []runtime.BlockProfileRecord) error {
+ // Output profile in protobuf form.
+ b := newProfileBuilder(w)
+ b.pbValueType(tagProfile_PeriodType, countName, "count")
+ b.pb.int64Opt(tagProfile_Period, 1)
+ b.pbValueType(tagProfile_SampleType, countName, "count")
+ b.pbValueType(tagProfile_SampleType, cycleName, "nanoseconds")
+
+ cpuGHz := float64(runtime_cyclesPerSecond()) / 1e9
+
+ values := []int64{0, 0}
+ var locs []uint64
+ for _, r := range records {
+ values[0] = int64(r.Count)
+ values[1] = int64(float64(r.Cycles) / cpuGHz) // to nanoseconds
+ locs = locs[:0]
+ for _, addr := range r.Stack() {
+ // For count profiles, all stack addresses are
+ // return PCs, which is what locForPC expects.
+ l := b.locForPC(addr)
+ if l == 0 { // runtime.goexit
+ continue
+ }
+ locs = append(locs, l)
+ }
+ b.pbSample(values, locs, nil)
+ }
+ b.build()
+ return nil
+}
+
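
The values written per record are the contention count and the cycle count converted to nanoseconds by dividing by the clock rate in cycles per nanosecond (cpuGHz). A worked example of that conversion, assuming a hypothetical 2.5 GHz cycle counter:

package main

import "fmt"

func main() {
	const cyclesPerSecond = 2.5e9 // assumed cycle counter frequency
	cpuGHz := cyclesPerSecond / 1e9

	cycles := int64(5000000) // sampled contention cycles for one record
	ns := int64(float64(cycles) / cpuGHz)
	fmt.Printf("%d cycles at %.1f GHz -> %d ns (%.2f ms)\n", cycles, cpuGHz, ns, float64(ns)/1e6)
}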
// printCountProfile prints a countProfile at the specified debug level.
// The profile will be in compressed proto format unless debug is nonzero.
func printCountProfile(w io.Writer, debug int, name string, p countProfile) error {
@@ -441,7 +474,7 @@ func printStackRecord(w io.Writer, stk []uintptr, allFrames bool) {
// Hide runtime.goexit and any runtime functions at the beginning.
// This is useful mainly for allocation traces.
- skip := name == "runtime.goexit"
+ skip := name == "runtime.goexit" || name == "runtime.kickoff"
if !show {
switch {
case strings.HasPrefix(name, "runtime."):
@@ -490,6 +523,14 @@ func countHeap() int {
// writeHeap writes the current runtime heap profile to w.
func writeHeap(w io.Writer, debug int) error {
+ var memStats *runtime.MemStats
+ if debug != 0 {
+ // Read mem stats first, so that our other allocations
+ // do not appear in the statistics.
+ memStats = new(runtime.MemStats)
+ runtime.ReadMemStats(memStats)
+ }
+
// Find out how many records there are (MemProfile(nil, true)),
// allocate that many records, and get the data.
// There's a race—more records might be added between
@@ -552,8 +593,7 @@ func writeHeap(w io.Writer, debug int) error {
// Print memstats information too.
// Pprof will ignore, but useful for people
- s := new(runtime.MemStats)
- runtime.ReadMemStats(s)
+ s := memStats
fmt.Fprintf(w, "\n# runtime.MemStats\n")
fmt.Fprintf(w, "# Alloc = %d\n", s.Alloc)
fmt.Fprintf(w, "# TotalAlloc = %d\n", s.TotalAlloc)
@@ -779,14 +819,14 @@ func writeBlock(w io.Writer, debug int) error {
sort.Slice(p, func(i, j int) bool { return p[i].Cycles > p[j].Cycles })
- b := bufio.NewWriter(w)
- var tw *tabwriter.Writer
- w = b
- if debug > 0 {
- tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
- w = tw
+ if debug <= 0 {
+ return printCountCycleProfile(w, "contentions", "delay", p)
}
+ b := bufio.NewWriter(w)
+ tw := tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
+ w = tw
+
fmt.Fprintf(w, "--- contention:\n")
fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond())
for i := range p {
@@ -823,14 +863,14 @@ func writeMutex(w io.Writer, debug int) error {
sort.Slice(p, func(i, j int) bool { return p[i].Cycles > p[j].Cycles })
- b := bufio.NewWriter(w)
- var tw *tabwriter.Writer
- w = b
- if debug > 0 {
- tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
- w = tw
+ if debug <= 0 {
+ return printCountCycleProfile(w, "contentions", "delay", p)
}
+ b := bufio.NewWriter(w)
+ tw := tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
+ w = tw
+
fmt.Fprintf(w, "--- mutex:\n")
fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond())
fmt.Fprintf(w, "sampling period=%d\n", runtime.SetMutexProfileFraction(-1))
diff --git a/libgo/go/runtime/pprof/pprof_test.go b/libgo/go/runtime/pprof/pprof_test.go
index 9e5e403b741..08a4f969ca2 100644
--- a/libgo/go/runtime/pprof/pprof_test.go
+++ b/libgo/go/runtime/pprof/pprof_test.go
@@ -26,16 +26,18 @@ import (
"time"
)
-func cpuHogger(f func() int, dur time.Duration) {
+func cpuHogger(f func(x int) int, y *int, dur time.Duration) {
// We only need to get one 100 Hz clock tick, so we've got
// a large safety buffer.
// But do at least 500 iterations (which should take about 100ms),
// otherwise TestCPUProfileMultithreaded can fail if only one
// thread is scheduled during the testing period.
t0 := time.Now()
+ accum := *y
for i := 0; i < 500 || time.Since(t0) < dur; i++ {
- f()
+ accum = f(accum)
}
+ *y = accum
}
var (
@@ -46,8 +48,8 @@ var (
// The actual CPU hogging function.
// Must not call other functions nor access heap/globals in the loop,
// otherwise under race detector the samples will be in the race runtime.
-func cpuHog1() int {
- foo := salt1
+func cpuHog1(x int) int {
+ foo := x
for i := 0; i < 1e5; i++ {
if foo > 0 {
foo *= foo
@@ -58,8 +60,8 @@ func cpuHog1() int {
return foo
}
-func cpuHog2() int {
- foo := salt2
+func cpuHog2(x int) int {
+ foo := x
for i := 0; i < 1e5; i++ {
if foo > 0 {
foo *= foo
@@ -72,7 +74,7 @@ func cpuHog2() int {
func TestCPUProfile(t *testing.T) {
testCPUProfile(t, []string{"pprof.cpuHog1"}, func(dur time.Duration) {
- cpuHogger(cpuHog1, dur)
+ cpuHogger(cpuHog1, &salt1, dur)
})
}
@@ -81,29 +83,29 @@ func TestCPUProfileMultithreaded(t *testing.T) {
testCPUProfile(t, []string{"pprof.cpuHog1", "pprof.cpuHog2"}, func(dur time.Duration) {
c := make(chan int)
go func() {
- cpuHogger(cpuHog1, dur)
+ cpuHogger(cpuHog1, &salt1, dur)
c <- 1
}()
- cpuHogger(cpuHog2, dur)
+ cpuHogger(cpuHog2, &salt2, dur)
<-c
})
}
func TestCPUProfileInlining(t *testing.T) {
testCPUProfile(t, []string{"pprof.inlinedCallee", "pprof.inlinedCaller"}, func(dur time.Duration) {
- cpuHogger(inlinedCaller, dur)
+ cpuHogger(inlinedCaller, &salt1, dur)
})
}
-func inlinedCaller() int {
- inlinedCallee()
- return 0
+func inlinedCaller(x int) int {
+ x = inlinedCallee(x)
+ return x
}
-func inlinedCallee() {
+func inlinedCallee(x int) int {
// We could just use cpuHog1, but for loops prevent inlining
// right now. :(
- foo := salt1
+ foo := x
i := 0
loop:
if foo > 0 {
@@ -114,7 +116,7 @@ loop:
if i++; i < 1e5 {
goto loop
}
- salt1 = foo
+ return foo
}
func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) {
@@ -177,9 +179,9 @@ func testCPUProfile(t *testing.T, need []string, f func(dur time.Duration)) {
}
}
- if badOS[runtime.GOOS] {
+ switch runtime.GOOS {
+ case "darwin", "dragonfly", "netbsd", "solaris":
t.Skipf("ignoring failure on %s; see golang.org/issue/13841", runtime.GOOS)
- return
}
// Ignore the failure if the tests are running in a QEMU-based emulator,
// QEMU is not perfect at emulating everything.
@@ -187,7 +189,6 @@ func testCPUProfile(t *testing.T, need []string, f func(dur time.Duration)) {
// IN_QEMU=1 indicates that the tests are running in QEMU. See issue 9605.
if os.Getenv("IN_QEMU") == "1" {
t.Skip("ignore the failure in QEMU; see golang.org/issue/9605")
- return
}
t.FailNow()
}
@@ -394,60 +395,108 @@ func TestMathBigDivide(t *testing.T) {
})
}
-// Operating systems that are expected to fail the tests. See issue 13841.
-var badOS = map[string]bool{
- "darwin": true,
- "netbsd": true,
- "plan9": true,
- "dragonfly": true,
- "solaris": true,
-}
-
func TestBlockProfile(t *testing.T) {
t.Skip("lots of details are different for gccgo; FIXME")
type TestCase struct {
name string
f func()
+ stk []string
re string
}
tests := [...]TestCase{
- {"chan recv", blockChanRecv, `
+ {
+ name: "chan recv",
+ f: blockChanRecv,
+ stk: []string{
+ "runtime.chanrecv1",
+ "runtime/pprof.blockChanRecv",
+ "runtime/pprof.TestBlockProfile",
+ },
+ re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ runtime\.chanrecv1\+0x[0-9a-f]+ .*/src/runtime/chan.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockChanRecv\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
- {"chan send", blockChanSend, `
+ {
+ name: "chan send",
+ f: blockChanSend,
+ stk: []string{
+ "runtime.chansend1",
+ "runtime/pprof.blockChanSend",
+ "runtime/pprof.TestBlockProfile",
+ },
+ re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ runtime\.chansend1\+0x[0-9a-f]+ .*/src/runtime/chan.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockChanSend\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
- {"chan close", blockChanClose, `
+ {
+ name: "chan close",
+ f: blockChanClose,
+ stk: []string{
+ "runtime.chanrecv1",
+ "runtime/pprof.blockChanClose",
+ "runtime/pprof.TestBlockProfile",
+ },
+ re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ runtime\.chanrecv1\+0x[0-9a-f]+ .*/src/runtime/chan.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockChanClose\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
- {"select recv async", blockSelectRecvAsync, `
+ {
+ name: "select recv async",
+ f: blockSelectRecvAsync,
+ stk: []string{
+ "runtime.selectgo",
+ "runtime/pprof.blockSelectRecvAsync",
+ "runtime/pprof.TestBlockProfile",
+ },
+ re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ runtime\.selectgo\+0x[0-9a-f]+ .*/src/runtime/select.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockSelectRecvAsync\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
- {"select send sync", blockSelectSendSync, `
+ {
+ name: "select send sync",
+ f: blockSelectSendSync,
+ stk: []string{
+ "runtime.selectgo",
+ "runtime/pprof.blockSelectSendSync",
+ "runtime/pprof.TestBlockProfile",
+ },
+ re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ runtime\.selectgo\+0x[0-9a-f]+ .*/src/runtime/select.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockSelectSendSync\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
- {"mutex", blockMutex, `
+ {
+ name: "mutex",
+ f: blockMutex,
+ stk: []string{
+ "sync.(*Mutex).Lock",
+ "runtime/pprof.blockMutex",
+ "runtime/pprof.TestBlockProfile",
+ },
+ re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ sync\.\(\*Mutex\)\.Lock\+0x[0-9a-f]+ .*/src/sync/mutex\.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockMutex\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
- {"cond", blockCond, `
+ {
+ name: "cond",
+ f: blockCond,
+ stk: []string{
+ "sync.(*Cond).Wait",
+ "runtime/pprof.blockCond",
+ "runtime/pprof.TestBlockProfile",
+ },
+ re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
# 0x[0-9a-f]+ sync\.\(\*Cond\)\.Wait\+0x[0-9a-f]+ .*/src/sync/cond\.go:[0-9]+
# 0x[0-9a-f]+ runtime/pprof\.blockCond\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
@@ -455,28 +504,84 @@ func TestBlockProfile(t *testing.T) {
`},
}
+ // Generate block profile
runtime.SetBlockProfileRate(1)
defer runtime.SetBlockProfileRate(0)
for _, test := range tests {
test.f()
}
- var w bytes.Buffer
- Lookup("block").WriteTo(&w, 1)
- prof := w.String()
- if !strings.HasPrefix(prof, "--- contention:\ncycles/second=") {
- t.Fatalf("Bad profile header:\n%v", prof)
- }
+ t.Run("debug=1", func(t *testing.T) {
+ var w bytes.Buffer
+ Lookup("block").WriteTo(&w, 1)
+ prof := w.String()
- if strings.HasSuffix(prof, "#\t0x0\n\n") {
- t.Errorf("Useless 0 suffix:\n%v", prof)
+ if !strings.HasPrefix(prof, "--- contention:\ncycles/second=") {
+ t.Fatalf("Bad profile header:\n%v", prof)
+ }
+
+ if strings.HasSuffix(prof, "#\t0x0\n\n") {
+ t.Errorf("Useless 0 suffix:\n%v", prof)
+ }
+
+ for _, test := range tests {
+ if !regexp.MustCompile(strings.Replace(test.re, "\t", "\t+", -1)).MatchString(prof) {
+ t.Errorf("Bad %v entry, expect:\n%v\ngot:\n%v", test.name, test.re, prof)
+ }
+ }
+ })
+
+ t.Run("proto", func(t *testing.T) {
+ // proto format
+ var w bytes.Buffer
+ Lookup("block").WriteTo(&w, 0)
+ p, err := profile.Parse(&w)
+ if err != nil {
+ t.Fatalf("failed to parse profile: %v", err)
+ }
+ t.Logf("parsed proto: %s", p)
+ if err := p.CheckValid(); err != nil {
+ t.Fatalf("invalid profile: %v", err)
+ }
+
+ stks := stacks(p)
+ for _, test := range tests {
+ if !containsStack(stks, test.stk) {
+ t.Errorf("No matching stack entry for %v, want %+v", test.name, test.stk)
+ }
+ }
+ })
+
+}
+
+func stacks(p *profile.Profile) (res [][]string) {
+ for _, s := range p.Sample {
+ var stk []string
+ for _, l := range s.Location {
+ for _, line := range l.Line {
+ stk = append(stk, line.Function.Name)
+ }
+ }
+ res = append(res, stk)
}
+ return res
+}
- for _, test := range tests {
- if !regexp.MustCompile(strings.Replace(test.re, "\t", "\t+", -1)).MatchString(prof) {
- t.Fatalf("Bad %v entry, expect:\n%v\ngot:\n%v", test.name, test.re, prof)
+func containsStack(got [][]string, want []string) bool {
+ for _, stk := range got {
+ if len(stk) < len(want) {
+ continue
+ }
+ for i, f := range want {
+ if f != stk[i] {
+ break
+ }
+ if i == len(want)-1 {
+ return true
+ }
}
}
+ return false
}
const blockDelay = 10 * time.Millisecond
@@ -568,6 +673,8 @@ func blockCond() {
}
func TestMutexProfile(t *testing.T) {
+ // Generate mutex profile
+
old := runtime.SetMutexProfileFraction(1)
defer runtime.SetMutexProfileFraction(old)
if old != 0 {
@@ -576,39 +683,60 @@ func TestMutexProfile(t *testing.T) {
blockMutex()
- var w bytes.Buffer
- Lookup("mutex").WriteTo(&w, 1)
- prof := w.String()
+ t.Run("debug=1", func(t *testing.T) {
+ var w bytes.Buffer
+ Lookup("mutex").WriteTo(&w, 1)
+ prof := w.String()
+ t.Logf("received profile: %v", prof)
- if !strings.HasPrefix(prof, "--- mutex:\ncycles/second=") {
- t.Errorf("Bad profile header:\n%v", prof)
- }
- prof = strings.Trim(prof, "\n")
- lines := strings.Split(prof, "\n")
- // gccgo adds an extra line in the stack trace, not sure why.
- if len(lines) < 6 {
- t.Errorf("expected 6 lines, got %d %q\n%s", len(lines), prof, prof)
- }
- if len(lines) < 6 {
- return
- }
- // checking that the line is like "35258904 1 @ 0x48288d 0x47cd28 0x458931"
- r2 := `^\d+ 1 @(?: 0x[[:xdigit:]]+)+`
- //r2 := "^[0-9]+ 1 @ 0x[0-9a-f x]+$"
- if ok, err := regexp.MatchString(r2, lines[3]); err != nil || !ok {
- t.Errorf("%q didn't match %q", lines[3], r2)
- }
- r3 := "^#.*pprof.\\$nested.*$"
- match := false
- for _, i := range []int{5, 6} {
- if ok, _ := regexp.MatchString(r3, lines[i]); ok {
- match = true
- break
+ if !strings.HasPrefix(prof, "--- mutex:\ncycles/second=") {
+ t.Errorf("Bad profile header:\n%v", prof)
}
- }
- if !match {
- t.Errorf("neither %q nor %q matched %q", lines[5], lines[6], r3)
- }
+ prof = strings.Trim(prof, "\n")
+ lines := strings.Split(prof, "\n")
+ if len(lines) != 6 {
+ t.Errorf("expected 6 lines, got %d %q\n%s", len(lines), prof, prof)
+ }
+ if len(lines) < 6 {
+ return
+ }
+ // checking that the line is like "35258904 1 @ 0x48288d 0x47cd28 0x458931"
+ r2 := `^\d+ 1 @(?: 0x[[:xdigit:]]+)+`
+ //r2 := "^[0-9]+ 1 @ 0x[0-9a-f x]+$"
+ if ok, err := regexp.MatchString(r2, lines[3]); err != nil || !ok {
+ t.Errorf("%q didn't match %q", lines[3], r2)
+ }
+ if runtime.Compiler != "gccgo" {
+ r3 := "^#.*pprof.blockMutex.*$"
+ if ok, err := regexp.MatchString(r3, lines[5]); err != nil || !ok {
+ t.Errorf("%q didn't match %q", lines[5], r3)
+ }
+ }
+ t.Logf(prof)
+ })
+ t.Run("proto", func(t *testing.T) {
+ // proto format
+ var w bytes.Buffer
+ Lookup("mutex").WriteTo(&w, 0)
+ p, err := profile.Parse(&w)
+ if err != nil {
+ t.Fatalf("failed to parse profile: %v", err)
+ }
+ t.Logf("parsed proto: %s", p)
+ if err := p.CheckValid(); err != nil {
+ t.Fatalf("invalid profile: %v", err)
+ }
+
+ stks := stacks(p)
+ for _, want := range [][]string{
+ // {"sync.(*Mutex).Unlock", "pprof.blockMutex.func1"},
+ {"sync.Unlock.pN10_sync.Mutex", "pprof.$nested17"},
+ } {
+ if !containsStack(stks, want) {
+ t.Errorf("No matching stack entry for %+v", want)
+ }
+ }
+ })
}
func func1(c chan int) { <-c }
@@ -725,7 +853,7 @@ func TestEmptyCallStack(t *testing.T) {
func TestCPUProfileLabel(t *testing.T) {
testCPUProfile(t, []string{"pprof.cpuHogger;key=value"}, func(dur time.Duration) {
Do(context.Background(), Labels("key", "value"), func(context.Context) {
- cpuHogger(cpuHog1, dur)
+ cpuHogger(cpuHog1, &salt1, dur)
})
})
}
@@ -738,14 +866,15 @@ func TestLabelRace(t *testing.T) {
start := time.Now()
var wg sync.WaitGroup
for time.Since(start) < dur {
+ var salts [10]int
for i := 0; i < 10; i++ {
wg.Add(1)
- go func() {
+ go func(j int) {
Do(context.Background(), Labels("key", "value"), func(context.Context) {
- cpuHogger(cpuHog1, time.Millisecond)
+ cpuHogger(cpuHog1, &salts[j], time.Millisecond)
})
wg.Done()
- }()
+ }(i)
}
wg.Wait()
}
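
As a side note on the new debug=0 path exercised above: a minimal sketch of consuming the proto-format mutex profile outside the test harness. The stacks/containsStack helpers are test-local, so this walks the parsed samples directly; github.com/google/pprof/profile is assumed here as the external counterpart of the vendored profile package.

package main

import (
	"bytes"
	"fmt"
	"runtime"
	"runtime/pprof"
	"sync"
	"time"

	"github.com/google/pprof/profile"
)

func main() {
	prev := runtime.SetMutexProfileFraction(1)
	defer runtime.SetMutexProfileFraction(prev)

	// Create one contended Unlock so the profile has a sample.
	var mu sync.Mutex
	mu.Lock()
	done := make(chan struct{})
	go func() {
		mu.Lock() // blocks until the Unlock below
		mu.Unlock()
		close(done)
	}()
	time.Sleep(10 * time.Millisecond)
	mu.Unlock() // contended unlock: recorded by the mutex profiler
	<-done

	var buf bytes.Buffer
	// debug=0 selects the gzipped protobuf encoding, as in the "proto" subtest.
	if err := pprof.Lookup("mutex").WriteTo(&buf, 0); err != nil {
		panic(err)
	}
	p, err := profile.Parse(&buf)
	if err != nil {
		panic(err)
	}
	if err := p.CheckValid(); err != nil {
		panic(err)
	}
	for _, s := range p.Sample {
		for _, loc := range s.Location {
			for _, line := range loc.Line {
				fmt.Println(line.Function.Name)
			}
		}
	}
}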
diff --git a/libgo/go/runtime/pprof/proto.go b/libgo/go/runtime/pprof/proto.go
index 5e1d71c7e72..793be44a417 100644
--- a/libgo/go/runtime/pprof/proto.go
+++ b/libgo/go/runtime/pprof/proto.go
@@ -202,7 +202,7 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 {
// the stack and we have return PCs anyway.
frames := runtime.CallersFrames([]uintptr{addr})
frame, more := frames.Next()
- if frame.Function == "runtime.goexit" {
+ if frame.Function == "runtime.goexit" || frame.Function == "runtime.kickoff" {
// Short-circuit if we see runtime.goexit so the loop
// below doesn't allocate a useless empty location.
return 0
@@ -228,7 +228,7 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 {
start := b.pb.startMessage()
b.pb.uint64Opt(tagLocation_ID, id)
b.pb.uint64Opt(tagLocation_Address, uint64(frame.PC))
- for frame.Function != "runtime.goexit" {
+ for frame.Function != "runtime.goexit" && frame.Function != "runtime.kickoff" {
// Write out each line in frame expansion.
funcID := uint64(b.funcs[frame.Function])
if funcID == 0 {
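
The two hunks above make the symbolizer treat runtime.kickoff, the gccgo goroutine entry point, the same way it already treats runtime.goexit. A tiny helper capturing that check (the name is illustrative; the real code inlines the comparison):

// isGoroutineRoot reports whether a symbolized frame is one of the
// synthetic goroutine entry/exit frames at which frame expansion
// for a location should stop.
func isGoroutineRoot(fn string) bool {
	return fn == "runtime.goexit" || fn == "runtime.kickoff"
}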
diff --git a/libgo/go/runtime/print.go b/libgo/go/runtime/print.go
index 4db726a7552..3da05ad5f9e 100644
--- a/libgo/go/runtime/print.go
+++ b/libgo/go/runtime/print.go
@@ -78,7 +78,7 @@ var debuglock mutex
// The compiler emits calls to printlock and printunlock around
// the multiple calls that implement a single Go print or println
-// statement. Some of the print helpers (printsp, for example)
+// statement. Some of the print helpers (printslice, for example)
// call print recursively. There is also the problem of a crash
// happening during the print routines and needing to acquire
// the print lock to print information about the crash.
@@ -120,31 +120,31 @@ func gwrite(b []byte) {
}
func printsp() {
- print(" ")
+ printstring(" ")
}
func printnl() {
- print("\n")
+ printstring("\n")
}
func printbool(v bool) {
if v {
- print("true")
+ printstring("true")
} else {
- print("false")
+ printstring("false")
}
}
func printfloat(v float64) {
switch {
case v != v:
- print("NaN")
+ printstring("NaN")
return
case v+v == v && v > 0:
- print("+Inf")
+ printstring("+Inf")
return
case v+v == v && v < 0:
- print("-Inf")
+ printstring("-Inf")
return
}
@@ -226,7 +226,7 @@ func printuint(v uint64) {
func printint(v int64) {
if v < 0 {
- print("-")
+ printstring("-")
v = -v
}
printuint(uint64(v))
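
For context on the printstring change above, this is roughly the shape a single user-level println statement takes after the lowering described in the comment at the top of the hunk; the exact helper sequence is an assumption drawn from that comment, not from the compiler itself.

// Illustrative only: a lowered form of `println("x =", x)` in terms of
// the helpers in this file. Having helpers such as printnl call
// printstring directly keeps each of them a single call inside this
// locked sequence.
func loweredPrintln(x int64) {
	printlock()
	printstring("x =")
	printsp()
	printint(x)
	printnl()
	printunlock()
}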
diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go
index 345f57b6875..1ea41528600 100644
--- a/libgo/go/runtime/proc.go
+++ b/libgo/go/runtime/proc.go
@@ -34,6 +34,7 @@ import (
//go:linkname helpgc runtime.helpgc
//go:linkname kickoff runtime.kickoff
//go:linkname mstart1 runtime.mstart1
+//go:linkname mexit runtime.mexit
//go:linkname globrunqput runtime.globrunqput
//go:linkname pidleget runtime.pidleget
@@ -54,6 +55,7 @@ func getTraceback(me, gp *g)
func gtraceback(*g)
func _cgo_notify_runtime_init_done()
func alreadyInCallers() bool
+func stackfree(*g)
// Functions created by the compiler.
//extern __go_init_main
@@ -138,6 +140,9 @@ var (
// it is closed, meaning cgocallbackg can reliably receive from it.
var main_init_done chan bool
+// mainStarted indicates that the main M has started.
+var mainStarted bool
+
// runtimeInitTime is the nanotime() at which the runtime started.
var runtimeInitTime int64
@@ -157,8 +162,8 @@ func main() {
maxstacksize = 250000000
}
- // Record when the world started.
- runtimeInitTime = nanotime()
+ // Allow newproc to start new Ms.
+ mainStarted = true
systemstack(func() {
newm(sysmon, nil)
@@ -184,8 +189,15 @@ func main() {
}
}()
+ // Record when the world started. Must be after runtime_init
+ // because nanotime on some platforms depends on startNano.
+ runtimeInitTime = nanotime()
+
main_init_done = make(chan bool)
if iscgo {
+ // Start the template thread in case we enter Go from
+ // a C-created thread and need to create a new thread.
+ startTemplateThread()
_cgo_notify_runtime_init_done()
}
@@ -269,9 +281,10 @@ func forcegchelper() {
}
}
+//go:nosplit
+
// Gosched yields the processor, allowing other goroutines to run. It does not
// suspend the current goroutine, so execution resumes automatically.
-//go:nosplit
func Gosched() {
mcall(gosched_m)
}
@@ -359,8 +372,8 @@ func releaseSudog(s *sudog) {
if s.elem != nil {
throw("runtime: sudog with non-nil elem")
}
- if s.selectdone != nil {
- throw("runtime: sudog with non-nil selectdone")
+ if s.isSelect {
+ throw("runtime: sudog with non-false isSelect")
}
if s.next != nil {
throw("runtime: sudog with non-nil next")
@@ -419,7 +432,7 @@ func funcPC(f interface{}) uintptr {
func lockedOSThread() bool {
gp := getg()
- return gp.lockedm != nil && gp.m.lockedg != nil
+ return gp.lockedm != 0 && gp.m.lockedg != 0
}
var (
@@ -479,13 +492,21 @@ func schedinit() {
if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 {
procs = n
}
- if procs > _MaxGomaxprocs {
- procs = _MaxGomaxprocs
- }
if procresize(procs) != nil {
throw("unknown runnable goroutine during bootstrap")
}
+ // For cgocheck > 1, we turn on the write barrier at all times
+ // and check all pointer writes. We can't do this until after
+ // procresize because the write barrier needs a P.
+ if debug.cgocheck > 1 {
+ writeBarrier.cgo = true
+ writeBarrier.enabled = true
+ for _, p := range allp {
+ p.wbBuf.reset()
+ }
+ }
+
if buildVersion == "" {
// Condition should never trigger. This code just serves
// to ensure runtime·buildVersion is kept in the resulting binary.
@@ -501,7 +522,7 @@ func dumpgstatus(gp *g) {
func checkmcount() {
// sched lock is held
- if sched.mcount > sched.maxmcount {
+ if mcount() > sched.maxmcount {
print("runtime: program exceeds ", sched.maxmcount, "-thread limit\n")
throw("thread exhaustion")
}
@@ -515,15 +536,20 @@ func mcommoninit(mp *m) {
callers(1, mp.createstack[:])
}
- mp.fastrand = 0x49f6428a + uint32(mp.id) + uint32(cputicks())
- if mp.fastrand == 0 {
- mp.fastrand = 0x49f6428a
- }
-
lock(&sched.lock)
- mp.id = sched.mcount
- sched.mcount++
+ if sched.mnext+1 < sched.mnext {
+ throw("runtime: thread ID overflow")
+ }
+ mp.id = sched.mnext
+ sched.mnext++
checkmcount()
+
+ mp.fastrand[0] = 1597334677 * uint32(mp.id)
+ mp.fastrand[1] = uint32(cputicks())
+ if mp.fastrand[0]|mp.fastrand[1] == 0 {
+ mp.fastrand[1] = 1
+ }
+
mpreinit(mp)
// Add to allm so garbage collector doesn't free g->m
@@ -735,8 +761,10 @@ func casgstatus(gp *g, oldval, newval uint32) {
// _Grunning or _Grunning|_Gscan; either way,
// we own gp.gcscanvalid, so it's safe to read.
// gp.gcscanvalid must not be true when we are running.
- print("runtime: casgstatus ", hex(oldval), "->", hex(newval), " gp.status=", hex(gp.atomicstatus), " gp.gcscanvalid=true\n")
- throw("casgstatus")
+ systemstack(func() {
+ print("runtime: casgstatus ", hex(oldval), "->", hex(newval), " gp.status=", hex(gp.atomicstatus), " gp.gcscanvalid=true\n")
+ throw("casgstatus")
+ })
}
// See http://golang.org/cl/21503 for justification of the yield delay.
@@ -912,7 +940,7 @@ func stopTheWorld(reason string) {
// startTheWorld undoes the effects of stopTheWorld.
func startTheWorld() {
- systemstack(startTheWorldWithSema)
+ systemstack(func() { startTheWorldWithSema(false) })
// worldsema must be held over startTheWorldWithSema to ensure
// gomaxprocs cannot change while worldsema is held.
semrelease(&worldsema)
@@ -962,8 +990,7 @@ func stopTheWorldWithSema() {
_g_.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic.
sched.stopwait--
// try to retake all P's in Psyscall status
- for i := 0; i < int(gomaxprocs); i++ {
- p := allp[i]
+ for _, p := range allp {
s := p.status
if s == _Psyscall && atomic.Cas(&p.status, s, _Pgcstop) {
if trace.enabled {
@@ -1003,8 +1030,7 @@ func stopTheWorldWithSema() {
if sched.stopwait != 0 {
bad = "stopTheWorld: not stopped (stopwait != 0)"
} else {
- for i := 0; i < int(gomaxprocs); i++ {
- p := allp[i]
+ for _, p := range allp {
if p.status != _Pgcstop {
bad = "stopTheWorld: not stopped (status != _Pgcstop)"
}
@@ -1028,12 +1054,14 @@ func mhelpgc() {
_g_.m.helpgc = -1
}
-func startTheWorldWithSema() {
+func startTheWorldWithSema(emitTraceEvent bool) int64 {
_g_ := getg()
- _g_.m.locks++ // disable preemption because it can be holding p in a local var
- gp := netpoll(false) // non-blocking
- injectglist(gp)
+ _g_.m.locks++ // disable preemption because it can be holding p in a local var
+ if netpollinited() {
+ gp := netpoll(false) // non-blocking
+ injectglist(gp)
+ }
add := needaddgcproc()
lock(&sched.lock)
@@ -1068,6 +1096,12 @@ func startTheWorldWithSema() {
}
}
+ // Capture start-the-world time before doing clean-up tasks.
+ startTime := nanotime()
+ if emitTraceEvent {
+ traceGCSTWDone()
+ }
+
// Wakeup an additional proc in case we have excessive runnable goroutines
// in local queues or in the global queue. If we don't, the proc will park itself.
// If we have lots of excessive work, resetspinning will unpark additional procs as necessary.
@@ -1086,6 +1120,8 @@ func startTheWorldWithSema() {
newm(mhelpgc, nil)
}
_g_.m.locks--
+
+ return startTime
}
// First function run by a new goroutine.
@@ -1116,15 +1152,13 @@ func kickoff() {
throw("no p in kickoff")
}
}
-
gp.param = nil
fv(param)
goexit1()
}
-// This is called from mstart.
-func mstart1() {
+func mstart1(dummy int32) {
_g_ := getg()
if _g_ != _g_.m.g0 {
@@ -1137,12 +1171,7 @@ func mstart1() {
// prepare the thread to be able to handle the signals.
// For gccgo minit was called by C code.
if _g_.m == &m0 {
- // Create an extra M for callbacks on threads not created by Go.
- if iscgo && !cgoHasExtraM {
- cgoHasExtraM = true
- newextram()
- }
- initsig(false)
+ mstartm0()
}
if fn := _g_.m.mstartfn; fn != nil {
@@ -1159,6 +1188,114 @@ func mstart1() {
schedule()
}
+// mstartm0 implements part of mstart1 that only runs on the m0.
+//
+// Write barriers are allowed here because we know the GC can't be
+// running yet, so they'll be no-ops.
+//
+//go:yeswritebarrierrec
+func mstartm0() {
+ // Create an extra M for callbacks on threads not created by Go.
+ if iscgo && !cgoHasExtraM {
+ cgoHasExtraM = true
+ newextram()
+ }
+ initsig(false)
+}
+
+// mexit tears down and exits the current thread.
+//
+// Don't call this directly to exit the thread, since it must run at
+// the top of the thread stack. Instead, use gogo(_g_.m.g0) to
+// unwind the stack to the point that exits the thread.
+//
+// It is entered with m.p != nil, so write barriers are allowed. It
+// will release the P before exiting.
+//
+//go:yeswritebarrierrec
+func mexit(osStack bool) {
+ g := getg()
+ m := g.m
+
+ if m == &m0 {
+ // This is the main thread. Just wedge it.
+ //
+ // On Linux, exiting the main thread puts the process
+ // into a non-waitable zombie state. On Plan 9,
+ // exiting the main thread unblocks wait even though
+ // other threads are still running. On Solaris we can
+ // neither exitThread nor return from mstart. Other
+ // bad things probably happen on other platforms.
+ //
+ // We could try to clean up this M more before wedging
+ // it, but that complicates signal handling.
+ handoffp(releasep())
+ lock(&sched.lock)
+ sched.nmfreed++
+ checkdead()
+ unlock(&sched.lock)
+ notesleep(&m.park)
+ throw("locked m0 woke up")
+ }
+
+ sigblock()
+ unminit()
+
+ // Free the gsignal stack.
+ if m.gsignal != nil {
+ stackfree(m.gsignal)
+ }
+
+ // Remove m from allm.
+ lock(&sched.lock)
+ for pprev := &allm; *pprev != nil; pprev = &(*pprev).alllink {
+ if *pprev == m {
+ *pprev = m.alllink
+ goto found
+ }
+ }
+ throw("m not found in allm")
+found:
+ if !osStack {
+ // Delay reaping m until it's done with the stack.
+ //
+ // If this is using an OS stack, the OS will free it
+ // so there's no need for reaping.
+ atomic.Store(&m.freeWait, 1)
+ // Put m on the free list, though it will not be reaped until
+ // freeWait is 0. Note that the free list must not be linked
+ // through alllink because some functions walk allm without
+	// locking, so they may still be using alllink.
+ m.freelink = sched.freem
+ sched.freem = m
+ }
+ unlock(&sched.lock)
+
+ // Release the P.
+ handoffp(releasep())
+ // After this point we must not have write barriers.
+
+ // Invoke the deadlock detector. This must happen after
+ // handoffp because it may have started a new M to take our
+ // P's work.
+ lock(&sched.lock)
+ sched.nmfreed++
+ checkdead()
+ unlock(&sched.lock)
+
+ if osStack {
+ // Return from mstart and let the system thread
+ // library free the g0 stack and terminate the thread.
+ return
+ }
+
+ // mstart is the thread's entry point, so there's nothing to
+ // return to. Exit the thread directly. exitThread will clear
+ // m.freeWait when it's done with the stack and the m can be
+ // reaped.
+ exitThread(&m.freeWait)
+}
+
// forEachP calls fn(p) for every P p when p reaches a GC safe point.
// If a P is currently executing code, this will bring the P to a GC
// safe point and execute fn on that P. If the P is not executing code
@@ -1182,7 +1319,7 @@ func forEachP(fn func(*p)) {
sched.safePointFn = fn
// Ask all Ps to run the safe point function.
- for _, p := range allp[:gomaxprocs] {
+ for _, p := range allp {
if p != _p_ {
atomic.Store(&p.runSafePointFn, 1)
}
@@ -1210,8 +1347,7 @@ func forEachP(fn func(*p)) {
// Force Ps currently in _Psyscall into _Pidle and hand them
// off to induce safe point function execution.
- for i := 0; i < int(gomaxprocs); i++ {
- p := allp[i]
+ for _, p := range allp {
s := p.status
if s == _Psyscall && p.runSafePointFn == 1 && atomic.Cas(&p.status, s, _Pidle) {
if trace.enabled {
@@ -1240,8 +1376,7 @@ func forEachP(fn func(*p)) {
if sched.safePointWait != 0 {
throw("forEachP: not done")
}
- for i := 0; i < int(gomaxprocs); i++ {
- p := allp[i]
+ for _, p := range allp {
if p.runSafePointFn != 0 {
throw("forEachP: P did not run fn")
}
@@ -1295,6 +1430,27 @@ func allocm(_p_ *p, fn func(), allocatestack bool) (mp *m, g0Stack unsafe.Pointe
if _g_.m.p == 0 {
acquirep(_p_) // temporarily borrow p for mallocs in this function
}
+
+ // Release the free M list. We need to do this somewhere and
+ // this may free up a stack we can use.
+ if sched.freem != nil {
+ lock(&sched.lock)
+ var newList *m
+ for freem := sched.freem; freem != nil; {
+ if freem.freeWait != 0 {
+ next := freem.freelink
+ freem.freelink = newList
+ newList = freem
+ freem = next
+ continue
+ }
+ stackfree(freem.g0)
+ freem = freem.freelink
+ }
+ sched.freem = newList
+ unlock(&sched.lock)
+ }
+
mp = new(m)
mp.mstartfn = fn
mcommoninit(mp)
@@ -1431,9 +1587,9 @@ func oneNewExtraM() {
casgstatus(gp, _Gidle, _Gdead)
gp.m = mp
mp.curg = gp
- mp.locked = _LockInternal
- mp.lockedg = gp
- gp.lockedm = mp
+ mp.lockedInt++
+ mp.lockedg.set(gp)
+ gp.lockedm.set(mp)
gp.goid = int64(atomic.Xadd64(&sched.goidgen, 1))
// put on allg for garbage collector
allgadd(gp)
@@ -1574,6 +1730,27 @@ func unlockextra(mp *m) {
// around exec'ing while creating/destroying threads. See issue #19546.
var execLock rwmutex
+// newmHandoff contains a list of m structures that need new OS threads.
+// This is used by newm in situations where newm itself can't safely
+// start an OS thread.
+var newmHandoff struct {
+ lock mutex
+
+ // newm points to a list of M structures that need new OS
+ // threads. The list is linked through m.schedlink.
+ newm muintptr
+
+ // waiting indicates that wake needs to be notified when an m
+ // is put on the list.
+ waiting bool
+ wake note
+
+ // haveTemplateThread indicates that the templateThread has
+ // been started. This is not protected by lock. Use cas to set
+ // to 1.
+ haveTemplateThread uint32
+}
+
// Create a new m. It will start off with a call to fn, or else the scheduler.
// fn needs to be static and not a heap allocated closure.
// May run with m.p==nil, so write barriers are not allowed.
@@ -1582,11 +1759,90 @@ func newm(fn func(), _p_ *p) {
mp, _, _ := allocm(_p_, fn, false)
mp.nextp.set(_p_)
mp.sigmask = initSigmask
+ if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" {
+ // We're on a locked M or a thread that may have been
+ // started by C. The kernel state of this thread may
+ // be strange (the user may have locked it for that
+ // purpose). We don't want to clone that into another
+ // thread. Instead, ask a known-good thread to create
+ // the thread for us.
+ //
+ // This is disabled on Plan 9. See golang.org/issue/22227.
+ //
+ // TODO: This may be unnecessary on Windows, which
+ // doesn't model thread creation off fork.
+ lock(&newmHandoff.lock)
+ if newmHandoff.haveTemplateThread == 0 {
+ throw("on a locked thread with no template thread")
+ }
+ mp.schedlink = newmHandoff.newm
+ newmHandoff.newm.set(mp)
+ if newmHandoff.waiting {
+ newmHandoff.waiting = false
+ notewakeup(&newmHandoff.wake)
+ }
+ unlock(&newmHandoff.lock)
+ return
+ }
+ newm1(mp)
+}
+
+func newm1(mp *m) {
execLock.rlock() // Prevent process clone.
newosproc(mp)
execLock.runlock()
}
+// startTemplateThread starts the template thread if it is not already
+// running.
+//
+// The calling thread must itself be in a known-good state.
+func startTemplateThread() {
+ if !atomic.Cas(&newmHandoff.haveTemplateThread, 0, 1) {
+ return
+ }
+ newm(templateThread, nil)
+}
+
+// templateThread is a thread in a known-good state that exists solely
+// to start new threads in known-good states when the calling thread
+// may not be in a good state.
+//
+// Many programs never need this, so templateThread is started lazily
+// when we first enter a state that might lead to running on a thread
+// in an unknown state.
+//
+// templateThread runs on an M without a P, so it must not have write
+// barriers.
+//
+//go:nowritebarrierrec
+func templateThread() {
+ lock(&sched.lock)
+ sched.nmsys++
+ checkdead()
+ unlock(&sched.lock)
+
+ for {
+ lock(&newmHandoff.lock)
+ for newmHandoff.newm != 0 {
+ newm := newmHandoff.newm.ptr()
+ newmHandoff.newm = 0
+ unlock(&newmHandoff.lock)
+ for newm != nil {
+ next := newm.schedlink.ptr()
+ newm.schedlink = 0
+ newm1(newm)
+ newm = next
+ }
+ lock(&newmHandoff.lock)
+ }
+ newmHandoff.waiting = true
+ noteclear(&newmHandoff.wake)
+ unlock(&newmHandoff.lock)
+ notesleep(&newmHandoff.wake)
+ }
+}
+
// Stops execution of the current m until new work is available.
// Returns with acquired P.
func stopm() {
@@ -1609,7 +1865,9 @@ retry:
notesleep(&_g_.m.park)
noteclear(&_g_.m.park)
if _g_.m.helpgc != 0 {
+ // helpgc() set _g_.m.p and _g_.m.mcache, so we have a P.
gchelper()
+ // Undo the effects of helpgc().
_g_.m.helpgc = 0
_g_.m.mcache = nil
_g_.m.p = 0
@@ -1743,7 +2001,7 @@ func wakep() {
func stoplockedm() {
_g_ := getg()
- if _g_.m.lockedg == nil || _g_.m.lockedg.lockedm != _g_.m {
+ if _g_.m.lockedg == 0 || _g_.m.lockedg.ptr().lockedm.ptr() != _g_.m {
throw("stoplockedm: inconsistent locking")
}
if _g_.m.p != 0 {
@@ -1755,7 +2013,7 @@ func stoplockedm() {
// Wait until another thread schedules lockedg again.
notesleep(&_g_.m.park)
noteclear(&_g_.m.park)
- status := readgstatus(_g_.m.lockedg)
+ status := readgstatus(_g_.m.lockedg.ptr())
if status&^_Gscan != _Grunnable {
print("runtime:stoplockedm: g is not Grunnable or Gscanrunnable\n")
dumpgstatus(_g_)
@@ -1771,7 +2029,7 @@ func stoplockedm() {
func startlockedm(gp *g) {
_g_ := getg()
- mp := gp.lockedm
+ mp := gp.lockedm.ptr()
if mp == _g_.m {
throw("startlockedm: locked to me")
}
@@ -1896,11 +2154,12 @@ top:
// Poll network.
// This netpoll is only an optimization before we resort to stealing.
- // We can safely skip it if there a thread blocked in netpoll already.
- // If there is any kind of logical race with that blocked thread
- // (e.g. it has already returned from netpoll, but does not set lastpoll yet),
- // this thread will do blocking netpoll below anyway.
- if netpollinited() && sched.lastpoll != 0 {
+ // We can safely skip it if there are no waiters or a thread is blocked
+ // in netpoll already. If there is any kind of logical race with that
+ // blocked thread (e.g. it has already returned from netpoll, but does
+ // not set lastpoll yet), this thread will do blocking netpoll below
+ // anyway.
+ if netpollinited() && atomic.Load(&netpollWaiters) > 0 && atomic.Load64(&sched.lastpoll) != 0 {
if gp := netpoll(false); gp != nil { // non-blocking
// netpoll returns list of goroutines linked by schedlink.
injectglist(gp.schedlink.ptr())
@@ -1996,9 +2255,8 @@ stop:
}
// check all runqueues once again
- for i := 0; i < int(gomaxprocs); i++ {
- _p_ := allp[i]
- if _p_ != nil && !runqempty(_p_) {
+ for _, _p_ := range allp {
+ if !runqempty(_p_) {
lock(&sched.lock)
_p_ = pidleget()
unlock(&sched.lock)
@@ -2137,9 +2395,15 @@ func schedule() {
throw("schedule: holding locks")
}
- if _g_.m.lockedg != nil {
+ if _g_.m.lockedg != 0 {
stoplockedm()
- execute(_g_.m.lockedg, false) // Never returns.
+ execute(_g_.m.lockedg.ptr(), false) // Never returns.
+ }
+
+ // We should not schedule away from a g that is executing a cgo call,
+ // since the cgo call is using the m's g0 stack.
+ if _g_.m.incgo {
+ throw("schedule: in cgo")
}
top:
@@ -2205,7 +2469,7 @@ top:
resetspinning()
}
- if gp.lockedm != nil {
+ if gp.lockedm != 0 {
// Hands off own p to the locked m,
// then blocks waiting for a new p.
startlockedm(gp)
@@ -2322,8 +2586,9 @@ func goexit0(gp *g) {
gp.isSystemGoroutine = false
}
gp.m = nil
- gp.lockedm = nil
- _g_.m.lockedg = nil
+ locked := gp.lockedm != 0
+ gp.lockedm = 0
+ _g_.m.lockedg = 0
gp.entry = nil
gp.paniconfault = false
gp._defer = nil // should be true already but just in case.
@@ -2334,17 +2599,38 @@ func goexit0(gp *g) {
gp.labels = nil
gp.timer = nil
+ if gcBlackenEnabled != 0 && gp.gcAssistBytes > 0 {
+ // Flush assist credit to the global pool. This gives
+ // better information to pacing if the application is
+		// rapidly creating and exiting goroutines.
+ scanCredit := int64(gcController.assistWorkPerByte * float64(gp.gcAssistBytes))
+ atomic.Xaddint64(&gcController.bgScanCredit, scanCredit)
+ gp.gcAssistBytes = 0
+ }
+
// Note that gp's stack scan is now "valid" because it has no
// stack.
gp.gcscanvalid = true
dropg()
- if _g_.m.locked&^_LockExternal != 0 {
- print("invalid m->locked = ", _g_.m.locked, "\n")
+ if _g_.m.lockedInt != 0 {
+ print("invalid m->lockedInt = ", _g_.m.lockedInt, "\n")
throw("internal lockOSThread error")
}
- _g_.m.locked = 0
+ _g_.m.lockedExt = 0
gfput(_g_.m.p.ptr(), gp)
+ if locked {
+ // The goroutine may have locked this thread because
+ // it put it in an unusual kernel state. Kill it
+ // rather than returning it to the thread pool.
+
+ // Return to mstart, which will release the P and exit
+ // the thread.
+ if GOOS != "plan9" { // See golang.org/issue/22227.
+ _g_.m.exiting = true
+ gogo(_g_.m.g0)
+ }
+ }
schedule()
}
@@ -2481,7 +2767,9 @@ func exitsyscall(dummy int32) {
oldp := _g_.m.p.ptr()
if exitsyscallfast() {
if _g_.m.mcache == nil {
- throw("lost mcache")
+ systemstack(func() {
+ throw("lost mcache")
+ })
}
if trace.enabled {
if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
@@ -2519,7 +2807,9 @@ func exitsyscall(dummy int32) {
mcall(exitsyscall0)
if _g_.m.mcache == nil {
- throw("lost mcache")
+ systemstack(func() {
+ throw("lost mcache")
+ })
}
// Scheduler returned, so we're allowed to run now.
@@ -2644,7 +2934,7 @@ func exitsyscall0(gp *g) {
acquirep(_p_)
execute(gp, false) // Never returns.
}
- if _g_.m.lockedg != nil {
+ if _g_.m.lockedg != 0 {
// Wait until another thread schedules gp and so m again.
stoplockedm()
execute(gp, false) // Never returns.
@@ -2798,7 +3088,7 @@ func newproc(fn uintptr, arg unsafe.Pointer) *g {
newg.entry = entry
newg.param = arg
- newg.gopc = getcallerpc(unsafe.Pointer(&fn))
+ newg.gopc = getcallerpc()
newg.startpc = fn
if _g_.m.curg != nil {
newg.labels = _g_.m.curg.labels
@@ -2827,7 +3117,7 @@ func newproc(fn uintptr, arg unsafe.Pointer) *g {
runqput(_p_, newg, true)
- if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 && runtimeInitTime != 0 {
+ if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 && mainStarted {
wakep()
}
_g_.m.locks--
@@ -2947,23 +3237,41 @@ func Breakpoint() {
//go:nosplit
func dolockOSThread() {
_g_ := getg()
- _g_.m.lockedg = _g_
- _g_.lockedm = _g_.m
+ _g_.m.lockedg.set(_g_)
+ _g_.lockedm.set(_g_.m)
}
//go:nosplit
// LockOSThread wires the calling goroutine to its current operating system thread.
-// Until the calling goroutine exits or calls UnlockOSThread, it will always
-// execute in that thread, and no other goroutine can.
+// The calling goroutine will always execute in that thread,
+// and no other goroutine will execute in it,
+// until the calling goroutine has made as many calls to
+// UnlockOSThread as to LockOSThread.
+// If the calling goroutine exits without unlocking the thread,
+// the thread will be terminated.
+//
+// A goroutine should call LockOSThread before calling OS services or
+// non-Go library functions that depend on per-thread state.
func LockOSThread() {
- getg().m.locked |= _LockExternal
+ if atomic.Load(&newmHandoff.haveTemplateThread) == 0 && GOOS != "plan9" {
+ // If we need to start a new thread from the locked
+ // thread, we need the template thread. Start it now
+ // while we're in a known-good state.
+ startTemplateThread()
+ }
+ _g_ := getg()
+ _g_.m.lockedExt++
+ if _g_.m.lockedExt == 0 {
+ _g_.m.lockedExt--
+ panic("LockOSThread nesting overflow")
+ }
dolockOSThread()
}
//go:nosplit
func lockOSThread() {
- getg().m.locked += _LockInternal
+ getg().m.lockedInt++
dolockOSThread()
}
@@ -2973,29 +3281,43 @@ func lockOSThread() {
//go:nosplit
func dounlockOSThread() {
_g_ := getg()
- if _g_.m.locked != 0 {
+ if _g_.m.lockedInt != 0 || _g_.m.lockedExt != 0 {
return
}
- _g_.m.lockedg = nil
- _g_.lockedm = nil
+ _g_.m.lockedg = 0
+ _g_.lockedm = 0
}
//go:nosplit
-// UnlockOSThread unwires the calling goroutine from its fixed operating system thread.
-// If the calling goroutine has not called LockOSThread, UnlockOSThread is a no-op.
+// UnlockOSThread undoes an earlier call to LockOSThread.
+// If this drops the number of active LockOSThread calls on the
+// calling goroutine to zero, it unwires the calling goroutine from
+// its fixed operating system thread.
+// If there are no active LockOSThread calls, this is a no-op.
+//
+// Before calling UnlockOSThread, the caller must ensure that the OS
+// thread is suitable for running other goroutines. If the caller made
+// any permanent changes to the state of the thread that would affect
+// other goroutines, it should not call this function and thus leave
+// the goroutine locked to the OS thread until the goroutine (and
+// hence the thread) exits.
func UnlockOSThread() {
- getg().m.locked &^= _LockExternal
+ _g_ := getg()
+ if _g_.m.lockedExt == 0 {
+ return
+ }
+ _g_.m.lockedExt--
dounlockOSThread()
}
//go:nosplit
func unlockOSThread() {
_g_ := getg()
- if _g_.m.locked < _LockInternal {
+ if _g_.m.lockedInt == 0 {
systemstack(badunlockosthread)
}
- _g_.m.locked -= _LockInternal
+ _g_.m.lockedInt--
dounlockOSThread()
}
@@ -3005,10 +3327,7 @@ func badunlockosthread() {
func gcount() int32 {
n := int32(allglen) - sched.ngfree - int32(atomic.Load(&sched.ngsys))
- for _, _p_ := range &allp {
- if _p_ == nil {
- break
- }
+ for _, _p_ := range allp {
n -= _p_.gfreecnt
}
@@ -3021,7 +3340,7 @@ func gcount() int32 {
}
func mcount() int32 {
- return sched.mcount
+ return int32(sched.mnext - sched.nmfreed)
}
var prof struct {
@@ -3190,7 +3509,7 @@ func setcpuprofilerate(hz int32) {
// Returns list of Ps with local work, they need to be scheduled by the caller.
func procresize(nprocs int32) *p {
old := gomaxprocs
- if old < 0 || old > _MaxGomaxprocs || nprocs <= 0 || nprocs > _MaxGomaxprocs {
+ if old < 0 || nprocs <= 0 {
throw("procresize: invalid arg")
}
if trace.enabled {
@@ -3204,6 +3523,23 @@ func procresize(nprocs int32) *p {
}
sched.procresizetime = now
+ // Grow allp if necessary.
+ if nprocs > int32(len(allp)) {
+ // Synchronize with retake, which could be running
+ // concurrently since it doesn't run on a P.
+ lock(&allpLock)
+ if nprocs <= int32(cap(allp)) {
+ allp = allp[:nprocs]
+ } else {
+ nallp := make([]*p, nprocs)
+ // Copy everything up to allp's cap so we
+ // never lose old allocated Ps.
+ copy(nallp, allp[:cap(allp)])
+ allp = nallp
+ }
+ unlock(&allpLock)
+ }
+
// initialize new P's
for i := int32(0); i < nprocs; i++ {
pp := allp[i]
@@ -3213,6 +3549,7 @@ func procresize(nprocs int32) *p {
pp.status = _Pgcstop
pp.sudogcache = pp.sudogbuf[:0]
pp.deferpool = pp.deferpoolbuf[:0]
+ pp.wbBuf.reset()
atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp))
}
if pp.mcache == nil {
@@ -3230,13 +3567,11 @@ func procresize(nprocs int32) *p {
// free unused P's
for i := nprocs; i < old; i++ {
p := allp[i]
- if trace.enabled {
- if p == getg().m.p.ptr() {
- // moving to p[0], pretend that we were descheduled
- // and then scheduled again to keep the trace sane.
- traceGoSched()
- traceProcStop(p)
- }
+ if trace.enabled && p == getg().m.p.ptr() {
+ // moving to p[0], pretend that we were descheduled
+ // and then scheduled again to keep the trace sane.
+ traceGoSched()
+ traceProcStop(p)
}
// move all runnable goroutines to the global queue
for p.runqhead != p.runqtail {
@@ -3262,6 +3597,11 @@ func procresize(nprocs int32) *p {
// world is stopped.
p.gcBgMarkWorker.set(nil)
}
+ // Flush p's write barrier buffer.
+ if gcphase != _GCoff {
+ wbBufFlush1(p)
+ p.gcw.dispose()
+ }
for i := range p.sudogbuf {
p.sudogbuf[i] = nil
}
@@ -3274,10 +3614,18 @@ func procresize(nprocs int32) *p {
p.mcache = nil
gfpurge(p)
traceProcFree(p)
+ p.gcAssistTime = 0
p.status = _Pdead
// can't free P itself because it can be referenced by an M in syscall
}
+ // Trim allp.
+ if int32(len(allp)) != nprocs {
+ lock(&allpLock)
+ allp = allp[:nprocs]
+ unlock(&allpLock)
+ }
+
_g_ := getg()
if _g_.m.p != 0 && _g_.m.p.ptr().id < nprocs {
// continue to use the current P
@@ -3349,7 +3697,7 @@ func acquirep1(_p_ *p) {
throw("acquirep: already in go")
}
if _p_.m != 0 || _p_.status != _Pidle {
- id := int32(0)
+ id := int64(0)
if _p_.m != 0 {
id = _p_.m.ptr().id
}
@@ -3394,6 +3742,7 @@ func incidlelocked(v int32) {
// Check for deadlock situation.
// The check is based on number of running M's, if 0 -> deadlock.
+// sched.lock must be held.
func checkdead() {
// For -buildmode=c-shared or -buildmode=c-archive it's OK if
// there are no running goroutines. The calling program is
@@ -3410,13 +3759,12 @@ func checkdead() {
return
}
- // -1 for sysmon
- run := sched.mcount - sched.nmidle - sched.nmidlelocked - 1
+ run := mcount() - sched.nmidle - sched.nmidlelocked - sched.nmsys
if run > 0 {
return
}
if run < 0 {
- print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", sched.mcount, "\n")
+ print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", mcount(), " nmsys=", sched.nmsys, "\n")
throw("checkdead: inconsistent counts")
}
@@ -3479,6 +3827,11 @@ var forcegcperiod int64 = 2 * 60 * 1e9
//
//go:nowritebarrierrec
func sysmon() {
+ lock(&sched.lock)
+ sched.nmsys++
+ checkdead()
+ unlock(&sched.lock)
+
// If a heap span goes unused for 5 minutes after a garbage collection,
// we hand it back to the operating system.
scavengelimit := int64(5 * 60 * 1e9)
@@ -3518,15 +3871,11 @@ func sysmon() {
}
shouldRelax := true
if osRelaxMinNS > 0 {
- lock(&timers.lock)
- if timers.sleeping {
- now := nanotime()
- next := timers.sleepUntil
- if next-now < osRelaxMinNS {
- shouldRelax = false
- }
+ next := timeSleepUntil()
+ now := nanotime()
+ if next-now < osRelaxMinNS {
+ shouldRelax = false
}
- unlock(&timers.lock)
}
if shouldRelax {
osRelax(true)
@@ -3550,7 +3899,7 @@ func sysmon() {
// poll network if not polled for more than 10ms
lastpoll := int64(atomic.Load64(&sched.lastpoll))
now := nanotime()
- if lastpoll != 0 && lastpoll+10*1000*1000 < now {
+ if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now {
atomic.Cas64(&sched.lastpoll, uint64(lastpoll), uint64(now))
gp := netpoll(false) // non-blocking - returns list of goroutines
if gp != nil {
@@ -3607,9 +3956,17 @@ const forcePreemptNS = 10 * 1000 * 1000 // 10ms
func retake(now int64) uint32 {
n := 0
- for i := int32(0); i < gomaxprocs; i++ {
+ // Prevent allp slice changes. This lock will be completely
+ // uncontended unless we're already stopping the world.
+ lock(&allpLock)
+ // We can't use a range loop over allp because we may
+ // temporarily drop the allpLock. Hence, we need to re-fetch
+ // allp each time around the loop.
+ for i := 0; i < len(allp); i++ {
_p_ := allp[i]
if _p_ == nil {
+ // This can happen if procresize has grown
+ // allp but not yet created new Ps.
continue
}
pd := &_p_.sysmontick
@@ -3628,6 +3985,8 @@ func retake(now int64) uint32 {
if runqempty(_p_) && atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) > 0 && pd.syscallwhen+10*1000*1000 > now {
continue
}
+ // Drop allpLock so we can take sched.lock.
+ unlock(&allpLock)
// Need to decrement number of idle locked M's
// (pretending that one more is running) before the CAS.
// Otherwise the M from which we retake can exit the syscall,
@@ -3643,6 +4002,7 @@ func retake(now int64) uint32 {
handoffp(_p_)
}
incidlelocked(1)
+ lock(&allpLock)
} else if s == _Prunning {
// Preempt G if it's running for too long.
t := int64(_p_.schedtick)
@@ -3657,6 +4017,7 @@ func retake(now int64) uint32 {
preemptone(_p_)
}
}
+ unlock(&allpLock)
return uint32(n)
}
@@ -3667,9 +4028,8 @@ func retake(now int64) uint32 {
// Returns true if preemption request was issued to at least one goroutine.
func preemptall() bool {
res := false
- for i := int32(0); i < gomaxprocs; i++ {
- _p_ := allp[i]
- if _p_ == nil || _p_.status != _Prunning {
+ for _, _p_ := range allp {
+ if _p_.status != _Prunning {
continue
}
if preemptone(_p_) {
@@ -3727,23 +4087,19 @@ func schedtrace(detailed bool) {
}
lock(&sched.lock)
- print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle, " threads=", sched.mcount, " spinningthreads=", sched.nmspinning, " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize)
+ print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle, " threads=", mcount(), " spinningthreads=", sched.nmspinning, " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize)
if detailed {
print(" gcwaiting=", sched.gcwaiting, " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait, "\n")
}
// We must be careful while reading data from P's, M's and G's.
// Even if we hold schedlock, most data can be changed concurrently.
// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
- for i := int32(0); i < gomaxprocs; i++ {
- _p_ := allp[i]
- if _p_ == nil {
- continue
- }
+ for i, _p_ := range allp {
mp := _p_.m.ptr()
h := atomic.Load(&_p_.runqhead)
t := atomic.Load(&_p_.runqtail)
if detailed {
- id := int32(-1)
+ id := int64(-1)
if mp != nil {
id = mp.id
}
@@ -3756,7 +4112,7 @@ func schedtrace(detailed bool) {
print("[")
}
print(t - h)
- if i == gomaxprocs-1 {
+ if i == len(allp)-1 {
print("]\n")
}
}
@@ -3770,7 +4126,7 @@ func schedtrace(detailed bool) {
for mp := allm; mp != nil; mp = mp.alllink {
_p_ := mp.p.ptr()
gp := mp.curg
- lockedg := mp.lockedg
+ lockedg := mp.lockedg.ptr()
id1 := int32(-1)
if _p_ != nil {
id1 = _p_.id
@@ -3790,12 +4146,12 @@ func schedtrace(detailed bool) {
for gi := 0; gi < len(allgs); gi++ {
gp := allgs[gi]
mp := gp.m
- lockedm := gp.lockedm
- id1 := int32(-1)
+ lockedm := gp.lockedm.ptr()
+ id1 := int64(-1)
if mp != nil {
id1 = mp.id
}
- id2 := int32(-1)
+ id2 := int64(-1)
if lockedm != nil {
id2 = lockedm.id
}
@@ -4077,22 +4433,25 @@ func runqgrab(_p_ *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool
if stealRunNextG {
// Try to steal from _p_.runnext.
if next := _p_.runnext; next != 0 {
- // Sleep to ensure that _p_ isn't about to run the g we
- // are about to steal.
- // The important use case here is when the g running on _p_
- // ready()s another g and then almost immediately blocks.
- // Instead of stealing runnext in this window, back off
- // to give _p_ a chance to schedule runnext. This will avoid
- // thrashing gs between different Ps.
- // A sync chan send/recv takes ~50ns as of time of writing,
- // so 3us gives ~50x overshoot.
- if GOOS != "windows" {
- usleep(3)
- } else {
- // On windows system timer granularity is 1-15ms,
- // which is way too much for this optimization.
- // So just yield.
- osyield()
+ if _p_.status == _Prunning {
+ // Sleep to ensure that _p_ isn't about to run the g
+ // we are about to steal.
+ // The important use case here is when the g running
+ // on _p_ ready()s another g and then almost
+ // immediately blocks. Instead of stealing runnext
+ // in this window, back off to give _p_ a chance to
+ // schedule runnext. This will avoid thrashing gs
+ // between different Ps.
+ // A sync chan send/recv takes ~50ns as of time of
+ // writing, so 3us gives ~50x overshoot.
+ if GOOS != "windows" {
+ usleep(3)
+ } else {
+ // On windows system timer granularity is
+ // 1-15ms, which is way too much for this
+ // optimization. So just yield.
+ osyield()
+ }
}
if !_p_.runnext.cas(next, 0) {
continue
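
A minimal user-level sketch of the reference-counted LockOSThread/UnlockOSThread semantics documented in the hunk above: the goroutine stays wired to its thread until the lock count drops back to zero, and, per the new doc comment, exiting while still locked terminates the thread.

package main

import (
	"fmt"
	"runtime"
)

func main() {
	done := make(chan struct{})
	go func() {
		defer close(done)
		runtime.LockOSThread()
		runtime.LockOSThread()
		runtime.UnlockOSThread() // count 2 -> 1: still wired to the thread
		runtime.UnlockOSThread() // count 1 -> 0: unwired; thread is reusable
		// Had this goroutine returned with a non-zero count, its OS
		// thread would be terminated rather than returned to the pool.
	}()
	<-done
	fmt.Println("OK")
}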
diff --git a/libgo/go/runtime/proc_runtime_test.go b/libgo/go/runtime/proc_runtime_test.go
index d56f9b14636..a7bde2c6df7 100644
--- a/libgo/go/runtime/proc_runtime_test.go
+++ b/libgo/go/runtime/proc_runtime_test.go
@@ -2,8 +2,6 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build ignore
-
// Proc unit tests. In runtime package so can use runtime guts.
package runtime
diff --git a/libgo/go/runtime/proc_test.go b/libgo/go/runtime/proc_test.go
index 313a9610e0e..672e1fa0148 100644
--- a/libgo/go/runtime/proc_test.go
+++ b/libgo/go/runtime/proc_test.go
@@ -658,6 +658,116 @@ func BenchmarkClosureCall(b *testing.B) {
_ = sum
}
+func benchmarkWakeupParallel(b *testing.B, spin func(time.Duration)) {
+ if runtime.GOMAXPROCS(0) == 1 {
+ b.Skip("skipping: GOMAXPROCS=1")
+ }
+
+ wakeDelay := 5 * time.Microsecond
+ for _, delay := range []time.Duration{
+ 0,
+ 1 * time.Microsecond,
+ 2 * time.Microsecond,
+ 5 * time.Microsecond,
+ 10 * time.Microsecond,
+ 20 * time.Microsecond,
+ 50 * time.Microsecond,
+ 100 * time.Microsecond,
+ } {
+ b.Run(delay.String(), func(b *testing.B) {
+ if b.N == 0 {
+ return
+ }
+ // Start two goroutines, which alternate between being
+ // sender and receiver in the following protocol:
+ //
+ // - The receiver spins for `delay` and then does a
+ // blocking receive on a channel.
+ //
+ // - The sender spins for `delay+wakeDelay` and then
+ // sends to the same channel. (The addition of
+ // `wakeDelay` improves the probability that the
+ // receiver will be blocking when the send occurs when
+ // the goroutines execute in parallel.)
+ //
+ // In each iteration of the benchmark, each goroutine
+ // acts once as sender and once as receiver, so each
+ // goroutine spins for delay twice.
+ //
+ // BenchmarkWakeupParallel is used to estimate how
+ // efficiently the scheduler parallelizes goroutines in
+ // the presence of blocking:
+ //
+ // - If both goroutines are executed on the same core,
+ // an increase in delay by N will increase the time per
+ // iteration by 4*N, because all 4 delays are
+ // serialized.
+ //
+ // - Otherwise, an increase in delay by N will increase
+ // the time per iteration by 2*N, and the time per
+ // iteration is 2 * (runtime overhead + chan
+ // send/receive pair + delay + wakeDelay). This allows
+ // the runtime overhead, including the time it takes
+ // for the unblocked goroutine to be scheduled, to be
+ // estimated.
+ ping, pong := make(chan struct{}), make(chan struct{})
+ start := make(chan struct{})
+ done := make(chan struct{})
+ go func() {
+ <-start
+ for i := 0; i < b.N; i++ {
+ // sender
+ spin(delay + wakeDelay)
+ ping <- struct{}{}
+ // receiver
+ spin(delay)
+ <-pong
+ }
+ done <- struct{}{}
+ }()
+ go func() {
+ for i := 0; i < b.N; i++ {
+ // receiver
+ spin(delay)
+ <-ping
+ // sender
+ spin(delay + wakeDelay)
+ pong <- struct{}{}
+ }
+ done <- struct{}{}
+ }()
+ b.ResetTimer()
+ start <- struct{}{}
+ <-done
+ <-done
+ })
+ }
+}
+
+func BenchmarkWakeupParallelSpinning(b *testing.B) {
+ benchmarkWakeupParallel(b, func(d time.Duration) {
+ end := time.Now().Add(d)
+ for time.Now().Before(end) {
+ // do nothing
+ }
+ })
+}
+
+// sysNanosleep is defined by OS-specific files (such as runtime_linux_test.go)
+// to sleep for the given duration. If nil, dependent tests are skipped.
+// The implementation should invoke a blocking system call and not
+// call time.Sleep, which would deschedule the goroutine.
+var sysNanosleep func(d time.Duration)
+
+func BenchmarkWakeupParallelSyscall(b *testing.B) {
+ if sysNanosleep == nil {
+ b.Skipf("skipping on %v; sysNanosleep not defined", runtime.GOOS)
+ }
+ benchmarkWakeupParallel(b, func(d time.Duration) {
+ sysNanosleep(d)
+ })
+}
+
type Matrix [][]float64
func BenchmarkMatmult(b *testing.B) {
@@ -722,8 +832,47 @@ func matmult(done chan<- struct{}, A, B, C Matrix, i0, i1, j0, j1, k0, k1, thres
}
}
-/*
func TestStealOrder(t *testing.T) {
runtime.RunStealOrderTest()
}
-*/
+
+func TestLockOSThreadNesting(t *testing.T) {
+ go func() {
+ e, i := runtime.LockOSCounts()
+ if e != 0 || i != 0 {
+ t.Errorf("want locked counts 0, 0; got %d, %d", e, i)
+ return
+ }
+ runtime.LockOSThread()
+ runtime.LockOSThread()
+ runtime.UnlockOSThread()
+ e, i = runtime.LockOSCounts()
+ if e != 1 || i != 0 {
+ t.Errorf("want locked counts 1, 0; got %d, %d", e, i)
+ return
+ }
+ runtime.UnlockOSThread()
+ e, i = runtime.LockOSCounts()
+ if e != 0 || i != 0 {
+ t.Errorf("want locked counts 0, 0; got %d, %d", e, i)
+ return
+ }
+ }()
+}
+
+func TestLockOSThreadExit(t *testing.T) {
+ testLockOSThreadExit(t, "testprog")
+}
+
+func testLockOSThreadExit(t *testing.T, prog string) {
+ output := runTestProg(t, prog, "LockOSThreadMain", "GOMAXPROCS=1")
+ want := "OK\n"
+ if output != want {
+ t.Errorf("want %s, got %s\n", want, output)
+ }
+
+ output = runTestProg(t, prog, "LockOSThreadAlt")
+ if output != want {
+ t.Errorf("want %s, got %s\n", want, output)
+ }
+}
diff --git a/libgo/go/runtime/runtime-lldb_test.go b/libgo/go/runtime/runtime-lldb_test.go
index 98bc9066662..9a287052eaf 100644
--- a/libgo/go/runtime/runtime-lldb_test.go
+++ b/libgo/go/runtime/runtime-lldb_test.go
@@ -5,11 +5,7 @@
package runtime_test
import (
- "debug/elf"
- "debug/macho"
- "encoding/binary"
"internal/testenv"
- "io"
"io/ioutil"
"os"
"os/exec"
@@ -158,7 +154,7 @@ func TestLldbPython(t *testing.T) {
t.Fatalf("failed to create file: %v", err)
}
- cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags", "-N -l", "-o", "a.exe")
+ cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags=all=-N -l", "-o", "a.exe")
cmd.Dir = dir
out, err := cmd.CombinedOutput()
if err != nil {
@@ -182,81 +178,3 @@ func TestLldbPython(t *testing.T) {
t.Fatalf("Unexpected lldb output:\n%s", got)
}
}
-
-// Check that aranges are valid even when lldb isn't installed.
-func TestDwarfAranges(t *testing.T) {
- testenv.MustHaveGoBuild(t)
- dir, err := ioutil.TempDir("", "go-build")
- if err != nil {
- t.Fatalf("failed to create temp directory: %v", err)
- }
- defer os.RemoveAll(dir)
-
- src := filepath.Join(dir, "main.go")
- err = ioutil.WriteFile(src, []byte(lldbHelloSource), 0644)
- if err != nil {
- t.Fatalf("failed to create file: %v", err)
- }
-
- cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "a.exe")
- cmd.Dir = dir
- out, err := cmd.CombinedOutput()
- if err != nil {
- t.Fatalf("building source %v\n%s", err, out)
- }
-
- filename := filepath.Join(dir, "a.exe")
- if f, err := elf.Open(filename); err == nil {
- sect := f.Section(".debug_aranges")
- if sect == nil {
- t.Fatal("Missing aranges section")
- }
- verifyAranges(t, f.ByteOrder, sect.Open())
- } else if f, err := macho.Open(filename); err == nil {
- sect := f.Section("__debug_aranges")
- if sect == nil {
- t.Fatal("Missing aranges section")
- }
- verifyAranges(t, f.ByteOrder, sect.Open())
- } else {
- t.Skip("Not an elf or macho binary.")
- }
-}
-
-func verifyAranges(t *testing.T, byteorder binary.ByteOrder, data io.ReadSeeker) {
- var header struct {
- UnitLength uint32 // does not include the UnitLength field
- Version uint16
- Offset uint32
- AddressSize uint8
- SegmentSize uint8
- }
- for {
- offset, err := data.Seek(0, io.SeekCurrent)
- if err != nil {
- t.Fatalf("Seek error: %v", err)
- }
- if err = binary.Read(data, byteorder, &header); err == io.EOF {
- return
- } else if err != nil {
- t.Fatalf("Error reading arange header: %v", err)
- }
- tupleSize := int64(header.SegmentSize) + 2*int64(header.AddressSize)
- lastTupleOffset := offset + int64(header.UnitLength) + 4 - tupleSize
- if lastTupleOffset%tupleSize != 0 {
- t.Fatalf("Invalid arange length %d, (addr %d, seg %d)", header.UnitLength, header.AddressSize, header.SegmentSize)
- }
- if _, err = data.Seek(lastTupleOffset, io.SeekStart); err != nil {
- t.Fatalf("Seek error: %v", err)
- }
- buf := make([]byte, tupleSize)
- if n, err := data.Read(buf); err != nil || int64(n) < tupleSize {
- t.Fatalf("Read error: %v", err)
- }
- for _, val := range buf {
- if val != 0 {
- t.Fatalf("Invalid terminator")
- }
- }
- }
-}
diff --git a/libgo/go/runtime/runtime.go b/libgo/go/runtime/runtime.go
index 58710de406c..d19d6afed38 100644
--- a/libgo/go/runtime/runtime.go
+++ b/libgo/go/runtime/runtime.go
@@ -61,6 +61,12 @@ func syscall_Getpagesize() int { return int(physPageSize) }
//go:linkname os_runtime_args os.runtime_args
func os_runtime_args() []string { return append([]string{}, argslice...) }
+//go:linkname syscall_Exit syscall.Exit
+//go:nosplit
+func syscall_Exit(code int) {
+ exit(int32(code))
+}
+
// Temporary, for the gccgo runtime code written in C.
//go:linkname get_envs runtime_get_envs
func get_envs() []string { return envs }
diff --git a/libgo/go/runtime/runtime1.go b/libgo/go/runtime/runtime1.go
index 627adf74765..b617f8598fa 100644
--- a/libgo/go/runtime/runtime1.go
+++ b/libgo/go/runtime/runtime1.go
@@ -111,10 +111,6 @@ var test_z64, test_x64 uint64
func testAtomic64() {
test_z64 = 42
test_x64 = 0
- prefetcht0(uintptr(unsafe.Pointer(&test_z64)))
- prefetcht1(uintptr(unsafe.Pointer(&test_z64)))
- prefetcht2(uintptr(unsafe.Pointer(&test_z64)))
- prefetchnta(uintptr(unsafe.Pointer(&test_z64)))
if atomic.Cas64(&test_z64, test_x64, 1) {
throw("cas64 failed")
}
@@ -413,13 +409,6 @@ func parsedebugvars() {
setTraceback(gogetenv("GOTRACEBACK"))
traceback_env = traceback_cache
-
- // For cgocheck > 1, we turn on the write barrier at all times
- // and check all pointer writes.
- if debug.cgocheck > 1 {
- writeBarrier.cgo = true
- writeBarrier.enabled = true
- }
}
//go:linkname setTraceback runtime_debug.SetTraceback
diff --git a/libgo/go/runtime/runtime2.go b/libgo/go/runtime/runtime2.go
index 045e76ff4df..543086d09aa 100644
--- a/libgo/go/runtime/runtime2.go
+++ b/libgo/go/runtime/runtime2.go
@@ -173,9 +173,13 @@ func efaceOf(ep *interface{}) *eface {
// a word that is completely ignored by the GC than to have one for which
// only a few updates are ignored.
//
-// Gs, Ms, and Ps are always reachable via true pointers in the
-// allgs, allm, and allp lists or (during allocation before they reach those lists)
+// Gs and Ps are always reachable via true pointers in the
+// allgs and allp lists or (during allocation before they reach those lists)
// from stack variables.
+//
+// Ms are always reachable via true pointers either from allm or
+// freem. Unlike Gs and Ps we do free Ms, so it's important that
+// nothing ever hold an muintptr across a safe point.
// A guintptr holds a goroutine pointer, but typed as a uintptr
// to bypass write barriers. It is used in the Gobuf goroutine state
@@ -225,6 +229,15 @@ func (pp puintptr) ptr() *p { return (*p)(unsafe.Pointer(pp)) }
//go:nosplit
func (pp *puintptr) set(p *p) { *pp = puintptr(unsafe.Pointer(p)) }
+// muintptr is a *m that is not tracked by the garbage collector.
+//
+// Because we do free Ms, there are some additional constraints on
+// muintptrs:
+//
+// 1. Never hold an muintptr locally across a safe point.
+//
+// 2. Any muintptr in the heap must be owned by the M itself so it can
+// ensure it is not in use when the last true *m is released.
type muintptr uintptr
//go:nosplit
@@ -256,11 +269,14 @@ type sudog struct {
// channel this sudog is blocking on. shrinkstack depends on
// this for sudogs involved in channel ops.
- g *g
- selectdone *uint32 // CAS to 1 to win select race (may point to stack)
- next *sudog
- prev *sudog
- elem unsafe.Pointer // data element (may point to stack)
+ g *g
+
+ // isSelect indicates g is participating in a select, so
+ // g.selectDone must be CAS'd to win the wake-up race.
+ isSelect bool
+ next *sudog
+ prev *sudog
+ elem unsafe.Pointer // data element (may point to stack)
// The following fields are never accessed concurrently.
// For channels, waitlink is only accessed by g.
@@ -351,7 +367,7 @@ type g struct {
sysexitticks int64 // cputicks when syscall has returned (for tracing)
traceseq uint64 // trace event sequencer
tracelastp puintptr // last P emitted an event for this goroutine
- lockedm *m
+ lockedm muintptr
sig uint32
writebuf []byte
sigcode0 uintptr
@@ -362,8 +378,9 @@ type g struct {
// Not for gccgo: racectx uintptr
waiting *sudog // sudog structures this g is waiting on (that have a valid elem ptr); in lock order
// Not for gccgo: cgoCtxt []uintptr // cgo traceback context
- labels unsafe.Pointer // profiler labels
- timer *timer // cached timer for time.Sleep
+ labels unsafe.Pointer // profiler labels
+ timer *timer // cached timer for time.Sleep
+ selectDone uint32 // are we participating in a select and did someone win the race?
// Per-G GC state
@@ -381,13 +398,26 @@ type g struct {
exception unsafe.Pointer // current exception being thrown
isforeign bool // whether current exception is not from Go
- // Fields that hold stack and context information if status is Gsyscall
+	// When using split-stacks, these fields hold the results of
+ // __splitstack_find while executing a syscall. These are used
+ // by the garbage collector to scan the goroutine's stack.
+ //
+ // When not using split-stacks, g0 stacks are allocated by the
+ // libc and other goroutine stacks are allocated by malg.
+ // gcstack: unused (sometimes cleared)
+ // gcstacksize: g0: 0; others: size of stack
+ // gcnextsegment: unused
+ // gcnextsp: current SP while executing a syscall
+ // gcinitialsp: g0: top of stack; others: start of stack memory
gcstack uintptr
gcstacksize uintptr
gcnextsegment uintptr
gcnextsp uintptr
gcinitialsp unsafe.Pointer
- gcregs g_ucontext_t
+
+ // gcregs holds the register values while executing a syscall.
+ // This is set by getcontext and scanned by the garbage collector.
+ gcregs g_ucontext_t
entry func(unsafe.Pointer) // goroutine function to run
entryfn uintptr // function address passed to __go_go
@@ -411,14 +441,15 @@ type m struct {
// Fields not known to debuggers.
procid uint64 // for debuggers, but offset not hard-coded
gsignal *g // signal-handling g
+ // Not for gccgo: goSigStack gsignalStack // Go-allocated signal handling stack
sigmask sigset // storage for saved signal mask
- // Not for gccgo: tls [6]uintptr // thread-local storage (for x86 extern register)
+ // Not for gccgo: tls [6]uintptr // thread-local storage (for x86 extern register)
mstartfn func()
curg *g // current running goroutine
caughtsig guintptr // goroutine running during fatal signal
p puintptr // attached p for executing go code (nil if not executing go code)
nextp puintptr
- id int32
+ id int64
mallocing int32
throwing int32
preemptoff string // if != "", keep curg running on this m
@@ -432,8 +463,11 @@ type m struct {
inwb bool // m is executing a write barrier
newSigstack bool // minit on C thread called sigaltstack
printlock int8
- incgo bool // m is executing a cgo call
- fastrand uint32
+ incgo bool // m is executing a cgo call
+ freeWait uint32 // if == 0, safe to free g0 and delete m (atomic)
+ fastrand [2]uint32
+ needextram bool
+ traceback uint8
ncgocall uint64 // number of cgo calls in total
ncgo int32 // number of cgo calls currently in progress
// Not for gccgo: cgoCallersUse uint32 // if non-zero, cgoCallers in use temporarily
@@ -442,15 +476,14 @@ type m struct {
alllink *m // on allm
schedlink muintptr
mcache *mcache
- lockedg *g
+ lockedg guintptr
createstack [32]location // stack that created this thread.
- // Not for gccgo: freglo [16]uint32 // d[i] lsb and f[i]
- // Not for gccgo: freghi [16]uint32 // d[i] msb and f[i+16]
- // Not for gccgo: fflag uint32 // floating point compare flags
- locked uint32 // tracking for lockosthread
- nextwaitm uintptr // next m waiting for lock
- needextram bool
- traceback uint8
+ // Not for gccgo: freglo [16]uint32 // d[i] lsb and f[i]
+ // Not for gccgo: freghi [16]uint32 // d[i] msb and f[i+16]
+ // Not for gccgo: fflag uint32 // floating point compare flags
+ lockedExt uint32 // tracking for external LockOSThread
+ lockedInt uint32 // tracking for internal lockOSThread
+ nextwaitm muintptr // next m waiting for lock
waitunlockf unsafe.Pointer // todo go func(*g, unsafe.pointer) bool
waitlock unsafe.Pointer
waittraceev byte
@@ -458,6 +491,7 @@ type m struct {
startingtrace bool
syscalltick uint32
// Not for gccgo: thread uintptr // thread handle
+ freelink *m // on sched.freem
// these are here because they are too large to be on the stack
// of low-level NOSPLIT functions.
@@ -475,6 +509,7 @@ type m struct {
gsignalstacksize uintptr
dropextram bool // drop after call is done
+ exiting bool // thread is exiting
gcing int32
}
@@ -490,7 +525,7 @@ type p struct {
sysmontick sysmontick // last tick observed by sysmon
m muintptr // back-link to associated m (nil if idle)
mcache *mcache
- // Not for gccgo: racectx uintptr
+ racectx uintptr
// gccgo has only one size of defer.
deferpool []*_defer
@@ -535,26 +570,30 @@ type p struct {
palloc persistentAlloc // per-P to avoid mutex
// Per-P GC state
- gcAssistTime int64 // Nanoseconds in assistAlloc
- gcBgMarkWorker guintptr
- gcMarkWorkerMode gcMarkWorkerMode
+ gcAssistTime int64 // Nanoseconds in assistAlloc
+ gcFractionalMarkTime int64 // Nanoseconds in fractional mark worker
+ gcBgMarkWorker guintptr
+ gcMarkWorkerMode gcMarkWorkerMode
+
+ // gcMarkWorkerStartTime is the nanotime() at which this mark
+ // worker started.
+ gcMarkWorkerStartTime int64
// gcw is this P's GC work buffer cache. The work buffer is
// filled by write barriers, drained by mutator assists, and
// disposed on certain GC state transitions.
gcw gcWork
+ // wbBuf is this P's GC write barrier buffer.
+ //
+ // TODO: Consider caching this in the running G.
+ wbBuf wbBuf
+
runSafePointFn uint32 // if 1, run sched.safePointFn at next safe point
pad [sys.CacheLineSize]byte
}
-const (
- // The max value of GOMAXPROCS.
- // There are no fundamental restrictions on the value.
- _MaxGomaxprocs = 1 << 10
-)
-
type schedt struct {
// accessed atomically. keep at top to ensure alignment on 32-bit systems.
goidgen uint64
@@ -562,11 +601,16 @@ type schedt struct {
lock mutex
+ // When increasing nmidle, nmidlelocked, nmsys, or nmfreed, be
+ // sure to call checkdead().
+
midle muintptr // idle m's waiting for work
nmidle int32 // number of idle m's waiting for work
nmidlelocked int32 // number of locked m's waiting for work
- mcount int32 // number of m's that have been created
+ mnext int64 // number of m's that have been created and next M ID
maxmcount int32 // maximum number of m's allowed (or die)
+ nmsys int32 // number of system m's not counted for deadlock
+ nmfreed int64 // cumulative number of freed m's
ngsys uint32 // number of system goroutines; updated atomically
@@ -592,6 +636,10 @@ type schedt struct {
deferlock mutex
deferpool *_defer
+ // freem is the list of m's waiting to be freed when their
+ // m.exited is set. Linked through m.freelink.
+ freem *m
+
gcwaiting uint32 // gc is waiting to run
stopwait int32
stopnote note
@@ -610,18 +658,7 @@ type schedt struct {
totaltime int64 // ∫gomaxprocs dt up to procresizetime
}
-// The m.locked word holds two pieces of state counting active calls to LockOSThread/lockOSThread.
-// The low bit (LockExternal) is a boolean reporting whether any LockOSThread call is active.
-// External locks are not recursive; a second lock is silently ignored.
-// The upper bits of m.locked record the nesting depth of calls to lockOSThread
-// (counting up by LockInternal), popped by unlockOSThread (counting down by LockInternal).
-// Internal locks can be recursive. For instance, a lock for cgo can occur while the main
-// goroutine is holding the lock during the initialization phase.
-const (
- _LockExternal = 1
- _LockInternal = 2
-)
-
+// Values for the flags field of a sigTabT.
const (
_SigNotify = 1 << iota // let signal.Notify have signal, even if from kernel
_SigKill // if signal.Notify doesn't take it, exit quietly
@@ -630,7 +667,8 @@ const (
_SigDefault // if the signal isn't explicitly requested, don't monitor it
_SigGoExit // cause all runtime procs to exit (only used on Plan 9).
_SigSetStack // add SA_ONSTACK to libc handler
- _SigUnblock // unblocked in minit
+ _SigUnblock // always unblock; see blockableSig
+ _SigIgn // _SIG_DFL action is to ignore the signal
)
// Lock-free stack node.
@@ -671,8 +709,8 @@ func extendRandom(r []byte, n int) {
}
}
-// deferred subroutine calls
-// This is the gccgo version.
+// A _defer holds an entry on the list of deferred calls.
+// If you add a field here, add code to clear it in freedefer.
type _defer struct {
// The next entry in the stack.
link *_defer
@@ -743,7 +781,8 @@ const _TracebackMaxFrames = 100
var (
allglen uintptr
allm *m
- allp [_MaxGomaxprocs + 1]*p
+ allp []*p // len(allp) == gomaxprocs; may change at safe points, otherwise immutable
+ allpLock mutex // Protects P-less reads of allp and all writes
gomaxprocs int32
ncpu int32
forcegc forcegcstate
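
allp changes from a fixed array sized by the old _MaxGomaxprocs constant to a slice whose length tracks gomaxprocs, with allpLock covering writes plus any read performed without holding a P. A self-contained sketch of that access pattern, using a plain sync.Mutex in place of the runtime's mutex and made-up helper names:

package main

import (
        "fmt"
        "sync"
)

type proc struct{ id int32 }

var (
        allpLock sync.Mutex // protects P-less reads of allp and all writes
        allp     []*proc    // len(allp) == gomaxprocs
)

// resizeAllp grows or shrinks allp at a safe point, under allpLock.
func resizeAllp(n int) {
        allpLock.Lock()
        defer allpLock.Unlock()
        for len(allp) < n {
                allp = append(allp, &proc{id: int32(len(allp))})
        }
        allp = allp[:n]
}

// snapshotAllp copies allp for a reader that does not own a P.
func snapshotAllp() []*proc {
        allpLock.Lock()
        defer allpLock.Unlock()
        return append([]*proc(nil), allp...)
}

func main() {
        resizeAllp(4)
        fmt.Println(len(snapshotAllp())) // 4
}
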
diff --git a/libgo/go/runtime/runtime_mmap_test.go b/libgo/go/runtime/runtime_mmap_test.go
index 0141e81d4a0..c0040414d46 100644
--- a/libgo/go/runtime/runtime_mmap_test.go
+++ b/libgo/go/runtime/runtime_mmap_test.go
@@ -14,17 +14,10 @@ import (
// what the code in mem_bsd.go, mem_darwin.go, and mem_linux.go expects.
// See the uses of ENOMEM in sysMap in those files.
func TestMmapErrorSign(t *testing.T) {
- p := runtime.Mmap(nil, ^uintptr(0)&^(runtime.GetPhysPageSize()-1), 0, runtime.MAP_ANON|runtime.MAP_PRIVATE, -1, 0)
+ p, err := runtime.Mmap(nil, ^uintptr(0)&^(runtime.GetPhysPageSize()-1), 0, runtime.MAP_ANON|runtime.MAP_PRIVATE, -1, 0)
- // The runtime.mmap function is nosplit, but t.Errorf is not.
- // Reset the pointer so that we don't get an "invalid stack
- // pointer" error from t.Errorf if we call it.
- v := uintptr(p)
- p = nil
-
- err := runtime.Errno()
- if v != ^uintptr(0) || err != runtime.ENOMEM {
- t.Errorf("mmap = %v, %v, want %v", v, err, runtime.ENOMEM)
+ if p != nil || err != runtime.ENOMEM {
+ t.Errorf("mmap = %v, %v, want nil, %v", p, err, runtime.ENOMEM)
}
}
@@ -34,20 +27,20 @@ func TestPhysPageSize(t *testing.T) {
ps := runtime.GetPhysPageSize()
// Get a region of memory to play with. This should be page-aligned.
- b := uintptr(runtime.Mmap(nil, 2*ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE, -1, 0))
- if b == ^uintptr(0) {
- t.Fatalf("Mmap: %v %v", b, runtime.Errno())
+ b, err := runtime.Mmap(nil, 2*ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE, -1, 0)
+ if err != 0 {
+ t.Fatalf("Mmap: %v", err)
}
// Mmap should fail at a half page into the buffer.
- err := uintptr(runtime.Mmap(unsafe.Pointer(uintptr(b)+ps/2), ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE|runtime.MAP_FIXED, -1, 0))
- if err != ^uintptr(0) {
+ _, err = runtime.Mmap(unsafe.Pointer(uintptr(b)+ps/2), ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE|runtime.MAP_FIXED, -1, 0)
+ if err == 0 {
t.Errorf("Mmap should have failed with half-page alignment %d, but succeeded: %v", ps/2, err)
}
// Mmap should succeed at a full page into the buffer.
- err = uintptr(runtime.Mmap(unsafe.Pointer(uintptr(b)+ps), ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE|runtime.MAP_FIXED, -1, 0))
- if err == ^uintptr(0) {
- t.Errorf("Mmap at full-page alignment %d failed: %v %v", ps, err, runtime.Errno())
+ _, err = runtime.Mmap(unsafe.Pointer(uintptr(b)+ps), ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE|runtime.MAP_FIXED, -1, 0)
+ if err != 0 {
+ t.Errorf("Mmap at full-page alignment %d failed: %v", ps, err)
}
}
diff --git a/libgo/go/runtime/runtime_test.go b/libgo/go/runtime/runtime_test.go
index b8f6ac2aed4..0231043260b 100644
--- a/libgo/go/runtime/runtime_test.go
+++ b/libgo/go/runtime/runtime_test.go
@@ -5,6 +5,7 @@
package runtime_test
import (
+ "flag"
"io"
. "runtime"
"runtime/debug"
@@ -13,6 +14,8 @@ import (
"unsafe"
)
+var flagQuick = flag.Bool("quick", false, "skip slow tests, for second run in all.bash")
+
func init() {
// We're testing the runtime, so make tracebacks show things
// in the runtime. This only raises the level, so it won't
@@ -196,9 +199,9 @@ func eqstring_generic(s1, s2 string) bool {
}
func TestEqString(t *testing.T) {
- // This isn't really an exhaustive test of eqstring, it's
+ // This isn't really an exhaustive test of == on strings, it's
// just a convenient way of documenting (via eqstring_generic)
- // what eqstring does.
+ // what == does.
s := []string{
"",
"a",
@@ -213,7 +216,7 @@ func TestEqString(t *testing.T) {
x := s1 == s2
y := eqstring_generic(s1, s2)
if x != y {
- t.Errorf(`eqstring("%s","%s") = %t, want %t`, s1, s2, x, y)
+ t.Errorf(`("%s" == "%s") = %t, want %t`, s1, s2, x, y)
}
}
}
diff --git a/libgo/go/runtime/rwmutex_test.go b/libgo/go/runtime/rwmutex_test.go
index a69eca1511f..872b3b098e8 100644
--- a/libgo/go/runtime/rwmutex_test.go
+++ b/libgo/go/runtime/rwmutex_test.go
@@ -12,6 +12,7 @@ package runtime_test
import (
"fmt"
. "runtime"
+ "runtime/debug"
"sync/atomic"
"testing"
)
@@ -47,6 +48,10 @@ func doTestParallelReaders(numReaders int) {
func TestParallelRWMutexReaders(t *testing.T) {
defer GOMAXPROCS(GOMAXPROCS(-1))
+ // If runtime triggers a forced GC during this test then it will deadlock,
+ // since the goroutines can't be stopped/preempted.
+ // Disable GC for this test (see issue #10958).
+ defer debug.SetGCPercent(debug.SetGCPercent(-1))
doTestParallelReaders(1)
doTestParallelReaders(3)
doTestParallelReaders(4)
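
The added defer is the usual idiom for GC-sensitive tests: debug.SetGCPercent returns the previous setting, so the inner call with -1 disables collection for the body of the test and the deferred outer call restores whatever was configured before. The same pattern in isolation:

package main

import (
        "fmt"
        "runtime/debug"
)

func main() {
        // Disable the collector and restore the previous percentage on return.
        defer debug.SetGCPercent(debug.SetGCPercent(-1))

        // GC-sensitive work would run here with collection disabled.
        fmt.Println("GC disabled for this section")
}
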
diff --git a/libgo/go/runtime/select.go b/libgo/go/runtime/select.go
index 9f8ac49d972..096af52be35 100644
--- a/libgo/go/runtime/select.go
+++ b/libgo/go/runtime/select.go
@@ -88,7 +88,7 @@ func newselect(sel *hselect, selsize int64, size int32) {
}
func selectsend(sel *hselect, c *hchan, elem unsafe.Pointer) {
- pc := getcallerpc(unsafe.Pointer(&sel))
+ pc := getcallerpc()
i := sel.ncase
if i >= sel.tcase {
throw("selectsend: too many cases")
@@ -109,7 +109,7 @@ func selectsend(sel *hselect, c *hchan, elem unsafe.Pointer) {
}
func selectrecv(sel *hselect, c *hchan, elem unsafe.Pointer, received *bool) {
- pc := getcallerpc(unsafe.Pointer(&sel))
+ pc := getcallerpc()
i := sel.ncase
if i >= sel.tcase {
throw("selectrecv: too many cases")
@@ -131,7 +131,7 @@ func selectrecv(sel *hselect, c *hchan, elem unsafe.Pointer, received *bool) {
}
func selectdefault(sel *hselect) {
- pc := getcallerpc(unsafe.Pointer(&sel))
+ pc := getcallerpc()
i := sel.ncase
if i >= sel.tcase {
throw("selectdefault: too many cases")
@@ -301,7 +301,6 @@ func selectgo(sel *hselect) int {
var (
gp *g
- done uint32
sg *sudog
c *hchan
k *scase
@@ -368,7 +367,6 @@ loop:
// pass 2 - enqueue on all chans
gp = getg()
- done = 0
if gp.waiting != nil {
throw("gp.waiting != nil")
}
@@ -382,8 +380,7 @@ loop:
c = cas.c
sg := acquireSudog()
sg.g = gp
- // Note: selectdone is adjusted for stack copies in stack1.go:adjustsudogs
- sg.selectdone = (*uint32)(noescape(unsafe.Pointer(&done)))
+ sg.isSelect = true
// No stack splits between assigning elem and enqueuing
// sg on gp.waiting where copystack can find it.
sg.elem = cas.elem
@@ -409,62 +406,9 @@ loop:
gp.param = nil
gopark(selparkcommit, nil, "select", traceEvGoBlockSelect, 1)
- // While we were asleep, some goroutine came along and completed
- // one of the cases in the select and woke us up (called ready).
- // As part of that process, the goroutine did a cas on done above
- // (aka *sg.selectdone for all queued sg) to win the right to
- // complete the select. Now done = 1.
- //
- // If we copy (grow) our own stack, we will update the
- // selectdone pointers inside the gp.waiting sudog list to point
- // at the new stack. Another goroutine attempting to
- // complete one of our (still linked in) select cases might
- // see the new selectdone pointer (pointing at the new stack)
- // before the new stack has real data; if the new stack has done = 0
- // (before the old values are copied over), the goroutine might
- // do a cas via sg.selectdone and incorrectly believe that it has
- // won the right to complete the select, executing a second
- // communication and attempting to wake us (call ready) again.
- //
- // Then things break.
- //
- // The best break is that the goroutine doing ready sees the
- // _Gcopystack status and throws, as in #17007.
- // A worse break would be for us to continue on, start running real code,
- // block in a semaphore acquisition (sema.go), and have the other
- // goroutine wake us up without having really acquired the semaphore.
- // That would result in the goroutine spuriously running and then
- // queue up another spurious wakeup when the semaphore really is ready.
- // In general the situation can cascade until something notices the
- // problem and causes a crash.
- //
- // A stack shrink does not have this problem, because it locks
- // all the channels that are involved first, blocking out the
- // possibility of a cas on selectdone.
- //
- // A stack growth before gopark above does not have this
- // problem, because we hold those channel locks (released by
- // selparkcommit).
- //
- // A stack growth after sellock below does not have this
- // problem, because again we hold those channel locks.
- //
- // The only problem is a stack growth during sellock.
- // To keep that from happening, run sellock on the system stack.
- //
- // It might be that we could avoid this if copystack copied the
- // stack before calling adjustsudogs. In that case,
- // syncadjustsudogs would need to recopy the tiny part that
- // it copies today, resulting in a little bit of extra copying.
- //
- // An even better fix, not for the week before a release candidate,
- // would be to put space in every sudog and make selectdone
- // point at (say) the space in the first sudog.
-
- systemstack(func() {
- sellock(scases, lockorder)
- })
+ sellock(scases, lockorder)
+ gp.selectDone = 0
sg = (*sudog)(gp.param)
gp.param = nil
@@ -477,7 +421,7 @@ loop:
sglist = gp.waiting
// Clear all elem before unlinking from gp.waiting.
for sg1 := gp.waiting; sg1 != nil; sg1 = sg1.waitlink {
- sg1.selectdone = nil
+ sg1.isSelect = false
sg1.elem = nil
sg1.c = nil
}
@@ -528,10 +472,8 @@ loop:
print("wait-return: sel=", sel, " c=", c, " cas=", cas, " kind=", cas.kind, "\n")
}
- if cas.kind == caseRecv {
- if cas.receivedp != nil {
- *cas.receivedp = true
- }
+ if cas.kind == caseRecv && cas.receivedp != nil {
+ *cas.receivedp = true
}
if raceenabled {
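
With the stack-allocated done word gone, each queued sudog only carries isSelect, and the race to complete the select moves to g.selectDone, which selectgo clears before dequeuing its sudogs. The waking side (in the channel code, not shown in this hunk) then claims the select with a single compare-and-swap; roughly, as a self-contained sketch rather than the exact runtime source:

package main

import (
        "fmt"
        "sync/atomic"
)

// Toy model: a parked goroutine has one selectDone word, and every
// sudog queued for that select points back at the same goroutine.
type gToy struct{ selectDone uint32 }

type sudogToy struct {
        g        *gToy
        isSelect bool
}

// claim reports whether the caller won the right to complete the
// select sg belongs to; only the first CAS succeeds, so at most one
// channel operation wakes the selecting goroutine.
func claim(sg *sudogToy) bool {
        if !sg.isSelect {
                return true // plain send/recv, nothing to race for
        }
        return atomic.CompareAndSwapUint32(&sg.g.selectDone, 0, 1)
}

func main() {
        gp := &gToy{}
        a := &sudogToy{g: gp, isSelect: true}
        b := &sudogToy{g: gp, isSelect: true}
        fmt.Println(claim(a)) // true: this case wins
        fmt.Println(claim(b)) // false: the select was already completed
}
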
diff --git a/libgo/go/runtime/sema.go b/libgo/go/runtime/sema.go
index d04e6f592fc..6e2beeccee1 100644
--- a/libgo/go/runtime/sema.go
+++ b/libgo/go/runtime/sema.go
@@ -275,7 +275,10 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) {
// on the ticket: s.ticket <= both s.prev.ticket and s.next.ticket.
// https://en.wikipedia.org/wiki/Treap
// http://faculty.washington.edu/aragon/pubs/rst89.pdf
- s.ticket = fastrand()
+ //
+ // s.ticket is compared with zero in a couple of places, so set the lowest bit.
+ // It will not noticeably affect the treap's quality.
+ s.ticket = fastrand() | 1
s.parent = last
*pt = s
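
The | 1 exists because other code in sema.go treats a zero ticket as meaning the node is not yet in the treap, so priorities must never be zero; forcing the low bit gives up a single bit of randomness. A trivial stand-alone illustration, with math/rand standing in for the runtime's fastrand:

package main

import (
        "fmt"
        "math/rand"
)

// ticket returns a treap priority that can never be zero, mirroring
// the fastrand() | 1 trick above.
func ticket() uint32 { return uint32(rand.Int63()) | 1 }

func main() {
        for i := 0; i < 3; i++ {
                t := ticket()
                fmt.Println(t, t != 0) // the second value is always true
        }
}
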
diff --git a/libgo/go/runtime/signal_gccgo.go b/libgo/go/runtime/signal_gccgo.go
index 056be36a729..6fe7ba10aaf 100644
--- a/libgo/go/runtime/signal_gccgo.go
+++ b/libgo/go/runtime/signal_gccgo.go
@@ -46,11 +46,6 @@ func kill(pid _pid_t, sig uint32) int32
//extern setitimer
func setitimer(which int32, new *_itimerval, old *_itimerval) int32
-type sigTabT struct {
- flags int32
- name string
-}
-
type sigctxt struct {
info *_siginfo_t
ctxt unsafe.Pointer
diff --git a/libgo/go/runtime/signal_sighandler.go b/libgo/go/runtime/signal_sighandler.go
index 378c68e1d90..c042162e7e6 100644
--- a/libgo/go/runtime/signal_sighandler.go
+++ b/libgo/go/runtime/signal_sighandler.go
@@ -92,9 +92,9 @@ func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) {
}
print("PC=", hex(sigpc), " m=", _g_.m.id, " sigcode=", c.sigcode(), "\n")
- if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
+ if _g_.m.lockedg != 0 && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
print("signal arrived during cgo execution\n")
- gp = _g_.m.lockedg
+ gp = _g_.m.lockedg.ptr()
}
print("\n")
@@ -111,7 +111,7 @@ func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) {
if docrash {
crashing++
- if crashing < sched.mcount-int32(extraMCount) {
+ if crashing < mcount()-int32(extraMCount) {
// There are other m's that need to dump their stacks.
// Relay SIGQUIT to the next m by sending it to the current process.
// All m's that have already received SIGQUIT have signal masks blocking
diff --git a/libgo/go/runtime/signal_unix.go b/libgo/go/runtime/signal_unix.go
index 3237e18765f..85171484a90 100644
--- a/libgo/go/runtime/signal_unix.go
+++ b/libgo/go/runtime/signal_unix.go
@@ -8,7 +8,6 @@ package runtime
import (
"runtime/internal/atomic"
- "runtime/internal/sys"
"unsafe"
)
@@ -16,6 +15,16 @@ import (
//go:linkname initsig runtime.initsig
//go:linkname sigtrampgo runtime.sigtrampgo
+// sigTabT is the type of an entry in the global sigtable array.
+// sigtable is inherently system dependent, and appears in OS-specific files,
+// but sigTabT is the same for all Unixy systems.
+// The sigtable array is indexed by a system signal number to get the flags
+// and printable name of each signal.
+type sigTabT struct {
+ flags int32
+ name string
+}
+
//go:linkname os_sigpipe os.sigpipe
func os_sigpipe() {
systemstack(sigpipe)
@@ -275,6 +284,12 @@ func sigpipe() {
// sigtrampgo is called from the signal handler function, sigtramp,
// written in assembly code.
// This is called by the signal handler, and the world may be stopped.
+//
+// It must be nosplit because getg() is still the G that was running
+// (if any) when the signal was delivered, but it's (usually) called
+// on the gsignal stack. Until this switches the G to gsignal, the
+// stack bounds check won't work.
+//
//go:nosplit
//go:nowritebarrierrec
func sigtrampgo(sig uint32, info *_siginfo_t, ctx unsafe.Pointer) {
@@ -355,8 +370,9 @@ func sigpanic() {
//go:nosplit
//go:nowritebarrierrec
func dieFromSignal(sig uint32) {
- setsig(sig, _SIG_DFL)
unblocksig(sig)
+ // Mark the signal as unhandled to ensure it is forwarded.
+ atomic.Store(&handlingSig[sig], 0)
raise(sig)
// That should have killed us. On some systems, though, raise
@@ -368,6 +384,22 @@ func dieFromSignal(sig uint32) {
osyield()
osyield()
+ // If that didn't work, try _SIG_DFL.
+ setsig(sig, _SIG_DFL)
+ raise(sig)
+
+ osyield()
+ osyield()
+ osyield()
+
+ // On Darwin we may still fail to die, because raise sends the
+ // signal to the whole process rather than just the current thread,
+ // and osyield just sleeps briefly rather than letting all other
+ // threads run. See issue 20315. Sleep longer.
+ if GOOS == "darwin" {
+ usleep(100)
+ }
+
// If we are still somehow running, just exit with the wrong status.
exit(2)
}
@@ -434,7 +466,7 @@ func crash() {
// this means the OS X core file will be >128 GB and even on a zippy
// workstation can take OS X well over an hour to write (uninterruptible).
// Save users from making that mistake.
- if sys.PtrSize == 8 {
+ if GOARCH == "amd64" {
return
}
}
@@ -463,7 +495,7 @@ func ensureSigM() {
var sigBlocked sigset
sigfillset(&sigBlocked)
for i := range sigtable {
- if sigtable[i].flags&_SigUnblock != 0 {
+ if !blockableSig(uint32(i)) {
sigdelset(&sigBlocked, i)
}
}
@@ -475,7 +507,7 @@ func ensureSigM() {
sigdelset(&sigBlocked, int(sig))
}
case sig := <-disableSigChan:
- if sig > 0 {
+ if sig > 0 && blockableSig(sig) {
sigaddset(&sigBlocked, int(sig))
}
}
@@ -536,17 +568,23 @@ func sigfwdgo(sig uint32, info *_siginfo_t, ctx unsafe.Pointer) bool {
return false
}
fwdFn := atomic.Loaduintptr(&fwdSig[sig])
+ flags := sigtable[sig].flags
- if !signalsOK {
- // The only way we can get here is if we are in a
- // library or archive, we installed a signal handler
- // at program startup, but the Go runtime has not yet
- // been initialized.
+ // If we aren't handling the signal, forward it.
+ if atomic.Load(&handlingSig[sig]) == 0 || !signalsOK {
+ // If the signal is ignored, doing nothing is the same as forwarding.
+ if fwdFn == _SIG_IGN || (fwdFn == _SIG_DFL && flags&_SigIgn != 0) {
+ return true
+ }
+ // We are not handling the signal and there is no other handler to forward to.
+ // Crash with the default behavior.
if fwdFn == _SIG_DFL {
+ setsig(sig, _SIG_DFL)
dieFromSignal(sig)
- } else {
- sigfwd(fwdFn, sig, info, ctx)
+ return false
}
+
+ sigfwd(fwdFn, sig, info, ctx)
return true
}
@@ -555,18 +593,6 @@ func sigfwdgo(sig uint32, info *_siginfo_t, ctx unsafe.Pointer) bool {
return false
}
- // If we aren't handling the signal, forward it.
- // Really if we aren't handling the signal, we shouldn't get here,
- // but on Darwin setsigstack can lead us here because it sets
- // the sa_tramp field. The sa_tramp field is not returned by
- // sigaction, so the fix for that is non-obvious.
- if atomic.Load(&handlingSig[sig]) == 0 {
- sigfwd(fwdFn, sig, info, ctx)
- return true
- }
-
- flags := sigtable[sig].flags
-
c := sigctxt{info, ctx}
// Only forward synchronous signals and SIGPIPE.
// Unfortunately, user generated SIGPIPEs will also be forwarded, because si_code
@@ -678,7 +704,7 @@ func minitSignalStack() {
func minitSignalMask() {
nmask := getg().m.sigmask
for i := range sigtable {
- if sigtable[i].flags&_SigUnblock != 0 {
+ if !blockableSig(uint32(i)) {
sigdelset(&nmask, i)
}
}
@@ -694,3 +720,22 @@ func unminitSignals() {
signalstack(nil, 0)
}
}
+
+// blockableSig returns whether sig may be blocked by the signal mask.
+// We never want to block the signals marked _SigUnblock;
+// these are the synchronous signals that turn into a Go panic.
+// In a Go program--not a c-archive/c-shared--we never want to block
+// the signals marked _SigKill or _SigThrow, as otherwise it's possible
+// for all running threads to block them and delay their delivery until
+// we start a new thread. When linked into a C program we let the C code
+// decide on the disposition of those signals.
+func blockableSig(sig uint32) bool {
+ flags := sigtable[sig].flags
+ if flags&_SigUnblock != 0 {
+ return false
+ }
+ if isarchive || islibrary {
+ return true
+ }
+ return flags&(_SigKill|_SigThrow) == 0
+}
diff --git a/libgo/go/runtime/sigqueue.go b/libgo/go/runtime/sigqueue.go
index cd036ce364c..b108c39cc85 100644
--- a/libgo/go/runtime/sigqueue.go
+++ b/libgo/go/runtime/sigqueue.go
@@ -45,13 +45,14 @@ import (
// as there is no connection between handling a signal and receiving one,
// but atomic instructions should minimize it.
var sig struct {
- note note
- mask [(_NSIG + 31) / 32]uint32
- wanted [(_NSIG + 31) / 32]uint32
- ignored [(_NSIG + 31) / 32]uint32
- recv [(_NSIG + 31) / 32]uint32
- state uint32
- inuse bool
+ note note
+ mask [(_NSIG + 31) / 32]uint32
+ wanted [(_NSIG + 31) / 32]uint32
+ ignored [(_NSIG + 31) / 32]uint32
+ recv [(_NSIG + 31) / 32]uint32
+ state uint32
+ delivering uint32
+ inuse bool
}
const (
@@ -60,15 +61,20 @@ const (
sigSending
)
-// Called from sighandler to send a signal back out of the signal handling thread.
-// Reports whether the signal was sent. If not, the caller typically crashes the program.
+// sigsend delivers a signal from sighandler to the internal signal delivery queue.
+// It reports whether the signal was sent. If not, the caller typically crashes the program.
+// It runs from the signal handler, so it's limited in what it can do.
func sigsend(s uint32) bool {
bit := uint32(1) << uint(s&31)
if !sig.inuse || s >= uint32(32*len(sig.wanted)) {
return false
}
+ atomic.Xadd(&sig.delivering, 1)
+ // We are running in the signal handler; defer is not available.
+
if w := atomic.Load(&sig.wanted[s/32]); w&bit == 0 {
+ atomic.Xadd(&sig.delivering, -1)
return false
}
@@ -76,6 +82,7 @@ func sigsend(s uint32) bool {
for {
mask := sig.mask[s/32]
if mask&bit != 0 {
+ atomic.Xadd(&sig.delivering, -1)
return true // signal already in queue
}
if atomic.Cas(&sig.mask[s/32], mask, mask|bit) {
@@ -104,6 +111,7 @@ Send:
}
}
+ atomic.Xadd(&sig.delivering, -1)
return true
}
@@ -155,6 +163,15 @@ func signal_recv() uint32 {
// by the os/signal package.
//go:linkname signalWaitUntilIdle os_signal.signalWaitUntilIdle
func signalWaitUntilIdle() {
+ // Although the signals we care about have been removed from
+ // sig.wanted, it is possible that another thread has received
+ // a signal, has read from sig.wanted, is now updating sig.mask,
+ // and has not yet woken up the processor thread. We need to wait
+ // until all current signal deliveries have completed.
+ for atomic.Load(&sig.delivering) != 0 {
+ Gosched()
+ }
+
// Although WaitUntilIdle seems like the right name for this
// function, the state we are looking for is sigReceiving, not
// sigIdle. The sigIdle state is really more like sigProcessing.
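
sig.delivering is a small in-flight counter: sigsend increments it before looking at sig.wanted and decrements it on every return path, so signalWaitUntilIdle can spin (yielding via Gosched) until no delivery is still mid-way through, even after the signals have been dropped from sig.wanted. The same pattern detached from the signal machinery; note the real sigsend cannot use defer because it runs inside a signal handler:

package main

import (
        "fmt"
        "runtime"
        "sync/atomic"
)

var inflight uint32 // number of deliveries currently in progress

// deliver mimics sigsend: bump the counter first, drop it on every
// exit path, whether or not the item was accepted.
func deliver(accept bool) bool {
        atomic.AddUint32(&inflight, 1)
        defer atomic.AddUint32(&inflight, ^uint32(0)) // -1; sigsend does this by hand
        return accept
}

// waitUntilIdle mimics signalWaitUntilIdle: spin until nothing is in flight.
func waitUntilIdle() {
        for atomic.LoadUint32(&inflight) != 0 {
                runtime.Gosched()
        }
}

func main() {
        for i := 0; i < 3; i++ {
                go deliver(i%2 == 0)
        }
        waitUntilIdle() // returns once no deliver call is mid-way through
        fmt.Println("no delivery in flight")
}
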
diff --git a/libgo/go/runtime/sizeclasses.go b/libgo/go/runtime/sizeclasses.go
index 5366564afda..9e17b001d3e 100644
--- a/libgo/go/runtime/sizeclasses.go
+++ b/libgo/go/runtime/sizeclasses.go
@@ -3,73 +3,73 @@
package runtime
-// class bytes/obj bytes/span objects waste bytes
-// 1 8 8192 1024 0
-// 2 16 8192 512 0
-// 3 32 8192 256 0
-// 4 48 8192 170 32
-// 5 64 8192 128 0
-// 6 80 8192 102 32
-// 7 96 8192 85 32
-// 8 112 8192 73 16
-// 9 128 8192 64 0
-// 10 144 8192 56 128
-// 11 160 8192 51 32
-// 12 176 8192 46 96
-// 13 192 8192 42 128
-// 14 208 8192 39 80
-// 15 224 8192 36 128
-// 16 240 8192 34 32
-// 17 256 8192 32 0
-// 18 288 8192 28 128
-// 19 320 8192 25 192
-// 20 352 8192 23 96
-// 21 384 8192 21 128
-// 22 416 8192 19 288
-// 23 448 8192 18 128
-// 24 480 8192 17 32
-// 25 512 8192 16 0
-// 26 576 8192 14 128
-// 27 640 8192 12 512
-// 28 704 8192 11 448
-// 29 768 8192 10 512
-// 30 896 8192 9 128
-// 31 1024 8192 8 0
-// 32 1152 8192 7 128
-// 33 1280 8192 6 512
-// 34 1408 16384 11 896
-// 35 1536 8192 5 512
-// 36 1792 16384 9 256
-// 37 2048 8192 4 0
-// 38 2304 16384 7 256
-// 39 2688 8192 3 128
-// 40 3072 24576 8 0
-// 41 3200 16384 5 384
-// 42 3456 24576 7 384
-// 43 4096 8192 2 0
-// 44 4864 24576 5 256
-// 45 5376 16384 3 256
-// 46 6144 24576 4 0
-// 47 6528 32768 5 128
-// 48 6784 40960 6 256
-// 49 6912 49152 7 768
-// 50 8192 8192 1 0
-// 51 9472 57344 6 512
-// 52 9728 49152 5 512
-// 53 10240 40960 4 0
-// 54 10880 32768 3 128
-// 55 12288 24576 2 0
-// 56 13568 40960 3 256
-// 57 14336 57344 4 0
-// 58 16384 16384 1 0
-// 59 18432 73728 4 0
-// 60 19072 57344 3 128
-// 61 20480 40960 2 0
-// 62 21760 65536 3 256
-// 63 24576 24576 1 0
-// 64 27264 81920 3 128
-// 65 28672 57344 2 0
-// 66 32768 32768 1 0
+// class bytes/obj bytes/span objects tail waste max waste
+// 1 8 8192 1024 0 87.50%
+// 2 16 8192 512 0 43.75%
+// 3 32 8192 256 0 46.88%
+// 4 48 8192 170 32 31.52%
+// 5 64 8192 128 0 23.44%
+// 6 80 8192 102 32 19.07%
+// 7 96 8192 85 32 15.95%
+// 8 112 8192 73 16 13.56%
+// 9 128 8192 64 0 11.72%
+// 10 144 8192 56 128 11.82%
+// 11 160 8192 51 32 9.73%
+// 12 176 8192 46 96 9.59%
+// 13 192 8192 42 128 9.25%
+// 14 208 8192 39 80 8.12%
+// 15 224 8192 36 128 8.15%
+// 16 240 8192 34 32 6.62%
+// 17 256 8192 32 0 5.86%
+// 18 288 8192 28 128 12.16%
+// 19 320 8192 25 192 11.80%
+// 20 352 8192 23 96 9.88%
+// 21 384 8192 21 128 9.51%
+// 22 416 8192 19 288 10.71%
+// 23 448 8192 18 128 8.37%
+// 24 480 8192 17 32 6.82%
+// 25 512 8192 16 0 6.05%
+// 26 576 8192 14 128 12.33%
+// 27 640 8192 12 512 15.48%
+// 28 704 8192 11 448 13.93%
+// 29 768 8192 10 512 13.94%
+// 30 896 8192 9 128 15.52%
+// 31 1024 8192 8 0 12.40%
+// 32 1152 8192 7 128 12.41%
+// 33 1280 8192 6 512 15.55%
+// 34 1408 16384 11 896 14.00%
+// 35 1536 8192 5 512 14.00%
+// 36 1792 16384 9 256 15.57%
+// 37 2048 8192 4 0 12.45%
+// 38 2304 16384 7 256 12.46%
+// 39 2688 8192 3 128 15.59%
+// 40 3072 24576 8 0 12.47%
+// 41 3200 16384 5 384 6.22%
+// 42 3456 24576 7 384 8.83%
+// 43 4096 8192 2 0 15.60%
+// 44 4864 24576 5 256 16.65%
+// 45 5376 16384 3 256 10.92%
+// 46 6144 24576 4 0 12.48%
+// 47 6528 32768 5 128 6.23%
+// 48 6784 40960 6 256 4.36%
+// 49 6912 49152 7 768 3.37%
+// 50 8192 8192 1 0 15.61%
+// 51 9472 57344 6 512 14.28%
+// 52 9728 49152 5 512 3.64%
+// 53 10240 40960 4 0 4.99%
+// 54 10880 32768 3 128 6.24%
+// 55 12288 24576 2 0 11.45%
+// 56 13568 40960 3 256 9.99%
+// 57 14336 57344 4 0 5.35%
+// 58 16384 16384 1 0 12.49%
+// 59 18432 73728 4 0 11.11%
+// 60 19072 57344 3 128 3.57%
+// 61 20480 40960 2 0 6.87%
+// 62 21760 65536 3 256 6.25%
+// 63 24576 24576 1 0 11.45%
+// 64 27264 81920 3 128 10.00%
+// 65 28672 57344 2 0 4.91%
+// 66 32768 32768 1 0 12.50%
const (
_MaxSmallSize = 32768
diff --git a/libgo/go/runtime/slice.go b/libgo/go/runtime/slice.go
index f61f85e0fcb..ec5aa640222 100644
--- a/libgo/go/runtime/slice.go
+++ b/libgo/go/runtime/slice.go
@@ -23,6 +23,13 @@ type slice struct {
cap int
}
+// A notInHeapSlice is a slice backed by go:notinheap memory.
+type notInHeapSlice struct {
+ array *notInHeap
+ len int
+ cap int
+}
+
// maxElems is a lookup table containing the maximum capacity for a slice.
// The index is the size of the slice element.
var maxElems = [...]uintptr{
@@ -85,7 +92,7 @@ func makeslice64(et *_type, len64, cap64 int64) slice {
// The new slice's length is set to the requested capacity.
func growslice(et *_type, old slice, cap int) slice {
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&et))
+ callerpc := getcallerpc()
racereadrangepc(old.array, uintptr(old.len*int(et.size)), callerpc, funcPC(growslice))
}
if msanenabled {
@@ -109,12 +116,20 @@ func growslice(et *_type, old slice, cap int) slice {
if old.len < 1024 {
newcap = doublecap
} else {
- for newcap < cap {
+ // Check 0 < newcap to detect overflow
+ // and prevent an infinite loop.
+ for 0 < newcap && newcap < cap {
newcap += newcap / 4
}
+ // Set newcap to the requested cap when
+ // the newcap calculation overflowed.
+ if newcap <= 0 {
+ newcap = cap
+ }
}
}
+ var overflow bool
var lenmem, newlenmem, capmem uintptr
const ptrSize = unsafe.Sizeof((*byte)(nil))
switch et.size {
@@ -122,20 +137,37 @@ func growslice(et *_type, old slice, cap int) slice {
lenmem = uintptr(old.len)
newlenmem = uintptr(cap)
capmem = roundupsize(uintptr(newcap))
+ overflow = uintptr(newcap) > _MaxMem
newcap = int(capmem)
case ptrSize:
lenmem = uintptr(old.len) * ptrSize
newlenmem = uintptr(cap) * ptrSize
capmem = roundupsize(uintptr(newcap) * ptrSize)
+ overflow = uintptr(newcap) > _MaxMem/ptrSize
newcap = int(capmem / ptrSize)
default:
lenmem = uintptr(old.len) * et.size
newlenmem = uintptr(cap) * et.size
capmem = roundupsize(uintptr(newcap) * et.size)
+ overflow = uintptr(newcap) > maxSliceCap(et.size)
newcap = int(capmem / et.size)
}
- if cap < old.cap || uintptr(newcap) > maxSliceCap(et.size) {
+ // The check of overflow (uintptr(newcap) > maxSliceCap(et.size))
+ // in addition to capmem > _MaxMem is needed to prevent an overflow
+ // which can be used to trigger a segfault on 32bit architectures
+ // with this example program:
+ //
+ // type T [1<<27 + 1]int64
+ //
+ // var d T
+ // var s []T
+ //
+ // func main() {
+ // s = append(s, d, d, d, d)
+ // print(len(s), "\n")
+ // }
+ if cap < old.cap || overflow || capmem > _MaxMem {
panic(errorString("growslice: cap out of range"))
}
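
The 0 < newcap guard matters because adding newcap/4 repeatedly to a large int can wrap negative and otherwise loop forever; on overflow the code falls back to the requested capacity and relies on the _MaxMem / maxSliceCap checks to reject it. The capacity rule in isolation, as a runnable sketch with toy limits rather than the runtime's constants:

package main

import "fmt"

// nextCap mirrors the growth rule above: jump straight to the request
// if it is more than double, double small slices, grow large ones by
// 25% steps, and fall back to the request if those steps overflow int.
func nextCap(oldLen, oldCap, want int) int {
        if want > 2*oldCap {
                return want
        }
        if oldLen < 1024 {
                return 2 * oldCap
        }
        newcap := oldCap
        // Check 0 < newcap to detect overflow and avoid spinning forever.
        for 0 < newcap && newcap < want {
                newcap += newcap / 4
        }
        if newcap <= 0 {
                newcap = want
        }
        return newcap
}

func main() {
        const maxInt = int(^uint(0) >> 1)
        fmt.Println(nextCap(4, 4, 5))          // 8: small slices double
        fmt.Println(nextCap(2048, 2048, 3000)) // 3200: 25% steps
        fmt.Println(nextCap(2048, maxInt/2, maxInt/2*2)) // 25% steps overflow: falls back to the request
}
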
@@ -176,7 +208,7 @@ func slicecopy(to, fm slice, width uintptr) int {
}
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&to))
+ callerpc := getcallerpc()
pc := funcPC(slicecopy)
racewriterangepc(to.array, uintptr(n*int(width)), callerpc, pc)
racereadrangepc(fm.array, uintptr(n*int(width)), callerpc, pc)
@@ -207,7 +239,7 @@ func slicestringcopy(to []byte, fm string) int {
}
if raceenabled {
- callerpc := getcallerpc(unsafe.Pointer(&to))
+ callerpc := getcallerpc()
pc := funcPC(slicestringcopy)
racewriterangepc(unsafe.Pointer(&to[0]), uintptr(n), callerpc, pc)
}
diff --git a/libgo/go/runtime/string.go b/libgo/go/runtime/string.go
index 7436ddfdf4b..e8df9a6b7c4 100644
--- a/libgo/go/runtime/string.go
+++ b/libgo/go/runtime/string.go
@@ -99,7 +99,7 @@ func slicebytetostring(buf *tmpBuf, b []byte) (str string) {
if raceenabled {
racereadrangepc(unsafe.Pointer(&b[0]),
uintptr(l),
- getcallerpc(unsafe.Pointer(&buf)),
+ getcallerpc(),
funcPC(slicebytetostring))
}
if msanenabled {
@@ -145,7 +145,7 @@ func slicebytetostringtmp(b []byte) string {
if raceenabled && len(b) > 0 {
racereadrangepc(unsafe.Pointer(&b[0]),
uintptr(len(b)),
- getcallerpc(unsafe.Pointer(&b)),
+ getcallerpc(),
funcPC(slicebytetostringtmp))
}
if msanenabled && len(b) > 0 {
@@ -194,7 +194,7 @@ func slicerunetostring(buf *tmpBuf, a []rune) string {
if raceenabled && len(a) > 0 {
racereadrangepc(unsafe.Pointer(&a[0]),
uintptr(len(a))*unsafe.Sizeof(a[0]),
- getcallerpc(unsafe.Pointer(&buf)),
+ getcallerpc(),
funcPC(slicerunetostring))
}
if msanenabled && len(a) > 0 {
diff --git a/libgo/go/runtime/stubs.go b/libgo/go/runtime/stubs.go
index 84fa1c79689..c454356b838 100644
--- a/libgo/go/runtime/stubs.go
+++ b/libgo/go/runtime/stubs.go
@@ -107,16 +107,21 @@ func reflect_memmove(to, from unsafe.Pointer, n uintptr) {
func memcmp(a, b unsafe.Pointer, size uintptr) int32
// exported value for testing
-var hashLoad = loadFactor
+var hashLoad = float32(loadFactorNum) / float32(loadFactorDen)
//go:nosplit
func fastrand() uint32 {
mp := getg().m
- fr := mp.fastrand
- mx := uint32(int32(fr)>>31) & 0xa8888eef
- fr = fr<<1 ^ mx
- mp.fastrand = fr
- return fr
+ // Implement xorshift64+: 2 32-bit xorshift sequences added together.
+ // Shift triplet [17,7,16] was calculated as indicated in Marsaglia's
+ // Xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf
+ // This generator passes the SmallCrush suite, part of TestU01 framework:
+ // http://simul.iro.umontreal.ca/testu01/tu01.html
+ s1, s0 := mp.fastrand[0], mp.fastrand[1]
+ s1 ^= s1 << 17
+ s1 = s1 ^ s0 ^ s1>>7 ^ s0>>16
+ mp.fastrand[0], mp.fastrand[1] = s0, s1
+ return s0 + s1
}
//go:nosplit
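
The new fastrand is an xorshift64+ variant built from two 32-bit words of per-M state: cheap, allocation free, and never stuck at zero as long as the seed is not entirely zero. A standalone version of the same generator, seeded with arbitrary constants purely for illustration:

package main

import "fmt"

// rng is xorshift64+ assembled from two 32-bit halves, mirroring the
// new fastrand above. The state must not be all zero.
type rng struct{ s0, s1 uint32 }

func (r *rng) next() uint32 {
        s1, s0 := r.s0, r.s1
        s1 ^= s1 << 17
        s1 = s1 ^ s0 ^ s1>>7 ^ s0>>16
        r.s0, r.s1 = s0, s1
        return s0 + s1
}

func main() {
        r := rng{s0: 0x9e3779b9, s1: 0x85ebca6b} // arbitrary nonzero seed
        for i := 0; i < 4; i++ {
                fmt.Printf("%08x\n", r.next())
        }
}
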
@@ -192,14 +197,16 @@ func publicationBarrier()
// getcallerpc returns the program counter (PC) of its caller's caller.
// getcallersp returns the stack pointer (SP) of its caller's caller.
-// For both, the argp must be a pointer to the caller's first function argument.
+// argp must be a pointer to the caller's first function argument.
// The implementation may or may not use argp, depending on
-// the architecture.
+// the architecture. The implementation may be a compiler
+// intrinsic; there is not necessarily code implementing this
+// on every platform.
//
// For example:
//
// func f(arg1, arg2, arg3 int) {
-// pc := getcallerpc(unsafe.Pointer(&arg1))
+// pc := getcallerpc()
// sp := getcallersp(unsafe.Pointer(&arg1))
// }
//
@@ -219,7 +226,7 @@ func publicationBarrier()
// immediately and can only be passed to nosplit functions.
//go:noescape
-func getcallerpc(argp unsafe.Pointer) uintptr
+func getcallerpc() uintptr
//go:noescape
func getcallersp(argp unsafe.Pointer) uintptr
@@ -430,7 +437,7 @@ func setpagesize(s uintptr) {
}
}
-// Temporary for gccgo until we port mgc.go.
+// Called by C code during library initialization.
//go:linkname runtime_m0 runtime.runtime_m0
func runtime_m0() *m {
return &m0
diff --git a/libgo/go/runtime/stubs2.go b/libgo/go/runtime/stubs2.go
index 490405d51fd..e7607722a64 100644
--- a/libgo/go/runtime/stubs2.go
+++ b/libgo/go/runtime/stubs2.go
@@ -23,3 +23,10 @@ func write(fd uintptr, p unsafe.Pointer, n int32) int32
//go:noescape
func open(name *byte, mode, perm int32) int32
+
+// exitThread terminates the current thread, writing *wait = 0 when
+// the stack is safe to reclaim.
+func exitThread(wait *uint32) {
+ // This is never used by gccgo.
+ throw("exitThread")
+}
diff --git a/libgo/go/runtime/testdata/testprog/gc.go b/libgo/go/runtime/testdata/testprog/gc.go
index 744b6108e2b..542451753b7 100644
--- a/libgo/go/runtime/testdata/testprog/gc.go
+++ b/libgo/go/runtime/testdata/testprog/gc.go
@@ -25,6 +25,7 @@ func GCSys() {
runtime.GC()
runtime.ReadMemStats(memstats)
sys := memstats.Sys
+ fmt.Printf("original sys: %#x\n", sys)
runtime.MemProfileRate = 0 // disable profiler
@@ -36,6 +37,8 @@ func GCSys() {
// Should only be using a few MB.
// We allocated 100 MB or (if not short) 1 GB.
runtime.ReadMemStats(memstats)
+ fmt.Printf("final sys: %#x\n", memstats.Sys)
+ fmt.Printf("%#v\n", *memstats)
if sys > memstats.Sys {
sys = 0
} else {
diff --git a/libgo/go/runtime/testdata/testprog/gettid.go b/libgo/go/runtime/testdata/testprog/gettid.go
new file mode 100644
index 00000000000..1b3e29ab08e
--- /dev/null
+++ b/libgo/go/runtime/testdata/testprog/gettid.go
@@ -0,0 +1,29 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build linux
+
+package main
+
+import (
+ "bytes"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "syscall"
+)
+
+func gettid() int {
+ return syscall.Gettid()
+}
+
+func tidExists(tid int) (exists, supported bool) {
+ stat, err := ioutil.ReadFile(fmt.Sprintf("/proc/self/task/%d/stat", tid))
+ if os.IsNotExist(err) {
+ return false, true
+ }
+ // Check if it's a zombie thread.
+ state := bytes.Fields(stat)[2]
+ return !(len(state) == 1 && state[0] == 'Z'), true
+}
diff --git a/libgo/go/runtime/testdata/testprog/gettid_none.go b/libgo/go/runtime/testdata/testprog/gettid_none.go
new file mode 100644
index 00000000000..036db87e10e
--- /dev/null
+++ b/libgo/go/runtime/testdata/testprog/gettid_none.go
@@ -0,0 +1,15 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !linux
+
+package main
+
+func gettid() int {
+ return 0
+}
+
+func tidExists(tid int) (exists, supported bool) {
+ return false, false
+}
diff --git a/libgo/go/runtime/testdata/testprog/lockosthread.go b/libgo/go/runtime/testdata/testprog/lockosthread.go
new file mode 100644
index 00000000000..88c0d12e4c1
--- /dev/null
+++ b/libgo/go/runtime/testdata/testprog/lockosthread.go
@@ -0,0 +1,94 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "os"
+ "runtime"
+ "time"
+)
+
+var mainTID int
+
+func init() {
+ registerInit("LockOSThreadMain", func() {
+ // init is guaranteed to run on the main thread.
+ mainTID = gettid()
+ })
+ register("LockOSThreadMain", LockOSThreadMain)
+
+ registerInit("LockOSThreadAlt", func() {
+ // Lock the OS thread now so main runs on the main thread.
+ runtime.LockOSThread()
+ })
+ register("LockOSThreadAlt", LockOSThreadAlt)
+}
+
+func LockOSThreadMain() {
+ // gettid only works on Linux, so on other platforms this just
+ // checks that the runtime doesn't do anything terrible.
+
+ // This requires GOMAXPROCS=1 from the beginning to reliably
+ // start a goroutine on the main thread.
+ if runtime.GOMAXPROCS(-1) != 1 {
+ println("requires GOMAXPROCS=1")
+ os.Exit(1)
+ }
+
+ ready := make(chan bool, 1)
+ go func() {
+ // Because GOMAXPROCS=1, this *should* be on the main
+ // thread. Stay there.
+ runtime.LockOSThread()
+ if mainTID != 0 && gettid() != mainTID {
+ println("failed to start goroutine on main thread")
+ os.Exit(1)
+ }
+ // Exit with the thread locked, which should exit the
+ // main thread.
+ ready <- true
+ }()
+ <-ready
+ time.Sleep(1 * time.Millisecond)
+ // Check that this goroutine is still running on a different
+ // thread.
+ if mainTID != 0 && gettid() == mainTID {
+ println("goroutine migrated to locked thread")
+ os.Exit(1)
+ }
+ println("OK")
+}
+
+func LockOSThreadAlt() {
+ // This is running locked to the main OS thread.
+
+ var subTID int
+ ready := make(chan bool, 1)
+ go func() {
+ // This goroutine must be running on a new thread.
+ runtime.LockOSThread()
+ subTID = gettid()
+ ready <- true
+ // Exit with the thread locked.
+ }()
+ <-ready
+ runtime.UnlockOSThread()
+ for i := 0; i < 100; i++ {
+ time.Sleep(1 * time.Millisecond)
+ // Check that this goroutine is running on a different thread.
+ if subTID != 0 && gettid() == subTID {
+ println("locked thread reused")
+ os.Exit(1)
+ }
+ exists, supported := tidExists(subTID)
+ if !supported || !exists {
+ goto ok
+ }
+ }
+ println("sub thread", subTID, "still running")
+ return
+ok:
+ println("OK")
+}
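
Both registered tests lean on the Go 1.10 behavior these runtime changes implement: a goroutine that exits while still holding LockOSThread causes its OS thread to be terminated instead of being returned to the pool. A minimal program showing the user-visible shape of that contract (it only exercises the API; proving the thread really went away needs OS-specific checks like the gettid/tidExists helpers above):

package main

import (
        "fmt"
        "runtime"
)

func main() {
        done := make(chan struct{})
        go func() {
                // Pin this goroutine to its OS thread and never unlock:
                // when the goroutine returns, the runtime tears the thread
                // down rather than reusing it (Go 1.10 semantics).
                runtime.LockOSThread()
                close(done)
        }()
        <-done
        fmt.Println("locked goroutine exited; its thread will be terminated")
}
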
diff --git a/libgo/go/runtime/testdata/testprog/syscall_windows.go b/libgo/go/runtime/testdata/testprog/syscall_windows.go
index 6e6782e987a..b4b66441b83 100644
--- a/libgo/go/runtime/testdata/testprog/syscall_windows.go
+++ b/libgo/go/runtime/testdata/testprog/syscall_windows.go
@@ -4,11 +4,18 @@
package main
-import "syscall"
+import (
+ "internal/syscall/windows"
+ "runtime"
+ "sync"
+ "syscall"
+ "unsafe"
+)
func init() {
register("RaiseException", RaiseException)
register("ZeroDivisionException", ZeroDivisionException)
+ register("StackMemory", StackMemory)
}
func RaiseException() {
@@ -25,3 +32,39 @@ func ZeroDivisionException() {
z := x / y
println(z)
}
+
+func getPagefileUsage() (uintptr, error) {
+ p, err := syscall.GetCurrentProcess()
+ if err != nil {
+ return 0, err
+ }
+ var m windows.PROCESS_MEMORY_COUNTERS
+ err = windows.GetProcessMemoryInfo(p, &m, uint32(unsafe.Sizeof(m)))
+ if err != nil {
+ return 0, err
+ }
+ return m.PagefileUsage, nil
+}
+
+func StackMemory() {
+ mem1, err := getPagefileUsage()
+ if err != nil {
+ panic(err)
+ }
+ const threadCount = 100
+ var wg sync.WaitGroup
+ for i := 0; i < threadCount; i++ {
+ wg.Add(1)
+ go func() {
+ runtime.LockOSThread()
+ wg.Done()
+ select {}
+ }()
+ }
+ wg.Wait()
+ mem2, err := getPagefileUsage()
+ if err != nil {
+ panic(err)
+ }
+ print((mem2 - mem1) / threadCount)
+}
diff --git a/libgo/go/runtime/testdata/testprogcgo/callback.go b/libgo/go/runtime/testdata/testprogcgo/callback.go
index a49fc19b284..2f7568c2c4e 100644
--- a/libgo/go/runtime/testdata/testprogcgo/callback.go
+++ b/libgo/go/runtime/testdata/testprogcgo/callback.go
@@ -34,6 +34,7 @@ import "C"
import (
"fmt"
+ "os"
"runtime"
)
@@ -68,7 +69,10 @@ func grow1(x, sum *int) int {
}
func CgoCallbackGC() {
- const P = 100
+ P := 100
+ if os.Getenv("RUNTIME_TESTING_SHORT") != "" {
+ P = 10
+ }
done := make(chan bool)
// allocate a bunch of stack frames and spray them with pointers
for i := 0; i < P; i++ {
diff --git a/libgo/go/runtime/testdata/testprogcgo/catchpanic.go b/libgo/go/runtime/testdata/testprogcgo/catchpanic.go
new file mode 100644
index 00000000000..55a606d1bc8
--- /dev/null
+++ b/libgo/go/runtime/testdata/testprogcgo/catchpanic.go
@@ -0,0 +1,46 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !plan9,!windows
+
+package main
+
+/*
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+
+static void abrthandler(int signum) {
+ if (signum == SIGABRT) {
+ exit(0); // success
+ }
+}
+
+void registerAbortHandler() {
+ struct sigaction act;
+ memset(&act, 0, sizeof act);
+ act.sa_handler = abrthandler;
+ sigaction(SIGABRT, &act, NULL);
+}
+
+static void __attribute__ ((constructor)) sigsetup(void) {
+ if (getenv("CGOCATCHPANIC_EARLY_HANDLER") == NULL)
+ return;
+ registerAbortHandler();
+}
+*/
+import "C"
+import "os"
+
+func init() {
+ register("CgoCatchPanic", CgoCatchPanic)
+}
+
+// Test that the SIGABRT raised by panic can be caught by an early signal handler.
+func CgoCatchPanic() {
+ if _, ok := os.LookupEnv("CGOCATCHPANIC_EARLY_HANDLER"); !ok {
+ C.registerAbortHandler()
+ }
+ panic("catch me")
+}
diff --git a/libgo/go/runtime/testdata/testprogcgo/cgo.go b/libgo/go/runtime/testdata/testprogcgo/cgo.go
index 209524a24db..a587db385b3 100644
--- a/libgo/go/runtime/testdata/testprogcgo/cgo.go
+++ b/libgo/go/runtime/testdata/testprogcgo/cgo.go
@@ -52,7 +52,11 @@ func CgoSignalDeadlock() {
time.Sleep(time.Millisecond)
start := time.Now()
var times []time.Duration
- for i := 0; i < 64; i++ {
+ n := 64
+ if os.Getenv("RUNTIME_TEST_SHORT") != "" {
+ n = 16
+ }
+ for i := 0; i < n; i++ {
go func() {
runtime.LockOSThread()
select {}
diff --git a/libgo/go/runtime/testdata/testprogcgo/lockosthread.c b/libgo/go/runtime/testdata/testprogcgo/lockosthread.c
new file mode 100644
index 00000000000..b10cc4f3b92
--- /dev/null
+++ b/libgo/go/runtime/testdata/testprogcgo/lockosthread.c
@@ -0,0 +1,13 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !plan9,!windows
+
+#include <stdint.h>
+
+uint32_t threadExited;
+
+void setExited(void *x) {
+ __sync_fetch_and_add(&threadExited, 1);
+}
diff --git a/libgo/go/runtime/testdata/testprogcgo/lockosthread.go b/libgo/go/runtime/testdata/testprogcgo/lockosthread.go
new file mode 100644
index 00000000000..36423d9eb0c
--- /dev/null
+++ b/libgo/go/runtime/testdata/testprogcgo/lockosthread.go
@@ -0,0 +1,111 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !plan9,!windows
+
+package main
+
+import (
+ "os"
+ "runtime"
+ "sync/atomic"
+ "time"
+ "unsafe"
+)
+
+/*
+#include <pthread.h>
+#include <stdint.h>
+
+extern uint32_t threadExited;
+
+void setExited(void *x);
+*/
+import "C"
+
+var mainThread C.pthread_t
+
+func init() {
+ registerInit("LockOSThreadMain", func() {
+ // init is guaranteed to run on the main thread.
+ mainThread = C.pthread_self()
+ })
+ register("LockOSThreadMain", LockOSThreadMain)
+
+ registerInit("LockOSThreadAlt", func() {
+ // Lock the OS thread now so main runs on the main thread.
+ runtime.LockOSThread()
+ })
+ register("LockOSThreadAlt", LockOSThreadAlt)
+}
+
+func LockOSThreadMain() {
+ // This requires GOMAXPROCS=1 from the beginning to reliably
+ // start a goroutine on the main thread.
+ if runtime.GOMAXPROCS(-1) != 1 {
+ println("requires GOMAXPROCS=1")
+ os.Exit(1)
+ }
+
+ ready := make(chan bool, 1)
+ go func() {
+ // Because GOMAXPROCS=1, this *should* be on the main
+ // thread. Stay there.
+ runtime.LockOSThread()
+ self := C.pthread_self()
+ if C.pthread_equal(mainThread, self) == 0 {
+ println("failed to start goroutine on main thread")
+ os.Exit(1)
+ }
+ // Exit with the thread locked, which should exit the
+ // main thread.
+ ready <- true
+ }()
+ <-ready
+ time.Sleep(1 * time.Millisecond)
+ // Check that this goroutine is still running on a different
+ // thread.
+ self := C.pthread_self()
+ if C.pthread_equal(mainThread, self) != 0 {
+ println("goroutine migrated to locked thread")
+ os.Exit(1)
+ }
+ println("OK")
+}
+
+func LockOSThreadAlt() {
+ // This is running locked to the main OS thread.
+
+ var subThread C.pthread_t
+ ready := make(chan bool, 1)
+ C.threadExited = 0
+ go func() {
+ // This goroutine must be running on a new thread.
+ runtime.LockOSThread()
+ subThread = C.pthread_self()
+ // Register a pthread destructor so we can tell this
+ // thread has exited.
+ var key C.pthread_key_t
+ C.pthread_key_create(&key, (*[0]byte)(unsafe.Pointer(C.setExited)))
+ C.pthread_setspecific(key, unsafe.Pointer(new(int)))
+ ready <- true
+ // Exit with the thread locked.
+ }()
+ <-ready
+ for i := 0; i < 100; i++ {
+ time.Sleep(1 * time.Millisecond)
+ // Check that this goroutine is running on a different thread.
+ self := C.pthread_self()
+ if C.pthread_equal(subThread, self) != 0 {
+ println("locked thread reused")
+ os.Exit(1)
+ }
+ if atomic.LoadUint32((*uint32)(&C.threadExited)) != 0 {
+ println("OK")
+ return
+ }
+ }
+ println("sub thread still running")
+ os.Exit(1)
+}
diff --git a/libgo/go/runtime/testdata/testprogcgo/sigstack.go b/libgo/go/runtime/testdata/testprogcgo/sigstack.go
new file mode 100644
index 00000000000..e30a5592dcb
--- /dev/null
+++ b/libgo/go/runtime/testdata/testprogcgo/sigstack.go
@@ -0,0 +1,95 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !plan9,!windows
+
+// Test handling of Go-allocated signal stacks when calling from
+// C-created threads with and without signal stacks. (See issue
+// #22930.)
+
+package main
+
+/*
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#ifndef MAP_STACK
+#define MAP_STACK 0
+#endif
+
+extern void SigStackCallback();
+
+static void* WithSigStack(void* arg __attribute__((unused))) {
+ // Set up an alternate system stack.
+ void* base = mmap(0, SIGSTKSZ, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON|MAP_STACK, -1, 0);
+ if (base == MAP_FAILED) {
+ perror("mmap failed");
+ abort();
+ }
+ stack_t st = {}, ost = {};
+ st.ss_sp = (char*)base;
+ st.ss_flags = 0;
+ st.ss_size = SIGSTKSZ;
+ if (sigaltstack(&st, &ost) < 0) {
+ perror("sigaltstack failed");
+ abort();
+ }
+
+ // Call Go.
+ SigStackCallback();
+
+ // Disable signal stack and protect it so we can detect reuse.
+ if (ost.ss_flags & SS_DISABLE) {
+ // Darwin libsystem has a bug where it checks ss_size
+ // even if SS_DISABLE is set. (The kernel gets it right.)
+ ost.ss_size = SIGSTKSZ;
+ }
+ if (sigaltstack(&ost, NULL) < 0) {
+ perror("sigaltstack restore failed");
+ abort();
+ }
+ mprotect(base, SIGSTKSZ, PROT_NONE);
+ return NULL;
+}
+
+static void* WithoutSigStack(void* arg __attribute__((unused))) {
+ SigStackCallback();
+ return NULL;
+}
+
+static void DoThread(int sigstack) {
+ pthread_t tid;
+ if (sigstack) {
+ pthread_create(&tid, NULL, WithSigStack, NULL);
+ } else {
+ pthread_create(&tid, NULL, WithoutSigStack, NULL);
+ }
+ pthread_join(tid, NULL);
+}
+*/
+import "C"
+
+func init() {
+ register("SigStack", SigStack)
+}
+
+func SigStack() {
+ C.DoThread(0)
+ C.DoThread(1)
+ C.DoThread(0)
+ C.DoThread(1)
+ println("OK")
+}
+
+var BadPtr *int
+
+//export SigStackCallback
+func SigStackCallback() {
+ // Cause the Go signal handler to run.
+ defer func() { recover() }()
+ *BadPtr = 42
+}
diff --git a/libgo/go/runtime/testdata/testprogcgo/stack_windows.go b/libgo/go/runtime/testdata/testprogcgo/stack_windows.go
new file mode 100644
index 00000000000..846297a960c
--- /dev/null
+++ b/libgo/go/runtime/testdata/testprogcgo/stack_windows.go
@@ -0,0 +1,54 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import "C"
+import (
+ "internal/syscall/windows"
+ "runtime"
+ "sync"
+ "syscall"
+ "unsafe"
+)
+
+func init() {
+ register("StackMemory", StackMemory)
+}
+
+func getPagefileUsage() (uintptr, error) {
+ p, err := syscall.GetCurrentProcess()
+ if err != nil {
+ return 0, err
+ }
+ var m windows.PROCESS_MEMORY_COUNTERS
+ err = windows.GetProcessMemoryInfo(p, &m, uint32(unsafe.Sizeof(m)))
+ if err != nil {
+ return 0, err
+ }
+ return m.PagefileUsage, nil
+}
+
+func StackMemory() {
+ mem1, err := getPagefileUsage()
+ if err != nil {
+ panic(err)
+ }
+ const threadCount = 100
+ var wg sync.WaitGroup
+ for i := 0; i < threadCount; i++ {
+ wg.Add(1)
+ go func() {
+ runtime.LockOSThread()
+ wg.Done()
+ select {}
+ }()
+ }
+ wg.Wait()
+ mem2, err := getPagefileUsage()
+ if err != nil {
+ panic(err)
+ }
+ print((mem2 - mem1) / threadCount)
+}
diff --git a/libgo/go/runtime/time.go b/libgo/go/runtime/time.go
index f204830a6f7..93181fde600 100644
--- a/libgo/go/runtime/time.go
+++ b/libgo/go/runtime/time.go
@@ -6,14 +6,18 @@
package runtime
-import "unsafe"
+import (
+ "runtime/internal/sys"
+ "unsafe"
+)
// Package time knows the layout of this structure.
// If this struct changes, adjust ../time/sleep.go:/runtimeTimer.
// For GOOS=nacl, package syscall knows the layout of this structure.
// If this struct changes, adjust ../syscall/net_nacl.go:/runtimeTimer.
type timer struct {
- i int // heap index
+ tb *timersBucket // the bucket the timer lives in
+ i int // heap index
// Timer wakes up at when, and then at when+period, ... (period > 0 only)
// each time calling f(arg, now) in the timer goroutine, so f must be
@@ -25,7 +29,37 @@ type timer struct {
seq uintptr
}
-var timers struct {
+// timersLen is the length of timers array.
+//
+// Ideally, this would be set to GOMAXPROCS, but that would require
+// dynamic reallocation.
+//
+// The current value is a compromise between memory usage and performance
+// that should cover the majority of GOMAXPROCS values used in the wild.
+const timersLen = 64
+
+// timers contains "per-P" timer heaps.
+//
+// Timers are queued into the timersBucket associated with the current P,
+// so each P may work with its own timers independently of other P instances.
+//
+// Each timersBucket may be associated with multiple P
+// if GOMAXPROCS > timersLen.
+var timers [timersLen]struct {
+ timersBucket
+
+ // The padding should eliminate false sharing
+ // between timersBucket values.
+ pad [sys.CacheLineSize - unsafe.Sizeof(timersBucket{})%sys.CacheLineSize]byte
+}
+
+func (t *timer) assignBucket() *timersBucket {
+ id := uint8(getg().m.p.ptr().id) % timersLen
+ t.tb = &timers[id].timersBucket
+ return t.tb
+}
+
+type timersBucket struct {
lock mutex
gp *g
created bool
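
The single global timers struct becomes a fixed array of 64 cache-line-padded buckets, and addtimer pins each timer to the bucket picked from the current P's id, so Ps mostly contend only on their own heap. A toy version of the bucket selection and padding arithmetic, with sync.Mutex and a hard-coded line size standing in for the runtime's lock and sys.CacheLineSize:

package main

import (
        "fmt"
        "sync"
        "unsafe"
)

const (
        timersLen     = 64 // compromise between memory use and typical GOMAXPROCS
        cacheLineSize = 64 // assumed; the runtime uses sys.CacheLineSize
)

type bucket struct {
        lock sync.Mutex
        t    []int64 // toy heap of "when" values
}

// Pad every bucket so that neighbouring buckets never share a cache line.
var buckets [timersLen]struct {
        bucket
        pad [cacheLineSize - unsafe.Sizeof(bucket{})%cacheLineSize]byte
}

// bucketFor picks a bucket from a P id, as assignBucket does above.
func bucketFor(pid int32) *bucket {
        return &buckets[uint8(pid)%timersLen].bucket
}

func main() {
        b := bucketFor(3)
        b.lock.Lock()
        b.t = append(b.t, 42)
        b.lock.Unlock()
        fmt.Println(len(b.t), unsafe.Sizeof(buckets[0])%cacheLineSize) // 1 0
}
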
@@ -51,18 +85,20 @@ func timeSleep(ns int64) {
return
}
- t := getg().timer
+ gp := getg()
+ t := gp.timer
if t == nil {
t = new(timer)
- getg().timer = t
+ gp.timer = t
}
*t = timer{}
t.when = nanotime() + ns
t.f = goroutineReady
- t.arg = getg()
- lock(&timers.lock)
- addtimerLocked(t)
- goparkunlock(&timers.lock, "sleep", traceEvGoSleep, 2)
+ t.arg = gp
+ tb := t.assignBucket()
+ lock(&tb.lock)
+ tb.addtimerLocked(t)
+ goparkunlock(&tb.lock, "sleep", traceEvGoSleep, 2)
}
// startTimer adds t to the timer heap.
@@ -89,90 +125,98 @@ func goroutineReady(arg interface{}, seq uintptr) {
}
func addtimer(t *timer) {
- lock(&timers.lock)
- addtimerLocked(t)
- unlock(&timers.lock)
+ tb := t.assignBucket()
+ lock(&tb.lock)
+ tb.addtimerLocked(t)
+ unlock(&tb.lock)
}
// Add a timer to the heap and start or kick timerproc if the new timer is
// earlier than any of the others.
// Timers are locked.
-func addtimerLocked(t *timer) {
+func (tb *timersBucket) addtimerLocked(t *timer) {
// when must never be negative; otherwise timerproc will overflow
// during its delta calculation and never expire other runtime timers.
if t.when < 0 {
t.when = 1<<63 - 1
}
- t.i = len(timers.t)
- timers.t = append(timers.t, t)
- siftupTimer(t.i)
+ t.i = len(tb.t)
+ tb.t = append(tb.t, t)
+ siftupTimer(tb.t, t.i)
if t.i == 0 {
// siftup moved to top: new earliest deadline.
- if timers.sleeping {
- timers.sleeping = false
- notewakeup(&timers.waitnote)
+ if tb.sleeping {
+ tb.sleeping = false
+ notewakeup(&tb.waitnote)
}
- if timers.rescheduling {
- timers.rescheduling = false
- goready(timers.gp, 0)
+ if tb.rescheduling {
+ tb.rescheduling = false
+ goready(tb.gp, 0)
}
}
- if !timers.created {
- timers.created = true
+ if !tb.created {
+ tb.created = true
expectSystemGoroutine()
- go timerproc()
+ go timerproc(tb)
}
}
// Delete timer t from the heap.
// Do not need to update the timerproc: if it wakes up early, no big deal.
func deltimer(t *timer) bool {
- // Dereference t so that any panic happens before the lock is held.
- // Discard result, because t might be moving in the heap.
- _ = t.i
+ if t.tb == nil {
+ // t.tb can be nil if the user created a timer
+ // directly, without invoking startTimer, e.g.
+ // time.Ticker{C: c}
+ // In this case, return early without any deletion.
+ // See Issue 21874.
+ return false
+ }
- lock(&timers.lock)
+ tb := t.tb
+
+ lock(&tb.lock)
// t may not be registered anymore and may have
// a bogus i (typically 0, if generated by Go).
// Verify it before proceeding.
i := t.i
- last := len(timers.t) - 1
- if i < 0 || i > last || timers.t[i] != t {
- unlock(&timers.lock)
+ last := len(tb.t) - 1
+ if i < 0 || i > last || tb.t[i] != t {
+ unlock(&tb.lock)
return false
}
if i != last {
- timers.t[i] = timers.t[last]
- timers.t[i].i = i
+ tb.t[i] = tb.t[last]
+ tb.t[i].i = i
}
- timers.t[last] = nil
- timers.t = timers.t[:last]
+ tb.t[last] = nil
+ tb.t = tb.t[:last]
if i != last {
- siftupTimer(i)
- siftdownTimer(i)
+ siftupTimer(tb.t, i)
+ siftdownTimer(tb.t, i)
}
- unlock(&timers.lock)
+ unlock(&tb.lock)
return true
}
// Timerproc runs the time-driven events.
-// It sleeps until the next event in the timers heap.
+// It sleeps until the next event in the tb heap.
// If addtimer inserts a new earlier event, it wakes timerproc early.
-func timerproc() {
+func timerproc(tb *timersBucket) {
setSystemGoroutine()
- timers.gp = getg()
+ tb.gp = getg()
for {
- lock(&timers.lock)
- timers.sleeping = false
+ lock(&tb.lock)
+ tb.sleeping = false
now := nanotime()
delta := int64(-1)
for {
- if len(timers.t) == 0 {
+ if len(tb.t) == 0 {
delta = -1
break
}
- t := timers.t[0]
+ t := tb.t[0]
delta = t.when - now
if delta > 0 {
break
@@ -180,43 +224,43 @@ func timerproc() {
if t.period > 0 {
// leave in heap but adjust next time to fire
t.when += t.period * (1 + -delta/t.period)
- siftdownTimer(0)
+ siftdownTimer(tb.t, 0)
} else {
// remove from heap
- last := len(timers.t) - 1
+ last := len(tb.t) - 1
if last > 0 {
- timers.t[0] = timers.t[last]
- timers.t[0].i = 0
+ tb.t[0] = tb.t[last]
+ tb.t[0].i = 0
}
- timers.t[last] = nil
- timers.t = timers.t[:last]
+ tb.t[last] = nil
+ tb.t = tb.t[:last]
if last > 0 {
- siftdownTimer(0)
+ siftdownTimer(tb.t, 0)
}
t.i = -1 // mark as removed
}
f := t.f
arg := t.arg
seq := t.seq
- unlock(&timers.lock)
+ unlock(&tb.lock)
if raceenabled {
raceacquire(unsafe.Pointer(t))
}
f(arg, seq)
- lock(&timers.lock)
+ lock(&tb.lock)
}
if delta < 0 || faketime > 0 {
// No timers left - put goroutine to sleep.
- timers.rescheduling = true
- goparkunlock(&timers.lock, "timer goroutine (idle)", traceEvGoBlock, 1)
+ tb.rescheduling = true
+ goparkunlock(&tb.lock, "timer goroutine (idle)", traceEvGoBlock, 1)
continue
}
// At least one timer pending. Sleep until then.
- timers.sleeping = true
- timers.sleepUntil = now + delta
- noteclear(&timers.waitnote)
- unlock(&timers.lock)
- notetsleepg(&timers.waitnote, delta)
+ tb.sleeping = true
+ tb.sleepUntil = now + delta
+ noteclear(&tb.waitnote)
+ unlock(&tb.lock)
+ notetsleepg(&tb.waitnote, delta)
}
}
@@ -225,28 +269,67 @@ func timejump() *g {
return nil
}
- lock(&timers.lock)
- if !timers.created || len(timers.t) == 0 {
- unlock(&timers.lock)
+ for i := range timers {
+ lock(&timers[i].lock)
+ }
+ gp := timejumpLocked()
+ for i := range timers {
+ unlock(&timers[i].lock)
+ }
+
+ return gp
+}
+
+func timejumpLocked() *g {
+ // Determine a timer bucket with minimum when.
+ var minT *timer
+ for i := range timers {
+ tb := &timers[i]
+ if !tb.created || len(tb.t) == 0 {
+ continue
+ }
+ t := tb.t[0]
+ if minT == nil || t.when < minT.when {
+ minT = t
+ }
+ }
+ if minT == nil || minT.when <= faketime {
+ return nil
+ }
+
+ faketime = minT.when
+ tb := minT.tb
+ if !tb.rescheduling {
return nil
}
+ tb.rescheduling = false
+ return tb.gp
+}
+
+func timeSleepUntil() int64 {
+ next := int64(1<<63 - 1)
- var gp *g
- if faketime < timers.t[0].when {
- faketime = timers.t[0].when
- if timers.rescheduling {
- timers.rescheduling = false
- gp = timers.gp
+ // Determine minimum sleepUntil across all the timer buckets.
+ //
+ // The function cannot return a precise answer,
+ // as another timer may pop in as soon as timers have been unlocked.
+ // So lock the timers one by one instead of all at once.
+ for i := range timers {
+ tb := &timers[i]
+
+ lock(&tb.lock)
+ if tb.sleeping && tb.sleepUntil < next {
+ next = tb.sleepUntil
}
+ unlock(&tb.lock)
}
- unlock(&timers.lock)
- return gp
+
+ return next
}
// Heap maintenance algorithms.
-func siftupTimer(i int) {
- t := timers.t
+func siftupTimer(t []*timer, i int) {
when := t[i].when
tmp := t[i]
for i > 0 {
@@ -256,14 +339,15 @@ func siftupTimer(i int) {
}
t[i] = t[p]
t[i].i = i
- t[p] = tmp
- t[p].i = p
i = p
}
+ if tmp != t[i] {
+ t[i] = tmp
+ t[i].i = i
+ }
}
-func siftdownTimer(i int) {
- t := timers.t
+func siftdownTimer(t []*timer, i int) {
n := len(t)
when := t[i].when
tmp := t[i]
@@ -294,10 +378,12 @@ func siftdownTimer(i int) {
}
t[i] = t[c]
t[i].i = i
- t[c] = tmp
- t[c].i = c
i = c
}
+ if tmp != t[i] {
+ t[i] = tmp
+ t[i].i = i
+ }
}
// Entry points for net, time to call nanotime.
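
Both sift helpers now take the heap slice explicitly (so each bucket passes its own) and write the moved timer only once, at its final slot, instead of swapping at every level. The same shape on a self-contained binary min-heap keyed by when (the runtime's timer heap is actually 4-ary, i.e. the parent is at (i-1)/4):

package main

import "fmt"

type timerToy struct {
        when int64
        i    int // heap index
}

// siftup restores the heap property after t[i] got a smaller when,
// writing the moved timer once, at its final position.
func siftup(t []*timerToy, i int) {
        tmp := t[i]
        when := tmp.when
        for i > 0 {
                p := (i - 1) / 2 // binary heap for simplicity
                if when >= t[p].when {
                        break
                }
                t[i] = t[p]
                t[i].i = i
                i = p
        }
        if tmp != t[i] {
                t[i] = tmp
                t[i].i = i
        }
}

func main() {
        h := []*timerToy{{when: 10, i: 0}, {when: 20, i: 1}, {when: 30, i: 2}}
        h = append(h, &timerToy{when: 5, i: 3})
        siftup(h, 3)
        fmt.Println(h[0].when, h[0].i) // 5 0
}
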
@@ -312,4 +398,10 @@ func time_runtimeNano() int64 {
return nanotime()
}
-var startNano int64 = nanotime()
+// Monotonic times are reported as offsets from startNano.
+// We initialize startNano to nanotime() - 1 so that on systems where
+// monotonic time resolution is fairly low (e.g. Windows 2008
+// which appears to have a default resolution of 15ms),
+// we avoid ever reporting a nanotime of 0.
+// (Callers may want to use 0 as "time not set".)
+var startNano int64 = nanotime() - 1
diff --git a/libgo/go/runtime/trace.go b/libgo/go/runtime/trace.go
index af9313be37a..8427e76c5a3 100644
--- a/libgo/go/runtime/trace.go
+++ b/libgo/go/runtime/trace.go
@@ -28,8 +28,8 @@ const (
traceEvProcStop = 6 // stop of P [timestamp]
traceEvGCStart = 7 // GC start [timestamp, seq, stack id]
traceEvGCDone = 8 // GC done [timestamp]
- traceEvGCScanStart = 9 // GC mark termination start [timestamp]
- traceEvGCScanDone = 10 // GC mark termination done [timestamp]
+ traceEvGCSTWStart = 9 // GC STW start [timestamp, kind]
+ traceEvGCSTWDone = 10 // GC STW done [timestamp]
traceEvGCSweepStart = 11 // GC sweep start [timestamp, stack id]
traceEvGCSweepDone = 12 // GC sweep done [timestamp, swept, reclaimed]
traceEvGoCreate = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
@@ -235,21 +235,21 @@ func StartTrace() error {
trace.timeStart = nanotime()
trace.headerWritten = false
trace.footerWritten = false
- trace.strings = make(map[string]uint64)
+
+ // string to id mapping
+ // 0 : reserved for an empty string
+ // remaining: other strings registered by traceString
trace.stringSeq = 0
+ trace.strings = make(map[string]uint64)
+
trace.seqGC = 0
_g_.m.startingtrace = false
trace.enabled = true
// Register runtime goroutine labels.
_, pid, bufp := traceAcquireBuffer()
- buf := (*bufp).ptr()
- if buf == nil {
- buf = traceFlush(0).ptr()
- (*bufp).set(buf)
- }
for i, label := range gcMarkWorkerModeStrings[:] {
- trace.markWorkerLabels[i], buf = traceString(buf, label)
+ trace.markWorkerLabels[i], bufp = traceString(bufp, pid, label)
}
traceReleaseBuffer(pid)
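The string-to-id mapping set up in StartTrace above reserves id 0 for the empty string and hands out increasing ids for everything else via traceString. A standalone sketch of that interning scheme, with stringTable and its methods as illustrative names rather than runtime code:

package main

import "fmt"

// stringTable mirrors the idea behind trace.strings: each distinct
// string gets a small integer id, with 0 reserved for "".
type stringTable struct {
	seq uint64
	ids map[string]uint64
}

func newStringTable() *stringTable {
	return &stringTable{ids: make(map[string]uint64)}
}

// id returns the id for s, registering s on first use.
func (t *stringTable) id(s string) uint64 {
	if s == "" {
		return 0 // reserved
	}
	if id, ok := t.ids[s]; ok {
		return id
	}
	t.seq++
	t.ids[s] = t.seq
	return t.seq
}

func main() {
	tab := newStringTable()
	fmt.Println(tab.id(""), tab.id("GC (dedicated)"), tab.id("GC (dedicated)")) // 0 1 1
}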
@@ -277,10 +277,9 @@ func StopTrace() {
traceGoSched()
- for _, p := range &allp {
- if p == nil {
- break
- }
+ // Loop over all allocated Ps because dead Ps may still have
+ // trace buffers.
+ for _, p := range allp[:cap(allp)] {
buf := p.tracebuf
if buf != 0 {
traceFullQueue(buf)
@@ -320,10 +319,7 @@ func StopTrace() {
// The lock protects us from races with StartTrace/StopTrace because they do stop-the-world.
lock(&trace.lock)
- for _, p := range &allp {
- if p == nil {
- break
- }
+ for _, p := range allp[:cap(allp)] {
if p.tracebuf != 0 {
throw("trace: non-empty trace buffer in proc")
}
@@ -382,7 +378,7 @@ func ReadTrace() []byte {
trace.headerWritten = true
trace.lockOwner = nil
unlock(&trace.lock)
- return []byte("go 1.9 trace\x00\x00\x00\x00")
+ return []byte("go 1.10 trace\x00\x00\x00")
}
// Wait for new data.
if trace.fullHead == 0 && !trace.shutdown {
@@ -408,9 +404,12 @@ func ReadTrace() []byte {
var data []byte
data = append(data, traceEvFrequency|0<<traceArgCountShift)
data = traceAppend(data, uint64(freq))
- if timers.gp != nil {
- data = append(data, traceEvTimerGoroutine|0<<traceArgCountShift)
- data = traceAppend(data, uint64(timers.gp.goid))
+ for i := range timers {
+ tb := &timers[i]
+ if tb.gp != nil {
+ data = append(data, traceEvTimerGoroutine|0<<traceArgCountShift)
+ data = traceAppend(data, uint64(tb.gp.goid))
+ }
}
// This will emit a bunch of full buffers, we will pick them up
// on the next iteration.
@@ -514,18 +513,12 @@ func traceEvent(ev byte, skip int, args ...uint64) {
buf := (*bufp).ptr()
const maxSize = 2 + 5*traceBytesPerNumber // event type, length, sequence, timestamp, stack id and two add params
if buf == nil || len(buf.arr)-buf.pos < maxSize {
- buf = traceFlush(traceBufPtrOf(buf)).ptr()
+ buf = traceFlush(traceBufPtrOf(buf), pid).ptr()
(*bufp).set(buf)
}
ticks := uint64(cputicks()) / traceTickDiv
tickDiff := ticks - buf.lastTicks
- if buf.pos == 0 {
- buf.byte(traceEvBatch | 1<<traceArgCountShift)
- buf.varint(uint64(pid))
- buf.varint(ticks)
- tickDiff = 0
- }
buf.lastTicks = ticks
narg := byte(len(args))
if skip >= 0 {
@@ -602,7 +595,7 @@ func traceReleaseBuffer(pid int32) {
}
// traceFlush puts buf onto stack of full buffers and returns an empty buffer.
-func traceFlush(buf traceBufPtr) traceBufPtr {
+func traceFlush(buf traceBufPtr, pid int32) traceBufPtr {
owner := trace.lockOwner
dolock := owner == nil || owner != getg().m.curg
if dolock {
@@ -623,34 +616,51 @@ func traceFlush(buf traceBufPtr) traceBufPtr {
bufp := buf.ptr()
bufp.link.set(nil)
bufp.pos = 0
- bufp.lastTicks = 0
+
+ // initialize the buffer for a new batch
+ ticks := uint64(cputicks()) / traceTickDiv
+ bufp.lastTicks = ticks
+ bufp.byte(traceEvBatch | 1<<traceArgCountShift)
+ bufp.varint(uint64(pid))
+ bufp.varint(ticks)
+
if dolock {
unlock(&trace.lock)
}
return buf
}
-func traceString(buf *traceBuf, s string) (uint64, *traceBuf) {
+// traceString adds a string to trace.strings and returns its id.
+func traceString(bufp *traceBufPtr, pid int32, s string) (uint64, *traceBufPtr) {
if s == "" {
- return 0, buf
+ return 0, bufp
}
if id, ok := trace.strings[s]; ok {
- return id, buf
+ return id, bufp
}
trace.stringSeq++
id := trace.stringSeq
trace.strings[s] = id
+ // The memory allocation above may trigger tracing and
+ // cause *bufp to change. The following code works with *bufp,
+ // so there must be no memory allocation or other activity
+ // that causes tracing after this point.
+
+ buf := (*bufp).ptr()
size := 1 + 2*traceBytesPerNumber + len(s)
- if len(buf.arr)-buf.pos < size {
- buf = traceFlush(traceBufPtrOf(buf)).ptr()
+ if buf == nil || len(buf.arr)-buf.pos < size {
+ buf = traceFlush(traceBufPtrOf(buf), pid).ptr()
+ (*bufp).set(buf)
}
buf.byte(traceEvString)
buf.varint(id)
buf.varint(uint64(len(s)))
buf.pos += copy(buf.arr[buf.pos:], s)
- return id, buf
+
+ (*bufp).set(buf)
+ return id, bufp
}
// traceAppend appends v to buf in little-endian-base-128 encoding.
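A minimal sketch of the little-endian-base-128 (LEB128) scheme named in the comment above: seven payload bits per byte, with the high bit set while more bytes follow. appendUvarint is an illustrative helper, not the runtime's traceAppend itself.

package main

import "fmt"

// appendUvarint encodes v in LEB128 and appends the bytes to buf.
func appendUvarint(buf []byte, v uint64) []byte {
	for ; v >= 0x80; v >>= 7 {
		buf = append(buf, byte(v)|0x80)
	}
	return append(buf, byte(v))
}

func main() {
	fmt.Printf("% x\n", appendUvarint(nil, 300)) // ac 02
}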
@@ -772,7 +782,7 @@ func (tab *traceStackTable) newStack(n int) *traceStack {
// releases all memory and resets state.
func (tab *traceStackTable) dump() {
var tmp [(2 + 4*traceStackSize) * traceBytesPerNumber]byte
- buf := traceFlush(0).ptr()
+ bufp := traceFlush(0, 0)
for _, stk := range tab.tab {
stk := stk.ptr()
for ; stk != nil; stk = stk.link.ptr() {
@@ -782,7 +792,7 @@ func (tab *traceStackTable) dump() {
tmpbuf = traceAppend(tmpbuf, uint64(len(frames)))
for _, f := range frames {
var frame traceFrame
- frame, buf = traceFrameForPC(buf, f)
+ frame, bufp = traceFrameForPC(bufp, 0, f)
tmpbuf = traceAppend(tmpbuf, uint64(f.pc))
tmpbuf = traceAppend(tmpbuf, uint64(frame.funcID))
tmpbuf = traceAppend(tmpbuf, uint64(frame.fileID))
@@ -790,9 +800,10 @@ func (tab *traceStackTable) dump() {
}
// Now copy to the buffer.
size := 1 + traceBytesPerNumber + len(tmpbuf)
- if len(buf.arr)-buf.pos < size {
- buf = traceFlush(traceBufPtrOf(buf)).ptr()
+ if buf := bufp.ptr(); len(buf.arr)-buf.pos < size {
+ bufp = traceFlush(bufp, 0)
}
+ buf := bufp.ptr()
buf.byte(traceEvStack | 3<<traceArgCountShift)
buf.varint(uint64(len(tmpbuf)))
buf.pos += copy(buf.arr[buf.pos:], tmpbuf)
@@ -800,7 +811,7 @@ func (tab *traceStackTable) dump() {
}
lock(&trace.lock)
- traceFullQueue(traceBufPtrOf(buf))
+ traceFullQueue(bufp)
unlock(&trace.lock)
tab.mem.drop()
@@ -813,7 +824,10 @@ type traceFrame struct {
line uint64
}
-func traceFrameForPC(buf *traceBuf, f location) (traceFrame, *traceBuf) {
+// traceFrameForPC records the frame information.
+// It may allocate memory.
+func traceFrameForPC(buf traceBufPtr, pid int32, f location) (traceFrame, traceBufPtr) {
+ bufp := &buf
var frame traceFrame
fn := f.function
@@ -821,14 +835,14 @@ func traceFrameForPC(buf *traceBuf, f location) (traceFrame, *traceBuf) {
if len(fn) > maxLen {
fn = fn[len(fn)-maxLen:]
}
- frame.funcID, buf = traceString(buf, fn)
+ frame.funcID, bufp = traceString(bufp, pid, fn)
frame.line = uint64(f.lineno)
file := f.filename
if len(file) > maxLen {
file = file[len(file)-maxLen:]
}
- frame.fileID, buf = traceString(buf, file)
- return frame, buf
+ frame.fileID, bufp = traceString(bufp, pid, file)
+ return frame, (*bufp)
}
// traceAlloc is a non-thread-safe region allocator.
@@ -917,12 +931,12 @@ func traceGCDone() {
traceEvent(traceEvGCDone, -1)
}
-func traceGCScanStart() {
- traceEvent(traceEvGCScanStart, -1)
+func traceGCSTWStart(kind int) {
+ traceEvent(traceEvGCSTWStart, -1, uint64(kind))
}
-func traceGCScanDone() {
- traceEvent(traceEvGCScanDone, -1)
+func traceGCSTWDone() {
+ traceEvent(traceEvGCSTWDone, -1)
}
// traceGCSweepStart prepares to trace a sweep loop. This does not
diff --git a/libgo/go/runtime/trace/example_test.go b/libgo/go/runtime/trace/example_test.go
new file mode 100644
index 00000000000..8e0ee5a1a3f
--- /dev/null
+++ b/libgo/go/runtime/trace/example_test.go
@@ -0,0 +1,41 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package trace_test
+
+import (
+ "fmt"
+ "log"
+ "os"
+ "runtime/trace"
+)
+
+// Example demonstrates the use of the trace package to trace
+// the execution of a Go program. The trace output will be
+// written to the file trace.out.
+func Example() {
+ f, err := os.Create("trace.out")
+ if err != nil {
+ log.Fatalf("failed to create trace output file: %v", err)
+ }
+ defer func() {
+ if err := f.Close(); err != nil {
+ log.Fatalf("failed to close trace file: %v", err)
+ }
+ }()
+
+ if err := trace.Start(f); err != nil {
+ log.Fatalf("failed to start trace: %v", err)
+ }
+ defer trace.Stop()
+
+ // your program here
+ RunMyProgram()
+}
+
+func RunMyProgram() {
+ fmt.Printf("this function will be traced")
+}
diff --git a/libgo/go/runtime/trace/trace.go b/libgo/go/runtime/trace/trace.go
index 7cbb8a6e82c..439f998c03a 100644
--- a/libgo/go/runtime/trace/trace.go
+++ b/libgo/go/runtime/trace/trace.go
@@ -2,13 +2,36 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// Go execution tracer.
-// The tracer captures a wide range of execution events like goroutine
-// creation/blocking/unblocking, syscall enter/exit/block, GC-related events,
-// changes of heap size, processor start/stop, etc and writes them to an io.Writer
-// in a compact form. A precise nanosecond-precision timestamp and a stack
-// trace is captured for most events. A trace can be analyzed later with
-// 'go tool trace' command.
+// Package trace contains facilities for programs to generate traces
+// for the Go execution tracer.
+//
+// The execution trace captures a wide range of execution events such as
+// goroutine creation/blocking/unblocking, syscall enter/exit/block,
+// GC-related events, changes of heap size, processor start/stop, etc.
+// A precise nanosecond-precision timestamp and a stack trace is
+// captured for most events. The generated trace can be interpreted
+// using `go tool trace`.
+//
+// Tracing a Go program
+//
+// Support for tracing tests and benchmarks built with the standard
+// testing package is built into `go test`. For example, the following
+// command runs the test in the current directory and writes the trace
+// file (trace.out).
+//
+// go test -trace=trace.out
+//
+// This runtime/trace package provides APIs to add equivalent tracing
+// support to a standalone program. See the Example that demonstrates
+// how to use this API to enable tracing.
+//
+// There is also a standard HTTP interface to profiling data. Adding the
+// following line will install handlers under the /debug/pprof/trace URL
+// to download live profiles:
+//
+// import _ "net/http/pprof"
+//
+// See the net/http/pprof package for more details.
package trace
import (
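The package documentation above mentions the HTTP interface installed by net/http/pprof. A minimal standalone sketch of wiring it up; the blank import registers the /debug/pprof/ handlers (including /debug/pprof/trace) on the default mux, and the localhost:6060 address is an arbitrary choice for illustration.

package main

import (
	"log"
	"net/http"
	_ "net/http/pprof" // installs /debug/pprof/* handlers, including /debug/pprof/trace
)

func main() {
	// Fetch a 5-second trace and inspect it with, for example:
	//   curl -o trace.out 'http://localhost:6060/debug/pprof/trace?seconds=5'
	//   go tool trace trace.out
	log.Fatal(http.ListenAndServe("localhost:6060", nil))
}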
diff --git a/libgo/go/runtime/trace/trace_test.go b/libgo/go/runtime/trace/trace_test.go
index c5f64fcf4cf..5fa5b82f8e2 100644
--- a/libgo/go/runtime/trace/trace_test.go
+++ b/libgo/go/runtime/trace/trace_test.go
@@ -7,6 +7,7 @@ package trace_test
import (
"bytes"
"flag"
+ "internal/race"
"internal/trace"
"io"
"io/ioutil"
@@ -14,6 +15,7 @@ import (
"os"
"runtime"
. "runtime/trace"
+ "strconv"
"sync"
"testing"
"time"
@@ -23,6 +25,61 @@ var (
saveTraces = flag.Bool("savetraces", false, "save traces collected by tests")
)
+// TestEventBatch tests that Flush calls that happen during Start
+// don't produce corrupted traces.
+func TestEventBatch(t *testing.T) {
+ if race.Enabled {
+ t.Skip("skipping in race mode")
+ }
+ if testing.Short() {
+ t.Skip("skipping in short mode")
+ }
+ // During Start, a bunch of records are written to reflect the current
+ // snapshot of the program, including the state of each goroutine.
+ // Some string constants are also written to the trace to aid trace
+ // parsing. This test checks that a Flush of the buffer occurring during
+ // this process doesn't produce corrupted traces.
+ // Whether a Flush happens during Start is hard to control,
+ // so we test with a range of goroutine counts, hoping that one
+ // of them triggers a Flush.
+ // The range was chosen to fill up a ~64KB buffer with traceEvGoCreate
+ // and traceEvGoWaiting events (12-13 bytes per goroutine).
+ for g := 4950; g < 5050; g++ {
+ n := g
+ t.Run("G="+strconv.Itoa(n), func(t *testing.T) {
+ var wg sync.WaitGroup
+ wg.Add(n)
+
+ in := make(chan bool, 1000)
+ for i := 0; i < n; i++ {
+ go func() {
+ <-in
+ wg.Done()
+ }()
+ }
+ buf := new(bytes.Buffer)
+ if err := Start(buf); err != nil {
+ t.Fatalf("failed to start tracing: %v", err)
+ }
+
+ for i := 0; i < n; i++ {
+ in <- true
+ }
+ wg.Wait()
+ Stop()
+
+ _, err := trace.Parse(buf, "")
+ if err == trace.ErrTimeOrder {
+ t.Skipf("skipping trace: %v", err)
+ }
+
+ if err != nil {
+ t.Fatalf("failed to parse trace: %v", err)
+ }
+ })
+ }
+}
+
func TestTraceStartStop(t *testing.T) {
buf := new(bytes.Buffer)
if err := Start(buf); err != nil {
diff --git a/libgo/go/runtime/traceback_gccgo.go b/libgo/go/runtime/traceback_gccgo.go
index 37c569887b0..79f78d8d247 100644
--- a/libgo/go/runtime/traceback_gccgo.go
+++ b/libgo/go/runtime/traceback_gccgo.go
@@ -52,7 +52,7 @@ func c_callers(skip int32, locbuf *location, max int32, keepThunks bool) int32
// callers returns a stack trace of the current goroutine.
// The gc version of callers takes []uintptr, but we take []location.
func callers(skip int, locbuf []location) int {
- n := c_callers(int32(skip), &locbuf[0], int32(len(locbuf)), false)
+ n := c_callers(int32(skip)+1, &locbuf[0], int32(len(locbuf)), false)
return int(n)
}
@@ -156,7 +156,7 @@ func goroutineheader(gp *g) {
if waitfor >= 1 {
print(", ", waitfor, " minutes")
}
- if gp.lockedm != nil {
+ if gp.lockedm != 0 {
print(", locked to thread")
}
print("]:\n")