| author | Ian Lance Taylor <iant@golang.org> | 2018-01-09 01:23:08 +0000 |
|---|---|---|
| committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2018-01-09 01:23:08 +0000 |
| commit | 1a2f01efa63036a5104f203a4789e682c0e0915d (patch) | |
| tree | 373e15778dc8295354584e1f86915ae493b604ff /libgo/go/runtime | |
| parent | 8799df67f2dab88f9fda11739c501780a85575e2 (diff) | |
libgo: update to Go1.10beta1
Update the Go library to the 1.10beta1 release.
This requires a few changes to the compiler to support the modified map
runtime code, and to handle some nowritebarrier cases in the runtime.
Reviewed-on: https://go-review.googlesource.com/86455
gotools/:
* Makefile.am (go_cmd_vet_files): New variable.
(go_cmd_buildid_files, go_cmd_test2json_files): New variables.
(s-zdefaultcc): Change from constants to functions.
(noinst_PROGRAMS): Add vet, buildid, and test2json.
(cgo$(EXEEXT)): Link against $(LIBGOTOOL).
(vet$(EXEEXT)): New target.
(buildid$(EXEEXT)): New target.
(test2json$(EXEEXT)): New target.
(install-exec-local): Install all $(noinst_PROGRAMS).
(uninstall-local): Uninstall all $(noinst_PROGRAMS).
(check-go-tool): Depend on $(noinst_PROGRAMS). Copy down
objabi.go.
(check-runtime): Depend on $(noinst_PROGRAMS).
(check-cgo-test, check-carchive-test): Likewise.
(check-vet): New target.
(check): Depend on check-vet. Look at cmd_vet-testlog.
(.PHONY): Add check-vet.
* Makefile.in: Rebuild.
From-SVN: r256365
Diffstat (limited to 'libgo/go/runtime')
94 files changed, 4855 insertions, 1973 deletions
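The cgo_gccgo.go hunk below documents the `cgoAlwaysFalse` trick: cgo-generated code wraps a call to `cgoUse(p)` in `if cgoAlwaysFalse { ... }`, which the compiler cannot prove dead, so the argument is treated as escaping. A minimal sketch of the shape that comment describes — the wrapper function and the panic body are illustrative assumptions, not the actual generated code:

```go
package main

import "fmt"

// cgoAlwaysFalse is never set to true, but the compiler cannot prove that.
var cgoAlwaysFalse bool

// cgoUse takes an interface value, so anything passed to it escapes.
//go:noinline
func cgoUse(p interface{}) { panic("cgoUse should never be called") }

// wrapper stands in for a cgo-generated call wrapper (hypothetical shape).
func wrapper(p *int) {
	if cgoAlwaysFalse {
		cgoUse(p) // dead at run time, but forces p to escape to the heap
	}
	fmt.Println(*p) // the real call into C would happen here
}

func main() {
	x := 42
	wrapper(&x)
}
```

As the diff's comment notes, the run-time test is cheaper than an unconditional call, while still giving the desired escape-analysis result.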
diff --git a/libgo/go/runtime/alg.go b/libgo/go/runtime/alg.go index 174320fe85a..7c98f1bc940 100644 --- a/libgo/go/runtime/alg.go +++ b/libgo/go/runtime/alg.go @@ -57,18 +57,15 @@ const ( func memhash0(p unsafe.Pointer, h uintptr) uintptr { return h } + func memhash8(p unsafe.Pointer, h uintptr) uintptr { return memhash(p, h, 1) } + func memhash16(p unsafe.Pointer, h uintptr) uintptr { return memhash(p, h, 2) } -func memhash32(p unsafe.Pointer, h uintptr) uintptr { - return memhash(p, h, 4) -} -func memhash64(p unsafe.Pointer, h uintptr) uintptr { - return memhash(p, h, 8) -} + func memhash128(p unsafe.Pointer, h uintptr) uintptr { return memhash(p, h, 16) } diff --git a/libgo/go/runtime/append_test.go b/libgo/go/runtime/append_test.go index 6bd8f3bd951..ef1e812c0dc 100644 --- a/libgo/go/runtime/append_test.go +++ b/libgo/go/runtime/append_test.go @@ -18,42 +18,52 @@ func BenchmarkMakeSlice(b *testing.B) { } } -func BenchmarkGrowSliceBytes(b *testing.B) { - b.StopTimer() - var x = make([]byte, 9) - b.StartTimer() - for i := 0; i < b.N; i++ { - _ = append([]byte(nil), x...) - } -} - -func BenchmarkGrowSliceInts(b *testing.B) { - b.StopTimer() - var x = make([]int, 9) - b.StartTimer() - for i := 0; i < b.N; i++ { - _ = append([]int(nil), x...) - } -} - -func BenchmarkGrowSlicePtr(b *testing.B) { - b.StopTimer() - var x = make([]*byte, 9) - b.StartTimer() - for i := 0; i < b.N; i++ { - _ = append([]*byte(nil), x...) - } -} +type ( + struct24 struct{ a, b, c int64 } + struct32 struct{ a, b, c, d int64 } + struct40 struct{ a, b, c, d, e int64 } +) -type struct24 struct{ a, b, c int64 } +func BenchmarkGrowSlice(b *testing.B) { + b.Run("Byte", func(b *testing.B) { + x := make([]byte, 9) + for i := 0; i < b.N; i++ { + _ = append([]byte(nil), x...) + } + }) + b.Run("Int", func(b *testing.B) { + x := make([]int, 9) + for i := 0; i < b.N; i++ { + _ = append([]int(nil), x...) + } + }) + b.Run("Ptr", func(b *testing.B) { + x := make([]*byte, 9) + for i := 0; i < b.N; i++ { + _ = append([]*byte(nil), x...) + } + }) + b.Run("Struct", func(b *testing.B) { + b.Run("24", func(b *testing.B) { + x := make([]struct24, 9) + for i := 0; i < b.N; i++ { + _ = append([]struct24(nil), x...) + } + }) + b.Run("32", func(b *testing.B) { + x := make([]struct32, 9) + for i := 0; i < b.N; i++ { + _ = append([]struct32(nil), x...) + } + }) + b.Run("40", func(b *testing.B) { + x := make([]struct40, 9) + for i := 0; i < b.N; i++ { + _ = append([]struct40(nil), x...) + } + }) -func BenchmarkGrowSliceStruct24Bytes(b *testing.B) { - b.StopTimer() - var x = make([]struct24, 9) - b.StartTimer() - for i := 0; i < b.N; i++ { - _ = append([]struct24(nil), x...) - } + }) } func BenchmarkAppend(b *testing.B) { diff --git a/libgo/go/runtime/cgo_gccgo.go b/libgo/go/runtime/cgo_gccgo.go index c3bf9552ea8..05be4964500 100644 --- a/libgo/go/runtime/cgo_gccgo.go +++ b/libgo/go/runtime/cgo_gccgo.go @@ -27,6 +27,13 @@ var iscgo bool // The extra M must be created before any C/C++ code calls cgocallback. var cgoHasExtraM bool +// cgoAlwaysFalse is a boolean value that is always false. +// The cgo-generated code says if cgoAlwaysFalse { cgoUse(p) }. +// The compiler cannot see that cgoAlwaysFalse is always false, +// so it emits the test and keeps the call, giving the desired +// escape analysis result. The test is cheaper than the call. +var cgoAlwaysFalse bool + // Cgocall prepares to call from code written in Go to code written in // C/C++. This takes the current goroutine out of the Go scheduler, as // though it were making a system call. 
Otherwise the program can @@ -37,12 +44,11 @@ var cgoHasExtraM bool // defer syscall.Cgocalldone() // cfunction() func Cgocall() { - lockOSThread() mp := getg().m mp.ncgocall++ mp.ncgo++ - mp.incgo = true entersyscall(0) + mp.incgo = true } // CgocallDone prepares to return to Go code from C/C++ code. @@ -59,8 +65,6 @@ func CgocallDone() { if readgstatus(gp)&^_Gscan == _Gsyscall { exitsyscall(0) } - - unlockOSThread() } // CgocallBack is used when calling from C/C++ code into Go code. @@ -78,6 +82,8 @@ func CgocallBack() { mp.dropextram = true } + lockOSThread() + exitsyscall(0) gp.m.incgo = false @@ -100,6 +106,8 @@ func CgocallBack() { // CgocallBackDone prepares to return to C/C++ code that has called // into Go code. func CgocallBackDone() { + unlockOSThread() + // If we are the top level Go function called from C/C++, then // we need to release the m. But don't release it if we are // panicing; since this is the top level, we are going to diff --git a/libgo/go/runtime/cgocall.go b/libgo/go/runtime/cgocall.go index 4a416fbf6ad..9d161202dfa 100644 --- a/libgo/go/runtime/cgocall.go +++ b/libgo/go/runtime/cgocall.go @@ -234,10 +234,8 @@ func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) { // No more possible pointers. break } - if hbits.isPointer() { - if cgoIsGoPointer(*(*unsafe.Pointer)(unsafe.Pointer(base + i))) { - panic(errorString(msg)) - } + if hbits.isPointer() && cgoIsGoPointer(*(*unsafe.Pointer)(unsafe.Pointer(base + i))) { + panic(errorString(msg)) } hbits = hbits.next() } diff --git a/libgo/go/runtime/cgocheck.go b/libgo/go/runtime/cgocheck.go index 30f054b3633..b85b519460e 100644 --- a/libgo/go/runtime/cgocheck.go +++ b/libgo/go/runtime/cgocheck.go @@ -16,6 +16,10 @@ const cgoWriteBarrierFail = "Go pointer stored into non-Go memory" // cgoCheckWriteBarrier is called whenever a pointer is stored into memory. // It throws if the program is storing a Go pointer into non-Go memory. +// +// This is called from the write barrier, so its entire call tree must +// be nosplit. +// //go:nosplit //go:nowritebarrier func cgoCheckWriteBarrier(dst *uintptr, src uintptr) { diff --git a/libgo/go/runtime/chan.go b/libgo/go/runtime/chan.go index 7bb919c41db..8db728d5430 100644 --- a/libgo/go/runtime/chan.go +++ b/libgo/go/runtime/chan.go @@ -64,11 +64,19 @@ type waitq struct { } //go:linkname reflect_makechan reflect.makechan -func reflect_makechan(t *chantype, size int64) *hchan { +func reflect_makechan(t *chantype, size int) *hchan { return makechan(t, size) } -func makechan(t *chantype, size int64) *hchan { +func makechan64(t *chantype, size int64) *hchan { + if int64(int(size)) != size { + panic(plainError("makechan: size out of range")) + } + + return makechan(t, int(size)) +} + +func makechan(t *chantype, size int) *hchan { elem := t.elem // compiler checks this but be safe. @@ -78,29 +86,33 @@ func makechan(t *chantype, size int64) *hchan { if hchanSize%maxAlign != 0 || elem.align > maxAlign { throw("makechan: bad alignment") } - if size < 0 || int64(uintptr(size)) != size || (elem.size > 0 && uintptr(size) > (_MaxMem-hchanSize)/elem.size) { + + if size < 0 || uintptr(size) > maxSliceCap(elem.size) || uintptr(size)*elem.size > _MaxMem-hchanSize { panic(plainError("makechan: size out of range")) } + // Hchan does not contain pointers interesting for GC when elements stored in buf do not contain pointers. + // buf points into the same allocation, elemtype is persistent. + // SudoG's are referenced from their owning thread so they can't be collected. 
+ // TODO(dvyukov,rlh): Rethink when collector can move allocated objects. var c *hchan - if elem.kind&kindNoPointers != 0 || size == 0 { - // Allocate memory in one call. - // Hchan does not contain pointers interesting for GC in this case: - // buf points into the same allocation, elemtype is persistent. - // SudoG's are referenced from their owning thread so they can't be collected. - // TODO(dvyukov,rlh): Rethink when collector can move allocated objects. + switch { + case size == 0 || elem.size == 0: + // Queue or element size is zero. + c = (*hchan)(mallocgc(hchanSize, nil, true)) + // Race detector uses this location for synchronization. + c.buf = unsafe.Pointer(c) + case elem.kind&kindNoPointers != 0: + // Elements do not contain pointers. + // Allocate hchan and buf in one call. c = (*hchan)(mallocgc(hchanSize+uintptr(size)*elem.size, nil, true)) - if size > 0 && elem.size != 0 { - c.buf = add(unsafe.Pointer(c), hchanSize) - } else { - // race detector uses this location for synchronization - // Also prevents us from pointing beyond the allocation (see issue 9401). - c.buf = unsafe.Pointer(c) - } - } else { + c.buf = add(unsafe.Pointer(c), hchanSize) + default: + // Elements contain pointers. c = new(hchan) - c.buf = newarray(elem, int(size)) + c.buf = mallocgc(uintptr(size)*elem.size, elem, true) } + c.elemsize = uint16(elem.size) c.elemtype = elem c.dataqsiz = uint(size) @@ -119,7 +131,7 @@ func chanbuf(c *hchan, i uint) unsafe.Pointer { // entry point for c <- x from compiled code //go:nosplit func chansend1(c *hchan, elem unsafe.Pointer) { - chansend(c, elem, true, getcallerpc(unsafe.Pointer(&c))) + chansend(c, elem, true, getcallerpc()) } /* @@ -223,7 +235,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { mysg.elem = ep mysg.waitlink = nil mysg.g = gp - mysg.selectdone = nil + mysg.isSelect = false mysg.c = c gp.waiting = mysg gp.param = nil @@ -331,7 +343,7 @@ func closechan(c *hchan) { } if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&c)) + callerpc := getcallerpc() racewritepc(unsafe.Pointer(c), callerpc, funcPC(closechan)) racerelease(unsafe.Pointer(c)) } @@ -508,7 +520,7 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) mysg.waitlink = nil gp.waiting = mysg mysg.g = gp - mysg.selectdone = nil + mysg.isSelect = false mysg.c = c gp.param = nil c.recvq.enqueue(mysg) @@ -603,7 +615,7 @@ func recv(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) { // } // func selectnbsend(c *hchan, elem unsafe.Pointer) (selected bool) { - return chansend(c, elem, false, getcallerpc(unsafe.Pointer(&c))) + return chansend(c, elem, false, getcallerpc()) } // compiler implements @@ -653,7 +665,7 @@ func selectnbrecv2(elem unsafe.Pointer, received *bool, c *hchan) (selected bool //go:linkname reflect_chansend reflect.chansend func reflect_chansend(c *hchan, elem unsafe.Pointer, nb bool) (selected bool) { - return chansend(c, elem, !nb, getcallerpc(unsafe.Pointer(&c))) + return chansend(c, elem, !nb, getcallerpc()) } //go:linkname reflect_chanrecv reflect.chanrecv @@ -712,10 +724,16 @@ func (q *waitq) dequeue() *sudog { sgp.next = nil // mark as removed (see dequeueSudog) } - // if sgp participates in a select and is already signaled, ignore it - if sgp.selectdone != nil { - // claim the right to signal - if *sgp.selectdone != 0 || !atomic.Cas(sgp.selectdone, 0, 1) { + // if a goroutine was put on this queue because of a + // select, there is a small window between the goroutine + // being woken up by 
a different case and it grabbing the + // channel locks. Once it has the lock + // it removes itself from the queue, so we won't see it after that. + // We use a flag in the G struct to tell us when someone + // else has won the race to signal this goroutine but the goroutine + // hasn't removed itself from the queue yet. + if sgp.isSelect { + if !atomic.Cas(&sgp.g.selectDone, 0, 1) { continue } } diff --git a/libgo/go/runtime/chan_test.go b/libgo/go/runtime/chan_test.go index b96af8af5d7..29fb321c926 100644 --- a/libgo/go/runtime/chan_test.go +++ b/libgo/go/runtime/chan_test.go @@ -5,6 +5,8 @@ package runtime_test import ( + "internal/testenv" + "math" "runtime" "sync" "sync/atomic" @@ -435,6 +437,65 @@ func TestSelectStress(t *testing.T) { wg.Wait() } +func TestSelectFairness(t *testing.T) { + const trials = 10000 + if runtime.GOOS == "linux" && runtime.GOARCH == "ppc64le" { + testenv.SkipFlaky(t, 22047) + } + c1 := make(chan byte, trials+1) + c2 := make(chan byte, trials+1) + for i := 0; i < trials+1; i++ { + c1 <- 1 + c2 <- 2 + } + c3 := make(chan byte) + c4 := make(chan byte) + out := make(chan byte) + done := make(chan byte) + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + for { + var b byte + select { + case b = <-c3: + case b = <-c4: + case b = <-c1: + case b = <-c2: + } + select { + case out <- b: + case <-done: + return + } + } + }() + cnt1, cnt2 := 0, 0 + for i := 0; i < trials; i++ { + switch b := <-out; b { + case 1: + cnt1++ + case 2: + cnt2++ + default: + t.Fatalf("unexpected value %d on channel", b) + } + } + // If the select in the goroutine is fair, + // cnt1 and cnt2 should be about the same value. + // With 10,000 trials, the expected margin of error at + // a confidence level of five nines is 4.4172 / (2 * Sqrt(10000)). 
+ r := float64(cnt1) / trials + e := math.Abs(r - 0.5) + t.Log(cnt1, cnt2, r, e) + if e > 4.4172/(2*math.Sqrt(trials)) { + t.Errorf("unfair select: in %d trials, results were %d, %d", trials, cnt1, cnt2) + } + close(done) + wg.Wait() +} + func TestChanSendInterface(t *testing.T) { type mt struct{} m := &mt{} @@ -674,6 +735,55 @@ done: <-ready2 } +type struct0 struct{} + +func BenchmarkMakeChan(b *testing.B) { + b.Run("Byte", func(b *testing.B) { + var x chan byte + for i := 0; i < b.N; i++ { + x = make(chan byte, 8) + } + close(x) + }) + b.Run("Int", func(b *testing.B) { + var x chan int + for i := 0; i < b.N; i++ { + x = make(chan int, 8) + } + close(x) + }) + b.Run("Ptr", func(b *testing.B) { + var x chan *byte + for i := 0; i < b.N; i++ { + x = make(chan *byte, 8) + } + close(x) + }) + b.Run("Struct", func(b *testing.B) { + b.Run("0", func(b *testing.B) { + var x chan struct0 + for i := 0; i < b.N; i++ { + x = make(chan struct0, 8) + } + close(x) + }) + b.Run("32", func(b *testing.B) { + var x chan struct32 + for i := 0; i < b.N; i++ { + x = make(chan struct32, 8) + } + close(x) + }) + b.Run("40", func(b *testing.B) { + var x chan struct40 + for i := 0; i < b.N; i++ { + x = make(chan struct40, 8) + } + close(x) + }) + }) +} + func BenchmarkChanNonblocking(b *testing.B) { myc := make(chan int) b.RunParallel(func(pb *testing.PB) { diff --git a/libgo/go/runtime/cpuprof.go b/libgo/go/runtime/cpuprof.go index b031b1a5e75..91cdf2b5594 100644 --- a/libgo/go/runtime/cpuprof.go +++ b/libgo/go/runtime/cpuprof.go @@ -160,6 +160,7 @@ func (p *cpuProfile) addExtra() { funcPC(_ExternalCode) + sys.PCQuantum, } cpuprof.log.write(nil, 0, hdr[:], lostStk[:]) + p.lostExtra = 0 } } diff --git a/libgo/go/runtime/cputicks.go b/libgo/go/runtime/cputicks.go index ee15aca24ef..7e62dc1e108 100644 --- a/libgo/go/runtime/cputicks.go +++ b/libgo/go/runtime/cputicks.go @@ -4,6 +4,6 @@ package runtime -// careful: cputicks is not guaranteed to be monotonic! In particular, we have +// careful: cputicks is not guaranteed to be monotonic! In particular, we have // noticed drift between cpus on certain os/arch combinations. See issue 8976. 
func cputicks() int64 diff --git a/libgo/go/runtime/crash_cgo_test.go b/libgo/go/runtime/crash_cgo_test.go index b79873185cc..7e14e573bc5 100644 --- a/libgo/go/runtime/crash_cgo_test.go +++ b/libgo/go/runtime/crash_cgo_test.go @@ -13,6 +13,7 @@ import ( "os" "os/exec" "runtime" + "strconv" "strings" "testing" "time" @@ -113,7 +114,7 @@ func TestCgoExternalThreadSIGPROF(t *testing.T) { t.Fatal(err) } - got, err := testEnv(exec.Command(exe, "CgoExternalThreadSIGPROF")).CombinedOutput() + got, err := testenv.CleanCmdEnv(exec.Command(exe, "CgoExternalThreadSIGPROF")).CombinedOutput() if err != nil { t.Fatalf("exit status: %v\n%s", err, got) } @@ -136,7 +137,7 @@ func TestCgoExternalThreadSignal(t *testing.T) { t.Fatal(err) } - got, err := testEnv(exec.Command(exe, "CgoExternalThreadSIGPROF")).CombinedOutput() + got, err := testenv.CleanCmdEnv(exec.Command(exe, "CgoExternalThreadSIGPROF")).CombinedOutput() if err != nil { t.Fatalf("exit status: %v\n%s", err, got) } @@ -203,14 +204,14 @@ func TestCgoCheckBytes(t *testing.T) { const tries = 10 var tot1, tot2 time.Duration for i := 0; i < tries; i++ { - cmd := testEnv(exec.Command(exe, "CgoCheckBytes")) + cmd := testenv.CleanCmdEnv(exec.Command(exe, "CgoCheckBytes")) cmd.Env = append(cmd.Env, "GODEBUG=cgocheck=0", fmt.Sprintf("GO_CGOCHECKBYTES_TRY=%d", i)) start := time.Now() cmd.Run() d1 := time.Since(start) - cmd = testEnv(exec.Command(exe, "CgoCheckBytes")) + cmd = testenv.CleanCmdEnv(exec.Command(exe, "CgoCheckBytes")) cmd.Env = append(cmd.Env, fmt.Sprintf("GO_CGOCHECKBYTES_TRY=%d", i)) start = time.Now() @@ -251,7 +252,7 @@ func TestCgoCCodeSIGPROF(t *testing.T) { func TestCgoCrashTraceback(t *testing.T) { t.Parallel() - if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" { + if runtime.GOOS != "linux" || (runtime.GOARCH != "amd64" && runtime.GOARCH != "ppc64le") { t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH) } if runtime.Compiler == "gccgo" { @@ -279,7 +280,7 @@ func TestCgoTracebackContext(t *testing.T) { func testCgoPprof(t *testing.T, buildArg, runArg string) { t.Parallel() - if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" { + if runtime.GOOS != "linux" || (runtime.GOARCH != "amd64" && runtime.GOARCH != "ppc64le") { t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH) } if runtime.Compiler == "gccgo" { @@ -292,7 +293,7 @@ func testCgoPprof(t *testing.T, buildArg, runArg string) { t.Fatal(err) } - got, err := testEnv(exec.Command(exe, runArg)).CombinedOutput() + got, err := testenv.CleanCmdEnv(exec.Command(exe, runArg)).CombinedOutput() if err != nil { if testenv.Builder() == "linux-amd64-alpine" { // See Issue 18243 and Issue 19938. @@ -304,7 +305,7 @@ func testCgoPprof(t *testing.T, buildArg, runArg string) { defer os.Remove(fn) for try := 0; try < 2; try++ { - cmd := testEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-top", "-nodecount=1")) + cmd := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-top", "-nodecount=1")) // Check that pprof works both with and without explicit executable on command line. 
if try == 0 { cmd.Args = append(cmd.Args, exe, fn) @@ -339,7 +340,7 @@ func TestCgoPprof(t *testing.T) { } func TestCgoPprofPIE(t *testing.T) { - testCgoPprof(t, "-ldflags=-extldflags=-pie", "CgoPprof") + testCgoPprof(t, "-buildmode=pie", "CgoPprof") } func TestCgoPprofThread(t *testing.T) { @@ -371,7 +372,7 @@ func TestRaceProf(t *testing.T) { t.Fatal(err) } - got, err := testEnv(exec.Command(exe, "CgoRaceprof")).CombinedOutput() + got, err := testenv.CleanCmdEnv(exec.Command(exe, "CgoRaceprof")).CombinedOutput() if err != nil { t.Fatal(err) } @@ -400,7 +401,7 @@ func TestRaceSignal(t *testing.T) { t.Fatal(err) } - got, err := testEnv(exec.Command(exe, "CgoRaceSignal")).CombinedOutput() + got, err := testenv.CleanCmdEnv(exec.Command(exe, "CgoRaceSignal")).CombinedOutput() if err != nil { t.Logf("%s\n", got) t.Fatal(err) @@ -423,3 +424,72 @@ func TestCgoNumGoroutine(t *testing.T) { t.Errorf("expected %q got %v", want, got) } } + +func TestCatchPanic(t *testing.T) { + t.Parallel() + switch runtime.GOOS { + case "plan9", "windows": + t.Skipf("no signals on %s", runtime.GOOS) + case "darwin": + if runtime.GOARCH == "amd64" { + t.Skipf("crash() on darwin/amd64 doesn't raise SIGABRT") + } + } + + testenv.MustHaveGoRun(t) + + exe, err := buildTestProg(t, "testprogcgo") + if err != nil { + t.Fatal(err) + } + + for _, early := range []bool{true, false} { + cmd := testenv.CleanCmdEnv(exec.Command(exe, "CgoCatchPanic")) + // Make sure a panic results in a crash. + cmd.Env = append(cmd.Env, "GOTRACEBACK=crash") + if early { + // Tell testprogcgo to install an early signal handler for SIGABRT + cmd.Env = append(cmd.Env, "CGOCATCHPANIC_EARLY_HANDLER=1") + } + if out, err := cmd.CombinedOutput(); err != nil { + t.Errorf("testprogcgo CgoCatchPanic failed: %v\n%s", err, out) + } + } +} + +func TestCgoLockOSThreadExit(t *testing.T) { + switch runtime.GOOS { + case "plan9", "windows": + t.Skipf("no pthreads on %s", runtime.GOOS) + } + t.Parallel() + testLockOSThreadExit(t, "testprogcgo") +} + +func TestWindowsStackMemoryCgo(t *testing.T) { + if runtime.GOOS != "windows" { + t.Skip("skipping windows specific test") + } + testenv.SkipFlaky(t, 22575) + o := runTestProg(t, "testprogcgo", "StackMemory") + stackUsage, err := strconv.Atoi(o) + if err != nil { + t.Fatalf("Failed to read stack usage: %v", err) + } + if expected, got := 100<<10, stackUsage; got > expected { + t.Fatalf("expected < %d bytes of memory per thread, got %d", expected, got) + } +} + +func TestSigStackSwapping(t *testing.T) { + switch runtime.GOOS { + case "plan9", "windows": + t.Skip("no sigaltstack on %s", runtime.GOOS) + } + t.Parallel() + got := runTestProg(t, "testprogcgo", "SigStack") + want := "OK\n" + if got != want { + t.Errorf("expected %q got %v", want, got) + } +} diff --git a/libgo/go/runtime/crash_test.go b/libgo/go/runtime/crash_test.go index 1cde6bf7997..8ec034835ec 100644 --- a/libgo/go/runtime/crash_test.go +++ b/libgo/go/runtime/crash_test.go @@ -32,25 +32,6 @@ func TestMain(m *testing.M) { os.Exit(status) } -func testEnv(cmd *exec.Cmd) *exec.Cmd { - if cmd.Env != nil { - panic("environment already set") - } - for _, env := range os.Environ() { - // Exclude GODEBUG from the environment to prevent its output - // from breaking tests that are trying to parse other command output. - if strings.HasPrefix(env, "GODEBUG=") { - continue - } - // Exclude GOTRACEBACK for the same reason. 
- if strings.HasPrefix(env, "GOTRACEBACK=") { - continue - } - cmd.Env = append(cmd.Env, env) - } - return cmd -} - var testprog struct { sync.Mutex dir string @@ -62,7 +43,11 @@ type buildexe struct { err error } -func runTestProg(t *testing.T, binary, name string) string { +func runTestProg(t *testing.T, binary, name string, env ...string) string { + if *flagQuick { + t.Skip("-quick") + } + testenv.MustHaveGoBuild(t) exe, err := buildTestProg(t, binary) @@ -70,7 +55,11 @@ func runTestProg(t *testing.T, binary, name string) string { t.Fatal(err) } - cmd := testEnv(exec.Command(exe, name)) + cmd := testenv.CleanCmdEnv(exec.Command(exe, name)) + cmd.Env = append(cmd.Env, env...) + if testing.Short() { + cmd.Env = append(cmd.Env, "RUNTIME_TEST_SHORT=1") + } var b bytes.Buffer cmd.Stdout = &b cmd.Stderr = &b @@ -111,6 +100,10 @@ func runTestProg(t *testing.T, binary, name string) string { } func buildTestProg(t *testing.T, binary string, flags ...string) (string, error) { + if *flagQuick { + t.Skip("-quick") + } + checkStaleRuntime(t) testprog.Lock() @@ -139,7 +132,7 @@ func buildTestProg(t *testing.T, binary string, flags ...string) (string, error) exe := filepath.Join(testprog.dir, name+".exe") cmd := exec.Command(testenv.GoToolPath(t), append([]string{"build", "-o", exe}, flags...)...) cmd.Dir = "testdata/" + binary - out, err := testEnv(cmd).CombinedOutput() + out, err := testenv.CleanCmdEnv(cmd).CombinedOutput() if err != nil { target.err = fmt.Errorf("building %s %v: %v\n%s", binary, flags, err, out) testprog.target[name] = target @@ -158,14 +151,14 @@ var ( func checkStaleRuntime(t *testing.T) { staleRuntimeOnce.Do(func() { // 'go run' uses the installed copy of runtime.a, which may be out of date. - out, err := testEnv(exec.Command(testenv.GoToolPath(t), "list", "-f", "{{.Stale}}", "runtime")).CombinedOutput() + out, err := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "list", "-gcflags=all="+os.Getenv("GO_GCFLAGS"), "-f", "{{.Stale}}", "runtime")).CombinedOutput() if err != nil { staleRuntimeErr = fmt.Errorf("failed to execute 'go list': %v\n%v", err, string(out)) return } if string(out) != "false\n" { t.Logf("go list -f {{.Stale}} runtime:\n%s", out) - out, err := testEnv(exec.Command(testenv.GoToolPath(t), "list", "-f", "{{.StaleReason}}", "runtime")).CombinedOutput() + out, err := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "list", "-gcflags=all="+os.Getenv("GO_GCFLAGS"), "-f", "{{.StaleReason}}", "runtime")).CombinedOutput() if err != nil { t.Logf("go list -f {{.StaleReason}} failed: %v", err) } @@ -483,7 +476,7 @@ func TestMemPprof(t *testing.T) { t.Fatal(err) } - got, err := testEnv(exec.Command(exe, "MemProf")).CombinedOutput() + got, err := testenv.CleanCmdEnv(exec.Command(exe, "MemProf")).CombinedOutput() if err != nil { t.Fatal(err) } @@ -491,7 +484,7 @@ func TestMemPprof(t *testing.T) { defer os.Remove(fn) for try := 0; try < 2; try++ { - cmd := testEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-alloc_space", "-top")) + cmd := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-alloc_space", "-top")) // Check that pprof works both with and without explicit executable on command line. 
if try == 0 { cmd.Args = append(cmd.Args, exe, fn) @@ -606,7 +599,7 @@ func TestPanicRace(t *testing.T) { const tries = 10 retry: for i := 0; i < tries; i++ { - got, err := testEnv(exec.Command(exe, "PanicRace")).CombinedOutput() + got, err := testenv.CleanCmdEnv(exec.Command(exe, "PanicRace")).CombinedOutput() if err == nil { t.Logf("try %d: program exited successfully, should have failed", i+1) continue diff --git a/libgo/go/runtime/crash_unix_test.go b/libgo/go/runtime/crash_unix_test.go index 09c25471d10..584a6c74232 100644 --- a/libgo/go/runtime/crash_unix_test.go +++ b/libgo/go/runtime/crash_unix_test.go @@ -65,13 +65,13 @@ func TestCrashDumpsAllThreads(t *testing.T) { cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "a.exe") cmd.Dir = dir - out, err := testEnv(cmd).CombinedOutput() + out, err := testenv.CleanCmdEnv(cmd).CombinedOutput() if err != nil { t.Fatalf("building source: %v\n%s", err, out) } cmd = exec.Command(filepath.Join(dir, "a.exe")) - cmd = testEnv(cmd) + cmd = testenv.CleanCmdEnv(cmd) cmd.Env = append(cmd.Env, "GOTRACEBACK=crash") // Set GOGC=off. Because of golang.org/issue/10958, the tight @@ -132,6 +132,7 @@ import ( "fmt" "os" "runtime" + "time" ) func main() { @@ -149,6 +150,8 @@ func main() { <-c } + time.Sleep(time.Millisecond) + // Tell our parent that all the goroutines are executing. if _, err := os.NewFile(3, "pipe").WriteString("x"); err != nil { fmt.Fprintf(os.Stderr, "write to pipe failed: %v\n", err) @@ -184,7 +187,7 @@ func TestPanicSystemstack(t *testing.T) { t.Parallel() cmd := exec.Command(os.Args[0], "testPanicSystemstackInternal") - cmd = testEnv(cmd) + cmd = testenv.CleanCmdEnv(cmd) cmd.Env = append(cmd.Env, "GOTRACEBACK=crash") pr, pw, err := os.Pipe() if err != nil { @@ -249,7 +252,7 @@ func TestSignalExitStatus(t *testing.T) { if err != nil { t.Fatal(err) } - err = testEnv(exec.Command(exe, "SignalExitStatus")).Run() + err = testenv.CleanCmdEnv(exec.Command(exe, "SignalExitStatus")).Run() if err == nil { t.Error("test program succeeded unexpectedly") } else if ee, ok := err.(*exec.ExitError); !ok { diff --git a/libgo/go/runtime/debug.go b/libgo/go/runtime/debug.go index fdd73463aba..7cddd29ed0f 100644 --- a/libgo/go/runtime/debug.go +++ b/libgo/go/runtime/debug.go @@ -15,9 +15,6 @@ import ( // The number of logical CPUs on the local machine can be queried with NumCPU. // This call will go away when the scheduler improves. 
func GOMAXPROCS(n int) int { - if n > _MaxGomaxprocs { - n = _MaxGomaxprocs - } lock(&sched.lock) ret := int(gomaxprocs) unlock(&sched.lock) diff --git a/libgo/go/runtime/export_test.go b/libgo/go/runtime/export_test.go index 6325dcb3948..e385f14c5bc 100644 --- a/libgo/go/runtime/export_test.go +++ b/libgo/go/runtime/export_test.go @@ -149,12 +149,19 @@ func RunSchedLocalQueueEmptyTest(iters int) { } } -var StringHash = stringHash -var BytesHash = bytesHash -var Int32Hash = int32Hash -var Int64Hash = int64Hash -var EfaceHash = efaceHash -var IfaceHash = ifaceHash +var ( + StringHash = stringHash + BytesHash = bytesHash + Int32Hash = int32Hash + Int64Hash = int64Hash + MemHash = memhash + MemHash32 = memhash32 + MemHash64 = memhash64 + EfaceHash = efaceHash + IfaceHash = ifaceHash +) + +var UseAeshash = &useAeshash func MemclrBytes(b []byte) { s := (*slice)(unsafe.Pointer(&b)) @@ -364,3 +371,27 @@ func (rw *RWMutex) Lock() { func (rw *RWMutex) Unlock() { rw.rw.unlock() } + +func MapBucketsCount(m map[int]int) int { + h := *(**hmap)(unsafe.Pointer(&m)) + return 1 << h.B +} + +func MapBucketsPointerIsNil(m map[int]int) bool { + h := *(**hmap)(unsafe.Pointer(&m)) + return h.buckets == nil +} + +func LockOSCounts() (external, internal uint32) { + g := getg() + if g.m.lockedExt+g.m.lockedInt == 0 { + if g.lockedm != 0 { + panic("lockedm on non-locked goroutine") + } + } else { + if g.lockedm == 0 { + panic("nil lockedm on locked goroutine") + } + } + return g.m.lockedExt, g.m.lockedInt +} diff --git a/libgo/go/runtime/extern.go b/libgo/go/runtime/extern.go index 6ca978980f2..36787e38b02 100644 --- a/libgo/go/runtime/extern.go +++ b/libgo/go/runtime/extern.go @@ -184,8 +184,8 @@ func Caller(skip int) (pc uintptr, file string, line int, ok bool) // program counter adjustment. func Callers(skip int, pc []uintptr) int -// GOROOT returns the root of the Go tree. -// It uses the GOROOT environment variable, if set, +// GOROOT returns the root of the Go tree. It uses the +// GOROOT environment variable, if set at process start, // or else the root used during the Go build. func GOROOT() string { s := gogetenv("GOROOT") diff --git a/libgo/go/runtime/gc_test.go b/libgo/go/runtime/gc_test.go index f14e0d5050e..a8c52d206f3 100644 --- a/libgo/go/runtime/gc_test.go +++ b/libgo/go/runtime/gc_test.go @@ -10,12 +10,14 @@ import ( "reflect" "runtime" "runtime/debug" + "sync/atomic" "testing" "time" "unsafe" ) func TestGcSys(t *testing.T) { + t.Skip("does not test anything; https://golang.org/issue/23343") if os.Getenv("GOGC") == "off" { t.Skip("skipping test; GOGC=off in environment") } @@ -171,7 +173,7 @@ func TestPeriodicGC(t *testing.T) { // slack if things are slow. var numGCs uint32 const want = 2 - for i := 0; i < 20 && numGCs < want; i++ { + for i := 0; i < 200 && numGCs < want; i++ { time.Sleep(5 * time.Millisecond) // Test that periodic GC actually happened. @@ -501,3 +503,142 @@ func BenchmarkReadMemStats(b *testing.B) { hugeSink = nil } + +func TestUserForcedGC(t *testing.T) { + // Test that runtime.GC() triggers a GC even if GOGC=off. 
+ defer debug.SetGCPercent(debug.SetGCPercent(-1)) + + var ms1, ms2 runtime.MemStats + runtime.ReadMemStats(&ms1) + runtime.GC() + runtime.ReadMemStats(&ms2) + if ms1.NumGC == ms2.NumGC { + t.Fatalf("runtime.GC() did not trigger GC") + } + if ms1.NumForcedGC == ms2.NumForcedGC { + t.Fatalf("runtime.GC() was not accounted in NumForcedGC") + } +} + +func writeBarrierBenchmark(b *testing.B, f func()) { + runtime.GC() + var ms runtime.MemStats + runtime.ReadMemStats(&ms) + //b.Logf("heap size: %d MB", ms.HeapAlloc>>20) + + // Keep GC running continuously during the benchmark, which in + // turn keeps the write barrier on continuously. + var stop uint32 + done := make(chan bool) + go func() { + for atomic.LoadUint32(&stop) == 0 { + runtime.GC() + } + close(done) + }() + defer func() { + atomic.StoreUint32(&stop, 1) + <-done + }() + + b.ResetTimer() + f() + b.StopTimer() +} + +func BenchmarkWriteBarrier(b *testing.B) { + if runtime.GOMAXPROCS(-1) < 2 { + // We don't want GC to take our time. + b.Skip("need GOMAXPROCS >= 2") + } + + // Construct a large tree both so the GC runs for a while and + // so we have a data structure to manipulate the pointers of. + type node struct { + l, r *node + } + var wbRoots []*node + var mkTree func(level int) *node + mkTree = func(level int) *node { + if level == 0 { + return nil + } + n := &node{mkTree(level - 1), mkTree(level - 1)} + if level == 10 { + // Seed GC with enough early pointers so it + // doesn't accidentally switch to mark 2 when + // it only has the top of the tree. + wbRoots = append(wbRoots, n) + } + return n + } + const depth = 22 // 64 MB + root := mkTree(22) + + writeBarrierBenchmark(b, func() { + var stack [depth]*node + tos := -1 + + // There are two write barriers per iteration, so i+=2. + for i := 0; i < b.N; i += 2 { + if tos == -1 { + stack[0] = root + tos = 0 + } + + // Perform one step of reversing the tree. + n := stack[tos] + if n.l == nil { + tos-- + } else { + n.l, n.r = n.r, n.l + stack[tos] = n.l + stack[tos+1] = n.r + tos++ + } + + if i%(1<<12) == 0 { + // Avoid non-preemptible loops (see issue #10958). + runtime.Gosched() + } + } + }) + + runtime.KeepAlive(wbRoots) +} + +func BenchmarkBulkWriteBarrier(b *testing.B) { + if runtime.GOMAXPROCS(-1) < 2 { + // We don't want GC to take our time. + b.Skip("need GOMAXPROCS >= 2") + } + + // Construct a large set of objects we can copy around. + const heapSize = 64 << 20 + type obj [16]*byte + ptrs := make([]*obj, heapSize/unsafe.Sizeof(obj{})) + for i := range ptrs { + ptrs[i] = new(obj) + } + + writeBarrierBenchmark(b, func() { + const blockSize = 1024 + var pos int + for i := 0; i < b.N; i += blockSize { + // Rotate block. 
+ block := ptrs[pos : pos+blockSize] + first := block[0] + copy(block, block[1:]) + block[blockSize-1] = first + + pos += blockSize + if pos+blockSize > len(ptrs) { + pos = 0 + } + + runtime.Gosched() + } + }) + + runtime.KeepAlive(ptrs) +} diff --git a/libgo/go/runtime/hash32.go b/libgo/go/runtime/hash32.go index dd2e657fe3f..401fe2857d9 100644 --- a/libgo/go/runtime/hash32.go +++ b/libgo/go/runtime/hash32.go @@ -86,6 +86,32 @@ tail: return uintptr(h) } +func memhash32(p unsafe.Pointer, seed uintptr) uintptr { + h := uint32(seed + 4*hashkey[0]) + h ^= readUnaligned32(p) + h = rotl_15(h*m1) * m2 + h ^= h >> 17 + h *= m3 + h ^= h >> 13 + h *= m4 + h ^= h >> 16 + return uintptr(h) +} + +func memhash64(p unsafe.Pointer, seed uintptr) uintptr { + h := uint32(seed + 8*hashkey[0]) + h ^= readUnaligned32(p) + h = rotl_15(h*m1) * m2 + h ^= readUnaligned32(add(p, 4)) + h = rotl_15(h*m1) * m2 + h ^= h >> 17 + h *= m3 + h ^= h >> 13 + h *= m4 + h ^= h >> 16 + return uintptr(h) +} + // Note: in order to get the compiler to issue rotl instructions, we // need to constant fold the shift amount by hand. // TODO: convince the compiler to issue rotl instructions after inlining. diff --git a/libgo/go/runtime/hash64.go b/libgo/go/runtime/hash64.go index f7d4a6f2f2a..5912943a4e9 100644 --- a/libgo/go/runtime/hash64.go +++ b/libgo/go/runtime/hash64.go @@ -86,6 +86,28 @@ tail: return uintptr(h) } +func memhash32(p unsafe.Pointer, seed uintptr) uintptr { + h := uint64(seed + 4*hashkey[0]) + v := uint64(readUnaligned32(p)) + h ^= v + h ^= v << 32 + h = rotl_31(h*m1) * m2 + h ^= h >> 29 + h *= m3 + h ^= h >> 32 + return uintptr(h) +} + +func memhash64(p unsafe.Pointer, seed uintptr) uintptr { + h := uint64(seed + 8*hashkey[0]) + h ^= uint64(readUnaligned32(p)) | uint64(readUnaligned32(add(p, 4)))<<32 + h = rotl_31(h*m1) * m2 + h ^= h >> 29 + h *= m3 + h ^= h >> 32 + return uintptr(h) +} + // Note: in order to get the compiler to issue rotl instructions, we // need to constant fold the shift amount by hand. // TODO: convince the compiler to issue rotl instructions after inlining. diff --git a/libgo/go/runtime/hash_test.go b/libgo/go/runtime/hash_test.go index 167c49eb5f5..54c91609f60 100644 --- a/libgo/go/runtime/hash_test.go +++ b/libgo/go/runtime/hash_test.go @@ -14,6 +14,40 @@ import ( "unsafe" ) +func TestMemHash32Equality(t *testing.T) { + if *UseAeshash { + t.Skip("skipping since AES hash implementation is used") + } + var b [4]byte + r := rand.New(rand.NewSource(1234)) + seed := uintptr(r.Uint64()) + for i := 0; i < 100; i++ { + randBytes(r, b[:]) + got := MemHash32(unsafe.Pointer(&b), seed) + want := MemHash(unsafe.Pointer(&b), seed, 4) + if got != want { + t.Errorf("MemHash32(%x, %v) = %v; want %v", b, seed, got, want) + } + } +} + +func TestMemHash64Equality(t *testing.T) { + if *UseAeshash { + t.Skip("skipping since AES hash implementation is used") + } + var b [8]byte + r := rand.New(rand.NewSource(1234)) + seed := uintptr(r.Uint64()) + for i := 0; i < 100; i++ { + randBytes(r, b[:]) + got := MemHash64(unsafe.Pointer(&b), seed) + want := MemHash(unsafe.Pointer(&b), seed, 8) + if got != want { + t.Errorf("MemHash64(%x, %v) = %v; want %v", b, seed, got, want) + } + } +} + // Smhasher is a torture test for hash functions. // https://code.google.com/p/smhasher/ // This code is a port of some of the Smhasher tests to Go. 
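The hash_test.go hunk above checks that the new fixed-width `memhash32`/`memhash64` agree bit-for-bit with the general `memhash`. A self-contained sketch of that invariant, using a toy FNV-1a hash as a stand-in for the runtime's hashers:

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// hashBytes is a toy general-purpose hash (FNV-1a), standing in for memhash.
func hashBytes(b []byte, seed uint64) uint64 {
	h := seed ^ 14695981039346656037
	for _, c := range b {
		h ^= uint64(c)
		h *= 1099511628211
	}
	return h
}

// hash4 is the width-specialized 4-byte path, standing in for memhash32.
// It hashes the little-endian bytes of v directly, without a byte slice.
func hash4(v uint32, seed uint64) uint64 {
	h := seed ^ 14695981039346656037
	for i := uint(0); i < 4; i++ {
		h ^= uint64((v >> (8 * i)) & 0xff)
		h *= 1099511628211
	}
	return h
}

func main() {
	for v := uint32(0); v < 1000; v++ {
		var b [4]byte
		binary.LittleEndian.PutUint32(b[:], v)
		if hash4(v, 1) != hashBytes(b[:], 1) {
			panic("specialized hash disagrees with general hash")
		}
	}
	fmt.Println("hash4 agrees with hashBytes on all sampled inputs")
}
```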
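Earlier in this patch, the chan_test.go hunk's TestSelectFairness bounds the observed deviation by `4.4172 / (2 * Sqrt(10000))`. The standard error of a fair binomial proportion is `sqrt(0.25/n) = 1/(2*sqrt(n))`, and 4.4172 is (approximately) the two-sided normal quantile for a 1e-5 tail. A few lines reproducing the bound — the quantile value is taken from the test, not recomputed:

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	const trials = 10000
	const z = 4.4172 // normal quantile used by the test for a two-sided 1e-5 tail
	// Standard error of a fair proportion: sqrt(0.25/n) = 1/(2*sqrt(n)).
	bound := z / (2 * math.Sqrt(trials))
	fmt.Printf("allowed |cnt1/trials - 0.5| <= %.4f\n", bound) // ~0.0221
}
```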
diff --git a/libgo/go/runtime/hashmap.go b/libgo/go/runtime/hashmap.go index a3e50cd9221..a1fe49e9305 100644 --- a/libgo/go/runtime/hashmap.go +++ b/libgo/go/runtime/hashmap.go @@ -63,6 +63,8 @@ import ( // themselves, so that the compiler will export them. // //go:linkname makemap runtime.makemap +//go:linkname makemap64 runtime.makemap64 +//go:linkname makemap_small runtime.makemap_small //go:linkname mapaccess1 runtime.mapaccess1 //go:linkname mapaccess2 runtime.mapaccess2 //go:linkname mapaccess1_fat runtime.mapaccess1_fat @@ -77,8 +79,10 @@ const ( bucketCntBits = 3 bucketCnt = 1 << bucketCntBits - // Maximum average load of a bucket that triggers growth. - loadFactor = 6.5 + // Maximum average load of a bucket that triggers growth is 6.5. + // Represent as loadFactorNum/loadFactDen, to allow integer math. + loadFactorNum = 13 + loadFactorDen = 2 // Maximum key or value size to keep inline (instead of mallocing per element). // Must fit in a uint8. @@ -137,12 +141,13 @@ type mapextra struct { // If both key and value do not contain pointers and are inline, then we mark bucket // type as containing no pointers. This avoids scanning such maps. // However, bmap.overflow is a pointer. In order to keep overflow buckets - // alive, we store pointers to all overflow buckets in hmap.overflow. - // Overflow is used only if key and value do not contain pointers. - // overflow[0] contains overflow buckets for hmap.buckets. - // overflow[1] contains overflow buckets for hmap.oldbuckets. + // alive, we store pointers to all overflow buckets in hmap.overflow and h.map.oldoverflow. + // overflow and oldoverflow are only used if key and value do not contain pointers. + // overflow contains overflow buckets for hmap.buckets. + // oldoverflow contains overflow buckets for hmap.oldbuckets. // The indirection allows to store a pointer to the slice in hiter. - overflow [2]*[]*bmap + overflow *[]*bmap + oldoverflow *[]*bmap // nextOverflow holds a pointer to a free overflow bucket. nextOverflow *bmap @@ -171,7 +176,8 @@ type hiter struct { h *hmap buckets unsafe.Pointer // bucket ptr at hash_iter initialization time bptr *bmap // current bucket - overflow [2]*[]*bmap // keeps overflow buckets alive + overflow *[]*bmap // keeps overflow buckets of hmap.buckets alive + oldoverflow *[]*bmap // keeps overflow buckets of hmap.oldbuckets alive startBucket uintptr // bucket iteration started at offset uint8 // intra-bucket offset to start from during iteration (should be big enough to hold bucketCnt-1) wrapped bool // already wrapped around from end of bucket array to beginning @@ -181,6 +187,28 @@ type hiter struct { checkBucket uintptr } +// bucketShift returns 1<<b, optimized for code generation. +func bucketShift(b uint8) uintptr { + if sys.GoarchAmd64|sys.GoarchAmd64p32|sys.Goarch386 != 0 { + b &= sys.PtrSize*8 - 1 // help x86 archs remove shift overflow checks + } + return uintptr(1) << b +} + +// bucketMask returns 1<<b - 1, optimized for code generation. +func bucketMask(b uint8) uintptr { + return bucketShift(b) - 1 +} + +// tophash calculates the tophash value for hash. 
+func tophash(hash uintptr) uint8 { + top := uint8(hash >> (sys.PtrSize*8 - 8)) + if top < minTopHash { + top += minTopHash + } + return top +} + func evacuated(b *bmap) bool { h := b.tophash[0] return h > empty && h < minTopHash @@ -194,6 +222,10 @@ func (b *bmap) setoverflow(t *maptype, ovf *bmap) { *(**bmap)(add(unsafe.Pointer(b), uintptr(t.bucketsize)-sys.PtrSize)) = ovf } +func (b *bmap) keys() unsafe.Pointer { + return add(unsafe.Pointer(b), dataOffset) +} + // incrnoverflow increments h.noverflow. // noverflow counts the number of overflow buckets. // This is used to trigger same-size map growth. @@ -242,7 +274,7 @@ func (h *hmap) newoverflow(t *maptype, b *bmap) *bmap { h.incrnoverflow() if t.bucket.kind&kindNoPointers != 0 { h.createOverflow() - *h.extra.overflow[0] = append(*h.extra.overflow[0], ovf) + *h.extra.overflow = append(*h.extra.overflow, ovf) } b.setoverflow(t, ovf) return ovf @@ -252,97 +284,69 @@ func (h *hmap) createOverflow() { if h.extra == nil { h.extra = new(mapextra) } - if h.extra.overflow[0] == nil { - h.extra.overflow[0] = new([]*bmap) + if h.extra.overflow == nil { + h.extra.overflow = new([]*bmap) } } -// makemap implements a Go map creation make(map[k]v, hint) +func makemap64(t *maptype, hint int64, h *hmap) *hmap { + if int64(int(hint)) != hint { + hint = 0 + } + return makemap(t, int(hint), h) +} + +// makehmap_small implements Go map creation for make(map[k]v) and +// make(map[k]v, hint) when hint is known to be at most bucketCnt +// at compile time and the map needs to be allocated on the heap. +func makemap_small() *hmap { + h := new(hmap) + h.hash0 = fastrand() + return h +} + +// makemap implements Go map creation for make(map[k]v, hint). // If the compiler has determined that the map or the first bucket // can be created on the stack, h and/or bucket may be non-nil. // If h != nil, the map can be created directly in h. -// If bucket != nil, bucket can be used as the first bucket. -func makemap(t *maptype, hint int64, h *hmap, bucket unsafe.Pointer) *hmap { - if sz := unsafe.Sizeof(hmap{}); sz > 48 || sz != t.hmap.size { +// If h.buckets != nil, bucket pointed to can be used as the first bucket. +func makemap(t *maptype, hint int, h *hmap) *hmap { + // The size of hmap should be 48 bytes on 64 bit + // and 28 bytes on 32 bit platforms. + if sz := unsafe.Sizeof(hmap{}); sz != 8+5*sys.PtrSize { println("runtime: sizeof(hmap) =", sz, ", t.hmap.size =", t.hmap.size) throw("bad hmap size") } - if hint < 0 || hint > int64(maxSliceCap(t.bucket.size)) { + if hint < 0 || hint > int(maxSliceCap(t.bucket.size)) { hint = 0 } - if !ismapkey(t.key) { - throw("runtime.makemap: unsupported map key type") - } - - // check compiler's and reflect's math - if t.key.size > maxKeySize && (!t.indirectkey || t.keysize != uint8(sys.PtrSize)) || - t.key.size <= maxKeySize && (t.indirectkey || t.keysize != uint8(t.key.size)) { - throw("key size wrong") - } - if t.elem.size > maxValueSize && (!t.indirectvalue || t.valuesize != uint8(sys.PtrSize)) || - t.elem.size <= maxValueSize && (t.indirectvalue || t.valuesize != uint8(t.elem.size)) { - throw("value size wrong") - } - - // invariants we depend on. We should probably check these at compile time - // somewhere, but for now we'll do it here. 
- if t.key.align > bucketCnt { - throw("key align too big") - } - if t.elem.align > bucketCnt { - throw("value align too big") - } - if t.key.size%uintptr(t.key.align) != 0 { - throw("key size not a multiple of key align") - } - if t.elem.size%uintptr(t.elem.align) != 0 { - throw("value size not a multiple of value align") - } - if bucketCnt < 8 { - throw("bucketsize too small for proper alignment") - } - if dataOffset%uintptr(t.key.align) != 0 { - throw("need padding in bucket (key)") - } - if dataOffset%uintptr(t.elem.align) != 0 { - throw("need padding in bucket (value)") + // initialize Hmap + if h == nil { + h = (*hmap)(newobject(t.hmap)) } + h.hash0 = fastrand() // find size parameter which will hold the requested # of elements B := uint8(0) - for ; overLoadFactor(hint, B); B++ { + for overLoadFactor(hint, B) { + B++ } + h.B = B // allocate initial hash table // if B == 0, the buckets field is allocated lazily later (in mapassign) // If hint is large zeroing this memory could take a while. - buckets := bucket - var extra *mapextra - if B != 0 { + if h.B != 0 { var nextOverflow *bmap - buckets, nextOverflow = makeBucketArray(t, B) + h.buckets, nextOverflow = makeBucketArray(t, h.B) if nextOverflow != nil { - extra = new(mapextra) - extra.nextOverflow = nextOverflow + h.extra = new(mapextra) + h.extra.nextOverflow = nextOverflow } } - // initialize Hmap - if h == nil { - h = (*hmap)(newobject(t.hmap)) - } - h.count = 0 - h.B = B - h.extra = extra - h.flags = 0 - h.hash0 = fastrand() - h.buckets = buckets - h.oldbuckets = nil - h.nevacuate = 0 - h.noverflow = 0 - return h } @@ -353,7 +357,7 @@ func makemap(t *maptype, hint int64, h *hmap, bucket unsafe.Pointer) *hmap { // hold onto it for very long. func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil)) + callerpc := getcallerpc() pc := funcPC(mapaccess1) racereadpc(unsafe.Pointer(h), callerpc, pc) raceReadObjectPC(t.key, key, callerpc, pc) @@ -370,7 +374,7 @@ func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { hashfn := t.key.hashfn equalfn := t.key.equalfn hash := hashfn(key, uintptr(h.hash0)) - m := uintptr(1)<<h.B - 1 + m := bucketMask(h.B) b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { if !h.sameSizeGrow() { @@ -382,11 +386,8 @@ func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { b = oldb } } - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } - for { + top := tophash(hash) + for ; b != nil; b = b.overflow(t) { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { continue @@ -403,16 +404,13 @@ func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { return v } } - b = b.overflow(t) - if b == nil { - return unsafe.Pointer(&zeroVal[0]) - } } + return unsafe.Pointer(&zeroVal[0]) } func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool) { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil)) + callerpc := getcallerpc() pc := funcPC(mapaccess2) racereadpc(unsafe.Pointer(h), callerpc, pc) raceReadObjectPC(t.key, key, callerpc, pc) @@ -429,7 +427,7 @@ func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool) hashfn := t.key.hashfn equalfn := t.key.equalfn hash := hashfn(key, uintptr(h.hash0)) - m := uintptr(1)<<h.B - 1 + m := bucketMask(h.B) b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + 
(hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { if !h.sameSizeGrow() { @@ -441,11 +439,8 @@ func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool) b = oldb } } - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } - for { + top := tophash(hash) + for ; b != nil; b = b.overflow(t) { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { continue @@ -462,11 +457,8 @@ func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool) return v, true } } - b = b.overflow(t) - if b == nil { - return unsafe.Pointer(&zeroVal[0]), false - } } + return unsafe.Pointer(&zeroVal[0]), false } // returns both key and value. Used by map iterator @@ -477,7 +469,7 @@ func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe hashfn := t.key.hashfn equalfn := t.key.equalfn hash := hashfn(key, uintptr(h.hash0)) - m := uintptr(1)<<h.B - 1 + m := bucketMask(h.B) b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { if !h.sameSizeGrow() { @@ -489,11 +481,8 @@ func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe b = oldb } } - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } - for { + top := tophash(hash) + for ; b != nil; b = b.overflow(t) { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { continue @@ -510,11 +499,8 @@ func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe return k, v } } - b = b.overflow(t) - if b == nil { - return nil, nil - } } + return nil, nil } func mapaccess1_fat(t *maptype, h *hmap, key, zero unsafe.Pointer) unsafe.Pointer { @@ -539,7 +525,7 @@ func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { panic(plainError("assignment to entry in nil map")) } if raceenabled { - callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil)) + callerpc := getcallerpc() pc := funcPC(mapassign) racewritepc(unsafe.Pointer(h), callerpc, pc) raceReadObjectPC(t.key, key, callerpc, pc) @@ -559,19 +545,16 @@ func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { h.flags |= hashWriting if h.buckets == nil { - h.buckets = newarray(t.bucket, 1) + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) } again: - bucket := hash & (uintptr(1)<<h.B - 1) + bucket := hash & bucketMask(h.B) if h.growing() { growWork(t, h, bucket) } b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } + top := tophash(hash) var inserti *uint8 var insertk unsafe.Pointer @@ -611,7 +594,7 @@ again: // If we hit the max load factor or we have too many overflow buckets, // and we're not already in the middle of growing, start growing. 
- if !h.growing() && (overLoadFactor(int64(h.count), h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { hashGrow(t, h) goto again // Growing the table invalidates everything, so try again } @@ -651,7 +634,7 @@ done: func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil)) + callerpc := getcallerpc() pc := funcPC(mapdelete) racewritepc(unsafe.Pointer(h), callerpc, pc) raceReadObjectPC(t.key, key, callerpc, pc) @@ -674,16 +657,14 @@ func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) { // in which case we have not actually done a write (delete). h.flags |= hashWriting - bucket := hash & (uintptr(1)<<h.B - 1) + bucket := hash & bucketMask(h.B) if h.growing() { growWork(t, h, bucket) } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } - for { + b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) + top := tophash(hash) +search: + for ; b != nil; b = b.overflow(t) { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { continue @@ -696,53 +677,58 @@ func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) { if !equalfn(key, k2) { continue } + // Only clear key if there are pointers in it. if t.indirectkey { *(*unsafe.Pointer)(k) = nil - } else { - typedmemclr(t.key, k) + } else if t.key.kind&kindNoPointers == 0 { + memclrHasPointers(k, t.key.size) } - v := unsafe.Pointer(uintptr(unsafe.Pointer(b)) + dataOffset + bucketCnt*uintptr(t.keysize) + i*uintptr(t.valuesize)) - if t.indirectvalue { - *(*unsafe.Pointer)(v) = nil - } else { - typedmemclr(t.elem, v) + // Only clear value if there are pointers in it. + if t.indirectvalue || t.elem.kind&kindNoPointers == 0 { + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) + if t.indirectvalue { + *(*unsafe.Pointer)(v) = nil + } else { + memclrHasPointers(v, t.elem.size) + } } b.tophash[i] = empty h.count-- - goto done - } - b = b.overflow(t) - if b == nil { - goto done + break search } } -done: if h.flags&hashWriting == 0 { throw("concurrent map writes") } h.flags &^= hashWriting } +// mapiterinit initializes the hiter struct used for ranging over maps. +// The hiter struct pointed to by 'it' is allocated on the stack +// by the compilers order pass or on the heap by reflect_mapiterinit. +// Both need to have zeroed hiter since the struct contains pointers. +// Gccgo-specific: *it need not be zeroed by the compiler, +// and it's cheaper to zero it here. func mapiterinit(t *maptype, h *hmap, it *hiter) { - // Clear pointer fields so garbage collector does not complain. it.key = nil it.value = nil it.t = nil it.h = nil it.buckets = nil it.bptr = nil - it.overflow[0] = nil - it.overflow[1] = nil + it.overflow = nil + it.oldoverflow = nil + it.wrapped = false + it.i = 0 + it.checkBucket = 0 if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil)) + callerpc := getcallerpc() racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapiterinit)) } if h == nil || h.count == 0 { - it.key = nil - it.value = nil return } @@ -762,6 +748,7 @@ func mapiterinit(t *maptype, h *hmap, it *hiter) { // while we are iterating. 
h.createOverflow() it.overflow = h.extra.overflow + it.oldoverflow = h.extra.oldoverflow } // decide where to start @@ -769,16 +756,14 @@ func mapiterinit(t *maptype, h *hmap, it *hiter) { if h.B > 31-bucketCntBits { r += uintptr(fastrand()) << 31 } - it.startBucket = r & (uintptr(1)<<h.B - 1) + it.startBucket = r & bucketMask(h.B) it.offset = uint8(r >> h.B & (bucketCnt - 1)) // iterator state it.bucket = it.startBucket - it.wrapped = false - it.bptr = nil // Remember we have an iterator. - // Can run concurrently with another hash_iter_init(). + // Can run concurrently with another mapiterinit(). if old := h.flags; old&(iterator|oldIterator) != iterator|oldIterator { atomic.Or8(&h.flags, iterator|oldIterator) } @@ -789,7 +774,7 @@ func mapiterinit(t *maptype, h *hmap, it *hiter) { func mapiternext(it *hiter) { h := it.h if raceenabled { - callerpc := getcallerpc(unsafe.Pointer( /* &it */ nil)) + callerpc := getcallerpc() racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapiternext)) } if h.flags&hashWriting != 0 { @@ -829,7 +814,7 @@ next: checkBucket = noCheck } bucket++ - if bucket == uintptr(1)<<it.B { + if bucket == bucketShift(it.B) { bucket = 0 it.wrapped = true } @@ -837,90 +822,75 @@ next: } for ; i < bucketCnt; i++ { offi := (i + it.offset) & (bucketCnt - 1) + if b.tophash[offi] == empty || b.tophash[offi] == evacuatedEmpty { + continue + } k := add(unsafe.Pointer(b), dataOffset+uintptr(offi)*uintptr(t.keysize)) + if t.indirectkey { + k = *((*unsafe.Pointer)(k)) + } v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+uintptr(offi)*uintptr(t.valuesize)) - if b.tophash[offi] != empty && b.tophash[offi] != evacuatedEmpty { - if checkBucket != noCheck && !h.sameSizeGrow() { - // Special case: iterator was started during a grow to a larger size - // and the grow is not done yet. We're working on a bucket whose - // oldbucket has not been evacuated yet. Or at least, it wasn't - // evacuated when we started the bucket. So we're iterating - // through the oldbucket, skipping any keys that will go - // to the other new bucket (each oldbucket expands to two - // buckets during a grow). - k2 := k - if t.indirectkey { - k2 = *((*unsafe.Pointer)(k2)) - } - if t.reflexivekey || equalfn(k2, k2) { - // If the item in the oldbucket is not destined for - // the current new bucket in the iteration, skip it. - hash := hashfn(k2, uintptr(h.hash0)) - if hash&(uintptr(1)<<it.B-1) != checkBucket { - continue - } - } else { - // Hash isn't repeatable if k != k (NaNs). We need a - // repeatable and randomish choice of which direction - // to send NaNs during evacuation. We'll use the low - // bit of tophash to decide which way NaNs go. - // NOTE: this case is why we need two evacuate tophash - // values, evacuatedX and evacuatedY, that differ in - // their low bit. - if checkBucket>>(it.B-1) != uintptr(b.tophash[offi]&1) { - continue - } - } - } - if b.tophash[offi] != evacuatedX && b.tophash[offi] != evacuatedY { - // this is the golden data, we can return it. - if t.indirectkey { - k = *((*unsafe.Pointer)(k)) - } - it.key = k - if t.indirectvalue { - v = *((*unsafe.Pointer)(v)) + if checkBucket != noCheck && !h.sameSizeGrow() { + // Special case: iterator was started during a grow to a larger size + // and the grow is not done yet. We're working on a bucket whose + // oldbucket has not been evacuated yet. Or at least, it wasn't + // evacuated when we started the bucket. 
So we're iterating + // through the oldbucket, skipping any keys that will go + // to the other new bucket (each oldbucket expands to two + // buckets during a grow). + if t.reflexivekey || equalfn(k, k) { + // If the item in the oldbucket is not destined for + // the current new bucket in the iteration, skip it. + hash := hashfn(k, uintptr(h.hash0)) + if hash&bucketMask(it.B) != checkBucket { + continue } - it.value = v } else { - // The hash table has grown since the iterator was started. - // The golden data for this key is now somewhere else. - k2 := k - if t.indirectkey { - k2 = *((*unsafe.Pointer)(k2)) - } - if t.reflexivekey || equalfn(k2, k2) { - // Check the current hash table for the data. - // This code handles the case where the key - // has been deleted, updated, or deleted and reinserted. - // NOTE: we need to regrab the key as it has potentially been - // updated to an equal() but not identical key (e.g. +0.0 vs -0.0). - rk, rv := mapaccessK(t, h, k2) - if rk == nil { - continue // key has been deleted - } - it.key = rk - it.value = rv - } else { - // if key!=key then the entry can't be deleted or - // updated, so we can just return it. That's lucky for - // us because when key!=key we can't look it up - // successfully in the current table. - it.key = k2 - if t.indirectvalue { - v = *((*unsafe.Pointer)(v)) - } - it.value = v + // Hash isn't repeatable if k != k (NaNs). We need a + // repeatable and randomish choice of which direction + // to send NaNs during evacuation. We'll use the low + // bit of tophash to decide which way NaNs go. + // NOTE: this case is why we need two evacuate tophash + // values, evacuatedX and evacuatedY, that differ in + // their low bit. + if checkBucket>>(it.B-1) != uintptr(b.tophash[offi]&1) { + continue } } - it.bucket = bucket - if it.bptr != b { // avoid unnecessary write barrier; see issue 14921 - it.bptr = b + } + if (b.tophash[offi] != evacuatedX && b.tophash[offi] != evacuatedY) || + !(t.reflexivekey || equalfn(k, k)) { + // This is the golden data, we can return it. + // OR + // key!=key, so the entry can't be deleted or updated, so we can just return it. + // That's lucky for us because when key!=key we can't look it up successfully. + it.key = k + if t.indirectvalue { + v = *((*unsafe.Pointer)(v)) } - it.i = i + 1 - it.checkBucket = checkBucket - return + it.value = v + } else { + // The hash table has grown since the iterator was started. + // The golden data for this key is now somewhere else. + // Check the current hash table for the data. + // This code handles the case where the key + // has been deleted, updated, or deleted and reinserted. + // NOTE: we need to regrab the key as it has potentially been + // updated to an equal() but not identical key (e.g. +0.0 vs -0.0). + rk, rv := mapaccessK(t, h, k) + if rk == nil { + continue // key has been deleted + } + it.key = rk + it.value = rv } + it.bucket = bucket + if it.bptr != b { // avoid unnecessary write barrier; see issue 14921 + it.bptr = b + } + it.i = i + 1 + it.checkBucket = checkBucket + return } b = b.overflow(t) i = 0 @@ -928,7 +898,7 @@ next: } func makeBucketArray(t *maptype, b uint8) (buckets unsafe.Pointer, nextOverflow *bmap) { - base := uintptr(1 << b) + base := bucketShift(b) nbuckets := base // For small b, overflow buckets are unlikely. // Avoid the overhead of the calculation. 
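// A worked example of the sizing in the next hunk, assuming bucketCnt = 8 as
// elsewhere in this file: for b = 8 the base array is bucketShift(8) = 256
// buckets, and the estimate adds bucketShift(8-4) = 16 overflow buckets, so
// 272 buckets are carved out of one allocation before roundupsize rounds the
// request up to a size class.
//
//	base := bucketShift(8)     // 256 regular buckets
//	nbuckets := base + base>>4 // 256 + 16 = 272 buckets in a single block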
@@ -936,7 +906,7 @@ func makeBucketArray(t *maptype, b uint8) (buckets unsafe.Pointer, nextOverflow // Add on the estimated number of overflow buckets // required to insert the median number of elements // used with this value of b. - nbuckets += 1 << (b - 4) + nbuckets += bucketShift(b - 4) sz := t.bucket.size * nbuckets up := roundupsize(sz) if up != sz { @@ -962,7 +932,7 @@ func hashGrow(t *maptype, h *hmap) { // Otherwise, there are too many overflow buckets, // so keep the same number of buckets and "grow" laterally. bigger := uint8(1) - if !overLoadFactor(int64(h.count), h.B) { + if !overLoadFactor(h.count+1, h.B) { bigger = 0 h.flags |= sameSizeGrow } @@ -981,13 +951,13 @@ func hashGrow(t *maptype, h *hmap) { h.nevacuate = 0 h.noverflow = 0 - if h.extra != nil && h.extra.overflow[0] != nil { + if h.extra != nil && h.extra.overflow != nil { // Promote current overflow buckets to the old generation. - if h.extra.overflow[1] != nil { - throw("overflow is not nil") + if h.extra.oldoverflow != nil { + throw("oldoverflow is not nil") } - h.extra.overflow[1] = h.extra.overflow[0] - h.extra.overflow[0] = nil + h.extra.oldoverflow = h.extra.overflow + h.extra.overflow = nil } if nextOverflow != nil { if h.extra == nil { @@ -1001,9 +971,8 @@ func hashGrow(t *maptype, h *hmap) { } // overLoadFactor reports whether count items placed in 1<<B buckets is over loadFactor. -func overLoadFactor(count int64, B uint8) bool { - // TODO: rewrite to use integer math and comparison? - return count >= bucketCnt && float32(count) >= loadFactor*float32((uint64(1)<<B)) +func overLoadFactor(count int, B uint8) bool { + return count > bucketCnt && uintptr(count) > loadFactorNum*(bucketShift(B)/loadFactorDen) } // tooManyOverflowBuckets reports whether noverflow buckets is too many for a map with 1<<B buckets. @@ -1014,10 +983,11 @@ func tooManyOverflowBuckets(noverflow uint16, B uint8) bool { // If the threshold is too high, maps that grow and shrink can hold on to lots of unused memory. // "too many" means (approximately) as many overflow buckets as regular buckets. // See incrnoverflow for more details. - if B < 16 { - return noverflow >= uint16(1)<<B + if B > 15 { + B = 15 } - return noverflow >= 1<<15 + // The compiler doesn't see here that B < 16; mask B to generate shorter shift code. + return noverflow >= uint16(1)<<(B&15) } // growing reports whether h is growing. The growth may be to the same size or bigger. @@ -1036,7 +1006,7 @@ func (h *hmap) noldbuckets() uintptr { if !h.sameSizeGrow() { oldB-- } - return uintptr(1) << oldB + return bucketShift(oldB) } // oldbucketmask provides a mask that can be applied to calculate n % noldbuckets(). @@ -1060,33 +1030,38 @@ func bucketEvacuated(t *maptype, h *hmap, bucket uintptr) bool { return evacuated(b) } +// evacDst is an evacuation destination. +type evacDst struct { + b *bmap // current destination bucket + i int // key/val index into b + k unsafe.Pointer // pointer to current key storage + v unsafe.Pointer // pointer to current value storage +} + func evacuate(t *maptype, h *hmap, oldbucket uintptr) { b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) newbit := h.noldbuckets() hashfn := t.key.hashfn - equalfn := t.key.equalfn if !evacuated(b) { // TODO: reuse overflow buckets instead of using new ones, if there // is no iterator using the old buckets. (If !oldIterator.) 
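// On the overLoadFactor rewrite a few hunks above: the old float32
// comparison becomes pure integer math. A minimal sketch, assuming the 6.5
// load factor is expressed as loadFactorNum/loadFactorDen = 13/2 (those
// constants are defined elsewhere in hashmap.go and do not appear in this
// diff):
func overLoadFactorSketch(count int, B uint8) bool {
	const loadFactorNum, loadFactorDen = 13, 2 // assumed values; 6.5 == 13/2
	// Dividing before multiplying keeps the product from overflowing for large B.
	return count > bucketCnt && uintptr(count) > loadFactorNum*(bucketShift(B)/loadFactorDen)
}
// For B = 5 (32 buckets) the map grows once an insert would push the count
// past 13*(32/2) = 208 entries, i.e. 6.5 entries per bucket on average.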
- var ( - x, y *bmap // current low/high buckets in new map - xi, yi int // key/val indices into x and y - xk, yk unsafe.Pointer // pointers to current x and y key storage - xv, yv unsafe.Pointer // pointers to current x and y value storage - ) - x = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) - xi = 0 - xk = add(unsafe.Pointer(x), dataOffset) - xv = add(xk, bucketCnt*uintptr(t.keysize)) + // xy contains the x and y (low and high) evacuation destinations. + var xy [2]evacDst + x := &xy[0] + x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) + x.k = add(unsafe.Pointer(x.b), dataOffset) + x.v = add(x.k, bucketCnt*uintptr(t.keysize)) + if !h.sameSizeGrow() { // Only calculate y pointers if we're growing bigger. // Otherwise GC can see bad pointers. - y = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) - yi = 0 - yk = add(unsafe.Pointer(y), dataOffset) - yv = add(yk, bucketCnt*uintptr(t.keysize)) + y := &xy[1] + y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) + y.k = add(unsafe.Pointer(y.b), dataOffset) + y.v = add(y.k, bucketCnt*uintptr(t.keysize)) } + for ; b != nil; b = b.overflow(t) { k := add(unsafe.Pointer(b), dataOffset) v := add(k, bucketCnt*uintptr(t.keysize)) @@ -1103,122 +1078,102 @@ func evacuate(t *maptype, h *hmap, oldbucket uintptr) { if t.indirectkey { k2 = *((*unsafe.Pointer)(k2)) } - useX := true + var useY uint8 if !h.sameSizeGrow() { // Compute hash to make our evacuation decision (whether we need // to send this key/value to bucket x or bucket y). hash := hashfn(k2, uintptr(h.hash0)) - if h.flags&iterator != 0 { - if !t.reflexivekey && !equalfn(k2, k2) { - // If key != key (NaNs), then the hash could be (and probably - // will be) entirely different from the old hash. Moreover, - // it isn't reproducible. Reproducibility is required in the - // presence of iterators, as our evacuation decision must - // match whatever decision the iterator made. - // Fortunately, we have the freedom to send these keys either - // way. Also, tophash is meaningless for these kinds of keys. - // We let the low bit of tophash drive the evacuation decision. - // We recompute a new random tophash for the next level so - // these keys will get evenly distributed across all buckets - // after multiple grows. - if top&1 != 0 { - hash |= newbit - } else { - hash &^= newbit - } - top = uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } + if h.flags&iterator != 0 && !t.reflexivekey && !t.key.equalfn(k2, k2) { + // If key != key (NaNs), then the hash could be (and probably + // will be) entirely different from the old hash. Moreover, + // it isn't reproducible. Reproducibility is required in the + // presence of iterators, as our evacuation decision must + // match whatever decision the iterator made. + // Fortunately, we have the freedom to send these keys either + // way. Also, tophash is meaningless for these kinds of keys. + // We let the low bit of tophash drive the evacuation decision. + // We recompute a new random tophash for the next level so + // these keys will get evenly distributed across all buckets + // after multiple grows. 
+ useY = top & 1 + top = tophash(hash) + } else { + if hash&newbit != 0 { + useY = 1 } } - useX = hash&newbit == 0 } - if useX { - b.tophash[i] = evacuatedX - if xi == bucketCnt { - newx := h.newoverflow(t, x) - x = newx - xi = 0 - xk = add(unsafe.Pointer(x), dataOffset) - xv = add(xk, bucketCnt*uintptr(t.keysize)) - } - x.tophash[xi] = top - if t.indirectkey { - *(*unsafe.Pointer)(xk) = k2 // copy pointer - } else { - typedmemmove(t.key, xk, k) // copy value - } - if t.indirectvalue { - *(*unsafe.Pointer)(xv) = *(*unsafe.Pointer)(v) - } else { - typedmemmove(t.elem, xv, v) - } - xi++ - xk = add(xk, uintptr(t.keysize)) - xv = add(xv, uintptr(t.valuesize)) + + if evacuatedX+1 != evacuatedY { + throw("bad evacuatedN") + } + + b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY + dst := &xy[useY] // evacuation destination + + if dst.i == bucketCnt { + dst.b = h.newoverflow(t, dst.b) + dst.i = 0 + dst.k = add(unsafe.Pointer(dst.b), dataOffset) + dst.v = add(dst.k, bucketCnt*uintptr(t.keysize)) + } + dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check + if t.indirectkey { + *(*unsafe.Pointer)(dst.k) = k2 // copy pointer } else { - b.tophash[i] = evacuatedY - if yi == bucketCnt { - newy := h.newoverflow(t, y) - y = newy - yi = 0 - yk = add(unsafe.Pointer(y), dataOffset) - yv = add(yk, bucketCnt*uintptr(t.keysize)) - } - y.tophash[yi] = top - if t.indirectkey { - *(*unsafe.Pointer)(yk) = k2 - } else { - typedmemmove(t.key, yk, k) - } - if t.indirectvalue { - *(*unsafe.Pointer)(yv) = *(*unsafe.Pointer)(v) - } else { - typedmemmove(t.elem, yv, v) - } - yi++ - yk = add(yk, uintptr(t.keysize)) - yv = add(yv, uintptr(t.valuesize)) + typedmemmove(t.key, dst.k, k) // copy value } + if t.indirectvalue { + *(*unsafe.Pointer)(dst.v) = *(*unsafe.Pointer)(v) + } else { + typedmemmove(t.elem, dst.v, v) + } + dst.i++ + // These updates might push these pointers past the end of the + // key or value arrays. That's ok, as we have the overflow pointer + // at the end of the bucket to protect against pointing past the + // end of the bucket. + dst.k = add(dst.k, uintptr(t.keysize)) + dst.v = add(dst.v, uintptr(t.valuesize)) } } // Unlink the overflow buckets & clear key/value to help GC. - if h.flags&oldIterator == 0 { - b = (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) + if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) // Preserve b.tophash because the evacuation // state is maintained there. - if t.bucket.kind&kindNoPointers == 0 { - memclrHasPointers(add(unsafe.Pointer(b), dataOffset), uintptr(t.bucketsize)-dataOffset) - } else { - memclrNoHeapPointers(add(unsafe.Pointer(b), dataOffset), uintptr(t.bucketsize)-dataOffset) - } + ptr := add(b, dataOffset) + n := uintptr(t.bucketsize) - dataOffset + memclrHasPointers(ptr, n) } } - // Advance evacuation mark if oldbucket == h.nevacuate { - h.nevacuate = oldbucket + 1 - // Experiments suggest that 1024 is overkill by at least an order of magnitude. - // Put it in there as a safeguard anyway, to ensure O(1) behavior. - stop := h.nevacuate + 1024 - if stop > newbit { - stop = newbit - } - for h.nevacuate != stop && bucketEvacuated(t, h, h.nevacuate) { - h.nevacuate++ - } - if h.nevacuate == newbit { // newbit == # of oldbuckets - // Growing is all done. Free old main bucket array. - h.oldbuckets = nil - // Can discard old overflow buckets as well. 
- // If they are still referenced by an iterator, - // then the iterator holds a pointer to the slice. - if h.extra != nil { - h.extra.overflow[1] = nil - } - h.flags &^= sameSizeGrow + advanceEvacuationMark(h, t, newbit) + } +} + +func advanceEvacuationMark(h *hmap, t *maptype, newbit uintptr) { + h.nevacuate++ + // Experiments suggest that 1024 is overkill by at least an order of magnitude. + // Put it in there as a safeguard anyway, to ensure O(1) behavior. + stop := h.nevacuate + 1024 + if stop > newbit { + stop = newbit + } + for h.nevacuate != stop && bucketEvacuated(t, h, h.nevacuate) { + h.nevacuate++ + } + if h.nevacuate == newbit { // newbit == # of oldbuckets + // Growing is all done. Free old main bucket array. + h.oldbuckets = nil + // Can discard old overflow buckets as well. + // If they are still referenced by an iterator, + // then the iterator holds a pointer to the slice. + if h.extra != nil { + h.extra.oldoverflow = nil } + h.flags &^= sameSizeGrow } } @@ -1230,7 +1185,45 @@ func ismapkey(t *_type) bool { //go:linkname reflect_makemap reflect.makemap func reflect_makemap(t *maptype, cap int) *hmap { - return makemap(t, int64(cap), nil, nil) + // Check invariants and reflect's math. + if sz := unsafe.Sizeof(hmap{}); sz != t.hmap.size { + println("runtime: sizeof(hmap) =", sz, ", t.hmap.size =", t.hmap.size) + throw("bad hmap size") + } + if !ismapkey(t.key) { + throw("runtime.reflect_makemap: unsupported map key type") + } + if t.key.size > maxKeySize && (!t.indirectkey || t.keysize != uint8(sys.PtrSize)) || + t.key.size <= maxKeySize && (t.indirectkey || t.keysize != uint8(t.key.size)) { + throw("key size wrong") + } + if t.elem.size > maxValueSize && (!t.indirectvalue || t.valuesize != uint8(sys.PtrSize)) || + t.elem.size <= maxValueSize && (t.indirectvalue || t.valuesize != uint8(t.elem.size)) { + throw("value size wrong") + } + if t.key.align > bucketCnt { + throw("key align too big") + } + if t.elem.align > bucketCnt { + throw("value align too big") + } + if t.key.size%uintptr(t.key.align) != 0 { + throw("key size not a multiple of key align") + } + if t.elem.size%uintptr(t.elem.align) != 0 { + throw("value size not a multiple of value align") + } + if bucketCnt < 8 { + throw("bucketsize too small for proper alignment") + } + if dataOffset%uintptr(t.key.align) != 0 { + throw("need padding in bucket (key)") + } + if dataOffset%uintptr(t.elem.align) != 0 { + throw("need padding in bucket (value)") + } + + return makemap(t, cap, nil) } //go:linkname reflect_mapaccess reflect.mapaccess @@ -1277,7 +1270,7 @@ func reflect_maplen(h *hmap) int { return 0 } if raceenabled { - callerpc := getcallerpc(unsafe.Pointer( /* &h */ nil)) + callerpc := getcallerpc() racereadpc(unsafe.Pointer(h), callerpc, funcPC(reflect_maplen)) } return h.count diff --git a/libgo/go/runtime/hashmap_fast.go b/libgo/go/runtime/hashmap_fast.go index bec8fdac14e..e0fc9815131 100644 --- a/libgo/go/runtime/hashmap_fast.go +++ b/libgo/go/runtime/hashmap_fast.go @@ -11,7 +11,7 @@ import ( func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil)) + callerpc := getcallerpc() racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast32)) } if h == nil || h.count == 0 { @@ -26,7 +26,7 @@ func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { b = (*bmap)(h.buckets) } else { hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - m := uintptr(1)<<h.B - 1 + m := 
bucketMask(h.B) b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { if !h.sameSizeGrow() { @@ -39,28 +39,19 @@ func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { } } } - for { - for i := uintptr(0); i < bucketCnt; i++ { - k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4))) - if k != key { - continue + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { + if *(*uint32)(k) == key && b.tophash[i] != empty { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) } - x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check - if x == empty { - continue - } - return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) - } - b = b.overflow(t) - if b == nil { - return unsafe.Pointer(&zeroVal[0]) } } + return unsafe.Pointer(&zeroVal[0]) } func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil)) + callerpc := getcallerpc() racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast32)) } if h == nil || h.count == 0 { @@ -75,7 +66,7 @@ func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) { b = (*bmap)(h.buckets) } else { hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - m := uintptr(1)<<h.B - 1 + m := bucketMask(h.B) b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { if !h.sameSizeGrow() { @@ -88,28 +79,19 @@ func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) { } } } - for { - for i := uintptr(0); i < bucketCnt; i++ { - k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4))) - if k != key { - continue - } - x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check - if x == empty { - continue + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { + if *(*uint32)(k) == key && b.tophash[i] != empty { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)), true } - return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)), true - } - b = b.overflow(t) - if b == nil { - return unsafe.Pointer(&zeroVal[0]), false } } + return unsafe.Pointer(&zeroVal[0]), false } func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil)) + callerpc := getcallerpc() racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast64)) } if h == nil || h.count == 0 { @@ -124,7 +106,7 @@ func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { b = (*bmap)(h.buckets) } else { hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - m := uintptr(1)<<h.B - 1 + m := bucketMask(h.B) b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { if !h.sameSizeGrow() { @@ -137,28 +119,19 @@ func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { } } } - for { - for i := uintptr(0); i < bucketCnt; i++ { - k := *((*uint64)(add(unsafe.Pointer(b), dataOffset+i*8))) - if k != key { - continue + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { + if *(*uint64)(k) == key && b.tophash[i] != empty { + return add(unsafe.Pointer(b), 
dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) } - x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check - if x == empty { - continue - } - return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) - } - b = b.overflow(t) - if b == nil { - return unsafe.Pointer(&zeroVal[0]) } } + return unsafe.Pointer(&zeroVal[0]) } func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil)) + callerpc := getcallerpc() racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast64)) } if h == nil || h.count == 0 { @@ -173,7 +146,7 @@ func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) { b = (*bmap)(h.buckets) } else { hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - m := uintptr(1)<<h.B - 1 + m := bucketMask(h.B) b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { if !h.sameSizeGrow() { @@ -186,28 +159,19 @@ func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) { } } } - for { - for i := uintptr(0); i < bucketCnt; i++ { - k := *((*uint64)(add(unsafe.Pointer(b), dataOffset+i*8))) - if k != key { - continue - } - x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check - if x == empty { - continue + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { + if *(*uint64)(k) == key && b.tophash[i] != empty { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)), true } - return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)), true - } - b = b.overflow(t) - if b == nil { - return unsafe.Pointer(&zeroVal[0]), false } } + return unsafe.Pointer(&zeroVal[0]), false } func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil)) + callerpc := getcallerpc() racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_faststr)) } if h == nil || h.count == 0 { @@ -222,13 +186,9 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { b := (*bmap)(h.buckets) if key.len < 32 { // short key, doing lots of comparisons is ok - for i := uintptr(0); i < bucketCnt; i++ { - x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check - if x == empty { - continue - } - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) - if k.len != key.len { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] == empty { continue } if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { @@ -239,13 +199,9 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { } // long key, try not to do more comparisons than necessary keymaybe := uintptr(bucketCnt) - for i := uintptr(0); i < bucketCnt; i++ { - x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check - if x == empty { - continue - } - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) - if k.len != key.len { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] == empty { continue } if k.str == key.str { @@ -275,7 +231,7 @@ func 
mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { } dohash: hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) - m := uintptr(1)<<h.B - 1 + m := bucketMask(h.B) b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { if !h.sameSizeGrow() { @@ -287,34 +243,24 @@ dohash: b = oldb } } - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } - for { - for i := uintptr(0); i < bucketCnt; i++ { - x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check - if x != top { - continue - } - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) - if k.len != key.len { + top := tophash(hash) + for ; b != nil; b = b.overflow(t) { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] != top { continue } if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) } } - b = b.overflow(t) - if b == nil { - return unsafe.Pointer(&zeroVal[0]) - } } + return unsafe.Pointer(&zeroVal[0]) } func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil)) + callerpc := getcallerpc() racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_faststr)) } if h == nil || h.count == 0 { @@ -329,13 +275,9 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) { b := (*bmap)(h.buckets) if key.len < 32 { // short key, doing lots of comparisons is ok - for i := uintptr(0); i < bucketCnt; i++ { - x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check - if x == empty { - continue - } - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) - if k.len != key.len { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] == empty { continue } if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { @@ -346,13 +288,9 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) { } // long key, try not to do more comparisons than necessary keymaybe := uintptr(bucketCnt) - for i := uintptr(0); i < bucketCnt; i++ { - x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check - if x == empty { - continue - } - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) - if k.len != key.len { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] == empty { continue } if k.str == key.str { @@ -382,7 +320,7 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) { } dohash: hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) - m := uintptr(1)<<h.B - 1 + m := bucketMask(h.B) b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { if !h.sameSizeGrow() { @@ -394,37 +332,113 @@ dohash: b = oldb } } - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash + top := tophash(hash) + for ; b != nil; b = b.overflow(t) { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := 
(*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] != top { + continue + } + if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true + } + } + } + return unsafe.Pointer(&zeroVal[0]), false +} + +func mapassign_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { + if h == nil { + panic(plainError("assignment to entry in nil map")) + } + if raceenabled { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32)) + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapassign. + h.flags |= hashWriting + + if h.buckets == nil { + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) + } + +again: + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_fast32(t, h, bucket) } + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) + + var insertb *bmap + var inserti uintptr + var insertk unsafe.Pointer + for { for i := uintptr(0); i < bucketCnt; i++ { - x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check - if x != top { + if b.tophash[i] == empty { + if insertb == nil { + inserti = i + insertb = b + } continue } - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) - if k.len != key.len { + k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4))) + if k != key { continue } - if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true - } + inserti = i + insertb = b + goto done } - b = b.overflow(t) - if b == nil { - return unsafe.Pointer(&zeroVal[0]), false + ovf := b.overflow(t) + if ovf == nil { + break } + b = ovf + } + + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + hashGrow(t, h) + goto again // Growing the table invalidates everything, so try again + } + + if insertb == nil { + // all current buckets are full, allocate a new one. 
+ insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti } + insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4) + // store new key at insert position + *(*uint32)(insertk) = key + + h.count++ + +done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize)) + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting + return val } -func mapassign_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { +func mapassign_fast32ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { if h == nil { panic(plainError("assignment to entry in nil map")) } if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32)) } if h.flags&hashWriting != 0 { @@ -436,38 +450,35 @@ func mapassign_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { h.flags |= hashWriting if h.buckets == nil { - h.buckets = newarray(t.bucket, 1) + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) } again: - bucket := hash & (uintptr(1)<<h.B - 1) + bucket := hash & bucketMask(h.B) if h.growing() { - growWork(t, h, bucket) + growWork_fast32(t, h, bucket) } b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } - var inserti *uint8 + var insertb *bmap + var inserti uintptr var insertk unsafe.Pointer - var val unsafe.Pointer + for { for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] != top { - if b.tophash[i] == empty && inserti == nil { - inserti = &b.tophash[i] - insertk = add(unsafe.Pointer(b), dataOffset+i*4) - val = add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) + if b.tophash[i] == empty { + if insertb == nil { + inserti = i + insertb = b } continue } - k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4))) + k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*4))) if k != key { continue } - val = add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) + inserti = i + insertb = b goto done } ovf := b.overflow(t) @@ -481,25 +492,26 @@ again: // If we hit the max load factor or we have too many overflow buckets, // and we're not already in the middle of growing, start growing. - if !h.growing() && (overLoadFactor(int64(h.count), h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { hashGrow(t, h) goto again // Growing the table invalidates everything, so try again } - if inserti == nil { + if insertb == nil { // all current buckets are full, allocate a new one. 
- newb := h.newoverflow(t, b) - inserti = &newb.tophash[0] - insertk = add(unsafe.Pointer(newb), dataOffset) - val = add(insertk, bucketCnt*4) + insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti } + insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4) + // store new key at insert position + *(*unsafe.Pointer)(insertk) = key - // store new key/value at insert position - typedmemmove(t.key, insertk, unsafe.Pointer(&key)) - *inserti = top h.count++ done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize)) if h.flags&hashWriting == 0 { throw("concurrent map writes") } @@ -512,7 +524,7 @@ func mapassign_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { panic(plainError("assignment to entry in nil map")) } if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64)) } if h.flags&hashWriting != 0 { @@ -524,30 +536,26 @@ func mapassign_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { h.flags |= hashWriting if h.buckets == nil { - h.buckets = newarray(t.bucket, 1) + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) } again: - bucket := hash & (uintptr(1)<<h.B - 1) + bucket := hash & bucketMask(h.B) if h.growing() { - growWork(t, h, bucket) + growWork_fast64(t, h, bucket) } b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } - var inserti *uint8 + var insertb *bmap + var inserti uintptr var insertk unsafe.Pointer - var val unsafe.Pointer + for { for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] != top { - if b.tophash[i] == empty && inserti == nil { - inserti = &b.tophash[i] - insertk = add(unsafe.Pointer(b), dataOffset+i*8) - val = add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) + if b.tophash[i] == empty { + if insertb == nil { + insertb = b + inserti = i } continue } @@ -555,7 +563,8 @@ again: if k != key { continue } - val = add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) + insertb = b + inserti = i goto done } ovf := b.overflow(t) @@ -569,25 +578,26 @@ again: // If we hit the max load factor or we have too many overflow buckets, // and we're not already in the middle of growing, start growing. - if !h.growing() && (overLoadFactor(int64(h.count), h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { hashGrow(t, h) goto again // Growing the table invalidates everything, so try again } - if inserti == nil { + if insertb == nil { // all current buckets are full, allocate a new one. 
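// The inserti&(bucketCnt-1) masking just below (repeated in every
// mapassign_fast* variant) is flagged in its own comment as a bounds-check
// eliminator, not a semantic change: inserti is always below bucketCnt here,
// but the compiler cannot prove it. Masking an index with a
// power-of-two-minus-one makes the range obvious, e.g.:
//
//	var tophashes [8]uint8 // bucketCnt == 8
//	tophashes[i&7] = top   // i&7 < 8 by construction, so no bounds check is emitted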
- newb := h.newoverflow(t, b) - inserti = &newb.tophash[0] - insertk = add(unsafe.Pointer(newb), dataOffset) - val = add(insertk, bucketCnt*8) + insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti } + insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8) + // store new key at insert position + *(*uint64)(insertk) = key - // store new key/value at insert position - typedmemmove(t.key, insertk, unsafe.Pointer(&key)) - *inserti = top h.count++ done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize)) if h.flags&hashWriting == 0 { throw("concurrent map writes") } @@ -595,48 +605,131 @@ done: return val } -func mapassign_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { +func mapassign_fast64ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { if h == nil { panic(plainError("assignment to entry in nil map")) } if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&t)) - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_faststr)) + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64)) } if h.flags&hashWriting != 0 { throw("concurrent map writes") } - key := stringStructOf(&ky) - hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) // Set hashWriting after calling alg.hash for consistency with mapassign. h.flags |= hashWriting if h.buckets == nil { - h.buckets = newarray(t.bucket, 1) + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) } again: - bucket := hash & (uintptr(1)<<h.B - 1) + bucket := hash & bucketMask(h.B) if h.growing() { - growWork(t, h, bucket) + growWork_fast64(t, h, bucket) } b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash + + var insertb *bmap + var inserti uintptr + var insertk unsafe.Pointer + + for { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] == empty { + if insertb == nil { + insertb = b + inserti = i + } + continue + } + k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*8))) + if k != key { + continue + } + insertb = b + inserti = i + goto done + } + ovf := b.overflow(t) + if ovf == nil { + break + } + b = ovf + } + + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + hashGrow(t, h) + goto again // Growing the table invalidates everything, so try again } - var inserti *uint8 + if insertb == nil { + // all current buckets are full, allocate a new one. 
+ insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti + } + insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8) + // store new key at insert position + *(*unsafe.Pointer)(insertk) = key + + h.count++ + +done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize)) + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting + return val +} + +func mapassign_faststr(t *maptype, h *hmap, s string) unsafe.Pointer { + if h == nil { + panic(plainError("assignment to entry in nil map")) + } + if raceenabled { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_faststr)) + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + key := stringStructOf(&s) + hash := t.key.hashfn(noescape(unsafe.Pointer(&s)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapassign. + h.flags |= hashWriting + + if h.buckets == nil { + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) + } + +again: + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_faststr(t, h, bucket) + } + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) + top := tophash(hash) + + var insertb *bmap + var inserti uintptr var insertk unsafe.Pointer - var val unsafe.Pointer + for { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { - if b.tophash[i] == empty && inserti == nil { - inserti = &b.tophash[i] - insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) - val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) + if b.tophash[i] == empty && insertb == nil { + insertb = b + inserti = i } continue } @@ -648,7 +741,8 @@ again: continue } // already have a mapping for key. Update it. - val = add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) + inserti = i + insertb = b goto done } ovf := b.overflow(t) @@ -662,25 +756,25 @@ again: // If we hit the max load factor or we have too many overflow buckets, // and we're not already in the middle of growing, start growing. - if !h.growing() && (overLoadFactor(int64(h.count), h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { hashGrow(t, h) goto again // Growing the table invalidates everything, so try again } - if inserti == nil { + if insertb == nil { // all current buckets are full, allocate a new one. 
- newb := h.newoverflow(t, b) - inserti = &newb.tophash[0] - insertk = add(unsafe.Pointer(newb), dataOffset) - val = add(insertk, bucketCnt*2*sys.PtrSize) + insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti } + insertb.tophash[inserti&(bucketCnt-1)] = top // mask inserti to avoid bounds checks - // store new key/value at insert position + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*2*sys.PtrSize) + // store new key at insert position *((*stringStruct)(insertk)) = *key - *inserti = top h.count++ done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*2*sys.PtrSize+inserti*uintptr(t.valuesize)) if h.flags&hashWriting == 0 { throw("concurrent map writes") } @@ -690,7 +784,7 @@ done: func mapdelete_fast32(t *maptype, h *hmap, key uint32) { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast32)) } if h == nil || h.count == 0 { @@ -705,38 +799,32 @@ func mapdelete_fast32(t *maptype, h *hmap, key uint32) { // Set hashWriting after calling alg.hash for consistency with mapdelete h.flags |= hashWriting - bucket := hash & (uintptr(1)<<h.B - 1) + bucket := hash & bucketMask(h.B) if h.growing() { - growWork(t, h, bucket) - } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash + growWork_fast32(t, h, bucket) } - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] != top { + b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) +search: + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { + if key != *(*uint32)(k) || b.tophash[i] == empty { continue } - k := (*uint32)(add(unsafe.Pointer(b), dataOffset+i*4)) - if key != *k { - continue + // Only clear key if there are pointers in it. + if t.key.kind&kindNoPointers == 0 { + memclrHasPointers(k, t.key.size) + } + // Only clear value if there are pointers in it. 
+ if t.elem.kind&kindNoPointers == 0 { + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) + memclrHasPointers(v, t.elem.size) } - typedmemclr(t.key, unsafe.Pointer(k)) - v := unsafe.Pointer(uintptr(unsafe.Pointer(b)) + dataOffset + bucketCnt*4 + i*uintptr(t.valuesize)) - typedmemclr(t.elem, v) b.tophash[i] = empty h.count-- - goto done - } - b = b.overflow(t) - if b == nil { - goto done + break search } } -done: if h.flags&hashWriting == 0 { throw("concurrent map writes") } @@ -745,7 +833,7 @@ done: func mapdelete_fast64(t *maptype, h *hmap, key uint64) { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast64)) } if h == nil || h.count == 0 { @@ -760,38 +848,32 @@ func mapdelete_fast64(t *maptype, h *hmap, key uint64) { // Set hashWriting after calling alg.hash for consistency with mapdelete h.flags |= hashWriting - bucket := hash & (uintptr(1)<<h.B - 1) + bucket := hash & bucketMask(h.B) if h.growing() { - growWork(t, h, bucket) + growWork_fast64(t, h, bucket) } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] != top { + b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) +search: + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { + if key != *(*uint64)(k) || b.tophash[i] == empty { continue } - k := (*uint64)(add(unsafe.Pointer(b), dataOffset+i*8)) - if key != *k { - continue + // Only clear key if there are pointers in it. + if t.key.kind&kindNoPointers == 0 { + memclrHasPointers(k, t.key.size) + } + // Only clear value if there are pointers in it. 
+ if t.elem.kind&kindNoPointers == 0 { + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) + memclrHasPointers(v, t.elem.size) } - typedmemclr(t.key, unsafe.Pointer(k)) - v := unsafe.Pointer(uintptr(unsafe.Pointer(b)) + dataOffset + bucketCnt*8 + i*uintptr(t.valuesize)) - typedmemclr(t.elem, v) b.tophash[i] = empty h.count-- - goto done - } - b = b.overflow(t) - if b == nil { - goto done + break search } } -done: if h.flags&hashWriting == 0 { throw("concurrent map writes") } @@ -800,7 +882,7 @@ done: func mapdelete_faststr(t *maptype, h *hmap, ky string) { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_faststr)) } if h == nil || h.count == 0 { @@ -816,43 +898,340 @@ func mapdelete_faststr(t *maptype, h *hmap, ky string) { // Set hashWriting after calling alg.hash for consistency with mapdelete h.flags |= hashWriting - bucket := hash & (uintptr(1)<<h.B - 1) + bucket := hash & bucketMask(h.B) if h.growing() { - growWork(t, h, bucket) - } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] != top { - continue - } - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) - if k.len != key.len { + growWork_faststr(t, h, bucket) + } + b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) + top := tophash(hash) +search: + for ; b != nil; b = b.overflow(t) { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] != top { continue } if k.str != key.str && !memequal(k.str, key.str, uintptr(key.len)) { continue } - typedmemclr(t.key, unsafe.Pointer(k)) - v := unsafe.Pointer(uintptr(unsafe.Pointer(b)) + dataOffset + bucketCnt*2*sys.PtrSize + i*uintptr(t.valuesize)) - typedmemclr(t.elem, v) + // Clear key's pointer. + k.str = nil + // Only clear value if there are pointers in it. + if t.elem.kind&kindNoPointers == 0 { + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) + memclrHasPointers(v, t.elem.size) + } b.tophash[i] = empty h.count-- - goto done - } - b = b.overflow(t) - if b == nil { - goto done + break search } } -done: if h.flags&hashWriting == 0 { throw("concurrent map writes") } h.flags &^= hashWriting } + +func growWork_fast32(t *maptype, h *hmap, bucket uintptr) { + // make sure we evacuate the oldbucket corresponding + // to the bucket we're about to use + evacuate_fast32(t, h, bucket&h.oldbucketmask()) + + // evacuate one more oldbucket to make progress on growing + if h.growing() { + evacuate_fast32(t, h, h.nevacuate) + } +} + +func evacuate_fast32(t *maptype, h *hmap, oldbucket uintptr) { + b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) + newbit := h.noldbuckets() + if !evacuated(b) { + // TODO: reuse overflow buckets instead of using new ones, if there + // is no iterator using the old buckets. (If !oldIterator.) + + // xy contains the x and y (low and high) evacuation destinations. + var xy [2]evacDst + x := &xy[0] + x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) + x.k = add(unsafe.Pointer(x.b), dataOffset) + x.v = add(x.k, bucketCnt*4) + + if !h.sameSizeGrow() { + // Only calculate y pointers if we're growing bigger. + // Otherwise GC can see bad pointers. 
+ y := &xy[1] + y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) + y.k = add(unsafe.Pointer(y.b), dataOffset) + y.v = add(y.k, bucketCnt*4) + } + + for ; b != nil; b = b.overflow(t) { + k := add(unsafe.Pointer(b), dataOffset) + v := add(k, bucketCnt*4) + for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 4), add(v, uintptr(t.valuesize)) { + top := b.tophash[i] + if top == empty { + b.tophash[i] = evacuatedEmpty + continue + } + if top < minTopHash { + throw("bad map state") + } + var useY uint8 + if !h.sameSizeGrow() { + // Compute hash to make our evacuation decision (whether we need + // to send this key/value to bucket x or bucket y). + hash := t.key.hashfn(k, uintptr(h.hash0)) + if hash&newbit != 0 { + useY = 1 + } + } + + b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap + dst := &xy[useY] // evacuation destination + + if dst.i == bucketCnt { + dst.b = h.newoverflow(t, dst.b) + dst.i = 0 + dst.k = add(unsafe.Pointer(dst.b), dataOffset) + dst.v = add(dst.k, bucketCnt*4) + } + dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check + + // Copy key. + if sys.PtrSize == 4 && t.key.kind&kindNoPointers == 0 && writeBarrier.enabled { + writebarrierptr((*uintptr)(dst.k), *(*uintptr)(k)) + } else { + *(*uint32)(dst.k) = *(*uint32)(k) + } + + typedmemmove(t.elem, dst.v, v) + dst.i++ + // These updates might push these pointers past the end of the + // key or value arrays. That's ok, as we have the overflow pointer + // at the end of the bucket to protect against pointing past the + // end of the bucket. + dst.k = add(dst.k, 4) + dst.v = add(dst.v, uintptr(t.valuesize)) + } + } + // Unlink the overflow buckets & clear key/value to help GC. + if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) + // Preserve b.tophash because the evacuation + // state is maintained there. + ptr := add(b, dataOffset) + n := uintptr(t.bucketsize) - dataOffset + memclrHasPointers(ptr, n) + } + } + + if oldbucket == h.nevacuate { + advanceEvacuationMark(h, t, newbit) + } +} + +func growWork_fast64(t *maptype, h *hmap, bucket uintptr) { + // make sure we evacuate the oldbucket corresponding + // to the bucket we're about to use + evacuate_fast64(t, h, bucket&h.oldbucketmask()) + + // evacuate one more oldbucket to make progress on growing + if h.growing() { + evacuate_fast64(t, h, h.nevacuate) + } +} + +func evacuate_fast64(t *maptype, h *hmap, oldbucket uintptr) { + b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) + newbit := h.noldbuckets() + if !evacuated(b) { + // TODO: reuse overflow buckets instead of using new ones, if there + // is no iterator using the old buckets. (If !oldIterator.) + + // xy contains the x and y (low and high) evacuation destinations. + var xy [2]evacDst + x := &xy[0] + x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) + x.k = add(unsafe.Pointer(x.b), dataOffset) + x.v = add(x.k, bucketCnt*8) + + if !h.sameSizeGrow() { + // Only calculate y pointers if we're growing bigger. + // Otherwise GC can see bad pointers. 
+ y := &xy[1] + y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) + y.k = add(unsafe.Pointer(y.b), dataOffset) + y.v = add(y.k, bucketCnt*8) + } + + for ; b != nil; b = b.overflow(t) { + k := add(unsafe.Pointer(b), dataOffset) + v := add(k, bucketCnt*8) + for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 8), add(v, uintptr(t.valuesize)) { + top := b.tophash[i] + if top == empty { + b.tophash[i] = evacuatedEmpty + continue + } + if top < minTopHash { + throw("bad map state") + } + var useY uint8 + if !h.sameSizeGrow() { + // Compute hash to make our evacuation decision (whether we need + // to send this key/value to bucket x or bucket y). + hash := t.key.hashfn(k, uintptr(h.hash0)) + if hash&newbit != 0 { + useY = 1 + } + } + + b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap + dst := &xy[useY] // evacuation destination + + if dst.i == bucketCnt { + dst.b = h.newoverflow(t, dst.b) + dst.i = 0 + dst.k = add(unsafe.Pointer(dst.b), dataOffset) + dst.v = add(dst.k, bucketCnt*8) + } + dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check + + // Copy key. + if t.key.kind&kindNoPointers == 0 && writeBarrier.enabled { + if sys.PtrSize == 8 { + writebarrierptr((*uintptr)(dst.k), *(*uintptr)(k)) + } else { + // There are three ways to squeeze at least one 32 bit pointer into 64 bits. + // Give up and call typedmemmove. + typedmemmove(t.key, dst.k, k) + } + } else { + *(*uint64)(dst.k) = *(*uint64)(k) + } + + typedmemmove(t.elem, dst.v, v) + dst.i++ + // These updates might push these pointers past the end of the + // key or value arrays. That's ok, as we have the overflow pointer + // at the end of the bucket to protect against pointing past the + // end of the bucket. + dst.k = add(dst.k, 8) + dst.v = add(dst.v, uintptr(t.valuesize)) + } + } + // Unlink the overflow buckets & clear key/value to help GC. + if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) + // Preserve b.tophash because the evacuation + // state is maintained there. + ptr := add(b, dataOffset) + n := uintptr(t.bucketsize) - dataOffset + memclrHasPointers(ptr, n) + } + } + + if oldbucket == h.nevacuate { + advanceEvacuationMark(h, t, newbit) + } +} + +func growWork_faststr(t *maptype, h *hmap, bucket uintptr) { + // make sure we evacuate the oldbucket corresponding + // to the bucket we're about to use + evacuate_faststr(t, h, bucket&h.oldbucketmask()) + + // evacuate one more oldbucket to make progress on growing + if h.growing() { + evacuate_faststr(t, h, h.nevacuate) + } +} + +func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) { + b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) + newbit := h.noldbuckets() + if !evacuated(b) { + // TODO: reuse overflow buckets instead of using new ones, if there + // is no iterator using the old buckets. (If !oldIterator.) + + // xy contains the x and y (low and high) evacuation destinations. + var xy [2]evacDst + x := &xy[0] + x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) + x.k = add(unsafe.Pointer(x.b), dataOffset) + x.v = add(x.k, bucketCnt*2*sys.PtrSize) + + if !h.sameSizeGrow() { + // Only calculate y pointers if we're growing bigger. + // Otherwise GC can see bad pointers. 
+ y := &xy[1] + y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) + y.k = add(unsafe.Pointer(y.b), dataOffset) + y.v = add(y.k, bucketCnt*2*sys.PtrSize) + } + + for ; b != nil; b = b.overflow(t) { + k := add(unsafe.Pointer(b), dataOffset) + v := add(k, bucketCnt*2*sys.PtrSize) + for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 2*sys.PtrSize), add(v, uintptr(t.valuesize)) { + top := b.tophash[i] + if top == empty { + b.tophash[i] = evacuatedEmpty + continue + } + if top < minTopHash { + throw("bad map state") + } + var useY uint8 + if !h.sameSizeGrow() { + // Compute hash to make our evacuation decision (whether we need + // to send this key/value to bucket x or bucket y). + hash := t.key.hashfn(k, uintptr(h.hash0)) + if hash&newbit != 0 { + useY = 1 + } + } + + b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap + dst := &xy[useY] // evacuation destination + + if dst.i == bucketCnt { + dst.b = h.newoverflow(t, dst.b) + dst.i = 0 + dst.k = add(unsafe.Pointer(dst.b), dataOffset) + dst.v = add(dst.k, bucketCnt*2*sys.PtrSize) + } + dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check + + // Copy key. + *(*string)(dst.k) = *(*string)(k) + + typedmemmove(t.elem, dst.v, v) + dst.i++ + // These updates might push these pointers past the end of the + // key or value arrays. That's ok, as we have the overflow pointer + // at the end of the bucket to protect against pointing past the + // end of the bucket. + dst.k = add(dst.k, 2*sys.PtrSize) + dst.v = add(dst.v, uintptr(t.valuesize)) + } + } + // Unlink the overflow buckets & clear key/value to help GC. + if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) + // Preserve b.tophash because the evacuation + // state is maintained there. + ptr := add(b, dataOffset) + n := uintptr(t.bucketsize) - dataOffset + memclrHasPointers(ptr, n) + } + } + + if oldbucket == h.nevacuate { + advanceEvacuationMark(h, t, newbit) + } +} diff --git a/libgo/go/runtime/heapdump.go b/libgo/go/runtime/heapdump.go index 166199b5ca3..a4b168d7313 100644 --- a/libgo/go/runtime/heapdump.go +++ b/libgo/go/runtime/heapdump.go @@ -200,7 +200,6 @@ func dumptype(t *_type) { // dump an object func dumpobj(obj unsafe.Pointer, size uintptr, bv bitvector) { - dumpbvtypes(&bv, obj) dumpint(tagObject) dumpint(uint64(uintptr(obj))) dumpmemrange(obj, size) @@ -539,16 +538,6 @@ func dumpfields(bv bitvector) { dumpint(fieldKindEol) } -// The heap dump reader needs to be able to disambiguate -// Eface entries. So it needs to know every type that might -// appear in such an entry. The following routine accomplishes that. -// TODO(rsc, khr): Delete - no longer possible. - -// Dump all the types that appear in the type field of -// any Eface described by this bit vector. -func dumpbvtypes(bv *bitvector, base unsafe.Pointer) { -} - func makeheapobjbv(p uintptr, size uintptr) bitvector { // Extend the temp buffer if necessary. nptr := size / sys.PtrSize diff --git a/libgo/go/runtime/internal/atomic/atomic_test.go b/libgo/go/runtime/internal/atomic/atomic_test.go index 879a82f9c82..b697aa8bd39 100644 --- a/libgo/go/runtime/internal/atomic/atomic_test.go +++ b/libgo/go/runtime/internal/atomic/atomic_test.go @@ -52,7 +52,7 @@ func TestXadduintptr(t *testing.T) { // Tests that xadduintptr correctly updates 64-bit values. 
The place where // we actually do so is mstats.go, functions mSysStat{Inc,Dec}. func TestXadduintptrOnUint64(t *testing.T) { - if sys.BigEndian != 0 { + if sys.BigEndian { // On big endian architectures, we never use xadduintptr to update // 64-bit values and hence we skip the test. (Note that functions // mSysStat{Inc,Dec} in mstats.go have explicit checks for diff --git a/libgo/go/runtime/internal/sys/sys.go b/libgo/go/runtime/internal/sys/sys.go index 586a763717d..9d9ac4507f6 100644 --- a/libgo/go/runtime/internal/sys/sys.go +++ b/libgo/go/runtime/internal/sys/sys.go @@ -6,9 +6,9 @@ // constants used by the runtime. package sys -// The next line makes 'go generate' write the zgen_*.go files with +// The next line makes 'go generate' write the zgo*.go files with // per-OS and per-arch information, including constants -// named goos_$GOOS and goarch_$GOARCH for every +// named Goos$GOOS and Goarch$GOARCH for every // known GOOS and GOARCH. The constant is 1 on the // current system, 0 otherwise; multiplying by them is // useful for defining GOOS- or GOARCH-specific constants. diff --git a/libgo/go/runtime/lock_sema.go b/libgo/go/runtime/lock_sema.go index 52a2376dc5e..d000b112f44 100644 --- a/libgo/go/runtime/lock_sema.go +++ b/libgo/go/runtime/lock_sema.go @@ -83,7 +83,7 @@ Loop: // for this lock, chained through m->nextwaitm. // Queue this M. for { - gp.m.nextwaitm = v &^ mutex_locked + gp.m.nextwaitm = muintptr(v &^ mutex_locked) if atomic.Casuintptr(&l.key, v, uintptr(unsafe.Pointer(gp.m))|mutex_locked) { break } @@ -115,8 +115,8 @@ func unlock(l *mutex) { } else { // Other M's are waiting for the lock. // Dequeue an M. - mp = (*m)(unsafe.Pointer(v &^ mutex_locked)) - if atomic.Casuintptr(&l.key, v, mp.nextwaitm) { + mp = muintptr(v &^ mutex_locked).ptr() + if atomic.Casuintptr(&l.key, v, uintptr(mp.nextwaitm)) { // Dequeued an M. Wake it. semawakeup(mp) break @@ -152,7 +152,7 @@ func notewakeup(n *note) { case v == 0: // Nothing was waiting. Done. case v == mutex_locked: - // Two notewakeups! Not allowed. + // Two notewakeups! Not allowed. throw("notewakeup - double wakeup") default: // Must be the waiting m. Wake it up. diff --git a/libgo/go/runtime/malloc.go b/libgo/go/runtime/malloc.go index 796cd8a7c64..88e4ba3657b 100644 --- a/libgo/go/runtime/malloc.go +++ b/libgo/go/runtime/malloc.go @@ -546,9 +546,8 @@ func nextFreeFast(s *mspan) gclinkptr { } s.allocCache >>= uint(theBit + 1) s.freeindex = freeidx - v := gclinkptr(result*s.elemsize + s.base()) s.allocCount++ - return v + return gclinkptr(result*s.elemsize + s.base()) } } return 0 @@ -877,6 +876,9 @@ func reflect_unsafe_New(typ *_type) unsafe.Pointer { // newarray allocates an array of n elements of type typ. func newarray(typ *_type, n int) unsafe.Pointer { + if n == 1 { + return mallocgc(typ.size, typ, true) + } if n < 0 || uintptr(n) > maxSliceCap(typ.size) { panic(plainError("runtime: allocation size out of range")) } @@ -893,11 +895,13 @@ func profilealloc(mp *m, x unsafe.Pointer, size uintptr) { mProf_Malloc(x, size) } -// nextSample returns the next sampling point for heap profiling. -// It produces a random variable with a geometric distribution and -// mean MemProfileRate. This is done by generating a uniformly -// distributed random number and applying the cumulative distribution -// function for an exponential. +// nextSample returns the next sampling point for heap profiling. 
The goal is +// to sample allocations on average every MemProfileRate bytes, but with a +// completely random distribution over the allocation timeline; this +// corresponds to a Poisson process with parameter MemProfileRate. In Poisson +// processes, the distance between two samples follows the exponential +// distribution (exp(MemProfileRate)), so the best return value is a random +// number taken from an exponential distribution whose mean is MemProfileRate. func nextSample() int32 { if GOOS == "plan9" { // Plan 9 doesn't support floating point in note handler. @@ -906,25 +910,29 @@ func nextSample() int32 { } } - period := MemProfileRate + return fastexprand(MemProfileRate) +} - // make nextSample not overflow. Maximum possible step is - // -ln(1/(1<<kRandomBitCount)) * period, approximately 20 * period. +// fastexprand returns a random number from an exponential distribution with +// the specified mean. +func fastexprand(mean int) int32 { + // Avoid overflow. Maximum possible step is + // -ln(1/(1<<randomBitCount)) * mean, approximately 20 * mean. switch { - case period > 0x7000000: - period = 0x7000000 - case period == 0: + case mean > 0x7000000: + mean = 0x7000000 + case mean == 0: return 0 } - // Let m be the sample rate, - // the probability distribution function is m*exp(-mx), so the CDF is - // p = 1 - exp(-mx), so - // q = 1 - p == exp(-mx) - // log_e(q) = -mx - // -log_e(q)/m = x - // x = -log_e(q) * period - // x = log_2(q) * (-log_e(2)) * period ; Using log_2 for efficiency + // Take a random sample of the exponential distribution exp(-mean*x). + // The probability distribution function is mean*exp(-mean*x), so the CDF is + // p = 1 - exp(-mean*x), so + // q = 1 - p == exp(-mean*x) + // log_e(q) = -mean*x + // -log_e(q)/mean = x + // x = -log_e(q) * mean + // x = log_2(q) * (-log_e(2)) * mean ; Using log_2 for efficiency const randomBitCount = 26 q := fastrand()%(1<<randomBitCount) + 1 qlog := fastlog2(float64(q)) - randomBitCount @@ -932,7 +940,7 @@ func nextSample() int32 { qlog = 0 } const minusLog2 = -0.6931471805599453 // -ln(2) - return int32(qlog*(minusLog2*float64(period))) + 1 + return int32(qlog*(minusLog2*float64(mean))) + 1 } // nextSampleNoFP is similar to nextSample, but uses older, @@ -950,7 +958,7 @@ func nextSampleNoFP() int32 { } type persistentAlloc struct { - base unsafe.Pointer + base *notInHeap off uintptr } @@ -967,17 +975,17 @@ var globalAlloc struct { // // Consider marking persistentalloc'd types go:notinheap. func persistentalloc(size, align uintptr, sysStat *uint64) unsafe.Pointer { - var p unsafe.Pointer + var p *notInHeap systemstack(func() { p = persistentalloc1(size, align, sysStat) }) - return p + return unsafe.Pointer(p) } // Must run on system stack because stack growth can (re)invoke it. // See issue 9174. 
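The rewritten sampler above is easier to see outside the runtime. One draw works like this: take q uniform in (0, 2^26], then -ln(q/2^26)*mean is exponentially distributed with the requested mean, which is exactly the CDF inversion spelled out in the comment. A sketch that substitutes math.Log2 for the runtime's table-driven fastlog2; the function name and the use of math/rand are illustrative:

package main

import (
	"fmt"
	"math"
	"math/rand"
)

// One draw of the sampling distance: q is uniform in (0, 2^26], so
// q/2^26 is uniform in (0, 1], and -ln(q/2^26)*mean is exponential
// with the given mean. math.Log2 stands in for the runtime's
// table-driven fastlog2 approximation.
func nextSampleGap(mean float64, rng *rand.Rand) int {
	const randomBitCount = 26
	q := float64(rng.Intn(1<<randomBitCount) + 1)
	qlog := math.Log2(q) - randomBitCount
	if qlog > 0 {
		qlog = 0
	}
	const minusLog2 = -0.6931471805599453 // -ln(2), converts log2 back to -ln
	return int(qlog*minusLog2*mean) + 1
}

func main() {
	rng := rand.New(rand.NewSource(1))
	const mean, n = 4096, 200000
	sum := 0
	for i := 0; i < n; i++ {
		sum += nextSampleGap(mean, rng)
	}
	fmt.Printf("average gap over %d draws: %d bytes (target %d)\n", n, sum/n, int(mean))
}

Averaging many draws should land near the requested mean, which is the property the Poisson-process comment relies on.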
//go:systemstack -func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer { +func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap { const ( chunk = 256 << 10 maxBlock = 64 << 10 // VM reservation granularity is 64K on windows @@ -998,7 +1006,7 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer { } if size >= maxBlock { - return sysAlloc(size, sysStat) + return (*notInHeap)(sysAlloc(size, sysStat)) } mp := acquirem() @@ -1011,7 +1019,7 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer { } persistent.off = round(persistent.off, align) if persistent.off+size > chunk || persistent.base == nil { - persistent.base = sysAlloc(chunk, &memstats.other_sys) + persistent.base = (*notInHeap)(sysAlloc(chunk, &memstats.other_sys)) if persistent.base == nil { if persistent == &globalAlloc.persistentAlloc { unlock(&globalAlloc.mutex) @@ -1020,7 +1028,7 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer { } persistent.off = 0 } - p := add(persistent.base, persistent.off) + p := persistent.base.add(persistent.off) persistent.off += size releasem(mp) if persistent == &globalAlloc.persistentAlloc { @@ -1033,3 +1041,19 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer { } return p } + +// notInHeap is off-heap memory allocated by a lower-level allocator +// like sysAlloc or persistentAlloc. +// +// In general, it's better to use real types marked as go:notinheap, +// but this serves as a generic type for situations where that isn't +// possible (like in the allocators). +// +// TODO: Use this as the return type of sysAlloc, persistentAlloc, etc? +// +//go:notinheap +type notInHeap struct{} + +func (p *notInHeap) add(bytes uintptr) *notInHeap { + return (*notInHeap)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + bytes)) +} diff --git a/libgo/go/runtime/malloc_test.go b/libgo/go/runtime/malloc_test.go index 0d43cf65976..ab580f81800 100644 --- a/libgo/go/runtime/malloc_test.go +++ b/libgo/go/runtime/malloc_test.go @@ -48,9 +48,6 @@ func TestMemStats(t *testing.T) { } // Of the uint fields, HeapReleased, HeapIdle can be 0. // PauseTotalNs can be 0 if timer resolution is poor. - // - // TODO: Test that GCCPUFraction is <= 0.99. This currently - // fails on windows/386. 
(Issue #19319) fields := map[string][]func(interface{}) error{ "Alloc": {nz, le(1e10)}, "TotalAlloc": {nz, le(1e11)}, "Sys": {nz, le(1e10)}, "Lookups": {nz, le(1e10)}, "Mallocs": {nz, le(1e10)}, "Frees": {nz, le(1e10)}, @@ -63,7 +60,7 @@ func TestMemStats(t *testing.T) { "NextGC": {nz, le(1e10)}, "LastGC": {nz}, "PauseTotalNs": {le(1e11)}, "PauseNs": nil, "PauseEnd": nil, "NumGC": {nz, le(1e9)}, "NumForcedGC": {nz, le(1e9)}, - "GCCPUFraction": nil, "EnableGC": {eq(true)}, "DebugGC": {eq(false)}, + "GCCPUFraction": {le(0.99)}, "EnableGC": {eq(true)}, "DebugGC": {eq(false)}, "BySize": nil, } diff --git a/libgo/go/runtime/map_test.go b/libgo/go/runtime/map_test.go index 37c959f8327..6d7097e07ef 100644 --- a/libgo/go/runtime/map_test.go +++ b/libgo/go/runtime/map_test.go @@ -249,7 +249,7 @@ func testConcurrentReadsAfterGrowth(t *testing.T, useReflect bool) { numGrowStep := 250 numReader := 16 if testing.Short() { - numLoop, numGrowStep = 2, 500 + numLoop, numGrowStep = 2, 100 } for i := 0; i < numLoop; i++ { m := make(map[int]int, 0) @@ -603,6 +603,142 @@ func TestIgnoreBogusMapHint(t *testing.T) { } } +var mapSink map[int]int + +var mapBucketTests = [...]struct { + n int // n is the number of map elements + noescape int // number of expected buckets for non-escaping map + escape int // number of expected buckets for escaping map +}{ + {-(1 << 30), 1, 1}, + {-1, 1, 1}, + {0, 1, 1}, + {1, 1, 1}, + {8, 1, 1}, + {9, 2, 2}, + {13, 2, 2}, + {14, 4, 4}, + {26, 4, 4}, +} + +func TestMapBuckets(t *testing.T) { + // Test that maps of different sizes have the right number of buckets. + // Non-escaping maps with small buckets (like map[int]int) never + // have a nil bucket pointer due to starting with preallocated buckets + // on the stack. Escaping maps start with a non-nil bucket pointer if + // hint size is above bucketCnt and thereby have more than one bucket. + // These tests depend on bucketCnt and loadFactor* in hashmap.go. + t.Run("mapliteral", func(t *testing.T) { + for _, tt := range mapBucketTests { + localMap := map[int]int{} + // Skip test on gccgo until escape analysis is + // turned on. + if runtime.MapBucketsPointerIsNil(localMap) && runtime.Compiler != "gccgo" { + t.Errorf("no escape: buckets pointer is nil for non-escaping map") + } + for i := 0; i < tt.n; i++ { + localMap[i] = i + } + if got := runtime.MapBucketsCount(localMap); got != tt.noescape { + t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got) + } + escapingMap := map[int]int{} + if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) { + t.Errorf("escape: buckets pointer is nil for n=%d buckets", count) + } + for i := 0; i < tt.n; i++ { + escapingMap[i] = i + } + if got := runtime.MapBucketsCount(escapingMap); got != tt.escape { + t.Errorf("escape n=%d want %d buckets, got %d", tt.n, tt.escape, got) + } + mapSink = escapingMap + } + }) + t.Run("nohint", func(t *testing.T) { + for _, tt := range mapBucketTests { + localMap := make(map[int]int) + // Skip test on gccgo until escape analysis is + // turned on. 
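The expected counts in mapBucketTests follow from the load-factor rule in hashmap.go: pick the smallest power-of-two bucket count that keeps the hint at or under 6.5 entries per 8-slot bucket. A standalone sketch of that rule, with the constants restated here as assumptions about this Go version:

package main

import "fmt"

// The sizing rule behind the expected bucket counts in mapBucketTests:
// choose the smallest B such that hint elements do not overload
// 2^B buckets at 6.5 entries per 8-slot bucket.
const (
	bucketCnt     = 8
	loadFactorNum = 13 // load factor is 13/2 = 6.5
	loadFactorDen = 2
)

func overLoadFactor(count int, B uint8) bool {
	return count > bucketCnt && uint64(count) > loadFactorNum*((uint64(1)<<B)/loadFactorDen)
}

func bucketsForHint(hint int) int {
	B := uint8(0)
	for overLoadFactor(hint, B) {
		B++
	}
	return 1 << B
}

func main() {
	for _, hint := range []int{0, 1, 8, 9, 13, 14, 26} {
		fmt.Printf("hint=%2d -> %d bucket(s)\n", hint, bucketsForHint(hint))
	}
}

It reproduces the table: hints up to 8 fit in one bucket, 9 through 13 need two, and 14 through 26 need four.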
+ if runtime.MapBucketsPointerIsNil(localMap) && runtime.Compiler != "gccgo" { + t.Errorf("no escape: buckets pointer is nil for non-escaping map") + } + for i := 0; i < tt.n; i++ { + localMap[i] = i + } + if got := runtime.MapBucketsCount(localMap); got != tt.noescape { + t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got) + } + escapingMap := make(map[int]int) + if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) { + t.Errorf("escape: buckets pointer is nil for n=%d buckets", count) + } + for i := 0; i < tt.n; i++ { + escapingMap[i] = i + } + if got := runtime.MapBucketsCount(escapingMap); got != tt.escape { + t.Errorf("escape: n=%d want %d buckets, got %d", tt.n, tt.escape, got) + } + mapSink = escapingMap + } + }) + t.Run("makemap", func(t *testing.T) { + for _, tt := range mapBucketTests { + localMap := make(map[int]int, tt.n) + // Skip test on gccgo until escape analysis is + // turned on. + if runtime.MapBucketsPointerIsNil(localMap) && runtime.Compiler != "gccgo" { + t.Errorf("no escape: buckets pointer is nil for non-escaping map") + } + for i := 0; i < tt.n; i++ { + localMap[i] = i + } + if got := runtime.MapBucketsCount(localMap); got != tt.noescape { + t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got) + } + escapingMap := make(map[int]int, tt.n) + if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) { + t.Errorf("escape: buckets pointer is nil for n=%d buckets", count) + } + for i := 0; i < tt.n; i++ { + escapingMap[i] = i + } + if got := runtime.MapBucketsCount(escapingMap); got != tt.escape { + t.Errorf("escape: n=%d want %d buckets, got %d", tt.n, tt.escape, got) + } + mapSink = escapingMap + } + }) + t.Run("makemap64", func(t *testing.T) { + for _, tt := range mapBucketTests { + localMap := make(map[int]int, int64(tt.n)) + // Skip test on gccgo until escape analysis is + // turned on. 
+ if runtime.MapBucketsPointerIsNil(localMap) && runtime.Compiler != "gccgo" { + t.Errorf("no escape: buckets pointer is nil for non-escaping map") + } + for i := 0; i < tt.n; i++ { + localMap[i] = i + } + if got := runtime.MapBucketsCount(localMap); got != tt.noescape { + t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got) + } + escapingMap := make(map[int]int, tt.n) + if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) { + t.Errorf("escape: buckets pointer is nil for n=%d buckets", count) + } + for i := 0; i < tt.n; i++ { + escapingMap[i] = i + } + if got := runtime.MapBucketsCount(escapingMap); got != tt.escape { + t.Errorf("escape: n=%d want %d buckets, got %d", tt.n, tt.escape, got) + } + mapSink = escapingMap + } + }) + +} + func benchmarkMapPop(b *testing.B, n int) { m := map[int]int{} for i := 0; i < b.N; i++ { @@ -624,15 +760,39 @@ func BenchmarkMapPop100(b *testing.B) { benchmarkMapPop(b, 100) } func BenchmarkMapPop1000(b *testing.B) { benchmarkMapPop(b, 1000) } func BenchmarkMapPop10000(b *testing.B) { benchmarkMapPop(b, 10000) } +var testNonEscapingMapVariable int = 8 + func TestNonEscapingMap(t *testing.T) { t.Skip("does not work on gccgo without better escape analysis") n := testing.AllocsPerRun(1000, func() { + m := map[int]int{} + m[0] = 0 + }) + if n != 0 { + t.Fatalf("mapliteral: want 0 allocs, got %v", n) + } + n = testing.AllocsPerRun(1000, func() { m := make(map[int]int) m[0] = 0 }) if n != 0 { - t.Fatalf("want 0 allocs, got %v", n) + t.Fatalf("no hint: want 0 allocs, got %v", n) + } + n = testing.AllocsPerRun(1000, func() { + m := make(map[int]int, 8) + m[0] = 0 + }) + if n != 0 { + t.Fatalf("with small hint: want 0 allocs, got %v", n) + } + n = testing.AllocsPerRun(1000, func() { + m := make(map[int]int, testNonEscapingMapVariable) + m[0] = 0 + }) + if n != 0 { + t.Fatalf("with variable hint: want 0 allocs, got %v", n) } + } func benchmarkMapAssignInt32(b *testing.B, n int) { @@ -643,12 +803,16 @@ func benchmarkMapAssignInt32(b *testing.B, n int) { } func benchmarkMapDeleteInt32(b *testing.B, n int) { - a := make(map[int32]int) - for i := 0; i < n*b.N; i++ { - a[int32(i)] = i - } + a := make(map[int32]int, n) b.ResetTimer() - for i := 0; i < n*b.N; i = i + n { + for i := 0; i < b.N; i++ { + if len(a) == 0 { + b.StopTimer() + for j := i; j < i+n; j++ { + a[int32(j)] = j + } + b.StartTimer() + } delete(a, int32(i)) } } @@ -661,12 +825,16 @@ func benchmarkMapAssignInt64(b *testing.B, n int) { } func benchmarkMapDeleteInt64(b *testing.B, n int) { - a := make(map[int64]int) - for i := 0; i < n*b.N; i++ { - a[int64(i)] = i - } + a := make(map[int64]int, n) b.ResetTimer() - for i := 0; i < n*b.N; i = i + n { + for i := 0; i < b.N; i++ { + if len(a) == 0 { + b.StopTimer() + for j := i; j < i+n; j++ { + a[int64(j)] = j + } + b.StartTimer() + } delete(a, int64(i)) } } @@ -684,17 +852,23 @@ func benchmarkMapAssignStr(b *testing.B, n int) { } func benchmarkMapDeleteStr(b *testing.B, n int) { - k := make([]string, n*b.N) - for i := 0; i < n*b.N; i++ { - k[i] = strconv.Itoa(i) - } - a := make(map[string]int) - for i := 0; i < n*b.N; i++ { - a[k[i]] = i + i2s := make([]string, n) + for i := 0; i < n; i++ { + i2s[i] = strconv.Itoa(i) } + a := make(map[string]int, n) b.ResetTimer() - for i := 0; i < n*b.N; i = i + n { - delete(a, k[i]) + k := 0 + for i := 0; i < b.N; i++ { + if len(a) == 0 { + b.StopTimer() + for j := 0; j < n; j++ { + a[i2s[j]] = j + } + k = i + b.StartTimer() + } + delete(a, i2s[i-k]) } 
} @@ -713,7 +887,7 @@ func BenchmarkMapAssign(b *testing.B) { } func BenchmarkMapDelete(b *testing.B) { - b.Run("Int32", runWith(benchmarkMapDeleteInt32, 1, 2, 4)) - b.Run("Int64", runWith(benchmarkMapDeleteInt64, 1, 2, 4)) - b.Run("Str", runWith(benchmarkMapDeleteStr, 1, 2, 4)) + b.Run("Int32", runWith(benchmarkMapDeleteInt32, 100, 1000, 10000)) + b.Run("Int64", runWith(benchmarkMapDeleteInt64, 100, 1000, 10000)) + b.Run("Str", runWith(benchmarkMapDeleteStr, 100, 1000, 10000)) } diff --git a/libgo/go/runtime/mbarrier.go b/libgo/go/runtime/mbarrier.go index d54016f0ba9..3b8f71434b8 100644 --- a/libgo/go/runtime/mbarrier.go +++ b/libgo/go/runtime/mbarrier.go @@ -189,6 +189,8 @@ func gcmarkwb_m(slot *uintptr, ptr uintptr) { func writebarrierptr_prewrite1(dst *uintptr, src uintptr) { mp := acquirem() if mp.inwb || mp.dying > 0 { + // We explicitly allow write barriers in startpanic_m, + // since we're going down anyway. Ignore them here. releasem(mp) return } @@ -244,6 +246,10 @@ func writebarrierptr_prewrite(dst *uintptr, src uintptr) { // typedmemmove copies a value of type t to dst from src. // Must be nosplit, see #16026. +// +// TODO: Perfect for go:nosplitrec since we can't have a safe point +// anywhere in the bulk barrier or memmove. +// //go:nosplit func typedmemmove(typ *_type, dst, src unsafe.Pointer) { if typ.kind&kindNoPointers == 0 { @@ -265,8 +271,8 @@ func typedmemmove(typ *_type, dst, src unsafe.Pointer) { //go:linkname reflect_typedmemmove reflect.typedmemmove func reflect_typedmemmove(typ *_type, dst, src unsafe.Pointer) { if raceenabled { - raceWriteObjectPC(typ, dst, getcallerpc(unsafe.Pointer(&typ)), funcPC(reflect_typedmemmove)) - raceReadObjectPC(typ, src, getcallerpc(unsafe.Pointer(&typ)), funcPC(reflect_typedmemmove)) + raceWriteObjectPC(typ, dst, getcallerpc(), funcPC(reflect_typedmemmove)) + raceReadObjectPC(typ, src, getcallerpc(), funcPC(reflect_typedmemmove)) } if msanenabled { msanwrite(dst, typ.size) @@ -310,8 +316,12 @@ func typedslicecopy(typ *_type, dst, src slice) int { dstp := dst.array srcp := src.array + // The compiler emits calls to typedslicecopy before + // instrumentation runs, so unlike the other copying and + // assignment operations, it's not instrumented in the calling + // code and needs its own instrumentation. if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&typ)) + callerpc := getcallerpc() pc := funcPC(slicecopy) racewriterangepc(dstp, uintptr(n)*typ.size, callerpc, pc) racereadrangepc(srcp, uintptr(n)*typ.size, callerpc, pc) @@ -329,41 +339,13 @@ func typedslicecopy(typ *_type, dst, src slice) int { // compiler only emits calls to typedslicecopy for types with pointers, // and growslice and reflect_typedslicecopy check for pointers // before calling typedslicecopy. - if !writeBarrier.needed { - memmove(dstp, srcp, uintptr(n)*typ.size) - return n + size := uintptr(n) * typ.size + if writeBarrier.needed { + bulkBarrierPreWrite(uintptr(dstp), uintptr(srcp), size) } - - systemstack(func() { - if uintptr(srcp) < uintptr(dstp) && uintptr(srcp)+uintptr(n)*typ.size > uintptr(dstp) { - // Overlap with src before dst. - // Copy backward, being careful not to move dstp/srcp - // out of the array they point into. - dstp = add(dstp, uintptr(n-1)*typ.size) - srcp = add(srcp, uintptr(n-1)*typ.size) - i := 0 - for { - typedmemmove(typ, dstp, srcp) - if i++; i >= n { - break - } - dstp = add(dstp, -typ.size) - srcp = add(srcp, -typ.size) - } - } else { - // Copy forward, being careful not to move dstp/srcp - // out of the array they point into. 
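The loop being deleted in this typedslicecopy hunk chose a copy direction by hand so overlapping element ranges would not clobber themselves; the new code leaves that to memmove and does the barrier work in one bulk call beforehand. The direction rule on plain ints, purely illustrative (the built-in copy already handles overlap for slices):

package main

import "fmt"

// Direction rule for copying between overlapping regions: copy
// backward when the destination starts inside the source, forward
// otherwise. One backing array, plain ints.
func main() {
	a := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
	src, dst, n := 2, 4, 5 // copy a[2:7] onto a[4:9]
	if dst > src {
		for i := n - 1; i >= 0; i-- { // backward: don't clobber unread source
			a[dst+i] = a[src+i]
		}
	} else {
		for i := 0; i < n; i++ { // forward is safe
			a[dst+i] = a[src+i]
		}
	}
	fmt.Println(a) // [0 1 2 3 2 3 4 5 6 9]
}

Replacing the element-by-element typedmemmove loop with one bulkBarrierPreWrite plus one memmove is the point of the hunk: the barrier work and the byte copy are each done once.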
- i := 0 - for { - typedmemmove(typ, dstp, srcp) - if i++; i >= n { - break - } - dstp = add(dstp, typ.size) - srcp = add(srcp, typ.size) - } - } - }) + // See typedmemmove for a discussion of the race between the + // barrier and memmove. + memmove(dstp, srcp, size) return n } @@ -380,7 +362,7 @@ func reflect_typedslicecopy(elemType *_type, dst, src slice) int { size := uintptr(n) * elemType.size if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&elemType)) + callerpc := getcallerpc() pc := funcPC(reflect_typedslicecopy) racewriterangepc(dst.array, size, callerpc, pc) racereadrangepc(src.array, size, callerpc, pc) diff --git a/libgo/go/runtime/mbitmap.go b/libgo/go/runtime/mbitmap.go index d1a58202352..a775b57b033 100644 --- a/libgo/go/runtime/mbitmap.go +++ b/libgo/go/runtime/mbitmap.go @@ -463,11 +463,6 @@ func heapBitsForObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, return } -// prefetch the bits. -func (h heapBits) prefetch() { - prefetchnta(uintptr(unsafe.Pointer((h.bitp)))) -} - // next returns the heapBits describing the next pointer-sized word in memory. // That is, if h describes address p, h.next() describes p+ptrSize. // Note that next does not modify h. The caller must record the result. @@ -542,12 +537,13 @@ func (h heapBits) setCheckmarked(size uintptr) { atomic.Or8(h.bitp, bitScan<<(heapBitsShift+h.shift)) } -// bulkBarrierPreWrite executes writebarrierptr_prewrite1 +// bulkBarrierPreWrite executes a write barrier // for every pointer slot in the memory range [src, src+size), // using pointer/scalar information from [dst, dst+size). // This executes the write barriers necessary before a memmove. // src, dst, and size must be pointer-aligned. // The range [dst, dst+size) must lie within a single object. +// It does not perform the actual writes. // // As a special case, src == 0 indicates that this is being used for a // memclr. 
bulkBarrierPreWrite will pass 0 for the src of each write @@ -593,12 +589,15 @@ func bulkBarrierPreWrite(dst, src, size uintptr) { return } + buf := &getg().m.p.ptr().wbBuf h := heapBitsForAddr(dst) if src == 0 { for i := uintptr(0); i < size; i += sys.PtrSize { if h.isPointer() { dstx := (*uintptr)(unsafe.Pointer(dst + i)) - writebarrierptr_prewrite1(dstx, 0) + if !buf.putFast(*dstx, 0) { + wbBufFlush(nil, 0) + } } h = h.next() } @@ -607,7 +606,9 @@ func bulkBarrierPreWrite(dst, src, size uintptr) { if h.isPointer() { dstx := (*uintptr)(unsafe.Pointer(dst + i)) srcx := (*uintptr)(unsafe.Pointer(src + i)) - writebarrierptr_prewrite1(dstx, *srcx) + if !buf.putFast(*dstx, *srcx) { + wbBufFlush(nil, 0) + } } h = h.next() } @@ -627,6 +628,7 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) { bits = addb(bits, word/8) mask := uint8(1) << (word % 8) + buf := &getg().m.p.ptr().wbBuf for i := uintptr(0); i < size; i += sys.PtrSize { if mask == 0 { bits = addb(bits, 1) @@ -640,10 +642,14 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) { if *bits&mask != 0 { dstx := (*uintptr)(unsafe.Pointer(dst + i)) if src == 0 { - writebarrierptr_prewrite1(dstx, 0) + if !buf.putFast(*dstx, 0) { + wbBufFlush(nil, 0) + } } else { srcx := (*uintptr)(unsafe.Pointer(src + i)) - writebarrierptr_prewrite1(dstx, *srcx) + if !buf.putFast(*dstx, *srcx) { + wbBufFlush(nil, 0) + } } } mask <<= 1 diff --git a/libgo/go/runtime/mcache.go b/libgo/go/runtime/mcache.go index 71a2f22114f..766cfd17523 100644 --- a/libgo/go/runtime/mcache.go +++ b/libgo/go/runtime/mcache.go @@ -96,7 +96,7 @@ func freemcache(c *mcache) { // Gets a span that has a free object in it and assigns it // to be the cached span for the given sizeclass. Returns this span. -func (c *mcache) refill(spc spanClass) *mspan { +func (c *mcache) refill(spc spanClass) { _g_ := getg() _g_.m.locks++ @@ -123,7 +123,6 @@ func (c *mcache) refill(spc spanClass) *mspan { c.alloc[spc] = s _g_.m.locks-- - return s } func (c *mcache) releaseAll() { diff --git a/libgo/go/runtime/mem_gccgo.go b/libgo/go/runtime/mem_gccgo.go index ea3e5ebab4e..a087945251f 100644 --- a/libgo/go/runtime/mem_gccgo.go +++ b/libgo/go/runtime/mem_gccgo.go @@ -13,9 +13,10 @@ import ( // Functions called by C code. //go:linkname sysAlloc runtime.sysAlloc +//go:linkname sysFree runtime.sysFree //extern mmap -func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uintptr) unsafe.Pointer +func sysMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uintptr) unsafe.Pointer //extern munmap func munmap(addr unsafe.Pointer, length uintptr) int32 @@ -40,6 +41,14 @@ func init() { } } +func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uintptr) (unsafe.Pointer, int) { + p := sysMmap(addr, n, prot, flags, fd, off) + if uintptr(p) == _MAP_FAILED { + return nil, errno() + } + return p, 0 +} + // NOTE: vec must be just 1 byte long here. // Mincore returns ENOMEM if any of the pages are unmapped, // but we want to know that all of the pages are unmapped. 
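The new mmap wrapper above converts C's in-band failure convention, a MAP_FAILED sentinel plus errno, into an ordinary Go (value, error) pair at the lowest layer, so sysAlloc, sysReserve, and sysMap can simply test err. The same shape as a runnable Unix-only sketch built on the portable syscall wrapper; osAlloc is a made-up name, not a runtime function:

package main

import (
	"fmt"
	"syscall"
)

// One low-level function turns mmap's in-band failure (MAP_FAILED
// plus errno) into a (value, error) pair, and every caller just
// checks err.
func osAlloc(n int) ([]byte, error) {
	mem, err := syscall.Mmap(-1, 0, n,
		syscall.PROT_READ|syscall.PROT_WRITE,
		syscall.MAP_ANON|syscall.MAP_PRIVATE)
	if err != nil {
		return nil, fmt.Errorf("mmap failed: %v", err)
	}
	return mem, nil
}

func main() {
	mem, err := osAlloc(1 << 20)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer syscall.Munmap(mem)
	mem[0] = 42 // the mapping is ordinary usable memory
	fmt.Printf("mapped %d bytes, first byte %d\n", len(mem), mem[0])
}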
@@ -75,31 +84,30 @@ func addrspace_free(v unsafe.Pointer, n uintptr) bool { return true } -func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uintptr) unsafe.Pointer { - p := mmap(v, n, prot, flags, fd, offset) +func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uintptr) (unsafe.Pointer, int) { + p, err := mmap(v, n, prot, flags, fd, offset) // On some systems, mmap ignores v without // MAP_FIXED, so retry if the address space is free. if p != v && addrspace_free(v, n) { - if uintptr(p) != _MAP_FAILED { + if err == 0 { munmap(p, n) } - p = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset) + p, err = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset) } - return p + return p, err } // Don't split the stack as this method may be invoked without a valid G, which // prevents us from allocating more stack. //go:nosplit func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { - p := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0) - if uintptr(p) == _MAP_FAILED { - errval := errno() - if errval == _EACCES { + p, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0) + if err != 0 { + if err == _EACCES { print("runtime: mmap: access denied\n") exit(2) } - if errval == _EAGAIN { + if err == _EAGAIN { print("runtime: mmap: too much locked memory (check 'ulimit -l').\n") exit(2) } @@ -225,9 +233,9 @@ func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer { // if we can reserve at least 64K and check the assumption in SysMap. // Only user-mode Linux (UML) rejects these requests. if sys.PtrSize == 8 && uint64(n) > 1<<32 { - p := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0) - if p != v { - if uintptr(p) != _MAP_FAILED { + p, err := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0) + if p != v || err != 0 { + if err == 0 { munmap(p, 64<<10) } return nil @@ -237,8 +245,8 @@ func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer { return v } - p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0) - if uintptr(p) == _MAP_FAILED { + p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0) + if err != 0 { return nil } *reserved = true @@ -259,12 +267,12 @@ func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) { // to do this - we do not on other platforms. flags |= _MAP_FIXED } - p := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, flags, mmapFD, 0) - if uintptr(p) == _MAP_FAILED && errno() == _ENOMEM { + p, err := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, flags, mmapFD, 0) + if err == _ENOMEM { throw("runtime: out of memory") } - if p != v { - print("runtime: address space conflict: map(", v, ") = ", p, "\n") + if p != v || err != 0 { + print("runtime: address space conflict: map(", v, ") = ", p, " (err ", err, ")\n") throw("runtime: address space conflict") } return @@ -275,11 +283,11 @@ func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) { // So always unmap first even if it is already unmapped. 
munmap(v, n) } - p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, mmapFD, 0) - if uintptr(p) == _MAP_FAILED && errno() == _ENOMEM { + p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, mmapFD, 0) + if err == _ENOMEM { throw("runtime: out of memory") } - if p != v { + if p != v || err != 0 { throw("runtime: cannot map pages in arena address space") } } diff --git a/libgo/go/runtime/memmove_test.go b/libgo/go/runtime/memmove_test.go index 74b8753b5f7..62de604e69c 100644 --- a/libgo/go/runtime/memmove_test.go +++ b/libgo/go/runtime/memmove_test.go @@ -9,6 +9,7 @@ import ( "encoding/binary" "fmt" "internal/race" + "internal/testenv" . "runtime" "testing" ) @@ -88,6 +89,10 @@ func TestMemmoveAlias(t *testing.T) { } func TestMemmoveLarge0x180000(t *testing.T) { + if testing.Short() && testenv.Builder() == "" { + t.Skip("-short") + } + t.Parallel() if race.Enabled { t.Skip("skipping large memmove test under race detector") @@ -96,6 +101,10 @@ func TestMemmoveLarge0x180000(t *testing.T) { } func TestMemmoveOverlapLarge0x120000(t *testing.T) { + if testing.Short() && testenv.Builder() == "" { + t.Skip("-short") + } + t.Parallel() if race.Enabled { t.Skip("skipping large memmove test under race detector") diff --git a/libgo/go/runtime/mfinal.go b/libgo/go/runtime/mfinal.go index 4353ee57569..19573d8b8d3 100644 --- a/libgo/go/runtime/mfinal.go +++ b/libgo/go/runtime/mfinal.go @@ -419,11 +419,7 @@ func findObject(v unsafe.Pointer) (s *mspan, x unsafe.Pointer, n uintptr) { return } -// Mark KeepAlive as noinline so that the current compiler will ensure -// that the argument is alive at the point of the function call. -// If it were inlined, it would disappear, and there would be nothing -// keeping the argument alive. Perhaps a future compiler will recognize -// runtime.KeepAlive specially and do something more efficient. +// Mark KeepAlive as noinline so that it is easily detectable as an intrinsic. //go:noinline // KeepAlive marks its argument as currently reachable. @@ -445,4 +441,11 @@ func findObject(v unsafe.Pointer) (s *mspan, x unsafe.Pointer, n uintptr) { // Without the KeepAlive call, the finalizer could run at the start of // syscall.Read, closing the file descriptor before syscall.Read makes // the actual system call. -func KeepAlive(interface{}) {} +func KeepAlive(x interface{}) { + // Introduce a use of x that the compiler can't eliminate. + // This makes sure x is alive on entry. We need x to be alive + // on entry for "defer runtime.KeepAlive(x)"; see issue 21402. + if cgoAlwaysFalse { + println(x) + } +} diff --git a/libgo/go/runtime/mfinal_test.go b/libgo/go/runtime/mfinal_test.go index 38c2623bb7b..2086e42ba33 100644 --- a/libgo/go/runtime/mfinal_test.go +++ b/libgo/go/runtime/mfinal_test.go @@ -254,3 +254,24 @@ var ( Foo2 = &Object2{} Foo1 = &Object1{} ) + +func TestDeferKeepAlive(t *testing.T) { + if *flagQuick { + t.Skip("-quick") + } + + // See issue 21402. + t.Parallel() + type T *int // needs to be a pointer base type to avoid tinyalloc and its never-finalized behavior. 
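The rewritten KeepAlive above exists for exactly the file-descriptor scenario its doc comment describes. Spelled out as a runnable Unix-only program; the File type here is hypothetical:

package main

import (
	"fmt"
	"runtime"
	"syscall"
)

// A wrapper owns an fd and a finalizer closes it. KeepAlive pins f
// until Read has returned, so the finalizer cannot close the
// descriptor mid-call.
type File struct{ d int }

func main() {
	p, err := syscall.Open("/etc/hosts", syscall.O_RDONLY, 0)
	if err != nil {
		fmt.Println(err)
		return
	}
	f := &File{d: p}
	runtime.SetFinalizer(f, func(f *File) { syscall.Close(f.d) })

	buf := make([]byte, 16)
	n, err := syscall.Read(f.d, buf)
	// Ensure f is not finalized (and f.d not closed) before Read returns.
	runtime.KeepAlive(f)
	fmt.Println(n, err)
}

Without the KeepAlive call, f could become unreachable as soon as f.d is loaded, letting the finalizer close the descriptor while Read is still using it; the cgoAlwaysFalse test in KeepAlive's new body is what forces the compiler to treat the argument as live until the call.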
+ x := new(T) + finRun := false + runtime.SetFinalizer(x, func(x *T) { + finRun = true + }) + defer runtime.KeepAlive(x) + runtime.GC() + time.Sleep(time.Second) + if finRun { + t.Errorf("finalizer ran prematurely") + } +} diff --git a/libgo/go/runtime/mgc.go b/libgo/go/runtime/mgc.go index 31c4be86fe4..626f088d450 100644 --- a/libgo/go/runtime/mgc.go +++ b/libgo/go/runtime/mgc.go @@ -231,6 +231,24 @@ func setGCPercent(in int32) (out int32) { // Update pacing in response to gcpercent change. gcSetTriggerRatio(memstats.triggerRatio) unlock(&mheap_.lock) + + // If we just disabled GC, wait for any concurrent GC to + // finish so we always return with no GC running. + if in < 0 { + // Disable phase transitions. + lock(&work.sweepWaiters.lock) + if gcphase == _GCmark { + // GC is active. Wait until we reach sweeping. + gp := getg() + gp.schedlink = work.sweepWaiters.head + work.sweepWaiters.head.set(gp) + goparkunlock(&work.sweepWaiters.lock, "wait for GC cycle", traceEvGoBlock, 1) + } else { + // GC isn't active. + unlock(&work.sweepWaiters.lock) + } + } + return out } @@ -300,10 +318,10 @@ const ( // gcMarkWorkerFractionalMode indicates that a P is currently // running the "fractional" mark worker. The fractional worker - // is necessary when GOMAXPROCS*gcGoalUtilization is not an - // integer. The fractional worker should run until it is + // is necessary when GOMAXPROCS*gcBackgroundUtilization is not + // an integer. The fractional worker should run until it is // preempted and will be scheduled to pick up the fractional - // part of GOMAXPROCS*gcGoalUtilization. + // part of GOMAXPROCS*gcBackgroundUtilization. gcMarkWorkerFractionalMode // gcMarkWorkerIdleMode indicates that a P is running the mark @@ -397,23 +415,18 @@ type gcControllerState struct { assistBytesPerWork float64 // fractionalUtilizationGoal is the fraction of wall clock - // time that should be spent in the fractional mark worker. - // For example, if the overall mark utilization goal is 25% - // and GOMAXPROCS is 6, one P will be a dedicated mark worker - // and this will be set to 0.5 so that 50% of the time some P - // is in a fractional mark worker. This is computed at the - // beginning of each cycle. + // time that should be spent in the fractional mark worker on + // each P that isn't running a dedicated worker. + // + // For example, if the utilization goal is 25% and there are + // no dedicated workers, this will be 0.25. If there goal is + // 25%, there is one dedicated worker, and GOMAXPROCS is 5, + // this will be 0.05 to make up the missing 5%. + // + // If this is zero, no fractional workers are needed. fractionalUtilizationGoal float64 _ [sys.CacheLineSize]byte - - // fractionalMarkWorkersNeeded is the number of fractional - // mark workers that need to be started. This is either 0 or - // 1. This is potentially updated atomically at every - // scheduling point (hence it gets its own cache line). - fractionalMarkWorkersNeeded int64 - - _ [sys.CacheLineSize]byte } // startCycle resets the GC controller's state and computes estimates @@ -454,23 +467,33 @@ func (c *gcControllerState) startCycle() { memstats.next_gc = memstats.heap_live + 1024*1024 } - // Compute the total mark utilization goal and divide it among - // dedicated and fractional workers. 
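The replacement arithmetic just below rounds GOMAXPROCS*gcBackgroundUtilization to whole dedicated workers and falls back to a per-P fractional goal when rounding strays more than 30% from the target. It can be checked in isolation; the constants are copied out of the diff, not imported from the runtime:

package main

import "fmt"

// Restatement of the dedicated/fractional split: round to whole
// dedicated workers; if rounding is more than 30% off the goal,
// shave a dedicated worker if needed and make up the difference
// with a per-P fractional goal.
func markWorkers(gomaxprocs int) (dedicated int, fractionalGoal float64) {
	const gcBackgroundUtilization = 0.25
	total := float64(gomaxprocs) * gcBackgroundUtilization
	dedicated = int(total + 0.5)
	utilError := float64(dedicated)/total - 1
	const maxUtilError = 0.3
	if utilError < -maxUtilError || utilError > maxUtilError {
		if float64(dedicated) > total {
			dedicated-- // rounding up overshot; too many dedicated workers
		}
		fractionalGoal = (total - float64(dedicated)) / float64(gomaxprocs)
	}
	return
}

func main() {
	for _, n := range []int{1, 2, 3, 4, 5, 6, 8} {
		d, f := markWorkers(n)
		fmt.Printf("GOMAXPROCS=%d: dedicated=%d fractionalGoal=%.3f\n", n, d, f)
	}
}

For GOMAXPROCS of 1 through 3 and for 6 this prints a nonzero fractional goal, matching the cases the comment below calls out.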
- totalUtilizationGoal := float64(gomaxprocs) * gcGoalUtilization - c.dedicatedMarkWorkersNeeded = int64(totalUtilizationGoal) - c.fractionalUtilizationGoal = totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded) - if c.fractionalUtilizationGoal > 0 { - c.fractionalMarkWorkersNeeded = 1 + // Compute the background mark utilization goal. In general, + // this may not come out exactly. We round the number of + // dedicated workers so that the utilization is closest to + // 25%. For small GOMAXPROCS, this would introduce too much + // error, so we add fractional workers in that case. + totalUtilizationGoal := float64(gomaxprocs) * gcBackgroundUtilization + c.dedicatedMarkWorkersNeeded = int64(totalUtilizationGoal + 0.5) + utilError := float64(c.dedicatedMarkWorkersNeeded)/totalUtilizationGoal - 1 + const maxUtilError = 0.3 + if utilError < -maxUtilError || utilError > maxUtilError { + // Rounding put us more than 30% off our goal. With + // gcBackgroundUtilization of 25%, this happens for + // GOMAXPROCS<=3 or GOMAXPROCS=6. Enable fractional + // workers to compensate. + if float64(c.dedicatedMarkWorkersNeeded) > totalUtilizationGoal { + // Too many dedicated workers. + c.dedicatedMarkWorkersNeeded-- + } + c.fractionalUtilizationGoal = (totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded)) / float64(gomaxprocs) } else { - c.fractionalMarkWorkersNeeded = 0 + c.fractionalUtilizationGoal = 0 } // Clear per-P state - for _, p := range &allp { - if p == nil { - break - } + for _, p := range allp { p.gcAssistTime = 0 + p.gcFractionalMarkTime = 0 } // Compute initial values for controls that are updated @@ -483,7 +506,7 @@ func (c *gcControllerState) startCycle() { work.initialHeapLive>>20, "->", memstats.next_gc>>20, " MB)", " workers=", c.dedicatedMarkWorkersNeeded, - "+", c.fractionalMarkWorkersNeeded, "\n") + "+", c.fractionalUtilizationGoal, "\n") } } @@ -496,47 +519,73 @@ func (c *gcControllerState) startCycle() { // is when assists are enabled and the necessary statistics are // available). func (c *gcControllerState) revise() { - // Compute the expected scan work remaining. + gcpercent := gcpercent + if gcpercent < 0 { + // If GC is disabled but we're running a forced GC, + // act like GOGC is huge for the below calculations. + gcpercent = 100000 + } + live := atomic.Load64(&memstats.heap_live) + + var heapGoal, scanWorkExpected int64 + if live <= memstats.next_gc { + // We're under the soft goal. Pace GC to complete at + // next_gc assuming the heap is in steady-state. + heapGoal = int64(memstats.next_gc) + + // Compute the expected scan work remaining. + // + // This is estimated based on the expected + // steady-state scannable heap. For example, with + // GOGC=100, only half of the scannable heap is + // expected to be live, so that's what we target. + // + // (This is a float calculation to avoid overflowing on + // 100*heap_scan.) + scanWorkExpected = int64(float64(memstats.heap_scan) * 100 / float64(100+gcpercent)) + } else { + // We're past the soft goal. Pace GC so that in the + // worst case it will complete by the hard goal. + const maxOvershoot = 1.1 + heapGoal = int64(float64(memstats.next_gc) * maxOvershoot) + + // Compute the upper bound on the scan work remaining. + scanWorkExpected = int64(memstats.heap_scan) + } + + // Compute the remaining scan work estimate. // // Note that we currently count allocations during GC as both // scannable heap (heap_scan) and scan work completed - // (scanWork), so this difference won't be changed by - // allocations during GC. 
- // - // This particular estimate is a strict upper bound on the - // possible remaining scan work for the current heap. - // You might consider dividing this by 2 (or by - // (100+GOGC)/100) to counter this over-estimation, but - // benchmarks show that this has almost no effect on mean - // mutator utilization, heap size, or assist time and it - // introduces the danger of under-estimating and letting the - // mutator outpace the garbage collector. - scanWorkExpected := int64(memstats.heap_scan) - c.scanWork - if scanWorkExpected < 1000 { + // (scanWork), so allocation will change this difference + // slowly in the soft regime and not at all in the hard + // regime. + scanWorkRemaining := scanWorkExpected - c.scanWork + if scanWorkRemaining < 1000 { // We set a somewhat arbitrary lower bound on // remaining scan work since if we aim a little high, // we can miss by a little. // // We *do* need to enforce that this is at least 1, // since marking is racy and double-scanning objects - // may legitimately make the expected scan work - // negative. - scanWorkExpected = 1000 + // may legitimately make the remaining scan work + // negative, even in the hard goal regime. + scanWorkRemaining = 1000 } // Compute the heap distance remaining. - heapDistance := int64(memstats.next_gc) - int64(atomic.Load64(&memstats.heap_live)) - if heapDistance <= 0 { + heapRemaining := heapGoal - int64(live) + if heapRemaining <= 0 { // This shouldn't happen, but if it does, avoid // dividing by zero or setting the assist negative. - heapDistance = 1 + heapRemaining = 1 } // Compute the mutator assist ratio so by the time the mutator // allocates the remaining heap bytes up to next_gc, it will // have done (or stolen) the remaining amount of scan work. - c.assistWorkPerByte = float64(scanWorkExpected) / float64(heapDistance) - c.assistBytesPerWork = float64(heapDistance) / float64(scanWorkExpected) + c.assistWorkPerByte = float64(scanWorkRemaining) / float64(heapRemaining) + c.assistBytesPerWork = float64(heapRemaining) / float64(scanWorkRemaining) } // endCycle computes the trigger ratio for the next cycle. @@ -570,7 +619,7 @@ func (c *gcControllerState) endCycle() float64 { assistDuration := nanotime() - c.markStartTime // Assume background mark hit its utilization goal. - utilization := gcGoalUtilization + utilization := gcBackgroundUtilization // Add assist utilization; avoid divide by zero. if assistDuration > 0 { utilization += float64(c.assistTime) / float64(assistDuration*int64(gomaxprocs)) @@ -689,51 +738,20 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { // This P is now dedicated to marking until the end of // the concurrent mark phase. _p_.gcMarkWorkerMode = gcMarkWorkerDedicatedMode + } else if c.fractionalUtilizationGoal == 0 { + // No need for fractional workers. + return nil } else { - if !decIfPositive(&c.fractionalMarkWorkersNeeded) { - // No more workers are need right now. - return nil - } - - // This P has picked the token for the fractional worker. - // Is the GC currently under or at the utilization goal? - // If so, do more work. - // - // We used to check whether doing one time slice of work - // would remain under the utilization goal, but that has the - // effect of delaying work until the mutator has run for - // enough time slices to pay for the work. During those time - // slices, write barriers are enabled, so the mutator is running slower.
- // Now instead we do the work whenever we're under or at the - // utilization work and pay for it by letting the mutator run later. - // This doesn't change the overall utilization averages, but it - // front loads the GC work so that the GC finishes earlier and - // write barriers can be turned off sooner, effectively giving - // the mutator a faster machine. - // - // The old, slower behavior can be restored by setting - // gcForcePreemptNS = forcePreemptNS. - const gcForcePreemptNS = 0 - - // TODO(austin): We could fast path this and basically - // eliminate contention on c.fractionalMarkWorkersNeeded by - // precomputing the minimum time at which it's worth - // next scheduling the fractional worker. Then Ps - // don't have to fight in the window where we've - // passed that deadline and no one has started the - // worker yet. + // Is this P behind on the fractional utilization + // goal? // - // TODO(austin): Shorter preemption interval for mark - // worker to improve fairness and give this - // finer-grained control over schedule? - now := nanotime() - gcController.markStartTime - then := now + gcForcePreemptNS - timeUsed := c.fractionalMarkTime + gcForcePreemptNS - if then > 0 && float64(timeUsed)/float64(then) > c.fractionalUtilizationGoal { - // Nope, we'd overshoot the utilization goal - atomic.Xaddint64(&c.fractionalMarkWorkersNeeded, +1) + // This should be kept in sync with pollFractionalWorkerExit. + delta := nanotime() - gcController.markStartTime + if delta > 0 && float64(_p_.gcFractionalMarkTime)/float64(delta) > c.fractionalUtilizationGoal { + // Nope. No need to run a fractional worker. return nil } + // Run a fractional worker. _p_.gcMarkWorkerMode = gcMarkWorkerFractionalMode } @@ -746,6 +764,24 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { return gp } +// pollFractionalWorkerExit returns true if a fractional mark worker +// should self-preempt. It assumes it is called from the fractional +// worker. +func pollFractionalWorkerExit() bool { + // This should be kept in sync with the fractional worker + // scheduler logic in findRunnableGCWorker. + now := nanotime() + delta := now - gcController.markStartTime + if delta <= 0 { + return true + } + p := getg().m.p.ptr() + selfTime := p.gcFractionalMarkTime + (now - p.gcMarkWorkerStartTime) + // Add some slack to the utilization goal so that the + // fractional worker isn't behind again the instant it exits. + return float64(selfTime)/float64(delta) > 1.2*gcController.fractionalUtilizationGoal +} + // gcSetTriggerRatio sets the trigger ratio and updates everything // derived from it: the absolute trigger, the heap goal, mark pacing, // and sweep pacing. @@ -860,9 +896,22 @@ func gcSetTriggerRatio(triggerRatio float64) { } } -// gcGoalUtilization is the goal CPU utilization for background +// gcGoalUtilization is the goal CPU utilization for // marking as a fraction of GOMAXPROCS. -const gcGoalUtilization = 0.25 +const gcGoalUtilization = 0.30 + +// gcBackgroundUtilization is the fixed CPU utilization for background +// marking. It must be <= gcGoalUtilization. The difference between +// gcGoalUtilization and gcBackgroundUtilization will be made up by +// mark assists. The scheduler will aim to use within 50% of this +// goal. +// +// Setting this to < gcGoalUtilization avoids saturating the trigger +// feedback controller when there are no assists, which allows it to +// better control CPU and heap growth. 
However, the larger the gap, +// the more mutator assists are expected to happen, which impact +// mutator latency. +const gcBackgroundUtilization = 0.25 // gcCreditSlack is the amount of scan work credit that can can // accumulate locally before updating gcController.scanWork and, @@ -1159,7 +1208,7 @@ func (t gcTrigger) test() bool { if t.kind == gcTriggerAlways { return true } - if gcphase != _GCoff || gcpercent < 0 { + if gcphase != _GCoff { return false } switch t.kind { @@ -1170,6 +1219,9 @@ func (t gcTrigger) test() bool { // own write. return memstats.heap_live >= memstats.gc_trigger case gcTriggerTime: + if gcpercent < 0 { + return false + } lastgc := int64(atomic.Load64(&memstats.last_gc_nanotime)) return lastgc != 0 && t.now-lastgc > forcegcperiod case gcTriggerCycle: @@ -1236,7 +1288,7 @@ func gcStart(mode gcMode, trigger gcTrigger) { } } - // Ok, we're doing it! Stop everybody else + // Ok, we're doing it! Stop everybody else semacquire(&worldsema) if trace.enabled { @@ -1249,7 +1301,12 @@ func gcStart(mode gcMode, trigger gcTrigger) { gcResetMarkState() - work.stwprocs, work.maxprocs = gcprocs(), gomaxprocs + work.stwprocs, work.maxprocs = gomaxprocs, gomaxprocs + if work.stwprocs > ncpu { + // This is used to compute CPU time of the STW phases, + // so it can't be more than ncpu, even if GOMAXPROCS is. + work.stwprocs = ncpu + } work.heap0 = atomic.Load64(&memstats.heap_live) work.pauseNS = 0 work.mode = mode @@ -1257,6 +1314,9 @@ func gcStart(mode gcMode, trigger gcTrigger) { now := nanotime() work.tSweepTerm = now work.pauseStart = now + if trace.enabled { + traceGCSTWStart(1) + } systemstack(stopTheWorldWithSema) // Finish sweep before we start concurrent scan. systemstack(func() { @@ -1309,11 +1369,17 @@ func gcStart(mode gcMode, trigger gcTrigger) { gcController.markStartTime = now // Concurrent mark. - systemstack(startTheWorldWithSema) - now = nanotime() + systemstack(func() { + now = startTheWorldWithSema(trace.enabled) + }) work.pauseNS += now - work.pauseStart work.tMark = now } else { + if trace.enabled { + // Switch to mark termination STW. + traceGCSTWDone() + traceGCSTWStart(0) + } t := nanotime() work.tMark, work.tMarkTerm = t, t work.heapGoal = work.heap0 @@ -1356,7 +1422,8 @@ top: // TODO(austin): Should dedicated workers keep an eye on this // and exit gcDrain promptly? atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, -0xffffffff) - atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, -0xffffffff) + prevFractionalGoal := gcController.fractionalUtilizationGoal + gcController.fractionalUtilizationGoal = 0 if !gcBlackenPromptly { // Transition from mark 1 to mark 2. @@ -1383,6 +1450,7 @@ top: // workers have exited their loop so we can // start new mark 2 workers. forEachP(func(_p_ *p) { + wbBufFlush1(_p_) _p_.gcw.dispose() }) }) @@ -1399,7 +1467,7 @@ top: // Now we can start up mark 2 workers. atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 0xffffffff) - atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, 0xffffffff) + gcController.fractionalUtilizationGoal = prevFractionalGoal incnwait := atomic.Xadd(&work.nwait, +1) if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { @@ -1414,6 +1482,9 @@ top: work.tMarkTerm = now work.pauseStart = now getg().m.preemptoff = "gcing" + if trace.enabled { + traceGCSTWStart(0) + } systemstack(stopTheWorldWithSema) // The gcphase is _GCmark, it will transition to _GCmarktermination // below. 
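Alongside the STW tracing changes, gcTrigger.test above was reworked so that a negative gcpercent (GOGC=off) is checked per trigger kind: the periodic time trigger is disabled explicitly, while a forced runtime.GC (gcTriggerCycle) still runs. The two gated predicates restated with plain values; forcegcperiod is two minutes in the real runtime:

package main

import (
	"fmt"
	"time"
)

// The heap trigger compares live heap against the pacer's trigger;
// the time trigger fires after forcegcperiod unless GC is disabled,
// which is the new gcpercent < 0 check in the hunk above.
const forcegcperiod = 2 * time.Minute

func heapTriggered(heapLive, gcTrigger uint64) bool {
	return heapLive >= gcTrigger
}

func timeTriggered(now, lastGC time.Time, gcDisabled bool) bool {
	if gcDisabled {
		return false
	}
	return !lastGC.IsZero() && now.Sub(lastGC) > forcegcperiod
}

func main() {
	fmt.Println(heapTriggered(9<<20, 8<<20))                                      // true
	fmt.Println(timeTriggered(time.Now(), time.Now().Add(-3*time.Minute), false)) // true
	fmt.Println(timeTriggered(time.Now(), time.Now().Add(-3*time.Minute), true))  // false
}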
The important thing is that the wb remains active until @@ -1574,7 +1645,7 @@ func gcMarkTermination(nextTriggerRatio float64) { // so events don't leak into the wrong cycle. mProf_NextCycle() - systemstack(startTheWorldWithSema) + systemstack(func() { startTheWorldWithSema(true) }) // Flush the heap profile so we can start a new cycle next GC. // This is relatively expensive, so we don't do it with the @@ -1645,10 +1716,7 @@ func gcMarkTermination(nextTriggerRatio float64) { func gcBgMarkStartWorkers() { // Background marking is performed by per-P G's. Ensure that // each P has a background GC G. - for _, p := range &allp { - if p == nil || p.status == _Pdead { - break - } + for _, p := range allp { if p.gcBgMarkWorker == 0 { expectSystemGoroutine() go gcBgMarkWorker(p) @@ -1751,6 +1819,7 @@ func gcBgMarkWorker(_p_ *p) { } startTime := nanotime() + _p_.gcMarkWorkerStartTime = startTime decnwait := atomic.Xadd(&work.nwait, -1) if decnwait == work.nproc { @@ -1792,7 +1861,7 @@ func gcBgMarkWorker(_p_ *p) { // without preemption. gcDrain(&_p_.gcw, gcDrainNoBlock|gcDrainFlushBgCredit) case gcMarkWorkerFractionalMode: - gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit) + gcDrain(&_p_.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit) case gcMarkWorkerIdleMode: gcDrain(&_p_.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit) } @@ -1817,7 +1886,7 @@ func gcBgMarkWorker(_p_ *p) { atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1) case gcMarkWorkerFractionalMode: atomic.Xaddint64(&gcController.fractionalMarkTime, duration) - atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, 1) + atomic.Xaddint64(&_p_.gcFractionalMarkTime, duration) case gcMarkWorkerIdleMode: atomic.Xaddint64(&gcController.idleMarkTime, duration) } @@ -1915,10 +1984,6 @@ func gcMark(start_time int64) { work.helperDrainBlock = true } - if trace.enabled { - traceGCScanStart() - } - if work.nproc > 1 { noteclear(&work.alldone) helpgc(int32(work.nproc)) @@ -1952,8 +2017,8 @@ func gcMark(start_time int64) { // Double-check that all gcWork caches are empty. This should // be ensured by mark 2 before we enter mark termination. - for i := 0; i < int(gomaxprocs); i++ { - gcw := &allp[i].gcw + for _, p := range allp { + gcw := &p.gcw if !gcw.empty() { throw("P has cached GC work at end of mark termination") } @@ -1962,10 +2027,6 @@ func gcMark(start_time int64) { } } - if trace.enabled { - traceGCScanDone() - } - cachestats() // Update the marked heap stat. @@ -2093,18 +2154,19 @@ func clearpools() { unlock(&sched.deferlock) } -// Timing - -//go:nowritebarrier +// gchelper runs mark termination tasks on Ps other than the P +// coordinating mark termination. +// +// The caller is responsible for ensuring that this has a P to run on, +// even though it's running during STW. Because of this, it's allowed +// to have write barriers. +// +//go:yeswritebarrierrec func gchelper() { _g_ := getg() _g_.m.traceback = 2 gchelperstart() - if trace.enabled { - traceGCScanStart() - } - // Parallel mark over GC roots and heap if gcphase == _GCmarktermination { gcw := &_g_.m.p.ptr().gcw @@ -2116,10 +2178,6 @@ func gchelper() { gcw.dispose() } - if trace.enabled { - traceGCScanDone() - } - nproc := atomic.Load(&work.nproc) // work.nproc can change right after we increment work.ndone if atomic.Xadd(&work.ndone, +1) == nproc-1 { notewakeup(&work.alldone) @@ -2138,6 +2196,8 @@ func gchelperstart() { } } +// Timing + // itoaDiv formats val/(10**dec) into buf. 
func itoaDiv(buf []byte, val uint64, dec int) []byte { i := len(buf) - 1 diff --git a/libgo/go/runtime/mgc_gccgo.go b/libgo/go/runtime/mgc_gccgo.go index c1fa1547adc..107a70a7898 100644 --- a/libgo/go/runtime/mgc_gccgo.go +++ b/libgo/go/runtime/mgc_gccgo.go @@ -6,7 +6,10 @@ package runtime -import "unsafe" +import ( + "runtime/internal/sys" + "unsafe" +) // gcRoot is a single GC root: a variable plus a ptrmask. type gcRoot struct { @@ -85,3 +88,21 @@ func checkPreempt() { gp.scanningself = false mcall(gopreempt_m) } + +// gcWriteBarrier implements a write barrier. This is implemented in +// assembly in the gc library, but there is no special advantage to +// doing so with gccgo. +//go:nosplit +//go:nowritebarrier +func gcWriteBarrier(dst *uintptr, src uintptr) { + buf := &getg().m.p.ptr().wbBuf + next := buf.next + np := next + 2*sys.PtrSize + buf.next = np + *(*uintptr)(unsafe.Pointer(next)) = src + *(*uintptr)(unsafe.Pointer(next + sys.PtrSize)) = *dst + if np >= buf.end { + wbBufFlush(dst, src) + } + *dst = src +} diff --git a/libgo/go/runtime/mgclarge.go b/libgo/go/runtime/mgclarge.go index 757e88d1d9d..fe437bf5e84 100644 --- a/libgo/go/runtime/mgclarge.go +++ b/libgo/go/runtime/mgclarge.go @@ -164,11 +164,10 @@ func (root *mTreap) insert(span *mspan) { } } -func (root *mTreap) removeNode(t *treapNode) *mspan { +func (root *mTreap) removeNode(t *treapNode) { if t.spanKey.npages != t.npagesKey { throw("span and treap node npages do not match") } - result := t.spanKey // Rotate t down to be leaf of tree for removal, respecting priorities. for t.right != nil || t.left != nil { @@ -192,7 +191,6 @@ func (root *mTreap) removeNode(t *treapNode) *mspan { t.spanKey = nil t.npagesKey = 0 mheap_.treapalloc.free(unsafe.Pointer(t)) - return result } // remove searches for, finds, removes from the treap, and returns the smallest diff --git a/libgo/go/runtime/mgcmark.go b/libgo/go/runtime/mgcmark.go index 998a830caa8..7297fcb6d1a 100644 --- a/libgo/go/runtime/mgcmark.go +++ b/libgo/go/runtime/mgcmark.go @@ -34,13 +34,13 @@ const ( // span base. maxObletBytes = 128 << 10 - // idleCheckThreshold specifies how many units of work to do - // between run queue checks in an idle worker. Assuming a scan + // drainCheckThreshold specifies how many units of work to do + // between self-preemption checks in gcDrain. Assuming a scan // rate of 1 MB/ms, this is ~100 µs. Lower values have higher // overhead in the scan loop (the scheduler check may perform // a syscall, so its overhead is nontrivial). Higher values // make the system less responsive to incoming work. - idleCheckThreshold = 100000 + drainCheckThreshold = 100000 ) // gcMarkRootPrepare queues root scanning jobs (stacks, globals, and @@ -717,6 +717,7 @@ const ( gcDrainNoBlock gcDrainFlushBgCredit gcDrainIdle + gcDrainFractional // gcDrainBlock means neither gcDrainUntilPreempt or // gcDrainNoBlock. It is the default, but callers should use @@ -733,6 +734,10 @@ const ( // If flags&gcDrainIdle != 0, gcDrain returns when there is other work // to do. This implies gcDrainNoBlock. // +// If flags&gcDrainFractional != 0, gcDrain self-preempts when +// pollFractionalWorkerExit() returns true. This implies +// gcDrainNoBlock. +// // If flags&gcDrainNoBlock != 0, gcDrain returns as soon as it is // unable to get more work. Otherwise, it will block until all // blocking calls are blocked in gcDrain. 
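gcWriteBarrier above is gccgo's entry point into the buffered write barrier that the bulkBarrierPreWrite changes also target: append two words per recorded write with a plain pointer bump, and take the flush slow path only when the buffer fills. The shape in miniature, with a counting flush instead of real marking; this wbBuf is a local stand-in, not the runtime type:

package main

import "fmt"

// Fixed per-P buffer of recorded pointer writes; the fast path is a
// bounds-free append, the slow path drains the whole buffer.
type wbBuf struct {
	buf  [512]uintptr
	next int // index of the next free slot
}

func (b *wbBuf) put(oldp, newp uintptr) {
	b.buf[b.next] = newp
	b.buf[b.next+1] = oldp
	b.next += 2
	if b.next == len(b.buf) {
		b.flush()
	}
}

func (b *wbBuf) flush() {
	fmt.Printf("flushing %d words\n", b.next)
	b.next = 0 // a real flush would grey the recorded pointers first
}

func main() {
	var b wbBuf
	for i := uintptr(0); i < 600; i++ {
		b.put(i, i+1)
	}
	fmt.Printf("%d words still buffered\n", b.next)
}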
@@ -749,14 +754,24 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) { gp := getg().m.curg preemptible := flags&gcDrainUntilPreempt != 0 - blocking := flags&(gcDrainUntilPreempt|gcDrainIdle|gcDrainNoBlock) == 0 + blocking := flags&(gcDrainUntilPreempt|gcDrainIdle|gcDrainFractional|gcDrainNoBlock) == 0 flushBgCredit := flags&gcDrainFlushBgCredit != 0 idle := flags&gcDrainIdle != 0 initScanWork := gcw.scanWork - // idleCheck is the scan work at which to perform the next - // idle check with the scheduler. - idleCheck := initScanWork + idleCheckThreshold + + // checkWork is the scan work before performing the next + // self-preempt check. + checkWork := int64(1<<63 - 1) + var check func() bool + if flags&(gcDrainIdle|gcDrainFractional) != 0 { + checkWork = initScanWork + drainCheckThreshold + if idle { + check = pollWork + } else if flags&gcDrainFractional != 0 { + check = pollFractionalWorkerExit + } + } // Drain root marking jobs. if work.markrootNext < work.markrootJobs { @@ -766,7 +781,7 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) { break } markroot(gcw, job) - if idle && pollWork() { + if check != nil && check() { goto done } } @@ -807,12 +822,12 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) { gcFlushBgCredit(gcw.scanWork - initScanWork) initScanWork = 0 } - idleCheck -= gcw.scanWork + checkWork -= gcw.scanWork gcw.scanWork = 0 - if idle && idleCheck <= 0 { - idleCheck += idleCheckThreshold - if pollWork() { + if checkWork <= 0 { + checkWork += drainCheckThreshold + if check != nil && check() { break } } @@ -1091,6 +1106,9 @@ func shade(b uintptr) { // obj is the start of an object with mark mbits. // If it isn't already marked, mark it and enqueue into gcw. // base and off are for debugging only and could be removed. +// +// See also wbBufFlush1, which partially duplicates this logic. +// //go:nowritebarrierrec func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork, objIndex uintptr, forStack bool) { // obj should be start of allocation, and so must be at least pointer-aligned. @@ -1249,10 +1267,7 @@ func gcmarknewobject(obj, size, scanSize uintptr) { // // The world must be stopped. func gcMarkTinyAllocs() { - for _, p := range &allp { - if p == nil || p.status == _Pdead { - break - } + for _, p := range allp { c := p.mcache if c == nil || c.tiny == 0 { continue diff --git a/libgo/go/runtime/mgcwork.go b/libgo/go/runtime/mgcwork.go index 461679b9343..c6634fc78ca 100644 --- a/libgo/go/runtime/mgcwork.go +++ b/libgo/go/runtime/mgcwork.go @@ -85,6 +85,13 @@ type gcWork struct { scanWork int64 } +// Most of the methods of gcWork are go:nowritebarrierrec because the +// write barrier itself can invoke gcWork methods but the methods are +// not generally re-entrant. Hence, if a gcWork method invoked the +// write barrier while the gcWork was in an inconsistent state, and +// the write barrier in turn invoked a gcWork method, it could +// permanently corrupt the gcWork. + func (w *gcWork) init() { w.wbuf1 = getempty() wbuf2 := trygetfull() @@ -96,7 +103,7 @@ func (w *gcWork) init() { // put enqueues a pointer for the garbage collector to trace. // obj must point to the beginning of a heap object or an oblet. -//go:nowritebarrier +//go:nowritebarrierrec func (w *gcWork) put(obj uintptr) { flushed := false wbuf := w.wbuf1 @@ -129,7 +136,7 @@ func (w *gcWork) put(obj uintptr) { // putFast does a put and returns true if it can be done quickly // otherwise it returns false and the caller needs to call put. 
-//go:nowritebarrier +//go:nowritebarrierrec func (w *gcWork) putFast(obj uintptr) bool { wbuf := w.wbuf1 if wbuf == nil { @@ -143,12 +150,45 @@ func (w *gcWork) putFast(obj uintptr) bool { return true } +// putBatch performs a put on every pointer in obj. See put for +// constraints on these pointers. +// +//go:nowritebarrierrec +func (w *gcWork) putBatch(obj []uintptr) { + if len(obj) == 0 { + return + } + + flushed := false + wbuf := w.wbuf1 + if wbuf == nil { + w.init() + wbuf = w.wbuf1 + } + + for len(obj) > 0 { + for wbuf.nobj == len(wbuf.obj) { + putfull(wbuf) + w.wbuf1, w.wbuf2 = w.wbuf2, getempty() + wbuf = w.wbuf1 + flushed = true + } + n := copy(wbuf.obj[wbuf.nobj:], obj) + wbuf.nobj += n + obj = obj[n:] + } + + if flushed && gcphase == _GCmark { + gcController.enlistWorker() + } +} + // tryGet dequeues a pointer for the garbage collector to trace. // // If there are no pointers remaining in this gcWork or in the global // queue, tryGet returns 0. Note that there may still be pointers in // other gcWork instances or other caches. -//go:nowritebarrier +//go:nowritebarrierrec func (w *gcWork) tryGet() uintptr { wbuf := w.wbuf1 if wbuf == nil { @@ -177,7 +217,7 @@ func (w *gcWork) tryGet() uintptr { // tryGetFast dequeues a pointer for the garbage collector to trace // if one is readily available. Otherwise it returns 0 and // the caller is expected to call tryGet(). -//go:nowritebarrier +//go:nowritebarrierrec func (w *gcWork) tryGetFast() uintptr { wbuf := w.wbuf1 if wbuf == nil { @@ -194,7 +234,7 @@ func (w *gcWork) tryGetFast() uintptr { // get dequeues a pointer for the garbage collector to trace, blocking // if necessary to ensure all pointers from all queues and caches have // been retrieved. get returns 0 if there are no pointers remaining. -//go:nowritebarrier +//go:nowritebarrierrec func (w *gcWork) get() uintptr { wbuf := w.wbuf1 if wbuf == nil { @@ -228,7 +268,7 @@ func (w *gcWork) get() uintptr { // GC can inspect them. This helps reduce the mutator's // ability to hide pointers during the concurrent mark phase. // -//go:nowritebarrier +//go:nowritebarrierrec func (w *gcWork) dispose() { if wbuf := w.wbuf1; wbuf != nil { if wbuf.nobj == 0 { @@ -262,7 +302,7 @@ func (w *gcWork) dispose() { // balance moves some work that's cached in this gcWork back on the // global queue. -//go:nowritebarrier +//go:nowritebarrierrec func (w *gcWork) balance() { if w.wbuf1 == nil { return @@ -282,7 +322,7 @@ func (w *gcWork) balance() { } // empty returns true if w has no mark work available. -//go:nowritebarrier +//go:nowritebarrierrec func (w *gcWork) empty() bool { return w.wbuf1 == nil || (w.wbuf1.nobj == 0 && w.wbuf2.nobj == 0) } diff --git a/libgo/go/runtime/mheap.go b/libgo/go/runtime/mheap.go index 8749f971065..d971bfee4df 100644 --- a/libgo/go/runtime/mheap.go +++ b/libgo/go/runtime/mheap.go @@ -56,6 +56,12 @@ type mheap struct { // Internal pages map to an arbitrary span. // For pages that have never been allocated, spans entries are nil. // + // Modifications are protected by mheap.lock. Reads can be + // performed without locking, but ONLY from indexes that are + // known to contain in-use or stack spans. This means there + // must not be a safe-point between establishing that an + // address is live and looking it up in the spans array. + // // This is backed by a reserved region of the address space so // it can grow without moving. The memory up to len(spans) is // mapped. cap(spans) indicates the total reserved memory. 
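Stepping back to the gcWork.putBatch method added above: it copies the incoming batch into fixed-size workbufs a chunk at a time, flushing each buffer the moment it fills. A self-contained sketch of that chunked-copy loop; workbuf here is a toy four-slot buffer, and flush stands in for handing a full workbuf to the global queue (the real method swaps its two cached buffers rather than clearing one in place):

package main

import "fmt"

// workbuf is a tiny fixed-capacity stand-in for the runtime's workbuf;
// four slots so the demo actually overflows.
type workbuf struct {
	nobj int
	obj  [4]uintptr
}

// putBatch copies obj into wbuf in chunks, calling flush whenever the
// buffer is full, mirroring the loop in gcWork.putBatch.
func putBatch(wbuf *workbuf, obj []uintptr, flush func(*workbuf)) {
	for len(obj) > 0 {
		for wbuf.nobj == len(wbuf.obj) {
			flush(wbuf) // hand the full buffer off and start empty
			wbuf.nobj = 0
		}
		n := copy(wbuf.obj[wbuf.nobj:], obj)
		wbuf.nobj += n
		obj = obj[n:]
	}
}

func main() {
	var buf workbuf
	putBatch(&buf, []uintptr{1, 2, 3, 4, 5, 6, 7, 8, 9}, func(w *workbuf) {
		fmt.Println("flush:", w.obj[:w.nobj])
	})
	fmt.Println("residual:", buf.obj[:buf.nobj])
}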
@@ -154,6 +160,8 @@ type mheap struct { specialfinalizeralloc fixalloc // allocator for specialfinalizer* specialprofilealloc fixalloc // allocator for specialprofile* speciallock mutex // lock for special record allocators. + + unused *specialfinalizer // never set, just here to force the specialfinalizer type into DWARF } var mheap_ mheap @@ -311,6 +319,17 @@ func (s *mspan) layout() (size, n, total uintptr) { return } +// recordspan adds a newly allocated span to h.allspans. +// +// This only happens the first time a span is allocated from +// mheap.spanalloc (it is not called when a span is reused). +// +// Write barriers are disallowed here because it can be called from +// gcWork when allocating new workbufs. However, because it's an +// indirect call from the fixalloc initializer, the compiler can't see +// this. +// +//go:nowritebarrierrec func recordspan(vh unsafe.Pointer, p unsafe.Pointer) { h := (*mheap)(vh) s := (*mspan)(p) @@ -320,8 +339,8 @@ func recordspan(vh unsafe.Pointer, p unsafe.Pointer) { n = cap(h.allspans) * 3 / 2 } var new []*mspan - sp := (*slice)(unsafe.Pointer(&new)) - sp.array = sysAlloc(uintptr(n)*sys.PtrSize, &memstats.other_sys) + sp := (*notInHeapSlice)(unsafe.Pointer(&new)) + sp.array = (*notInHeap)(sysAlloc(uintptr(n)*sys.PtrSize, &memstats.other_sys)) if sp.array == nil { throw("runtime: cannot allocate memory") } @@ -331,12 +350,13 @@ func recordspan(vh unsafe.Pointer, p unsafe.Pointer) { copy(new, h.allspans) } oldAllspans := h.allspans - h.allspans = new + *(*notInHeapSlice)(unsafe.Pointer(&h.allspans)) = *(*notInHeapSlice)(unsafe.Pointer(&new)) if len(oldAllspans) != 0 { sysFree(unsafe.Pointer(&oldAllspans[0]), uintptr(cap(oldAllspans))*unsafe.Sizeof(oldAllspans[0]), &memstats.other_sys) } } - h.allspans = append(h.allspans, s) + h.allspans = h.allspans[:len(h.allspans)+1] + h.allspans[len(h.allspans)-1] = s } // A spanClass represents the size class and noscan-ness of a span. @@ -854,7 +874,7 @@ HaveSpan: // Large spans have a minimum size of 1MByte. The maximum number of large spans to support // 1TBytes is 1 million, experimentation using random sizes indicates that the depth of // the tree is less than 2x that of a perfectly balanced tree. 1TByte can be referenced -// by a perfectly balanced tree with a a depth of 20. Twice that is an acceptable 40. +// by a perfectly balanced tree with a depth of 20. Twice that is an acceptable 40. func (h *mheap) isLargeSpan(npages uintptr) bool { return npages >= uintptr(len(h.free)) } @@ -1120,34 +1140,35 @@ func scavengelist(list *mSpanList, now, limit uint64) uintptr { var sumreleased uintptr for s := list.first; s != nil; s = s.next { - if (now-uint64(s.unusedsince)) > limit && s.npreleased != s.npages { - start := s.base() - end := start + s.npages<<_PageShift - if physPageSize > _PageSize { - // We can only release pages in - // physPageSize blocks, so round start - // and end in. (Otherwise, madvise - // will round them *out* and release - // more memory than we want.) - start = (start + physPageSize - 1) &^ (physPageSize - 1) - end &^= physPageSize - 1 - if end <= start { - // start and end don't span a - // whole physical page.
- continue - } - } - len := end - start - - released := len - (s.npreleased << _PageShift) - if physPageSize > _PageSize && released == 0 { + if (now-uint64(s.unusedsince)) <= limit || s.npreleased == s.npages { + continue + } + start := s.base() + end := start + s.npages<<_PageShift + if physPageSize > _PageSize { + // We can only release pages in + // physPageSize blocks, so round start + // and end in. (Otherwise, madvise + // will round them *out* and release + // more memory than we want.) + start = (start + physPageSize - 1) &^ (physPageSize - 1) + end &^= physPageSize - 1 + if end <= start { + // start and end don't span a + // whole physical page. continue } - memstats.heap_released += uint64(released) - sumreleased += released - s.npreleased = len >> _PageShift - sysUnused(unsafe.Pointer(start), len) } + len := end - start + + released := len - (s.npreleased << _PageShift) + if physPageSize > _PageSize && released == 0 { + continue + } + memstats.heap_released += uint64(released) + sumreleased += released + s.npreleased = len >> _PageShift + sysUnused(unsafe.Pointer(start), len) } return sumreleased } diff --git a/libgo/go/runtime/mksizeclasses.go b/libgo/go/runtime/mksizeclasses.go index 0cb2b33a8cd..b146dbcd6c9 100644 --- a/libgo/go/runtime/mksizeclasses.go +++ b/libgo/go/runtime/mksizeclasses.go @@ -24,8 +24,8 @@ // In practice, only one of the wastes comes into play for a // given size (sizes < 512 waste mainly on the round-up, // sizes > 512 waste mainly on the page chopping). -// -// TODO(rsc): Compute max waste for any given size. +// For really small sizes, alignment constraints force the +// overhead higher. package main @@ -242,15 +242,18 @@ nextk: } func printComment(w io.Writer, classes []class) { - fmt.Fprintf(w, "// %-5s %-9s %-10s %-7s %-11s\n", "class", "bytes/obj", "bytes/span", "objects", "waste bytes") + fmt.Fprintf(w, "// %-5s %-9s %-10s %-7s %-10s %-9s\n", "class", "bytes/obj", "bytes/span", "objects", "tail waste", "max waste") + prevSize := 0 for i, c := range classes { if i == 0 { continue } spanSize := c.npages * pageSize objects := spanSize / c.size - waste := spanSize - c.size*(spanSize/c.size) - fmt.Fprintf(w, "// %5d %9d %10d %7d %11d\n", i, c.size, spanSize, objects, waste) + tailWaste := spanSize - c.size*(spanSize/c.size) + maxWaste := float64((c.size-prevSize-1)*objects+tailWaste) / float64(spanSize) + prevSize = c.size + fmt.Fprintf(w, "// %5d %9d %10d %7d %10d %8.2f%%\n", i, c.size, spanSize, objects, tailWaste, 100*maxWaste) } fmt.Fprintf(w, "\n") } diff --git a/libgo/go/runtime/mstats.go b/libgo/go/runtime/mstats.go index 71dc2239854..22f5195cd58 100644 --- a/libgo/go/runtime/mstats.go +++ b/libgo/go/runtime/mstats.go @@ -589,12 +589,13 @@ func updatememstats() { memstats.heap_objects = memstats.nmalloc - memstats.nfree } +// cachestats flushes all mcache stats. +// +// The world must be stopped. +// //go:nowritebarrier func cachestats() { - for _, p := range &allp { - if p == nil { - break - } + for _, p := range allp { c := p.mcache if c == nil { continue @@ -610,9 +611,6 @@ func cachestats() { //go:nowritebarrier func flushmcache(i int) { p := allp[i] - if p == nil { - return - } c := p.mcache if c == nil { return @@ -665,7 +663,7 @@ func purgecachedstats(c *mcache) { // overflow errors. //go:nosplit func mSysStatInc(sysStat *uint64, n uintptr) { - if sys.BigEndian != 0 { + if sys.BigEndian { atomic.Xadd64(sysStat, int64(n)) return } @@ -679,7 +677,7 @@ func mSysStatInc(sysStat *uint64, n uintptr) { // mSysStatInc apply. 
//go:nosplit func mSysStatDec(sysStat *uint64, n uintptr) { - if sys.BigEndian != 0 { + if sys.BigEndian { atomic.Xadd64(sysStat, -int64(n)) return } diff --git a/libgo/go/runtime/mwbbuf.go b/libgo/go/runtime/mwbbuf.go new file mode 100644 index 00000000000..a060df8bc06 --- /dev/null +++ b/libgo/go/runtime/mwbbuf.go @@ -0,0 +1,248 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This implements the write barrier buffer. The write barrier itself +// is gcWriteBarrier and is implemented in assembly. +// +// The write barrier has a fast path and a slow path. The fast path +// simply enqueues to a per-P write barrier buffer. It's written in +// assembly and doesn't clobber any general purpose registers, so it +// doesn't have the usual overheads of a Go call. +// +// When the buffer fills up, the write barrier invokes the slow path +// (wbBufFlush) to flush the buffer to the GC work queues. In this +// path, since the compiler didn't spill registers, we spill *all* +// registers and disallow any GC safe points that could observe the +// stack frame (since we don't know the types of the spilled +// registers). + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +// testSmallBuf forces a small write barrier buffer to stress write +// barrier flushing. +const testSmallBuf = false + +// wbBuf is a per-P buffer of pointers queued by the write barrier. +// This buffer is flushed to the GC workbufs when it fills up and on +// various GC transitions. +// +// This is closely related to a "sequential store buffer" (SSB), +// except that SSBs are usually used for maintaining remembered sets, +// while this is used for marking. +type wbBuf struct { + // next points to the next slot in buf. It must not be a + // pointer type because it can point past the end of buf and + // must be updated without write barriers. + // + // This is a pointer rather than an index to optimize the + // write barrier assembly. + next uintptr + + // end points to just past the end of buf. It must not be a + // pointer type because it points past the end of buf and must + // be updated without write barriers. + end uintptr + + // buf stores a series of pointers to execute write barriers + // on. This must be a multiple of wbBufEntryPointers because + // the write barrier only checks for overflow once per entry. + buf [wbBufEntryPointers * wbBufEntries]uintptr +} + +const ( + // wbBufEntries is the number of write barriers between + // flushes of the write barrier buffer. + // + // This trades latency for throughput amortization. Higher + // values amortize flushing overhead more, but increase the + // latency of flushing. Higher values also increase the cache + // footprint of the buffer. + // + // TODO: What is the latency cost of this? Tune this value. + wbBufEntries = 256 + + // wbBufEntryPointers is the number of pointers added to the + // buffer by each write barrier. + wbBufEntryPointers = 2 +) + +// reset empties b by resetting its next and end pointers. +func (b *wbBuf) reset() { + start := uintptr(unsafe.Pointer(&b.buf[0])) + b.next = start + if gcBlackenPromptly || writeBarrier.cgo { + // Effectively disable the buffer by forcing a flush + // on every barrier. + b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers])) + } else if testSmallBuf { + // For testing, allow two barriers in the buffer. If + // we only did one, then barriers of non-heap pointers + // would be no-ops. 
This lets us combine a buffered + barrier with a flush at a later time. + b.end = uintptr(unsafe.Pointer(&b.buf[2*wbBufEntryPointers])) + } else { + b.end = start + uintptr(len(b.buf))*unsafe.Sizeof(b.buf[0]) + } + + if (b.end-b.next)%(wbBufEntryPointers*unsafe.Sizeof(b.buf[0])) != 0 { + throw("bad write barrier buffer bounds") + } +} + +// putFast adds old and new to the write barrier buffer and returns +// false if a flush is necessary. Callers should use this as: +// +// buf := &getg().m.p.ptr().wbBuf +// if !buf.putFast(old, new) { +// wbBufFlush(...) +// } +// +// The arguments to wbBufFlush depend on whether the caller is doing +// its own cgo pointer checks. If it is, then this can be +// wbBufFlush(nil, 0). Otherwise, it must pass the slot address and +// new. +// +// Since buf is a per-P resource, the caller must ensure there are no +// preemption points while buf is in use. +// +// It must be nowritebarrierrec because write barriers here would +// corrupt the write barrier buffer. It (and everything it calls, if +// it called anything) has to be nosplit to avoid scheduling onto a +// different P and a different buffer. +// +//go:nowritebarrierrec +//go:nosplit +func (b *wbBuf) putFast(old, new uintptr) bool { + p := (*[2]uintptr)(unsafe.Pointer(b.next)) + p[0] = old + p[1] = new + b.next += 2 * sys.PtrSize + return b.next != b.end +} + +// wbBufFlush flushes the current P's write barrier buffer to the GC +// workbufs. It is passed the slot and value of the write barrier that +// caused the flush so that it can implement cgocheck. +// +// This must not have write barriers because it is part of the write +// barrier implementation. +// +// This and everything it calls must be nosplit because 1) the stack +// contains untyped slots from gcWriteBarrier and 2) there must not be +// a GC safe point between the write barrier test in the caller and +// flushing the buffer. +// +// TODO: A "go:nosplitrec" annotation would be perfect for this. +// +//go:nowritebarrierrec +//go:nosplit +func wbBufFlush(dst *uintptr, src uintptr) { + if getg().m.dying > 0 { + // We're going down. Not much point in write barriers + // and this way we can allow write barriers in the + // panic path. + return + } + + if writeBarrier.cgo && dst != nil { + // This must be called from the stack that did the + // write. It's nosplit all the way down. + cgoCheckWriteBarrier(dst, src) + if !writeBarrier.needed { + // We were only called for cgocheck. + b := &getg().m.p.ptr().wbBuf + b.next = uintptr(unsafe.Pointer(&b.buf[0])) + return + } + } + + // Switch to the system stack so we don't have to worry about + // the untyped stack slots or safe points. + systemstack(func() { + wbBufFlush1(getg().m.p.ptr()) + }) +} + +// wbBufFlush1 flushes p's write barrier buffer to the GC work queue. +// +// This must not have write barriers because it is part of the write +// barrier implementation; a write barrier here could lead to infinite +// loops or buffer corruption. +// +// This must be non-preemptible because it uses the P's workbuf. +// +//go:nowritebarrierrec +//go:systemstack +func wbBufFlush1(_p_ *p) { + // Get the buffered pointers. + start := uintptr(unsafe.Pointer(&_p_.wbBuf.buf[0])) + n := (_p_.wbBuf.next - start) / unsafe.Sizeof(_p_.wbBuf.buf[0]) + ptrs := _p_.wbBuf.buf[:n] + + // Reset the buffer. + _p_.wbBuf.reset() + + if useCheckmark { + // Slow path for checkmark mode. + for _, ptr := range ptrs { + shade(ptr) + } + return + } + + // Mark all of the pointers in the buffer and record only the + // pointers we greyed.
We use the buffer itself to temporarily + // record greyed pointers. + // + // TODO: Should scanobject/scanblock just stuff pointers into + // the wbBuf? Then this would become the sole greying path. + gcw := &_p_.gcw + pos := 0 + arenaStart := mheap_.arena_start + for _, ptr := range ptrs { + if ptr < arenaStart { + // nil pointers are very common, especially + // for the "old" values. Filter out these and + // other "obvious" non-heap pointers ASAP. + // + // TODO: Should we filter out nils in the fast + // path to reduce the rate of flushes? + continue + } + // TODO: This doesn't use hbits, so calling + // heapBitsForObject seems a little silly. We could + // easily separate this out since heapBitsForObject + // just calls heapBitsForAddr(obj) to get hbits. + obj, _, span, objIndex := heapBitsForObject(ptr, 0, 0, false) + if obj == 0 { + continue + } + // TODO: Consider making two passes where the first + // just prefetches the mark bits. + mbits := span.markBitsForIndex(objIndex) + if mbits.isMarked() { + continue + } + mbits.setMarked() + if span.spanclass.noscan() { + gcw.bytesMarked += uint64(span.elemsize) + continue + } + ptrs[pos] = obj + pos++ + } + + // Enqueue the greyed objects. + gcw.putBatch(ptrs[:pos]) + if gcphase == _GCmarktermination || gcBlackenPromptly { + // Ps aren't allowed to cache work during mark + // termination. + gcw.dispose() + } +} diff --git a/libgo/go/runtime/netpoll_kqueue.go b/libgo/go/runtime/netpoll_kqueue.go index 47927fe7c37..1f68effbf9d 100644 --- a/libgo/go/runtime/netpoll_kqueue.go +++ b/libgo/go/runtime/netpoll_kqueue.go @@ -97,10 +97,23 @@ retry: for i := 0; i < int(n); i++ { ev := &events[i] var mode int32 - if ev.filter == _EVFILT_READ { + switch ev.filter { + case _EVFILT_READ: mode += 'r' - } - if ev.filter == _EVFILT_WRITE { + + // On some systems when the read end of a pipe + // is closed the write end will not get a + // _EVFILT_WRITE event, but will get a + // _EVFILT_READ event with EV_EOF set. + // Note that setting 'w' here just means that we + // will wake up a goroutine waiting to write; + // that goroutine will try the write again, + // and the appropriate thing will happen based + // on what that write returns (success, EPIPE, EAGAIN). + if ev.flags&_EV_EOF != 0 { + mode += 'w' + } + case _EVFILT_WRITE: mode += 'w' } if mode != 0 { diff --git a/libgo/go/runtime/netpoll_windows.go b/libgo/go/runtime/netpoll_windows.go index 79dafb02796..134071f5e3c 100644 --- a/libgo/go/runtime/netpoll_windows.go +++ b/libgo/go/runtime/netpoll_windows.go @@ -47,7 +47,7 @@ func netpolldescriptor() uintptr { func netpollopen(fd uintptr, pd *pollDesc) int32 { if stdcall4(_CreateIoCompletionPort, fd, iocphandle, 0, 0) == 0 { - return -int32(getlasterror()) + return int32(getlasterror()) } return 0 } diff --git a/libgo/go/runtime/os_freebsd.go b/libgo/go/runtime/os_freebsd.go index a4d2886d6af..8c3535b893b 100644 --- a/libgo/go/runtime/os_freebsd.go +++ b/libgo/go/runtime/os_freebsd.go @@ -16,6 +16,17 @@ type mOS struct { //extern _umtx_op func sys_umtx_op(addr *uint32, mode int32, val uint32, uaddr1 uintptr, ts *umtx_time) int32 +func getPageSize() uintptr { + mib := [2]uint32{_CTL_HW, _HW_PAGESIZE} + out := uint32(0) + nout := unsafe.Sizeof(out) + ret := sysctl(&mib[0], 2, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0) + if ret >= 0 { + return uintptr(out) + } + return 0 +} + // FreeBSD's umtx_op syscall is effectively the same as Linux's futex, and // thus the code is largely similar. See Linux implementation // and lock_futex.go for comments.
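The getPageSize helper above issues the raw CTL_HW/HW_PAGESIZE sysctl because the runtime cannot lean on libc conveniences. Ordinary programs can make the same query by name; a sketch for FreeBSD (or another BSD), assuming the golang.org/x/sys/unix module is available, with os.Getpagesize as the portable route:

package main

import (
	"fmt"
	"os"

	"golang.org/x/sys/unix"
)

func main() {
	// Same CTL_HW/HW_PAGESIZE query the runtime performs, by name.
	if sz, err := unix.SysctlUint32("hw.pagesize"); err == nil {
		fmt.Println("hw.pagesize:", sz)
	}
	// Portable alternative for code that just needs the page size.
	fmt.Println("os.Getpagesize:", os.Getpagesize())
}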
diff --git a/libgo/go/runtime/os_linux.go b/libgo/go/runtime/os_linux.go index e1a6a308cf2..816327e70b8 100644 --- a/libgo/go/runtime/os_linux.go +++ b/libgo/go/runtime/os_linux.go @@ -106,45 +106,46 @@ func sysargs(argc int32, argv **byte) { // now argv+n is auxv auxv := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*sys.PtrSize)) - if sysauxv(auxv[:]) == 0 { - // In some situations we don't get a loader-provided - // auxv, such as when loaded as a library on Android. - // Fall back to /proc/self/auxv. - fd := open(&procAuxv[0], 0 /* O_RDONLY */, 0) - if fd < 0 { - // On Android, /proc/self/auxv might be unreadable (issue 9229), so we fallback to - // try using mincore to detect the physical page size. - // mincore should return EINVAL when address is not a multiple of system page size. - const size = 256 << 10 // size of memory region to allocate - p := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) - if uintptr(p) < 4096 { - return - } - var n uintptr - for n = 4 << 10; n < size; n <<= 1 { - err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0]) - if err == 0 { - physPageSize = n - break - } - } - if physPageSize == 0 { - physPageSize = size - } - munmap(p, size) + if sysauxv(auxv[:]) != 0 { + return + } + // In some situations we don't get a loader-provided + // auxv, such as when loaded as a library on Android. + // Fall back to /proc/self/auxv. + fd := open(&procAuxv[0], 0 /* O_RDONLY */, 0) + if fd < 0 { + // On Android, /proc/self/auxv might be unreadable (issue 9229), so we fallback to + // try using mincore to detect the physical page size. + // mincore should return EINVAL when address is not a multiple of system page size. + const size = 256 << 10 // size of memory region to allocate + p, err := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) + if err != 0 { return } - var buf [128]uintptr - n := read(fd, noescape(unsafe.Pointer(&buf[0])), int32(unsafe.Sizeof(buf))) - closefd(fd) - if n < 0 { - return + var n uintptr + for n = 4 << 10; n < size; n <<= 1 { + err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0]) + if err == 0 { + physPageSize = n + break + } + } + if physPageSize == 0 { + physPageSize = size } - // Make sure buf is terminated, even if we didn't read - // the whole file. - buf[len(buf)-2] = _AT_NULL - sysauxv(buf[:]) + munmap(p, size) + return + } + var buf [128]uintptr + n = read(fd, noescape(unsafe.Pointer(&buf[0])), int32(unsafe.Sizeof(buf))) + closefd(fd) + if n < 0 { + return } + // Make sure buf is terminated, even if we didn't read + // the whole file. + buf[len(buf)-2] = _AT_NULL + sysauxv(buf[:]) } func sysauxv(auxv []uintptr) int { diff --git a/libgo/go/runtime/os_linux_ppc64x.go b/libgo/go/runtime/os_linux_ppc64x.go index b324344493e..d27902d794d 100644 --- a/libgo/go/runtime/os_linux_ppc64x.go +++ b/libgo/go/runtime/os_linux_ppc64x.go @@ -7,55 +7,22 @@ package runtime -import ( - "runtime/internal/sys" -) +// For go:linkname +import _ "unsafe" -const ( - // ISA level - // Go currently requires POWER5 as a minimum for ppc64, so we need - // to check for ISA 2.03 and beyond. 
- _PPC_FEATURE_POWER5_PLUS = 0x00020000 // ISA 2.03 (POWER5+) - _PPC_FEATURE_ARCH_2_05 = 0x00001000 // ISA 2.05 (POWER6) - _PPC_FEATURE_POWER6_EXT = 0x00000200 // mffgpr/mftgpr extension (POWER6x) - _PPC_FEATURE_ARCH_2_06 = 0x00000100 // ISA 2.06 (POWER7) - _PPC_FEATURE2_ARCH_2_07 = 0x80000000 // ISA 2.07 (POWER8) +// ppc64x doesn't have a 'cpuid' instruction equivalent and relies on +// HWCAP/HWCAP2 bits for hardware capabilities. - // Standalone capabilities - _PPC_FEATURE_HAS_ALTIVEC = 0x10000000 // SIMD/Vector unit - _PPC_FEATURE_HAS_VSX = 0x00000080 // Vector scalar unit -) - -type facilities struct { - _ [sys.CacheLineSize]byte - isPOWER5x bool // ISA 2.03 - isPOWER6 bool // ISA 2.05 - isPOWER6x bool // ISA 2.05 + mffgpr/mftgpr extension - isPOWER7 bool // ISA 2.06 - isPOWER8 bool // ISA 2.07 - hasVMX bool // Vector unit - hasVSX bool // Vector scalar unit - _ [sys.CacheLineSize]byte -} - -// cpu can be tested at runtime in go assembler code to check for -// a certain ISA level or hardware capability, for example: -// ·cpu+facilities_hasVSX(SB) for checking the availability of VSX -// or -// ·cpu+facilities_isPOWER7(SB) for checking if the processor implements -// ISA 2.06 instructions. -var cpu facilities +//go:linkname cpu_hwcap internal/cpu.ppc64x_hwcap +//go:linkname cpu_hwcap2 internal/cpu.ppc64x_hwcap2 +var cpu_hwcap uint +var cpu_hwcap2 uint func archauxv(tag, val uintptr) { switch tag { case _AT_HWCAP: - cpu.isPOWER5x = val&_PPC_FEATURE_POWER5_PLUS != 0 - cpu.isPOWER6 = val&_PPC_FEATURE_ARCH_2_05 != 0 - cpu.isPOWER6x = val&_PPC_FEATURE_POWER6_EXT != 0 - cpu.isPOWER7 = val&_PPC_FEATURE_ARCH_2_06 != 0 - cpu.hasVMX = val&_PPC_FEATURE_HAS_ALTIVEC != 0 - cpu.hasVSX = val&_PPC_FEATURE_HAS_VSX != 0 + cpu_hwcap = uint(val) case _AT_HWCAP2: - cpu.isPOWER8 = val&_PPC_FEATURE2_ARCH_2_07 != 0 + cpu_hwcap2 = uint(val) } } diff --git a/libgo/go/runtime/os_netbsd.go b/libgo/go/runtime/os_netbsd.go index 464ce88d9c4..81ebe7636a1 100644 --- a/libgo/go/runtime/os_netbsd.go +++ b/libgo/go/runtime/os_netbsd.go @@ -15,7 +15,7 @@ type mOS struct { //go:noescape //extern lwp_park -func lwp_park(abstime *timespec, unpark int32, hint, unparkhint unsafe.Pointer) int32 +func lwp_park(ts int32, rel int32, abstime *timespec, unpark int32, hint, unparkhint unsafe.Pointer) int32 //go:noescape //extern lwp_unpark @@ -31,10 +31,9 @@ func semasleep(ns int64) int32 { // Compute sleep deadline. var tsp *timespec + var ts timespec if ns >= 0 { - var ts timespec var nsec int32 - ns += nanotime() ts.set_sec(int64(timediv(ns, 1000000000, &nsec))) ts.set_nsec(nsec) tsp = &ts @@ -50,9 +49,18 @@ func semasleep(ns int64) int32 { } // Sleep until unparked by semawakeup or timeout. - ret := lwp_park(tsp, 0, unsafe.Pointer(&_g_.m.mos.waitsemacount), nil) + ret := lwp_park(_CLOCK_MONOTONIC, _TIMER_RELTIME, tsp, 0, unsafe.Pointer(&_g_.m.waitsemacount), nil) if ret == _ETIMEDOUT { return -1 + } else if ret == _EINTR && ns >= 0 { + // Avoid sleeping forever if we keep getting + // interrupted (for example by the profiling + // timer). It would be better if tsp upon return had the + // remaining time to sleep, but this is good enough.
+ var nsec int32 + ns /= 2 + ts.set_sec(timediv(ns, 1000000000, &nsec)) + ts.set_nsec(nsec) } } } diff --git a/libgo/go/runtime/panic.go b/libgo/go/runtime/panic.go index c39a58d0c4b..5cc325f3954 100644 --- a/libgo/go/runtime/panic.go +++ b/libgo/go/runtime/panic.go @@ -170,7 +170,18 @@ func freedefer(d *_defer) { unlock(&sched.deferlock) }) } - *d = _defer{} + + // These lines used to be simply `*d = _defer{}` but that + // started causing a nosplit stack overflow via typedmemmove. + d.link = nil + d.frame = nil + d.panicStack = nil + d._panic = nil + d.pfn = 0 + d.arg = nil + d.retaddr = 0 + d.makefunccanrecover = false + pp.deferpool = append(pp.deferpool, d) } @@ -327,7 +338,7 @@ func unwindStack() { // Goexit terminates the goroutine that calls it. No other goroutine is affected. // Goexit runs all deferred calls before terminating the goroutine. Because Goexit -// is not panic, however, any recover calls in those deferred functions will return nil. +// is not a panic, any recover calls in those deferred functions will return nil. // // Calling Goexit from the main goroutine terminates that goroutine // without func main returning. Since func main has not returned, @@ -599,7 +610,7 @@ func canrecover(retaddr uintptr) bool { // caller starts with "runtime.", then we are permitted to // call recover. var locs [16]location - if callers(2, locs[:2]) < 2 { + if callers(1, locs[:2]) < 2 { return false } @@ -619,7 +630,7 @@ func canrecover(retaddr uintptr) bool { // reflect.makeFuncStub or reflect.ffi_callback called by FFI // functions. Then we check the caller of that function. - n := callers(3, locs[:]) + n := callers(2, locs[:]) foundFFICallback := false i := 0 for ; i < n; i++ { @@ -822,6 +833,12 @@ var panicking uint32 // so that two concurrent panics don't overlap their output. var paniclk mutex +// startpanic_m implements unrecoverable panic. +// +// It can have write barriers because the write barrier explicitly +// ignores writes once dying > 0. +// +//go:yeswritebarrierrec func startpanic() { _g_ := getg() // Uncomment when mheap_ is in Go. @@ -860,7 +877,7 @@ func startpanic() { exit(4) fallthrough default: - // Can't even print! Just exit. + // Can't even print! Just exit. exit(5) } } diff --git a/libgo/go/runtime/pprof/pprof.go b/libgo/go/runtime/pprof/pprof.go index a57b69dca35..8a562e2ce8b 100644 --- a/libgo/go/runtime/pprof/pprof.go +++ b/libgo/go/runtime/pprof/pprof.go @@ -18,7 +18,7 @@ // To add equivalent profiling support to a standalone program, add // code like the following to your main function: // -// var cpuprofile = flag.String("cpuprofile", "", "write cpu profile `file`") +// var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`") // var memprofile = flag.String("memprofile", "", "write memory profile to `file`") // // func main() { @@ -319,7 +319,15 @@ func (p *Profile) WriteTo(w io.Writer, debug int) error { p.mu.Unlock() // Map order is non-deterministic; make output deterministic. 
- sort.Sort(stackProfile(all)) + sort.Slice(all, func(i, j int) bool { + t, u := all[i], all[j] + for k := 0; k < len(t) && k < len(u); k++ { + if t[k] != u[k] { + return t[k] < u[k] + } + } + return len(t) < len(u) + }) return printCountProfile(w, debug, p.name, stackProfile(all)) } @@ -328,16 +336,6 @@ type stackProfile [][]uintptr func (x stackProfile) Len() int { return len(x) } func (x stackProfile) Stack(i int) []uintptr { return x[i] } -func (x stackProfile) Swap(i, j int) { x[i], x[j] = x[j], x[i] } -func (x stackProfile) Less(i, j int) bool { - t, u := x[i], x[j] - for k := 0; k < len(t) && k < len(u); k++ { - if t[k] != u[k] { - return t[k] < u[k] - } - } - return len(t) < len(u) -} // A countProfile is a set of stack traces to be printed as counts // grouped by stack trace. There are multiple implementations: @@ -348,6 +346,41 @@ type countProfile interface { Stack(i int) []uintptr } +// printCountCycleProfile outputs block profile records (for block or mutex profiles) +// as the pprof-proto format output. Translations from cycle count to time duration +// are done because the proto format expects count and time (nanoseconds) instead of count +// and the number of cycles for block and contention profiles. +func printCountCycleProfile(w io.Writer, countName, cycleName string, records []runtime.BlockProfileRecord) error { + // Output profile in protobuf form. + b := newProfileBuilder(w) + b.pbValueType(tagProfile_PeriodType, countName, "count") + b.pb.int64Opt(tagProfile_Period, 1) + b.pbValueType(tagProfile_SampleType, countName, "count") + b.pbValueType(tagProfile_SampleType, cycleName, "nanoseconds") + + cpuGHz := float64(runtime_cyclesPerSecond()) / 1e9 + + values := []int64{0, 0} + var locs []uint64 + for _, r := range records { + values[0] = int64(r.Count) + values[1] = int64(float64(r.Cycles) / cpuGHz) // to nanoseconds + locs = locs[:0] + for _, addr := range r.Stack() { + // For count profiles, all stack addresses are + // return PCs, which is what locForPC expects. + l := b.locForPC(addr) + if l == 0 { // runtime.goexit + continue + } + locs = append(locs, l) + } + b.pbSample(values, locs, nil) + } + b.build() + return nil +} + // printCountProfile prints a countProfile at the specified debug level. // The profile will be in compressed proto format unless debug is nonzero. func printCountProfile(w io.Writer, debug int, name string, p countProfile) error { @@ -441,7 +474,7 @@ func printStackRecord(w io.Writer, stk []uintptr, allFrames bool) { // Hide runtime.goexit and any runtime functions at the beginning. // This is useful mainly for allocation traces. - skip := name == "runtime.goexit" + skip := name == "runtime.goexit" || name == "runtime.kickoff" if !show { switch { case strings.HasPrefix(name, "runtime."): @@ -490,6 +523,14 @@ func countHeap() int { // writeHeap writes the current runtime heap profile to w. func writeHeap(w io.Writer, debug int) error { + var memStats *runtime.MemStats + if debug != 0 { + // Read mem stats first, so that our other allocations + // do not appear in the statistics. + memStats = new(runtime.MemStats) + runtime.ReadMemStats(memStats) + } + // Find out how many records there are (MemProfile(nil, true)), // allocate that many records, and get the data. // There's a race—more records might be added between // this call and the subsequent one— @@ -552,8 +593,7 @@ func writeHeap(w io.Writer, debug int) error { // Print memstats information too.
// Pprof will ignore, but useful for people - s := new(runtime.MemStats) - runtime.ReadMemStats(s) + s := memStats fmt.Fprintf(w, "\n# runtime.MemStats\n") fmt.Fprintf(w, "# Alloc = %d\n", s.Alloc) fmt.Fprintf(w, "# TotalAlloc = %d\n", s.TotalAlloc) @@ -779,14 +819,14 @@ func writeBlock(w io.Writer, debug int) error { sort.Slice(p, func(i, j int) bool { return p[i].Cycles > p[j].Cycles }) - b := bufio.NewWriter(w) - var tw *tabwriter.Writer - w = b - if debug > 0 { - tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) - w = tw + if debug <= 0 { + return printCountCycleProfile(w, "contentions", "delay", p) } + b := bufio.NewWriter(w) + tw := tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) + w = tw + fmt.Fprintf(w, "--- contention:\n") fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond()) for i := range p { @@ -823,14 +863,14 @@ func writeMutex(w io.Writer, debug int) error { sort.Slice(p, func(i, j int) bool { return p[i].Cycles > p[j].Cycles }) - b := bufio.NewWriter(w) - var tw *tabwriter.Writer - w = b - if debug > 0 { - tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) - w = tw + if debug <= 0 { + return printCountCycleProfile(w, "contentions", "delay", p) } + b := bufio.NewWriter(w) + tw := tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) + w = tw + fmt.Fprintf(w, "--- mutex:\n") fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond()) fmt.Fprintf(w, "sampling period=%d\n", runtime.SetMutexProfileFraction(-1)) diff --git a/libgo/go/runtime/pprof/pprof_test.go b/libgo/go/runtime/pprof/pprof_test.go index 9e5e403b741..08a4f969ca2 100644 --- a/libgo/go/runtime/pprof/pprof_test.go +++ b/libgo/go/runtime/pprof/pprof_test.go @@ -26,16 +26,18 @@ import ( "time" ) -func cpuHogger(f func() int, dur time.Duration) { +func cpuHogger(f func(x int) int, y *int, dur time.Duration) { // We only need to get one 100 Hz clock tick, so we've got // a large safety buffer. // But do at least 500 iterations (which should take about 100ms), // otherwise TestCPUProfileMultithreaded can fail if only one // thread is scheduled during the testing period. t0 := time.Now() + accum := *y for i := 0; i < 500 || time.Since(t0) < dur; i++ { - f() + accum = f(accum) } + *y = accum } var ( @@ -46,8 +48,8 @@ var ( // The actual CPU hogging function. // Must not call other functions nor access heap/globals in the loop, // otherwise under race detector the samples will be in the race runtime. 
-func cpuHog1() int { - foo := salt1 +func cpuHog1(x int) int { + foo := x for i := 0; i < 1e5; i++ { if foo > 0 { foo *= foo @@ -58,8 +60,8 @@ func cpuHog1() int { return foo } -func cpuHog2() int { - foo := salt2 +func cpuHog2(x int) int { + foo := x for i := 0; i < 1e5; i++ { if foo > 0 { foo *= foo @@ -72,7 +74,7 @@ func cpuHog2() int { func TestCPUProfile(t *testing.T) { testCPUProfile(t, []string{"pprof.cpuHog1"}, func(dur time.Duration) { - cpuHogger(cpuHog1, dur) + cpuHogger(cpuHog1, &salt1, dur) }) } @@ -81,29 +83,29 @@ func TestCPUProfileMultithreaded(t *testing.T) { testCPUProfile(t, []string{"pprof.cpuHog1", "pprof.cpuHog2"}, func(dur time.Duration) { c := make(chan int) go func() { - cpuHogger(cpuHog1, dur) + cpuHogger(cpuHog1, &salt1, dur) c <- 1 }() - cpuHogger(cpuHog2, dur) + cpuHogger(cpuHog2, &salt2, dur) <-c }) } func TestCPUProfileInlining(t *testing.T) { testCPUProfile(t, []string{"pprof.inlinedCallee", "pprof.inlinedCaller"}, func(dur time.Duration) { - cpuHogger(inlinedCaller, dur) + cpuHogger(inlinedCaller, &salt1, dur) }) } -func inlinedCaller() int { - inlinedCallee() - return 0 +func inlinedCaller(x int) int { + x = inlinedCallee(x) + return x } -func inlinedCallee() { +func inlinedCallee(x int) int { // We could just use cpuHog1, but for loops prevent inlining // right now. :( - foo := salt1 + foo := x i := 0 loop: if foo > 0 { @@ -114,7 +116,7 @@ loop: if i++; i < 1e5 { goto loop } - salt1 = foo + return foo } func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) { @@ -177,9 +179,9 @@ func testCPUProfile(t *testing.T, need []string, f func(dur time.Duration)) { } } - if badOS[runtime.GOOS] { + switch runtime.GOOS { + case "darwin", "dragonfly", "netbsd", "solaris": t.Skipf("ignoring failure on %s; see golang.org/issue/13841", runtime.GOOS) - return } // Ignore the failure if the tests are running in a QEMU-based emulator, // QEMU is not perfect at emulating everything. @@ -187,7 +189,6 @@ func testCPUProfile(t *testing.T, need []string, f func(dur time.Duration)) { // IN_QEMU=1 indicates that the tests are running in QEMU. See issue 9605. if os.Getenv("IN_QEMU") == "1" { t.Skip("ignore the failure in QEMU; see golang.org/issue/9605") - return } t.FailNow() } @@ -394,60 +395,108 @@ func TestMathBigDivide(t *testing.T) { }) } -// Operating systems that are expected to fail the tests. See issue 13841. 
-var badOS = map[string]bool{ - "darwin": true, - "netbsd": true, - "plan9": true, - "dragonfly": true, - "solaris": true, -} - func TestBlockProfile(t *testing.T) { t.Skip("lots of details are different for gccgo; FIXME") type TestCase struct { name string f func() + stk []string re string } tests := [...]TestCase{ - {"chan recv", blockChanRecv, ` + { + name: "chan recv", + f: blockChanRecv, + stk: []string{ + "runtime.chanrecv1", + "runtime/pprof.blockChanRecv", + "runtime/pprof.TestBlockProfile", + }, + re: ` [0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ # 0x[0-9a-f]+ runtime\.chanrecv1\+0x[0-9a-f]+ .*/src/runtime/chan.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.blockChanRecv\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ `}, - {"chan send", blockChanSend, ` + { + name: "chan send", + f: blockChanSend, + stk: []string{ + "runtime.chansend1", + "runtime/pprof.blockChanSend", + "runtime/pprof.TestBlockProfile", + }, + re: ` [0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ # 0x[0-9a-f]+ runtime\.chansend1\+0x[0-9a-f]+ .*/src/runtime/chan.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.blockChanSend\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ `}, - {"chan close", blockChanClose, ` + { + name: "chan close", + f: blockChanClose, + stk: []string{ + "runtime.chanrecv1", + "runtime/pprof.blockChanClose", + "runtime/pprof.TestBlockProfile", + }, + re: ` [0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ # 0x[0-9a-f]+ runtime\.chanrecv1\+0x[0-9a-f]+ .*/src/runtime/chan.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.blockChanClose\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ `}, - {"select recv async", blockSelectRecvAsync, ` + { + name: "select recv async", + f: blockSelectRecvAsync, + stk: []string{ + "runtime.selectgo", + "runtime/pprof.blockSelectRecvAsync", + "runtime/pprof.TestBlockProfile", + }, + re: ` [0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ # 0x[0-9a-f]+ runtime\.selectgo\+0x[0-9a-f]+ .*/src/runtime/select.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.blockSelectRecvAsync\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ `}, - {"select send sync", blockSelectSendSync, ` + { + name: "select send sync", + f: blockSelectSendSync, + stk: []string{ + "runtime.selectgo", + "runtime/pprof.blockSelectSendSync", + "runtime/pprof.TestBlockProfile", + }, + re: ` [0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ # 0x[0-9a-f]+ runtime\.selectgo\+0x[0-9a-f]+ .*/src/runtime/select.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.blockSelectSendSync\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ `}, - {"mutex", blockMutex, ` + { + name: "mutex", + f: blockMutex, + stk: []string{ + "sync.(*Mutex).Lock", + "runtime/pprof.blockMutex", + "runtime/pprof.TestBlockProfile", + }, + re: ` [0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ # 0x[0-9a-f]+ sync\.\(\*Mutex\)\.Lock\+0x[0-9a-f]+ .*/src/sync/mutex\.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.blockMutex\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ `}, - {"cond", blockCond, ` + { + name: "cond", + f: blockCond, + stk: []string{ + 
"sync.(*Cond).Wait", + "runtime/pprof.blockCond", + "runtime/pprof.TestBlockProfile", + }, + re: ` [0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ # 0x[0-9a-f]+ sync\.\(\*Cond\)\.Wait\+0x[0-9a-f]+ .*/src/sync/cond\.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.blockCond\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ @@ -455,28 +504,84 @@ func TestBlockProfile(t *testing.T) { `}, } + // Generate block profile runtime.SetBlockProfileRate(1) defer runtime.SetBlockProfileRate(0) for _, test := range tests { test.f() } - var w bytes.Buffer - Lookup("block").WriteTo(&w, 1) - prof := w.String() - if !strings.HasPrefix(prof, "--- contention:\ncycles/second=") { - t.Fatalf("Bad profile header:\n%v", prof) - } + t.Run("debug=1", func(t *testing.T) { + var w bytes.Buffer + Lookup("block").WriteTo(&w, 1) + prof := w.String() - if strings.HasSuffix(prof, "#\t0x0\n\n") { - t.Errorf("Useless 0 suffix:\n%v", prof) + if !strings.HasPrefix(prof, "--- contention:\ncycles/second=") { + t.Fatalf("Bad profile header:\n%v", prof) + } + + if strings.HasSuffix(prof, "#\t0x0\n\n") { + t.Errorf("Useless 0 suffix:\n%v", prof) + } + + for _, test := range tests { + if !regexp.MustCompile(strings.Replace(test.re, "\t", "\t+", -1)).MatchString(prof) { + t.Errorf("Bad %v entry, expect:\n%v\ngot:\n%v", test.name, test.re, prof) + } + } + }) + + t.Run("proto", func(t *testing.T) { + // proto format + var w bytes.Buffer + Lookup("block").WriteTo(&w, 0) + p, err := profile.Parse(&w) + if err != nil { + t.Fatalf("failed to parse profile: %v", err) + } + t.Logf("parsed proto: %s", p) + if err := p.CheckValid(); err != nil { + t.Fatalf("invalid profile: %v", err) + } + + stks := stacks(p) + for _, test := range tests { + if !containsStack(stks, test.stk) { + t.Errorf("No matching stack entry for %v, want %+v", test.name, test.stk) + } + } + }) + +} + +func stacks(p *profile.Profile) (res [][]string) { + for _, s := range p.Sample { + var stk []string + for _, l := range s.Location { + for _, line := range l.Line { + stk = append(stk, line.Function.Name) + } + } + res = append(res, stk) } + return res +} - for _, test := range tests { - if !regexp.MustCompile(strings.Replace(test.re, "\t", "\t+", -1)).MatchString(prof) { - t.Fatalf("Bad %v entry, expect:\n%v\ngot:\n%v", test.name, test.re, prof) +func containsStack(got [][]string, want []string) bool { + for _, stk := range got { + if len(stk) < len(want) { + continue + } + for i, f := range want { + if f != stk[i] { + break + } + if i == len(want)-1 { + return true + } } } + return false } const blockDelay = 10 * time.Millisecond @@ -568,6 +673,8 @@ func blockCond() { } func TestMutexProfile(t *testing.T) { + // Generate mutex profile + old := runtime.SetMutexProfileFraction(1) defer runtime.SetMutexProfileFraction(old) if old != 0 { @@ -576,39 +683,60 @@ func TestMutexProfile(t *testing.T) { blockMutex() - var w bytes.Buffer - Lookup("mutex").WriteTo(&w, 1) - prof := w.String() + t.Run("debug=1", func(t *testing.T) { + var w bytes.Buffer + Lookup("mutex").WriteTo(&w, 1) + prof := w.String() + t.Logf("received profile: %v", prof) - if !strings.HasPrefix(prof, "--- mutex:\ncycles/second=") { - t.Errorf("Bad profile header:\n%v", prof) - } - prof = strings.Trim(prof, "\n") - lines := strings.Split(prof, "\n") - // gccgo adds an extra line in the stack trace, not sure why. 
- if len(lines) < 6 { - t.Errorf("expected 6 lines, got %d %q\n%s", len(lines), prof, prof) - } - if len(lines) < 6 { - return - } - // checking that the line is like "35258904 1 @ 0x48288d 0x47cd28 0x458931" - r2 := `^\d+ 1 @(?: 0x[[:xdigit:]]+)+` - //r2 := "^[0-9]+ 1 @ 0x[0-9a-f x]+$" - if ok, err := regexp.MatchString(r2, lines[3]); err != nil || !ok { - t.Errorf("%q didn't match %q", lines[3], r2) - } - r3 := "^#.*pprof.\\$nested.*$" - match := false - for _, i := range []int{5, 6} { - if ok, _ := regexp.MatchString(r3, lines[i]); ok { - match = true - break + if !strings.HasPrefix(prof, "--- mutex:\ncycles/second=") { + t.Errorf("Bad profile header:\n%v", prof) } - } - if !match { - t.Errorf("neither %q nor %q matched %q", lines[5], lines[6], r3) - } + prof = strings.Trim(prof, "\n") + lines := strings.Split(prof, "\n") + if len(lines) != 6 { + t.Errorf("expected 6 lines, got %d %q\n%s", len(lines), prof, prof) + } + if len(lines) < 6 { + return + } + // checking that the line is like "35258904 1 @ 0x48288d 0x47cd28 0x458931" + r2 := `^\d+ 1 @(?: 0x[[:xdigit:]]+)+` + //r2 := "^[0-9]+ 1 @ 0x[0-9a-f x]+$" + if ok, err := regexp.MatchString(r2, lines[3]); err != nil || !ok { + t.Errorf("%q didn't match %q", lines[3], r2) + } + if runtime.Compiler != "gccgo" { + r3 := "^#.*pprof.blockMutex.*$" + if ok, err := regexp.MatchString(r3, lines[5]); err != nil || !ok { + t.Errorf("%q didn't match %q", lines[5], r3) + } + } + t.Logf(prof) + }) + t.Run("proto", func(t *testing.T) { + // proto format + var w bytes.Buffer + Lookup("mutex").WriteTo(&w, 0) + p, err := profile.Parse(&w) + if err != nil { + t.Fatalf("failed to parse profile: %v", err) + } + t.Logf("parsed proto: %s", p) + if err := p.CheckValid(); err != nil { + t.Fatalf("invalid profile: %v", err) + } + + stks := stacks(p) + for _, want := range [][]string{ + // {"sync.(*Mutex).Unlock", "pprof.blockMutex.func1"}, + {"sync.Unlock.pN10_sync.Mutex", "pprof.$nested17"}, + } { + if !containsStack(stks, want) { + t.Errorf("No matching stack entry for %+v", want) + } + } + }) } func func1(c chan int) { <-c } @@ -725,7 +853,7 @@ func TestEmptyCallStack(t *testing.T) { func TestCPUProfileLabel(t *testing.T) { testCPUProfile(t, []string{"pprof.cpuHogger;key=value"}, func(dur time.Duration) { Do(context.Background(), Labels("key", "value"), func(context.Context) { - cpuHogger(cpuHog1, dur) + cpuHogger(cpuHog1, &salt1, dur) }) }) } @@ -738,14 +866,15 @@ func TestLabelRace(t *testing.T) { start := time.Now() var wg sync.WaitGroup for time.Since(start) < dur { + var salts [10]int for i := 0; i < 10; i++ { wg.Add(1) - go func() { + go func(j int) { Do(context.Background(), Labels("key", "value"), func(context.Context) { - cpuHogger(cpuHog1, time.Millisecond) + cpuHogger(cpuHog1, &salts[j], time.Millisecond) }) wg.Done() - }() + }(i) } wg.Wait() } diff --git a/libgo/go/runtime/pprof/proto.go b/libgo/go/runtime/pprof/proto.go index 5e1d71c7e72..793be44a417 100644 --- a/libgo/go/runtime/pprof/proto.go +++ b/libgo/go/runtime/pprof/proto.go @@ -202,7 +202,7 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 { // the stack and we have return PCs anyway. frames := runtime.CallersFrames([]uintptr{addr}) frame, more := frames.Next() - if frame.Function == "runtime.goexit" { + if frame.Function == "runtime.goexit" || frame.Function == "runtime.kickoff" { // Short-circuit if we see runtime.goexit so the loop // below doesn't allocate a useless empty location. 
return 0 @@ -228,7 +228,7 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 { start := b.pb.startMessage() b.pb.uint64Opt(tagLocation_ID, id) b.pb.uint64Opt(tagLocation_Address, uint64(frame.PC)) - for frame.Function != "runtime.goexit" { + for frame.Function != "runtime.goexit" && frame.Function != "runtime.kickoff" { // Write out each line in frame expansion. funcID := uint64(b.funcs[frame.Function]) if funcID == 0 { diff --git a/libgo/go/runtime/print.go b/libgo/go/runtime/print.go index 4db726a7552..3da05ad5f9e 100644 --- a/libgo/go/runtime/print.go +++ b/libgo/go/runtime/print.go @@ -78,7 +78,7 @@ var debuglock mutex // The compiler emits calls to printlock and printunlock around // the multiple calls that implement a single Go print or println -// statement. Some of the print helpers (printsp, for example) +// statement. Some of the print helpers (printslice, for example) // call print recursively. There is also the problem of a crash // happening during the print routines and needing to acquire // the print lock to print information about the crash. @@ -120,31 +120,31 @@ func gwrite(b []byte) { } func printsp() { - print(" ") + printstring(" ") } func printnl() { - print("\n") + printstring("\n") } func printbool(v bool) { if v { - print("true") + printstring("true") } else { - print("false") + printstring("false") } } func printfloat(v float64) { switch { case v != v: - print("NaN") + printstring("NaN") return case v+v == v && v > 0: - print("+Inf") + printstring("+Inf") return case v+v == v && v < 0: - print("-Inf") + printstring("-Inf") return } @@ -226,7 +226,7 @@ func printuint(v uint64) { func printint(v int64) { if v < 0 { - print("-") + printstring("-") v = -v } printuint(uint64(v)) diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index 345f57b6875..1ea41528600 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -34,6 +34,7 @@ import ( //go:linkname helpgc runtime.helpgc //go:linkname kickoff runtime.kickoff //go:linkname mstart1 runtime.mstart1 +//go:linkname mexit runtime.mexit //go:linkname globrunqput runtime.globrunqput //go:linkname pidleget runtime.pidleget @@ -54,6 +55,7 @@ func getTraceback(me, gp *g) func gtraceback(*g) func _cgo_notify_runtime_init_done() func alreadyInCallers() bool +func stackfree(*g) // Functions created by the compiler. //extern __go_init_main @@ -138,6 +140,9 @@ var ( // it is closed, meaning cgocallbackg can reliably receive from it. var main_init_done chan bool +// mainStarted indicates that the main M has started. +var mainStarted bool + // runtimeInitTime is the nanotime() at which the runtime started. var runtimeInitTime int64 @@ -157,8 +162,8 @@ func main() { maxstacksize = 250000000 } - // Record when the world started. - runtimeInitTime = nanotime() + // Allow newproc to start new Ms. + mainStarted = true systemstack(func() { newm(sysmon, nil) @@ -184,8 +189,15 @@ func main() { } }() + // Record when the world started. Must be after runtime_init + // because nanotime on some platforms depends on startNano. + runtimeInitTime = nanotime() + main_init_done = make(chan bool) if iscgo { + // Start the template thread in case we enter Go from + // a C-created thread and need to create a new thread. + startTemplateThread() _cgo_notify_runtime_init_done() } @@ -269,9 +281,10 @@ func forcegchelper() { } } +//go:nosplit + // Gosched yields the processor, allowing other goroutines to run. It does not // suspend the current goroutine, so execution resumes automatically. 
-//go:nosplit func Gosched() { mcall(gosched_m) } @@ -359,8 +372,8 @@ func releaseSudog(s *sudog) { if s.elem != nil { throw("runtime: sudog with non-nil elem") } - if s.selectdone != nil { - throw("runtime: sudog with non-nil selectdone") + if s.isSelect { + throw("runtime: sudog with non-false isSelect") } if s.next != nil { throw("runtime: sudog with non-nil next") @@ -419,7 +432,7 @@ func funcPC(f interface{}) uintptr { func lockedOSThread() bool { gp := getg() - return gp.lockedm != nil && gp.m.lockedg != nil + return gp.lockedm != 0 && gp.m.lockedg != 0 } var ( @@ -479,13 +492,21 @@ func schedinit() { if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 { procs = n } - if procs > _MaxGomaxprocs { - procs = _MaxGomaxprocs - } if procresize(procs) != nil { throw("unknown runnable goroutine during bootstrap") } + // For cgocheck > 1, we turn on the write barrier at all times + // and check all pointer writes. We can't do this until after + // procresize because the write barrier needs a P. + if debug.cgocheck > 1 { + writeBarrier.cgo = true + writeBarrier.enabled = true + for _, p := range allp { + p.wbBuf.reset() + } + } + if buildVersion == "" { // Condition should never trigger. This code just serves // to ensure runtime·buildVersion is kept in the resulting binary. @@ -501,7 +522,7 @@ func dumpgstatus(gp *g) { func checkmcount() { // sched lock is held - if sched.mcount > sched.maxmcount { + if mcount() > sched.maxmcount { print("runtime: program exceeds ", sched.maxmcount, "-thread limit\n") throw("thread exhaustion") } @@ -515,15 +536,20 @@ func mcommoninit(mp *m) { callers(1, mp.createstack[:]) } - mp.fastrand = 0x49f6428a + uint32(mp.id) + uint32(cputicks()) - if mp.fastrand == 0 { - mp.fastrand = 0x49f6428a - } - lock(&sched.lock) - mp.id = sched.mcount - sched.mcount++ + if sched.mnext+1 < sched.mnext { + throw("runtime: thread ID overflow") + } + mp.id = sched.mnext + sched.mnext++ checkmcount() + + mp.fastrand[0] = 1597334677 * uint32(mp.id) + mp.fastrand[1] = uint32(cputicks()) + if mp.fastrand[0]|mp.fastrand[1] == 0 { + mp.fastrand[1] = 1 + } + mpreinit(mp) // Add to allm so garbage collector doesn't free g->m @@ -735,8 +761,10 @@ func casgstatus(gp *g, oldval, newval uint32) { // _Grunning or _Grunning|_Gscan; either way, // we own gp.gcscanvalid, so it's safe to read. // gp.gcscanvalid must not be true when we are running. - print("runtime: casgstatus ", hex(oldval), "->", hex(newval), " gp.status=", hex(gp.atomicstatus), " gp.gcscanvalid=true\n") - throw("casgstatus") + systemstack(func() { + print("runtime: casgstatus ", hex(oldval), "->", hex(newval), " gp.status=", hex(gp.atomicstatus), " gp.gcscanvalid=true\n") + throw("casgstatus") + }) } // See http://golang.org/cl/21503 for justification of the yield delay. @@ -912,7 +940,7 @@ func stopTheWorld(reason string) { // startTheWorld undoes the effects of stopTheWorld. func startTheWorld() { - systemstack(startTheWorldWithSema) + systemstack(func() { startTheWorldWithSema(false) }) // worldsema must be held over startTheWorldWithSema to ensure // gomaxprocs cannot change while worldsema is held. semrelease(&worldsema) @@ -962,8 +990,7 @@ func stopTheWorldWithSema() { _g_.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic. 
sched.stopwait-- // try to retake all P's in Psyscall status - for i := 0; i < int(gomaxprocs); i++ { - p := allp[i] + for _, p := range allp { s := p.status if s == _Psyscall && atomic.Cas(&p.status, s, _Pgcstop) { if trace.enabled { @@ -1003,8 +1030,7 @@ func stopTheWorldWithSema() { if sched.stopwait != 0 { bad = "stopTheWorld: not stopped (stopwait != 0)" } else { - for i := 0; i < int(gomaxprocs); i++ { - p := allp[i] + for _, p := range allp { if p.status != _Pgcstop { bad = "stopTheWorld: not stopped (status != _Pgcstop)" } @@ -1028,12 +1054,14 @@ func mhelpgc() { _g_.m.helpgc = -1 } -func startTheWorldWithSema() { +func startTheWorldWithSema(emitTraceEvent bool) int64 { _g_ := getg() - _g_.m.locks++ // disable preemption because it can be holding p in a local var - gp := netpoll(false) // non-blocking - injectglist(gp) + _g_.m.locks++ // disable preemption because it can be holding p in a local var + if netpollinited() { + gp := netpoll(false) // non-blocking + injectglist(gp) + } add := needaddgcproc() lock(&sched.lock) @@ -1068,6 +1096,12 @@ func startTheWorldWithSema() { } } + // Capture start-the-world time before doing clean-up tasks. + startTime := nanotime() + if emitTraceEvent { + traceGCSTWDone() + } + // Wakeup an additional proc in case we have excessive runnable goroutines // in local queues or in the global queue. If we don't, the proc will park itself. // If we have lots of excessive work, resetspinning will unpark additional procs as necessary. @@ -1086,6 +1120,8 @@ func startTheWorldWithSema() { newm(mhelpgc, nil) } _g_.m.locks-- + + return startTime } // First function run by a new goroutine. @@ -1116,15 +1152,13 @@ func kickoff() { throw("no p in kickoff") } } - gp.param = nil fv(param) goexit1() } -// This is called from mstart. -func mstart1() { +func mstart1(dummy int32) { _g_ := getg() if _g_ != _g_.m.g0 { @@ -1137,12 +1171,7 @@ func mstart1() { // prepare the thread to be able to handle the signals. // For gccgo minit was called by C code. if _g_.m == &m0 { - // Create an extra M for callbacks on threads not created by Go. - if iscgo && !cgoHasExtraM { - cgoHasExtraM = true - newextram() - } - initsig(false) + mstartm0() } if fn := _g_.m.mstartfn; fn != nil { @@ -1159,6 +1188,114 @@ func mstart1() { schedule() } +// mstartm0 implements part of mstart1 that only runs on the m0. +// +// Write barriers are allowed here because we know the GC can't be +// running yet, so they'll be no-ops. +// +//go:yeswritebarrierrec +func mstartm0() { + // Create an extra M for callbacks on threads not created by Go. + if iscgo && !cgoHasExtraM { + cgoHasExtraM = true + newextram() + } + initsig(false) +} + +// mexit tears down and exits the current thread. +// +// Don't call this directly to exit the thread, since it must run at +// the top of the thread stack. Instead, use gogo(&_g_.m.g0.sched) to +// unwind the stack to the point that exits the thread. +// +// It is entered with m.p != nil, so write barriers are allowed. It +// will release the P before exiting. +// +//go:yeswritebarrierrec +func mexit(osStack bool) { + g := getg() + m := g.m + + if m == &m0 { + // This is the main thread. Just wedge it. + // + // On Linux, exiting the main thread puts the process + // into a non-waitable zombie state. On Plan 9, + // exiting the main thread unblocks wait even though + // other threads are still running. On Solaris we can + // neither exitThread nor return from mstart. Other + // bad things probably happen on other platforms. 
+ // + // We could try to clean up this M more before wedging + // it, but that complicates signal handling. + handoffp(releasep()) + lock(&sched.lock) + sched.nmfreed++ + checkdead() + unlock(&sched.lock) + notesleep(&m.park) + throw("locked m0 woke up") + } + + sigblock() + unminit() + + // Free the gsignal stack. + if m.gsignal != nil { + stackfree(m.gsignal) + } + + // Remove m from allm. + lock(&sched.lock) + for pprev := &allm; *pprev != nil; pprev = &(*pprev).alllink { + if *pprev == m { + *pprev = m.alllink + goto found + } + } + throw("m not found in allm") +found: + if !osStack { + // Delay reaping m until it's done with the stack. + // + // If this is using an OS stack, the OS will free it + // so there's no need for reaping. + atomic.Store(&m.freeWait, 1) + // Put m on the free list, though it will not be reaped until + // freeWait is 0. Note that the free list must not be linked + // through alllink because some functions walk allm without + // locking, so may be using alllink. + m.freelink = sched.freem + sched.freem = m + } + unlock(&sched.lock) + + // Release the P. + handoffp(releasep()) + // After this point we must not have write barriers. + + // Invoke the deadlock detector. This must happen after + // handoffp because it may have started a new M to take our + // P's work. + lock(&sched.lock) + sched.nmfreed++ + checkdead() + unlock(&sched.lock) + + if osStack { + // Return from mstart and let the system thread + // library free the g0 stack and terminate the thread. + return + } + + // mstart is the thread's entry point, so there's nothing to + // return to. Exit the thread directly. exitThread will clear + // m.freeWait when it's done with the stack and the m can be + // reaped. + exitThread(&m.freeWait) +} + // forEachP calls fn(p) for every P p when p reaches a GC safe point. // If a P is currently executing code, this will bring the P to a GC // safe point and execute fn on that P. If the P is not executing code @@ -1182,7 +1319,7 @@ func forEachP(fn func(*p)) { sched.safePointFn = fn // Ask all Ps to run the safe point function. - for _, p := range allp[:gomaxprocs] { + for _, p := range allp { if p != _p_ { atomic.Store(&p.runSafePointFn, 1) } @@ -1210,8 +1347,7 @@ func forEachP(fn func(*p)) { // Force Ps currently in _Psyscall into _Pidle and hand them // off to induce safe point function execution. - for i := 0; i < int(gomaxprocs); i++ { - p := allp[i] + for _, p := range allp { s := p.status if s == _Psyscall && p.runSafePointFn == 1 && atomic.Cas(&p.status, s, _Pidle) { if trace.enabled { @@ -1240,8 +1376,7 @@ func forEachP(fn func(*p)) { if sched.safePointWait != 0 { throw("forEachP: not done") } - for i := 0; i < int(gomaxprocs); i++ { - p := allp[i] + for _, p := range allp { if p.runSafePointFn != 0 { throw("forEachP: P did not run fn") } @@ -1295,6 +1430,27 @@ func allocm(_p_ *p, fn func(), allocatestack bool) (mp *m, g0Stack unsafe.Pointe if _g_.m.p == 0 { acquirep(_p_) // temporarily borrow p for mallocs in this function } + + // Release the free M list. We need to do this somewhere and + // this may free up a stack we can use. 
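The sweep that follows is the reaping half of the handshake mexit sets up above: an exiting M is published on sched.freem with freeWait set, exitThread clears freeWait once the OS thread is off the g0 stack, and allocm frees the stack only for entries whose flag has cleared, keeping the rest for a later pass. A runnable toy of the same keep-or-free sweep (freeWait and the list shape are from the patch; everything else is simplified):

package main

import (
	"fmt"
	"sync/atomic"
)

// node stands in for an m on sched.freem.
type node struct {
	freeWait uint32 // 1 while the owning thread still uses its stack
	next     *node
}

// reap frees what it safely can and returns the survivors, the same
// shape as the loop in allocm below.
func reap(list *node) (kept *node, freed int) {
	for n := list; n != nil; {
		next := n.next
		if atomic.LoadUint32(&n.freeWait) != 0 {
			n.next, kept = kept, n // not yet safe: keep for a later pass
		} else {
			freed++ // owner signalled completion: safe to free
		}
		n = next
	}
	return kept, freed
}

func main() {
	busy := &node{freeWait: 1, next: &node{}}
	kept, freed := reap(busy)
	fmt.Println(freed, kept == busy) // 1 true
}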
+ if sched.freem != nil { + lock(&sched.lock) + var newList *m + for freem := sched.freem; freem != nil; { + if freem.freeWait != 0 { + next := freem.freelink + freem.freelink = newList + newList = freem + freem = next + continue + } + stackfree(freem.g0) + freem = freem.freelink + } + sched.freem = newList + unlock(&sched.lock) + } + mp = new(m) mp.mstartfn = fn mcommoninit(mp) @@ -1431,9 +1587,9 @@ func oneNewExtraM() { casgstatus(gp, _Gidle, _Gdead) gp.m = mp mp.curg = gp - mp.locked = _LockInternal - mp.lockedg = gp - gp.lockedm = mp + mp.lockedInt++ + mp.lockedg.set(gp) + gp.lockedm.set(mp) gp.goid = int64(atomic.Xadd64(&sched.goidgen, 1)) // put on allg for garbage collector allgadd(gp) @@ -1574,6 +1730,27 @@ func unlockextra(mp *m) { // around exec'ing while creating/destroying threads. See issue #19546. var execLock rwmutex +// newmHandoff contains a list of m structures that need new OS threads. +// This is used by newm in situations where newm itself can't safely +// start an OS thread. +var newmHandoff struct { + lock mutex + + // newm points to a list of M structures that need new OS + // threads. The list is linked through m.schedlink. + newm muintptr + + // waiting indicates that wake needs to be notified when an m + // is put on the list. + waiting bool + wake note + + // haveTemplateThread indicates that the templateThread has + // been started. This is not protected by lock. Use cas to set + // to 1. + haveTemplateThread uint32 +} + // Create a new m. It will start off with a call to fn, or else the scheduler. // fn needs to be static and not a heap allocated closure. // May run with m.p==nil, so write barriers are not allowed. @@ -1582,11 +1759,90 @@ func newm(fn func(), _p_ *p) { mp, _, _ := allocm(_p_, fn, false) mp.nextp.set(_p_) mp.sigmask = initSigmask + if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" { + // We're on a locked M or a thread that may have been + // started by C. The kernel state of this thread may + // be strange (the user may have locked it for that + // purpose). We don't want to clone that into another + // thread. Instead, ask a known-good thread to create + // the thread for us. + // + // This is disabled on Plan 9. See golang.org/issue/22227. + // + // TODO: This may be unnecessary on Windows, which + // doesn't model thread creation off fork. + lock(&newmHandoff.lock) + if newmHandoff.haveTemplateThread == 0 { + throw("on a locked thread with no template thread") + } + mp.schedlink = newmHandoff.newm + newmHandoff.newm.set(mp) + if newmHandoff.waiting { + newmHandoff.waiting = false + notewakeup(&newmHandoff.wake) + } + unlock(&newmHandoff.lock) + return + } + newm1(mp) +} + +func newm1(mp *m) { execLock.rlock() // Prevent process clone. newosproc(mp) execLock.runlock() } +// startTemplateThread starts the template thread if it is not already +// running. +// +// The calling thread must itself be in a known-good state. +func startTemplateThread() { + if !atomic.Cas(&newmHandoff.haveTemplateThread, 0, 1) { + return + } + newm(templateThread, nil) +} + +// templateThread is a thread in a known-good state that exists solely +// to start new threads in known-good states when the calling thread +// may not be in a good state. +// +// Many programs never need this, so templateThread is started lazily +// when we first enter a state that might lead to running on a thread +// in an unknown state. +// +// templateThread runs on an M without a P, so it must not have write +// barriers.
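Before templateThread's body in the next hunk, a runnable toy of the wake-up discipline newm and templateThread share through newmHandoff: producers append under the lock and call notewakeup only if the server has announced it is waiting, and the server re-checks the queue before sleeping, so no submission is lost. A buffered channel stands in for the runtime's note; the rest is simplified:

package main

import (
	"fmt"
	"sync"
)

type handoff struct {
	mu      sync.Mutex
	work    []string
	waiting bool
	wake    chan struct{} // stands in for newmHandoff.wake
}

func (h *handoff) add(w string) {
	h.mu.Lock()
	h.work = append(h.work, w)
	if h.waiting { // only wake a server that said it was sleeping
		h.waiting = false
		h.wake <- struct{}{} // notewakeup
	}
	h.mu.Unlock()
}

func (h *handoff) serve(done chan<- struct{}) {
	for {
		h.mu.Lock()
		for len(h.work) > 0 { // drain fully before sleeping
			w := h.work[0]
			h.work = h.work[1:]
			h.mu.Unlock()
			fmt.Println("start:", w) // newm1 in the real code
			if w == "quit" {
				done <- struct{}{}
				return
			}
			h.mu.Lock()
		}
		h.waiting = true
		h.mu.Unlock()
		<-h.wake // notesleep
	}
}

func main() {
	h := &handoff{wake: make(chan struct{}, 1)}
	done := make(chan struct{})
	go h.serve(done)
	h.add("m1")
	h.add("quit")
	<-done
}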
+// +//go:nowritebarrierrec +func templateThread() { + lock(&sched.lock) + sched.nmsys++ + checkdead() + unlock(&sched.lock) + + for { + lock(&newmHandoff.lock) + for newmHandoff.newm != 0 { + newm := newmHandoff.newm.ptr() + newmHandoff.newm = 0 + unlock(&newmHandoff.lock) + for newm != nil { + next := newm.schedlink.ptr() + newm.schedlink = 0 + newm1(newm) + newm = next + } + lock(&newmHandoff.lock) + } + newmHandoff.waiting = true + noteclear(&newmHandoff.wake) + unlock(&newmHandoff.lock) + notesleep(&newmHandoff.wake) + } +} + // Stops execution of the current m until new work is available. // Returns with acquired P. func stopm() { @@ -1609,7 +1865,9 @@ retry: notesleep(&_g_.m.park) noteclear(&_g_.m.park) if _g_.m.helpgc != 0 { + // helpgc() set _g_.m.p and _g_.m.mcache, so we have a P. gchelper() + // Undo the effects of helpgc(). _g_.m.helpgc = 0 _g_.m.mcache = nil _g_.m.p = 0 @@ -1743,7 +2001,7 @@ func wakep() { func stoplockedm() { _g_ := getg() - if _g_.m.lockedg == nil || _g_.m.lockedg.lockedm != _g_.m { + if _g_.m.lockedg == 0 || _g_.m.lockedg.ptr().lockedm.ptr() != _g_.m { throw("stoplockedm: inconsistent locking") } if _g_.m.p != 0 { @@ -1755,7 +2013,7 @@ func stoplockedm() { // Wait until another thread schedules lockedg again. notesleep(&_g_.m.park) noteclear(&_g_.m.park) - status := readgstatus(_g_.m.lockedg) + status := readgstatus(_g_.m.lockedg.ptr()) if status&^_Gscan != _Grunnable { print("runtime:stoplockedm: g is not Grunnable or Gscanrunnable\n") dumpgstatus(_g_) @@ -1771,7 +2029,7 @@ func stoplockedm() { func startlockedm(gp *g) { _g_ := getg() - mp := gp.lockedm + mp := gp.lockedm.ptr() if mp == _g_.m { throw("startlockedm: locked to me") } @@ -1896,11 +2154,12 @@ top: // Poll network. // This netpoll is only an optimization before we resort to stealing. - // We can safely skip it if there a thread blocked in netpoll already. - // If there is any kind of logical race with that blocked thread - // (e.g. it has already returned from netpoll, but does not set lastpoll yet), - // this thread will do blocking netpoll below anyway. - if netpollinited() && sched.lastpoll != 0 { + // We can safely skip it if there are no waiters or a thread is blocked + // in netpoll already. If there is any kind of logical race with that + // blocked thread (e.g. it has already returned from netpoll, but does + // not set lastpoll yet), this thread will do blocking netpoll below + // anyway. + if netpollinited() && atomic.Load(&netpollWaiters) > 0 && atomic.Load64(&sched.lastpoll) != 0 { if gp := netpoll(false); gp != nil { // non-blocking // netpoll returns list of goroutines linked by schedlink. injectglist(gp.schedlink.ptr()) @@ -1996,9 +2255,8 @@ stop: } // check all runqueues once again - for i := 0; i < int(gomaxprocs); i++ { - _p_ := allp[i] - if _p_ != nil && !runqempty(_p_) { + for _, _p_ := range allp { + if !runqempty(_p_) { lock(&sched.lock) _p_ = pidleget() unlock(&sched.lock) @@ -2137,9 +2395,15 @@ func schedule() { throw("schedule: holding locks") } - if _g_.m.lockedg != nil { + if _g_.m.lockedg != 0 { stoplockedm() - execute(_g_.m.lockedg, false) // Never returns. + execute(_g_.m.lockedg.ptr(), false) // Never returns. + } + + // We should not schedule away from a g that is executing a cgo call, + // since the cgo call is using the m's g0 stack. + if _g_.m.incgo { + throw("schedule: in cgo") } top: @@ -2205,7 +2469,7 @@ top: resetspinning() } - if gp.lockedm != nil { + if gp.lockedm != 0 { // Hands off own p to the locked m, // then blocks waiting for a new p. 
startlockedm(gp) @@ -2322,8 +2586,9 @@ func goexit0(gp *g) { gp.isSystemGoroutine = false } gp.m = nil - gp.lockedm = nil - _g_.m.lockedg = nil + locked := gp.lockedm != 0 + gp.lockedm = 0 + _g_.m.lockedg = 0 gp.entry = nil gp.paniconfault = false gp._defer = nil // should be true already but just in case. @@ -2334,17 +2599,38 @@ func goexit0(gp *g) { gp.labels = nil gp.timer = nil + if gcBlackenEnabled != 0 && gp.gcAssistBytes > 0 { + // Flush assist credit to the global pool. This gives + // better information to pacing if the application is + // rapidly creating and exiting goroutines. + scanCredit := int64(gcController.assistWorkPerByte * float64(gp.gcAssistBytes)) + atomic.Xaddint64(&gcController.bgScanCredit, scanCredit) + gp.gcAssistBytes = 0 + } + // Note that gp's stack scan is now "valid" because it has no // stack. gp.gcscanvalid = true dropg() - if _g_.m.locked&^_LockExternal != 0 { - print("invalid m->locked = ", _g_.m.locked, "\n") + if _g_.m.lockedInt != 0 { + print("invalid m->lockedInt = ", _g_.m.lockedInt, "\n") throw("internal lockOSThread error") } - _g_.m.locked = 0 + _g_.m.lockedExt = 0 gfput(_g_.m.p.ptr(), gp) + if locked { + // The goroutine may have locked this thread because + // it put it in an unusual kernel state. Kill it + // rather than returning it to the thread pool. + // Return to mstart, which will release the P and exit + // the thread. + if GOOS != "plan9" { // See golang.org/issue/22227. + _g_.m.exiting = true + gogo(_g_.m.g0) + } + } schedule() } @@ -2481,7 +2767,9 @@ func exitsyscall(dummy int32) { oldp := _g_.m.p.ptr() if exitsyscallfast() { if _g_.m.mcache == nil { - throw("lost mcache") + systemstack(func() { + throw("lost mcache") + }) } if trace.enabled { if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick { @@ -2519,7 +2807,9 @@ func exitsyscall(dummy int32) { mcall(exitsyscall0) if _g_.m.mcache == nil { - throw("lost mcache") + systemstack(func() { + throw("lost mcache") + }) } // Scheduler returned, so we're allowed to run now. @@ -2644,7 +2934,7 @@ func exitsyscall0(gp *g) { acquirep(_p_) execute(gp, false) // Never returns. } - if _g_.m.lockedg != nil { + if _g_.m.lockedg != 0 { // Wait until another thread schedules gp and so m again. stoplockedm() execute(gp, false) // Never returns. @@ -2798,7 +3088,7 @@ func newproc(fn uintptr, arg unsafe.Pointer) *g { newg.entry = entry newg.param = arg - newg.gopc = getcallerpc(unsafe.Pointer(&fn)) + newg.gopc = getcallerpc() newg.startpc = fn if _g_.m.curg != nil { newg.labels = _g_.m.curg.labels @@ -2827,7 +3117,7 @@ func newproc(fn uintptr, arg unsafe.Pointer) *g { runqput(_p_, newg, true) - if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 && runtimeInitTime != 0 { + if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 && mainStarted { wakep() } _g_.m.locks-- @@ -2947,23 +3237,41 @@ func Breakpoint() { //go:nosplit func dolockOSThread() { _g_ := getg() - _g_.m.lockedg = _g_ - _g_.lockedm = _g_.m + _g_.m.lockedg.set(_g_) + _g_.lockedm.set(_g_.m) } //go:nosplit // LockOSThread wires the calling goroutine to its current operating system thread. -// Until the calling goroutine exits or calls UnlockOSThread, it will always -// execute in that thread, and no other goroutine can. +// The calling goroutine will always execute in that thread, +// and no other goroutine will execute in it, +// until the calling goroutine has made as many calls to +// UnlockOSThread as to LockOSThread.
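To make the paragraph above concrete before it continues: a runnable example of the new nesting behavior (it mirrors the TestLockOSThreadNesting test added later in this patch):

package main

import (
	"fmt"
	"runtime"
)

func main() {
	done := make(chan struct{})
	go func() {
		defer close(done)
		runtime.LockOSThread()
		runtime.LockOSThread() // nests: two active lock calls
		runtime.UnlockOSThread()
		// Still wired to the thread: one LockOSThread call remains.
		runtime.UnlockOSThread()
		// Fully unlocked; an extra UnlockOSThread is now a no-op.
		runtime.UnlockOSThread()
		fmt.Println("unlocked")
	}()
	<-done
}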
+// If the calling goroutine exits without unlocking the thread, +// the thread will be terminated. +// +// A goroutine should call LockOSThread before calling OS services or +// non-Go library functions that depend on per-thread state. func LockOSThread() { - getg().m.locked |= _LockExternal + if atomic.Load(&newmHandoff.haveTemplateThread) == 0 && GOOS != "plan9" { + // If we need to start a new thread from the locked + // thread, we need the template thread. Start it now + // while we're in a known-good state. + startTemplateThread() + } + _g_ := getg() + _g_.m.lockedExt++ + if _g_.m.lockedExt == 0 { + _g_.m.lockedExt-- + panic("LockOSThread nesting overflow") + } dolockOSThread() } //go:nosplit func lockOSThread() { - getg().m.locked += _LockInternal + getg().m.lockedInt++ dolockOSThread() } @@ -2973,29 +3281,43 @@ func lockOSThread() { //go:nosplit func dounlockOSThread() { _g_ := getg() - if _g_.m.locked != 0 { + if _g_.m.lockedInt != 0 || _g_.m.lockedExt != 0 { return } - _g_.m.lockedg = nil - _g_.lockedm = nil + _g_.m.lockedg = 0 + _g_.lockedm = 0 } //go:nosplit -// UnlockOSThread unwires the calling goroutine from its fixed operating system thread. -// If the calling goroutine has not called LockOSThread, UnlockOSThread is a no-op. +// UnlockOSThread undoes an earlier call to LockOSThread. +// If this drops the number of active LockOSThread calls on the +// calling goroutine to zero, it unwires the calling goroutine from +// its fixed operating system thread. +// If there are no active LockOSThread calls, this is a no-op. +// +// Before calling UnlockOSThread, the caller must ensure that the OS +// thread is suitable for running other goroutines. If the caller made +// any permanent changes to the state of the thread that would affect +// other goroutines, it should not call this function and thus leave +// the goroutine locked to the OS thread until the goroutine (and +// hence the thread) exits. func UnlockOSThread() { - getg().m.locked &^= _LockExternal + _g_ := getg() + if _g_.m.lockedExt == 0 { + return + } + _g_.m.lockedExt-- dounlockOSThread() } //go:nosplit func unlockOSThread() { _g_ := getg() - if _g_.m.locked < _LockInternal { + if _g_.m.lockedInt == 0 { systemstack(badunlockosthread) } - _g_.m.locked -= _LockInternal + _g_.m.lockedInt-- dounlockOSThread() } @@ -3005,10 +3327,7 @@ func badunlockosthread() { func gcount() int32 { n := int32(allglen) - sched.ngfree - int32(atomic.Load(&sched.ngsys)) - for _, _p_ := range &allp { - if _p_ == nil { - break - } + for _, _p_ := range allp { n -= _p_.gfreecnt } @@ -3021,7 +3340,7 @@ func gcount() int32 { } func mcount() int32 { - return sched.mcount + return int32(sched.mnext - sched.nmfreed) } var prof struct { @@ -3190,7 +3509,7 @@ func setcpuprofilerate(hz int32) { // Returns list of Ps with local work, they need to be scheduled by the caller. func procresize(nprocs int32) *p { old := gomaxprocs - if old < 0 || old > _MaxGomaxprocs || nprocs <= 0 || nprocs > _MaxGomaxprocs { + if old < 0 || nprocs <= 0 { throw("procresize: invalid arg") } if trace.enabled { @@ -3204,6 +3523,23 @@ func procresize(nprocs int32) *p { } sched.procresizetime = now + // Grow allp if necessary. + if nprocs > int32(len(allp)) { + // Synchronize with retake, which could be running + // concurrently since it doesn't run on a P. + lock(&allpLock) + if nprocs <= int32(cap(allp)) { + allp = allp[:nprocs] + } else { + nallp := make([]*p, nprocs) + // Copy everything up to allp's cap so we + // never lose old allocated Ps. 
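The copy on the next line is the subtle step of this grow path: it copies up to cap(allp) rather than len(allp), so P pointers parked beyond the current length by an earlier shrink stay in the new backing array instead of being lost. The same move in isolation (a sketch; growPreservingCap is a hypothetical name and []*int stands in for []*p):

func growPreservingCap(s []*int, n int) []*int {
	if n <= cap(s) {
		return s[:n] // reuse the backing array
	}
	ns := make([]*int, n)
	copy(ns, s[:cap(s)]) // keep entries beyond len(s) too
	return ns
}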
+ copy(nallp, allp[:cap(allp)]) + allp = nallp + } + unlock(&allpLock) + } + // initialize new P's for i := int32(0); i < nprocs; i++ { pp := allp[i] @@ -3213,6 +3549,7 @@ func procresize(nprocs int32) *p { pp.status = _Pgcstop pp.sudogcache = pp.sudogbuf[:0] pp.deferpool = pp.deferpoolbuf[:0] + pp.wbBuf.reset() atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp)) } if pp.mcache == nil { @@ -3230,13 +3567,11 @@ func procresize(nprocs int32) *p { // free unused P's for i := nprocs; i < old; i++ { p := allp[i] - if trace.enabled { - if p == getg().m.p.ptr() { - // moving to p[0], pretend that we were descheduled - // and then scheduled again to keep the trace sane. - traceGoSched() - traceProcStop(p) - } + if trace.enabled && p == getg().m.p.ptr() { + // moving to p[0], pretend that we were descheduled + // and then scheduled again to keep the trace sane. + traceGoSched() + traceProcStop(p) } // move all runnable goroutines to the global queue for p.runqhead != p.runqtail { @@ -3262,6 +3597,11 @@ func procresize(nprocs int32) *p { // world is stopped. p.gcBgMarkWorker.set(nil) } + // Flush p's write barrier buffer. + if gcphase != _GCoff { + wbBufFlush1(p) + p.gcw.dispose() + } for i := range p.sudogbuf { p.sudogbuf[i] = nil } @@ -3274,10 +3614,18 @@ func procresize(nprocs int32) *p { p.mcache = nil gfpurge(p) traceProcFree(p) + p.gcAssistTime = 0 p.status = _Pdead // can't free P itself because it can be referenced by an M in syscall } + // Trim allp. + if int32(len(allp)) != nprocs { + lock(&allpLock) + allp = allp[:nprocs] + unlock(&allpLock) + } + _g_ := getg() if _g_.m.p != 0 && _g_.m.p.ptr().id < nprocs { // continue to use the current P @@ -3349,7 +3697,7 @@ func acquirep1(_p_ *p) { throw("acquirep: already in go") } if _p_.m != 0 || _p_.status != _Pidle { - id := int32(0) + id := int64(0) if _p_.m != 0 { id = _p_.m.ptr().id } @@ -3394,6 +3742,7 @@ func incidlelocked(v int32) { // Check for deadlock situation. // The check is based on number of running M's, if 0 -> deadlock. +// sched.lock must be held. func checkdead() { // For -buildmode=c-shared or -buildmode=c-archive it's OK if // there are no running goroutines. The calling program is @@ -3410,13 +3759,12 @@ func checkdead() { return } - // -1 for sysmon - run := sched.mcount - sched.nmidle - sched.nmidlelocked - 1 + run := mcount() - sched.nmidle - sched.nmidlelocked - sched.nmsys if run > 0 { return } if run < 0 { - print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", sched.mcount, "\n") + print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", mcount(), " nmsys=", sched.nmsys, "\n") throw("checkdead: inconsistent counts") } @@ -3479,6 +3827,11 @@ var forcegcperiod int64 = 2 * 60 * 1e9 // //go:nowritebarrierrec func sysmon() { + lock(&sched.lock) + sched.nmsys++ + checkdead() + unlock(&sched.lock) + // If a heap span goes unused for 5 minutes after a garbage collection, // we hand it back to the operating system. 
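An aside on sysmon registering itself in sched.nmsys just above: it feeds checkdead's new arithmetic, where mcount() is sched.mnext - sched.nmfreed and system Ms are subtracted explicitly instead of the old hard-coded "-1 for sysmon". A worked example under assumed counts:

// Suppose 7 thread IDs were handed out (sched.mnext == 7) and one M was
// freed (sched.nmfreed == 1), so mcount() == 6. With 2 idle Ms, 1 idle
// locked M, and 2 system Ms (say sysmon and the template thread):
//
//	run := mcount() - sched.nmidle - sched.nmidlelocked - sched.nmsys
//	    // == 6 - 2 - 1 - 2 == 1
//
// One M is still running user code, so there is no deadlock; run == 0
// with runnable goroutines would make checkdead throw.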
scavengelimit := int64(5 * 60 * 1e9) @@ -3518,15 +3871,11 @@ func sysmon() { } shouldRelax := true if osRelaxMinNS > 0 { - lock(&timers.lock) - if timers.sleeping { - now := nanotime() - next := timers.sleepUntil - if next-now < osRelaxMinNS { - shouldRelax = false - } + next := timeSleepUntil() + now := nanotime() + if next-now < osRelaxMinNS { + shouldRelax = false } - unlock(&timers.lock) } if shouldRelax { osRelax(true) @@ -3550,7 +3899,7 @@ func sysmon() { // poll network if not polled for more than 10ms lastpoll := int64(atomic.Load64(&sched.lastpoll)) now := nanotime() - if lastpoll != 0 && lastpoll+10*1000*1000 < now { + if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now { atomic.Cas64(&sched.lastpoll, uint64(lastpoll), uint64(now)) gp := netpoll(false) // non-blocking - returns list of goroutines if gp != nil { @@ -3607,9 +3956,17 @@ const forcePreemptNS = 10 * 1000 * 1000 // 10ms func retake(now int64) uint32 { n := 0 - for i := int32(0); i < gomaxprocs; i++ { + // Prevent allp slice changes. This lock will be completely + // uncontended unless we're already stopping the world. + lock(&allpLock) + // We can't use a range loop over allp because we may + // temporarily drop the allpLock. Hence, we need to re-fetch + // allp each time around the loop. + for i := 0; i < len(allp); i++ { _p_ := allp[i] if _p_ == nil { + // This can happen if procresize has grown + // allp but not yet created new Ps. continue } pd := &_p_.sysmontick @@ -3628,6 +3985,8 @@ func retake(now int64) uint32 { if runqempty(_p_) && atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) > 0 && pd.syscallwhen+10*1000*1000 > now { continue } + // Drop allpLock so we can take sched.lock. + unlock(&allpLock) // Need to decrement number of idle locked M's // (pretending that one more is running) before the CAS. // Otherwise the M from which we retake can exit the syscall, @@ -3643,6 +4002,7 @@ func retake(now int64) uint32 { handoffp(_p_) } incidlelocked(1) + lock(&allpLock) } else if s == _Prunning { // Preempt G if it's running for too long. t := int64(_p_.schedtick) @@ -3657,6 +4017,7 @@ func retake(now int64) uint32 { preemptone(_p_) } } + unlock(&allpLock) return uint32(n) } @@ -3667,9 +4028,8 @@ func retake(now int64) uint32 { // Returns true if preemption request was issued to at least one goroutine. func preemptall() bool { res := false - for i := int32(0); i < gomaxprocs; i++ { - _p_ := allp[i] - if _p_ == nil || _p_.status != _Prunning { + for _, _p_ := range allp { + if _p_.status != _Prunning { continue } if preemptone(_p_) { @@ -3727,23 +4087,19 @@ func schedtrace(detailed bool) { } lock(&sched.lock) - print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle, " threads=", sched.mcount, " spinningthreads=", sched.nmspinning, " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize) + print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle, " threads=", mcount(), " spinningthreads=", sched.nmspinning, " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize) if detailed { print(" gcwaiting=", sched.gcwaiting, " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait, "\n") } // We must be careful while reading data from P's, M's and G's. // Even if we hold schedlock, most data can be changed concurrently. // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil. 
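An aside on the retake loop above, since the same discipline recurs elsewhere: the index loop with a nil check is deliberate, because retake drops allpLock mid-iteration to take sched.lock, and procresize may grow allp in that window. A minimal sketch of the idiom (needsHandoff and handoff are hypothetical stand-ins):

lock(&allpLock)
for i := 0; i < len(allp); i++ { // no range: allp may change while unlocked
	p := allp[i]
	if p == nil {
		continue // procresize grew allp but hasn't filled this slot yet
	}
	if needsHandoff(p) {
		unlock(&allpLock) // can't hold allpLock while taking sched.lock
		handoff(p)
		lock(&allpLock)
	}
}
unlock(&allpLock)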
- for i := int32(0); i < gomaxprocs; i++ { - _p_ := allp[i] - if _p_ == nil { - continue - } + for i, _p_ := range allp { mp := _p_.m.ptr() h := atomic.Load(&_p_.runqhead) t := atomic.Load(&_p_.runqtail) if detailed { - id := int32(-1) + id := int64(-1) if mp != nil { id = mp.id } @@ -3756,7 +4112,7 @@ func schedtrace(detailed bool) { print("[") } print(t - h) - if i == gomaxprocs-1 { + if i == len(allp)-1 { print("]\n") } } @@ -3770,7 +4126,7 @@ func schedtrace(detailed bool) { for mp := allm; mp != nil; mp = mp.alllink { _p_ := mp.p.ptr() gp := mp.curg - lockedg := mp.lockedg + lockedg := mp.lockedg.ptr() id1 := int32(-1) if _p_ != nil { id1 = _p_.id @@ -3790,12 +4146,12 @@ func schedtrace(detailed bool) { for gi := 0; gi < len(allgs); gi++ { gp := allgs[gi] mp := gp.m - lockedm := gp.lockedm - id1 := int32(-1) + lockedm := gp.lockedm.ptr() + id1 := int64(-1) if mp != nil { id1 = mp.id } - id2 := int32(-1) + id2 := int64(-1) if lockedm != nil { id2 = lockedm.id } @@ -4077,22 +4433,25 @@ func runqgrab(_p_ *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool if stealRunNextG { // Try to steal from _p_.runnext. if next := _p_.runnext; next != 0 { - // Sleep to ensure that _p_ isn't about to run the g we - // are about to steal. - // The important use case here is when the g running on _p_ - // ready()s another g and then almost immediately blocks. - // Instead of stealing runnext in this window, back off - // to give _p_ a chance to schedule runnext. This will avoid - // thrashing gs between different Ps. - // A sync chan send/recv takes ~50ns as of time of writing, - // so 3us gives ~50x overshoot. - if GOOS != "windows" { - usleep(3) - } else { - // On windows system timer granularity is 1-15ms, - // which is way too much for this optimization. - // So just yield. - osyield() + if _p_.status == _Prunning { + // Sleep to ensure that _p_ isn't about to run the g + // we are about to steal. + // The important use case here is when the g running + // on _p_ ready()s another g and then almost + // immediately blocks. Instead of stealing runnext + // in this window, back off to give _p_ a chance to + // schedule runnext. This will avoid thrashing gs + // between different Ps. + // A sync chan send/recv takes ~50ns as of time of + // writing, so 3us gives ~50x overshoot. + if GOOS != "windows" { + usleep(3) + } else { + // On windows system timer granularity is + // 1-15ms, which is way too much for this + // optimization. So just yield. + osyield() + } } if !_p_.runnext.cas(next, 0) { continue diff --git a/libgo/go/runtime/proc_runtime_test.go b/libgo/go/runtime/proc_runtime_test.go index d56f9b14636..a7bde2c6df7 100644 --- a/libgo/go/runtime/proc_runtime_test.go +++ b/libgo/go/runtime/proc_runtime_test.go @@ -2,8 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build ignore - // Proc unit tests. In runtime package so can use runtime guts. 
package runtime diff --git a/libgo/go/runtime/proc_test.go b/libgo/go/runtime/proc_test.go index 313a9610e0e..672e1fa0148 100644 --- a/libgo/go/runtime/proc_test.go +++ b/libgo/go/runtime/proc_test.go @@ -658,6 +658,116 @@ func BenchmarkClosureCall(b *testing.B) { _ = sum } +func benchmarkWakeupParallel(b *testing.B, spin func(time.Duration)) { + if runtime.GOMAXPROCS(0) == 1 { + b.Skip("skipping: GOMAXPROCS=1") + } + + wakeDelay := 5 * time.Microsecond + for _, delay := range []time.Duration{ + 0, + 1 * time.Microsecond, + 2 * time.Microsecond, + 5 * time.Microsecond, + 10 * time.Microsecond, + 20 * time.Microsecond, + 50 * time.Microsecond, + 100 * time.Microsecond, + } { + b.Run(delay.String(), func(b *testing.B) { + if b.N == 0 { + return + } + // Start two goroutines, which alternate between being + // sender and receiver in the following protocol: + // + // - The receiver spins for `delay` and then does a + // blocking receive on a channel. + // + // - The sender spins for `delay+wakeDelay` and then + // sends to the same channel. (The addition of + // `wakeDelay` improves the probability that the + // receiver will be blocking when the send occurs when + // the goroutines execute in parallel.) + // + // In each iteration of the benchmark, each goroutine + // acts once as sender and once as receiver, so each + // goroutine spins for delay twice. + // + // BenchmarkWakeupParallel is used to estimate how + // efficiently the scheduler parallelizes goroutines in + // the presence of blocking: + // + // - If both goroutines are executed on the same core, + // an increase in delay by N will increase the time per + // iteration by 4*N, because all 4 delays are + // serialized. + // + // - Otherwise, an increase in delay by N will increase + // the time per iteration by 2*N, and the time per + // iteration is 2 * (runtime overhead + chan + // send/receive pair + delay + wakeDelay). This allows + // the runtime overhead, including the time it takes + // for the unblocked goroutine to be scheduled, to be + // estimated. + ping, pong := make(chan struct{}), make(chan struct{}) + start := make(chan struct{}) + done := make(chan struct{}) + go func() { + <-start + for i := 0; i < b.N; i++ { + // sender + spin(delay + wakeDelay) + ping <- struct{}{} + // receiver + spin(delay) + <-pong + } + done <- struct{}{} + }() + go func() { + for i := 0; i < b.N; i++ { + // receiver + spin(delay) + <-ping + // sender + spin(delay + wakeDelay) + pong <- struct{}{} + } + done <- struct{}{} + }() + b.ResetTimer() + start <- struct{}{} + <-done + <-done + }) + } +} + +func BenchmarkWakeupParallelSpinning(b *testing.B) { + benchmarkWakeupParallel(b, func(d time.Duration) { + end := time.Now().Add(d) + for time.Now().Before(end) { + // do nothing + } + }) +} + +// sysNanosleep is defined by OS-specific files (such as runtime_linux_test.go) +// to sleep for the given duration. If nil, dependent tests are skipped. +// The implementation should invoke a blocking system call and not +// call time.Sleep, which would deschedule the goroutine. 
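The declaration just below leaves sysNanosleep nil unless an OS-specific test file fills it in. For concreteness, one plausible Linux definition matching the comment's requirements (a hedged sketch, not necessarily the tree's actual runtime_linux_test.go; assumes the "syscall" and "time" imports):

func init() {
	sysNanosleep = func(d time.Duration) {
		ts := syscall.NsecToTimespec(d.Nanoseconds())
		for {
			// Block the OS thread in the kernel; time.Sleep would
			// merely deschedule the goroutine.
			if err := syscall.Nanosleep(&ts, &ts); err != syscall.EINTR {
				return
			}
		}
	}
}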
+var sysNanosleep func(d time.Duration) + +func BenchmarkWakeupParallelSyscall(b *testing.B) { + if sysNanosleep == nil { + b.Skipf("skipping on %v; sysNanosleep not defined", runtime.GOOS) + } + benchmarkWakeupParallel(b, func(d time.Duration) { + sysNanosleep(d) + }) +} + type Matrix [][]float64 func BenchmarkMatmult(b *testing.B) { @@ -722,8 +832,47 @@ func matmult(done chan<- struct{}, A, B, C Matrix, i0, i1, j0, j1, k0, k1, thres } } -/* func TestStealOrder(t *testing.T) { runtime.RunStealOrderTest() } -*/ + +func TestLockOSThreadNesting(t *testing.T) { + go func() { + e, i := runtime.LockOSCounts() + if e != 0 || i != 0 { + t.Errorf("want locked counts 0, 0; got %d, %d", e, i) + return + } + runtime.LockOSThread() + runtime.LockOSThread() + runtime.UnlockOSThread() + e, i = runtime.LockOSCounts() + if e != 1 || i != 0 { + t.Errorf("want locked counts 1, 0; got %d, %d", e, i) + return + } + runtime.UnlockOSThread() + e, i = runtime.LockOSCounts() + if e != 0 || i != 0 { + t.Errorf("want locked counts 0, 0; got %d, %d", e, i) + return + } + }() +} + +func TestLockOSThreadExit(t *testing.T) { + testLockOSThreadExit(t, "testprog") +} + +func testLockOSThreadExit(t *testing.T, prog string) { + output := runTestProg(t, prog, "LockOSThreadMain", "GOMAXPROCS=1") + want := "OK\n" + if output != want { + t.Errorf("want %s, got %s\n", want, output) + } + + output = runTestProg(t, prog, "LockOSThreadAlt") + if output != want { + t.Errorf("want %s, got %s\n", want, output) + } +} diff --git a/libgo/go/runtime/runtime-lldb_test.go b/libgo/go/runtime/runtime-lldb_test.go index 98bc9066662..9a287052eaf 100644 --- a/libgo/go/runtime/runtime-lldb_test.go +++ b/libgo/go/runtime/runtime-lldb_test.go @@ -5,11 +5,7 @@ package runtime_test import ( - "debug/elf" - "debug/macho" - "encoding/binary" "internal/testenv" - "io" "io/ioutil" "os" "os/exec" @@ -158,7 +154,7 @@ func TestLldbPython(t *testing.T) { t.Fatalf("failed to create file: %v", err) } - cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags", "-N -l", "-o", "a.exe") + cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags=all=-N -l", "-o", "a.exe") cmd.Dir = dir out, err := cmd.CombinedOutput() if err != nil { @@ -182,81 +178,3 @@ func TestLldbPython(t *testing.T) { t.Fatalf("Unexpected lldb output:\n%s", got) } } - -// Check that aranges are valid even when lldb isn't installed. 
-func TestDwarfAranges(t *testing.T) { - testenv.MustHaveGoBuild(t) - dir, err := ioutil.TempDir("", "go-build") - if err != nil { - t.Fatalf("failed to create temp directory: %v", err) - } - defer os.RemoveAll(dir) - - src := filepath.Join(dir, "main.go") - err = ioutil.WriteFile(src, []byte(lldbHelloSource), 0644) - if err != nil { - t.Fatalf("failed to create file: %v", err) - } - - cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "a.exe") - cmd.Dir = dir - out, err := cmd.CombinedOutput() - if err != nil { - t.Fatalf("building source %v\n%s", err, out) - } - - filename := filepath.Join(dir, "a.exe") - if f, err := elf.Open(filename); err == nil { - sect := f.Section(".debug_aranges") - if sect == nil { - t.Fatal("Missing aranges section") - } - verifyAranges(t, f.ByteOrder, sect.Open()) - } else if f, err := macho.Open(filename); err == nil { - sect := f.Section("__debug_aranges") - if sect == nil { - t.Fatal("Missing aranges section") - } - verifyAranges(t, f.ByteOrder, sect.Open()) - } else { - t.Skip("Not an elf or macho binary.") - } -} - -func verifyAranges(t *testing.T, byteorder binary.ByteOrder, data io.ReadSeeker) { - var header struct { - UnitLength uint32 // does not include the UnitLength field - Version uint16 - Offset uint32 - AddressSize uint8 - SegmentSize uint8 - } - for { - offset, err := data.Seek(0, io.SeekCurrent) - if err != nil { - t.Fatalf("Seek error: %v", err) - } - if err = binary.Read(data, byteorder, &header); err == io.EOF { - return - } else if err != nil { - t.Fatalf("Error reading arange header: %v", err) - } - tupleSize := int64(header.SegmentSize) + 2*int64(header.AddressSize) - lastTupleOffset := offset + int64(header.UnitLength) + 4 - tupleSize - if lastTupleOffset%tupleSize != 0 { - t.Fatalf("Invalid arange length %d, (addr %d, seg %d)", header.UnitLength, header.AddressSize, header.SegmentSize) - } - if _, err = data.Seek(lastTupleOffset, io.SeekStart); err != nil { - t.Fatalf("Seek error: %v", err) - } - buf := make([]byte, tupleSize) - if n, err := data.Read(buf); err != nil || int64(n) < tupleSize { - t.Fatalf("Read error: %v", err) - } - for _, val := range buf { - if val != 0 { - t.Fatalf("Invalid terminator") - } - } - } -} diff --git a/libgo/go/runtime/runtime.go b/libgo/go/runtime/runtime.go index 58710de406c..d19d6afed38 100644 --- a/libgo/go/runtime/runtime.go +++ b/libgo/go/runtime/runtime.go @@ -61,6 +61,12 @@ func syscall_Getpagesize() int { return int(physPageSize) } //go:linkname os_runtime_args os.runtime_args func os_runtime_args() []string { return append([]string{}, argslice...) } +//go:linkname syscall_Exit syscall.Exit +//go:nosplit +func syscall_Exit(code int) { + exit(int32(code)) +} + // Temporary, for the gccgo runtime code written in C. 
//go:linkname get_envs runtime_get_envs func get_envs() []string { return envs } diff --git a/libgo/go/runtime/runtime1.go b/libgo/go/runtime/runtime1.go index 627adf74765..b617f8598fa 100644 --- a/libgo/go/runtime/runtime1.go +++ b/libgo/go/runtime/runtime1.go @@ -111,10 +111,6 @@ var test_z64, test_x64 uint64 func testAtomic64() { test_z64 = 42 test_x64 = 0 - prefetcht0(uintptr(unsafe.Pointer(&test_z64))) - prefetcht1(uintptr(unsafe.Pointer(&test_z64))) - prefetcht2(uintptr(unsafe.Pointer(&test_z64))) - prefetchnta(uintptr(unsafe.Pointer(&test_z64))) if atomic.Cas64(&test_z64, test_x64, 1) { throw("cas64 failed") } @@ -413,13 +409,6 @@ func parsedebugvars() { setTraceback(gogetenv("GOTRACEBACK")) traceback_env = traceback_cache - - // For cgocheck > 1, we turn on the write barrier at all times - // and check all pointer writes. - if debug.cgocheck > 1 { - writeBarrier.cgo = true - writeBarrier.enabled = true - } } //go:linkname setTraceback runtime_debug.SetTraceback diff --git a/libgo/go/runtime/runtime2.go b/libgo/go/runtime/runtime2.go index 045e76ff4df..543086d09aa 100644 --- a/libgo/go/runtime/runtime2.go +++ b/libgo/go/runtime/runtime2.go @@ -173,9 +173,13 @@ func efaceOf(ep *interface{}) *eface { // a word that is completely ignored by the GC than to have one for which // only a few updates are ignored. // -// Gs, Ms, and Ps are always reachable via true pointers in the -// allgs, allm, and allp lists or (during allocation before they reach those lists) +// Gs and Ps are always reachable via true pointers in the +// allgs and allp lists or (during allocation before they reach those lists) // from stack variables. +// +// Ms are always reachable via true pointers either from allm or +// freem. Unlike Gs and Ps we do free Ms, so it's important that +// nothing ever hold an muintptr across a safe point. // A guintptr holds a goroutine pointer, but typed as a uintptr // to bypass write barriers. It is used in the Gobuf goroutine state @@ -225,6 +229,15 @@ func (pp puintptr) ptr() *p { return (*p)(unsafe.Pointer(pp)) } //go:nosplit func (pp *puintptr) set(p *p) { *pp = puintptr(unsafe.Pointer(p)) } +// muintptr is a *m that is not tracked by the garbage collector. +// +// Because we do free Ms, there are some additional constraints on +// muintptrs: +// +// 1. Never hold an muintptr locally across a safe point. +// +// 2. Any muintptr in the heap must be owned by the M itself so it can +// ensure it is not in use when the last true *m is released. type muintptr uintptr //go:nosplit @@ -256,11 +269,14 @@ type sudog struct { // channel this sudog is blocking on. shrinkstack depends on // this for sudogs involved in channel ops. - g *g - selectdone *uint32 // CAS to 1 to win select race (may point to stack) - next *sudog - prev *sudog - elem unsafe.Pointer // data element (may point to stack) + g *g + + // isSelect indicates g is participating in a select, so + // g.selectDone must be CAS'd to win the wake-up race. + isSelect bool + next *sudog + prev *sudog + elem unsafe.Pointer // data element (may point to stack) // The following fields are never accessed concurrently. // For channels, waitlink is only accessed by g.
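The muintptr rules above are easiest to see as code. A runnable sketch of the pointer-laundering pattern (ptr and set match the patch's methods; the m type here is a stub):

package main

import (
	"fmt"
	"unsafe"
)

type m struct{ id int64 }

// muintptr mirrors the patch: a *m stored as a uintptr, so the GC and
// write barriers ignore it entirely.
type muintptr uintptr

func (mp muintptr) ptr() *m   { return (*m)(unsafe.Pointer(mp)) }
func (mp *muintptr) set(x *m) { *mp = muintptr(unsafe.Pointer(x)) }

func main() {
	mm := &m{id: 7}
	var link muintptr
	link.set(mm)                      // no write barrier: an invisible reference
	fmt.Println(link.ptr().id, mm.id) // mm also keeps the M reachable here
	// The two rules follow from that invisibility: never hold a muintptr
	// (or the *m from ptr) across a safe point, and a heap muintptr must
	// be owned by the M it names, because Ms, unlike Gs and Ps, are freed.
}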
@@ -351,7 +367,7 @@ type g struct { sysexitticks int64 // cputicks when syscall has returned (for tracing) traceseq uint64 // trace event sequencer tracelastp puintptr // last P emitted an event for this goroutine - lockedm *m + lockedm muintptr sig uint32 writebuf []byte sigcode0 uintptr @@ -362,8 +378,9 @@ type g struct { // Not for gccgo: racectx uintptr waiting *sudog // sudog structures this g is waiting on (that have a valid elem ptr); in lock order // Not for gccgo: cgoCtxt []uintptr // cgo traceback context - labels unsafe.Pointer // profiler labels - timer *timer // cached timer for time.Sleep + labels unsafe.Pointer // profiler labels + timer *timer // cached timer for time.Sleep + selectDone uint32 // are we participating in a select and did someone win the race? // Per-G GC state @@ -381,13 +398,26 @@ type g struct { exception unsafe.Pointer // current exception being thrown isforeign bool // whether current exception is not from Go - // Fields that hold stack and context information if status is Gsyscall + // When using split-stacks, these fields holds the results of + // __splitstack_find while executing a syscall. These are used + // by the garbage collector to scan the goroutine's stack. + // + // When not using split-stacks, g0 stacks are allocated by the + // libc and other goroutine stacks are allocated by malg. + // gcstack: unused (sometimes cleared) + // gcstacksize: g0: 0; others: size of stack + // gcnextsegment: unused + // gcnextsp: current SP while executing a syscall + // gcinitialsp: g0: top of stack; others: start of stack memory gcstack uintptr gcstacksize uintptr gcnextsegment uintptr gcnextsp uintptr gcinitialsp unsafe.Pointer - gcregs g_ucontext_t + + // gcregs holds the register values while executing a syscall. + // This is set by getcontext and scanned by the garbage collector. + gcregs g_ucontext_t entry func(unsafe.Pointer) // goroutine function to run entryfn uintptr // function address passed to __go_go @@ -411,14 +441,15 @@ type m struct { // Fields not known to debuggers. procid uint64 // for debuggers, but offset not hard-coded gsignal *g // signal-handling g + // Not for gccgo: goSigStack gsignalStack // Go-allocated signal handling stack sigmask sigset // storage for saved signal mask - // Not for gccgo: tls [6]uintptr // thread-local storage (for x86 extern register) + // Not for gccgo: tls [6]uintptr // thread-local storage (for x86 extern register) mstartfn func() curg *g // current running goroutine caughtsig guintptr // goroutine running during fatal signal p puintptr // attached p for executing go code (nil if not executing go code) nextp puintptr - id int32 + id int64 mallocing int32 throwing int32 preemptoff string // if != "", keep curg running on this m @@ -432,8 +463,11 @@ type m struct { inwb bool // m is executing a write barrier newSigstack bool // minit on C thread called sigaltstack printlock int8 - incgo bool // m is executing a cgo call - fastrand uint32 + incgo bool // m is executing a cgo call + freeWait uint32 // if == 0, safe to free g0 and delete m (atomic) + fastrand [2]uint32 + needextram bool + traceback uint8 ncgocall uint64 // number of cgo calls in total ncgo int32 // number of cgo calls currently in progress // Not for gccgo: cgoCallersUse uint32 // if non-zero, cgoCallers in use temporarily @@ -442,15 +476,14 @@ type m struct { alllink *m // on allm schedlink muintptr mcache *mcache - lockedg *g + lockedg guintptr createstack [32]location // stack that created this thread. 
- // Not for gccgo: freglo [16]uint32 // d[i] lsb and f[i] - // Not for gccgo: freghi [16]uint32 // d[i] msb and f[i+16] - // Not for gccgo: fflag uint32 // floating point compare flags - locked uint32 // tracking for lockosthread - nextwaitm uintptr // next m waiting for lock - needextram bool - traceback uint8 + // Not for gccgo: freglo [16]uint32 // d[i] lsb and f[i] + // Not for gccgo: freghi [16]uint32 // d[i] msb and f[i+16] + // Not for gccgo: fflag uint32 // floating point compare flags + lockedExt uint32 // tracking for external LockOSThread + lockedInt uint32 // tracking for internal lockOSThread + nextwaitm muintptr // next m waiting for lock waitunlockf unsafe.Pointer // todo go func(*g, unsafe.pointer) bool waitlock unsafe.Pointer waittraceev byte @@ -458,6 +491,7 @@ type m struct { startingtrace bool syscalltick uint32 // Not for gccgo: thread uintptr // thread handle + freelink *m // on sched.freem // these are here because they are too large to be on the stack // of low-level NOSPLIT functions. @@ -475,6 +509,7 @@ type m struct { gsignalstacksize uintptr dropextram bool // drop after call is done + exiting bool // thread is exiting gcing int32 } @@ -490,7 +525,7 @@ type p struct { sysmontick sysmontick // last tick observed by sysmon m muintptr // back-link to associated m (nil if idle) mcache *mcache - // Not for gccgo: racectx uintptr + racectx uintptr // gccgo has only one size of defer. deferpool []*_defer @@ -535,26 +570,30 @@ type p struct { palloc persistentAlloc // per-P to avoid mutex // Per-P GC state - gcAssistTime int64 // Nanoseconds in assistAlloc - gcBgMarkWorker guintptr - gcMarkWorkerMode gcMarkWorkerMode + gcAssistTime int64 // Nanoseconds in assistAlloc + gcFractionalMarkTime int64 // Nanoseconds in fractional mark worker + gcBgMarkWorker guintptr + gcMarkWorkerMode gcMarkWorkerMode + + // gcMarkWorkerStartTime is the nanotime() at which this mark + // worker started. + gcMarkWorkerStartTime int64 // gcw is this P's GC work buffer cache. The work buffer is // filled by write barriers, drained by mutator assists, and // disposed on certain GC state transitions. gcw gcWork + // wbBuf is this P's GC write barrier buffer. + // + // TODO: Consider caching this in the running G. + wbBuf wbBuf + runSafePointFn uint32 // if 1, run sched.safePointFn at next safe point pad [sys.CacheLineSize]byte } -const ( - // The max value of GOMAXPROCS. - // There are no fundamental restrictions on the value. - _MaxGomaxprocs = 1 << 10 -) - type schedt struct { // accessed atomically. keep at top to ensure alignment on 32-bit systems. goidgen uint64 @@ -562,11 +601,16 @@ type schedt struct { lock mutex + // When increasing nmidle, nmidlelocked, nmsys, or nmfreed, be + // sure to call checkdead(). + midle muintptr // idle m's waiting for work nmidle int32 // number of idle m's waiting for work nmidlelocked int32 // number of locked m's waiting for work - mcount int32 // number of m's that have been created + mnext int64 // number of m's that have been created and next M ID maxmcount int32 // maximum number of m's allowed (or die) + nmsys int32 // number of system m's not counted for deadlock + nmfreed int64 // cumulative number of freed m's ngsys uint32 // number of system goroutines; updated atomically @@ -592,6 +636,10 @@ type schedt struct { deferlock mutex deferpool *_defer + // freem is the list of m's waiting to be freed when their + // m.exited is set. Linked through m.freelink. 
+ freem *m + gcwaiting uint32 // gc is waiting to run stopwait int32 stopnote note @@ -610,18 +658,7 @@ type schedt struct { totaltime int64 // ∫gomaxprocs dt up to procresizetime } -// The m.locked word holds two pieces of state counting active calls to LockOSThread/lockOSThread. -// The low bit (LockExternal) is a boolean reporting whether any LockOSThread call is active. -// External locks are not recursive; a second lock is silently ignored. -// The upper bits of m.locked record the nesting depth of calls to lockOSThread -// (counting up by LockInternal), popped by unlockOSThread (counting down by LockInternal). -// Internal locks can be recursive. For instance, a lock for cgo can occur while the main -// goroutine is holding the lock during the initialization phase. -const ( - _LockExternal = 1 - _LockInternal = 2 -) - +// Values for the flags field of a sigTabT. const ( _SigNotify = 1 << iota // let signal.Notify have signal, even if from kernel _SigKill // if signal.Notify doesn't take it, exit quietly @@ -630,7 +667,8 @@ const ( _SigDefault // if the signal isn't explicitly requested, don't monitor it _SigGoExit // cause all runtime procs to exit (only used on Plan 9). _SigSetStack // add SA_ONSTACK to libc handler - _SigUnblock // unblocked in minit + _SigUnblock // always unblock; see blockableSig + _SigIgn // _SIG_DFL action is to ignore the signal ) // Lock-free stack node. @@ -671,8 +709,8 @@ func extendRandom(r []byte, n int) { } } -// deferred subroutine calls -// This is the gccgo version. +// A _defer holds an entry on the list of deferred calls. +// If you add a field here, add code to clear it in freedefer. type _defer struct { // The next entry in the stack. link *_defer @@ -743,7 +781,8 @@ const _TracebackMaxFrames = 100 var ( allglen uintptr allm *m - allp [_MaxGomaxprocs + 1]*p + allp []*p // len(allp) == gomaxprocs; may change at safe points, otherwise immutable + allpLock mutex // Protects P-less reads of allp and all writes gomaxprocs int32 ncpu int32 forcegc forcegcstate diff --git a/libgo/go/runtime/runtime_mmap_test.go b/libgo/go/runtime/runtime_mmap_test.go index 0141e81d4a0..c0040414d46 100644 --- a/libgo/go/runtime/runtime_mmap_test.go +++ b/libgo/go/runtime/runtime_mmap_test.go @@ -14,17 +14,10 @@ import ( // what the code in mem_bsd.go, mem_darwin.go, and mem_linux.go expects. // See the uses of ENOMEM in sysMap in those files. func TestMmapErrorSign(t *testing.T) { - p := runtime.Mmap(nil, ^uintptr(0)&^(runtime.GetPhysPageSize()-1), 0, runtime.MAP_ANON|runtime.MAP_PRIVATE, -1, 0) + p, err := runtime.Mmap(nil, ^uintptr(0)&^(runtime.GetPhysPageSize()-1), 0, runtime.MAP_ANON|runtime.MAP_PRIVATE, -1, 0) - // The runtime.mmap function is nosplit, but t.Errorf is not. - // Reset the pointer so that we don't get an "invalid stack - // pointer" error from t.Errorf if we call it. - v := uintptr(p) - p = nil - - err := runtime.Errno() - if v != ^uintptr(0) || err != runtime.ENOMEM { - t.Errorf("mmap = %v, %v, want %v", v, err, runtime.ENOMEM) + if p != nil || err != runtime.ENOMEM { + t.Errorf("mmap = %v, %v, want nil, %v", p, err, runtime.ENOMEM) } } @@ -34,20 +27,20 @@ func TestPhysPageSize(t *testing.T) { ps := runtime.GetPhysPageSize() // Get a region of memory to play with. This should be page-aligned. 
- b := uintptr(runtime.Mmap(nil, 2*ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE, -1, 0)) - if b == ^uintptr(0) { - t.Fatalf("Mmap: %v %v", b, runtime.Errno()) + b, err := runtime.Mmap(nil, 2*ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE, -1, 0) + if err != 0 { + t.Fatalf("Mmap: %v", err) } // Mmap should fail at a half page into the buffer. - err := uintptr(runtime.Mmap(unsafe.Pointer(uintptr(b)+ps/2), ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE|runtime.MAP_FIXED, -1, 0)) - if err != ^uintptr(0) { + _, err = runtime.Mmap(unsafe.Pointer(uintptr(b)+ps/2), ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE|runtime.MAP_FIXED, -1, 0) + if err == 0 { t.Errorf("Mmap should have failed with half-page alignment %d, but succeeded: %v", ps/2, err) } // Mmap should succeed at a full page into the buffer. - err = uintptr(runtime.Mmap(unsafe.Pointer(uintptr(b)+ps), ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE|runtime.MAP_FIXED, -1, 0)) - if err == ^uintptr(0) { - t.Errorf("Mmap at full-page alignment %d failed: %v %v", ps, err, runtime.Errno()) + _, err = runtime.Mmap(unsafe.Pointer(uintptr(b)+ps), ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE|runtime.MAP_FIXED, -1, 0) + if err != 0 { + t.Errorf("Mmap at full-page alignment %d failed: %v", ps, err) } } diff --git a/libgo/go/runtime/runtime_test.go b/libgo/go/runtime/runtime_test.go index b8f6ac2aed4..0231043260b 100644 --- a/libgo/go/runtime/runtime_test.go +++ b/libgo/go/runtime/runtime_test.go @@ -5,6 +5,7 @@ package runtime_test import ( + "flag" "io" . "runtime" "runtime/debug" @@ -13,6 +14,8 @@ import ( "unsafe" ) +var flagQuick = flag.Bool("quick", false, "skip slow tests, for second run in all.bash") + func init() { // We're testing the runtime, so make tracebacks show things // in the runtime. This only raises the level, so it won't @@ -196,9 +199,9 @@ func eqstring_generic(s1, s2 string) bool { } func TestEqString(t *testing.T) { - // This isn't really an exhaustive test of eqstring, it's + // This isn't really an exhaustive test of == on strings, it's // just a convenient way of documenting (via eqstring_generic) - // what eqstring does. + // what == does. s := []string{ "", "a", @@ -213,7 +216,7 @@ func TestEqString(t *testing.T) { x := s1 == s2 y := eqstring_generic(s1, s2) if x != y { - t.Errorf(`eqstring("%s","%s") = %t, want %t`, s1, s2, x, y) + t.Errorf(`("%s" == "%s") = %t, want %t`, s1, s2, x, y) } } } diff --git a/libgo/go/runtime/rwmutex_test.go b/libgo/go/runtime/rwmutex_test.go index a69eca1511f..872b3b098e8 100644 --- a/libgo/go/runtime/rwmutex_test.go +++ b/libgo/go/runtime/rwmutex_test.go @@ -12,6 +12,7 @@ package runtime_test import ( "fmt" . "runtime" + "runtime/debug" "sync/atomic" "testing" ) @@ -47,6 +48,10 @@ func doTestParallelReaders(numReaders int) { func TestParallelRWMutexReaders(t *testing.T) { defer GOMAXPROCS(GOMAXPROCS(-1)) + // If runtime triggers a forced GC during this test then it will deadlock, + // since the goroutines can't be stopped/preempted. + // Disable GC for this test (see issue #10958). 
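The defer on the next line packs a save-and-restore into one expression: debug.SetGCPercent returns the previous setting, so the inner call disables GC now and the deferred outer call restores the old percentage when the test returns. The equivalent long form:

old := debug.SetGCPercent(-1) // disable GC, remember the old setting
defer debug.SetGCPercent(old) // restore it on the way out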
+ defer debug.SetGCPercent(debug.SetGCPercent(-1)) doTestParallelReaders(1) doTestParallelReaders(3) doTestParallelReaders(4) diff --git a/libgo/go/runtime/select.go b/libgo/go/runtime/select.go index 9f8ac49d972..096af52be35 100644 --- a/libgo/go/runtime/select.go +++ b/libgo/go/runtime/select.go @@ -88,7 +88,7 @@ func newselect(sel *hselect, selsize int64, size int32) { } func selectsend(sel *hselect, c *hchan, elem unsafe.Pointer) { - pc := getcallerpc(unsafe.Pointer(&sel)) + pc := getcallerpc() i := sel.ncase if i >= sel.tcase { throw("selectsend: too many cases") @@ -109,7 +109,7 @@ func selectsend(sel *hselect, c *hchan, elem unsafe.Pointer) { } func selectrecv(sel *hselect, c *hchan, elem unsafe.Pointer, received *bool) { - pc := getcallerpc(unsafe.Pointer(&sel)) + pc := getcallerpc() i := sel.ncase if i >= sel.tcase { throw("selectrecv: too many cases") @@ -131,7 +131,7 @@ func selectrecv(sel *hselect, c *hchan, elem unsafe.Pointer, received *bool) { } func selectdefault(sel *hselect) { - pc := getcallerpc(unsafe.Pointer(&sel)) + pc := getcallerpc() i := sel.ncase if i >= sel.tcase { throw("selectdefault: too many cases") @@ -301,7 +301,6 @@ func selectgo(sel *hselect) int { var ( gp *g - done uint32 sg *sudog c *hchan k *scase @@ -368,7 +367,6 @@ loop: // pass 2 - enqueue on all chans gp = getg() - done = 0 if gp.waiting != nil { throw("gp.waiting != nil") } @@ -382,8 +380,7 @@ loop: c = cas.c sg := acquireSudog() sg.g = gp - // Note: selectdone is adjusted for stack copies in stack1.go:adjustsudogs - sg.selectdone = (*uint32)(noescape(unsafe.Pointer(&done))) + sg.isSelect = true // No stack splits between assigning elem and enqueuing // sg on gp.waiting where copystack can find it. sg.elem = cas.elem @@ -409,62 +406,9 @@ loop: gp.param = nil gopark(selparkcommit, nil, "select", traceEvGoBlockSelect, 1) - // While we were asleep, some goroutine came along and completed - // one of the cases in the select and woke us up (called ready). - // As part of that process, the goroutine did a cas on done above - // (aka *sg.selectdone for all queued sg) to win the right to - // complete the select. Now done = 1. - // - // If we copy (grow) our own stack, we will update the - // selectdone pointers inside the gp.waiting sudog list to point - // at the new stack. Another goroutine attempting to - // complete one of our (still linked in) select cases might - // see the new selectdone pointer (pointing at the new stack) - // before the new stack has real data; if the new stack has done = 0 - // (before the old values are copied over), the goroutine might - // do a cas via sg.selectdone and incorrectly believe that it has - // won the right to complete the select, executing a second - // communication and attempting to wake us (call ready) again. - // - // Then things break. - // - // The best break is that the goroutine doing ready sees the - // _Gcopystack status and throws, as in #17007. - // A worse break would be for us to continue on, start running real code, - // block in a semaphore acquisition (sema.go), and have the other - // goroutine wake us up without having really acquired the semaphore. - // That would result in the goroutine spuriously running and then - // queue up another spurious wakeup when the semaphore really is ready. - // In general the situation can cascade until something notices the - // problem and causes a crash. 
- // - // A stack shrink does not have this problem, because it locks - // all the channels that are involved first, blocking out the - // possibility of a cas on selectdone. - // - // A stack growth before gopark above does not have this - // problem, because we hold those channel locks (released by - // selparkcommit). - // - // A stack growth after sellock below does not have this - // problem, because again we hold those channel locks. - // - // The only problem is a stack growth during sellock. - // To keep that from happening, run sellock on the system stack. - // - // It might be that we could avoid this if copystack copied the - // stack before calling adjustsudogs. In that case, - // syncadjustsudogs would need to recopy the tiny part that - // it copies today, resulting in a little bit of extra copying. - // - // An even better fix, not for the week before a release candidate, - // would be to put space in every sudog and make selectdone - // point at (say) the space in the first sudog. - - systemstack(func() { - sellock(scases, lockorder) - }) + sellock(scases, lockorder) + gp.selectDone = 0 sg = (*sudog)(gp.param) gp.param = nil @@ -477,7 +421,7 @@ loop: sglist = gp.waiting // Clear all elem before unlinking from gp.waiting. for sg1 := gp.waiting; sg1 != nil; sg1 = sg1.waitlink { - sg1.selectdone = nil + sg1.isSelect = false sg1.elem = nil sg1.c = nil } @@ -528,10 +472,8 @@ loop: print("wait-return: sel=", sel, " c=", c, " cas=", cas, " kind=", cas.kind, "\n") } - if cas.kind == caseRecv { - if cas.receivedp != nil { - *cas.receivedp = true - } + if cas.kind == caseRecv && cas.receivedp != nil { + *cas.receivedp = true } if raceenabled { diff --git a/libgo/go/runtime/sema.go b/libgo/go/runtime/sema.go index d04e6f592fc..6e2beeccee1 100644 --- a/libgo/go/runtime/sema.go +++ b/libgo/go/runtime/sema.go @@ -275,7 +275,10 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { // on the ticket: s.ticket <= both s.prev.ticket and s.next.ticket. // https://en.wikipedia.org/wiki/Treap // http://faculty.washington.edu/aragon/pubs/rst89.pdf - s.ticket = fastrand() + // + // s.ticket is compared with zero in a couple of places, so we set the lowest bit. + // It will not noticeably affect the treap's quality.
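The assignment on the next line implements the comment just above: OR-ing in the low bit makes the ticket odd, hence never zero, so zero keeps working as a sentinel while only 1 of 32 random bits is sacrificed. A tiny runnable check (math/rand stands in for the runtime's fastrand):

package main

import (
	"fmt"
	"math/rand"
)

func main() {
	for i := 0; i < 3; i++ {
		t := rand.Uint32() | 1
		fmt.Println(t, t != 0 && t&1 == 1) // always true: odd, never zero
	}
}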
+ s.ticket = fastrand() | 1 s.parent = last *pt = s diff --git a/libgo/go/runtime/signal_gccgo.go b/libgo/go/runtime/signal_gccgo.go index 056be36a729..6fe7ba10aaf 100644 --- a/libgo/go/runtime/signal_gccgo.go +++ b/libgo/go/runtime/signal_gccgo.go @@ -46,11 +46,6 @@ func kill(pid _pid_t, sig uint32) int32 //extern setitimer func setitimer(which int32, new *_itimerval, old *_itimerval) int32 -type sigTabT struct { - flags int32 - name string -} - type sigctxt struct { info *_siginfo_t ctxt unsafe.Pointer diff --git a/libgo/go/runtime/signal_sighandler.go b/libgo/go/runtime/signal_sighandler.go index 378c68e1d90..c042162e7e6 100644 --- a/libgo/go/runtime/signal_sighandler.go +++ b/libgo/go/runtime/signal_sighandler.go @@ -92,9 +92,9 @@ func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) { } print("PC=", hex(sigpc), " m=", _g_.m.id, " sigcode=", c.sigcode(), "\n") - if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 { + if _g_.m.lockedg != 0 && _g_.m.ncgo > 0 && gp == _g_.m.g0 { print("signal arrived during cgo execution\n") - gp = _g_.m.lockedg + gp = _g_.m.lockedg.ptr() } print("\n") @@ -111,7 +111,7 @@ func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) { if docrash { crashing++ - if crashing < sched.mcount-int32(extraMCount) { + if crashing < mcount()-int32(extraMCount) { // There are other m's that need to dump their stacks. // Relay SIGQUIT to the next m by sending it to the current process. // All m's that have already received SIGQUIT have signal masks blocking diff --git a/libgo/go/runtime/signal_unix.go b/libgo/go/runtime/signal_unix.go index 3237e18765f..85171484a90 100644 --- a/libgo/go/runtime/signal_unix.go +++ b/libgo/go/runtime/signal_unix.go @@ -8,7 +8,6 @@ package runtime import ( "runtime/internal/atomic" - "runtime/internal/sys" "unsafe" ) @@ -16,6 +15,16 @@ import ( //go:linkname initsig runtime.initsig //go:linkname sigtrampgo runtime.sigtrampgo +// sigTabT is the type of an entry in the global sigtable array. +// sigtable is inherently system dependent, and appears in OS-specific files, +// but sigTabT is the same for all Unixy systems. +// The sigtable array is indexed by a system signal number to get the flags +// and printable name of each signal. +type sigTabT struct { + flags int32 + name string +} + //go:linkname os_sigpipe os.sigpipe func os_sigpipe() { systemstack(sigpipe) @@ -275,6 +284,12 @@ func sigpipe() { // sigtrampgo is called from the signal handler function, sigtramp, // written in assembly code. // This is called by the signal handler, and the world may be stopped. +// +// It must be nosplit because getg() is still the G that was running +// (if any) when the signal was delivered, but it's (usually) called +// on the gsignal stack. Until this switches the G to gsignal, the +// stack bounds check won't work. +// //go:nosplit //go:nowritebarrierrec func sigtrampgo(sig uint32, info *_siginfo_t, ctx unsafe.Pointer) { @@ -355,8 +370,9 @@ func sigpanic() { //go:nosplit //go:nowritebarrierrec func dieFromSignal(sig uint32) { - setsig(sig, _SIG_DFL) unblocksig(sig) + // Mark the signal as unhandled to ensure it is forwarded. + atomic.Store(&handlingSig[sig], 0) raise(sig) // That should have killed us. On some systems, though, raise @@ -368,6 +384,22 @@ func dieFromSignal(sig uint32) { osyield() osyield() + // If that didn't work, try _SIG_DFL. 
+ setsig(sig, _SIG_DFL) + raise(sig) + + osyield() + osyield() + osyield() + + // On Darwin we may still fail to die, because raise sends the + // signal to the whole process rather than just the current thread, + // and osyield just sleeps briefly rather than letting all other + // threads run. See issue 20315. Sleep longer. + if GOOS == "darwin" { + usleep(100) + } + // If we are still somehow running, just exit with the wrong status. exit(2) } @@ -434,7 +466,7 @@ func crash() { // this means the OS X core file will be >128 GB and even on a zippy // workstation can take OS X well over an hour to write (uninterruptible). // Save users from making that mistake. - if sys.PtrSize == 8 { + if GOARCH == "amd64" { return } } @@ -463,7 +495,7 @@ func ensureSigM() { var sigBlocked sigset sigfillset(&sigBlocked) for i := range sigtable { - if sigtable[i].flags&_SigUnblock != 0 { + if !blockableSig(uint32(i)) { sigdelset(&sigBlocked, i) } } @@ -475,7 +507,7 @@ func ensureSigM() { sigdelset(&sigBlocked, int(sig)) } case sig := <-disableSigChan: - if sig > 0 { + if sig > 0 && blockableSig(sig) { sigaddset(&sigBlocked, int(sig)) } } @@ -536,17 +568,23 @@ func sigfwdgo(sig uint32, info *_siginfo_t, ctx unsafe.Pointer) bool { return false } fwdFn := atomic.Loaduintptr(&fwdSig[sig]) + flags := sigtable[sig].flags - if !signalsOK { - // The only way we can get here is if we are in a - // library or archive, we installed a signal handler - // at program startup, but the Go runtime has not yet - // been initialized. + // If we aren't handling the signal, forward it. + if atomic.Load(&handlingSig[sig]) == 0 || !signalsOK { + // If the signal is ignored, doing nothing is the same as forwarding. + if fwdFn == _SIG_IGN || (fwdFn == _SIG_DFL && flags&_SigIgn != 0) { + return true + } + // We are not handling the signal and there is no other handler to forward to. + // Crash with the default behavior. if fwdFn == _SIG_DFL { + setsig(sig, _SIG_DFL) dieFromSignal(sig) - } else { - sigfwd(fwdFn, sig, info, ctx) + return false } + + sigfwd(fwdFn, sig, info, ctx) return true } @@ -555,18 +593,6 @@ func sigfwdgo(sig uint32, info *_siginfo_t, ctx unsafe.Pointer) bool { return false } - // If we aren't handling the signal, forward it. - // Really if we aren't handling the signal, we shouldn't get here, - // but on Darwin setsigstack can lead us here because it sets - // the sa_tramp field. The sa_tramp field is not returned by - // sigaction, so the fix for that is non-obvious. - if atomic.Load(&handlingSig[sig]) == 0 { - sigfwd(fwdFn, sig, info, ctx) - return true - } - - flags := sigtable[sig].flags - c := sigctxt{info, ctx} // Only forward synchronous signals and SIGPIPE. // Unfortunately, user generated SIGPIPEs will also be forwarded, because si_code @@ -678,7 +704,7 @@ func minitSignalStack() { func minitSignalMask() { nmask := getg().m.sigmask for i := range sigtable { - if sigtable[i].flags&_SigUnblock != 0 { + if !blockableSig(uint32(i)) { sigdelset(&nmask, i) } } @@ -694,3 +720,22 @@ func unminitSignals() { signalstack(nil, 0) } } + +// blockableSig returns whether sig may be blocked by the signal mask. +// We never want to block the signals marked _SigUnblock; +// these are the synchronous signals that turn into a Go panic. +// In a Go program--not a c-archive/c-shared--we never want to block +// the signals marked _SigKill or _SigThrow, as otherwise it's possible +// for all running threads to block them and delay their delivery until +// we start a new thread. 
When linked into a C program we let the C code +// decide on the disposition of those signals. +func blockableSig(sig uint32) bool { + flags := sigtable[sig].flags + if flags&_SigUnblock != 0 { + return false + } + if isarchive || islibrary { + return true + } + return flags&(_SigKill|_SigThrow) == 0 +} diff --git a/libgo/go/runtime/sigqueue.go b/libgo/go/runtime/sigqueue.go index cd036ce364c..b108c39cc85 100644 --- a/libgo/go/runtime/sigqueue.go +++ b/libgo/go/runtime/sigqueue.go @@ -45,13 +45,14 @@ import ( // as there is no connection between handling a signal and receiving one, // but atomic instructions should minimize it. var sig struct { - note note - mask [(_NSIG + 31) / 32]uint32 - wanted [(_NSIG + 31) / 32]uint32 - ignored [(_NSIG + 31) / 32]uint32 - recv [(_NSIG + 31) / 32]uint32 - state uint32 - inuse bool + note note + mask [(_NSIG + 31) / 32]uint32 + wanted [(_NSIG + 31) / 32]uint32 + ignored [(_NSIG + 31) / 32]uint32 + recv [(_NSIG + 31) / 32]uint32 + state uint32 + delivering uint32 + inuse bool } const ( @@ -60,15 +61,20 @@ const ( sigSending ) -// Called from sighandler to send a signal back out of the signal handling thread. -// Reports whether the signal was sent. If not, the caller typically crashes the program. +// sigsend delivers a signal from sighandler to the internal signal delivery queue. +// It reports whether the signal was sent. If not, the caller typically crashes the program. +// It runs from the signal handler, so it's limited in what it can do. func sigsend(s uint32) bool { bit := uint32(1) << uint(s&31) if !sig.inuse || s >= uint32(32*len(sig.wanted)) { return false } + atomic.Xadd(&sig.delivering, 1) + // We are running in the signal handler; defer is not available. + if w := atomic.Load(&sig.wanted[s/32]); w&bit == 0 { + atomic.Xadd(&sig.delivering, -1) return false } @@ -76,6 +82,7 @@ func sigsend(s uint32) bool { for { mask := sig.mask[s/32] if mask&bit != 0 { + atomic.Xadd(&sig.delivering, -1) return true // signal already in queue } if atomic.Cas(&sig.mask[s/32], mask, mask|bit) { @@ -104,6 +111,7 @@ Send: } } + atomic.Xadd(&sig.delivering, -1) return true } @@ -155,6 +163,15 @@ func signal_recv() uint32 { // by the os/signal package. //go:linkname signalWaitUntilIdle os_signal.signalWaitUntilIdle func signalWaitUntilIdle() { + // Although the signals we care about have been removed from + // sig.wanted, it is possible that another thread has received + // a signal, has read from sig.wanted, is now updating sig.mask, + // and has not yet woken up the processor thread. We need to wait + // until all current signal deliveries have completed. + for atomic.Load(&sig.delivering) != 0 { + Gosched() + } + // Although WaitUntilIdle seems like the right name for this // function, the state we are looking for is sigReceiving, not // sigIdle. The sigIdle state is really more like sigProcessing. 
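
The sigqueue.go changes above implement a small in-flight counter protocol: sigsend increments sig.delivering before it reads sig.wanted, decrements it on every exit path (defer is unavailable in a signal handler), and signalWaitUntilIdle spins with Gosched until the counter drains. A standalone sketch of the same pattern, using sync/atomic and runtime.Gosched in place of the runtime's internal primitives (illustrative, not the runtime's code):

	package main

	import (
		"runtime"
		"sync/atomic"
	)

	var delivering uint32 // deliveries currently in flight

	// deliver mirrors sigsend's shape: every return path must undo
	// the increment explicitly, since a real signal handler cannot defer.
	func deliver(wanted bool) bool {
		atomic.AddUint32(&delivering, 1)
		if !wanted {
			atomic.AddUint32(&delivering, ^uint32(0)) // decrement
			return false
		}
		// ... queue the signal and wake the receiver here ...
		atomic.AddUint32(&delivering, ^uint32(0))
		return true
	}

	// waitUntilIdle blocks until no delivery is mid-flight, like the
	// new loop in signalWaitUntilIdle.
	func waitUntilIdle() {
		for atomic.LoadUint32(&delivering) != 0 {
			runtime.Gosched()
		}
	}

	func main() {
		deliver(true)
		waitUntilIdle()
	}
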
diff --git a/libgo/go/runtime/sizeclasses.go b/libgo/go/runtime/sizeclasses.go index 5366564afda..9e17b001d3e 100644 --- a/libgo/go/runtime/sizeclasses.go +++ b/libgo/go/runtime/sizeclasses.go @@ -3,73 +3,73 @@ package runtime -// class bytes/obj bytes/span objects waste bytes -// 1 8 8192 1024 0 -// 2 16 8192 512 0 -// 3 32 8192 256 0 -// 4 48 8192 170 32 -// 5 64 8192 128 0 -// 6 80 8192 102 32 -// 7 96 8192 85 32 -// 8 112 8192 73 16 -// 9 128 8192 64 0 -// 10 144 8192 56 128 -// 11 160 8192 51 32 -// 12 176 8192 46 96 -// 13 192 8192 42 128 -// 14 208 8192 39 80 -// 15 224 8192 36 128 -// 16 240 8192 34 32 -// 17 256 8192 32 0 -// 18 288 8192 28 128 -// 19 320 8192 25 192 -// 20 352 8192 23 96 -// 21 384 8192 21 128 -// 22 416 8192 19 288 -// 23 448 8192 18 128 -// 24 480 8192 17 32 -// 25 512 8192 16 0 -// 26 576 8192 14 128 -// 27 640 8192 12 512 -// 28 704 8192 11 448 -// 29 768 8192 10 512 -// 30 896 8192 9 128 -// 31 1024 8192 8 0 -// 32 1152 8192 7 128 -// 33 1280 8192 6 512 -// 34 1408 16384 11 896 -// 35 1536 8192 5 512 -// 36 1792 16384 9 256 -// 37 2048 8192 4 0 -// 38 2304 16384 7 256 -// 39 2688 8192 3 128 -// 40 3072 24576 8 0 -// 41 3200 16384 5 384 -// 42 3456 24576 7 384 -// 43 4096 8192 2 0 -// 44 4864 24576 5 256 -// 45 5376 16384 3 256 -// 46 6144 24576 4 0 -// 47 6528 32768 5 128 -// 48 6784 40960 6 256 -// 49 6912 49152 7 768 -// 50 8192 8192 1 0 -// 51 9472 57344 6 512 -// 52 9728 49152 5 512 -// 53 10240 40960 4 0 -// 54 10880 32768 3 128 -// 55 12288 24576 2 0 -// 56 13568 40960 3 256 -// 57 14336 57344 4 0 -// 58 16384 16384 1 0 -// 59 18432 73728 4 0 -// 60 19072 57344 3 128 -// 61 20480 40960 2 0 -// 62 21760 65536 3 256 -// 63 24576 24576 1 0 -// 64 27264 81920 3 128 -// 65 28672 57344 2 0 -// 66 32768 32768 1 0 +// class bytes/obj bytes/span objects tail waste max waste +// 1 8 8192 1024 0 87.50% +// 2 16 8192 512 0 43.75% +// 3 32 8192 256 0 46.88% +// 4 48 8192 170 32 31.52% +// 5 64 8192 128 0 23.44% +// 6 80 8192 102 32 19.07% +// 7 96 8192 85 32 15.95% +// 8 112 8192 73 16 13.56% +// 9 128 8192 64 0 11.72% +// 10 144 8192 56 128 11.82% +// 11 160 8192 51 32 9.73% +// 12 176 8192 46 96 9.59% +// 13 192 8192 42 128 9.25% +// 14 208 8192 39 80 8.12% +// 15 224 8192 36 128 8.15% +// 16 240 8192 34 32 6.62% +// 17 256 8192 32 0 5.86% +// 18 288 8192 28 128 12.16% +// 19 320 8192 25 192 11.80% +// 20 352 8192 23 96 9.88% +// 21 384 8192 21 128 9.51% +// 22 416 8192 19 288 10.71% +// 23 448 8192 18 128 8.37% +// 24 480 8192 17 32 6.82% +// 25 512 8192 16 0 6.05% +// 26 576 8192 14 128 12.33% +// 27 640 8192 12 512 15.48% +// 28 704 8192 11 448 13.93% +// 29 768 8192 10 512 13.94% +// 30 896 8192 9 128 15.52% +// 31 1024 8192 8 0 12.40% +// 32 1152 8192 7 128 12.41% +// 33 1280 8192 6 512 15.55% +// 34 1408 16384 11 896 14.00% +// 35 1536 8192 5 512 14.00% +// 36 1792 16384 9 256 15.57% +// 37 2048 8192 4 0 12.45% +// 38 2304 16384 7 256 12.46% +// 39 2688 8192 3 128 15.59% +// 40 3072 24576 8 0 12.47% +// 41 3200 16384 5 384 6.22% +// 42 3456 24576 7 384 8.83% +// 43 4096 8192 2 0 15.60% +// 44 4864 24576 5 256 16.65% +// 45 5376 16384 3 256 10.92% +// 46 6144 24576 4 0 12.48% +// 47 6528 32768 5 128 6.23% +// 48 6784 40960 6 256 4.36% +// 49 6912 49152 7 768 3.37% +// 50 8192 8192 1 0 15.61% +// 51 9472 57344 6 512 14.28% +// 52 9728 49152 5 512 3.64% +// 53 10240 40960 4 0 4.99% +// 54 10880 32768 3 128 6.24% +// 55 12288 24576 2 0 11.45% +// 56 13568 40960 3 256 9.99% +// 57 14336 57344 4 0 5.35% +// 58 16384 16384 1 0 12.49% +// 59 18432 73728 4 0 11.11% 
+// 60 19072 57344 3 128 3.57% +// 61 20480 40960 2 0 6.87% +// 62 21760 65536 3 256 6.25% +// 63 24576 24576 1 0 11.45% +// 64 27264 81920 3 128 10.00% +// 65 28672 57344 2 0 4.91% +// 66 32768 32768 1 0 12.50% const ( _MaxSmallSize = 32768 diff --git a/libgo/go/runtime/slice.go b/libgo/go/runtime/slice.go index f61f85e0fcb..ec5aa640222 100644 --- a/libgo/go/runtime/slice.go +++ b/libgo/go/runtime/slice.go @@ -23,6 +23,13 @@ type slice struct { cap int } +// An notInHeapSlice is a slice backed by go:notinheap memory. +type notInHeapSlice struct { + array *notInHeap + len int + cap int +} + // maxElems is a lookup table containing the maximum capacity for a slice. // The index is the size of the slice element. var maxElems = [...]uintptr{ @@ -85,7 +92,7 @@ func makeslice64(et *_type, len64, cap64 int64) slice { // The new slice's length is set to the requested capacity. func growslice(et *_type, old slice, cap int) slice { if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&et)) + callerpc := getcallerpc() racereadrangepc(old.array, uintptr(old.len*int(et.size)), callerpc, funcPC(growslice)) } if msanenabled { @@ -109,12 +116,20 @@ func growslice(et *_type, old slice, cap int) slice { if old.len < 1024 { newcap = doublecap } else { - for newcap < cap { + // Check 0 < newcap to detect overflow + // and prevent an infinite loop. + for 0 < newcap && newcap < cap { newcap += newcap / 4 } + // Set newcap to the requested cap when + // the newcap calculation overflowed. + if newcap <= 0 { + newcap = cap + } } } + var overflow bool var lenmem, newlenmem, capmem uintptr const ptrSize = unsafe.Sizeof((*byte)(nil)) switch et.size { @@ -122,20 +137,37 @@ func growslice(et *_type, old slice, cap int) slice { lenmem = uintptr(old.len) newlenmem = uintptr(cap) capmem = roundupsize(uintptr(newcap)) + overflow = uintptr(newcap) > _MaxMem newcap = int(capmem) case ptrSize: lenmem = uintptr(old.len) * ptrSize newlenmem = uintptr(cap) * ptrSize capmem = roundupsize(uintptr(newcap) * ptrSize) + overflow = uintptr(newcap) > _MaxMem/ptrSize newcap = int(capmem / ptrSize) default: lenmem = uintptr(old.len) * et.size newlenmem = uintptr(cap) * et.size capmem = roundupsize(uintptr(newcap) * et.size) + overflow = uintptr(newcap) > maxSliceCap(et.size) newcap = int(capmem / et.size) } - if cap < old.cap || uintptr(newcap) > maxSliceCap(et.size) { + // The check of overflow (uintptr(newcap) > maxSliceCap(et.size)) + // in addition to capmem > _MaxMem is needed to prevent an overflow + // which can be used to trigger a segfault on 32bit architectures + // with this example program: + // + // type T [1<<27 + 1]int64 + // + // var d T + // var s []T + // + // func main() { + // s = append(s, d, d, d, d) + // print(len(s), "\n") + // } + if cap < old.cap || overflow || capmem > _MaxMem { panic(errorString("growslice: cap out of range")) } @@ -176,7 +208,7 @@ func slicecopy(to, fm slice, width uintptr) int { } if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&to)) + callerpc := getcallerpc() pc := funcPC(slicecopy) racewriterangepc(to.array, uintptr(n*int(width)), callerpc, pc) racereadrangepc(fm.array, uintptr(n*int(width)), callerpc, pc) @@ -207,7 +239,7 @@ func slicestringcopy(to []byte, fm string) int { } if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&to)) + callerpc := getcallerpc() pc := funcPC(slicestringcopy) racewriterangepc(unsafe.Pointer(&to[0]), uintptr(n), callerpc, pc) } diff --git a/libgo/go/runtime/string.go b/libgo/go/runtime/string.go index 7436ddfdf4b..e8df9a6b7c4 100644 --- 
a/libgo/go/runtime/string.go +++ b/libgo/go/runtime/string.go @@ -99,7 +99,7 @@ func slicebytetostring(buf *tmpBuf, b []byte) (str string) { if raceenabled { racereadrangepc(unsafe.Pointer(&b[0]), uintptr(l), - getcallerpc(unsafe.Pointer(&buf)), + getcallerpc(), funcPC(slicebytetostring)) } if msanenabled { @@ -145,7 +145,7 @@ func slicebytetostringtmp(b []byte) string { if raceenabled && len(b) > 0 { racereadrangepc(unsafe.Pointer(&b[0]), uintptr(len(b)), - getcallerpc(unsafe.Pointer(&b)), + getcallerpc(), funcPC(slicebytetostringtmp)) } if msanenabled && len(b) > 0 { @@ -194,7 +194,7 @@ func slicerunetostring(buf *tmpBuf, a []rune) string { if raceenabled && len(a) > 0 { racereadrangepc(unsafe.Pointer(&a[0]), uintptr(len(a))*unsafe.Sizeof(a[0]), - getcallerpc(unsafe.Pointer(&buf)), + getcallerpc(), funcPC(slicerunetostring)) } if msanenabled && len(a) > 0 { diff --git a/libgo/go/runtime/stubs.go b/libgo/go/runtime/stubs.go index 84fa1c79689..c454356b838 100644 --- a/libgo/go/runtime/stubs.go +++ b/libgo/go/runtime/stubs.go @@ -107,16 +107,21 @@ func reflect_memmove(to, from unsafe.Pointer, n uintptr) { func memcmp(a, b unsafe.Pointer, size uintptr) int32 // exported value for testing -var hashLoad = loadFactor +var hashLoad = float32(loadFactorNum) / float32(loadFactorDen) //go:nosplit func fastrand() uint32 { mp := getg().m - fr := mp.fastrand - mx := uint32(int32(fr)>>31) & 0xa8888eef - fr = fr<<1 ^ mx - mp.fastrand = fr - return fr + // Implement xorshift64+: 2 32-bit xorshift sequences added together. + // Shift triplet [17,7,16] was calculated as indicated in Marsaglia's + // Xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf + // This generator passes the SmallCrush suite, part of TestU01 framework: + // http://simul.iro.umontreal.ca/testu01/tu01.html + s1, s0 := mp.fastrand[0], mp.fastrand[1] + s1 ^= s1 << 17 + s1 = s1 ^ s0 ^ s1>>7 ^ s0>>16 + mp.fastrand[0], mp.fastrand[1] = s0, s1 + return s0 + s1 } //go:nosplit @@ -192,14 +197,16 @@ func publicationBarrier() // getcallerpc returns the program counter (PC) of its caller's caller. // getcallersp returns the stack pointer (SP) of its caller's caller. -// For both, the argp must be a pointer to the caller's first function argument. +// argp must be a pointer to the caller's first function argument. // The implementation may or may not use argp, depending on -// the architecture. +// the architecture. The implementation may be a compiler +// intrinsic; there is not necessarily code implementing this +// on every platform. // // For example: // // func f(arg1, arg2, arg3 int) { -// pc := getcallerpc(unsafe.Pointer(&arg1)) +// pc := getcallerpc() // sp := getcallersp(unsafe.Pointer(&arg1)) // } // @@ -219,7 +226,7 @@ func publicationBarrier() // immediately and can only be passed to nosplit functions. //go:noescape -func getcallerpc(argp unsafe.Pointer) uintptr +func getcallerpc() uintptr //go:noescape func getcallersp(argp unsafe.Pointer) uintptr @@ -430,7 +437,7 @@ func setpagesize(s uintptr) { } } -// Temporary for gccgo until we port mgc.go. +// Called by C code during library initialization. 
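
The fastrand rewrite in the stubs.go hunk above replaces the old one-word generator with an xorshift64+ variant split across two 32-bit state words. A standalone version of the same update step, runnable outside the runtime (the seed values are arbitrary nonzero constants, not what the runtime uses):

	package main

	import "fmt"

	// xorshift mirrors the update step of runtime.fastrand; the
	// state must never be all zeros or the sequence collapses.
	type xorshift struct{ s0, s1 uint32 }

	func (x *xorshift) next() uint32 {
		s1, s0 := x.s0, x.s1
		s1 ^= s1 << 17
		s1 = s1 ^ s0 ^ s1>>7 ^ s0>>16
		x.s0, x.s1 = s0, s1
		return s0 + s1
	}

	func main() {
		x := xorshift{s0: 0x9e3779b9, s1: 0x243f6a88} // arbitrary nonzero seed
		for i := 0; i < 4; i++ {
			fmt.Printf("%#08x\n", x.next())
		}
	}

Returning s0 + s1 rather than a raw state word is the "+" in xorshift+; the addition scrambles the low-order bits that plain xorshift generators emit poorly.
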
//go:linkname runtime_m0 runtime.runtime_m0 func runtime_m0() *m { return &m0 diff --git a/libgo/go/runtime/stubs2.go b/libgo/go/runtime/stubs2.go index 490405d51fd..e7607722a64 100644 --- a/libgo/go/runtime/stubs2.go +++ b/libgo/go/runtime/stubs2.go @@ -23,3 +23,10 @@ func write(fd uintptr, p unsafe.Pointer, n int32) int32 //go:noescape func open(name *byte, mode, perm int32) int32 + +// exitThread terminates the current thread, writing *wait = 0 when +// the stack is safe to reclaim. +func exitThread(wait *uint32) { + // This is never used by gccgo. + throw("exitThread") +} diff --git a/libgo/go/runtime/testdata/testprog/gc.go b/libgo/go/runtime/testdata/testprog/gc.go index 744b6108e2b..542451753b7 100644 --- a/libgo/go/runtime/testdata/testprog/gc.go +++ b/libgo/go/runtime/testdata/testprog/gc.go @@ -25,6 +25,7 @@ func GCSys() { runtime.GC() runtime.ReadMemStats(memstats) sys := memstats.Sys + fmt.Printf("original sys: %#x\n", sys) runtime.MemProfileRate = 0 // disable profiler @@ -36,6 +37,8 @@ func GCSys() { // Should only be using a few MB. // We allocated 100 MB or (if not short) 1 GB. runtime.ReadMemStats(memstats) + fmt.Printf("final sys: %#x\n", memstats.Sys) + fmt.Printf("%#v\n", *memstats) if sys > memstats.Sys { sys = 0 } else { diff --git a/libgo/go/runtime/testdata/testprog/gettid.go b/libgo/go/runtime/testdata/testprog/gettid.go new file mode 100644 index 00000000000..1b3e29ab08e --- /dev/null +++ b/libgo/go/runtime/testdata/testprog/gettid.go @@ -0,0 +1,29 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux + +package main + +import ( + "bytes" + "fmt" + "io/ioutil" + "os" + "syscall" +) + +func gettid() int { + return syscall.Gettid() +} + +func tidExists(tid int) (exists, supported bool) { + stat, err := ioutil.ReadFile(fmt.Sprintf("/proc/self/task/%d/stat", tid)) + if os.IsNotExist(err) { + return false, true + } + // Check if it's a zombie thread. + state := bytes.Fields(stat)[2] + return !(len(state) == 1 && state[0] == 'Z'), true +} diff --git a/libgo/go/runtime/testdata/testprog/gettid_none.go b/libgo/go/runtime/testdata/testprog/gettid_none.go new file mode 100644 index 00000000000..036db87e10e --- /dev/null +++ b/libgo/go/runtime/testdata/testprog/gettid_none.go @@ -0,0 +1,15 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !linux + +package main + +func gettid() int { + return 0 +} + +func tidExists(tid int) (exists, supported bool) { + return false, false +} diff --git a/libgo/go/runtime/testdata/testprog/lockosthread.go b/libgo/go/runtime/testdata/testprog/lockosthread.go new file mode 100644 index 00000000000..88c0d12e4c1 --- /dev/null +++ b/libgo/go/runtime/testdata/testprog/lockosthread.go @@ -0,0 +1,94 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "os" + "runtime" + "time" +) + +var mainTID int + +func init() { + registerInit("LockOSThreadMain", func() { + // init is guaranteed to run on the main thread. + mainTID = gettid() + }) + register("LockOSThreadMain", LockOSThreadMain) + + registerInit("LockOSThreadAlt", func() { + // Lock the OS thread now so main runs on the main thread. 
+ runtime.LockOSThread() + }) + register("LockOSThreadAlt", LockOSThreadAlt) +} + +func LockOSThreadMain() { + // gettid only works on Linux, so on other platforms this just + // checks that the runtime doesn't do anything terrible. + + // This requires GOMAXPROCS=1 from the beginning to reliably + // start a goroutine on the main thread. + if runtime.GOMAXPROCS(-1) != 1 { + println("requires GOMAXPROCS=1") + os.Exit(1) + } + + ready := make(chan bool, 1) + go func() { + // Because GOMAXPROCS=1, this *should* be on the main + // thread. Stay there. + runtime.LockOSThread() + if mainTID != 0 && gettid() != mainTID { + println("failed to start goroutine on main thread") + os.Exit(1) + } + // Exit with the thread locked, which should exit the + // main thread. + ready <- true + }() + <-ready + time.Sleep(1 * time.Millisecond) + // Check that this goroutine is still running on a different + // thread. + if mainTID != 0 && gettid() == mainTID { + println("goroutine migrated to locked thread") + os.Exit(1) + } + println("OK") +} + +func LockOSThreadAlt() { + // This is running locked to the main OS thread. + + var subTID int + ready := make(chan bool, 1) + go func() { + // This goroutine must be running on a new thread. + runtime.LockOSThread() + subTID = gettid() + ready <- true + // Exit with the thread locked. + }() + <-ready + runtime.UnlockOSThread() + for i := 0; i < 100; i++ { + time.Sleep(1 * time.Millisecond) + // Check that this goroutine is running on a different thread. + if subTID != 0 && gettid() == subTID { + println("locked thread reused") + os.Exit(1) + } + exists, supported := tidExists(subTID) + if !supported || !exists { + goto ok + } + } + println("sub thread", subTID, "still running") + return +ok: + println("OK") +} diff --git a/libgo/go/runtime/testdata/testprog/syscall_windows.go b/libgo/go/runtime/testdata/testprog/syscall_windows.go index 6e6782e987a..b4b66441b83 100644 --- a/libgo/go/runtime/testdata/testprog/syscall_windows.go +++ b/libgo/go/runtime/testdata/testprog/syscall_windows.go @@ -4,11 +4,18 @@ package main -import "syscall" +import ( + "internal/syscall/windows" + "runtime" + "sync" + "syscall" + "unsafe" +) func init() { register("RaiseException", RaiseException) register("ZeroDivisionException", ZeroDivisionException) + register("StackMemory", StackMemory) } func RaiseException() { @@ -25,3 +32,39 @@ func ZeroDivisionException() { z := x / y println(z) } + +func getPagefileUsage() (uintptr, error) { + p, err := syscall.GetCurrentProcess() + if err != nil { + return 0, err + } + var m windows.PROCESS_MEMORY_COUNTERS + err = windows.GetProcessMemoryInfo(p, &m, uint32(unsafe.Sizeof(m))) + if err != nil { + return 0, err + } + return m.PagefileUsage, nil +} + +func StackMemory() { + mem1, err := getPagefileUsage() + if err != nil { + panic(err) + } + const threadCount = 100 + var wg sync.WaitGroup + for i := 0; i < threadCount; i++ { + wg.Add(1) + go func() { + runtime.LockOSThread() + wg.Done() + select {} + }() + } + wg.Wait() + mem2, err := getPagefileUsage() + if err != nil { + panic(err) + } + print((mem2 - mem1) / threadCount) +} diff --git a/libgo/go/runtime/testdata/testprogcgo/callback.go b/libgo/go/runtime/testdata/testprogcgo/callback.go index a49fc19b284..2f7568c2c4e 100644 --- a/libgo/go/runtime/testdata/testprogcgo/callback.go +++ b/libgo/go/runtime/testdata/testprogcgo/callback.go @@ -34,6 +34,7 @@ import "C" import ( "fmt" + "os" "runtime" ) @@ -68,7 +69,10 @@ func grow1(x, sum *int) int { } func CgoCallbackGC() { - const P = 100 + P := 100 + 
if os.Getenv("RUNTIME_TESTING_SHORT") != "" { + P = 10 + } done := make(chan bool) // allocate a bunch of stack frames and spray them with pointers for i := 0; i < P; i++ { diff --git a/libgo/go/runtime/testdata/testprogcgo/catchpanic.go b/libgo/go/runtime/testdata/testprogcgo/catchpanic.go new file mode 100644 index 00000000000..55a606d1bc8 --- /dev/null +++ b/libgo/go/runtime/testdata/testprogcgo/catchpanic.go @@ -0,0 +1,46 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !plan9,!windows + +package main + +/* +#include <signal.h> +#include <stdlib.h> +#include <string.h> + +static void abrthandler(int signum) { + if (signum == SIGABRT) { + exit(0); // success + } +} + +void registerAbortHandler() { + struct sigaction act; + memset(&act, 0, sizeof act); + act.sa_handler = abrthandler; + sigaction(SIGABRT, &act, NULL); +} + +static void __attribute__ ((constructor)) sigsetup(void) { + if (getenv("CGOCATCHPANIC_EARLY_HANDLER") == NULL) + return; + registerAbortHandler(); +} +*/ +import "C" +import "os" + +func init() { + register("CgoCatchPanic", CgoCatchPanic) +} + +// Test that the SIGABRT raised by panic can be caught by an early signal handler. +func CgoCatchPanic() { + if _, ok := os.LookupEnv("CGOCATCHPANIC_EARLY_HANDLER"); !ok { + C.registerAbortHandler() + } + panic("catch me") +} diff --git a/libgo/go/runtime/testdata/testprogcgo/cgo.go b/libgo/go/runtime/testdata/testprogcgo/cgo.go index 209524a24db..a587db385b3 100644 --- a/libgo/go/runtime/testdata/testprogcgo/cgo.go +++ b/libgo/go/runtime/testdata/testprogcgo/cgo.go @@ -52,7 +52,11 @@ func CgoSignalDeadlock() { time.Sleep(time.Millisecond) start := time.Now() var times []time.Duration - for i := 0; i < 64; i++ { + n := 64 + if os.Getenv("RUNTIME_TEST_SHORT") != "" { + n = 16 + } + for i := 0; i < n; i++ { go func() { runtime.LockOSThread() select {} diff --git a/libgo/go/runtime/testdata/testprogcgo/lockosthread.c b/libgo/go/runtime/testdata/testprogcgo/lockosthread.c new file mode 100644 index 00000000000..b10cc4f3b92 --- /dev/null +++ b/libgo/go/runtime/testdata/testprogcgo/lockosthread.c @@ -0,0 +1,13 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !plan9,!windows + +#include <stdint.h> + +uint32_t threadExited; + +void setExited(void *x) { + __sync_fetch_and_add(&threadExited, 1); +} diff --git a/libgo/go/runtime/testdata/testprogcgo/lockosthread.go b/libgo/go/runtime/testdata/testprogcgo/lockosthread.go new file mode 100644 index 00000000000..36423d9eb0c --- /dev/null +++ b/libgo/go/runtime/testdata/testprogcgo/lockosthread.go @@ -0,0 +1,111 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !plan9,!windows + +package main + +import ( + "os" + "runtime" + "sync/atomic" + "time" + "unsafe" +) + +/* +#include <pthread.h> +#include <stdint.h> + +extern uint32_t threadExited; + +void setExited(void *x); +*/ +import "C" + +var mainThread C.pthread_t + +func init() { + registerInit("LockOSThreadMain", func() { + // init is guaranteed to run on the main thread. + mainThread = C.pthread_self() + }) + register("LockOSThreadMain", LockOSThreadMain) + + registerInit("LockOSThreadAlt", func() { + // Lock the OS thread now so main runs on the main thread. 
+ runtime.LockOSThread() + }) + register("LockOSThreadAlt", LockOSThreadAlt) +} + +func LockOSThreadMain() { + // This requires GOMAXPROCS=1 from the beginning to reliably + // start a goroutine on the main thread. + if runtime.GOMAXPROCS(-1) != 1 { + println("requires GOMAXPROCS=1") + os.Exit(1) + } + + ready := make(chan bool, 1) + go func() { + // Because GOMAXPROCS=1, this *should* be on the main + // thread. Stay there. + runtime.LockOSThread() + self := C.pthread_self() + if C.pthread_equal(mainThread, self) == 0 { + println("failed to start goroutine on main thread") + os.Exit(1) + } + // Exit with the thread locked, which should exit the + // main thread. + ready <- true + }() + <-ready + time.Sleep(1 * time.Millisecond) + // Check that this goroutine is still running on a different + // thread. + self := C.pthread_self() + if C.pthread_equal(mainThread, self) != 0 { + println("goroutine migrated to locked thread") + os.Exit(1) + } + println("OK") +} + +func LockOSThreadAlt() { + // This is running locked to the main OS thread. + + var subThread C.pthread_t + ready := make(chan bool, 1) + C.threadExited = 0 + go func() { + // This goroutine must be running on a new thread. + runtime.LockOSThread() + subThread = C.pthread_self() + // Register a pthread destructor so we can tell this + // thread has exited. + var key C.pthread_key_t + C.pthread_key_create(&key, (*[0]byte)(unsafe.Pointer(C.setExited))) + C.pthread_setspecific(key, unsafe.Pointer(new(int))) + ready <- true + // Exit with the thread locked. + }() + <-ready + for i := 0; i < 100; i++ { + time.Sleep(1 * time.Millisecond) + // Check that this goroutine is running on a different thread. + self := C.pthread_self() + if C.pthread_equal(subThread, self) != 0 { + println("locked thread reused") + os.Exit(1) + } + if atomic.LoadUint32((*uint32)(&C.threadExited)) != 0 { + println("OK") + return + } + } + println("sub thread still running") + os.Exit(1) +} diff --git a/libgo/go/runtime/testdata/testprogcgo/sigstack.go b/libgo/go/runtime/testdata/testprogcgo/sigstack.go new file mode 100644 index 00000000000..e30a5592dcb --- /dev/null +++ b/libgo/go/runtime/testdata/testprogcgo/sigstack.go @@ -0,0 +1,95 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !plan9,!windows + +// Test handling of Go-allocated signal stacks when calling from +// C-created threads with and without signal stacks. (See issue +// #22930.) + +package main + +/* +#include <pthread.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> + +#ifndef MAP_STACK +#define MAP_STACK 0 +#endif + +extern void SigStackCallback(); + +static void* WithSigStack(void* arg __attribute__((unused))) { + // Set up an alternate system stack. + void* base = mmap(0, SIGSTKSZ, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON|MAP_STACK, -1, 0); + if (base == MAP_FAILED) { + perror("mmap failed"); + abort(); + } + stack_t st = {}, ost = {}; + st.ss_sp = (char*)base; + st.ss_flags = 0; + st.ss_size = SIGSTKSZ; + if (sigaltstack(&st, &ost) < 0) { + perror("sigaltstack failed"); + abort(); + } + + // Call Go. + SigStackCallback(); + + // Disable signal stack and protect it so we can detect reuse. + if (ost.ss_flags & SS_DISABLE) { + // Darwin libsystem has a bug where it checks ss_size + // even if SS_DISABLE is set. (The kernel gets it right.) 
+ ost.ss_size = SIGSTKSZ; + } + if (sigaltstack(&ost, NULL) < 0) { + perror("sigaltstack restore failed"); + abort(); + } + mprotect(base, SIGSTKSZ, PROT_NONE); + return NULL; +} + +static void* WithoutSigStack(void* arg __attribute__((unused))) { + SigStackCallback(); + return NULL; +} + +static void DoThread(int sigstack) { + pthread_t tid; + if (sigstack) { + pthread_create(&tid, NULL, WithSigStack, NULL); + } else { + pthread_create(&tid, NULL, WithoutSigStack, NULL); + } + pthread_join(tid, NULL); +} +*/ +import "C" + +func init() { + register("SigStack", SigStack) +} + +func SigStack() { + C.DoThread(0) + C.DoThread(1) + C.DoThread(0) + C.DoThread(1) + println("OK") +} + +var BadPtr *int + +//export SigStackCallback +func SigStackCallback() { + // Cause the Go signal handler to run. + defer func() { recover() }() + *BadPtr = 42 +} diff --git a/libgo/go/runtime/testdata/testprogcgo/stack_windows.go b/libgo/go/runtime/testdata/testprogcgo/stack_windows.go new file mode 100644 index 00000000000..846297a960c --- /dev/null +++ b/libgo/go/runtime/testdata/testprogcgo/stack_windows.go @@ -0,0 +1,54 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import "C" +import ( + "internal/syscall/windows" + "runtime" + "sync" + "syscall" + "unsafe" +) + +func init() { + register("StackMemory", StackMemory) +} + +func getPagefileUsage() (uintptr, error) { + p, err := syscall.GetCurrentProcess() + if err != nil { + return 0, err + } + var m windows.PROCESS_MEMORY_COUNTERS + err = windows.GetProcessMemoryInfo(p, &m, uint32(unsafe.Sizeof(m))) + if err != nil { + return 0, err + } + return m.PagefileUsage, nil +} + +func StackMemory() { + mem1, err := getPagefileUsage() + if err != nil { + panic(err) + } + const threadCount = 100 + var wg sync.WaitGroup + for i := 0; i < threadCount; i++ { + wg.Add(1) + go func() { + runtime.LockOSThread() + wg.Done() + select {} + }() + } + wg.Wait() + mem2, err := getPagefileUsage() + if err != nil { + panic(err) + } + print((mem2 - mem1) / threadCount) +} diff --git a/libgo/go/runtime/time.go b/libgo/go/runtime/time.go index f204830a6f7..93181fde600 100644 --- a/libgo/go/runtime/time.go +++ b/libgo/go/runtime/time.go @@ -6,14 +6,18 @@ package runtime -import "unsafe" +import ( + "runtime/internal/sys" + "unsafe" +) // Package time knows the layout of this structure. // If this struct changes, adjust ../time/sleep.go:/runtimeTimer. // For GOOS=nacl, package syscall knows the layout of this structure. // If this struct changes, adjust ../syscall/net_nacl.go:/runtimeTimer. type timer struct { - i int // heap index + tb *timersBucket // the bucket the timer lives in + i int // heap index // Timer wakes up at when, and then at when+period, ... (period > 0 only) // each time calling f(arg, now) in the timer goroutine, so f must be @@ -25,7 +29,37 @@ type timer struct { seq uintptr } -var timers struct { +// timersLen is the length of timers array. +// +// Ideally, this would be set to GOMAXPROCS, but that would require +// dynamic reallocation +// +// The current value is a compromise between memory usage and performance +// that should cover the majority of GOMAXPROCS values used in the wild. +const timersLen = 64 + +// timers contains "per-P" timer heaps. +// +// Timers are queued into timersBucket associated with the current P, +// so each P may work with its own timers independently of other P instances. 
+// +// Each timersBucket may be associated with multiple P +// if GOMAXPROCS > timersLen. +var timers [timersLen]struct { + timersBucket + + // The padding should eliminate false sharing + // between timersBucket values. + pad [sys.CacheLineSize - unsafe.Sizeof(timersBucket{})%sys.CacheLineSize]byte +} + +func (t *timer) assignBucket() *timersBucket { + id := uint8(getg().m.p.ptr().id) % timersLen + t.tb = &timers[id].timersBucket + return t.tb +} + +type timersBucket struct { lock mutex gp *g created bool @@ -51,18 +85,20 @@ func timeSleep(ns int64) { return } - t := getg().timer + gp := getg() + t := gp.timer if t == nil { t = new(timer) - getg().timer = t + gp.timer = t } *t = timer{} t.when = nanotime() + ns t.f = goroutineReady - t.arg = getg() - lock(&timers.lock) - addtimerLocked(t) - goparkunlock(&timers.lock, "sleep", traceEvGoSleep, 2) + t.arg = gp + tb := t.assignBucket() + lock(&tb.lock) + tb.addtimerLocked(t) + goparkunlock(&tb.lock, "sleep", traceEvGoSleep, 2) } // startTimer adds t to the timer heap. @@ -89,90 +125,98 @@ func goroutineReady(arg interface{}, seq uintptr) { } func addtimer(t *timer) { - lock(&timers.lock) - addtimerLocked(t) - unlock(&timers.lock) + tb := t.assignBucket() + lock(&tb.lock) + tb.addtimerLocked(t) + unlock(&tb.lock) } // Add a timer to the heap and start or kick timerproc if the new timer is // earlier than any of the others. // Timers are locked. -func addtimerLocked(t *timer) { +func (tb *timersBucket) addtimerLocked(t *timer) { // when must never be negative; otherwise timerproc will overflow // during its delta calculation and never expire other runtime timers. if t.when < 0 { t.when = 1<<63 - 1 } - t.i = len(timers.t) - timers.t = append(timers.t, t) - siftupTimer(t.i) + t.i = len(tb.t) + tb.t = append(tb.t, t) + siftupTimer(tb.t, t.i) if t.i == 0 { // siftup moved to top: new earliest deadline. - if timers.sleeping { - timers.sleeping = false - notewakeup(&timers.waitnote) + if tb.sleeping { + tb.sleeping = false + notewakeup(&tb.waitnote) } - if timers.rescheduling { - timers.rescheduling = false - goready(timers.gp, 0) + if tb.rescheduling { + tb.rescheduling = false + goready(tb.gp, 0) } } - if !timers.created { - timers.created = true + if !tb.created { + tb.created = true expectSystemGoroutine() - go timerproc() + go timerproc(tb) } } // Delete timer t from the heap. // Do not need to update the timerproc: if it wakes up early, no big deal. func deltimer(t *timer) bool { - // Dereference t so that any panic happens before the lock is held. - // Discard result, because t might be moving in the heap. - _ = t.i + if t.tb == nil { + // t.tb can be nil if the user created a timer + // directly, without invoking startTimer e.g + // time.Ticker{C: c} + // In this case, return early without any deletion. + // See Issue 21874. + return false + } - lock(&timers.lock) + tb := t.tb + + lock(&tb.lock) // t may not be registered anymore and may have // a bogus i (typically 0, if generated by Go). // Verify it before proceeding. 
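
A note on the bucket scheme introduced above: rather than one global timer heap guarded by one mutex, there are now timersLen (64) independent heaps, each padded out to a cache line to avoid false sharing, and each P indexes into them by its id. A sketch of just the selection arithmetic (illustrative; the real code reads the id from getg().m.p):

	package main

	import "fmt"

	const timersLen = 64

	// bucketFor maps a P id to a bucket. The uint8 truncation mirrors
	// the runtime's cast; when GOMAXPROCS > timersLen several Ps
	// simply share a bucket, which is correct but less parallel.
	func bucketFor(pid int32) int {
		return int(uint8(pid) % timersLen)
	}

	func main() {
		for _, pid := range []int32{0, 1, 63, 64, 130} {
			fmt.Printf("P %3d -> bucket %2d\n", pid, bucketFor(pid))
		}
	}

The deltimer body continues below with the index check that the comment above describes.
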
i := t.i - last := len(timers.t) - 1 - if i < 0 || i > last || timers.t[i] != t { - unlock(&timers.lock) + last := len(tb.t) - 1 + if i < 0 || i > last || tb.t[i] != t { + unlock(&tb.lock) return false } if i != last { - timers.t[i] = timers.t[last] - timers.t[i].i = i + tb.t[i] = tb.t[last] + tb.t[i].i = i } - timers.t[last] = nil - timers.t = timers.t[:last] + tb.t[last] = nil + tb.t = tb.t[:last] if i != last { - siftupTimer(i) - siftdownTimer(i) + siftupTimer(tb.t, i) + siftdownTimer(tb.t, i) } - unlock(&timers.lock) + unlock(&tb.lock) return true } // Timerproc runs the time-driven events. -// It sleeps until the next event in the timers heap. +// It sleeps until the next event in the tb heap. // If addtimer inserts a new earlier event, it wakes timerproc early. -func timerproc() { +func timerproc(tb *timersBucket) { setSystemGoroutine() - timers.gp = getg() + tb.gp = getg() for { - lock(&timers.lock) - timers.sleeping = false + lock(&tb.lock) + tb.sleeping = false now := nanotime() delta := int64(-1) for { - if len(timers.t) == 0 { + if len(tb.t) == 0 { delta = -1 break } - t := timers.t[0] + t := tb.t[0] delta = t.when - now if delta > 0 { break @@ -180,43 +224,43 @@ func timerproc() { if t.period > 0 { // leave in heap but adjust next time to fire t.when += t.period * (1 + -delta/t.period) - siftdownTimer(0) + siftdownTimer(tb.t, 0) } else { // remove from heap - last := len(timers.t) - 1 + last := len(tb.t) - 1 if last > 0 { - timers.t[0] = timers.t[last] - timers.t[0].i = 0 + tb.t[0] = tb.t[last] + tb.t[0].i = 0 } - timers.t[last] = nil - timers.t = timers.t[:last] + tb.t[last] = nil + tb.t = tb.t[:last] if last > 0 { - siftdownTimer(0) + siftdownTimer(tb.t, 0) } t.i = -1 // mark as removed } f := t.f arg := t.arg seq := t.seq - unlock(&timers.lock) + unlock(&tb.lock) if raceenabled { raceacquire(unsafe.Pointer(t)) } f(arg, seq) - lock(&timers.lock) + lock(&tb.lock) } if delta < 0 || faketime > 0 { // No timers left - put goroutine to sleep. - timers.rescheduling = true - goparkunlock(&timers.lock, "timer goroutine (idle)", traceEvGoBlock, 1) + tb.rescheduling = true + goparkunlock(&tb.lock, "timer goroutine (idle)", traceEvGoBlock, 1) continue } // At least one timer pending. Sleep until then. - timers.sleeping = true - timers.sleepUntil = now + delta - noteclear(&timers.waitnote) - unlock(&timers.lock) - notetsleepg(&timers.waitnote, delta) + tb.sleeping = true + tb.sleepUntil = now + delta + noteclear(&tb.waitnote) + unlock(&tb.lock) + notetsleepg(&tb.waitnote, delta) } } @@ -225,28 +269,67 @@ func timejump() *g { return nil } - lock(&timers.lock) - if !timers.created || len(timers.t) == 0 { - unlock(&timers.lock) + for i := range timers { + lock(&timers[i].lock) + } + gp := timejumpLocked() + for i := range timers { + unlock(&timers[i].lock) + } + + return gp +} + +func timejumpLocked() *g { + // Determine a timer bucket with minimum when. + var minT *timer + for i := range timers { + tb := &timers[i] + if !tb.created || len(tb.t) == 0 { + continue + } + t := tb.t[0] + if minT == nil || t.when < minT.when { + minT = t + } + } + if minT == nil || minT.when <= faketime { + return nil + } + + faketime = minT.when + tb := minT.tb + if !tb.rescheduling { return nil } + tb.rescheduling = false + return tb.gp +} + +func timeSleepUntil() int64 { + next := int64(1<<63 - 1) - var gp *g - if faketime < timers.t[0].when { - faketime = timers.t[0].when - if timers.rescheduling { - timers.rescheduling = false - gp = timers.gp + // Determine minimum sleepUntil across all the timer buckets. 
+ // + // The function can not return a precise answer, + // as another timer may pop in as soon as timers have been unlocked. + // So lock the timers one by one instead of all at once. + for i := range timers { + tb := &timers[i] + + lock(&tb.lock) + if tb.sleeping && tb.sleepUntil < next { + next = tb.sleepUntil } + unlock(&tb.lock) } - unlock(&timers.lock) - return gp + + return next } // Heap maintenance algorithms. -func siftupTimer(i int) { - t := timers.t +func siftupTimer(t []*timer, i int) { when := t[i].when tmp := t[i] for i > 0 { @@ -256,14 +339,15 @@ func siftupTimer(i int) { } t[i] = t[p] t[i].i = i - t[p] = tmp - t[p].i = p i = p } + if tmp != t[i] { + t[i] = tmp + t[i].i = i + } } -func siftdownTimer(i int) { - t := timers.t +func siftdownTimer(t []*timer, i int) { n := len(t) when := t[i].when tmp := t[i] @@ -294,10 +378,12 @@ func siftdownTimer(i int) { } t[i] = t[c] t[i].i = i - t[c] = tmp - t[c].i = c i = c } + if tmp != t[i] { + t[i] = tmp + t[i].i = i + } } // Entry points for net, time to call nanotime. @@ -312,4 +398,10 @@ func time_runtimeNano() int64 { return nanotime() } -var startNano int64 = nanotime() +// Monotonic times are reported as offsets from startNano. +// We initialize startNano to nanotime() - 1 so that on systems where +// monotonic time resolution is fairly low (e.g. Windows 2008 +// which appears to have a default resolution of 15ms), +// we avoid ever reporting a nanotime of 0. +// (Callers may want to use 0 as "time not set".) +var startNano int64 = nanotime() - 1 diff --git a/libgo/go/runtime/trace.go b/libgo/go/runtime/trace.go index af9313be37a..8427e76c5a3 100644 --- a/libgo/go/runtime/trace.go +++ b/libgo/go/runtime/trace.go @@ -28,8 +28,8 @@ const ( traceEvProcStop = 6 // stop of P [timestamp] traceEvGCStart = 7 // GC start [timestamp, seq, stack id] traceEvGCDone = 8 // GC done [timestamp] - traceEvGCScanStart = 9 // GC mark termination start [timestamp] - traceEvGCScanDone = 10 // GC mark termination done [timestamp] + traceEvGCSTWStart = 9 // GC STW start [timestamp, kind] + traceEvGCSTWDone = 10 // GC STW done [timestamp] traceEvGCSweepStart = 11 // GC sweep start [timestamp, stack id] traceEvGCSweepDone = 12 // GC sweep done [timestamp, swept, reclaimed] traceEvGoCreate = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id] @@ -235,21 +235,21 @@ func StartTrace() error { trace.timeStart = nanotime() trace.headerWritten = false trace.footerWritten = false - trace.strings = make(map[string]uint64) + + // string to id mapping + // 0 : reserved for an empty string + // remaining: other strings registered by traceString trace.stringSeq = 0 + trace.strings = make(map[string]uint64) + trace.seqGC = 0 _g_.m.startingtrace = false trace.enabled = true // Register runtime goroutine labels. _, pid, bufp := traceAcquireBuffer() - buf := (*bufp).ptr() - if buf == nil { - buf = traceFlush(0).ptr() - (*bufp).set(buf) - } for i, label := range gcMarkWorkerModeStrings[:] { - trace.markWorkerLabels[i], buf = traceString(buf, label) + trace.markWorkerLabels[i], bufp = traceString(bufp, pid, label) } traceReleaseBuffer(pid) @@ -277,10 +277,9 @@ func StopTrace() { traceGoSched() - for _, p := range &allp { - if p == nil { - break - } + // Loop over all allocated Ps because dead Ps may still have + // trace buffers. 
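
allp is now a slice whose length tracks GOMAXPROCS, so when GOMAXPROCS shrinks, Ps past len(allp) are dead but their trace buffers still sit in the backing array; reslicing to cap(allp), as the loop below does, reaches them. The idiom in isolation (hypothetical element type):

	package main

	import "fmt"

	func main() {
		backing := []string{"p0", "p1", "p2", "p3"}
		live := backing[:2] // as after shrinking GOMAXPROCS to 2

		// Reslicing to full capacity visits the dead tail as well.
		for _, p := range live[:cap(live)] {
			fmt.Println(p)
		}
	}
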
+ for _, p := range allp[:cap(allp)] { buf := p.tracebuf if buf != 0 { traceFullQueue(buf) @@ -320,10 +319,7 @@ func StopTrace() { // The lock protects us from races with StartTrace/StopTrace because they do stop-the-world. lock(&trace.lock) - for _, p := range &allp { - if p == nil { - break - } + for _, p := range allp[:cap(allp)] { if p.tracebuf != 0 { throw("trace: non-empty trace buffer in proc") } @@ -382,7 +378,7 @@ func ReadTrace() []byte { trace.headerWritten = true trace.lockOwner = nil unlock(&trace.lock) - return []byte("go 1.9 trace\x00\x00\x00\x00") + return []byte("go 1.10 trace\x00\x00\x00") } // Wait for new data. if trace.fullHead == 0 && !trace.shutdown { @@ -408,9 +404,12 @@ func ReadTrace() []byte { var data []byte data = append(data, traceEvFrequency|0<<traceArgCountShift) data = traceAppend(data, uint64(freq)) - if timers.gp != nil { - data = append(data, traceEvTimerGoroutine|0<<traceArgCountShift) - data = traceAppend(data, uint64(timers.gp.goid)) + for i := range timers { + tb := &timers[i] + if tb.gp != nil { + data = append(data, traceEvTimerGoroutine|0<<traceArgCountShift) + data = traceAppend(data, uint64(tb.gp.goid)) + } } // This will emit a bunch of full buffers, we will pick them up // on the next iteration. @@ -514,18 +513,12 @@ func traceEvent(ev byte, skip int, args ...uint64) { buf := (*bufp).ptr() const maxSize = 2 + 5*traceBytesPerNumber // event type, length, sequence, timestamp, stack id and two add params if buf == nil || len(buf.arr)-buf.pos < maxSize { - buf = traceFlush(traceBufPtrOf(buf)).ptr() + buf = traceFlush(traceBufPtrOf(buf), pid).ptr() (*bufp).set(buf) } ticks := uint64(cputicks()) / traceTickDiv tickDiff := ticks - buf.lastTicks - if buf.pos == 0 { - buf.byte(traceEvBatch | 1<<traceArgCountShift) - buf.varint(uint64(pid)) - buf.varint(ticks) - tickDiff = 0 - } buf.lastTicks = ticks narg := byte(len(args)) if skip >= 0 { @@ -602,7 +595,7 @@ func traceReleaseBuffer(pid int32) { } // traceFlush puts buf onto stack of full buffers and returns an empty buffer. -func traceFlush(buf traceBufPtr) traceBufPtr { +func traceFlush(buf traceBufPtr, pid int32) traceBufPtr { owner := trace.lockOwner dolock := owner == nil || owner != getg().m.curg if dolock { @@ -623,34 +616,51 @@ func traceFlush(buf traceBufPtr) traceBufPtr { bufp := buf.ptr() bufp.link.set(nil) bufp.pos = 0 - bufp.lastTicks = 0 + + // initialize the buffer for a new batch + ticks := uint64(cputicks()) / traceTickDiv + bufp.lastTicks = ticks + bufp.byte(traceEvBatch | 1<<traceArgCountShift) + bufp.varint(uint64(pid)) + bufp.varint(ticks) + if dolock { unlock(&trace.lock) } return buf } -func traceString(buf *traceBuf, s string) (uint64, *traceBuf) { +// traceString adds a string to the trace.strings and returns the id. +func traceString(bufp *traceBufPtr, pid int32, s string) (uint64, *traceBufPtr) { if s == "" { - return 0, buf + return 0, bufp } if id, ok := trace.strings[s]; ok { - return id, buf + return id, bufp } trace.stringSeq++ id := trace.stringSeq trace.strings[s] = id + // memory allocation in above may trigger tracing and + // cause *bufp changes. Following code now works with *bufp, + // so there must be no memory allocation or any activities + // that causes tracing after this point. 
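
The comment above describes a reentrancy hazard: inserting into trace.strings may allocate, allocation may emit trace events, and emitting an event may swap the current buffer, which is why traceString now takes a *traceBufPtr and dereferences it only after the insert. The same discipline in miniature (illustrative names, not the runtime's types):

	package main

	import "fmt"

	type buffer struct{ data []byte }

	var current = &buffer{} // may be swapped by reentrant work

	// sideEffects stands in for the map insert: it can replace the
	// active buffer before control returns to the caller.
	func sideEffects() {
		current = &buffer{}
	}

	func record(bufp **buffer, msg string) {
		sideEffects()
		buf := *bufp // dereference only after the last reentrant call
		buf.data = append(buf.data, msg...)
	}

	func main() {
		record(&current, "hello")
		fmt.Println(string(current.data))
	}
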
+ + buf := (*bufp).ptr() size := 1 + 2*traceBytesPerNumber + len(s) - if len(buf.arr)-buf.pos < size { - buf = traceFlush(traceBufPtrOf(buf)).ptr() + if buf == nil || len(buf.arr)-buf.pos < size { + buf = traceFlush(traceBufPtrOf(buf), pid).ptr() + (*bufp).set(buf) } buf.byte(traceEvString) buf.varint(id) buf.varint(uint64(len(s))) buf.pos += copy(buf.arr[buf.pos:], s) - return id, buf + + (*bufp).set(buf) + return id, bufp } // traceAppend appends v to buf in little-endian-base-128 encoding. @@ -772,7 +782,7 @@ func (tab *traceStackTable) newStack(n int) *traceStack { // releases all memory and resets state. func (tab *traceStackTable) dump() { var tmp [(2 + 4*traceStackSize) * traceBytesPerNumber]byte - buf := traceFlush(0).ptr() + bufp := traceFlush(0, 0) for _, stk := range tab.tab { stk := stk.ptr() for ; stk != nil; stk = stk.link.ptr() { @@ -782,7 +792,7 @@ func (tab *traceStackTable) dump() { tmpbuf = traceAppend(tmpbuf, uint64(len(frames))) for _, f := range frames { var frame traceFrame - frame, buf = traceFrameForPC(buf, f) + frame, bufp = traceFrameForPC(bufp, 0, f) tmpbuf = traceAppend(tmpbuf, uint64(f.pc)) tmpbuf = traceAppend(tmpbuf, uint64(frame.funcID)) tmpbuf = traceAppend(tmpbuf, uint64(frame.fileID)) @@ -790,9 +800,10 @@ func (tab *traceStackTable) dump() { } // Now copy to the buffer. size := 1 + traceBytesPerNumber + len(tmpbuf) - if len(buf.arr)-buf.pos < size { - buf = traceFlush(traceBufPtrOf(buf)).ptr() + if buf := bufp.ptr(); len(buf.arr)-buf.pos < size { + bufp = traceFlush(bufp, 0) } + buf := bufp.ptr() buf.byte(traceEvStack | 3<<traceArgCountShift) buf.varint(uint64(len(tmpbuf))) buf.pos += copy(buf.arr[buf.pos:], tmpbuf) @@ -800,7 +811,7 @@ func (tab *traceStackTable) dump() { } lock(&trace.lock) - traceFullQueue(traceBufPtrOf(buf)) + traceFullQueue(bufp) unlock(&trace.lock) tab.mem.drop() @@ -813,7 +824,10 @@ type traceFrame struct { line uint64 } -func traceFrameForPC(buf *traceBuf, f location) (traceFrame, *traceBuf) { +// traceFrameForPC records the frame information. +// It may allocate memory. +func traceFrameForPC(buf traceBufPtr, pid int32, f location) (traceFrame, traceBufPtr) { + bufp := &buf var frame traceFrame fn := f.function @@ -821,14 +835,14 @@ func traceFrameForPC(buf *traceBuf, f location) (traceFrame, *traceBuf) { if len(fn) > maxLen { fn = fn[len(fn)-maxLen:] } - frame.funcID, buf = traceString(buf, fn) + frame.funcID, bufp = traceString(bufp, pid, fn) frame.line = uint64(f.lineno) file := f.filename if len(file) > maxLen { file = file[len(file)-maxLen:] } - frame.fileID, buf = traceString(buf, file) - return frame, buf + frame.fileID, bufp = traceString(bufp, pid, file) + return frame, (*bufp) } // traceAlloc is a non-thread-safe region allocator. @@ -917,12 +931,12 @@ func traceGCDone() { traceEvent(traceEvGCDone, -1) } -func traceGCScanStart() { - traceEvent(traceEvGCScanStart, -1) +func traceGCSTWStart(kind int) { + traceEvent(traceEvGCSTWStart, -1, uint64(kind)) } -func traceGCScanDone() { - traceEvent(traceEvGCScanDone, -1) +func traceGCSTWDone() { + traceEvent(traceEvGCSTWDone, -1) } // traceGCSweepStart prepares to trace a sweep loop. This does not diff --git a/libgo/go/runtime/trace/example_test.go b/libgo/go/runtime/trace/example_test.go new file mode 100644 index 00000000000..8e0ee5a1a3f --- /dev/null +++ b/libgo/go/runtime/trace/example_test.go @@ -0,0 +1,41 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// +build ignore + +package trace_test + +import ( + "fmt" + "log" + "os" + "runtime/trace" +) + +// Example demonstrates the use of the trace package to trace +// the execution of a Go program. The trace output will be +// written to the file trace.out +func Example() { + f, err := os.Create("trace.out") + if err != nil { + log.Fatalf("failed to create trace output file: %v", err) + } + defer func() { + if err := f.Close(); err != nil { + log.Fatalf("failed to close trace file: %v", err) + } + }() + + if err := trace.Start(f); err != nil { + log.Fatalf("failed to start trace: %v", err) + } + defer trace.Stop() + + // your program here + RunMyProgram() +} + +func RunMyProgram() { + fmt.Printf("this function will be traced") +} diff --git a/libgo/go/runtime/trace/trace.go b/libgo/go/runtime/trace/trace.go index 7cbb8a6e82c..439f998c03a 100644 --- a/libgo/go/runtime/trace/trace.go +++ b/libgo/go/runtime/trace/trace.go @@ -2,13 +2,36 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Go execution tracer. -// The tracer captures a wide range of execution events like goroutine -// creation/blocking/unblocking, syscall enter/exit/block, GC-related events, -// changes of heap size, processor start/stop, etc and writes them to an io.Writer -// in a compact form. A precise nanosecond-precision timestamp and a stack -// trace is captured for most events. A trace can be analyzed later with -// 'go tool trace' command. +// Package trace contains facilities for programs to generate trace +// for Go execution tracer. +// +// The execution trace captures a wide range of execution events such as +// goroutine creation/blocking/unblocking, syscall enter/exit/block, +// GC-related events, changes of heap size, processor start/stop, etc. +// A precise nanosecond-precision timestamp and a stack trace is +// captured for most events. The generated trace can be interpreted +// using `go tool trace`. +// +// Tracing a Go program +// +// Support for tracing tests and benchmarks built with the standard +// testing package is built into `go test`. For example, the following +// command runs the test in the current directory and writes the trace +// file (trace.out). +// +// go test -trace=test.out +// +// This runtime/trace package provides APIs to add equivalent tracing +// support to a standalone program. See the Example that demonstrates +// how to use this API to enable tracing. +// +// There is also a standard HTTP interface to profiling data. Adding the +// following line will install handlers under the /debug/pprof/trace URL +// to download live profiles: +// +// import _ "net/http/pprof" +// +// See the net/http/pprof package for more details. package trace import ( diff --git a/libgo/go/runtime/trace/trace_test.go b/libgo/go/runtime/trace/trace_test.go index c5f64fcf4cf..5fa5b82f8e2 100644 --- a/libgo/go/runtime/trace/trace_test.go +++ b/libgo/go/runtime/trace/trace_test.go @@ -7,6 +7,7 @@ package trace_test import ( "bytes" "flag" + "internal/race" "internal/trace" "io" "io/ioutil" @@ -14,6 +15,7 @@ import ( "os" "runtime" . "runtime/trace" + "strconv" "sync" "testing" "time" @@ -23,6 +25,61 @@ var ( saveTraces = flag.Bool("savetraces", false, "save traces collected by tests") ) +// TestEventBatch tests Flush calls that happen during Start +// don't produce corrupted traces. 
+func TestEventBatch(t *testing.T) {
+ if race.Enabled {
+ t.Skip("skipping in race mode")
+ }
+ if testing.Short() {
+ t.Skip("skipping in short mode")
+ }
+ // During Start, a bunch of records are written to reflect the current
+ // snapshot of the program, including the state of each goroutine.
+ // Some string constants are also written to the trace to aid trace
+ // parsing. This test checks that a Flush of the buffer occurring
+ // during this process doesn't produce a corrupted trace.
+ // Exactly when a Flush happens during Start is hard to control,
+ // so we test with a range of goroutine counts, hoping that one of
+ // them triggers a Flush.
+ // This range was chosen to fill up a ~64KB buffer with traceEvGoCreate
+ // and traceEvGoWaiting events (12-13 bytes per goroutine).
+ for g := 4950; g < 5050; g++ {
+ n := g
+ t.Run("G="+strconv.Itoa(n), func(t *testing.T) {
+ var wg sync.WaitGroup
+ wg.Add(n)
+
+ in := make(chan bool, 1000)
+ for i := 0; i < n; i++ {
+ go func() {
+ <-in
+ wg.Done()
+ }()
+ }
+ buf := new(bytes.Buffer)
+ if err := Start(buf); err != nil {
+ t.Fatalf("failed to start tracing: %v", err)
+ }
+
+ for i := 0; i < n; i++ {
+ in <- true
+ }
+ wg.Wait()
+ Stop()
+
+ _, err := trace.Parse(buf, "")
+ if err == trace.ErrTimeOrder {
+ t.Skipf("skipping trace: %v", err)
+ }
+
+ if err != nil {
+ t.Fatalf("failed to parse trace: %v", err)
+ }
+ })
+ }
+}
+
 func TestTraceStartStop(t *testing.T) {
 buf := new(bytes.Buffer)
 if err := Start(buf); err != nil {
diff --git a/libgo/go/runtime/traceback_gccgo.go b/libgo/go/runtime/traceback_gccgo.go
index 37c569887b0..79f78d8d247 100644
--- a/libgo/go/runtime/traceback_gccgo.go
+++ b/libgo/go/runtime/traceback_gccgo.go
@@ -52,7 +52,7 @@ func c_callers(skip int32, locbuf *location, max int32, keepThunks bool) int32
 // callers returns a stack trace of the current goroutine.
 // The gc version of callers takes []uintptr, but we take []location.
 func callers(skip int, locbuf []location) int {
- n := c_callers(int32(skip), &locbuf[0], int32(len(locbuf)), false)
+ n := c_callers(int32(skip)+1, &locbuf[0], int32(len(locbuf)), false)
 return int(n)
 }

@@ -156,7 +156,7 @@ func goroutineheader(gp *g) {
 if waitfor >= 1 {
 print(", ", waitfor, " minutes")
 }
- if gp.lockedm != nil {
+ if gp.lockedm != 0 {
 print(", locked to thread")
 }
 print("]:\n")
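
The traceback_gccgo.go change above adds 1 to skip so that callers does not report its own frame, matching the gc runtime's convention for runtime.Callers, where skip 0 identifies the Callers frame itself and every wrapper must account for the frame it adds. The convention as seen from user code:

	package main

	import (
		"fmt"
		"runtime"
	)

	func main() {
		pc := make([]uintptr, 8)
		// skip=0 would start at runtime.Callers itself; skip=1 starts
		// at this function. A helper that wraps Callers must pass
		// skip+1 to hide its own frame, as the gccgo fix does.
		n := runtime.Callers(1, pc)
		frames := runtime.CallersFrames(pc[:n])
		for {
			f, more := frames.Next()
			fmt.Println(f.Function)
			if !more {
				break
			}
		}
	}
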