author     Ian Lance Taylor <iant@golang.org>   2019-09-06 18:12:46 +0000
committer  Ian Lance Taylor <ian@gcc.gnu.org>   2019-09-06 18:12:46 +0000
commit     aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13
tree       7e63b06d1eec92beec6997c9d3ab47a5d6a835be /libgo/go/runtime
parent     920ea3b8ba3164b61ac9490dfdfceb6936eda6dd
libgo: update to Go 1.13beta1 release
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/193497
From-SVN: r275473
Diffstat (limited to 'libgo/go/runtime')
92 files changed, 4461 insertions, 1188 deletions
diff --git a/libgo/go/runtime/alg.go b/libgo/go/runtime/alg.go index 0daddf10e11..f96a75d1d99 100644 --- a/libgo/go/runtime/alg.go +++ b/libgo/go/runtime/alg.go @@ -235,6 +235,9 @@ func ifaceeq(x, y iface) bool { panic(errorString("comparing uncomparable type " + t.string())) } if isDirectIface(t) { + // Direct interface types are ptr, chan, map, func, and single-element structs/arrays thereof. + // Maps and funcs are not comparable, so they can't reach here. + // Ptrs, chans, and single-element items can be compared directly using ==. return x.data == y.data } return eq(x.data, y.data) @@ -291,6 +294,7 @@ func efacevaleq(x eface, t *_type, p unsafe.Pointer) bool { panic(errorString("comparing uncomparable type " + t.string())) } if isDirectIface(t) { + // See comment in efaceeq. return x.data == p } return eq(x.data, p) @@ -420,3 +424,21 @@ func initAlgAES() { // Initialize with random data so hash collisions will be hard to engineer. getRandomData(aeskeysched[:]) } + +// Note: These routines perform the read with an native endianness. +func readUnaligned32(p unsafe.Pointer) uint32 { + q := (*[4]byte)(p) + if sys.BigEndian { + return uint32(q[3]) | uint32(q[2])<<8 | uint32(q[1])<<16 | uint32(q[0])<<24 + } + return uint32(q[0]) | uint32(q[1])<<8 | uint32(q[2])<<16 | uint32(q[3])<<24 +} + +func readUnaligned64(p unsafe.Pointer) uint64 { + q := (*[8]byte)(p) + if sys.BigEndian { + return uint64(q[7]) | uint64(q[6])<<8 | uint64(q[5])<<16 | uint64(q[4])<<24 | + uint64(q[3])<<32 | uint64(q[2])<<40 | uint64(q[1])<<48 | uint64(q[0])<<56 + } + return uint64(q[0]) | uint64(q[1])<<8 | uint64(q[2])<<16 | uint64(q[3])<<24 | uint64(q[4])<<32 | uint64(q[5])<<40 | uint64(q[6])<<48 | uint64(q[7])<<56 +} diff --git a/libgo/go/runtime/auxv_none.go b/libgo/go/runtime/auxv_none.go index 3ca617b21eb..3a560a17937 100644 --- a/libgo/go/runtime/auxv_none.go +++ b/libgo/go/runtime/auxv_none.go @@ -7,6 +7,7 @@ // +build !dragonfly // +build !freebsd // +build !netbsd +// +build !openbsd !arm64 // +build !solaris package runtime diff --git a/libgo/go/runtime/cgocall.go b/libgo/go/runtime/cgocall.go index 69c3e443137..587001cb977 100644 --- a/libgo/go/runtime/cgocall.go +++ b/libgo/go/runtime/cgocall.go @@ -101,7 +101,7 @@ const cgoResultFail = "cgo result has Go pointer" // depending on indir. The top parameter is whether we are at the top // level, where Go pointers are allowed. func cgoCheckArg(t *_type, p unsafe.Pointer, indir, top bool, msg string) { - if t.kind&kindNoPointers != 0 { + if t.ptrdata == 0 { // If the type has no pointers there is nothing to do. 
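The readUnaligned32/readUnaligned64 helpers added to alg.go assemble the value byte by byte in the host's byte order. A minimal sketch of the little-endian case, checked against encoding/binary (the program and its names are illustrative, not part of the patch):

```go
package main

import (
	"encoding/binary"
	"fmt"
	"unsafe"
)

// readUnaligned32 mirrors the runtime helper, specialized for a
// little-endian host (the sys.BigEndian branch is omitted).
func readUnaligned32(p unsafe.Pointer) uint32 {
	q := (*[4]byte)(p)
	return uint32(q[0]) | uint32(q[1])<<8 | uint32(q[2])<<16 | uint32(q[3])<<24
}

func main() {
	b := []byte{0x01, 0x02, 0x03, 0x04, 0x05}
	// Start at b[1] so the read is deliberately misaligned.
	got := readUnaligned32(unsafe.Pointer(&b[1]))
	want := binary.LittleEndian.Uint32(b[1:5])
	fmt.Printf("%#x %v\n", got, got == want) // 0x5040302 true
}
```

On a big-endian host the helper instead matches binary.BigEndian.Uint32, which is exactly the branch guarded by sys.BigEndian above.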
return } @@ -164,7 +164,7 @@ func cgoCheckArg(t *_type, p unsafe.Pointer, indir, top bool, msg string) { if !top { panic(errorString(msg)) } - if st.elem.kind&kindNoPointers != 0 { + if st.elem.ptrdata == 0 { return } for i := 0; i < s.cap; i++ { diff --git a/libgo/go/runtime/cgocheck.go b/libgo/go/runtime/cgocheck.go index c9e40473771..130db295acf 100644 --- a/libgo/go/runtime/cgocheck.go +++ b/libgo/go/runtime/cgocheck.go @@ -64,7 +64,7 @@ func cgoCheckWriteBarrier(dst *uintptr, src uintptr) { //go:nosplit //go:nowritebarrier func cgoCheckMemmove(typ *_type, dst, src unsafe.Pointer, off, size uintptr) { - if typ.kind&kindNoPointers != 0 { + if typ.ptrdata == 0 { return } if !cgoIsGoPointer(src) { @@ -83,7 +83,7 @@ func cgoCheckMemmove(typ *_type, dst, src unsafe.Pointer, off, size uintptr) { //go:nosplit //go:nowritebarrier func cgoCheckSliceCopy(typ *_type, dst, src slice, n int) { - if typ.kind&kindNoPointers != 0 { + if typ.ptrdata == 0 { return } if !cgoIsGoPointer(src.array) { @@ -204,7 +204,7 @@ func cgoCheckBits(src unsafe.Pointer, gcbits *byte, off, size uintptr) { //go:nowritebarrier //go:systemstack func cgoCheckUsingType(typ *_type, src unsafe.Pointer, off, size uintptr) { - if typ.kind&kindNoPointers != 0 { + if typ.ptrdata == 0 { return } diff --git a/libgo/go/runtime/chan.go b/libgo/go/runtime/chan.go index a1216cf3223..291fe0013d1 100644 --- a/libgo/go/runtime/chan.go +++ b/libgo/go/runtime/chan.go @@ -107,7 +107,7 @@ func makechan(t *chantype, size int) *hchan { c = (*hchan)(mallocgc(hchanSize, nil, true)) // Race detector uses this location for synchronization. c.buf = c.raceaddr() - case elem.kind&kindNoPointers != 0: + case elem.ptrdata == 0: // Elements do not contain pointers. // Allocate hchan and buf in one call. c = (*hchan)(mallocgc(hchanSize+mem, nil, true)) @@ -700,6 +700,14 @@ func reflect_chanlen(c *hchan) int { return int(c.qcount) } +//go:linkname reflectlite_chanlen internal..z2freflectlite.chanlen +func reflectlite_chanlen(c *hchan) int { + if c == nil { + return 0 + } + return int(c.qcount) +} + //go:linkname reflect_chancap reflect.chancap func reflect_chancap(c *hchan) int { if c == nil { @@ -751,10 +759,8 @@ func (q *waitq) dequeue() *sudog { // We use a flag in the G struct to tell us when someone // else has won the race to signal this goroutine but the goroutine // hasn't removed itself from the queue yet. - if sgp.isSelect { - if !atomic.Cas(&sgp.g.selectDone, 0, 1) { - continue - } + if sgp.isSelect && !atomic.Cas(&sgp.g.selectDone, 0, 1) { + continue } return sgp diff --git a/libgo/go/runtime/cpuprof.go b/libgo/go/runtime/cpuprof.go index e7cf1b4102d..e49625b46e6 100644 --- a/libgo/go/runtime/cpuprof.go +++ b/libgo/go/runtime/cpuprof.go @@ -179,7 +179,7 @@ func (p *cpuProfile) addLostAtomic64(count uint64) { // The details of generating that format have changed, // so this functionality has been removed. // -// Deprecated: use the runtime/pprof package, +// Deprecated: Use the runtime/pprof package, // or the handlers in the net/http/pprof package, // or the testing package's -test.cpuprofile flag instead. 
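Throughout this hunk (and the cgocheck.go, chan.go, and later malloc.go/map.go hunks) the old test kind&kindNoPointers != 0 is replaced by ptrdata == 0: ptrdata is the number of bytes in the prefix of a value that can contain pointers, so zero means the type is pointer-free and needs no scanning. A rough sketch of that idea with an invented typeInfo stand-in (not the runtime's real _type):

```go
package main

import "fmt"

// typeInfo is an invented, cut-down stand-in for the runtime's _type.
type typeInfo struct {
	size    uintptr
	ptrdata uintptr // bytes of the value's prefix that may hold pointers; 0 => pointer-free
}

// needsScan is the new-style check: scan only if the type can hold pointers.
func needsScan(t typeInfo) bool { return t.ptrdata != 0 }

func main() {
	intArray := typeInfo{size: 64, ptrdata: 0} // e.g. [8]int64
	ptrPair := typeInfo{size: 16, ptrdata: 16} // e.g. struct{ a, b *int }
	mixed := typeInfo{size: 24, ptrdata: 8}    // e.g. struct{ p *int; pad [16]byte }
	fmt.Println(needsScan(intArray), needsScan(ptrPair), needsScan(mixed)) // false true true
}
```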
func CPUProfile() []byte { diff --git a/libgo/go/runtime/crash_cgo_test.go b/libgo/go/runtime/crash_cgo_test.go index f437b9a7e0f..2b7d274f953 100644 --- a/libgo/go/runtime/crash_cgo_test.go +++ b/libgo/go/runtime/crash_cgo_test.go @@ -90,9 +90,9 @@ func TestCgoExternalThreadSIGPROF(t *testing.T) { case "plan9", "windows": t.Skipf("no pthreads on %s", runtime.GOOS) } - if runtime.GOARCH == "ppc64" { + if runtime.GOARCH == "ppc64" && runtime.GOOS == "linux" { // TODO(austin) External linking not implemented on - // ppc64 (issue #8912) + // linux/ppc64 (issue #8912) t.Skipf("no external linking on ppc64") } @@ -290,7 +290,7 @@ func testCgoPprof(t *testing.T, buildArg, runArg, top, bottom string) { // See Issue 18243 and Issue 19938. t.Skipf("Skipping failing test on Alpine (golang.org/issue/18243). Ignoring error: %v", err) } - t.Fatal(err) + t.Fatalf("%s\n\n%v", got, err) } fn := strings.TrimSpace(string(got)) defer os.Remove(fn) diff --git a/libgo/go/runtime/crash_unix_test.go b/libgo/go/runtime/crash_unix_test.go index cb5acb13de9..b4b015e09de 100644 --- a/libgo/go/runtime/crash_unix_test.go +++ b/libgo/go/runtime/crash_unix_test.go @@ -34,8 +34,12 @@ func init() { } func TestCrashDumpsAllThreads(t *testing.T) { + if *flagQuick { + t.Skip("-quick") + } + switch runtime.GOOS { - case "darwin", "dragonfly", "freebsd", "linux", "netbsd", "openbsd", "solaris": + case "darwin", "dragonfly", "freebsd", "linux", "netbsd", "openbsd", "illumos", "solaris": default: t.Skipf("skipping; not supported on %v", runtime.GOOS) } @@ -63,7 +67,7 @@ func TestCrashDumpsAllThreads(t *testing.T) { t.Fatalf("failed to create Go file: %v", err) } - cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "a.exe") + cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "a.exe", "main.go") cmd.Dir = dir out, err := testenv.CleanCmdEnv(cmd).CombinedOutput() if err != nil { diff --git a/libgo/go/runtime/debug.go b/libgo/go/runtime/debug.go index 5be2ec42bc9..e480466b4d5 100644 --- a/libgo/go/runtime/debug.go +++ b/libgo/go/runtime/debug.go @@ -65,3 +65,14 @@ func NumGoroutine() int { // PkgPath.Name.FieldName. The value will be true for each field // added. func Fieldtrack(map[string]bool) + +//go:linkname debug_modinfo runtime..z2fdebug.modinfo +func debug_modinfo() string { + return modinfo +} + +// setmodinfo is visible to code generated by cmd/go/internal/modload.ModInfoProg. +//go:linkname setmodinfo runtime.setmodinfo +func setmodinfo(s string) { + modinfo = s +} diff --git a/libgo/go/runtime/debug/mod.go b/libgo/go/runtime/debug/mod.go index f2948c65cbd..58c6ae019af 100644 --- a/libgo/go/runtime/debug/mod.go +++ b/libgo/go/runtime/debug/mod.go @@ -9,20 +9,14 @@ import ( _ "unsafe" // for go:linkname ) -// set using cmd/go/internal/modload.ModInfoProg -var modinfo string - -// setmodinfo is visible to code generated by cmd/go/internal/modload.ModInfoProg. -//go:linkname setmodinfo runtime..z2fdebug.setmodinfo -func setmodinfo(s string) { - modinfo = s -} +// exported from runtime +func modinfo() string // ReadBuildInfo returns the build information embedded // in the running binary. The information is available only // in binaries built with module support. 
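The debug/mod.go hunk above moves the modinfo string into the runtime proper; ReadBuildInfo itself keeps its public behavior. For reference, a typical use of that public API (output depends on whether the binary was built with module support):

```go
package main

import (
	"fmt"
	"runtime/debug"
)

func main() {
	info, ok := debug.ReadBuildInfo()
	if !ok {
		fmt.Println("binary built without module support")
		return
	}
	fmt.Println("main module:", info.Main.Path, info.Main.Version)
	for _, dep := range info.Deps {
		fmt.Println("dep:", dep.Path, dep.Version)
	}
}
```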
func ReadBuildInfo() (info *BuildInfo, ok bool) { - return readBuildInfo(modinfo) + return readBuildInfo(modinfo()) } // BuildInfo represents the build information read from diff --git a/libgo/go/runtime/debuglog.go b/libgo/go/runtime/debuglog.go new file mode 100644 index 00000000000..4f4109f71a6 --- /dev/null +++ b/libgo/go/runtime/debuglog.go @@ -0,0 +1,813 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file provides an internal debug logging facility. The debug +// log is a lightweight, in-memory, per-M ring buffer. By default, the +// runtime prints the debug log on panic. +// +// To print something to the debug log, call dlog to obtain a dlogger +// and use the methods on that to add values. The values will be +// space-separated in the output (much like println). +// +// This facility can be enabled by passing -tags debuglog when +// building. Without this tag, dlog calls compile to nothing. + +package runtime + +import ( + "runtime/internal/atomic" + "unsafe" +) + +// debugLogBytes is the size of each per-M ring buffer. This is +// allocated off-heap to avoid blowing up the M and hence the GC'd +// heap size. +const debugLogBytes = 16 << 10 + +// debugLogStringLimit is the maximum number of bytes in a string. +// Above this, the string will be truncated with "..(n more bytes).." +const debugLogStringLimit = debugLogBytes / 8 + +// dlog returns a debug logger. The caller can use methods on the +// returned logger to add values, which will be space-separated in the +// final output, much like println. The caller must call end() to +// finish the message. +// +// dlog can be used from highly-constrained corners of the runtime: it +// is safe to use in the signal handler, from within the write +// barrier, from within the stack implementation, and in places that +// must be recursively nosplit. +// +// This will be compiled away if built without the debuglog build tag. +// However, argument construction may not be. If any of the arguments +// are not literals or trivial expressions, consider protecting the +// call with "if dlogEnabled". +// +//go:nosplit +//go:nowritebarrierrec +func dlog() *dlogger { + if !dlogEnabled { + return nil + } + + // Get the time. + tick, nano := uint64(cputicks()), uint64(nanotime()) + + // Try to get a cached logger. + l := getCachedDlogger() + + // If we couldn't get a cached logger, try to get one from the + // global pool. + if l == nil { + allp := (*uintptr)(unsafe.Pointer(&allDloggers)) + all := (*dlogger)(unsafe.Pointer(atomic.Loaduintptr(allp))) + for l1 := all; l1 != nil; l1 = l1.allLink { + if atomic.Load(&l1.owned) == 0 && atomic.Cas(&l1.owned, 0, 1) { + l = l1 + break + } + } + } + + // If that failed, allocate a new logger. + if l == nil { + l = (*dlogger)(sysAlloc(unsafe.Sizeof(dlogger{}), nil)) + if l == nil { + throw("failed to allocate debug log") + } + l.w.r.data = &l.w.data + l.owned = 1 + + // Prepend to allDloggers list. + headp := (*uintptr)(unsafe.Pointer(&allDloggers)) + for { + head := atomic.Loaduintptr(headp) + l.allLink = (*dlogger)(unsafe.Pointer(head)) + if atomic.Casuintptr(headp, head, uintptr(unsafe.Pointer(l))) { + break + } + } + } + + // If the time delta is getting too high, write a new sync + // packet. We set the limit so we don't write more than 6 + // bytes of delta in the record header. 
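dlog's slow path above claims a logger from the global allDloggers list by atomically CAS-ing its owned flag from 0 to 1, so no lock is needed even from a signal handler. A self-contained analogue with the public sync/atomic package (types and names invented for illustration):

```go
package main

import (
	"fmt"
	"sync/atomic"
)

type logger struct {
	owned   uint32  // accessed atomically; 0 = free, 1 = claimed
	allLink *logger // next logger in the prepend-only global list
}

// claim walks the list and takes ownership of the first free logger,
// mirroring the CAS loop in dlog.
func claim(all *logger) *logger {
	for l := all; l != nil; l = l.allLink {
		if atomic.LoadUint32(&l.owned) == 0 && atomic.CompareAndSwapUint32(&l.owned, 0, 1) {
			return l
		}
	}
	return nil // caller would allocate a fresh logger here
}

// release returns a logger to the pool, as dlogger.end does.
func release(l *logger) { atomic.StoreUint32(&l.owned, 0) }

func main() {
	a := &logger{}
	b := &logger{allLink: a} // list head is b -> a
	first := claim(b)        // claims b
	second := claim(b)       // b is taken, so this claims a
	fmt.Println(first == b, second == a) // true true
	release(first)
	release(second)
}
```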
+ const deltaLimit = 1<<(3*7) - 1 // ~2ms between sync packets + if tick-l.w.tick > deltaLimit || nano-l.w.nano > deltaLimit { + l.w.writeSync(tick, nano) + } + + // Reserve space for framing header. + l.w.ensure(debugLogHeaderSize) + l.w.write += debugLogHeaderSize + + // Write record header. + l.w.uvarint(tick - l.w.tick) + l.w.uvarint(nano - l.w.nano) + gp := getg() + if gp != nil && gp.m != nil && gp.m.p != 0 { + l.w.varint(int64(gp.m.p.ptr().id)) + } else { + l.w.varint(-1) + } + + return l +} + +// A dlogger writes to the debug log. +// +// To obtain a dlogger, call dlog(). When done with the dlogger, call +// end(). +// +//go:notinheap +type dlogger struct { + w debugLogWriter + + // allLink is the next dlogger in the allDloggers list. + allLink *dlogger + + // owned indicates that this dlogger is owned by an M. This is + // accessed atomically. + owned uint32 +} + +// allDloggers is a list of all dloggers, linked through +// dlogger.allLink. This is accessed atomically. This is prepend only, +// so it doesn't need to protect against ABA races. +var allDloggers *dlogger + +//go:nosplit +func (l *dlogger) end() { + if !dlogEnabled { + return + } + + // Fill in framing header. + size := l.w.write - l.w.r.end + if !l.w.writeFrameAt(l.w.r.end, size) { + throw("record too large") + } + + // Commit the record. + l.w.r.end = l.w.write + + // Attempt to return this logger to the cache. + if putCachedDlogger(l) { + return + } + + // Return the logger to the global pool. + atomic.Store(&l.owned, 0) +} + +const ( + debugLogUnknown = 1 + iota + debugLogBoolTrue + debugLogBoolFalse + debugLogInt + debugLogUint + debugLogHex + debugLogPtr + debugLogString + debugLogConstString + debugLogStringOverflow + + debugLogPC + debugLogTraceback +) + +//go:nosplit +func (l *dlogger) b(x bool) *dlogger { + if !dlogEnabled { + return l + } + if x { + l.w.byte(debugLogBoolTrue) + } else { + l.w.byte(debugLogBoolFalse) + } + return l +} + +//go:nosplit +func (l *dlogger) i(x int) *dlogger { + return l.i64(int64(x)) +} + +//go:nosplit +func (l *dlogger) i8(x int8) *dlogger { + return l.i64(int64(x)) +} + +//go:nosplit +func (l *dlogger) i16(x int16) *dlogger { + return l.i64(int64(x)) +} + +//go:nosplit +func (l *dlogger) i32(x int32) *dlogger { + return l.i64(int64(x)) +} + +//go:nosplit +func (l *dlogger) i64(x int64) *dlogger { + if !dlogEnabled { + return l + } + l.w.byte(debugLogInt) + l.w.varint(x) + return l +} + +//go:nosplit +func (l *dlogger) u(x uint) *dlogger { + return l.u64(uint64(x)) +} + +//go:nosplit +func (l *dlogger) uptr(x uintptr) *dlogger { + return l.u64(uint64(x)) +} + +//go:nosplit +func (l *dlogger) u8(x uint8) *dlogger { + return l.u64(uint64(x)) +} + +//go:nosplit +func (l *dlogger) u16(x uint16) *dlogger { + return l.u64(uint64(x)) +} + +//go:nosplit +func (l *dlogger) u32(x uint32) *dlogger { + return l.u64(uint64(x)) +} + +//go:nosplit +func (l *dlogger) u64(x uint64) *dlogger { + if !dlogEnabled { + return l + } + l.w.byte(debugLogUint) + l.w.uvarint(x) + return l +} + +//go:nosplit +func (l *dlogger) hex(x uint64) *dlogger { + if !dlogEnabled { + return l + } + l.w.byte(debugLogHex) + l.w.uvarint(x) + return l +} + +//go:nosplit +func (l *dlogger) p(x interface{}) *dlogger { + if !dlogEnabled { + return l + } + l.w.byte(debugLogPtr) + if x == nil { + l.w.uvarint(0) + } else { + v := efaceOf(&x) + switch v._type.kind & kindMask { + case kindChan, kindFunc, kindMap, kindPtr, kindUnsafePointer: + l.w.uvarint(uint64(uintptr(v.data))) + default: + throw("not a pointer type") + } + } 
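dlogger.p above only accepts pointer-shaped interface payloads (chan, func, map, ptr, unsafe.Pointer), because it logs the single data word of the eface directly. A user-level analogue of that kind check using reflect instead of the runtime's internal kind bits (illustrative only):

```go
package main

import (
	"fmt"
	"reflect"
	"unsafe"
)

// isPointerShaped reports whether x's dynamic type stores a single pointer
// word, which is what dlogger.p requires before logging the raw data word.
func isPointerShaped(x interface{}) bool {
	if x == nil {
		return true // dlogger.p logs nil as 0
	}
	switch reflect.ValueOf(x).Kind() {
	case reflect.Chan, reflect.Func, reflect.Map, reflect.Ptr, reflect.UnsafePointer:
		return true
	}
	return false
}

func main() {
	v := 42
	fmt.Println(isPointerShaped(&v))                 // true
	fmt.Println(isPointerShaped(unsafe.Pointer(&v))) // true
	fmt.Println(isPointerShaped(v))                  // false: the runtime would throw
}
```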
+ return l +} + +// C function to get an address before the read-only data segment. +func getText() uintptr + +// C function to get an address after the read-only data segment. +func getEtext() uintptr + +//go:nosplit +func (l *dlogger) s(x string) *dlogger { + if !dlogEnabled { + return l + } + str := stringStructOf(&x) + text := getText() + etext := getEtext() + if len(x) > 4 && text != 0 && etext != 0 && text <= uintptr(str.str) && uintptr(str.str) < etext { + // String constants are in the rodata section, which + // isn't recorded in moduledata. But it has to be + // somewhere between etext and end. + l.w.byte(debugLogConstString) + l.w.uvarint(uint64(str.len)) + l.w.uvarint(uint64(uintptr(str.str) - text)) + } else { + l.w.byte(debugLogString) + var b []byte + bb := (*slice)(unsafe.Pointer(&b)) + bb.array = str.str + bb.len, bb.cap = str.len, str.len + if len(b) > debugLogStringLimit { + b = b[:debugLogStringLimit] + } + l.w.uvarint(uint64(len(b))) + l.w.bytes(b) + if len(b) != len(x) { + l.w.byte(debugLogStringOverflow) + l.w.uvarint(uint64(len(x) - len(b))) + } + } + return l +} + +//go:nosplit +func (l *dlogger) pc(x uintptr) *dlogger { + if !dlogEnabled { + return l + } + l.w.byte(debugLogPC) + l.w.uvarint(uint64(x)) + return l +} + +//go:nosplit +func (l *dlogger) traceback(x []uintptr) *dlogger { + if !dlogEnabled { + return l + } + l.w.byte(debugLogTraceback) + l.w.uvarint(uint64(len(x))) + for _, pc := range x { + l.w.uvarint(uint64(pc)) + } + return l +} + +// A debugLogWriter is a ring buffer of binary debug log records. +// +// A log record consists of a 2-byte framing header and a sequence of +// fields. The framing header gives the size of the record as a little +// endian 16-bit value. Each field starts with a byte indicating its +// type, followed by type-specific data. If the size in the framing +// header is 0, it's a sync record consisting of two little endian +// 64-bit values giving a new time base. +// +// Because this is a ring buffer, new records will eventually +// overwrite old records. Hence, it maintains a reader that consumes +// the log as it gets overwritten. That reader state is where an +// actual log reader would start. +// +//go:notinheap +type debugLogWriter struct { + write uint64 + data debugLogBuf + + // tick and nano are the time bases from the most recently + // written sync record. + tick, nano uint64 + + // r is a reader that consumes records as they get overwritten + // by the writer. It also acts as the initial reader state + // when printing the log. + r debugLogReader + + // buf is a scratch buffer for encoding. This is here to + // reduce stack usage. + buf [10]byte +} + +//go:notinheap +type debugLogBuf [debugLogBytes]byte + +const ( + // debugLogHeaderSize is the number of bytes in the framing + // header of every dlog record. + debugLogHeaderSize = 2 + + // debugLogSyncSize is the number of bytes in a sync record. + debugLogSyncSize = debugLogHeaderSize + 2*8 +) + +//go:nosplit +func (l *debugLogWriter) ensure(n uint64) { + for l.write+n >= l.r.begin+uint64(len(l.data)) { + // Consume record at begin. + if l.r.skip() == ^uint64(0) { + // Wrapped around within a record. + // + // TODO(austin): It would be better to just + // eat the whole buffer at this point, but we + // have to communicate that to the reader + // somehow. 
+ throw("record wrapped around") + } + } +} + +//go:nosplit +func (l *debugLogWriter) writeFrameAt(pos, size uint64) bool { + l.data[pos%uint64(len(l.data))] = uint8(size) + l.data[(pos+1)%uint64(len(l.data))] = uint8(size >> 8) + return size <= 0xFFFF +} + +//go:nosplit +func (l *debugLogWriter) writeSync(tick, nano uint64) { + l.tick, l.nano = tick, nano + l.ensure(debugLogHeaderSize) + l.writeFrameAt(l.write, 0) + l.write += debugLogHeaderSize + l.writeUint64LE(tick) + l.writeUint64LE(nano) + l.r.end = l.write +} + +//go:nosplit +func (l *debugLogWriter) writeUint64LE(x uint64) { + var b [8]byte + b[0] = byte(x) + b[1] = byte(x >> 8) + b[2] = byte(x >> 16) + b[3] = byte(x >> 24) + b[4] = byte(x >> 32) + b[5] = byte(x >> 40) + b[6] = byte(x >> 48) + b[7] = byte(x >> 56) + l.bytes(b[:]) +} + +//go:nosplit +func (l *debugLogWriter) byte(x byte) { + l.ensure(1) + pos := l.write + l.write++ + l.data[pos%uint64(len(l.data))] = x +} + +//go:nosplit +func (l *debugLogWriter) bytes(x []byte) { + l.ensure(uint64(len(x))) + pos := l.write + l.write += uint64(len(x)) + for len(x) > 0 { + n := copy(l.data[pos%uint64(len(l.data)):], x) + pos += uint64(n) + x = x[n:] + } +} + +//go:nosplit +func (l *debugLogWriter) varint(x int64) { + var u uint64 + if x < 0 { + u = (^uint64(x) << 1) | 1 // complement i, bit 0 is 1 + } else { + u = (uint64(x) << 1) // do not complement i, bit 0 is 0 + } + l.uvarint(u) +} + +//go:nosplit +func (l *debugLogWriter) uvarint(u uint64) { + i := 0 + for u >= 0x80 { + l.buf[i] = byte(u) | 0x80 + u >>= 7 + i++ + } + l.buf[i] = byte(u) + i++ + l.bytes(l.buf[:i]) +} + +type debugLogReader struct { + data *debugLogBuf + + // begin and end are the positions in the log of the beginning + // and end of the log data, modulo len(data). + begin, end uint64 + + // tick and nano are the current time base at begin. + tick, nano uint64 +} + +//go:nosplit +func (r *debugLogReader) skip() uint64 { + // Read size at pos. + if r.begin+debugLogHeaderSize > r.end { + return ^uint64(0) + } + size := uint64(r.readUint16LEAt(r.begin)) + if size == 0 { + // Sync packet. + r.tick = r.readUint64LEAt(r.begin + debugLogHeaderSize) + r.nano = r.readUint64LEAt(r.begin + debugLogHeaderSize + 8) + size = debugLogSyncSize + } + if r.begin+size > r.end { + return ^uint64(0) + } + r.begin += size + return size +} + +//go:nosplit +func (r *debugLogReader) readUint16LEAt(pos uint64) uint16 { + return uint16(r.data[pos%uint64(len(r.data))]) | + uint16(r.data[(pos+1)%uint64(len(r.data))])<<8 +} + +//go:nosplit +func (r *debugLogReader) readUint64LEAt(pos uint64) uint64 { + var b [8]byte + for i := range b { + b[i] = r.data[pos%uint64(len(r.data))] + pos++ + } + return uint64(b[0]) | uint64(b[1])<<8 | + uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | + uint64(b[6])<<48 | uint64(b[7])<<56 +} + +func (r *debugLogReader) peek() (tick uint64) { + // Consume any sync records. + size := uint64(0) + for size == 0 { + if r.begin+debugLogHeaderSize > r.end { + return ^uint64(0) + } + size = uint64(r.readUint16LEAt(r.begin)) + if size != 0 { + break + } + if r.begin+debugLogSyncSize > r.end { + return ^uint64(0) + } + // Sync packet. + r.tick = r.readUint64LEAt(r.begin + debugLogHeaderSize) + r.nano = r.readUint64LEAt(r.begin + debugLogHeaderSize + 8) + r.begin += debugLogSyncSize + } + + // Peek tick delta. 
+ if r.begin+size > r.end { + return ^uint64(0) + } + pos := r.begin + debugLogHeaderSize + var u uint64 + for i := uint(0); ; i += 7 { + b := r.data[pos%uint64(len(r.data))] + pos++ + u |= uint64(b&^0x80) << i + if b&0x80 == 0 { + break + } + } + if pos > r.begin+size { + return ^uint64(0) + } + return r.tick + u +} + +func (r *debugLogReader) header() (end, tick, nano uint64, p int) { + // Read size. We've already skipped sync packets and checked + // bounds in peek. + size := uint64(r.readUint16LEAt(r.begin)) + end = r.begin + size + r.begin += debugLogHeaderSize + + // Read tick, nano, and p. + tick = r.uvarint() + r.tick + nano = r.uvarint() + r.nano + p = int(r.varint()) + + return +} + +func (r *debugLogReader) uvarint() uint64 { + var u uint64 + for i := uint(0); ; i += 7 { + b := r.data[r.begin%uint64(len(r.data))] + r.begin++ + u |= uint64(b&^0x80) << i + if b&0x80 == 0 { + break + } + } + return u +} + +func (r *debugLogReader) varint() int64 { + u := r.uvarint() + var v int64 + if u&1 == 0 { + v = int64(u >> 1) + } else { + v = ^int64(u >> 1) + } + return v +} + +func (r *debugLogReader) printVal() bool { + typ := r.data[r.begin%uint64(len(r.data))] + r.begin++ + + switch typ { + default: + print("<unknown field type ", hex(typ), " pos ", r.begin-1, " end ", r.end, ">\n") + return false + + case debugLogUnknown: + print("<unknown kind>") + + case debugLogBoolTrue: + print(true) + + case debugLogBoolFalse: + print(false) + + case debugLogInt: + print(r.varint()) + + case debugLogUint: + print(r.uvarint()) + + case debugLogHex, debugLogPtr: + print(hex(r.uvarint())) + + case debugLogString: + sl := r.uvarint() + if r.begin+sl > r.end { + r.begin = r.end + print("<string length corrupted>") + break + } + for sl > 0 { + b := r.data[r.begin%uint64(len(r.data)):] + if uint64(len(b)) > sl { + b = b[:sl] + } + r.begin += uint64(len(b)) + sl -= uint64(len(b)) + gwrite(b) + } + + case debugLogConstString: + len, ptr := int(r.uvarint()), uintptr(r.uvarint()) + ptr += getText() + str := stringStruct{ + str: unsafe.Pointer(ptr), + len: len, + } + s := *(*string)(unsafe.Pointer(&str)) + print(s) + + case debugLogStringOverflow: + print("..(", r.uvarint(), " more bytes)..") + + case debugLogPC: + printDebugLogPC(uintptr(r.uvarint())) + + case debugLogTraceback: + n := int(r.uvarint()) + for i := 0; i < n; i++ { + print("\n\t") + printDebugLogPC(uintptr(r.uvarint())) + } + } + + return true +} + +// printDebugLog prints the debug log. +func printDebugLog() { + if !dlogEnabled { + return + } + + // This function should not panic or throw since it is used in + // the fatal panic path and this may deadlock. + + printlock() + + // Get the list of all debug logs. + allp := (*uintptr)(unsafe.Pointer(&allDloggers)) + all := (*dlogger)(unsafe.Pointer(atomic.Loaduintptr(allp))) + + // Count the logs. + n := 0 + for l := all; l != nil; l = l.allLink { + n++ + } + if n == 0 { + printunlock() + return + } + + // Prepare read state for all logs. + type readState struct { + debugLogReader + first bool + lost uint64 + nextTick uint64 + } + state1 := sysAlloc(unsafe.Sizeof(readState{})*uintptr(n), nil) + if state1 == nil { + println("failed to allocate read state for", n, "logs") + printunlock() + return + } + state := (*[1 << 20]readState)(state1)[:n] + { + l := all + for i := range state { + s := &state[i] + s.debugLogReader = l.w.r + s.first = true + s.lost = l.w.r.begin + s.nextTick = s.peek() + l = l.allLink + } + } + + // Print records. + for { + // Find the next record. 
+ var best struct { + tick uint64 + i int + } + best.tick = ^uint64(0) + for i := range state { + if state[i].nextTick < best.tick { + best.tick = state[i].nextTick + best.i = i + } + } + if best.tick == ^uint64(0) { + break + } + + // Print record. + s := &state[best.i] + if s.first { + print(">> begin log ", best.i) + if s.lost != 0 { + print("; lost first ", s.lost>>10, "KB") + } + print(" <<\n") + s.first = false + } + + end, _, nano, p := s.header() + oldEnd := s.end + s.end = end + + print("[") + var tmpbuf [21]byte + pnano := int64(nano) - runtimeInitTime + if pnano < 0 { + // Logged before runtimeInitTime was set. + pnano = 0 + } + print(string(itoaDiv(tmpbuf[:], uint64(pnano), 9))) + print(" P ", p, "] ") + + for i := 0; s.begin < s.end; i++ { + if i > 0 { + print(" ") + } + if !s.printVal() { + // Abort this P log. + print("<aborting P log>") + end = oldEnd + break + } + } + println() + + // Move on to the next record. + s.begin = end + s.end = oldEnd + s.nextTick = s.peek() + } + + printunlock() +} + +func printDebugLogPC(pc uintptr) { + print(hex(pc)) + name, file, line, _ := funcfileline(pc, -1) + if name == "" { + print(" [unknown PC]") + } else { + print(" [", name, "+", hex(pc-funcentry(pc)), + " ", file, ":", line, "]") + } +} diff --git a/libgo/go/runtime/debuglog_off.go b/libgo/go/runtime/debuglog_off.go new file mode 100644 index 00000000000..bb3e172498e --- /dev/null +++ b/libgo/go/runtime/debuglog_off.go @@ -0,0 +1,19 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !debuglog + +package runtime + +const dlogEnabled = false + +type dlogPerM struct{} + +func getCachedDlogger() *dlogger { + return nil +} + +func putCachedDlogger(l *dlogger) bool { + return false +} diff --git a/libgo/go/runtime/debuglog_on.go b/libgo/go/runtime/debuglog_on.go new file mode 100644 index 00000000000..3d477e8ef5f --- /dev/null +++ b/libgo/go/runtime/debuglog_on.go @@ -0,0 +1,45 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build debuglog + +package runtime + +const dlogEnabled = true + +// dlogPerM is the per-M debug log data. This is embedded in the m +// struct. +type dlogPerM struct { + dlogCache *dlogger +} + +// getCachedDlogger returns a cached dlogger if it can do so +// efficiently, or nil otherwise. The returned dlogger will be owned. +func getCachedDlogger() *dlogger { + mp := acquirem() + // We don't return a cached dlogger if we're running on the + // signal stack in case the signal arrived while in + // get/putCachedDlogger. (Too bad we don't have non-atomic + // exchange!) + var l *dlogger + if getg() != mp.gsignal { + l = mp.dlogCache + mp.dlogCache = nil + } + releasem(mp) + return l +} + +// putCachedDlogger attempts to return l to the local cache. It +// returns false if this fails. +func putCachedDlogger(l *dlogger) bool { + mp := acquirem() + if getg() != mp.gsignal && mp.dlogCache == nil { + mp.dlogCache = l + releasem(mp) + return true + } + releasem(mp) + return false +} diff --git a/libgo/go/runtime/debuglog_test.go b/libgo/go/runtime/debuglog_test.go new file mode 100644 index 00000000000..2570e3565bf --- /dev/null +++ b/libgo/go/runtime/debuglog_test.go @@ -0,0 +1,158 @@ +// Copyright 2019 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// TODO(austin): All of these tests are skipped if the debuglog build +// tag isn't provided. That means we basically never test debuglog. +// There are two potential ways around this: +// +// 1. Make these tests re-build the runtime test with the debuglog +// build tag and re-invoke themselves. +// +// 2. Always build the whole debuglog infrastructure and depend on +// linker dead-code elimination to drop it. This is easy for dlog() +// since there won't be any calls to it. For printDebugLog, we can +// make panic call a wrapper that is call printDebugLog if the +// debuglog build tag is set, or otherwise do nothing. Then tests +// could call printDebugLog directly. This is the right answer in +// principle, but currently our linker reads in all symbols +// regardless, so this would slow down and bloat all links. If the +// linker gets more efficient about this, we should revisit this +// approach. + +package runtime_test + +import ( + "bytes" + "fmt" + "regexp" + "runtime" + "strings" + "sync" + "sync/atomic" + "testing" +) + +func skipDebugLog(t *testing.T) { + if !runtime.DlogEnabled { + t.Skip("debug log disabled (rebuild with -tags debuglog)") + } +} + +func dlogCanonicalize(x string) string { + begin := regexp.MustCompile(`(?m)^>> begin log \d+ <<\n`) + x = begin.ReplaceAllString(x, "") + prefix := regexp.MustCompile(`(?m)^\[[^]]+\]`) + x = prefix.ReplaceAllString(x, "[]") + return x +} + +func TestDebugLog(t *testing.T) { + skipDebugLog(t) + runtime.ResetDebugLog() + runtime.Dlog().S("testing").End() + got := dlogCanonicalize(runtime.DumpDebugLog()) + if want := "[] testing\n"; got != want { + t.Fatalf("want %q, got %q", want, got) + } +} + +func TestDebugLogTypes(t *testing.T) { + skipDebugLog(t) + runtime.ResetDebugLog() + var varString = strings.Repeat("a", 4) + runtime.Dlog().B(true).B(false).I(-42).I16(0x7fff).U64(^uint64(0)).Hex(0xfff).P(nil).S(varString).S("const string").End() + got := dlogCanonicalize(runtime.DumpDebugLog()) + if want := "[] true false -42 32767 18446744073709551615 0xfff 0x0 aaaa const string\n"; got != want { + t.Fatalf("want %q, got %q", want, got) + } +} + +func TestDebugLogSym(t *testing.T) { + skipDebugLog(t) + runtime.ResetDebugLog() + pc, _, _, _ := runtime.Caller(0) + runtime.Dlog().PC(pc).End() + got := dlogCanonicalize(runtime.DumpDebugLog()) + want := regexp.MustCompile(`\[\] 0x[0-9a-f]+ \[runtime_test\.TestDebugLogSym\+0x[0-9a-f]+ .*/debuglog_test\.go:[0-9]+\]\n`) + if !want.MatchString(got) { + t.Fatalf("want matching %s, got %q", want, got) + } +} + +func TestDebugLogInterleaving(t *testing.T) { + skipDebugLog(t) + runtime.ResetDebugLog() + var wg sync.WaitGroup + done := int32(0) + wg.Add(1) + go func() { + // Encourage main goroutine to move around to + // different Ms and Ps. + for atomic.LoadInt32(&done) == 0 { + runtime.Gosched() + } + wg.Done() + }() + var want bytes.Buffer + for i := 0; i < 1000; i++ { + runtime.Dlog().I(i).End() + fmt.Fprintf(&want, "[] %d\n", i) + runtime.Gosched() + } + atomic.StoreInt32(&done, 1) + wg.Wait() + + gotFull := runtime.DumpDebugLog() + got := dlogCanonicalize(gotFull) + if got != want.String() { + // Since the timestamps are useful in understand + // failures of this test, we print the uncanonicalized + // output. 
+ t.Fatalf("want %q, got (uncanonicalized) %q", want.String(), gotFull) + } +} + +func TestDebugLogWraparound(t *testing.T) { + skipDebugLog(t) + + // Make sure we don't switch logs so it's easier to fill one up. + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + runtime.ResetDebugLog() + var longString = strings.Repeat("a", 128) + var want bytes.Buffer + for i, j := 0, 0; j < 2*runtime.DebugLogBytes; i, j = i+1, j+len(longString) { + runtime.Dlog().I(i).S(longString).End() + fmt.Fprintf(&want, "[] %d %s\n", i, longString) + } + log := runtime.DumpDebugLog() + + // Check for "lost" message. + lost := regexp.MustCompile(`^>> begin log \d+; lost first \d+KB <<\n`) + if !lost.MatchString(log) { + t.Fatalf("want matching %s, got %q", lost, log) + } + idx := lost.FindStringIndex(log) + // Strip lost message. + log = dlogCanonicalize(log[idx[1]:]) + + // Check log. + if !strings.HasSuffix(want.String(), log) { + t.Fatalf("wrong suffix:\n%s", log) + } +} + +func TestDebugLogLongString(t *testing.T) { + skipDebugLog(t) + + runtime.ResetDebugLog() + var longString = strings.Repeat("a", runtime.DebugLogStringLimit+1) + runtime.Dlog().S(longString).End() + got := dlogCanonicalize(runtime.DumpDebugLog()) + want := "[] " + strings.Repeat("a", runtime.DebugLogStringLimit) + " ..(1 more bytes)..\n" + if got != want { + t.Fatalf("want %q, got %q", want, got) + } +} diff --git a/libgo/go/runtime/export_debuglog_test.go b/libgo/go/runtime/export_debuglog_test.go new file mode 100644 index 00000000000..8cd943b4382 --- /dev/null +++ b/libgo/go/runtime/export_debuglog_test.go @@ -0,0 +1,46 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Export debuglog guts for testing. + +package runtime + +const DlogEnabled = dlogEnabled + +const DebugLogBytes = debugLogBytes + +const DebugLogStringLimit = debugLogStringLimit + +var Dlog = dlog + +func (l *dlogger) End() { l.end() } +func (l *dlogger) B(x bool) *dlogger { return l.b(x) } +func (l *dlogger) I(x int) *dlogger { return l.i(x) } +func (l *dlogger) I16(x int16) *dlogger { return l.i16(x) } +func (l *dlogger) U64(x uint64) *dlogger { return l.u64(x) } +func (l *dlogger) Hex(x uint64) *dlogger { return l.hex(x) } +func (l *dlogger) P(x interface{}) *dlogger { return l.p(x) } +func (l *dlogger) S(x string) *dlogger { return l.s(x) } +func (l *dlogger) PC(x uintptr) *dlogger { return l.pc(x) } + +func DumpDebugLog() string { + g := getg() + g.writebuf = make([]byte, 0, 1<<20) + printDebugLog() + buf := g.writebuf + g.writebuf = nil + + return string(buf) +} + +func ResetDebugLog() { + stopTheWorld("ResetDebugLog") + for l := allDloggers; l != nil; l = l.allLink { + l.w.write = 0 + l.w.tick, l.w.nano = 0, 0 + l.w.r.begin, l.w.r.end = 0, 0 + l.w.r.tick, l.w.r.nano = 0, 0 + } + startTheWorld() +} diff --git a/libgo/go/runtime/export_mmap_test.go b/libgo/go/runtime/export_mmap_test.go index 6e05bb9fbb7..5f3e99af2cc 100644 --- a/libgo/go/runtime/export_mmap_test.go +++ b/libgo/go/runtime/export_mmap_test.go @@ -2,6 +2,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +// +build aix darwin dragonfly freebsd hurd linux nacl netbsd openbsd solaris + // Export guts for testing. 
package runtime diff --git a/libgo/go/runtime/export_test.go b/libgo/go/runtime/export_test.go index d919e0486b2..0db23937926 100644 --- a/libgo/go/runtime/export_test.go +++ b/libgo/go/runtime/export_test.go @@ -34,6 +34,10 @@ var Fastlog2 = fastlog2 var Atoi = atoi var Atoi32 = atoi32 +var Nanotime = nanotime + +var PhysHugePageSize = physHugePageSize + type LFNode struct { Next uint64 Pushcnt uintptr @@ -332,7 +336,7 @@ func ReadMemStatsSlow() (base, slow MemStats) { slow.BySize[i].Frees = bySize[i].Frees } - for i := mheap_.scav.start(); i.valid(); i = i.next() { + for i := mheap_.free.start(0, 0); i.valid(); i = i.next() { slow.HeapReleased += uint64(i.span().released()) } @@ -402,6 +406,18 @@ func LockOSCounts() (external, internal uint32) { return g.m.lockedExt, g.m.lockedInt } +//go:noinline +func TracebackSystemstack(stk []uintptr, i int) int { + if i == 0 { + return callersRaw(stk) + } + n := 0 + systemstack(func() { + n = TracebackSystemstack(stk, i-1) + }) + return n +} + func KeepNArenaHints(n int) { hint := mheap_.arenaHints for i := 1; i < n; i++ { @@ -495,3 +511,167 @@ func MapTombstoneCheck(m map[int]int) { } } } + +// UnscavHugePagesSlow returns the value of mheap_.freeHugePages +// and the number of unscavenged huge pages calculated by +// scanning the heap. +func UnscavHugePagesSlow() (uintptr, uintptr) { + var base, slow uintptr + // Run on the system stack to avoid deadlock from stack growth + // trying to acquire the heap lock. + systemstack(func() { + lock(&mheap_.lock) + base = mheap_.free.unscavHugePages + for _, s := range mheap_.allspans { + if s.state == mSpanFree && !s.scavenged { + slow += s.hugePages() + } + } + unlock(&mheap_.lock) + }) + return base, slow +} + +// Span is a safe wrapper around an mspan, whose memory +// is managed manually. +type Span struct { + *mspan +} + +func AllocSpan(base, npages uintptr, scavenged bool) Span { + var s *mspan + systemstack(func() { + lock(&mheap_.lock) + s = (*mspan)(mheap_.spanalloc.alloc()) + unlock(&mheap_.lock) + }) + s.init(base, npages) + s.scavenged = scavenged + return Span{s} +} + +func (s *Span) Free() { + systemstack(func() { + lock(&mheap_.lock) + mheap_.spanalloc.free(unsafe.Pointer(s.mspan)) + unlock(&mheap_.lock) + }) + s.mspan = nil +} + +func (s Span) Base() uintptr { + return s.mspan.base() +} + +func (s Span) Pages() uintptr { + return s.mspan.npages +} + +type TreapIterType treapIterType + +const ( + TreapIterScav TreapIterType = TreapIterType(treapIterScav) + TreapIterHuge = TreapIterType(treapIterHuge) + TreapIterBits = treapIterBits +) + +type TreapIterFilter treapIterFilter + +func TreapFilter(mask, match TreapIterType) TreapIterFilter { + return TreapIterFilter(treapFilter(treapIterType(mask), treapIterType(match))) +} + +func (s Span) MatchesIter(mask, match TreapIterType) bool { + return treapFilter(treapIterType(mask), treapIterType(match)).matches(s.treapFilter()) +} + +type TreapIter struct { + treapIter +} + +func (t TreapIter) Span() Span { + return Span{t.span()} +} + +func (t TreapIter) Valid() bool { + return t.valid() +} + +func (t TreapIter) Next() TreapIter { + return TreapIter{t.next()} +} + +func (t TreapIter) Prev() TreapIter { + return TreapIter{t.prev()} +} + +// Treap is a safe wrapper around mTreap for testing. +// +// It must never be heap-allocated because mTreap is +// notinheap. 
+// +//go:notinheap +type Treap struct { + mTreap +} + +func (t *Treap) Start(mask, match TreapIterType) TreapIter { + return TreapIter{t.start(treapIterType(mask), treapIterType(match))} +} + +func (t *Treap) End(mask, match TreapIterType) TreapIter { + return TreapIter{t.end(treapIterType(mask), treapIterType(match))} +} + +func (t *Treap) Insert(s Span) { + // mTreap uses a fixalloc in mheap_ for treapNode + // allocation which requires the mheap_ lock to manipulate. + // Locking here is safe because the treap itself never allocs + // or otherwise ends up grabbing this lock. + systemstack(func() { + lock(&mheap_.lock) + t.insert(s.mspan) + unlock(&mheap_.lock) + }) + t.CheckInvariants() +} + +func (t *Treap) Find(npages uintptr) TreapIter { + return TreapIter{t.find(npages)} +} + +func (t *Treap) Erase(i TreapIter) { + // mTreap uses a fixalloc in mheap_ for treapNode + // freeing which requires the mheap_ lock to manipulate. + // Locking here is safe because the treap itself never allocs + // or otherwise ends up grabbing this lock. + systemstack(func() { + lock(&mheap_.lock) + t.erase(i.treapIter) + unlock(&mheap_.lock) + }) + t.CheckInvariants() +} + +func (t *Treap) RemoveSpan(s Span) { + // See Erase about locking. + systemstack(func() { + lock(&mheap_.lock) + t.removeSpan(s.mspan) + unlock(&mheap_.lock) + }) + t.CheckInvariants() +} + +func (t *Treap) Size() int { + i := 0 + t.mTreap.treap.walkTreap(func(t *treapNode) { + i++ + }) + return i +} + +func (t *Treap) CheckInvariants() { + t.mTreap.treap.walkTreap(checkTreapNode) + t.mTreap.treap.validateInvariants() +} diff --git a/libgo/go/runtime/extern.go b/libgo/go/runtime/extern.go index 298eb81c65e..9dbf057b17d 100644 --- a/libgo/go/runtime/extern.go +++ b/libgo/go/runtime/extern.go @@ -134,6 +134,9 @@ that can be blocked in system calls on behalf of Go code; those do not count aga the GOMAXPROCS limit. This package's GOMAXPROCS function queries and changes the limit. +The GORACE variable configures the race detector, for programs built using -race. +See https://golang.org/doc/articles/race_detector.html for details. + The GOTRACEBACK variable controls the amount of output generated when a Go program fails due to an unrecovered panic or an unexpected runtime condition. By default, a failure prints a stack trace for the current goroutine, diff --git a/libgo/go/runtime/gc_test.go b/libgo/go/runtime/gc_test.go index 384b75f2de3..3eb01bf4729 100644 --- a/libgo/go/runtime/gc_test.go +++ b/libgo/go/runtime/gc_test.go @@ -473,6 +473,25 @@ func TestReadMemStats(t *testing.T) { } } +func TestUnscavHugePages(t *testing.T) { + // Allocate 20 MiB and immediately free it a few times to increase + // the chance that unscavHugePages isn't zero and that some kind of + // accounting had to happen in the runtime. 
+ for j := 0; j < 3; j++ { + var large [][]byte + for i := 0; i < 5; i++ { + large = append(large, make([]byte, runtime.PhysHugePageSize)) + } + runtime.KeepAlive(large) + runtime.GC() + } + base, slow := runtime.UnscavHugePagesSlow() + if base != slow { + logDiff(t, "unscavHugePages", reflect.ValueOf(base), reflect.ValueOf(slow)) + t.Fatal("unscavHugePages mismatch") + } +} + func logDiff(t *testing.T, prefix string, got, want reflect.Value) { typ := got.Type() switch typ.Kind() { diff --git a/libgo/go/runtime/heapdump.go b/libgo/go/runtime/heapdump.go index b0506a8e475..c968ab36d9e 100644 --- a/libgo/go/runtime/heapdump.go +++ b/libgo/go/runtime/heapdump.go @@ -195,7 +195,7 @@ func dumptype(t *_type) { dwritebyte('.') dwrite(name.str, uintptr(name.len)) } - dumpbool(t.kind&kindDirectIface == 0 || t.kind&kindNoPointers == 0) + dumpbool(t.kind&kindDirectIface == 0 || t.ptrdata != 0) } // dump an object diff --git a/libgo/go/runtime/iface.go b/libgo/go/runtime/iface.go index 3fa5dd6deec..74b54f5209c 100644 --- a/libgo/go/runtime/iface.go +++ b/libgo/go/runtime/iface.go @@ -504,6 +504,16 @@ func reflect_ifaceE2I(inter *interfacetype, e eface, dst *iface) { dst.data = e.data } +//go:linkname reflectlite_ifaceE2I internal..z2freflectlite.ifaceE2I +func reflectlite_ifaceE2I(inter *interfacetype, e eface, dst *iface) { + t := e._type + if t == nil { + panic(TypeAssertionError{nil, nil, &inter.typ, ""}) + } + dst.tab = requireitab((*_type)(unsafe.Pointer(inter)), t) + dst.data = e.data +} + // staticbytes is used to avoid convT2E for byte-sized values. var staticbytes = [...]byte{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, diff --git a/libgo/go/runtime/internal/atomic/atomic_test.go b/libgo/go/runtime/internal/atomic/atomic_test.go index 25ece4354ea..0ba75447e8c 100644 --- a/libgo/go/runtime/internal/atomic/atomic_test.go +++ b/libgo/go/runtime/internal/atomic/atomic_test.go @@ -29,14 +29,18 @@ func runParallel(N, iter int, f func()) { } func TestXadduintptr(t *testing.T) { - const N = 20 - const iter = 100000 + N := 20 + iter := 100000 + if testing.Short() { + N = 10 + iter = 10000 + } inc := uintptr(100) total := uintptr(0) runParallel(N, iter, func() { atomic.Xadduintptr(&total, inc) }) - if want := uintptr(N * iter * inc); want != total { + if want := uintptr(N*iter) * inc; want != total { t.Fatalf("xadduintpr error, want %d, got %d", want, total) } total = 0 diff --git a/libgo/go/runtime/lock_js.go b/libgo/go/runtime/lock_js.go index f58c915b630..c038499f2a9 100644 --- a/libgo/go/runtime/lock_js.go +++ b/libgo/go/runtime/lock_js.go @@ -11,8 +11,6 @@ import ( ) // js/wasm has no support for threads yet. There is no preemption. -// Waiting for a mutex is implemented by allowing other goroutines -// to run until the mutex gets unlocked. const ( mutex_unlocked = 0 @@ -28,9 +26,16 @@ const ( ) func lock(l *mutex) { - for l.key == mutex_locked { - mcall(gosched_m) + if l.key == mutex_locked { + // js/wasm is single-threaded so we should never + // observe this. 
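The Xadduintptr test above now computes its expected value as uintptr(N*iter) * inc and shrinks the workload under -short. The invariant it checks, that concurrent atomic adds never lose an increment, can be shown with the public sync/atomic API (an illustrative analogue, not the runtime-internal test):

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

func main() {
	const (
		N    = 8
		iter = 10000
		inc  = uint64(100)
	)
	var total uint64
	var wg sync.WaitGroup
	for i := 0; i < N; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for j := 0; j < iter; j++ {
				atomic.AddUint64(&total, inc)
			}
		}()
	}
	wg.Wait()
	fmt.Println(total == N*iter*inc) // true: no increment is lost
}
```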
+ throw("self deadlock") } + gp := getg() + if gp.m.locks < 0 { + throw("lock count") + } + gp.m.locks++ l.key = mutex_locked } @@ -38,6 +43,11 @@ func unlock(l *mutex) { if l.key == mutex_unlocked { throw("unlock of unlocked lock") } + gp := getg() + gp.m.locks-- + if gp.m.locks < 0 { + throw("lock count") + } l.key = mutex_unlocked } diff --git a/libgo/go/runtime/lock_sema.go b/libgo/go/runtime/lock_sema.go index bf9211a6c3b..5cf24061795 100644 --- a/libgo/go/runtime/lock_sema.go +++ b/libgo/go/runtime/lock_sema.go @@ -133,7 +133,13 @@ func unlock(l *mutex) { // One-time notifications. func noteclear(n *note) { - n.key = 0 + if GOOS == "aix" { + // On AIX, semaphores might not synchronize the memory in some + // rare cases. See issue #30189. + atomic.Storeuintptr(&n.key, 0) + } else { + n.key = 0 + } } func notewakeup(n *note) { @@ -273,7 +279,7 @@ func notetsleep_internal(n *note, ns int64, gp *g, deadline int64) bool { func notetsleep(n *note, ns int64) bool { gp := getg() - if gp != gp.m.g0 && gp.m.preemptoff != "" { + if gp != gp.m.g0 { throw("notetsleep not on g0") } semacreate(gp.m) diff --git a/libgo/go/runtime/malloc.go b/libgo/go/runtime/malloc.go index e1e908b2859..cee5f6bc4de 100644 --- a/libgo/go/runtime/malloc.go +++ b/libgo/go/runtime/malloc.go @@ -335,53 +335,81 @@ const ( var physPageSize uintptr // physHugePageSize is the size in bytes of the OS's default physical huge -// page size whose allocation is opaque to the application. It is assumed -// and verified to be a power of two. +// page size whose allocation is opaque to the application. // // If set, this must be set by the OS init code (typically in osinit) before // mallocinit. However, setting it at all is optional, and leaving the default // value is always safe (though potentially less efficient). -// -// Since physHugePageSize is always assumed to be a power of two, -// physHugePageShift is defined as physHugePageSize == 1 << physHugePageShift. -// The purpose of physHugePageShift is to avoid doing divisions in -// performance critical functions. -var ( - physHugePageSize uintptr - physHugePageShift uint -) +var physHugePageSize uintptr -// OS-defined helpers: +// OS memory management abstraction layer // -// sysAlloc obtains a large chunk of zeroed memory from the -// operating system, typically on the order of a hundred kilobytes -// or a megabyte. -// NOTE: sysAlloc returns OS-aligned memory, but the heap allocator -// may use larger alignment, so the caller must be careful to realign the -// memory obtained by sysAlloc. +// Regions of the address space managed by the runtime may be in one of four +// states at any given time: +// 1) None - Unreserved and unmapped, the default state of any region. +// 2) Reserved - Owned by the runtime, but accessing it would cause a fault. +// Does not count against the process' memory footprint. +// 3) Prepared - Reserved, intended not to be backed by physical memory (though +// an OS may implement this lazily). Can transition efficiently to +// Ready. Accessing memory in such a region is undefined (may +// fault, may give back unexpected zeroes, etc.). +// 4) Ready - may be accessed safely. // -// sysUnused notifies the operating system that the contents -// of the memory region are no longer needed and can be reused -// for other purposes. -// sysUsed notifies the operating system that the contents -// of the memory region are needed again. +// This set of states is more than is strictly necessary to support all the +// currently supported platforms. 
One could get by with just None, Reserved, and +// Ready. However, the Prepared state gives us flexibility for performance +// purposes. For example, on POSIX-y operating systems, Reserved is usually a +// private anonymous mmap'd region with PROT_NONE set, and to transition +// to Ready would require setting PROT_READ|PROT_WRITE. However the +// underspecification of Prepared lets us use just MADV_FREE to transition from +// Ready to Prepared. Thus with the Prepared state we can set the permission +// bits just once early on, we can efficiently tell the OS that it's free to +// take pages away from us when we don't strictly need them. +// +// For each OS there is a common set of helpers defined that transition +// memory regions between these states. The helpers are as follows: +// +// sysAlloc transitions an OS-chosen region of memory from None to Ready. +// More specifically, it obtains a large chunk of zeroed memory from the +// operating system, typically on the order of a hundred kilobytes +// or a megabyte. This memory is always immediately available for use. // -// sysFree returns it unconditionally; this is only used if -// an out-of-memory error has been detected midway through -// an allocation. It is okay if sysFree is a no-op. +// sysFree transitions a memory region from any state to None. Therefore, it +// returns memory unconditionally. It is used if an out-of-memory error has been +// detected midway through an allocation or to carve out an aligned section of +// the address space. It is okay if sysFree is a no-op only if sysReserve always +// returns a memory region aligned to the heap allocator's alignment +// restrictions. // -// sysReserve reserves address space without allocating memory. +// sysReserve transitions a memory region from None to Reserved. It reserves +// address space in such a way that it would cause a fatal fault upon access +// (either via permissions or not committing the memory). Such a reservation is +// thus never backed by physical memory. // If the pointer passed to it is non-nil, the caller wants the // reservation there, but sysReserve can still choose another // location if that one is unavailable. // NOTE: sysReserve returns OS-aligned memory, but the heap allocator // may use larger alignment, so the caller must be careful to realign the -// memory obtained by sysAlloc. +// memory obtained by sysReserve. // -// sysMap maps previously reserved address space for use. +// sysMap transitions a memory region from Reserved to Prepared. It ensures the +// memory region can be efficiently transitioned to Ready. // -// sysFault marks a (already sysAlloc'd) region to fault -// if accessed. Used only for debugging the runtime. +// sysUsed transitions a memory region from Prepared to Ready. It notifies the +// operating system that the memory region is needed and ensures that the region +// may be safely accessed. This is typically a no-op on systems that don't have +// an explicit commit step and hard over-commit limits, but is critical on +// Windows, for example. +// +// sysUnused transitions a memory region from Ready to Prepared. It notifies the +// operating system that the physical pages backing this memory region are no +// longer needed and can be reused for other purposes. The contents of a +// sysUnused memory region are considered forfeit and the region must not be +// accessed again until sysUsed is called. +// +// sysFault transitions a memory region from Ready or Prepared to Reserved. 
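On a POSIX system the state transitions described above correspond roughly to mmap/mprotect/madvise calls; the runtime's own mapping lives in mem_*.go and is not reproduced here. The Linux-only sketch below is an approximation under stated assumptions: the helper names are invented, and it uses MADV_DONTNEED for the Ready -> Prepared step because the frozen syscall package may not export the MADV_FREE value the comment mentions.

```go
// +build linux

package main

import (
	"fmt"
	"syscall"
)

// reserve: None -> Reserved. PROT_NONE means any access faults and no
// physical memory is committed.
func reserve(n int) ([]byte, error) {
	return syscall.Mmap(-1, 0, n, syscall.PROT_NONE, syscall.MAP_ANON|syscall.MAP_PRIVATE)
}

// prepare: Reserved -> Prepared. Grant read/write once; pages are still only
// committed when first touched.
func prepare(b []byte) error {
	return syscall.Mprotect(b, syscall.PROT_READ|syscall.PROT_WRITE)
}

// unused: Ready -> Prepared. Tell the kernel the contents are forfeit.
func unused(b []byte) error {
	return syscall.Madvise(b, syscall.MADV_DONTNEED)
}

func main() {
	const size = 1 << 20
	b, err := reserve(size)
	if err != nil {
		panic(err)
	}
	if err := prepare(b); err != nil {
		panic(err)
	}
	b[0] = 1 // Ready: the page is now safe to access
	if err := unused(b); err != nil {
		panic(err)
	}
	fmt.Println("cycled", size, "bytes through Reserved -> Prepared -> Ready -> Prepared")
}
```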
It +// marks a region such that it will always fault if accessed. Used only for +// debugging the runtime. func mallocinit() { if class_to_size[_TinySizeClass] != _TinySize { @@ -422,7 +450,7 @@ func mallocinit() { _g_.m.mcache = allocmcache() // Create initial arena growth hints. - if sys.PtrSize == 8 && GOARCH != "wasm" { + if sys.PtrSize == 8 { // On a 64-bit machine, we pick the following hints // because: // @@ -559,6 +587,9 @@ func mallocinit() { // heapArenaBytes. sysAlloc returns nil on failure. // There is no corresponding free function. // +// sysAlloc returns a memory region in the Prepared state. This region must +// be transitioned to Ready before use. +// // h must be locked. func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) { n = round(n, heapArenaBytes) @@ -600,7 +631,7 @@ func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) { // TODO: This would be cleaner if sysReserve could be // told to only return the requested address. In // particular, this is already how Windows behaves, so - // it would simply things there. + // it would simplify things there. if v != nil { sysFree(v, n, nil) } @@ -657,7 +688,7 @@ func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) { throw("misrounded allocation in sysAlloc") } - // Back the reservation. + // Transition from Reserved to Prepared. sysMap(v, size, &memstats.heap_sys) mapped: @@ -897,7 +928,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { dataSize := size c := gomcache() var x unsafe.Pointer - noscan := typ == nil || typ.kind&kindNoPointers != 0 + noscan := typ == nil || typ.ptrdata == 0 if size <= maxSmallSize { if noscan && size < maxTinySize { // Tiny allocator. @@ -1115,6 +1146,11 @@ func reflect_unsafe_New(typ *_type) unsafe.Pointer { return mallocgc(typ.size, typ, true) } +//go:linkname reflectlite_unsafe_New internal..z2freflectlite.unsafe_New +func reflectlite_unsafe_New(typ *_type) unsafe.Pointer { + return mallocgc(typ.size, typ, true) +} + // newarray allocates an array of n elements of type typ. func newarray(typ *_type, n int) unsafe.Pointer { if n == 1 { @@ -1317,8 +1353,8 @@ func inPersistentAlloc(p uintptr) bool { } // linearAlloc is a simple linear allocator that pre-reserves a region -// of memory and then maps that region as needed. The caller is -// responsible for locking. +// of memory and then maps that region into the Ready state as needed. The +// caller is responsible for locking. type linearAlloc struct { next uintptr // next free byte mapped uintptr // one byte past end of mapped space @@ -1337,8 +1373,9 @@ func (l *linearAlloc) alloc(size, align uintptr, sysStat *uint64) unsafe.Pointer } l.next = p + size if pEnd := round(l.next-1, physPageSize); pEnd > l.mapped { - // We need to map more of the reserved space. + // Transition from Reserved to Prepared to Ready. sysMap(unsafe.Pointer(l.mapped), pEnd-l.mapped, sysStat) + sysUsed(unsafe.Pointer(l.mapped), pEnd-l.mapped) l.mapped = pEnd } return unsafe.Pointer(p) diff --git a/libgo/go/runtime/map.go b/libgo/go/runtime/map.go index eebb2103bb2..349577b232a 100644 --- a/libgo/go/runtime/map.go +++ b/libgo/go/runtime/map.go @@ -8,7 +8,7 @@ package runtime // // A map is just a hash table. The data is arranged // into an array of buckets. Each bucket contains up to -// 8 key/value pairs. The low-order bits of the hash are +// 8 key/elem pairs. The low-order bits of the hash are // used to select a bucket. 
Each bucket contains a few // high-order bits of each hash to distinguish the entries // within a single bucket. @@ -33,7 +33,7 @@ package runtime // Picking loadFactor: too large and we have lots of overflow // buckets, too small and we waste a lot of space. I wrote // a simple program to check some stats for different loads: -// (64-bit, 8 byte keys and values) +// (64-bit, 8 byte keys and elems) // loadFactor %overflow bytes/entry hitprobe missprobe // 4.00 2.13 20.77 3.00 4.00 // 4.50 4.05 17.30 3.25 4.50 @@ -46,7 +46,7 @@ package runtime // 8.00 41.10 9.40 5.00 8.00 // // %overflow = percentage of buckets which have an overflow bucket -// bytes/entry = overhead bytes used per key/value pair +// bytes/entry = overhead bytes used per key/elem pair // hitprobe = # of entries to check when looking up a present key // missprobe = # of entries to check when looking up an absent key // @@ -76,7 +76,7 @@ import ( //go:linkname mapiternext const ( - // Maximum number of key/value pairs a bucket can hold. + // Maximum number of key/elem pairs a bucket can hold. bucketCntBits = 3 bucketCnt = 1 << bucketCntBits @@ -85,12 +85,12 @@ const ( loadFactorNum = 13 loadFactorDen = 2 - // Maximum key or value size to keep inline (instead of mallocing per element). + // Maximum key or elem size to keep inline (instead of mallocing per element). // Must fit in a uint8. - // Fast versions cannot handle big values - the cutoff size for - // fast versions in cmd/compile/internal/gc/walk.go must be at most this value. - maxKeySize = 128 - maxValueSize = 128 + // Fast versions cannot handle big elems - the cutoff size for + // fast versions in cmd/compile/internal/gc/walk.go must be at most this elem. + maxKeySize = 128 + maxElemSize = 128 // data offset should be the size of the bmap struct, but needs to be // aligned correctly. For amd64p32 this means 64-bit alignment @@ -106,7 +106,7 @@ const ( // during map writes and thus no one else can observe the map during that time). emptyRest = 0 // this cell is empty, and there are no more non-empty cells at higher indexes or overflows. emptyOne = 1 // this cell is empty - evacuatedX = 2 // key/value is valid. Entry has been evacuated to first half of larger table. + evacuatedX = 2 // key/elem is valid. Entry has been evacuated to first half of larger table. evacuatedY = 3 // same as above, but evacuated to second half of larger table. evacuatedEmpty = 4 // cell is empty, bucket is evacuated. minTopHash = 5 // minimum tophash for a normal filled cell. @@ -145,11 +145,11 @@ type hmap struct { // mapextra holds fields that are not present on all maps. type mapextra struct { - // If both key and value do not contain pointers and are inline, then we mark bucket + // If both key and elem do not contain pointers and are inline, then we mark bucket // type as containing no pointers. This avoids scanning such maps. // However, bmap.overflow is a pointer. In order to keep overflow buckets // alive, we store pointers to all overflow buckets in hmap.extra.overflow and hmap.extra.oldoverflow. - // overflow and oldoverflow are only used if key and value do not contain pointers. + // overflow and oldoverflow are only used if key and elem do not contain pointers. // overflow contains overflow buckets for hmap.buckets. // oldoverflow contains overflow buckets for hmap.oldbuckets. // The indirection allows to store a pointer to the slice in hiter. @@ -166,9 +166,9 @@ type bmap struct { // for each key in this bucket. 
If tophash[0] < minTopHash, // tophash[0] is a bucket evacuation state instead. tophash [bucketCnt]uint8 - // Followed by bucketCnt keys and then bucketCnt values. - // NOTE: packing all the keys together and then all the values together makes the - // code a bit more complicated than alternating key/value/key/value/... but it allows + // Followed by bucketCnt keys and then bucketCnt elems. + // NOTE: packing all the keys together and then all the elems together makes the + // code a bit more complicated than alternating key/elem/key/elem/... but it allows // us to eliminate padding which would be needed for, e.g., map[int64]int8. // Followed by an overflow pointer. } @@ -178,7 +178,7 @@ type bmap struct { // the layout of this structure. type hiter struct { key unsafe.Pointer // Must be in first position. Write nil to indicate iteration end (see cmd/internal/gc/range.go). - value unsafe.Pointer // Must be in second position (see cmd/internal/gc/range.go). + elem unsafe.Pointer // Must be in second position (see cmd/internal/gc/range.go). t *maptype h *hmap buckets unsafe.Pointer // bucket ptr at hash_iter initialization time @@ -196,10 +196,8 @@ type hiter struct { // bucketShift returns 1<<b, optimized for code generation. func bucketShift(b uint8) uintptr { - if sys.GoarchAmd64|sys.GoarchAmd64p32|sys.Goarch386 != 0 { - b &= sys.PtrSize*8 - 1 // help x86 archs remove shift overflow checks - } - return uintptr(1) << b + // Masking the shift amount allows overflow checks to be elided. + return uintptr(1) << (b & (sys.PtrSize*8 - 1)) } // bucketMask returns 1<<b - 1, optimized for code generation. @@ -279,7 +277,7 @@ func (h *hmap) newoverflow(t *maptype, b *bmap) *bmap { ovf = (*bmap)(newobject(t.bucket)) } h.incrnoverflow() - if t.bucket.kind&kindNoPointers != 0 { + if t.bucket.ptrdata == 0 { h.createOverflow() *h.extra.overflow = append(*h.extra.overflow, ovf) } @@ -303,7 +301,7 @@ func makemap64(t *maptype, hint int64, h *hmap) *hmap { return makemap(t, int(hint), h) } -// makehmap_small implements Go map creation for make(map[k]v) and +// makemap_small implements Go map creation for make(map[k]v) and // make(map[k]v, hint) when hint is known to be at most bucketCnt // at compile time and the map needs to be allocated on the heap. func makemap_small() *hmap { @@ -383,7 +381,7 @@ func makeBucketArray(t *maptype, b uint8, dirtyalloc unsafe.Pointer) (buckets un // but may not be empty. buckets = dirtyalloc size := t.bucket.size * nbuckets - if t.bucket.kind&kindNoPointers == 0 { + if t.bucket.ptrdata != 0 { memclrHasPointers(buckets, size) } else { memclrNoHeapPointers(buckets, size) @@ -404,7 +402,7 @@ func makeBucketArray(t *maptype, b uint8, dirtyalloc unsafe.Pointer) (buckets un } // mapaccess1 returns a pointer to h[key]. Never returns nil, instead -// it will return a reference to the zero object for the value type if +// it will return a reference to the zero object for the elem type if // the key is not in the map. // NOTE: The returned pointer may keep the whole map live, so don't // hold onto it for very long. 
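The address arithmetic that recurs in the following hunks, add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize)), falls straight out of the bucket layout described above: a tophash header, then all eight keys, then all eight elems. A minimal sketch with toy sizes (the constants and layout below are illustrative, not the runtime's real types):

package main

import (
	"fmt"
	"unsafe"
)

const bucketCnt = 8

// elemAddr skips the tophash header (dataOffset), skips all bucketCnt keys,
// then indexes into the elem array -- the same shape as the runtime expression.
func elemAddr(bucket unsafe.Pointer, dataOffset, keysize, elemsize, i uintptr) unsafe.Pointer {
	return unsafe.Pointer(uintptr(bucket) + dataOffset + bucketCnt*keysize + i*elemsize)
}

func main() {
	// Toy bucket shaped like map[int64]int8: 8 tophash bytes, 8 int64 keys, 8 int8 elems.
	var b [8 + 8*8 + 8]byte
	p := elemAddr(unsafe.Pointer(&b[0]), 8, 8, 1, 3)
	fmt.Println(uintptr(p) - uintptr(unsafe.Pointer(&b[0]))) // 8 + 64 + 3 = 75
}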
@@ -462,11 +460,11 @@ bucketloop: k = *((*unsafe.Pointer)(k)) } if equalfn(key, k) { - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) - if t.indirectvalue() { - v = *((*unsafe.Pointer)(v)) + e := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize)) + if t.indirectelem() { + e = *((*unsafe.Pointer)(e)) } - return v + return e } } } @@ -527,18 +525,18 @@ bucketloop: k = *((*unsafe.Pointer)(k)) } if equalfn(key, k) { - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) - if t.indirectvalue() { - v = *((*unsafe.Pointer)(v)) + e := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize)) + if t.indirectelem() { + e = *((*unsafe.Pointer)(e)) } - return v, true + return e, true } } } return unsafe.Pointer(&zeroVal[0]), false } -// returns both key and value. Used by map iterator +// returns both key and elem. Used by map iterator func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe.Pointer) { // Check preemption, since unlike gc we don't check on every call. if getg().preempt { @@ -578,11 +576,11 @@ bucketloop: k = *((*unsafe.Pointer)(k)) } if equalfn(key, k) { - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) - if t.indirectvalue() { - v = *((*unsafe.Pointer)(v)) + e := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize)) + if t.indirectelem() { + e = *((*unsafe.Pointer)(e)) } - return k, v + return k, e } } } @@ -590,19 +588,19 @@ bucketloop: } func mapaccess1_fat(t *maptype, h *hmap, key, zero unsafe.Pointer) unsafe.Pointer { - v := mapaccess1(t, h, key) - if v == unsafe.Pointer(&zeroVal[0]) { + e := mapaccess1(t, h, key) + if e == unsafe.Pointer(&zeroVal[0]) { return zero } - return v + return e } func mapaccess2_fat(t *maptype, h *hmap, key, zero unsafe.Pointer) (unsafe.Pointer, bool) { - v := mapaccess1(t, h, key) - if v == unsafe.Pointer(&zeroVal[0]) { + e := mapaccess1(t, h, key) + if e == unsafe.Pointer(&zeroVal[0]) { return zero, false } - return v, true + return e, true } // Like mapaccess, but allocates a slot for the key if it is not present in the map. 
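The "few high-order bits of each hash" kept in tophash are taken from the top byte of the hash and bumped above the sentinel values listed earlier (emptyRest, emptyOne, evacuatedX, ...). A small standalone sketch mirroring the runtime's tophash helper, using math/bits in place of the runtime's sys package:

package main

import (
	"fmt"
	"math/bits"
)

const minTopHash = 5 // values 0..4 are reserved for cell/evacuation states

func tophash(hash uintptr) uint8 {
	top := uint8(hash >> (bits.UintSize - 8)) // keep only the top byte
	if top < minTopHash {
		top += minTopHash // never collide with the reserved state markers
	}
	return top
}

func main() {
	h := uintptr(0xAB) << (bits.UintSize - 8) // a hash whose top byte is 0xAB
	fmt.Println(tophash(h)) // 171, already above minTopHash
	fmt.Println(tophash(0)) // 0 is reserved, so it is bumped to 5
}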
@@ -649,7 +647,7 @@ again: var inserti *uint8 var insertk unsafe.Pointer - var val unsafe.Pointer + var elem unsafe.Pointer bucketloop: for { for i := uintptr(0); i < bucketCnt; i++ { @@ -657,7 +655,7 @@ bucketloop: if isEmpty(b.tophash[i]) && inserti == nil { inserti = &b.tophash[i] insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) - val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) + elem = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize)) } if b.tophash[i] == emptyRest { break bucketloop @@ -675,7 +673,7 @@ bucketloop: if t.needkeyupdate() { typedmemmove(t.key, k, key) } - val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) + elem = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize)) goto done } ovf := b.overflow(t) @@ -699,18 +697,18 @@ bucketloop: newb := h.newoverflow(t, b) inserti = &newb.tophash[0] insertk = add(unsafe.Pointer(newb), dataOffset) - val = add(insertk, bucketCnt*uintptr(t.keysize)) + elem = add(insertk, bucketCnt*uintptr(t.keysize)) } - // store new key/value at insert position + // store new key/elem at insert position if t.indirectkey() { kmem := newobject(t.key) *(*unsafe.Pointer)(insertk) = kmem insertk = kmem } - if t.indirectvalue() { + if t.indirectelem() { vmem := newobject(t.elem) - *(*unsafe.Pointer)(val) = vmem + *(*unsafe.Pointer)(elem) = vmem } typedmemmove(t.key, insertk, key) *inserti = top @@ -721,10 +719,10 @@ done: throw("concurrent map writes") } h.flags &^= hashWriting - if t.indirectvalue() { - val = *((*unsafe.Pointer)(val)) + if t.indirectelem() { + elem = *((*unsafe.Pointer)(elem)) } - return val + return elem } func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) { @@ -782,16 +780,16 @@ search: // Only clear key if there are pointers in it. if t.indirectkey() { *(*unsafe.Pointer)(k) = nil - } else if t.key.kind&kindNoPointers == 0 { + } else if t.key.ptrdata != 0 { memclrHasPointers(k, t.key.size) } - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) - if t.indirectvalue() { - *(*unsafe.Pointer)(v) = nil - } else if t.elem.kind&kindNoPointers == 0 { - memclrHasPointers(v, t.elem.size) + e := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize)) + if t.indirectelem() { + *(*unsafe.Pointer)(e) = nil + } else if t.elem.ptrdata != 0 { + memclrHasPointers(e, t.elem.size) } else { - memclrNoHeapPointers(v, t.elem.size) + memclrNoHeapPointers(e, t.elem.size) } b.tophash[i] = emptyOne // If the bucket now ends in a bunch of emptyOne states, @@ -845,15 +843,19 @@ search: // and it's cheaper to zero it here. func mapiterinit(t *maptype, h *hmap, it *hiter) { it.key = nil - it.value = nil + it.elem = nil it.t = nil it.h = nil it.buckets = nil it.bptr = nil it.overflow = nil it.oldoverflow = nil + it.startBucket = 0 + it.offset = 0 it.wrapped = false + it.B = 0 it.i = 0 + it.bucket = 0 it.checkBucket = 0 if raceenabled && h != nil { @@ -874,7 +876,7 @@ func mapiterinit(t *maptype, h *hmap, it *hiter) { // grab snapshot of bucket state it.B = h.B it.buckets = h.buckets - if t.bucket.kind&kindNoPointers != 0 { + if t.bucket.ptrdata == 0 { // Allocate the current slice and remember pointers to both current and old. 
// This preserves all relevant overflow buckets alive even if // the table grows and/or overflow buckets are added to the table @@ -931,7 +933,7 @@ next: if bucket == it.startBucket && it.wrapped { // end of iteration it.key = nil - it.value = nil + it.elem = nil return } if h.growing() && it.B == h.B { @@ -969,7 +971,7 @@ next: if t.indirectkey() { k = *((*unsafe.Pointer)(k)) } - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+uintptr(offi)*uintptr(t.valuesize)) + e := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+uintptr(offi)*uintptr(t.elemsize)) if checkBucket != noCheck && !h.sameSizeGrow() { // Special case: iterator was started during a grow to a larger size // and the grow is not done yet. We're working on a bucket whose @@ -1005,10 +1007,10 @@ next: // key!=key, so the entry can't be deleted or updated, so we can just return it. // That's lucky for us because when key!=key we can't look it up successfully. it.key = k - if t.indirectvalue() { - v = *((*unsafe.Pointer)(v)) + if t.indirectelem() { + e = *((*unsafe.Pointer)(e)) } - it.value = v + it.elem = e } else { // The hash table has grown since the iterator was started. // The golden data for this key is now somewhere else. @@ -1017,12 +1019,12 @@ next: // has been deleted, updated, or deleted and reinserted. // NOTE: we need to regrab the key as it has potentially been // updated to an equal() but not identical key (e.g. +0.0 vs -0.0). - rk, rv := mapaccessK(t, h, k) + rk, re := mapaccessK(t, h, k) if rk == nil { continue // key has been deleted } it.key = rk - it.value = rv + it.elem = re } it.bucket = bucket if it.bptr != b { // avoid unnecessary write barrier; see issue 14921 @@ -1188,9 +1190,9 @@ func bucketEvacuated(t *maptype, h *hmap, bucket uintptr) bool { // evacDst is an evacuation destination. type evacDst struct { b *bmap // current destination bucket - i int // key/val index into b + i int // key/elem index into b k unsafe.Pointer // pointer to current key storage - v unsafe.Pointer // pointer to current value storage + e unsafe.Pointer // pointer to current elem storage } func evacuate(t *maptype, h *hmap, oldbucket uintptr) { @@ -1205,7 +1207,7 @@ func evacuate(t *maptype, h *hmap, oldbucket uintptr) { x := &xy[0] x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) x.k = add(unsafe.Pointer(x.b), dataOffset) - x.v = add(x.k, bucketCnt*uintptr(t.keysize)) + x.e = add(x.k, bucketCnt*uintptr(t.keysize)) if !h.sameSizeGrow() { // Only calculate y pointers if we're growing bigger. @@ -1213,13 +1215,13 @@ func evacuate(t *maptype, h *hmap, oldbucket uintptr) { y := &xy[1] y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) y.k = add(unsafe.Pointer(y.b), dataOffset) - y.v = add(y.k, bucketCnt*uintptr(t.keysize)) + y.e = add(y.k, bucketCnt*uintptr(t.keysize)) } for ; b != nil; b = b.overflow(t) { k := add(unsafe.Pointer(b), dataOffset) - v := add(k, bucketCnt*uintptr(t.keysize)) - for i := 0; i < bucketCnt; i, k, v = i+1, add(k, uintptr(t.keysize)), add(v, uintptr(t.valuesize)) { + e := add(k, bucketCnt*uintptr(t.keysize)) + for i := 0; i < bucketCnt; i, k, e = i+1, add(k, uintptr(t.keysize)), add(e, uintptr(t.elemsize)) { top := b.tophash[i] if isEmpty(top) { b.tophash[i] = evacuatedEmpty @@ -1235,7 +1237,7 @@ func evacuate(t *maptype, h *hmap, oldbucket uintptr) { var useY uint8 if !h.sameSizeGrow() { // Compute hash to make our evacuation decision (whether we need - // to send this key/value to bucket x or bucket y). 
+ // to send this key/elem to bucket x or bucket y). hash := t.key.hashfn(k2, uintptr(h.hash0)) if h.flags&iterator != 0 && !t.reflexivekey() && !t.key.equalfn(k2, k2) { // If key != key (NaNs), then the hash could be (and probably @@ -1269,30 +1271,30 @@ func evacuate(t *maptype, h *hmap, oldbucket uintptr) { dst.b = h.newoverflow(t, dst.b) dst.i = 0 dst.k = add(unsafe.Pointer(dst.b), dataOffset) - dst.v = add(dst.k, bucketCnt*uintptr(t.keysize)) + dst.e = add(dst.k, bucketCnt*uintptr(t.keysize)) } dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check if t.indirectkey() { *(*unsafe.Pointer)(dst.k) = k2 // copy pointer } else { - typedmemmove(t.key, dst.k, k) // copy value + typedmemmove(t.key, dst.k, k) // copy elem } - if t.indirectvalue() { - *(*unsafe.Pointer)(dst.v) = *(*unsafe.Pointer)(v) + if t.indirectelem() { + *(*unsafe.Pointer)(dst.e) = *(*unsafe.Pointer)(e) } else { - typedmemmove(t.elem, dst.v, v) + typedmemmove(t.elem, dst.e, e) } dst.i++ // These updates might push these pointers past the end of the - // key or value arrays. That's ok, as we have the overflow pointer + // key or elem arrays. That's ok, as we have the overflow pointer // at the end of the bucket to protect against pointing past the // end of the bucket. dst.k = add(dst.k, uintptr(t.keysize)) - dst.v = add(dst.v, uintptr(t.valuesize)) + dst.e = add(dst.e, uintptr(t.elemsize)) } } - // Unlink the overflow buckets & clear key/value to help GC. - if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + // Unlink the overflow buckets & clear key/elem to help GC. + if h.flags&oldIterator == 0 && t.bucket.ptrdata != 0 { b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) // Preserve b.tophash because the evacuation // state is maintained there. 
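The useY decision above implements the split described in the map comments: when the table doubles, every entry of old bucket i either stays at index i ("x") or moves to i+newbit ("y"), according to the one extra hash bit the larger table starts consuming. A hypothetical, self-contained sketch of just that decision (names are illustrative):

package main

import "fmt"

// evacDest returns the destination bucket index in the grown table for an
// entry of oldBucket, given the old bucket-count exponent oldB.
func evacDest(hash uintptr, oldB uint8, oldBucket uintptr) uintptr {
	newbit := uintptr(1) << oldB // number of old buckets
	if hash&newbit != 0 {
		return oldBucket + newbit // bucket "y" in the second half
	}
	return oldBucket // bucket "x" keeps its index
}

func main() {
	// With 8 old buckets (B=3), an entry of old bucket 2 goes to bucket 2 or
	// bucket 10 of the 16-bucket table, depending on bit 3 of its hash.
	fmt.Println(evacDest(0b0010, 3, 2)) // bit 3 clear -> 2
	fmt.Println(evacDest(0b1010, 3, 2)) // bit 3 set   -> 10
}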
@@ -1347,21 +1349,21 @@ func reflect_makemap(t *maptype, cap int) *hmap { t.key.size <= maxKeySize && (t.indirectkey() || t.keysize != uint8(t.key.size)) { throw("key size wrong") } - if t.elem.size > maxValueSize && (!t.indirectvalue() || t.valuesize != uint8(sys.PtrSize)) || - t.elem.size <= maxValueSize && (t.indirectvalue() || t.valuesize != uint8(t.elem.size)) { - throw("value size wrong") + if t.elem.size > maxElemSize && (!t.indirectelem() || t.elemsize != uint8(sys.PtrSize)) || + t.elem.size <= maxElemSize && (t.indirectelem() || t.elemsize != uint8(t.elem.size)) { + throw("elem size wrong") } if t.key.align > bucketCnt { throw("key align too big") } if t.elem.align > bucketCnt { - throw("value align too big") + throw("elem align too big") } if t.key.size%uintptr(t.key.align) != 0 { throw("key size not a multiple of key align") } if t.elem.size%uintptr(t.elem.align) != 0 { - throw("value size not a multiple of value align") + throw("elem size not a multiple of elem align") } if bucketCnt < 8 { throw("bucketsize too small for proper alignment") @@ -1370,7 +1372,7 @@ func reflect_makemap(t *maptype, cap int) *hmap { throw("need padding in bucket (key)") } if dataOffset%uintptr(t.elem.align) != 0 { - throw("need padding in bucket (value)") + throw("need padding in bucket (elem)") } return makemap(t, cap, nil) @@ -1378,18 +1380,18 @@ func reflect_makemap(t *maptype, cap int) *hmap { //go:linkname reflect_mapaccess reflect.mapaccess func reflect_mapaccess(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { - val, ok := mapaccess2(t, h, key) + elem, ok := mapaccess2(t, h, key) if !ok { // reflect wants nil for a missing element - val = nil + elem = nil } - return val + return elem } //go:linkname reflect_mapassign reflect.mapassign -func reflect_mapassign(t *maptype, h *hmap, key unsafe.Pointer, val unsafe.Pointer) { +func reflect_mapassign(t *maptype, h *hmap, key unsafe.Pointer, elem unsafe.Pointer) { p := mapassign(t, h, key) - typedmemmove(t.elem, p, val) + typedmemmove(t.elem, p, elem) } //go:linkname reflect_mapdelete reflect.mapdelete @@ -1414,9 +1416,9 @@ func reflect_mapiterkey(it *hiter) unsafe.Pointer { return it.key } -//go:linkname reflect_mapitervalue reflect.mapitervalue -func reflect_mapitervalue(it *hiter) unsafe.Pointer { - return it.value +//go:linkname reflect_mapiterelem reflect.mapiterelem +func reflect_mapiterelem(it *hiter) unsafe.Pointer { + return it.elem } //go:linkname reflect_maplen reflect.maplen @@ -1431,6 +1433,18 @@ func reflect_maplen(h *hmap) int { return h.count } +//go:linkname reflectlite_maplen internal..z2freflectlite.maplen +func reflectlite_maplen(h *hmap) int { + if h == nil { + return 0 + } + if raceenabled { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(reflect_maplen)) + } + return h.count +} + //go:linkname reflect_ismapkey reflect.ismapkey func reflect_ismapkey(t *_type) bool { return ismapkey(t) diff --git a/libgo/go/runtime/map_fast32.go b/libgo/go/runtime/map_fast32.go index 67d6df8c9a3..57b3c0f1c50 100644 --- a/libgo/go/runtime/map_fast32.go +++ b/libgo/go/runtime/map_fast32.go @@ -50,7 +50,7 @@ func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { for ; b != nil; b = b.overflow(t) { for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { if *(*uint32)(k) == key && !isEmpty(b.tophash[i]) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) + return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.elemsize)) } } } @@ -90,7 +90,7 
@@ func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) { for ; b != nil; b = b.overflow(t) { for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { if *(*uint32)(k) == key && !isEmpty(b.tophash[i]) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)), true + return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.elemsize)), true } } } @@ -179,12 +179,12 @@ bucketloop: h.count++ done: - val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize)) + elem := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.elemsize)) if h.flags&hashWriting == 0 { throw("concurrent map writes") } h.flags &^= hashWriting - return val + return elem } func mapassign_fast32ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { @@ -269,12 +269,12 @@ bucketloop: h.count++ done: - val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize)) + elem := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.elemsize)) if h.flags&hashWriting == 0 { throw("concurrent map writes") } h.flags &^= hashWriting - return val + return elem } func mapdelete_fast32(t *maptype, h *hmap, key uint32) { @@ -307,14 +307,14 @@ search: continue } // Only clear key if there are pointers in it. - if t.key.kind&kindNoPointers == 0 { + if t.key.ptrdata != 0 { memclrHasPointers(k, t.key.size) } - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) - if t.elem.kind&kindNoPointers == 0 { - memclrHasPointers(v, t.elem.size) + e := add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.elemsize)) + if t.elem.ptrdata != 0 { + memclrHasPointers(e, t.elem.size) } else { - memclrNoHeapPointers(v, t.elem.size) + memclrNoHeapPointers(e, t.elem.size) } b.tophash[i] = emptyOne // If the bucket now ends in a bunch of emptyOne states, @@ -381,7 +381,7 @@ func evacuate_fast32(t *maptype, h *hmap, oldbucket uintptr) { x := &xy[0] x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) x.k = add(unsafe.Pointer(x.b), dataOffset) - x.v = add(x.k, bucketCnt*4) + x.e = add(x.k, bucketCnt*4) if !h.sameSizeGrow() { // Only calculate y pointers if we're growing bigger. @@ -389,13 +389,13 @@ func evacuate_fast32(t *maptype, h *hmap, oldbucket uintptr) { y := &xy[1] y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) y.k = add(unsafe.Pointer(y.b), dataOffset) - y.v = add(y.k, bucketCnt*4) + y.e = add(y.k, bucketCnt*4) } for ; b != nil; b = b.overflow(t) { k := add(unsafe.Pointer(b), dataOffset) - v := add(k, bucketCnt*4) - for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 4), add(v, uintptr(t.valuesize)) { + e := add(k, bucketCnt*4) + for i := 0; i < bucketCnt; i, k, e = i+1, add(k, 4), add(e, uintptr(t.elemsize)) { top := b.tophash[i] if isEmpty(top) { b.tophash[i] = evacuatedEmpty @@ -407,7 +407,7 @@ func evacuate_fast32(t *maptype, h *hmap, oldbucket uintptr) { var useY uint8 if !h.sameSizeGrow() { // Compute hash to make our evacuation decision (whether we need - // to send this key/value to bucket x or bucket y). + // to send this key/elem to bucket x or bucket y). 
hash := t.key.hashfn(k, uintptr(h.hash0)) if hash&newbit != 0 { useY = 1 @@ -421,30 +421,30 @@ func evacuate_fast32(t *maptype, h *hmap, oldbucket uintptr) { dst.b = h.newoverflow(t, dst.b) dst.i = 0 dst.k = add(unsafe.Pointer(dst.b), dataOffset) - dst.v = add(dst.k, bucketCnt*4) + dst.e = add(dst.k, bucketCnt*4) } dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check // Copy key. - if sys.PtrSize == 4 && t.key.kind&kindNoPointers == 0 && writeBarrier.enabled { + if sys.PtrSize == 4 && t.key.ptrdata != 0 && writeBarrier.enabled { // Write with a write barrier. *(*unsafe.Pointer)(dst.k) = *(*unsafe.Pointer)(k) } else { *(*uint32)(dst.k) = *(*uint32)(k) } - typedmemmove(t.elem, dst.v, v) + typedmemmove(t.elem, dst.e, e) dst.i++ // These updates might push these pointers past the end of the - // key or value arrays. That's ok, as we have the overflow pointer + // key or elem arrays. That's ok, as we have the overflow pointer // at the end of the bucket to protect against pointing past the // end of the bucket. dst.k = add(dst.k, 4) - dst.v = add(dst.v, uintptr(t.valuesize)) + dst.e = add(dst.e, uintptr(t.elemsize)) } } - // Unlink the overflow buckets & clear key/value to help GC. - if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + // Unlink the overflow buckets & clear key/elem to help GC. + if h.flags&oldIterator == 0 && t.bucket.ptrdata != 0 { b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) // Preserve b.tophash because the evacuation // state is maintained there. diff --git a/libgo/go/runtime/map_fast64.go b/libgo/go/runtime/map_fast64.go index b62ecb106cc..af86f747f05 100644 --- a/libgo/go/runtime/map_fast64.go +++ b/libgo/go/runtime/map_fast64.go @@ -50,7 +50,7 @@ func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { for ; b != nil; b = b.overflow(t) { for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { if *(*uint64)(k) == key && !isEmpty(b.tophash[i]) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) + return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.elemsize)) } } } @@ -90,7 +90,7 @@ func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) { for ; b != nil; b = b.overflow(t) { for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { if *(*uint64)(k) == key && !isEmpty(b.tophash[i]) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)), true + return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.elemsize)), true } } } @@ -179,12 +179,12 @@ bucketloop: h.count++ done: - val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize)) + elem := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.elemsize)) if h.flags&hashWriting == 0 { throw("concurrent map writes") } h.flags &^= hashWriting - return val + return elem } func mapassign_fast64ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { @@ -269,12 +269,12 @@ bucketloop: h.count++ done: - val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize)) + elem := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.elemsize)) if h.flags&hashWriting == 0 { throw("concurrent map writes") } h.flags &^= hashWriting - return val + return elem } func mapdelete_fast64(t *maptype, h *hmap, key uint64) { @@ -307,14 +307,14 @@ search: continue } // Only clear key if there are pointers in it. 
- if t.key.kind&kindNoPointers == 0 { + if t.key.ptrdata != 0 { memclrHasPointers(k, t.key.size) } - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) - if t.elem.kind&kindNoPointers == 0 { - memclrHasPointers(v, t.elem.size) + e := add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.elemsize)) + if t.elem.ptrdata != 0 { + memclrHasPointers(e, t.elem.size) } else { - memclrNoHeapPointers(v, t.elem.size) + memclrNoHeapPointers(e, t.elem.size) } b.tophash[i] = emptyOne // If the bucket now ends in a bunch of emptyOne states, @@ -381,7 +381,7 @@ func evacuate_fast64(t *maptype, h *hmap, oldbucket uintptr) { x := &xy[0] x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) x.k = add(unsafe.Pointer(x.b), dataOffset) - x.v = add(x.k, bucketCnt*8) + x.e = add(x.k, bucketCnt*8) if !h.sameSizeGrow() { // Only calculate y pointers if we're growing bigger. @@ -389,13 +389,13 @@ func evacuate_fast64(t *maptype, h *hmap, oldbucket uintptr) { y := &xy[1] y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) y.k = add(unsafe.Pointer(y.b), dataOffset) - y.v = add(y.k, bucketCnt*8) + y.e = add(y.k, bucketCnt*8) } for ; b != nil; b = b.overflow(t) { k := add(unsafe.Pointer(b), dataOffset) - v := add(k, bucketCnt*8) - for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 8), add(v, uintptr(t.valuesize)) { + e := add(k, bucketCnt*8) + for i := 0; i < bucketCnt; i, k, e = i+1, add(k, 8), add(e, uintptr(t.elemsize)) { top := b.tophash[i] if isEmpty(top) { b.tophash[i] = evacuatedEmpty @@ -407,7 +407,7 @@ func evacuate_fast64(t *maptype, h *hmap, oldbucket uintptr) { var useY uint8 if !h.sameSizeGrow() { // Compute hash to make our evacuation decision (whether we need - // to send this key/value to bucket x or bucket y). + // to send this key/elem to bucket x or bucket y). hash := t.key.hashfn(k, uintptr(h.hash0)) if hash&newbit != 0 { useY = 1 @@ -421,12 +421,12 @@ func evacuate_fast64(t *maptype, h *hmap, oldbucket uintptr) { dst.b = h.newoverflow(t, dst.b) dst.i = 0 dst.k = add(unsafe.Pointer(dst.b), dataOffset) - dst.v = add(dst.k, bucketCnt*8) + dst.e = add(dst.k, bucketCnt*8) } dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check // Copy key. - if t.key.kind&kindNoPointers == 0 && writeBarrier.enabled { + if t.key.ptrdata != 0 && writeBarrier.enabled { if sys.PtrSize == 8 { // Write with a write barrier. *(*unsafe.Pointer)(dst.k) = *(*unsafe.Pointer)(k) @@ -439,18 +439,18 @@ func evacuate_fast64(t *maptype, h *hmap, oldbucket uintptr) { *(*uint64)(dst.k) = *(*uint64)(k) } - typedmemmove(t.elem, dst.v, v) + typedmemmove(t.elem, dst.e, e) dst.i++ // These updates might push these pointers past the end of the - // key or value arrays. That's ok, as we have the overflow pointer + // key or elem arrays. That's ok, as we have the overflow pointer // at the end of the bucket to protect against pointing past the // end of the bucket. dst.k = add(dst.k, 8) - dst.v = add(dst.v, uintptr(t.valuesize)) + dst.e = add(dst.e, uintptr(t.elemsize)) } } - // Unlink the overflow buckets & clear key/value to help GC. - if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + // Unlink the overflow buckets & clear key/elem to help GC. + if h.flags&oldIterator == 0 && t.bucket.ptrdata != 0 { b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) // Preserve b.tophash because the evacuation // state is maintained there. 
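The change repeated across these map files (and in mbarrier.go below) swaps the old "no pointers" kind bit for a direct test of ptrdata, the number of bytes in the type's prefix that can contain pointers; ptrdata == 0 therefore means "scalar-only". A hedged sketch of the pattern, with a hypothetical typeInfo standing in for the runtime's _type:

package main

import "fmt"

type typeInfo struct {
	size    uintptr
	ptrdata uintptr // 0 means the value can never hold pointers
}

// clearWithBarriers mirrors the branch used in mapdelete: pointer-bearing
// memory must be cleared through the write-barrier path so the GC observes
// the dying pointers; scalar-only memory can be cleared directly.
func clearWithBarriers(t typeInfo) bool {
	return t.ptrdata != 0
}

func main() {
	fmt.Println(clearWithBarriers(typeInfo{size: 16, ptrdata: 0})) // e.g. [2]int64: false
	fmt.Println(clearWithBarriers(typeInfo{size: 16, ptrdata: 8})) // e.g. a struct whose first word is a pointer (64-bit layout): true
}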
diff --git a/libgo/go/runtime/map_faststr.go b/libgo/go/runtime/map_faststr.go index 2202695a45b..3c5175d9686 100644 --- a/libgo/go/runtime/map_faststr.go +++ b/libgo/go/runtime/map_faststr.go @@ -42,7 +42,7 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { continue } if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize)) } } return unsafe.Pointer(&zeroVal[0]) @@ -58,7 +58,7 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { continue } if k.str == key.str { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize)) } // check first 4 bytes if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) { @@ -77,7 +77,7 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { if keymaybe != bucketCnt { k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize)) if memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.valuesize)) + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.elemsize)) } } return unsafe.Pointer(&zeroVal[0]) @@ -104,7 +104,7 @@ dohash: continue } if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize)) } } } @@ -137,7 +137,7 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) { continue } if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize)), true } } return unsafe.Pointer(&zeroVal[0]), false @@ -153,7 +153,7 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) { continue } if k.str == key.str { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize)), true } // check first 4 bytes if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) { @@ -172,7 +172,7 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) { if keymaybe != bucketCnt { k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize)) if memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.valuesize)), true + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.elemsize)), true } } return unsafe.Pointer(&zeroVal[0]), false @@ -199,7 +199,7 @@ dohash: continue } if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize)), true } } } @@ -293,12 +293,12 @@ bucketloop: h.count++ done: - val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*2*sys.PtrSize+inserti*uintptr(t.valuesize)) + elem := 
add(unsafe.Pointer(insertb), dataOffset+bucketCnt*2*sys.PtrSize+inserti*uintptr(t.elemsize)) if h.flags&hashWriting == 0 { throw("concurrent map writes") } h.flags &^= hashWriting - return val + return elem } func mapdelete_faststr(t *maptype, h *hmap, ky string) { @@ -338,11 +338,11 @@ search: } // Clear key's pointer. k.str = nil - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) - if t.elem.kind&kindNoPointers == 0 { - memclrHasPointers(v, t.elem.size) + e := add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize)) + if t.elem.ptrdata != 0 { + memclrHasPointers(e, t.elem.size) } else { - memclrNoHeapPointers(v, t.elem.size) + memclrNoHeapPointers(e, t.elem.size) } b.tophash[i] = emptyOne // If the bucket now ends in a bunch of emptyOne states, @@ -409,7 +409,7 @@ func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) { x := &xy[0] x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) x.k = add(unsafe.Pointer(x.b), dataOffset) - x.v = add(x.k, bucketCnt*2*sys.PtrSize) + x.e = add(x.k, bucketCnt*2*sys.PtrSize) if !h.sameSizeGrow() { // Only calculate y pointers if we're growing bigger. @@ -417,13 +417,13 @@ func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) { y := &xy[1] y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) y.k = add(unsafe.Pointer(y.b), dataOffset) - y.v = add(y.k, bucketCnt*2*sys.PtrSize) + y.e = add(y.k, bucketCnt*2*sys.PtrSize) } for ; b != nil; b = b.overflow(t) { k := add(unsafe.Pointer(b), dataOffset) - v := add(k, bucketCnt*2*sys.PtrSize) - for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 2*sys.PtrSize), add(v, uintptr(t.valuesize)) { + e := add(k, bucketCnt*2*sys.PtrSize) + for i := 0; i < bucketCnt; i, k, e = i+1, add(k, 2*sys.PtrSize), add(e, uintptr(t.elemsize)) { top := b.tophash[i] if isEmpty(top) { b.tophash[i] = evacuatedEmpty @@ -435,7 +435,7 @@ func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) { var useY uint8 if !h.sameSizeGrow() { // Compute hash to make our evacuation decision (whether we need - // to send this key/value to bucket x or bucket y). + // to send this key/elem to bucket x or bucket y). hash := t.key.hashfn(k, uintptr(h.hash0)) if hash&newbit != 0 { useY = 1 @@ -449,26 +449,25 @@ func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) { dst.b = h.newoverflow(t, dst.b) dst.i = 0 dst.k = add(unsafe.Pointer(dst.b), dataOffset) - dst.v = add(dst.k, bucketCnt*2*sys.PtrSize) + dst.e = add(dst.k, bucketCnt*2*sys.PtrSize) } dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check // Copy key. *(*string)(dst.k) = *(*string)(k) - typedmemmove(t.elem, dst.v, v) + typedmemmove(t.elem, dst.e, e) dst.i++ // These updates might push these pointers past the end of the - // key or value arrays. That's ok, as we have the overflow pointer + // key or elem arrays. That's ok, as we have the overflow pointer // at the end of the bucket to protect against pointing past the // end of the bucket. dst.k = add(dst.k, 2*sys.PtrSize) - dst.v = add(dst.v, uintptr(t.valuesize)) + dst.e = add(dst.e, uintptr(t.elemsize)) } } - // Unlink the overflow buckets & clear key/value to help GC. - // Unlink the overflow buckets & clear key/value to help GC. - if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + // Unlink the overflow buckets & clear key/elem to help GC. 
+ if h.flags&oldIterator == 0 && t.bucket.ptrdata != 0 { b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) // Preserve b.tophash because the evacuation // state is maintained there. diff --git a/libgo/go/runtime/mbarrier.go b/libgo/go/runtime/mbarrier.go index 00e5eb8baa5..e66b50d1927 100644 --- a/libgo/go/runtime/mbarrier.go +++ b/libgo/go/runtime/mbarrier.go @@ -163,7 +163,7 @@ func typedmemmove(typ *_type, dst, src unsafe.Pointer) { if dst == src { return } - if typ.kind&kindNoPointers == 0 { + if typ.ptrdata != 0 { bulkBarrierPreWrite(uintptr(dst), uintptr(src), typ.size) } // There's a race here: if some other goroutine can write to @@ -192,11 +192,16 @@ func reflect_typedmemmove(typ *_type, dst, src unsafe.Pointer) { typedmemmove(typ, dst, src) } +//go:linkname reflectlite_typedmemmove internal..z2freflectlite.typedmemmove +func reflectlite_typedmemmove(typ *_type, dst, src unsafe.Pointer) { + reflect_typedmemmove(typ, dst, src) +} + // typedmemmovepartial is like typedmemmove but assumes that // dst and src point off bytes into the value and only copies size bytes. //go:linkname reflect_typedmemmovepartial reflect.typedmemmovepartial func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size uintptr) { - if writeBarrier.needed && typ.kind&kindNoPointers == 0 && size >= sys.PtrSize { + if writeBarrier.needed && typ.ptrdata != 0 && size >= sys.PtrSize { // Pointer-align start address for bulk barrier. adst, asrc, asize := dst, src, size if frag := -off & (sys.PtrSize - 1); frag != 0 { @@ -248,7 +253,7 @@ func typedslicecopy(typ *_type, dst, src slice) int { return n } - // Note: No point in checking typ.kind&kindNoPointers here: + // Note: No point in checking typ.ptrdata here: // compiler only emits calls to typedslicecopy for types with pointers, // and growslice and reflect_typedslicecopy check for pointers // before calling typedslicecopy. @@ -264,7 +269,7 @@ func typedslicecopy(typ *_type, dst, src slice) int { //go:linkname reflect_typedslicecopy reflect.typedslicecopy func reflect_typedslicecopy(elemType *_type, dst, src slice) int { - if elemType.kind&kindNoPointers != 0 { + if elemType.ptrdata == 0 { n := dst.len if n > src.len { n = src.len @@ -301,7 +306,7 @@ func reflect_typedslicecopy(elemType *_type, dst, src slice) int { // //go:nosplit func typedmemclr(typ *_type, ptr unsafe.Pointer) { - if typ.kind&kindNoPointers == 0 { + if typ.ptrdata != 0 { bulkBarrierPreWrite(uintptr(ptr), 0, typ.size) } memclrNoHeapPointers(ptr, typ.size) @@ -314,7 +319,7 @@ func reflect_typedmemclr(typ *_type, ptr unsafe.Pointer) { //go:linkname reflect_typedmemclrpartial reflect.typedmemclrpartial func reflect_typedmemclrpartial(typ *_type, ptr unsafe.Pointer, off, size uintptr) { - if typ.kind&kindNoPointers == 0 { + if typ.ptrdata != 0 { bulkBarrierPreWrite(uintptr(ptr), 0, size) } memclrNoHeapPointers(ptr, size) @@ -322,7 +327,7 @@ func reflect_typedmemclrpartial(typ *_type, ptr unsafe.Pointer, off, size uintpt // memclrHasPointers clears n bytes of typed memory starting at ptr. // The caller must ensure that the type of the object at ptr has -// pointers, usually by checking typ.kind&kindNoPointers. However, ptr +// pointers, usually by checking typ.ptrdata. However, ptr // does not have to point to the start of the allocation. 
// //go:nosplit diff --git a/libgo/go/runtime/mbitmap.go b/libgo/go/runtime/mbitmap.go index 9c25a215f35..b84fe0fc6c4 100644 --- a/libgo/go/runtime/mbitmap.go +++ b/libgo/go/runtime/mbitmap.go @@ -595,7 +595,7 @@ func (h heapBits) setCheckmarked(size uintptr) { // The pointer bitmap is not maintained for allocations containing // no pointers at all; any caller of bulkBarrierPreWrite must first // make sure the underlying allocation contains pointers, usually -// by checking typ.kind&kindNoPointers. +// by checking typ.ptrdata. // // Callers must perform cgo checks if writeBarrier.cgo. // @@ -1687,15 +1687,12 @@ Run: if n == 0 { // Program is over; continue in trailer if present. if trailer != nil { - //println("trailer") p = trailer trailer = nil continue } - //println("done") break Run } - //println("lit", n, dst) nbyte := n / 8 for i := uintptr(0); i < nbyte; i++ { bits |= uintptr(*p) << nbits diff --git a/libgo/go/runtime/mcache.go b/libgo/go/runtime/mcache.go index 2045158636a..ca926827e11 100644 --- a/libgo/go/runtime/mcache.go +++ b/libgo/go/runtime/mcache.go @@ -76,10 +76,13 @@ func (p gclinkptr) ptr() *gclink { var emptymspan mspan func allocmcache() *mcache { - lock(&mheap_.lock) - c := (*mcache)(mheap_.cachealloc.alloc()) - c.flushGen = mheap_.sweepgen - unlock(&mheap_.lock) + var c *mcache + systemstack(func() { + lock(&mheap_.lock) + c = (*mcache)(mheap_.cachealloc.alloc()) + c.flushGen = mheap_.sweepgen + unlock(&mheap_.lock) + }) for i := range c.alloc { c.alloc[i] = &emptymspan } diff --git a/libgo/go/runtime/mcentral.go b/libgo/go/runtime/mcentral.go index a60eb9fd0ca..cd5901054ae 100644 --- a/libgo/go/runtime/mcentral.go +++ b/libgo/go/runtime/mcentral.go @@ -251,16 +251,16 @@ func (c *mcentral) freeSpan(s *mspan, preserve bool, wasempty bool) bool { func (c *mcentral) grow() *mspan { npages := uintptr(class_to_allocnpages[c.spanclass.sizeclass()]) size := uintptr(class_to_size[c.spanclass.sizeclass()]) - n := (npages << _PageShift) / size s := mheap_.alloc(npages, c.spanclass, false, true) if s == nil { return nil } - p := s.base() - s.limit = p + size*n - + // Use division by multiplication and shifts to quickly compute: + // n := (npages << _PageShift) / size + n := (npages << _PageShift) >> s.divShift * uintptr(s.divMul) >> s.divShift2 + s.limit = s.base() + size*n heapBitsForAddr(s.base()).initSpan(s) return s } diff --git a/libgo/go/runtime/mem_gccgo.go b/libgo/go/runtime/mem_gccgo.go index 5ce816c323a..ba38ebaa9ab 100644 --- a/libgo/go/runtime/mem_gccgo.go +++ b/libgo/go/runtime/mem_gccgo.go @@ -7,7 +7,6 @@ package runtime import ( - "runtime/internal/sys" "unsafe" ) @@ -92,37 +91,35 @@ func sysUnused(v unsafe.Pointer, n uintptr) { // gets most of the benefit of huge pages while keeping the // number of VMAs under control. With hugePageSize = 2MB, even // a pessimal heap can reach 128GB before running out of VMAs. - if sys.HugePageSize != 0 && _MADV_NOHUGEPAGE != 0 { - var s uintptr = sys.HugePageSize // division by constant 0 is a compile-time error :( - + if physHugePageSize != 0 && _MADV_NOHUGEPAGE != 0 { // If it's a large allocation, we want to leave huge // pages enabled. Hence, we only adjust the huge page // flag on the huge pages containing v and v+n-1, and // only if those aren't aligned. var head, tail uintptr - if uintptr(v)%s != 0 { + if uintptr(v)%physHugePageSize != 0 { // Compute huge page containing v. 
- head = uintptr(v) &^ (s - 1) + head = uintptr(v) &^ (physHugePageSize - 1) } - if (uintptr(v)+n)%s != 0 { + if (uintptr(v)+n)%physHugePageSize != 0 { // Compute huge page containing v+n-1. - tail = (uintptr(v) + n - 1) &^ (s - 1) + tail = (uintptr(v) + n - 1) &^ (physHugePageSize - 1) } // Note that madvise will return EINVAL if the flag is // already set, which is quite likely. We ignore // errors. - if head != 0 && head+sys.HugePageSize == tail { + if head != 0 && head+physHugePageSize == tail { // head and tail are different but adjacent, // so do this in one call. - madvise(unsafe.Pointer(head), 2*sys.HugePageSize, _MADV_NOHUGEPAGE) + madvise(unsafe.Pointer(head), 2*physHugePageSize, _MADV_NOHUGEPAGE) } else { // Advise the huge pages containing v and v+n-1. if head != 0 { - madvise(unsafe.Pointer(head), sys.HugePageSize, _MADV_NOHUGEPAGE) + madvise(unsafe.Pointer(head), physHugePageSize, _MADV_NOHUGEPAGE) } if tail != 0 && tail != head { - madvise(unsafe.Pointer(tail), sys.HugePageSize, _MADV_NOHUGEPAGE) + madvise(unsafe.Pointer(tail), physHugePageSize, _MADV_NOHUGEPAGE) } } } @@ -142,21 +139,23 @@ func sysUnused(v unsafe.Pointer, n uintptr) { } func sysUsed(v unsafe.Pointer, n uintptr) { - if sys.HugePageSize != 0 && _MADV_HUGEPAGE != 0 { - // Partially undo the NOHUGEPAGE marks from sysUnused - // for whole huge pages between v and v+n. This may - // leave huge pages off at the end points v and v+n - // even though allocations may cover these entire huge - // pages. We could detect this and undo NOHUGEPAGE on - // the end points as well, but it's probably not worth - // the cost because when neighboring allocations are - // freed sysUnused will just set NOHUGEPAGE again. - var s uintptr = sys.HugePageSize + // Partially undo the NOHUGEPAGE marks from sysUnused + // for whole huge pages between v and v+n. This may + // leave huge pages off at the end points v and v+n + // even though allocations may cover these entire huge + // pages. We could detect this and undo NOHUGEPAGE on + // the end points as well, but it's probably not worth + // the cost because when neighboring allocations are + // freed sysUnused will just set NOHUGEPAGE again. + sysHugePage(v, n) +} +func sysHugePage(v unsafe.Pointer, n uintptr) { + if physHugePageSize != 0 && _MADV_HUGEPAGE != 0 { // Round v up to a huge page boundary. - beg := (uintptr(v) + (s - 1)) &^ (s - 1) + beg := (uintptr(v) + (physHugePageSize - 1)) &^ (physHugePageSize - 1) // Round v+n down to a huge page boundary. 
- end := (uintptr(v) + n) &^ (s - 1) + end := (uintptr(v) + n) &^ (physHugePageSize - 1) if beg < end { madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE) diff --git a/libgo/go/runtime/memmove_test.go b/libgo/go/runtime/memmove_test.go index b490cd815f5..0b2e19123d3 100644 --- a/libgo/go/runtime/memmove_test.go +++ b/libgo/go/runtime/memmove_test.go @@ -15,6 +15,9 @@ import ( ) func TestMemmove(t *testing.T) { + if *flagQuick { + t.Skip("-quick") + } t.Parallel() size := 256 if testing.Short() { @@ -54,6 +57,9 @@ func TestMemmove(t *testing.T) { } func TestMemmoveAlias(t *testing.T) { + if *flagQuick { + t.Skip("-quick") + } t.Parallel() size := 256 if testing.Short() { diff --git a/libgo/go/runtime/mfinal.go b/libgo/go/runtime/mfinal.go index caf2e7e1cce..2ca6280da34 100644 --- a/libgo/go/runtime/mfinal.go +++ b/libgo/go/runtime/mfinal.go @@ -318,7 +318,7 @@ func SetFinalizer(obj interface{}, finalizer interface{}) { if uintptr(e.data) != base { // As an implementation detail we allow to set finalizers for an inner byte // of an object if it could come from tiny alloc (see mallocgc for details). - if ot.elem == nil || ot.elem.kind&kindNoPointers == 0 || ot.elem.size >= maxTinySize { + if ot.elem == nil || ot.elem.ptrdata != 0 || ot.elem.size >= maxTinySize { throw("runtime.SetFinalizer: pointer not at beginning of allocated block") } } diff --git a/libgo/go/runtime/mgc.go b/libgo/go/runtime/mgc.go index b8c91ac29a1..46b7334e910 100644 --- a/libgo/go/runtime/mgc.go +++ b/libgo/go/runtime/mgc.go @@ -141,7 +141,7 @@ const ( // sweepMinHeapDistance is a lower bound on the heap distance // (in bytes) reserved for concurrent sweeping between GC - // cycles. This will be scaled by gcpercent/100. + // cycles. sweepMinHeapDistance = 1024 * 1024 ) @@ -202,27 +202,35 @@ func readgogc() int32 { // gcenable is called after the bulk of the runtime initialization, // just before we're about to start letting user code run. -// It kicks off the background sweeper goroutine and enables GC. +// It kicks off the background sweeper goroutine, the background +// scavenger goroutine, and enables GC. func gcenable() { - c := make(chan int, 1) + // Kick off sweeping and scavenging. + c := make(chan int, 2) expectSystemGoroutine() go bgsweep(c) + expectSystemGoroutine() + go bgscavenge(c) + <-c <-c memstats.enablegc = true // now that runtime is initialized, GC is okay } //go:linkname setGCPercent runtime..z2fdebug.setGCPercent func setGCPercent(in int32) (out int32) { - lock(&mheap_.lock) - out = gcpercent - if in < 0 { - in = -1 - } - gcpercent = in - heapminimum = defaultHeapMinimum * uint64(gcpercent) / 100 - // Update pacing in response to gcpercent change. - gcSetTriggerRatio(memstats.triggerRatio) - unlock(&mheap_.lock) + // Run on the system stack since we grab the heap lock. + systemstack(func() { + lock(&mheap_.lock) + out = gcpercent + if in < 0 { + in = -1 + } + gcpercent = in + heapminimum = defaultHeapMinimum * uint64(gcpercent) / 100 + // Update pacing in response to gcpercent change. + gcSetTriggerRatio(memstats.triggerRatio) + unlock(&mheap_.lock) + }) // If we just disabled GC, wait for any concurrent GC mark to // finish so we always return with no GC running. 
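The sysHugePage rounding in the mem_gccgo.go hunk above only advises huge pages wholly contained in [v, v+n): the start is rounded up to a physHugePageSize boundary and the end rounded down. A minimal sketch of that arithmetic, assuming (as the runtime does) that the huge page size is a power of two:

package main

import "fmt"

func hugePageRange(v, n, physHugePageSize uintptr) (beg, end uintptr) {
	beg = (v + physHugePageSize - 1) &^ (physHugePageSize - 1) // round v up
	end = (v + n) &^ (physHugePageSize - 1)                    // round v+n down
	return
}

func main() {
	const hp = 2 << 20 // 2MB huge pages
	beg, end := hugePageRange(3<<20, 6<<20, hp)
	fmt.Printf("advise [%d MiB, %d MiB)\n", beg>>20, end>>20) // advise [4 MiB, 8 MiB)
}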
@@ -405,23 +413,6 @@ func (c *gcControllerState) startCycle() { c.fractionalMarkTime = 0 c.idleMarkTime = 0 - // If this is the first GC cycle or we're operating on a very - // small heap, fake heap_marked so it looks like gc_trigger is - // the appropriate growth from heap_marked, even though the - // real heap_marked may not have a meaningful value (on the - // first cycle) or may be much smaller (resulting in a large - // error response). - if memstats.gc_trigger <= heapminimum { - memstats.heap_marked = uint64(float64(memstats.gc_trigger) / (1 + memstats.triggerRatio)) - } - - // Re-compute the heap goal for this cycle in case something - // changed. This is the same calculation we use elsewhere. - memstats.next_gc = memstats.heap_marked + memstats.heap_marked*uint64(gcpercent)/100 - if gcpercent < 0 { - memstats.next_gc = ^uint64(0) - } - // Ensure that the heap goal is at least a little larger than // the current live heap size. This may not be the case if GC // start is delayed or if the allocation that pushed heap_live @@ -586,7 +577,7 @@ func (c *gcControllerState) endCycle() float64 { // growth if we had the desired CPU utilization). The // difference between this estimate and the GOGC-based goal // heap growth is the error. - goalGrowthRatio := float64(gcpercent) / 100 + goalGrowthRatio := gcEffectiveGrowthRatio() actualGrowthRatio := float64(memstats.heap_live)/float64(memstats.heap_marked) - 1 assistDuration := nanotime() - c.markStartTime @@ -766,6 +757,14 @@ func pollFractionalWorkerExit() bool { // // mheap_.lock must be held or the world must be stopped. func gcSetTriggerRatio(triggerRatio float64) { + // Compute the next GC goal, which is when the allocated heap + // has grown by GOGC/100 over the heap marked by the last + // cycle. + goal := ^uint64(0) + if gcpercent >= 0 { + goal = memstats.heap_marked + memstats.heap_marked*uint64(gcpercent)/100 + } + // Set the trigger ratio, capped to reasonable bounds. if triggerRatio < 0 { // This can happen if the mutator is allocating very @@ -796,7 +795,7 @@ func gcSetTriggerRatio(triggerRatio float64) { // that concurrent sweep has some heap growth // in which to perform sweeping before we // start the next GC cycle. - sweepMin := atomic.Load64(&memstats.heap_live) + sweepMinHeapDistance*uint64(gcpercent)/100 + sweepMin := atomic.Load64(&memstats.heap_live) + sweepMinHeapDistance if sweepMin > minTrigger { minTrigger = sweepMin } @@ -808,22 +807,16 @@ func gcSetTriggerRatio(triggerRatio float64) { print("runtime: next_gc=", memstats.next_gc, " heap_marked=", memstats.heap_marked, " heap_live=", memstats.heap_live, " initialHeapLive=", work.initialHeapLive, "triggerRatio=", triggerRatio, " minTrigger=", minTrigger, "\n") throw("gc_trigger underflow") } - } - memstats.gc_trigger = trigger - - // Compute the next GC goal, which is when the allocated heap - // has grown by GOGC/100 over the heap marked by the last - // cycle. - goal := ^uint64(0) - if gcpercent >= 0 { - goal = memstats.heap_marked + memstats.heap_marked*uint64(gcpercent)/100 - if goal < trigger { + if trigger > goal { // The trigger ratio is always less than GOGC/100, but // other bounds on the trigger may have raised it. // Push up the goal, too. goal = trigger } } + + // Commit to the trigger and goal. 
+ memstats.gc_trigger = trigger memstats.next_gc = goal if trace.enabled { traceNextGC() @@ -866,6 +859,26 @@ func gcSetTriggerRatio(triggerRatio float64) { atomic.Store64(&mheap_.pagesSweptBasis, pagesSwept) } } + + gcPaceScavenger() +} + +// gcEffectiveGrowthRatio returns the current effective heap growth +// ratio (GOGC/100) based on heap_marked from the previous GC and +// next_gc for the current GC. +// +// This may differ from gcpercent/100 because of various upper and +// lower bounds on gcpercent. For example, if the heap is smaller than +// heapminimum, this can be higher than gcpercent/100. +// +// mheap_.lock must be held or the world must be stopped. +func gcEffectiveGrowthRatio() float64 { + egogc := float64(memstats.next_gc-memstats.heap_marked) / float64(memstats.heap_marked) + if egogc < 0 { + // Shouldn't happen, but just in case. + egogc = 0 + } + return egogc } // gcGoalUtilization is the goal CPU utilization for @@ -1137,15 +1150,10 @@ type gcTrigger struct { type gcTriggerKind int const ( - // gcTriggerAlways indicates that a cycle should be started - // unconditionally, even if GOGC is off or we're in a cycle - // right now. This cannot be consolidated with other cycles. - gcTriggerAlways gcTriggerKind = iota - // gcTriggerHeap indicates that a cycle should be started when // the heap size reaches the trigger heap size computed by the // controller. - gcTriggerHeap + gcTriggerHeap gcTriggerKind = iota // gcTriggerTime indicates that a cycle should be started when // it's been more than forcegcperiod nanoseconds since the @@ -1162,13 +1170,7 @@ const ( // that the exit condition for the _GCoff phase has been met. The exit // condition should be tested when allocating. func (t gcTrigger) test() bool { - if !memstats.enablegc || panicking != 0 { - return false - } - if t.kind == gcTriggerAlways { - return true - } - if gcphase != _GCoff { + if !memstats.enablegc || panicking != 0 || gcphase != _GCoff { return false } switch t.kind { @@ -1234,7 +1236,7 @@ func gcStart(trigger gcTrigger) { } // For stats, check if this GC was forced by the user. - work.userForced = trigger.kind == gcTriggerAlways || trigger.kind == gcTriggerCycle + work.userForced = trigger.kind == gcTriggerCycle // In gcstoptheworld debug mode, upgrade the mode accordingly. // We do this after re-checking the transition condition so @@ -1264,7 +1266,7 @@ func gcStart(trigger gcTrigger) { gcBgMarkStartWorkers() - gcResetMarkState() + systemstack(gcResetMarkState) work.stwprocs, work.maxprocs = gomaxprocs, gomaxprocs if work.stwprocs > ncpu { @@ -1994,7 +1996,6 @@ func gcMarkWorkAvailable(p *p) bool { // gcMark runs the mark (or, for concurrent GC, mark termination) // All gcWork caches must be empty. // STW is in effect at this point. -//TODO go:nowritebarrier func gcMark(start_time int64) { if debug.allocfreetrace > 0 { tracegc() @@ -2082,6 +2083,9 @@ func gcMark(start_time int64) { } } +// gcSweep must be called on the system stack because it acquires the heap +// lock. See mheap for details. +//go:systemstack func gcSweep(mode gcMode) { if gcphase != _GCoff { throw("gcSweep being done but phase is not GCoff") @@ -2138,6 +2142,11 @@ func gcSweep(mode gcMode) { // // This is safe to do without the world stopped because any Gs created // during or after this will start out in the reset state. +// +// gcResetMarkState must be called on the system stack because it acquires +// the heap lock. See mheap for details. 
+// +//go:systemstack func gcResetMarkState() { // This may be called during a concurrent phase, so make sure // allgs doesn't change. diff --git a/libgo/go/runtime/mgclarge.go b/libgo/go/runtime/mgclarge.go index 7b01a117808..414db100192 100644 --- a/libgo/go/runtime/mgclarge.go +++ b/libgo/go/runtime/mgclarge.go @@ -6,24 +6,31 @@ // // See malloc.go for the general overview. // -// Large spans are the subject of this file. Spans consisting of less than -// _MaxMHeapLists are held in lists of like sized spans. Larger spans -// are held in a treap. See https://en.wikipedia.org/wiki/Treap or +// Allocation policy is the subject of this file. All free spans live in +// a treap for most of their time being free. See +// https://en.wikipedia.org/wiki/Treap or // https://faculty.washington.edu/aragon/pubs/rst89.pdf for an overview. // sema.go also holds an implementation of a treap. // -// Each treapNode holds a single span. The treap is sorted by page size -// and for spans of the same size a secondary sort based on start address -// is done. -// Spans are returned based on a best fit algorithm and for spans of the same -// size the one at the lowest address is selected. +// Each treapNode holds a single span. The treap is sorted by base address +// and each span necessarily has a unique base address. +// Spans are returned based on a first-fit algorithm, acquiring the span +// with the lowest base address which still satisfies the request. +// +// The first-fit algorithm is possible due to an augmentation of each +// treapNode to maintain the size of the largest span in the subtree rooted +// at that treapNode. Below we refer to this invariant as the maxPages +// invariant. // // The primary routines are // insert: adds a span to the treap // remove: removes the span from that treap that best fits the required size // removeSpan: which removes a specific span from the treap // -// _mheap.lock must be held when manipulating this data structure. +// Whenever a pointer to a span which is owned by the treap is acquired, that +// span must not be mutated. To mutate a span in the treap, remove it first. +// +// mheap_.lock must be held when manipulating this data structure. 
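The maxPages augmentation described above is what makes first-fit possible without scanning: a search for n pages only descends into subtrees whose cached maximum is at least n, preferring the left (lower base address) side. A hypothetical, simplified sketch on a plain binary search tree (not the runtime's treapNode):

package main

import "fmt"

type node struct {
	left, right *node
	base        uintptr // sort key: span base address
	npages      uintptr // size of the span stored at this node
	maxPages    uintptr // largest npages anywhere in this subtree
}

// firstFit returns the lowest-base node whose span has at least want pages.
func firstFit(t *node, want uintptr) *node {
	if t == nil || t.maxPages < want {
		return nil // nothing in this subtree is big enough
	}
	for t != nil {
		switch {
		case t.left != nil && t.left.maxPages >= want:
			t = t.left // a suitable span exists at a lower base address
		case t.npages >= want:
			return t // this is the lowest suitable base
		case t.right != nil && t.right.maxPages >= want:
			t = t.right
		default:
			return nil // unreachable while the maxPages invariant holds
		}
	}
	return nil
}

func main() {
	// Spans at bases 1, 2, 3 holding 1, 8 and 4 pages respectively.
	root := &node{base: 2, npages: 8, maxPages: 8,
		left:  &node{base: 1, npages: 1, maxPages: 1},
		right: &node{base: 3, npages: 4, maxPages: 4},
	}
	fmt.Println(firstFit(root, 4).base) // 2: lowest base with at least 4 pages
}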
package runtime @@ -33,70 +40,153 @@ import ( //go:notinheap type mTreap struct { - treap *treapNode + treap *treapNode + unscavHugePages uintptr // number of unscavenged huge pages in the treap } //go:notinheap type treapNode struct { - right *treapNode // all treapNodes > this treap node - left *treapNode // all treapNodes < this treap node - parent *treapNode // direct parent of this node, nil if root - npagesKey uintptr // number of pages in spanKey, used as primary sort key - spanKey *mspan // span of size npagesKey, used as secondary sort key - priority uint32 // random number used by treap algorithm to keep tree probabilistically balanced + right *treapNode // all treapNodes > this treap node + left *treapNode // all treapNodes < this treap node + parent *treapNode // direct parent of this node, nil if root + key uintptr // base address of the span, used as primary sort key + span *mspan // span at base address key + maxPages uintptr // the maximum size of any span in this subtree, including the root + priority uint32 // random number used by treap algorithm to keep tree probabilistically balanced + types treapIterFilter // the types of spans available in this subtree } -func (t *treapNode) pred() *treapNode { +// updateInvariants is a helper method which has a node recompute its own +// maxPages and types values by looking at its own span as well as the +// values of its direct children. +// +// Returns true if anything changed. +func (t *treapNode) updateInvariants() bool { + m, i := t.maxPages, t.types + t.maxPages = t.span.npages + t.types = t.span.treapFilter() if t.left != nil { - // If it has a left child, its predecessor will be - // its right most left (grand)child. - t = t.left - for t.right != nil { - t = t.right + t.types |= t.left.types + if t.maxPages < t.left.maxPages { + t.maxPages = t.left.maxPages } - return t } - // If it has no left child, its predecessor will be - // the first grandparent who's right child is its - // ancestor. - // - // We compute this by walking up the treap until the - // current node's parent is its parent's right child. - // - // If we find at any point walking up the treap - // that the current node doesn't have a parent, - // we've hit the root. This means that t is already - // the left-most node in the treap and therefore - // has no predecessor. - for t.parent != nil && t.parent.right != t { - if t.parent.left != t { - println("runtime: predecessor t=", t, "t.spanKey=", t.spanKey) - throw("node is not its parent's child") + if t.right != nil { + t.types |= t.right.types + if t.maxPages < t.right.maxPages { + t.maxPages = t.right.maxPages } - t = t.parent } - return t.parent + return m != t.maxPages || i != t.types } -func (t *treapNode) succ() *treapNode { - if t.right != nil { - // If it has a right child, its successor will be - // its left-most right (grand)child. - t = t.right - for t.left != nil { +// findMinimal finds the minimal (lowest base addressed) node in the treap +// which matches the criteria set out by the filter f and returns nil if +// none exists. +// +// This algorithm is functionally the same as (*mTreap).find, so see that +// method for more details. 
+func (t *treapNode) findMinimal(f treapIterFilter) *treapNode { + if t == nil || !f.matches(t.types) { + return nil + } + for t != nil { + if t.left != nil && f.matches(t.left.types) { t = t.left + } else if f.matches(t.span.treapFilter()) { + break + } else if t.right != nil && f.matches(t.right.types) { + t = t.right + } else { + println("runtime: f=", f) + throw("failed to find minimal node matching filter") } - return t } - // See pred. - for t.parent != nil && t.parent.left != t { - if t.parent.right != t { - println("runtime: predecessor t=", t, "t.spanKey=", t.spanKey) - throw("node is not its parent's child") + return t +} + +// findMaximal finds the maximal (highest base addressed) node in the treap +// which matches the criteria set out by the filter f and returns nil if +// none exists. +// +// This algorithm is the logical inversion of findMinimal and just changes +// the order of the left and right tests. +func (t *treapNode) findMaximal(f treapIterFilter) *treapNode { + if t == nil || !f.matches(t.types) { + return nil + } + for t != nil { + if t.right != nil && f.matches(t.right.types) { + t = t.right + } else if f.matches(t.span.treapFilter()) { + break + } else if t.left != nil && f.matches(t.left.types) { + t = t.left + } else { + println("runtime: f=", f) + throw("failed to find minimal node matching filter") } + } + return t +} + +// pred returns the predecessor of t in the treap subject to the criteria +// specified by the filter f. Returns nil if no such predecessor exists. +func (t *treapNode) pred(f treapIterFilter) *treapNode { + if t.left != nil && f.matches(t.left.types) { + // The node has a left subtree which contains at least one matching + // node, find the maximal matching node in that subtree. + return t.left.findMaximal(f) + } + // Lacking a left subtree, look to the parents. + p := t // previous node + t = t.parent + for t != nil { + // Walk up the tree until we find a node that has a left subtree + // that we haven't already visited. + if t.right == p { + if f.matches(t.span.treapFilter()) { + // If this node matches, then it's guaranteed to be the + // predecessor since everything to its left is strictly + // greater. + return t + } else if t.left != nil && f.matches(t.left.types) { + // Failing the root of this subtree, if its left subtree has + // something, that's where we'll find our predecessor. + return t.left.findMaximal(f) + } + } + p = t + t = t.parent + } + // If the parent is nil, then we've hit the root without finding + // a suitable left subtree containing the node (and the predecessor + // wasn't on the path). Thus, there's no predecessor, so just return + // nil. + return nil +} + +// succ returns the successor of t in the treap subject to the criteria +// specified by the filter f. Returns nil if no such successor exists. +func (t *treapNode) succ(f treapIterFilter) *treapNode { + // See pred. This method is just the logical inversion of it. + if t.right != nil && f.matches(t.right.types) { + return t.right.findMinimal(f) + } + p := t + t = t.parent + for t != nil { + if t.left == p { + if f.matches(t.span.treapFilter()) { + return t + } else if t.right != nil && f.matches(t.right.types) { + return t.right.findMinimal(f) + } + } + p = t t = t.parent } - return t.parent + return nil } // isSpanInTreap is handy for debugging. 
One should hold the heap lock, usually @@ -105,10 +195,10 @@ func (t *treapNode) isSpanInTreap(s *mspan) bool { if t == nil { return false } - return t.spanKey == s || t.left.isSpanInTreap(s) || t.right.isSpanInTreap(s) + return t.span == s || t.left.isSpanInTreap(s) || t.right.isSpanInTreap(s) } -// walkTreap is handy for debugging. +// walkTreap is handy for debugging and testing. // Starting at some treapnode t, for example the root, do a depth first preorder walk of // the tree executing fn at each treap node. One should hold the heap lock, usually // mheap_.lock(). @@ -124,33 +214,116 @@ func (t *treapNode) walkTreap(fn func(tn *treapNode)) { // checkTreapNode when used in conjunction with walkTreap can usually detect a // poorly formed treap. func checkTreapNode(t *treapNode) { - // lessThan is used to order the treap. - // npagesKey and npages are the primary keys. - // spanKey and span are the secondary keys. - // span == nil (0) will always be lessThan all - // spans of the same size. - lessThan := func(npages uintptr, s *mspan) bool { - if t.npagesKey != npages { - return t.npagesKey < npages - } - // t.npagesKey == npages - return uintptr(unsafe.Pointer(t.spanKey)) < uintptr(unsafe.Pointer(s)) + if t == nil { + return + } + if t.span.next != nil || t.span.prev != nil || t.span.list != nil { + throw("span may be on an mSpanList while simultaneously in the treap") } + if t.span.base() != t.key { + println("runtime: checkTreapNode treapNode t=", t, " t.key=", t.key, + "t.span.base()=", t.span.base()) + throw("why does span.base() and treap.key do not match?") + } + if t.left != nil && t.key < t.left.key { + throw("found out-of-order spans in treap (left child has greater base address)") + } + if t.right != nil && t.key > t.right.key { + throw("found out-of-order spans in treap (right child has lesser base address)") + } +} +// validateInvariants is handy for debugging and testing. +// It ensures that the various invariants on each treap node are +// appropriately maintained throughout the treap by walking the +// treap in a post-order manner. +func (t *treapNode) validateInvariants() (uintptr, treapIterFilter) { if t == nil { - return + return 0, 0 + } + leftMax, leftTypes := t.left.validateInvariants() + rightMax, rightTypes := t.right.validateInvariants() + max := t.span.npages + if leftMax > max { + max = leftMax } - if t.spanKey.npages != t.npagesKey || t.spanKey.next != nil { - println("runtime: checkTreapNode treapNode t=", t, " t.npagesKey=", t.npagesKey, - "t.spanKey.npages=", t.spanKey.npages) - throw("why does span.npages and treap.ngagesKey do not match?") + if rightMax > max { + max = rightMax } - if t.left != nil && lessThan(t.left.npagesKey, t.left.spanKey) { - throw("t.lessThan(t.left.npagesKey, t.left.spanKey) is not false") + if max != t.maxPages { + println("runtime: t.maxPages=", t.maxPages, "want=", max) + throw("maxPages invariant violated in treap") } - if t.right != nil && !lessThan(t.right.npagesKey, t.right.spanKey) { - throw("!t.lessThan(t.left.npagesKey, t.left.spanKey) is not false") + typ := t.span.treapFilter() | leftTypes | rightTypes + if typ != t.types { + println("runtime: t.types=", t.types, "want=", typ) + throw("types invariant violated in treap") } + return max, typ +} + +// treapIterType represents the type of iteration to perform +// over the treap. Each different flag is represented by a bit +// in the type, and types may be combined together by a bitwise +// or operation. 
+// +// Note that only 5 bits are available for treapIterType, do not +// use the 3 higher-order bits. This constraint is to allow for +// expansion into a treapIterFilter, which is a uint32. +type treapIterType uint8 + +const ( + treapIterScav treapIterType = 1 << iota // scavenged spans + treapIterHuge // spans containing at least one huge page + treapIterBits = iota +) + +// treapIterFilter is a bitwise filter of different spans by binary +// properties. Each bit of a treapIterFilter represents a unique +// combination of bits set in a treapIterType, in other words, it +// represents the power set of a treapIterType. +// +// The purpose of this representation is to allow the existence of +// a specific span type to bubble up in the treap (see the types +// field on treapNode). +// +// More specifically, any treapIterType may be transformed into a +// treapIterFilter for a specific combination of flags via the +// following operation: 1 << (0x1f&treapIterType). +type treapIterFilter uint32 + +// treapFilterAll represents the filter which allows all spans. +const treapFilterAll = ^treapIterFilter(0) + +// treapFilter creates a new treapIterFilter from two treapIterTypes. +// mask represents a bitmask for which flags we should check against +// and match for the expected result after applying the mask. +func treapFilter(mask, match treapIterType) treapIterFilter { + allow := treapIterFilter(0) + for i := treapIterType(0); i < 1<<treapIterBits; i++ { + if mask&i == match { + allow |= 1 << i + } + } + return allow +} + +// matches returns true if m and f intersect. +func (f treapIterFilter) matches(m treapIterFilter) bool { + return f&m != 0 +} + +// treapFilter returns the treapIterFilter exactly matching this span, +// i.e. popcount(result) == 1. +func (s *mspan) treapFilter() treapIterFilter { + have := treapIterType(0) + if s.scavenged { + have |= treapIterScav + } + if s.hugePages() > 0 { + have |= treapIterHuge + } + return treapIterFilter(uint32(1) << (0x1f & have)) } // treapIter is a bidirectional iterator type which may be used to iterate over a @@ -160,13 +333,14 @@ func checkTreapNode(t *treapNode) { // // To create iterators over the treap, call start or end on an mTreap. type treapIter struct { + f treapIterFilter t *treapNode } // span returns the span at the current position in the treap. // If the treap is not valid, span will panic. func (i *treapIter) span() *mspan { - return i.t.spanKey + return i.t.span } // valid returns whether the iterator represents a valid position @@ -178,58 +352,78 @@ func (i *treapIter) valid() bool { // next moves the iterator forward by one. Once the iterator // ceases to be valid, calling next will panic. func (i treapIter) next() treapIter { - i.t = i.t.succ() + i.t = i.t.succ(i.f) return i } // prev moves the iterator backwards by one. Once the iterator // ceases to be valid, calling prev will panic. func (i treapIter) prev() treapIter { - i.t = i.t.pred() + i.t = i.t.pred(i.f) return i } // start returns an iterator which points to the start of the treap (the -// left-most node in the treap). -func (root *mTreap) start() treapIter { - t := root.treap - if t == nil { - return treapIter{} - } - for t.left != nil { - t = t.left - } - return treapIter{t: t} +// left-most node in the treap) subject to mask and match constraints. 
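The power-set encoding behind treapIterFilter is easier to see with concrete bits. The standalone sketch below mirrors treapFilter, matches, and the per-span filter locally (the names iterType, iterFilter, filter, spanFilter are local copies invented for the example, not the runtime definitions): it builds the filter for "unscavenged spans, huge or not" and tests two spans against it.

package main

import "fmt"

// Each span has two boolean properties (scavenged, huge); a filter is a
// bitmask with one bit per possible combination of those properties.
type iterType uint8

const (
    iterScav iterType = 1 << iota // span is scavenged
    iterHuge                      // span contains at least one huge page
    iterBits = iota               // number of property bits
)

type iterFilter uint32

// filter allows every combination i whose masked bits equal match.
func filter(mask, match iterType) iterFilter {
    allow := iterFilter(0)
    for i := iterType(0); i < 1<<iterBits; i++ {
        if i&mask == match {
            allow |= 1 << i
        }
    }
    return allow
}

// matches reports whether two filters share at least one combination.
func (f iterFilter) matches(m iterFilter) bool { return f&m != 0 }

// spanFilter is the single-bit filter describing one concrete span.
func spanFilter(scavenged, huge bool) iterFilter {
    have := iterType(0)
    if scavenged {
        have |= iterScav
    }
    if huge {
        have |= iterHuge
    }
    return 1 << have
}

func main() {
    // "Unscavenged spans, huge or not": mask only the scavenged bit and
    // require it to be clear. That admits combinations 0b00 and 0b10,
    // i.e. filter bits 1<<0 | 1<<2 = 0b0101.
    unscav := filter(iterScav, 0)
    fmt.Printf("filter=%04b\n", unscav) // filter=0101

    huge := spanFilter(false, true)   // unscavenged span with huge pages: bit 1<<2
    scav := spanFilter(true, false)   // scavenged span: bit 1<<1
    fmt.Println(unscav.matches(huge)) // true
    fmt.Println(unscav.matches(scav)) // false
}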
+func (root *mTreap) start(mask, match treapIterType) treapIter { + f := treapFilter(mask, match) + return treapIter{f, root.treap.findMinimal(f)} } // end returns an iterator which points to the end of the treap (the -// right-most node in the treap). -func (root *mTreap) end() treapIter { - t := root.treap - if t == nil { - return treapIter{} +// right-most node in the treap) subject to mask and match constraints. +func (root *mTreap) end(mask, match treapIterType) treapIter { + f := treapFilter(mask, match) + return treapIter{f, root.treap.findMaximal(f)} +} + +// mutate allows one to mutate the span without removing it from the treap via a +// callback. The span's base and size are allowed to change as long as the span +// remains in the same order relative to its predecessor and successor. +// +// Note however that any operation that causes a treap rebalancing inside of fn +// is strictly forbidden, as that may cause treap node metadata to go +// out-of-sync. +func (root *mTreap) mutate(i treapIter, fn func(span *mspan)) { + s := i.span() + // Save some state about the span for later inspection. + hpages := s.hugePages() + scavenged := s.scavenged + // Call the mutator. + fn(s) + // Update unscavHugePages appropriately. + if !scavenged { + mheap_.free.unscavHugePages -= hpages + } + if !s.scavenged { + mheap_.free.unscavHugePages += s.hugePages() } - for t.right != nil { - t = t.right + // Update the key in case the base changed. + i.t.key = s.base() + // Updating invariants up the tree needs to happen if + // anything changed at all, so just go ahead and do it + // unconditionally. + // + // If it turns out nothing changed, it'll exit quickly. + t := i.t + for t != nil && t.updateInvariants() { + t = t.parent } - return treapIter{t: t} } // insert adds span to the large span treap. func (root *mTreap) insert(span *mspan) { - npages := span.npages + if !span.scavenged { + root.unscavHugePages += span.hugePages() + } + base := span.base() var last *treapNode pt := &root.treap for t := *pt; t != nil; t = *pt { last = t - if t.npagesKey < npages { + if t.key < base { pt = &t.right - } else if t.npagesKey > npages { - pt = &t.left - } else if t.spanKey.base() < span.base() { - // t.npagesKey == npages, so sort on span addresses. - pt = &t.right - } else if t.spanKey.base() > span.base() { + } else if t.key > base { pt = &t.left } else { throw("inserting span already in treap") @@ -238,25 +432,34 @@ func (root *mTreap) insert(span *mspan) { // Add t as new leaf in tree of span size and unique addrs. // The balanced tree is a treap using priority as the random heap priority. - // That is, it is a binary tree ordered according to the npagesKey, + // That is, it is a binary tree ordered according to the key, // but then among the space of possible binary trees respecting those - // npagesKeys, it is kept balanced on average by maintaining a heap ordering + // keys, it is kept balanced on average by maintaining a heap ordering // on the priority: s.priority <= both s.right.priority and s.right.priority. // https://en.wikipedia.org/wiki/Treap // https://faculty.washington.edu/aragon/pubs/rst89.pdf t := (*treapNode)(mheap_.treapalloc.alloc()) - t.npagesKey = span.npages + t.key = span.base() t.priority = fastrand() - t.spanKey = span + t.span = span + t.maxPages = span.npages + t.types = span.treapFilter() t.parent = last *pt = t // t now at a leaf. + + // Update the tree to maintain the various invariants. 
+ i := t + for i.parent != nil && i.parent.updateInvariants() { + i = i.parent + } + // Rotate up into tree according to priority. for t.parent != nil && t.parent.priority > t.priority { - if t != nil && t.spanKey.npages != t.npagesKey { - println("runtime: insert t=", t, "t.npagesKey=", t.npagesKey) - println("runtime: t.spanKey=", t.spanKey, "t.spanKey.npages=", t.spanKey.npages) - throw("span and treap sizes do not match?") + if t != nil && t.span.base() != t.key { + println("runtime: insert t=", t, "t.key=", t.key) + println("runtime: t.span=", t.span, "t.span.base()=", t.span.base()) + throw("span and treap node base addresses do not match") } if t.parent.left == t { root.rotateRight(t.parent) @@ -270,8 +473,11 @@ func (root *mTreap) insert(span *mspan) { } func (root *mTreap) removeNode(t *treapNode) { - if t.spanKey.npages != t.npagesKey { - throw("span and treap node npages do not match") + if !t.span.scavenged { + root.unscavHugePages -= t.span.hugePages() + } + if t.span.base() != t.key { + throw("span and treap node base addresses do not match") } // Rotate t down to be leaf of tree for removal, respecting priorities. for t.right != nil || t.left != nil { @@ -283,10 +489,15 @@ func (root *mTreap) removeNode(t *treapNode) { } // Remove t, now a leaf. if t.parent != nil { - if t.parent.left == t { - t.parent.left = nil + p := t.parent + if p.left == t { + p.left = nil } else { - t.parent.right = nil + p.right = nil + } + // Walk up the tree updating invariants until no updates occur. + for p != nil && p.updateInvariants() { + p = p.parent } } else { root.treap = nil @@ -295,44 +506,64 @@ func (root *mTreap) removeNode(t *treapNode) { mheap_.treapalloc.free(unsafe.Pointer(t)) } -// find searches for, finds, and returns the treap node containing the -// smallest span that can hold npages. If no span has at least npages -// it returns nil. -// This is slightly more complicated than a simple binary tree search -// since if an exact match is not found the next larger node is -// returned. -func (root *mTreap) find(npages uintptr) *treapNode { +// find searches for, finds, and returns the treap iterator over all spans +// representing the position of the span with the smallest base address which is +// at least npages in size. If no span has at least npages it returns an invalid +// iterator. +// +// This algorithm is as follows: +// * If there's a left child and its subtree can satisfy this allocation, +// continue down that subtree. +// * If there's no such left child, check if the root of this subtree can +// satisfy the allocation. If so, we're done. +// * If the root cannot satisfy the allocation either, continue down the +// right subtree if able. +// * Else, break and report that we cannot satisfy the allocation. +// +// The preference for left, then current, then right, results in us getting +// the left-most node which will contain the span with the lowest base +// address. +// +// Note that if a request cannot be satisfied the fourth case will be +// reached immediately at the root, since neither the left subtree nor +// the right subtree will have a sufficient maxPages, whilst the root +// node is also unable to satisfy it. 
+func (root *mTreap) find(npages uintptr) treapIter { t := root.treap for t != nil { - if t.spanKey == nil { - throw("treap node with nil spanKey found") + if t.span == nil { + throw("treap node with nil span found") } - if t.npagesKey < npages { - t = t.right - } else if t.left != nil && t.left.npagesKey >= npages { + // Iterate over the treap trying to go as far left + // as possible while simultaneously ensuring that the + // subtrees we choose always have a span which can + // satisfy the allocation. + if t.left != nil && t.left.maxPages >= npages { t = t.left + } else if t.span.npages >= npages { + // Before going right, if this span can satisfy the + // request, stop here. + break + } else if t.right != nil && t.right.maxPages >= npages { + t = t.right } else { - return t + t = nil } } - return nil + return treapIter{treapFilterAll, t} } // removeSpan searches for, finds, deletes span along with // the associated treap node. If the span is not in the treap -// then t will eventually be set to nil and the t.spanKey +// then t will eventually be set to nil and the t.span // will throw. func (root *mTreap) removeSpan(span *mspan) { - npages := span.npages + base := span.base() t := root.treap - for t.spanKey != span { - if t.npagesKey < npages { - t = t.right - } else if t.npagesKey > npages { - t = t.left - } else if t.spanKey.base() < span.base() { + for t.span != span { + if t.key < base { t = t.right - } else if t.spanKey.base() > span.base() { + } else if t.key > base { t = t.left } } @@ -381,6 +612,9 @@ func (root *mTreap) rotateLeft(x *treapNode) { } p.right = y } + + x.updateInvariants() + y.updateInvariants() } // rotateRight rotates the tree rooted at node y. @@ -417,4 +651,7 @@ func (root *mTreap) rotateRight(y *treapNode) { } p.right = x } + + y.updateInvariants() + x.updateInvariants() } diff --git a/libgo/go/runtime/mgcscavenge.go b/libgo/go/runtime/mgcscavenge.go new file mode 100644 index 00000000000..910c1231278 --- /dev/null +++ b/libgo/go/runtime/mgcscavenge.go @@ -0,0 +1,367 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Scavenging free pages. +// +// This file implements scavenging (the release of physical pages backing mapped +// memory) of free and unused pages in the heap as a way to deal with page-level +// fragmentation and reduce the RSS of Go applications. +// +// Scavenging in Go happens on two fronts: there's the background +// (asynchronous) scavenger and the heap-growth (synchronous) scavenger. +// +// The former happens on a goroutine much like the background sweeper which is +// soft-capped at using scavengePercent of the mutator's time, based on +// order-of-magnitude estimates of the costs of scavenging. The background +// scavenger's primary goal is to bring the estimated heap RSS of the +// application down to a goal. +// +// That goal is defined as (retainExtraPercent+100) / 100 * next_gc. +// +// The goal is updated after each GC and the scavenger's pacing parameters +// (which live in mheap_) are updated to match. The pacing parameters work much +// like the background sweeping parameters. The parameters define a line whose +// horizontal axis is time and vertical axis is estimated heap RSS, and the +// scavenger attempts to stay below that line at all times. +// +// The synchronous heap-growth scavenging happens whenever the heap grows in +// size, for some definition of heap-growth. 
The intuition behind this is that +// the application had to grow the heap because existing fragments were +// not sufficiently large to satisfy a page-level memory allocation, so we +// scavenge those fragments eagerly to offset the growth in RSS that results. + +package runtime + +const ( + // The background scavenger is paced according to these parameters. + // + // scavengePercent represents the portion of mutator time we're willing + // to spend on scavenging in percent. + // + // scavengePageLatency is a worst-case estimate (order-of-magnitude) of + // the time it takes to scavenge one (regular-sized) page of memory. + // scavengeHugePageLatency is the same but for huge pages. + // + // scavengePagePeriod is derived from scavengePercent and scavengePageLatency, + // and represents the average time between scavenging one page that we're + // aiming for. scavengeHugePagePeriod is the same but for huge pages. + // These constants are core to the scavenge pacing algorithm. + scavengePercent = 1 // 1% + scavengePageLatency = 10e3 // 10µs + scavengeHugePageLatency = 10e3 // 10µs + scavengePagePeriod = scavengePageLatency / (scavengePercent / 100.0) + scavengeHugePagePeriod = scavengePageLatency / (scavengePercent / 100.0) + + // retainExtraPercent represents the amount of memory over the heap goal + // that the scavenger should keep as a buffer space for the allocator. + // + // The purpose of maintaining this overhead is to have a greater pool of + // unscavenged memory available for allocation (since using scavenged memory + // incurs an additional cost), to account for heap fragmentation and + // the ever-changing layout of the heap. + retainExtraPercent = 10 +) + +// heapRetained returns an estimate of the current heap RSS. +// +// mheap_.lock must be held or the world must be stopped. +func heapRetained() uint64 { + return memstats.heap_sys - memstats.heap_released +} + +// gcPaceScavenger updates the scavenger's pacing, particularly +// its rate and RSS goal. +// +// The RSS goal is based on the current heap goal with a small overhead +// to accomodate non-determinism in the allocator. +// +// The pacing is based on scavengePageRate, which applies to both regular and +// huge pages. See that constant for more information. +// +// mheap_.lock must be held or the world must be stopped. +func gcPaceScavenger() { + // Compute our scavenging goal and align it to a physical page boundary + // to make the following calculations more exact. + retainedGoal := memstats.next_gc + // Add retainExtraPercent overhead to retainedGoal. This calculation + // looks strange but the purpose is to arrive at an integer division + // (e.g. if retainExtraPercent = 12.5, then we get a divisor of 8) + // that also avoids the overflow from a multiplication. + retainedGoal += retainedGoal / (1.0 / (retainExtraPercent / 100.0)) + retainedGoal = (retainedGoal + uint64(physPageSize) - 1) &^ (uint64(physPageSize) - 1) + + // Represents where we are now in the heap's contribution to RSS in bytes. + // + // Guaranteed to always be a multiple of physPageSize on systems where + // physPageSize <= pageSize since we map heap_sys at a rate larger than + // any physPageSize and released memory in multiples of the physPageSize. + // + // However, certain functions recategorize heap_sys as other stats (e.g. + // stack_sys) and this happens in multiples of pageSize, so on systems + // where physPageSize > pageSize the calculations below will not be exact. 
+ // Generally this is OK since we'll be off by at most one regular + // physical page. + retainedNow := heapRetained() + + // If we're already below our goal, publish the goal in case it changed + // then disable the background scavenger. + if retainedNow <= retainedGoal { + mheap_.scavengeRetainedGoal = retainedGoal + mheap_.scavengeBytesPerNS = 0 + return + } + + // Now we start to compute the total amount of work necessary and the total + // amount of time we're willing to give the scavenger to complete this work. + // This will involve calculating how much of the work consists of huge pages + // and how much consists of regular pages since the former can let us scavenge + // more memory in the same time. + totalWork := retainedNow - retainedGoal + + // On systems without huge page support, all work is regular work. + regularWork := totalWork + hugeTime := uint64(0) + + // On systems where we have huge pages, we want to do as much of the + // scavenging work as possible on huge pages, because the costs are the + // same per page, but we can give back more more memory in a shorter + // period of time. + if physHugePageSize != 0 { + // Start by computing the amount of free memory we have in huge pages + // in total. Trivially, this is all the huge page work we need to do. + hugeWork := uint64(mheap_.free.unscavHugePages * physHugePageSize) + + // ...but it could turn out that there's more huge work to do than + // total work, so cap it at total work. This might happen for very large + // heaps where the additional factor of retainExtraPercent can make it so + // that there are free chunks of memory larger than a huge page that we don't want + // to scavenge. + if hugeWork >= totalWork { + hugePages := totalWork / uint64(physHugePageSize) + hugeWork = hugePages * uint64(physHugePageSize) + } + // Everything that's not huge work is regular work. At this point we + // know huge work so we can calculate how much time that will take + // based on scavengePageRate (which applies to pages of any size). + regularWork = totalWork - hugeWork + hugeTime = hugeWork / uint64(physHugePageSize) * scavengeHugePagePeriod + } + // Finally, we can compute how much time it'll take to do the regular work + // and the total time to do all the work. + regularTime := regularWork / uint64(physPageSize) * scavengePagePeriod + totalTime := hugeTime + regularTime + + now := nanotime() + + lock(&scavenge.lock) + + // Update all the pacing parameters in mheap with scavenge.lock held, + // so that scavenge.gen is kept in sync with the updated values. + mheap_.scavengeRetainedGoal = retainedGoal + mheap_.scavengeRetainedBasis = retainedNow + mheap_.scavengeTimeBasis = now + mheap_.scavengeBytesPerNS = float64(totalWork) / float64(totalTime) + scavenge.gen++ // increase scavenge generation + + // Wake up background scavenger if needed, since the pacing was just updated. + wakeScavengerLocked() + + unlock(&scavenge.lock) +} + +// State of the background scavenger. +var scavenge struct { + lock mutex + g *g + parked bool + timer *timer + gen uint32 // read with either lock or mheap_.lock, write with both +} + +// wakeScavengerLocked unparks the scavenger if necessary. It must be called +// after any pacing update. +// +// scavenge.lock must be held. +func wakeScavengerLocked() { + if scavenge.parked { + // Try to stop the timer but we don't really care if we succeed. + // It's possible that either a timer was never started, or that + // we're racing with it. 
+ // In the case that we're racing with there's the low chance that + // we experience a spurious wake-up of the scavenger, but that's + // totally safe. + stopTimer(scavenge.timer) + + // Unpark the goroutine and tell it that there may have been a pacing + // change. + scavenge.parked = false + ready(scavenge.g, 0, true) + } +} + +// scavengeSleep attempts to put the scavenger to sleep for ns. +// It also checks to see if gen != scavenge.gen before going to sleep, +// and aborts if true (meaning an update had occurred). +// +// Note that this function should only be called by the scavenger. +// +// The scavenger may be woken up earlier by a pacing change, and it may not go +// to sleep at all if there's a pending pacing change. +// +// Returns false if awoken early (i.e. true means a complete sleep). +func scavengeSleep(gen uint32, ns int64) bool { + lock(&scavenge.lock) + + // If there was an update, just abort the sleep. + if scavenge.gen != gen { + unlock(&scavenge.lock) + return false + } + + // Set the timer. + now := nanotime() + scavenge.timer.when = now + ns + startTimer(scavenge.timer) + + // Park the goroutine. It's fine that we don't publish the + // fact that the timer was set; even if the timer wakes up + // and fire scavengeReady before we park, it'll block on + // scavenge.lock. + scavenge.parked = true + goparkunlock(&scavenge.lock, waitReasonSleep, traceEvGoSleep, 2) + + // Return true if we completed the full sleep. + return (nanotime() - now) >= ns +} + +// Background scavenger. +// +// The background scavenger maintains the RSS of the application below +// the line described by the proportional scavenging statistics in +// the mheap struct. +func bgscavenge(c chan int) { + setSystemGoroutine() + + scavenge.g = getg() + + lock(&scavenge.lock) + scavenge.parked = true + + scavenge.timer = new(timer) + scavenge.timer.f = func(_ interface{}, _ uintptr) { + lock(&scavenge.lock) + wakeScavengerLocked() + unlock(&scavenge.lock) + } + + c <- 1 + goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1) + + // Parameters for sleeping. + // + // If we end up doing more work than we need, we should avoid spinning + // until we have more work to do: instead, we know exactly how much time + // until more work will need to be done, so we sleep. + // + // We should avoid sleeping for less than minSleepNS because Gosched() + // overheads among other things will work out better in that case. + // + // There's no reason to set a maximum on sleep time because we'll always + // get woken up earlier if there's any kind of update that could change + // the scavenger's pacing. + // + // retryDelayNS tracks how much to sleep next time we fail to do any + // useful work. + const minSleepNS = int64(100 * 1000) // 100 µs + + retryDelayNS := minSleepNS + + for { + released := uintptr(0) + park := false + ttnext := int64(0) + gen := uint32(0) + + // Run on the system stack since we grab the heap lock, + // and a stack growth with the heap lock means a deadlock. + systemstack(func() { + lock(&mheap_.lock) + + gen = scavenge.gen + + // If background scavenging is disabled or if there's no work to do just park. + retained := heapRetained() + if mheap_.scavengeBytesPerNS == 0 || retained <= mheap_.scavengeRetainedGoal { + unlock(&mheap_.lock) + park = true + return + } + + // Calculate how big we want the retained heap to be + // at this point in time. 
+ // + // The formula is for that of a line, y = b - mx + // We want y (want), + // m = scavengeBytesPerNS (> 0) + // x = time between scavengeTimeBasis and now + // b = scavengeRetainedBasis + rate := mheap_.scavengeBytesPerNS + tdist := nanotime() - mheap_.scavengeTimeBasis + rdist := uint64(rate * float64(tdist)) + want := mheap_.scavengeRetainedBasis - rdist + + // If we're above the line, scavenge to get below the + // line. + if retained > want { + released = mheap_.scavengeLocked(uintptr(retained - want)) + } + unlock(&mheap_.lock) + + // If we over-scavenged a bit, calculate how much time it'll + // take at the current rate for us to make that up. We definitely + // won't have any work to do until at least that amount of time + // passes. + if released > uintptr(retained-want) { + extra := released - uintptr(retained-want) + ttnext = int64(float64(extra) / rate) + } + }) + + if park { + lock(&scavenge.lock) + scavenge.parked = true + goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1) + continue + } + + if debug.gctrace > 0 { + if released > 0 { + print("scvg: ", released>>20, " MB released\n") + } + print("scvg: inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n") + } + + if released == 0 { + // If we were unable to release anything this may be because there's + // no free memory available to scavenge. Go to sleep and try again. + if scavengeSleep(gen, retryDelayNS) { + // If we successfully slept through the delay, back off exponentially. + retryDelayNS *= 2 + } + continue + } + retryDelayNS = minSleepNS + + if ttnext > 0 && ttnext > minSleepNS { + // If there's an appreciable amount of time until the next scavenging + // goal, just sleep. We'll get woken up if anything changes and this + // way we avoid spinning. + scavengeSleep(gen, ttnext) + continue + } + + // Give something else a chance to run, no locks are held. + Gosched() + } +} diff --git a/libgo/go/runtime/mgcsweep.go b/libgo/go/runtime/mgcsweep.go index 539a982c39d..c1c6e654c6e 100644 --- a/libgo/go/runtime/mgcsweep.go +++ b/libgo/go/runtime/mgcsweep.go @@ -205,7 +205,6 @@ func (s *mspan) ensureSwept() { // Returns true if the span was returned to heap. // If preserve=true, don't return it to heap nor relink in mcentral lists; // caller takes care of it. -//TODO go:nowritebarrier func (s *mspan) sweep(preserve bool) bool { // It's critical that we enter this function with preemption disabled, // GC must not start while we are in the middle of this function. diff --git a/libgo/go/runtime/mheap.go b/libgo/go/runtime/mheap.go index 2332a2e40bf..f18bf9beea3 100644 --- a/libgo/go/runtime/mheap.go +++ b/libgo/go/runtime/mheap.go @@ -29,9 +29,10 @@ const minPhysPageSize = 4096 // //go:notinheap type mheap struct { + // lock must only be acquired on the system stack, otherwise a g + // could self-deadlock if its stack grows with the lock held. lock mutex - free mTreap // free and non-scavenged spans - scav mTreap // free and scavenged spans + free mTreap // free spans sweepgen uint32 // sweep generation, see comment in mspan sweepdone uint32 // all spans are swept sweepers uint32 // number of active sweepone calls @@ -88,6 +89,25 @@ type mheap struct { // TODO(austin): pagesInUse should be a uintptr, but the 386 // compiler can't 8-byte align fields. 
+ // Scavenger pacing parameters + // + // The two basis parameters and the scavenge ratio parallel the proportional + // sweeping implementation, the primary differences being that: + // * Scavenging concerns itself with RSS, estimated as heapRetained() + // * Rather than pacing the scavenger to the GC, it is paced to a + // time-based rate computed in gcPaceScavenger. + // + // scavengeRetainedGoal represents our goal RSS. + // + // All fields must be accessed with lock. + // + // TODO(mknyszek): Consider abstracting the basis fields and the scavenge ratio + // into its own type so that this logic may be shared with proportional sweeping. + scavengeTimeBasis int64 + scavengeRetainedBasis uint64 + scavengeBytesPerNS float64 + scavengeRetainedGoal uint64 + // Page reclaimer state // reclaimIndex is the page index in allArenas of next page to @@ -107,14 +127,6 @@ type mheap struct { // This is accessed atomically. reclaimCredit uintptr - // scavengeCredit is spare credit for extra bytes scavenged. - // Since the scavenging mechanisms operate on spans, it may - // scavenge more than requested. Any spare pages released - // go to this credit pool. - // - // This is protected by the mheap lock. - scavengeCredit uintptr - // Malloc stats. largealloc uint64 // bytes allocated for large objects nlargealloc uint64 // number of large object allocations @@ -173,7 +185,7 @@ type mheap struct { // simply blocking GC (by disabling preemption). sweepArenas []arenaIdx - // _ uint32 // ensure 64-bit alignment of central + _ uint32 // ensure 64-bit alignment of central // central free lists for small size classes. // the padding makes sure that the mcentrals are @@ -395,7 +407,6 @@ type mspan struct { divShift2 uint8 // for divide by elemsize - divMagic.shift2 scavenged bool // whether this span has had its pages released to the OS elemsize uintptr // computed from sizeclass or from npages - unusedsince int64 // first time spotted by gc in mspanfree state limit uintptr // end of data in span speciallock mutex // guards specials list specials *special // linked list of special records sorted by offset. @@ -428,35 +439,43 @@ func (s *mspan) physPageBounds() (uintptr, uintptr) { } func (h *mheap) coalesce(s *mspan) { - // We scavenge s at the end after coalescing if s or anything - // it merged with is marked scavenged. - needsScavenge := false - prescavenged := s.released() // number of bytes already scavenged. - // merge is a helper which merges other into s, deletes references to other // in heap metadata, and then discards it. other must be adjacent to s. - merge := func(other *mspan) { + merge := func(a, b, other *mspan) { + // Caller must ensure a.startAddr < b.startAddr and that either a or + // b is s. a and b must be adjacent. other is whichever of the two is + // not s. + + if pageSize < physPageSize && a.scavenged && b.scavenged { + // If we're merging two scavenged spans on systems where + // pageSize < physPageSize, then their boundary should always be on + // a physical page boundary, due to the realignment that happens + // during coalescing. Throw if this case is no longer true, which + // means the implementation should probably be changed to scavenge + // along the boundary. 
+ _, start := a.physPageBounds() + end, _ := b.physPageBounds() + if start != end { + println("runtime: a.base=", hex(a.base()), "a.npages=", a.npages) + println("runtime: b.base=", hex(b.base()), "b.npages=", b.npages) + println("runtime: physPageSize=", physPageSize, "pageSize=", pageSize) + throw("neighboring scavenged spans boundary is not a physical page boundary") + } + } + // Adjust s via base and npages and also in heap metadata. s.npages += other.npages s.needzero |= other.needzero - if other.startAddr < s.startAddr { + if a == s { + h.setSpan(s.base()+s.npages*pageSize-1, s) + } else { s.startAddr = other.startAddr h.setSpan(s.base(), s) - } else { - h.setSpan(s.base()+s.npages*pageSize-1, s) } - // If before or s are scavenged, then we need to scavenge the final coalesced span. - needsScavenge = needsScavenge || other.scavenged || s.scavenged - prescavenged += other.released() - // The size is potentially changing so the treap needs to delete adjacent nodes and // insert back as a combined node. - if other.scavenged { - h.scav.removeSpan(other) - } else { - h.free.removeSpan(other) - } + h.free.removeSpan(other) other.state = mSpanDead h.spanalloc.free(unsafe.Pointer(other)) } @@ -468,17 +487,14 @@ func (h *mheap) coalesce(s *mspan) { // b is s. a and b must be adjacent. other is whichever of the two is // not s. - // If pageSize <= physPageSize then spans are always aligned + // If pageSize >= physPageSize then spans are always aligned // to physical page boundaries, so just exit. - if pageSize <= physPageSize { + if pageSize >= physPageSize { return } // Since we're resizing other, we must remove it from the treap. - if other.scavenged { - h.scav.removeSpan(other) - } else { - h.free.removeSpan(other) - } + h.free.removeSpan(other) + // Round boundary to the nearest physical page size, toward the // scavenged span. boundary := b.startAddr @@ -495,17 +511,15 @@ func (h *mheap) coalesce(s *mspan) { h.setSpan(boundary, b) // Re-insert other now that it has a new size. - if other.scavenged { - h.scav.insert(other) - } else { - h.free.insert(other) - } + h.free.insert(other) } + hpBefore := s.hugePages() + // Coalesce with earlier, later spans. if before := spanOf(s.base() - 1); before != nil && before.state == mSpanFree { if s.scavenged == before.scavenged { - merge(before) + merge(before, s, before) } else { realign(before, s, before) } @@ -514,28 +528,44 @@ func (h *mheap) coalesce(s *mspan) { // Now check to see if next (greater addresses) span is free and can be coalesced. if after := spanOf(s.base() + s.npages*pageSize); after != nil && after.state == mSpanFree { if s.scavenged == after.scavenged { - merge(after) + merge(s, after, after) } else { realign(s, after, after) } } - if needsScavenge { - // When coalescing spans, some physical pages which - // were not returned to the OS previously because - // they were only partially covered by the span suddenly - // become available for scavenging. We want to make sure - // those holes are filled in, and the span is properly - // scavenged. Rather than trying to detect those holes - // directly, we collect how many bytes were already - // scavenged above and subtract that from heap_released - // before re-scavenging the entire newly-coalesced span, - // which will implicitly bump up heap_released. 
- memstats.heap_released -= uint64(prescavenged) - s.scavenge() + if !s.scavenged && s.hugePages() > hpBefore { + // If s has grown such that it now may contain more huge pages than it + // did before, then mark the whole region as huge-page-backable. + // + // Otherwise, on systems where we break up huge pages (like Linux) + // s may not be backed by huge pages because it could be made up of + // pieces which are broken up in the underlying VMA. The primary issue + // with this is that it can lead to a poor estimate of the amount of + // free memory backed by huge pages for determining the scavenging rate. + sysHugePage(unsafe.Pointer(s.base()), s.npages*pageSize) } } +// hugePages returns the number of aligned physical huge pages in the memory +// regioned owned by this mspan. +func (s *mspan) hugePages() uintptr { + if physHugePageSize == 0 || s.npages < physHugePageSize/pageSize { + return 0 + } + start := s.base() + end := start + s.npages*pageSize + if physHugePageSize > pageSize { + // Round start and end in. + start = (start + physHugePageSize - 1) &^ (physHugePageSize - 1) + end &^= physHugePageSize - 1 + } + if start < end { + return (end - start) / physHugePageSize + } + return 0 +} + func (s *mspan) scavenge() uintptr { // start and end must be rounded in, otherwise madvise // will round them *out* and release more memory @@ -1067,9 +1097,8 @@ func (h *mheap) alloc(npage uintptr, spanclass spanClass, large bool, needzero b // The memory backing the returned span may not be zeroed if // span.needzero is set. // -// allocManual must be called on the system stack to prevent stack -// growth. Since this is used by the stack allocator, stack growth -// during allocManual would self-deadlock. +// allocManual must be called on the system stack because it acquires +// the heap lock. See mheap for details. // //go:systemstack func (h *mheap) allocManual(npage uintptr, stat *uint64) *mspan { @@ -1115,80 +1144,65 @@ func (h *mheap) setSpans(base, npage uintptr, s *mspan) { } } -// pickFreeSpan acquires a free span from internal free list -// structures if one is available. Otherwise returns nil. -// h must be locked. -func (h *mheap) pickFreeSpan(npage uintptr) *mspan { - tf := h.free.find(npage) - ts := h.scav.find(npage) - - // Check for whichever treap gave us the smaller, non-nil result. - // Note that we want the _smaller_ free span, i.e. the free span - // closer in size to the amount we requested (npage). - var s *mspan - if tf != nil && (ts == nil || tf.spanKey.npages <= ts.spanKey.npages) { - s = tf.spanKey - h.free.removeNode(tf) - } else if ts != nil && (tf == nil || tf.spanKey.npages > ts.spanKey.npages) { - s = ts.spanKey - h.scav.removeNode(ts) - } - return s -} - // Allocates a span of the given size. h must be locked. // The returned span has been removed from the // free structures, but its state is still mSpanFree. func (h *mheap) allocSpanLocked(npage uintptr, stat *uint64) *mspan { - var s *mspan - - s = h.pickFreeSpan(npage) - if s != nil { + t := h.free.find(npage) + if t.valid() { goto HaveSpan } - // On failure, grow the heap and try again. if !h.grow(npage) { return nil } - s = h.pickFreeSpan(npage) - if s != nil { + t = h.free.find(npage) + if t.valid() { goto HaveSpan } throw("grew heap, but no adequate free span found") HaveSpan: - // Mark span in use. 
+ s := t.span() if s.state != mSpanFree { throw("candidate mspan for allocation is not free") } - if s.npages < npage { - throw("candidate mspan for allocation is too small") - } // First, subtract any memory that was released back to - // the OS from s. We will re-scavenge the trimmed section - // if necessary. + // the OS from s. We will add back what's left if necessary. memstats.heap_released -= uint64(s.released()) - if s.npages > npage { - // Trim extra and put it back in the heap. - t := (*mspan)(h.spanalloc.alloc()) - t.init(s.base()+npage<<_PageShift, s.npages-npage) - s.npages = npage - h.setSpan(t.base()-1, s) - h.setSpan(t.base(), t) - h.setSpan(t.base()+t.npages*pageSize-1, t) - t.needzero = s.needzero - // If s was scavenged, then t may be scavenged. - start, end := t.physPageBounds() - if s.scavenged && start < end { - memstats.heap_released += uint64(end - start) - t.scavenged = true - } - s.state = mSpanManual // prevent coalescing with s - t.state = mSpanManual - h.freeSpanLocked(t, false, false, s.unusedsince) - s.state = mSpanFree + if s.npages == npage { + h.free.erase(t) + } else if s.npages > npage { + // Trim off the lower bits and make that our new span. + // Do this in-place since this operation does not + // affect the original span's location in the treap. + n := (*mspan)(h.spanalloc.alloc()) + h.free.mutate(t, func(s *mspan) { + n.init(s.base(), npage) + s.npages -= npage + s.startAddr = s.base() + npage*pageSize + h.setSpan(s.base()-1, n) + h.setSpan(s.base(), s) + h.setSpan(n.base(), n) + n.needzero = s.needzero + // n may not be big enough to actually be scavenged, but that's fine. + // We still want it to appear to be scavenged so that we can do the + // right bookkeeping later on in this function (i.e. sysUsed). + n.scavenged = s.scavenged + // Check if s is still scavenged. + if s.scavenged { + start, end := s.physPageBounds() + if start < end { + memstats.heap_released += uint64(end - start) + } else { + s.scavenged = false + } + } + }) + s = n + } else { + throw("candidate mspan for allocation is too small") } // "Unscavenge" s only AFTER splitting so that // we only sysUsed whatever we actually need. @@ -1201,22 +1215,20 @@ HaveSpan: // Since we allocated out of a scavenged span, we just // grew the RSS. Mitigate this by scavenging enough free - // space to make up for it. + // space to make up for it but only if we need to. // - // Also, scavengeLargest may cause coalescing, so prevent + // scavengeLocked may cause coalescing, so prevent // coalescing with s by temporarily changing its state. s.state = mSpanManual - h.scavengeLargest(s.npages * pageSize) + h.scavengeIfNeededLocked(s.npages * pageSize) s.state = mSpanFree } - s.unusedsince = 0 h.setSpans(s.base(), npage, s) *stat += uint64(npage << _PageShift) memstats.heap_idle -= uint64(npage << _PageShift) - //println("spanalloc", hex(s.start<<_PageShift)) if s.inList() { throw("still in list") } @@ -1235,23 +1247,22 @@ func (h *mheap) grow(npage uintptr) bool { return false } - // Scavenge some pages out of the free treap to make up for - // the virtual memory space we just allocated. We prefer to - // scavenge the largest spans first since the cost of scavenging - // is proportional to the number of sysUnused() calls rather than - // the number of pages released, so we make fewer of those calls - // with larger spans. - h.scavengeLargest(size) - // Create a fake "in use" span and free it, so that the - // right coalescing happens. + // right accounting and coalescing happens. 
s := (*mspan)(h.spanalloc.alloc()) s.init(uintptr(v), size/pageSize) h.setSpans(s.base(), s.npages, s) - atomic.Store(&s.sweepgen, h.sweepgen) - s.state = mSpanInUse - h.pagesInUse += uint64(s.npages) - h.freeSpanLocked(s, false, true, 0) + s.state = mSpanFree + memstats.heap_idle += uint64(size) + // (*mheap).sysAlloc returns untouched/uncommitted memory. + s.scavenged = true + // s is always aligned to the heap arena size which is always > physPageSize, + // so its totally safe to just add directly to heap_released. Coalescing, + // if possible, will also always be correct in terms of accounting, because + // s.base() must be a physical page boundary. + memstats.heap_released += uint64(size) + h.coalesce(s) + h.free.insert(s) return true } @@ -1281,7 +1292,7 @@ func (h *mheap) freeSpan(s *mspan, large bool) { // heap_scan changed. gcController.revise() } - h.freeSpanLocked(s, true, true, 0) + h.freeSpanLocked(s, true, true) unlock(&h.lock) }) } @@ -1293,8 +1304,8 @@ func (h *mheap) freeSpan(s *mspan, large bool) { // This must only be called when gcphase == _GCoff. See mSpanState for // an explanation. // -// freeManual must be called on the system stack to prevent stack -// growth, just like allocManual. +// freeManual must be called on the system stack because it acquires +// the heap lock. See mheap for details. // //go:systemstack func (h *mheap) freeManual(s *mspan, stat *uint64) { @@ -1302,12 +1313,11 @@ func (h *mheap) freeManual(s *mspan, stat *uint64) { lock(&h.lock) *stat -= uint64(s.npages << _PageShift) memstats.heap_sys += uint64(s.npages << _PageShift) - h.freeSpanLocked(s, false, true, 0) + h.freeSpanLocked(s, false, true) unlock(&h.lock) } -// s must be on the busy list or unlinked. -func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince int64) { +func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool) { switch s.state { case mSpanManual: if s.allocCount != 0 { @@ -1335,119 +1345,151 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i } s.state = mSpanFree - // Stamp newly unused spans. The scavenger will use that - // info to potentially give back some pages to the OS. - s.unusedsince = unusedsince - if unusedsince == 0 { - s.unusedsince = nanotime() - } - // Coalesce span with neighbors. h.coalesce(s) - // Insert s into the appropriate treap. - if s.scavenged { - h.scav.insert(s) - } else { - h.free.insert(s) - } + // Insert s into the treap. + h.free.insert(s) } -// scavengeLargest scavenges nbytes worth of spans in unscav -// starting from the largest span and working down. It then takes those spans -// and places them in scav. h must be locked. -func (h *mheap) scavengeLargest(nbytes uintptr) { - // Use up scavenge credit if there's any available. - if nbytes > h.scavengeCredit { - nbytes -= h.scavengeCredit - h.scavengeCredit = 0 - } else { - h.scavengeCredit -= nbytes - return +// scavengeSplit takes t.span() and attempts to split off a span containing size +// (in bytes) worth of physical pages from the back. +// +// The split point is only approximately defined by size since the split point +// is aligned to physPageSize and pageSize every time. If physHugePageSize is +// non-zero and the split point would break apart a huge page in the span, then +// the split point is also aligned to physHugePageSize. +// +// If the desired split point ends up at the base of s, or if size is obviously +// much larger than s, then a split is not possible and this method returns nil. 
+// Otherwise if a split occurred it returns the newly-created span. +func (h *mheap) scavengeSplit(t treapIter, size uintptr) *mspan { + s := t.span() + start, end := s.physPageBounds() + if end <= start || end-start <= size { + // Size covers the whole span. + return nil } - // Iterate over the treap backwards (from largest to smallest) scavenging spans - // until we've reached our quota of nbytes. - released := uintptr(0) - for t := h.free.end(); released < nbytes && t.valid(); { - s := t.span() - r := s.scavenge() - if r == 0 { - // Since we're going in order of largest-to-smallest span, this - // means all other spans are no bigger than s. There's a high - // chance that the other spans don't even cover a full page, - // (though they could) but iterating further just for a handful - // of pages probably isn't worth it, so just stop here. - // - // This check also preserves the invariant that spans that have - // `scavenged` set are only ever in the `scav` treap, and - // those which have it unset are only in the `free` treap. - return - } - n := t.prev() - h.free.erase(t) - // Now that s is scavenged, we must eagerly coalesce it - // with its neighbors to prevent having two spans with - // the same scavenged state adjacent to each other. - h.coalesce(s) - t = n - h.scav.insert(s) - released += r + // The span is bigger than what we need, so compute the base for the new + // span if we decide to split. + base := end - size + // Round down to the next physical or logical page, whichever is bigger. + base &^= (physPageSize - 1) | (pageSize - 1) + if base <= start { + return nil } - // If we over-scavenged, turn that extra amount into credit. - if released > nbytes { - h.scavengeCredit += released - nbytes + if physHugePageSize > pageSize && base&^(physHugePageSize-1) >= start { + // We're in danger of breaking apart a huge page, so include the entire + // huge page in the bound by rounding down to the huge page size. + // base should still be aligned to pageSize. + base &^= physHugePageSize - 1 } + if base == start { + // After all that we rounded base down to s.base(), so no need to split. + return nil + } + if base < start { + print("runtime: base=", base, ", s.npages=", s.npages, ", s.base()=", s.base(), ", size=", size, "\n") + print("runtime: physPageSize=", physPageSize, ", physHugePageSize=", physHugePageSize, "\n") + throw("bad span split base") + } + + // Split s in-place, removing from the back. + n := (*mspan)(h.spanalloc.alloc()) + nbytes := s.base() + s.npages*pageSize - base + h.free.mutate(t, func(s *mspan) { + n.init(base, nbytes/pageSize) + s.npages -= nbytes / pageSize + h.setSpan(n.base()-1, s) + h.setSpan(n.base(), n) + h.setSpan(n.base()+nbytes-1, n) + n.needzero = s.needzero + n.state = s.state + }) + return n } -// scavengeAll visits each node in the unscav treap and scavenges the -// treapNode's span. It then removes the scavenged span from -// unscav and adds it into scav before continuing. h must be locked. -func (h *mheap) scavengeAll(now, limit uint64) uintptr { - // Iterate over the treap scavenging spans if unused for at least limit time. +// scavengeLocked scavenges nbytes worth of spans in the free treap by +// starting from the span with the highest base address and working down. +// It then takes those spans and places them in scav. +// +// Returns the amount of memory scavenged in bytes. h must be locked. 
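Before moving on to scavengeLocked, the split-point arithmetic in scavengeSplit above is worth seeing in isolation. The sketch below is a hand-written illustration, not the runtime routine: it uses made-up page sizes and a bare function in place of the mspan and treap plumbing, and only computes the candidate base address for the piece to be split off the back.

package main

import "fmt"

// splitBase mirrors the split-point arithmetic described above: take the
// last size bytes of the range [start, end), round the resulting base down
// to both the physical and logical page size, and round it further down to
// a huge-page boundary if the split would otherwise cut a huge page apart.
// It returns 0 if no useful split point exists.
func splitBase(start, end, size, pageSize, physPageSize, physHugePageSize uintptr) uintptr {
    if end <= start || end-start <= size {
        return 0 // the request covers the whole range; nothing to split off
    }
    base := end - size
    base &^= (physPageSize - 1) | (pageSize - 1) // align down to both page sizes
    if base <= start {
        return 0
    }
    if physHugePageSize > pageSize && base&^(physHugePageSize-1) >= start {
        // Avoid breaking a huge page apart: include the whole huge page
        // in the piece being split off.
        base &^= physHugePageSize - 1
    }
    if base <= start {
        return 0
    }
    return base
}

func main() {
    const (
        pageSize         = 8 << 10 // 8 KiB runtime pages (illustrative)
        physPageSize     = 4 << 10 // 4 KiB physical pages
        physHugePageSize = 2 << 20 // 2 MiB huge pages
    )
    start := uintptr(4 << 20) // span covers [4 MiB, 8 MiB)
    end := uintptr(8 << 20)
    // Splitting off roughly 100 KiB from the back would land inside a huge
    // page, so the base gets rounded down to the 2 MiB boundary at 6 MiB.
    fmt.Printf("%#x\n", splitBase(start, end, 100<<10, pageSize, physPageSize, physHugePageSize))
    // Asking for at least the whole span yields no split.
    fmt.Println(splitBase(start, end, 8<<20, pageSize, physPageSize, physHugePageSize))
}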
+func (h *mheap) scavengeLocked(nbytes uintptr) uintptr { released := uintptr(0) - for t := h.free.start(); t.valid(); { - s := t.span() - n := t.next() - if (now - uint64(s.unusedsince)) > limit { - r := s.scavenge() - if r != 0 { + // Iterate over spans with huge pages first, then spans without. + const mask = treapIterScav | treapIterHuge + for _, match := range []treapIterType{treapIterHuge, 0} { + // Iterate over the treap backwards (from highest address to lowest address) + // scavenging spans until we've reached our quota of nbytes. + for t := h.free.end(mask, match); released < nbytes && t.valid(); { + s := t.span() + start, end := s.physPageBounds() + if start >= end { + // This span doesn't cover at least one physical page, so skip it. + t = t.prev() + continue + } + n := t.prev() + if span := h.scavengeSplit(t, nbytes-released); span != nil { + s = span + } else { h.free.erase(t) - // Now that s is scavenged, we must eagerly coalesce it - // with its neighbors to prevent having two spans with - // the same scavenged state adjacent to each other. - h.coalesce(s) - h.scav.insert(s) - released += r } + released += s.scavenge() + // Now that s is scavenged, we must eagerly coalesce it + // with its neighbors to prevent having two spans with + // the same scavenged state adjacent to each other. + h.coalesce(s) + t = n + h.free.insert(s) } - t = n } return released } -func (h *mheap) scavenge(k int32, now, limit uint64) { +// scavengeIfNeededLocked calls scavengeLocked if we're currently above the +// scavenge goal in order to prevent the mutator from out-running the +// the scavenger. +// +// h must be locked. +func (h *mheap) scavengeIfNeededLocked(size uintptr) { + if r := heapRetained(); r+uint64(size) > h.scavengeRetainedGoal { + todo := uint64(size) + // If we're only going to go a little bit over, just request what + // we actually need done. + if overage := r + uint64(size) - h.scavengeRetainedGoal; overage < todo { + todo = overage + } + h.scavengeLocked(uintptr(todo)) + } +} + +// scavengeAll visits each node in the free treap and scavenges the +// treapNode's span. It then removes the scavenged span from +// unscav and adds it into scav before continuing. +func (h *mheap) scavengeAll() { // Disallow malloc or panic while holding the heap lock. We do // this here because this is an non-mallocgc entry-point to // the mheap API. gp := getg() gp.m.mallocing++ lock(&h.lock) - released := h.scavengeAll(now, limit) + released := h.scavengeLocked(^uintptr(0)) unlock(&h.lock) gp.m.mallocing-- if debug.gctrace > 0 { if released > 0 { - print("scvg", k, ": ", released>>20, " MB released\n") + print("forced scvg: ", released>>20, " MB released\n") } - print("scvg", k, ": inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n") + print("forced scvg: inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n") } } //go:linkname runtime_debug_freeOSMemory runtime..z2fdebug.freeOSMemory func runtime_debug_freeOSMemory() { GC() - systemstack(func() { mheap_.scavenge(-1, ^uint64(0), 0) }) + systemstack(func() { mheap_.scavengeAll() }) } // Initialize a new span with the given start and npages. 
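Taken together, gcPaceScavenger and bgscavenge above implement a simple proportional controller: each pacing update publishes a goal line y = b - m*x, and the background scavenger releases memory whenever retained RSS sits above that line. The standalone sketch below condenses one controller step with hypothetical numbers; the pacing struct and step method are illustrative stand-ins, not runtime API.

package main

import "fmt"

// pacing captures the parameters published by a pacing update (the analogue
// of what gcPaceScavenger stores in mheap_): a basis point in time and
// retained bytes, a slope, and the final goal.
type pacing struct {
    timeBasis     int64   // nanoseconds at the last pacing update
    retainedBasis uint64  // retained bytes at the last pacing update
    bytesPerNS    float64 // slope of the goal line
    retainedGoal  uint64  // where the line bottoms out
}

// step performs one background-scavenger iteration: it computes where the
// goal line says retained memory should be at time now and returns how many
// bytes to release (0 if we are already at or below the line).
func (p pacing) step(now int64, retainedNow uint64) uint64 {
    if retainedNow <= p.retainedGoal {
        return 0 // goal already met; the scavenger can park
    }
    // The goal line: want = retainedBasis - bytesPerNS*elapsed, clamped at
    // the goal (a simplification of what bgscavenge computes each round).
    elapsed := now - p.timeBasis
    drop := uint64(p.bytesPerNS * float64(elapsed))
    want := p.retainedGoal
    if drop < p.retainedBasis-p.retainedGoal {
        want = p.retainedBasis - drop
    }
    if retainedNow > want {
        return retainedNow - want
    }
    return 0
}

func main() {
    p := pacing{
        timeBasis:     0,
        retainedBasis: 600 << 20, // 600 MiB retained when pacing was last set
        bytesPerNS:    0.125,     // slope: release 0.125 bytes/ns (125 MB/s)
        retainedGoal:  500 << 20, // goal: get down to 500 MiB
    }
    // 0.4s after the update the line sits 50 MB below the basis, so a
    // scavenger that has released nothing yet is 50 MB above the line.
    fmt.Println(p.step(400e6, 600<<20)) // 50000000
    // At or below the goal there is nothing left to do.
    fmt.Println(p.step(2e9, 500<<20)) // 0
}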
@@ -1462,7 +1504,6 @@ func (span *mspan) init(base uintptr, npages uintptr) { span.spanclass = 0 span.elemsize = 0 span.state = mSpanDead - span.unusedsince = 0 span.scavenged = false span.speciallock.key = 0 span.specials = nil diff --git a/libgo/go/runtime/mksizeclasses.go b/libgo/go/runtime/mksizeclasses.go index b146dbcd6c9..cacbb64207a 100644 --- a/libgo/go/runtime/mksizeclasses.go +++ b/libgo/go/runtime/mksizeclasses.go @@ -171,7 +171,7 @@ func makeClasses() []class { // computeDivMagic computes some magic constants to implement // the division required to compute object number from span offset. // n / c.size is implemented as n >> c.shift * c.mul >> c.shift2 -// for all 0 <= n < c.npages * pageSize +// for all 0 <= n <= c.npages * pageSize func computeDivMagic(c *class) { // divisor d := c.size @@ -180,7 +180,7 @@ func computeDivMagic(c *class) { } // maximum input value for which the formula needs to work. - max := c.npages*pageSize - 1 + max := c.npages * pageSize if powerOfTwo(d) { // If the size is a power of two, heapBitsForObject can divide even faster by masking. diff --git a/libgo/go/runtime/mstats.go b/libgo/go/runtime/mstats.go index cd9da02afda..cdab2ca7b87 100644 --- a/libgo/go/runtime/mstats.go +++ b/libgo/go/runtime/mstats.go @@ -470,6 +470,9 @@ func readGCStats(pauses *[]uint64) { }) } +// readGCStats_m must be called on the system stack because it acquires the heap +// lock. See mheap for details. +//go:systemstack func readGCStats_m(pauses *[]uint64) { p := *pauses // Calling code in runtime/debug should make the slice large enough. diff --git a/libgo/go/runtime/netpoll.go b/libgo/go/runtime/netpoll.go index 00c7f525147..1ce98082d10 100644 --- a/libgo/go/runtime/netpoll.go +++ b/libgo/go/runtime/netpoll.go @@ -52,13 +52,14 @@ type pollDesc struct { // The lock protects pollOpen, pollSetDeadline, pollUnblock and deadlineimpl operations. // This fully covers seq, rt and wt variables. fd is constant throughout the PollDesc lifetime. // pollReset, pollWait, pollWaitCanceled and runtime·netpollready (IO readiness notification) - // proceed w/o taking the lock. So closing, rg, rd, wg and wd are manipulated + // proceed w/o taking the lock. So closing, everr, rg, rd, wg and wd are manipulated // in a lock-free way by all operations. // NOTE(dvyukov): the following code uses uintptr to store *g (rg/wg), // that will blow up when GC starts moving objects. lock mutex // protects the following fields fd uintptr closing bool + everr bool // marks event scanning error happened user uint32 // user settable cookie rseq uintptr // protects from stale read timers rg uintptr // pdReady, pdWait, G waiting for read or nil @@ -123,6 +124,7 @@ func poll_runtime_pollOpen(fd uintptr) (uintptr, int) { } pd.fd = fd pd.closing = false + pd.everr = false pd.rseq++ pd.rg = 0 pd.rd = 0 @@ -181,8 +183,8 @@ func poll_runtime_pollWait(ctx uintptr, mode int) int { if err != 0 { return err } - // As for now only Solaris, AIX and Hurd use level-triggered IO. - if GOOS == "solaris" || GOOS == "aix" || GOOS == "hurd" { + // As for now only Solaris, illumos, and AIX use level-triggered IO. 
+ if GOOS == "solaris" || GOOS == "illumos" || GOOS == "aix" || GOOS == "hurd" { netpollarm(pd, mode) } for !netpollblock(pd, int32(mode), false) { @@ -344,10 +346,16 @@ func netpollready(toRun *gList, pd *pollDesc, mode int32) { func netpollcheckerr(pd *pollDesc, mode int32) int { if pd.closing { - return 1 // errClosing + return 1 // ErrFileClosing or ErrNetClosing } if (mode == 'r' && pd.rd < 0) || (mode == 'w' && pd.wd < 0) { - return 2 // errTimeout + return 2 // ErrTimeout + } + // Report an event scanning error only on a read event. + // An error on a write event will be captured in a subsequent + // write call that is able to report a more specific error. + if mode == 'r' && pd.everr { + return 3 // ErrNotPollable } return 0 } diff --git a/libgo/go/runtime/netpoll_aix.go b/libgo/go/runtime/netpoll_aix.go index 70bf9eb876e..39e36c70274 100644 --- a/libgo/go/runtime/netpoll_aix.go +++ b/libgo/go/runtime/netpoll_aix.go @@ -48,8 +48,6 @@ var ( pendingUpdates int32 ) -const pollVerbose = false - func netpollinit() { var p [2]int32 @@ -69,13 +67,7 @@ func netpollinit() { fcntl(wrwake, _F_SETFD, _FD_CLOEXEC) // Pre-allocate array of pollfd structures for poll. - if pollVerbose { - println("*** allocating") - } pfds = make([]pollfd, 1, 128) - if pollVerbose { - println("*** allocating done", &pfds[0]) - } // Poll the read side of the pipe. pfds[0].fd = rdwake @@ -97,18 +89,12 @@ func netpolldescriptor() uintptr { func netpollwakeup() { if pendingUpdates == 0 { pendingUpdates = 1 - if pollVerbose { - println("*** writing 1 byte") - } b := [1]byte{0} write(uintptr(wrwake), unsafe.Pointer(&b[0]), 1) } } func netpollopen(fd uintptr, pd *pollDesc) int32 { - if pollVerbose { - println("*** netpollopen", fd) - } lock(&mtxpoll) netpollwakeup() @@ -123,9 +109,6 @@ func netpollopen(fd uintptr, pd *pollDesc) int32 { } func netpollclose(fd uintptr) int32 { - if pollVerbose { - println("*** netpollclose", fd) - } lock(&mtxpoll) netpollwakeup() @@ -148,9 +131,6 @@ func netpollclose(fd uintptr) int32 { } func netpollarm(pd *pollDesc, mode int) { - if pollVerbose { - println("*** netpollarm", pd.fd, mode) - } lock(&mtxpoll) netpollwakeup() @@ -173,31 +153,19 @@ func netpoll(block bool) gList { timeout = 0 return gList{} } - if pollVerbose { - println("*** netpoll", block) - } retry: lock(&mtxpoll) lock(&mtxset) pendingUpdates = 0 unlock(&mtxpoll) - if pollVerbose { - println("*** netpoll before poll") - } n := libc_poll(&pfds[0], uintptr(len(pfds)), timeout) - if pollVerbose { - println("*** netpoll after poll", n) - } if n < 0 { e := errno() if e != _EINTR { println("errno=", e, " len(pfds)=", len(pfds)) throw("poll failed") } - if pollVerbose { - println("*** poll failed") - } unlock(&mtxset) goto retry } @@ -205,9 +173,6 @@ retry: if n != 0 && pfds[0].revents&(_POLLIN|_POLLHUP|_POLLERR) != 0 { var b [1]byte for read(rdwake, unsafe.Pointer(&b[0]), 1) == 1 { - if pollVerbose { - println("*** read 1 byte from pipe") - } } // Do not look at the other fds in this case as the mode may have changed // XXX only additions of flags are made, so maybe it is ok @@ -228,8 +193,9 @@ retry: pfd.events &= ^_POLLOUT } if mode != 0 { - if pollVerbose { - println("*** netpollready i=", i, "revents=", pfd.revents, "events=", pfd.events, "pd=", pds[i]) + pds[i].everr = false + if pfd.revents == _POLLERR { + pds[i].everr = true } netpollready(&toRun, pds[i], mode) n-- @@ -239,8 +205,5 @@ retry: if block && toRun.empty() { goto retry } - if pollVerbose { - println("*** netpoll returning end") - } return toRun } diff --git 
a/libgo/go/runtime/netpoll_epoll.go b/libgo/go/runtime/netpoll_epoll.go index 2004fbc4da8..885ac1fe1e5 100644 --- a/libgo/go/runtime/netpoll_epoll.go +++ b/libgo/go/runtime/netpoll_epoll.go @@ -109,7 +109,10 @@ retry: } if mode != 0 { pd := *(**pollDesc)(unsafe.Pointer(&ev.data)) - + pd.everr = false + if ev.events == _EPOLLERR { + pd.everr = true + } netpollready(&toRun, pd, mode) } } diff --git a/libgo/go/runtime/netpoll_kqueue.go b/libgo/go/runtime/netpoll_kqueue.go index 4ea3ac9f27e..ce1acdf0cf5 100644 --- a/libgo/go/runtime/netpoll_kqueue.go +++ b/libgo/go/runtime/netpoll_kqueue.go @@ -118,7 +118,12 @@ retry: mode += 'w' } if mode != 0 { - netpollready(&toRun, (*pollDesc)(unsafe.Pointer(ev.udata)), mode) + pd := (*pollDesc)(unsafe.Pointer(ev.udata)) + pd.everr = false + if ev.flags == _EV_ERROR { + pd.everr = true + } + netpollready(&toRun, pd, mode) } } if block && toRun.empty() { diff --git a/libgo/go/runtime/netpoll_solaris.go b/libgo/go/runtime/netpoll_solaris.go index b8baffdfc12..222af29b73e 100644 --- a/libgo/go/runtime/netpoll_solaris.go +++ b/libgo/go/runtime/netpoll_solaris.go @@ -219,6 +219,11 @@ retry: } if mode != 0 { + // TODO(mikio): Consider implementing event + // scanning error reporting once we are sure + // about the event port on SmartOS. + // + // See golang.org/x/issue/30840. netpollready(&toRun, pd, mode) } } diff --git a/libgo/go/runtime/os_darwin.go b/libgo/go/runtime/os_darwin.go index deaa9e9d26d..498bd430e54 100644 --- a/libgo/go/runtime/os_darwin.go +++ b/libgo/go/runtime/os_darwin.go @@ -71,7 +71,7 @@ func semasleep(ns int64) int32 { return -1 } var t timespec - t.set_nsec(ns - spent) + t.setNsec(ns - spent) err := pthread_cond_timedwait_relative_np(&mp.cond, &mp.mutex, &t) if err == _ETIMEDOUT { pthread_mutex_unlock(&mp.mutex) diff --git a/libgo/go/runtime/os_freebsd.go b/libgo/go/runtime/os_freebsd.go index f8577e4226e..611a8cd4627 100644 --- a/libgo/go/runtime/os_freebsd.go +++ b/libgo/go/runtime/os_freebsd.go @@ -139,7 +139,7 @@ func futexsleep1(addr *uint32, val uint32, ns int64) { if ns >= 0 { var ut umtx_time ut._clockid = _CLOCK_MONOTONIC - ut._timeout.set_sec(int64(timediv(ns, 1000000000, (*int32)(unsafe.Pointer(&ut._timeout.tv_nsec))))) + ut._timeout.setNsec(ns) utp = &ut } ret := sys_umtx_op(addr, _UMTX_OP_WAIT_UINT_PRIVATE, val, unsafe.Sizeof(*utp), utp) diff --git a/libgo/go/runtime/os_hurd.go b/libgo/go/runtime/os_hurd.go index 3a545d0768d..5be5a1d9713 100644 --- a/libgo/go/runtime/os_hurd.go +++ b/libgo/go/runtime/os_hurd.go @@ -60,8 +60,7 @@ func semasleep(ns int64) int32 { _m_ := getg().m if ns >= 0 { var ts timespec - ts.set_sec(ns / 1000000000) - ts.set_nsec(int32(ns % 1000000000)) + ts.setNsec(ns) if sem_timedwait((*_sem_t)(unsafe.Pointer(_m_.mos.waitsema)), &ts) != 0 { err := errno() diff --git a/libgo/go/runtime/os_linux.go b/libgo/go/runtime/os_linux.go index b72872f1822..1e864466557 100644 --- a/libgo/go/runtime/os_linux.go +++ b/libgo/go/runtime/os_linux.go @@ -44,8 +44,6 @@ const ( // Don't sleep longer than ns; ns < 0 means forever. //go:nosplit func futexsleep(addr *uint32, val uint32, ns int64) { - var ts timespec - // Some Linux kernels have a bug where futex of // FUTEX_WAIT returns an internal error code // as an errno. Libpthread ignores the return value @@ -56,19 +54,8 @@ func futexsleep(addr *uint32, val uint32, ns int64) { return } - // It's difficult to live within the no-split stack limits here. - // On ARM and 386, a 64-bit divide invokes a general software routine - // that needs more stack than we can afford. 
So we use timediv instead. - // But on real 64-bit systems, where words are larger but the stack limit - // is not, even timediv is too heavy, and we really need to use just an - // ordinary machine instruction. - if sys.PtrSize == 8 { - ts.set_sec(ns / 1000000000) - ts.set_nsec(int32(ns % 1000000000)) - } else { - ts.tv_nsec = 0 - ts.set_sec(int64(timediv(ns, 1000000000, (*int32)(unsafe.Pointer(&ts.tv_nsec))))) - } + var ts timespec + ts.setNsec(ns) futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, unsafe.Pointer(&ts), nil, 0) } diff --git a/libgo/go/runtime/os_netbsd.go b/libgo/go/runtime/os_netbsd.go index b7aa9535df3..7c3d41fb9d1 100644 --- a/libgo/go/runtime/os_netbsd.go +++ b/libgo/go/runtime/os_netbsd.go @@ -84,9 +84,7 @@ func semasleep(ns int64) int32 { if wait <= 0 { return -1 } - var nsec int32 - ts.set_sec(timediv(wait, 1000000000, &nsec)) - ts.set_nsec(nsec) + ts.setNsec(wait) tsp = &ts } ret := lwp_park(_CLOCK_MONOTONIC, _TIMER_RELTIME, tsp, 0, unsafe.Pointer(&_g_.m.waitsemacount), nil) diff --git a/libgo/go/runtime/os_netbsd_arm64.go b/libgo/go/runtime/os_netbsd_arm64.go new file mode 100644 index 00000000000..fd81eb7557d --- /dev/null +++ b/libgo/go/runtime/os_netbsd_arm64.go @@ -0,0 +1,24 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import "unsafe" + +func lwp_mcontext_init(mc *mcontextt, stk unsafe.Pointer, mp *m, gp *g, fn uintptr) { + // Machine dependent mcontext initialisation for LWP. + mc.__gregs[_REG_ELR] = uint64(funcPC(lwp_tramp)) + mc.__gregs[_REG_X31] = uint64(uintptr(stk)) + mc.__gregs[_REG_X0] = uint64(uintptr(unsafe.Pointer(mp))) + mc.__gregs[_REG_X1] = uint64(uintptr(unsafe.Pointer(mp.g0))) + mc.__gregs[_REG_X2] = uint64(fn) +} + +//go:nosplit +func cputicks() int64 { + // Currently cputicks() is used in blocking profiler and to seed runtime·fastrand(). + // runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler. + // TODO: need more entropy to better seed fastrand. + return nanotime() +} diff --git a/libgo/go/runtime/os_openbsd.go b/libgo/go/runtime/os_openbsd.go index 4298172b054..9cfaa94b086 100644 --- a/libgo/go/runtime/os_openbsd.go +++ b/libgo/go/runtime/os_openbsd.go @@ -86,10 +86,7 @@ func semasleep(ns int64) int32 { var tsp *timespec if ns >= 0 { var ts timespec - var nsec int32 - ns += nanotime() - ts.set_sec(int64(timediv(ns, 1000000000, &nsec))) - ts.set_nsec(nsec) + ts.setNsec(ns + nanotime()) tsp = &ts } diff --git a/libgo/go/runtime/os_openbsd_arm64.go b/libgo/go/runtime/os_openbsd_arm64.go new file mode 100644 index 00000000000..f15a95b653f --- /dev/null +++ b/libgo/go/runtime/os_openbsd_arm64.go @@ -0,0 +1,24 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import ( + "internal/cpu" +) + +//go:nosplit +func cputicks() int64 { + // Currently cputicks() is used in blocking profiler and to seed runtime·fastrand(). + // runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler. + // TODO: need more entropy to better seed fastrand. + return nanotime() +} + +func sysargs(argc int32, argv **byte) { + // OpenBSD does not have auxv, however we still need to initialise cpu.HWCaps. + // For now specify the bare minimum until we add some form of capabilities + // detection. See issue #31746. 
+ cpu.HWCap = 1<<1 | 1<<0 // ASIMD, FP +} diff --git a/libgo/go/runtime/os_solaris.go b/libgo/go/runtime/os_solaris.go index cf457680f71..ea723936c5c 100644 --- a/libgo/go/runtime/os_solaris.go +++ b/libgo/go/runtime/os_solaris.go @@ -52,8 +52,7 @@ func semasleep(ns int64) int32 { _m_ := getg().m if ns >= 0 { var ts timespec - ts.set_sec(ns / 1000000000) - ts.set_nsec(int32(ns % 1000000000)) + ts.setNsec(ns) if sem_reltimedwait_np((*semt)(unsafe.Pointer(_m_.mos.waitsema)), &ts) != 0 { err := errno() diff --git a/libgo/go/runtime/panic.go b/libgo/go/runtime/panic.go index 8e56bbe8b56..21ffb5c2d0d 100644 --- a/libgo/go/runtime/panic.go +++ b/libgo/go/runtime/panic.go @@ -61,24 +61,13 @@ func panicCheck1(pc uintptr, msg string) { } // Same as above, but calling from the runtime is allowed. -// -// Using this function is necessary for any panic that may be -// generated by runtime.sigpanic, since those are always called by the -// runtime. func panicCheck2(err string) { - // panic allocates, so to avoid recursive malloc, turn panics - // during malloc into throws. gp := getg() if gp != nil && gp.m != nil && gp.m.mallocing != 0 { throw(err) } } -// Many of the following panic entry-points turn into throws when they -// happen in various runtime contexts. These should never happen in -// the runtime, and if they do, they indicate a serious issue and -// should not be caught by user code. -// // The panic{Index,Slice,divide,shift} functions are called by // code generated by the compiler for out of bounds index expressions, // out of bounds slice expressions, division by zero, and shift by negative. @@ -208,10 +197,6 @@ func panicmem() { panic(memoryError) } -func throwinit() { - throw("recursive call during initialization - linker skew") -} - // deferproc creates a new deferred function. // The compiler turns a defer statement into a call to this. // frame points into the stack frame; it is used to determine which @@ -1200,6 +1185,8 @@ func dopanic_m(gp *g, pc, sp uintptr) bool { lock(&deadlock) } + printDebugLog() + return docrash } diff --git a/libgo/go/runtime/pprof/internal/profile/profile.go b/libgo/go/runtime/pprof/internal/profile/profile.go index a6f8354b1e8..443accdd6d3 100644 --- a/libgo/go/runtime/pprof/internal/profile/profile.go +++ b/libgo/go/runtime/pprof/internal/profile/profile.go @@ -211,9 +211,7 @@ func (p *Profile) setMain() { continue } // Swap what we guess is main to position 0. - tmp := p.Mapping[i] - p.Mapping[i] = p.Mapping[0] - p.Mapping[0] = tmp + p.Mapping[i], p.Mapping[0] = p.Mapping[0], p.Mapping[i] break } } diff --git a/libgo/go/runtime/pprof/label.go b/libgo/go/runtime/pprof/label.go index 35647ee3ce1..20f9cdbae6f 100644 --- a/libgo/go/runtime/pprof/label.go +++ b/libgo/go/runtime/pprof/label.go @@ -54,6 +54,8 @@ func WithLabels(ctx context.Context, labels LabelSet) context.Context { // Labels takes an even number of strings representing key-value pairs // and makes a LabelSet containing them. // A label overwrites a prior label with the same key. +// Currently only CPU profile utilizes labels information. +// See https://golang.org/issue/23458 for details. 
func Labels(args ...string) LabelSet { if len(args)%2 != 0 { panic("uneven number of arguments to pprof.Labels") diff --git a/libgo/go/runtime/pprof/pprof_test.go b/libgo/go/runtime/pprof/pprof_test.go index bf9f5266e29..49a555c82c8 100644 --- a/libgo/go/runtime/pprof/pprof_test.go +++ b/libgo/go/runtime/pprof/pprof_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !aix,!nacl,!js +// +build !nacl,!js package pprof @@ -159,12 +159,27 @@ func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []stri t.Skip("skipping on plan9") } - const maxDuration = 5 * time.Second + broken := false + switch runtime.GOOS { + case "darwin", "dragonfly", "netbsd", "illumos", "solaris": + broken = true + case "openbsd": + if runtime.GOARCH == "arm" || runtime.GOARCH == "arm64" { + broken = true + } + } + + maxDuration := 5 * time.Second + if testing.Short() && broken { + // If it's expected to be broken, no point waiting around. + maxDuration /= 10 + } + // If we're running a long test, start with a long duration // for tests that try to make sure something *doesn't* happen. duration := 5 * time.Second if testing.Short() { - duration = 200 * time.Millisecond + duration = 100 * time.Millisecond } // Profiling tests are inherently flaky, especially on a @@ -190,14 +205,10 @@ func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []stri } } - switch runtime.GOOS { - case "darwin", "dragonfly", "netbsd", "solaris": - t.Skipf("ignoring failure on %s; see golang.org/issue/13841", runtime.GOOS) - case "openbsd": - if runtime.GOARCH == "arm" { - t.Skipf("ignoring failure on %s/%s; see golang.org/issue/13841", runtime.GOOS, runtime.GOARCH) - } + if broken { + t.Skipf("ignoring failure on %s/%s; see golang.org/issue/13841", runtime.GOOS, runtime.GOARCH) } + // Ignore the failure if the tests are running in a QEMU-based emulator, // QEMU is not perfect at emulating everything. // IN_QEMU environmental variable is set by some of the Go builders. diff --git a/libgo/go/runtime/pprof/proto_test.go b/libgo/go/runtime/pprof/proto_test.go index 5a915fb4c3a..7e7c7cc6f0a 100644 --- a/libgo/go/runtime/pprof/proto_test.go +++ b/libgo/go/runtime/pprof/proto_test.go @@ -209,11 +209,11 @@ ffffffffff600000-ffffffffff601000 r-xp 00000090 00:00 0 [vsysca 7ffc34343000 7ffc34345000 00000000 [vdso] ffffffffff600000 ffffffffff601000 00000090 [vsyscall] -00400000-07000000 r-xp 00000000 00:00 0 +00400000-07000000 r-xp 00000000 00:00 0 07000000-07093000 r-xp 06c00000 00:2e 536754 /path/to/gobench_server_main 07093000-0722d000 rw-p 06c92000 00:2e 536754 /path/to/gobench_server_main -0722d000-07b21000 rw-p 00000000 00:00 0 -c000000000-c000036000 rw-p 00000000 00:00 0 +0722d000-07b21000 rw-p 00000000 00:00 0 +c000000000-c000036000 rw-p 00000000 00:00 0 -> 07000000 07093000 06c00000 /path/to/gobench_server_main ` @@ -303,7 +303,7 @@ func TestProcSelfMaps(t *testing.T) { }) } -// TestMapping checkes the mapping section of CPU profiles +// TestMapping checks the mapping section of CPU profiles // has the HasFunctions field set correctly. 
If all PCs included // in the samples are successfully symbolized, the corresponding // mapping entry (in this test case, only one entry) should have diff --git a/libgo/go/runtime/pprof/testdata/mappingtest/main.go b/libgo/go/runtime/pprof/testdata/mappingtest/main.go index 7850faab0de..476b9e88a32 100644 --- a/libgo/go/runtime/pprof/testdata/mappingtest/main.go +++ b/libgo/go/runtime/pprof/testdata/mappingtest/main.go @@ -69,7 +69,7 @@ func main() { if err := pprof.StartCPUProfile(os.Stdout); err != nil { log.Fatal("can't start CPU profile: ", err) } - time.Sleep(1 * time.Second) + time.Sleep(200 * time.Millisecond) pprof.StopCPUProfile() if err := os.Stdout.Close(); err != nil { diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index fa85d262efe..afedad50b21 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -70,6 +70,9 @@ func main_main() var buildVersion = sys.TheVersion +// set using cmd/go/internal/modload.ModInfoProg +var modinfo string + // Goroutine scheduler // The scheduler's job is to distribute ready-to-run goroutines over worker threads. // @@ -134,8 +137,9 @@ var buildVersion = sys.TheVersion // for nmspinning manipulation. var ( - m0 m - g0 g + m0 m + g0 g + raceprocctx0 uintptr ) // main_init_done is a signal used by cgocallbackg that initialization @@ -187,6 +191,10 @@ func main(unsafe.Pointer) { throw("runtime.main not on m0") } + if nanotime() == 0 { + throw("nanotime returning zero") + } + // Defer unlock so that runtime.Goexit during init does the unlock too. needUnlock := true defer func() { @@ -211,13 +219,13 @@ func main(unsafe.Pointer) { createGcRootsIndex() close(main_init_done) - needUnlock = false - unlockOSThread() - // For gccgo we have to wait until after main is initialized // to enable GC, because initializing main registers the GC roots. gcenable() + needUnlock = false + unlockOSThread() + if isarchive || islibrary { // A program compiled with -buildmode=c-archive or c-shared // has a main, but it is not executed. @@ -322,7 +330,7 @@ func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason w throw("gopark: bad g status") } mp.waitlock = lock - mp.waitunlockf = *(*unsafe.Pointer)(unsafe.Pointer(&unlockf)) + mp.waitunlockf = unlockf gp.waitreason = reason mp.waittraceev = traceEv mp.waittraceskip = traceskip @@ -491,7 +499,7 @@ func cpuinit() { var env string switch GOOS { - case "aix", "darwin", "dragonfly", "freebsd", "netbsd", "openbsd", "solaris", "linux": + case "aix", "darwin", "dragonfly", "freebsd", "netbsd", "openbsd", "illumos", "solaris", "linux": cpu.DebugOptions = true // Similar to goenv_unix but extracts the environment value for @@ -577,6 +585,11 @@ func schedinit() { // to ensure runtime·buildVersion is kept in the resulting binary. buildVersion = "unknown" } + if len(modinfo) == 1 { + // Condition should never trigger. This code just serves + // to ensure runtime·modinfo is kept in the resulting binary. + modinfo = "" + } } func dumpgstatus(gp *g) { @@ -637,7 +650,7 @@ func ready(gp *g, traceskip int, next bool) { // Mark runnable. 
_g_ := getg() - _g_.m.locks++ // disable preemption because it can be holding p in a local var + mp := acquirem() // disable preemption because it can be holding p in a local var if status&^_Gscan != _Gwaiting { dumpgstatus(gp) throw("bad g->status in ready") @@ -649,7 +662,7 @@ func ready(gp *g, traceskip int, next bool) { if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 { wakep() } - _g_.m.locks-- + releasem(mp) } // freezeStopWait is a large value that freezetheworld sets @@ -684,13 +697,6 @@ func freezetheworld() { usleep(1000) } -func isscanstatus(status uint32) bool { - if status == _Gscan { - throw("isscanstatus: Bad status Gscan") - } - return status&_Gscan == _Gscan -} - // All reads and writes of g's status go through readgstatus, casgstatus // castogscanstatus, casfrom_Gscanstatus. //go:nosplit @@ -1111,9 +1117,7 @@ func stopTheWorldWithSema() { } func startTheWorldWithSema(emitTraceEvent bool) int64 { - _g_ := getg() - - _g_.m.locks++ // disable preemption because it can be holding p in a local var + mp := acquirem() // disable preemption because it can be holding p in a local var if netpollinited() { list := netpoll(false) // non-blocking injectglist(&list) @@ -1163,7 +1167,7 @@ func startTheWorldWithSema(emitTraceEvent bool) int64 { wakep() } - _g_.m.locks-- + releasem(mp) return startTime } @@ -1477,7 +1481,7 @@ func runSafePointFn() { //go:yeswritebarrierrec func allocm(_p_ *p, fn func(), allocatestack bool) (mp *m, g0Stack unsafe.Pointer, g0StackSize uintptr) { _g_ := getg() - _g_.m.locks++ // disable GC because it can be called from sysmon + acquirem() // disable GC because it can be called from sysmon if _g_.m.p == 0 { acquirep(_p_) // temporarily borrow p for mallocs in this function } @@ -1512,7 +1516,7 @@ func allocm(_p_ *p, fn func(), allocatestack bool) (mp *m, g0Stack unsafe.Pointe if _p_ == _g_.m.p.ptr() { releasep() } - _g_.m.locks-- + releasem(_g_.m) return mp, g0Stack, g0StackSize } @@ -2480,15 +2484,22 @@ top: var gp *g var inheritTime bool + + // Normal goroutines will check for need to wakeP in ready, + // but GCworkers and tracereaders will not, so the check must + // be done here instead. + tryWakeP := false if trace.enabled || trace.shutdown { gp = traceReader() if gp != nil { casgstatus(gp, _Gwaiting, _Grunnable) traceGoUnpark(gp, 0) + tryWakeP = true } } if gp == nil && gcBlackenEnabled != 0 { gp = gcController.findRunnableGCWorker(_g_.m.p.ptr()) + tryWakeP = tryWakeP || gp != nil } if gp == nil { // Check the global runnable queue once in a while to ensure fairness. @@ -2549,6 +2560,13 @@ top: } } + // If about to schedule a not-normal goroutine (a GCworker or tracereader), + // wake a P if there is one. + if tryWakeP { + if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 { + wakep() + } + } if gp.lockedm != 0 { // Hands off own p to the locked m, // then blocks waiting for a new p. 
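Several of the hunks above swap hand-rolled _g_.m.locks++ / _g_.m.locks-- bookkeeping for acquirem/releasem. The toy below is only a reduced model of that pairing, not the runtime's source (the real releasem also re-arms any pending preemption request once locks drops back to zero); it is here to show why returning the *m from acquirem makes the pattern harder to misuse than a bare counter.

// Reduced model of acquirem/releasem; types and globals are stand-ins.
package main

import "fmt"

type m struct {
	id    int
	locks int32 // while locks > 0, the running goroutine must not be preempted
}

var curm = &m{id: 0} // stand-in for getg().m

func acquirem() *m {
	mp := curm
	mp.locks++
	return mp
}

func releasem(mp *m) {
	// Decrement on the same M that was pinned, even if intervening code
	// is later reshuffled or the goroutine-to-M mapping becomes subtler.
	mp.locks--
}

func main() {
	mp := acquirem() // disable preemption: safe to cache the current P in a local
	fmt.Println("locks held:", mp.locks)
	releasem(mp)
}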
@@ -2589,8 +2607,7 @@ func park_m(gp *g) { dropg() casgstatus(gp, _Grunning, _Gwaiting) - if _g_.m.waitunlockf != nil { - fn := *(*func(*g, unsafe.Pointer) bool)(unsafe.Pointer(&_g_.m.waitunlockf)) + if fn := _g_.m.waitunlockf; fn != nil { ok := fn(gp, _g_.m.waitlock) _g_.m.waitunlockf = nil _g_.m.waitlock = nil @@ -3151,7 +3168,7 @@ func newproc(fn uintptr, arg unsafe.Pointer) *g { _g_.m.throwing = -1 // do not dump full stacks throw("go of nil func value") } - _g_.m.locks++ // disable preemption because it can be holding p in a local var + acquirem() // disable preemption because it can be holding p in a local var _p_ := _g_.m.p.ptr() newg := gfget(_p_) @@ -3214,7 +3231,7 @@ func newproc(fn uintptr, arg unsafe.Pointer) *g { if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 && mainStarted { wakep() } - _g_.m.locks-- + releasem(_g_.m) return newg } @@ -3673,6 +3690,88 @@ func setcpuprofilerate(hz int32) { _g_.m.locks-- } +// init initializes pp, which may be a freshly allocated p or a +// previously destroyed p, and transitions it to status _Pgcstop. +func (pp *p) init(id int32) { + pp.id = id + pp.status = _Pgcstop + pp.sudogcache = pp.sudogbuf[:0] + pp.deferpool = pp.deferpoolbuf[:0] + pp.wbBuf.reset() + if pp.mcache == nil { + if id == 0 { + if getg().m.mcache == nil { + throw("missing mcache?") + } + pp.mcache = getg().m.mcache // bootstrap + } else { + pp.mcache = allocmcache() + } + } + if raceenabled && pp.raceprocctx == 0 { + if id == 0 { + pp.raceprocctx = raceprocctx0 + raceprocctx0 = 0 // bootstrap + } else { + pp.raceprocctx = raceproccreate() + } + } +} + +// destroy releases all of the resources associated with pp and +// transitions it to status _Pdead. +// +// sched.lock must be held and the world must be stopped. +func (pp *p) destroy() { + // Move all runnable goroutines to the global queue + for pp.runqhead != pp.runqtail { + // Pop from tail of local queue + pp.runqtail-- + gp := pp.runq[pp.runqtail%uint32(len(pp.runq))].ptr() + // Push onto head of global queue + globrunqputhead(gp) + } + if pp.runnext != 0 { + globrunqputhead(pp.runnext.ptr()) + pp.runnext = 0 + } + // If there's a background worker, make it runnable and put + // it on the global queue so it can clean itself up. + if gp := pp.gcBgMarkWorker.ptr(); gp != nil { + casgstatus(gp, _Gwaiting, _Grunnable) + if trace.enabled { + traceGoUnpark(gp, 0) + } + globrunqput(gp) + // This assignment doesn't race because the + // world is stopped. + pp.gcBgMarkWorker.set(nil) + } + // Flush p's write barrier buffer. + if gcphase != _GCoff { + wbBufFlush1(pp) + pp.gcw.dispose() + } + for i := range pp.sudogbuf { + pp.sudogbuf[i] = nil + } + pp.sudogcache = pp.sudogbuf[:0] + for i := range pp.deferpoolbuf { + pp.deferpoolbuf[i] = nil + } + pp.deferpool = pp.deferpoolbuf[:0] + freemcache(pp.mcache) + pp.mcache = nil + gfpurge(pp) + traceProcFree(pp) + if raceenabled { + raceprocdestroy(pp.raceprocctx) + pp.raceprocctx = 0 + } + pp.gcAssistTime = 0 + pp.status = _Pdead +} + // Change number of processors. The world is stopped, sched is locked. // gcworkbufs are not being modified by either the GC or // the write barrier code. 
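The p.init and p.destroy methods above, together with the procresize hunk that follows, split per-P setup and teardown out of the resize loop. The reduced model below is only a sketch of that shape: the fields and status strings are placeholders, nothing like the real run queues, mcache, or write-barrier buffers.

// Reduced model of the init/destroy split used by procresize; all names
// and fields here are placeholders.
package main

import "fmt"

type p struct {
	id     int32
	status string // "_Pgcstop", "_Pdead", ...
	cache  []byte
}

func (pp *p) init(id int32) {
	pp.id = id
	pp.status = "_Pgcstop"
	if pp.cache == nil {
		pp.cache = make([]byte, 64) // stands in for allocmcache()
	}
}

func (pp *p) destroy() {
	// The real destroy also drains run queues and flushes GC state.
	pp.cache = nil
	pp.status = "_Pdead"
}

func procresize(allp []*p, nprocs int32) []*p {
	old := int32(len(allp))
	for i := old; i < nprocs; i++ { // grow: initialize only the new Ps
		pp := new(p)
		pp.init(i)
		allp = append(allp, pp)
	}
	for i := nprocs; i < old; i++ { // shrink: release resources of unused Ps
		allp[i].destroy()
	}
	if int32(len(allp)) > nprocs {
		allp = allp[:nprocs]
	}
	return allp
}

func main() {
	allp := procresize(nil, 4)
	allp = procresize(allp, 2)
	fmt.Println(len(allp), allp[0].status)
}

The real procresize additionally defers the destroy loop until the current M has re-acquired a valid P, because p.destroy itself contains write barriers and must run from one.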
@@ -3711,89 +3810,13 @@ func procresize(nprocs int32) *p { } // initialize new P's - for i := int32(0); i < nprocs; i++ { + for i := old; i < nprocs; i++ { pp := allp[i] if pp == nil { pp = new(p) - pp.id = i - pp.status = _Pgcstop - pp.sudogcache = pp.sudogbuf[:0] - pp.deferpool = pp.deferpoolbuf[:0] - pp.wbBuf.reset() - atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp)) - } - if pp.mcache == nil { - if old == 0 && i == 0 { - if getg().m.mcache == nil { - throw("missing mcache?") - } - pp.mcache = getg().m.mcache // bootstrap - } else { - pp.mcache = allocmcache() - } } - } - - // free unused P's - for i := nprocs; i < old; i++ { - p := allp[i] - if trace.enabled && p == getg().m.p.ptr() { - // moving to p[0], pretend that we were descheduled - // and then scheduled again to keep the trace sane. - traceGoSched() - traceProcStop(p) - } - // move all runnable goroutines to the global queue - for p.runqhead != p.runqtail { - // pop from tail of local queue - p.runqtail-- - gp := p.runq[p.runqtail%uint32(len(p.runq))].ptr() - // push onto head of global queue - globrunqputhead(gp) - } - if p.runnext != 0 { - globrunqputhead(p.runnext.ptr()) - p.runnext = 0 - } - // if there's a background worker, make it runnable and put - // it on the global queue so it can clean itself up - if gp := p.gcBgMarkWorker.ptr(); gp != nil { - casgstatus(gp, _Gwaiting, _Grunnable) - if trace.enabled { - traceGoUnpark(gp, 0) - } - globrunqput(gp) - // This assignment doesn't race because the - // world is stopped. - p.gcBgMarkWorker.set(nil) - } - // Flush p's write barrier buffer. - if gcphase != _GCoff { - wbBufFlush1(p) - p.gcw.dispose() - } - for i := range p.sudogbuf { - p.sudogbuf[i] = nil - } - p.sudogcache = p.sudogbuf[:0] - for i := range p.deferpoolbuf { - p.deferpoolbuf[i] = nil - } - p.deferpool = p.deferpoolbuf[:0] - freemcache(p.mcache) - p.mcache = nil - gfpurge(p) - traceProcFree(p) - p.gcAssistTime = 0 - p.status = _Pdead - // can't free P itself because it can be referenced by an M in syscall - } - - // Trim allp. - if int32(len(allp)) != nprocs { - lock(&allpLock) - allp = allp[:nprocs] - unlock(&allpLock) + pp.init(i) + atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp)) } _g_ := getg() @@ -3802,8 +3825,19 @@ func procresize(nprocs int32) *p { _g_.m.p.ptr().status = _Prunning _g_.m.p.ptr().mcache.prepareForSweep() } else { - // release the current P and acquire allp[0] + // release the current P and acquire allp[0]. + // + // We must do this before destroying our current P + // because p.destroy itself has write barriers, so we + // need to do that from a valid P. if _g_.m.p != 0 { + if trace.enabled { + // Pretend that we were descheduled + // and then scheduled again to keep + // the trace sane. + traceGoSched() + traceProcStop(_g_.m.p.ptr()) + } _g_.m.p.ptr().m = 0 } _g_.m.p = 0 @@ -3816,6 +3850,21 @@ func procresize(nprocs int32) *p { traceGoStart() } } + + // release resources from unused P's + for i := nprocs; i < old; i++ { + p := allp[i] + p.destroy() + // can't free P itself because it can be referenced by an M in syscall + } + + // Trim allp. 
+ if int32(len(allp)) != nprocs { + lock(&allpLock) + allp = allp[:nprocs] + unlock(&allpLock) + } + var runnablePs *p for i := nprocs - 1; i >= 0; i-- { p := allp[i] @@ -3893,7 +3942,7 @@ func releasep() *p { } _p_ := _g_.m.p.ptr() if _p_.m.ptr() != _g_.m || _p_.mcache != _g_.m.mcache || _p_.status != _Prunning { - print("releasep: m=", _g_.m, " m->p=", _g_.m.p.ptr(), " p->m=", _p_.m, " m->mcache=", _g_.m.mcache, " p->mcache=", _p_.mcache, " p->status=", _p_.status, "\n") + print("releasep: m=", _g_.m, " m->p=", _g_.m.p.ptr(), " p->m=", hex(_p_.m), " m->mcache=", _g_.m.mcache, " p->mcache=", _p_.mcache, " p->status=", _p_.status, "\n") throw("releasep: invalid p state") } if trace.enabled { @@ -3940,7 +3989,12 @@ func checkdead() { // for details.) var run0 int32 if !iscgo && cgoHasExtraM { - run0 = 1 + mp := lockextra(true) + haveExtraM := extraMCount > 0 + unlockextra(mp) + if haveExtraM { + run0 = 1 + } } run := mcount() - sched.nmidle - sched.nmidlelocked - sched.nmsys @@ -4016,19 +4070,6 @@ func sysmon() { checkdead() unlock(&sched.lock) - // If a heap span goes unused for 5 minutes after a garbage collection, - // we hand it back to the operating system. - scavengelimit := int64(5 * 60 * 1e9) - - if debug.scavenge > 0 { - // Scavenge-a-lot for testing. - forcegcperiod = 10 * 1e6 - scavengelimit = 20 * 1e6 - } - - lastscavenge := nanotime() - nscavenge := 0 - lasttrace := int64(0) idle := 0 // how many cycles in succession we had not wokeup somebody delay := uint32(0) @@ -4050,9 +4091,6 @@ func sysmon() { // Make wake-up period small enough // for the sampling to be correct. maxsleep := forcegcperiod / 2 - if scavengelimit < forcegcperiod { - maxsleep = scavengelimit / 2 - } shouldRelax := true if osRelaxMinNS > 0 { next := timeSleepUntil() @@ -4115,12 +4153,6 @@ func sysmon() { injectglist(&list) unlock(&forcegc.lock) } - // scavenge heap once in a while - if lastscavenge+scavengelimit/2 < now { - mheap_.scavenge(int32(nscavenge), uint64(now), uint64(scavengelimit)) - lastscavenge = now - nscavenge++ - } if debug.schedtrace > 0 && lasttrace+int64(debug.schedtrace)*1000000 <= now { lasttrace = now schedtrace(debug.scheddetail > 0) @@ -4156,10 +4188,24 @@ func retake(now int64) uint32 { } pd := &_p_.sysmontick s := _p_.status + sysretake := false + if s == _Prunning || s == _Psyscall { + // Preempt G if it's running for too long. + t := int64(_p_.schedtick) + if int64(pd.schedtick) != t { + pd.schedtick = uint32(t) + pd.schedwhen = now + } else if pd.schedwhen+forcePreemptNS <= now { + preemptone(_p_) + // In case of syscall, preemptone() doesn't + // work, because there is no M wired to P. + sysretake = true + } + } if s == _Psyscall { // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us). t := int64(_p_.syscalltick) - if int64(pd.syscalltick) != t { + if !sysretake && int64(pd.syscalltick) != t { pd.syscalltick = uint32(t) pd.syscallwhen = now continue @@ -4188,18 +4234,6 @@ func retake(now int64) uint32 { } incidlelocked(1) lock(&allpLock) - } else if s == _Prunning { - // Preempt G if it's running for too long. 
- t := int64(_p_.schedtick) - if int64(pd.schedtick) != t { - pd.schedtick = uint32(t) - pd.schedwhen = now - continue - } - if pd.schedwhen+forcePreemptNS > now { - continue - } - preemptone(_p_) } } unlock(&allpLock) diff --git a/libgo/go/runtime/proc_test.go b/libgo/go/runtime/proc_test.go index da5ffbbdee2..b9be3387dbd 100644 --- a/libgo/go/runtime/proc_test.go +++ b/libgo/go/runtime/proc_test.go @@ -5,6 +5,7 @@ package runtime_test import ( + "fmt" "math" "net" "runtime" @@ -917,3 +918,69 @@ func TestLockOSThreadAvoidsStatePropagation(t *testing.T) { t.Errorf("want %q, got %q", want, output) } } + +// fakeSyscall emulates a system call. +//go:nosplit +func fakeSyscall(duration time.Duration) { + runtime.Entersyscall() + for start := runtime.Nanotime(); runtime.Nanotime()-start < int64(duration); { + } + runtime.Exitsyscall() +} + +// Check that a goroutine will be preempted if it is calling short system calls. +func testPreemptionAfterSyscall(t *testing.T, syscallDuration time.Duration) { + if runtime.GOARCH == "wasm" { + t.Skip("no preemption on wasm yet") + } + + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2)) + + interations := 10 + if testing.Short() { + interations = 1 + } + const ( + maxDuration = 3 * time.Second + nroutines = 8 + ) + + for i := 0; i < interations; i++ { + c := make(chan bool, nroutines) + stop := uint32(0) + + start := time.Now() + for g := 0; g < nroutines; g++ { + go func(stop *uint32) { + c <- true + for atomic.LoadUint32(stop) == 0 { + fakeSyscall(syscallDuration) + } + c <- true + }(&stop) + } + // wait until all goroutines have started. + for g := 0; g < nroutines; g++ { + <-c + } + atomic.StoreUint32(&stop, 1) + // wait until all goroutines have finished. + for g := 0; g < nroutines; g++ { + <-c + } + duration := time.Since(start) + + if duration > maxDuration { + t.Errorf("timeout exceeded: %v (%v)", duration, maxDuration) + } + } +} + +func TestPreemptionAfterSyscall(t *testing.T) { + for _, i := range []time.Duration{10, 100, 1000} { + d := i * time.Microsecond + t.Run(fmt.Sprint(d), func(t *testing.T) { + testPreemptionAfterSyscall(t, d) + }) + } +} diff --git a/libgo/go/runtime/runtime-lldb_test.go b/libgo/go/runtime/runtime-lldb_test.go index 08d6a34f50c..1e2e5d5be93 100644 --- a/libgo/go/runtime/runtime-lldb_test.go +++ b/libgo/go/runtime/runtime-lldb_test.go @@ -152,13 +152,20 @@ func TestLldbPython(t *testing.T) { src := filepath.Join(dir, "main.go") err = ioutil.WriteFile(src, []byte(lldbHelloSource), 0644) if err != nil { - t.Fatalf("failed to create file: %v", err) + t.Fatalf("failed to create src file: %v", err) + } + + mod := filepath.Join(dir, "go.mod") + err = ioutil.WriteFile(mod, []byte("module lldbtest"), 0644) + if err != nil { + t.Fatalf("failed to create mod file: %v", err) } // As of 2018-07-17, lldb doesn't support compressed DWARF, so // disable it for this test. cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags=all=-N -l", "-ldflags=-compressdwarf=false", "-o", "a.exe") cmd.Dir = dir + cmd.Env = append(os.Environ(), "GOPATH=") // issue 31100 out, err := cmd.CombinedOutput() if err != nil { t.Fatalf("building source %v\n%s", err, out) diff --git a/libgo/go/runtime/runtime1.go b/libgo/go/runtime/runtime1.go index d9309cca771..c64549cd255 100644 --- a/libgo/go/runtime/runtime1.go +++ b/libgo/go/runtime/runtime1.go @@ -429,6 +429,7 @@ func setTraceback(level string) { // This is a very special function, do not use it if you are not sure what you are doing. 
// int64 division is lowered into _divv() call on 386, which does not fit into nosplit functions. // Handles overflow in a time-specific manner. +// This keeps us within no-split stack limits on 32-bit processors. //go:nosplit func timediv(v int64, div int32, rem *int32) int32 { res := int32(0) diff --git a/libgo/go/runtime/runtime2.go b/libgo/go/runtime/runtime2.go index fd77c4ca84e..77648c2477a 100644 --- a/libgo/go/runtime/runtime2.go +++ b/libgo/go/runtime/runtime2.go @@ -22,6 +22,13 @@ const ( // If you add to this list, add to the list // of "okay during garbage collection" status // in mgcmark.go too. + // + // TODO(austin): The _Gscan bit could be much lighter-weight. + // For example, we could choose not to run _Gscanrunnable + // goroutines found in the run queue, rather than CAS-looping + // until they become _Grunnable. And transitions like + // _Gscanwaiting -> _Gscanrunnable are actually okay because + // they don't affect stack ownership. // _Gidle means this goroutine was just allocated and has not // yet been initialized. @@ -97,10 +104,51 @@ const ( const ( // P status - _Pidle = iota - _Prunning // Only this P is allowed to change from _Prunning. + + // _Pidle means a P is not being used to run user code or the + // scheduler. Typically, it's on the idle P list and available + // to the scheduler, but it may just be transitioning between + // other states. + // + // The P is owned by the idle list or by whatever is + // transitioning its state. Its run queue is empty. + _Pidle = iota + + // _Prunning means a P is owned by an M and is being used to + // run user code or the scheduler. Only the M that owns this P + // is allowed to change the P's status from _Prunning. The M + // may transition the P to _Pidle (if it has no more work to + // do), _Psyscall (when entering a syscall), or _Pgcstop (to + // halt for the GC). The M may also hand ownership of the P + // off directly to another M (e.g., to schedule a locked G). + _Prunning + + // _Psyscall means a P is not running user code. It has + // affinity to an M in a syscall but is not owned by it and + // may be stolen by another M. This is similar to _Pidle but + // uses lightweight transitions and maintains M affinity. + // + // Leaving _Psyscall must be done with a CAS, either to steal + // or retake the P. Note that there's an ABA hazard: even if + // an M successfully CASes its original P back to _Prunning + // after a syscall, it must understand the P may have been + // used by another M in the interim. _Psyscall + + // _Pgcstop means a P is halted for STW and owned by the M + // that stopped the world. The M that stopped the world + // continues to use its P, even in _Pgcstop. Transitioning + // from _Prunning to _Pgcstop causes an M to release its P and + // park. + // + // The P retains its run queue and startTheWorld will restart + // the scheduler on Ps with non-empty run queues. _Pgcstop + + // _Pdead means a P is no longer used (GOMAXPROCS shrank). We + // reuse Ps if GOMAXPROCS increases. A dead P is mostly + // stripped of its resources, though a few things remain + // (e.g., trace buffers). 
_Pdead ) @@ -481,7 +529,6 @@ type m struct { profilehz int32 spinning bool // m is out of work and is actively looking for work blocked bool // m is blocked on a note - inwb bool // m is executing a write barrier newSigstack bool // minit on C thread called sigaltstack printlock int8 incgo bool // m is executing a cgo call @@ -498,11 +545,11 @@ type m struct { schedlink muintptr mcache *mcache lockedg guintptr - createstack [32]location // stack that created this thread. - lockedExt uint32 // tracking for external LockOSThread - lockedInt uint32 // tracking for internal lockOSThread - nextwaitm muintptr // next m waiting for lock - waitunlockf unsafe.Pointer // todo go func(*g, unsafe.pointer) bool + createstack [32]location // stack that created this thread. + lockedExt uint32 // tracking for external LockOSThread + lockedInt uint32 // tracking for internal lockOSThread + nextwaitm muintptr // next m waiting for lock + waitunlockf func(*g, unsafe.Pointer) bool waitlock unsafe.Pointer waittraceev byte waittraceskip int @@ -519,7 +566,9 @@ type m struct { // Not for gccgo: libcallg guintptr // Not for gccgo: syscall libcall // stores syscall parameters on windows - mos mOS + dlogPerM + + mOS // Remaining fields are specific to gccgo. @@ -529,14 +578,10 @@ type m struct { dropextram bool // drop after call is done exiting bool // thread is exiting - gcing int32 - scannote note // synchonization for signal-based stack scanning } type p struct { - lock mutex - id int32 status uint32 // one of pidle/prunning/... link puintptr @@ -545,7 +590,7 @@ type p struct { sysmontick sysmontick // last tick observed by sysmon m muintptr // back-link to associated m (nil if idle) mcache *mcache - racectx uintptr + raceprocctx uintptr // gccgo has only one size of defer. deferpool []*_defer @@ -591,10 +636,12 @@ type p struct { palloc persistentAlloc // per-P to avoid mutex + _ uint32 // Alignment for atomic fields below + // Per-P GC state - gcAssistTime int64 // Nanoseconds in assistAlloc - gcFractionalMarkTime int64 // Nanoseconds in fractional mark worker - gcBgMarkWorker guintptr + gcAssistTime int64 // Nanoseconds in assistAlloc + gcFractionalMarkTime int64 // Nanoseconds in fractional mark worker (atomic) + gcBgMarkWorker guintptr // (atomic) gcMarkWorkerMode gcMarkWorkerMode // gcMarkWorkerStartTime is the nanotime() at which this mark @@ -707,7 +754,7 @@ const ( ) // Lock-free stack node. -// // Also known to export_test.go. +// Also known to export_test.go. type lfnode struct { next uint64 pushcnt uintptr @@ -847,6 +894,7 @@ const ( waitReasonSelectNoCases // "select (no cases)" waitReasonGCAssistWait // "GC assist wait" waitReasonGCSweepWait // "GC sweep wait" + waitReasonGCScavengeWait // "GC scavenge wait" waitReasonChanReceive // "chan receive" waitReasonChanSend // "chan send" waitReasonFinalizerWait // "finalizer wait" @@ -874,6 +922,7 @@ var waitReasonStrings = [...]string{ waitReasonSelectNoCases: "select (no cases)", waitReasonGCAssistWait: "GC assist wait", waitReasonGCSweepWait: "GC sweep wait", + waitReasonGCScavengeWait: "GC scavenge wait", waitReasonChanReceive: "chan receive", waitReasonChanSend: "chan send", waitReasonFinalizerWait: "finalizer wait", diff --git a/libgo/go/runtime/runtime_mmap_test.go b/libgo/go/runtime/runtime_mmap_test.go index 188fd5d0f0c..c7703f47053 100644 --- a/libgo/go/runtime/runtime_mmap_test.go +++ b/libgo/go/runtime/runtime_mmap_test.go @@ -2,6 +2,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+// +build aix darwin dragonfly freebsd hurd linux nacl netbsd openbsd solaris + package runtime_test import ( diff --git a/libgo/go/runtime/runtime_test.go b/libgo/go/runtime/runtime_test.go index 222679e048d..e60786804d0 100644 --- a/libgo/go/runtime/runtime_test.go +++ b/libgo/go/runtime/runtime_test.go @@ -70,6 +70,18 @@ func BenchmarkEfaceCmpDiff(b *testing.B) { } } +func BenchmarkEfaceCmpDiffIndirect(b *testing.B) { + efaceCmp1 = [2]int{1, 2} + efaceCmp2 = [2]int{1, 2} + for i := 0; i < b.N; i++ { + for j := 0; j < 100; j++ { + if efaceCmp1 != efaceCmp2 { + b.Fatal("bad comparison") + } + } + } +} + func BenchmarkDefer(b *testing.B) { for i := 0; i < b.N; i++ { defer1() diff --git a/libgo/go/runtime/sema.go b/libgo/go/runtime/sema.go index 2c7ad329662..c002e2930be 100644 --- a/libgo/go/runtime/sema.go +++ b/libgo/go/runtime/sema.go @@ -53,22 +53,22 @@ var semtable [semTabSize]struct { //go:linkname sync_runtime_Semacquire sync.runtime_Semacquire func sync_runtime_Semacquire(addr *uint32) { - semacquire1(addr, false, semaBlockProfile) + semacquire1(addr, false, semaBlockProfile, 0) } //go:linkname poll_runtime_Semacquire internal..z2fpoll.runtime_Semacquire func poll_runtime_Semacquire(addr *uint32) { - semacquire1(addr, false, semaBlockProfile) + semacquire1(addr, false, semaBlockProfile, 0) } //go:linkname sync_runtime_Semrelease sync.runtime_Semrelease -func sync_runtime_Semrelease(addr *uint32, handoff bool) { - semrelease1(addr, handoff) +func sync_runtime_Semrelease(addr *uint32, handoff bool, skipframes int) { + semrelease1(addr, handoff, skipframes) } //go:linkname sync_runtime_SemacquireMutex sync.runtime_SemacquireMutex -func sync_runtime_SemacquireMutex(addr *uint32, lifo bool) { - semacquire1(addr, lifo, semaBlockProfile|semaMutexProfile) +func sync_runtime_SemacquireMutex(addr *uint32, lifo bool, skipframes int) { + semacquire1(addr, lifo, semaBlockProfile|semaMutexProfile, skipframes) } //go:linkname poll_runtime_Semrelease internal..z2fpoll.runtime_Semrelease @@ -92,10 +92,10 @@ const ( // Called from runtime. func semacquire(addr *uint32) { - semacquire1(addr, false, 0) + semacquire1(addr, false, 0, 0) } -func semacquire1(addr *uint32, lifo bool, profile semaProfileFlags) { +func semacquire1(addr *uint32, lifo bool, profile semaProfileFlags, skipframes int) { gp := getg() if gp != gp.m.curg { throw("semacquire not on the G stack") @@ -141,22 +141,22 @@ func semacquire1(addr *uint32, lifo bool, profile semaProfileFlags) { // Any semrelease after the cansemacquire knows we're waiting // (we set nwait above), so go to sleep. 
root.queue(addr, s, lifo) - goparkunlock(&root.lock, waitReasonSemacquire, traceEvGoBlockSync, 4) + goparkunlock(&root.lock, waitReasonSemacquire, traceEvGoBlockSync, 4+skipframes) if s.ticket != 0 || cansemacquire(addr) { break } } if s.releasetime > 0 { - blockevent(s.releasetime-t0, 3) + blockevent(s.releasetime-t0, 3+skipframes) } releaseSudog(s) } func semrelease(addr *uint32) { - semrelease1(addr, false) + semrelease1(addr, false, 0) } -func semrelease1(addr *uint32, handoff bool) { +func semrelease1(addr *uint32, handoff bool, skipframes int) { root := semroot(addr) atomic.Xadd(addr, 1) @@ -183,7 +183,7 @@ func semrelease1(addr *uint32, handoff bool) { if s != nil { // May be slow, so unlock first acquiretime := s.acquiretime if acquiretime != 0 { - mutexevent(t0-acquiretime, 3) + mutexevent(t0-acquiretime, 3+skipframes) } if s.ticket != 0 { throw("corrupted semaphore ticket") @@ -191,7 +191,7 @@ func semrelease1(addr *uint32, handoff bool) { if handoff && cansemacquire(addr) { s.ticket = 1 } - readyWithTime(s, 5) + readyWithTime(s, 5+skipframes) } } diff --git a/libgo/go/runtime/semasleep_test.go b/libgo/go/runtime/semasleep_test.go index 5b2cc64483f..f5b4a506972 100644 --- a/libgo/go/runtime/semasleep_test.go +++ b/libgo/go/runtime/semasleep_test.go @@ -7,11 +7,7 @@ package runtime_test import ( - "internal/testenv" - "io/ioutil" - "os" "os/exec" - "path/filepath" "syscall" "testing" "time" @@ -21,39 +17,17 @@ import ( // shouldn't cause semasleep to retry with the same timeout which would // cause indefinite spinning. func TestSpuriousWakeupsNeverHangSemasleep(t *testing.T) { - testenv.MustHaveGoBuild(t) - tempDir, err := ioutil.TempDir("", "issue-27250") - if err != nil { - t.Fatalf("Failed to create the temp directory: %v", err) + if *flagQuick { + t.Skip("-quick") } - defer os.RemoveAll(tempDir) - - repro := ` - package main - import "time" - - func main() { - <-time.After(1 * time.Second) - } - ` - mainPath := filepath.Join(tempDir, "main.go") - if err := ioutil.WriteFile(mainPath, []byte(repro), 0644); err != nil { - t.Fatalf("Failed to create temp file for repro.go: %v", err) - } - binaryPath := filepath.Join(tempDir, "binary") - - // Build the binary so that we can send the signal to its PID. - out, err := exec.Command(testenv.GoToolPath(t), "build", "-o", binaryPath, mainPath).CombinedOutput() + exe, err := buildTestProg(t, "testprog") if err != nil { - t.Fatalf("Failed to compile the binary: err: %v\nOutput: %s\n", err, out) - } - if err := os.Chmod(binaryPath, 0755); err != nil { - t.Fatalf("Failed to chmod binary: %v", err) + t.Fatal(err) } - // Now run the binary. 
- cmd := exec.Command(binaryPath) + start := time.Now() + cmd := exec.Command(exe, "After1") if err := cmd.Start(); err != nil { t.Fatalf("Failed to start command: %v", err) } @@ -82,6 +56,9 @@ func TestSpuriousWakeupsNeverHangSemasleep(t *testing.T) { if err != nil { t.Fatalf("The program returned but unfortunately with an error: %v", err) } + if time.Since(start) < 100*time.Millisecond { + t.Fatalf("The program stopped too quickly.") + } return } } diff --git a/libgo/go/runtime/signal_sighandler.go b/libgo/go/runtime/signal_sighandler.go index 4f11877a297..3583c7b98a0 100644 --- a/libgo/go/runtime/signal_sighandler.go +++ b/libgo/go/runtime/signal_sighandler.go @@ -170,5 +170,7 @@ func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) { crash() } + printDebugLog() + exit(2) } diff --git a/libgo/go/runtime/signal_unix.go b/libgo/go/runtime/signal_unix.go index e1bab8caba3..365f5dd0ad6 100644 --- a/libgo/go/runtime/signal_unix.go +++ b/libgo/go/runtime/signal_unix.go @@ -277,7 +277,7 @@ func setThreadCPUProfiler(hz int32) { } func sigpipe() { - if sigsend(_SIGPIPE) { + if signal_ignored(_SIGPIPE) || sigsend(_SIGPIPE) { return } dieFromSignal(_SIGPIPE) @@ -328,6 +328,9 @@ func sigtrampgo(sig uint32, info *_siginfo_t, ctx unsafe.Pointer) { // // The signal handler must not inject a call to sigpanic if // getg().throwsplit, since sigpanic may need to grow the stack. +// +// This is exported via linkname to assembly in runtime/cgo. +//go:linkname sigpanic func sigpanic() { g := getg() if !canpanic(g) { @@ -466,16 +469,14 @@ func raisebadsignal(sig uint32, c *sigctxt) { //go:nosplit func crash() { - if GOOS == "darwin" { - // OS X core dumps are linear dumps of the mapped memory, - // from the first virtual byte to the last, with zeros in the gaps. - // Because of the way we arrange the address space on 64-bit systems, - // this means the OS X core file will be >128 GB and even on a zippy - // workstation can take OS X well over an hour to write (uninterruptible). - // Save users from making that mistake. - if GOARCH == "amd64" { - return - } + // OS X core dumps are linear dumps of the mapped memory, + // from the first virtual byte to the last, with zeros in the gaps. + // Because of the way we arrange the address space on 64-bit systems, + // this means the OS X core file will be >128 GB and even on a zippy + // workstation can take OS X well over an hour to write (uninterruptible). + // Save users from making that mistake. 
+ if GOOS == "darwin" && GOARCH == "amd64" { + return } dieFromSignal(_SIGABRT) diff --git a/libgo/go/runtime/sizeclasses.go b/libgo/go/runtime/sizeclasses.go index 9e17b001d3e..9c1b44fe0b2 100644 --- a/libgo/go/runtime/sizeclasses.go +++ b/libgo/go/runtime/sizeclasses.go @@ -90,6 +90,6 @@ type divMagic struct { baseMask uint16 } -var class_to_divmagic = [_NumSizeClasses]divMagic{{0, 0, 0, 0}, {3, 0, 1, 65528}, {4, 0, 1, 65520}, {5, 0, 1, 65504}, {4, 9, 171, 0}, {6, 0, 1, 65472}, {4, 10, 205, 0}, {5, 9, 171, 0}, {4, 11, 293, 0}, {7, 0, 1, 65408}, {4, 9, 57, 0}, {5, 10, 205, 0}, {4, 12, 373, 0}, {6, 7, 43, 0}, {4, 13, 631, 0}, {5, 11, 293, 0}, {4, 13, 547, 0}, {8, 0, 1, 65280}, {5, 9, 57, 0}, {6, 9, 103, 0}, {5, 12, 373, 0}, {7, 7, 43, 0}, {5, 10, 79, 0}, {6, 10, 147, 0}, {5, 11, 137, 0}, {9, 0, 1, 65024}, {6, 9, 57, 0}, {7, 6, 13, 0}, {6, 11, 187, 0}, {8, 5, 11, 0}, {7, 8, 37, 0}, {10, 0, 1, 64512}, {7, 9, 57, 0}, {8, 6, 13, 0}, {7, 11, 187, 0}, {9, 5, 11, 0}, {8, 8, 37, 0}, {11, 0, 1, 63488}, {8, 9, 57, 0}, {7, 10, 49, 0}, {10, 5, 11, 0}, {7, 10, 41, 0}, {7, 9, 19, 0}, {12, 0, 1, 61440}, {8, 9, 27, 0}, {8, 10, 49, 0}, {11, 5, 11, 0}, {7, 13, 161, 0}, {7, 13, 155, 0}, {8, 9, 19, 0}, {13, 0, 1, 57344}, {8, 12, 111, 0}, {9, 9, 27, 0}, {11, 6, 13, 0}, {7, 14, 193, 0}, {12, 3, 3, 0}, {8, 13, 155, 0}, {11, 8, 37, 0}, {14, 0, 1, 49152}, {11, 8, 29, 0}, {7, 13, 55, 0}, {12, 5, 7, 0}, {8, 14, 193, 0}, {13, 3, 3, 0}, {7, 14, 77, 0}, {12, 7, 19, 0}, {15, 0, 1, 32768}} +var class_to_divmagic = [_NumSizeClasses]divMagic{{0, 0, 0, 0}, {3, 0, 1, 65528}, {4, 0, 1, 65520}, {5, 0, 1, 65504}, {4, 11, 683, 0}, {6, 0, 1, 65472}, {4, 10, 205, 0}, {5, 9, 171, 0}, {4, 11, 293, 0}, {7, 0, 1, 65408}, {4, 13, 911, 0}, {5, 10, 205, 0}, {4, 12, 373, 0}, {6, 9, 171, 0}, {4, 13, 631, 0}, {5, 11, 293, 0}, {4, 13, 547, 0}, {8, 0, 1, 65280}, {5, 9, 57, 0}, {6, 9, 103, 0}, {5, 12, 373, 0}, {7, 7, 43, 0}, {5, 10, 79, 0}, {6, 10, 147, 0}, {5, 11, 137, 0}, {9, 0, 1, 65024}, {6, 9, 57, 0}, {7, 9, 103, 0}, {6, 11, 187, 0}, {8, 7, 43, 0}, {7, 8, 37, 0}, {10, 0, 1, 64512}, {7, 9, 57, 0}, {8, 6, 13, 0}, {7, 11, 187, 0}, {9, 5, 11, 0}, {8, 8, 37, 0}, {11, 0, 1, 63488}, {8, 9, 57, 0}, {7, 10, 49, 0}, {10, 5, 11, 0}, {7, 10, 41, 0}, {7, 9, 19, 0}, {12, 0, 1, 61440}, {8, 9, 27, 0}, {8, 10, 49, 0}, {11, 5, 11, 0}, {7, 13, 161, 0}, {7, 13, 155, 0}, {8, 9, 19, 0}, {13, 0, 1, 57344}, {8, 12, 111, 0}, {9, 9, 27, 0}, {11, 6, 13, 0}, {7, 14, 193, 0}, {12, 3, 3, 0}, {8, 13, 155, 0}, {11, 8, 37, 0}, {14, 0, 1, 49152}, {11, 8, 29, 0}, {7, 13, 55, 0}, {12, 5, 7, 0}, {8, 14, 193, 0}, {13, 3, 3, 0}, {7, 14, 77, 0}, {12, 7, 19, 0}, {15, 0, 1, 32768}} var size_to_class8 = [smallSizeMax/smallSizeDiv + 1]uint8{0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31} var size_to_class128 = [(_MaxSmallSize-smallSizeMax)/largeSizeDiv + 1]uint8{31, 32, 33, 34, 35, 36, 36, 37, 37, 38, 38, 39, 39, 39, 40, 40, 40, 41, 42, 42, 43, 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 47, 47, 47, 48, 48, 49, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 52, 52, 53, 
53, 53, 53, 54, 54, 54, 54, 54, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 57, 57, 57, 57, 57, 57, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66} diff --git a/libgo/go/runtime/slice.go b/libgo/go/runtime/slice.go index 4b15f82a54e..c258ebd2bd0 100644 --- a/libgo/go/runtime/slice.go +++ b/libgo/go/runtime/slice.go @@ -174,7 +174,7 @@ func growslice(et *_type, oldarray unsafe.Pointer, oldlen, oldcap, cap int) slic } var p unsafe.Pointer - if et.kind&kindNoPointers != 0 { + if et.ptrdata == 0 { p = mallocgc(capmem, nil, false) // The append() that calls growslice is going to overwrite from oldlen to cap (which will be the new length). // Only clear the part that will not be overwritten. @@ -182,8 +182,8 @@ func growslice(et *_type, oldarray unsafe.Pointer, oldlen, oldcap, cap int) slic } else { // Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan uninitialized memory. p = mallocgc(capmem, et, true) - if writeBarrier.enabled { - // Only shade the pointers in oldarray since we know the destination slice p + if lenmem > 0 && writeBarrier.enabled { + // Only shade the pointers in old.array since we know the destination slice p // only contains nil pointers because it has been cleared during alloc. bulkBarrierPreWriteSrcOnly(uintptr(p), uintptr(oldarray), lenmem) } diff --git a/libgo/go/runtime/stack_test.go b/libgo/go/runtime/stack_test.go index b6962532ffd..6ed65e8285f 100644 --- a/libgo/go/runtime/stack_test.go +++ b/libgo/go/runtime/stack_test.go @@ -1,14 +1,821 @@ -// Copyright 2019 The Go Authors. All rights reserved. +// Copyright 2012 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package runtime_test -import "testing" +import ( + "bytes" + "fmt" + "os" + "reflect" + "regexp" + . "runtime" + "strconv" + "strings" + "sync" + "sync/atomic" + "testing" + "time" +) + +// TestStackMem measures per-thread stack segment cache behavior. +// The test consumed up to 500MB in the past. +func TestStackMem(t *testing.T) { + const ( + BatchSize = 32 + BatchCount = 256 + ArraySize = 1024 + RecursionDepth = 128 + ) + if testing.Short() { + return + } + defer GOMAXPROCS(GOMAXPROCS(BatchSize)) + s0 := new(MemStats) + ReadMemStats(s0) + for b := 0; b < BatchCount; b++ { + c := make(chan bool, BatchSize) + for i := 0; i < BatchSize; i++ { + go func() { + var f func(k int, a [ArraySize]byte) + f = func(k int, a [ArraySize]byte) { + if k == 0 { + time.Sleep(time.Millisecond) + return + } + f(k-1, a) + } + f(RecursionDepth, [ArraySize]byte{}) + c <- true + }() + } + for i := 0; i < BatchSize; i++ { + <-c + } + + // The goroutines have signaled via c that they are ready to exit. + // Give them a chance to exit by sleeping. If we don't wait, we + // might not reuse them on the next batch. 
+ time.Sleep(10 * time.Millisecond) + } + s1 := new(MemStats) + ReadMemStats(s1) + consumed := int64(s1.StackSys - s0.StackSys) + t.Logf("Consumed %vMB for stack mem", consumed>>20) + estimate := int64(8 * BatchSize * ArraySize * RecursionDepth) // 8 is to reduce flakiness. + if consumed > estimate { + t.Fatalf("Stack mem: want %v, got %v", estimate, consumed) + } + // Due to broken stack memory accounting (https://golang.org/issue/7468), + // StackInuse can decrease during function execution, so we cast the values to int64. + inuse := int64(s1.StackInuse) - int64(s0.StackInuse) + t.Logf("Inuse %vMB for stack mem", inuse>>20) + if inuse > 4<<20 { + t.Fatalf("Stack inuse: want %v, got %v", 4<<20, inuse) + } +} + +// Test stack growing in different contexts. +func TestStackGrowth(t *testing.T) { + if *flagQuick { + t.Skip("-quick") + } + + if GOARCH == "wasm" { + t.Skip("fails on wasm (too slow?)") + } + + // Don't make this test parallel as this makes the 20 second + // timeout unreliable on slow builders. (See issue #19381.) + + var wg sync.WaitGroup + + // in a normal goroutine + var growDuration time.Duration // For debugging failures + wg.Add(1) + go func() { + defer wg.Done() + start := time.Now() + growStack(nil) + growDuration = time.Since(start) + }() + wg.Wait() + + // in locked goroutine + wg.Add(1) + go func() { + defer wg.Done() + LockOSThread() + growStack(nil) + UnlockOSThread() + }() + wg.Wait() + + // in finalizer + wg.Add(1) + go func() { + defer wg.Done() + done := make(chan bool) + var startTime time.Time + var started, progress uint32 + go func() { + s := new(string) + SetFinalizer(s, func(ss *string) { + startTime = time.Now() + atomic.StoreUint32(&started, 1) + growStack(&progress) + done <- true + }) + s = nil + done <- true + }() + <-done + GC() + + timeout := 20 * time.Second + if s := os.Getenv("GO_TEST_TIMEOUT_SCALE"); s != "" { + scale, err := strconv.Atoi(s) + if err == nil { + timeout *= time.Duration(scale) + } + } + + select { + case <-done: + case <-time.After(timeout): + if atomic.LoadUint32(&started) == 0 { + t.Log("finalizer did not start") + } else { + t.Logf("finalizer started %s ago and finished %d iterations", time.Since(startTime), atomic.LoadUint32(&progress)) + } + t.Log("first growStack took", growDuration) + t.Error("finalizer did not run") + return + } + }() + wg.Wait() +} + +// ... and in init +//func init() { +// growStack() +//} + +func growStack(progress *uint32) { + n := 1 << 10 + if testing.Short() { + n = 1 << 8 + } + for i := 0; i < n; i++ { + x := 0 + growStackIter(&x, i) + if x != i+1 { + panic("stack is corrupted") + } + if progress != nil { + atomic.StoreUint32(progress, uint32(i)) + } + } + GC() +} + +// This function is not an anonymous func, so that the compiler can do escape +// analysis and place x on stack (and subsequently stack growth update the pointer). 
+func growStackIter(p *int, n int) { + if n == 0 { + *p = n + 1 + GC() + return + } + *p = n + 1 + x := 0 + growStackIter(&x, n-1) + if x != n { + panic("stack is corrupted") + } +} + +func TestStackGrowthCallback(t *testing.T) { + t.Parallel() + var wg sync.WaitGroup + + // test stack growth at chan op + wg.Add(1) + go func() { + defer wg.Done() + c := make(chan int, 1) + growStackWithCallback(func() { + c <- 1 + <-c + }) + }() + + // test stack growth at map op + wg.Add(1) + go func() { + defer wg.Done() + m := make(map[int]int) + growStackWithCallback(func() { + _, _ = m[1] + m[1] = 1 + }) + }() + + // test stack growth at goroutine creation + wg.Add(1) + go func() { + defer wg.Done() + growStackWithCallback(func() { + done := make(chan bool) + go func() { + done <- true + }() + <-done + }) + }() + wg.Wait() +} + +func growStackWithCallback(cb func()) { + var f func(n int) + f = func(n int) { + if n == 0 { + cb() + return + } + f(n - 1) + } + for i := 0; i < 1<<10; i++ { + f(i) + } +} + +// TestDeferPtrs tests the adjustment of Defer's argument pointers (p aka &y) +// during a stack copy. +func set(p *int, x int) { + *p = x +} +func TestDeferPtrs(t *testing.T) { + var y int + + defer func() { + if y != 42 { + t.Errorf("defer's stack references were not adjusted appropriately") + } + }() + defer set(&y, 42) + growStack(nil) +} + +type bigBuf [4 * 1024]byte + +// TestDeferPtrsGoexit is like TestDeferPtrs but exercises the possibility that the +// stack grows as part of starting the deferred function. It calls Goexit at various +// stack depths, forcing the deferred function (with >4kB of args) to be run at +// the bottom of the stack. The goal is to find a stack depth less than 4kB from +// the end of the stack. Each trial runs in a different goroutine so that an earlier +// stack growth does not invalidate a later attempt. +func TestDeferPtrsGoexit(t *testing.T) { + for i := 0; i < 100; i++ { + c := make(chan int, 1) + go testDeferPtrsGoexit(c, i) + if n := <-c; n != 42 { + t.Fatalf("defer's stack references were not adjusted appropriately (i=%d n=%d)", i, n) + } + } +} + +func testDeferPtrsGoexit(c chan int, i int) { + var y int + defer func() { + c <- y + }() + defer setBig(&y, 42, bigBuf{}) + useStackAndCall(i, Goexit) +} + +func setBig(p *int, x int, b bigBuf) { + *p = x +} + +// TestDeferPtrsPanic is like TestDeferPtrsGoexit, but it's using panic instead +// of Goexit to run the Defers. Those two are different execution paths +// in the runtime. +func TestDeferPtrsPanic(t *testing.T) { + for i := 0; i < 100; i++ { + c := make(chan int, 1) + go testDeferPtrsGoexit(c, i) + if n := <-c; n != 42 { + t.Fatalf("defer's stack references were not adjusted appropriately (i=%d n=%d)", i, n) + } + } +} + +func testDeferPtrsPanic(c chan int, i int) { + var y int + defer func() { + if recover() == nil { + c <- -1 + return + } + c <- y + }() + defer setBig(&y, 42, bigBuf{}) + useStackAndCall(i, func() { panic(1) }) +} + +//go:noinline +func testDeferLeafSigpanic1() { + // Cause a sigpanic to be injected in this frame. + // + // This function has to be declared before + // TestDeferLeafSigpanic so the runtime will crash if we think + // this function's continuation PC is in + // TestDeferLeafSigpanic. + *(*int)(nil) = 0 +} + +// TestDeferLeafSigpanic tests defer matching around leaf functions +// that sigpanic. 
This is tricky because on LR machines the outer +// function and the inner function have the same SP, but it's critical +// that we match up the defer correctly to get the right liveness map. +// See issue #25499. +func TestDeferLeafSigpanic(t *testing.T) { + // Push a defer that will walk the stack. + defer func() { + if err := recover(); err == nil { + t.Fatal("expected panic from nil pointer") + } + GC() + }() + // Call a leaf function. We must set up the exact call stack: + // + // defering function -> leaf function -> sigpanic + // + // On LR machines, the leaf function will have the same SP as + // the SP pushed for the defer frame. + testDeferLeafSigpanic1() +} + +// TestPanicUseStack checks that a chain of Panic structs on the stack are +// updated correctly if the stack grows during the deferred execution that +// happens as a result of the panic. +func TestPanicUseStack(t *testing.T) { + pc := make([]uintptr, 10000) + defer func() { + recover() + Callers(0, pc) // force stack walk + useStackAndCall(100, func() { + defer func() { + recover() + Callers(0, pc) // force stack walk + useStackAndCall(200, func() { + defer func() { + recover() + Callers(0, pc) // force stack walk + }() + panic(3) + }) + }() + panic(2) + }) + }() + panic(1) +} + +func TestPanicFar(t *testing.T) { + var xtree *xtreeNode + pc := make([]uintptr, 10000) + defer func() { + // At this point we created a large stack and unwound + // it via recovery. Force a stack walk, which will + // check the stack's consistency. + Callers(0, pc) + }() + defer func() { + recover() + }() + useStackAndCall(100, func() { + // Kick off the GC and make it do something nontrivial. + // (This used to force stack barriers to stick around.) + xtree = makeTree(18) + // Give the GC time to start scanning stacks. + time.Sleep(time.Millisecond) + panic(1) + }) + _ = xtree +} + +type xtreeNode struct { + l, r *xtreeNode +} + +func makeTree(d int) *xtreeNode { + if d == 0 { + return new(xtreeNode) + } + return &xtreeNode{makeTree(d - 1), makeTree(d - 1)} +} + +// use about n KB of stack and call f +func useStackAndCall(n int, f func()) { + if n == 0 { + f() + return + } + var b [1024]byte // makes frame about 1KB + useStackAndCall(n-1+int(b[99]), f) +} + +func useStack(n int) { + useStackAndCall(n, func() {}) +} + +func growing(c chan int, done chan struct{}) { + for n := range c { + useStack(n) + done <- struct{}{} + } + done <- struct{}{} +} + +func TestStackCache(t *testing.T) { + // Allocate a bunch of goroutines and grow their stacks. + // Repeat a few times to test the stack cache. 
+ const ( + R = 4 + G = 200 + S = 5 + ) + for i := 0; i < R; i++ { + var reqchans [G]chan int + done := make(chan struct{}) + for j := 0; j < G; j++ { + reqchans[j] = make(chan int) + go growing(reqchans[j], done) + } + for s := 0; s < S; s++ { + for j := 0; j < G; j++ { + reqchans[j] <- 1 << uint(s) + } + for j := 0; j < G; j++ { + <-done + } + } + for j := 0; j < G; j++ { + close(reqchans[j]) + } + for j := 0; j < G; j++ { + <-done + } + } +} + +func TestStackOutput(t *testing.T) { + b := make([]byte, 1024) + stk := string(b[:Stack(b, false)]) + if !strings.HasPrefix(stk, "goroutine ") { + t.Errorf("Stack (len %d):\n%s", len(stk), stk) + t.Errorf("Stack output should begin with \"goroutine \"") + } +} + +func TestStackAllOutput(t *testing.T) { + b := make([]byte, 1024) + stk := string(b[:Stack(b, true)]) + if !strings.HasPrefix(stk, "goroutine ") { + t.Errorf("Stack (len %d):\n%s", len(stk), stk) + t.Errorf("Stack output should begin with \"goroutine \"") + } +} + +func TestStackPanic(t *testing.T) { + // Test that stack copying copies panics correctly. This is difficult + // to test because it is very unlikely that the stack will be copied + // in the middle of gopanic. But it can happen. + // To make this test effective, edit panic.go:gopanic and uncomment + // the GC() call just before freedefer(d). + defer func() { + if x := recover(); x == nil { + t.Errorf("recover failed") + } + }() + useStack(32) + panic("test panic") +} + +func BenchmarkStackCopyPtr(b *testing.B) { + c := make(chan bool) + for i := 0; i < b.N; i++ { + go func() { + i := 1000000 + countp(&i) + c <- true + }() + <-c + } +} + +func countp(n *int) { + if *n == 0 { + return + } + *n-- + countp(n) +} + +func BenchmarkStackCopy(b *testing.B) { + c := make(chan bool) + for i := 0; i < b.N; i++ { + go func() { + count(1000000) + c <- true + }() + <-c + } +} + +func count(n int) int { + if n == 0 { + return 0 + } + return 1 + count(n-1) +} + +func BenchmarkStackCopyNoCache(b *testing.B) { + c := make(chan bool) + for i := 0; i < b.N; i++ { + go func() { + count1(1000000) + c <- true + }() + <-c + } +} + +func count1(n int) int { + if n <= 0 { + return 0 + } + return 1 + count2(n-1) +} + +func count2(n int) int { return 1 + count3(n-1) } +func count3(n int) int { return 1 + count4(n-1) } +func count4(n int) int { return 1 + count5(n-1) } +func count5(n int) int { return 1 + count6(n-1) } +func count6(n int) int { return 1 + count7(n-1) } +func count7(n int) int { return 1 + count8(n-1) } +func count8(n int) int { return 1 + count9(n-1) } +func count9(n int) int { return 1 + count10(n-1) } +func count10(n int) int { return 1 + count11(n-1) } +func count11(n int) int { return 1 + count12(n-1) } +func count12(n int) int { return 1 + count13(n-1) } +func count13(n int) int { return 1 + count14(n-1) } +func count14(n int) int { return 1 + count15(n-1) } +func count15(n int) int { return 1 + count16(n-1) } +func count16(n int) int { return 1 + count17(n-1) } +func count17(n int) int { return 1 + count18(n-1) } +func count18(n int) int { return 1 + count19(n-1) } +func count19(n int) int { return 1 + count20(n-1) } +func count20(n int) int { return 1 + count21(n-1) } +func count21(n int) int { return 1 + count22(n-1) } +func count22(n int) int { return 1 + count23(n-1) } +func count23(n int) int { return 1 + count1(n-1) } + +type structWithMethod struct{} + +func (s structWithMethod) caller() string { + _, file, line, ok := Caller(1) + if !ok { + panic("Caller failed") + } + return fmt.Sprintf("%s:%d", file, line) +} + +func (s 
structWithMethod) callers() []uintptr { + pc := make([]uintptr, 16) + return pc[:Callers(0, pc)] +} + +// The noinline prevents this function from being inlined +// into a wrapper. TODO: remove this when issue 28640 is fixed. +//go:noinline +func (s structWithMethod) stack() string { + buf := make([]byte, 4<<10) + return string(buf[:Stack(buf, false)]) +} + +func (s structWithMethod) nop() {} + +func TestStackWrapperCaller(t *testing.T) { + var d structWithMethod + // Force the compiler to construct a wrapper method. + wrapper := (*structWithMethod).caller + // Check that the wrapper doesn't affect the stack trace. + if dc, ic := d.caller(), wrapper(&d); dc != ic { + t.Fatalf("direct caller %q != indirect caller %q", dc, ic) + } +} + +func TestStackWrapperCallers(t *testing.T) { + var d structWithMethod + wrapper := (*structWithMethod).callers + // Check that <autogenerated> doesn't appear in the stack trace. + pcs := wrapper(&d) + frames := CallersFrames(pcs) + for { + fr, more := frames.Next() + if fr.File == "<autogenerated>" { + t.Fatalf("<autogenerated> appears in stack trace: %+v", fr) + } + if !more { + break + } + } +} + +func TestStackWrapperStack(t *testing.T) { + var d structWithMethod + wrapper := (*structWithMethod).stack + // Check that <autogenerated> doesn't appear in the stack trace. + stk := wrapper(&d) + if strings.Contains(stk, "<autogenerated>") { + t.Fatalf("<autogenerated> appears in stack trace:\n%s", stk) + } +} + +type I interface { + M() +} + +func TestStackWrapperStackPanic(t *testing.T) { + if Compiler == "gccgo" { + t.Skip("gccgo currently uses different, meaningless, wrapper names") + } + + t.Run("sigpanic", func(t *testing.T) { + // nil calls to interface methods cause a sigpanic. + testStackWrapperPanic(t, func() { I.M(nil) }, "runtime_test.I.M") + }) + t.Run("panicwrap", func(t *testing.T) { + // Nil calls to value method wrappers call panicwrap. + wrapper := (*structWithMethod).nop + testStackWrapperPanic(t, func() { wrapper(nil) }, "runtime_test.(*structWithMethod).nop") + }) +} + +func testStackWrapperPanic(t *testing.T, cb func(), expect string) { + // Test that the stack trace from a panicking wrapper includes + // the wrapper, even though elide these when they don't panic. + t.Run("CallersFrames", func(t *testing.T) { + defer func() { + err := recover() + if err == nil { + t.Fatalf("expected panic") + } + pcs := make([]uintptr, 10) + n := Callers(0, pcs) + frames := CallersFrames(pcs[:n]) + for { + frame, more := frames.Next() + t.Log(frame.Function) + if frame.Function == expect { + return + } + if !more { + break + } + } + t.Fatalf("panicking wrapper %s missing from stack trace", expect) + }() + cb() + }) + t.Run("Stack", func(t *testing.T) { + defer func() { + err := recover() + if err == nil { + t.Fatalf("expected panic") + } + buf := make([]byte, 4<<10) + stk := string(buf[:Stack(buf, false)]) + if !strings.Contains(stk, "\n"+expect) { + t.Fatalf("panicking wrapper %s missing from stack trace:\n%s", expect, stk) + } + }() + cb() + }) +} + +func TestCallersFromWrapper(t *testing.T) { + if Compiler == "gccgo" { + t.Skip("gccgo currently uses different, meaningless, wrapper names") + } + // Test that invoking CallersFrames on a stack where the first + // PC is an autogenerated wrapper keeps the wrapper in the + // trace. Normally we elide these, assuming that the wrapper + // calls the thing you actually wanted to see, but in this + // case we need to keep it. 
+ pc := reflect.ValueOf(I.M).Pointer() + frames := CallersFrames([]uintptr{pc}) + frame, more := frames.Next() + if frame.Function != "runtime_test.I.M" { + t.Fatalf("want function %s, got %s", "runtime_test.I.M", frame.Function) + } + if more { + t.Fatalf("want 1 frame, got > 1") + } +} + +func TestTracebackSystemstack(t *testing.T) { + if Compiler == "gccgo" { + t.Skip("test currently fails with gccgo") + } + if GOARCH == "ppc64" || GOARCH == "ppc64le" { + t.Skip("systemstack tail call not implemented on ppc64x") + } + + // Test that profiles correctly jump over systemstack, + // including nested systemstack calls. + pcs := make([]uintptr, 20) + pcs = pcs[:TracebackSystemstack(pcs, 5)] + // Check that runtime.TracebackSystemstack appears five times + // and that we see TestTracebackSystemstack. + countIn, countOut := 0, 0 + frames := CallersFrames(pcs) + var tb bytes.Buffer + for { + frame, more := frames.Next() + fmt.Fprintf(&tb, "\n%s+0x%x %s:%d", frame.Function, frame.PC-frame.Entry, frame.File, frame.Line) + switch frame.Function { + case "runtime.TracebackSystemstack": + countIn++ + case "runtime_test.TestTracebackSystemstack": + countOut++ + } + if !more { + break + } + } + if countIn != 5 || countOut != 1 { + t.Fatalf("expected 5 calls to TracebackSystemstack and 1 call to TestTracebackSystemstack, got:%s", tb.String()) + } +} + +func TestTracebackAncestors(t *testing.T) { + if Compiler == "gccgo" { + t.Skip("gccgo currently doesn't generate full ancestor tracebacks") + } + goroutineRegex := regexp.MustCompile(`goroutine [0-9]+ \[`) + for _, tracebackDepth := range []int{0, 1, 5, 50} { + output := runTestProg(t, "testprog", "TracebackAncestors", fmt.Sprintf("GODEBUG=tracebackancestors=%d", tracebackDepth)) + + numGoroutines := 3 + numFrames := 2 + ancestorsExpected := numGoroutines + if numGoroutines > tracebackDepth { + ancestorsExpected = tracebackDepth + } + + matches := goroutineRegex.FindAllStringSubmatch(output, -1) + if len(matches) != 2 { + t.Fatalf("want 2 goroutines, got:\n%s", output) + } + + // Check functions in the traceback. + fns := []string{"main.recurseThenCallGo", "main.main", "main.printStack", "main.TracebackAncestors"} + for _, fn := range fns { + if !strings.Contains(output, "\n"+fn+"(") { + t.Fatalf("expected %q function in traceback:\n%s", fn, output) + } + } + + if want, count := "originating from goroutine", ancestorsExpected; strings.Count(output, want) != count { + t.Errorf("output does not contain %d instances of %q:\n%s", count, want, output) + } + + if want, count := "main.recurseThenCallGo(...)", ancestorsExpected*(numFrames+1); strings.Count(output, want) != count { + t.Errorf("output does not contain %d instances of %q:\n%s", count, want, output) + } + + if want, count := "main.recurseThenCallGo(0x", 1; strings.Count(output, want) != count { + t.Errorf("output does not contain %d instances of %q:\n%s", count, want, output) + } + } +} + +// Test that defer closure is correctly scanned when the stack is scanned. 
+func TestDeferLiveness(t *testing.T) { + output := runTestProg(t, "testprog", "DeferLiveness", "GODEBUG=clobberfree=1") + if output != "" { + t.Errorf("output:\n%s\n\nwant no output", output) + } +} func TestDeferHeapAndStack(t *testing.T) { P := 4 // processors - N := 10000 // iterations + N := 10000 //iterations D := 200 // stack depth if testing.Short() { diff --git a/libgo/go/runtime/string.go b/libgo/go/runtime/string.go index d225dc3a643..741b6b4ca2f 100644 --- a/libgo/go/runtime/string.go +++ b/libgo/go/runtime/string.go @@ -302,6 +302,8 @@ func gobytes(p *byte, n int) (b []byte) { return } +// This is exported via linkname to assembly in syscall (for Plan9). +//go:linkname gostring func gostring(p *byte) string { l := findnull(p) if l == 0 { diff --git a/libgo/go/runtime/stubs.go b/libgo/go/runtime/stubs.go index 4662251da7f..a2e1530d107 100644 --- a/libgo/go/runtime/stubs.go +++ b/libgo/go/runtime/stubs.go @@ -240,28 +240,6 @@ func asmcgocall(fn, arg unsafe.Pointer) int32 { return 0 } -// argp used in Defer structs when there is no argp. -const _NoArgs = ^uintptr(0) - -//extern __builtin_prefetch -func prefetch(addr unsafe.Pointer, rw int32, locality int32) - -func prefetcht0(addr uintptr) { - prefetch(unsafe.Pointer(addr), 0, 3) -} - -func prefetcht1(addr uintptr) { - prefetch(unsafe.Pointer(addr), 0, 2) -} - -func prefetcht2(addr uintptr) { - prefetch(unsafe.Pointer(addr), 0, 1) -} - -func prefetchnta(addr uintptr) { - prefetch(unsafe.Pointer(addr), 0, 0) -} - // round n up to a multiple of a. a must be a power of 2. func round(n, a uintptr) uintptr { return (n + a - 1) &^ (a - 1) diff --git a/libgo/go/runtime/symtab.go b/libgo/go/runtime/symtab.go index 8f3c843a78b..a2ecf388fea 100644 --- a/libgo/go/runtime/symtab.go +++ b/libgo/go/runtime/symtab.go @@ -120,37 +120,6 @@ type Func struct { entry uintptr } -// A FuncID identifies particular functions that need to be treated -// specially by the runtime. -// Note that in some situations involving plugins, there may be multiple -// copies of a particular special runtime function. -// Note: this list must match the list in cmd/internal/objabi/funcid.go. -type funcID uint8 - -const ( - funcID_normal funcID = iota // not a special function - funcID_runtime_main - funcID_goexit - funcID_jmpdefer - funcID_mcall - funcID_morestack - funcID_mstart - funcID_rt0_go - funcID_asmcgocall - funcID_sigpanic - funcID_runfinq - funcID_gcBgMarkWorker - funcID_systemstack_switch - funcID_systemstack - funcID_cgocallback_gofunc - funcID_gogo - funcID_externalthreadhandler - funcID_debugCallV1 - funcID_gopanic - funcID_panicwrap - funcID_wrapper // any autogenerated code (hash/eq algorithms, method wrappers, etc.) -) - // FuncForPC returns a *Func describing the function that contains the // given program counter address, or else nil. 
// diff --git a/libgo/go/runtime/testdata/testprog/deadlock.go b/libgo/go/runtime/testdata/testprog/deadlock.go index ca2be579114..5f0d1200047 100644 --- a/libgo/go/runtime/testdata/testprog/deadlock.go +++ b/libgo/go/runtime/testdata/testprog/deadlock.go @@ -112,12 +112,16 @@ func RecursivePanic() { } func GoexitExit() { + println("t1") go func() { time.Sleep(time.Millisecond) }() i := 0 + println("t2") runtime.SetFinalizer(&i, func(p *int) {}) + println("t3") runtime.GC() + println("t4") runtime.Goexit() } diff --git a/libgo/go/runtime/testdata/testprog/gc.go b/libgo/go/runtime/testdata/testprog/gc.go index 629cf2f04fb..3fd1cd8a1ff 100644 --- a/libgo/go/runtime/testdata/testprog/gc.go +++ b/libgo/go/runtime/testdata/testprog/gc.go @@ -130,59 +130,58 @@ func GCFairness2() { fmt.Println("OK") } -var maybeSaved []byte - func GCPhys() { - // In this test, we construct a very specific scenario. We first - // allocate N objects and drop half of their pointers on the floor, - // effectively creating N/2 'holes' in our allocated arenas. We then - // try to allocate objects twice as big. At the end, we measure the - // physical memory overhead of large objects. + // This test ensures that heap-growth scavenging is working as intended. // - // The purpose of this test is to ensure that the GC scavenges free - // spans eagerly to ensure high physical memory utilization even - // during fragmentation. + // It sets up a specific scenario: it allocates two pairs of objects whose + // sizes sum to size. One object in each pair is "small" (though must be + // large enough to be considered a large object by the runtime) and one is + // large. The small objects are kept while the large objects are freed, + // creating two large unscavenged holes in the heap. The heap goal should + // also be small as a result (so size must be at least as large as the + // minimum heap size). We then allocate one large object, bigger than both + // pairs of objects combined. This allocation, because it will tip + // HeapSys-HeapReleased well above the heap goal, should trigger heap-growth + // scavenging and scavenge most, if not all, of the large holes we created + // earlier. const ( - // Unfortunately, measuring actual used physical pages is - // difficult because HeapReleased doesn't include the parts - // of an arena that haven't yet been touched. So, we just - // make objects and size sufficiently large such that even - // 64 MB overhead is relatively small in the final - // calculation. - // - // Currently, we target 480MiB worth of memory for our test, - // computed as size * objects + (size*2) * (objects/2) - // = 2 * size * objects - // // Size must be also large enough to be considered a large // object (not in any size-segregated span). - size = 1 << 20 - objects = 240 + size = 4 << 20 + split = 64 << 10 + objects = 2 ) + // Set GOGC so that this test operates under consistent assumptions. + debug.SetGCPercent(100) // Save objects which we want to survive, and condemn objects which we don't. // Note that we condemn objects in this way and release them all at once in // order to avoid having the GC start freeing up these objects while the loop // is still running and filling in the holes we intend to make. - saved := make([][]byte, 0, objects) - condemned := make([][]byte, 0, objects/2+1) - for i := 0; i < objects; i++ { - // Write into a global, to prevent this from being optimized away by - // the compiler in the future. 
- maybeSaved = make([]byte, size) + saved := make([][]byte, 0, objects+1) + condemned := make([][]byte, 0, objects) + for i := 0; i < 2*objects; i++ { if i%2 == 0 { - saved = append(saved, maybeSaved) + saved = append(saved, make([]byte, split)) } else { - condemned = append(condemned, maybeSaved) + condemned = append(condemned, make([]byte, size-split)) } } condemned = nil // Clean up the heap. This will free up every other object created above // (i.e. everything in condemned) creating holes in the heap. + // Also, if the condemned objects are still being swept, its possible that + // the scavenging that happens as a result of the next allocation won't see + // the holes at all. We call runtime.GC() twice here so that when we allocate + // our large object there's no race with sweeping. runtime.GC() - // Allocate many new objects of 2x size. - for i := 0; i < objects/2; i++ { - saved = append(saved, make([]byte, size*2)) - } + runtime.GC() + // Perform one big allocation which should also scavenge any holes. + // + // The heap goal will rise after this object is allocated, so it's very + // important that we try to do all the scavenging in a single allocation + // that exceeds the heap goal. Otherwise the rising heap goal could foil our + // test. + saved = append(saved, make([]byte, objects*size)) // Clean up the heap again just to put it in a known state. runtime.GC() // heapBacked is an estimate of the amount of physical memory used by @@ -194,21 +193,29 @@ func GCPhys() { var stats runtime.MemStats runtime.ReadMemStats(&stats) heapBacked := stats.HeapSys - stats.HeapReleased - // If heapBacked exceeds the amount of memory actually used for heap - // allocated objects by 10% (post-GC HeapAlloc should be quite close to - // the size of the working set), then fail. + // If heapBacked does not exceed the heap goal by more than retainExtraPercent + // then the scavenger is working as expected; the newly-created holes have been + // scavenged immediately as part of the allocations which cannot fit in the holes. // - // In the context of this test, that indicates a large amount of - // fragmentation with physical pages that are otherwise unused but not - // returned to the OS. + // Since the runtime should scavenge the entirety of the remaining holes, + // theoretically there should be no more free and unscavenged memory. However due + // to other allocations that happen during this test we may still see some physical + // memory over-use. 10% here is an arbitrary but very conservative threshold which + // should easily account for any other allocations this test may have done. overuse := (float64(heapBacked) - float64(stats.HeapAlloc)) / float64(stats.HeapAlloc) - if overuse > 0.1 { - fmt.Printf("exceeded physical memory overuse threshold of 10%%: %3.2f%%\n"+ - "(alloc: %d, sys: %d, rel: %d, objs: %d)\n", overuse*100, stats.HeapAlloc, - stats.HeapSys, stats.HeapReleased, len(saved)) + if overuse <= 0.10 { + fmt.Println("OK") return } - fmt.Println("OK") + // Physical memory utilization exceeds the threshold, so heap-growth scavenging + // did not operate as expected. + // + // In the context of this test, this indicates a large amount of + // fragmentation with physical pages that are otherwise unused but not + // returned to the OS. 
+ fmt.Printf("exceeded physical memory overuse threshold of 10%%: %3.2f%%\n"+ + "(alloc: %d, goal: %d, sys: %d, rel: %d, objs: %d)\n", overuse*100, + stats.HeapAlloc, stats.NextGC, stats.HeapSys, stats.HeapReleased, len(saved)) runtime.KeepAlive(saved) } diff --git a/libgo/go/runtime/testdata/testprog/sleep.go b/libgo/go/runtime/testdata/testprog/sleep.go new file mode 100644 index 00000000000..86e2f6cfe6c --- /dev/null +++ b/libgo/go/runtime/testdata/testprog/sleep.go @@ -0,0 +1,17 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import "time" + +// for golang.org/issue/27250 + +func init() { + register("After1", After1) +} + +func After1() { + <-time.After(1 * time.Second) +} diff --git a/libgo/go/runtime/testdata/testprogcgo/dll_windows.go b/libgo/go/runtime/testdata/testprogcgo/dll_windows.go index aed2410a456..25380fb2175 100644 --- a/libgo/go/runtime/testdata/testprogcgo/dll_windows.go +++ b/libgo/go/runtime/testdata/testprogcgo/dll_windows.go @@ -12,7 +12,7 @@ DWORD getthread() { } */ import "C" -import "./windows" +import "runtime/testdata/testprogcgo/windows" func init() { register("CgoDLLImportsMain", CgoDLLImportsMain) diff --git a/libgo/go/runtime/testdata/testprogcgo/sigstack.go b/libgo/go/runtime/testdata/testprogcgo/sigstack.go index 492dfeff7f5..21b668d6c00 100644 --- a/libgo/go/runtime/testdata/testprogcgo/sigstack.go +++ b/libgo/go/runtime/testdata/testprogcgo/sigstack.go @@ -17,11 +17,18 @@ package main #include <stdlib.h> #include <sys/mman.h> +#ifdef _AIX +// On AIX, SIGSTKSZ is too small to handle Go sighandler. +#define CSIGSTKSZ 0x4000 +#else +#define CSIGSTKSZ SIGSTKSZ +#endif + extern void SigStackCallback(); static void* WithSigStack(void* arg __attribute__((unused))) { // Set up an alternate system stack. - void* base = mmap(0, SIGSTKSZ, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); + void* base = mmap(0, CSIGSTKSZ, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); if (base == MAP_FAILED) { perror("mmap failed"); abort(); @@ -29,7 +36,7 @@ static void* WithSigStack(void* arg __attribute__((unused))) { stack_t st = {}, ost = {}; st.ss_sp = (char*)base; st.ss_flags = 0; - st.ss_size = SIGSTKSZ; + st.ss_size = CSIGSTKSZ; if (sigaltstack(&st, &ost) < 0) { perror("sigaltstack failed"); abort(); @@ -42,13 +49,13 @@ static void* WithSigStack(void* arg __attribute__((unused))) { if (ost.ss_flags & SS_DISABLE) { // Darwin libsystem has a bug where it checks ss_size // even if SS_DISABLE is set. (The kernel gets it right.) - ost.ss_size = SIGSTKSZ; + ost.ss_size = CSIGSTKSZ; } if (sigaltstack(&ost, NULL) < 0) { perror("sigaltstack restore failed"); abort(); } - mprotect(base, SIGSTKSZ, PROT_NONE); + mprotect(base, CSIGSTKSZ, PROT_NONE); return NULL; } diff --git a/libgo/go/runtime/trace/trace_test.go b/libgo/go/runtime/trace/trace_test.go index fc81abc30ff..235845df4e2 100644 --- a/libgo/go/runtime/trace/trace_test.go +++ b/libgo/go/runtime/trace/trace_test.go @@ -186,6 +186,10 @@ func TestTraceStress(t *testing.T) { if IsEnabled() { t.Skip("skipping because -test.trace is set") } + if testing.Short() { + t.Skip("skipping in -short mode") + } + var wg sync.WaitGroup done := make(chan bool) @@ -237,7 +241,7 @@ func TestTraceStress(t *testing.T) { runtime.GC() // Trigger GC from malloc. 
n := int(1e3) - if runtime.GOOS == "openbsd" && runtime.GOARCH == "arm" { + if isMemoryConstrained() { // Reduce allocation to avoid running out of // memory on the builder - see issue/12032. n = 512 @@ -322,6 +326,21 @@ func TestTraceStress(t *testing.T) { testBrokenTimestamps(t, trace) } +// isMemoryConstrained reports whether the current machine is likely +// to be memory constrained. +// This was originally for the openbsd/arm builder (Issue 12032). +// TODO: move this to testenv? Make this look at memory? Look at GO_BUILDER_NAME? +func isMemoryConstrained() bool { + if runtime.GOOS == "plan9" { + return true + } + switch runtime.GOARCH { + case "arm", "mips", "mipsle": + return true + } + return false +} + // Do a bunch of various stuff (timers, GC, network, etc) in a separate goroutine. // And concurrently with all that start/stop trace 3 times. func TestTraceStressStartStop(t *testing.T) { @@ -381,9 +400,9 @@ func TestTraceStressStartStop(t *testing.T) { runtime.GC() // Trigger GC from malloc. n := int(1e3) - if runtime.GOOS == "openbsd" && runtime.GOARCH == "arm" { + if isMemoryConstrained() { // Reduce allocation to avoid running out of - // memory on the builder - see issue/12032. + // memory on the builder. n = 512 } for i := 0; i < n; i++ { diff --git a/libgo/go/runtime/treap_test.go b/libgo/go/runtime/treap_test.go new file mode 100644 index 00000000000..110f51c811c --- /dev/null +++ b/libgo/go/runtime/treap_test.go @@ -0,0 +1,270 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime_test + +import ( + "fmt" + "runtime" + "testing" +) + +var spanDesc = map[uintptr]struct { + pages uintptr + scav bool +}{ + 0xc0000000: {2, false}, + 0xc0006000: {1, false}, + 0xc0010000: {8, false}, + 0xc0022000: {7, false}, + 0xc0034000: {4, true}, + 0xc0040000: {5, false}, + 0xc0050000: {5, true}, + 0xc0060000: {5000, false}, +} + +// Wrap the Treap one more time because go:notinheap doesn't +// actually follow a structure across package boundaries. +// +//go:notinheap +type treap struct { + runtime.Treap +} + +func maskMatchName(mask, match runtime.TreapIterType) string { + return fmt.Sprintf("%0*b-%0*b", runtime.TreapIterBits, uint8(mask), runtime.TreapIterBits, uint8(match)) +} + +func TestTreapFilter(t *testing.T) { + var iterTypes = [...]struct { + mask, match runtime.TreapIterType + filter runtime.TreapIterFilter // expected filter + }{ + {0, 0, 0xf}, + {runtime.TreapIterScav, 0, 0x5}, + {runtime.TreapIterScav, runtime.TreapIterScav, 0xa}, + {runtime.TreapIterScav | runtime.TreapIterHuge, runtime.TreapIterHuge, 0x4}, + {runtime.TreapIterScav | runtime.TreapIterHuge, 0, 0x1}, + {0, runtime.TreapIterScav, 0x0}, + } + for _, it := range iterTypes { + t.Run(maskMatchName(it.mask, it.match), func(t *testing.T) { + if f := runtime.TreapFilter(it.mask, it.match); f != it.filter { + t.Fatalf("got %#x, want %#x", f, it.filter) + } + }) + } +} + +// This test ensures that the treap implementation in the runtime +// maintains all stated invariants after different sequences of +// insert, removeSpan, find, and erase. Invariants specific to the +// treap data structure are checked implicitly: after each mutating +// operation, treap-related invariants are checked for the entire +// treap. +func TestTreap(t *testing.T) { + // Set up a bunch of spans allocated into mheap_. 
+ // Also, derive a set of typeCounts of each type of span + // according to runtime.TreapIterType so we can verify against + // them later. + spans := make([]runtime.Span, 0, len(spanDesc)) + typeCounts := [1 << runtime.TreapIterBits][1 << runtime.TreapIterBits]int{} + for base, de := range spanDesc { + s := runtime.AllocSpan(base, de.pages, de.scav) + defer s.Free() + spans = append(spans, s) + + for i := runtime.TreapIterType(0); i < 1<<runtime.TreapIterBits; i++ { + for j := runtime.TreapIterType(0); j < 1<<runtime.TreapIterBits; j++ { + if s.MatchesIter(i, j) { + typeCounts[i][j]++ + } + } + } + } + t.Run("TypeCountsSanity", func(t *testing.T) { + // Just sanity check type counts for a few values. + check := func(mask, match runtime.TreapIterType, count int) { + tc := typeCounts[mask][match] + if tc != count { + name := maskMatchName(mask, match) + t.Fatalf("failed a sanity check for mask/match %s counts: got %d, wanted %d", name, tc, count) + } + } + check(0, 0, len(spanDesc)) + check(runtime.TreapIterScav, 0, 6) + check(runtime.TreapIterScav, runtime.TreapIterScav, 2) + }) + t.Run("Insert", func(t *testing.T) { + tr := treap{} + // Test just a very basic insert/remove for sanity. + tr.Insert(spans[0]) + tr.RemoveSpan(spans[0]) + }) + t.Run("FindTrivial", func(t *testing.T) { + tr := treap{} + // Test just a very basic find operation for sanity. + tr.Insert(spans[0]) + i := tr.Find(1) + if i.Span() != spans[0] { + t.Fatal("found unknown span in treap") + } + tr.RemoveSpan(spans[0]) + }) + t.Run("FindFirstFit", func(t *testing.T) { + // Run this 10 times, recreating the treap each time. + // Because of the non-deterministic structure of a treap, + // we'll be able to test different structures this way. + for i := 0; i < 10; i++ { + tr := runtime.Treap{} + for _, s := range spans { + tr.Insert(s) + } + i := tr.Find(5) + if i.Span().Base() != 0xc0010000 { + t.Fatalf("expected span at lowest address which could fit 5 pages, instead found span at %x", i.Span().Base()) + } + for _, s := range spans { + tr.RemoveSpan(s) + } + } + }) + t.Run("Iterate", func(t *testing.T) { + for mask := runtime.TreapIterType(0); mask < 1<<runtime.TreapIterBits; mask++ { + for match := runtime.TreapIterType(0); match < 1<<runtime.TreapIterBits; match++ { + iterName := maskMatchName(mask, match) + t.Run(iterName, func(t *testing.T) { + t.Run("StartToEnd", func(t *testing.T) { + // Ensure progressing an iterator actually goes over the whole treap + // from the start and that it iterates over the elements in order. + // Furthermore, ensure that it only iterates over the relevant parts + // of the treap. + // Finally, ensures that Start returns a valid iterator. + tr := treap{} + for _, s := range spans { + tr.Insert(s) + } + nspans := 0 + lastBase := uintptr(0) + for i := tr.Start(mask, match); i.Valid(); i = i.Next() { + nspans++ + if lastBase > i.Span().Base() { + t.Fatalf("not iterating in correct order: encountered base %x before %x", lastBase, i.Span().Base()) + } + lastBase = i.Span().Base() + if !i.Span().MatchesIter(mask, match) { + t.Fatalf("found non-matching span while iteration over mask/match %s: base %x", iterName, i.Span().Base()) + } + } + if nspans != typeCounts[mask][match] { + t.Fatal("failed to iterate forwards over full treap") + } + for _, s := range spans { + tr.RemoveSpan(s) + } + }) + t.Run("EndToStart", func(t *testing.T) { + // See StartToEnd tests. 
+ tr := treap{} + for _, s := range spans { + tr.Insert(s) + } + nspans := 0 + lastBase := ^uintptr(0) + for i := tr.End(mask, match); i.Valid(); i = i.Prev() { + nspans++ + if lastBase < i.Span().Base() { + t.Fatalf("not iterating in correct order: encountered base %x before %x", lastBase, i.Span().Base()) + } + lastBase = i.Span().Base() + if !i.Span().MatchesIter(mask, match) { + t.Fatalf("found non-matching span while iteration over mask/match %s: base %x", iterName, i.Span().Base()) + } + } + if nspans != typeCounts[mask][match] { + t.Fatal("failed to iterate backwards over full treap") + } + for _, s := range spans { + tr.RemoveSpan(s) + } + }) + }) + } + } + t.Run("Prev", func(t *testing.T) { + // Test the iterator invariant that i.prev().next() == i. + tr := treap{} + for _, s := range spans { + tr.Insert(s) + } + i := tr.Start(0, 0).Next().Next() + p := i.Prev() + if !p.Valid() { + t.Fatal("i.prev() is invalid") + } + if p.Next().Span() != i.Span() { + t.Fatal("i.prev().next() != i") + } + for _, s := range spans { + tr.RemoveSpan(s) + } + }) + t.Run("Next", func(t *testing.T) { + // Test the iterator invariant that i.next().prev() == i. + tr := treap{} + for _, s := range spans { + tr.Insert(s) + } + i := tr.Start(0, 0).Next().Next() + n := i.Next() + if !n.Valid() { + t.Fatal("i.next() is invalid") + } + if n.Prev().Span() != i.Span() { + t.Fatal("i.next().prev() != i") + } + for _, s := range spans { + tr.RemoveSpan(s) + } + }) + }) + t.Run("EraseOne", func(t *testing.T) { + // Test that erasing one iterator correctly retains + // all relationships between elements. + tr := treap{} + for _, s := range spans { + tr.Insert(s) + } + i := tr.Start(0, 0).Next().Next().Next() + s := i.Span() + n := i.Next() + p := i.Prev() + tr.Erase(i) + if n.Prev().Span() != p.Span() { + t.Fatal("p, n := i.Prev(), i.Next(); n.prev() != p after i was erased") + } + if p.Next().Span() != n.Span() { + t.Fatal("p, n := i.Prev(), i.Next(); p.next() != n after i was erased") + } + tr.Insert(s) + for _, s := range spans { + tr.RemoveSpan(s) + } + }) + t.Run("EraseAll", func(t *testing.T) { + // Test that erasing iterators actually removes nodes from the treap. 
+ tr := treap{} + for _, s := range spans { + tr.Insert(s) + } + for i := tr.Start(0, 0); i.Valid(); { + n := i.Next() + tr.Erase(i) + i = n + } + if size := tr.Size(); size != 0 { + t.Fatalf("should have emptied out treap, %d spans left", size) + } + }) +} diff --git a/libgo/go/runtime/type.go b/libgo/go/runtime/type.go index 13905353f83..63ad310355d 100644 --- a/libgo/go/runtime/type.go +++ b/libgo/go/runtime/type.go @@ -79,7 +79,7 @@ type maptype struct { elem *_type bucket *_type // internal type representing a hash bucket keysize uint8 // size of key slot - valuesize uint8 // size of value slot + elemsize uint8 // size of elem slot bucketsize uint16 // size of bucket flags uint32 } @@ -89,7 +89,7 @@ type maptype struct { func (mt *maptype) indirectkey() bool { // store ptr to key instead of key itself return mt.flags&1 != 0 } -func (mt *maptype) indirectvalue() bool { // store ptr to value instead of value itself +func (mt *maptype) indirectelem() bool { // store ptr to elem instead of elem itself return mt.flags&2 != 0 } func (mt *maptype) reflexivekey() bool { // true if k==k for all keys diff --git a/libgo/go/runtime/typekind.go b/libgo/go/runtime/typekind.go index abb27777fe9..7087a9b0468 100644 --- a/libgo/go/runtime/typekind.go +++ b/libgo/go/runtime/typekind.go @@ -34,7 +34,6 @@ const ( kindDirectIface = 1 << 5 kindGCProg = 1 << 6 - kindNoPointers = 1 << 7 kindMask = (1 << 5) - 1 ) diff --git a/libgo/go/runtime/unaligned1.go b/libgo/go/runtime/unaligned1.go deleted file mode 100644 index 86e0df05810..00000000000 --- a/libgo/go/runtime/unaligned1.go +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build 386 amd64 amd64p32 arm64 ppc64 ppc64le s390x wasm ppc s390 arm64be riscv64 - -package runtime - -import "unsafe" - -func readUnaligned32(p unsafe.Pointer) uint32 { - return *(*uint32)(p) -} - -func readUnaligned64(p unsafe.Pointer) uint64 { - return *(*uint64)(p) -} diff --git a/libgo/go/runtime/unaligned2.go b/libgo/go/runtime/unaligned2.go deleted file mode 100644 index 9f52e8d2643..00000000000 --- a/libgo/go/runtime/unaligned2.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build alpha arm armbe ia64 m68k mips mipsle mips64 mips64le mips64p32 mips64p32le nios2 sh shbe sparc sparc64 - -package runtime - -import "unsafe" - -// Note: These routines perform the read with an unspecified endianness. -func readUnaligned32(p unsafe.Pointer) uint32 { - q := (*[4]byte)(p) - return uint32(q[0]) + uint32(q[1])<<8 + uint32(q[2])<<16 + uint32(q[3])<<24 -} - -func readUnaligned64(p unsafe.Pointer) uint64 { - q := (*[8]byte)(p) - return uint64(q[0]) + uint64(q[1])<<8 + uint64(q[2])<<16 + uint64(q[3])<<24 + uint64(q[4])<<32 + uint64(q[5])<<40 + uint64(q[6])<<48 + uint64(q[7])<<56 -} diff --git a/libgo/go/runtime/write_err_android.go b/libgo/go/runtime/write_err_android.go index bf99b5f6c5b..2419fc8663e 100644 --- a/libgo/go/runtime/write_err_android.go +++ b/libgo/go/runtime/write_err_android.go @@ -21,7 +21,7 @@ var ( // in kernel ring buffers. In Android-L, those /dev/log files are no longer // accessible and logging is done through a centralized user-mode logger, logd. 
// - // https://android.googlesource.com/platform/system/core/+/master/liblog/logd_write.c + // https://android.googlesource.com/platform/system/core/+/refs/tags/android-6.0.1_r78/liblog/logd_write.c type loggerType int32 const (
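As an illustration of the mask/match filtering exercised by the new treap_test.go above (TestTreapFilter): a span's iteration type is a small bit pattern (scavenged, huge), and an iterator visits type t exactly when t&mask == match. The sketch below is a standalone reconstruction of the semantics the test table implies, not the runtime's own code; the bit assignments (treapIterBits = 2, scavenged = bit 0, huge = bit 1) are assumptions chosen only so the computed filters reproduce the expected values in the test (0xf, 0x5, 0xa, 0x4, 0x1, 0x0).

package main

import "fmt"

const (
	treapIterBits = 2      // assumed width of a span's type bit pattern
	iterScav      = 1 << 0 // assumed "scavenged" bit
	iterHuge      = 1 << 1 // assumed "huge" bit
)

// treapFilter returns a bitmask with bit t set for every span type t
// (a treapIterBits-wide pattern) satisfying t&mask == match, i.e. the
// set of types an iterator built from this mask/match pair would visit.
func treapFilter(mask, match uint8) uint8 {
	var f uint8
	for t := uint8(0); t < 1<<treapIterBits; t++ {
		if t&mask == match {
			f |= 1 << t
		}
	}
	return f
}

func main() {
	// Reproduces the expectations listed in TestTreapFilter.
	fmt.Printf("%#x\n", treapFilter(0, 0))                        // 0xf: no constraint, every type matches
	fmt.Printf("%#x\n", treapFilter(iterScav, 0))                 // 0x5: unscavenged spans only
	fmt.Printf("%#x\n", treapFilter(iterScav, iterScav))          // 0xa: scavenged spans only
	fmt.Printf("%#x\n", treapFilter(iterScav|iterHuge, iterHuge)) // 0x4: huge, unscavenged spans
	fmt.Printf("%#x\n", treapFilter(iterScav|iterHuge, 0))        // 0x1: small, unscavenged spans
	fmt.Printf("%#x\n", treapFilter(0, iterScav))                 // 0x0: unsatisfiable, nothing matches
}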