Diffstat (limited to 'libgo/go/runtime/export_test.go')
-rw-r--r-- | libgo/go/runtime/export_test.go | 535
1 file changed, 384 insertions, 151 deletions
diff --git a/libgo/go/runtime/export_test.go b/libgo/go/runtime/export_test.go
index 10890d35a65..9a977d829b3 100644
--- a/libgo/go/runtime/export_test.go
+++ b/libgo/go/runtime/export_test.go
@@ -35,9 +35,18 @@ var Atoi = atoi
 var Atoi32 = atoi32
 
 var Nanotime = nanotime
+var NetpollBreak = netpollBreak
+var Usleep = usleep
 
+var PhysPageSize = physPageSize
 var PhysHugePageSize = physHugePageSize
 
+var NetpollGenericInit = netpollGenericInit
+
+var ParseRelease = parseRelease
+
+const PreemptMSupported = preemptMSupported
+
 type LFNode struct {
 	Next    uint64
 	Pushcnt uintptr
@@ -51,6 +60,12 @@ func LFStackPop(head *uint64) *LFNode {
 	return (*LFNode)(unsafe.Pointer((*lfstack)(head).pop()))
 }
 
+func Netpoll(delta int64) {
+	systemstack(func() {
+		netpoll(delta)
+	})
+}
+
 func GCMask(x interface{}) (ret []byte) {
 	return nil
 }
@@ -241,7 +256,7 @@ func CountPagesInUse() (pagesInUse, counted uintptr) {
 	pagesInUse = uintptr(mheap_.pagesInUse)
 
 	for _, s := range mheap_.allspans {
-		if s.state == mSpanInUse {
+		if s.state.get() == mSpanInUse {
 			counted += s.npages
 		}
 	}
@@ -303,7 +318,7 @@ func ReadMemStatsSlow() (base, slow MemStats) {
 
 		// Add up current allocations in spans.
 		for _, s := range mheap_.allspans {
-			if s.state != mSpanInUse {
+			if s.state.get() != mSpanInUse {
 				continue
 			}
 			if sizeclass := s.spanclass.sizeclass(); sizeclass == 0 {
@@ -336,10 +351,18 @@ func ReadMemStatsSlow() (base, slow MemStats) {
 			slow.BySize[i].Frees = bySize[i].Frees
 		}
 
-		for i := mheap_.free.start(0, 0); i.valid(); i = i.next() {
-			slow.HeapReleased += uint64(i.span().released())
+		for i := mheap_.pages.start; i < mheap_.pages.end; i++ {
+			pg := mheap_.pages.chunkOf(i).scavenged.popcntRange(0, pallocChunkPages)
+			slow.HeapReleased += uint64(pg) * pageSize
+		}
+		for _, p := range allp {
+			pg := sys.OnesCount64(p.pcache.scav)
+			slow.HeapReleased += uint64(pg) * pageSize
 		}
 
+		// Unused space in the current arena also counts as released space.
+		slow.HeapReleased += uint64(mheap_.curArena.end - mheap_.curArena.base)
+
 		getg().m.mallocing--
 	})
 
@@ -512,200 +535,410 @@ func MapTombstoneCheck(m map[int]int) {
 	}
 }
 
-// UnscavHugePagesSlow returns the value of mheap_.freeHugePages
-// and the number of unscavenged huge pages calculated by
-// scanning the heap.
-func UnscavHugePagesSlow() (uintptr, uintptr) {
-	var base, slow uintptr
-	// Run on the system stack to avoid deadlock from stack growth
-	// trying to acquire the heap lock.
-	systemstack(func() {
-		lock(&mheap_.lock)
-		base = mheap_.free.unscavHugePages
-		for _, s := range mheap_.allspans {
-			if s.state == mSpanFree && !s.scavenged {
-				slow += s.hugePages()
-			}
-		}
-		unlock(&mheap_.lock)
-	})
-	return base, slow
-}
+func RunGetgThreadSwitchTest() {
+	// Test that getg works correctly with thread switch.
+	// With gccgo, if we generate getg inlined, the backend
+	// may cache the address of the TLS variable, which
+	// will become invalid after a thread switch. This test
+	// checks that the bad caching doesn't happen.
 
-// Span is a safe wrapper around an mspan, whose memory
-// is managed manually.
-type Span struct {
-	*mspan
+	ch := make(chan int)
+	go func(ch chan int) {
+		ch <- 5
+		LockOSThread()
+	}(ch)
+
+	g1 := getg()
+
+	// Block on a receive. This is likely to get us a thread
+	// switch. If we yield to the sender goroutine, it will
+	// lock the thread, forcing us to resume on a different
+	// thread.
+	<-ch
+
+	g2 := getg()
+	if g1 != g2 {
+		panic("g1 != g2")
+	}
+
+	// Also test getg after some control flow, as the
+	// backend is sensitive to control flow.
+	g3 := getg()
+	if g1 != g3 {
+		panic("g1 != g3")
+	}
 }
 
-func AllocSpan(base, npages uintptr, scavenged bool) Span {
-	var s *mspan
-	systemstack(func() {
-		lock(&mheap_.lock)
-		s = (*mspan)(mheap_.spanalloc.alloc())
-		unlock(&mheap_.lock)
-	})
-	s.init(base, npages)
-	s.scavenged = scavenged
-	return Span{s}
+const (
+	PageSize         = pageSize
+	PallocChunkPages = pallocChunkPages
+	PageAlloc64Bit   = pageAlloc64Bit
+)
+
+// Expose pallocSum for testing.
+type PallocSum pallocSum
+
+func PackPallocSum(start, max, end uint) PallocSum { return PallocSum(packPallocSum(start, max, end)) }
+func (m PallocSum) Start() uint                    { return pallocSum(m).start() }
+func (m PallocSum) Max() uint                      { return pallocSum(m).max() }
+func (m PallocSum) End() uint                      { return pallocSum(m).end() }
+
+// Expose pallocBits for testing.
+type PallocBits pallocBits
+
+func (b *PallocBits) Find(npages uintptr, searchIdx uint) (uint, uint) {
+	return (*pallocBits)(b).find(npages, searchIdx)
 }
+func (b *PallocBits) AllocRange(i, n uint)       { (*pallocBits)(b).allocRange(i, n) }
+func (b *PallocBits) Free(i, n uint)             { (*pallocBits)(b).free(i, n) }
+func (b *PallocBits) Summarize() PallocSum       { return PallocSum((*pallocBits)(b).summarize()) }
+func (b *PallocBits) PopcntRange(i, n uint) uint { return (*pageBits)(b).popcntRange(i, n) }
 
-func (s *Span) Free() {
-	systemstack(func() {
-		lock(&mheap_.lock)
-		mheap_.spanalloc.free(unsafe.Pointer(s.mspan))
-		unlock(&mheap_.lock)
-	})
-	s.mspan = nil
+// SummarizeSlow is a slow but more obviously correct implementation
+// of (*pallocBits).summarize. Used for testing.
+func SummarizeSlow(b *PallocBits) PallocSum {
+	var start, max, end uint
+
+	const N = uint(len(b)) * 64
+	for start < N && (*pageBits)(b).get(start) == 0 {
+		start++
+	}
+	for end < N && (*pageBits)(b).get(N-end-1) == 0 {
+		end++
+	}
+	run := uint(0)
+	for i := uint(0); i < N; i++ {
+		if (*pageBits)(b).get(i) == 0 {
+			run++
+		} else {
+			run = 0
+		}
+		if run > max {
+			max = run
+		}
+	}
+	return PackPallocSum(start, max, end)
 }
 
-func (s Span) Base() uintptr {
-	return s.mspan.base()
+// Expose non-trivial helpers for testing.
+func FindBitRange64(c uint64, n uint) uint { return findBitRange64(c, n) }
+
+// Given two PallocBits, returns a set of bit ranges where
+// they differ.
+func DiffPallocBits(a, b *PallocBits) []BitRange {
+	ba := (*pageBits)(a)
+	bb := (*pageBits)(b)
+
+	var d []BitRange
+	base, size := uint(0), uint(0)
+	for i := uint(0); i < uint(len(ba))*64; i++ {
+		if ba.get(i) != bb.get(i) {
+			if size == 0 {
+				base = i
+			}
+			size++
+		} else {
+			if size != 0 {
+				d = append(d, BitRange{base, size})
+			}
+			size = 0
+		}
+	}
+	if size != 0 {
+		d = append(d, BitRange{base, size})
+	}
+	return d
+}
+
+// StringifyPallocBits gets the bits in the bit range r from b,
+// and returns a string containing the bits as ASCII 0 and 1
+// characters.
+func StringifyPallocBits(b *PallocBits, r BitRange) string {
+	str := ""
+	for j := r.I; j < r.I+r.N; j++ {
+		if (*pageBits)(b).get(j) != 0 {
+			str += "1"
+		} else {
+			str += "0"
+		}
+	}
+	return str
 }
 
-func (s Span) Pages() uintptr {
-	return s.mspan.npages
+// Expose pallocData for testing.
+type PallocData pallocData
+
+func (d *PallocData) FindScavengeCandidate(searchIdx uint, min, max uintptr) (uint, uint) {
+	return (*pallocData)(d).findScavengeCandidate(searchIdx, min, max)
+}
+func (d *PallocData) AllocRange(i, n uint) { (*pallocData)(d).allocRange(i, n) }
+func (d *PallocData) ScavengedSetRange(i, n uint) {
+	(*pallocData)(d).scavenged.setRange(i, n)
+}
+func (d *PallocData) PallocBits() *PallocBits {
+	return (*PallocBits)(&(*pallocData)(d).pallocBits)
+}
+func (d *PallocData) Scavenged() *PallocBits {
+	return (*PallocBits)(&(*pallocData)(d).scavenged)
 }
 
-type TreapIterType treapIterType
+// Expose fillAligned for testing.
+func FillAligned(x uint64, m uint) uint64 { return fillAligned(x, m) }
 
-const (
-	TreapIterScav TreapIterType = TreapIterType(treapIterScav)
-	TreapIterHuge               = TreapIterType(treapIterHuge)
-	TreapIterBits               = treapIterBits
-)
+// Expose pageCache for testing.
+type PageCache pageCache
 
-type TreapIterFilter treapIterFilter
+const PageCachePages = pageCachePages
 
-func TreapFilter(mask, match TreapIterType) TreapIterFilter {
-	return TreapIterFilter(treapFilter(treapIterType(mask), treapIterType(match)))
+func NewPageCache(base uintptr, cache, scav uint64) PageCache {
+	return PageCache(pageCache{base: base, cache: cache, scav: scav})
 }
-
-func (s Span) MatchesIter(mask, match TreapIterType) bool {
-	return treapFilter(treapIterType(mask), treapIterType(match)).matches(s.treapFilter())
+func (c *PageCache) Empty() bool   { return (*pageCache)(c).empty() }
+func (c *PageCache) Base() uintptr { return (*pageCache)(c).base }
+func (c *PageCache) Cache() uint64 { return (*pageCache)(c).cache }
+func (c *PageCache) Scav() uint64  { return (*pageCache)(c).scav }
+func (c *PageCache) Alloc(npages uintptr) (uintptr, uintptr) {
+	return (*pageCache)(c).alloc(npages)
 }
-
-type TreapIter struct {
-	treapIter
+func (c *PageCache) Flush(s *PageAlloc) {
+	(*pageCache)(c).flush((*pageAlloc)(s))
 }
 
-func (t TreapIter) Span() Span {
-	return Span{t.span()}
+// Expose chunk index type.
+type ChunkIdx chunkIdx
+
+// Expose pageAlloc for testing. Note that because pageAlloc is
+// not in the heap, so is PageAlloc.
+type PageAlloc pageAlloc
+
+func (p *PageAlloc) Alloc(npages uintptr) (uintptr, uintptr) {
+	return (*pageAlloc)(p).alloc(npages)
+}
+func (p *PageAlloc) AllocToCache() PageCache {
+	return PageCache((*pageAlloc)(p).allocToCache())
+}
+func (p *PageAlloc) Free(base, npages uintptr) {
+	(*pageAlloc)(p).free(base, npages)
+}
+func (p *PageAlloc) Bounds() (ChunkIdx, ChunkIdx) {
+	return ChunkIdx((*pageAlloc)(p).start), ChunkIdx((*pageAlloc)(p).end)
+}
+func (p *PageAlloc) Scavenge(nbytes uintptr, locked bool) (r uintptr) {
+	systemstack(func() {
+		r = (*pageAlloc)(p).scavenge(nbytes, locked)
+	})
+	return
+}
+func (p *PageAlloc) InUse() []AddrRange {
+	ranges := make([]AddrRange, 0, len(p.inUse.ranges))
+	for _, r := range p.inUse.ranges {
+		ranges = append(ranges, AddrRange{
+			Base:  r.base,
+			Limit: r.limit,
+		})
+	}
+	return ranges
 }
 
-func (t TreapIter) Valid() bool {
-	return t.valid()
+// Returns nil if the PallocData's L2 is missing.
+func (p *PageAlloc) PallocData(i ChunkIdx) *PallocData {
+	ci := chunkIdx(i)
+	l2 := (*pageAlloc)(p).chunks[ci.l1()]
+	if l2 == nil {
+		return nil
+	}
+	return (*PallocData)(&l2[ci.l2()])
 }
 
-func (t TreapIter) Next() TreapIter {
-	return TreapIter{t.next()}
+// AddrRange represents a range over addresses.
+// Specifically, it represents the range [Base, Limit).
+type AddrRange struct {
+	Base, Limit uintptr
 }
 
-func (t TreapIter) Prev() TreapIter {
-	return TreapIter{t.prev()}
+// BitRange represents a range over a bitmap.
+type BitRange struct {
+	I, N uint // bit index and length in bits
 }
 
-// Treap is a safe wrapper around mTreap for testing.
+// NewPageAlloc creates a new page allocator for testing and
+// initializes it with the scav and chunks maps. Each key in these maps
+// represents a chunk index and each value is a series of bit ranges to
+// set within each bitmap's chunk.
 //
-// It must never be heap-allocated because mTreap is
-// notinheap.
+// The initialization of the pageAlloc preserves the invariant that if a
+// scavenged bit is set the alloc bit is necessarily unset, so some
+// of the bits described by scav may be cleared in the final bitmap if
+// ranges in chunks overlap with them.
 //
-//go:notinheap
-type Treap struct {
-	mTreap
-}
+// scav is optional, and if nil, the scavenged bitmap will be cleared
+// (as opposed to all 1s, which it usually is). Furthermore, every
+// chunk index in scav must appear in chunks; ones that do not are
+// ignored.
+func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc {
+	p := new(pageAlloc)
+
+	// We've got an entry, so initialize the pageAlloc.
+	p.init(new(mutex), nil)
+	p.test = true
+
+	for i, init := range chunks {
+		addr := chunkBase(chunkIdx(i))
+
+		// Mark the chunk's existence in the pageAlloc.
+		p.grow(addr, pallocChunkBytes)
+
+		// Initialize the bitmap and update pageAlloc metadata.
+		chunk := p.chunkOf(chunkIndex(addr))
+
+		// Clear all the scavenged bits which grow set.
+		chunk.scavenged.clearRange(0, pallocChunkPages)
+
+		// Apply scavenge state if applicable.
+		if scav != nil {
+			if scvg, ok := scav[i]; ok {
+				for _, s := range scvg {
+					// Ignore the case of s.N == 0. setRange doesn't handle
+					// it and it's a no-op anyway.
+					if s.N != 0 {
+						chunk.scavenged.setRange(s.I, s.N)
+					}
+				}
+			}
+		}
+		p.resetScavengeAddr()
+
+		// Apply alloc state.
+		for _, s := range init {
+			// Ignore the case of s.N == 0. allocRange doesn't handle
+			// it and it's a no-op anyway.
+			if s.N != 0 {
+				chunk.allocRange(s.I, s.N)
+			}
+		}
 
-func (t *Treap) Start(mask, match TreapIterType) TreapIter {
-	return TreapIter{t.start(treapIterType(mask), treapIterType(match))}
+		// Update heap metadata for the allocRange calls above.
+		p.update(addr, pallocChunkPages, false, false)
+	}
+	return (*PageAlloc)(p)
 }
 
-func (t *Treap) End(mask, match TreapIterType) TreapIter {
-	return TreapIter{t.end(treapIterType(mask), treapIterType(match))}
-}
+// FreePageAlloc releases hard OS resources owned by the pageAlloc. Once this
+// is called the pageAlloc may no longer be used. The object itself will be
+// collected by the garbage collector once it is no longer live.
+func FreePageAlloc(pp *PageAlloc) {
+	p := (*pageAlloc)(pp)
 
-func (t *Treap) Insert(s Span) {
-	// mTreap uses a fixalloc in mheap_ for treapNode
-	// allocation which requires the mheap_ lock to manipulate.
-	// Locking here is safe because the treap itself never allocs
-	// or otherwise ends up grabbing this lock.
-	systemstack(func() {
-		lock(&mheap_.lock)
-		t.insert(s.mspan)
-		unlock(&mheap_.lock)
-	})
-	t.CheckInvariants()
+	// Free all the mapped space for the summary levels.
+	if pageAlloc64Bit != 0 {
+		for l := 0; l < summaryLevels; l++ {
+			sysFree(unsafe.Pointer(&p.summary[l][0]), uintptr(cap(p.summary[l]))*pallocSumBytes, nil)
+		}
+	} else {
+		resSize := uintptr(0)
+		for _, s := range p.summary {
+			resSize += uintptr(cap(s)) * pallocSumBytes
+		}
+		sysFree(unsafe.Pointer(&p.summary[0][0]), alignUp(resSize, physPageSize), nil)
+	}
+
+	// Free the mapped space for chunks.
+	for i := range p.chunks {
+		if x := p.chunks[i]; x != nil {
+			p.chunks[i] = nil
+			// This memory comes from sysAlloc and will always be page-aligned.
+			sysFree(unsafe.Pointer(x), unsafe.Sizeof(*p.chunks[0]), nil)
+		}
+	}
 }
 
-func (t *Treap) Find(npages uintptr) TreapIter {
-	return TreapIter{t.find(npages)}
+// BaseChunkIdx is a convenient chunkIdx value which works on both
+// 64 bit and 32 bit platforms, allowing the tests to share code
+// between the two.
+//
+// On AIX, the arenaBaseOffset is 0x0a00000000000000. However, this
+// constant can't be used here because it is negative and will cause
+// a constant overflow.
+//
+// This should not be higher than 0x100*pallocChunkBytes to support
+// mips and mipsle, which only have 31-bit address spaces.
+var BaseChunkIdx = ChunkIdx(chunkIndex(((0xc000*pageAlloc64Bit + 0x100*pageAlloc32Bit) * pallocChunkBytes) + 0x0a00000000000000*sys.GoosAix))
+
+// PageBase returns an address given a chunk index and a page index
+// relative to that chunk.
+func PageBase(c ChunkIdx, pageIdx uint) uintptr {
+	return chunkBase(chunkIdx(c)) + uintptr(pageIdx)*pageSize
 }
 
-func (t *Treap) Erase(i TreapIter) {
-	// mTreap uses a fixalloc in mheap_ for treapNode
-	// freeing which requires the mheap_ lock to manipulate.
-	// Locking here is safe because the treap itself never allocs
-	// or otherwise ends up grabbing this lock.
-	systemstack(func() {
-		lock(&mheap_.lock)
-		t.erase(i.treapIter)
-		unlock(&mheap_.lock)
-	})
-	t.CheckInvariants()
+type BitsMismatch struct {
+	Base      uintptr
+	Got, Want uint64
 }
 
-func (t *Treap) RemoveSpan(s Span) {
-	// See Erase about locking.
+func CheckScavengedBitsCleared(mismatches []BitsMismatch) (n int, ok bool) {
+	ok = true
+
+	// Run on the system stack to avoid stack growth allocation.
 	systemstack(func() {
+		getg().m.mallocing++
+
+		// Lock so that we can safely access the bitmap.
 		lock(&mheap_.lock)
-		t.removeSpan(s.mspan)
+	chunkLoop:
+		for i := mheap_.pages.start; i < mheap_.pages.end; i++ {
+			chunk := mheap_.pages.chunkOf(i)
+			for j := 0; j < pallocChunkPages/64; j++ {
+				// Run over each 64-bit bitmap section and ensure
+				// scavenged is being cleared properly on allocation.
+				// If a used bit and scavenged bit are both set, that's
+				// an error, and could indicate a larger problem, or
+				// an accounting problem.
+				want := chunk.scavenged[j] &^ chunk.pallocBits[j]
+				got := chunk.scavenged[j]
+				if want != got {
+					ok = false
+					if n >= len(mismatches) {
+						break chunkLoop
+					}
+					mismatches[n] = BitsMismatch{
+						Base: chunkBase(i) + uintptr(j)*64*pageSize,
+						Got:  got,
+						Want: want,
+					}
+					n++
+				}
+			}
+		}
 		unlock(&mheap_.lock)
-	})
-	t.CheckInvariants()
-}
 
-func (t *Treap) Size() int {
-	i := 0
-	t.mTreap.treap.walkTreap(func(t *treapNode) {
-		i++
+		getg().m.mallocing--
 	})
-	return i
+	return
 }
 
-func (t *Treap) CheckInvariants() {
-	t.mTreap.treap.walkTreap(checkTreapNode)
-	t.mTreap.treap.validateInvariants()
-}
+func PageCachePagesLeaked() (leaked uintptr) {
+	stopTheWorld("PageCachePagesLeaked")
 
-func RunGetgThreadSwitchTest() {
-	// Test that getg works correctly with thread switch.
-	// With gccgo, if we generate getg inlined, the backend
-	// may cache the address of the TLS variable, which
-	// will become invalid after a thread switch. This test
-	// checks that the bad caching doesn't happen.
-
-	ch := make(chan int)
-	go func(ch chan int) {
-		ch <- 5
-		LockOSThread()
-	}(ch)
-
-	g1 := getg()
+	// Walk over destroyed Ps and look for unflushed caches.
+	deadp := allp[len(allp):cap(allp)]
+	for _, p := range deadp {
+		// Since we're going past len(allp) we may see nil Ps.
+		// Just ignore them.
+		if p != nil {
+			leaked += uintptr(sys.OnesCount64(p.pcache.cache))
+		}
+	}
 
-	// Block on a receive. This is likely to get us a thread
-	// switch. If we yield to the sender goroutine, it will
-	// lock the thread, forcing us to resume on a different
-	// thread.
-	<-ch
+	startTheWorld()
+	return
+}
 
-	g2 := getg()
-	if g1 != g2 {
-		panic("g1 != g2")
-	}
+var Semacquire = semacquire
+var Semrelease1 = semrelease1
 
-	// Also test getg after some control flow, as the
-	// backend is sensitive to control flow.
-	g3 := getg()
-	if g1 != g3 {
-		panic("g1 != g3")
-	}
+func SemNwait(addr *uint32) uint32 {
+	root := semroot(addr)
+	return atomic.Load(&root.nwait)
 }
+
+var Pusestackmaps = &usestackmaps
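Usage note (not part of the patch): the exported page-allocator hooks above (NewPageAlloc, FreePageAlloc, PageAlloc.Alloc, PageBase, BaseChunkIdx, BitRange) are meant to be driven from the runtime's own tests. A minimal sketch of how that might look, assuming the usual "package runtime_test" convention with a dot-import of runtime; the test name, the chosen chunk contents, and the expectations are illustrative only, not taken from the patch:

package runtime_test

import (
	"testing"

	. "runtime"
)

// Illustrative only: allocate a single page out of one empty chunk and
// check that it lands at the start of that chunk.
func TestPageAllocSketch(t *testing.T) {
	// One chunk at BaseChunkIdx with no alloc bits set; passing nil for
	// scav leaves the scavenged bitmap cleared.
	p := NewPageAlloc(map[ChunkIdx][]BitRange{
		BaseChunkIdx: {},
	}, nil)
	// Release the off-heap mappings owned by the test allocator.
	defer FreePageAlloc(p)

	addr, scav := p.Alloc(1)
	if want := PageBase(BaseChunkIdx, 0); addr != want {
		t.Fatalf("allocated at %x, want %x", addr, want)
	}
	if scav != 0 {
		t.Fatalf("reported %d scavenged bytes, want 0", scav)
	}
}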
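Similarly, SummarizeSlow is exported so tests can cross-check the packed pallocSum produced by the fast summarizer against a reference computation. A hedged sketch of that kind of check, assuming the same runtime_test file and imports as the sketch above; the bit range marked allocated is arbitrary:

// Illustrative only: compare Summarize against the reference SummarizeSlow.
func TestPallocBitsSummarySketch(t *testing.T) {
	var b PallocBits
	// Mark pages [10, 20) allocated; the rest of the chunk stays free.
	b.AllocRange(10, 10)

	got := b.Summarize()
	want := SummarizeSlow(&b)
	if got.Start() != want.Start() || got.Max() != want.Max() || got.End() != want.End() {
		t.Fatalf("summary mismatch: got (%d, %d, %d), want (%d, %d, %d)",
			got.Start(), got.Max(), got.End(),
			want.Start(), want.Max(), want.End())
	}
}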