author    | Cherry Zhang <cherryyz@google.com> | 2020-10-28 09:12:20 -0400
committer | Cherry Zhang <cherryyz@google.com> | 2020-10-28 09:12:20 -0400
commit    | a16e30d162c1c7408db7821e7b9513cefa09c6ca (patch)
tree      | af752ba9ba44c547df39bb0af9bff79f610ba9d5 /src/runtime/mstats.go
parent    | 91e4d2d57bc341dd82c98247117114c851380aef (diff)
parent    | cf6cfba4d5358404dd890f6025e573a4b2156543 (diff)
[dev.link] all: merge branch 'master' into dev.link
Clean merge.
Change-Id: Ia7b2808bc649790198d34c226a61d9e569084dc5
Diffstat (limited to 'src/runtime/mstats.go')
-rw-r--r-- | src/runtime/mstats.go | 531
1 file changed, 392 insertions, 139 deletions
diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index b95b332134..e0a417d213 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -8,13 +8,10 @@ package runtime import ( "runtime/internal/atomic" - "runtime/internal/sys" "unsafe" ) // Statistics. -// If you edit this structure, also edit type MemStats below. -// Their layouts must match exactly. // // For detailed descriptions see the documentation for MemStats. // Fields that differ from MemStats are further documented here. @@ -35,31 +32,43 @@ type mstats struct { // // Like MemStats, heap_sys and heap_inuse do not count memory // in manually-managed spans. - heap_alloc uint64 // bytes allocated and not yet freed (same as alloc above) - heap_sys uint64 // virtual address space obtained from system for GC'd heap - heap_idle uint64 // bytes in idle spans - heap_inuse uint64 // bytes in mSpanInUse spans - heap_released uint64 // bytes released to the os + heap_sys sysMemStat // virtual address space obtained from system for GC'd heap + heap_inuse uint64 // bytes in mSpanInUse spans + heap_released uint64 // bytes released to the os // heap_objects is not used by the runtime directly and instead // computed on the fly by updatememstats. heap_objects uint64 // total number of allocated objects + // Statistics about stacks. + stacks_inuse uint64 // bytes in manually-managed stack spans; computed by updatememstats + stacks_sys sysMemStat // only counts newosproc0 stack in mstats; differs from MemStats.StackSys + // Statistics about allocation of low-level fixed-size structures. // Protected by FixAlloc locks. - stacks_inuse uint64 // bytes in manually-managed stack spans; updated atomically or during STW - stacks_sys uint64 // only counts newosproc0 stack in mstats; differs from MemStats.StackSys mspan_inuse uint64 // mspan structures - mspan_sys uint64 + mspan_sys sysMemStat mcache_inuse uint64 // mcache structures - mcache_sys uint64 - buckhash_sys uint64 // profiling bucket hash table - gc_sys uint64 // updated atomically or during STW - other_sys uint64 // updated atomically or during STW + mcache_sys sysMemStat + buckhash_sys sysMemStat // profiling bucket hash table + + // Statistics about GC overhead. + gcWorkBufInUse uint64 // computed by updatememstats + gcProgPtrScalarBitsInUse uint64 // computed by updatememstats + gcMiscSys sysMemStat // updated atomically or during STW + + // Miscellaneous statistics. + other_sys sysMemStat // updated atomically or during STW + + // Statistics about the garbage collector. + + // next_gc is the goal heap_live for when next GC ends. + // Set to ^uint64(0) if disabled. + // + // Read and written atomically, unless the world is stopped. + next_gc uint64 - // Statistics about garbage collector. // Protected by mheap or stopping the world during GC. - next_gc uint64 // goal heap_live for when next GC ends; ^0 if disabled last_gc_unix uint64 // last gc (in unix time) pause_total_ns uint64 pause_ns [256]uint64 // circular buffer of recent gc pause lengths @@ -82,8 +91,6 @@ type mstats struct { // to 64 bits for atomic operations on 32 bit platforms. _ [1 - _NumSizeClasses%2]uint32 - // Statistics below here are not exported to MemStats directly. - last_gc_nanotime uint64 // last gc (monotonic time) tinyallocs uint64 // number of tiny allocations that didn't cause actual allocation; not exported to go directly last_next_gc uint64 // next_gc for the previous GC @@ -110,11 +117,10 @@ type mstats struct { // heap_live is the number of bytes considered live by the GC. 
// That is: retained by the most recent GC plus allocated - // since then. heap_live <= heap_alloc, since heap_alloc - // includes unmarked objects that have not yet been swept (and - // hence goes up as we allocate and down as we sweep) while - // heap_live excludes these objects (and hence only goes up - // between GCs). + // since then. heap_live <= alloc, since alloc includes unmarked + // objects that have not yet been swept (and hence goes up as we + // allocate and down as we sweep) while heap_live excludes these + // objects (and hence only goes up between GCs). // // This is updated atomically without locking. To reduce // contention, this is updated only when obtaining a span from @@ -139,6 +145,8 @@ type mstats struct { // no-scan objects and no-scan tails of objects. // // Whenever this is updated, call gcController.revise(). + // + // Read and written atomically or with the world stopped. heap_scan uint64 // heap_marked is the number of bytes marked by the previous @@ -146,6 +154,17 @@ type mstats struct { // unlike heap_live, heap_marked does not change until the // next mark termination. heap_marked uint64 + + // heapStats is a set of statistics + heapStats consistentHeapStats + + _ uint32 // ensure gcPauseDist is aligned + + // gcPauseDist represents the distribution of all GC-related + // application pauses in the runtime. + // + // Each individual pause is counted separately, unlike pause_ns. + gcPauseDist timeHistogram } var memstats mstats @@ -423,24 +442,25 @@ type MemStats struct { } } -// Size of the trailing by_size array differs between mstats and MemStats, -// and all data after by_size is local to runtime, not exported. -// NumSizeClasses was changed, but we cannot change MemStats because of backward compatibility. -// sizeof_C_MStats is the size of the prefix of mstats that -// corresponds to MemStats. It should match Sizeof(MemStats{}). -var sizeof_C_MStats = unsafe.Offsetof(memstats.by_size) + 61*unsafe.Sizeof(memstats.by_size[0]) - func init() { - var memStats MemStats - if sizeof_C_MStats != unsafe.Sizeof(memStats) { - println(sizeof_C_MStats, unsafe.Sizeof(memStats)) - throw("MStats vs MemStatsType size mismatch") - } - - if unsafe.Offsetof(memstats.heap_live)%8 != 0 { - println(unsafe.Offsetof(memstats.heap_live)) + if offset := unsafe.Offsetof(memstats.heap_live); offset%8 != 0 { + println(offset) throw("memstats.heap_live not aligned to 8 bytes") } + if offset := unsafe.Offsetof(memstats.heapStats); offset%8 != 0 { + println(offset) + throw("memstats.heapStats not aligned to 8 bytes") + } + if offset := unsafe.Offsetof(memstats.gcPauseDist); offset%8 != 0 { + println(offset) + throw("memstats.gcPauseDist not aligned to 8 bytes") + } + // Ensure the size of heapStatsDelta causes adjacent fields/slots (e.g. + // [3]heapStatsDelta) to be 8-byte aligned. + if size := unsafe.Sizeof(heapStatsDelta{}); size%8 != 0 { + println(size) + throw("heapStatsDelta not a multiple of 8 bytes in size") + } } // ReadMemStats populates m with memory allocator statistics. @@ -462,14 +482,74 @@ func ReadMemStats(m *MemStats) { func readmemstats_m(stats *MemStats) { updatememstats() - // The size of the trailing by_size array differs between - // mstats and MemStats. NumSizeClasses was changed, but we - // cannot change MemStats because of backward compatibility. 
- memmove(unsafe.Pointer(stats), unsafe.Pointer(&memstats), sizeof_C_MStats) - + stats.Alloc = memstats.alloc + stats.TotalAlloc = memstats.total_alloc + stats.Sys = memstats.sys + stats.Mallocs = memstats.nmalloc + stats.Frees = memstats.nfree + stats.HeapAlloc = memstats.alloc + stats.HeapSys = memstats.heap_sys.load() + // By definition, HeapIdle is memory that was mapped + // for the heap but is not currently used to hold heap + // objects. It also specifically is memory that can be + // used for other purposes, like stacks, but this memory + // is subtracted out of HeapSys before it makes that + // transition. Put another way: + // + // heap_sys = bytes allocated from the OS for the heap - bytes ultimately used for non-heap purposes + // heap_idle = bytes allocated from the OS for the heap - bytes ultimately used for any purpose + // + // or + // + // heap_sys = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse + // heap_idle = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse - heap_inuse + // + // => heap_idle = heap_sys - heap_inuse + stats.HeapIdle = memstats.heap_sys.load() - memstats.heap_inuse + stats.HeapInuse = memstats.heap_inuse + stats.HeapReleased = memstats.heap_released + stats.HeapObjects = memstats.heap_objects + stats.StackInuse = memstats.stacks_inuse // memstats.stacks_sys is only memory mapped directly for OS stacks. // Add in heap-allocated stack memory for user consumption. - stats.StackSys += stats.StackInuse + stats.StackSys = memstats.stacks_inuse + memstats.stacks_sys.load() + stats.MSpanInuse = memstats.mspan_inuse + stats.MSpanSys = memstats.mspan_sys.load() + stats.MCacheInuse = memstats.mcache_inuse + stats.MCacheSys = memstats.mcache_sys.load() + stats.BuckHashSys = memstats.buckhash_sys.load() + // MemStats defines GCSys as an aggregate of all memory related + // to the memory management system, but we track this memory + // at a more granular level in the runtime. + stats.GCSys = memstats.gcMiscSys.load() + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse + stats.OtherSys = memstats.other_sys.load() + stats.NextGC = memstats.next_gc + stats.LastGC = memstats.last_gc_unix + stats.PauseTotalNs = memstats.pause_total_ns + stats.PauseNs = memstats.pause_ns + stats.PauseEnd = memstats.pause_end + stats.NumGC = memstats.numgc + stats.NumForcedGC = memstats.numforcedgc + stats.GCCPUFraction = memstats.gc_cpu_fraction + stats.EnableGC = true + + // Handle BySize. Copy N values, where N is + // the minimum of the lengths of the two arrays. + // Unfortunately copy() won't work here because + // the arrays have different structs. + // + // TODO(mknyszek): Consider renaming the fields + // of by_size's elements to align so we can use + // the copy built-in. + bySizeLen := len(stats.BySize) + if l := len(memstats.by_size); l < bySizeLen { + bySizeLen = l + } + for i := 0; i < bySizeLen; i++ { + stats.BySize[i].Size = memstats.by_size[i].size + stats.BySize[i].Mallocs = memstats.by_size[i].nmalloc + stats.BySize[i].Frees = memstats.by_size[i].nfree + } } //go:linkname readGCStats runtime/debug.readGCStats @@ -515,6 +595,10 @@ func readGCStats_m(pauses *[]uint64) { *pauses = p[:n+n+3] } +// Updates the memstats structure. +// +// The world must be stopped. +// //go:nowritebarrier func updatememstats() { // Flush mcaches to mcentral before doing anything else. 
@@ -525,11 +609,9 @@ func updatememstats() { memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse) memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse) - memstats.sys = memstats.heap_sys + memstats.stacks_sys + memstats.mspan_sys + - memstats.mcache_sys + memstats.buckhash_sys + memstats.gc_sys + memstats.other_sys - - // We also count stacks_inuse as sys memory. - memstats.sys += memstats.stacks_inuse + memstats.sys = memstats.heap_sys.load() + memstats.stacks_sys.load() + memstats.mspan_sys.load() + + memstats.mcache_sys.load() + memstats.buckhash_sys.load() + memstats.gcMiscSys.load() + + memstats.other_sys.load() // Calculate memory allocator stats. // During program execution we only count number of frees and amount of freed memory. @@ -546,62 +628,75 @@ func updatememstats() { memstats.by_size[i].nmalloc = 0 memstats.by_size[i].nfree = 0 } + // Collect consistent stats, which are the source-of-truth in the some cases. + var consStats heapStatsDelta + memstats.heapStats.unsafeRead(&consStats) + + // Collect large allocation stats. + totalAlloc := uint64(consStats.largeAlloc) + memstats.nmalloc += uint64(consStats.largeAllocCount) + totalFree := uint64(consStats.largeFree) + memstats.nfree += uint64(consStats.largeFreeCount) - // Aggregate local stats. - cachestats() - - // Collect allocation stats. This is safe and consistent - // because the world is stopped. - var smallFree, totalAlloc, totalFree uint64 - // Collect per-spanclass stats. - for spc := range mheap_.central { - // The mcaches are now empty, so mcentral stats are - // up-to-date. - c := &mheap_.central[spc].mcentral - memstats.nmalloc += c.nmalloc - i := spanClass(spc).sizeclass() - memstats.by_size[i].nmalloc += c.nmalloc - totalAlloc += c.nmalloc * uint64(class_to_size[i]) - } // Collect per-sizeclass stats. for i := 0; i < _NumSizeClasses; i++ { - if i == 0 { - memstats.nmalloc += mheap_.nlargealloc - totalAlloc += mheap_.largealloc - totalFree += mheap_.largefree - memstats.nfree += mheap_.nlargefree - continue - } - - // The mcache stats have been flushed to mheap_. - memstats.nfree += mheap_.nsmallfree[i] - memstats.by_size[i].nfree = mheap_.nsmallfree[i] - smallFree += mheap_.nsmallfree[i] * uint64(class_to_size[i]) + // Malloc stats. + a := uint64(consStats.smallAllocCount[i]) + totalAlloc += a * uint64(class_to_size[i]) + memstats.nmalloc += a + memstats.by_size[i].nmalloc = a + + // Free stats. + f := uint64(consStats.smallFreeCount[i]) + totalFree += f * uint64(class_to_size[i]) + memstats.nfree += f + memstats.by_size[i].nfree = f } - totalFree += smallFree + // Account for tiny allocations. memstats.nfree += memstats.tinyallocs memstats.nmalloc += memstats.tinyallocs // Calculate derived stats. memstats.total_alloc = totalAlloc memstats.alloc = totalAlloc - totalFree - memstats.heap_alloc = memstats.alloc memstats.heap_objects = memstats.nmalloc - memstats.nfree -} -// cachestats flushes all mcache stats. -// -// The world must be stopped. -// -//go:nowritebarrier -func cachestats() { - for _, p := range allp { - c := p.mcache - if c == nil { - continue - } - purgecachedstats(c) + memstats.stacks_inuse = uint64(consStats.inStacks) + memstats.gcWorkBufInUse = uint64(consStats.inWorkBufs) + memstats.gcProgPtrScalarBitsInUse = uint64(consStats.inPtrScalarBits) + + // We also count stacks_inuse, gcWorkBufInUse, and gcProgPtrScalarBitsInUse as sys memory. 
+ memstats.sys += memstats.stacks_inuse + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse + + // The world is stopped, so the consistent stats (after aggregation) + // should be identical to some combination of memstats. In particular: + // + // * heap_inuse == inHeap + // * heap_released == released + // * heap_sys - heap_released == committed - inStacks - inWorkBufs - inPtrScalarBits + // + // Check if that's actually true. + // + // TODO(mknyszek): Maybe don't throw here. It would be bad if a + // bug in otherwise benign accounting caused the whole application + // to crash. + if memstats.heap_inuse != uint64(consStats.inHeap) { + print("runtime: heap_inuse=", memstats.heap_inuse, "\n") + print("runtime: consistent value=", consStats.inHeap, "\n") + throw("heap_inuse and consistent stats are not equal") + } + if memstats.heap_released != uint64(consStats.released) { + print("runtime: heap_released=", memstats.heap_released, "\n") + print("runtime: consistent value=", consStats.released, "\n") + throw("heap_released and consistent stats are not equal") + } + globalRetained := memstats.heap_sys.load() - memstats.heap_released + consRetained := uint64(consStats.committed - consStats.inStacks - consStats.inWorkBufs - consStats.inPtrScalarBits) + if globalRetained != consRetained { + print("runtime: global value=", globalRetained, "\n") + print("runtime: consistent value=", consRetained, "\n") + throw("measures of the retained heap are not equal") } } @@ -631,64 +726,222 @@ func flushallmcaches() { } } +// sysMemStat represents a global system statistic that is managed atomically. +// +// This type must structurally be a uint64 so that mstats aligns with MemStats. +type sysMemStat uint64 + +// load atomically reads the value of the stat. +// +// Must be nosplit as it is called in runtime initialization, e.g. newosproc0. //go:nosplit -func purgecachedstats(c *mcache) { - // Protected by either heap or GC lock. - h := &mheap_ - memstats.heap_scan += uint64(c.local_scan) - c.local_scan = 0 - memstats.tinyallocs += uint64(c.local_tinyallocs) - c.local_tinyallocs = 0 - h.largefree += uint64(c.local_largefree) - c.local_largefree = 0 - h.nlargefree += uint64(c.local_nlargefree) - c.local_nlargefree = 0 - for i := 0; i < len(c.local_nsmallfree); i++ { - h.nsmallfree[i] += uint64(c.local_nsmallfree[i]) - c.local_nsmallfree[i] = 0 - } +func (s *sysMemStat) load() uint64 { + return atomic.Load64((*uint64)(s)) } -// Atomically increases a given *system* memory stat. We are counting on this -// stat never overflowing a uintptr, so this function must only be used for -// system memory stats. +// add atomically adds the sysMemStat by n. // -// The current implementation for little endian architectures is based on -// xadduintptr(), which is less than ideal: xadd64() should really be used. -// Using xadduintptr() is a stop-gap solution until arm supports xadd64() that -// doesn't use locks. (Locks are a problem as they require a valid G, which -// restricts their useability.) -// -// A side-effect of using xadduintptr() is that we need to check for -// overflow errors. +// Must be nosplit as it is called in runtime initialization, e.g. newosproc0. 
//go:nosplit -func mSysStatInc(sysStat *uint64, n uintptr) { - if sysStat == nil { +func (s *sysMemStat) add(n int64) { + if s == nil { return } - if sys.BigEndian { - atomic.Xadd64(sysStat, int64(n)) - return + val := atomic.Xadd64((*uint64)(s), n) + if (n > 0 && int64(val) < n) || (n < 0 && int64(val)+n < n) { + print("runtime: val=", val, " n=", n, "\n") + throw("sysMemStat overflow") } - if val := atomic.Xadduintptr((*uintptr)(unsafe.Pointer(sysStat)), n); val < n { - print("runtime: stat overflow: val ", val, ", n ", n, "\n") - exit(2) +} + +// heapStatsDelta contains deltas of various runtime memory statistics +// that need to be updated together in order for them to be kept +// consistent with one another. +type heapStatsDelta struct { + // Memory stats. + committed int64 // byte delta of memory committed + released int64 // byte delta of released memory generated + inHeap int64 // byte delta of memory placed in the heap + inStacks int64 // byte delta of memory reserved for stacks + inWorkBufs int64 // byte delta of memory reserved for work bufs + inPtrScalarBits int64 // byte delta of memory reserved for unrolled GC prog bits + + // Allocator stats. + largeAlloc uintptr // bytes allocated for large objects + largeAllocCount uintptr // number of large object allocations + smallAllocCount [_NumSizeClasses]uintptr // number of allocs for small objects + largeFree uintptr // bytes freed for large objects (>maxSmallSize) + largeFreeCount uintptr // number of frees for large objects (>maxSmallSize) + smallFreeCount [_NumSizeClasses]uintptr // number of frees for small objects (<=maxSmallSize) + + // Add a uint32 to ensure this struct is a multiple of 8 bytes in size. + // Only necessary on 32-bit platforms. + // _ [(sys.PtrSize / 4) % 2]uint32 +} + +// merge adds in the deltas from b into a. +func (a *heapStatsDelta) merge(b *heapStatsDelta) { + a.committed += b.committed + a.released += b.released + a.inHeap += b.inHeap + a.inStacks += b.inStacks + a.inWorkBufs += b.inWorkBufs + a.inPtrScalarBits += b.inPtrScalarBits + + a.largeAlloc += b.largeAlloc + a.largeAllocCount += b.largeAllocCount + for i := range b.smallAllocCount { + a.smallAllocCount[i] += b.smallAllocCount[i] + } + a.largeFree += b.largeFree + a.largeFreeCount += b.largeFreeCount + for i := range b.smallFreeCount { + a.smallFreeCount[i] += b.smallFreeCount[i] } } -// Atomically decreases a given *system* memory stat. Same comments as -// mSysStatInc apply. -//go:nosplit -func mSysStatDec(sysStat *uint64, n uintptr) { - if sysStat == nil { - return +// consistentHeapStats represents a set of various memory statistics +// whose updates must be viewed completely to get a consistent +// state of the world. +// +// To write updates to memory stats use the acquire and release +// methods. To obtain a consistent global snapshot of these statistics, +// use read. +type consistentHeapStats struct { + // stats is a ring buffer of heapStatsDelta values. + // Writers always atomically update the delta at index gen. + // + // Readers operate by rotating gen (0 -> 1 -> 2 -> 0 -> ...) + // and synchronizing with writers by observing each mcache's + // statsSeq field. If the reader observes a P (to which the + // mcache is bound) not writing, it can be sure that it will + // pick up the new gen value the next time it writes. + // The reader then takes responsibility by clearing space + // in the ring buffer for the next reader to rotate gen to + // that space (i.e. 
it merges in values from index (gen-2) mod 3 + // to index (gen-1) mod 3, then clears the former). + // + // Note that this means only one reader can be reading at a time. + // There is no way for readers to synchronize. + // + // This process is why we need ring buffer of size 3 instead + // of 2: one is for the writers, one contains the most recent + // data, and the last one is clear so writers can begin writing + // to it the moment gen is updated. + stats [3]heapStatsDelta + + // gen represents the current index into which writers + // are writing, and can take on the value of 0, 1, or 2. + // This value is updated atomically. + gen uint32 +} + +// acquire returns a heapStatsDelta to be updated. In effect, +// it acquires the shard for writing. release must be called +// as soon as the relevant deltas are updated. c must be +// a valid mcache not being used by any other thread. +// +// The returned heapStatsDelta must be updated atomically. +// +// Note however, that this is unsafe to call concurrently +// with other writers and there must be only one writer +// at a time. +func (m *consistentHeapStats) acquire(c *mcache) *heapStatsDelta { + seq := atomic.Xadd(&c.statsSeq, 1) + if seq%2 == 0 { + // Should have been incremented to odd. + print("runtime: seq=", seq, "\n") + throw("bad sequence number") } - if sys.BigEndian { - atomic.Xadd64(sysStat, -int64(n)) - return + gen := atomic.Load(&m.gen) % 3 + return &m.stats[gen] +} + +// release indicates that the writer is done modifying +// the delta. The value returned by the corresponding +// acquire must no longer be accessed or modified after +// release is called. +// +// The mcache passed here must be the same as the one +// passed to acquire. +func (m *consistentHeapStats) release(c *mcache) { + seq := atomic.Xadd(&c.statsSeq, 1) + if seq%2 != 0 { + // Should have been incremented to even. + print("runtime: seq=", seq, "\n") + throw("bad sequence number") + } +} + +// unsafeRead aggregates the delta for this shard into out. +// +// Unsafe because it does so without any synchronization. The +// only safe time to call this is if the world is stopped or +// we're freezing the world or going down anyway (and we just +// want _some_ estimate). +func (m *consistentHeapStats) unsafeRead(out *heapStatsDelta) { + for i := range m.stats { + out.merge(&m.stats[i]) + } +} + +// unsafeClear clears the shard. +// +// Unsafe because the world must be stopped and values should +// be donated elsewhere before clearing. +func (m *consistentHeapStats) unsafeClear() { + for i := range m.stats { + m.stats[i] = heapStatsDelta{} + } +} + +// read takes a globally consistent snapshot of m +// and puts the aggregated value in out. Even though out is a +// heapStatsDelta, the resulting values should be complete and +// valid statistic values. +// +// Not safe to call concurrently. The world must be stopped +// or metricsSema must be held. +func (m *consistentHeapStats) read(out *heapStatsDelta) { + // Getting preempted after this point is not safe because + // we read allp. We need to make sure a STW can't happen + // so it doesn't change out from under us. + mp := acquirem() + + // Rotate gen, effectively taking a snapshot of the state of + // these statistics at the point of the exchange by moving + // writers to the next set of deltas. + // + // This exchange is safe to do because we won't race + // with anyone else trying to update this value. 
+ currGen := atomic.Load(&m.gen) + atomic.Xchg(&m.gen, (currGen+1)%3) + prevGen := currGen - 1 + if currGen == 0 { + prevGen = 2 } - if val := atomic.Xadduintptr((*uintptr)(unsafe.Pointer(sysStat)), uintptr(-int64(n))); val+n < n { - print("runtime: stat underflow: val ", val, ", n ", n, "\n") - exit(2) + for _, p := range allp { + c := p.mcache + if c == nil { + continue + } + // Spin until there are no more writers. + for atomic.Load(&c.statsSeq)%2 != 0 { + } } + + // At this point we've observed that each sequence + // number is even, so any future writers will observe + // the new gen value. That means it's safe to read from + // the other deltas in the stats buffer. + + // Perform our responsibilities and free up + // stats[prevGen] for the next time we want to take + // a snapshot. + m.stats[currGen].merge(&m.stats[prevGen]) + m.stats[prevGen] = heapStatsDelta{} + + // Finally, copy out the complete delta. + *out = m.stats[currGen] + releasem(mp) } |
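
The sketches below are not part of the commit; they are illustrative stand-ins for readers who want to experiment with the same patterns outside the runtime tree. All names in them (sysMemStatSketch, statsSketch, delta, writer, consistentStats) are invented, and the public sync/atomic package and panic stand in for runtime/internal/atomic and throw.

First, a minimal sketch of the sysMemStat pattern the commit introduces: a uint64 counter that is only ever read and updated atomically, with the same overflow/underflow check that the new sysMemStat.add performs.

```go
package main

import (
	"fmt"
	"sync/atomic"
)

// sysMemStatSketch mirrors the idea behind the new sysMemStat type:
// a uint64 counter that is only ever read and updated atomically.
// (Hypothetical standalone type; the real one lives in mstats.go and
// uses runtime/internal/atomic and throw.)
type sysMemStatSketch uint64

// load atomically reads the current value.
func (s *sysMemStatSketch) load() uint64 {
	return atomic.LoadUint64((*uint64)(s))
}

// add atomically adds n (which may be negative) and applies the same
// overflow/underflow check as sysMemStat.add in the diff.
func (s *sysMemStatSketch) add(n int64) {
	val := atomic.AddUint64((*uint64)(s), uint64(n))
	if (n > 0 && int64(val) < n) || (n < 0 && int64(val)+n < n) {
		panic(fmt.Sprintf("sysMemStat overflow: val=%d n=%d", val, n))
	}
}

func main() {
	var stat sysMemStatSketch
	stat.add(4096)  // e.g. bytes obtained from the OS
	stat.add(-1024) // e.g. bytes returned to the OS
	fmt.Println("current:", stat.load()) // prints: current: 3072
}
```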
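The commit also extends init() with unsafe.Offsetof and unsafe.Sizeof checks so that atomically updated 64-bit fields stay 8-byte aligned, which 64-bit atomics require on 32-bit platforms. Below is a hedged standalone sketch of the same style of check; the statsSketch type and its layout are invented for illustration.

```go
package main

import (
	"fmt"
	"unsafe"
)

// statsSketch stands in for a struct like mstats whose 64-bit fields
// are updated atomically and must therefore sit at 8-byte offsets on
// 32-bit platforms. The layout here is invented for illustration.
type statsSketch struct {
	flag      uint32
	_         uint32 // explicit padding keeps liveBytes 8-byte aligned
	liveBytes uint64 // imagine this is updated with 64-bit atomics
}

func main() {
	var s statsSketch

	// The same style of check the commit adds to init() in mstats.go:
	// fail loudly if a future field reshuffle breaks the alignment.
	if off := unsafe.Offsetof(s.liveBytes); off%8 != 0 {
		panic(fmt.Sprintf("liveBytes not 8-byte aligned: offset %d", off))
	}

	// A Sizeof check (like the new heapStatsDelta check) ensures that
	// elements of an array of these structs stay 8-byte aligned too.
	if size := unsafe.Sizeof(s); size%8 != 0 {
		panic(fmt.Sprintf("size %d is not a multiple of 8", size))
	}

	fmt.Println("alignment checks passed")
}
```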
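readmemstats_m now fills MemStats field by field instead of memmove-ing a prefix of mstats, so derived relationships such as HeapIdle = HeapSys - HeapInuse hold by construction within a single snapshot. The small program below uses only the long-standing public API (runtime.ReadMemStats and runtime.MemStats, nothing added by this commit) to show those relationships.

```go
package main

import (
	"fmt"
	"runtime"
)

func main() {
	// Allocate a little so the numbers are non-trivial.
	sink := make([][]byte, 0, 1024)
	for i := 0; i < 1024; i++ {
		sink = append(sink, make([]byte, 1<<10))
	}

	var m runtime.MemStats
	runtime.ReadMemStats(&m)

	// readmemstats_m computes HeapIdle as heap_sys - heap_inuse, so
	// this identity holds exactly in a single snapshot.
	fmt.Println("HeapSys   :", m.HeapSys)
	fmt.Println("HeapInuse :", m.HeapInuse)
	fmt.Println("HeapIdle  :", m.HeapIdle, "=", m.HeapSys-m.HeapInuse)

	// StackSys is reported as heap-allocated stack memory plus the
	// small amount of stack mapped directly from the OS.
	fmt.Println("StackInuse:", m.StackInuse, "StackSys:", m.StackSys)

	runtime.KeepAlive(sink)
}
```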
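Finally, a compact sketch of the consistentHeapStats protocol: each writer brackets its update with an even/odd sequence number, and a reader rotates the three-slot ring, waits for in-flight writers to drain, folds the retired slot into the live one, and returns the aggregate. The delta, writer, and consistentStats types below are simplified stand-ins (a writer models an mcache), not the runtime's implementation.

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// delta is a stand-in for heapStatsDelta: counters that only make
// sense when observed together.
type delta struct {
	allocBytes int64
	freeBytes  int64
}

func (a *delta) merge(b *delta) {
	a.allocBytes += b.allocBytes
	a.freeBytes += b.freeBytes
}

// writer plays the role of an mcache: it carries the even/odd
// sequence number the reader synchronizes on.
type writer struct {
	statsSeq uint32
}

// consistentStats sketches consistentHeapStats: a three-slot ring of
// deltas plus a generation index the reader rotates. Three slots are
// needed so one can be written, one holds the latest snapshot, and
// one is already clear for the next rotation.
type consistentStats struct {
	stats [3]delta
	gen   uint32
}

// acquire enters the writer's critical section (odd seq) and returns
// the slot writers are currently directed to.
func (m *consistentStats) acquire(w *writer) *delta {
	atomic.AddUint32(&w.statsSeq, 1) // now odd
	return &m.stats[atomic.LoadUint32(&m.gen)%3]
}

// release leaves the critical section (even seq); the pointer from
// acquire must not be used afterwards.
func (m *consistentStats) release(w *writer) {
	atomic.AddUint32(&w.statsSeq, 1) // now even
}

// read rotates gen so new writes land in a fresh slot, waits for
// in-flight writers to finish, folds the retired slot into the live
// one, clears the retired slot, and copies out the aggregate.
func (m *consistentStats) read(writers []*writer, out *delta) {
	currGen := atomic.LoadUint32(&m.gen)
	atomic.StoreUint32(&m.gen, (currGen+1)%3)
	prevGen := (currGen + 2) % 3

	for _, w := range writers {
		for atomic.LoadUint32(&w.statsSeq)%2 != 0 {
			// Spin until the writer is between updates; it will see
			// the new gen on its next acquire.
		}
	}

	m.stats[currGen].merge(&m.stats[prevGen])
	m.stats[prevGen] = delta{}
	*out = m.stats[currGen]
}

func main() {
	var m consistentStats
	ws := []*writer{{}, {}}

	var wg sync.WaitGroup
	for _, w := range ws {
		w := w
		wg.Add(1)
		go func() {
			defer wg.Done()
			for i := 0; i < 1000; i++ {
				d := m.acquire(w)
				atomic.AddInt64(&d.allocBytes, 16) // slot is shared, so update atomically
				atomic.AddInt64(&d.freeBytes, 8)
				m.release(w)
			}
		}()
	}
	wg.Wait()

	var total delta
	m.read(ws, &total)
	fmt.Println("alloc:", total.allocBytes, "free:", total.freeBytes) // 32000 and 16000
}
```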