mark & sweep. A GC cycle is guaranteed at least once every 2 minutes; otherwise a collection is triggered once total allocated memory exceeds the amount in use after the previous GC by a configurable ratio (100% by default, i.e. GOGC=100).
While marking is in progress all Gs are stopped (stop-the-world); the marking work itself is carried out in parallel by multiple workers.
Sweeping is spread out: spans are swept lazily, piece by piece, after the mark phase.
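Purely as a user-side illustration (not the runtime internals quoted below): the growth-ratio trigger can be observed with runtime.ReadMemStats and tuned with runtime/debug.SetGCPercent, and runtime.GC forces a blocking collection; GODEBUG=gctrace=1 prints a line per cycle.

package main

import (
	"fmt"
	"runtime"
	"runtime/debug"
)

func main() {
	// GOGC / SetGCPercent controls the "heap grew by N% since the last GC" trigger.
	debug.SetGCPercent(100) // the default: collect when the live heap roughly doubles

	var ms runtime.MemStats
	runtime.ReadMemStats(&ms)
	fmt.Printf("heap in use: %d B, next GC when heap reaches: %d B\n", ms.HeapAlloc, ms.NextGC)

	// Allocate past the trigger; the collector runs automatically.
	var keep [][]byte
	for i := 0; i < 64; i++ {
		keep = append(keep, make([]byte, 1<<20))
	}

	runtime.GC() // an explicit, blocking collection is also possible
	runtime.ReadMemStats(&ms)
	fmt.Printf("GC cycles so far: %d (still holding %d MB)\n", ms.NumGC, len(keep))
}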
The memory blocks covered by the collector include .data, .bss, every G's stack, and the SpecialFinalizer records.
Each block of memory has a corresponding bitmap holding the flag bits for every word (8 bytes); each word needs 4 flag bits.
Marking is a recursive traversal of these memory blocks driven by their bitmaps.
A word's flag bits fall into the following 3 categories (BitsScalar, BitsPointer and BitsMultiWord in the code below):
Two consecutive words together describe one iface or eface (the BitsMultiWord case).
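The bitmap sits just below arena_start and grows downward; with 8-byte words, wordsPerBitmapByte = 2 and gcBits = 4, so one bitmap byte describes two heap words. A rough Go sketch of the address-to-bitmap arithmetic repeated in scanblock and MSpan_Sweep below (bitmapPos and its constants are illustrative, not runtime APIs):

// Sketch only: mirrors the off/bitp/shift arithmetic in the runtime code below.
package gcbitmap

const (
	ptrSize            = 8 // word size on amd64
	wordsPerBitmapByte = 2 // one bitmap byte describes two heap words
	gcBits             = 4 // flag bits per heap word
)

// bitmapPos reports which bitmap byte (counted downward from arena_start-1) and
// which bit shift inside that byte hold the flags for the heap word at addr.
func bitmapPos(arenaStart, addr uintptr) (bytesBelowArenaStart uintptr, shift uint) {
	off := (addr - arenaStart) / ptrSize              // word index within the arena
	bytesBelowArenaStart = off/wordsPerBitmapByte + 1 // bitp = arena_start - off/2 - 1
	shift = uint(off%wordsPerBitmapByte) * gcBits     // 0 or 4
	return
}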
// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
// unscanned objects left. Instead of using an explicit recursion, it keeps
// a work list in the Workbuf* structures and loops in the main function
// body. Keeping an explicit work list is easier on the stack allocator and
// more efficient.
static void
scanblock(byte *b, uintptr n, byte *ptrmask)
{
byte *obj, *obj0, *p, *arena_start, *arena_used, **wp, *scanbuf[8], *ptrbitp, *bitp;
uintptr i, j, nobj, size, idx, x, off, scanbufpos, bits, xbits, shift;
Workbuf *wbuf;
Iface *iface;
Eface *eface;
Type *typ;
MSpan *s;
PageID k;
bool keepworking;
// Cache memory arena parameters in local vars.
arena_start = runtime·mheap.arena_start;
arena_used = runtime·mheap.arena_used;
wbuf = getempty(nil);
nobj = wbuf->nobj;
wp = &wbuf->obj[nobj];
keepworking = b == nil;
scanbufpos = 0;
for(i = 0; i < nelem(scanbuf); i++)
scanbuf[i] = nil;
ptrbitp = nil;
// ptrmask can have 2 possible values:
// 1. nil - obtain pointer mask from GC bitmap.
// 2. pointer to a compact mask (for stacks and data).
if(b != nil)
goto scanobj;
for(;;) {
if(nobj == 0) {
// Out of work in workbuf.
// First, see if there is any work in scanbuf.
for(i = 0; i < nelem(scanbuf); i++) {
b = scanbuf[scanbufpos];
scanbuf[scanbufpos++] = nil;
scanbufpos %= nelem(scanbuf);
if(b != nil) {
n = arena_used - b; // scan until bitBoundary or BitsDead
ptrmask = nil; // use GC bitmap for pointer info
goto scanobj;
}
}
if(!keepworking) {
putempty(wbuf);
return;
}
// Refill workbuf from global queue.
wbuf = getfull(wbuf);
if(wbuf == nil)
return;
nobj = wbuf->nobj;
wp = &wbuf->obj[nobj];
}
// If another proc wants a pointer, give it some.
if(runtime·work.nwait > 0 && nobj > 4 && runtime·work.full == 0) {
wbuf->nobj = nobj;
wbuf = handoff(wbuf);
nobj = wbuf->nobj;
wp = &wbuf->obj[nobj];
}
wp--;
nobj--;
b = *wp;
n = arena_used - b; // scan until next bitBoundary or BitsDead
ptrmask = nil; // use GC bitmap for pointer info
scanobj:
if(DebugPtrs)
runtime·printf("scanblock %p %p %p\n", b, n, ptrmask);
// Find bits of the beginning of the object.
if(ptrmask == nil) {
off = (uintptr*)b - (uintptr*)arena_start;
ptrbitp = arena_start - off/wordsPerBitmapByte - 1;
}
for(i = 0; i < n; i += PtrSize) {
obj = nil;
// Find bits for this word.
......
if(bits <= BitsScalar) // BitsScalar || BitsDead
continue;
if(bits == BitsPointer) {
obj = *(byte**)(b+i);
obj0 = obj;
goto markobj;
}
// With those three out of the way, must be multi-word.
if(Debug && bits != BitsMultiWord)
runtime·throw("unexpected garbage collection bits");
// Find the next pair of bits.
if(ptrmask == nil) {
bits = *ptrbitp;
j = ((uintptr)b+i+PtrSize)/PtrSize & 1;
ptrbitp -= j;
bits >>= gcBits*j;
bits = (bits>>2)&BitsMask;
} else
bits = (ptrmask[((i+PtrSize)/PtrSize)/4]>>((((i+PtrSize)/PtrSize)%4)*BitsPerPointer))&BitsMask;
if(Debug && bits != BitsIface && bits != BitsEface)
runtime·throw("unexpected garbage collection bits");
if(bits == BitsIface) {
iface = (Iface*)(b+i);
if(iface->tab != nil) {
typ = iface->tab->type;
if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
obj = iface->data;
}
} else {
eface = (Eface*)(b+i);
typ = eface->type;
if(typ != nil) {
if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
obj = eface->data;
}
}
i += PtrSize;
obj0 = obj;
markobj:
// At this point we have extracted the next potential pointer.
// Check if it points into heap.
if(obj == nil)
continue;
if(obj < arena_start || obj >= arena_used) {
if((uintptr)obj < PhysPageSize && runtime·invalidptr) {
s = nil;
goto badobj;
}
continue;
}
// Mark the object.
obj = (byte*)((uintptr)obj & ~(PtrSize-1));
off = (uintptr*)obj - (uintptr*)arena_start;
bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapByte) * gcBits;
xbits = *bitp;
bits = (xbits >> shift) & bitMask;
if((bits&bitBoundary) == 0) {
// Not a beginning of a block, consult span table to find the block beginning.
......
obj = p;
goto markobj;
}
if(DebugPtrs)
runtime·printf("scan *%p = %p => base %p\n", b i, obj0, obj);
if(nbadblock > 0 && (uintptr)obj == badblock[nbadblock-1]) {
// Running garbage collection again because
// we want to find the path from a root to a bad pointer.
// Found possible next step; extend or finish path.
for(j=0; j<nbadblock; j++)
if(badblock[j] == (uintptr)b)
goto AlreadyBad;
runtime·printf("runtime: found *(%p %p) = %p %p\n", b, i, obj0, (uintptr)(obj-obj0));
if(ptrmask != nil)
runtime·throw("bad pointer");
if(nbadblock >= nelem(badblock))
runtime·throw("badblock trace too long");
badblock[nbadblock++] = (uintptr)b;
AlreadyBad:;
}
// Now we have bits, bitp, and shift correct for
// obj pointing at the base of the object.
// Only care about not marked objects.
if((bits&bitMarked) != 0)
continue;
// If obj size is greater than 8, then each byte of GC bitmap
// contains info for at most one object. In such case we use
// non-atomic byte store to mark the object. This can lead
// to double enqueue of the object for scanning, but scanning
// is an idempotent operation, so it is OK. This cannot lead
// to bitmap corruption because the single marked bit is the
// only thing that can change in the byte.
// For 8-byte objects we use non-atomic store, if the other
// quadruple is already marked. Otherwise we resort to CAS
// loop for marking.
if((xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)))
*bitp = xbits | (bitMarked<<shift);
else
runtime·atomicor8(bitp, bitMarked<<shift);
if(((xbits>>(shift+2))&BitsMask) == BitsDead)
continue; // noscan object
// Queue the obj for scanning.
PREFETCH(obj);
p = scanbuf[scanbufpos];
scanbuf[scanbufpos++] = obj;
scanbufpos %= nelem(scanbuf);
if(p == nil)
continue;
// If workbuf is full, obtain an empty one.
if(nobj >= nelem(wbuf->obj)) {
wbuf->nobj = nobj;
wbuf = getempty(wbuf);
nobj = wbuf->nobj;
wp = &wbuf->obj[nobj];
}
*wp = p;
wp++;
nobj++;
}
............
}
}
static void
markroot(ParFor *desc, uint32 i)
{
FinBlock *fb;
MSpan *s;
uint32 spanidx, sg;
G *gp;
void *p;
uint32 status;
bool restart;
USED(&desc);
// Note: if you add a case here, please also update heapdump.c:dumproots.
switch(i) {
case RootData:
scanblock(runtime·data, runtime·edata - runtime·data, runtime·gcdatamask.bytedata);
break;
case RootBss:
scanblock(runtime·bss, runtime·ebss - runtime·bss, runtime·gcbssmask.bytedata);
break;
case RootFinalizers:
for(fb=runtime·allfin; fb; fb=fb->alllink)
scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), finptrmask);
break;
case RootSpans:
// mark MSpan.specials
......
break;
case RootFlushCaches:
flushallmcaches();
break;
default:
// the rest is scanning goroutine stacks
if(i - RootCount >= runtime·allglen)
runtime·throw("markroot: bad index");
gp = runtime·allg[i - RootCount];
// remember when we've first observed the G blocked
// needed only to output in traceback
status = runtime·readgstatus(gp);
if((status == Gwaiting || status == Gsyscall) && gp->waitsince == 0)
gp->waitsince = runtime·work.tstart;
// Shrink a stack if not much of it is being used.
runtime·shrinkstack(gp);
if(runtime·readgstatus(gp) == Gdead)
gp->gcworkdone = true;
else
gp->gcworkdone = false;
restart = runtime·stopg(gp);
scanstack(gp);
if(restart)
runtime·restartg(gp);
break;
}
}
Sweep checks every object in a span for the mark bit and pushes the unmarked ones onto the span's freelist.
If every object in the span ends up on the freelist, the span's memory is returned to the heap.
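A minimal Go sketch of that idea, using hypothetical span/object types in place of the runtime's MSpan and MLink (the real sweep below works on the GC bitmap rather than a per-slot bool array):

// Sketch only: hypothetical types, not the runtime's MSpan/MLink structures.
package sweepsketch

type object struct{ next *object }

type span struct {
	objects  []*object // object slots in this span
	marked   []bool    // mark bit per slot, set during the mark phase
	freelist *object
}

// sweepSpan pushes every unmarked object onto the span's freelist, clears the
// mark bits for the next cycle, and reports whether the whole span became free
// (in which case the caller could return the span's memory to the heap).
func sweepSpan(s *span) bool {
	nfree := 0
	for i, obj := range s.objects {
		if s.marked[i] {
			s.marked[i] = false // survivor: keep it, clear the mark for next GC
			continue
		}
		obj.next = s.freelist // garbage: splice onto the freelist
		s.freelist = obj
		nfree++
	}
	return nfree == len(s.objects)
}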
// sweeps one span
// returns number of pages returned to heap, or -1 if there is nothing to sweep
uintptr
runtime·sweepone(void)
{
MSpan *s;
uint32 idx, sg;
uintptr npages;
// increment locks to ensure that the goroutine is not preempted
// in the middle of sweep thus leaving the span in an inconsistent state for next GC
g->m->locks++;
sg = runtime·mheap.sweepgen;
for(;;) {
idx = runtime·xadd(&runtime·sweep.spanidx, 1) - 1;
if(idx >= runtime·work.nspan) {
runtime·mheap.sweepdone = true;
g->m->locks--;
return -1;
}
s = runtime·work.spans[idx];
if(s->state != MSpanInUse) {
s->sweepgen = sg;
continue;
}
if(s->sweepgen != sg-2 || !runtime·cas(&s->sweepgen, sg-2, sg-1))
continue;
npages = s->npages;
if(!runtime·MSpan_Sweep(s, false))
npages = 0;
g->m->locks--;
return npages;
}
}
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
// Returns true if the span was returned to heap.
// If preserve=true, don't return it to heap nor relink in MCentral lists;
// caller takes care of it.
bool
runtime·MSpan_Sweep(MSpan *s, bool preserve)
{
int32 cl, n, npages, nfree;
uintptr size, off, step;
uint32 sweepgen;
byte *p, *bitp, shift, xbits, bits;
MCache *c;
byte *arena_start;
MLink head, *end, *link;
Special *special, **specialp, *y;
bool res, sweepgenset;
// It's critical that we enter this function with preemption disabled,
// GC must not start while we are in the middle of this function.
if(g->m->locks == 0 && g->m->mallocing == 0 && g != g->m->g0)
runtime·throw("MSpan_Sweep: m is not locked");
sweepgen = runtime·mheap.sweepgen;
if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
s->state, s->sweepgen, sweepgen);
runtime·throw("MSpan_Sweep: bad span state");
}
arena_start = runtime·mheap.arena_start;
cl = s->sizeclass;
size = s->elemsize;
if(cl == 0) {
n = 1;
} else {
// Chunk full of small blocks.
npages = runtime·class_to_allocnpages[cl];
n = (npages << PageShift) / size;
}
res = false;
nfree = 0;
end = &head;
c = g->m->mcache;
sweepgenset = false;
// Mark any free objects in this span so we don't collect them.
for(link = s->freelist; link != nil; link = link->next) {
off = (uintptr*)link - (uintptr*)arena_start;
bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapByte) * gcBits;
*bitp |= bitMarked<<shift;
}
// Unlink & free special records for any objects we're about to free.
specialp = &s->specials;
special = *specialp;
while(special != nil) {
// A finalizer can be set for an inner byte of an object, find object beginning.
p = (byte*)((s->start << PageShift) + special->offset/size*size);
off = (uintptr*)p - (uintptr*)arena_start;
bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapByte) * gcBits;
bits = (*bitp>>shift) & bitMask;
if((bits&bitMarked) == 0) {
// Find the exact byte for which the special was setup
// (as opposed to object beginning).
p = (byte*)((s->start << PageShift) + special->offset);
// about to free object: splice out special record
y = special;
special = special->next;
*specialp = special;
if(!runtime·freespecial(y, p, size, false)) {
// stop freeing of object if it has a finalizer
*bitp |= bitMarked << shift;
}
} else {
// object is still live: keep special record
specialp = &special->next;
special = *specialp;
}
}
// Sweep through n objects of given size starting at p.
// This thread owns the span now, so it can manipulate
// the block bitmap without atomic operations.
p = (byte*)(s->start << PageShift);
off = (uintptr*)p - (uintptr*)arena_start;
bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = 0;
step = size/(PtrSize*wordsPerBitmapByte);
// Rewind to the previous quadruple as we move to the next
// in the beginning of the loop.
bitp += step;
if(step == 0) {
// 8-byte objects.
bitp++;
shift = gcBits;
}
for(; n > 0; n--, p += size) {
bitp -= step;
if(step == 0) {
if(shift != 0)
bitp--;
shift = gcBits - shift;
}
xbits = *bitp;
bits = (xbits>>shift) & bitMask;
// Allocated and marked object, reset bits to allocated.
if((bits&bitMarked) != 0) {
*bitp &= ~(bitMarked<<shift);
continue;
}
// At this point we know that we are looking at garbage object
// that needs to be collected.
if(runtime·debug.allocfreetrace)
runtime·tracefree(p, size);
// Reset to allocated+noscan.
*bitp = (xbits & ~((bitMarked|(BitsMask<<2))<<shift)) | ((uintptr)BitsDead<<(shift+2));
if(cl == 0) {
// Free large span.
if(preserve)
runtime·throw("can't preserve large span");
runtime·unmarkspan(p, s->npages<<PageShift);
// important to set sweepgen before returning it to heap
runtime·atomicstore(&s->sweepgen, sweepgen);
sweepgenset = true;
// NOTE(rsc,dvyukov): The original implementation of efence
// in CL 22060046 used SysFree instead of SysFault, so that
// the operating system would eventually give the memory
// back to us again, so that an efence program could run
// longer without running out of memory. Unfortunately,
// calling SysFree here without any kind of adjustment of the
// heap data structures means that when the memory does
// come back to us, we have the wrong metadata for it, either in
// the MSpan structures or in the garbage collection bitmap.
// Using SysFault here means that the program will run out of
// memory fairly quickly in efence mode, but at least it won't
// have mysterious crashes due to confused memory reuse.
// It should be possible to switch back to SysFree if we also
// implement and then call some kind of MHeap_DeleteSpan.
if(runtime·debug.efence) {
s->limit = nil; // prevent mlookup from finding this span
runtime·SysFault(p, size);
} else
runtime·MHeap_Free(&runtime·mheap, s, 1);
c->local_nlargefree++;
c->local_largefree += size;
runtime·xadd64(&mstats.next_gc, -(uint64)(size * (runtime·gcpercent + 100)/100));
res = true;
} else {
// Free small object.
if(size > 2*sizeof(uintptr))
((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll; // mark as "needs to be zeroed"
else if(size > sizeof(uintptr))
((uintptr*)p)[1] = 0;
end->next = (MLink*)p;
end = (MLink*)p;
nfree++;
}
}
// We need to set s->sweepgen = h->sweepgen only when all blocks are swept,
// because of the potential for a concurrent free/SetFinalizer.
// But we need to set it before we make the span available for allocation
// (return it to heap or mcentral), because allocation code assumes that a
// span is already swept if available for allocation.
if(!sweepgenset && nfree == 0) {
// The span must be in our exclusive ownership until we update sweepgen,
// check for potential races.
if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
s->state, s->sweepgen, sweepgen);
runtime·throw("MSpan_Sweep: bad span state after sweep");
}
runtime·atomicstore(&s->sweepgen, sweepgen);
}
if(nfree > 0) {
c->local_nsmallfree[cl] += nfree;
c->local_cachealloc -= nfree * size;
runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (runtime·gcpercent + 100)/100));
res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl].mcentral, s, nfree, head.next, end, preserve);
// MCentral_FreeSpan updates sweepgen
}
return res;
}