Timestamp: Oct 31, 2011, 11:43:37 PM
Author: [email protected]
Message:

The GC should be parallel
https://bugs.webkit.org/show_bug.cgi?id=70995

Source/JavaScriptCore:

Reviewed by Geoff Garen.

Added parallel tracing to the GC. This works by giving each marking
thread its own local mark stack, plus one global shared mark stack.
Threads occasionally donate cells from their local mark stack to the
shared one when the heuristics say it is affordable to do so, and
threads that have depleted their local mark stacks try to steal cells
from the shared one.
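
A rough, self-contained sketch of that balancing (names and the
threshold below are illustrative, not the values in Heuristics.h; the
real logic is in MarkStackArray::donateSomeCellsTo and
MarkStackArray::stealSomeCellsFrom in the diff below):

    #include <algorithm>
    #include <cstddef>
    #include <mutex>
    #include <vector>

    struct Cell;                                   // stands in for JSCell

    static const std::size_t cellsToKeep = 100;    // illustrative threshold

    // Donate surplus cells from a thread's local stack to the shared stack.
    void donateIfAffordable(std::vector<Cell*>& local,
                            std::vector<Cell*>& shared, std::mutex& sharedLock)
    {
        if (local.size() <= cellsToKeep)
            return;                                // keep enough work for ourselves
        std::lock_guard<std::mutex> locker(sharedLock);
        while (local.size() > cellsToKeep) {       // hand off the surplus
            shared.push_back(local.back());
            local.pop_back();
        }
    }

    // Refill a depleted local stack with a fair share of the shared one.
    void stealIfDepleted(std::vector<Cell*>& local,
                         std::vector<Cell*>& shared, std::mutex& sharedLock,
                         std::size_t numberOfMarkers)
    {
        if (!local.empty())
            return;                                // still have local work
        std::lock_guard<std::mutex> locker(sharedLock);
        std::size_t toSteal = std::max<std::size_t>(1, shared.size() / numberOfMarkers);
        while (toSteal-- && !shared.empty()) {     // take roughly 1/Nth of the pool
            local.push_back(shared.back());
            shared.pop_back();
        }
    }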

Marking is now done with an atomic, weak, relaxed compare-and-swap (CAS).
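
In spirit, the mark bit is set with a retry loop like the following
self-contained sketch (std::atomic stands in here for the
WTF::weakCompareAndSwap / Bitmap::concurrentTestAndSet path that the
patch actually adds):

    #include <atomic>
    #include <cstdint>

    // Returns true if the bit was already set, i.e. another thread marked
    // the cell first; returns false if this call did the marking.
    bool concurrentTestAndSet(std::atomic<std::uint32_t>& word, unsigned bit)
    {
        const std::uint32_t mask = 1u << bit;
        std::uint32_t oldValue = word.load(std::memory_order_relaxed);
        while (!(oldValue & mask)) {
            // A weak CAS may fail spuriously, so retry until the bit is set
            // either by us or by a competing marking thread.
            if (word.compare_exchange_weak(oldValue, oldValue | mask,
                                           std::memory_order_relaxed))
                return false;                      // newly marked by us
        }
        return true;                               // already marked
    }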

This is a 23% speed-up on V8-splay when I use 4 marking threads,
leading to a 3.5% speed-up on V8.

It also appears that this reduces GC pause times on real websites by
more than half.

  • heap/Heap.cpp:

(JSC::Heap::Heap):
(JSC::Heap::~Heap):
(JSC::Heap::markRoots):

  • heap/Heap.h:
  • heap/MarkStack.cpp:

(JSC::MarkStackSegmentAllocator::MarkStackSegmentAllocator):
(JSC::MarkStackSegmentAllocator::~MarkStackSegmentAllocator):
(JSC::MarkStackSegmentAllocator::allocate):
(JSC::MarkStackSegmentAllocator::release):
(JSC::MarkStackSegmentAllocator::shrinkReserve):
(JSC::MarkStackArray::MarkStackArray):
(JSC::MarkStackArray::~MarkStackArray):
(JSC::MarkStackArray::expand):
(JSC::MarkStackArray::refill):
(JSC::MarkStackArray::donateSomeCellsTo):
(JSC::MarkStackArray::stealSomeCellsFrom):
(JSC::MarkStackThreadSharedData::markingThreadMain):
(JSC::MarkStackThreadSharedData::markingThreadStartFunc):
(JSC::MarkStackThreadSharedData::MarkStackThreadSharedData):
(JSC::MarkStackThreadSharedData::~MarkStackThreadSharedData):
(JSC::MarkStackThreadSharedData::reset):
(JSC::MarkStack::reset):
(JSC::SlotVisitor::donateSlow):
(JSC::SlotVisitor::drain):
(JSC::SlotVisitor::drainFromShared):
(JSC::MarkStack::mergeOpaqueRoots):
(JSC::SlotVisitor::harvestWeakReferences):

  • heap/MarkStack.h:

(JSC::MarkStackSegment::data):
(JSC::MarkStackSegment::capacityFromSize):
(JSC::MarkStackSegment::sizeFromCapacity):
(JSC::MarkStackArray::postIncTop):
(JSC::MarkStackArray::preDecTop):
(JSC::MarkStackArray::setTopForFullSegment):
(JSC::MarkStackArray::setTopForEmptySegment):
(JSC::MarkStackArray::top):
(JSC::MarkStackArray::validatePrevious):
(JSC::MarkStack::addWeakReferenceHarvester):
(JSC::MarkStack::mergeOpaqueRootsIfNecessary):
(JSC::MarkStack::mergeOpaqueRootsIfProfitable):
(JSC::MarkStack::MarkStack):
(JSC::MarkStack::addOpaqueRoot):
(JSC::MarkStack::containsOpaqueRoot):
(JSC::MarkStack::opaqueRootCount):
(JSC::MarkStackArray::append):
(JSC::MarkStackArray::canRemoveLast):
(JSC::MarkStackArray::removeLast):
(JSC::MarkStackArray::isEmpty):
(JSC::MarkStackArray::canDonateSomeCells):
(JSC::MarkStackArray::size):
(JSC::ParallelModeEnabler::ParallelModeEnabler):
(JSC::ParallelModeEnabler::~ParallelModeEnabler):

  • heap/MarkedBlock.h:

(JSC::MarkedBlock::testAndSetMarked):

  • heap/SlotVisitor.h:

(JSC::SlotVisitor::donate):
(JSC::SlotVisitor::donateAndDrain):
(JSC::SlotVisitor::donateKnownParallel):
(JSC::SlotVisitor::SlotVisitor):

  • heap/WeakReferenceHarvester.h:
  • runtime/Heuristics.cpp:

(JSC::Heuristics::initializeHeuristics):

  • runtime/Heuristics.h:
  • wtf/Atomics.h:

(WTF::weakCompareAndSwap):

  • wtf/Bitmap.h:

(WTF::::Bitmap):
(WTF::::get):
(WTF::::set):
(WTF::::testAndSet):
(WTF::::testAndClear):
(WTF::::concurrentTestAndSet):
(WTF::::concurrentTestAndClear):
(WTF::::clear):
(WTF::::clearAll):
(WTF::::nextPossiblyUnset):
(WTF::::findRunOfZeros):
(WTF::::count):
(WTF::::isEmpty):
(WTF::::isFull):

  • wtf/MainThread.h:

(WTF::isMainThreadOrGCThread):

  • wtf/Platform.h:
  • wtf/ThreadSpecific.h:

(WTF::::isSet):

  • wtf/mac/MainThreadMac.mm:

(WTF::initializeGCThreads):
(WTF::initializeMainThreadPlatform):
(WTF::initializeMainThreadToProcessMainThreadPlatform):
(WTF::registerGCThread):
(WTF::isMainThreadOrGCThread):

Source/WebCore:

Reviewed by Geoff Garen.

Added parallel tracing to the GC. This required loosening some assertions,
since some code may now be called from outside the main thread.
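
The loosened assertions follow roughly this pattern (an illustrative
sketch only; the real helpers are WTF::registerGCThread and
WTF::isMainThreadOrGCThread from the JavaScriptCore side of this
change):

    #include <cassert>

    static thread_local bool gcThread = false;     // set by each marking thread
    static bool onMainThread() { return true; }    // stand-in for isMainThread()

    void registerGCThread() { gcThread = true; }

    bool isMainThreadOrGCThread() { return onMainThread() || gcThread; }

    void accessSharedNode()                        // e.g. what TreeShared::parent() now allows
    {
        // Before this change: assert(onMainThread());
        assert(isMainThreadOrGCThread());          // GC marking threads allowed too
    }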

No new tests, since no behavior was changed.

  • platform/TreeShared.h:

(WebCore::TreeShared::parent):

File: 1 edited

Legend:

  (no prefix) Unmodified
  + Added
  - Removed
  • trunk/Source/JavaScriptCore/heap/MarkStack.cpp

r97827 → r98937

 #include "ConservativeRoots.h"
 #include "Heap.h"
+#include "Heuristics.h"
 #include "JSArray.h"
 #include "JSCell.h"
…
 #include "Structure.h"
 #include "WriteBarrier.h"
+#include <wtf/MainThread.h>
 
 namespace JSC {
 
-MarkStackArray::MarkStackArray()
-    : m_top(0)
-    , m_allocated(pageSize())
-{
-    m_data = static_cast<const JSCell**>(MarkStack::allocateStack(m_allocated));
-    m_capacity = m_allocated / sizeof(JSCell*);
+MarkStackSegmentAllocator::MarkStackSegmentAllocator()
+    : m_nextFreeSegment(0)
+{
+}
+
+MarkStackSegmentAllocator::~MarkStackSegmentAllocator()
+{
+    shrinkReserve();
+}
+
+MarkStackSegment* MarkStackSegmentAllocator::allocate()
+{
+    {
+        MutexLocker locker(m_lock);
+        if (m_nextFreeSegment) {
+            MarkStackSegment* result = m_nextFreeSegment;
+            m_nextFreeSegment = result->m_previous;
+            return result;
+        }
+    }
+
+    return static_cast<MarkStackSegment*>(OSAllocator::reserveAndCommit(Heuristics::gcMarkStackSegmentSize));
+}
+
+void MarkStackSegmentAllocator::release(MarkStackSegment* segment)
+{
+    MutexLocker locker(m_lock);
+    segment->m_previous = m_nextFreeSegment;
+    m_nextFreeSegment = segment;
+}
+
+void MarkStackSegmentAllocator::shrinkReserve()
+{
+    MarkStackSegment* segments;
+    {
+        MutexLocker locker(m_lock);
+        segments = m_nextFreeSegment;
+        m_nextFreeSegment = 0;
+    }
+    while (segments) {
+        MarkStackSegment* toFree = segments;
+        segments = segments->m_previous;
+        OSAllocator::decommitAndRelease(toFree, Heuristics::gcMarkStackSegmentSize);
+    }
+}
+
+MarkStackArray::MarkStackArray(MarkStackSegmentAllocator& allocator)
+    : m_allocator(allocator)
+    , m_segmentCapacity(MarkStackSegment::capacityFromSize(Heuristics::gcMarkStackSegmentSize))
+    , m_top(0)
+    , m_numberOfPreviousSegments(0)
+{
+    m_topSegment = m_allocator.allocate();
+#if !ASSERT_DISABLED
+    m_topSegment->m_top = 0;
+#endif
+    m_topSegment->m_previous = 0;
 }
 
 MarkStackArray::~MarkStackArray()
 {
-    MarkStack::releaseStack(m_data, m_allocated);
+    ASSERT(!m_topSegment->m_previous);
+    m_allocator.release(m_topSegment);
 }
 
 void MarkStackArray::expand()
 {
-    size_t oldAllocation = m_allocated;
-    m_allocated *= 2;
-    m_capacity = m_allocated / sizeof(JSCell*);
-    void* newData = MarkStack::allocateStack(m_allocated);
-    memcpy(newData, m_data, oldAllocation);
-    MarkStack::releaseStack(m_data, oldAllocation);
-    m_data = static_cast<const JSCell**>(newData);
-}
-
-void MarkStackArray::shrinkAllocation(size_t size)
-{
-    ASSERT(size <= m_allocated);
-    ASSERT(isPageAligned(size));
-    if (size == m_allocated)
-        return;
-#if OS(WINDOWS)
-    // We cannot release a part of a region with VirtualFree. To get around this,
-    // we'll release the entire region and reallocate the size that we want.
-    MarkStack::releaseStack(m_data, m_allocated);
-    m_data = static_cast<const JSCell**>(MarkStack::allocateStack(size));
+    ASSERT(m_topSegment->m_top == m_segmentCapacity);
+
+    m_numberOfPreviousSegments++;
+
+    MarkStackSegment* nextSegment = m_allocator.allocate();
+#if !ASSERT_DISABLED
+    nextSegment->m_top = 0;
+#endif
+    nextSegment->m_previous = m_topSegment;
+    m_topSegment = nextSegment;
+    setTopForEmptySegment();
+    validatePrevious();
+}
+
+bool MarkStackArray::refill()
+{
+    validatePrevious();
+    if (top())
+        return true;
+    MarkStackSegment* toFree = m_topSegment;
+    MarkStackSegment* previous = m_topSegment->m_previous;
+    if (!previous)
+        return false;
+    ASSERT(m_numberOfPreviousSegments);
+    m_numberOfPreviousSegments--;
+    m_topSegment = previous;
+    m_allocator.release(toFree);
+    setTopForFullSegment();
+    validatePrevious();
+    return true;
+}
+
+bool MarkStackArray::donateSomeCellsTo(MarkStackArray& other)
+{
+    ASSERT(m_segmentCapacity == other.m_segmentCapacity);
+    validatePrevious();
+    other.validatePrevious();
+
+    // Fast check: see if the other mark stack already has enough segments.
+    if (other.m_numberOfPreviousSegments + 1 >= Heuristics::maximumNumberOfSharedSegments)
+        return false;
+
+    size_t numberOfCellsToKeep = Heuristics::minimumNumberOfCellsToKeep;
+    ASSERT(m_top > numberOfCellsToKeep || m_topSegment->m_previous);
+
+    // Looks like we should donate! Give the other mark stack all of our
+    // previous segments, and then top it off.
+    MarkStackSegment* previous = m_topSegment->m_previous;
+    while (previous) {
+        ASSERT(m_numberOfPreviousSegments);
+
+        MarkStackSegment* current = previous;
+        previous = current->m_previous;
+
+        current->m_previous = other.m_topSegment->m_previous;
+        other.m_topSegment->m_previous = current;
+
+        m_numberOfPreviousSegments--;
+        other.m_numberOfPreviousSegments++;
+    }
+    ASSERT(!m_numberOfPreviousSegments);
+    m_topSegment->m_previous = 0;
+    validatePrevious();
+    other.validatePrevious();
+
+    // Now top off. We want to keep at a minimum numberOfCellsToKeep, but if
+    // we really have a lot of work, we give up half.
+    if (m_top > numberOfCellsToKeep * 2)
+        numberOfCellsToKeep = m_top / 2;
+    while (m_top > numberOfCellsToKeep)
+        other.append(removeLast());
+
+    return true;
+}
+
+void MarkStackArray::stealSomeCellsFrom(MarkStackArray& other)
+{
+    ASSERT(m_segmentCapacity == other.m_segmentCapacity);
+    validatePrevious();
+    other.validatePrevious();
+
+    // If other has an entire segment, steal it and return.
+    if (other.m_topSegment->m_previous) {
+        ASSERT(other.m_topSegment->m_previous->m_top == m_segmentCapacity);
+
+        // First remove a segment from other.
+        MarkStackSegment* current = other.m_topSegment->m_previous;
+        other.m_topSegment->m_previous = current->m_previous;
+        other.m_numberOfPreviousSegments--;
+
+        ASSERT(!!other.m_numberOfPreviousSegments == !!other.m_topSegment->m_previous);
+
+        // Now add it to this.
+        current->m_previous = m_topSegment->m_previous;
+        m_topSegment->m_previous = current;
+        m_numberOfPreviousSegments++;
+
+        validatePrevious();
+        other.validatePrevious();
+        return;
+    }
+
+    // Otherwise drain 1/Nth of the shared array where N is the number of
+    // workers, or Heuristics::minimumNumberOfCellsToKeep, whichever is bigger.
+    size_t numberOfCellsToSteal = std::max((size_t)Heuristics::minimumNumberOfCellsToKeep, other.size() / Heuristics::numberOfGCMarkers);
+    while (numberOfCellsToSteal-- > 0 && other.canRemoveLast())
+        append(other.removeLast());
+}
+
+#if ENABLE(PARALLEL_GC)
+void MarkStackThreadSharedData::markingThreadMain()
+{
+    WTF::registerGCThread();
+    SlotVisitor slotVisitor(*this, m_globalData->jsArrayVPtr, m_globalData->jsFinalObjectVPtr, m_globalData->jsStringVPtr);
+    ParallelModeEnabler enabler(slotVisitor);
+    slotVisitor.drainFromShared(SlotVisitor::SlaveDrain);
+}
+
+void* MarkStackThreadSharedData::markingThreadStartFunc(void* shared)
+{
+    static_cast<MarkStackThreadSharedData*>(shared)->markingThreadMain();
+    return 0;
+}
+#endif
+
+MarkStackThreadSharedData::MarkStackThreadSharedData(JSGlobalData* globalData)
+    : m_globalData(globalData)
+    , m_sharedMarkStack(m_segmentAllocator)
+    , m_numberOfActiveParallelMarkers(0)
+    , m_parallelMarkersShouldExit(false)
+    , m_firstWeakReferenceHarvester(0)
+{
+#if ENABLE(PARALLEL_GC)
+    for (unsigned i = 1; i < Heuristics::numberOfGCMarkers; ++i) {
+        m_markingThreads.append(createThread(markingThreadStartFunc, this, "JavaScriptCore::Marking"));
+        ASSERT(m_markingThreads.last());
+    }
+#endif
+}
+
+MarkStackThreadSharedData::~MarkStackThreadSharedData()
+{
+#if ENABLE(PARALLEL_GC)
+    // Destroy our marking threads.
+    {
+        MutexLocker locker(m_markingLock);
+        m_parallelMarkersShouldExit = true;
+        m_markingCondition.broadcast();
+    }
+    for (unsigned i = 0; i < m_markingThreads.size(); ++i)
+        waitForThreadCompletion(m_markingThreads[i], 0);
+#endif
+}
+
+void MarkStackThreadSharedData::reset()
+{
+    ASSERT(!m_numberOfActiveParallelMarkers);
+    ASSERT(!m_parallelMarkersShouldExit);
+    ASSERT(m_sharedMarkStack.isEmpty());
+
+#if ENABLE(PARALLEL_GC)
+    m_segmentAllocator.shrinkReserve();
+    m_opaqueRoots.clear();
 #else
-    MarkStack::releaseStack(reinterpret_cast<char*>(m_data) + size, m_allocated - size);
-#endif
-    m_allocated = size;
-    m_capacity = m_allocated / sizeof(JSCell*);
+    ASSERT(m_opaqueRoots.isEmpty());
+#endif
 }
 
…
 {
     m_visitCount = 0;
-    m_stack.shrinkAllocation(pageSize());
+    ASSERT(m_stack.isEmpty());
+#if ENABLE(PARALLEL_GC)
+    ASSERT(m_opaqueRoots.isEmpty()); // Should have merged by now.
+#else
     m_opaqueRoots.clear();
+#endif
 }
 
…
 }
 
+void SlotVisitor::donateSlow()
+{
+    // Refuse to donate if shared has more entries than I do.
+    if (m_shared.m_sharedMarkStack.size() > m_stack.size())
+        return;
+    MutexLocker locker(m_shared.m_markingLock);
+    if (m_stack.donateSomeCellsTo(m_shared.m_sharedMarkStack)) {
+        // Only wake up threads if the shared stack is big enough; otherwise assume that
+        // it's more profitable for us to just scan this ourselves later.
+        if (m_shared.m_sharedMarkStack.size() >= Heuristics::sharedStackWakeupThreshold)
+            m_shared.m_markingCondition.broadcast();
+    }
+}
+
 void SlotVisitor::drain()
 {
+    ASSERT(m_isInParallelMode);
+
     void* jsFinalObjectVPtr = m_jsFinalObjectVPtr;
     void* jsArrayVPtr = m_jsArrayVPtr;
     void* jsStringVPtr = m_jsStringVPtr;
 
-    while (!m_stack.isEmpty())
-        visitChildren(*this, m_stack.removeLast(), jsFinalObjectVPtr, jsArrayVPtr, jsStringVPtr);
+#if ENABLE(PARALLEL_GC)
+    if (Heuristics::numberOfGCMarkers > 1) {
+        while (!m_stack.isEmpty()) {
+            m_stack.refill();
+            for (unsigned countdown = Heuristics::minimumNumberOfScansBetweenRebalance; m_stack.canRemoveLast() && countdown--;)
+                visitChildren(*this, m_stack.removeLast(), jsFinalObjectVPtr, jsArrayVPtr, jsStringVPtr);
+            donateKnownParallel();
+        }
+
+        mergeOpaqueRootsIfNecessary();
+        return;
+    }
+#endif
+
+    while (!m_stack.isEmpty()) {
+        m_stack.refill();
+        while (m_stack.canRemoveLast())
+            visitChildren(*this, m_stack.removeLast(), jsFinalObjectVPtr, jsArrayVPtr, jsStringVPtr);
+    }
+}
+
+void SlotVisitor::drainFromShared(SharedDrainMode sharedDrainMode)
+{
+    ASSERT(m_isInParallelMode);
+
+    ASSERT(Heuristics::numberOfGCMarkers);
+
+    bool shouldBeParallel;
+
+#if ENABLE(PARALLEL_GC)
+    shouldBeParallel = Heuristics::numberOfGCMarkers > 1;
+#else
+    ASSERT(Heuristics::numberOfGCMarkers == 1);
+    shouldBeParallel = false;
+#endif
+
+    if (!shouldBeParallel) {
+        // This call should be a no-op.
+        ASSERT_UNUSED(sharedDrainMode, sharedDrainMode == MasterDrain);
+        ASSERT(m_stack.isEmpty());
+        ASSERT(m_shared.m_sharedMarkStack.isEmpty());
+        return;
+    }
+
+#if ENABLE(PARALLEL_GC)
+    {
+        MutexLocker locker(m_shared.m_markingLock);
+        m_shared.m_numberOfActiveParallelMarkers++;
+    }
+    while (true) {
+        {
+            MutexLocker locker(m_shared.m_markingLock);
+            m_shared.m_numberOfActiveParallelMarkers--;
+
+            // How we wait differs depending on drain mode.
+            if (sharedDrainMode == MasterDrain) {
+                // Wait until either termination is reached, or until there is some work
+                // for us to do.
+                while (true) {
+                    // Did we reach termination?
+                    if (!m_shared.m_numberOfActiveParallelMarkers && m_shared.m_sharedMarkStack.isEmpty())
+                        return;
+
+                    // Is there work to be done?
+                    if (!m_shared.m_sharedMarkStack.isEmpty())
+                        break;
+
+                    // Otherwise wait.
+                    m_shared.m_markingCondition.wait(m_shared.m_markingLock);
+                }
+            } else {
+                ASSERT(sharedDrainMode == SlaveDrain);
+
+                // Did we detect termination? If so, let the master know.
+                if (!m_shared.m_numberOfActiveParallelMarkers && m_shared.m_sharedMarkStack.isEmpty())
+                    m_shared.m_markingCondition.broadcast();
+
+                while (m_shared.m_sharedMarkStack.isEmpty() && !m_shared.m_parallelMarkersShouldExit)
+                    m_shared.m_markingCondition.wait(m_shared.m_markingLock);
+
+                // Is the VM exiting? If so, exit this thread.
+                if (m_shared.m_parallelMarkersShouldExit)
+                    return;
+            }
+
+            m_stack.stealSomeCellsFrom(m_shared.m_sharedMarkStack);
+            m_shared.m_numberOfActiveParallelMarkers++;
+        }
+
+        drain();
+    }
+#endif
+}
+
+void MarkStack::mergeOpaqueRoots()
+{
+    ASSERT(!m_opaqueRoots.isEmpty()); // Should only be called when opaque roots are non-empty.
+    {
+        MutexLocker locker(m_shared.m_opaqueRootsLock);
+        HashSet<void*>::iterator begin = m_opaqueRoots.begin();
+        HashSet<void*>::iterator end = m_opaqueRoots.end();
+        for (HashSet<void*>::iterator iter = begin; iter != end; ++iter)
+            m_shared.m_opaqueRoots.add(*iter);
+    }
+    m_opaqueRoots.clear();
 }
 
 void SlotVisitor::harvestWeakReferences()
 {
-    while (m_firstWeakReferenceHarvester) {
-        WeakReferenceHarvester* current = m_firstWeakReferenceHarvester;
+    while (m_shared.m_firstWeakReferenceHarvester) {
+        WeakReferenceHarvester* current = m_shared.m_firstWeakReferenceHarvester;
         WeakReferenceHarvester* next = reinterpret_cast<WeakReferenceHarvester*>(current->m_nextAndFlag & ~1);
         current->m_nextAndFlag = 0;
-        m_firstWeakReferenceHarvester = next;
+        m_shared.m_firstWeakReferenceHarvester = next;
         current->visitWeakReferences(*this);
     }