source: webkit/trunk/JavaScriptCore/wtf/FastMalloc.cpp@46598

Last change on this file since 46598 was 46511, checked in by [email protected], 16 years ago

Reviewed by Darin Adler.

https://bugs.webkit.org/show_bug.cgi?id=27236

  • Implement TCMalloc_SystemRelease and TCMalloc_SystemCommit for Windows.
  • Use a background thread to periodically scavenge memory to release back to the system.
  • wtf/FastMalloc.cpp:
    (WTF::TCMalloc_PageHeap::init):
    (WTF::TCMalloc_PageHeap::runScavengerThread):
    (WTF::TCMalloc_PageHeap::scavenge):
    (WTF::TCMalloc_PageHeap::shouldContinueScavenging):
    (WTF::TCMalloc_PageHeap::New):
    (WTF::TCMalloc_PageHeap::AllocLarge):
    (WTF::TCMalloc_PageHeap::Delete):
    (WTF::TCMalloc_PageHeap::GrowHeap):
    (WTF::sleep):
    (WTF::TCMalloc_PageHeap::scavengerThread):
  • wtf/TCSystemAlloc.cpp:
    (TCMalloc_SystemRelease):
    (TCMalloc_SystemCommit):
  • wtf/TCSystemAlloc.h:
  • Property svn:eol-style set to native
File size: 137.3 KB
1// Copyright (c) 2005, 2007, Google Inc.
2// All rights reserved.
3// Copyright (C) 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// ---
32// Author: Sanjay Ghemawat <[email protected]>
33//
34// A malloc that uses a per-thread cache to satisfy small malloc requests.
35// (The time for malloc/free of a small object drops from 300 ns to 50 ns.)
36//
37// See doc/tcmalloc.html for a high-level
38// description of how this malloc works.
39//
40// SYNCHRONIZATION
41// 1. The thread-specific lists are accessed without acquiring any locks.
42// This is safe because each such list is only accessed by one thread.
43// 2. We have a lock per central free-list, and hold it while manipulating
44// the central free list for a particular size.
45// 3. The central page allocator is protected by "pageheap_lock".
46// 4. The pagemap (which maps from page-number to descriptor),
47// can be read without holding any locks, and written while holding
48// the "pageheap_lock".
49// 5. To improve performance, a subset of the information one can get
50// from the pagemap is cached in a data structure, pagemap_cache_,
51// that atomically reads and writes its entries. This cache can be
52// read and written without locking.
53//
54// This multi-threaded access to the pagemap is safe for fairly
55// subtle reasons. We basically assume that when an object X is
56// allocated by thread A and deallocated by thread B, there must
57// have been appropriate synchronization in the handoff of object
58// X from thread A to thread B. The same logic applies to pagemap_cache_.
59//
60// THE PAGEID-TO-SIZECLASS CACHE
61// Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache
62// returns 0 for a particular PageID then that means "no information," not that
63// the sizeclass is 0. The cache may have stale information for pages that do
64// not hold the beginning of any free()'able object. Staleness is eliminated
65// in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and
66// do_memalign() for all other relevant pages.
67//
68// TODO: Bias reclamation to larger addresses
69// TODO: implement mallinfo/mallopt
70// TODO: Better testing
71//
72// 9/28/2003 (new page-level allocator replaces ptmalloc2):
73// * malloc/free of small objects goes from ~300 ns to ~50 ns.
74// * allocation of a reasonably complicated struct
75// goes from about 1100 ns to about 300 ns.
76
77#include "config.h"
78#include "FastMalloc.h"
79
80#include "Assertions.h"
81#include <limits>
82#if ENABLE(JSC_MULTIPLE_THREADS)
83#include <pthread.h>
84#endif
85
86#ifndef NO_TCMALLOC_SAMPLES
87#ifdef WTF_CHANGES
88#define NO_TCMALLOC_SAMPLES
89#endif
90#endif
91
92#if !defined(USE_SYSTEM_MALLOC) && defined(NDEBUG)
93#define FORCE_SYSTEM_MALLOC 0
94#else
95#define FORCE_SYSTEM_MALLOC 1
96#endif
97
98
99// Use a background thread to periodically scavenge memory to release back to the system
100#define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1
101
102#ifndef NDEBUG
103namespace WTF {
104
105#if ENABLE(JSC_MULTIPLE_THREADS)
106static pthread_key_t isForbiddenKey;
107static pthread_once_t isForbiddenKeyOnce = PTHREAD_ONCE_INIT;
108static void initializeIsForbiddenKey()
109{
110 pthread_key_create(&isForbiddenKey, 0);
111}
112
113static bool isForbidden()
114{
115 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
116 return !!pthread_getspecific(isForbiddenKey);
117}
118
119void fastMallocForbid()
120{
121 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
122 pthread_setspecific(isForbiddenKey, &isForbiddenKey);
123}
124
125void fastMallocAllow()
126{
127 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
128 pthread_setspecific(isForbiddenKey, 0);
129}
130
131#else
132
133static bool staticIsForbidden;
134static bool isForbidden()
135{
136 return staticIsForbidden;
137}
138
139void fastMallocForbid()
140{
141 staticIsForbidden = true;
142}
143
144void fastMallocAllow()
145{
146 staticIsForbidden = false;
147}
148#endif // ENABLE(JSC_MULTIPLE_THREADS)
149
150} // namespace WTF
151#endif // NDEBUG
152
153#include <string.h>
154
155namespace WTF {
156
157#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
158
159namespace Internal {
160
161void fastMallocMatchFailed(void*)
162{
163 CRASH();
164}
165
166} // namespace Internal
167
168#endif
169
170void* fastZeroedMalloc(size_t n)
171{
172 void* result = fastMalloc(n);
173 memset(result, 0, n);
174 return result;
175}
176
177void* tryFastZeroedMalloc(size_t n)
178{
179 void* result = tryFastMalloc(n);
180 if (!result)
181 return 0;
182 memset(result, 0, n);
183 return result;
184}
185
186} // namespace WTF
187
188#if FORCE_SYSTEM_MALLOC
189
190#include <stdlib.h>
191#if !PLATFORM(WIN_OS)
192 #include <pthread.h>
193#else
194 #include "windows.h"
195#endif
196
197namespace WTF {
198
199void* tryFastMalloc(size_t n)
200{
201 ASSERT(!isForbidden());
202
203#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
204 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= n) // If overflow would occur...
205 return 0;
206
207 void* result = malloc(n + sizeof(AllocAlignmentInteger));
208 if (!result)
209 return 0;
210
211 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
212 result = static_cast<AllocAlignmentInteger*>(result) + 1;
213
214 return result;
215#else
216 return malloc(n);
217#endif
218}
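// Layout sketch (editorial addition, not part of the original file): when
// FAST_MALLOC_MATCH_VALIDATION is enabled, each allocation is prefixed with a
// small tag word and the caller receives a pointer just past it:
//
//   [ AllocAlignmentInteger tag == AllocTypeMalloc ][ n bytes for the caller ]
//   ^-- pointer returned by malloc()                ^-- pointer returned here
//
// fastFree() below steps the pointer back by one AllocAlignmentInteger, checks
// the tag, and passes the original malloc() pointer to free().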
219
220void* fastMalloc(size_t n)
221{
222 ASSERT(!isForbidden());
223
224#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
225 void* result = tryFastMalloc(n);
226#else
227 void* result = malloc(n);
228#endif
229
230 if (!result)
231 CRASH();
232 return result;
233}
234
235void* tryFastCalloc(size_t n_elements, size_t element_size)
236{
237 ASSERT(!isForbidden());
238
239#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
240 size_t totalBytes = n_elements * element_size;
241 if ((n_elements > 1 && element_size && (totalBytes / element_size) != n_elements) || (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= totalBytes))
242 return 0;
243
244 totalBytes += sizeof(AllocAlignmentInteger);
245 void* result = malloc(totalBytes);
246 if (!result)
247 return 0;
248
249 memset(result, 0, totalBytes);
250 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
251 result = static_cast<AllocAlignmentInteger*>(result) + 1;
252 return result;
253#else
254 return calloc(n_elements, element_size);
255#endif
256}
257
258void* fastCalloc(size_t n_elements, size_t element_size)
259{
260 ASSERT(!isForbidden());
261
262#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
263 void* result = tryFastCalloc(n_elements, element_size);
264#else
265 void* result = calloc(n_elements, element_size);
266#endif
267
268 if (!result)
269 CRASH();
270 return result;
271}
272
273void fastFree(void* p)
274{
275 ASSERT(!isForbidden());
276
277#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
278 if (!p)
279 return;
280
281 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(p);
282 if (*header != Internal::AllocTypeMalloc)
283 Internal::fastMallocMatchFailed(p);
284 free(header);
285#else
286 free(p);
287#endif
288}
289
290void* tryFastRealloc(void* p, size_t n)
291{
292 ASSERT(!isForbidden());
293
294#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
295 if (p) {
296 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= n) // If overflow would occur...
297 return 0;
298 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(p);
299 if (*header != Internal::AllocTypeMalloc)
300 Internal::fastMallocMatchFailed(p);
301 void* result = realloc(header, n + sizeof(AllocAlignmentInteger));
302 if (!result)
303 return 0;
304
305 // This should not be needed because the value is already there:
306 // *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
307 result = static_cast<AllocAlignmentInteger*>(result) + 1;
308 return result;
309 } else {
310 return fastMalloc(n);
311 }
312#else
313 return realloc(p, n);
314#endif
315}
316
317void* fastRealloc(void* p, size_t n)
318{
319 ASSERT(!isForbidden());
320
321#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
322 void* result = tryFastRealloc(p, n);
323#else
324 void* result = realloc(p, n);
325#endif
326
327 if (!result)
328 CRASH();
329 return result;
330}
331
332void releaseFastMallocFreeMemory() { }
333
334FastMallocStatistics fastMallocStatistics()
335{
336 FastMallocStatistics statistics = { 0, 0, 0, 0 };
337 return statistics;
338}
339
340} // namespace WTF
341
342#if PLATFORM(DARWIN)
343// This symbol is present in the JavaScriptCore exports file even when FastMalloc is disabled.
344// It will never be used in this case, so its type and value are less interesting than its presence.
345extern "C" const int jscore_fastmalloc_introspection = 0;
346#endif
347
348#else // FORCE_SYSTEM_MALLOC
349
350#if HAVE(STDINT_H)
351#include <stdint.h>
352#elif HAVE(INTTYPES_H)
353#include <inttypes.h>
354#else
355#include <sys/types.h>
356#endif
357
358#include "AlwaysInline.h"
359#include "Assertions.h"
360#include "TCPackedCache.h"
361#include "TCPageMap.h"
362#include "TCSpinLock.h"
363#include "TCSystemAlloc.h"
364#include <algorithm>
365#include <errno.h>
366#include <limits>
367#include <new>
368#include <pthread.h>
369#include <stdarg.h>
370#include <stddef.h>
371#include <stdio.h>
372#if COMPILER(MSVC)
373#ifndef WIN32_LEAN_AND_MEAN
374#define WIN32_LEAN_AND_MEAN
375#endif
376#include <windows.h>
377#endif
378
379#if WTF_CHANGES
380
381#if PLATFORM(DARWIN)
382#include "MallocZoneSupport.h"
383#include <wtf/HashSet.h>
384#endif
385
386#ifndef PRIuS
387#define PRIuS "zu"
388#endif
389
390// Calling pthread_getspecific through a global function pointer is faster than a normal
391// call to the function on Mac OS X, and it's used in performance-critical code. So we
392// use a function pointer. But that's not necessarily faster on other platforms, and we had
393// problems with this technique on Windows, so we'll do this only on Mac OS X.
394#if PLATFORM(DARWIN)
395static void* (*pthread_getspecific_function_pointer)(pthread_key_t) = pthread_getspecific;
396#define pthread_getspecific(key) pthread_getspecific_function_pointer(key)
397#endif
398
399#define DEFINE_VARIABLE(type, name, value, meaning) \
400 namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \
401 type FLAGS_##name(value); \
402 char FLAGS_no##name; \
403 } \
404 using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name
405
406#define DEFINE_int64(name, value, meaning) \
407 DEFINE_VARIABLE(int64_t, name, value, meaning)
408
409#define DEFINE_double(name, value, meaning) \
410 DEFINE_VARIABLE(double, name, value, meaning)
411
412namespace WTF {
413
414#define malloc fastMalloc
415#define calloc fastCalloc
416#define free fastFree
417#define realloc fastRealloc
418
419#define MESSAGE LOG_ERROR
420#define CHECK_CONDITION ASSERT
421
422#if PLATFORM(DARWIN)
423class Span;
424class TCMalloc_Central_FreeListPadded;
425class TCMalloc_PageHeap;
426class TCMalloc_ThreadCache;
427template <typename T> class PageHeapAllocator;
428
429class FastMallocZone {
430public:
431 static void init();
432
433 static kern_return_t enumerate(task_t, void*, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t, vm_range_recorder_t);
434 static size_t goodSize(malloc_zone_t*, size_t size) { return size; }
435 static boolean_t check(malloc_zone_t*) { return true; }
436 static void print(malloc_zone_t*, boolean_t) { }
437 static void log(malloc_zone_t*, void*) { }
438 static void forceLock(malloc_zone_t*) { }
439 static void forceUnlock(malloc_zone_t*) { }
440 static void statistics(malloc_zone_t*, malloc_statistics_t* stats) { memset(stats, 0, sizeof(malloc_statistics_t)); }
441
442private:
443 FastMallocZone(TCMalloc_PageHeap*, TCMalloc_ThreadCache**, TCMalloc_Central_FreeListPadded*, PageHeapAllocator<Span>*, PageHeapAllocator<TCMalloc_ThreadCache>*);
444 static size_t size(malloc_zone_t*, const void*);
445 static void* zoneMalloc(malloc_zone_t*, size_t);
446 static void* zoneCalloc(malloc_zone_t*, size_t numItems, size_t size);
447 static void zoneFree(malloc_zone_t*, void*);
448 static void* zoneRealloc(malloc_zone_t*, void*, size_t);
449 static void* zoneValloc(malloc_zone_t*, size_t) { LOG_ERROR("valloc is not supported"); return 0; }
450 static void zoneDestroy(malloc_zone_t*) { }
451
452 malloc_zone_t m_zone;
453 TCMalloc_PageHeap* m_pageHeap;
454 TCMalloc_ThreadCache** m_threadHeaps;
455 TCMalloc_Central_FreeListPadded* m_centralCaches;
456 PageHeapAllocator<Span>* m_spanAllocator;
457 PageHeapAllocator<TCMalloc_ThreadCache>* m_pageHeapAllocator;
458};
459
460#endif
461
462#endif
463
464#ifndef WTF_CHANGES
465// This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if
466// you're porting to a system where you really can't get a stacktrace.
467#ifdef NO_TCMALLOC_SAMPLES
468// We use #define so code compiles even if you #include stacktrace.h somehow.
469# define GetStackTrace(stack, depth, skip) (0)
470#else
471# include <google/stacktrace.h>
472#endif
473#endif
474
475// Even if we have support for thread-local storage in the compiler
476// and linker, the OS may not support it. We need to check that at
477// runtime. Right now, we have to keep a manual set of "bad" OSes.
478#if defined(HAVE_TLS)
479 static bool kernel_supports_tls = false; // be conservative
480 static inline bool KernelSupportsTLS() {
481 return kernel_supports_tls;
482 }
483# if !HAVE_DECL_UNAME // if too old for uname, probably too old for TLS
484 static void CheckIfKernelSupportsTLS() {
485 kernel_supports_tls = false;
486 }
487# else
488# include <sys/utsname.h> // DECL_UNAME checked for <sys/utsname.h> too
489 static void CheckIfKernelSupportsTLS() {
490 struct utsname buf;
491 if (uname(&buf) != 0) { // should be impossible
492 MESSAGE("uname failed assuming no TLS support (errno=%d)\n", errno);
493 kernel_supports_tls = false;
494 } else if (strcasecmp(buf.sysname, "linux") == 0) {
495 // The linux case: the first kernel to support TLS was 2.6.0
496 if (buf.release[0] < '2' && buf.release[1] == '.') // 0.x or 1.x
497 kernel_supports_tls = false;
498 else if (buf.release[0] == '2' && buf.release[1] == '.' &&
499 buf.release[2] >= '0' && buf.release[2] < '6' &&
500 buf.release[3] == '.') // 2.0 - 2.5
501 kernel_supports_tls = false;
502 else
503 kernel_supports_tls = true;
504 } else { // some other kernel, we'll be optimistic
505 kernel_supports_tls = true;
506 }
507 // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG
508 }
509# endif // HAVE_DECL_UNAME
510#endif // HAVE_TLS
511
512// __THROW is defined in glibc systems. It means, counter-intuitively,
513// "This function will never throw an exception." It's an optional
514// optimization tool, but we may need to use it to match glibc prototypes.
515#ifndef __THROW // I guess we're not on a glibc system
516# define __THROW // __THROW is just an optimization, so ok to make it ""
517#endif
518
519//-------------------------------------------------------------------
520// Configuration
521//-------------------------------------------------------------------
522
523// Not all possible combinations of the following parameters make
524// sense. In particular, if kMaxSize increases, you may have to
525// increase kNumClasses as well.
526static const size_t kPageShift = 12;
527static const size_t kPageSize = 1 << kPageShift;
528static const size_t kMaxSize = 8u * kPageSize;
529static const size_t kAlignShift = 3;
530static const size_t kAlignment = 1 << kAlignShift;
531static const size_t kNumClasses = 68;
532
533// Allocates a big block of memory for the pagemap once we reach more than
534// 128MB
535static const size_t kPageMapBigAllocationThreshold = 128 << 20;
536
537// Minimum number of pages to fetch from system at a time. Must be
538// significantly bigger than kBlockSize to amortize system-call
539// overhead, and also to reduce external fragmentation. Also, we
540// should keep this value big because various incarnations of Linux
541// have small limits on the number of mmap() regions per
542// address-space.
543static const size_t kMinSystemAlloc = 1 << (20 - kPageShift);
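// In concrete terms (illustrative note, derived from the constants above): with
// kPageShift == 12 a page is 4096 bytes, kMaxSize == 8 * 4096 == 32768 bytes is
// the largest "small" allocation, and kMinSystemAlloc == 1 << (20 - 12) == 256
// pages, so the page heap grows by at least 256 * 4 KB == 1 MB per system call.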
544
545// Number of objects to move between a per-thread list and a central
546// list in one shot. We want this to be not too small so we can
547// amortize the lock overhead for accessing the central list. Making
548// it too big may temporarily cause unnecessary memory wastage in the
549// per-thread free list until the scavenger cleans up the list.
550static int num_objects_to_move[kNumClasses];
551
552// Maximum length we allow a per-thread free-list to have before we
553// move objects from it into the corresponding central free-list. We
554// want this big to avoid locking the central free-list too often. It
555// should not hurt to make this list somewhat big because the
556// scavenging code will shrink it down when its contents are not in use.
557static const int kMaxFreeListLength = 256;
558
559// Lower and upper bounds on the per-thread cache sizes
560static const size_t kMinThreadCacheSize = kMaxSize * 2;
561static const size_t kMaxThreadCacheSize = 2 << 20;
562
563// Default bound on the total amount of thread caches
564static const size_t kDefaultOverallThreadCacheSize = 16 << 20;
565
566// For all span-lengths < kMaxPages we keep an exact-size list.
567// REQUIRED: kMaxPages >= kMinSystemAlloc;
568static const size_t kMaxPages = kMinSystemAlloc;
569
570/* The smallest prime > 2^n */
571static int primes_list[] = {
572 // Small values might cause high rates of sampling
573 // and hence commented out.
574 // 2, 5, 11, 17, 37, 67, 131, 257,
575 // 521, 1031, 2053, 4099, 8209, 16411,
576 32771, 65537, 131101, 262147, 524309, 1048583,
577 2097169, 4194319, 8388617, 16777259, 33554467 };
578
579// Twice the approximate gap between sampling actions.
580// I.e., we take one sample approximately once every
581// tcmalloc_sample_parameter/2
582// bytes of allocation, i.e., ~ once every 128KB.
583// Must be a prime number.
584#ifdef NO_TCMALLOC_SAMPLES
585DEFINE_int64(tcmalloc_sample_parameter, 0,
586 "Unused: code is compiled with NO_TCMALLOC_SAMPLES");
587static size_t sample_period = 0;
588#else
589DEFINE_int64(tcmalloc_sample_parameter, 262147,
590 "Twice the approximate gap between sampling actions."
591 " Must be a prime number. Otherwise will be rounded up to a "
592 " larger prime number");
593static size_t sample_period = 262147;
594#endif
595
596// Protects sample_period above
597static SpinLock sample_period_lock = SPINLOCK_INITIALIZER;
598
599// Parameters for controlling how fast memory is returned to the OS.
600
601DEFINE_double(tcmalloc_release_rate, 1,
602 "Rate at which we release unused memory to the system. "
603 "Zero means we never release memory back to the system. "
604 "Increase this flag to return memory faster; decrease it "
605 "to return memory slower. Reasonable rates are in the "
606 "range [0,10]");
607
608//-------------------------------------------------------------------
609// Mapping from size to size_class and vice versa
610//-------------------------------------------------------------------
611
612// Sizes <= 1024 have an alignment >= 8. So for such sizes we have an
613// array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128.
614// So for these larger sizes we have an array indexed by ceil(size/128).
615//
616// We flatten both logical arrays into one physical array and use
617// arithmetic to compute an appropriate index. The constants used by
618// ClassIndex() were selected to make the flattening work.
619//
620// Examples:
621// Size Expression Index
622// -------------------------------------------------------
623// 0 (0 + 7) / 8 0
624// 1 (1 + 7) / 8 1
625// ...
626// 1024 (1024 + 7) / 8 128
627// 1025 (1025 + 127 + (120<<7)) / 128 129
628// ...
629// 32768 (32768 + 127 + (120<<7)) / 128 376
630static const size_t kMaxSmallSize = 1024;
631static const int shift_amount[2] = { 3, 7 }; // For divides by 8 or 128
632static const int add_amount[2] = { 7, 127 + (120 << 7) };
633static unsigned char class_array[377];
634
635// Compute index of the class_array[] entry for a given size
636static inline int ClassIndex(size_t s) {
637 const int i = (s > kMaxSmallSize);
638 return static_cast<int>((s + add_amount[i]) >> shift_amount[i]);
639}
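// Worked example (illustrative): ClassIndex(100) == (100 + 7) >> 3 == 13, while
// ClassIndex(2000) == (2000 + 127 + (120 << 7)) >> 7 == 17487 >> 7 == 136, so
// small and large sizes index disjoint regions of class_array[] as shown in the
// table above.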
640
641// Mapping from size class to max size storable in that class
642static size_t class_to_size[kNumClasses];
643
644// Mapping from size class to number of pages to allocate at a time
645static size_t class_to_pages[kNumClasses];
646
647// TransferCache is used to cache transfers of num_objects_to_move[size_class]
648// back and forth between thread caches and the central cache for a given size
649// class.
650struct TCEntry {
651 void *head; // Head of chain of objects.
652 void *tail; // Tail of chain of objects.
653};
654// A central cache freelist can have anywhere from 0 to kNumTransferEntries
655// slots to put linked list chains into. To keep memory usage bounded, the total
656// number of TCEntries across size classes is fixed. Currently each size
657// class is initially given one TCEntry, which also means that the maximum any
658// one class can have is kNumClasses.
659static const int kNumTransferEntries = kNumClasses;
660
661// Note: the following only works for "n"s that fit in 32-bits, but
662// that is fine since we only use it for small sizes.
663static inline int LgFloor(size_t n) {
664 int log = 0;
665 for (int i = 4; i >= 0; --i) {
666 int shift = (1 << i);
667 size_t x = n >> shift;
668 if (x != 0) {
669 n = x;
670 log += shift;
671 }
672 }
673 ASSERT(n == 1);
674 return log;
675}
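// Worked example (illustrative): LgFloor(36) tries shifts of 16, 8, 4, 2 and 1;
// only the shifts by 4 and then by 1 leave a non-zero value, so log == 4 + 1 == 5,
// which is floor(log2(36)) since 32 <= 36 < 64.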
676
677// Some very basic linked list functions for dealing with void* used as
678// storage.
679
680static inline void *SLL_Next(void *t) {
681 return *(reinterpret_cast<void**>(t));
682}
683
684static inline void SLL_SetNext(void *t, void *n) {
685 *(reinterpret_cast<void**>(t)) = n;
686}
687
688static inline void SLL_Push(void **list, void *element) {
689 SLL_SetNext(element, *list);
690 *list = element;
691}
692
693static inline void *SLL_Pop(void **list) {
694 void *result = *list;
695 *list = SLL_Next(*list);
696 return result;
697}
698
699
700// Remove N elements from a linked list to which head points. head will be
701// modified to point to the new head. start and end will point to the first
702// and last nodes of the range. Note that the end node's next pointer is set to
703// NULL after this function is called.
704static inline void SLL_PopRange(void **head, int N, void **start, void **end) {
705 if (N == 0) {
706 *start = NULL;
707 *end = NULL;
708 return;
709 }
710
711 void *tmp = *head;
712 for (int i = 1; i < N; ++i) {
713 tmp = SLL_Next(tmp);
714 }
715
716 *start = *head;
717 *end = tmp;
718 *head = SLL_Next(tmp);
719 // Unlink range from list.
720 SLL_SetNext(tmp, NULL);
721}
722
723static inline void SLL_PushRange(void **head, void *start, void *end) {
724 if (!start) return;
725 SLL_SetNext(end, *head);
726 *head = start;
727}
728
729static inline size_t SLL_Size(void *head) {
730 int count = 0;
731 while (head) {
732 count++;
733 head = SLL_Next(head);
734 }
735 return count;
736}
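// Usage sketch (illustrative, with hypothetical objectA/objectB pointers): free
// objects are threaded through their own storage, so each object must be at
// least sizeof(void*) bytes.
//
//   void* freeList = NULL;
//   SLL_Push(&freeList, objectA);       // freeList -> A -> NULL
//   SLL_Push(&freeList, objectB);       // freeList -> B -> A -> NULL
//   void* popped = SLL_Pop(&freeList);  // popped == B, freeList -> A -> NULL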
737
738// Setup helper functions.
739
740static ALWAYS_INLINE size_t SizeClass(size_t size) {
741 return class_array[ClassIndex(size)];
742}
743
744// Get the byte-size for a specified class
745static ALWAYS_INLINE size_t ByteSizeForClass(size_t cl) {
746 return class_to_size[cl];
747}
748static int NumMoveSize(size_t size) {
749 if (size == 0) return 0;
750 // Use approx 64k transfers between thread and central caches.
751 int num = static_cast<int>(64.0 * 1024.0 / size);
752 if (num < 2) num = 2;
753 // Clamp well below kMaxFreeListLength to avoid ping pong between central
754 // and thread caches.
755 if (num > static_cast<int>(0.8 * kMaxFreeListLength))
756 num = static_cast<int>(0.8 * kMaxFreeListLength);
757
758 // Also, avoid bringing in too many objects into small object free
759 // lists. There are lots of such lists, and if we allow each one to
760 // fetch too many at a time, we end up having to scavenge too often
761 // (especially when there are lots of threads and each thread gets a
762 // small allowance for its thread cache).
763 //
764 // TODO: Make thread cache free list sizes dynamic so that we do not
765 // have to equally divide a fixed resource amongst lots of threads.
766 if (num > 32) num = 32;
767
768 return num;
769}
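// Worked example (illustrative): for a 64-byte size class, 64 KB / 64 == 1024
// objects, which is clamped first to 0.8 * kMaxFreeListLength == 204 and then to
// the final cap of 32; for a 4096-byte class the batch size is simply 16.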
770
771// Initialize the mapping arrays
772static void InitSizeClasses() {
773 // Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
774 if (ClassIndex(0) < 0) {
775 MESSAGE("Invalid class index %d for size 0\n", ClassIndex(0));
776 CRASH();
777 }
778 if (static_cast<size_t>(ClassIndex(kMaxSize)) >= sizeof(class_array)) {
779 MESSAGE("Invalid class index %d for kMaxSize\n", ClassIndex(kMaxSize));
780 CRASH();
781 }
782
783 // Compute the size classes we want to use
784 size_t sc = 1; // Next size class to assign
785 unsigned char alignshift = kAlignShift;
786 int last_lg = -1;
787 for (size_t size = kAlignment; size <= kMaxSize; size += (1 << alignshift)) {
788 int lg = LgFloor(size);
789 if (lg > last_lg) {
790 // Increase alignment every so often.
791 //
792 // Since we double the alignment every time size doubles and
793 // size >= 128, this means that space wasted due to alignment is
794 // at most 16/128 i.e., 12.5%. Plus we cap the alignment at 256
795 // bytes, so the space wasted as a percentage starts falling for
796 // sizes > 2K.
797 if ((lg >= 7) && (alignshift < 8)) {
798 alignshift++;
799 }
800 last_lg = lg;
801 }
802
803 // Allocate enough pages so leftover is less than 1/8 of total.
804 // This bounds wasted space to at most 12.5%.
805 size_t psize = kPageSize;
806 while ((psize % size) > (psize >> 3)) {
807 psize += kPageSize;
808 }
809 const size_t my_pages = psize >> kPageShift;
810
811 if (sc > 1 && my_pages == class_to_pages[sc-1]) {
812 // See if we can merge this into the previous class without
813 // increasing the fragmentation of the previous class.
814 const size_t my_objects = (my_pages << kPageShift) / size;
815 const size_t prev_objects = (class_to_pages[sc-1] << kPageShift)
816 / class_to_size[sc-1];
817 if (my_objects == prev_objects) {
818 // Adjust last class to include this size
819 class_to_size[sc-1] = size;
820 continue;
821 }
822 }
823
824 // Add new class
825 class_to_pages[sc] = my_pages;
826 class_to_size[sc] = size;
827 sc++;
828 }
829 if (sc != kNumClasses) {
830 MESSAGE("wrong number of size classes: found %" PRIuS " instead of %d\n",
831 sc, int(kNumClasses));
832 CRASH();
833 }
834
835 // Initialize the mapping arrays
836 int next_size = 0;
837 for (unsigned char c = 1; c < kNumClasses; c++) {
838 const size_t max_size_in_class = class_to_size[c];
839 for (size_t s = next_size; s <= max_size_in_class; s += kAlignment) {
840 class_array[ClassIndex(s)] = c;
841 }
842 next_size = static_cast<int>(max_size_in_class + kAlignment);
843 }
844
845 // Double-check sizes just to be safe
846 for (size_t size = 0; size <= kMaxSize; size++) {
847 const size_t sc = SizeClass(size);
848 if (sc == 0) {
849 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
850 CRASH();
851 }
852 if (sc > 1 && size <= class_to_size[sc-1]) {
853 MESSAGE("Allocating unnecessarily large class %" PRIuS " for %" PRIuS
854 "\n", sc, size);
855 CRASH();
856 }
857 if (sc >= kNumClasses) {
858 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
859 CRASH();
860 }
861 const size_t s = class_to_size[sc];
862 if (size > s) {
863 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc);
864 CRASH();
865 }
866 if (s == 0) {
867 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc);
868 CRASH();
869 }
870 }
871
872 // Initialize the num_objects_to_move array.
873 for (size_t cl = 1; cl < kNumClasses; ++cl) {
874 num_objects_to_move[cl] = NumMoveSize(ByteSizeForClass(cl));
875 }
876
877#ifndef WTF_CHANGES
878 if (false) {
879 // Dump class sizes and maximum external wastage per size class
880 for (size_t cl = 1; cl < kNumClasses; ++cl) {
881 const int alloc_size = class_to_pages[cl] << kPageShift;
882 const int alloc_objs = alloc_size / class_to_size[cl];
883 const int min_used = (class_to_size[cl-1] + 1) * alloc_objs;
884 const int max_waste = alloc_size - min_used;
885 MESSAGE("SC %3d [ %8d .. %8d ] from %8d ; %2.0f%% maxwaste\n",
886 int(cl),
887 int(class_to_size[cl-1] + 1),
888 int(class_to_size[cl]),
889 int(class_to_pages[cl] << kPageShift),
890 max_waste * 100.0 / alloc_size
891 );
892 }
893 }
894#endif
895}
896
897// -------------------------------------------------------------------------
898// Simple allocator for objects of a specified type. External locking
899// is required before accessing one of these objects.
900// -------------------------------------------------------------------------
901
902// Metadata allocator -- keeps stats about how many bytes allocated
903static uint64_t metadata_system_bytes = 0;
904static void* MetaDataAlloc(size_t bytes) {
905 void* result = TCMalloc_SystemAlloc(bytes, 0);
906 if (result != NULL) {
907 metadata_system_bytes += bytes;
908 }
909 return result;
910}
911
912template <class T>
913class PageHeapAllocator {
914 private:
915 // How much to allocate from system at a time
916 static const size_t kAllocIncrement = 32 << 10;
917
918 // Aligned size of T
919 static const size_t kAlignedSize
920 = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment);
921
922 // Free area from which to carve new objects
923 char* free_area_;
924 size_t free_avail_;
925
926 // Linked list of all regions allocated by this allocator
927 void* allocated_regions_;
928
929 // Free list of already carved objects
930 void* free_list_;
931
932 // Number of allocated but unfreed objects
933 int inuse_;
934
935 public:
936 void Init() {
937 ASSERT(kAlignedSize <= kAllocIncrement);
938 inuse_ = 0;
939 allocated_regions_ = 0;
940 free_area_ = NULL;
941 free_avail_ = 0;
942 free_list_ = NULL;
943 }
944
945 T* New() {
946 // Consult free list
947 void* result;
948 if (free_list_ != NULL) {
949 result = free_list_;
950 free_list_ = *(reinterpret_cast<void**>(result));
951 } else {
952 if (free_avail_ < kAlignedSize) {
953 // Need more room
954 char* new_allocation = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement));
955 if (!new_allocation)
956 CRASH();
957
958 *(void**)new_allocation = allocated_regions_;
959 allocated_regions_ = new_allocation;
960 free_area_ = new_allocation + kAlignedSize;
961 free_avail_ = kAllocIncrement - kAlignedSize;
962 }
963 result = free_area_;
964 free_area_ += kAlignedSize;
965 free_avail_ -= kAlignedSize;
966 }
967 inuse_++;
968 return reinterpret_cast<T*>(result);
969 }
970
971 void Delete(T* p) {
972 *(reinterpret_cast<void**>(p)) = free_list_;
973 free_list_ = p;
974 inuse_--;
975 }
976
977 int inuse() const { return inuse_; }
978
979#if defined(WTF_CHANGES) && PLATFORM(DARWIN)
980 template <class Recorder>
981 void recordAdministrativeRegions(Recorder& recorder, const RemoteMemoryReader& reader)
982 {
983 vm_address_t adminAllocation = reinterpret_cast<vm_address_t>(allocated_regions_);
984 while (adminAllocation) {
985 recorder.recordRegion(adminAllocation, kAllocIncrement);
986 adminAllocation = *reader(reinterpret_cast<vm_address_t*>(adminAllocation));
987 }
988 }
989#endif
990};
991
992// -------------------------------------------------------------------------
993// Span - a contiguous run of pages
994// -------------------------------------------------------------------------
995
996// Type that can hold a page number
997typedef uintptr_t PageID;
998
999// Type that can hold the length of a run of pages
1000typedef uintptr_t Length;
1001
1002static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;
1003
1004// Convert byte size into pages. This won't overflow, but may return
1005// an unreasonably large value if bytes is huge enough.
1006static inline Length pages(size_t bytes) {
1007 return (bytes >> kPageShift) +
1008 ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
1009}
1010
1011// Convert a user size into the number of bytes that will actually be
1012// allocated
1013static size_t AllocationSize(size_t bytes) {
1014 if (bytes > kMaxSize) {
1015 // Large object: we allocate an integral number of pages
1016 ASSERT(bytes <= (kMaxValidPages << kPageShift));
1017 return pages(bytes) << kPageShift;
1018 } else {
1019 // Small object: find the size class to which it belongs
1020 return ByteSizeForClass(SizeClass(bytes));
1021 }
1022}
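// Worked example (illustrative): a 40000-byte request is larger than kMaxSize,
// so it is rounded up to pages(40000) == 10 pages == 40960 bytes; a 100-byte
// request is instead rounded up to the byte size of its small-object size class.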
1023
1024// Information kept for a span (a contiguous run of pages).
1025struct Span {
1026 PageID start; // Starting page number
1027 Length length; // Number of pages in span
1028 Span* next; // Used when in linked list
1029 Span* prev; // Used when in linked list
1030 void* objects; // Linked list of free objects
1031 unsigned int free : 1; // Is the span free
1032#ifndef NO_TCMALLOC_SAMPLES
1033 unsigned int sample : 1; // Sampled object?
1034#endif
1035 unsigned int sizeclass : 8; // Size-class for small objects (or 0)
1036 unsigned int refcount : 11; // Number of non-free objects
1037 bool decommitted : 1;
1038
1039#undef SPAN_HISTORY
1040#ifdef SPAN_HISTORY
1041 // For debugging, we can keep a log of events per span
1042 int nexthistory;
1043 char history[64];
1044 int value[64];
1045#endif
1046};
1047
1048#define ASSERT_SPAN_COMMITTED(span) ASSERT(!span->decommitted)
1049
1050#ifdef SPAN_HISTORY
1051void Event(Span* span, char op, int v = 0) {
1052 span->history[span->nexthistory] = op;
1053 span->value[span->nexthistory] = v;
1054 span->nexthistory++;
1055 if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0;
1056}
1057#else
1058#define Event(s,o,v) ((void) 0)
1059#endif
1060
1061// Allocator/deallocator for spans
1062static PageHeapAllocator<Span> span_allocator;
1063static Span* NewSpan(PageID p, Length len) {
1064 Span* result = span_allocator.New();
1065 memset(result, 0, sizeof(*result));
1066 result->start = p;
1067 result->length = len;
1068#ifdef SPAN_HISTORY
1069 result->nexthistory = 0;
1070#endif
1071 return result;
1072}
1073
1074static inline void DeleteSpan(Span* span) {
1075#ifndef NDEBUG
1076 // In debug mode, trash the contents of deleted Spans
1077 memset(span, 0x3f, sizeof(*span));
1078#endif
1079 span_allocator.Delete(span);
1080}
1081
1082// -------------------------------------------------------------------------
1083// Doubly linked list of spans.
1084// -------------------------------------------------------------------------
1085
1086static inline void DLL_Init(Span* list) {
1087 list->next = list;
1088 list->prev = list;
1089}
1090
1091static inline void DLL_Remove(Span* span) {
1092 span->prev->next = span->next;
1093 span->next->prev = span->prev;
1094 span->prev = NULL;
1095 span->next = NULL;
1096}
1097
1098static ALWAYS_INLINE bool DLL_IsEmpty(const Span* list) {
1099 return list->next == list;
1100}
1101
1102static int DLL_Length(const Span* list) {
1103 int result = 0;
1104 for (Span* s = list->next; s != list; s = s->next) {
1105 result++;
1106 }
1107 return result;
1108}
1109
1110#if 0 /* Not needed at the moment -- causes compiler warnings if not used */
1111static void DLL_Print(const char* label, const Span* list) {
1112 MESSAGE("%-10s %p:", label, list);
1113 for (const Span* s = list->next; s != list; s = s->next) {
1114 MESSAGE(" <%p,%u,%u>", s, s->start, s->length);
1115 }
1116 MESSAGE("\n");
1117}
1118#endif
1119
1120static inline void DLL_Prepend(Span* list, Span* span) {
1121 ASSERT(span->next == NULL);
1122 ASSERT(span->prev == NULL);
1123 span->next = list->next;
1124 span->prev = list;
1125 list->next->prev = span;
1126 list->next = span;
1127}
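// Usage sketch (illustrative): each free list below embeds a sentinel Span, so
// an empty list is one whose sentinel points at itself.
//
//   Span sentinel;
//   DLL_Init(&sentinel);           // sentinel.next == sentinel.prev == &sentinel
//   DLL_Prepend(&sentinel, span);  // span (with null links) becomes sentinel.next
//   DLL_Remove(span);              // the list is empty again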
1128
1129// -------------------------------------------------------------------------
1130// Stack traces kept for sampled allocations
1131// The following state is protected by pageheap_lock_.
1132// -------------------------------------------------------------------------
1133
1134// size/depth are made the same size as a pointer so that some generic
1135// code below can conveniently cast them back and forth to void*.
1136static const int kMaxStackDepth = 31;
1137struct StackTrace {
1138 uintptr_t size; // Size of object
1139 uintptr_t depth; // Number of PC values stored in array below
1140 void* stack[kMaxStackDepth];
1141};
1142static PageHeapAllocator<StackTrace> stacktrace_allocator;
1143static Span sampled_objects;
1144
1145// -------------------------------------------------------------------------
1146// Map from page-id to per-page data
1147// -------------------------------------------------------------------------
1148
1149// We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines.
1150// We also use a simple one-level cache for hot PageID-to-sizeclass mappings,
1151// because sometimes the sizeclass is all the information we need.
1152
1153// Selector class -- general selector uses 3-level map
1154template <int BITS> class MapSelector {
1155 public:
1156 typedef TCMalloc_PageMap3<BITS-kPageShift> Type;
1157 typedef PackedCache<BITS, uint64_t> CacheType;
1158};
1159
1160#if defined(WTF_CHANGES)
1161#if PLATFORM(X86_64)
1162// On all known X86-64 platforms, the upper 16 bits are always unused and therefore
1163// can be excluded from the PageMap key.
1164// See http://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details
1165
1166static const size_t kBitsUnusedOn64Bit = 16;
1167#else
1168static const size_t kBitsUnusedOn64Bit = 0;
1169#endif
1170
1171// A three-level map for 64-bit machines
1172template <> class MapSelector<64> {
1173 public:
1174 typedef TCMalloc_PageMap3<64 - kPageShift - kBitsUnusedOn64Bit> Type;
1175 typedef PackedCache<64, uint64_t> CacheType;
1176};
1177#endif
1178
1179// A two-level map for 32-bit machines
1180template <> class MapSelector<32> {
1181 public:
1182 typedef TCMalloc_PageMap2<32 - kPageShift> Type;
1183 typedef PackedCache<32 - kPageShift, uint16_t> CacheType;
1184};
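// Illustrative sketch (not part of the original file; "pageheap" and "ptr" are
// placeholder names): a pointer is turned into a PageID by dropping the
// low-order page bits, and that PageID indexes both the pagemap and the
// sizeclass cache of the TCMalloc_PageHeap defined below.
//
//   const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
//   Span* span = pageheap->GetDescriptor(p);         // full descriptor lookup
//   size_t cl = pageheap->GetSizeClassIfCached(p);   // 0 means "no information"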
1185
1186// -------------------------------------------------------------------------
1187// Page-level allocator
1188// * Eager coalescing
1189//
1190// Heap for page-level allocation. We allow allocating and freeing
1191// contiguous runs of pages (each such run is called a "span").
1192// -------------------------------------------------------------------------
1193
1194#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1195// The central page heap collects spans of memory that have been deleted but are still committed until they are released
1196// back to the system. We use a background thread to periodically scan the list of free spans and release some back to the
1197// system. Every 5 seconds, the background thread wakes up and does the following:
1198// - Check whether we had to commit memory in the last 5 seconds. If so, skip this scavenge because it's a sign that we are short
1199// of free committed pages and should not release them back to the system yet.
1200// - Otherwise, go through the list of free spans (from largest to smallest) and release up to a fraction of the free committed pages
1201// back to the system.
1202// - Once the number of free committed pages drops to kMinimumFreeCommittedPageCount, stop scavenging and block the
1203// scavenging thread until the number of free committed pages rises above kMinimumFreeCommittedPageCount again.
1204
1205// Background thread wakes up every 5 seconds to scavenge as long as there is memory available to return to the system.
1206static const int kScavengeTimerDelayInSeconds = 5;
1207
1208// Number of free committed pages that we want to keep around.
1209static const size_t kMinimumFreeCommittedPageCount = 512;
1210
1211// During a scavenge, we'll release up to a fraction of the free committed pages.
1212#if PLATFORM(WIN)
1213// We are slightly less aggressive in releasing memory on Windows for performance reasons.
1214static const int kMaxScavengeAmountFactor = 3;
1215#else
1216static const int kMaxScavengeAmountFactor = 2;
1217#endif
1218#endif
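// In concrete terms (illustrative note): with 4 KB pages the scavenger tries to
// keep about 512 * 4 KB == 2 MB of committed free memory around, and each pass
// releases at most 1/kMaxScavengeAmountFactor of the currently free committed
// pages (1/3 on Windows, 1/2 elsewhere).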
1219
1220class TCMalloc_PageHeap {
1221 public:
1222 void init();
1223
1224 // Allocate a run of "n" pages. Returns zero if out of memory.
1225 Span* New(Length n);
1226
1227 // Delete the span "[p, p+n-1]".
1228 // REQUIRES: span was returned by earlier call to New() and
1229 // has not yet been deleted.
1230 void Delete(Span* span);
1231
1232 // Mark an allocated span as being used for small objects of the
1233 // specified size-class.
1234 // REQUIRES: span was returned by an earlier call to New()
1235 // and has not yet been deleted.
1236 void RegisterSizeClass(Span* span, size_t sc);
1237
1238 // Split an allocated span into two spans: one of length "n" pages
1239 // followed by another span of length "span->length - n" pages.
1240 // Modifies "*span" to point to the first span of length "n" pages.
1241 // Returns a pointer to the second span.
1242 //
1243 // REQUIRES: "0 < n < span->length"
1244 // REQUIRES: !span->free
1245 // REQUIRES: span->sizeclass == 0
1246 Span* Split(Span* span, Length n);
1247
1248 // Return the descriptor for the specified page.
1249 inline Span* GetDescriptor(PageID p) const {
1250 return reinterpret_cast<Span*>(pagemap_.get(p));
1251 }
1252
1253#ifdef WTF_CHANGES
1254 inline Span* GetDescriptorEnsureSafe(PageID p)
1255 {
1256 pagemap_.Ensure(p, 1);
1257 return GetDescriptor(p);
1258 }
1259
1260 size_t ReturnedBytes() const;
1261#endif
1262
1263 // Dump state to stderr
1264#ifndef WTF_CHANGES
1265 void Dump(TCMalloc_Printer* out);
1266#endif
1267
1268 // Return number of bytes allocated from system
1269 inline uint64_t SystemBytes() const { return system_bytes_; }
1270
1271 // Return number of free bytes in heap
1272 uint64_t FreeBytes() const {
1273 return (static_cast<uint64_t>(free_pages_) << kPageShift);
1274 }
1275
1276 bool Check();
1277 bool CheckList(Span* list, Length min_pages, Length max_pages);
1278
1279 // Release all pages on the free list for reuse by the OS:
1280 void ReleaseFreePages();
1281
1282 // Return 0 if we have no information, or else the correct sizeclass for p.
1283 // Reads and writes to pagemap_cache_ do not require locking.
1284 // The entries are 64 bits on 64-bit hardware and 16 bits on
1285 // 32-bit hardware, and we don't mind raciness as long as each read of
1286 // an entry yields a valid entry, not a partially updated entry.
1287 size_t GetSizeClassIfCached(PageID p) const {
1288 return pagemap_cache_.GetOrDefault(p, 0);
1289 }
1290 void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); }
1291
1292 private:
1293 // Pick the appropriate map and cache types based on pointer size
1294 typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap;
1295 typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache;
1296 PageMap pagemap_;
1297 mutable PageMapCache pagemap_cache_;
1298
1299 // We segregate spans of a given size into two circular linked
1300 // lists: one for normal spans, and one for spans whose memory
1301 // has been returned to the system.
1302 struct SpanList {
1303 Span normal;
1304 Span returned;
1305 };
1306
1307 // List of free spans of length >= kMaxPages
1308 SpanList large_;
1309
1310 // Array mapping from span length to a doubly linked list of free spans
1311 SpanList free_[kMaxPages];
1312
1313 // Number of pages kept in free lists
1314 uintptr_t free_pages_;
1315
1316 // Bytes allocated from system
1317 uint64_t system_bytes_;
1318
1319#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1320 // Number of pages kept in free lists that are still committed.
1321 Length free_committed_pages_;
1322
1323 // Number of pages that we committed in the last scavenge wait interval.
1324 Length pages_committed_since_last_scavenge_;
1325#endif
1326
1327 bool GrowHeap(Length n);
1328
1329 // REQUIRES span->length >= n
1330 // Remove span from its free list, and move any leftover part of
1331 // span into appropriate free lists. Also update "span" to have
1332 // length exactly "n" and mark it as non-free so it can be returned
1333 // to the client.
1334 //
1335 // "released" is true iff "span" was found on a "returned" list.
1336 void Carve(Span* span, Length n, bool released);
1337
1338 void RecordSpan(Span* span) {
1339 pagemap_.set(span->start, span);
1340 if (span->length > 1) {
1341 pagemap_.set(span->start + span->length - 1, span);
1342 }
1343 }
1344
1345 // Allocate a large span of length == n. If successful, returns a
1346 // span of exactly the specified length. Else, returns NULL.
1347 Span* AllocLarge(Length n);
1348
1349#if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1350 // Incrementally release some memory to the system.
1351 // IncrementalScavenge(n) is called whenever n pages are freed.
1352 void IncrementalScavenge(Length n);
1353#endif
1354
1355 // Number of pages to deallocate before doing more scavenging
1356 int64_t scavenge_counter_;
1357
1358 // Index of last free list we scavenged
1359 size_t scavenge_index_;
1360
1361#if defined(WTF_CHANGES) && PLATFORM(DARWIN)
1362 friend class FastMallocZone;
1363#endif
1364
1365#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1366 static NO_RETURN void* runScavengerThread(void*);
1367
1368 NO_RETURN void scavengerThread();
1369
1370 void scavenge();
1371
1372 inline bool shouldContinueScavenging() const;
1373
1374 pthread_mutex_t m_scavengeMutex;
1375
1376 pthread_cond_t m_scavengeCondition;
1377
1378 // Keeps track of whether the background thread is actively scavenging memory every kScavengeTimerDelayInSeconds seconds, or
1379 // is blocked waiting for more pages to be deleted.
1380 bool m_scavengeThreadActive;
1381#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1382};
1383
1384void TCMalloc_PageHeap::init()
1385{
1386 pagemap_.init(MetaDataAlloc);
1387 pagemap_cache_ = PageMapCache(0);
1388 free_pages_ = 0;
1389 system_bytes_ = 0;
1390
1391#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1392 free_committed_pages_ = 0;
1393 pages_committed_since_last_scavenge_ = 0;
1394#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1395
1396 scavenge_counter_ = 0;
1397 // Start scavenging at kMaxPages list
1398 scavenge_index_ = kMaxPages-1;
1399 COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits);
1400 DLL_Init(&large_.normal);
1401 DLL_Init(&large_.returned);
1402 for (size_t i = 0; i < kMaxPages; i++) {
1403 DLL_Init(&free_[i].normal);
1404 DLL_Init(&free_[i].returned);
1405 }
1406
1407#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1408 pthread_mutex_init(&m_scavengeMutex, 0);
1409 pthread_cond_init(&m_scavengeCondition, 0);
1410 m_scavengeThreadActive = true;
1411 pthread_t thread;
1412 pthread_create(&thread, 0, runScavengerThread, this);
1413#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1414}
1415
1416#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1417void* TCMalloc_PageHeap::runScavengerThread(void* context)
1418{
1419 reinterpret_cast<TCMalloc_PageHeap*>(context)->scavengerThread();
1420}
1421
1422void TCMalloc_PageHeap::scavenge()
1423{
1424 // If we had to commit memory in the last 5 seconds, it means we don't have enough free committed pages
1425 // for the amount of allocation we are doing, so hold off on releasing memory back to the system.
1426 if (pages_committed_since_last_scavenge_ > 0) {
1427 pages_committed_since_last_scavenge_ = 0;
1428 return;
1429 }
1430 Length pagesDecommitted = 0;
1431 for (int i = kMaxPages; i >= 0; i--) {
1432 SpanList* slist = (static_cast<size_t>(i) == kMaxPages) ? &large_ : &free_[i];
1433 if (!DLL_IsEmpty(&slist->normal)) {
1434 // Release the last span on the normal portion of this list
1435 Span* s = slist->normal.prev;
1436 // Only decommit up to 1/kMaxScavengeAmountFactor of the free committed pages in a single scavenge pass.
1437 if ((pagesDecommitted + s->length) * kMaxScavengeAmountFactor > free_committed_pages_)
1438 continue;
1439 DLL_Remove(s);
1440 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
1441 static_cast<size_t>(s->length << kPageShift));
1442 if (!s->decommitted) {
1443 pagesDecommitted += s->length;
1444 s->decommitted = true;
1445 }
1446 DLL_Prepend(&slist->returned, s);
1447 // We can stop scavenging if the number of free committed pages left is less than or equal to the minimum number we want to keep around.
1448 if (free_committed_pages_ <= kMinimumFreeCommittedPageCount + pagesDecommitted)
1449 break;
1450 }
1451 }
1452 pages_committed_since_last_scavenge_ = 0;
1453 ASSERT(free_committed_pages_ >= pagesDecommitted);
1454 free_committed_pages_ -= pagesDecommitted;
1455}
1456
1457inline bool TCMalloc_PageHeap::shouldContinueScavenging() const
1458{
1459 return free_committed_pages_ > kMinimumFreeCommittedPageCount;
1460}
1461
1462#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1463
1464inline Span* TCMalloc_PageHeap::New(Length n) {
1465 ASSERT(Check());
1466 ASSERT(n > 0);
1467
1468 // Find first size >= n that has a non-empty list
1469 for (Length s = n; s < kMaxPages; s++) {
1470 Span* ll = NULL;
1471 bool released = false;
1472 if (!DLL_IsEmpty(&free_[s].normal)) {
1473 // Found normal span
1474 ll = &free_[s].normal;
1475 } else if (!DLL_IsEmpty(&free_[s].returned)) {
1476 // Found returned span; reallocate it
1477 ll = &free_[s].returned;
1478 released = true;
1479 } else {
1480 // Keep looking in larger classes
1481 continue;
1482 }
1483
1484 Span* result = ll->next;
1485 Carve(result, n, released);
1486 if (result->decommitted) {
1487 TCMalloc_SystemCommit(reinterpret_cast<void*>(result->start << kPageShift), static_cast<size_t>(n << kPageShift));
1488 result->decommitted = false;
1489#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1490 pages_committed_since_last_scavenge_ += n;
1491#endif
1492 }
1493#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1494 else {
1495 // The newly allocated memory is from a span that's in the normal span list (already committed). Update the
1496 // free committed pages count.
1497 ASSERT(free_committed_pages_ >= n);
1498 free_committed_pages_ -= n;
1499 }
1500#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1501 ASSERT(Check());
1502 free_pages_ -= n;
1503 return result;
1504 }
1505
1506 Span* result = AllocLarge(n);
1507 if (result != NULL) {
1508 ASSERT_SPAN_COMMITTED(result);
1509 return result;
1510 }
1511
1512 // Grow the heap and try again
1513 if (!GrowHeap(n)) {
1514 ASSERT(Check());
1515 return NULL;
1516 }
1517
1518 return AllocLarge(n);
1519}
1520
1521Span* TCMalloc_PageHeap::AllocLarge(Length n) {
1522 // Find the best span (closest to n in size).
1523 // The following loops implement address-ordered best-fit.
1524 bool from_released = false;
1525 Span *best = NULL;
1526
1527 // Search through normal list
1528 for (Span* span = large_.normal.next;
1529 span != &large_.normal;
1530 span = span->next) {
1531 if (span->length >= n) {
1532 if ((best == NULL)
1533 || (span->length < best->length)
1534 || ((span->length == best->length) && (span->start < best->start))) {
1535 best = span;
1536 from_released = false;
1537 }
1538 }
1539 }
1540
1541 // Search through released list in case it has a better fit
1542 for (Span* span = large_.returned.next;
1543 span != &large_.returned;
1544 span = span->next) {
1545 if (span->length >= n) {
1546 if ((best == NULL)
1547 || (span->length < best->length)
1548 || ((span->length == best->length) && (span->start < best->start))) {
1549 best = span;
1550 from_released = true;
1551 }
1552 }
1553 }
1554
1555 if (best != NULL) {
1556 Carve(best, n, from_released);
1557 if (best->decommitted) {
1558 TCMalloc_SystemCommit(reinterpret_cast<void*>(best->start << kPageShift), static_cast<size_t>(n << kPageShift));
1559 best->decommitted = false;
1560#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1561 pages_committed_since_last_scavenge_ += n;
1562#endif
1563 }
1564#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1565 else {
1566 // The newly allocated memory is from a span that's in the normal span list (already committed). Update the
1567 // free committed pages count.
1568 ASSERT(free_committed_pages_ >= n);
1569 free_committed_pages_ -= n;
1570 }
1571#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1572 ASSERT(Check());
1573 free_pages_ -= n;
1574 return best;
1575 }
1576 return NULL;
1577}
1578
1579Span* TCMalloc_PageHeap::Split(Span* span, Length n) {
1580 ASSERT(0 < n);
1581 ASSERT(n < span->length);
1582 ASSERT(!span->free);
1583 ASSERT(span->sizeclass == 0);
1584 Event(span, 'T', n);
1585
1586 const Length extra = span->length - n;
1587 Span* leftover = NewSpan(span->start + n, extra);
1588 Event(leftover, 'U', extra);
1589 RecordSpan(leftover);
1590 pagemap_.set(span->start + n - 1, span); // Update map from pageid to span
1591 span->length = n;
1592
1593 return leftover;
1594}
1595
1596static ALWAYS_INLINE void propagateDecommittedState(Span* destination, Span* source)
1597{
1598 destination->decommitted = source->decommitted;
1599}
1600
1601inline void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) {
1602 ASSERT(n > 0);
1603 DLL_Remove(span);
1604 span->free = 0;
1605 Event(span, 'A', n);
1606
1607 const int extra = static_cast<int>(span->length - n);
1608 ASSERT(extra >= 0);
1609 if (extra > 0) {
1610 Span* leftover = NewSpan(span->start + n, extra);
1611 leftover->free = 1;
1612 propagateDecommittedState(leftover, span);
1613 Event(leftover, 'S', extra);
1614 RecordSpan(leftover);
1615
1616 // Place leftover span on appropriate free list
1617 SpanList* listpair = (static_cast<size_t>(extra) < kMaxPages) ? &free_[extra] : &large_;
1618 Span* dst = released ? &listpair->returned : &listpair->normal;
1619 DLL_Prepend(dst, leftover);
1620
1621 span->length = n;
1622 pagemap_.set(span->start + n - 1, span);
1623 }
1624}
1625
1626static ALWAYS_INLINE void mergeDecommittedStates(Span* destination, Span* other)
1627{
1628 if (destination->decommitted && !other->decommitted) {
1629 TCMalloc_SystemRelease(reinterpret_cast<void*>(other->start << kPageShift),
1630 static_cast<size_t>(other->length << kPageShift));
1631 } else if (other->decommitted && !destination->decommitted) {
1632 TCMalloc_SystemRelease(reinterpret_cast<void*>(destination->start << kPageShift),
1633 static_cast<size_t>(destination->length << kPageShift));
1634 destination->decommitted = true;
1635 }
1636}
1637
1638inline void TCMalloc_PageHeap::Delete(Span* span) {
1639 ASSERT(Check());
1640 ASSERT(!span->free);
1641 ASSERT(span->length > 0);
1642 ASSERT(GetDescriptor(span->start) == span);
1643 ASSERT(GetDescriptor(span->start + span->length - 1) == span);
1644 span->sizeclass = 0;
1645#ifndef NO_TCMALLOC_SAMPLES
1646 span->sample = 0;
1647#endif
1648
1649 // Coalesce -- we guarantee that "p" != 0, so no bounds checking
1650 // necessary. We do not bother resetting the stale pagemap
1651 // entries for the pieces we are merging together because we only
1652 // care about the pagemap entries for the boundaries.
1653#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1654 // Track the total size of the neighboring free spans that are committed.
1655 Length neighboringCommittedSpansLength = 0;
1656#endif
1657 const PageID p = span->start;
1658 const Length n = span->length;
1659 Span* prev = GetDescriptor(p-1);
1660 if (prev != NULL && prev->free) {
1661 // Merge preceding span into this span
1662 ASSERT(prev->start + prev->length == p);
1663 const Length len = prev->length;
1664#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1665 if (!prev->decommitted)
1666 neighboringCommittedSpansLength += len;
1667#endif
1668 mergeDecommittedStates(span, prev);
1669 DLL_Remove(prev);
1670 DeleteSpan(prev);
1671 span->start -= len;
1672 span->length += len;
1673 pagemap_.set(span->start, span);
1674 Event(span, 'L', len);
1675 }
1676 Span* next = GetDescriptor(p+n);
1677 if (next != NULL && next->free) {
1678 // Merge next span into this span
1679 ASSERT(next->start == p+n);
1680 const Length len = next->length;
1681#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1682 if (!next->decommitted)
1683 neighboringCommittedSpansLength += len;
1684#endif
1685 mergeDecommittedStates(span, next);
1686 DLL_Remove(next);
1687 DeleteSpan(next);
1688 span->length += len;
1689 pagemap_.set(span->start + span->length - 1, span);
1690 Event(span, 'R', len);
1691 }
1692
1693 Event(span, 'D', span->length);
1694 span->free = 1;
1695 if (span->decommitted) {
1696 if (span->length < kMaxPages)
1697 DLL_Prepend(&free_[span->length].returned, span);
1698 else
1699 DLL_Prepend(&large_.returned, span);
1700 } else {
1701 if (span->length < kMaxPages)
1702 DLL_Prepend(&free_[span->length].normal, span);
1703 else
1704 DLL_Prepend(&large_.normal, span);
1705 }
1706 free_pages_ += n;
1707
1708#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1709 if (span->decommitted) {
1710 // If the merged span is decommitted, that means we decommitted any neighboring spans that were
1711 // committed. Update the free committed pages count.
1712 free_committed_pages_ -= neighboringCommittedSpansLength;
1713 } else {
1714 // If the merged span remains committed, add the deleted span's size to the free committed pages count.
1715 free_committed_pages_ += n;
1716 }
1717
1718 // Make sure the scavenge thread becomes active if we have enough freed pages to release some back to the system.
1719 if (!m_scavengeThreadActive && shouldContinueScavenging())
1720 pthread_cond_signal(&m_scavengeCondition);
1721#else
1722 IncrementalScavenge(n);
1723#endif
1724
1725 ASSERT(Check());
1726}
1727
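// Without a background scavenger thread, memory is instead returned to the
// system incrementally: Delete() calls IncrementalScavenge(), which decommits
// at most one free span once enough pages have been freed.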
1728#if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1729void TCMalloc_PageHeap::IncrementalScavenge(Length n) {
1730 // Fast path; not yet time to release memory
1731 scavenge_counter_ -= n;
1732 if (scavenge_counter_ >= 0) return; // Not yet time to scavenge
1733
1734 // If there is nothing to release, wait for so many pages before
1735 // scavenging again. With 4K pages, this comes to 1MB of memory.
1736 static const size_t kDefaultReleaseDelay = 1 << 8;
1737
1738 // Find index of free list to scavenge
1739 size_t index = scavenge_index_ + 1;
1740 for (size_t i = 0; i < kMaxPages+1; i++) {
1741 if (index > kMaxPages) index = 0;
1742 SpanList* slist = (index == kMaxPages) ? &large_ : &free_[index];
1743 if (!DLL_IsEmpty(&slist->normal)) {
1744 // Release the last span on the normal portion of this list
1745 Span* s = slist->normal.prev;
1746 DLL_Remove(s);
1747 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
1748 static_cast<size_t>(s->length << kPageShift));
1749 s->decommitted = true;
1750 DLL_Prepend(&slist->returned, s);
1751
1752 scavenge_counter_ = std::max<size_t>(64UL, std::min<size_t>(kDefaultReleaseDelay, kDefaultReleaseDelay - (free_pages_ / kDefaultReleaseDelay)));
1753
1754 if (index == kMaxPages && !DLL_IsEmpty(&slist->normal))
1755 scavenge_index_ = index - 1;
1756 else
1757 scavenge_index_ = index;
1758 return;
1759 }
1760 index++;
1761 }
1762
1763 // Nothing to scavenge, delay for a while
1764 scavenge_counter_ = kDefaultReleaseDelay;
1765}
1766#endif
1767
1768void TCMalloc_PageHeap::RegisterSizeClass(Span* span, size_t sc) {
1769 // Associate span object with all interior pages as well
1770 ASSERT(!span->free);
1771 ASSERT(GetDescriptor(span->start) == span);
1772 ASSERT(GetDescriptor(span->start+span->length-1) == span);
1773 Event(span, 'C', sc);
1774 span->sizeclass = static_cast<unsigned int>(sc);
1775 for (Length i = 1; i < span->length-1; i++) {
1776 pagemap_.set(span->start+i, span);
1777 }
1778}
1779
1780#ifdef WTF_CHANGES
1781size_t TCMalloc_PageHeap::ReturnedBytes() const {
1782 size_t result = 0;
1783 for (unsigned s = 0; s < kMaxPages; s++) {
1784 const int r_length = DLL_Length(&free_[s].returned);
1785 unsigned r_pages = s * r_length;
1786 result += r_pages << kPageShift;
1787 }
1788
1789 for (Span* s = large_.returned.next; s != &large_.returned; s = s->next)
1790 result += s->length << kPageShift;
1791 return result;
1792}
1793#endif
1794
1795#ifndef WTF_CHANGES
1796static double PagesToMB(uint64_t pages) {
1797 return (pages << kPageShift) / 1048576.0;
1798}
1799
1800void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) {
1801 int nonempty_sizes = 0;
1802 for (int s = 0; s < kMaxPages; s++) {
1803 if (!DLL_IsEmpty(&free_[s].normal) || !DLL_IsEmpty(&free_[s].returned)) {
1804 nonempty_sizes++;
1805 }
1806 }
1807 out->printf("------------------------------------------------\n");
1808 out->printf("PageHeap: %d sizes; %6.1f MB free\n",
1809 nonempty_sizes, PagesToMB(free_pages_));
1810 out->printf("------------------------------------------------\n");
1811 uint64_t total_normal = 0;
1812 uint64_t total_returned = 0;
1813 for (int s = 0; s < kMaxPages; s++) {
1814 const int n_length = DLL_Length(&free_[s].normal);
1815 const int r_length = DLL_Length(&free_[s].returned);
1816 if (n_length + r_length > 0) {
1817 uint64_t n_pages = s * n_length;
1818 uint64_t r_pages = s * r_length;
1819 total_normal += n_pages;
1820 total_returned += r_pages;
1821 out->printf("%6u pages * %6u spans ~ %6.1f MB; %6.1f MB cum"
1822 "; unmapped: %6.1f MB; %6.1f MB cum\n",
1823 s,
1824 (n_length + r_length),
1825 PagesToMB(n_pages + r_pages),
1826 PagesToMB(total_normal + total_returned),
1827 PagesToMB(r_pages),
1828 PagesToMB(total_returned));
1829 }
1830 }
1831
1832 uint64_t n_pages = 0;
1833 uint64_t r_pages = 0;
1834 int n_spans = 0;
1835 int r_spans = 0;
1836 out->printf("Normal large spans:\n");
1837 for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) {
1838 out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n",
1839 s->length, PagesToMB(s->length));
1840 n_pages += s->length;
1841 n_spans++;
1842 }
1843 out->printf("Unmapped large spans:\n");
1844 for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) {
1845 out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n",
1846 s->length, PagesToMB(s->length));
1847 r_pages += s->length;
1848 r_spans++;
1849 }
1850 total_normal += n_pages;
1851 total_returned += r_pages;
1852 out->printf(">255 large * %6u spans ~ %6.1f MB; %6.1f MB cum"
1853 "; unmapped: %6.1f MB; %6.1f MB cum\n",
1854 (n_spans + r_spans),
1855 PagesToMB(n_pages + r_pages),
1856 PagesToMB(total_normal + total_returned),
1857 PagesToMB(r_pages),
1858 PagesToMB(total_returned));
1859}
1860#endif
1861
1862bool TCMalloc_PageHeap::GrowHeap(Length n) {
1863 ASSERT(kMaxPages >= kMinSystemAlloc);
1864 if (n > kMaxValidPages) return false;
1865 Length ask = (n>kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc);
1866 size_t actual_size;
1867 void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
1868 if (ptr == NULL) {
1869 if (n < ask) {
1870 // Try growing just "n" pages
1871 ask = n;
1872 ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
1873 }
1874 if (ptr == NULL) return false;
1875 }
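  // TCMalloc_SystemAlloc may round the request up; recompute "ask" from the
  // size actually obtained so the bookkeeping below matches reality.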
1876 ask = actual_size >> kPageShift;
1877
1878#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1879 pages_committed_since_last_scavenge_ += ask;
1880#endif
1881
1882 uint64_t old_system_bytes = system_bytes_;
1883 system_bytes_ += (ask << kPageShift);
1884 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
1885 ASSERT(p > 0);
1886
1887 // If we already have a lot of pages allocated, just preallocate a bunch of
1888 // memory for the page map. This prevents fragmentation by pagemap metadata
1889 // when a program keeps allocating and freeing large blocks.
1890
1891 if (old_system_bytes < kPageMapBigAllocationThreshold
1892 && system_bytes_ >= kPageMapBigAllocationThreshold) {
1893 pagemap_.PreallocateMoreMemory();
1894 }
1895
1896 // Make sure pagemap_ has entries for all of the new pages.
1897 // Plus ensure one before and one after so coalescing code
1898 // does not need bounds-checking.
1899 if (pagemap_.Ensure(p-1, ask+2)) {
1900 // Pretend the new area is allocated and then Delete() it to
1901 // cause any necessary coalescing to occur.
1902 //
1903 // We do not adjust free_pages_ here since Delete() will do it for us.
1904 Span* span = NewSpan(p, ask);
1905 RecordSpan(span);
1906 Delete(span);
1907 ASSERT(Check());
1908 return true;
1909 } else {
1910 // We could not allocate memory within "pagemap_"
1911 // TODO: Once we can return memory to the system, return the new span
1912 return false;
1913 }
1914}
1915
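// Consistency check (used from ASSERT()s): free_[0] must be empty, and every
// span on each free list must be free, have a length in the expected range,
// and be registered in the pagemap at both ends.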
1916bool TCMalloc_PageHeap::Check() {
1917 ASSERT(free_[0].normal.next == &free_[0].normal);
1918 ASSERT(free_[0].returned.next == &free_[0].returned);
1919 CheckList(&large_.normal, kMaxPages, 1000000000);
1920 CheckList(&large_.returned, kMaxPages, 1000000000);
1921 for (Length s = 1; s < kMaxPages; s++) {
1922 CheckList(&free_[s].normal, s, s);
1923 CheckList(&free_[s].returned, s, s);
1924 }
1925 return true;
1926}
1927
1928#if ASSERT_DISABLED
1929bool TCMalloc_PageHeap::CheckList(Span*, Length, Length) {
1930 return true;
1931}
1932#else
1933bool TCMalloc_PageHeap::CheckList(Span* list, Length min_pages, Length max_pages) {
1934 for (Span* s = list->next; s != list; s = s->next) {
1935 CHECK_CONDITION(s->free);
1936 CHECK_CONDITION(s->length >= min_pages);
1937 CHECK_CONDITION(s->length <= max_pages);
1938 CHECK_CONDITION(GetDescriptor(s->start) == s);
1939 CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s);
1940 }
1941 return true;
1942}
1943#endif
1944
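// Move every span on "list" to "returned", decommitting its pages via
// TCMalloc_SystemRelease as it goes. Used by ReleaseFreePages() below to hand
// all free memory back to the system.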
1945static void ReleaseFreeList(Span* list, Span* returned) {
1946 // Walk backwards through list so that when we push these
1947 // spans on the "returned" list, we preserve the order.
1948 while (!DLL_IsEmpty(list)) {
1949 Span* s = list->prev;
1950 DLL_Remove(s);
1951 DLL_Prepend(returned, s);
1952 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
1953 static_cast<size_t>(s->length << kPageShift));
1954 }
1955}
1956
1957void TCMalloc_PageHeap::ReleaseFreePages() {
1958 for (Length s = 0; s < kMaxPages; s++) {
1959 ReleaseFreeList(&free_[s].normal, &free_[s].returned);
1960 }
1961 ReleaseFreeList(&large_.normal, &large_.returned);
1962 ASSERT(Check());
1963}
1964
1965//-------------------------------------------------------------------
1966// Free list
1967//-------------------------------------------------------------------
1968
1969class TCMalloc_ThreadCache_FreeList {
1970 private:
1971 void* list_; // Linked list of nodes
1972 uint16_t length_; // Current length
1973 uint16_t lowater_; // Low water mark for list length
1974
1975 public:
1976 void Init() {
1977 list_ = NULL;
1978 length_ = 0;
1979 lowater_ = 0;
1980 }
1981
1982 // Return current length of list
1983 int length() const {
1984 return length_;
1985 }
1986
1987 // Is list empty?
1988 bool empty() const {
1989 return list_ == NULL;
1990 }
1991
1992 // Low-water mark management
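  // The low-water mark is the smallest the list has been since it was last
  // cleared; Scavenge() uses it to estimate how many objects can be returned
  // to the central cache without hurting this thread.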
1993 int lowwatermark() const { return lowater_; }
1994 void clear_lowwatermark() { lowater_ = length_; }
1995
1996 ALWAYS_INLINE void Push(void* ptr) {
1997 SLL_Push(&list_, ptr);
1998 length_++;
1999 }
2000
2001 void PushRange(int N, void *start, void *end) {
2002 SLL_PushRange(&list_, start, end);
2003 length_ = length_ + static_cast<uint16_t>(N);
2004 }
2005
2006 void PopRange(int N, void **start, void **end) {
2007 SLL_PopRange(&list_, N, start, end);
2008 ASSERT(length_ >= N);
2009 length_ = length_ - static_cast<uint16_t>(N);
2010 if (length_ < lowater_) lowater_ = length_;
2011 }
2012
2013 ALWAYS_INLINE void* Pop() {
2014 ASSERT(list_ != NULL);
2015 length_--;
2016 if (length_ < lowater_) lowater_ = length_;
2017 return SLL_Pop(&list_);
2018 }
2019
2020#ifdef WTF_CHANGES
2021 template <class Finder, class Reader>
2022 void enumerateFreeObjects(Finder& finder, const Reader& reader)
2023 {
2024 for (void* nextObject = list_; nextObject; nextObject = *reader(reinterpret_cast<void**>(nextObject)))
2025 finder.visit(nextObject);
2026 }
2027#endif
2028};
2029
2030//-------------------------------------------------------------------
2031// Data kept per thread
2032//-------------------------------------------------------------------
2033
2034class TCMalloc_ThreadCache {
2035 private:
2036 typedef TCMalloc_ThreadCache_FreeList FreeList;
2037#if COMPILER(MSVC)
2038 typedef DWORD ThreadIdentifier;
2039#else
2040 typedef pthread_t ThreadIdentifier;
2041#endif
2042
2043 size_t size_; // Combined size of data
2044 ThreadIdentifier tid_; // Which thread owns it
2045 bool in_setspecific_; // Called pthread_setspecific?
2046 FreeList list_[kNumClasses]; // Array indexed by size-class
2047
2048 // We sample allocations, biased by the size of the allocation
2049 uint32_t rnd_; // Cheap random number generator
2050 size_t bytes_until_sample_; // Bytes until we sample next
2051
2052 // Allocate a new heap. REQUIRES: pageheap_lock is held.
2053 static inline TCMalloc_ThreadCache* NewHeap(ThreadIdentifier tid);
2054
2055 // Use only as pthread thread-specific destructor function.
2056 static void DestroyThreadCache(void* ptr);
2057 public:
2058 // All ThreadCache objects are kept in a linked list (for stats collection)
2059 TCMalloc_ThreadCache* next_;
2060 TCMalloc_ThreadCache* prev_;
2061
2062 void Init(ThreadIdentifier tid);
2063 void Cleanup();
2064
2065 // Accessors (mostly just for printing stats)
2066 int freelist_length(size_t cl) const { return list_[cl].length(); }
2067
2068 // Total byte size in cache
2069 size_t Size() const { return size_; }
2070
2071 void* Allocate(size_t size);
2072 void Deallocate(void* ptr, size_t size_class);
2073
2074 void FetchFromCentralCache(size_t cl, size_t allocationSize);
2075 void ReleaseToCentralCache(size_t cl, int N);
2076 void Scavenge();
2077 void Print() const;
2078
2079 // Record allocation of "k" bytes. Return true iff allocation
2080 // should be sampled
2081 bool SampleAllocation(size_t k);
2082
2083 // Pick next sampling point
2084 void PickNextSample(size_t k);
2085
2086 static void InitModule();
2087 static void InitTSD();
2088 static TCMalloc_ThreadCache* GetThreadHeap();
2089 static TCMalloc_ThreadCache* GetCache();
2090 static TCMalloc_ThreadCache* GetCacheIfPresent();
2091 static TCMalloc_ThreadCache* CreateCacheIfNecessary();
2092 static void DeleteCache(TCMalloc_ThreadCache* heap);
2093 static void BecomeIdle();
2094 static void RecomputeThreadCacheSize();
2095
2096#ifdef WTF_CHANGES
2097 template <class Finder, class Reader>
2098 void enumerateFreeObjects(Finder& finder, const Reader& reader)
2099 {
2100 for (unsigned sizeClass = 0; sizeClass < kNumClasses; sizeClass++)
2101 list_[sizeClass].enumerateFreeObjects(finder, reader);
2102 }
2103#endif
2104};
2105
2106//-------------------------------------------------------------------
2107// Data kept per size-class in central cache
2108//-------------------------------------------------------------------
2109
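// Each central free list also keeps a small "transfer cache" (tc_slots_) of
// prepackaged object batches so whole batches can move between thread caches
// and the central list without touching individual spans.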
2110class TCMalloc_Central_FreeList {
2111 public:
2112 void Init(size_t cl);
2113
2114 // These methods all do internal locking.
2115
2116 // Insert the specified range into the central freelist. N is the number of
2117 // elements in the range.
2118 void InsertRange(void *start, void *end, int N);
2119
2120 // Returns the actual number of fetched elements in *N.
2121 void RemoveRange(void **start, void **end, int *N);
2122
2123 // Returns the number of free objects in cache.
2124 size_t length() {
2125 SpinLockHolder h(&lock_);
2126 return counter_;
2127 }
2128
2129 // Returns the number of free objects in the transfer cache.
2130 int tc_length() {
2131 SpinLockHolder h(&lock_);
2132 return used_slots_ * num_objects_to_move[size_class_];
2133 }
2134
2135#ifdef WTF_CHANGES
2136 template <class Finder, class Reader>
2137 void enumerateFreeObjects(Finder& finder, const Reader& reader, TCMalloc_Central_FreeList* remoteCentralFreeList)
2138 {
2139 for (Span* span = &empty_; span && span != &empty_; span = (span->next ? reader(span->next) : 0))
2140 ASSERT(!span->objects);
2141
2142 ASSERT(!nonempty_.objects);
2143 static const ptrdiff_t nonemptyOffset = reinterpret_cast<const char*>(&nonempty_) - reinterpret_cast<const char*>(this);
2144
2145 Span* remoteNonempty = reinterpret_cast<Span*>(reinterpret_cast<char*>(remoteCentralFreeList) + nonemptyOffset);
2146 Span* remoteSpan = nonempty_.next;
2147
2148 for (Span* span = reader(remoteSpan); span && remoteSpan != remoteNonempty; remoteSpan = span->next, span = (span->next ? reader(span->next) : 0)) {
2149 for (void* nextObject = span->objects; nextObject; nextObject = *reader(reinterpret_cast<void**>(nextObject)))
2150 finder.visit(nextObject);
2151 }
2152 }
2153#endif
2154
2155 private:
2156 // REQUIRES: lock_ is held
2157 // Remove object from cache and return.
2158 // Return NULL if no free entries in cache.
2159 void* FetchFromSpans();
2160
2161 // REQUIRES: lock_ is held
2162 // Remove object from cache and return. Fetches
2163 // from pageheap if cache is empty. Only returns
2164 // NULL on allocation failure.
2165 void* FetchFromSpansSafe();
2166
2167 // REQUIRES: lock_ is held
2168 // Release a linked list of objects to spans.
2169 // May temporarily release lock_.
2170 void ReleaseListToSpans(void *start);
2171
2172 // REQUIRES: lock_ is held
2173 // Release an object to spans.
2174 // May temporarily release lock_.
2175 void ReleaseToSpans(void* object);
2176
2177 // REQUIRES: lock_ is held
2178 // Populate cache by fetching from the page heap.
2179 // May temporarily release lock_.
2180 void Populate();
2181
2182 // REQUIRES: lock is held.
2183 // Tries to make room for a TCEntry. If the cache is full it will try to
2184 // expand it at the expense of some other size class's cache. Returns false
2185 // if there is no space.
2186 bool MakeCacheSpace();
2187
2188 // REQUIRES: lock_ for locked_size_class is held.
2189 // Picks a "random" size class to steal a TCEntry slot from. In reality it
2190 // just iterates over the sizeclasses but does so without taking a lock.
2191 // Returns true on success.
2192 // May temporarily lock a "random" size class.
2193 static bool EvictRandomSizeClass(size_t locked_size_class, bool force);
2194
2195 // REQUIRES: lock_ is *not* held.
2196 // Tries to shrink the Cache. If force is true it will release objects to
2197 // spans if that allows it to shrink the cache. Returns false if it failed
2198 // to shrink the cache. Decrements cache_size_ on success.
2199 // May temporarily take lock_. If it takes lock_, the locked_size_class
2200 // lock is released to keep the thread from holding two size class locks
2201 // concurrently, which could lead to a deadlock.
2202 bool ShrinkCache(int locked_size_class, bool force);
2203
2204 // This lock protects all the data members. used_slots_ and cache_size_
2205 // may be looked at without holding the lock.
2206 SpinLock lock_;
2207
2208 // We keep linked lists of empty and non-empty spans.
2209 size_t size_class_; // My size class
2210 Span empty_; // Dummy header for list of empty spans
2211 Span nonempty_; // Dummy header for list of non-empty spans
2212 size_t counter_; // Number of free objects in cache entry
2213
2214 // Here we reserve space for TCEntry cache slots. Since one size class can
2215 // end up getting all the TCEntries quota in the system we just preallocate
2216 // sufficient number of entries here.
2217 TCEntry tc_slots_[kNumTransferEntries];
2218
2219 // Number of currently used cached entries in tc_slots_. This variable is
2220 // updated under a lock but can be read without one.
2221 int32_t used_slots_;
2222 // The current number of slots for this size class. This is an
2223 // adaptive value that is increased if there is lots of traffic
2224 // on a given size class.
2225 int32_t cache_size_;
2226};
2227
2228// Pad each CentralCache object to multiple of 64 bytes
2229class TCMalloc_Central_FreeListPadded : public TCMalloc_Central_FreeList {
2230 private:
2231 char pad_[(64 - (sizeof(TCMalloc_Central_FreeList) % 64)) % 64];
2232};
2233
2234//-------------------------------------------------------------------
2235// Global variables
2236//-------------------------------------------------------------------
2237
2238// Central cache -- a collection of free-lists, one per size-class.
2239// We have a separate lock per free-list to reduce contention.
2240static TCMalloc_Central_FreeListPadded central_cache[kNumClasses];
2241
2242// Page-level allocator
2243static SpinLock pageheap_lock = SPINLOCK_INITIALIZER;
2244static void* pageheap_memory[(sizeof(TCMalloc_PageHeap) + sizeof(void*) - 1) / sizeof(void*)];
2245static bool phinited = false;
2246
2247// Avoid extra level of indirection by making "pageheap" be just an alias
2248// of pageheap_memory.
2249typedef union {
2250 void* m_memory;
2251 TCMalloc_PageHeap* m_pageHeap;
2252} PageHeapUnion;
2253
2254static inline TCMalloc_PageHeap* getPageHeap()
2255{
2256 PageHeapUnion u = { &pageheap_memory[0] };
2257 return u.m_pageHeap;
2258}
2259
2260#define pageheap getPageHeap()
2261
2262#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
2263#if PLATFORM(WIN)
2264static void sleep(unsigned seconds)
2265{
2266 ::Sleep(seconds * 1000);
2267}
2268#endif
2269
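// Scavenger thread body: when there is nothing worth releasing it parks on
// m_scavengeCondition; otherwise it sleeps for kScavengeTimerDelayInSeconds
// and then scavenges under pageheap_lock.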
2270void TCMalloc_PageHeap::scavengerThread()
2271{
2272 while (1) {
2273 if (!shouldContinueScavenging()) {
2274 pthread_mutex_lock(&m_scavengeMutex);
2275 m_scavengeThreadActive = false;
2276 // Block until there are enough freed pages to release back to the system.
2277 pthread_cond_wait(&m_scavengeCondition, &m_scavengeMutex);
2278 m_scavengeThreadActive = true;
2279 pthread_mutex_unlock(&m_scavengeMutex);
2280 }
2281 sleep(kScavengeTimerDelayInSeconds);
2282 {
2283 SpinLockHolder h(&pageheap_lock);
2284 pageheap->scavenge();
2285 }
2286 }
2287}
2288#endif
2289
2290// If TLS is available, we also store a copy
2291// of the per-thread object in a __thread variable
2292// since __thread variables are faster to read
2293// than pthread_getspecific(). We still need
2294// pthread_setspecific() because __thread
2295// variables provide no way to run cleanup
2296// code when a thread is destroyed.
2297#ifdef HAVE_TLS
2298static __thread TCMalloc_ThreadCache *threadlocal_heap;
2299#endif
2300// Thread-specific key. Initialization here is somewhat tricky
2301// because some Linux startup code invokes malloc() before it
2302 // is in a good enough state to handle pthread_key_create().
2303// Therefore, we use TSD keys only after tsd_inited is set to true.
2304// Until then, we use a slow path to get the heap object.
2305static bool tsd_inited = false;
2306static pthread_key_t heap_key;
2307#if COMPILER(MSVC)
2308DWORD tlsIndex = TLS_OUT_OF_INDEXES;
2309#endif
2310
2311static ALWAYS_INLINE void setThreadHeap(TCMalloc_ThreadCache* heap)
2312{
2313 // still do pthread_setspecific when using MSVC fast TLS to
2314 // benefit from the delete callback.
2315 pthread_setspecific(heap_key, heap);
2316#if COMPILER(MSVC)
2317 TlsSetValue(tlsIndex, heap);
2318#endif
2319}
2320
2321// Allocator for thread heaps
2322static PageHeapAllocator<TCMalloc_ThreadCache> threadheap_allocator;
2323
2324// Linked list of heap objects. Protected by pageheap_lock.
2325static TCMalloc_ThreadCache* thread_heaps = NULL;
2326static int thread_heap_count = 0;
2327
2328// Overall thread cache size. Protected by pageheap_lock.
2329static size_t overall_thread_cache_size = kDefaultOverallThreadCacheSize;
2330
2331// Global per-thread cache size. Writes are protected by
2332// pageheap_lock. Reads are done without any locking, which should be
2333// fine as long as size_t can be written atomically and we don't place
2334// invariants between this variable and other pieces of state.
2335static volatile size_t per_thread_cache_size = kMaxThreadCacheSize;
2336
2337//-------------------------------------------------------------------
2338// Central cache implementation
2339//-------------------------------------------------------------------
2340
2341void TCMalloc_Central_FreeList::Init(size_t cl) {
2342 lock_.Init();
2343 size_class_ = cl;
2344 DLL_Init(&empty_);
2345 DLL_Init(&nonempty_);
2346 counter_ = 0;
2347
2348 cache_size_ = 1;
2349 used_slots_ = 0;
2350 ASSERT(cache_size_ <= kNumTransferEntries);
2351}
2352
2353void TCMalloc_Central_FreeList::ReleaseListToSpans(void* start) {
2354 while (start) {
2355 void *next = SLL_Next(start);
2356 ReleaseToSpans(start);
2357 start = next;
2358 }
2359}
2360
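// Return one object to the span that owns it. When the span's refcount drops
// to zero, the whole span is handed back to the page heap (the central list
// lock is dropped around that call).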
2361ALWAYS_INLINE void TCMalloc_Central_FreeList::ReleaseToSpans(void* object) {
2362 const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift;
2363 Span* span = pageheap->GetDescriptor(p);
2364 ASSERT(span != NULL);
2365 ASSERT(span->refcount > 0);
2366
2367 // If span is empty, move it to non-empty list
2368 if (span->objects == NULL) {
2369 DLL_Remove(span);
2370 DLL_Prepend(&nonempty_, span);
2371 Event(span, 'N', 0);
2372 }
2373
2374 // The following check is expensive, so it is disabled by default
2375 if (false) {
2376 // Check that object does not occur in list
2377 int got = 0;
2378 for (void* p = span->objects; p != NULL; p = *((void**) p)) {
2379 ASSERT(p != object);
2380 got++;
2381 }
2382 ASSERT(got + span->refcount ==
2383 (span->length<<kPageShift)/ByteSizeForClass(span->sizeclass));
2384 }
2385
2386 counter_++;
2387 span->refcount--;
2388 if (span->refcount == 0) {
2389 Event(span, '#', 0);
2390 counter_ -= (span->length<<kPageShift) / ByteSizeForClass(span->sizeclass);
2391 DLL_Remove(span);
2392
2393 // Release central list lock while operating on pageheap
2394 lock_.Unlock();
2395 {
2396 SpinLockHolder h(&pageheap_lock);
2397 pageheap->Delete(span);
2398 }
2399 lock_.Lock();
2400 } else {
2401 *(reinterpret_cast<void**>(object)) = span->objects;
2402 span->objects = object;
2403 }
2404}
2405
2406ALWAYS_INLINE bool TCMalloc_Central_FreeList::EvictRandomSizeClass(
2407 size_t locked_size_class, bool force) {
2408 static int race_counter = 0;
2409 int t = race_counter++; // Updated without a lock, but who cares.
2410 if (t >= static_cast<int>(kNumClasses)) {
2411 while (t >= static_cast<int>(kNumClasses)) {
2412 t -= kNumClasses;
2413 }
2414 race_counter = t;
2415 }
2416 ASSERT(t >= 0);
2417 ASSERT(t < static_cast<int>(kNumClasses));
2418 if (t == static_cast<int>(locked_size_class)) return false;
2419 return central_cache[t].ShrinkCache(static_cast<int>(locked_size_class), force);
2420}
2421
2422bool TCMalloc_Central_FreeList::MakeCacheSpace() {
2423 // Is there room in the cache?
2424 if (used_slots_ < cache_size_) return true;
2425 // Check whether we can expand this cache.
2426 if (cache_size_ == kNumTransferEntries) return false;
2427 // Ok, we'll try to grab an entry from some other size class.
2428 if (EvictRandomSizeClass(size_class_, false) ||
2429 EvictRandomSizeClass(size_class_, true)) {
2430 // Succeeded in evicting, we're going to make our cache larger.
2431 cache_size_++;
2432 return true;
2433 }
2434 return false;
2435}
2436
2437
2438namespace {
2439class LockInverter {
2440 private:
2441 SpinLock *held_, *temp_;
2442 public:
2443 inline explicit LockInverter(SpinLock* held, SpinLock *temp)
2444 : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); }
2445 inline ~LockInverter() { temp_->Unlock(); held_->Lock(); }
2446};
2447}
2448
2449bool TCMalloc_Central_FreeList::ShrinkCache(int locked_size_class, bool force) {
2450 // Start with a quick check without taking a lock.
2451 if (cache_size_ == 0) return false;
2452 // We don't evict from a full cache unless we are 'forcing'.
2453 if (force == false && used_slots_ == cache_size_) return false;
2454
2455 // Grab lock, but first release the other lock held by this thread. We use
2456 // the lock inverter to ensure that we never hold two size class locks
2457 // concurrently. That can create a deadlock because there is no well
2458 // defined nesting order.
2459 LockInverter li(&central_cache[locked_size_class].lock_, &lock_);
2460 ASSERT(used_slots_ <= cache_size_);
2461 ASSERT(0 <= cache_size_);
2462 if (cache_size_ == 0) return false;
2463 if (used_slots_ == cache_size_) {
2464 if (force == false) return false;
2465 // ReleaseListToSpans releases the lock, so we have to make all the
2466 // updates to the central list before calling it.
2467 cache_size_--;
2468 used_slots_--;
2469 ReleaseListToSpans(tc_slots_[used_slots_].head);
2470 return true;
2471 }
2472 cache_size_--;
2473 return true;
2474}
2475
2476void TCMalloc_Central_FreeList::InsertRange(void *start, void *end, int N) {
2477 SpinLockHolder h(&lock_);
2478 if (N == num_objects_to_move[size_class_] &&
2479 MakeCacheSpace()) {
2480 int slot = used_slots_++;
2481 ASSERT(slot >=0);
2482 ASSERT(slot < kNumTransferEntries);
2483 TCEntry *entry = &tc_slots_[slot];
2484 entry->head = start;
2485 entry->tail = end;
2486 return;
2487 }
2488 ReleaseListToSpans(start);
2489}
2490
2491void TCMalloc_Central_FreeList::RemoveRange(void **start, void **end, int *N) {
2492 int num = *N;
2493 ASSERT(num > 0);
2494
2495 SpinLockHolder h(&lock_);
2496 if (num == num_objects_to_move[size_class_] && used_slots_ > 0) {
2497 int slot = --used_slots_;
2498 ASSERT(slot >= 0);
2499 TCEntry *entry = &tc_slots_[slot];
2500 *start = entry->head;
2501 *end = entry->tail;
2502 return;
2503 }
2504
2505 // TODO: Prefetch multiple TCEntries?
2506 void *tail = FetchFromSpansSafe();
2507 if (!tail) {
2508 // We are completely out of memory.
2509 *start = *end = NULL;
2510 *N = 0;
2511 return;
2512 }
2513
2514 SLL_SetNext(tail, NULL);
2515 void *head = tail;
2516 int count = 1;
2517 while (count < num) {
2518 void *t = FetchFromSpans();
2519 if (!t) break;
2520 SLL_Push(&head, t);
2521 count++;
2522 }
2523 *start = head;
2524 *end = tail;
2525 *N = count;
2526}
2527
2528
2529void* TCMalloc_Central_FreeList::FetchFromSpansSafe() {
2530 void *t = FetchFromSpans();
2531 if (!t) {
2532 Populate();
2533 t = FetchFromSpans();
2534 }
2535 return t;
2536}
2537
2538void* TCMalloc_Central_FreeList::FetchFromSpans() {
2539 if (DLL_IsEmpty(&nonempty_)) return NULL;
2540 Span* span = nonempty_.next;
2541
2542 ASSERT(span->objects != NULL);
2543 ASSERT_SPAN_COMMITTED(span);
2544 span->refcount++;
2545 void* result = span->objects;
2546 span->objects = *(reinterpret_cast<void**>(result));
2547 if (span->objects == NULL) {
2548 // Move to empty list
2549 DLL_Remove(span);
2550 DLL_Prepend(&empty_, span);
2551 Event(span, 'E', 0);
2552 }
2553 counter_--;
2554 return result;
2555}
2556
2557// Fetch memory from the system and add to the central cache freelist.
2558ALWAYS_INLINE void TCMalloc_Central_FreeList::Populate() {
2559 // Release central list lock while operating on pageheap
2560 lock_.Unlock();
2561 const size_t npages = class_to_pages[size_class_];
2562
2563 Span* span;
2564 {
2565 SpinLockHolder h(&pageheap_lock);
2566 span = pageheap->New(npages);
2567 if (span) pageheap->RegisterSizeClass(span, size_class_);
2568 }
2569 if (span == NULL) {
2570 MESSAGE("allocation failed: %d\n", errno);
2571 lock_.Lock();
2572 return;
2573 }
2574 ASSERT_SPAN_COMMITTED(span);
2575 ASSERT(span->length == npages);
2576 // Cache sizeclass info eagerly. Locking is not necessary.
2577 // (Instead of being eager, we could just replace any stale info
2578 // about this span, but that seems to be no better in practice.)
2579 for (size_t i = 0; i < npages; i++) {
2580 pageheap->CacheSizeClass(span->start + i, size_class_);
2581 }
2582
2583 // Split the block into pieces and add to the free-list
2584 // TODO: coloring of objects to avoid cache conflicts?
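  // Thread a singly-linked free list through the span's own memory: each
  // object's first word points to the next object, and "tail" tracks where
  // the next link should be written.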
2585 void** tail = &span->objects;
2586 char* ptr = reinterpret_cast<char*>(span->start << kPageShift);
2587 char* limit = ptr + (npages << kPageShift);
2588 const size_t size = ByteSizeForClass(size_class_);
2589 int num = 0;
2590 char* nptr;
2591 while ((nptr = ptr + size) <= limit) {
2592 *tail = ptr;
2593 tail = reinterpret_cast<void**>(ptr);
2594 ptr = nptr;
2595 num++;
2596 }
2597 ASSERT(ptr <= limit);
2598 *tail = NULL;
2599 span->refcount = 0; // No sub-object in use yet
2600
2601 // Add span to list of non-empty spans
2602 lock_.Lock();
2603 DLL_Prepend(&nonempty_, span);
2604 counter_ += num;
2605}
2606
2607//-------------------------------------------------------------------
2608// TCMalloc_ThreadCache implementation
2609//-------------------------------------------------------------------
2610
2611inline bool TCMalloc_ThreadCache::SampleAllocation(size_t k) {
2612 if (bytes_until_sample_ < k) {
2613 PickNextSample(k);
2614 return true;
2615 } else {
2616 bytes_until_sample_ -= k;
2617 return false;
2618 }
2619}
2620
2621void TCMalloc_ThreadCache::Init(ThreadIdentifier tid) {
2622 size_ = 0;
2623 next_ = NULL;
2624 prev_ = NULL;
2625 tid_ = tid;
2626 in_setspecific_ = false;
2627 for (size_t cl = 0; cl < kNumClasses; ++cl) {
2628 list_[cl].Init();
2629 }
2630
2631 // Initialize RNG -- run it for a bit to get to good values
2632 bytes_until_sample_ = 0;
2633 rnd_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this));
2634 for (int i = 0; i < 100; i++) {
2635 PickNextSample(static_cast<size_t>(FLAGS_tcmalloc_sample_parameter * 2));
2636 }
2637}
2638
2639void TCMalloc_ThreadCache::Cleanup() {
2640 // Put unused memory back into central cache
2641 for (size_t cl = 0; cl < kNumClasses; ++cl) {
2642 if (list_[cl].length() > 0) {
2643 ReleaseToCentralCache(cl, list_[cl].length());
2644 }
2645 }
2646}
2647
2648ALWAYS_INLINE void* TCMalloc_ThreadCache::Allocate(size_t size) {
2649 ASSERT(size <= kMaxSize);
2650 const size_t cl = SizeClass(size);
2651 FreeList* list = &list_[cl];
2652 size_t allocationSize = ByteSizeForClass(cl);
2653 if (list->empty()) {
2654 FetchFromCentralCache(cl, allocationSize);
2655 if (list->empty()) return NULL;
2656 }
2657 size_ -= allocationSize;
2658 return list->Pop();
2659}
2660
2661inline void TCMalloc_ThreadCache::Deallocate(void* ptr, size_t cl) {
2662 size_ += ByteSizeForClass(cl);
2663 FreeList* list = &list_[cl];
2664 list->Push(ptr);
2665 // If enough data is free, put back into central cache
2666 if (list->length() > kMaxFreeListLength) {
2667 ReleaseToCentralCache(cl, num_objects_to_move[cl]);
2668 }
2669 if (size_ >= per_thread_cache_size) Scavenge();
2670}
2671
2672// Remove some objects of class "cl" from central cache and add to thread heap
2673ALWAYS_INLINE void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl, size_t allocationSize) {
2674 int fetch_count = num_objects_to_move[cl];
2675 void *start, *end;
2676 central_cache[cl].RemoveRange(&start, &end, &fetch_count);
2677 list_[cl].PushRange(fetch_count, start, end);
2678 size_ += allocationSize * fetch_count;
2679}
2680
2681// Remove some objects of class "cl" from thread heap and add to central cache
2682inline void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) {
2683 ASSERT(N > 0);
2684 FreeList* src = &list_[cl];
2685 if (N > src->length()) N = src->length();
2686 size_ -= N*ByteSizeForClass(cl);
2687
2688 // We return prepackaged chains of the correct size to the central cache.
2689 // TODO: Use the same format internally in the thread caches?
2690 int batch_size = num_objects_to_move[cl];
2691 while (N > batch_size) {
2692 void *tail, *head;
2693 src->PopRange(batch_size, &head, &tail);
2694 central_cache[cl].InsertRange(head, tail, batch_size);
2695 N -= batch_size;
2696 }
2697 void *tail, *head;
2698 src->PopRange(N, &head, &tail);
2699 central_cache[cl].InsertRange(head, tail, N);
2700}
2701
2702// Release idle memory to the central cache
2703inline void TCMalloc_ThreadCache::Scavenge() {
2704 // If the low-water mark for the free list is L, it means we would
2705 // not have had to allocate anything from the central cache even if
2706 // we had reduced the free list size by L. We aim to get closer to
2707 // that situation by dropping L/2 nodes from the free list. This
2708 // may not release much memory, but if so we will call scavenge again
2709 // pretty soon and the low-water marks will be high on that call.
2710 //int64 start = CycleClock::Now();
2711
2712 for (size_t cl = 0; cl < kNumClasses; cl++) {
2713 FreeList* list = &list_[cl];
2714 const int lowmark = list->lowwatermark();
2715 if (lowmark > 0) {
2716 const int drop = (lowmark > 1) ? lowmark/2 : 1;
2717 ReleaseToCentralCache(cl, drop);
2718 }
2719 list->clear_lowwatermark();
2720 }
2721
2722 //int64 finish = CycleClock::Now();
2723 //CycleTimer ct;
2724 //MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0);
2725}
2726
2727void TCMalloc_ThreadCache::PickNextSample(size_t k) {
2728 // Make next "random" number
2729 // x^32+x^22+x^2+x^1+1 is a primitive polynomial for random numbers
2730 static const uint32_t kPoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0);
2731 uint32_t r = rnd_;
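  // One step of a 32-bit linear-feedback shift register: shift left and, if
  // the old high bit was set, XOR in the feedback polynomial.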
2732 rnd_ = (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly);
2733
2734 // Next point is "rnd_ % (sample_period)". I.e., average
2735 // increment is "sample_period/2".
2736 const int flag_value = static_cast<int>(FLAGS_tcmalloc_sample_parameter);
2737 static int last_flag_value = -1;
2738
2739 if (flag_value != last_flag_value) {
2740 SpinLockHolder h(&sample_period_lock);
2741 int i;
2742 for (i = 0; i < (static_cast<int>(sizeof(primes_list)/sizeof(primes_list[0])) - 1); i++) {
2743 if (primes_list[i] >= flag_value) {
2744 break;
2745 }
2746 }
2747 sample_period = primes_list[i];
2748 last_flag_value = flag_value;
2749 }
2750
2751 bytes_until_sample_ += rnd_ % sample_period;
2752
2753 if (k > (static_cast<size_t>(-1) >> 2)) {
2754 // If the user has asked for a huge allocation then it is possible
2755 // for the code below to loop infinitely. Just return (note that
2756 // this throws off the sampling accuracy somewhat, but a user who
2757 // is allocating more than 1G of memory at a time can live with a
2758 // minor inaccuracy in profiling of small allocations, and also
2759 // would rather not wait for the loop below to terminate).
2760 return;
2761 }
2762
2763 while (bytes_until_sample_ < k) {
2764 // Increase bytes_until_sample_ by enough average sampling periods
2765 // (sample_period >> 1) to allow us to sample past the current
2766 // allocation.
2767 bytes_until_sample_ += (sample_period >> 1);
2768 }
2769
2770 bytes_until_sample_ -= k;
2771}
2772
2773void TCMalloc_ThreadCache::InitModule() {
2774 // There is a slight potential race here because of double-checked
2775 // locking idiom. However, as long as the program does a small
2776 // allocation before switching to multi-threaded mode, we will be
2777 // fine. We increase the chances of doing such a small allocation
2778 // by doing one in the constructor of the module_enter_exit_hook
2779 // object declared below.
2780 SpinLockHolder h(&pageheap_lock);
2781 if (!phinited) {
2782#ifdef WTF_CHANGES
2783 InitTSD();
2784#endif
2785 InitSizeClasses();
2786 threadheap_allocator.Init();
2787 span_allocator.Init();
2788 span_allocator.New(); // Reduce cache conflicts
2789 span_allocator.New(); // Reduce cache conflicts
2790 stacktrace_allocator.Init();
2791 DLL_Init(&sampled_objects);
2792 for (size_t i = 0; i < kNumClasses; ++i) {
2793 central_cache[i].Init(i);
2794 }
2795 pageheap->init();
2796 phinited = 1;
2797#if defined(WTF_CHANGES) && PLATFORM(DARWIN)
2798 FastMallocZone::init();
2799#endif
2800 }
2801}
2802
2803inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::NewHeap(ThreadIdentifier tid) {
2804 // Create the heap and add it to the linked list
2805 TCMalloc_ThreadCache *heap = threadheap_allocator.New();
2806 heap->Init(tid);
2807 heap->next_ = thread_heaps;
2808 heap->prev_ = NULL;
2809 if (thread_heaps != NULL) thread_heaps->prev_ = heap;
2810 thread_heaps = heap;
2811 thread_heap_count++;
2812 RecomputeThreadCacheSize();
2813 return heap;
2814}
2815
2816inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetThreadHeap() {
2817#ifdef HAVE_TLS
2818 // __thread is faster, but only when the kernel supports it
2819 if (KernelSupportsTLS())
2820 return threadlocal_heap;
2821#elif COMPILER(MSVC)
2822 return static_cast<TCMalloc_ThreadCache*>(TlsGetValue(tlsIndex));
2823#else
2824 return static_cast<TCMalloc_ThreadCache*>(pthread_getspecific(heap_key));
2825#endif
2826}
2827
2828inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() {
2829 TCMalloc_ThreadCache* ptr = NULL;
2830 if (!tsd_inited) {
2831 InitModule();
2832 } else {
2833 ptr = GetThreadHeap();
2834 }
2835 if (ptr == NULL) ptr = CreateCacheIfNecessary();
2836 return ptr;
2837}
2838
2839// In deletion paths, we do not try to create a thread-cache. This is
2840// because we may be in the thread destruction code and may have
2841// already cleaned up the cache for this thread.
2842inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() {
2843 if (!tsd_inited) return NULL;
2844 void* const p = GetThreadHeap();
2845 return reinterpret_cast<TCMalloc_ThreadCache*>(p);
2846}
2847
2848void TCMalloc_ThreadCache::InitTSD() {
2849 ASSERT(!tsd_inited);
2850 pthread_key_create(&heap_key, DestroyThreadCache);
2851#if COMPILER(MSVC)
2852 tlsIndex = TlsAlloc();
2853#endif
2854 tsd_inited = true;
2855
2856#if !COMPILER(MSVC)
2857 // We may have used a fake pthread_t for the main thread. Fix it.
2858 pthread_t zero;
2859 memset(&zero, 0, sizeof(zero));
2860#endif
2861#ifndef WTF_CHANGES
2862 SpinLockHolder h(&pageheap_lock);
2863#else
2864 ASSERT(pageheap_lock.IsHeld());
2865#endif
2866 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
2867#if COMPILER(MSVC)
2868 if (h->tid_ == 0) {
2869 h->tid_ = GetCurrentThreadId();
2870 }
2871#else
2872 if (pthread_equal(h->tid_, zero)) {
2873 h->tid_ = pthread_self();
2874 }
2875#endif
2876 }
2877}
2878
2879TCMalloc_ThreadCache* TCMalloc_ThreadCache::CreateCacheIfNecessary() {
2880 // Initialize per-thread data if necessary
2881 TCMalloc_ThreadCache* heap = NULL;
2882 {
2883 SpinLockHolder h(&pageheap_lock);
2884
2885#if COMPILER(MSVC)
2886 DWORD me;
2887 if (!tsd_inited) {
2888 me = 0;
2889 } else {
2890 me = GetCurrentThreadId();
2891 }
2892#else
2893 // Early on in glibc's life, we cannot even call pthread_self()
2894 pthread_t me;
2895 if (!tsd_inited) {
2896 memset(&me, 0, sizeof(me));
2897 } else {
2898 me = pthread_self();
2899 }
2900#endif
2901
2902 // This may be a recursive malloc call from pthread_setspecific()
2903 // In that case, the heap for this thread has already been created
2904 // and added to the linked list. So we search for that first.
2905 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
2906#if COMPILER(MSVC)
2907 if (h->tid_ == me) {
2908#else
2909 if (pthread_equal(h->tid_, me)) {
2910#endif
2911 heap = h;
2912 break;
2913 }
2914 }
2915
2916 if (heap == NULL) heap = NewHeap(me);
2917 }
2918
2919 // We call pthread_setspecific() outside the lock because it may
2920 // call malloc() recursively. The recursive call will never get
2921 // here again because it will find the already allocated heap in the
2922 // linked list of heaps.
2923 if (!heap->in_setspecific_ && tsd_inited) {
2924 heap->in_setspecific_ = true;
2925 setThreadHeap(heap);
2926 }
2927 return heap;
2928}
2929
2930void TCMalloc_ThreadCache::BecomeIdle() {
2931 if (!tsd_inited) return; // No caches yet
2932 TCMalloc_ThreadCache* heap = GetThreadHeap();
2933 if (heap == NULL) return; // No thread cache to remove
2934 if (heap->in_setspecific_) return; // Do not disturb the active caller
2935
2936 heap->in_setspecific_ = true;
2937 pthread_setspecific(heap_key, NULL);
2938#ifdef HAVE_TLS
2939 // Also update the copy in __thread
2940 threadlocal_heap = NULL;
2941#endif
2942 heap->in_setspecific_ = false;
2943 if (GetThreadHeap() == heap) {
2944 // Somehow heap got reinstated by a recursive call to malloc
2945 // from pthread_setspecific. We give up in this case.
2946 return;
2947 }
2948
2949 // We can now get rid of the heap
2950 DeleteCache(heap);
2951}
2952
2953void TCMalloc_ThreadCache::DestroyThreadCache(void* ptr) {
2954 // Note that "ptr" cannot be NULL since pthread promises not
2955 // to invoke the destructor on NULL values, but for safety,
2956 // we check anyway.
2957 if (ptr == NULL) return;
2958#ifdef HAVE_TLS
2959 // Prevent fast path of GetThreadHeap() from returning heap.
2960 threadlocal_heap = NULL;
2961#endif
2962 DeleteCache(reinterpret_cast<TCMalloc_ThreadCache*>(ptr));
2963}
2964
2965void TCMalloc_ThreadCache::DeleteCache(TCMalloc_ThreadCache* heap) {
2966 // Remove all memory from heap
2967 heap->Cleanup();
2968
2969 // Remove from linked list
2970 SpinLockHolder h(&pageheap_lock);
2971 if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_;
2972 if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_;
2973 if (thread_heaps == heap) thread_heaps = heap->next_;
2974 thread_heap_count--;
2975 RecomputeThreadCacheSize();
2976
2977 threadheap_allocator.Delete(heap);
2978}
2979
2980void TCMalloc_ThreadCache::RecomputeThreadCacheSize() {
2981 // Divide available space across threads
2982 int n = thread_heap_count > 0 ? thread_heap_count : 1;
2983 size_t space = overall_thread_cache_size / n;
2984
2985 // Limit to allowed range
2986 if (space < kMinThreadCacheSize) space = kMinThreadCacheSize;
2987 if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize;
2988
2989 per_thread_cache_size = space;
2990}
2991
2992void TCMalloc_ThreadCache::Print() const {
2993 for (size_t cl = 0; cl < kNumClasses; ++cl) {
2994 MESSAGE(" %5" PRIuS " : %4d len; %4d lo\n",
2995 ByteSizeForClass(cl),
2996 list_[cl].length(),
2997 list_[cl].lowwatermark());
2998 }
2999}
3000
3001// Extract interesting stats
3002struct TCMallocStats {
3003 uint64_t system_bytes; // Bytes alloced from system
3004 uint64_t thread_bytes; // Bytes in thread caches
3005 uint64_t central_bytes; // Bytes in central cache
3006 uint64_t transfer_bytes; // Bytes in central transfer cache
3007 uint64_t pageheap_bytes; // Bytes in page heap
3008 uint64_t metadata_bytes; // Bytes alloced for metadata
3009};
3010
3011#ifndef WTF_CHANGES
3012// Get stats into "r". Also get per-size-class counts if class_count != NULL
3013static void ExtractStats(TCMallocStats* r, uint64_t* class_count) {
3014 r->central_bytes = 0;
3015 r->transfer_bytes = 0;
3016 for (int cl = 0; cl < kNumClasses; ++cl) {
3017 const int length = central_cache[cl].length();
3018 const int tc_length = central_cache[cl].tc_length();
3019 r->central_bytes += static_cast<uint64_t>(ByteSizeForClass(cl)) * length;
3020 r->transfer_bytes +=
3021 static_cast<uint64_t>(ByteSizeForClass(cl)) * tc_length;
3022 if (class_count) class_count[cl] = length + tc_length;
3023 }
3024
3025 // Add stats from per-thread heaps
3026 r->thread_bytes = 0;
3027 { // scope
3028 SpinLockHolder h(&pageheap_lock);
3029 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
3030 r->thread_bytes += h->Size();
3031 if (class_count) {
3032 for (size_t cl = 0; cl < kNumClasses; ++cl) {
3033 class_count[cl] += h->freelist_length(cl);
3034 }
3035 }
3036 }
3037 }
3038
3039 { //scope
3040 SpinLockHolder h(&pageheap_lock);
3041 r->system_bytes = pageheap->SystemBytes();
3042 r->metadata_bytes = metadata_system_bytes;
3043 r->pageheap_bytes = pageheap->FreeBytes();
3044 }
3045}
3046#endif
3047
3048#ifndef WTF_CHANGES
3049// WRITE stats to "out"
3050static void DumpStats(TCMalloc_Printer* out, int level) {
3051 TCMallocStats stats;
3052 uint64_t class_count[kNumClasses];
3053 ExtractStats(&stats, (level >= 2 ? class_count : NULL));
3054
3055 if (level >= 2) {
3056 out->printf("------------------------------------------------\n");
3057 uint64_t cumulative = 0;
3058 for (int cl = 0; cl < kNumClasses; ++cl) {
3059 if (class_count[cl] > 0) {
3060 uint64_t class_bytes = class_count[cl] * ByteSizeForClass(cl);
3061 cumulative += class_bytes;
3062 out->printf("class %3d [ %8" PRIuS " bytes ] : "
3063 "%8" PRIu64 " objs; %5.1f MB; %5.1f cum MB\n",
3064 cl, ByteSizeForClass(cl),
3065 class_count[cl],
3066 class_bytes / 1048576.0,
3067 cumulative / 1048576.0);
3068 }
3069 }
3070
3071 SpinLockHolder h(&pageheap_lock);
3072 pageheap->Dump(out);
3073 }
3074
3075 const uint64_t bytes_in_use = stats.system_bytes
3076 - stats.pageheap_bytes
3077 - stats.central_bytes
3078 - stats.transfer_bytes
3079 - stats.thread_bytes;
3080
3081 out->printf("------------------------------------------------\n"
3082 "MALLOC: %12" PRIu64 " Heap size\n"
3083 "MALLOC: %12" PRIu64 " Bytes in use by application\n"
3084 "MALLOC: %12" PRIu64 " Bytes free in page heap\n"
3085 "MALLOC: %12" PRIu64 " Bytes free in central cache\n"
3086 "MALLOC: %12" PRIu64 " Bytes free in transfer cache\n"
3087 "MALLOC: %12" PRIu64 " Bytes free in thread caches\n"
3088 "MALLOC: %12" PRIu64 " Spans in use\n"
3089 "MALLOC: %12" PRIu64 " Thread heaps in use\n"
3090 "MALLOC: %12" PRIu64 " Metadata allocated\n"
3091 "------------------------------------------------\n",
3092 stats.system_bytes,
3093 bytes_in_use,
3094 stats.pageheap_bytes,
3095 stats.central_bytes,
3096 stats.transfer_bytes,
3097 stats.thread_bytes,
3098 uint64_t(span_allocator.inuse()),
3099 uint64_t(threadheap_allocator.inuse()),
3100 stats.metadata_bytes);
3101}
3102
3103static void PrintStats(int level) {
3104 const int kBufferSize = 16 << 10;
3105 char* buffer = new char[kBufferSize];
3106 TCMalloc_Printer printer(buffer, kBufferSize);
3107 DumpStats(&printer, level);
3108 write(STDERR_FILENO, buffer, strlen(buffer));
3109 delete[] buffer;
3110}
3111
3112static void** DumpStackTraces() {
3113 // Count how much space we need
3114 int needed_slots = 0;
3115 {
3116 SpinLockHolder h(&pageheap_lock);
3117 for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) {
3118 StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects);
3119 needed_slots += 3 + stack->depth;
3120 }
3121 needed_slots += 100; // Slop in case sample grows
3122 needed_slots += needed_slots/8; // An extra 12.5% slop
3123 }
3124
3125 void** result = new void*[needed_slots];
3126 if (result == NULL) {
3127 MESSAGE("tcmalloc: could not allocate %d slots for stack traces\n",
3128 needed_slots);
3129 return NULL;
3130 }
3131
3132 SpinLockHolder h(&pageheap_lock);
3133 int used_slots = 0;
3134 for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) {
3135 ASSERT(used_slots < needed_slots); // Need to leave room for terminator
3136 StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects);
3137 if (used_slots + 3 + stack->depth >= needed_slots) {
3138 // No more room
3139 break;
3140 }
3141
3142 result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1));
3143 result[used_slots+1] = reinterpret_cast<void*>(stack->size);
3144 result[used_slots+2] = reinterpret_cast<void*>(stack->depth);
3145 for (int d = 0; d < stack->depth; d++) {
3146 result[used_slots+3+d] = stack->stack[d];
3147 }
3148 used_slots += 3 + stack->depth;
3149 }
3150 result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0));
3151 return result;
3152}
3153#endif
3154
3155#ifndef WTF_CHANGES
3156
3157// TCMalloc's support for extra malloc interfaces
3158class TCMallocImplementation : public MallocExtension {
3159 public:
3160 virtual void GetStats(char* buffer, int buffer_length) {
3161 ASSERT(buffer_length > 0);
3162 TCMalloc_Printer printer(buffer, buffer_length);
3163
3164 // Print level one stats unless lots of space is available
3165 if (buffer_length < 10000) {
3166 DumpStats(&printer, 1);
3167 } else {
3168 DumpStats(&printer, 2);
3169 }
3170 }
3171
3172 virtual void** ReadStackTraces() {
3173 return DumpStackTraces();
3174 }
3175
3176 virtual bool GetNumericProperty(const char* name, size_t* value) {
3177 ASSERT(name != NULL);
3178
3179 if (strcmp(name, "generic.current_allocated_bytes") == 0) {
3180 TCMallocStats stats;
3181 ExtractStats(&stats, NULL);
3182 *value = stats.system_bytes
3183 - stats.thread_bytes
3184 - stats.central_bytes
3185 - stats.pageheap_bytes;
3186 return true;
3187 }
3188
3189 if (strcmp(name, "generic.heap_size") == 0) {
3190 TCMallocStats stats;
3191 ExtractStats(&stats, NULL);
3192 *value = stats.system_bytes;
3193 return true;
3194 }
3195
3196 if (strcmp(name, "tcmalloc.slack_bytes") == 0) {
3197 // We assume that bytes in the page heap are not fragmented too
3198 // badly, and are therefore available for allocation.
3199 SpinLockHolder l(&pageheap_lock);
3200 *value = pageheap->FreeBytes();
3201 return true;
3202 }
3203
3204 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
3205 SpinLockHolder l(&pageheap_lock);
3206 *value = overall_thread_cache_size;
3207 return true;
3208 }
3209
3210 if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) {
3211 TCMallocStats stats;
3212 ExtractStats(&stats, NULL);
3213 *value = stats.thread_bytes;
3214 return true;
3215 }
3216
3217 return false;
3218 }
3219
3220 virtual bool SetNumericProperty(const char* name, size_t value) {
3221 ASSERT(name != NULL);
3222
3223 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
3224 // Clip the value to a reasonable range
3225 if (value < kMinThreadCacheSize) value = kMinThreadCacheSize;
3226 if (value > (1<<30)) value = (1<<30); // Limit to 1GB
3227
3228 SpinLockHolder l(&pageheap_lock);
3229 overall_thread_cache_size = static_cast<size_t>(value);
3230 TCMalloc_ThreadCache::RecomputeThreadCacheSize();
3231 return true;
3232 }
3233
3234 return false;
3235 }
3236
3237 virtual void MarkThreadIdle() {
3238 TCMalloc_ThreadCache::BecomeIdle();
3239 }
3240
3241 virtual void ReleaseFreeMemory() {
3242 SpinLockHolder h(&pageheap_lock);
3243 pageheap->ReleaseFreePages();
3244 }
3245};
3246#endif
3247
3248// The constructor allocates an object to ensure that initialization
3249// runs before main(), and therefore we do not have a chance to become
3250// multi-threaded before initialization. We also create the TSD key
3251// here. Presumably by the time this constructor runs, glibc is in
3252// good enough shape to handle pthread_key_create().
3253//
3254// The constructor also takes the opportunity to tell STL to use
3255// tcmalloc. We want to do this early, before construct time, so
3256// all user STL allocations go through tcmalloc (which works really
3257// well for STL).
3258//
3259// The destructor prints stats when the program exits.
3260class TCMallocGuard {
3261 public:
3262
3263 TCMallocGuard() {
3264#ifdef HAVE_TLS // this is true if the cc/ld/libc combo support TLS
3265 // Check whether the kernel also supports TLS (needs to happen at runtime)
3266 CheckIfKernelSupportsTLS();
3267#endif
3268#ifndef WTF_CHANGES
3269#ifdef WIN32 // patch the windows VirtualAlloc, etc.
3270 PatchWindowsFunctions(); // defined in windows/patch_functions.cc
3271#endif
3272#endif
3273 free(malloc(1));
3274 TCMalloc_ThreadCache::InitTSD();
3275 free(malloc(1));
3276#ifndef WTF_CHANGES
3277 MallocExtension::Register(new TCMallocImplementation);
3278#endif
3279 }
3280
3281#ifndef WTF_CHANGES
3282 ~TCMallocGuard() {
3283 const char* env = getenv("MALLOCSTATS");
3284 if (env != NULL) {
3285 int level = atoi(env);
3286 if (level < 1) level = 1;
3287 PrintStats(level);
3288 }
3289#ifdef WIN32
3290 UnpatchWindowsFunctions();
3291#endif
3292 }
3293#endif
3294};
3295
3296#ifndef WTF_CHANGES
3297static TCMallocGuard module_enter_exit_hook;
3298#endif
3299
3300
3301//-------------------------------------------------------------------
3302// Helpers for the exported routines below
3303//-------------------------------------------------------------------
3304
3305#ifndef WTF_CHANGES
3306
3307static Span* DoSampledAllocation(size_t size) {
3308
3309 // Grab the stack trace outside the heap lock
3310 StackTrace tmp;
3311 tmp.depth = GetStackTrace(tmp.stack, kMaxStackDepth, 1);
3312 tmp.size = size;
3313
3314 SpinLockHolder h(&pageheap_lock);
3315 // Allocate span
3316 Span *span = pageheap->New(pages(size == 0 ? 1 : size));
3317 if (span == NULL) {
3318 return NULL;
3319 }
3320
3321 // Allocate stack trace
3322 StackTrace *stack = stacktrace_allocator.New();
3323 if (stack == NULL) {
3324 // Sampling failed because of lack of memory
3325 return span;
3326 }
3327
3328 *stack = tmp;
3329 span->sample = 1;
3330 span->objects = stack;
3331 DLL_Prepend(&sampled_objects, span);
3332
3333 return span;
3334}
3335#endif
3336
3337static inline bool CheckCachedSizeClass(void *ptr) {
3338 PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
3339 size_t cached_value = pageheap->GetSizeClassIfCached(p);
3340 return cached_value == 0 ||
3341 cached_value == pageheap->GetDescriptor(p)->sizeclass;
3342}
3343
3344static inline void* CheckedMallocResult(void *result)
3345{
3346 ASSERT(result == 0 || CheckCachedSizeClass(result));
3347 return result;
3348}
3349
3350static inline void* SpanToMallocResult(Span *span) {
3351 ASSERT_SPAN_COMMITTED(span);
3352 pageheap->CacheSizeClass(span->start, 0);
3353 return
3354 CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
3355}
3356
3357#ifdef WTF_CHANGES
3358template <bool crashOnFailure>
3359#endif
3360static ALWAYS_INLINE void* do_malloc(size_t size) {
3361 void* ret = NULL;
3362
3363#ifdef WTF_CHANGES
3364 ASSERT(!isForbidden());
3365#endif
3366
3367 // The following call forces module initialization
3368 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
3369#ifndef WTF_CHANGES
3370 if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
3371 Span* span = DoSampledAllocation(size);
3372 if (span != NULL) {
3373 ret = SpanToMallocResult(span);
3374 }
3375 } else
3376#endif
3377 if (size > kMaxSize) {
3378 // Use page-level allocator
3379 SpinLockHolder h(&pageheap_lock);
3380 Span* span = pageheap->New(pages(size));
3381 if (span != NULL) {
3382 ret = SpanToMallocResult(span);
3383 }
3384 } else {
3385 // The common case, and also the simplest. This just pops the
3386 // size-appropriate freelist, after replenishing it if it's empty.
3387 ret = CheckedMallocResult(heap->Allocate(size));
3388 }
3389 if (!ret) {
3390#ifdef WTF_CHANGES
3391 if (crashOnFailure) // This branch should be optimized out by the compiler.
3392 CRASH();
3393#else
3394 errno = ENOMEM;
3395#endif
3396 }
3397 return ret;
3398}
3399
3400static ALWAYS_INLINE void do_free(void* ptr) {
3401 if (ptr == NULL) return;
3402 ASSERT(pageheap != NULL); // Should not call free() before malloc()
3403 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
3404 Span* span = NULL;
3405 size_t cl = pageheap->GetSizeClassIfCached(p);
3406
3407 if (cl == 0) {
3408 span = pageheap->GetDescriptor(p);
3409 cl = span->sizeclass;
3410 pageheap->CacheSizeClass(p, cl);
3411 }
3412 if (cl != 0) {
3413#ifndef NO_TCMALLOC_SAMPLES
3414 ASSERT(!pageheap->GetDescriptor(p)->sample);
3415#endif
3416 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCacheIfPresent();
3417 if (heap != NULL) {
3418 heap->Deallocate(ptr, cl);
3419 } else {
3420 // Delete directly into central cache
3421 SLL_SetNext(ptr, NULL);
3422 central_cache[cl].InsertRange(ptr, ptr, 1);
3423 }
3424 } else {
3425 SpinLockHolder h(&pageheap_lock);
3426 ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
3427 ASSERT(span != NULL && span->start == p);
3428#ifndef NO_TCMALLOC_SAMPLES
3429 if (span->sample) {
3430 DLL_Remove(span);
3431 stacktrace_allocator.Delete(reinterpret_cast<StackTrace*>(span->objects));
3432 span->objects = NULL;
3433 }
3434#endif
3435 pageheap->Delete(span);
3436 }
3437}
3438
3439#ifndef WTF_CHANGES
3440// For use by exported routines below that want specific alignments
3441//
3442// Note: this code can be slow, and can significantly fragment memory.
3443// The expectation is that memalign/posix_memalign/valloc/pvalloc will
3444// not be invoked very often. This requirement simplifies our
3445// implementation and allows us to tune for expected allocation
3446// patterns.
3447static void* do_memalign(size_t align, size_t size) {
3448 ASSERT((align & (align - 1)) == 0);
3449 ASSERT(align > 0);
3450 if (pageheap == NULL) TCMalloc_ThreadCache::InitModule();
3451
3452 // Allocate at least one byte to avoid boundary conditions below
3453 if (size == 0) size = 1;
3454
3455 if (size <= kMaxSize && align < kPageSize) {
3456 // Search through acceptable size classes looking for one with
3457 // enough alignment. This depends on the fact that
3458 // InitSizeClasses() currently produces several size classes that
3459 // are aligned at powers of two. We will waste time and space if
3460 // we miss in the size class array, but that is deemed acceptable
3461 // since memalign() should be used rarely.
3462 size_t cl = SizeClass(size);
3463 while (cl < kNumClasses && ((class_to_size[cl] & (align - 1)) != 0)) {
3464 cl++;
3465 }
3466 if (cl < kNumClasses) {
3467 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
3468 return CheckedMallocResult(heap->Allocate(class_to_size[cl]));
3469 }
3470 }
3471
3472 // We will allocate directly from the page heap
3473 SpinLockHolder h(&pageheap_lock);
3474
3475 if (align <= kPageSize) {
3476 // Any page-level allocation will be fine
3477 // TODO: We could put the rest of this page in the appropriate
3478 // TODO: cache but it does not seem worth it.
3479 Span* span = pageheap->New(pages(size));
3480 return span == NULL ? NULL : SpanToMallocResult(span);
3481 }
3482
3483 // Allocate extra pages and carve off an aligned portion
3484 const Length alloc = pages(size + align);
3485 Span* span = pageheap->New(alloc);
3486 if (span == NULL) return NULL;
3487
3488 // Skip starting portion so that we end up aligned
3489 Length skip = 0;
3490 while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) {
3491 skip++;
3492 }
3493 ASSERT(skip < alloc);
3494 if (skip > 0) {
3495 Span* rest = pageheap->Split(span, skip);
3496 pageheap->Delete(span);
3497 span = rest;
3498 }
3499
3500 // Skip trailing portion that we do not need to return
3501 const Length needed = pages(size);
3502 ASSERT(span->length >= needed);
3503 if (span->length > needed) {
3504 Span* trailer = pageheap->Split(span, needed);
3505 pageheap->Delete(trailer);
3506 }
3507 return SpanToMallocResult(span);
3508}
3509#endif
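// Illustrative sketch (not compiled): the size-class search in do_memalign()
// above, restated with hypothetical class sizes. With classes of 40, 48, 64 and
// 80 bytes, memalign(64, 40) starts at the 40-byte class and walks forward until
// it reaches 64, the first class size that is a multiple of the alignment.
#if 0
static size_t exampleAlignedClassSearch(const size_t* classToSize, size_t numClasses,
                                        size_t startClass, size_t align)
{
    size_t cl = startClass;
    while (cl < numClasses && (classToSize[cl] & (align - 1)) != 0)
        cl++;
    return cl; // cl == numClasses means: fall back to the page-level allocator
}
#endif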
3510
3511// Helpers for use by exported routines below:
3512
3513#ifndef WTF_CHANGES
3514static inline void do_malloc_stats() {
3515 PrintStats(1);
3516}
3517#endif
3518
3519static inline int do_mallopt(int, int) {
3520 return 1; // Options are ignored; 1 is the success return in glibc's mallopt() convention
3521}
3522
3523#ifdef HAVE_STRUCT_MALLINFO // mallinfo isn't defined on freebsd, for instance
3524static inline struct mallinfo do_mallinfo() {
3525 TCMallocStats stats;
3526 ExtractStats(&stats, NULL);
3527
3528 // Just some of the fields are filled in.
3529 struct mallinfo info;
3530 memset(&info, 0, sizeof(info));
3531
3532 // Unfortunately, the struct contains "int" fields, so some of the
3533 // size values will be truncated.
3534 info.arena = static_cast<int>(stats.system_bytes);
3535 info.fsmblks = static_cast<int>(stats.thread_bytes
3536 + stats.central_bytes
3537 + stats.transfer_bytes);
3538 info.fordblks = static_cast<int>(stats.pageheap_bytes);
3539 info.uordblks = static_cast<int>(stats.system_bytes
3540 - stats.thread_bytes
3541 - stats.central_bytes
3542 - stats.transfer_bytes
3543 - stats.pageheap_bytes);
3544
3545 return info;
3546}
3547#endif
3548
3549//-------------------------------------------------------------------
3550// Exported routines
3551//-------------------------------------------------------------------
3552
3553// CAVEAT: The code structure below ensures that MallocHook methods are always
3554// called from the stack frame of the invoked allocation function.
3555// heap-checker.cc depends on this to start a stack trace from
3556// the call to the (de)allocation function.
3557
3558#ifndef WTF_CHANGES
3559extern "C"
3560#else
3561#define do_malloc do_malloc<crashOnFailure>
3562
3563template <bool crashOnFailure>
3564void* malloc(size_t);
3565
3566void* fastMalloc(size_t size)
3567{
3568 return malloc<true>(size);
3569}
3570
3571void* tryFastMalloc(size_t size)
3572{
3573 return malloc<false>(size);
3574}
3575
3576template <bool crashOnFailure>
3577ALWAYS_INLINE
3578#endif
3579void* malloc(size_t size) {
3580#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3581 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= size) // If overflow would occur...
3582 return 0;
3583 size += sizeof(AllocAlignmentInteger);
3584 void* result = do_malloc(size);
3585 if (!result)
3586 return 0;
3587
3588 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
3589 result = static_cast<AllocAlignmentInteger*>(result) + 1;
3590#else
3591 void* result = do_malloc(size);
3592#endif
3593
3594#ifndef WTF_CHANGES
3595 MallocHook::InvokeNewHook(result, size);
3596#endif
3597 return result;
3598}
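// Illustrative caller-side sketch (not compiled): the difference between the two
// entry points defined above. fastMalloc() (crashOnFailure == true) never returns
// 0 -- it CRASH()es on exhaustion -- while tryFastMalloc() returns 0 and leaves
// failure handling to the caller. Either way the matching deallocator is fastFree().
#if 0
static void exampleCaller(size_t byteCount)
{
    void* p = tryFastMalloc(byteCount);
    if (!p)
        return;                      // caller-chosen recovery path
    fastFree(p);

    void* q = fastMalloc(byteCount); // no null check needed
    fastFree(q);
}
#endif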
3599
3600#ifndef WTF_CHANGES
3601extern "C"
3602#endif
3603void free(void* ptr) {
3604#ifndef WTF_CHANGES
3605 MallocHook::InvokeDeleteHook(ptr);
3606#endif
3607
3608#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3609 if (!ptr)
3610 return;
3611
3612 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(ptr);
3613 if (*header != Internal::AllocTypeMalloc)
3614 Internal::fastMallocMatchFailed(ptr);
3615 do_free(header);
3616#else
3617 do_free(ptr);
3618#endif
3619}
3620
3621#ifndef WTF_CHANGES
3622extern "C"
3623#else
3624template <bool crashOnFailure>
3625void* calloc(size_t, size_t);
3626
3627void* fastCalloc(size_t n, size_t elem_size)
3628{
3629 return calloc<true>(n, elem_size);
3630}
3631
3632void* tryFastCalloc(size_t n, size_t elem_size)
3633{
3634 return calloc<false>(n, elem_size);
3635}
3636
3637template <bool crashOnFailure>
3638ALWAYS_INLINE
3639#endif
3640void* calloc(size_t n, size_t elem_size) {
3641 size_t totalBytes = n * elem_size;
3642
3643 // Protect against overflow
3644 if (n > 1 && elem_size && (totalBytes / elem_size) != n)
3645 return 0;
3646
3647#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3648 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= totalBytes) // If overflow would occur...
3649 return 0;
3650
3651 totalBytes += sizeof(AllocAlignmentInteger);
3652 void* result = do_malloc(totalBytes);
3653 if (!result)
3654 return 0;
3655
3656 memset(result, 0, totalBytes);
3657 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
3658 result = static_cast<AllocAlignmentInteger*>(result) + 1;
3659#else
3660 void* result = do_malloc(totalBytes);
3661 if (result != NULL) {
3662 memset(result, 0, totalBytes);
3663 }
3664#endif
3665
3666#ifndef WTF_CHANGES
3667 MallocHook::InvokeNewHook(result, totalBytes);
3668#endif
3669 return result;
3670}
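// Illustrative sketch (not compiled): the overflow guard at the top of calloc()
// above, with concrete numbers. On a 32-bit system, n = 0x10000 and
// elem_size = 0x10001 multiply to 0x100010000, which wraps to 0x10000; the
// division check catches that because 0x10000 / 0x10001 == 0, not n.
#if 0
static bool exampleWouldOverflow(size_t n, size_t elemSize)
{
    size_t totalBytes = n * elemSize;
    return n > 1 && elemSize && (totalBytes / elemSize) != n;
}
#endif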
3671
3672// Since cfree isn't used anywhere, we don't compile it in.
3673#ifndef WTF_CHANGES
3674#ifndef WTF_CHANGES
3675extern "C"
3676#endif
3677void cfree(void* ptr) {
3678#ifndef WTF_CHANGES
3679 MallocHook::InvokeDeleteHook(ptr);
3680#endif
3681 do_free(ptr);
3682}
3683#endif
3684
3685#ifndef WTF_CHANGES
3686extern "C"
3687#else
3688template <bool crashOnFailure>
3689void* realloc(void*, size_t);
3690
3691void* fastRealloc(void* old_ptr, size_t new_size)
3692{
3693 return realloc<true>(old_ptr, new_size);
3694}
3695
3696void* tryFastRealloc(void* old_ptr, size_t new_size)
3697{
3698 return realloc<false>(old_ptr, new_size);
3699}
3700
3701template <bool crashOnFailure>
3702ALWAYS_INLINE
3703#endif
3704void* realloc(void* old_ptr, size_t new_size) {
3705 if (old_ptr == NULL) {
3706#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3707 void* result = malloc(new_size);
3708#else
3709 void* result = do_malloc(new_size);
3710#ifndef WTF_CHANGES
3711 MallocHook::InvokeNewHook(result, new_size);
3712#endif
3713#endif
3714 return result;
3715 }
3716 if (new_size == 0) {
3717#ifndef WTF_CHANGES
3718 MallocHook::InvokeDeleteHook(old_ptr);
3719#endif
3720 free(old_ptr);
3721 return NULL;
3722 }
3723
3724#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3725 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= new_size) // If overflow would occur...
3726 return 0;
3727 new_size += sizeof(AllocAlignmentInteger);
3728 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(old_ptr);
3729 if (*header != Internal::AllocTypeMalloc)
3730 Internal::fastMallocMatchFailed(old_ptr);
3731 old_ptr = header;
3732#endif
3733
3734 // Get the size of the old entry
3735 const PageID p = reinterpret_cast<uintptr_t>(old_ptr) >> kPageShift;
3736 size_t cl = pageheap->GetSizeClassIfCached(p);
3737 Span *span = NULL;
3738 size_t old_size;
3739 if (cl == 0) {
3740 span = pageheap->GetDescriptor(p);
3741 cl = span->sizeclass;
3742 pageheap->CacheSizeClass(p, cl);
3743 }
3744 if (cl != 0) {
3745 old_size = ByteSizeForClass(cl);
3746 } else {
3747 ASSERT(span != NULL);
3748 old_size = span->length << kPageShift;
3749 }
3750
3751 // Reallocate if the new size is larger than the old size,
3752 // or if the new size is significantly smaller than the old size.
3753 if ((new_size > old_size) || (AllocationSize(new_size) < old_size)) {
3754 // Need to reallocate
3755 void* new_ptr = do_malloc(new_size);
3756 if (new_ptr == NULL) {
3757 return NULL;
3758 }
3759#ifndef WTF_CHANGES
3760 MallocHook::InvokeNewHook(new_ptr, new_size);
3761#endif
3762 memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size));
3763#ifndef WTF_CHANGES
3764 MallocHook::InvokeDeleteHook(old_ptr);
3765#endif
3766 // We could use a variant of do_free() that leverages the fact
3767 // that we already know the sizeclass of old_ptr. The benefit
3768 // would be small, so don't bother.
3769 do_free(old_ptr);
3770#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3771 new_ptr = static_cast<AllocAlignmentInteger*>(new_ptr) + 1;
3772#endif
3773 return new_ptr;
3774 } else {
3775#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3776 old_ptr = static_cast<AllocAlignmentInteger*>(old_ptr) + 1; // Set old_ptr back to the user pointer.
3777#endif
3778 return old_ptr;
3779 }
3780}
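// Illustrative sketch (not compiled): the resize policy in realloc() above,
// restated with hypothetical size classes of 64 and 128 bytes. Shrinking a
// 128-byte-class block to 100 bytes keeps the original pointer
// (AllocationSize(100) == 128 == old_size), while shrinking it to 40 bytes
// reallocates (AllocationSize(40) == 64 < 128).
#if 0
static bool exampleNeedsReallocation(size_t newSize, size_t oldSize, size_t newAllocationSize)
{
    return newSize > oldSize || newAllocationSize < oldSize;
}
#endif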
3781
3782#ifdef WTF_CHANGES
3783#undef do_malloc
3784#else
3785
3786static SpinLock set_new_handler_lock = SPINLOCK_INITIALIZER;
3787
3788static inline void* cpp_alloc(size_t size, bool nothrow) {
3789 for (;;) {
3790 void* p = do_malloc(size);
3791#ifdef PREANSINEW
3792 return p;
3793#else
3794 if (p == NULL) { // allocation failed
3795 // Get the current new handler. NB: this function is not
3796 // thread-safe. We make a feeble stab at making it so here, but
3797 // this lock only protects against tcmalloc interfering with
3798 // itself, not with other libraries calling set_new_handler.
3799 std::new_handler nh;
3800 {
3801 SpinLockHolder h(&set_new_handler_lock);
3802 nh = std::set_new_handler(0);
3803 (void) std::set_new_handler(nh);
3804 }
3805 // If no new_handler is established, the allocation failed.
3806 if (!nh) {
3807 if (nothrow) return 0;
3808 throw std::bad_alloc();
3809 }
3810 // Otherwise, try the new_handler. If it returns, retry the
3811 // allocation. If it throws std::bad_alloc, fail the allocation.
3812 // If it throws something else, don't interfere.
3813 try {
3814 (*nh)();
3815 } catch (const std::bad_alloc&) {
3816 if (!nothrow) throw;
3817 return p;
3818 }
3819 } else { // allocation success
3820 return p;
3821 }
3822#endif
3823 }
3824}
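// Illustrative, standalone sketch (not compiled here): a typical new_handler that
// the retry loop in cpp_alloc() above would invoke. The handler frees a reserve so
// the retried allocation can succeed, and throws std::bad_alloc once nothing is
// left to release. The reserve and its size are hypothetical.
#if 0
#include <new>

static char* exampleEmergencyReserve = new char[1 << 20]; // 1MB cushion

static void exampleNewHandler()
{
    if (!exampleEmergencyReserve)
        throw std::bad_alloc();       // nothing left to give back; stop retrying
    delete[] exampleEmergencyReserve; // return the cushion so the retry can succeed
    exampleEmergencyReserve = 0;
}

// Installed once by the application: std::set_new_handler(exampleNewHandler);
#endif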
3825
3826void* operator new(size_t size) {
3827 void* p = cpp_alloc(size, false);
3828 // We keep this next instruction out of cpp_alloc for a reason: when
3829 // it's in, and new just calls cpp_alloc, the optimizer may fold the
3830 // new call into cpp_alloc, which messes up our whole section-based
3831 // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc
3832 // isn't the last thing this fn calls, and prevents the folding.
3833 MallocHook::InvokeNewHook(p, size);
3834 return p;
3835}
3836
3837void* operator new(size_t size, const std::nothrow_t&) __THROW {
3838 void* p = cpp_alloc(size, true);
3839 MallocHook::InvokeNewHook(p, size);
3840 return p;
3841}
3842
3843void operator delete(void* p) __THROW {
3844 MallocHook::InvokeDeleteHook(p);
3845 do_free(p);
3846}
3847
3848void operator delete(void* p, const std::nothrow_t&) __THROW {
3849 MallocHook::InvokeDeleteHook(p);
3850 do_free(p);
3851}
3852
3853void* operator new[](size_t size) {
3854 void* p = cpp_alloc(size, false);
3855 // We keep this next instruction out of cpp_alloc for a reason: when
3856 // it's in, and new just calls cpp_alloc, the optimizer may fold the
3857 // new call into cpp_alloc, which messes up our whole section-based
3858 // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc
3859 // isn't the last thing this fn calls, and prevents the folding.
3860 MallocHook::InvokeNewHook(p, size);
3861 return p;
3862}
3863
3864void* operator new[](size_t size, const std::nothrow_t&) __THROW {
3865 void* p = cpp_alloc(size, true);
3866 MallocHook::InvokeNewHook(p, size);
3867 return p;
3868}
3869
3870void operator delete[](void* p) __THROW {
3871 MallocHook::InvokeDeleteHook(p);
3872 do_free(p);
3873}
3874
3875void operator delete[](void* p, const std::nothrow_t&) __THROW {
3876 MallocHook::InvokeDeleteHook(p);
3877 do_free(p);
3878}
3879
3880extern "C" void* memalign(size_t align, size_t size) __THROW {
3881 void* result = do_memalign(align, size);
3882 MallocHook::InvokeNewHook(result, size);
3883 return result;
3884}
3885
3886extern "C" int posix_memalign(void** result_ptr, size_t align, size_t size)
3887 __THROW {
3888 if (((align % sizeof(void*)) != 0) ||
3889 ((align & (align - 1)) != 0) ||
3890 (align == 0)) {
3891 return EINVAL;
3892 }
3893
3894 void* result = do_memalign(align, size);
3895 MallocHook::InvokeNewHook(result, size);
3896 if (result == NULL) {
3897 return ENOMEM;
3898 } else {
3899 *result_ptr = result;
3900 return 0;
3901 }
3902}
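// Illustrative sketch (not compiled): the alignment test at the top of
// posix_memalign() above. On a 64-bit system, align == 24 passes the
// sizeof(void*) test (24 % 8 == 0) but fails the power-of-two test
// (24 & 23 == 16 != 0), so posix_memalign() returns EINVAL.
#if 0
static bool exampleIsValidAlignment(size_t align)
{
    return align != 0
        && (align % sizeof(void*)) == 0  // multiple of pointer size
        && (align & (align - 1)) == 0;   // power of two
}
#endif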
3903
3904static size_t pagesize = 0;
3905
3906extern "C" void* valloc(size_t size) __THROW {
3907 // Allocate page-aligned object of length >= size bytes
3908 if (pagesize == 0) pagesize = getpagesize();
3909 void* result = do_memalign(pagesize, size);
3910 MallocHook::InvokeNewHook(result, size);
3911 return result;
3912}
3913
3914extern "C" void* pvalloc(size_t size) __THROW {
3915 // Round up size to a multiple of pagesize
3916 if (pagesize == 0) pagesize = getpagesize();
3917 size = (size + pagesize - 1) & ~(pagesize - 1);
3918 void* result = do_memalign(pagesize, size);
3919 MallocHook::InvokeNewHook(result, size);
3920 return result;
3921}
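// Illustrative sketch (not compiled): the page rounding in pvalloc() above with
// pagesize == 4096. (5000 + 4095) & ~4095 == 9095 & ~4095 == 8192, i.e. the next
// multiple of the page size.
#if 0
static size_t exampleRoundUpToPage(size_t size, size_t pagesize)
{
    return (size + pagesize - 1) & ~(pagesize - 1); // pagesize must be a power of two
}
#endif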
3922
3923extern "C" void malloc_stats(void) {
3924 do_malloc_stats();
3925}
3926
3927extern "C" int mallopt(int cmd, int value) {
3928 return do_mallopt(cmd, value);
3929}
3930
3931#ifdef HAVE_STRUCT_MALLINFO
3932extern "C" struct mallinfo mallinfo(void) {
3933 return do_mallinfo();
3934}
3935#endif
3936
3937//-------------------------------------------------------------------
3938// Some library routines on RedHat 9 allocate memory using malloc()
3939// and free it using __libc_free() (or vice-versa). Since we provide
3940// our own implementations of malloc/free, we need to make sure that
3941// the __libc_XXX variants (defined as part of glibc) also point to
3942// the same implementations.
3943//-------------------------------------------------------------------
3944
3945#if defined(__GLIBC__)
3946extern "C" {
3947#if COMPILER(GCC) && !defined(__MACH__) && defined(HAVE___ATTRIBUTE__)
3948 // Potentially faster variants that use the gcc alias extension.
3949 // Mach-O (Darwin) does not support weak aliases, hence the __MACH__ check.
3950# define ALIAS(x) __attribute__ ((weak, alias (x)))
3951 void* __libc_malloc(size_t size) ALIAS("malloc");
3952 void __libc_free(void* ptr) ALIAS("free");
3953 void* __libc_realloc(void* ptr, size_t size) ALIAS("realloc");
3954 void* __libc_calloc(size_t n, size_t size) ALIAS("calloc");
3955 void __libc_cfree(void* ptr) ALIAS("cfree");
3956 void* __libc_memalign(size_t align, size_t s) ALIAS("memalign");
3957 void* __libc_valloc(size_t size) ALIAS("valloc");
3958 void* __libc_pvalloc(size_t size) ALIAS("pvalloc");
3959 int __posix_memalign(void** r, size_t a, size_t s) ALIAS("posix_memalign");
3960# undef ALIAS
3961# else /* not __GNUC__ */
3962 // Portable wrappers
3963 void* __libc_malloc(size_t size) { return malloc(size); }
3964 void __libc_free(void* ptr) { free(ptr); }
3965 void* __libc_realloc(void* ptr, size_t size) { return realloc(ptr, size); }
3966 void* __libc_calloc(size_t n, size_t size) { return calloc(n, size); }
3967 void __libc_cfree(void* ptr) { cfree(ptr); }
3968 void* __libc_memalign(size_t align, size_t s) { return memalign(align, s); }
3969 void* __libc_valloc(size_t size) { return valloc(size); }
3970 void* __libc_pvalloc(size_t size) { return pvalloc(size); }
3971 int __posix_memalign(void** r, size_t a, size_t s) {
3972 return posix_memalign(r, a, s);
3973 }
3974# endif /* __GNUC__ */
3975}
3976#endif /* __GLIBC__ */
3977
3978// Override __libc_memalign in libc on linux boxes specially.
3979// They have a bug in libc that causes them to (very rarely) allocate
3980// with __libc_memalign() yet deallocate with free() and the
3981// definitions above don't catch it.
3982// This function is an exception to the rule of calling MallocHook method
3983// from the stack frame of the allocation function;
3984// heap-checker handles this special case explicitly.
3985static void *MemalignOverride(size_t align, size_t size, const void *caller)
3986 __THROW {
3987 void* result = do_memalign(align, size);
3988 MallocHook::InvokeNewHook(result, size);
3989 return result;
3990}
3991void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride;
3992
3993#endif
3994
3995#if defined(WTF_CHANGES) && PLATFORM(DARWIN)
3996
3997class FreeObjectFinder {
3998 const RemoteMemoryReader& m_reader;
3999 HashSet<void*> m_freeObjects;
4000
4001public:
4002 FreeObjectFinder(const RemoteMemoryReader& reader) : m_reader(reader) { }
4003
4004 void visit(void* ptr) { m_freeObjects.add(ptr); }
4005 bool isFreeObject(void* ptr) const { return m_freeObjects.contains(ptr); }
4006 bool isFreeObject(vm_address_t ptr) const { return isFreeObject(reinterpret_cast<void*>(ptr)); }
4007 size_t freeObjectCount() const { return m_freeObjects.size(); }
4008
4009 void findFreeObjects(TCMalloc_ThreadCache* threadCache)
4010 {
4011 for (; threadCache; threadCache = (threadCache->next_ ? m_reader(threadCache->next_) : 0))
4012 threadCache->enumerateFreeObjects(*this, m_reader);
4013 }
4014
4015 void findFreeObjects(TCMalloc_Central_FreeListPadded* centralFreeList, size_t numSizes, TCMalloc_Central_FreeListPadded* remoteCentralFreeList)
4016 {
4017 for (unsigned i = 0; i < numSizes; i++)
4018 centralFreeList[i].enumerateFreeObjects(*this, m_reader, remoteCentralFreeList + i);
4019 }
4020};
4021
4022class PageMapFreeObjectFinder {
4023 const RemoteMemoryReader& m_reader;
4024 FreeObjectFinder& m_freeObjectFinder;
4025
4026public:
4027 PageMapFreeObjectFinder(const RemoteMemoryReader& reader, FreeObjectFinder& freeObjectFinder)
4028 : m_reader(reader)
4029 , m_freeObjectFinder(freeObjectFinder)
4030 { }
4031
4032 int visit(void* ptr) const
4033 {
4034 if (!ptr)
4035 return 1;
4036
4037 Span* span = m_reader(reinterpret_cast<Span*>(ptr));
4038 if (span->free) {
4039 void* ptr = reinterpret_cast<void*>(span->start << kPageShift);
4040 m_freeObjectFinder.visit(ptr);
4041 } else if (span->sizeclass) {
4042 // Walk the free list of the small-object span, keeping track of each object seen
4043 for (void* nextObject = span->objects; nextObject; nextObject = *m_reader(reinterpret_cast<void**>(nextObject)))
4044 m_freeObjectFinder.visit(nextObject);
4045 }
4046 return span->length;
4047 }
4048};
4049
4050class PageMapMemoryUsageRecorder {
4051 task_t m_task;
4052 void* m_context;
4053 unsigned m_typeMask;
4054 vm_range_recorder_t* m_recorder;
4055 const RemoteMemoryReader& m_reader;
4056 const FreeObjectFinder& m_freeObjectFinder;
4057
4058 HashSet<void*> m_seenPointers;
4059 Vector<Span*> m_coalescedSpans;
4060
4061public:
4062 PageMapMemoryUsageRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader, const FreeObjectFinder& freeObjectFinder)
4063 : m_task(task)
4064 , m_context(context)
4065 , m_typeMask(typeMask)
4066 , m_recorder(recorder)
4067 , m_reader(reader)
4068 , m_freeObjectFinder(freeObjectFinder)
4069 { }
4070
4071 ~PageMapMemoryUsageRecorder()
4072 {
4073 ASSERT(!m_coalescedSpans.size());
4074 }
4075
4076 void recordPendingRegions()
4077 {
4078 Span* lastSpan = m_coalescedSpans[m_coalescedSpans.size() - 1];
4079 vm_range_t ptrRange = { m_coalescedSpans[0]->start << kPageShift, 0 };
4080 ptrRange.size = (lastSpan->start << kPageShift) - ptrRange.address + (lastSpan->length * kPageSize);
4081
4082 // Mark the memory region the spans represent as a candidate for containing pointers
4083 if (m_typeMask & MALLOC_PTR_REGION_RANGE_TYPE)
4084 (*m_recorder)(m_task, m_context, MALLOC_PTR_REGION_RANGE_TYPE, &ptrRange, 1);
4085
4086 if (!(m_typeMask & MALLOC_PTR_IN_USE_RANGE_TYPE)) {
4087 m_coalescedSpans.clear();
4088 return;
4089 }
4090
4091 Vector<vm_range_t, 1024> allocatedPointers;
4092 for (size_t i = 0; i < m_coalescedSpans.size(); ++i) {
4093 Span *theSpan = m_coalescedSpans[i];
4094 if (theSpan->free)
4095 continue;
4096
4097 vm_address_t spanStartAddress = theSpan->start << kPageShift;
4098 vm_size_t spanSizeInBytes = theSpan->length * kPageSize;
4099
4100 if (!theSpan->sizeclass) {
4101 // If it's an allocated large object span, mark it as in use
4102 if (!m_freeObjectFinder.isFreeObject(spanStartAddress))
4103 allocatedPointers.append((vm_range_t){spanStartAddress, spanSizeInBytes});
4104 } else {
4105 const size_t objectSize = ByteSizeForClass(theSpan->sizeclass);
4106
4107 // Mark each allocated small object within the span as in use
4108 const vm_address_t endOfSpan = spanStartAddress + spanSizeInBytes;
4109 for (vm_address_t object = spanStartAddress; object + objectSize <= endOfSpan; object += objectSize) {
4110 if (!m_freeObjectFinder.isFreeObject(object))
4111 allocatedPointers.append((vm_range_t){object, objectSize});
4112 }
4113 }
4114 }
4115
4116 (*m_recorder)(m_task, m_context, MALLOC_PTR_IN_USE_RANGE_TYPE, allocatedPointers.data(), allocatedPointers.size());
4117
4118 m_coalescedSpans.clear();
4119 }
4120
4121 int visit(void* ptr)
4122 {
4123 if (!ptr)
4124 return 1;
4125
4126 Span* span = m_reader(reinterpret_cast<Span*>(ptr));
4127 if (!span->start)
4128 return 1;
4129
4130 if (m_seenPointers.contains(ptr))
4131 return span->length;
4132 m_seenPointers.add(ptr);
4133
4134 if (!m_coalescedSpans.size()) {
4135 m_coalescedSpans.append(span);
4136 return span->length;
4137 }
4138
4139 Span* previousSpan = m_coalescedSpans[m_coalescedSpans.size() - 1];
4140 vm_address_t previousSpanStartAddress = previousSpan->start << kPageShift;
4141 vm_size_t previousSpanSizeInBytes = previousSpan->length * kPageSize;
4142
4143 // If the new span is adjacent to the previous span, do nothing for now.
4144 vm_address_t spanStartAddress = span->start << kPageShift;
4145 if (spanStartAddress == previousSpanStartAddress + previousSpanSizeInBytes) {
4146 m_coalescedSpans.append(span);
4147 return span->length;
4148 }
4149
4150 // New span is not adjacent to previous span, so record the spans coalesced so far.
4151 recordPendingRegions();
4152 m_coalescedSpans.append(span);
4153
4154 return span->length;
4155 }
4156};
4157
4158class AdminRegionRecorder {
4159 task_t m_task;
4160 void* m_context;
4161 unsigned m_typeMask;
4162 vm_range_recorder_t* m_recorder;
4163 const RemoteMemoryReader& m_reader;
4164
4165 Vector<vm_range_t, 1024> m_pendingRegions;
4166
4167public:
4168 AdminRegionRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader)
4169 : m_task(task)
4170 , m_context(context)
4171 , m_typeMask(typeMask)
4172 , m_recorder(recorder)
4173 , m_reader(reader)
4174 { }
4175
4176 void recordRegion(vm_address_t ptr, size_t size)
4177 {
4178 if (m_typeMask & MALLOC_ADMIN_REGION_RANGE_TYPE)
4179 m_pendingRegions.append((vm_range_t){ ptr, size });
4180 }
4181
4182 void visit(void *ptr, size_t size)
4183 {
4184 recordRegion(reinterpret_cast<vm_address_t>(ptr), size);
4185 }
4186
4187 void recordPendingRegions()
4188 {
4189 if (m_pendingRegions.size()) {
4190 (*m_recorder)(m_task, m_context, MALLOC_ADMIN_REGION_RANGE_TYPE, m_pendingRegions.data(), m_pendingRegions.size());
4191 m_pendingRegions.clear();
4192 }
4193 }
4194
4195 ~AdminRegionRecorder()
4196 {
4197 ASSERT(!m_pendingRegions.size());
4198 }
4199};
4200
4201kern_return_t FastMallocZone::enumerate(task_t task, void* context, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t reader, vm_range_recorder_t recorder)
4202{
4203 RemoteMemoryReader memoryReader(task, reader);
4204
4205 InitSizeClasses();
4206
4207 FastMallocZone* mzone = memoryReader(reinterpret_cast<FastMallocZone*>(zoneAddress));
4208 TCMalloc_PageHeap* pageHeap = memoryReader(mzone->m_pageHeap);
4209 TCMalloc_ThreadCache** threadHeapsPointer = memoryReader(mzone->m_threadHeaps);
4210 TCMalloc_ThreadCache* threadHeaps = memoryReader(*threadHeapsPointer);
4211
4212 TCMalloc_Central_FreeListPadded* centralCaches = memoryReader(mzone->m_centralCaches, sizeof(TCMalloc_Central_FreeListPadded) * kNumClasses);
4213
4214 FreeObjectFinder finder(memoryReader);
4215 finder.findFreeObjects(threadHeaps);
4216 finder.findFreeObjects(centralCaches, kNumClasses, mzone->m_centralCaches);
4217
4218 TCMalloc_PageHeap::PageMap* pageMap = &pageHeap->pagemap_;
4219 PageMapFreeObjectFinder pageMapFinder(memoryReader, finder);
4220 pageMap->visitValues(pageMapFinder, memoryReader);
4221
4222 PageMapMemoryUsageRecorder usageRecorder(task, context, typeMask, recorder, memoryReader, finder);
4223 pageMap->visitValues(usageRecorder, memoryReader);
4224 usageRecorder.recordPendingRegions();
4225
4226 AdminRegionRecorder adminRegionRecorder(task, context, typeMask, recorder, memoryReader);
4227 pageMap->visitAllocations(adminRegionRecorder, memoryReader);
4228
4229 PageHeapAllocator<Span>* spanAllocator = memoryReader(mzone->m_spanAllocator);
4230 PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator = memoryReader(mzone->m_pageHeapAllocator);
4231
4232 spanAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader);
4233 pageHeapAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader);
4234
4235 adminRegionRecorder.recordPendingRegions();
4236
4237 return 0;
4238}
4239
4240size_t FastMallocZone::size(malloc_zone_t*, const void*)
4241{
4242 return 0;
4243}
4244
4245void* FastMallocZone::zoneMalloc(malloc_zone_t*, size_t)
4246{
4247 return 0;
4248}
4249
4250void* FastMallocZone::zoneCalloc(malloc_zone_t*, size_t, size_t)
4251{
4252 return 0;
4253}
4254
4255void FastMallocZone::zoneFree(malloc_zone_t*, void* ptr)
4256{
4257 // Due to <rdar://problem/5671357> zoneFree may be called by the system free even if the pointer
4258 // is not in this zone. When this happens, the pointer being freed was not allocated by any
4259 // zone so we need to print a useful error for the application developer.
4260 malloc_printf("*** error for object %p: pointer being freed was not allocated\n", ptr);
4261}
4262
4263void* FastMallocZone::zoneRealloc(malloc_zone_t*, void*, size_t)
4264{
4265 return 0;
4266}
4267
4268
4269#undef malloc
4270#undef free
4271#undef realloc
4272#undef calloc
4273
4274extern "C" {
4275malloc_introspection_t jscore_fastmalloc_introspection = { &FastMallocZone::enumerate, &FastMallocZone::goodSize, &FastMallocZone::check, &FastMallocZone::print,
4276 &FastMallocZone::log, &FastMallocZone::forceLock, &FastMallocZone::forceUnlock, &FastMallocZone::statistics
4277
4278#if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !PLATFORM(IPHONE)
4279 , 0 // zone_locked will not be called on the zone unless it advertises itself as version five or higher.
4280#endif
4281
4282 };
4283}
4284
4285FastMallocZone::FastMallocZone(TCMalloc_PageHeap* pageHeap, TCMalloc_ThreadCache** threadHeaps, TCMalloc_Central_FreeListPadded* centralCaches, PageHeapAllocator<Span>* spanAllocator, PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator)
4286 : m_pageHeap(pageHeap)
4287 , m_threadHeaps(threadHeaps)
4288 , m_centralCaches(centralCaches)
4289 , m_spanAllocator(spanAllocator)
4290 , m_pageHeapAllocator(pageHeapAllocator)
4291{
4292 memset(&m_zone, 0, sizeof(m_zone));
4293 m_zone.version = 4;
4294 m_zone.zone_name = "JavaScriptCore FastMalloc";
4295 m_zone.size = &FastMallocZone::size;
4296 m_zone.malloc = &FastMallocZone::zoneMalloc;
4297 m_zone.calloc = &FastMallocZone::zoneCalloc;
4298 m_zone.realloc = &FastMallocZone::zoneRealloc;
4299 m_zone.free = &FastMallocZone::zoneFree;
4300 m_zone.valloc = &FastMallocZone::zoneValloc;
4301 m_zone.destroy = &FastMallocZone::zoneDestroy;
4302 m_zone.introspect = &jscore_fastmalloc_introspection;
4303 malloc_zone_register(&m_zone);
4304}
4305
4306
4307void FastMallocZone::init()
4308{
4309 static FastMallocZone zone(pageheap, &thread_heaps, static_cast<TCMalloc_Central_FreeListPadded*>(central_cache), &span_allocator, &threadheap_allocator);
4310}
4311
4312#endif
4313
4314#if WTF_CHANGES
4315void releaseFastMallocFreeMemory()
4316{
4317 // Flush free pages in the current thread cache back to the page heap.
4318 // Low watermark mechanism in Scavenge() prevents full return on the first pass.
4319 // The second pass flushes everything.
4320 if (TCMalloc_ThreadCache* threadCache = TCMalloc_ThreadCache::GetCacheIfPresent()) {
4321 threadCache->Scavenge();
4322 threadCache->Scavenge();
4323 }
4324
4325 SpinLockHolder h(&pageheap_lock);
4326 pageheap->ReleaseFreePages();
4327}
4328
4329FastMallocStatistics fastMallocStatistics()
4330{
4331 FastMallocStatistics statistics;
4332 {
4333 SpinLockHolder lockHolder(&pageheap_lock);
4334 statistics.heapSize = static_cast<size_t>(pageheap->SystemBytes());
4335 statistics.freeSizeInHeap = static_cast<size_t>(pageheap->FreeBytes());
4336 statistics.returnedSize = pageheap->ReturnedBytes();
4337 statistics.freeSizeInCaches = 0;
4338 for (TCMalloc_ThreadCache* threadCache = thread_heaps; threadCache ; threadCache = threadCache->next_)
4339 statistics.freeSizeInCaches += threadCache->Size();
4340 }
4341 for (unsigned cl = 0; cl < kNumClasses; ++cl) {
4342 const int length = central_cache[cl].length();
4343 const int tc_length = central_cache[cl].tc_length();
4344 statistics.freeSizeInCaches += ByteSizeForClass(cl) * (length + tc_length);
4345 }
4346 return statistics;
4347}
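// Illustrative caller-side sketch (not compiled): combining the two entry points
// above. The field meanings follow the code in fastMallocStatistics().
#if 0
static void exampleReleaseAndSample()
{
    releaseFastMallocFreeMemory();                        // hand free pages back to the system
    FastMallocStatistics stats = fastMallocStatistics();
    (void)stats.heapSize;          // bytes the page heap has obtained from the system
    (void)stats.freeSizeInHeap;    // free bytes still held by the page heap
    (void)stats.freeSizeInCaches;  // free bytes held in thread and central caches
    (void)stats.returnedSize;      // bytes already returned to the system
}
#endif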
4348
4349} // namespace WTF
4350#endif
4351
4352#endif // FORCE_SYSTEM_MALLOC