source: webkit/trunk/JavaScriptCore/wtf/FastMalloc.cpp@46648

Last change on this file since 46648 was 46648, checked in by [email protected], 16 years ago

Fixes the Windows release-PGO build.

Reviewed by Jon Honeycutt.

  • JavaScriptCore.vcproj/WTF/WTF.vcproj: Suppresses the warning about unreachable code that we get by adding "return 0" to WTF::TCMalloc_PageHeap::runScavengerThread().
  • wtf/FastMalloc.cpp: (WTF::TCMalloc_PageHeap::runScavengerThread): Fixes the error about the method not returning a value in the release-PGO build.
Property svn:eol-style set to native
File size: 137.5 KB
1// Copyright (c) 2005, 2007, Google Inc.
2// All rights reserved.
3// Copyright (C) 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// ---
32// Author: Sanjay Ghemawat <[email protected]>
33//
34// A malloc that uses a per-thread cache to satisfy small malloc requests.
35// (The time for malloc/free of a small object drops from 300 ns to 50 ns.)
36//
37// See doc/tcmalloc.html for a high-level
38// description of how this malloc works.
39//
40// SYNCHRONIZATION
41// 1. The thread-specific lists are accessed without acquiring any locks.
42// This is safe because each such list is only accessed by one thread.
43// 2. We have a lock per central free-list, and hold it while manipulating
44// the central free list for a particular size.
45// 3. The central page allocator is protected by "pageheap_lock".
46// 4. The pagemap (which maps from page-number to descriptor),
47// can be read without holding any locks, and written while holding
48// the "pageheap_lock".
49// 5. To improve performance, a subset of the information one can get
50// from the pagemap is cached in a data structure, pagemap_cache_,
51// that atomically reads and writes its entries. This cache can be
52// read and written without locking.
53//
54// This multi-threaded access to the pagemap is safe for fairly
55// subtle reasons. We basically assume that when an object X is
56// allocated by thread A and deallocated by thread B, there must
57// have been appropriate synchronization in the handoff of object
58// X from thread A to thread B. The same logic applies to pagemap_cache_.
59//
60// THE PAGEID-TO-SIZECLASS CACHE
61// Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache
62// returns 0 for a particular PageID then that means "no information," not that
63// the sizeclass is 0. The cache may have stale information for pages that do
64// not hold the beginning of any free()'able object. Staleness is eliminated
65// in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and
66// do_memalign() for all other relevant pages.
67//
68// TODO: Bias reclamation to larger addresses
69// TODO: implement mallinfo/mallopt
70// TODO: Better testing
71//
72// 9/28/2003 (new page-level allocator replaces ptmalloc2):
73// * malloc/free of small objects goes from ~300 ns to ~50 ns.
74// * allocation of a reasonably complicated struct
75// goes from about 1100 ns to about 300 ns.
76
77#include "config.h"
78#include "FastMalloc.h"
79
80#include "Assertions.h"
81#include <limits>
82#if ENABLE(JSC_MULTIPLE_THREADS)
83#include <pthread.h>
84#endif
85
86#ifndef NO_TCMALLOC_SAMPLES
87#ifdef WTF_CHANGES
88#define NO_TCMALLOC_SAMPLES
89#endif
90#endif
91
92#if !defined(USE_SYSTEM_MALLOC) && defined(NDEBUG)
93#define FORCE_SYSTEM_MALLOC 0
94#else
95#define FORCE_SYSTEM_MALLOC 1
96#endif
97
98
99// Use a background thread to periodically scavenge memory to release back to the system
100#define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1
101
102#ifndef NDEBUG
103namespace WTF {
104
105#if ENABLE(JSC_MULTIPLE_THREADS)
106static pthread_key_t isForbiddenKey;
107static pthread_once_t isForbiddenKeyOnce = PTHREAD_ONCE_INIT;
108static void initializeIsForbiddenKey()
109{
110 pthread_key_create(&isForbiddenKey, 0);
111}
112
113static bool isForbidden()
114{
115 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
116 return !!pthread_getspecific(isForbiddenKey);
117}
118
119void fastMallocForbid()
120{
121 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
122 pthread_setspecific(isForbiddenKey, &isForbiddenKey);
123}
124
125void fastMallocAllow()
126{
127 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
128 pthread_setspecific(isForbiddenKey, 0);
129}
130
131#else
132
133static bool staticIsForbidden;
134static bool isForbidden()
135{
136 return staticIsForbidden;
137}
138
139void fastMallocForbid()
140{
141 staticIsForbidden = true;
142}
143
144void fastMallocAllow()
145{
146 staticIsForbidden = false;
147}
148#endif // ENABLE(JSC_MULTIPLE_THREADS)
149
150} // namespace WTF
151#endif // NDEBUG
152
153#include <string.h>
154
155namespace WTF {
156
157#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
158
159namespace Internal {
160
161void fastMallocMatchFailed(void*)
162{
163 CRASH();
164}
165
166} // namespace Internal
167
168#endif
169
170void* fastZeroedMalloc(size_t n)
171{
172 void* result = fastMalloc(n);
173 memset(result, 0, n);
174 return result;
175}
176
177void* tryFastZeroedMalloc(size_t n)
178{
179 void* result = tryFastMalloc(n);
180 if (!result)
181 return 0;
182 memset(result, 0, n);
183 return result;
184}
185
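// A minimal usage sketch (hypothetical caller, guarded out so it is never compiled):
// fastZeroedMalloc() crashes on allocation failure, while tryFastZeroedMalloc()
// returns 0 and leaves recovery to the caller.
#if 0
static void* allocateScratchBuffer(size_t byteCount)
{
    void* buffer = tryFastZeroedMalloc(byteCount);
    if (!buffer)
        return 0;      // caller handles the out-of-memory case
    return buffer;     // zero-filled, released later with fastFree()
}
#endif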
186} // namespace WTF
187
188#if FORCE_SYSTEM_MALLOC
189
190#include <stdlib.h>
191#if !PLATFORM(WIN_OS)
192 #include <pthread.h>
193#else
194 #include "windows.h"
195#endif
196
197namespace WTF {
198
199void* tryFastMalloc(size_t n)
200{
201 ASSERT(!isForbidden());
202
203#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
204 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= n) // If overflow would occur...
205 return 0;
206
207 void* result = malloc(n + sizeof(AllocAlignmentInteger));
208 if (!result)
209 return 0;
210
211 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
212 result = static_cast<AllocAlignmentInteger*>(result) + 1;
213
214 return result;
215#else
216 return malloc(n);
217#endif
218}
219
220void* fastMalloc(size_t n)
221{
222 ASSERT(!isForbidden());
223
224#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
225 void* result = tryFastMalloc(n);
226#else
227 void* result = malloc(n);
228#endif
229
230 if (!result)
231 CRASH();
232 return result;
233}
234
235void* tryFastCalloc(size_t n_elements, size_t element_size)
236{
237 ASSERT(!isForbidden());
238
239#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
240 size_t totalBytes = n_elements * element_size;
241 if (n_elements > 1 && element_size && (totalBytes / element_size) != n_elements || (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= totalBytes))
242 return 0;
243
244 totalBytes += sizeof(AllocAlignmentInteger);
245 void* result = malloc(totalBytes);
246 if (!result)
247 return 0;
248
249 memset(result, 0, totalBytes);
250 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
251 result = static_cast<AllocAlignmentInteger*>(result) + 1;
252 return result;
253#else
254 return calloc(n_elements, element_size);
255#endif
256}
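// Worked example of the overflow check above (illustrative, assuming a 32-bit size_t):
// n_elements == 0x10000 and element_size == 0x10001 multiply to 0x100010000, which
// wraps to 0x10000, so totalBytes / element_size == 0 != n_elements and the call
// returns 0 instead of handing back an undersized block.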
257
258void* fastCalloc(size_t n_elements, size_t element_size)
259{
260 ASSERT(!isForbidden());
261
262#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
263 void* result = tryFastCalloc(n_elements, element_size);
264#else
265 void* result = calloc(n_elements, element_size);
266#endif
267
268 if (!result)
269 CRASH();
270 return result;
271}
272
273void fastFree(void* p)
274{
275 ASSERT(!isForbidden());
276
277#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
278 if (!p)
279 return;
280
281 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(p);
282 if (*header != Internal::AllocTypeMalloc)
283 Internal::fastMallocMatchFailed(p);
284 free(header);
285#else
286 free(p);
287#endif
288}
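// Sketch of the FAST_MALLOC_MATCH_VALIDATION layout assumed by the functions above:
// a single AllocAlignmentInteger tag is stored immediately before the pointer handed
// to the caller, and fastFree() and tryFastRealloc() step back one slot to verify it
// before operating on the real allocation.
//
//     pointer returned to caller --+
//                                  v
//     [ AllocAlignmentInteger tag ][ user data ... ]
//
// Hypothetical helper showing the same walk-back by hand (guarded out):
#if 0
static bool blockCarriesMallocTag(void* p)
{
    AllocAlignmentInteger* header = static_cast<AllocAlignmentInteger*>(p) - 1;
    return *header == Internal::AllocTypeMalloc;
}
#endif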
289
290void* tryFastRealloc(void* p, size_t n)
291{
292 ASSERT(!isForbidden());
293
294#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
295 if (p) {
296 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= n) // If overflow would occur...
297 return 0;
298 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(p);
299 if (*header != Internal::AllocTypeMalloc)
300 Internal::fastMallocMatchFailed(p);
301 void* result = realloc(header, n + sizeof(AllocAlignmentInteger));
302 if (!result)
303 return 0;
304
305 // This should not be needed because the value is already there:
306 // *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
307 result = static_cast<AllocAlignmentInteger*>(result) + 1;
308 return result;
309 } else {
310 return fastMalloc(n);
311 }
312#else
313 return realloc(p, n);
314#endif
315}
316
317void* fastRealloc(void* p, size_t n)
318{
319 ASSERT(!isForbidden());
320
321#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
322 void* result = tryFastRealloc(p, n);
323#else
324 void* result = realloc(p, n);
325#endif
326
327 if (!result)
328 CRASH();
329 return result;
330}
331
332void releaseFastMallocFreeMemory() { }
333
334FastMallocStatistics fastMallocStatistics()
335{
336 FastMallocStatistics statistics = { 0, 0, 0, 0 };
337 return statistics;
338}
339
340} // namespace WTF
341
342#if PLATFORM(DARWIN)
343// This symbol is present in the JavaScriptCore exports file even when FastMalloc is disabled.
344// It will never be used in this case, so its type and value are less interesting than its presence.
345extern "C" const int jscore_fastmalloc_introspection = 0;
346#endif
347
348#else // FORCE_SYSTEM_MALLOC
349
350#if HAVE(STDINT_H)
351#include <stdint.h>
352#elif HAVE(INTTYPES_H)
353#include <inttypes.h>
354#else
355#include <sys/types.h>
356#endif
357
358#include "AlwaysInline.h"
359#include "Assertions.h"
360#include "TCPackedCache.h"
361#include "TCPageMap.h"
362#include "TCSpinLock.h"
363#include "TCSystemAlloc.h"
364#include <algorithm>
365#include <errno.h>
366#include <limits>
367#include <new>
368#include <pthread.h>
369#include <stdarg.h>
370#include <stddef.h>
371#include <stdio.h>
372#if COMPILER(MSVC)
373#ifndef WIN32_LEAN_AND_MEAN
374#define WIN32_LEAN_AND_MEAN
375#endif
376#include <windows.h>
377#endif
378
379#if WTF_CHANGES
380
381#if PLATFORM(DARWIN)
382#include "MallocZoneSupport.h"
383#include <wtf/HashSet.h>
384#endif
385
386#ifndef PRIuS
387#define PRIuS "zu"
388#endif
389
390// Calling pthread_getspecific through a global function pointer is faster than a normal
391// call to the function on Mac OS X, and it's used in performance-critical code. So we
392// use a function pointer. But that's not necessarily faster on other platforms, and we had
393// problems with this technique on Windows, so we'll do this only on Mac OS X.
394#if PLATFORM(DARWIN)
395static void* (*pthread_getspecific_function_pointer)(pthread_key_t) = pthread_getspecific;
396#define pthread_getspecific(key) pthread_getspecific_function_pointer(key)
397#endif
398
399#define DEFINE_VARIABLE(type, name, value, meaning) \
400 namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \
401 type FLAGS_##name(value); \
402 char FLAGS_no##name; \
403 } \
404 using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name
405
406#define DEFINE_int64(name, value, meaning) \
407 DEFINE_VARIABLE(int64_t, name, value, meaning)
408
409#define DEFINE_double(name, value, meaning) \
410 DEFINE_VARIABLE(double, name, value, meaning)
411
412namespace WTF {
413
414#define malloc fastMalloc
415#define calloc fastCalloc
416#define free fastFree
417#define realloc fastRealloc
418
419#define MESSAGE LOG_ERROR
420#define CHECK_CONDITION ASSERT
421
422#if PLATFORM(DARWIN)
423class Span;
424class TCMalloc_Central_FreeListPadded;
425class TCMalloc_PageHeap;
426class TCMalloc_ThreadCache;
427template <typename T> class PageHeapAllocator;
428
429class FastMallocZone {
430public:
431 static void init();
432
433 static kern_return_t enumerate(task_t, void*, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t, vm_range_recorder_t);
434 static size_t goodSize(malloc_zone_t*, size_t size) { return size; }
435 static boolean_t check(malloc_zone_t*) { return true; }
436 static void print(malloc_zone_t*, boolean_t) { }
437 static void log(malloc_zone_t*, void*) { }
438 static void forceLock(malloc_zone_t*) { }
439 static void forceUnlock(malloc_zone_t*) { }
440 static void statistics(malloc_zone_t*, malloc_statistics_t* stats) { memset(stats, 0, sizeof(malloc_statistics_t)); }
441
442private:
443 FastMallocZone(TCMalloc_PageHeap*, TCMalloc_ThreadCache**, TCMalloc_Central_FreeListPadded*, PageHeapAllocator<Span>*, PageHeapAllocator<TCMalloc_ThreadCache>*);
444 static size_t size(malloc_zone_t*, const void*);
445 static void* zoneMalloc(malloc_zone_t*, size_t);
446 static void* zoneCalloc(malloc_zone_t*, size_t numItems, size_t size);
447 static void zoneFree(malloc_zone_t*, void*);
448 static void* zoneRealloc(malloc_zone_t*, void*, size_t);
449 static void* zoneValloc(malloc_zone_t*, size_t) { LOG_ERROR("valloc is not supported"); return 0; }
450 static void zoneDestroy(malloc_zone_t*) { }
451
452 malloc_zone_t m_zone;
453 TCMalloc_PageHeap* m_pageHeap;
454 TCMalloc_ThreadCache** m_threadHeaps;
455 TCMalloc_Central_FreeListPadded* m_centralCaches;
456 PageHeapAllocator<Span>* m_spanAllocator;
457 PageHeapAllocator<TCMalloc_ThreadCache>* m_pageHeapAllocator;
458};
459
460#endif
461
462#endif
463
464#ifndef WTF_CHANGES
465// This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if
466// you're porting to a system where you really can't get a stacktrace.
467#ifdef NO_TCMALLOC_SAMPLES
468// We use #define so code compiles even if you #include stacktrace.h somehow.
469# define GetStackTrace(stack, depth, skip) (0)
470#else
471# include <google/stacktrace.h>
472#endif
473#endif
474
475// Even if we have support for thread-local storage in the compiler
476// and linker, the OS may not support it. We need to check that at
477// runtime. Right now, we have to keep a manual set of "bad" OSes.
478#if defined(HAVE_TLS)
479 static bool kernel_supports_tls = false; // be conservative
480 static inline bool KernelSupportsTLS() {
481 return kernel_supports_tls;
482 }
483# if !HAVE_DECL_UNAME // if too old for uname, probably too old for TLS
484 static void CheckIfKernelSupportsTLS() {
485 kernel_supports_tls = false;
486 }
487# else
488# include <sys/utsname.h> // DECL_UNAME checked for <sys/utsname.h> too
489 static void CheckIfKernelSupportsTLS() {
490 struct utsname buf;
491 if (uname(&buf) != 0) { // should be impossible
492 MESSAGE("uname failed assuming no TLS support (errno=%d)\n", errno);
493 kernel_supports_tls = false;
494 } else if (strcasecmp(buf.sysname, "linux") == 0) {
495 // The linux case: the first kernel to support TLS was 2.6.0
496 if (buf.release[0] < '2' && buf.release[1] == '.') // 0.x or 1.x
497 kernel_supports_tls = false;
498 else if (buf.release[0] == '2' && buf.release[1] == '.' &&
499 buf.release[2] >= '0' && buf.release[2] < '6' &&
500 buf.release[3] == '.') // 2.0 - 2.5
501 kernel_supports_tls = false;
502 else
503 kernel_supports_tls = true;
504 } else { // some other kernel, we'll be optimistic
505 kernel_supports_tls = true;
506 }
507 // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG
508 }
509# endif // HAVE_DECL_UNAME
510#endif // HAVE_TLS
511
512// __THROW is defined in glibc systems. It means, counter-intuitively,
513// "This function will never throw an exception." It's an optional
514// optimization tool, but we may need to use it to match glibc prototypes.
515#ifndef __THROW // I guess we're not on a glibc system
516# define __THROW // __THROW is just an optimization, so ok to make it ""
517#endif
518
519//-------------------------------------------------------------------
520// Configuration
521//-------------------------------------------------------------------
522
523// Not all possible combinations of the following parameters make
524// sense. In particular, if kMaxSize increases, you may have to
525// increase kNumClasses as well.
526static const size_t kPageShift = 12;
527static const size_t kPageSize = 1 << kPageShift;
528static const size_t kMaxSize = 8u * kPageSize;
529static const size_t kAlignShift = 3;
530static const size_t kAlignment = 1 << kAlignShift;
531static const size_t kNumClasses = 68;
532
533// Allocates a big block of memory for the pagemap once we reach more than
534// 128MB
535static const size_t kPageMapBigAllocationThreshold = 128 << 20;
536
537// Minimum number of pages to fetch from system at a time. Must be
538// significantly bigger than kBlockSize to amortize system-call
539// overhead, and also to reduce external fragmentation. Also, we
540// should keep this value big because various incarnations of Linux
541// have small limits on the number of mmap() regions per
542// address-space.
543static const size_t kMinSystemAlloc = 1 << (20 - kPageShift);
544
545// Number of objects to move between a per-thread list and a central
546// list in one shot. We want this to be not too small so we can
547// amortize the lock overhead for accessing the central list. Making
548// it too big may temporarily cause unnecessary memory wastage in the
549// per-thread free list until the scavenger cleans up the list.
550static int num_objects_to_move[kNumClasses];
551
552// Maximum length we allow a per-thread free-list to have before we
553// move objects from it into the corresponding central free-list. We
554// want this big to avoid locking the central free-list too often. It
555// should not hurt to make this list somewhat big because the
556// scavenging code will shrink it down when its contents are not in use.
557static const int kMaxFreeListLength = 256;
558
559// Lower and upper bounds on the per-thread cache sizes
560static const size_t kMinThreadCacheSize = kMaxSize * 2;
561static const size_t kMaxThreadCacheSize = 2 << 20;
562
563// Default bound on the total amount of thread caches
564static const size_t kDefaultOverallThreadCacheSize = 16 << 20;
565
566// For all span-lengths < kMaxPages we keep an exact-size list.
567// REQUIRED: kMaxPages >= kMinSystemAlloc;
568static const size_t kMaxPages = kMinSystemAlloc;
569
570/* The smallest prime > 2^n */
571static int primes_list[] = {
572 // Small values might cause high rates of sampling
573// and hence are commented out.
574 // 2, 5, 11, 17, 37, 67, 131, 257,
575 // 521, 1031, 2053, 4099, 8209, 16411,
576 32771, 65537, 131101, 262147, 524309, 1048583,
577 2097169, 4194319, 8388617, 16777259, 33554467 };
578
579// Twice the approximate gap between sampling actions.
580// I.e., we take one sample approximately once every
581// tcmalloc_sample_parameter/2
582// bytes of allocation, i.e., ~ once every 128KB.
583// Must be a prime number.
584#ifdef NO_TCMALLOC_SAMPLES
585DEFINE_int64(tcmalloc_sample_parameter, 0,
586 "Unused: code is compiled with NO_TCMALLOC_SAMPLES");
587static size_t sample_period = 0;
588#else
589DEFINE_int64(tcmalloc_sample_parameter, 262147,
590 "Twice the approximate gap between sampling actions."
591 " Must be a prime number. Otherwise will be rounded up to a "
592 " larger prime number");
593static size_t sample_period = 262147;
594#endif
595
596// Protects sample_period above
597static SpinLock sample_period_lock = SPINLOCK_INITIALIZER;
598
599// Parameters for controlling how fast memory is returned to the OS.
600
601DEFINE_double(tcmalloc_release_rate, 1,
602 "Rate at which we release unused memory to the system. "
603 "Zero means we never release memory back to the system. "
604 "Increase this flag to return memory faster; decrease it "
605 "to return memory slower. Reasonable rates are in the "
606 "range [0,10]");
607
608//-------------------------------------------------------------------
609// Mapping from size to size_class and vice versa
610//-------------------------------------------------------------------
611
612// Sizes <= 1024 have an alignment >= 8. So for such sizes we have an
613// array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128.
614// So for these larger sizes we have an array indexed by ceil(size/128).
615//
616// We flatten both logical arrays into one physical array and use
617// arithmetic to compute an appropriate index. The constants used by
618// ClassIndex() were selected to make the flattening work.
619//
620// Examples:
621// Size Expression Index
622// -------------------------------------------------------
623// 0 (0 + 7) / 8 0
624// 1 (1 + 7) / 8 1
625// ...
626// 1024 (1024 + 7) / 8 128
627// 1025 (1025 + 127 + (120<<7)) / 128 129
628// ...
629// 32768 (32768 + 127 + (120<<7)) / 128 376
630static const size_t kMaxSmallSize = 1024;
631static const int shift_amount[2] = { 3, 7 }; // For divides by 8 or 128
632static const int add_amount[2] = { 7, 127 + (120 << 7) };
633static unsigned char class_array[377];
634
635// Compute index of the class_array[] entry for a given size
636static inline int ClassIndex(size_t s) {
637 const int i = (s > kMaxSmallSize);
638 return static_cast<int>((s + add_amount[i]) >> shift_amount[i]);
639}
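// Spot check of the flattened index arithmetic above (matches the comment table):
// ClassIndex(1024) uses the small-size branch, (1024 + 7) >> 3 == 128, while
// ClassIndex(1025) uses the large-size branch, (1025 + 127 + (120 << 7)) >> 7 ==
// 16512 >> 7 == 129.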
640
641// Mapping from size class to max size storable in that class
642static size_t class_to_size[kNumClasses];
643
644// Mapping from size class to number of pages to allocate at a time
645static size_t class_to_pages[kNumClasses];
646
647// TransferCache is used to cache transfers of num_objects_to_move[size_class]
648// back and forth between thread caches and the central cache for a given size
649// class.
650struct TCEntry {
651 void *head; // Head of chain of objects.
652 void *tail; // Tail of chain of objects.
653};
654// A central cache freelist can have anywhere from 0 to kNumTransferEntries
655// slots to put linked list chains into. To keep memory usage bounded, the total
656// number of TCEntries across size classes is fixed. Currently each size
657// class is initially given one TCEntry which also means that the maximum any
658// one class can have is kNumClasses.
659static const int kNumTransferEntries = kNumClasses;
660
661// Note: the following only works for "n"s that fit in 32-bits, but
662// that is fine since we only use it for small sizes.
663static inline int LgFloor(size_t n) {
664 int log = 0;
665 for (int i = 4; i >= 0; --i) {
666 int shift = (1 << i);
667 size_t x = n >> shift;
668 if (x != 0) {
669 n = x;
670 log += shift;
671 }
672 }
673 ASSERT(n == 1);
674 return log;
675}
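// Worked example: LgFloor(1536) first finds 1536 >> 8 == 6 (log += 8), then
// 6 >> 2 == 1 (log += 2), leaving n == 1 and returning 10 == floor(log2(1536)).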
676
677// Some very basic linked list functions for dealing with using void * as
678// storage.
679
680static inline void *SLL_Next(void *t) {
681 return *(reinterpret_cast<void**>(t));
682}
683
684static inline void SLL_SetNext(void *t, void *n) {
685 *(reinterpret_cast<void**>(t)) = n;
686}
687
688static inline void SLL_Push(void **list, void *element) {
689 SLL_SetNext(element, *list);
690 *list = element;
691}
692
693static inline void *SLL_Pop(void **list) {
694 void *result = *list;
695 *list = SLL_Next(*list);
696 return result;
697}
698
699
700// Remove N elements from a linked list to which head points. head will be
701// modified to point to the new head. start and end will point to the first
702// and last nodes of the range. Note that the last node's next pointer will be
703// set to NULL after this function is called.
704static inline void SLL_PopRange(void **head, int N, void **start, void **end) {
705 if (N == 0) {
706 *start = NULL;
707 *end = NULL;
708 return;
709 }
710
711 void *tmp = *head;
712 for (int i = 1; i < N; ++i) {
713 tmp = SLL_Next(tmp);
714 }
715
716 *start = *head;
717 *end = tmp;
718 *head = SLL_Next(tmp);
719 // Unlink range from list.
720 SLL_SetNext(tmp, NULL);
721}
722
723static inline void SLL_PushRange(void **head, void *start, void *end) {
724 if (!start) return;
725 SLL_SetNext(end, *head);
726 *head = start;
727}
728
729static inline size_t SLL_Size(void *head) {
730 int count = 0;
731 while (head) {
732 count++;
733 head = SLL_Next(head);
734 }
735 return count;
736}
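// Minimal usage sketch of the SLL_* helpers (hypothetical, guarded out; any chunk of
// at least sizeof(void*) bytes can act as a list node because the next pointer is
// stored in the node's first word):
#if 0
static void SLL_Example()
{
    void* freeList = NULL;
    char a[16], b[16];
    SLL_Push(&freeList, a);              // list: a
    SLL_Push(&freeList, b);              // list: b -> a
    ASSERT(SLL_Size(freeList) == 2);
    void* popped = SLL_Pop(&freeList);   // popped == b; list: a
    ASSERT(popped == b);
}
#endif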
737
738// Setup helper functions.
739
740static ALWAYS_INLINE size_t SizeClass(size_t size) {
741 return class_array[ClassIndex(size)];
742}
743
744// Get the byte-size for a specified class
745static ALWAYS_INLINE size_t ByteSizeForClass(size_t cl) {
746 return class_to_size[cl];
747}
748static int NumMoveSize(size_t size) {
749 if (size == 0) return 0;
750 // Use approx 64k transfers between thread and central caches.
751 int num = static_cast<int>(64.0 * 1024.0 / size);
752 if (num < 2) num = 2;
753 // Clamp well below kMaxFreeListLength to avoid ping pong between central
754 // and thread caches.
755 if (num > static_cast<int>(0.8 * kMaxFreeListLength))
756 num = static_cast<int>(0.8 * kMaxFreeListLength);
757
758 // Also, avoid bringing in too many objects into small object free
759 // lists. There are lots of such lists, and if we allow each one to
760 // fetch too many at a time, we end up having to scavenge too often
761 // (especially when there are lots of threads and each thread gets a
762 // small allowance for its thread cache).
763 //
764 // TODO: Make thread cache free list sizes dynamic so that we do not
765 // have to equally divide a fixed resource amongst lots of threads.
766 if (num > 32) num = 32;
767
768 return num;
769}
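// Worked examples: NumMoveSize(4096) targets roughly 64KB per transfer, giving
// 65536 / 4096 == 16 objects, while NumMoveSize(32) would give 2048 but is clamped
// first to 0.8 * kMaxFreeListLength and then to the hard cap of 32.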
770
771// Initialize the mapping arrays
772static void InitSizeClasses() {
773 // Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
774 if (ClassIndex(0) < 0) {
775 MESSAGE("Invalid class index %d for size 0\n", ClassIndex(0));
776 CRASH();
777 }
778 if (static_cast<size_t>(ClassIndex(kMaxSize)) >= sizeof(class_array)) {
779 MESSAGE("Invalid class index %d for kMaxSize\n", ClassIndex(kMaxSize));
780 CRASH();
781 }
782
783 // Compute the size classes we want to use
784 size_t sc = 1; // Next size class to assign
785 unsigned char alignshift = kAlignShift;
786 int last_lg = -1;
787 for (size_t size = kAlignment; size <= kMaxSize; size += (1 << alignshift)) {
788 int lg = LgFloor(size);
789 if (lg > last_lg) {
790 // Increase alignment every so often.
791 //
792 // Since we double the alignment every time size doubles and
793 // size >= 128, this means that space wasted due to alignment is
794 // at most 16/128 i.e., 12.5%. Plus we cap the alignment at 256
795 // bytes, so the space wasted as a percentage starts falling for
796 // sizes > 2K.
797 if ((lg >= 7) && (alignshift < 8)) {
798 alignshift++;
799 }
800 last_lg = lg;
801 }
802
803 // Allocate enough pages so leftover is less than 1/8 of total.
804 // This bounds wasted space to at most 12.5%.
805 size_t psize = kPageSize;
806 while ((psize % size) > (psize >> 3)) {
807 psize += kPageSize;
808 }
809 const size_t my_pages = psize >> kPageShift;
810
811 if (sc > 1 && my_pages == class_to_pages[sc-1]) {
812 // See if we can merge this into the previous class without
813 // increasing the fragmentation of the previous class.
814 const size_t my_objects = (my_pages << kPageShift) / size;
815 const size_t prev_objects = (class_to_pages[sc-1] << kPageShift)
816 / class_to_size[sc-1];
817 if (my_objects == prev_objects) {
818 // Adjust last class to include this size
819 class_to_size[sc-1] = size;
820 continue;
821 }
822 }
823
824 // Add new class
825 class_to_pages[sc] = my_pages;
826 class_to_size[sc] = size;
827 sc++;
828 }
829 if (sc != kNumClasses) {
830 MESSAGE("wrong number of size classes: found %" PRIuS " instead of %d\n",
831 sc, int(kNumClasses));
832 CRASH();
833 }
834
835 // Initialize the mapping arrays
836 int next_size = 0;
837 for (unsigned char c = 1; c < kNumClasses; c++) {
838 const size_t max_size_in_class = class_to_size[c];
839 for (size_t s = next_size; s <= max_size_in_class; s += kAlignment) {
840 class_array[ClassIndex(s)] = c;
841 }
842 next_size = static_cast<int>(max_size_in_class + kAlignment);
843 }
844
845 // Double-check sizes just to be safe
846 for (size_t size = 0; size <= kMaxSize; size++) {
847 const size_t sc = SizeClass(size);
848 if (sc == 0) {
849 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
850 CRASH();
851 }
852 if (sc > 1 && size <= class_to_size[sc-1]) {
853 MESSAGE("Allocating unnecessarily large class %" PRIuS " for %" PRIuS
854 "\n", sc, size);
855 CRASH();
856 }
857 if (sc >= kNumClasses) {
858 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
859 CRASH();
860 }
861 const size_t s = class_to_size[sc];
862 if (size > s) {
863 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc);
864 CRASH();
865 }
866 if (s == 0) {
867 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc);
868 CRASH();
869 }
870 }
871
872 // Initialize the num_objects_to_move array.
873 for (size_t cl = 1; cl < kNumClasses; ++cl) {
874 num_objects_to_move[cl] = NumMoveSize(ByteSizeForClass(cl));
875 }
876
877#ifndef WTF_CHANGES
878 if (false) {
879 // Dump class sizes and maximum external wastage per size class
880 for (size_t cl = 1; cl < kNumClasses; ++cl) {
881 const int alloc_size = class_to_pages[cl] << kPageShift;
882 const int alloc_objs = alloc_size / class_to_size[cl];
883 const int min_used = (class_to_size[cl-1] + 1) * alloc_objs;
884 const int max_waste = alloc_size - min_used;
885 MESSAGE("SC %3d [ %8d .. %8d ] from %8d ; %2.0f%% maxwaste\n",
886 int(cl),
887 int(class_to_size[cl-1] + 1),
888 int(class_to_size[cl]),
889 int(class_to_pages[cl] << kPageShift),
890 max_waste * 100.0 / alloc_size
891 );
892 }
893 }
894#endif
895}
896
897// -------------------------------------------------------------------------
898// Simple allocator for objects of a specified type. External locking
899// is required before accessing one of these objects.
900// -------------------------------------------------------------------------
901
902// Metadata allocator -- keeps stats about how many bytes allocated
903static uint64_t metadata_system_bytes = 0;
904static void* MetaDataAlloc(size_t bytes) {
905 void* result = TCMalloc_SystemAlloc(bytes, 0);
906 if (result != NULL) {
907 metadata_system_bytes += bytes;
908 }
909 return result;
910}
911
912template <class T>
913class PageHeapAllocator {
914 private:
915 // How much to allocate from system at a time
916 static const size_t kAllocIncrement = 32 << 10;
917
918 // Aligned size of T
919 static const size_t kAlignedSize
920 = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment);
921
922 // Free area from which to carve new objects
923 char* free_area_;
924 size_t free_avail_;
925
926 // Linked list of all regions allocated by this allocator
927 void* allocated_regions_;
928
929 // Free list of already carved objects
930 void* free_list_;
931
932 // Number of allocated but unfreed objects
933 int inuse_;
934
935 public:
936 void Init() {
937 ASSERT(kAlignedSize <= kAllocIncrement);
938 inuse_ = 0;
939 allocated_regions_ = 0;
940 free_area_ = NULL;
941 free_avail_ = 0;
942 free_list_ = NULL;
943 }
944
945 T* New() {
946 // Consult free list
947 void* result;
948 if (free_list_ != NULL) {
949 result = free_list_;
950 free_list_ = *(reinterpret_cast<void**>(result));
951 } else {
952 if (free_avail_ < kAlignedSize) {
953 // Need more room
954 char* new_allocation = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement));
955 if (!new_allocation)
956 CRASH();
957
958 *(void**)new_allocation = allocated_regions_;
959 allocated_regions_ = new_allocation;
960 free_area_ = new_allocation + kAlignedSize;
961 free_avail_ = kAllocIncrement - kAlignedSize;
962 }
963 result = free_area_;
964 free_area_ += kAlignedSize;
965 free_avail_ -= kAlignedSize;
966 }
967 inuse_++;
968 return reinterpret_cast<T*>(result);
969 }
970
971 void Delete(T* p) {
972 *(reinterpret_cast<void**>(p)) = free_list_;
973 free_list_ = p;
974 inuse_--;
975 }
976
977 int inuse() const { return inuse_; }
978
979#if defined(WTF_CHANGES) && PLATFORM(DARWIN)
980 template <class Recorder>
981 void recordAdministrativeRegions(Recorder& recorder, const RemoteMemoryReader& reader)
982 {
983 vm_address_t adminAllocation = reinterpret_cast<vm_address_t>(allocated_regions_);
984 while (adminAllocation) {
985 recorder.recordRegion(adminAllocation, kAllocIncrement);
986 adminAllocation = *reader(reinterpret_cast<vm_address_t*>(adminAllocation));
987 }
988 }
989#endif
990};
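// Minimal usage sketch of PageHeapAllocator (hypothetical, guarded out; the real
// instances in this file, such as span_allocator below, are used while holding the
// page heap lock):
#if 0
struct ExampleNode { void* payload; };
static PageHeapAllocator<ExampleNode> exampleAllocator;

static void PageHeapAllocatorExample()
{
    exampleAllocator.Init();                   // must run once before New()
    ExampleNode* n = exampleAllocator.New();   // carved from MetaDataAlloc'd memory
    exampleAllocator.Delete(n);                // pushed onto the internal free list
    ASSERT(!exampleAllocator.inuse());
}
#endif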
991
992// -------------------------------------------------------------------------
993// Span - a contiguous run of pages
994// -------------------------------------------------------------------------
995
996// Type that can hold a page number
997typedef uintptr_t PageID;
998
999// Type that can hold the length of a run of pages
1000typedef uintptr_t Length;
1001
1002static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;
1003
1004// Convert byte size into pages. This won't overflow, but may return
1005// an unreasonably large value if bytes is huge enough.
1006static inline Length pages(size_t bytes) {
1007 return (bytes >> kPageShift) +
1008 ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
1009}
1010
1011// Convert a user size into the number of bytes that will actually be
1012// allocated
1013static size_t AllocationSize(size_t bytes) {
1014 if (bytes > kMaxSize) {
1015 // Large object: we allocate an integral number of pages
1016 ASSERT(bytes <= (kMaxValidPages << kPageShift));
1017 return pages(bytes) << kPageShift;
1018 } else {
1019 // Small object: find the size class to which it belongs
1020 return ByteSizeForClass(SizeClass(bytes));
1021 }
1022}
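// Worked example (with kPageShift == 12, so kPageSize == 4096 and kMaxSize == 32768):
// AllocationSize(kMaxSize + 1) takes the large-object branch, pages(32769) == 9, so
// 9 << kPageShift == 36864 bytes are used; a small request like AllocationSize(10)
// is instead rounded up to ByteSizeForClass(SizeClass(10)).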
1023
1024// Information kept for a span (a contiguous run of pages).
1025struct Span {
1026 PageID start; // Starting page number
1027 Length length; // Number of pages in span
1028 Span* next; // Used when in linked list
1029 Span* prev; // Used when in linked list
1030 void* objects; // Linked list of free objects
1031 unsigned int free : 1; // Is the span free
1032#ifndef NO_TCMALLOC_SAMPLES
1033 unsigned int sample : 1; // Sampled object?
1034#endif
1035 unsigned int sizeclass : 8; // Size-class for small objects (or 0)
1036 unsigned int refcount : 11; // Number of non-free objects
1037 bool decommitted : 1;
1038
1039#undef SPAN_HISTORY
1040#ifdef SPAN_HISTORY
1041 // For debugging, we can keep a log of events per span
1042 int nexthistory;
1043 char history[64];
1044 int value[64];
1045#endif
1046};
1047
1048#define ASSERT_SPAN_COMMITTED(span) ASSERT(!span->decommitted)
1049
1050#ifdef SPAN_HISTORY
1051void Event(Span* span, char op, int v = 0) {
1052 span->history[span->nexthistory] = op;
1053 span->value[span->nexthistory] = v;
1054 span->nexthistory++;
1055 if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0;
1056}
1057#else
1058#define Event(s,o,v) ((void) 0)
1059#endif
1060
1061// Allocator/deallocator for spans
1062static PageHeapAllocator<Span> span_allocator;
1063static Span* NewSpan(PageID p, Length len) {
1064 Span* result = span_allocator.New();
1065 memset(result, 0, sizeof(*result));
1066 result->start = p;
1067 result->length = len;
1068#ifdef SPAN_HISTORY
1069 result->nexthistory = 0;
1070#endif
1071 return result;
1072}
1073
1074static inline void DeleteSpan(Span* span) {
1075#ifndef NDEBUG
1076 // In debug mode, trash the contents of deleted Spans
1077 memset(span, 0x3f, sizeof(*span));
1078#endif
1079 span_allocator.Delete(span);
1080}
1081
1082// -------------------------------------------------------------------------
1083// Doubly linked list of spans.
1084// -------------------------------------------------------------------------
1085
1086static inline void DLL_Init(Span* list) {
1087 list->next = list;
1088 list->prev = list;
1089}
1090
1091static inline void DLL_Remove(Span* span) {
1092 span->prev->next = span->next;
1093 span->next->prev = span->prev;
1094 span->prev = NULL;
1095 span->next = NULL;
1096}
1097
1098static ALWAYS_INLINE bool DLL_IsEmpty(const Span* list) {
1099 return list->next == list;
1100}
1101
1102static int DLL_Length(const Span* list) {
1103 int result = 0;
1104 for (Span* s = list->next; s != list; s = s->next) {
1105 result++;
1106 }
1107 return result;
1108}
1109
1110#if 0 /* Not needed at the moment -- causes compiler warnings if not used */
1111static void DLL_Print(const char* label, const Span* list) {
1112 MESSAGE("%-10s %p:", label, list);
1113 for (const Span* s = list->next; s != list; s = s->next) {
1114 MESSAGE(" <%p,%u,%u>", s, s->start, s->length);
1115 }
1116 MESSAGE("\n");
1117}
1118#endif
1119
1120static inline void DLL_Prepend(Span* list, Span* span) {
1121 ASSERT(span->next == NULL);
1122 ASSERT(span->prev == NULL);
1123 span->next = list->next;
1124 span->prev = list;
1125 list->next->prev = span;
1126 list->next = span;
1127}
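// Minimal usage sketch of the circular, sentinel-based span lists (hypothetical,
// guarded out):
#if 0
static void DLL_Example()
{
    Span list;
    DLL_Init(&list);               // empty list: list.next == list.prev == &list
    Span span;
    span.next = span.prev = NULL;  // DLL_Prepend asserts the span is unlinked
    DLL_Prepend(&list, &span);     // list <-> span, circular through the sentinel
    ASSERT(DLL_Length(&list) == 1);
    DLL_Remove(&span);             // list is empty again; span's links are NULL
}
#endif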
1128
1129// -------------------------------------------------------------------------
1130// Stack traces kept for sampled allocations
1131// The following state is protected by pageheap_lock_.
1132// -------------------------------------------------------------------------
1133
1134// size/depth are made the same size as a pointer so that some generic
1135// code below can conveniently cast them back and forth to void*.
1136static const int kMaxStackDepth = 31;
1137struct StackTrace {
1138 uintptr_t size; // Size of object
1139 uintptr_t depth; // Number of PC values stored in array below
1140 void* stack[kMaxStackDepth];
1141};
1142static PageHeapAllocator<StackTrace> stacktrace_allocator;
1143static Span sampled_objects;
1144
1145// -------------------------------------------------------------------------
1146// Map from page-id to per-page data
1147// -------------------------------------------------------------------------
1148
1149// We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines.
1150// We also use a simple one-level cache for hot PageID-to-sizeclass mappings,
1151// because sometimes the sizeclass is all the information we need.
1152
1153// Selector class -- general selector uses 3-level map
1154template <int BITS> class MapSelector {
1155 public:
1156 typedef TCMalloc_PageMap3<BITS-kPageShift> Type;
1157 typedef PackedCache<BITS, uint64_t> CacheType;
1158};
1159
1160#if defined(WTF_CHANGES)
1161#if PLATFORM(X86_64)
1162// On all known X86-64 platforms, the upper 16 bits are always unused and therefore
1163// can be excluded from the PageMap key.
1164// See http://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details
1165
1166static const size_t kBitsUnusedOn64Bit = 16;
1167#else
1168static const size_t kBitsUnusedOn64Bit = 0;
1169#endif
1170
1171// A three-level map for 64-bit machines
1172template <> class MapSelector<64> {
1173 public:
1174 typedef TCMalloc_PageMap3<64 - kPageShift - kBitsUnusedOn64Bit> Type;
1175 typedef PackedCache<64, uint64_t> CacheType;
1176};
1177#endif
1178
1179// A two-level map for 32-bit machines
1180template <> class MapSelector<32> {
1181 public:
1182 typedef TCMalloc_PageMap2<32 - kPageShift> Type;
1183 typedef PackedCache<32 - kPageShift, uint16_t> CacheType;
1184};
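// Worked numbers (kPageShift == 12): a 32-bit build selects the two-level map keyed on
// 32 - 12 == 20 bits of page number (up to 2^20 pages, i.e. 4GB of address space),
// while an x86-64 build selects the three-level map keyed on 64 - 12 - 16 == 36 bits.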
1185
1186// -------------------------------------------------------------------------
1187// Page-level allocator
1188// * Eager coalescing
1189//
1190// Heap for page-level allocation. We allow allocating and freeing a
1191// contiguous run of pages (called a "span").
1192// -------------------------------------------------------------------------
1193
1194#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1195// The central page heap collects spans of memory that have been deleted but are still committed until they are released
1196// back to the system. We use a background thread to periodically scan the list of free spans and release some back to the
1197// system. Every 5 seconds, the background thread wakes up and does the following:
1198// - Check if we needed to commit memory in the last 5 seconds. If so, skip this scavenge because it's a sign that we are short
1199// of free committed pages and so we should not release them back to the system yet.
1200// - Otherwise, go through the list of free spans (from largest to smallest) and release up to a fraction of the free committed pages
1201// back to the system.
1202// - If the number of free committed pages drops to kMinimumFreeCommittedPageCount, we can stop scavenging and block the
1203// scavenging thread until the number of free committed pages goes above kMinimumFreeCommittedPageCount.
1204
1205// Background thread wakes up every 5 seconds to scavenge as long as there is memory available to return to the system.
1206static const int kScavengeTimerDelayInSeconds = 5;
1207
1208// Number of free committed pages that we want to keep around.
1209static const size_t kMinimumFreeCommittedPageCount = 512;
1210
1211// During a scavenge, we'll release up to a fraction of the free committed pages.
1212#if PLATFORM(WIN)
1213// We are slightly less aggressive in releasing memory on Windows due to performance reasons.
1214static const int kMaxScavengeAmountFactor = 3;
1215#else
1216static const int kMaxScavengeAmountFactor = 2;
1217#endif
1218#endif
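// Rough numbers implied by the constants above (assuming 4KB pages):
// kMinimumFreeCommittedPageCount == 512 keeps about 2MB of committed free memory
// around, and kMaxScavengeAmountFactor == 2 (3 on Windows) limits one scavenge pass
// to releasing at most 1/2 (respectively 1/3) of the free committed pages.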
1219
1220class TCMalloc_PageHeap {
1221 public:
1222 void init();
1223
1224 // Allocate a run of "n" pages. Returns zero if out of memory.
1225 Span* New(Length n);
1226
1227 // Delete the span "[p, p+n-1]".
1228 // REQUIRES: span was returned by earlier call to New() and
1229 // has not yet been deleted.
1230 void Delete(Span* span);
1231
1232 // Mark an allocated span as being used for small objects of the
1233 // specified size-class.
1234 // REQUIRES: span was returned by an earlier call to New()
1235 // and has not yet been deleted.
1236 void RegisterSizeClass(Span* span, size_t sc);
1237
1238 // Split an allocated span into two spans: one of length "n" pages
1239 // followed by another span of length "span->length - n" pages.
1240 // Modifies "*span" to point to the first span of length "n" pages.
1241 // Returns a pointer to the second span.
1242 //
1243 // REQUIRES: "0 < n < span->length"
1244 // REQUIRES: !span->free
1245 // REQUIRES: span->sizeclass == 0
1246 Span* Split(Span* span, Length n);
1247
1248 // Return the descriptor for the specified page.
1249 inline Span* GetDescriptor(PageID p) const {
1250 return reinterpret_cast<Span*>(pagemap_.get(p));
1251 }
1252
1253#ifdef WTF_CHANGES
1254 inline Span* GetDescriptorEnsureSafe(PageID p)
1255 {
1256 pagemap_.Ensure(p, 1);
1257 return GetDescriptor(p);
1258 }
1259
1260 size_t ReturnedBytes() const;
1261#endif
1262
1263 // Dump state to stderr
1264#ifndef WTF_CHANGES
1265 void Dump(TCMalloc_Printer* out);
1266#endif
1267
1268 // Return number of bytes allocated from system
1269 inline uint64_t SystemBytes() const { return system_bytes_; }
1270
1271 // Return number of free bytes in heap
1272 uint64_t FreeBytes() const {
1273 return (static_cast<uint64_t>(free_pages_) << kPageShift);
1274 }
1275
1276 bool Check();
1277 bool CheckList(Span* list, Length min_pages, Length max_pages);
1278
1279 // Release all pages on the free list for reuse by the OS:
1280 void ReleaseFreePages();
1281
1282 // Return 0 if we have no information, or else the correct sizeclass for p.
1283 // Reads and writes to pagemap_cache_ do not require locking.
1284 // The entries are 64 bits on 64-bit hardware and 16 bits on
1285 // 32-bit hardware, and we don't mind raciness as long as each read of
1286 // an entry yields a valid entry, not a partially updated entry.
1287 size_t GetSizeClassIfCached(PageID p) const {
1288 return pagemap_cache_.GetOrDefault(p, 0);
1289 }
1290 void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); }
1291
1292 private:
1293 // Pick the appropriate map and cache types based on pointer size
1294 typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap;
1295 typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache;
1296 PageMap pagemap_;
1297 mutable PageMapCache pagemap_cache_;
1298
1299 // We segregate spans of a given size into two circular linked
1300 // lists: one for normal spans, and one for spans whose memory
1301 // has been returned to the system.
1302 struct SpanList {
1303 Span normal;
1304 Span returned;
1305 };
1306
1307 // List of free spans of length >= kMaxPages
1308 SpanList large_;
1309
1310 // Array mapping from span length to a doubly linked list of free spans
1311 SpanList free_[kMaxPages];
1312
1313 // Number of pages kept in free lists
1314 uintptr_t free_pages_;
1315
1316 // Bytes allocated from system
1317 uint64_t system_bytes_;
1318
1319#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1320 // Number of pages kept in free lists that are still committed.
1321 Length free_committed_pages_;
1322
1323 // Number of pages that we committed in the last scavenge wait interval.
1324 Length pages_committed_since_last_scavenge_;
1325#endif
1326
1327 bool GrowHeap(Length n);
1328
1329 // REQUIRES span->length >= n
1330 // Remove span from its free list, and move any leftover part of
1331 // span into appropriate free lists. Also update "span" to have
1332 // length exactly "n" and mark it as non-free so it can be returned
1333 // to the client.
1334 //
1335 // "released" is true iff "span" was found on a "returned" list.
1336 void Carve(Span* span, Length n, bool released);
1337
1338 void RecordSpan(Span* span) {
1339 pagemap_.set(span->start, span);
1340 if (span->length > 1) {
1341 pagemap_.set(span->start + span->length - 1, span);
1342 }
1343 }
1344
1345 // Allocate a large span of length == n. If successful, returns a
1346 // span of exactly the specified length. Else, returns NULL.
1347 Span* AllocLarge(Length n);
1348
1349#if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1350 // Incrementally release some memory to the system.
1351 // IncrementalScavenge(n) is called whenever n pages are freed.
1352 void IncrementalScavenge(Length n);
1353#endif
1354
1355 // Number of pages to deallocate before doing more scavenging
1356 int64_t scavenge_counter_;
1357
1358 // Index of last free list we scavenged
1359 size_t scavenge_index_;
1360
1361#if defined(WTF_CHANGES) && PLATFORM(DARWIN)
1362 friend class FastMallocZone;
1363#endif
1364
1365#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1366 static NO_RETURN void* runScavengerThread(void*);
1367
1368 NO_RETURN void scavengerThread();
1369
1370 void scavenge();
1371
1372 inline bool shouldContinueScavenging() const;
1373
1374 pthread_mutex_t m_scavengeMutex;
1375
1376 pthread_cond_t m_scavengeCondition;
1377
1378 // Keeps track of whether the background thread is actively scavenging memory every kScavengeTimerDelayInSeconds seconds, or
1379 // is blocked waiting for more pages to be deleted.
1380 bool m_scavengeThreadActive;
1381#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1382};
1383
1384void TCMalloc_PageHeap::init()
1385{
1386 pagemap_.init(MetaDataAlloc);
1387 pagemap_cache_ = PageMapCache(0);
1388 free_pages_ = 0;
1389 system_bytes_ = 0;
1390
1391#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1392 free_committed_pages_ = 0;
1393 pages_committed_since_last_scavenge_ = 0;
1394#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1395
1396 scavenge_counter_ = 0;
1397 // Start scavenging at kMaxPages list
1398 scavenge_index_ = kMaxPages-1;
1399 COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits);
1400 DLL_Init(&large_.normal);
1401 DLL_Init(&large_.returned);
1402 for (size_t i = 0; i < kMaxPages; i++) {
1403 DLL_Init(&free_[i].normal);
1404 DLL_Init(&free_[i].returned);
1405 }
1406
1407#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1408 pthread_mutex_init(&m_scavengeMutex, 0);
1409 pthread_cond_init(&m_scavengeCondition, 0);
1410 m_scavengeThreadActive = true;
1411 pthread_t thread;
1412 pthread_create(&thread, 0, runScavengerThread, this);
1413#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1414}
1415
1416#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1417void* TCMalloc_PageHeap::runScavengerThread(void* context)
1418{
1419 static_cast<TCMalloc_PageHeap*>(context)->scavengerThread();
1420#if COMPILER(MSVC)
1421 // Without this, Visual Studio will complain that this method does not return a value.
1422 return 0;
1423#endif
1424}
1425
1426void TCMalloc_PageHeap::scavenge()
1427{
1428 // If we had to commit memory in the last 5 seconds, it means we don't have enough free committed pages
1429 // for the amount of allocations that we do. So hold off on releasing memory back to the system.
1430 if (pages_committed_since_last_scavenge_ > 0) {
1431 pages_committed_since_last_scavenge_ = 0;
1432 return;
1433 }
1434 Length pagesDecommitted = 0;
1435 for (int i = kMaxPages; i >= 0; i--) {
1436 SpanList* slist = (static_cast<size_t>(i) == kMaxPages) ? &large_ : &free_[i];
1437 if (!DLL_IsEmpty(&slist->normal)) {
1438 // Release the last span on the normal portion of this list
1439 Span* s = slist->normal.prev;
1440 // Only decommit up to 1/kMaxScavengeAmountFactor of the free committed pages in a single scavenge pass.
1441 if ((pagesDecommitted + s->length) * kMaxScavengeAmountFactor > free_committed_pages_)
1442 continue;
1443 DLL_Remove(s);
1444 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
1445 static_cast<size_t>(s->length << kPageShift));
1446 if (!s->decommitted) {
1447 pagesDecommitted += s->length;
1448 s->decommitted = true;
1449 }
1450 DLL_Prepend(&slist->returned, s);
1451 // We can stop scavenging if the number of free committed pages left is less than or equal to the minimum number we want to keep around.
1452 if (free_committed_pages_ <= kMinimumFreeCommittedPageCount + pagesDecommitted)
1453 break;
1454 }
1455 }
1456 pages_committed_since_last_scavenge_ = 0;
1457 ASSERT(free_committed_pages_ >= pagesDecommitted);
1458 free_committed_pages_ -= pagesDecommitted;
1459}
1460
1461inline bool TCMalloc_PageHeap::shouldContinueScavenging() const
1462{
1463 return free_committed_pages_ > kMinimumFreeCommittedPageCount;
1464}
1465
1466#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1467
1468inline Span* TCMalloc_PageHeap::New(Length n) {
1469 ASSERT(Check());
1470 ASSERT(n > 0);
1471
1472 // Find first size >= n that has a non-empty list
1473 for (Length s = n; s < kMaxPages; s++) {
1474 Span* ll = NULL;
1475 bool released = false;
1476 if (!DLL_IsEmpty(&free_[s].normal)) {
1477 // Found normal span
1478 ll = &free_[s].normal;
1479 } else if (!DLL_IsEmpty(&free_[s].returned)) {
1480 // Found returned span; reallocate it
1481 ll = &free_[s].returned;
1482 released = true;
1483 } else {
1484 // Keep looking in larger classes
1485 continue;
1486 }
1487
1488 Span* result = ll->next;
1489 Carve(result, n, released);
1490 if (result->decommitted) {
1491 TCMalloc_SystemCommit(reinterpret_cast<void*>(result->start << kPageShift), static_cast<size_t>(n << kPageShift));
1492 result->decommitted = false;
1493#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1494 pages_committed_since_last_scavenge_ += n;
1495#endif
1496 }
1497#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1498 else {
1499 // The newly allocated memory is from a span that's in the normal span list (already committed). Update the
1500 // free committed pages count.
1501 ASSERT(free_committed_pages_ >= n);
1502 free_committed_pages_ -= n;
1503 }
1504#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1505 ASSERT(Check());
1506 free_pages_ -= n;
1507 return result;
1508 }
1509
1510 Span* result = AllocLarge(n);
1511 if (result != NULL) {
1512 ASSERT_SPAN_COMMITTED(result);
1513 return result;
1514 }
1515
1516 // Grow the heap and try again
1517 if (!GrowHeap(n)) {
1518 ASSERT(Check());
1519 return NULL;
1520 }
1521
1522 return AllocLarge(n);
1523}
1524
1525Span* TCMalloc_PageHeap::AllocLarge(Length n) {
1526 // find the best span (closest to n in size).
1527 // The following loops implement address-ordered best-fit.
1528 bool from_released = false;
1529 Span *best = NULL;
1530
1531 // Search through normal list
1532 for (Span* span = large_.normal.next;
1533 span != &large_.normal;
1534 span = span->next) {
1535 if (span->length >= n) {
1536 if ((best == NULL)
1537 || (span->length < best->length)
1538 || ((span->length == best->length) && (span->start < best->start))) {
1539 best = span;
1540 from_released = false;
1541 }
1542 }
1543 }
1544
1545 // Search through released list in case it has a better fit
1546 for (Span* span = large_.returned.next;
1547 span != &large_.returned;
1548 span = span->next) {
1549 if (span->length >= n) {
1550 if ((best == NULL)
1551 || (span->length < best->length)
1552 || ((span->length == best->length) && (span->start < best->start))) {
1553 best = span;
1554 from_released = true;
1555 }
1556 }
1557 }
1558
1559 if (best != NULL) {
1560 Carve(best, n, from_released);
1561 if (best->decommitted) {
1562 TCMalloc_SystemCommit(reinterpret_cast<void*>(best->start << kPageShift), static_cast<size_t>(n << kPageShift));
1563 best->decommitted = false;
1564#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1565 pages_committed_since_last_scavenge_ += n;
1566#endif
1567 }
1568#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1569 else {
1570 // The newly allocated memory is from a span that's in the normal span list (already committed). Update the
1571 // free committed pages count.
1572 ASSERT(free_committed_pages_ >= n);
1573 free_committed_pages_ -= n;
1574 }
1575#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1576 ASSERT(Check());
1577 free_pages_ -= n;
1578 return best;
1579 }
1580 return NULL;
1581}
1582
1583Span* TCMalloc_PageHeap::Split(Span* span, Length n) {
1584 ASSERT(0 < n);
1585 ASSERT(n < span->length);
1586 ASSERT(!span->free);
1587 ASSERT(span->sizeclass == 0);
1588 Event(span, 'T', n);
1589
1590 const Length extra = span->length - n;
1591 Span* leftover = NewSpan(span->start + n, extra);
1592 Event(leftover, 'U', extra);
1593 RecordSpan(leftover);
1594 pagemap_.set(span->start + n - 1, span); // Update map from pageid to span
1595 span->length = n;
1596
1597 return leftover;
1598}
1599
1600static ALWAYS_INLINE void propagateDecommittedState(Span* destination, Span* source)
1601{
1602 destination->decommitted = source->decommitted;
1603}
1604
1605inline void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) {
1606 ASSERT(n > 0);
1607 DLL_Remove(span);
1608 span->free = 0;
1609 Event(span, 'A', n);
1610
1611 const int extra = static_cast<int>(span->length - n);
1612 ASSERT(extra >= 0);
1613 if (extra > 0) {
1614 Span* leftover = NewSpan(span->start + n, extra);
1615 leftover->free = 1;
1616 propagateDecommittedState(leftover, span);
1617 Event(leftover, 'S', extra);
1618 RecordSpan(leftover);
1619
1620 // Place leftover span on appropriate free list
1621 SpanList* listpair = (static_cast<size_t>(extra) < kMaxPages) ? &free_[extra] : &large_;
1622 Span* dst = released ? &listpair->returned : &listpair->normal;
1623 DLL_Prepend(dst, leftover);
1624
1625 span->length = n;
1626 pagemap_.set(span->start + n - 1, span);
1627 }
1628}
1629
1630static ALWAYS_INLINE void mergeDecommittedStates(Span* destination, Span* other)
1631{
1632 if (destination->decommitted && !other->decommitted) {
1633 TCMalloc_SystemRelease(reinterpret_cast<void*>(other->start << kPageShift),
1634 static_cast<size_t>(other->length << kPageShift));
1635 } else if (other->decommitted && !destination->decommitted) {
1636 TCMalloc_SystemRelease(reinterpret_cast<void*>(destination->start << kPageShift),
1637 static_cast<size_t>(destination->length << kPageShift));
1638 destination->decommitted = true;
1639 }
1640}
1641
1642inline void TCMalloc_PageHeap::Delete(Span* span) {
1643 ASSERT(Check());
1644 ASSERT(!span->free);
1645 ASSERT(span->length > 0);
1646 ASSERT(GetDescriptor(span->start) == span);
1647 ASSERT(GetDescriptor(span->start + span->length - 1) == span);
1648 span->sizeclass = 0;
1649#ifndef NO_TCMALLOC_SAMPLES
1650 span->sample = 0;
1651#endif
1652
1653 // Coalesce -- we guarantee that "p" != 0, so no bounds checking
1654 // necessary. We do not bother resetting the stale pagemap
1655 // entries for the pieces we are merging together because we only
1656 // care about the pagemap entries for the boundaries.
1657#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1658 // Track the total size of the neighboring free spans that are committed.
1659 Length neighboringCommittedSpansLength = 0;
1660#endif
1661 const PageID p = span->start;
1662 const Length n = span->length;
1663 Span* prev = GetDescriptor(p-1);
1664 if (prev != NULL && prev->free) {
1665 // Merge preceding span into this span
1666 ASSERT(prev->start + prev->length == p);
1667 const Length len = prev->length;
1668#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1669 if (!prev->decommitted)
1670 neighboringCommittedSpansLength += len;
1671#endif
1672 mergeDecommittedStates(span, prev);
1673 DLL_Remove(prev);
1674 DeleteSpan(prev);
1675 span->start -= len;
1676 span->length += len;
1677 pagemap_.set(span->start, span);
1678 Event(span, 'L', len);
1679 }
1680 Span* next = GetDescriptor(p+n);
1681 if (next != NULL && next->free) {
1682 // Merge next span into this span
1683 ASSERT(next->start == p+n);
1684 const Length len = next->length;
1685#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1686 if (!next->decommitted)
1687 neighboringCommittedSpansLength += len;
1688#endif
1689 mergeDecommittedStates(span, next);
1690 DLL_Remove(next);
1691 DeleteSpan(next);
1692 span->length += len;
1693 pagemap_.set(span->start + span->length - 1, span);
1694 Event(span, 'R', len);
1695 }
1696
1697 Event(span, 'D', span->length);
1698 span->free = 1;
1699 if (span->decommitted) {
1700 if (span->length < kMaxPages)
1701 DLL_Prepend(&free_[span->length].returned, span);
1702 else
1703 DLL_Prepend(&large_.returned, span);
1704 } else {
1705 if (span->length < kMaxPages)
1706 DLL_Prepend(&free_[span->length].normal, span);
1707 else
1708 DLL_Prepend(&large_.normal, span);
1709 }
1710 free_pages_ += n;
1711
1712#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1713 if (span->decommitted) {
1714 // If the merged span is decommitted, that means we decommitted any neighboring spans that were
1715 // committed. Update the free committed pages count.
1716 free_committed_pages_ -= neighboringCommittedSpansLength;
1717 } else {
1718 // If the merged span remains committed, add the deleted span's size to the free committed pages count.
1719 free_committed_pages_ += n;
1720 }
1721
1722 // Make sure the scavenge thread becomes active if we have enough freed pages to release some back to the system.
1723 if (!m_scavengeThreadActive && shouldContinueScavenging())
1724 pthread_cond_signal(&m_scavengeCondition);
1725#else
1726 IncrementalScavenge(n);
1727#endif
1728
1729 ASSERT(Check());
1730}
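
// Illustrative walk-through of the coalescing above, assuming free neighbors
// on both sides: freeing a 2-page span at page 100 whose neighbors [98,100)
// and [102,105) are already free yields a single 7-page free span starting at
// page 98, prepended to free_[7].normal (or .returned if the merge left it
// decommitted), with only the boundary pagemap entries (pages 98 and 104)
// kept up to date.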
1731
1732#if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1733void TCMalloc_PageHeap::IncrementalScavenge(Length n) {
1734 // Fast path; not yet time to release memory
1735 scavenge_counter_ -= n;
1736 if (scavenge_counter_ >= 0) return; // Not yet time to scavenge
1737
1738 // If there is nothing to release, wait this many pages before
1739 // scavenging again. With 4K pages, this comes to 1MB of memory.
1740 static const size_t kDefaultReleaseDelay = 1 << 8;
1741
1742 // Find index of free list to scavenge
1743 size_t index = scavenge_index_ + 1;
1744 for (size_t i = 0; i < kMaxPages+1; i++) {
1745 if (index > kMaxPages) index = 0;
1746 SpanList* slist = (index == kMaxPages) ? &large_ : &free_[index];
1747 if (!DLL_IsEmpty(&slist->normal)) {
1748 // Release the last span on the normal portion of this list
1749 Span* s = slist->normal.prev;
1750 DLL_Remove(s);
1751 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
1752 static_cast<size_t>(s->length << kPageShift));
1753 s->decommitted = true;
1754 DLL_Prepend(&slist->returned, s);
1755
1756 scavenge_counter_ = std::max<size_t>(64UL, std::min<size_t>(kDefaultReleaseDelay, kDefaultReleaseDelay - (free_pages_ / kDefaultReleaseDelay)));
1757
1758 if (index == kMaxPages && !DLL_IsEmpty(&slist->normal))
1759 scavenge_index_ = index - 1;
1760 else
1761 scavenge_index_ = index;
1762 return;
1763 }
1764 index++;
1765 }
1766
1767 // Nothing to scavenge, delay for a while
1768 scavenge_counter_ = kDefaultReleaseDelay;
1769}
1770#endif
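
// Worked example of the release-delay formula above, with kDefaultReleaseDelay
// equal to 256 (1 << 8) as defined there:
//   free_pages_ ==  4096:  256 - 4096/256  == 256 - 16  -> scavenge_counter_ = 240
//   free_pages_ == 40960:  256 - 40960/256 == 256 - 160 -> scavenge_counter_ = 96
// so the more free pages the heap is holding, the sooner the next incremental
// scavenge fires, never dropping below the 64-page floor.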
1771
1772void TCMalloc_PageHeap::RegisterSizeClass(Span* span, size_t sc) {
1773 // Associate span object with all interior pages as well
1774 ASSERT(!span->free);
1775 ASSERT(GetDescriptor(span->start) == span);
1776 ASSERT(GetDescriptor(span->start+span->length-1) == span);
1777 Event(span, 'C', sc);
1778 span->sizeclass = static_cast<unsigned int>(sc);
1779 for (Length i = 1; i < span->length-1; i++) {
1780 pagemap_.set(span->start+i, span);
1781 }
1782}
1783
1784#ifdef WTF_CHANGES
1785size_t TCMalloc_PageHeap::ReturnedBytes() const {
1786 size_t result = 0;
1787 for (unsigned s = 0; s < kMaxPages; s++) {
1788 const int r_length = DLL_Length(&free_[s].returned);
1789 unsigned r_pages = s * r_length;
1790 result += r_pages << kPageShift;
1791 }
1792
1793 for (Span* s = large_.returned.next; s != &large_.returned; s = s->next)
1794 result += s->length << kPageShift;
1795 return result;
1796}
1797#endif
1798
1799#ifndef WTF_CHANGES
1800static double PagesToMB(uint64_t pages) {
1801 return (pages << kPageShift) / 1048576.0;
1802}
1803
1804void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) {
1805 int nonempty_sizes = 0;
1806 for (int s = 0; s < kMaxPages; s++) {
1807 if (!DLL_IsEmpty(&free_[s].normal) || !DLL_IsEmpty(&free_[s].returned)) {
1808 nonempty_sizes++;
1809 }
1810 }
1811 out->printf("------------------------------------------------\n");
1812 out->printf("PageHeap: %d sizes; %6.1f MB free\n",
1813 nonempty_sizes, PagesToMB(free_pages_));
1814 out->printf("------------------------------------------------\n");
1815 uint64_t total_normal = 0;
1816 uint64_t total_returned = 0;
1817 for (int s = 0; s < kMaxPages; s++) {
1818 const int n_length = DLL_Length(&free_[s].normal);
1819 const int r_length = DLL_Length(&free_[s].returned);
1820 if (n_length + r_length > 0) {
1821 uint64_t n_pages = s * n_length;
1822 uint64_t r_pages = s * r_length;
1823 total_normal += n_pages;
1824 total_returned += r_pages;
1825 out->printf("%6u pages * %6u spans ~ %6.1f MB; %6.1f MB cum"
1826 "; unmapped: %6.1f MB; %6.1f MB cum\n",
1827 s,
1828 (n_length + r_length),
1829 PagesToMB(n_pages + r_pages),
1830 PagesToMB(total_normal + total_returned),
1831 PagesToMB(r_pages),
1832 PagesToMB(total_returned));
1833 }
1834 }
1835
1836 uint64_t n_pages = 0;
1837 uint64_t r_pages = 0;
1838 int n_spans = 0;
1839 int r_spans = 0;
1840 out->printf("Normal large spans:\n");
1841 for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) {
1842 out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n",
1843 s->length, PagesToMB(s->length));
1844 n_pages += s->length;
1845 n_spans++;
1846 }
1847 out->printf("Unmapped large spans:\n");
1848 for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) {
1849 out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n",
1850 s->length, PagesToMB(s->length));
1851 r_pages += s->length;
1852 r_spans++;
1853 }
1854 total_normal += n_pages;
1855 total_returned += r_pages;
1856 out->printf(">255 large * %6u spans ~ %6.1f MB; %6.1f MB cum"
1857 "; unmapped: %6.1f MB; %6.1f MB cum\n",
1858 (n_spans + r_spans),
1859 PagesToMB(n_pages + r_pages),
1860 PagesToMB(total_normal + total_returned),
1861 PagesToMB(r_pages),
1862 PagesToMB(total_returned));
1863}
1864#endif
1865
1866bool TCMalloc_PageHeap::GrowHeap(Length n) {
1867 ASSERT(kMaxPages >= kMinSystemAlloc);
1868 if (n > kMaxValidPages) return false;
1869 Length ask = (n>kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc);
1870 size_t actual_size;
1871 void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
1872 if (ptr == NULL) {
1873 if (n < ask) {
1874 // Try growing just "n" pages
1875 ask = n;
1876 ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
1877 }
1878 if (ptr == NULL) return false;
1879 }
1880 ask = actual_size >> kPageShift;
1881
1882#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1883 pages_committed_since_last_scavenge_ += ask;
1884#endif
1885
1886 uint64_t old_system_bytes = system_bytes_;
1887 system_bytes_ += (ask << kPageShift);
1888 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
1889 ASSERT(p > 0);
1890
1891 // If we already have a lot of pages allocated, just preallocate a bunch of
1892 // memory for the page map. This prevents fragmentation by pagemap metadata
1893 // when a program keeps allocating and freeing large blocks.
1894
1895 if (old_system_bytes < kPageMapBigAllocationThreshold
1896 && system_bytes_ >= kPageMapBigAllocationThreshold) {
1897 pagemap_.PreallocateMoreMemory();
1898 }
1899
1900 // Make sure pagemap_ has entries for all of the new pages.
1901 // Plus ensure one before and one after so coalescing code
1902 // does not need bounds-checking.
1903 if (pagemap_.Ensure(p-1, ask+2)) {
1904 // Pretend the new area is allocated and then Delete() it to
1905 // cause any necessary coalescing to occur.
1906 //
1907 // We do not adjust free_pages_ here since Delete() will do it for us.
1908 Span* span = NewSpan(p, ask);
1909 RecordSpan(span);
1910 Delete(span);
1911 ASSERT(Check());
1912 return true;
1913 } else {
1914 // We could not allocate memory within "pagemap_"
1915 // TODO: Once we can return memory to the system, return the new span
1916 return false;
1917 }
1918}
1919
1920bool TCMalloc_PageHeap::Check() {
1921 ASSERT(free_[0].normal.next == &free_[0].normal);
1922 ASSERT(free_[0].returned.next == &free_[0].returned);
1923 CheckList(&large_.normal, kMaxPages, 1000000000);
1924 CheckList(&large_.returned, kMaxPages, 1000000000);
1925 for (Length s = 1; s < kMaxPages; s++) {
1926 CheckList(&free_[s].normal, s, s);
1927 CheckList(&free_[s].returned, s, s);
1928 }
1929 return true;
1930}
1931
1932#if ASSERT_DISABLED
1933bool TCMalloc_PageHeap::CheckList(Span*, Length, Length) {
1934 return true;
1935}
1936#else
1937bool TCMalloc_PageHeap::CheckList(Span* list, Length min_pages, Length max_pages) {
1938 for (Span* s = list->next; s != list; s = s->next) {
1939 CHECK_CONDITION(s->free);
1940 CHECK_CONDITION(s->length >= min_pages);
1941 CHECK_CONDITION(s->length <= max_pages);
1942 CHECK_CONDITION(GetDescriptor(s->start) == s);
1943 CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s);
1944 }
1945 return true;
1946}
1947#endif
1948
1949static void ReleaseFreeList(Span* list, Span* returned) {
1950 // Walk backwards through list so that when we push these
1951 // spans on the "returned" list, we preserve the order.
1952 while (!DLL_IsEmpty(list)) {
1953 Span* s = list->prev;
1954 DLL_Remove(s);
1955 DLL_Prepend(returned, s);
1956 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
1957 static_cast<size_t>(s->length << kPageShift));
1958 }
1959}
1960
1961void TCMalloc_PageHeap::ReleaseFreePages() {
1962 for (Length s = 0; s < kMaxPages; s++) {
1963 ReleaseFreeList(&free_[s].normal, &free_[s].returned);
1964 }
1965 ReleaseFreeList(&large_.normal, &large_.returned);
1966 ASSERT(Check());
1967}
1968
1969//-------------------------------------------------------------------
1970// Free list
1971//-------------------------------------------------------------------
1972
1973class TCMalloc_ThreadCache_FreeList {
1974 private:
1975 void* list_; // Linked list of nodes
1976 uint16_t length_; // Current length
1977 uint16_t lowater_; // Low water mark for list length
1978
1979 public:
1980 void Init() {
1981 list_ = NULL;
1982 length_ = 0;
1983 lowater_ = 0;
1984 }
1985
1986 // Return current length of list
1987 int length() const {
1988 return length_;
1989 }
1990
1991 // Is list empty?
1992 bool empty() const {
1993 return list_ == NULL;
1994 }
1995
1996 // Low-water mark management
1997 int lowwatermark() const { return lowater_; }
1998 void clear_lowwatermark() { lowater_ = length_; }
1999
2000 ALWAYS_INLINE void Push(void* ptr) {
2001 SLL_Push(&list_, ptr);
2002 length_++;
2003 }
2004
2005 void PushRange(int N, void *start, void *end) {
2006 SLL_PushRange(&list_, start, end);
2007 length_ = length_ + static_cast<uint16_t>(N);
2008 }
2009
2010 void PopRange(int N, void **start, void **end) {
2011 SLL_PopRange(&list_, N, start, end);
2012 ASSERT(length_ >= N);
2013 length_ = length_ - static_cast<uint16_t>(N);
2014 if (length_ < lowater_) lowater_ = length_;
2015 }
2016
2017 ALWAYS_INLINE void* Pop() {
2018 ASSERT(list_ != NULL);
2019 length_--;
2020 if (length_ < lowater_) lowater_ = length_;
2021 return SLL_Pop(&list_);
2022 }
2023
2024#ifdef WTF_CHANGES
2025 template <class Finder, class Reader>
2026 void enumerateFreeObjects(Finder& finder, const Reader& reader)
2027 {
2028 for (void* nextObject = list_; nextObject; nextObject = *reader(reinterpret_cast<void**>(nextObject)))
2029 finder.visit(nextObject);
2030 }
2031#endif
2032};
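
// Illustrative sketch (not compiled): how the low-water mark records the
// minimum list length seen between scavenge passes. a, b and c stand for
// arbitrary cached objects.
#if 0
TCMalloc_ThreadCache_FreeList list;
list.Init();
list.Push(a); list.Push(b); list.Push(c);   // length 3, lowater 0
list.clear_lowwatermark();                  // lowater = 3
void* p = list.Pop();                       // length 2, lowater 2
void* q = list.Pop();                       // length 1, lowater 1
// A later Scavenge() may return roughly lowwatermark()/2 objects to the
// central cache, since that many were never needed during the interval.
#endif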
2033
2034//-------------------------------------------------------------------
2035// Data kept per thread
2036//-------------------------------------------------------------------
2037
2038class TCMalloc_ThreadCache {
2039 private:
2040 typedef TCMalloc_ThreadCache_FreeList FreeList;
2041#if COMPILER(MSVC)
2042 typedef DWORD ThreadIdentifier;
2043#else
2044 typedef pthread_t ThreadIdentifier;
2045#endif
2046
2047 size_t size_; // Combined size of data
2048 ThreadIdentifier tid_; // Which thread owns it
2049 bool in_setspecific_; // Called pthread_setspecific?
2050 FreeList list_[kNumClasses]; // Array indexed by size-class
2051
2052 // We sample allocations, biased by the size of the allocation
2053 uint32_t rnd_; // Cheap random number generator
2054 size_t bytes_until_sample_; // Bytes until we sample next
2055
2056 // Allocate a new heap. REQUIRES: pageheap_lock is held.
2057 static inline TCMalloc_ThreadCache* NewHeap(ThreadIdentifier tid);
2058
2059 // Use only as pthread thread-specific destructor function.
2060 static void DestroyThreadCache(void* ptr);
2061 public:
2062 // All ThreadCache objects are kept in a linked list (for stats collection)
2063 TCMalloc_ThreadCache* next_;
2064 TCMalloc_ThreadCache* prev_;
2065
2066 void Init(ThreadIdentifier tid);
2067 void Cleanup();
2068
2069 // Accessors (mostly just for printing stats)
2070 int freelist_length(size_t cl) const { return list_[cl].length(); }
2071
2072 // Total byte size in cache
2073 size_t Size() const { return size_; }
2074
2075 void* Allocate(size_t size);
2076 void Deallocate(void* ptr, size_t size_class);
2077
2078 void FetchFromCentralCache(size_t cl, size_t allocationSize);
2079 void ReleaseToCentralCache(size_t cl, int N);
2080 void Scavenge();
2081 void Print() const;
2082
2083 // Record allocation of "k" bytes. Return true iff allocation
2084 // should be sampled
2085 bool SampleAllocation(size_t k);
2086
2087 // Pick next sampling point
2088 void PickNextSample(size_t k);
2089
2090 static void InitModule();
2091 static void InitTSD();
2092 static TCMalloc_ThreadCache* GetThreadHeap();
2093 static TCMalloc_ThreadCache* GetCache();
2094 static TCMalloc_ThreadCache* GetCacheIfPresent();
2095 static TCMalloc_ThreadCache* CreateCacheIfNecessary();
2096 static void DeleteCache(TCMalloc_ThreadCache* heap);
2097 static void BecomeIdle();
2098 static void RecomputeThreadCacheSize();
2099
2100#ifdef WTF_CHANGES
2101 template <class Finder, class Reader>
2102 void enumerateFreeObjects(Finder& finder, const Reader& reader)
2103 {
2104 for (unsigned sizeClass = 0; sizeClass < kNumClasses; sizeClass++)
2105 list_[sizeClass].enumerateFreeObjects(finder, reader);
2106 }
2107#endif
2108};
2109
2110//-------------------------------------------------------------------
2111// Data kept per size-class in central cache
2112//-------------------------------------------------------------------
2113
2114class TCMalloc_Central_FreeList {
2115 public:
2116 void Init(size_t cl);
2117
2118 // These methods all do internal locking.
2119
2120 // Insert the specified range into the central freelist. N is the number of
2121 // elements in the range.
2122 void InsertRange(void *start, void *end, int N);
2123
2124 // Remove up to *N elements; the actual number fetched is returned in *N.
2125 void RemoveRange(void **start, void **end, int *N);
2126
2127 // Returns the number of free objects in cache.
2128 size_t length() {
2129 SpinLockHolder h(&lock_);
2130 return counter_;
2131 }
2132
2133 // Returns the number of free objects in the transfer cache.
2134 int tc_length() {
2135 SpinLockHolder h(&lock_);
2136 return used_slots_ * num_objects_to_move[size_class_];
2137 }
2138
2139#ifdef WTF_CHANGES
2140 template <class Finder, class Reader>
2141 void enumerateFreeObjects(Finder& finder, const Reader& reader, TCMalloc_Central_FreeList* remoteCentralFreeList)
2142 {
2143 for (Span* span = &empty_; span && span != &empty_; span = (span->next ? reader(span->next) : 0))
2144 ASSERT(!span->objects);
2145
2146 ASSERT(!nonempty_.objects);
2147 static const ptrdiff_t nonemptyOffset = reinterpret_cast<const char*>(&nonempty_) - reinterpret_cast<const char*>(this);
2148
2149 Span* remoteNonempty = reinterpret_cast<Span*>(reinterpret_cast<char*>(remoteCentralFreeList) + nonemptyOffset);
2150 Span* remoteSpan = nonempty_.next;
2151
2152 for (Span* span = reader(remoteSpan); span && remoteSpan != remoteNonempty; remoteSpan = span->next, span = (span->next ? reader(span->next) : 0)) {
2153 for (void* nextObject = span->objects; nextObject; nextObject = *reader(reinterpret_cast<void**>(nextObject)))
2154 finder.visit(nextObject);
2155 }
2156 }
2157#endif
2158
2159 private:
2160 // REQUIRES: lock_ is held
2161 // Remove object from cache and return.
2162 // Return NULL if no free entries in cache.
2163 void* FetchFromSpans();
2164
2165 // REQUIRES: lock_ is held
2166 // Remove object from cache and return. Fetches
2167 // from pageheap if cache is empty. Only returns
2168 // NULL on allocation failure.
2169 void* FetchFromSpansSafe();
2170
2171 // REQUIRES: lock_ is held
2172 // Release a linked list of objects to spans.
2173 // May temporarily release lock_.
2174 void ReleaseListToSpans(void *start);
2175
2176 // REQUIRES: lock_ is held
2177 // Release an object to spans.
2178 // May temporarily release lock_.
2179 void ReleaseToSpans(void* object);
2180
2181 // REQUIRES: lock_ is held
2182 // Populate cache by fetching from the page heap.
2183 // May temporarily release lock_.
2184 void Populate();
2185
2186 // REQUIRES: lock_ is held.
2187 // Tries to make room for a TCEntry. If the cache is full, it will try to
2188 // expand it at the expense of some other size class's cache. Returns false
2189 // if there is no space.
2190 bool MakeCacheSpace();
2191
2192 // REQUIRES: lock_ for locked_size_class is held.
2193 // Picks a "random" size class to steal TCEntry slot from. In reality it
2194 // just iterates over the sizeclasses but does so without taking a lock.
2195 // Returns true on success.
2196 // May temporarily lock a "random" size class.
2197 static bool EvictRandomSizeClass(size_t locked_size_class, bool force);
2198
2199 // REQUIRES: lock_ is *not* held.
2200 // Tries to shrink the cache. If force is true, it will release objects to
2201 // spans if that allows it to shrink the cache. Returns false if it failed
2202 // to shrink the cache. Decrements cache_size_ on success.
2203 // May temporarily take lock_. If it takes lock_, the locked_size_class
2204 // lock is released to keep the thread from holding two size class locks
2205 // concurrently, which could lead to a deadlock.
2206 bool ShrinkCache(int locked_size_class, bool force);
2207
2208 // This lock protects all the data members. used_slots_ and cache_size_
2209 // may be looked at without holding the lock.
2210 SpinLock lock_;
2211
2212 // We keep linked lists of empty and non-empty spans.
2213 size_t size_class_; // My size class
2214 Span empty_; // Dummy header for list of empty spans
2215 Span nonempty_; // Dummy header for list of non-empty spans
2216 size_t counter_; // Number of free objects in cache entry
2217
2218 // Here we reserve space for TCEntry cache slots. Since one size class can
2219 // end up getting all the TCEntries quota in the system we just preallocate
2220 // sufficient number of entries here.
2221 TCEntry tc_slots_[kNumTransferEntries];
2222
2223 // Number of currently used cached entries in tc_slots_. This variable is
2224 // updated under a lock but can be read without one.
2225 int32_t used_slots_;
2226 // The current number of slots for this size class. This is an
2227 // adaptive value that is increased if there is lots of traffic
2228 // on a given size class.
2229 int32_t cache_size_;
2230};
2231
2232// Pad each CentralCache object to multiple of 64 bytes
2233class TCMalloc_Central_FreeListPadded : public TCMalloc_Central_FreeList {
2234 private:
2235 char pad_[(64 - (sizeof(TCMalloc_Central_FreeList) % 64)) % 64];
2236};
2237
2238//-------------------------------------------------------------------
2239// Global variables
2240//-------------------------------------------------------------------
2241
2242// Central cache -- a collection of free-lists, one per size-class.
2243// We have a separate lock per free-list to reduce contention.
2244static TCMalloc_Central_FreeListPadded central_cache[kNumClasses];
2245
2246// Page-level allocator
2247static SpinLock pageheap_lock = SPINLOCK_INITIALIZER;
2248static void* pageheap_memory[(sizeof(TCMalloc_PageHeap) + sizeof(void*) - 1) / sizeof(void*)];
2249static bool phinited = false;
2250
2251// Avoid extra level of indirection by making "pageheap" be just an alias
2252// of pageheap_memory.
2253typedef union {
2254 void* m_memory;
2255 TCMalloc_PageHeap* m_pageHeap;
2256} PageHeapUnion;
2257
2258static inline TCMalloc_PageHeap* getPageHeap()
2259{
2260 PageHeapUnion u = { &pageheap_memory[0] };
2261 return u.m_pageHeap;
2262}
2263
2264#define pageheap getPageHeap()
2265
2266#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
2267#if PLATFORM(WIN)
2268static void sleep(unsigned seconds)
2269{
2270 ::Sleep(seconds * 1000);
2271}
2272#endif
2273
2274void TCMalloc_PageHeap::scavengerThread()
2275{
2276 while (1) {
2277 if (!shouldContinueScavenging()) {
2278 pthread_mutex_lock(&m_scavengeMutex);
2279 m_scavengeThreadActive = false;
2280 // Block until there are enough freed pages to release back to the system.
2281 pthread_cond_wait(&m_scavengeCondition, &m_scavengeMutex);
2282 m_scavengeThreadActive = true;
2283 pthread_mutex_unlock(&m_scavengeMutex);
2284 }
2285 sleep(kScavengeTimerDelayInSeconds);
2286 {
2287 SpinLockHolder h(&pageheap_lock);
2288 pageheap->scavenge();
2289 }
2290 }
2291}
2292#endif
2293
2294// If TLS is available, we also store a copy
2295// of the per-thread object in a __thread variable
2296// since __thread variables are faster to read
2297// than pthread_getspecific(). We still need
2298// pthread_setspecific() because __thread
2299// variables provide no way to run cleanup
2300// code when a thread is destroyed.
2301#ifdef HAVE_TLS
2302static __thread TCMalloc_ThreadCache *threadlocal_heap;
2303#endif
2304// Thread-specific key. Initialization here is somewhat tricky
2305// because some Linux startup code invokes malloc() before it
2306 // is in a good enough state to handle pthread_key_create().
2307// Therefore, we use TSD keys only after tsd_inited is set to true.
2308// Until then, we use a slow path to get the heap object.
2309static bool tsd_inited = false;
2310static pthread_key_t heap_key;
2311#if COMPILER(MSVC)
2312DWORD tlsIndex = TLS_OUT_OF_INDEXES;
2313#endif
2314
2315static ALWAYS_INLINE void setThreadHeap(TCMalloc_ThreadCache* heap)
2316{
2317 // Still do pthread_setspecific() when using MSVC fast TLS so we
2318 // benefit from the delete callback.
2319 pthread_setspecific(heap_key, heap);
2320#if COMPILER(MSVC)
2321 TlsSetValue(tlsIndex, heap);
2322#endif
2323}
2324
2325// Allocator for thread heaps
2326static PageHeapAllocator<TCMalloc_ThreadCache> threadheap_allocator;
2327
2328// Linked list of heap objects. Protected by pageheap_lock.
2329static TCMalloc_ThreadCache* thread_heaps = NULL;
2330static int thread_heap_count = 0;
2331
2332// Overall thread cache size. Protected by pageheap_lock.
2333static size_t overall_thread_cache_size = kDefaultOverallThreadCacheSize;
2334
2335// Global per-thread cache size. Writes are protected by
2336// pageheap_lock. Reads are done without any locking, which should be
2337// fine as long as size_t can be written atomically and we don't place
2338// invariants between this variable and other pieces of state.
2339static volatile size_t per_thread_cache_size = kMaxThreadCacheSize;
2340
2341//-------------------------------------------------------------------
2342// Central cache implementation
2343//-------------------------------------------------------------------
2344
2345void TCMalloc_Central_FreeList::Init(size_t cl) {
2346 lock_.Init();
2347 size_class_ = cl;
2348 DLL_Init(&empty_);
2349 DLL_Init(&nonempty_);
2350 counter_ = 0;
2351
2352 cache_size_ = 1;
2353 used_slots_ = 0;
2354 ASSERT(cache_size_ <= kNumTransferEntries);
2355}
2356
2357void TCMalloc_Central_FreeList::ReleaseListToSpans(void* start) {
2358 while (start) {
2359 void *next = SLL_Next(start);
2360 ReleaseToSpans(start);
2361 start = next;
2362 }
2363}
2364
2365ALWAYS_INLINE void TCMalloc_Central_FreeList::ReleaseToSpans(void* object) {
2366 const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift;
2367 Span* span = pageheap->GetDescriptor(p);
2368 ASSERT(span != NULL);
2369 ASSERT(span->refcount > 0);
2370
2371 // If span is empty, move it to non-empty list
2372 if (span->objects == NULL) {
2373 DLL_Remove(span);
2374 DLL_Prepend(&nonempty_, span);
2375 Event(span, 'N', 0);
2376 }
2377
2378 // The following check is expensive, so it is disabled by default
2379 if (false) {
2380 // Check that object does not occur in list
2381 int got = 0;
2382 for (void* p = span->objects; p != NULL; p = *((void**) p)) {
2383 ASSERT(p != object);
2384 got++;
2385 }
2386 ASSERT(got + span->refcount ==
2387 (span->length<<kPageShift)/ByteSizeForClass(span->sizeclass));
2388 }
2389
2390 counter_++;
2391 span->refcount--;
2392 if (span->refcount == 0) {
2393 Event(span, '#', 0);
2394 counter_ -= (span->length<<kPageShift) / ByteSizeForClass(span->sizeclass);
2395 DLL_Remove(span);
2396
2397 // Release central list lock while operating on pageheap
2398 lock_.Unlock();
2399 {
2400 SpinLockHolder h(&pageheap_lock);
2401 pageheap->Delete(span);
2402 }
2403 lock_.Lock();
2404 } else {
2405 *(reinterpret_cast<void**>(object)) = span->objects;
2406 span->objects = object;
2407 }
2408}
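
// Worked example of the object-to-span lookup above, assuming 4K pages
// (kPageShift == 12): an object at address 0x00ac3f40 lies on page
// 0x00ac3f40 >> 12 == 0xac3, so pageheap->GetDescriptor(0xac3) returns the
// Span that covers it, no matter where within the span the object sits.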
2409
2410ALWAYS_INLINE bool TCMalloc_Central_FreeList::EvictRandomSizeClass(
2411 size_t locked_size_class, bool force) {
2412 static int race_counter = 0;
2413 int t = race_counter++; // Updated without a lock, but who cares.
2414 if (t >= static_cast<int>(kNumClasses)) {
2415 while (t >= static_cast<int>(kNumClasses)) {
2416 t -= kNumClasses;
2417 }
2418 race_counter = t;
2419 }
2420 ASSERT(t >= 0);
2421 ASSERT(t < static_cast<int>(kNumClasses));
2422 if (t == static_cast<int>(locked_size_class)) return false;
2423 return central_cache[t].ShrinkCache(static_cast<int>(locked_size_class), force);
2424}
2425
2426bool TCMalloc_Central_FreeList::MakeCacheSpace() {
2427 // Is there room in the cache?
2428 if (used_slots_ < cache_size_) return true;
2429 // Can we expand the cache?
2430 if (cache_size_ == kNumTransferEntries) return false;
2431 // Ok, we'll try to grab an entry from some other size class.
2432 if (EvictRandomSizeClass(size_class_, false) ||
2433 EvictRandomSizeClass(size_class_, true)) {
2434 // Succeeded in evicting, we're going to make our cache larger.
2435 cache_size_++;
2436 return true;
2437 }
2438 return false;
2439}
2440
2441
2442namespace {
2443class LockInverter {
2444 private:
2445 SpinLock *held_, *temp_;
2446 public:
2447 inline explicit LockInverter(SpinLock* held, SpinLock *temp)
2448 : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); }
2449 inline ~LockInverter() { temp_->Unlock(); held_->Lock(); }
2450};
2451}
2452
2453bool TCMalloc_Central_FreeList::ShrinkCache(int locked_size_class, bool force) {
2454 // Start with a quick check without taking a lock.
2455 if (cache_size_ == 0) return false;
2456 // We don't evict from a full cache unless we are 'forcing'.
2457 if (force == false && used_slots_ == cache_size_) return false;
2458
2459 // Grab lock, but first release the other lock held by this thread. We use
2460 // the lock inverter to ensure that we never hold two size class locks
2461 // concurrently. That can create a deadlock because there is no well
2462 // defined nesting order.
2463 LockInverter li(&central_cache[locked_size_class].lock_, &lock_);
2464 ASSERT(used_slots_ <= cache_size_);
2465 ASSERT(0 <= cache_size_);
2466 if (cache_size_ == 0) return false;
2467 if (used_slots_ == cache_size_) {
2468 if (force == false) return false;
2469 // ReleaseListToSpans releases the lock, so we have to make all the
2470 // updates to the central list before calling it.
2471 cache_size_--;
2472 used_slots_--;
2473 ReleaseListToSpans(tc_slots_[used_slots_].head);
2474 return true;
2475 }
2476 cache_size_--;
2477 return true;
2478}
2479
2480void TCMalloc_Central_FreeList::InsertRange(void *start, void *end, int N) {
2481 SpinLockHolder h(&lock_);
2482 if (N == num_objects_to_move[size_class_] &&
2483 MakeCacheSpace()) {
2484 int slot = used_slots_++;
2485 ASSERT(slot >=0);
2486 ASSERT(slot < kNumTransferEntries);
2487 TCEntry *entry = &tc_slots_[slot];
2488 entry->head = start;
2489 entry->tail = end;
2490 return;
2491 }
2492 ReleaseListToSpans(start);
2493}
2494
2495void TCMalloc_Central_FreeList::RemoveRange(void **start, void **end, int *N) {
2496 int num = *N;
2497 ASSERT(num > 0);
2498
2499 SpinLockHolder h(&lock_);
2500 if (num == num_objects_to_move[size_class_] && used_slots_ > 0) {
2501 int slot = --used_slots_;
2502 ASSERT(slot >= 0);
2503 TCEntry *entry = &tc_slots_[slot];
2504 *start = entry->head;
2505 *end = entry->tail;
2506 return;
2507 }
2508
2509 // TODO: Prefetch multiple TCEntries?
2510 void *tail = FetchFromSpansSafe();
2511 if (!tail) {
2512 // We are completely out of memory.
2513 *start = *end = NULL;
2514 *N = 0;
2515 return;
2516 }
2517
2518 SLL_SetNext(tail, NULL);
2519 void *head = tail;
2520 int count = 1;
2521 while (count < num) {
2522 void *t = FetchFromSpans();
2523 if (!t) break;
2524 SLL_Push(&head, t);
2525 count++;
2526 }
2527 *start = head;
2528 *end = tail;
2529 *N = count;
2530}
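
// Illustrative sketch (not compiled): the transfer-cache fast path implemented
// by the two functions above, for some size class cl, where head/tail delimit
// a singly linked chain of exactly num_objects_to_move[cl] objects. When a
// whole batch moves in either direction, the chain is parked in (or taken
// from) a TCEntry slot untouched, and the per-span freelists are never walked.
#if 0
int n = num_objects_to_move[cl];
central_cache[cl].InsertRange(head, tail, n);    // stashes the chain in tc_slots_ if a slot is free (or can be made)
void *start, *end;
central_cache[cl].RemoveRange(&start, &end, &n); // hands a stashed chain straight back out
#endif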
2531
2532
2533void* TCMalloc_Central_FreeList::FetchFromSpansSafe() {
2534 void *t = FetchFromSpans();
2535 if (!t) {
2536 Populate();
2537 t = FetchFromSpans();
2538 }
2539 return t;
2540}
2541
2542void* TCMalloc_Central_FreeList::FetchFromSpans() {
2543 if (DLL_IsEmpty(&nonempty_)) return NULL;
2544 Span* span = nonempty_.next;
2545
2546 ASSERT(span->objects != NULL);
2547 ASSERT_SPAN_COMMITTED(span);
2548 span->refcount++;
2549 void* result = span->objects;
2550 span->objects = *(reinterpret_cast<void**>(result));
2551 if (span->objects == NULL) {
2552 // Move to empty list
2553 DLL_Remove(span);
2554 DLL_Prepend(&empty_, span);
2555 Event(span, 'E', 0);
2556 }
2557 counter_--;
2558 return result;
2559}
2560
2561// Fetch memory from the system and add to the central cache freelist.
2562ALWAYS_INLINE void TCMalloc_Central_FreeList::Populate() {
2563 // Release central list lock while operating on pageheap
2564 lock_.Unlock();
2565 const size_t npages = class_to_pages[size_class_];
2566
2567 Span* span;
2568 {
2569 SpinLockHolder h(&pageheap_lock);
2570 span = pageheap->New(npages);
2571 if (span) pageheap->RegisterSizeClass(span, size_class_);
2572 }
2573 if (span == NULL) {
2574 MESSAGE("allocation failed: %d\n", errno);
2575 lock_.Lock();
2576 return;
2577 }
2578 ASSERT_SPAN_COMMITTED(span);
2579 ASSERT(span->length == npages);
2580 // Cache sizeclass info eagerly. Locking is not necessary.
2581 // (Instead of being eager, we could just replace any stale info
2582 // about this span, but that seems to be no better in practice.)
2583 for (size_t i = 0; i < npages; i++) {
2584 pageheap->CacheSizeClass(span->start + i, size_class_);
2585 }
2586
2587 // Split the block into pieces and add to the free-list
2588 // TODO: coloring of objects to avoid cache conflicts?
2589 void** tail = &span->objects;
2590 char* ptr = reinterpret_cast<char*>(span->start << kPageShift);
2591 char* limit = ptr + (npages << kPageShift);
2592 const size_t size = ByteSizeForClass(size_class_);
2593 int num = 0;
2594 char* nptr;
2595 while ((nptr = ptr + size) <= limit) {
2596 *tail = ptr;
2597 tail = reinterpret_cast<void**>(ptr);
2598 ptr = nptr;
2599 num++;
2600 }
2601 ASSERT(ptr <= limit);
2602 *tail = NULL;
2603 span->refcount = 0; // No sub-object in use yet
2604
2605 // Add span to list of non-empty spans
2606 lock_.Lock();
2607 DLL_Prepend(&nonempty_, span);
2608 counter_ += num;
2609}
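
// Worked example of the carving loop above, assuming 4K pages and a 32-byte
// size class backed by a single page: the loop threads 4096 / 32 == 128
// objects into a singly linked freelist stored inside the span's own pages,
// and counter_ grows by 128.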
2610
2611//-------------------------------------------------------------------
2612// TCMalloc_ThreadCache implementation
2613//-------------------------------------------------------------------
2614
2615inline bool TCMalloc_ThreadCache::SampleAllocation(size_t k) {
2616 if (bytes_until_sample_ < k) {
2617 PickNextSample(k);
2618 return true;
2619 } else {
2620 bytes_until_sample_ -= k;
2621 return false;
2622 }
2623}
2624
2625void TCMalloc_ThreadCache::Init(ThreadIdentifier tid) {
2626 size_ = 0;
2627 next_ = NULL;
2628 prev_ = NULL;
2629 tid_ = tid;
2630 in_setspecific_ = false;
2631 for (size_t cl = 0; cl < kNumClasses; ++cl) {
2632 list_[cl].Init();
2633 }
2634
2635 // Initialize RNG -- run it for a bit to get to good values
2636 bytes_until_sample_ = 0;
2637 rnd_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this));
2638 for (int i = 0; i < 100; i++) {
2639 PickNextSample(static_cast<size_t>(FLAGS_tcmalloc_sample_parameter * 2));
2640 }
2641}
2642
2643void TCMalloc_ThreadCache::Cleanup() {
2644 // Put unused memory back into central cache
2645 for (size_t cl = 0; cl < kNumClasses; ++cl) {
2646 if (list_[cl].length() > 0) {
2647 ReleaseToCentralCache(cl, list_[cl].length());
2648 }
2649 }
2650}
2651
2652ALWAYS_INLINE void* TCMalloc_ThreadCache::Allocate(size_t size) {
2653 ASSERT(size <= kMaxSize);
2654 const size_t cl = SizeClass(size);
2655 FreeList* list = &list_[cl];
2656 size_t allocationSize = ByteSizeForClass(cl);
2657 if (list->empty()) {
2658 FetchFromCentralCache(cl, allocationSize);
2659 if (list->empty()) return NULL;
2660 }
2661 size_ -= allocationSize;
2662 return list->Pop();
2663}
2664
2665inline void TCMalloc_ThreadCache::Deallocate(void* ptr, size_t cl) {
2666 size_ += ByteSizeForClass(cl);
2667 FreeList* list = &list_[cl];
2668 list->Push(ptr);
2669 // If enough data is free, put back into central cache
2670 if (list->length() > kMaxFreeListLength) {
2671 ReleaseToCentralCache(cl, num_objects_to_move[cl]);
2672 }
2673 if (size_ >= per_thread_cache_size) Scavenge();
2674}
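
// Illustrative sketch (not compiled) of the common fast path through the two
// functions above: no locks are taken unless the per-thread list is empty
// (Allocate) or the cache has grown past its limits (Deallocate).
#if 0
TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
void* p = heap->Allocate(40);        // pops from list_[SizeClass(40)]
heap->Deallocate(p, SizeClass(40));  // pushes back onto the same list
#endif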
2675
2676// Remove some objects of class "cl" from central cache and add to thread heap
2677ALWAYS_INLINE void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl, size_t allocationSize) {
2678 int fetch_count = num_objects_to_move[cl];
2679 void *start, *end;
2680 central_cache[cl].RemoveRange(&start, &end, &fetch_count);
2681 list_[cl].PushRange(fetch_count, start, end);
2682 size_ += allocationSize * fetch_count;
2683}
2684
2685// Remove some objects of class "cl" from thread heap and add to central cache
2686inline void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) {
2687 ASSERT(N > 0);
2688 FreeList* src = &list_[cl];
2689 if (N > src->length()) N = src->length();
2690 size_ -= N*ByteSizeForClass(cl);
2691
2692 // We return prepackaged chains of the correct size to the central cache.
2693 // TODO: Use the same format internally in the thread caches?
2694 int batch_size = num_objects_to_move[cl];
2695 while (N > batch_size) {
2696 void *tail, *head;
2697 src->PopRange(batch_size, &head, &tail);
2698 central_cache[cl].InsertRange(head, tail, batch_size);
2699 N -= batch_size;
2700 }
2701 void *tail, *head;
2702 src->PopRange(N, &head, &tail);
2703 central_cache[cl].InsertRange(head, tail, N);
2704}
2705
2706// Release idle memory to the central cache
2707inline void TCMalloc_ThreadCache::Scavenge() {
2708 // If the low-water mark for the free list is L, it means we would
2709 // not have had to allocate anything from the central cache even if
2710 // we had reduced the free list size by L. We aim to get closer to
2711 // that situation by dropping L/2 nodes from the free list. This
2712 // may not release much memory, but if so we will call scavenge again
2713 // pretty soon and the low-water marks will be high on that call.
2714 //int64 start = CycleClock::Now();
2715
2716 for (size_t cl = 0; cl < kNumClasses; cl++) {
2717 FreeList* list = &list_[cl];
2718 const int lowmark = list->lowwatermark();
2719 if (lowmark > 0) {
2720 const int drop = (lowmark > 1) ? lowmark/2 : 1;
2721 ReleaseToCentralCache(cl, drop);
2722 }
2723 list->clear_lowwatermark();
2724 }
2725
2726 //int64 finish = CycleClock::Now();
2727 //CycleTimer ct;
2728 //MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0);
2729}
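
// Worked example of the drop heuristic above: if a size class's free list
// never dipped below 8 entries since the last pass (lowwatermark() == 8),
// those 8 objects were never needed, so 8/2 == 4 of them are returned to the
// central cache; a low-water mark of 1 still releases a single object.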
2730
2731void TCMalloc_ThreadCache::PickNextSample(size_t k) {
2732 // Make next "random" number
2733 // x^32+x^22+x^2+x^1+1 is a primitive polynomial for random numbers
2734 static const uint32_t kPoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0);
2735 uint32_t r = rnd_;
2736 rnd_ = (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly);
2737
2738 // Next point is "rnd_ % (sample_period)". I.e., average
2739 // increment is "sample_period/2".
2740 const int flag_value = static_cast<int>(FLAGS_tcmalloc_sample_parameter);
2741 static int last_flag_value = -1;
2742
2743 if (flag_value != last_flag_value) {
2744 SpinLockHolder h(&sample_period_lock);
2745 int i;
2746 for (i = 0; i < (static_cast<int>(sizeof(primes_list)/sizeof(primes_list[0])) - 1); i++) {
2747 if (primes_list[i] >= flag_value) {
2748 break;
2749 }
2750 }
2751 sample_period = primes_list[i];
2752 last_flag_value = flag_value;
2753 }
2754
2755 bytes_until_sample_ += rnd_ % sample_period;
2756
2757 if (k > (static_cast<size_t>(-1) >> 2)) {
2758 // If the user has asked for a huge allocation then it is possible
2759 // for the code below to loop infinitely. Just return (note that
2760 // this throws off the sampling accuracy somewhat, but a user who
2761 // is allocating more than 1G of memory at a time can live with a
2762 // minor inaccuracy in profiling of small allocations, and also
2763 // would rather not wait for the loop below to terminate).
2764 return;
2765 }
2766
2767 while (bytes_until_sample_ < k) {
2768 // Increase bytes_until_sample_ by enough average sampling periods
2769 // (sample_period >> 1) to allow us to sample past the current
2770 // allocation.
2771 bytes_until_sample_ += (sample_period >> 1);
2772 }
2773
2774 bytes_until_sample_ -= k;
2775}
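
// Note on the generator above: kPoly omits the x^32 term because it is
// implicit in the 32-bit register. The update
//   (r << 1) ^ ((int32_t(r) >> 31) & kPoly)
// shifts in a zero and XORs in the feedback taps exactly when the bit shifted
// out (the old top bit) was set, which is a standard left-shifting Galois
// LFSR step.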
2776
2777void TCMalloc_ThreadCache::InitModule() {
2778 // There is a slight potential race here because of double-checked
2779 // locking idiom. However, as long as the program does a small
2780 // allocation before switching to multi-threaded mode, we will be
2781 // fine. We increase the chances of doing such a small allocation
2782 // by doing one in the constructor of the module_enter_exit_hook
2783 // object declared below.
2784 SpinLockHolder h(&pageheap_lock);
2785 if (!phinited) {
2786#ifdef WTF_CHANGES
2787 InitTSD();
2788#endif
2789 InitSizeClasses();
2790 threadheap_allocator.Init();
2791 span_allocator.Init();
2792 span_allocator.New(); // Reduce cache conflicts
2793 span_allocator.New(); // Reduce cache conflicts
2794 stacktrace_allocator.Init();
2795 DLL_Init(&sampled_objects);
2796 for (size_t i = 0; i < kNumClasses; ++i) {
2797 central_cache[i].Init(i);
2798 }
2799 pageheap->init();
2800 phinited = 1;
2801#if defined(WTF_CHANGES) && PLATFORM(DARWIN)
2802 FastMallocZone::init();
2803#endif
2804 }
2805}
2806
2807inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::NewHeap(ThreadIdentifier tid) {
2808 // Create the heap and add it to the linked list
2809 TCMalloc_ThreadCache *heap = threadheap_allocator.New();
2810 heap->Init(tid);
2811 heap->next_ = thread_heaps;
2812 heap->prev_ = NULL;
2813 if (thread_heaps != NULL) thread_heaps->prev_ = heap;
2814 thread_heaps = heap;
2815 thread_heap_count++;
2816 RecomputeThreadCacheSize();
2817 return heap;
2818}
2819
2820inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetThreadHeap() {
2821#ifdef HAVE_TLS
2822 // __thread is faster, but only when the kernel supports it
2823 if (KernelSupportsTLS())
2824 return threadlocal_heap;
2825#elif COMPILER(MSVC)
2826 return static_cast<TCMalloc_ThreadCache*>(TlsGetValue(tlsIndex));
2827#else
2828 return static_cast<TCMalloc_ThreadCache*>(pthread_getspecific(heap_key));
2829#endif
2830}
2831
2832inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() {
2833 TCMalloc_ThreadCache* ptr = NULL;
2834 if (!tsd_inited) {
2835 InitModule();
2836 } else {
2837 ptr = GetThreadHeap();
2838 }
2839 if (ptr == NULL) ptr = CreateCacheIfNecessary();
2840 return ptr;
2841}
2842
2843// In deletion paths, we do not try to create a thread-cache. This is
2844// because we may be in the thread destruction code and may have
2845// already cleaned up the cache for this thread.
2846inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() {
2847 if (!tsd_inited) return NULL;
2848 void* const p = GetThreadHeap();
2849 return reinterpret_cast<TCMalloc_ThreadCache*>(p);
2850}
2851
2852void TCMalloc_ThreadCache::InitTSD() {
2853 ASSERT(!tsd_inited);
2854 pthread_key_create(&heap_key, DestroyThreadCache);
2855#if COMPILER(MSVC)
2856 tlsIndex = TlsAlloc();
2857#endif
2858 tsd_inited = true;
2859
2860#if !COMPILER(MSVC)
2861 // We may have used a fake pthread_t for the main thread. Fix it.
2862 pthread_t zero;
2863 memset(&zero, 0, sizeof(zero));
2864#endif
2865#ifndef WTF_CHANGES
2866 SpinLockHolder h(&pageheap_lock);
2867#else
2868 ASSERT(pageheap_lock.IsHeld());
2869#endif
2870 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
2871#if COMPILER(MSVC)
2872 if (h->tid_ == 0) {
2873 h->tid_ = GetCurrentThreadId();
2874 }
2875#else
2876 if (pthread_equal(h->tid_, zero)) {
2877 h->tid_ = pthread_self();
2878 }
2879#endif
2880 }
2881}
2882
2883TCMalloc_ThreadCache* TCMalloc_ThreadCache::CreateCacheIfNecessary() {
2884 // Initialize per-thread data if necessary
2885 TCMalloc_ThreadCache* heap = NULL;
2886 {
2887 SpinLockHolder h(&pageheap_lock);
2888
2889#if COMPILER(MSVC)
2890 DWORD me;
2891 if (!tsd_inited) {
2892 me = 0;
2893 } else {
2894 me = GetCurrentThreadId();
2895 }
2896#else
2897 // Early on in glibc's life, we cannot even call pthread_self()
2898 pthread_t me;
2899 if (!tsd_inited) {
2900 memset(&me, 0, sizeof(me));
2901 } else {
2902 me = pthread_self();
2903 }
2904#endif
2905
2906 // This may be a recursive malloc call from pthread_setspecific()
2907 // In that case, the heap for this thread has already been created
2908 // and added to the linked list. So we search for that first.
2909 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
2910#if COMPILER(MSVC)
2911 if (h->tid_ == me) {
2912#else
2913 if (pthread_equal(h->tid_, me)) {
2914#endif
2915 heap = h;
2916 break;
2917 }
2918 }
2919
2920 if (heap == NULL) heap = NewHeap(me);
2921 }
2922
2923 // We call pthread_setspecific() outside the lock because it may
2924 // call malloc() recursively. The recursive call will never get
2925 // here again because it will find the already allocated heap in the
2926 // linked list of heaps.
2927 if (!heap->in_setspecific_ && tsd_inited) {
2928 heap->in_setspecific_ = true;
2929 setThreadHeap(heap);
2930 }
2931 return heap;
2932}
2933
2934void TCMalloc_ThreadCache::BecomeIdle() {
2935 if (!tsd_inited) return; // No caches yet
2936 TCMalloc_ThreadCache* heap = GetThreadHeap();
2937 if (heap == NULL) return; // No thread cache to remove
2938 if (heap->in_setspecific_) return; // Do not disturb the active caller
2939
2940 heap->in_setspecific_ = true;
2941 pthread_setspecific(heap_key, NULL);
2942#ifdef HAVE_TLS
2943 // Also update the copy in __thread
2944 threadlocal_heap = NULL;
2945#endif
2946 heap->in_setspecific_ = false;
2947 if (GetThreadHeap() == heap) {
2948 // Somehow heap got reinstated by a recursive call to malloc
2949 // from pthread_setspecific. We give up in this case.
2950 return;
2951 }
2952
2953 // We can now get rid of the heap
2954 DeleteCache(heap);
2955}
2956
2957void TCMalloc_ThreadCache::DestroyThreadCache(void* ptr) {
2958 // Note that "ptr" cannot be NULL since pthread promises not
2959 // to invoke the destructor on NULL values, but for safety,
2960 // we check anyway.
2961 if (ptr == NULL) return;
2962#ifdef HAVE_TLS
2963 // Prevent fast path of GetThreadHeap() from returning heap.
2964 threadlocal_heap = NULL;
2965#endif
2966 DeleteCache(reinterpret_cast<TCMalloc_ThreadCache*>(ptr));
2967}
2968
2969void TCMalloc_ThreadCache::DeleteCache(TCMalloc_ThreadCache* heap) {
2970 // Remove all memory from heap
2971 heap->Cleanup();
2972
2973 // Remove from linked list
2974 SpinLockHolder h(&pageheap_lock);
2975 if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_;
2976 if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_;
2977 if (thread_heaps == heap) thread_heaps = heap->next_;
2978 thread_heap_count--;
2979 RecomputeThreadCacheSize();
2980
2981 threadheap_allocator.Delete(heap);
2982}
2983
2984void TCMalloc_ThreadCache::RecomputeThreadCacheSize() {
2985 // Divide available space across threads
2986 int n = thread_heap_count > 0 ? thread_heap_count : 1;
2987 size_t space = overall_thread_cache_size / n;
2988
2989 // Limit to allowed range
2990 if (space < kMinThreadCacheSize) space = kMinThreadCacheSize;
2991 if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize;
2992
2993 per_thread_cache_size = space;
2994}
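
// Worked example of the division and clamping above, assuming for the sake of
// illustration an 8MB overall budget and min/max per-thread sizes of 64KB and
// 2MB: with 16 threads each cache gets 512KB; with 2 threads the 4MB share is
// clamped down to 2MB; with 1000 threads the roughly 8KB share is clamped up
// to 64KB.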
2995
2996void TCMalloc_ThreadCache::Print() const {
2997 for (size_t cl = 0; cl < kNumClasses; ++cl) {
2998 MESSAGE(" %5" PRIuS " : %4d len; %4d lo\n",
2999 ByteSizeForClass(cl),
3000 list_[cl].length(),
3001 list_[cl].lowwatermark());
3002 }
3003}
3004
3005// Extract interesting stats
3006struct TCMallocStats {
3007 uint64_t system_bytes; // Bytes alloced from system
3008 uint64_t thread_bytes; // Bytes in thread caches
3009 uint64_t central_bytes; // Bytes in central cache
3010 uint64_t transfer_bytes; // Bytes in central transfer cache
3011 uint64_t pageheap_bytes; // Bytes in page heap
3012 uint64_t metadata_bytes; // Bytes alloced for metadata
3013};
3014
3015#ifndef WTF_CHANGES
3016// Get stats into "r". Also get per-size-class counts if class_count != NULL
3017static void ExtractStats(TCMallocStats* r, uint64_t* class_count) {
3018 r->central_bytes = 0;
3019 r->transfer_bytes = 0;
3020 for (int cl = 0; cl < kNumClasses; ++cl) {
3021 const int length = central_cache[cl].length();
3022 const int tc_length = central_cache[cl].tc_length();
3023 r->central_bytes += static_cast<uint64_t>(ByteSizeForClass(cl)) * length;
3024 r->transfer_bytes +=
3025 static_cast<uint64_t>(ByteSizeForClass(cl)) * tc_length;
3026 if (class_count) class_count[cl] = length + tc_length;
3027 }
3028
3029 // Add stats from per-thread heaps
3030 r->thread_bytes = 0;
3031 { // scope
3032 SpinLockHolder h(&pageheap_lock);
3033 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
3034 r->thread_bytes += h->Size();
3035 if (class_count) {
3036 for (size_t cl = 0; cl < kNumClasses; ++cl) {
3037 class_count[cl] += h->freelist_length(cl);
3038 }
3039 }
3040 }
3041 }
3042
3043 { //scope
3044 SpinLockHolder h(&pageheap_lock);
3045 r->system_bytes = pageheap->SystemBytes();
3046 r->metadata_bytes = metadata_system_bytes;
3047 r->pageheap_bytes = pageheap->FreeBytes();
3048 }
3049}
3050#endif
3051
3052#ifndef WTF_CHANGES
3053// WRITE stats to "out"
3054static void DumpStats(TCMalloc_Printer* out, int level) {
3055 TCMallocStats stats;
3056 uint64_t class_count[kNumClasses];
3057 ExtractStats(&stats, (level >= 2 ? class_count : NULL));
3058
3059 if (level >= 2) {
3060 out->printf("------------------------------------------------\n");
3061 uint64_t cumulative = 0;
3062 for (int cl = 0; cl < kNumClasses; ++cl) {
3063 if (class_count[cl] > 0) {
3064 uint64_t class_bytes = class_count[cl] * ByteSizeForClass(cl);
3065 cumulative += class_bytes;
3066 out->printf("class %3d [ %8" PRIuS " bytes ] : "
3067 "%8" PRIu64 " objs; %5.1f MB; %5.1f cum MB\n",
3068 cl, ByteSizeForClass(cl),
3069 class_count[cl],
3070 class_bytes / 1048576.0,
3071 cumulative / 1048576.0);
3072 }
3073 }
3074
3075 SpinLockHolder h(&pageheap_lock);
3076 pageheap->Dump(out);
3077 }
3078
3079 const uint64_t bytes_in_use = stats.system_bytes
3080 - stats.pageheap_bytes
3081 - stats.central_bytes
3082 - stats.transfer_bytes
3083 - stats.thread_bytes;
3084
3085 out->printf("------------------------------------------------\n"
3086 "MALLOC: %12" PRIu64 " Heap size\n"
3087 "MALLOC: %12" PRIu64 " Bytes in use by application\n"
3088 "MALLOC: %12" PRIu64 " Bytes free in page heap\n"
3089 "MALLOC: %12" PRIu64 " Bytes free in central cache\n"
3090 "MALLOC: %12" PRIu64 " Bytes free in transfer cache\n"
3091 "MALLOC: %12" PRIu64 " Bytes free in thread caches\n"
3092 "MALLOC: %12" PRIu64 " Spans in use\n"
3093 "MALLOC: %12" PRIu64 " Thread heaps in use\n"
3094 "MALLOC: %12" PRIu64 " Metadata allocated\n"
3095 "------------------------------------------------\n",
3096 stats.system_bytes,
3097 bytes_in_use,
3098 stats.pageheap_bytes,
3099 stats.central_bytes,
3100 stats.transfer_bytes,
3101 stats.thread_bytes,
3102 uint64_t(span_allocator.inuse()),
3103 uint64_t(threadheap_allocator.inuse()),
3104 stats.metadata_bytes);
3105}
3106
3107static void PrintStats(int level) {
3108 const int kBufferSize = 16 << 10;
3109 char* buffer = new char[kBufferSize];
3110 TCMalloc_Printer printer(buffer, kBufferSize);
3111 DumpStats(&printer, level);
3112 write(STDERR_FILENO, buffer, strlen(buffer));
3113 delete[] buffer;
3114}
3115
3116static void** DumpStackTraces() {
3117 // Count how much space we need
3118 int needed_slots = 0;
3119 {
3120 SpinLockHolder h(&pageheap_lock);
3121 for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) {
3122 StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects);
3123 needed_slots += 3 + stack->depth;
3124 }
3125 needed_slots += 100; // Slop in case sample grows
3126 needed_slots += needed_slots/8; // An extra 12.5% slop
3127 }
3128
3129 void** result = new void*[needed_slots];
3130 if (result == NULL) {
3131 MESSAGE("tcmalloc: could not allocate %d slots for stack traces\n",
3132 needed_slots);
3133 return NULL;
3134 }
3135
3136 SpinLockHolder h(&pageheap_lock);
3137 int used_slots = 0;
3138 for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) {
3139 ASSERT(used_slots < needed_slots); // Need to leave room for terminator
3140 StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects);
3141 if (used_slots + 3 + stack->depth >= needed_slots) {
3142 // No more room
3143 break;
3144 }
3145
3146 result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1));
3147 result[used_slots+1] = reinterpret_cast<void*>(stack->size);
3148 result[used_slots+2] = reinterpret_cast<void*>(stack->depth);
3149 for (int d = 0; d < stack->depth; d++) {
3150 result[used_slots+3+d] = stack->stack[d];
3151 }
3152 used_slots += 3 + stack->depth;
3153 }
3154 result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0));
3155 return result;
3156}
3157#endif
3158
3159#ifndef WTF_CHANGES
3160
3161// TCMalloc's support for extra malloc interfaces
3162class TCMallocImplementation : public MallocExtension {
3163 public:
3164 virtual void GetStats(char* buffer, int buffer_length) {
3165 ASSERT(buffer_length > 0);
3166 TCMalloc_Printer printer(buffer, buffer_length);
3167
3168 // Print level one stats unless lots of space is available
3169 if (buffer_length < 10000) {
3170 DumpStats(&printer, 1);
3171 } else {
3172 DumpStats(&printer, 2);
3173 }
3174 }
3175
3176 virtual void** ReadStackTraces() {
3177 return DumpStackTraces();
3178 }
3179
3180 virtual bool GetNumericProperty(const char* name, size_t* value) {
3181 ASSERT(name != NULL);
3182
3183 if (strcmp(name, "generic.current_allocated_bytes") == 0) {
3184 TCMallocStats stats;
3185 ExtractStats(&stats, NULL);
3186 *value = stats.system_bytes
3187 - stats.thread_bytes
3188 - stats.central_bytes
3189 - stats.pageheap_bytes;
3190 return true;
3191 }
3192
3193 if (strcmp(name, "generic.heap_size") == 0) {
3194 TCMallocStats stats;
3195 ExtractStats(&stats, NULL);
3196 *value = stats.system_bytes;
3197 return true;
3198 }
3199
3200 if (strcmp(name, "tcmalloc.slack_bytes") == 0) {
3201 // We assume that bytes in the page heap are not fragmented too
3202 // badly, and are therefore available for allocation.
3203 SpinLockHolder l(&pageheap_lock);
3204 *value = pageheap->FreeBytes();
3205 return true;
3206 }
3207
3208 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
3209 SpinLockHolder l(&pageheap_lock);
3210 *value = overall_thread_cache_size;
3211 return true;
3212 }
3213
3214 if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) {
3215 TCMallocStats stats;
3216 ExtractStats(&stats, NULL);
3217 *value = stats.thread_bytes;
3218 return true;
3219 }
3220
3221 return false;
3222 }
3223
3224 virtual bool SetNumericProperty(const char* name, size_t value) {
3225 ASSERT(name != NULL);
3226
3227 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
3228 // Clip the value to a reasonable range
3229 if (value < kMinThreadCacheSize) value = kMinThreadCacheSize;
3230 if (value > (1<<30)) value = (1<<30); // Limit to 1GB
3231
3232 SpinLockHolder l(&pageheap_lock);
3233 overall_thread_cache_size = static_cast<size_t>(value);
3234 TCMalloc_ThreadCache::RecomputeThreadCacheSize();
3235 return true;
3236 }
3237
3238 return false;
3239 }
3240
3241 virtual void MarkThreadIdle() {
3242 TCMalloc_ThreadCache::BecomeIdle();
3243 }
3244
3245 virtual void ReleaseFreeMemory() {
3246 SpinLockHolder h(&pageheap_lock);
3247 pageheap->ReleaseFreePages();
3248 }
3249};
3250#endif
3251
3252// The constructor allocates an object to ensure that initialization
3253// runs before main(), and therefore we do not have a chance to become
3254// multi-threaded before initialization. We also create the TSD key
3255// here. Presumably by the time this constructor runs, glibc is in
3256// good enough shape to handle pthread_key_create().
3257//
3258// The constructor also takes the opportunity to tell STL to use
3259// tcmalloc. We want to do this early, before construct time, so
3260// all user STL allocations go through tcmalloc (which works really
3261// well for STL).
3262//
3263// The destructor prints stats when the program exits.
3264class TCMallocGuard {
3265 public:
3266
3267 TCMallocGuard() {
3268#ifdef HAVE_TLS // this is true if the cc/ld/libc combo support TLS
3269 // Check whether the kernel also supports TLS (needs to happen at runtime)
3270 CheckIfKernelSupportsTLS();
3271#endif
3272#ifndef WTF_CHANGES
3273#ifdef WIN32 // patch the windows VirtualAlloc, etc.
3274 PatchWindowsFunctions(); // defined in windows/patch_functions.cc
3275#endif
3276#endif
3277 free(malloc(1));
3278 TCMalloc_ThreadCache::InitTSD();
3279 free(malloc(1));
3280#ifndef WTF_CHANGES
3281 MallocExtension::Register(new TCMallocImplementation);
3282#endif
3283 }
3284
3285#ifndef WTF_CHANGES
3286 ~TCMallocGuard() {
3287 const char* env = getenv("MALLOCSTATS");
3288 if (env != NULL) {
3289 int level = atoi(env);
3290 if (level < 1) level = 1;
3291 PrintStats(level);
3292 }
3293#ifdef WIN32
3294 UnpatchWindowsFunctions();
3295#endif
3296 }
3297#endif
3298};
3299
3300#ifndef WTF_CHANGES
3301static TCMallocGuard module_enter_exit_hook;
3302#endif
3303
3304
3305//-------------------------------------------------------------------
3306// Helpers for the exported routines below
3307//-------------------------------------------------------------------
3308
3309#ifndef WTF_CHANGES
3310
3311static Span* DoSampledAllocation(size_t size) {
3312
3313 // Grab the stack trace outside the heap lock
3314 StackTrace tmp;
3315 tmp.depth = GetStackTrace(tmp.stack, kMaxStackDepth, 1);
3316 tmp.size = size;
3317
3318 SpinLockHolder h(&pageheap_lock);
3319 // Allocate span
3320 Span *span = pageheap->New(pages(size == 0 ? 1 : size));
3321 if (span == NULL) {
3322 return NULL;
3323 }
3324
3325 // Allocate stack trace
3326 StackTrace *stack = stacktrace_allocator.New();
3327 if (stack == NULL) {
3328 // Sampling failed because of lack of memory
3329 return span;
3330 }
3331
3332 *stack = tmp;
3333 span->sample = 1;
3334 span->objects = stack;
3335 DLL_Prepend(&sampled_objects, span);
3336
3337 return span;
3338}
3339#endif
3340
3341static inline bool CheckCachedSizeClass(void *ptr) {
3342 PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
3343 size_t cached_value = pageheap->GetSizeClassIfCached(p);
3344 return cached_value == 0 ||
3345 cached_value == pageheap->GetDescriptor(p)->sizeclass;
3346}
3347
3348static inline void* CheckedMallocResult(void *result)
3349{
3350 ASSERT(result == 0 || CheckCachedSizeClass(result));
3351 return result;
3352}
3353
3354static inline void* SpanToMallocResult(Span *span) {
3355 ASSERT_SPAN_COMMITTED(span);
3356 pageheap->CacheSizeClass(span->start, 0);
3357 return
3358 CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
3359}
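// Illustrative sketch (not compiled in): the pointer <-> page arithmetic the
// helpers above rely on. Every address within a page maps to the same PageID,
// which is why a small per-page size-class cache can answer most lookups
// without dereferencing the Span. The 12-bit shift is an assumption chosen for
// illustration; the real code uses kPageShift.
#if 0
#include <stdint.h>

static const unsigned examplePageShift = 12;  // assumed 4KB pages

static uintptr_t examplePageID(void* ptr) {
    return reinterpret_cast<uintptr_t>(ptr) >> examplePageShift;
}

static void* examplePageStart(uintptr_t pageID) {
    return reinterpret_cast<void*>(pageID << examplePageShift);
}
#endif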
3360
3361#ifdef WTF_CHANGES
3362template <bool crashOnFailure>
3363#endif
3364static ALWAYS_INLINE void* do_malloc(size_t size) {
3365 void* ret = NULL;
3366
3367#ifdef WTF_CHANGES
3368 ASSERT(!isForbidden());
3369#endif
3370
3371 // The following call forces module initialization
3372 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
3373#ifndef WTF_CHANGES
3374 if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
3375 Span* span = DoSampledAllocation(size);
3376 if (span != NULL) {
3377 ret = SpanToMallocResult(span);
3378 }
3379 } else
3380#endif
3381 if (size > kMaxSize) {
3382 // Use page-level allocator
3383 SpinLockHolder h(&pageheap_lock);
3384 Span* span = pageheap->New(pages(size));
3385 if (span != NULL) {
3386 ret = SpanToMallocResult(span);
3387 }
3388 } else {
3389 // The common case, and also the simplest. This just pops the
3390    // size-appropriate freelist, after replenishing it if it's empty.
3391 ret = CheckedMallocResult(heap->Allocate(size));
3392 }
3393 if (!ret) {
3394#ifdef WTF_CHANGES
3395 if (crashOnFailure) // This branch should be optimized out by the compiler.
3396 CRASH();
3397#else
3398 errno = ENOMEM;
3399#endif
3400 }
3401 return ret;
3402}
3403
3404static ALWAYS_INLINE void do_free(void* ptr) {
3405 if (ptr == NULL) return;
3406 ASSERT(pageheap != NULL); // Should not call free() before malloc()
3407 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
3408 Span* span = NULL;
3409 size_t cl = pageheap->GetSizeClassIfCached(p);
3410
3411 if (cl == 0) {
3412 span = pageheap->GetDescriptor(p);
3413 cl = span->sizeclass;
3414 pageheap->CacheSizeClass(p, cl);
3415 }
3416 if (cl != 0) {
3417#ifndef NO_TCMALLOC_SAMPLES
3418 ASSERT(!pageheap->GetDescriptor(p)->sample);
3419#endif
3420 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCacheIfPresent();
3421 if (heap != NULL) {
3422 heap->Deallocate(ptr, cl);
3423 } else {
3424 // Delete directly into central cache
3425 SLL_SetNext(ptr, NULL);
3426 central_cache[cl].InsertRange(ptr, ptr, 1);
3427 }
3428 } else {
3429 SpinLockHolder h(&pageheap_lock);
3430 ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
3431 ASSERT(span != NULL && span->start == p);
3432#ifndef NO_TCMALLOC_SAMPLES
3433 if (span->sample) {
3434 DLL_Remove(span);
3435 stacktrace_allocator.Delete(reinterpret_cast<StackTrace*>(span->objects));
3436 span->objects = NULL;
3437 }
3438#endif
3439 pageheap->Delete(span);
3440 }
3441}
3442
3443#ifndef WTF_CHANGES
3444// For use by exported routines below that want specific alignments
3445//
3446// Note: this code can be slow, and can significantly fragment memory.
3447// The expectation is that memalign/posix_memalign/valloc/pvalloc will
3448// not be invoked very often. This requirement simplifies our
3449// implementation and allows us to tune for expected allocation
3450// patterns.
3451static void* do_memalign(size_t align, size_t size) {
3452 ASSERT((align & (align - 1)) == 0);
3453 ASSERT(align > 0);
3454 if (pageheap == NULL) TCMalloc_ThreadCache::InitModule();
3455
3456 // Allocate at least one byte to avoid boundary conditions below
3457 if (size == 0) size = 1;
3458
3459 if (size <= kMaxSize && align < kPageSize) {
3460 // Search through acceptable size classes looking for one with
3461 // enough alignment. This depends on the fact that
3462 // InitSizeClasses() currently produces several size classes that
3463 // are aligned at powers of two. We will waste time and space if
3464 // we miss in the size class array, but that is deemed acceptable
3465 // since memalign() should be used rarely.
3466 size_t cl = SizeClass(size);
3467 while (cl < kNumClasses && ((class_to_size[cl] & (align - 1)) != 0)) {
3468 cl++;
3469 }
3470 if (cl < kNumClasses) {
3471 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
3472 return CheckedMallocResult(heap->Allocate(class_to_size[cl]));
3473 }
3474 }
3475
3476 // We will allocate directly from the page heap
3477 SpinLockHolder h(&pageheap_lock);
3478
3479 if (align <= kPageSize) {
3480 // Any page-level allocation will be fine
3481 // TODO: We could put the rest of this page in the appropriate
3482 // TODO: cache but it does not seem worth it.
3483 Span* span = pageheap->New(pages(size));
3484 return span == NULL ? NULL : SpanToMallocResult(span);
3485 }
3486
3487 // Allocate extra pages and carve off an aligned portion
3488 const Length alloc = pages(size + align);
3489 Span* span = pageheap->New(alloc);
3490 if (span == NULL) return NULL;
3491
3492 // Skip starting portion so that we end up aligned
3493 Length skip = 0;
3494 while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) {
3495 skip++;
3496 }
3497 ASSERT(skip < alloc);
3498 if (skip > 0) {
3499 Span* rest = pageheap->Split(span, skip);
3500 pageheap->Delete(span);
3501 span = rest;
3502 }
3503
3504 // Skip trailing portion that we do not need to return
3505 const Length needed = pages(size);
3506 ASSERT(span->length >= needed);
3507 if (span->length > needed) {
3508 Span* trailer = pageheap->Split(span, needed);
3509 pageheap->Delete(trailer);
3510 }
3511 return SpanToMallocResult(span);
3512}
3513#endif
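// Illustrative sketch (not compiled in): the alignment arithmetic behind the
// "skip" loop in do_memalign() above. For a power-of-two alignment the
// round-up can be written in closed form; the loop above counts whole pages
// instead, but computes the same boundary. The helper name is hypothetical.
#if 0
#include <stdint.h>

static uintptr_t roundUpToAlignment(uintptr_t address, uintptr_t align) {
    // Precondition: align is a nonzero power of two (checked by the ASSERTs above).
    return (address + align - 1) & ~(align - 1);
}
#endif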
3514
3515// Helpers for use by exported routines below:
3516
3517#ifndef WTF_CHANGES
3518static inline void do_malloc_stats() {
3519 PrintStats(1);
3520}
3521#endif
3522
3523static inline int do_mallopt(int, int) {
3524 return 1; // Indicates error
3525}
3526
3527#ifdef HAVE_STRUCT_MALLINFO // mallinfo isn't defined on freebsd, for instance
3528static inline struct mallinfo do_mallinfo() {
3529 TCMallocStats stats;
3530 ExtractStats(&stats, NULL);
3531
3532 // Just some of the fields are filled in.
3533 struct mallinfo info;
3534 memset(&info, 0, sizeof(info));
3535
3536  // Unfortunately, the struct contains "int" fields, so some of the
3537 // size values will be truncated.
3538 info.arena = static_cast<int>(stats.system_bytes);
3539 info.fsmblks = static_cast<int>(stats.thread_bytes
3540 + stats.central_bytes
3541 + stats.transfer_bytes);
3542 info.fordblks = static_cast<int>(stats.pageheap_bytes);
3543 info.uordblks = static_cast<int>(stats.system_bytes
3544 - stats.thread_bytes
3545 - stats.central_bytes
3546 - stats.transfer_bytes
3547 - stats.pageheap_bytes);
3548
3549 return info;
3550}
3551#endif
3552
3553//-------------------------------------------------------------------
3554// Exported routines
3555//-------------------------------------------------------------------
3556
3557// CAVEAT: The code structure below ensures that MallocHook methods are always
3558// called from the stack frame of the invoked allocation function.
3559// heap-checker.cc depends on this to start a stack trace from
3560// the call to the (de)allocation function.
3561
3562#ifndef WTF_CHANGES
3563extern "C"
3564#else
3565#define do_malloc do_malloc<crashOnFailure>
3566
3567template <bool crashOnFailure>
3568void* malloc(size_t);
3569
3570void* fastMalloc(size_t size)
3571{
3572 return malloc<true>(size);
3573}
3574
3575void* tryFastMalloc(size_t size)
3576{
3577 return malloc<false>(size);
3578}
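// Illustrative sketch (not compiled in): how the two entry points above differ
// for callers. fastMalloc() takes the crashOnFailure branch of do_malloc() and
// never returns 0, so its result needs no null check; tryFastMalloc() returns
// 0 on failure and the caller must check. The function name is hypothetical.
#if 0
static void allocationStyles() {
    void* always = fastMalloc(64);  // never 0; CRASH() on failure
    fastFree(always);

    if (void* maybe = tryFastMalloc(1024 * 1024)) {  // may be 0
        fastFree(maybe);
    }
}
#endif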
3579
3580template <bool crashOnFailure>
3581ALWAYS_INLINE
3582#endif
3583void* malloc(size_t size) {
3584#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3585 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= size) // If overflow would occur...
3586 return 0;
3587 size += sizeof(AllocAlignmentInteger);
3588 void* result = do_malloc(size);
3589 if (!result)
3590 return 0;
3591
3592 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
3593 result = static_cast<AllocAlignmentInteger*>(result) + 1;
3594#else
3595 void* result = do_malloc(size);
3596#endif
3597
3598#ifndef WTF_CHANGES
3599 MallocHook::InvokeNewHook(result, size);
3600#endif
3601 return result;
3602}
3603
3604#ifndef WTF_CHANGES
3605extern "C"
3606#endif
3607void free(void* ptr) {
3608#ifndef WTF_CHANGES
3609 MallocHook::InvokeDeleteHook(ptr);
3610#endif
3611
3612#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3613 if (!ptr)
3614 return;
3615
3616 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(ptr);
3617 if (*header != Internal::AllocTypeMalloc)
3618 Internal::fastMallocMatchFailed(ptr);
3619 do_free(header);
3620#else
3621 do_free(ptr);
3622#endif
3623}
3624
3625#ifndef WTF_CHANGES
3626extern "C"
3627#else
3628template <bool crashOnFailure>
3629void* calloc(size_t, size_t);
3630
3631void* fastCalloc(size_t n, size_t elem_size)
3632{
3633 return calloc<true>(n, elem_size);
3634}
3635
3636void* tryFastCalloc(size_t n, size_t elem_size)
3637{
3638 return calloc<false>(n, elem_size);
3639}
3640
3641template <bool crashOnFailure>
3642ALWAYS_INLINE
3643#endif
3644void* calloc(size_t n, size_t elem_size) {
3645 size_t totalBytes = n * elem_size;
3646
3647 // Protect against overflow
3648 if (n > 1 && elem_size && (totalBytes / elem_size) != n)
3649 return 0;
3650
3651#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3652 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= totalBytes) // If overflow would occur...
3653 return 0;
3654
3655 totalBytes += sizeof(AllocAlignmentInteger);
3656 void* result = do_malloc(totalBytes);
3657 if (!result)
3658 return 0;
3659
3660 memset(result, 0, totalBytes);
3661 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
3662 result = static_cast<AllocAlignmentInteger*>(result) + 1;
3663#else
3664 void* result = do_malloc(totalBytes);
3665 if (result != NULL) {
3666 memset(result, 0, totalBytes);
3667 }
3668#endif
3669
3670#ifndef WTF_CHANGES
3671 MallocHook::InvokeNewHook(result, totalBytes);
3672#endif
3673 return result;
3674}
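// Illustrative sketch (not compiled in): the overflow guard used by calloc()
// above, factored into a standalone helper. n * elem_size wraps silently on
// overflow, so the product is validated by dividing back out; the n > 1 test
// skips the division in the cases that cannot overflow. The helper name is
// hypothetical.
#if 0
#include <stddef.h>

static bool checkedMultiply(size_t n, size_t elem_size, size_t* product) {
    size_t total = n * elem_size;
    if (n > 1 && elem_size && (total / elem_size) != n)
        return false;  // multiplication overflowed
    *product = total;
    return true;
}
#endif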
3675
3676// Since cfree isn't used anywhere, we don't compile it in.
3677#ifndef WTF_CHANGES
3678#ifndef WTF_CHANGES
3679extern "C"
3680#endif
3681void cfree(void* ptr) {
3682#ifndef WTF_CHANGES
3683 MallocHook::InvokeDeleteHook(ptr);
3684#endif
3685 do_free(ptr);
3686}
3687#endif
3688
3689#ifndef WTF_CHANGES
3690extern "C"
3691#else
3692template <bool crashOnFailure>
3693void* realloc(void*, size_t);
3694
3695void* fastRealloc(void* old_ptr, size_t new_size)
3696{
3697 return realloc<true>(old_ptr, new_size);
3698}
3699
3700void* tryFastRealloc(void* old_ptr, size_t new_size)
3701{
3702 return realloc<false>(old_ptr, new_size);
3703}
3704
3705template <bool crashOnFailure>
3706ALWAYS_INLINE
3707#endif
3708void* realloc(void* old_ptr, size_t new_size) {
3709 if (old_ptr == NULL) {
3710#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3711 void* result = malloc(new_size);
3712#else
3713 void* result = do_malloc(new_size);
3714#ifndef WTF_CHANGES
3715 MallocHook::InvokeNewHook(result, new_size);
3716#endif
3717#endif
3718 return result;
3719 }
3720 if (new_size == 0) {
3721#ifndef WTF_CHANGES
3722 MallocHook::InvokeDeleteHook(old_ptr);
3723#endif
3724 free(old_ptr);
3725 return NULL;
3726 }
3727
3728#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3729 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= new_size) // If overflow would occur...
3730 return 0;
3731 new_size += sizeof(AllocAlignmentInteger);
3732 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(old_ptr);
3733 if (*header != Internal::AllocTypeMalloc)
3734 Internal::fastMallocMatchFailed(old_ptr);
3735 old_ptr = header;
3736#endif
3737
3738 // Get the size of the old entry
3739 const PageID p = reinterpret_cast<uintptr_t>(old_ptr) >> kPageShift;
3740 size_t cl = pageheap->GetSizeClassIfCached(p);
3741 Span *span = NULL;
3742 size_t old_size;
3743 if (cl == 0) {
3744 span = pageheap->GetDescriptor(p);
3745 cl = span->sizeclass;
3746 pageheap->CacheSizeClass(p, cl);
3747 }
3748 if (cl != 0) {
3749 old_size = ByteSizeForClass(cl);
3750 } else {
3751 ASSERT(span != NULL);
3752 old_size = span->length << kPageShift;
3753 }
3754
3755 // Reallocate if the new size is larger than the old size,
3756 // or if the new size is significantly smaller than the old size.
3757 if ((new_size > old_size) || (AllocationSize(new_size) < old_size)) {
3758 // Need to reallocate
3759 void* new_ptr = do_malloc(new_size);
3760 if (new_ptr == NULL) {
3761 return NULL;
3762 }
3763#ifndef WTF_CHANGES
3764 MallocHook::InvokeNewHook(new_ptr, new_size);
3765#endif
3766 memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size));
3767#ifndef WTF_CHANGES
3768 MallocHook::InvokeDeleteHook(old_ptr);
3769#endif
3770 // We could use a variant of do_free() that leverages the fact
3771 // that we already know the sizeclass of old_ptr. The benefit
3772 // would be small, so don't bother.
3773 do_free(old_ptr);
3774#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3775 new_ptr = static_cast<AllocAlignmentInteger*>(new_ptr) + 1;
3776#endif
3777 return new_ptr;
3778 } else {
3779#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3780        old_ptr = header + 1; // Set old_ptr back to the user pointer.
3781#endif
3782 return old_ptr;
3783 }
3784}
3785
3786#ifdef WTF_CHANGES
3787#undef do_malloc
3788#else
3789
3790static SpinLock set_new_handler_lock = SPINLOCK_INITIALIZER;
3791
3792static inline void* cpp_alloc(size_t size, bool nothrow) {
3793 for (;;) {
3794 void* p = do_malloc(size);
3795#ifdef PREANSINEW
3796 return p;
3797#else
3798 if (p == NULL) { // allocation failed
3799 // Get the current new handler. NB: this function is not
3800 // thread-safe. We make a feeble stab at making it so here, but
3801 // this lock only protects against tcmalloc interfering with
3802 // itself, not with other libraries calling set_new_handler.
3803 std::new_handler nh;
3804 {
3805 SpinLockHolder h(&set_new_handler_lock);
3806 nh = std::set_new_handler(0);
3807 (void) std::set_new_handler(nh);
3808 }
3809 // If no new_handler is established, the allocation failed.
3810 if (!nh) {
3811 if (nothrow) return 0;
3812 throw std::bad_alloc();
3813 }
3814 // Otherwise, try the new_handler. If it returns, retry the
3815 // allocation. If it throws std::bad_alloc, fail the allocation.
3816      // If it throws something else, don't interfere.
3817 try {
3818 (*nh)();
3819 } catch (const std::bad_alloc&) {
3820 if (!nothrow) throw;
3821 return p;
3822 }
3823 } else { // allocation success
3824 return p;
3825 }
3826#endif
3827 }
3828}
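// Illustrative sketch (not compiled in): the "peek at the current new_handler"
// trick used by cpp_alloc() above. Before C++11's std::get_new_handler there
// is no portable getter, so the handler is read by swapping it out and
// immediately restoring it. The helper name is hypothetical.
#if 0
#include <new>

static std::new_handler currentNewHandler() {
    std::new_handler nh = std::set_new_handler(0);
    std::set_new_handler(nh);  // restore it right away
    return nh;
}
#endif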
3829
3830void* operator new(size_t size) {
3831 void* p = cpp_alloc(size, false);
3832 // We keep this next instruction out of cpp_alloc for a reason: when
3833 // it's in, and new just calls cpp_alloc, the optimizer may fold the
3834 // new call into cpp_alloc, which messes up our whole section-based
3835 // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc
3836 // isn't the last thing this fn calls, and prevents the folding.
3837 MallocHook::InvokeNewHook(p, size);
3838 return p;
3839}
3840
3841void* operator new(size_t size, const std::nothrow_t&) __THROW {
3842 void* p = cpp_alloc(size, true);
3843 MallocHook::InvokeNewHook(p, size);
3844 return p;
3845}
3846
3847void operator delete(void* p) __THROW {
3848 MallocHook::InvokeDeleteHook(p);
3849 do_free(p);
3850}
3851
3852void operator delete(void* p, const std::nothrow_t&) __THROW {
3853 MallocHook::InvokeDeleteHook(p);
3854 do_free(p);
3855}
3856
3857void* operator new[](size_t size) {
3858 void* p = cpp_alloc(size, false);
3859 // We keep this next instruction out of cpp_alloc for a reason: when
3860 // it's in, and new just calls cpp_alloc, the optimizer may fold the
3861 // new call into cpp_alloc, which messes up our whole section-based
3862 // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc
3863 // isn't the last thing this fn calls, and prevents the folding.
3864 MallocHook::InvokeNewHook(p, size);
3865 return p;
3866}
3867
3868void* operator new[](size_t size, const std::nothrow_t&) __THROW {
3869 void* p = cpp_alloc(size, true);
3870 MallocHook::InvokeNewHook(p, size);
3871 return p;
3872}
3873
3874void operator delete[](void* p) __THROW {
3875 MallocHook::InvokeDeleteHook(p);
3876 do_free(p);
3877}
3878
3879void operator delete[](void* p, const std::nothrow_t&) __THROW {
3880 MallocHook::InvokeDeleteHook(p);
3881 do_free(p);
3882}
3883
3884extern "C" void* memalign(size_t align, size_t size) __THROW {
3885 void* result = do_memalign(align, size);
3886 MallocHook::InvokeNewHook(result, size);
3887 return result;
3888}
3889
3890extern "C" int posix_memalign(void** result_ptr, size_t align, size_t size)
3891 __THROW {
3892 if (((align % sizeof(void*)) != 0) ||
3893 ((align & (align - 1)) != 0) ||
3894 (align == 0)) {
3895 return EINVAL;
3896 }
3897
3898 void* result = do_memalign(align, size);
3899 MallocHook::InvokeNewHook(result, size);
3900 if (result == NULL) {
3901 return ENOMEM;
3902 } else {
3903 *result_ptr = result;
3904 return 0;
3905 }
3906}
3907
3908static size_t pagesize = 0;
3909
3910extern "C" void* valloc(size_t size) __THROW {
3911 // Allocate page-aligned object of length >= size bytes
3912 if (pagesize == 0) pagesize = getpagesize();
3913 void* result = do_memalign(pagesize, size);
3914 MallocHook::InvokeNewHook(result, size);
3915 return result;
3916}
3917
3918extern "C" void* pvalloc(size_t size) __THROW {
3919 // Round up size to a multiple of pagesize
3920 if (pagesize == 0) pagesize = getpagesize();
3921 size = (size + pagesize - 1) & ~(pagesize - 1);
3922 void* result = do_memalign(pagesize, size);
3923 MallocHook::InvokeNewHook(result, size);
3924 return result;
3925}
3926
3927extern "C" void malloc_stats(void) {
3928 do_malloc_stats();
3929}
3930
3931extern "C" int mallopt(int cmd, int value) {
3932 return do_mallopt(cmd, value);
3933}
3934
3935#ifdef HAVE_STRUCT_MALLINFO
3936extern "C" struct mallinfo mallinfo(void) {
3937 return do_mallinfo();
3938}
3939#endif
3940
3941//-------------------------------------------------------------------
3942// Some library routines on RedHat 9 allocate memory using malloc()
3943// and free it using __libc_free() (or vice-versa). Since we provide
3944// our own implementations of malloc/free, we need to make sure that
3945// the __libc_XXX variants (defined as part of glibc) also point to
3946// the same implementations.
3947//-------------------------------------------------------------------
3948
3949#if defined(__GLIBC__)
3950extern "C" {
3951#if COMPILER(GCC) && !defined(__MACH__) && defined(HAVE___ATTRIBUTE__)
3952 // Potentially faster variants that use the gcc alias extension.
3953 // Mach-O (Darwin) does not support weak aliases, hence the __MACH__ check.
3954# define ALIAS(x) __attribute__ ((weak, alias (x)))
3955 void* __libc_malloc(size_t size) ALIAS("malloc");
3956 void __libc_free(void* ptr) ALIAS("free");
3957 void* __libc_realloc(void* ptr, size_t size) ALIAS("realloc");
3958 void* __libc_calloc(size_t n, size_t size) ALIAS("calloc");
3959 void __libc_cfree(void* ptr) ALIAS("cfree");
3960 void* __libc_memalign(size_t align, size_t s) ALIAS("memalign");
3961 void* __libc_valloc(size_t size) ALIAS("valloc");
3962 void* __libc_pvalloc(size_t size) ALIAS("pvalloc");
3963 int __posix_memalign(void** r, size_t a, size_t s) ALIAS("posix_memalign");
3964# undef ALIAS
3965# else /* not __GNUC__ */
3966 // Portable wrappers
3967 void* __libc_malloc(size_t size) { return malloc(size); }
3968 void __libc_free(void* ptr) { free(ptr); }
3969 void* __libc_realloc(void* ptr, size_t size) { return realloc(ptr, size); }
3970 void* __libc_calloc(size_t n, size_t size) { return calloc(n, size); }
3971 void __libc_cfree(void* ptr) { cfree(ptr); }
3972 void* __libc_memalign(size_t align, size_t s) { return memalign(align, s); }
3973 void* __libc_valloc(size_t size) { return valloc(size); }
3974 void* __libc_pvalloc(size_t size) { return pvalloc(size); }
3975 int __posix_memalign(void** r, size_t a, size_t s) {
3976 return posix_memalign(r, a, s);
3977 }
3978# endif /* __GNUC__ */
3979}
3980#endif /* __GLIBC__ */
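// Illustrative sketch (not compiled in): the weak-alias technique used by the
// ALIAS() macro above. A weak alias makes the __libc_XXX name resolve to the
// same definition as ours without an extra call frame; the portable fallback
// is a plain forwarding wrapper, as in the #else branch above. Names here are
// hypothetical.
#if 0
extern "C" void* example_malloc(size_t size) { return 0; /* real body elsewhere */ }
extern "C" void* example_libc_malloc(size_t size)
    __attribute__ ((weak, alias ("example_malloc")));
#endif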
3981
3982// Override __libc_memalign in libc on linux boxes specially.
3983// They have a bug in libc that causes them to (very rarely) allocate
3984// with __libc_memalign() yet deallocate with free() and the
3985// definitions above don't catch it.
3986// This function is an exception to the rule of calling MallocHook method
3987// from the stack frame of the allocation function;
3988// heap-checker handles this special case explicitly.
3989static void *MemalignOverride(size_t align, size_t size, const void *caller)
3990 __THROW {
3991 void* result = do_memalign(align, size);
3992 MallocHook::InvokeNewHook(result, size);
3993 return result;
3994}
3995void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride;
3996
3997#endif
3998
3999#if defined(WTF_CHANGES) && PLATFORM(DARWIN)
4000
4001class FreeObjectFinder {
4002 const RemoteMemoryReader& m_reader;
4003 HashSet<void*> m_freeObjects;
4004
4005public:
4006 FreeObjectFinder(const RemoteMemoryReader& reader) : m_reader(reader) { }
4007
4008 void visit(void* ptr) { m_freeObjects.add(ptr); }
4009 bool isFreeObject(void* ptr) const { return m_freeObjects.contains(ptr); }
4010 bool isFreeObject(vm_address_t ptr) const { return isFreeObject(reinterpret_cast<void*>(ptr)); }
4011 size_t freeObjectCount() const { return m_freeObjects.size(); }
4012
4013 void findFreeObjects(TCMalloc_ThreadCache* threadCache)
4014 {
4015 for (; threadCache; threadCache = (threadCache->next_ ? m_reader(threadCache->next_) : 0))
4016 threadCache->enumerateFreeObjects(*this, m_reader);
4017 }
4018
4019 void findFreeObjects(TCMalloc_Central_FreeListPadded* centralFreeList, size_t numSizes, TCMalloc_Central_FreeListPadded* remoteCentralFreeList)
4020 {
4021 for (unsigned i = 0; i < numSizes; i++)
4022 centralFreeList[i].enumerateFreeObjects(*this, m_reader, remoteCentralFreeList + i);
4023 }
4024};
4025
4026class PageMapFreeObjectFinder {
4027 const RemoteMemoryReader& m_reader;
4028 FreeObjectFinder& m_freeObjectFinder;
4029
4030public:
4031 PageMapFreeObjectFinder(const RemoteMemoryReader& reader, FreeObjectFinder& freeObjectFinder)
4032 : m_reader(reader)
4033 , m_freeObjectFinder(freeObjectFinder)
4034 { }
4035
4036 int visit(void* ptr) const
4037 {
4038 if (!ptr)
4039 return 1;
4040
4041 Span* span = m_reader(reinterpret_cast<Span*>(ptr));
4042 if (span->free) {
4043 void* ptr = reinterpret_cast<void*>(span->start << kPageShift);
4044 m_freeObjectFinder.visit(ptr);
4045 } else if (span->sizeclass) {
4046 // Walk the free list of the small-object span, keeping track of each object seen
4047 for (void* nextObject = span->objects; nextObject; nextObject = *m_reader(reinterpret_cast<void**>(nextObject)))
4048 m_freeObjectFinder.visit(nextObject);
4049 }
4050 return span->length;
4051 }
4052};
4053
4054class PageMapMemoryUsageRecorder {
4055 task_t m_task;
4056 void* m_context;
4057 unsigned m_typeMask;
4058 vm_range_recorder_t* m_recorder;
4059 const RemoteMemoryReader& m_reader;
4060 const FreeObjectFinder& m_freeObjectFinder;
4061
4062 HashSet<void*> m_seenPointers;
4063 Vector<Span*> m_coalescedSpans;
4064
4065public:
4066 PageMapMemoryUsageRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader, const FreeObjectFinder& freeObjectFinder)
4067 : m_task(task)
4068 , m_context(context)
4069 , m_typeMask(typeMask)
4070 , m_recorder(recorder)
4071 , m_reader(reader)
4072 , m_freeObjectFinder(freeObjectFinder)
4073 { }
4074
4075 ~PageMapMemoryUsageRecorder()
4076 {
4077 ASSERT(!m_coalescedSpans.size());
4078 }
4079
4080 void recordPendingRegions()
4081 {
4082 Span* lastSpan = m_coalescedSpans[m_coalescedSpans.size() - 1];
4083 vm_range_t ptrRange = { m_coalescedSpans[0]->start << kPageShift, 0 };
4084 ptrRange.size = (lastSpan->start << kPageShift) - ptrRange.address + (lastSpan->length * kPageSize);
4085
4086 // Mark the memory region the spans represent as a candidate for containing pointers
4087 if (m_typeMask & MALLOC_PTR_REGION_RANGE_TYPE)
4088 (*m_recorder)(m_task, m_context, MALLOC_PTR_REGION_RANGE_TYPE, &ptrRange, 1);
4089
4090 if (!(m_typeMask & MALLOC_PTR_IN_USE_RANGE_TYPE)) {
4091 m_coalescedSpans.clear();
4092 return;
4093 }
4094
4095 Vector<vm_range_t, 1024> allocatedPointers;
4096 for (size_t i = 0; i < m_coalescedSpans.size(); ++i) {
4097 Span *theSpan = m_coalescedSpans[i];
4098 if (theSpan->free)
4099 continue;
4100
4101 vm_address_t spanStartAddress = theSpan->start << kPageShift;
4102 vm_size_t spanSizeInBytes = theSpan->length * kPageSize;
4103
4104 if (!theSpan->sizeclass) {
4105 // If it's an allocated large object span, mark it as in use
4106 if (!m_freeObjectFinder.isFreeObject(spanStartAddress))
4107 allocatedPointers.append((vm_range_t){spanStartAddress, spanSizeInBytes});
4108 } else {
4109 const size_t objectSize = ByteSizeForClass(theSpan->sizeclass);
4110
4111 // Mark each allocated small object within the span as in use
4112 const vm_address_t endOfSpan = spanStartAddress + spanSizeInBytes;
4113 for (vm_address_t object = spanStartAddress; object + objectSize <= endOfSpan; object += objectSize) {
4114 if (!m_freeObjectFinder.isFreeObject(object))
4115 allocatedPointers.append((vm_range_t){object, objectSize});
4116 }
4117 }
4118 }
4119
4120 (*m_recorder)(m_task, m_context, MALLOC_PTR_IN_USE_RANGE_TYPE, allocatedPointers.data(), allocatedPointers.size());
4121
4122 m_coalescedSpans.clear();
4123 }
4124
4125 int visit(void* ptr)
4126 {
4127 if (!ptr)
4128 return 1;
4129
4130 Span* span = m_reader(reinterpret_cast<Span*>(ptr));
4131 if (!span->start)
4132 return 1;
4133
4134 if (m_seenPointers.contains(ptr))
4135 return span->length;
4136 m_seenPointers.add(ptr);
4137
4138 if (!m_coalescedSpans.size()) {
4139 m_coalescedSpans.append(span);
4140 return span->length;
4141 }
4142
4143 Span* previousSpan = m_coalescedSpans[m_coalescedSpans.size() - 1];
4144 vm_address_t previousSpanStartAddress = previousSpan->start << kPageShift;
4145 vm_size_t previousSpanSizeInBytes = previousSpan->length * kPageSize;
4146
4147 // If the new span is adjacent to the previous span, do nothing for now.
4148 vm_address_t spanStartAddress = span->start << kPageShift;
4149 if (spanStartAddress == previousSpanStartAddress + previousSpanSizeInBytes) {
4150 m_coalescedSpans.append(span);
4151 return span->length;
4152 }
4153
4154 // New span is not adjacent to previous span, so record the spans coalesced so far.
4155 recordPendingRegions();
4156 m_coalescedSpans.append(span);
4157
4158 return span->length;
4159 }
4160};
4161
4162class AdminRegionRecorder {
4163 task_t m_task;
4164 void* m_context;
4165 unsigned m_typeMask;
4166 vm_range_recorder_t* m_recorder;
4167 const RemoteMemoryReader& m_reader;
4168
4169 Vector<vm_range_t, 1024> m_pendingRegions;
4170
4171public:
4172 AdminRegionRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader)
4173 : m_task(task)
4174 , m_context(context)
4175 , m_typeMask(typeMask)
4176 , m_recorder(recorder)
4177 , m_reader(reader)
4178 { }
4179
4180 void recordRegion(vm_address_t ptr, size_t size)
4181 {
4182 if (m_typeMask & MALLOC_ADMIN_REGION_RANGE_TYPE)
4183 m_pendingRegions.append((vm_range_t){ ptr, size });
4184 }
4185
4186 void visit(void *ptr, size_t size)
4187 {
4188 recordRegion(reinterpret_cast<vm_address_t>(ptr), size);
4189 }
4190
4191 void recordPendingRegions()
4192 {
4193 if (m_pendingRegions.size()) {
4194 (*m_recorder)(m_task, m_context, MALLOC_ADMIN_REGION_RANGE_TYPE, m_pendingRegions.data(), m_pendingRegions.size());
4195 m_pendingRegions.clear();
4196 }
4197 }
4198
4199 ~AdminRegionRecorder()
4200 {
4201 ASSERT(!m_pendingRegions.size());
4202 }
4203};
4204
4205kern_return_t FastMallocZone::enumerate(task_t task, void* context, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t reader, vm_range_recorder_t recorder)
4206{
4207 RemoteMemoryReader memoryReader(task, reader);
4208
4209 InitSizeClasses();
4210
4211 FastMallocZone* mzone = memoryReader(reinterpret_cast<FastMallocZone*>(zoneAddress));
4212 TCMalloc_PageHeap* pageHeap = memoryReader(mzone->m_pageHeap);
4213 TCMalloc_ThreadCache** threadHeapsPointer = memoryReader(mzone->m_threadHeaps);
4214 TCMalloc_ThreadCache* threadHeaps = memoryReader(*threadHeapsPointer);
4215
4216 TCMalloc_Central_FreeListPadded* centralCaches = memoryReader(mzone->m_centralCaches, sizeof(TCMalloc_Central_FreeListPadded) * kNumClasses);
4217
4218 FreeObjectFinder finder(memoryReader);
4219 finder.findFreeObjects(threadHeaps);
4220 finder.findFreeObjects(centralCaches, kNumClasses, mzone->m_centralCaches);
4221
4222 TCMalloc_PageHeap::PageMap* pageMap = &pageHeap->pagemap_;
4223 PageMapFreeObjectFinder pageMapFinder(memoryReader, finder);
4224 pageMap->visitValues(pageMapFinder, memoryReader);
4225
4226 PageMapMemoryUsageRecorder usageRecorder(task, context, typeMask, recorder, memoryReader, finder);
4227 pageMap->visitValues(usageRecorder, memoryReader);
4228 usageRecorder.recordPendingRegions();
4229
4230 AdminRegionRecorder adminRegionRecorder(task, context, typeMask, recorder, memoryReader);
4231 pageMap->visitAllocations(adminRegionRecorder, memoryReader);
4232
4233 PageHeapAllocator<Span>* spanAllocator = memoryReader(mzone->m_spanAllocator);
4234 PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator = memoryReader(mzone->m_pageHeapAllocator);
4235
4236 spanAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader);
4237 pageHeapAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader);
4238
4239 adminRegionRecorder.recordPendingRegions();
4240
4241 return 0;
4242}
4243
4244size_t FastMallocZone::size(malloc_zone_t*, const void*)
4245{
4246 return 0;
4247}
4248
4249void* FastMallocZone::zoneMalloc(malloc_zone_t*, size_t)
4250{
4251 return 0;
4252}
4253
4254void* FastMallocZone::zoneCalloc(malloc_zone_t*, size_t, size_t)
4255{
4256 return 0;
4257}
4258
4259void FastMallocZone::zoneFree(malloc_zone_t*, void* ptr)
4260{
4261 // Due to <rdar://problem/5671357> zoneFree may be called by the system free even if the pointer
4262 // is not in this zone. When this happens, the pointer being freed was not allocated by any
4263 // zone so we need to print a useful error for the application developer.
4264 malloc_printf("*** error for object %p: pointer being freed was not allocated\n", ptr);
4265}
4266
4267void* FastMallocZone::zoneRealloc(malloc_zone_t*, void*, size_t)
4268{
4269 return 0;
4270}
4271
4272
4273#undef malloc
4274#undef free
4275#undef realloc
4276#undef calloc
4277
4278extern "C" {
4279malloc_introspection_t jscore_fastmalloc_introspection = { &FastMallocZone::enumerate, &FastMallocZone::goodSize, &FastMallocZone::check, &FastMallocZone::print,
4280 &FastMallocZone::log, &FastMallocZone::forceLock, &FastMallocZone::forceUnlock, &FastMallocZone::statistics
4281
4282#if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !PLATFORM(IPHONE)
4283 , 0 // zone_locked will not be called on the zone unless it advertises itself as version five or higher.
4284#endif
4285
4286 };
4287}
4288
4289FastMallocZone::FastMallocZone(TCMalloc_PageHeap* pageHeap, TCMalloc_ThreadCache** threadHeaps, TCMalloc_Central_FreeListPadded* centralCaches, PageHeapAllocator<Span>* spanAllocator, PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator)
4290 : m_pageHeap(pageHeap)
4291 , m_threadHeaps(threadHeaps)
4292 , m_centralCaches(centralCaches)
4293 , m_spanAllocator(spanAllocator)
4294 , m_pageHeapAllocator(pageHeapAllocator)
4295{
4296 memset(&m_zone, 0, sizeof(m_zone));
4297 m_zone.version = 4;
4298 m_zone.zone_name = "JavaScriptCore FastMalloc";
4299 m_zone.size = &FastMallocZone::size;
4300 m_zone.malloc = &FastMallocZone::zoneMalloc;
4301 m_zone.calloc = &FastMallocZone::zoneCalloc;
4302 m_zone.realloc = &FastMallocZone::zoneRealloc;
4303 m_zone.free = &FastMallocZone::zoneFree;
4304 m_zone.valloc = &FastMallocZone::zoneValloc;
4305 m_zone.destroy = &FastMallocZone::zoneDestroy;
4306 m_zone.introspect = &jscore_fastmalloc_introspection;
4307 malloc_zone_register(&m_zone);
4308}
4309
4310
4311void FastMallocZone::init()
4312{
4313 static FastMallocZone zone(pageheap, &thread_heaps, static_cast<TCMalloc_Central_FreeListPadded*>(central_cache), &span_allocator, &threadheap_allocator);
4314}
4315
4316#endif
4317
4318#if WTF_CHANGES
4319void releaseFastMallocFreeMemory()
4320{
4321 // Flush free pages in the current thread cache back to the page heap.
4322    // The low-watermark mechanism in Scavenge() prevents a full return on the first pass.
4323 // The second pass flushes everything.
4324 if (TCMalloc_ThreadCache* threadCache = TCMalloc_ThreadCache::GetCacheIfPresent()) {
4325 threadCache->Scavenge();
4326 threadCache->Scavenge();
4327 }
4328
4329 SpinLockHolder h(&pageheap_lock);
4330 pageheap->ReleaseFreePages();
4331}
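// Illustrative sketch (not compiled in): a typical caller of the helper above,
// e.g. a memory-pressure handler that asks FastMalloc to hand free pages back
// to the system. The handler name is hypothetical.
#if 0
static void handleMemoryPressure() {
    WTF::releaseFastMallocFreeMemory();
}
#endif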
4332
4333FastMallocStatistics fastMallocStatistics()
4334{
4335 FastMallocStatistics statistics;
4336 {
4337 SpinLockHolder lockHolder(&pageheap_lock);
4338 statistics.heapSize = static_cast<size_t>(pageheap->SystemBytes());
4339 statistics.freeSizeInHeap = static_cast<size_t>(pageheap->FreeBytes());
4340 statistics.returnedSize = pageheap->ReturnedBytes();
4341 statistics.freeSizeInCaches = 0;
4342 for (TCMalloc_ThreadCache* threadCache = thread_heaps; threadCache ; threadCache = threadCache->next_)
4343 statistics.freeSizeInCaches += threadCache->Size();
4344 }
4345 for (unsigned cl = 0; cl < kNumClasses; ++cl) {
4346 const int length = central_cache[cl].length();
4347 const int tc_length = central_cache[cl].tc_length();
4348 statistics.freeSizeInCaches += ByteSizeForClass(cl) * (length + tc_length);
4349 }
4350 return statistics;
4351}
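// Illustrative sketch (not compiled in): reading the snapshot returned by
// fastMallocStatistics() above. The fields match the assignments in that
// function; the printing code is only an example.
#if 0
#include <stdio.h>

static void dumpFastMallocStatistics() {
    WTF::FastMallocStatistics stats = WTF::fastMallocStatistics();
    printf("heap size:          %zu\n", stats.heapSize);
    printf("free in heap:       %zu\n", stats.freeSizeInHeap);
    printf("free in caches:     %zu\n", stats.freeSizeInCaches);
    printf("returned to system: %zu\n", stats.returnedSize);
}
#endif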
4352
4353} // namespace WTF
4354#endif
4355
4356#endif // FORCE_SYSTEM_MALLOC