source: webkit/trunk/JavaScriptCore/wtf/FastMalloc.cpp@46153

Last change on this file since 46153 was 46153, checked in by [email protected], 16 years ago

Make it harder to misuse try* allocation routines
https://bugs.webkit.org/show_bug.cgi?id=27469

Reviewed by Gavin Barraclough.

Jump through a few hoops to make it much harder to accidentally
miss null-checking of values returned by the try-* allocation
routines.

  • Property svn:eol-style set to native
File size: 130.1 KB
1// Copyright (c) 2005, 2007, Google Inc.
2// All rights reserved.
3// Copyright (C) 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// ---
32// Author: Sanjay Ghemawat <[email protected]>
33//
34// A malloc that uses a per-thread cache to satisfy small malloc requests.
35// (The time for malloc/free of a small object drops from 300 ns to 50 ns.)
36//
37// See doc/tcmalloc.html for a high-level
38// description of how this malloc works.
39//
40// SYNCHRONIZATION
41// 1. The thread-specific lists are accessed without acquiring any locks.
42// This is safe because each such list is only accessed by one thread.
43// 2. We have a lock per central free-list, and hold it while manipulating
44// the central free list for a particular size.
45// 3. The central page allocator is protected by "pageheap_lock".
46// 4. The pagemap (which maps from page-number to descriptor),
47// can be read without holding any locks, and written while holding
48// the "pageheap_lock".
49// 5. To improve performance, a subset of the information one can get
50// from the pagemap is cached in a data structure, pagemap_cache_,
51// that atomically reads and writes its entries. This cache can be
52// read and written without locking.
53//
54// This multi-threaded access to the pagemap is safe for fairly
55// subtle reasons. We basically assume that when an object X is
56// allocated by thread A and deallocated by thread B, there must
57// have been appropriate synchronization in the handoff of object
58// X from thread A to thread B. The same logic applies to pagemap_cache_.
59//
60// THE PAGEID-TO-SIZECLASS CACHE
61// Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache
62// returns 0 for a particular PageID then that means "no information," not that
63// the sizeclass is 0. The cache may have stale information for pages that do
64// not hold the beginning of any free()'able object. Staleness is eliminated
65// in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and
66// do_memalign() for all other relevant pages.
67//
68// TODO: Bias reclamation to larger addresses
69// TODO: implement mallinfo/mallopt
70// TODO: Better testing
71//
72// 9/28/2003 (new page-level allocator replaces ptmalloc2):
73// * malloc/free of small objects goes from ~300 ns to ~50 ns.
74// * allocation of a reasonably complicated struct
75// goes from about 1100 ns to about 300 ns.
76
77#include "config.h"
78#include "FastMalloc.h"
79
80#include "Assertions.h"
81#include <limits>
82#if ENABLE(JSC_MULTIPLE_THREADS)
83#include <pthread.h>
84#endif
85
86#ifndef NO_TCMALLOC_SAMPLES
87#ifdef WTF_CHANGES
88#define NO_TCMALLOC_SAMPLES
89#endif
90#endif
91
92#if !defined(USE_SYSTEM_MALLOC) && defined(NDEBUG)
93#define FORCE_SYSTEM_MALLOC 0
94#else
95#define FORCE_SYSTEM_MALLOC 1
96#endif
97
98#define TCMALLOC_TRACK_DECOMMITED_SPANS (HAVE(VIRTUALALLOC) || HAVE(MADV_FREE_REUSE))
99
100#ifndef NDEBUG
101namespace WTF {
102
103#if ENABLE(JSC_MULTIPLE_THREADS)
104static pthread_key_t isForbiddenKey;
105static pthread_once_t isForbiddenKeyOnce = PTHREAD_ONCE_INIT;
106static void initializeIsForbiddenKey()
107{
108 pthread_key_create(&isForbiddenKey, 0);
109}
110
111static bool isForbidden()
112{
113 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
114 return !!pthread_getspecific(isForbiddenKey);
115}
116
117void fastMallocForbid()
118{
119 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
120 pthread_setspecific(isForbiddenKey, &isForbiddenKey);
121}
122
123void fastMallocAllow()
124{
125 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
126 pthread_setspecific(isForbiddenKey, 0);
127}
128
129#else
130
131static bool staticIsForbidden;
132static bool isForbidden()
133{
134 return staticIsForbidden;
135}
136
137void fastMallocForbid()
138{
139 staticIsForbidden = true;
140}
141
142void fastMallocAllow()
143{
144 staticIsForbidden = false;
145}
146#endif // ENABLE(JSC_MULTIPLE_THREADS)
147
148} // namespace WTF
149#endif // NDEBUG
150
151#include <string.h>
152
153namespace WTF {
154
155#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
156
157namespace Internal {
158
159void fastMallocMatchFailed(void*)
160{
161 CRASH();
162}
163
164} // namespace Internal
165
166#endif
167
168void* fastZeroedMalloc(size_t n)
169{
170 void* result = fastMalloc(n);
171 memset(result, 0, n);
172 return result;
173}
174
175TryMallocReturnValue tryFastZeroedMalloc(size_t n)
176{
177 void* result;
178 if (!tryFastMalloc(n).getValue(result))
179 return 0;
180 memset(result, 0, n);
181 return result;
182}
183
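// Illustrative sketch, not part of the original file and wrapped in #if 0 so it
// is never compiled: how a caller is expected to consume a try* routine. The
// TryMallocReturnValue wrapper forces the result through getValue(), so the
// null check cannot be silently skipped. The name exampleTryAllocate and the
// use of fastFree() here are hypothetical, for exposition only.
#if 0
static bool exampleTryAllocate(size_t byteCount)
{
    void* buffer;
    if (!tryFastMalloc(byteCount).getValue(buffer))
        return false; // Allocation failed; the caller handles it instead of crashing.
    memset(buffer, 0, byteCount);
    fastFree(buffer);
    return true;
}
#endif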
184} // namespace WTF
185
186#if FORCE_SYSTEM_MALLOC
187
188#include <stdlib.h>
189#if !PLATFORM(WIN_OS)
190 #include <pthread.h>
191#else
192 #include "windows.h"
193#endif
194
195namespace WTF {
196
197TryMallocReturnValue tryFastMalloc(size_t n)
198{
199 ASSERT(!isForbidden());
200
201#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
202 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= n) // If overflow would occur...
203 return 0;
204
205 void* result = malloc(n + sizeof(AllocAlignmentInteger));
206 if (!result)
207 return 0;
208
209 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
210 result = static_cast<AllocAlignmentInteger*>(result) + 1;
211
212 return result;
213#else
214 return malloc(n);
215#endif
216}
217
218void* fastMalloc(size_t n)
219{
220 ASSERT(!isForbidden());
221
222#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
223 void* result = tryFastMalloc(n);
224#else
225 void* result = malloc(n);
226#endif
227
228 if (!result)
229 CRASH();
230 return result;
231}
232
233TryMallocReturnValue tryFastCalloc(size_t n_elements, size_t element_size)
234{
235 ASSERT(!isForbidden());
236
237#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
238 size_t totalBytes = n_elements * element_size;
239 if (n_elements > 1 && element_size && (totalBytes / element_size) != n_elements || (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= totalBytes))
240 return 0;
241
242 totalBytes += sizeof(AllocAlignmentInteger);
243 void* result = malloc(totalBytes);
244 if (!result)
245 return 0;
246
247 memset(result, 0, totalBytes);
248 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
249 result = static_cast<AllocAlignmentInteger*>(result) + 1;
250 return result;
251#else
252 return calloc(n_elements, element_size);
253#endif
254}
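// Worked example of the multiplication guard above (an added sketch, not
// compiled; wouldCallocOverflow is a hypothetical name). With a 32-bit size_t,
// 0x10000 elements of 0x10001 bytes wraps to a totalBytes of 0x10000, and the
// division check (totalBytes / element_size) != n_elements catches the wrap.
// Only the overflow test is mirrored here, not the AllocAlignmentInteger part.
#if 0
static bool wouldCallocOverflow(size_t n_elements, size_t element_size)
{
    size_t totalBytes = n_elements * element_size; // may wrap on overflow
    return n_elements > 1 && element_size && (totalBytes / element_size) != n_elements;
}
#endif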
255
256void* fastCalloc(size_t n_elements, size_t element_size)
257{
258 ASSERT(!isForbidden());
259
260#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
261 void* result = tryFastCalloc(n_elements, element_size);
262#else
263 void* result = calloc(n_elements, element_size);
264#endif
265
266 if (!result)
267 CRASH();
268 return result;
269}
270
271void fastFree(void* p)
272{
273 ASSERT(!isForbidden());
274
275#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
276 if (!p)
277 return;
278
279 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(p);
280 if (*header != Internal::AllocTypeMalloc)
281 Internal::fastMallocMatchFailed(p);
282 free(header);
283#else
284 free(p);
285#endif
286}
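// Sketch of the FAST_MALLOC_MATCH_VALIDATION layout assumed above (added for
// exposition, not compiled): the allocation functions store an
// AllocAlignmentInteger tag one word before the pointer handed to the caller,
// and fastFree() walks back to that word to verify the block really came from
// fastMalloc. headerFromUserPointer is a hypothetical helper illustrating the
// same kind of walk-back that fastMallocMatchValidationValue() presumably does.
#if 0
//   +------------------------------+---------------------------+
//   | AllocAlignmentInteger tag    | n bytes of user data      |
//   | (Internal::AllocTypeMalloc)  |                           |
//   +------------------------------+---------------------------+
//   ^                              ^
//   raw malloc() result            pointer returned to the caller
static AllocAlignmentInteger* headerFromUserPointer(void* p)
{
    return static_cast<AllocAlignmentInteger*>(p) - 1;
}
#endif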
287
288TryMallocReturnValue tryFastRealloc(void* p, size_t n)
289{
290 ASSERT(!isForbidden());
291
292#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
293 if (p) {
294 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= n) // If overflow would occur...
295 return 0;
296 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(p);
297 if (*header != Internal::AllocTypeMalloc)
298 Internal::fastMallocMatchFailed(p);
299 void* result = realloc(header, n + sizeof(AllocAlignmentInteger));
300 if (!result)
301 return 0;
302
303 // This should not be needed because the value is already there:
304 // *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
305 result = static_cast<AllocAlignmentInteger*>(result) + 1;
306 return result;
307 } else {
308 return fastMalloc(n);
309 }
310#else
311 return realloc(p, n);
312#endif
313}
314
315void* fastRealloc(void* p, size_t n)
316{
317 ASSERT(!isForbidden());
318
319#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
320 void* result = tryFastRealloc(p, n);
321#else
322 void* result = realloc(p, n);
323#endif
324
325 if (!result)
326 CRASH();
327 return result;
328}
329
330void releaseFastMallocFreeMemory() { }
331
332FastMallocStatistics fastMallocStatistics()
333{
334 FastMallocStatistics statistics = { 0, 0, 0, 0 };
335 return statistics;
336}
337
338} // namespace WTF
339
340#if PLATFORM(DARWIN)
341// This symbol is present in the JavaScriptCore exports file even when FastMalloc is disabled.
342// It will never be used in this case, so its type and value are less interesting than its presence.
343extern "C" const int jscore_fastmalloc_introspection = 0;
344#endif
345
346#else // FORCE_SYSTEM_MALLOC
347
348#if HAVE(STDINT_H)
349#include <stdint.h>
350#elif HAVE(INTTYPES_H)
351#include <inttypes.h>
352#else
353#include <sys/types.h>
354#endif
355
356#include "AlwaysInline.h"
357#include "Assertions.h"
358#include "TCPackedCache.h"
359#include "TCPageMap.h"
360#include "TCSpinLock.h"
361#include "TCSystemAlloc.h"
362#include <algorithm>
363#include <errno.h>
364#include <limits>
365#include <new>
366#include <pthread.h>
367#include <stdarg.h>
368#include <stddef.h>
369#include <stdio.h>
370#if COMPILER(MSVC)
371#ifndef WIN32_LEAN_AND_MEAN
372#define WIN32_LEAN_AND_MEAN
373#endif
374#include <windows.h>
375#endif
376
377#if WTF_CHANGES
378
379#if PLATFORM(DARWIN)
380#include "MallocZoneSupport.h"
381#include <wtf/HashSet.h>
382#endif
383
384#ifndef PRIuS
385#define PRIuS "zu"
386#endif
387
388// Calling pthread_getspecific through a global function pointer is faster than a normal
389// call to the function on Mac OS X, and it's used in performance-critical code. So we
390// use a function pointer. But that's not necessarily faster on other platforms, and we had
391// problems with this technique on Windows, so we'll do this only on Mac OS X.
392#if PLATFORM(DARWIN)
393static void* (*pthread_getspecific_function_pointer)(pthread_key_t) = pthread_getspecific;
394#define pthread_getspecific(key) pthread_getspecific_function_pointer(key)
395#endif
396
397#define DEFINE_VARIABLE(type, name, value, meaning) \
398 namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \
399 type FLAGS_##name(value); \
400 char FLAGS_no##name; \
401 } \
402 using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name
403
404#define DEFINE_int64(name, value, meaning) \
405 DEFINE_VARIABLE(int64_t, name, value, meaning)
406
407#define DEFINE_double(name, value, meaning) \
408 DEFINE_VARIABLE(double, name, value, meaning)
409
410namespace WTF {
411
412#define malloc fastMalloc
413#define calloc fastCalloc
414#define free fastFree
415#define realloc fastRealloc
416
417#define MESSAGE LOG_ERROR
418#define CHECK_CONDITION ASSERT
419
420#if PLATFORM(DARWIN)
421class Span;
422class TCMalloc_Central_FreeListPadded;
423class TCMalloc_PageHeap;
424class TCMalloc_ThreadCache;
425template <typename T> class PageHeapAllocator;
426
427class FastMallocZone {
428public:
429 static void init();
430
431 static kern_return_t enumerate(task_t, void*, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t, vm_range_recorder_t);
432 static size_t goodSize(malloc_zone_t*, size_t size) { return size; }
433 static boolean_t check(malloc_zone_t*) { return true; }
434 static void print(malloc_zone_t*, boolean_t) { }
435 static void log(malloc_zone_t*, void*) { }
436 static void forceLock(malloc_zone_t*) { }
437 static void forceUnlock(malloc_zone_t*) { }
438 static void statistics(malloc_zone_t*, malloc_statistics_t* stats) { memset(stats, 0, sizeof(malloc_statistics_t)); }
439
440private:
441 FastMallocZone(TCMalloc_PageHeap*, TCMalloc_ThreadCache**, TCMalloc_Central_FreeListPadded*, PageHeapAllocator<Span>*, PageHeapAllocator<TCMalloc_ThreadCache>*);
442 static size_t size(malloc_zone_t*, const void*);
443 static void* zoneMalloc(malloc_zone_t*, size_t);
444 static void* zoneCalloc(malloc_zone_t*, size_t numItems, size_t size);
445 static void zoneFree(malloc_zone_t*, void*);
446 static void* zoneRealloc(malloc_zone_t*, void*, size_t);
447 static void* zoneValloc(malloc_zone_t*, size_t) { LOG_ERROR("valloc is not supported"); return 0; }
448 static void zoneDestroy(malloc_zone_t*) { }
449
450 malloc_zone_t m_zone;
451 TCMalloc_PageHeap* m_pageHeap;
452 TCMalloc_ThreadCache** m_threadHeaps;
453 TCMalloc_Central_FreeListPadded* m_centralCaches;
454 PageHeapAllocator<Span>* m_spanAllocator;
455 PageHeapAllocator<TCMalloc_ThreadCache>* m_pageHeapAllocator;
456};
457
458#endif
459
460#endif
461
462#ifndef WTF_CHANGES
463// This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if
464// you're porting to a system where you really can't get a stacktrace.
465#ifdef NO_TCMALLOC_SAMPLES
466// We use #define so code compiles even if you #include stacktrace.h somehow.
467# define GetStackTrace(stack, depth, skip) (0)
468#else
469# include <google/stacktrace.h>
470#endif
471#endif
472
473// Even if we have support for thread-local storage in the compiler
474// and linker, the OS may not support it. We need to check that at
475// runtime. Right now, we have to keep a manual set of "bad" OSes.
476#if defined(HAVE_TLS)
477 static bool kernel_supports_tls = false; // be conservative
478 static inline bool KernelSupportsTLS() {
479 return kernel_supports_tls;
480 }
481# if !HAVE_DECL_UNAME // if too old for uname, probably too old for TLS
482 static void CheckIfKernelSupportsTLS() {
483 kernel_supports_tls = false;
484 }
485# else
486# include <sys/utsname.h> // DECL_UNAME checked for <sys/utsname.h> too
487 static void CheckIfKernelSupportsTLS() {
488 struct utsname buf;
489 if (uname(&buf) != 0) { // should be impossible
490 MESSAGE("uname failed, assuming no TLS support (errno=%d)\n", errno);
491 kernel_supports_tls = false;
492 } else if (strcasecmp(buf.sysname, "linux") == 0) {
493 // The linux case: the first kernel to support TLS was 2.6.0
494 if (buf.release[0] < '2' && buf.release[1] == '.') // 0.x or 1.x
495 kernel_supports_tls = false;
496 else if (buf.release[0] == '2' && buf.release[1] == '.' &&
497 buf.release[2] >= '0' && buf.release[2] < '6' &&
498 buf.release[3] == '.') // 2.0 - 2.5
499 kernel_supports_tls = false;
500 else
501 kernel_supports_tls = true;
502 } else { // some other kernel, we'll be optimistic
503 kernel_supports_tls = true;
504 }
505 // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG
506 }
507# endif // HAVE_DECL_UNAME
508#endif // HAVE_TLS
509
510// __THROW is defined in glibc systems. It means, counter-intuitively,
511// "This function will never throw an exception." It's an optional
512// optimization tool, but we may need to use it to match glibc prototypes.
513#ifndef __THROW // I guess we're not on a glibc system
514# define __THROW // __THROW is just an optimization, so ok to make it ""
515#endif
516
517//-------------------------------------------------------------------
518// Configuration
519//-------------------------------------------------------------------
520
521// Not all possible combinations of the following parameters make
522// sense. In particular, if kMaxSize increases, you may have to
523// increase kNumClasses as well.
524static const size_t kPageShift = 12;
525static const size_t kPageSize = 1 << kPageShift;
526static const size_t kMaxSize = 8u * kPageSize;
527static const size_t kAlignShift = 3;
528static const size_t kAlignment = 1 << kAlignShift;
529static const size_t kNumClasses = 68;
530
531// Allocates a big block of memory for the pagemap once we reach more than
532// 128MB
533static const size_t kPageMapBigAllocationThreshold = 128 << 20;
534
535// Minimum number of pages to fetch from system at a time. Must be
536// significantly bigger than kBlockSize to amortize system-call
537// overhead, and also to reduce external fragmentation. Also, we
538// should keep this value big because various incarnations of Linux
539// have small limits on the number of mmap() regions per
540// address-space.
541static const size_t kMinSystemAlloc = 1 << (20 - kPageShift);
542
543// Number of objects to move between a per-thread list and a central
544// list in one shot. We want this to be not too small so we can
545// amortize the lock overhead for accessing the central list. Making
546// it too big may temporarily cause unnecessary memory wastage in the
547// per-thread free list until the scavenger cleans up the list.
548static int num_objects_to_move[kNumClasses];
549
550// Maximum length we allow a per-thread free-list to have before we
551// move objects from it into the corresponding central free-list. We
552// want this big to avoid locking the central free-list too often. It
553// should not hurt to make this list somewhat big because the
554// scavenging code will shrink it down when its contents are not in use.
555static const int kMaxFreeListLength = 256;
556
557// Lower and upper bounds on the per-thread cache sizes
558static const size_t kMinThreadCacheSize = kMaxSize * 2;
559static const size_t kMaxThreadCacheSize = 2 << 20;
560
561// Default bound on the total amount of thread caches
562static const size_t kDefaultOverallThreadCacheSize = 16 << 20;
563
564// For all span-lengths < kMaxPages we keep an exact-size list.
565// REQUIRED: kMaxPages >= kMinSystemAlloc;
566static const size_t kMaxPages = kMinSystemAlloc;
567
568/* The smallest prime > 2^n */
569static int primes_list[] = {
570 // Small values might cause high rates of sampling
571 // and hence commented out.
572 // 2, 5, 11, 17, 37, 67, 131, 257,
573 // 521, 1031, 2053, 4099, 8209, 16411,
574 32771, 65537, 131101, 262147, 524309, 1048583,
575 2097169, 4194319, 8388617, 16777259, 33554467 };
576
577// Twice the approximate gap between sampling actions.
578// I.e., we take one sample approximately once every
579// tcmalloc_sample_parameter/2
580// bytes of allocation, i.e., ~ once every 128KB.
581// Must be a prime number.
582#ifdef NO_TCMALLOC_SAMPLES
583DEFINE_int64(tcmalloc_sample_parameter, 0,
584 "Unused: code is compiled with NO_TCMALLOC_SAMPLES");
585static size_t sample_period = 0;
586#else
587DEFINE_int64(tcmalloc_sample_parameter, 262147,
588 "Twice the approximate gap between sampling actions."
589 " Must be a prime number. Otherwise will be rounded up to a "
590 " larger prime number");
591static size_t sample_period = 262147;
592#endif
593
594// Protects sample_period above
595static SpinLock sample_period_lock = SPINLOCK_INITIALIZER;
596
597// Parameters for controlling how fast memory is returned to the OS.
598
599DEFINE_double(tcmalloc_release_rate, 1,
600 "Rate at which we release unused memory to the system. "
601 "Zero means we never release memory back to the system. "
602 "Increase this flag to return memory faster; decrease it "
603 "to return memory slower. Reasonable rates are in the "
604 "range [0,10]");
605
606//-------------------------------------------------------------------
607// Mapping from size to size_class and vice versa
608//-------------------------------------------------------------------
609
610// Sizes <= 1024 have an alignment >= 8. So for such sizes we have an
611// array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128.
612// So for these larger sizes we have an array indexed by ceil(size/128).
613//
614// We flatten both logical arrays into one physical array and use
615// arithmetic to compute an appropriate index. The constants used by
616// ClassIndex() were selected to make the flattening work.
617//
618// Examples:
619// Size Expression Index
620// -------------------------------------------------------
621// 0 (0 + 7) / 8 0
622// 1 (1 + 7) / 8 1
623// ...
624// 1024 (1024 + 7) / 8 128
625// 1025 (1025 + 127 + (120<<7)) / 128 129
626// ...
627// 32768 (32768 + 127 + (120<<7)) / 128 376
628static const size_t kMaxSmallSize = 1024;
629static const int shift_amount[2] = { 3, 7 }; // For divides by 8 or 128
630static const int add_amount[2] = { 7, 127 + (120 << 7) };
631static unsigned char class_array[377];
632
633// Compute index of the class_array[] entry for a given size
634static inline int ClassIndex(size_t s) {
635 const int i = (s > kMaxSmallSize);
636 return static_cast<int>((s + add_amount[i]) >> shift_amount[i]);
637}
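// Worked check of the flattened index arithmetic (an added sketch, not
// compiled; checkClassIndexExamples is a hypothetical name). These values
// reproduce the example table in the comment above ClassIndex().
#if 0
static void checkClassIndexExamples()
{
    ASSERT(ClassIndex(0) == 0);       // (0 + 7) >> 3
    ASSERT(ClassIndex(1) == 1);       // (1 + 7) >> 3
    ASSERT(ClassIndex(1024) == 128);  // (1024 + 7) >> 3
    ASSERT(ClassIndex(1025) == 129);  // (1025 + 127 + (120 << 7)) >> 7
    ASSERT(ClassIndex(32768) == 376); // (32768 + 127 + (120 << 7)) >> 7
}
#endif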
638
639// Mapping from size class to max size storable in that class
640static size_t class_to_size[kNumClasses];
641
642// Mapping from size class to number of pages to allocate at a time
643static size_t class_to_pages[kNumClasses];
644
645// TransferCache is used to cache transfers of num_objects_to_move[size_class]
646// back and forth between thread caches and the central cache for a given size
647// class.
648struct TCEntry {
649 void *head; // Head of chain of objects.
650 void *tail; // Tail of chain of objects.
651};
652// A central cache freelist can have anywhere from 0 to kNumTransferEntries
653// slots to put linked list chains into. To keep memory usage bounded the total
654// number of TCEntries across size classes is fixed. Currently each size
655// class is initially given one TCEntry which also means that the maximum any
656// one class can have is kNumClasses.
657static const int kNumTransferEntries = kNumClasses;
658
659// Note: the following only works for "n"s that fit in 32-bits, but
660// that is fine since we only use it for small sizes.
661static inline int LgFloor(size_t n) {
662 int log = 0;
663 for (int i = 4; i >= 0; --i) {
664 int shift = (1 << i);
665 size_t x = n >> shift;
666 if (x != 0) {
667 n = x;
668 log += shift;
669 }
670 }
671 ASSERT(n == 1);
672 return log;
673}
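// Sketch of what the shift cascade above computes (added for exposition, not
// compiled; checkLgFloorExamples is a hypothetical name): LgFloor(n) is
// floor(log2(n)) for any nonzero n that fits in 32 bits.
#if 0
static void checkLgFloorExamples()
{
    ASSERT(LgFloor(1) == 0);
    ASSERT(LgFloor(2) == 1);
    ASSERT(LgFloor(1023) == 9);      // still below 2^10
    ASSERT(LgFloor(1024) == 10);
    ASSERT(LgFloor(kMaxSize) == 15); // kMaxSize == 8 * 4096 == 2^15
}
#endif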
674
675// Some very basic linked list functions for dealing with using void * as
676// storage.
677
678static inline void *SLL_Next(void *t) {
679 return *(reinterpret_cast<void**>(t));
680}
681
682static inline void SLL_SetNext(void *t, void *n) {
683 *(reinterpret_cast<void**>(t)) = n;
684}
685
686static inline void SLL_Push(void **list, void *element) {
687 SLL_SetNext(element, *list);
688 *list = element;
689}
690
691static inline void *SLL_Pop(void **list) {
692 void *result = *list;
693 *list = SLL_Next(*list);
694 return result;
695}
696
697
698// Remove N elements from a linked list to which head points. head will be
699// modified to point to the new head. start and end will point to the first
700// and last nodes of the range. Note that the next pointer of the node at *end
701// will be set to NULL after this function is called.
702static inline void SLL_PopRange(void **head, int N, void **start, void **end) {
703 if (N == 0) {
704 *start = NULL;
705 *end = NULL;
706 return;
707 }
708
709 void *tmp = *head;
710 for (int i = 1; i < N; ++i) {
711 tmp = SLL_Next(tmp);
712 }
713
714 *start = *head;
715 *end = tmp;
716 *head = SLL_Next(tmp);
717 // Unlink range from list.
718 SLL_SetNext(tmp, NULL);
719}
720
721static inline void SLL_PushRange(void **head, void *start, void *end) {
722 if (!start) return;
723 SLL_SetNext(end, *head);
724 *head = start;
725}
726
727static inline size_t SLL_Size(void *head) {
728 int count = 0;
729 while (head) {
730 count++;
731 head = SLL_Next(head);
732 }
733 return count;
734}
735
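// Usage sketch for the singly linked list helpers above (added for exposition,
// not compiled; sllExample is a hypothetical name). Each free object stores
// the "next" pointer in its own first word, so both arguments must point to
// writable, at least pointer-sized blocks, and an empty list is a null void*.
#if 0
static void sllExample(void* objectA, void* objectB)
{
    void* freeList = NULL;
    SLL_Push(&freeList, objectA);
    SLL_Push(&freeList, objectB);      // list is now B -> A
    ASSERT(SLL_Size(freeList) == 2);
    void* popped = SLL_Pop(&freeList); // returns B; list is now just A
    ASSERT(popped == objectB);
    ASSERT(SLL_Size(freeList) == 1);
}
#endif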
736// Setup helper functions.
737
738static ALWAYS_INLINE size_t SizeClass(size_t size) {
739 return class_array[ClassIndex(size)];
740}
741
742// Get the byte-size for a specified class
743static ALWAYS_INLINE size_t ByteSizeForClass(size_t cl) {
744 return class_to_size[cl];
745}
746static int NumMoveSize(size_t size) {
747 if (size == 0) return 0;
748 // Use approx 64k transfers between thread and central caches.
749 int num = static_cast<int>(64.0 * 1024.0 / size);
750 if (num < 2) num = 2;
751 // Clamp well below kMaxFreeListLength to avoid ping pong between central
752 // and thread caches.
753 if (num > static_cast<int>(0.8 * kMaxFreeListLength))
754 num = static_cast<int>(0.8 * kMaxFreeListLength);
755
756 // Also, avoid bringing in too many objects into small object free
757 // lists. There are lots of such lists, and if we allow each one to
758 // fetch too many at a time, we end up having to scavenge too often
759 // (especially when there are lots of threads and each thread gets a
760 // small allowance for its thread cache).
761 //
762 // TODO: Make thread cache free list sizes dynamic so that we do not
763 // have to equally divide a fixed resource amongst lots of threads.
764 if (num > 32) num = 32;
765
766 return num;
767}
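// Worked example of the batch sizing above (added sketch, not compiled;
// checkNumMoveSizeExamples is a hypothetical name). For a 64-byte class,
// 64KB / 64 = 1024 objects, clamped first to 0.8 * kMaxFreeListLength = 204
// and then to the final cap of 32. For a 32KB class, 64KB / 32KB = 2, the
// minimum batch.
#if 0
static void checkNumMoveSizeExamples()
{
    ASSERT(NumMoveSize(0) == 0);
    ASSERT(NumMoveSize(64) == 32);
    ASSERT(NumMoveSize(32 * 1024) == 2);
}
#endif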
768
769// Initialize the mapping arrays
770static void InitSizeClasses() {
771 // Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
772 if (ClassIndex(0) < 0) {
773 MESSAGE("Invalid class index %d for size 0\n", ClassIndex(0));
774 CRASH();
775 }
776 if (static_cast<size_t>(ClassIndex(kMaxSize)) >= sizeof(class_array)) {
777 MESSAGE("Invalid class index %d for kMaxSize\n", ClassIndex(kMaxSize));
778 CRASH();
779 }
780
781 // Compute the size classes we want to use
782 size_t sc = 1; // Next size class to assign
783 unsigned char alignshift = kAlignShift;
784 int last_lg = -1;
785 for (size_t size = kAlignment; size <= kMaxSize; size += (1 << alignshift)) {
786 int lg = LgFloor(size);
787 if (lg > last_lg) {
788 // Increase alignment every so often.
789 //
790 // Since we double the alignment every time size doubles and
791 // size >= 128, this means that space wasted due to alignment is
792 // at most 16/128 i.e., 12.5%. Plus we cap the alignment at 256
793 // bytes, so the space wasted as a percentage starts falling for
794 // sizes > 2K.
795 if ((lg >= 7) && (alignshift < 8)) {
796 alignshift++;
797 }
798 last_lg = lg;
799 }
800
801 // Allocate enough pages so leftover is less than 1/8 of total.
802 // This bounds wasted space to at most 12.5%.
803 size_t psize = kPageSize;
804 while ((psize % size) > (psize >> 3)) {
805 psize += kPageSize;
806 }
807 const size_t my_pages = psize >> kPageShift;
808
809 if (sc > 1 && my_pages == class_to_pages[sc-1]) {
810 // See if we can merge this into the previous class without
811 // increasing the fragmentation of the previous class.
812 const size_t my_objects = (my_pages << kPageShift) / size;
813 const size_t prev_objects = (class_to_pages[sc-1] << kPageShift)
814 / class_to_size[sc-1];
815 if (my_objects == prev_objects) {
816 // Adjust last class to include this size
817 class_to_size[sc-1] = size;
818 continue;
819 }
820 }
821
822 // Add new class
823 class_to_pages[sc] = my_pages;
824 class_to_size[sc] = size;
825 sc++;
826 }
827 if (sc != kNumClasses) {
828 MESSAGE("wrong number of size classes: found %" PRIuS " instead of %d\n",
829 sc, int(kNumClasses));
830 CRASH();
831 }
832
833 // Initialize the mapping arrays
834 int next_size = 0;
835 for (unsigned char c = 1; c < kNumClasses; c++) {
836 const size_t max_size_in_class = class_to_size[c];
837 for (size_t s = next_size; s <= max_size_in_class; s += kAlignment) {
838 class_array[ClassIndex(s)] = c;
839 }
840 next_size = static_cast<int>(max_size_in_class + kAlignment);
841 }
842
843 // Double-check sizes just to be safe
844 for (size_t size = 0; size <= kMaxSize; size++) {
845 const size_t sc = SizeClass(size);
846 if (sc == 0) {
847 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
848 CRASH();
849 }
850 if (sc > 1 && size <= class_to_size[sc-1]) {
851 MESSAGE("Allocating unnecessarily large class %" PRIuS " for %" PRIuS
852 "\n", sc, size);
853 CRASH();
854 }
855 if (sc >= kNumClasses) {
856 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
857 CRASH();
858 }
859 const size_t s = class_to_size[sc];
860 if (size > s) {
861 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc);
862 CRASH();
863 }
864 if (s == 0) {
865 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc);
866 CRASH();
867 }
868 }
869
870 // Initialize the num_objects_to_move array.
871 for (size_t cl = 1; cl < kNumClasses; ++cl) {
872 num_objects_to_move[cl] = NumMoveSize(ByteSizeForClass(cl));
873 }
874
875#ifndef WTF_CHANGES
876 if (false) {
877 // Dump class sizes and maximum external wastage per size class
878 for (size_t cl = 1; cl < kNumClasses; ++cl) {
879 const int alloc_size = class_to_pages[cl] << kPageShift;
880 const int alloc_objs = alloc_size / class_to_size[cl];
881 const int min_used = (class_to_size[cl-1] + 1) * alloc_objs;
882 const int max_waste = alloc_size - min_used;
883 MESSAGE("SC %3d [ %8d .. %8d ] from %8d ; %2.0f%% maxwaste\n",
884 int(cl),
885 int(class_to_size[cl-1] + 1),
886 int(class_to_size[cl]),
887 int(class_to_pages[cl] << kPageShift),
888 max_waste * 100.0 / alloc_size
889 );
890 }
891 }
892#endif
893}
894
895// -------------------------------------------------------------------------
896// Simple allocator for objects of a specified type. External locking
897// is required before accessing one of these objects.
898// -------------------------------------------------------------------------
899
900// Metadata allocator -- keeps stats about how many bytes allocated
901static uint64_t metadata_system_bytes = 0;
902static void* MetaDataAlloc(size_t bytes) {
903 void* result = TCMalloc_SystemAlloc(bytes, 0);
904 if (result != NULL) {
905 metadata_system_bytes += bytes;
906 }
907 return result;
908}
909
910template <class T>
911class PageHeapAllocator {
912 private:
913 // How much to allocate from system at a time
914 static const size_t kAllocIncrement = 32 << 10;
915
916 // Aligned size of T
917 static const size_t kAlignedSize
918 = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment);
919
920 // Free area from which to carve new objects
921 char* free_area_;
922 size_t free_avail_;
923
924 // Linked list of all regions allocated by this allocator
925 void* allocated_regions_;
926
927 // Free list of already carved objects
928 void* free_list_;
929
930 // Number of allocated but unfreed objects
931 int inuse_;
932
933 public:
934 void Init() {
935 ASSERT(kAlignedSize <= kAllocIncrement);
936 inuse_ = 0;
937 allocated_regions_ = 0;
938 free_area_ = NULL;
939 free_avail_ = 0;
940 free_list_ = NULL;
941 }
942
943 T* New() {
944 // Consult free list
945 void* result;
946 if (free_list_ != NULL) {
947 result = free_list_;
948 free_list_ = *(reinterpret_cast<void**>(result));
949 } else {
950 if (free_avail_ < kAlignedSize) {
951 // Need more room
952 char* new_allocation = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement));
953 if (!new_allocation)
954 CRASH();
955
956 *(void**)new_allocation = allocated_regions_;
957 allocated_regions_ = new_allocation;
958 free_area_ = new_allocation + kAlignedSize;
959 free_avail_ = kAllocIncrement - kAlignedSize;
960 }
961 result = free_area_;
962 free_area_ += kAlignedSize;
963 free_avail_ -= kAlignedSize;
964 }
965 inuse_++;
966 return reinterpret_cast<T*>(result);
967 }
968
969 void Delete(T* p) {
970 *(reinterpret_cast<void**>(p)) = free_list_;
971 free_list_ = p;
972 inuse_--;
973 }
974
975 int inuse() const { return inuse_; }
976
977#if defined(WTF_CHANGES) && PLATFORM(DARWIN)
978 template <class Recorder>
979 void recordAdministrativeRegions(Recorder& recorder, const RemoteMemoryReader& reader)
980 {
981 vm_address_t adminAllocation = reinterpret_cast<vm_address_t>(allocated_regions_);
982 while (adminAllocation) {
983 recorder.recordRegion(adminAllocation, kAllocIncrement);
984 adminAllocation = *reader(reinterpret_cast<vm_address_t*>(adminAllocation));
985 }
986 }
987#endif
988};
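// Usage sketch for PageHeapAllocator (added for exposition, not compiled;
// ExampleMetadata and the function name are hypothetical). The allocator
// carves kAlignedSize objects out of 32KB regions obtained from
// MetaDataAlloc() and recycles them through an intrusive free list; callers
// are responsible for external locking.
#if 0
struct ExampleMetadata { int value; };
static PageHeapAllocator<ExampleMetadata> example_allocator;

static void pageHeapAllocatorExample()
{
    example_allocator.Init();
    ExampleMetadata* object = example_allocator.New(); // carved from free_area_ or popped off free_list_
    object->value = 1;
    example_allocator.Delete(object);                  // pushed back onto the intrusive free list
    ASSERT(example_allocator.inuse() == 0);
}
#endif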
989
990// -------------------------------------------------------------------------
991// Span - a contiguous run of pages
992// -------------------------------------------------------------------------
993
994// Type that can hold a page number
995typedef uintptr_t PageID;
996
997// Type that can hold the length of a run of pages
998typedef uintptr_t Length;
999
1000static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;
1001
1002// Convert byte size into pages. This won't overflow, but may return
1003// an unreasonably large value if bytes is huge enough.
1004static inline Length pages(size_t bytes) {
1005 return (bytes >> kPageShift) +
1006 ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
1007}
1008
1009// Convert a user size into the number of bytes that will actually be
1010// allocated
1011static size_t AllocationSize(size_t bytes) {
1012 if (bytes > kMaxSize) {
1013 // Large object: we allocate an integral number of pages
1014 ASSERT(bytes <= (kMaxValidPages << kPageShift));
1015 return pages(bytes) << kPageShift;
1016 } else {
1017 // Small object: find the size class to which it belongs
1018 return ByteSizeForClass(SizeClass(bytes));
1019 }
1020}
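// Worked example of the rounding above (added sketch, not compiled;
// checkAllocationSizeExamples is a hypothetical name). With 4KB pages, a
// 100000-byte request is above kMaxSize and rounds up to 25 whole pages
// (102400 bytes), while a 100-byte request is served from a small-object size
// class whose exact size comes from class_to_size[] once InitSizeClasses() has run.
#if 0
static void checkAllocationSizeExamples()
{
    ASSERT(pages(100000) == 25);                      // 24 full pages plus a partial page
    ASSERT(AllocationSize(100000) == 25 * kPageSize); // large objects use whole pages
    ASSERT(AllocationSize(100) >= 100);               // small objects round up to their size class
}
#endif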
1021
1022// Information kept for a span (a contiguous run of pages).
1023struct Span {
1024 PageID start; // Starting page number
1025 Length length; // Number of pages in span
1026 Span* next; // Used when in linked list
1027 Span* prev; // Used when in linked list
1028 void* objects; // Linked list of free objects
1029 unsigned int free : 1; // Is the span free
1030#ifndef NO_TCMALLOC_SAMPLES
1031 unsigned int sample : 1; // Sampled object?
1032#endif
1033 unsigned int sizeclass : 8; // Size-class for small objects (or 0)
1034 unsigned int refcount : 11; // Number of non-free objects
1035 bool decommitted : 1;
1036
1037#undef SPAN_HISTORY
1038#ifdef SPAN_HISTORY
1039 // For debugging, we can keep a log of events per span
1040 int nexthistory;
1041 char history[64];
1042 int value[64];
1043#endif
1044};
1045
1046#if TCMALLOC_TRACK_DECOMMITED_SPANS
1047#define ASSERT_SPAN_COMMITTED(span) ASSERT(!span->decommitted)
1048#else
1049#define ASSERT_SPAN_COMMITTED(span)
1050#endif
1051
1052#ifdef SPAN_HISTORY
1053void Event(Span* span, char op, int v = 0) {
1054 span->history[span->nexthistory] = op;
1055 span->value[span->nexthistory] = v;
1056 span->nexthistory++;
1057 if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0;
1058}
1059#else
1060#define Event(s,o,v) ((void) 0)
1061#endif
1062
1063// Allocator/deallocator for spans
1064static PageHeapAllocator<Span> span_allocator;
1065static Span* NewSpan(PageID p, Length len) {
1066 Span* result = span_allocator.New();
1067 memset(result, 0, sizeof(*result));
1068 result->start = p;
1069 result->length = len;
1070#ifdef SPAN_HISTORY
1071 result->nexthistory = 0;
1072#endif
1073 return result;
1074}
1075
1076static inline void DeleteSpan(Span* span) {
1077#ifndef NDEBUG
1078 // In debug mode, trash the contents of deleted Spans
1079 memset(span, 0x3f, sizeof(*span));
1080#endif
1081 span_allocator.Delete(span);
1082}
1083
1084// -------------------------------------------------------------------------
1085// Doubly linked list of spans.
1086// -------------------------------------------------------------------------
1087
1088static inline void DLL_Init(Span* list) {
1089 list->next = list;
1090 list->prev = list;
1091}
1092
1093static inline void DLL_Remove(Span* span) {
1094 span->prev->next = span->next;
1095 span->next->prev = span->prev;
1096 span->prev = NULL;
1097 span->next = NULL;
1098}
1099
1100static ALWAYS_INLINE bool DLL_IsEmpty(const Span* list) {
1101 return list->next == list;
1102}
1103
1104static int DLL_Length(const Span* list) {
1105 int result = 0;
1106 for (Span* s = list->next; s != list; s = s->next) {
1107 result++;
1108 }
1109 return result;
1110}
1111
1112#if 0 /* Not needed at the moment -- causes compiler warnings if not used */
1113static void DLL_Print(const char* label, const Span* list) {
1114 MESSAGE("%-10s %p:", label, list);
1115 for (const Span* s = list->next; s != list; s = s->next) {
1116 MESSAGE(" <%p,%u,%u>", s, s->start, s->length);
1117 }
1118 MESSAGE("\n");
1119}
1120#endif
1121
1122static inline void DLL_Prepend(Span* list, Span* span) {
1123 ASSERT(span->next == NULL);
1124 ASSERT(span->prev == NULL);
1125 span->next = list->next;
1126 span->prev = list;
1127 list->next->prev = span;
1128 list->next = span;
1129}
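// Usage sketch for the span lists above (added for exposition, not compiled;
// dllExample is a hypothetical name, and it assumes span_allocator has already
// been initialized). A list header is a sentinel Span whose next/prev point
// back to itself while the list is empty.
#if 0
static void dllExample()
{
    Span sentinel;
    DLL_Init(&sentinel);
    ASSERT(DLL_IsEmpty(&sentinel));

    Span* span = NewSpan(0, 1); // one-page span, zero-initialized so next/prev are NULL
    DLL_Prepend(&sentinel, span);
    ASSERT(DLL_Length(&sentinel) == 1);

    DLL_Remove(span);
    ASSERT(DLL_IsEmpty(&sentinel));
    DeleteSpan(span);
}
#endif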
1130
1131// -------------------------------------------------------------------------
1132// Stack traces kept for sampled allocations
1133// The following state is protected by pageheap_lock_.
1134// -------------------------------------------------------------------------
1135
1136// size/depth are made the same size as a pointer so that some generic
1137// code below can conveniently cast them back and forth to void*.
1138static const int kMaxStackDepth = 31;
1139struct StackTrace {
1140 uintptr_t size; // Size of object
1141 uintptr_t depth; // Number of PC values stored in array below
1142 void* stack[kMaxStackDepth];
1143};
1144static PageHeapAllocator<StackTrace> stacktrace_allocator;
1145static Span sampled_objects;
1146
1147// -------------------------------------------------------------------------
1148// Map from page-id to per-page data
1149// -------------------------------------------------------------------------
1150
1151// We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines.
1152// We also use a simple one-level cache for hot PageID-to-sizeclass mappings,
1153// because sometimes the sizeclass is all the information we need.
1154
1155// Selector class -- general selector uses 3-level map
1156template <int BITS> class MapSelector {
1157 public:
1158 typedef TCMalloc_PageMap3<BITS-kPageShift> Type;
1159 typedef PackedCache<BITS, uint64_t> CacheType;
1160};
1161
1162#if defined(WTF_CHANGES)
1163#if PLATFORM(X86_64)
1164// On all known X86-64 platforms, the upper 16 bits are always unused and therefore
1165// can be excluded from the PageMap key.
1166// See http://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details
1167
1168static const size_t kBitsUnusedOn64Bit = 16;
1169#else
1170static const size_t kBitsUnusedOn64Bit = 0;
1171#endif
1172
1173// A three-level map for 64-bit machines
1174template <> class MapSelector<64> {
1175 public:
1176 typedef TCMalloc_PageMap3<64 - kPageShift - kBitsUnusedOn64Bit> Type;
1177 typedef PackedCache<64, uint64_t> CacheType;
1178};
1179#endif
1180
1181// A two-level map for 32-bit machines
1182template <> class MapSelector<32> {
1183 public:
1184 typedef TCMalloc_PageMap2<32 - kPageShift> Type;
1185 typedef PackedCache<32 - kPageShift, uint16_t> CacheType;
1186};
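// Sketch of how these selectors are used (added for exposition, not compiled;
// the example typedefs and helper are hypothetical). The heap derives a PageID
// by shifting off the page offset, consults the pagemap (2 levels on 32-bit,
// 3 levels on 64-bit) for the owning Span, and uses the packed cache for hot
// PageID-to-sizeclass lookups, where 0 means "no information" rather than
// size class 0.
#if 0
static PageID examplePageIDForPointer(const void* ptr)
{
    return reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
}
typedef MapSelector<8 * sizeof(uintptr_t)>::Type ExamplePageMap;           // TCMalloc_PageMap2 or TCMalloc_PageMap3
typedef MapSelector<8 * sizeof(uintptr_t)>::CacheType ExamplePageMapCache; // PackedCache keyed by PageID
#endif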
1187
1188// -------------------------------------------------------------------------
1189// Page-level allocator
1190// * Eager coalescing
1191//
1192// Heap for page-level allocation. We allow allocating and freeing
1193// contiguous runs of pages (each such run is called a "span").
1194// -------------------------------------------------------------------------
1195
1196class TCMalloc_PageHeap {
1197 public:
1198 void init();
1199
1200 // Allocate a run of "n" pages. Returns zero if out of memory.
1201 Span* New(Length n);
1202
1203 // Delete the span "[p, p+n-1]".
1204 // REQUIRES: span was returned by earlier call to New() and
1205 // has not yet been deleted.
1206 void Delete(Span* span);
1207
1208 // Mark an allocated span as being used for small objects of the
1209 // specified size-class.
1210 // REQUIRES: span was returned by an earlier call to New()
1211 // and has not yet been deleted.
1212 void RegisterSizeClass(Span* span, size_t sc);
1213
1214 // Split an allocated span into two spans: one of length "n" pages
1215 // followed by another span of length "span->length - n" pages.
1216 // Modifies "*span" to point to the first span of length "n" pages.
1217 // Returns a pointer to the second span.
1218 //
1219 // REQUIRES: "0 < n < span->length"
1220 // REQUIRES: !span->free
1221 // REQUIRES: span->sizeclass == 0
1222 Span* Split(Span* span, Length n);
1223
1224 // Return the descriptor for the specified page.
1225 inline Span* GetDescriptor(PageID p) const {
1226 return reinterpret_cast<Span*>(pagemap_.get(p));
1227 }
1228
1229#ifdef WTF_CHANGES
1230 inline Span* GetDescriptorEnsureSafe(PageID p)
1231 {
1232 pagemap_.Ensure(p, 1);
1233 return GetDescriptor(p);
1234 }
1235
1236 size_t ReturnedBytes() const;
1237#endif
1238
1239 // Dump state to stderr
1240#ifndef WTF_CHANGES
1241 void Dump(TCMalloc_Printer* out);
1242#endif
1243
1244 // Return number of bytes allocated from system
1245 inline uint64_t SystemBytes() const { return system_bytes_; }
1246
1247 // Return number of free bytes in heap
1248 uint64_t FreeBytes() const {
1249 return (static_cast<uint64_t>(free_pages_) << kPageShift);
1250 }
1251
1252 bool Check();
1253 bool CheckList(Span* list, Length min_pages, Length max_pages);
1254
1255 // Release all pages on the free list for reuse by the OS:
1256 void ReleaseFreePages();
1257
1258 // Return 0 if we have no information, or else the correct sizeclass for p.
1259 // Reads and writes to pagemap_cache_ do not require locking.
1260 // The entries are 64 bits on 64-bit hardware and 16 bits on
1261 // 32-bit hardware, and we don't mind raciness as long as each read of
1262 // an entry yields a valid entry, not a partially updated entry.
1263 size_t GetSizeClassIfCached(PageID p) const {
1264 return pagemap_cache_.GetOrDefault(p, 0);
1265 }
1266 void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); }
1267
1268 private:
1269 // Pick the appropriate map and cache types based on pointer size
1270 typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap;
1271 typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache;
1272 PageMap pagemap_;
1273 mutable PageMapCache pagemap_cache_;
1274
1275 // We segregate spans of a given size into two circular linked
1276 // lists: one for normal spans, and one for spans whose memory
1277 // has been returned to the system.
1278 struct SpanList {
1279 Span normal;
1280 Span returned;
1281 };
1282
1283 // List of free spans of length >= kMaxPages
1284 SpanList large_;
1285
1286 // Array mapping from span length to a doubly linked list of free spans
1287 SpanList free_[kMaxPages];
1288
1289 // Number of pages kept in free lists
1290 uintptr_t free_pages_;
1291
1292 // Bytes allocated from system
1293 uint64_t system_bytes_;
1294
1295 bool GrowHeap(Length n);
1296
1297 // REQUIRES span->length >= n
1298 // Remove span from its free list, and move any leftover part of
1299 // span into appropriate free lists. Also update "span" to have
1300 // length exactly "n" and mark it as non-free so it can be returned
1301 // to the client.
1302 //
1303 // "released" is true iff "span" was found on a "returned" list.
1304 void Carve(Span* span, Length n, bool released);
1305
1306 void RecordSpan(Span* span) {
1307 pagemap_.set(span->start, span);
1308 if (span->length > 1) {
1309 pagemap_.set(span->start + span->length - 1, span);
1310 }
1311 }
1312
1313 // Allocate a large span of length == n. If successful, returns a
1314 // span of exactly the specified length. Else, returns NULL.
1315 Span* AllocLarge(Length n);
1316
1317 // Incrementally release some memory to the system.
1318 // IncrementalScavenge(n) is called whenever n pages are freed.
1319 void IncrementalScavenge(Length n);
1320
1321 // Number of pages to deallocate before doing more scavenging
1322 int64_t scavenge_counter_;
1323
1324 // Index of last free list we scavenged
1325 size_t scavenge_index_;
1326
1327#if defined(WTF_CHANGES) && PLATFORM(DARWIN)
1328 friend class FastMallocZone;
1329#endif
1330};
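// Usage sketch for the page heap interface above (added for exposition, not
// compiled; pageHeapExample is a hypothetical name, and it assumes the size
// classes have been initialized and that the caller holds the page-heap lock
// described in the header comment).
#if 0
static void pageHeapExample(TCMalloc_PageHeap* heap)
{
    Span* span = heap->New(2); // ask for a 2-page span; returns NULL if out of memory
    if (!span)
        return;
    heap->RegisterSizeClass(span, SizeClass(128)); // mark it as holding 128-byte objects
    heap->Delete(span); // give the pages back; adjacent free spans are coalesced
}
#endif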
1331
1332void TCMalloc_PageHeap::init()
1333{
1334 pagemap_.init(MetaDataAlloc);
1335 pagemap_cache_ = PageMapCache(0);
1336 free_pages_ = 0;
1337 system_bytes_ = 0;
1338 scavenge_counter_ = 0;
1339 // Start scavenging at kMaxPages list
1340 scavenge_index_ = kMaxPages-1;
1341 COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits);
1342 DLL_Init(&large_.normal);
1343 DLL_Init(&large_.returned);
1344 for (size_t i = 0; i < kMaxPages; i++) {
1345 DLL_Init(&free_[i].normal);
1346 DLL_Init(&free_[i].returned);
1347 }
1348}
1349
1350inline Span* TCMalloc_PageHeap::New(Length n) {
1351 ASSERT(Check());
1352 ASSERT(n > 0);
1353
1354 // Find first size >= n that has a non-empty list
1355 for (Length s = n; s < kMaxPages; s++) {
1356 Span* ll = NULL;
1357 bool released = false;
1358 if (!DLL_IsEmpty(&free_[s].normal)) {
1359 // Found normal span
1360 ll = &free_[s].normal;
1361 } else if (!DLL_IsEmpty(&free_[s].returned)) {
1362 // Found returned span; reallocate it
1363 ll = &free_[s].returned;
1364 released = true;
1365 } else {
1366 // Keep looking in larger classes
1367 continue;
1368 }
1369
1370 Span* result = ll->next;
1371 Carve(result, n, released);
1372#if TCMALLOC_TRACK_DECOMMITED_SPANS
1373 if (result->decommitted) {
1374 TCMalloc_SystemCommit(reinterpret_cast<void*>(result->start << kPageShift), static_cast<size_t>(n << kPageShift));
1375 result->decommitted = false;
1376 }
1377#endif
1378 ASSERT(Check());
1379 free_pages_ -= n;
1380 return result;
1381 }
1382
1383 Span* result = AllocLarge(n);
1384 if (result != NULL) {
1385 ASSERT_SPAN_COMMITTED(result);
1386 return result;
1387 }
1388
1389 // Grow the heap and try again
1390 if (!GrowHeap(n)) {
1391 ASSERT(Check());
1392 return NULL;
1393 }
1394
1395 return AllocLarge(n);
1396}
1397
1398Span* TCMalloc_PageHeap::AllocLarge(Length n) {
1399 // Find the best span (closest to n in size).
1400 // The following loops implement address-ordered best-fit.
1401 bool from_released = false;
1402 Span *best = NULL;
1403
1404 // Search through normal list
1405 for (Span* span = large_.normal.next;
1406 span != &large_.normal;
1407 span = span->next) {
1408 if (span->length >= n) {
1409 if ((best == NULL)
1410 || (span->length < best->length)
1411 || ((span->length == best->length) && (span->start < best->start))) {
1412 best = span;
1413 from_released = false;
1414 }
1415 }
1416 }
1417
1418 // Search through released list in case it has a better fit
1419 for (Span* span = large_.returned.next;
1420 span != &large_.returned;
1421 span = span->next) {
1422 if (span->length >= n) {
1423 if ((best == NULL)
1424 || (span->length < best->length)
1425 || ((span->length == best->length) && (span->start < best->start))) {
1426 best = span;
1427 from_released = true;
1428 }
1429 }
1430 }
1431
1432 if (best != NULL) {
1433 Carve(best, n, from_released);
1434#if TCMALLOC_TRACK_DECOMMITED_SPANS
1435 if (best->decommitted) {
1436 TCMalloc_SystemCommit(reinterpret_cast<void*>(best->start << kPageShift), static_cast<size_t>(n << kPageShift));
1437 best->decommitted = false;
1438 }
1439#endif
1440 ASSERT(Check());
1441 free_pages_ -= n;
1442 return best;
1443 }
1444 return NULL;
1445}
1446
1447Span* TCMalloc_PageHeap::Split(Span* span, Length n) {
1448 ASSERT(0 < n);
1449 ASSERT(n < span->length);
1450 ASSERT(!span->free);
1451 ASSERT(span->sizeclass == 0);
1452 Event(span, 'T', n);
1453
1454 const Length extra = span->length - n;
1455 Span* leftover = NewSpan(span->start + n, extra);
1456 Event(leftover, 'U', extra);
1457 RecordSpan(leftover);
1458 pagemap_.set(span->start + n - 1, span); // Update map from pageid to span
1459 span->length = n;
1460
1461 return leftover;
1462}
1463
1464#if !TCMALLOC_TRACK_DECOMMITED_SPANS
1465static ALWAYS_INLINE void propagateDecommittedState(Span*, Span*) { }
1466#else
1467static ALWAYS_INLINE void propagateDecommittedState(Span* destination, Span* source)
1468{
1469 destination->decommitted = source->decommitted;
1470}
1471#endif
1472
1473inline void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) {
1474 ASSERT(n > 0);
1475 DLL_Remove(span);
1476 span->free = 0;
1477 Event(span, 'A', n);
1478
1479 const int extra = static_cast<int>(span->length - n);
1480 ASSERT(extra >= 0);
1481 if (extra > 0) {
1482 Span* leftover = NewSpan(span->start + n, extra);
1483 leftover->free = 1;
1484 propagateDecommittedState(leftover, span);
1485 Event(leftover, 'S', extra);
1486 RecordSpan(leftover);
1487
1488 // Place leftover span on appropriate free list
1489 SpanList* listpair = (static_cast<size_t>(extra) < kMaxPages) ? &free_[extra] : &large_;
1490 Span* dst = released ? &listpair->returned : &listpair->normal;
1491 DLL_Prepend(dst, leftover);
1492
1493 span->length = n;
1494 pagemap_.set(span->start + n - 1, span);
1495 }
1496}
1497
1498#if !TCMALLOC_TRACK_DECOMMITED_SPANS
1499static ALWAYS_INLINE void mergeDecommittedStates(Span*, Span*) { }
1500#else
1501static ALWAYS_INLINE void mergeDecommittedStates(Span* destination, Span* other)
1502{
1503 if (destination->decommitted && !other->decommitted) {
1504 TCMalloc_SystemRelease(reinterpret_cast<void*>(other->start << kPageShift),
1505 static_cast<size_t>(other->length << kPageShift));
1506 } else if (other->decommitted && !destination->decommitted) {
1507 TCMalloc_SystemRelease(reinterpret_cast<void*>(destination->start << kPageShift),
1508 static_cast<size_t>(destination->length << kPageShift));
1509 destination->decommitted = true;
1510 }
1511}
1512#endif
1513
1514inline void TCMalloc_PageHeap::Delete(Span* span) {
1515 ASSERT(Check());
1516 ASSERT(!span->free);
1517 ASSERT(span->length > 0);
1518 ASSERT(GetDescriptor(span->start) == span);
1519 ASSERT(GetDescriptor(span->start + span->length - 1) == span);
1520 span->sizeclass = 0;
1521#ifndef NO_TCMALLOC_SAMPLES
1522 span->sample = 0;
1523#endif
1524
1525 // Coalesce -- we guarantee that "p" != 0, so no bounds checking
1526 // necessary. We do not bother resetting the stale pagemap
1527 // entries for the pieces we are merging together because we only
1528 // care about the pagemap entries for the boundaries.
1529 const PageID p = span->start;
1530 const Length n = span->length;
1531 Span* prev = GetDescriptor(p-1);
1532 if (prev != NULL && prev->free) {
1533 // Merge preceding span into this span
1534 ASSERT(prev->start + prev->length == p);
1535 const Length len = prev->length;
1536 mergeDecommittedStates(span, prev);
1537 DLL_Remove(prev);
1538 DeleteSpan(prev);
1539 span->start -= len;
1540 span->length += len;
1541 pagemap_.set(span->start, span);
1542 Event(span, 'L', len);
1543 }
1544 Span* next = GetDescriptor(p+n);
1545 if (next != NULL && next->free) {
1546 // Merge next span into this span
1547 ASSERT(next->start == p+n);
1548 const Length len = next->length;
1549 mergeDecommittedStates(span, next);
1550 DLL_Remove(next);
1551 DeleteSpan(next);
1552 span->length += len;
1553 pagemap_.set(span->start + span->length - 1, span);
1554 Event(span, 'R', len);
1555 }
1556
1557 Event(span, 'D', span->length);
1558 span->free = 1;
1559#if TCMALLOC_TRACK_DECOMMITED_SPANS
1560 if (span->decommitted) {
1561 if (span->length < kMaxPages)
1562 DLL_Prepend(&free_[span->length].returned, span);
1563 else
1564 DLL_Prepend(&large_.returned, span);
1565 } else
1566#endif
1567 {
1568 if (span->length < kMaxPages)
1569 DLL_Prepend(&free_[span->length].normal, span);
1570 else
1571 DLL_Prepend(&large_.normal, span);
1572 }
1573 free_pages_ += n;
1574
1575 IncrementalScavenge(n);
1576 ASSERT(Check());
1577}
1578
1579void TCMalloc_PageHeap::IncrementalScavenge(Length n) {
1580 // Fast path; not yet time to release memory
1581 scavenge_counter_ -= n;
1582 if (scavenge_counter_ >= 0) return; // Not yet time to scavenge
1583
1584 // If there is nothing to release, wait until this many pages have been freed
1585 // before scavenging again. With 4K pages and a 1 << 8 page delay, this comes to 1MB of memory.
1586 static const size_t kDefaultReleaseDelay = 1 << 8;
1587
1588 // Find index of free list to scavenge
1589 size_t index = scavenge_index_ + 1;
1590 for (size_t i = 0; i < kMaxPages+1; i++) {
1591 if (index > kMaxPages) index = 0;
1592 SpanList* slist = (index == kMaxPages) ? &large_ : &free_[index];
1593 if (!DLL_IsEmpty(&slist->normal)) {
1594 // Release the last span on the normal portion of this list
1595 Span* s = slist->normal.prev;
1596 DLL_Remove(s);
1597 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
1598 static_cast<size_t>(s->length << kPageShift));
1599#if TCMALLOC_TRACK_DECOMMITED_SPANS
1600 s->decommitted = true;
1601#endif
1602 DLL_Prepend(&slist->returned, s);
1603
1604 scavenge_counter_ = std::max<size_t>(64UL, std::min<size_t>(kDefaultReleaseDelay, kDefaultReleaseDelay - (free_pages_ / kDefaultReleaseDelay)));
1605
1606 if (index == kMaxPages && !DLL_IsEmpty(&slist->normal))
1607 scavenge_index_ = index - 1;
1608 else
1609 scavenge_index_ = index;
1610 return;
1611 }
1612 index++;
1613 }
1614
1615 // Nothing to scavenge, delay for a while
1616 scavenge_counter_ = kDefaultReleaseDelay;
1617}
1618
1619void TCMalloc_PageHeap::RegisterSizeClass(Span* span, size_t sc) {
1620 // Associate span object with all interior pages as well
1621 ASSERT(!span->free);
1622 ASSERT(GetDescriptor(span->start) == span);
1623 ASSERT(GetDescriptor(span->start+span->length-1) == span);
1624 Event(span, 'C', sc);
1625 span->sizeclass = static_cast<unsigned int>(sc);
1626 for (Length i = 1; i < span->length-1; i++) {
1627 pagemap_.set(span->start+i, span);
1628 }
1629}
1630
1631#ifdef WTF_CHANGES
1632size_t TCMalloc_PageHeap::ReturnedBytes() const {
1633 size_t result = 0;
1634 for (unsigned s = 0; s < kMaxPages; s++) {
1635 const int r_length = DLL_Length(&free_[s].returned);
1636 unsigned r_pages = s * r_length;
1637 result += r_pages << kPageShift;
1638 }
1639
1640 for (Span* s = large_.returned.next; s != &large_.returned; s = s->next)
1641 result += s->length << kPageShift;
1642 return result;
1643}
1644#endif
1645
1646#ifndef WTF_CHANGES
1647static double PagesToMB(uint64_t pages) {
1648 return (pages << kPageShift) / 1048576.0;
1649}
1650
1651void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) {
1652 int nonempty_sizes = 0;
1653 for (int s = 0; s < kMaxPages; s++) {
1654 if (!DLL_IsEmpty(&free_[s].normal) || !DLL_IsEmpty(&free_[s].returned)) {
1655 nonempty_sizes++;
1656 }
1657 }
1658 out->printf("------------------------------------------------\n");
1659 out->printf("PageHeap: %d sizes; %6.1f MB free\n",
1660 nonempty_sizes, PagesToMB(free_pages_));
1661 out->printf("------------------------------------------------\n");
1662 uint64_t total_normal = 0;
1663 uint64_t total_returned = 0;
1664 for (int s = 0; s < kMaxPages; s++) {
1665 const int n_length = DLL_Length(&free_[s].normal);
1666 const int r_length = DLL_Length(&free_[s].returned);
1667 if (n_length + r_length > 0) {
1668 uint64_t n_pages = s * n_length;
1669 uint64_t r_pages = s * r_length;
1670 total_normal += n_pages;
1671 total_returned += r_pages;
1672 out->printf("%6u pages * %6u spans ~ %6.1f MB; %6.1f MB cum"
1673 "; unmapped: %6.1f MB; %6.1f MB cum\n",
1674 s,
1675 (n_length + r_length),
1676 PagesToMB(n_pages + r_pages),
1677 PagesToMB(total_normal + total_returned),
1678 PagesToMB(r_pages),
1679 PagesToMB(total_returned));
1680 }
1681 }
1682
1683 uint64_t n_pages = 0;
1684 uint64_t r_pages = 0;
1685 int n_spans = 0;
1686 int r_spans = 0;
1687 out->printf("Normal large spans:\n");
1688 for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) {
1689 out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n",
1690 s->length, PagesToMB(s->length));
1691 n_pages += s->length;
1692 n_spans++;
1693 }
1694 out->printf("Unmapped large spans:\n");
1695 for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) {
1696 out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n",
1697 s->length, PagesToMB(s->length));
1698 r_pages += s->length;
1699 r_spans++;
1700 }
1701 total_normal += n_pages;
1702 total_returned += r_pages;
1703 out->printf(">255 large * %6u spans ~ %6.1f MB; %6.1f MB cum"
1704 "; unmapped: %6.1f MB; %6.1f MB cum\n",
1705 (n_spans + r_spans),
1706 PagesToMB(n_pages + r_pages),
1707 PagesToMB(total_normal + total_returned),
1708 PagesToMB(r_pages),
1709 PagesToMB(total_returned));
1710}
1711#endif
1712
1713bool TCMalloc_PageHeap::GrowHeap(Length n) {
1714 ASSERT(kMaxPages >= kMinSystemAlloc);
1715 if (n > kMaxValidPages) return false;
1716 Length ask = (n>kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc);
1717 size_t actual_size;
1718 void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
1719 if (ptr == NULL) {
1720 if (n < ask) {
1721 // Try growing just "n" pages
1722 ask = n;
1723 ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
1724 }
1725 if (ptr == NULL) return false;
1726 }
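  // TCMalloc_SystemAlloc may round the request up; recompute "ask" from the
  // number of bytes actually obtained so the span created below covers all of them.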
1727 ask = actual_size >> kPageShift;
1728
1729 uint64_t old_system_bytes = system_bytes_;
1730 system_bytes_ += (ask << kPageShift);
1731 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
1732 ASSERT(p > 0);
1733
1734  // If we already have a lot of pages allocated, just preallocate a bunch of
1735 // memory for the page map. This prevents fragmentation by pagemap metadata
1736 // when a program keeps allocating and freeing large blocks.
1737
1738 if (old_system_bytes < kPageMapBigAllocationThreshold
1739 && system_bytes_ >= kPageMapBigAllocationThreshold) {
1740 pagemap_.PreallocateMoreMemory();
1741 }
1742
1743 // Make sure pagemap_ has entries for all of the new pages.
1744 // Plus ensure one before and one after so coalescing code
1745 // does not need bounds-checking.
1746 if (pagemap_.Ensure(p-1, ask+2)) {
1747 // Pretend the new area is allocated and then Delete() it to
1748 // cause any necessary coalescing to occur.
1749 //
1750 // We do not adjust free_pages_ here since Delete() will do it for us.
1751 Span* span = NewSpan(p, ask);
1752 RecordSpan(span);
1753 Delete(span);
1754 ASSERT(Check());
1755 return true;
1756 } else {
1757 // We could not allocate memory within "pagemap_"
1758 // TODO: Once we can return memory to the system, return the new span
1759 return false;
1760 }
1761}
1762
1763bool TCMalloc_PageHeap::Check() {
1764 ASSERT(free_[0].normal.next == &free_[0].normal);
1765 ASSERT(free_[0].returned.next == &free_[0].returned);
1766 CheckList(&large_.normal, kMaxPages, 1000000000);
1767 CheckList(&large_.returned, kMaxPages, 1000000000);
1768 for (Length s = 1; s < kMaxPages; s++) {
1769 CheckList(&free_[s].normal, s, s);
1770 CheckList(&free_[s].returned, s, s);
1771 }
1772 return true;
1773}
1774
1775#if ASSERT_DISABLED
1776bool TCMalloc_PageHeap::CheckList(Span*, Length, Length) {
1777 return true;
1778}
1779#else
1780bool TCMalloc_PageHeap::CheckList(Span* list, Length min_pages, Length max_pages) {
1781 for (Span* s = list->next; s != list; s = s->next) {
1782 CHECK_CONDITION(s->free);
1783 CHECK_CONDITION(s->length >= min_pages);
1784 CHECK_CONDITION(s->length <= max_pages);
1785 CHECK_CONDITION(GetDescriptor(s->start) == s);
1786 CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s);
1787 }
1788 return true;
1789}
1790#endif
1791
1792static void ReleaseFreeList(Span* list, Span* returned) {
1793 // Walk backwards through list so that when we push these
1794 // spans on the "returned" list, we preserve the order.
1795 while (!DLL_IsEmpty(list)) {
1796 Span* s = list->prev;
1797 DLL_Remove(s);
1798 DLL_Prepend(returned, s);
1799 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
1800 static_cast<size_t>(s->length << kPageShift));
1801 }
1802}
1803
1804void TCMalloc_PageHeap::ReleaseFreePages() {
1805 for (Length s = 0; s < kMaxPages; s++) {
1806 ReleaseFreeList(&free_[s].normal, &free_[s].returned);
1807 }
1808 ReleaseFreeList(&large_.normal, &large_.returned);
1809 ASSERT(Check());
1810}
1811
1812//-------------------------------------------------------------------
1813// Free list
1814//-------------------------------------------------------------------
1815
1816class TCMalloc_ThreadCache_FreeList {
1817 private:
1818 void* list_; // Linked list of nodes
1819 uint16_t length_; // Current length
1820 uint16_t lowater_; // Low water mark for list length
1821
1822 public:
1823 void Init() {
1824 list_ = NULL;
1825 length_ = 0;
1826 lowater_ = 0;
1827 }
1828
1829 // Return current length of list
1830 int length() const {
1831 return length_;
1832 }
1833
1834 // Is list empty?
1835 bool empty() const {
1836 return list_ == NULL;
1837 }
1838
1839 // Low-water mark management
1840 int lowwatermark() const { return lowater_; }
1841 void clear_lowwatermark() { lowater_ = length_; }
1842
1843 ALWAYS_INLINE void Push(void* ptr) {
1844 SLL_Push(&list_, ptr);
1845 length_++;
1846 }
1847
1848 void PushRange(int N, void *start, void *end) {
1849 SLL_PushRange(&list_, start, end);
1850 length_ = length_ + static_cast<uint16_t>(N);
1851 }
1852
1853 void PopRange(int N, void **start, void **end) {
1854 SLL_PopRange(&list_, N, start, end);
1855 ASSERT(length_ >= N);
1856 length_ = length_ - static_cast<uint16_t>(N);
1857 if (length_ < lowater_) lowater_ = length_;
1858 }
1859
1860 ALWAYS_INLINE void* Pop() {
1861 ASSERT(list_ != NULL);
1862 length_--;
1863 if (length_ < lowater_) lowater_ = length_;
1864 return SLL_Pop(&list_);
1865 }
1866
1867#ifdef WTF_CHANGES
1868 template <class Finder, class Reader>
1869 void enumerateFreeObjects(Finder& finder, const Reader& reader)
1870 {
1871 for (void* nextObject = list_; nextObject; nextObject = *reader(reinterpret_cast<void**>(nextObject)))
1872 finder.visit(nextObject);
1873 }
1874#endif
1875};
1876
1877//-------------------------------------------------------------------
1878// Data kept per thread
1879//-------------------------------------------------------------------
1880
1881class TCMalloc_ThreadCache {
1882 private:
1883 typedef TCMalloc_ThreadCache_FreeList FreeList;
1884#if COMPILER(MSVC)
1885 typedef DWORD ThreadIdentifier;
1886#else
1887 typedef pthread_t ThreadIdentifier;
1888#endif
1889
1890 size_t size_; // Combined size of data
1891 ThreadIdentifier tid_; // Which thread owns it
1892 bool in_setspecific_; // Called pthread_setspecific?
1893 FreeList list_[kNumClasses]; // Array indexed by size-class
1894
1895 // We sample allocations, biased by the size of the allocation
1896 uint32_t rnd_; // Cheap random number generator
1897 size_t bytes_until_sample_; // Bytes until we sample next
1898
1899 // Allocate a new heap. REQUIRES: pageheap_lock is held.
1900 static inline TCMalloc_ThreadCache* NewHeap(ThreadIdentifier tid);
1901
1902 // Use only as pthread thread-specific destructor function.
1903 static void DestroyThreadCache(void* ptr);
1904 public:
1905 // All ThreadCache objects are kept in a linked list (for stats collection)
1906 TCMalloc_ThreadCache* next_;
1907 TCMalloc_ThreadCache* prev_;
1908
1909 void Init(ThreadIdentifier tid);
1910 void Cleanup();
1911
1912 // Accessors (mostly just for printing stats)
1913 int freelist_length(size_t cl) const { return list_[cl].length(); }
1914
1915 // Total byte size in cache
1916 size_t Size() const { return size_; }
1917
1918 void* Allocate(size_t size);
1919 void Deallocate(void* ptr, size_t size_class);
1920
1921 void FetchFromCentralCache(size_t cl, size_t allocationSize);
1922 void ReleaseToCentralCache(size_t cl, int N);
1923 void Scavenge();
1924 void Print() const;
1925
1926 // Record allocation of "k" bytes. Return true iff allocation
1927 // should be sampled
1928 bool SampleAllocation(size_t k);
1929
1930 // Pick next sampling point
1931 void PickNextSample(size_t k);
1932
1933 static void InitModule();
1934 static void InitTSD();
1935 static TCMalloc_ThreadCache* GetThreadHeap();
1936 static TCMalloc_ThreadCache* GetCache();
1937 static TCMalloc_ThreadCache* GetCacheIfPresent();
1938 static TCMalloc_ThreadCache* CreateCacheIfNecessary();
1939 static void DeleteCache(TCMalloc_ThreadCache* heap);
1940 static void BecomeIdle();
1941 static void RecomputeThreadCacheSize();
1942
1943#ifdef WTF_CHANGES
1944 template <class Finder, class Reader>
1945 void enumerateFreeObjects(Finder& finder, const Reader& reader)
1946 {
1947 for (unsigned sizeClass = 0; sizeClass < kNumClasses; sizeClass++)
1948 list_[sizeClass].enumerateFreeObjects(finder, reader);
1949 }
1950#endif
1951};
1952
1953//-------------------------------------------------------------------
1954// Data kept per size-class in central cache
1955//-------------------------------------------------------------------
1956
1957class TCMalloc_Central_FreeList {
1958 public:
1959 void Init(size_t cl);
1960
1961 // These methods all do internal locking.
1962
1963 // Insert the specified range into the central freelist. N is the number of
1964 // elements in the range.
1965 void InsertRange(void *start, void *end, int N);
1966
1967 // Returns the actual number of fetched elements into N.
1968 void RemoveRange(void **start, void **end, int *N);
1969
1970 // Returns the number of free objects in cache.
1971 size_t length() {
1972 SpinLockHolder h(&lock_);
1973 return counter_;
1974 }
1975
1976 // Returns the number of free objects in the transfer cache.
1977 int tc_length() {
1978 SpinLockHolder h(&lock_);
1979 return used_slots_ * num_objects_to_move[size_class_];
1980 }
1981
1982#ifdef WTF_CHANGES
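  // Used by heap introspection (e.g. the Darwin malloc zone enumerator): the
  // Reader maps memory of the inspected process into locally readable copies,
  // so remote Span and object pointers are translated before being followed.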
1983 template <class Finder, class Reader>
1984 void enumerateFreeObjects(Finder& finder, const Reader& reader, TCMalloc_Central_FreeList* remoteCentralFreeList)
1985 {
1986 for (Span* span = &empty_; span && span != &empty_; span = (span->next ? reader(span->next) : 0))
1987 ASSERT(!span->objects);
1988
1989 ASSERT(!nonempty_.objects);
1990 static const ptrdiff_t nonemptyOffset = reinterpret_cast<const char*>(&nonempty_) - reinterpret_cast<const char*>(this);
1991
1992 Span* remoteNonempty = reinterpret_cast<Span*>(reinterpret_cast<char*>(remoteCentralFreeList) + nonemptyOffset);
1993 Span* remoteSpan = nonempty_.next;
1994
1995 for (Span* span = reader(remoteSpan); span && remoteSpan != remoteNonempty; remoteSpan = span->next, span = (span->next ? reader(span->next) : 0)) {
1996 for (void* nextObject = span->objects; nextObject; nextObject = *reader(reinterpret_cast<void**>(nextObject)))
1997 finder.visit(nextObject);
1998 }
1999 }
2000#endif
2001
2002 private:
2003 // REQUIRES: lock_ is held
2004 // Remove object from cache and return.
2005 // Return NULL if no free entries in cache.
2006 void* FetchFromSpans();
2007
2008 // REQUIRES: lock_ is held
2009 // Remove object from cache and return. Fetches
2010 // from pageheap if cache is empty. Only returns
2011 // NULL on allocation failure.
2012 void* FetchFromSpansSafe();
2013
2014 // REQUIRES: lock_ is held
2015 // Release a linked list of objects to spans.
2016 // May temporarily release lock_.
2017 void ReleaseListToSpans(void *start);
2018
2019 // REQUIRES: lock_ is held
2020 // Release an object to spans.
2021 // May temporarily release lock_.
2022 void ReleaseToSpans(void* object);
2023
2024 // REQUIRES: lock_ is held
2025 // Populate cache by fetching from the page heap.
2026 // May temporarily release lock_.
2027 void Populate();
2028
2029  // REQUIRES: lock_ is held.
2030 // Tries to make room for a TCEntry. If the cache is full it will try to
2031 // expand it at the cost of some other cache size. Return false if there is
2032 // no space.
2033 bool MakeCacheSpace();
2034
2035 // REQUIRES: lock_ for locked_size_class is held.
2036 // Picks a "random" size class to steal TCEntry slot from. In reality it
2037 // just iterates over the sizeclasses but does so without taking a lock.
2038 // Returns true on success.
2039 // May temporarily lock a "random" size class.
2040 static bool EvictRandomSizeClass(size_t locked_size_class, bool force);
2041
2042 // REQUIRES: lock_ is *not* held.
2043  // Tries to shrink the Cache. If force is true it will release objects to
2044  // spans if that allows it to shrink the cache. Return false if it failed to
2045  // shrink the cache. Decrements cache_size_ on success.
2046 // May temporarily take lock_. If it takes lock_, the locked_size_class
2047  // lock is released to keep the thread from holding two size class locks
2048 // concurrently which could lead to a deadlock.
2049 bool ShrinkCache(int locked_size_class, bool force);
2050
2051  // This lock protects all the data members. tc_slots_ and cache_size_
2052 // may be looked at without holding the lock.
2053 SpinLock lock_;
2054
2055 // We keep linked lists of empty and non-empty spans.
2056 size_t size_class_; // My size class
2057 Span empty_; // Dummy header for list of empty spans
2058 Span nonempty_; // Dummy header for list of non-empty spans
2059 size_t counter_; // Number of free objects in cache entry
2060
2061 // Here we reserve space for TCEntry cache slots. Since one size class can
2062 // end up getting all the TCEntries quota in the system we just preallocate
2063  // a sufficient number of entries here.
2064 TCEntry tc_slots_[kNumTransferEntries];
2065
2066 // Number of currently used cached entries in tc_slots_. This variable is
2067 // updated under a lock but can be read without one.
2068 int32_t used_slots_;
2069 // The current number of slots for this size class. This is an
2070 // adaptive value that is increased if there is lots of traffic
2071 // on a given size class.
2072 int32_t cache_size_;
2073};
2074
2075// Pad each CentralCache object to multiple of 64 bytes
2076class TCMalloc_Central_FreeListPadded : public TCMalloc_Central_FreeList {
2077 private:
2078 char pad_[(64 - (sizeof(TCMalloc_Central_FreeList) % 64)) % 64];
2079};
2080
2081//-------------------------------------------------------------------
2082// Global variables
2083//-------------------------------------------------------------------
2084
2085// Central cache -- a collection of free-lists, one per size-class.
2086// We have a separate lock per free-list to reduce contention.
2087static TCMalloc_Central_FreeListPadded central_cache[kNumClasses];
2088
2089// Page-level allocator
2090static SpinLock pageheap_lock = SPINLOCK_INITIALIZER;
2091static void* pageheap_memory[(sizeof(TCMalloc_PageHeap) + sizeof(void*) - 1) / sizeof(void*)];
2092static bool phinited = false;
2093
2094// Avoid extra level of indirection by making "pageheap" be just an alias
2095// of pageheap_memory.
2096typedef union {
2097 void* m_memory;
2098 TCMalloc_PageHeap* m_pageHeap;
2099} PageHeapUnion;
2100
2101static inline TCMalloc_PageHeap* getPageHeap()
2102{
2103 PageHeapUnion u = { &pageheap_memory[0] };
2104 return u.m_pageHeap;
2105}
2106
2107#define pageheap getPageHeap()
2108
2109// If TLS is available, we also store a copy
2110// of the per-thread object in a __thread variable
2111// since __thread variables are faster to read
2112// than pthread_getspecific(). We still need
2113// pthread_setspecific() because __thread
2114// variables provide no way to run cleanup
2115// code when a thread is destroyed.
2116#ifdef HAVE_TLS
2117static __thread TCMalloc_ThreadCache *threadlocal_heap;
2118#endif
2119// Thread-specific key. Initialization here is somewhat tricky
2120// because some Linux startup code invokes malloc() before it
2121// is in a good enough state to handle pthread_key_create().
2122// Therefore, we use TSD keys only after tsd_inited is set to true.
2123// Until then, we use a slow path to get the heap object.
2124static bool tsd_inited = false;
2125static pthread_key_t heap_key;
2126#if COMPILER(MSVC)
2127DWORD tlsIndex = TLS_OUT_OF_INDEXES;
2128#endif
2129
2130static ALWAYS_INLINE void setThreadHeap(TCMalloc_ThreadCache* heap)
2131{
2132 // still do pthread_setspecific when using MSVC fast TLS to
2133 // benefit from the delete callback.
2134 pthread_setspecific(heap_key, heap);
2135#if COMPILER(MSVC)
2136 TlsSetValue(tlsIndex, heap);
2137#endif
2138}
2139
2140// Allocator for thread heaps
2141static PageHeapAllocator<TCMalloc_ThreadCache> threadheap_allocator;
2142
2143// Linked list of heap objects. Protected by pageheap_lock.
2144static TCMalloc_ThreadCache* thread_heaps = NULL;
2145static int thread_heap_count = 0;
2146
2147// Overall thread cache size. Protected by pageheap_lock.
2148static size_t overall_thread_cache_size = kDefaultOverallThreadCacheSize;
2149
2150// Global per-thread cache size. Writes are protected by
2151// pageheap_lock. Reads are done without any locking, which should be
2152// fine as long as size_t can be written atomically and we don't place
2153// invariants between this variable and other pieces of state.
2154static volatile size_t per_thread_cache_size = kMaxThreadCacheSize;
2155
2156//-------------------------------------------------------------------
2157// Central cache implementation
2158//-------------------------------------------------------------------
2159
2160void TCMalloc_Central_FreeList::Init(size_t cl) {
2161 lock_.Init();
2162 size_class_ = cl;
2163 DLL_Init(&empty_);
2164 DLL_Init(&nonempty_);
2165 counter_ = 0;
2166
2167 cache_size_ = 1;
2168 used_slots_ = 0;
2169 ASSERT(cache_size_ <= kNumTransferEntries);
2170}
2171
2172void TCMalloc_Central_FreeList::ReleaseListToSpans(void* start) {
2173 while (start) {
2174 void *next = SLL_Next(start);
2175 ReleaseToSpans(start);
2176 start = next;
2177 }
2178}
2179
2180ALWAYS_INLINE void TCMalloc_Central_FreeList::ReleaseToSpans(void* object) {
2181 const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift;
2182 Span* span = pageheap->GetDescriptor(p);
2183 ASSERT(span != NULL);
2184 ASSERT(span->refcount > 0);
2185
2186  // The span had no free objects; it is about to get one back, so move it to the non-empty list
2187 if (span->objects == NULL) {
2188 DLL_Remove(span);
2189 DLL_Prepend(&nonempty_, span);
2190 Event(span, 'N', 0);
2191 }
2192
2193 // The following check is expensive, so it is disabled by default
2194 if (false) {
2195 // Check that object does not occur in list
2196 int got = 0;
2197 for (void* p = span->objects; p != NULL; p = *((void**) p)) {
2198 ASSERT(p != object);
2199 got++;
2200 }
2201 ASSERT(got + span->refcount ==
2202 (span->length<<kPageShift)/ByteSizeForClass(span->sizeclass));
2203 }
2204
2205 counter_++;
2206 span->refcount--;
2207 if (span->refcount == 0) {
2208 Event(span, '#', 0);
2209 counter_ -= (span->length<<kPageShift) / ByteSizeForClass(span->sizeclass);
2210 DLL_Remove(span);
2211
2212 // Release central list lock while operating on pageheap
2213 lock_.Unlock();
2214 {
2215 SpinLockHolder h(&pageheap_lock);
2216 pageheap->Delete(span);
2217 }
2218 lock_.Lock();
2219 } else {
2220 *(reinterpret_cast<void**>(object)) = span->objects;
2221 span->objects = object;
2222 }
2223}
2224
2225ALWAYS_INLINE bool TCMalloc_Central_FreeList::EvictRandomSizeClass(
2226 size_t locked_size_class, bool force) {
2227 static int race_counter = 0;
2228 int t = race_counter++; // Updated without a lock, but who cares.
2229 if (t >= static_cast<int>(kNumClasses)) {
2230 while (t >= static_cast<int>(kNumClasses)) {
2231 t -= kNumClasses;
2232 }
2233 race_counter = t;
2234 }
2235 ASSERT(t >= 0);
2236 ASSERT(t < static_cast<int>(kNumClasses));
2237 if (t == static_cast<int>(locked_size_class)) return false;
2238 return central_cache[t].ShrinkCache(static_cast<int>(locked_size_class), force);
2239}
2240
2241bool TCMalloc_Central_FreeList::MakeCacheSpace() {
2242 // Is there room in the cache?
2243 if (used_slots_ < cache_size_) return true;
2244 // Check if we can expand this cache?
2245 if (cache_size_ == kNumTransferEntries) return false;
2246 // Ok, we'll try to grab an entry from some other size class.
2247 if (EvictRandomSizeClass(size_class_, false) ||
2248 EvictRandomSizeClass(size_class_, true)) {
2249 // Succeeded in evicting, we're going to make our cache larger.
2250 cache_size_++;
2251 return true;
2252 }
2253 return false;
2254}
2255
2256
2257namespace {
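// RAII helper that swaps which of two spin locks is held: the constructor
// releases "held" and acquires "temp", and the destructor restores the
// original state.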
2258class LockInverter {
2259 private:
2260 SpinLock *held_, *temp_;
2261 public:
2262 inline explicit LockInverter(SpinLock* held, SpinLock *temp)
2263 : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); }
2264 inline ~LockInverter() { temp_->Unlock(); held_->Lock(); }
2265};
2266}
2267
2268bool TCMalloc_Central_FreeList::ShrinkCache(int locked_size_class, bool force) {
2269 // Start with a quick check without taking a lock.
2270 if (cache_size_ == 0) return false;
2271 // We don't evict from a full cache unless we are 'forcing'.
2272 if (force == false && used_slots_ == cache_size_) return false;
2273
2274 // Grab lock, but first release the other lock held by this thread. We use
2275 // the lock inverter to ensure that we never hold two size class locks
2276 // concurrently. That can create a deadlock because there is no well
2277 // defined nesting order.
2278 LockInverter li(&central_cache[locked_size_class].lock_, &lock_);
2279 ASSERT(used_slots_ <= cache_size_);
2280 ASSERT(0 <= cache_size_);
2281 if (cache_size_ == 0) return false;
2282 if (used_slots_ == cache_size_) {
2283 if (force == false) return false;
2284 // ReleaseListToSpans releases the lock, so we have to make all the
2285 // updates to the central list before calling it.
2286 cache_size_--;
2287 used_slots_--;
2288 ReleaseListToSpans(tc_slots_[used_slots_].head);
2289 return true;
2290 }
2291 cache_size_--;
2292 return true;
2293}
2294
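// A batch of exactly num_objects_to_move objects is parked in a transfer-cache
// slot when room can be made for it; anything else is released back onto the
// owning spans.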
2295void TCMalloc_Central_FreeList::InsertRange(void *start, void *end, int N) {
2296 SpinLockHolder h(&lock_);
2297 if (N == num_objects_to_move[size_class_] &&
2298 MakeCacheSpace()) {
2299 int slot = used_slots_++;
2300 ASSERT(slot >=0);
2301 ASSERT(slot < kNumTransferEntries);
2302 TCEntry *entry = &tc_slots_[slot];
2303 entry->head = start;
2304 entry->tail = end;
2305 return;
2306 }
2307 ReleaseListToSpans(start);
2308}
2309
2310void TCMalloc_Central_FreeList::RemoveRange(void **start, void **end, int *N) {
2311 int num = *N;
2312 ASSERT(num > 0);
2313
2314 SpinLockHolder h(&lock_);
2315 if (num == num_objects_to_move[size_class_] && used_slots_ > 0) {
2316 int slot = --used_slots_;
2317 ASSERT(slot >= 0);
2318 TCEntry *entry = &tc_slots_[slot];
2319 *start = entry->head;
2320 *end = entry->tail;
2321 return;
2322 }
2323
2324 // TODO: Prefetch multiple TCEntries?
2325 void *tail = FetchFromSpansSafe();
2326 if (!tail) {
2327 // We are completely out of memory.
2328 *start = *end = NULL;
2329 *N = 0;
2330 return;
2331 }
2332
2333 SLL_SetNext(tail, NULL);
2334 void *head = tail;
2335 int count = 1;
2336 while (count < num) {
2337 void *t = FetchFromSpans();
2338 if (!t) break;
2339 SLL_Push(&head, t);
2340 count++;
2341 }
2342 *start = head;
2343 *end = tail;
2344 *N = count;
2345}
2346
2347
2348void* TCMalloc_Central_FreeList::FetchFromSpansSafe() {
2349 void *t = FetchFromSpans();
2350 if (!t) {
2351 Populate();
2352 t = FetchFromSpans();
2353 }
2354 return t;
2355}
2356
2357void* TCMalloc_Central_FreeList::FetchFromSpans() {
2358 if (DLL_IsEmpty(&nonempty_)) return NULL;
2359 Span* span = nonempty_.next;
2360
2361 ASSERT(span->objects != NULL);
2362 ASSERT_SPAN_COMMITTED(span);
2363 span->refcount++;
2364 void* result = span->objects;
2365 span->objects = *(reinterpret_cast<void**>(result));
2366 if (span->objects == NULL) {
2367 // Move to empty list
2368 DLL_Remove(span);
2369 DLL_Prepend(&empty_, span);
2370 Event(span, 'E', 0);
2371 }
2372 counter_--;
2373 return result;
2374}
2375
2376// Fetch memory from the system and add to the central cache freelist.
2377ALWAYS_INLINE void TCMalloc_Central_FreeList::Populate() {
2378 // Release central list lock while operating on pageheap
2379 lock_.Unlock();
2380 const size_t npages = class_to_pages[size_class_];
2381
2382 Span* span;
2383 {
2384 SpinLockHolder h(&pageheap_lock);
2385 span = pageheap->New(npages);
2386 if (span) pageheap->RegisterSizeClass(span, size_class_);
2387 }
2388 if (span == NULL) {
2389 MESSAGE("allocation failed: %d\n", errno);
2390 lock_.Lock();
2391 return;
2392 }
2393 ASSERT_SPAN_COMMITTED(span);
2394 ASSERT(span->length == npages);
2395 // Cache sizeclass info eagerly. Locking is not necessary.
2396 // (Instead of being eager, we could just replace any stale info
2397 // about this span, but that seems to be no better in practice.)
2398 for (size_t i = 0; i < npages; i++) {
2399 pageheap->CacheSizeClass(span->start + i, size_class_);
2400 }
2401
2402 // Split the block into pieces and add to the free-list
2403 // TODO: coloring of objects to avoid cache conflicts?
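  // Thread a singly-linked free list through the new span: each object's first
  // word points at the next object, and the last object's first word is NULL.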
2404 void** tail = &span->objects;
2405 char* ptr = reinterpret_cast<char*>(span->start << kPageShift);
2406 char* limit = ptr + (npages << kPageShift);
2407 const size_t size = ByteSizeForClass(size_class_);
2408 int num = 0;
2409 char* nptr;
2410 while ((nptr = ptr + size) <= limit) {
2411 *tail = ptr;
2412 tail = reinterpret_cast<void**>(ptr);
2413 ptr = nptr;
2414 num++;
2415 }
2416 ASSERT(ptr <= limit);
2417 *tail = NULL;
2418 span->refcount = 0; // No sub-object in use yet
2419
2420 // Add span to list of non-empty spans
2421 lock_.Lock();
2422 DLL_Prepend(&nonempty_, span);
2423 counter_ += num;
2424}
2425
2426//-------------------------------------------------------------------
2427// TCMalloc_ThreadCache implementation
2428//-------------------------------------------------------------------
2429
2430inline bool TCMalloc_ThreadCache::SampleAllocation(size_t k) {
2431 if (bytes_until_sample_ < k) {
2432 PickNextSample(k);
2433 return true;
2434 } else {
2435 bytes_until_sample_ -= k;
2436 return false;
2437 }
2438}
2439
2440void TCMalloc_ThreadCache::Init(ThreadIdentifier tid) {
2441 size_ = 0;
2442 next_ = NULL;
2443 prev_ = NULL;
2444 tid_ = tid;
2445 in_setspecific_ = false;
2446 for (size_t cl = 0; cl < kNumClasses; ++cl) {
2447 list_[cl].Init();
2448 }
2449
2450 // Initialize RNG -- run it for a bit to get to good values
2451 bytes_until_sample_ = 0;
2452 rnd_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this));
2453 for (int i = 0; i < 100; i++) {
2454 PickNextSample(static_cast<size_t>(FLAGS_tcmalloc_sample_parameter * 2));
2455 }
2456}
2457
2458void TCMalloc_ThreadCache::Cleanup() {
2459 // Put unused memory back into central cache
2460 for (size_t cl = 0; cl < kNumClasses; ++cl) {
2461 if (list_[cl].length() > 0) {
2462 ReleaseToCentralCache(cl, list_[cl].length());
2463 }
2464 }
2465}
2466
2467ALWAYS_INLINE void* TCMalloc_ThreadCache::Allocate(size_t size) {
2468 ASSERT(size <= kMaxSize);
2469 const size_t cl = SizeClass(size);
2470 FreeList* list = &list_[cl];
2471 size_t allocationSize = ByteSizeForClass(cl);
2472 if (list->empty()) {
2473 FetchFromCentralCache(cl, allocationSize);
2474 if (list->empty()) return NULL;
2475 }
2476 size_ -= allocationSize;
2477 return list->Pop();
2478}
2479
2480inline void TCMalloc_ThreadCache::Deallocate(void* ptr, size_t cl) {
2481 size_ += ByteSizeForClass(cl);
2482 FreeList* list = &list_[cl];
2483 list->Push(ptr);
2484 // If enough data is free, put back into central cache
2485 if (list->length() > kMaxFreeListLength) {
2486 ReleaseToCentralCache(cl, num_objects_to_move[cl]);
2487 }
2488 if (size_ >= per_thread_cache_size) Scavenge();
2489}
2490
2491// Remove some objects of class "cl" from central cache and add to thread heap
2492ALWAYS_INLINE void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl, size_t allocationSize) {
2493 int fetch_count = num_objects_to_move[cl];
2494 void *start, *end;
2495 central_cache[cl].RemoveRange(&start, &end, &fetch_count);
2496 list_[cl].PushRange(fetch_count, start, end);
2497 size_ += allocationSize * fetch_count;
2498}
2499
2500// Remove some objects of class "cl" from thread heap and add to central cache
2501inline void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) {
2502 ASSERT(N > 0);
2503 FreeList* src = &list_[cl];
2504 if (N > src->length()) N = src->length();
2505 size_ -= N*ByteSizeForClass(cl);
2506
2507 // We return prepackaged chains of the correct size to the central cache.
2508 // TODO: Use the same format internally in the thread caches?
2509 int batch_size = num_objects_to_move[cl];
2510 while (N > batch_size) {
2511 void *tail, *head;
2512 src->PopRange(batch_size, &head, &tail);
2513 central_cache[cl].InsertRange(head, tail, batch_size);
2514 N -= batch_size;
2515 }
2516 void *tail, *head;
2517 src->PopRange(N, &head, &tail);
2518 central_cache[cl].InsertRange(head, tail, N);
2519}
2520
2521// Release idle memory to the central cache
2522inline void TCMalloc_ThreadCache::Scavenge() {
2523 // If the low-water mark for the free list is L, it means we would
2524 // not have had to allocate anything from the central cache even if
2525 // we had reduced the free list size by L. We aim to get closer to
2526 // that situation by dropping L/2 nodes from the free list. This
2527 // may not release much memory, but if so we will call scavenge again
2528 // pretty soon and the low-water marks will be high on that call.
2529 //int64 start = CycleClock::Now();
2530
2531 for (size_t cl = 0; cl < kNumClasses; cl++) {
2532 FreeList* list = &list_[cl];
2533 const int lowmark = list->lowwatermark();
2534 if (lowmark > 0) {
2535 const int drop = (lowmark > 1) ? lowmark/2 : 1;
2536 ReleaseToCentralCache(cl, drop);
2537 }
2538 list->clear_lowwatermark();
2539 }
2540
2541 //int64 finish = CycleClock::Now();
2542 //CycleTimer ct;
2543 //MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0);
2544}
2545
2546void TCMalloc_ThreadCache::PickNextSample(size_t k) {
2547 // Make next "random" number
2548 // x^32+x^22+x^2+x^1+1 is a primitive polynomial for random numbers
2549 static const uint32_t kPoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0);
2550 uint32_t r = rnd_;
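  // Galois LFSR step: shift left by one and, if the old high bit was set, XOR
  // in the polynomial (the arithmetic right shift of the sign bit produces an
  // all-ones mask).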
2551 rnd_ = (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly);
2552
2553 // Next point is "rnd_ % (sample_period)". I.e., average
2554 // increment is "sample_period/2".
2555 const int flag_value = static_cast<int>(FLAGS_tcmalloc_sample_parameter);
2556 static int last_flag_value = -1;
2557
2558 if (flag_value != last_flag_value) {
2559 SpinLockHolder h(&sample_period_lock);
2560 int i;
2561 for (i = 0; i < (static_cast<int>(sizeof(primes_list)/sizeof(primes_list[0])) - 1); i++) {
2562 if (primes_list[i] >= flag_value) {
2563 break;
2564 }
2565 }
2566 sample_period = primes_list[i];
2567 last_flag_value = flag_value;
2568 }
2569
2570 bytes_until_sample_ += rnd_ % sample_period;
2571
2572 if (k > (static_cast<size_t>(-1) >> 2)) {
2573 // If the user has asked for a huge allocation then it is possible
2574 // for the code below to loop infinitely. Just return (note that
2575 // this throws off the sampling accuracy somewhat, but a user who
2576 // is allocating more than 1G of memory at a time can live with a
2577 // minor inaccuracy in profiling of small allocations, and also
2578 // would rather not wait for the loop below to terminate).
2579 return;
2580 }
2581
2582 while (bytes_until_sample_ < k) {
2583 // Increase bytes_until_sample_ by enough average sampling periods
2584 // (sample_period >> 1) to allow us to sample past the current
2585 // allocation.
2586 bytes_until_sample_ += (sample_period >> 1);
2587 }
2588
2589 bytes_until_sample_ -= k;
2590}
2591
2592void TCMalloc_ThreadCache::InitModule() {
2593 // There is a slight potential race here because of double-checked
2594 // locking idiom. However, as long as the program does a small
2595 // allocation before switching to multi-threaded mode, we will be
2596 // fine. We increase the chances of doing such a small allocation
2597 // by doing one in the constructor of the module_enter_exit_hook
2598 // object declared below.
2599 SpinLockHolder h(&pageheap_lock);
2600 if (!phinited) {
2601#ifdef WTF_CHANGES
2602 InitTSD();
2603#endif
2604 InitSizeClasses();
2605 threadheap_allocator.Init();
2606 span_allocator.Init();
2607 span_allocator.New(); // Reduce cache conflicts
2608 span_allocator.New(); // Reduce cache conflicts
2609 stacktrace_allocator.Init();
2610 DLL_Init(&sampled_objects);
2611 for (size_t i = 0; i < kNumClasses; ++i) {
2612 central_cache[i].Init(i);
2613 }
2614 pageheap->init();
2615 phinited = 1;
2616#if defined(WTF_CHANGES) && PLATFORM(DARWIN)
2617 FastMallocZone::init();
2618#endif
2619 }
2620}
2621
2622inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::NewHeap(ThreadIdentifier tid) {
2623 // Create the heap and add it to the linked list
2624 TCMalloc_ThreadCache *heap = threadheap_allocator.New();
2625 heap->Init(tid);
2626 heap->next_ = thread_heaps;
2627 heap->prev_ = NULL;
2628 if (thread_heaps != NULL) thread_heaps->prev_ = heap;
2629 thread_heaps = heap;
2630 thread_heap_count++;
2631 RecomputeThreadCacheSize();
2632 return heap;
2633}
2634
2635inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetThreadHeap() {
2636#ifdef HAVE_TLS
2637 // __thread is faster, but only when the kernel supports it
2638 if (KernelSupportsTLS())
2639 return threadlocal_heap;
2640#elif COMPILER(MSVC)
2641 return static_cast<TCMalloc_ThreadCache*>(TlsGetValue(tlsIndex));
2642#else
2643 return static_cast<TCMalloc_ThreadCache*>(pthread_getspecific(heap_key));
2644#endif
2645}
2646
2647inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() {
2648 TCMalloc_ThreadCache* ptr = NULL;
2649 if (!tsd_inited) {
2650 InitModule();
2651 } else {
2652 ptr = GetThreadHeap();
2653 }
2654 if (ptr == NULL) ptr = CreateCacheIfNecessary();
2655 return ptr;
2656}
2657
2658// In deletion paths, we do not try to create a thread-cache. This is
2659// because we may be in the thread destruction code and may have
2660// already cleaned up the cache for this thread.
2661inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() {
2662 if (!tsd_inited) return NULL;
2663 void* const p = GetThreadHeap();
2664 return reinterpret_cast<TCMalloc_ThreadCache*>(p);
2665}
2666
2667void TCMalloc_ThreadCache::InitTSD() {
2668 ASSERT(!tsd_inited);
2669 pthread_key_create(&heap_key, DestroyThreadCache);
2670#if COMPILER(MSVC)
2671 tlsIndex = TlsAlloc();
2672#endif
2673 tsd_inited = true;
2674
2675#if !COMPILER(MSVC)
2676 // We may have used a fake pthread_t for the main thread. Fix it.
2677 pthread_t zero;
2678 memset(&zero, 0, sizeof(zero));
2679#endif
2680#ifndef WTF_CHANGES
2681 SpinLockHolder h(&pageheap_lock);
2682#else
2683 ASSERT(pageheap_lock.IsHeld());
2684#endif
2685 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
2686#if COMPILER(MSVC)
2687 if (h->tid_ == 0) {
2688 h->tid_ = GetCurrentThreadId();
2689 }
2690#else
2691 if (pthread_equal(h->tid_, zero)) {
2692 h->tid_ = pthread_self();
2693 }
2694#endif
2695 }
2696}
2697
2698TCMalloc_ThreadCache* TCMalloc_ThreadCache::CreateCacheIfNecessary() {
2699 // Initialize per-thread data if necessary
2700 TCMalloc_ThreadCache* heap = NULL;
2701 {
2702 SpinLockHolder h(&pageheap_lock);
2703
2704#if COMPILER(MSVC)
2705 DWORD me;
2706 if (!tsd_inited) {
2707 me = 0;
2708 } else {
2709 me = GetCurrentThreadId();
2710 }
2711#else
2712 // Early on in glibc's life, we cannot even call pthread_self()
2713 pthread_t me;
2714 if (!tsd_inited) {
2715 memset(&me, 0, sizeof(me));
2716 } else {
2717 me = pthread_self();
2718 }
2719#endif
2720
2721 // This may be a recursive malloc call from pthread_setspecific()
2722 // In that case, the heap for this thread has already been created
2723 // and added to the linked list. So we search for that first.
2724 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
2725#if COMPILER(MSVC)
2726 if (h->tid_ == me) {
2727#else
2728 if (pthread_equal(h->tid_, me)) {
2729#endif
2730 heap = h;
2731 break;
2732 }
2733 }
2734
2735 if (heap == NULL) heap = NewHeap(me);
2736 }
2737
2738 // We call pthread_setspecific() outside the lock because it may
2739 // call malloc() recursively. The recursive call will never get
2740 // here again because it will find the already allocated heap in the
2741 // linked list of heaps.
2742 if (!heap->in_setspecific_ && tsd_inited) {
2743 heap->in_setspecific_ = true;
2744 setThreadHeap(heap);
2745 }
2746 return heap;
2747}
2748
2749void TCMalloc_ThreadCache::BecomeIdle() {
2750 if (!tsd_inited) return; // No caches yet
2751 TCMalloc_ThreadCache* heap = GetThreadHeap();
2752 if (heap == NULL) return; // No thread cache to remove
2753 if (heap->in_setspecific_) return; // Do not disturb the active caller
2754
2755 heap->in_setspecific_ = true;
2756 pthread_setspecific(heap_key, NULL);
2757#ifdef HAVE_TLS
2758 // Also update the copy in __thread
2759 threadlocal_heap = NULL;
2760#endif
2761 heap->in_setspecific_ = false;
2762 if (GetThreadHeap() == heap) {
2763 // Somehow heap got reinstated by a recursive call to malloc
2764 // from pthread_setspecific. We give up in this case.
2765 return;
2766 }
2767
2768 // We can now get rid of the heap
2769 DeleteCache(heap);
2770}
2771
2772void TCMalloc_ThreadCache::DestroyThreadCache(void* ptr) {
2773 // Note that "ptr" cannot be NULL since pthread promises not
2774 // to invoke the destructor on NULL values, but for safety,
2775 // we check anyway.
2776 if (ptr == NULL) return;
2777#ifdef HAVE_TLS
2778 // Prevent fast path of GetThreadHeap() from returning heap.
2779 threadlocal_heap = NULL;
2780#endif
2781 DeleteCache(reinterpret_cast<TCMalloc_ThreadCache*>(ptr));
2782}
2783
2784void TCMalloc_ThreadCache::DeleteCache(TCMalloc_ThreadCache* heap) {
2785 // Remove all memory from heap
2786 heap->Cleanup();
2787
2788 // Remove from linked list
2789 SpinLockHolder h(&pageheap_lock);
2790 if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_;
2791 if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_;
2792 if (thread_heaps == heap) thread_heaps = heap->next_;
2793 thread_heap_count--;
2794 RecomputeThreadCacheSize();
2795
2796 threadheap_allocator.Delete(heap);
2797}
2798
2799void TCMalloc_ThreadCache::RecomputeThreadCacheSize() {
2800 // Divide available space across threads
2801 int n = thread_heap_count > 0 ? thread_heap_count : 1;
2802 size_t space = overall_thread_cache_size / n;
2803
2804 // Limit to allowed range
2805 if (space < kMinThreadCacheSize) space = kMinThreadCacheSize;
2806 if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize;
2807
2808 per_thread_cache_size = space;
2809}
2810
2811void TCMalloc_ThreadCache::Print() const {
2812 for (size_t cl = 0; cl < kNumClasses; ++cl) {
2813 MESSAGE(" %5" PRIuS " : %4d len; %4d lo\n",
2814 ByteSizeForClass(cl),
2815 list_[cl].length(),
2816 list_[cl].lowwatermark());
2817 }
2818}
2819
2820// Extract interesting stats
2821struct TCMallocStats {
2822 uint64_t system_bytes; // Bytes alloced from system
2823 uint64_t thread_bytes; // Bytes in thread caches
2824 uint64_t central_bytes; // Bytes in central cache
2825 uint64_t transfer_bytes; // Bytes in central transfer cache
2826 uint64_t pageheap_bytes; // Bytes in page heap
2827 uint64_t metadata_bytes; // Bytes alloced for metadata
2828};
2829
2830#ifndef WTF_CHANGES
2831// Get stats into "r". Also get per-size-class counts if class_count != NULL
2832static void ExtractStats(TCMallocStats* r, uint64_t* class_count) {
2833 r->central_bytes = 0;
2834 r->transfer_bytes = 0;
2835 for (int cl = 0; cl < kNumClasses; ++cl) {
2836 const int length = central_cache[cl].length();
2837 const int tc_length = central_cache[cl].tc_length();
2838 r->central_bytes += static_cast<uint64_t>(ByteSizeForClass(cl)) * length;
2839 r->transfer_bytes +=
2840 static_cast<uint64_t>(ByteSizeForClass(cl)) * tc_length;
2841 if (class_count) class_count[cl] = length + tc_length;
2842 }
2843
2844 // Add stats from per-thread heaps
2845 r->thread_bytes = 0;
2846 { // scope
2847 SpinLockHolder h(&pageheap_lock);
2848 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
2849 r->thread_bytes += h->Size();
2850 if (class_count) {
2851 for (size_t cl = 0; cl < kNumClasses; ++cl) {
2852 class_count[cl] += h->freelist_length(cl);
2853 }
2854 }
2855 }
2856 }
2857
2858 { //scope
2859 SpinLockHolder h(&pageheap_lock);
2860 r->system_bytes = pageheap->SystemBytes();
2861 r->metadata_bytes = metadata_system_bytes;
2862 r->pageheap_bytes = pageheap->FreeBytes();
2863 }
2864}
2865#endif
2866
2867#ifndef WTF_CHANGES
2868// WRITE stats to "out"
2869static void DumpStats(TCMalloc_Printer* out, int level) {
2870 TCMallocStats stats;
2871 uint64_t class_count[kNumClasses];
2872 ExtractStats(&stats, (level >= 2 ? class_count : NULL));
2873
2874 if (level >= 2) {
2875 out->printf("------------------------------------------------\n");
2876 uint64_t cumulative = 0;
2877 for (int cl = 0; cl < kNumClasses; ++cl) {
2878 if (class_count[cl] > 0) {
2879 uint64_t class_bytes = class_count[cl] * ByteSizeForClass(cl);
2880 cumulative += class_bytes;
2881 out->printf("class %3d [ %8" PRIuS " bytes ] : "
2882 "%8" PRIu64 " objs; %5.1f MB; %5.1f cum MB\n",
2883 cl, ByteSizeForClass(cl),
2884 class_count[cl],
2885 class_bytes / 1048576.0,
2886 cumulative / 1048576.0);
2887 }
2888 }
2889
2890 SpinLockHolder h(&pageheap_lock);
2891 pageheap->Dump(out);
2892 }
2893
2894 const uint64_t bytes_in_use = stats.system_bytes
2895 - stats.pageheap_bytes
2896 - stats.central_bytes
2897 - stats.transfer_bytes
2898 - stats.thread_bytes;
2899
2900 out->printf("------------------------------------------------\n"
2901 "MALLOC: %12" PRIu64 " Heap size\n"
2902 "MALLOC: %12" PRIu64 " Bytes in use by application\n"
2903 "MALLOC: %12" PRIu64 " Bytes free in page heap\n"
2904 "MALLOC: %12" PRIu64 " Bytes free in central cache\n"
2905 "MALLOC: %12" PRIu64 " Bytes free in transfer cache\n"
2906 "MALLOC: %12" PRIu64 " Bytes free in thread caches\n"
2907 "MALLOC: %12" PRIu64 " Spans in use\n"
2908 "MALLOC: %12" PRIu64 " Thread heaps in use\n"
2909 "MALLOC: %12" PRIu64 " Metadata allocated\n"
2910 "------------------------------------------------\n",
2911 stats.system_bytes,
2912 bytes_in_use,
2913 stats.pageheap_bytes,
2914 stats.central_bytes,
2915 stats.transfer_bytes,
2916 stats.thread_bytes,
2917 uint64_t(span_allocator.inuse()),
2918 uint64_t(threadheap_allocator.inuse()),
2919 stats.metadata_bytes);
2920}
2921
2922static void PrintStats(int level) {
2923 const int kBufferSize = 16 << 10;
2924 char* buffer = new char[kBufferSize];
2925 TCMalloc_Printer printer(buffer, kBufferSize);
2926 DumpStats(&printer, level);
2927 write(STDERR_FILENO, buffer, strlen(buffer));
2928 delete[] buffer;
2929}
2930
2931static void** DumpStackTraces() {
2932 // Count how much space we need
2933 int needed_slots = 0;
2934 {
2935 SpinLockHolder h(&pageheap_lock);
2936 for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) {
2937 StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects);
2938 needed_slots += 3 + stack->depth;
2939 }
2940 needed_slots += 100; // Slop in case sample grows
2941 needed_slots += needed_slots/8; // An extra 12.5% slop
2942 }
2943
2944 void** result = new void*[needed_slots];
2945 if (result == NULL) {
2946 MESSAGE("tcmalloc: could not allocate %d slots for stack traces\n",
2947 needed_slots);
2948 return NULL;
2949 }
2950
2951 SpinLockHolder h(&pageheap_lock);
2952 int used_slots = 0;
2953 for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) {
2954 ASSERT(used_slots < needed_slots); // Need to leave room for terminator
2955 StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects);
2956 if (used_slots + 3 + stack->depth >= needed_slots) {
2957 // No more room
2958 break;
2959 }
2960
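    // Each record is: an object count (always 1 here), the sampled allocation
    // size, the stack depth, then "depth" program counters; the whole array is
    // terminated by the zero count written after the loop.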
2961 result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1));
2962 result[used_slots+1] = reinterpret_cast<void*>(stack->size);
2963 result[used_slots+2] = reinterpret_cast<void*>(stack->depth);
2964 for (int d = 0; d < stack->depth; d++) {
2965 result[used_slots+3+d] = stack->stack[d];
2966 }
2967 used_slots += 3 + stack->depth;
2968 }
2969 result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0));
2970 return result;
2971}
2972#endif
2973
2974#ifndef WTF_CHANGES
2975
2976// TCMalloc's support for extra malloc interfaces
2977class TCMallocImplementation : public MallocExtension {
2978 public:
2979 virtual void GetStats(char* buffer, int buffer_length) {
2980 ASSERT(buffer_length > 0);
2981 TCMalloc_Printer printer(buffer, buffer_length);
2982
2983 // Print level one stats unless lots of space is available
2984 if (buffer_length < 10000) {
2985 DumpStats(&printer, 1);
2986 } else {
2987 DumpStats(&printer, 2);
2988 }
2989 }
2990
2991 virtual void** ReadStackTraces() {
2992 return DumpStackTraces();
2993 }
2994
2995 virtual bool GetNumericProperty(const char* name, size_t* value) {
2996 ASSERT(name != NULL);
2997
2998 if (strcmp(name, "generic.current_allocated_bytes") == 0) {
2999 TCMallocStats stats;
3000 ExtractStats(&stats, NULL);
3001 *value = stats.system_bytes
3002 - stats.thread_bytes
3003 - stats.central_bytes
3004 - stats.pageheap_bytes;
3005 return true;
3006 }
3007
3008 if (strcmp(name, "generic.heap_size") == 0) {
3009 TCMallocStats stats;
3010 ExtractStats(&stats, NULL);
3011 *value = stats.system_bytes;
3012 return true;
3013 }
3014
3015 if (strcmp(name, "tcmalloc.slack_bytes") == 0) {
3016 // We assume that bytes in the page heap are not fragmented too
3017 // badly, and are therefore available for allocation.
3018 SpinLockHolder l(&pageheap_lock);
3019 *value = pageheap->FreeBytes();
3020 return true;
3021 }
3022
3023 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
3024 SpinLockHolder l(&pageheap_lock);
3025 *value = overall_thread_cache_size;
3026 return true;
3027 }
3028
3029 if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) {
3030 TCMallocStats stats;
3031 ExtractStats(&stats, NULL);
3032 *value = stats.thread_bytes;
3033 return true;
3034 }
3035
3036 return false;
3037 }
3038
3039 virtual bool SetNumericProperty(const char* name, size_t value) {
3040 ASSERT(name != NULL);
3041
3042 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
3043 // Clip the value to a reasonable range
3044 if (value < kMinThreadCacheSize) value = kMinThreadCacheSize;
3045 if (value > (1<<30)) value = (1<<30); // Limit to 1GB
3046
3047 SpinLockHolder l(&pageheap_lock);
3048 overall_thread_cache_size = static_cast<size_t>(value);
3049 TCMalloc_ThreadCache::RecomputeThreadCacheSize();
3050 return true;
3051 }
3052
3053 return false;
3054 }
3055
3056 virtual void MarkThreadIdle() {
3057 TCMalloc_ThreadCache::BecomeIdle();
3058 }
3059
3060 virtual void ReleaseFreeMemory() {
3061 SpinLockHolder h(&pageheap_lock);
3062 pageheap->ReleaseFreePages();
3063 }
3064};
3065#endif
3066
3067// The constructor allocates an object to ensure that initialization
3068// runs before main(), and therefore we do not have a chance to become
3069// multi-threaded before initialization. We also create the TSD key
3070// here. Presumably by the time this constructor runs, glibc is in
3071// good enough shape to handle pthread_key_create().
3072//
3073// The constructor also takes the opportunity to tell STL to use
3074// tcmalloc. We want to do this early, before other constructors run, so
3075// all user STL allocations go through tcmalloc (which works really
3076// well for STL).
3077//
3078// The destructor prints stats when the program exits.
3079class TCMallocGuard {
3080 public:
3081
3082 TCMallocGuard() {
3083#ifdef HAVE_TLS // this is true if the cc/ld/libc combo supports TLS
3084 // Check whether the kernel also supports TLS (needs to happen at runtime)
3085 CheckIfKernelSupportsTLS();
3086#endif
3087#ifndef WTF_CHANGES
3088#ifdef WIN32 // patch the windows VirtualAlloc, etc.
3089 PatchWindowsFunctions(); // defined in windows/patch_functions.cc
3090#endif
3091#endif
3092 free(malloc(1));
3093 TCMalloc_ThreadCache::InitTSD();
3094 free(malloc(1));
3095#ifndef WTF_CHANGES
3096 MallocExtension::Register(new TCMallocImplementation);
3097#endif
3098 }
3099
3100#ifndef WTF_CHANGES
3101 ~TCMallocGuard() {
3102 const char* env = getenv("MALLOCSTATS");
3103 if (env != NULL) {
3104 int level = atoi(env);
3105 if (level < 1) level = 1;
3106 PrintStats(level);
3107 }
3108#ifdef WIN32
3109 UnpatchWindowsFunctions();
3110#endif
3111 }
3112#endif
3113};
3114
3115#ifndef WTF_CHANGES
3116static TCMallocGuard module_enter_exit_hook;
3117#endif
3118
3119
3120//-------------------------------------------------------------------
3121// Helpers for the exported routines below
3122//-------------------------------------------------------------------
3123
3124#ifndef WTF_CHANGES
3125
3126static Span* DoSampledAllocation(size_t size) {
3127
3128 // Grab the stack trace outside the heap lock
3129 StackTrace tmp;
3130 tmp.depth = GetStackTrace(tmp.stack, kMaxStackDepth, 1);
3131 tmp.size = size;
3132
3133 SpinLockHolder h(&pageheap_lock);
3134 // Allocate span
3135 Span *span = pageheap->New(pages(size == 0 ? 1 : size));
3136 if (span == NULL) {
3137 return NULL;
3138 }
3139
3140 // Allocate stack trace
3141 StackTrace *stack = stacktrace_allocator.New();
3142 if (stack == NULL) {
3143 // Sampling failed because of lack of memory
3144 return span;
3145 }
3146
3147 *stack = tmp;
3148 span->sample = 1;
3149 span->objects = stack;
3150 DLL_Prepend(&sampled_objects, span);
3151
3152 return span;
3153}
3154#endif
3155
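// Debugging aid: the size-class cache may have no entry for a page (in which
// case it reports 0), but when it does have one it must agree with the size
// class recorded in the page's span.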
3156static inline bool CheckCachedSizeClass(void *ptr) {
3157 PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
3158 size_t cached_value = pageheap->GetSizeClassIfCached(p);
3159 return cached_value == 0 ||
3160 cached_value == pageheap->GetDescriptor(p)->sizeclass;
3161}
3162
3163static inline void* CheckedMallocResult(void *result)
3164{
3165 ASSERT(result == 0 || CheckCachedSizeClass(result));
3166 return result;
3167}
3168
3169static inline void* SpanToMallocResult(Span *span) {
3170 ASSERT_SPAN_COMMITTED(span);
3171 pageheap->CacheSizeClass(span->start, 0);
3172 return
3173 CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
3174}
3175
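// When crashOnFailure is true (fastMalloc and friends) an allocation failure
// calls CRASH(); when it is false (the tryFastMalloc variants) NULL is
// returned and the caller is responsible for checking it.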
3176#ifdef WTF_CHANGES
3177template <bool crashOnFailure>
3178#endif
3179static ALWAYS_INLINE void* do_malloc(size_t size) {
3180 void* ret = NULL;
3181
3182#ifdef WTF_CHANGES
3183 ASSERT(!isForbidden());
3184#endif
3185
3186 // The following call forces module initialization
3187 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
3188#ifndef WTF_CHANGES
3189 if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
3190 Span* span = DoSampledAllocation(size);
3191 if (span != NULL) {
3192 ret = SpanToMallocResult(span);
3193 }
3194 } else
3195#endif
3196 if (size > kMaxSize) {
3197 // Use page-level allocator
3198 SpinLockHolder h(&pageheap_lock);
3199 Span* span = pageheap->New(pages(size));
3200 if (span != NULL) {
3201 ret = SpanToMallocResult(span);
3202 }
3203 } else {
3204 // The common case, and also the simplest. This just pops the
3205    // size-appropriate freelist, after replenishing it if it's empty.
3206 ret = CheckedMallocResult(heap->Allocate(size));
3207 }
3208 if (!ret) {
3209#ifdef WTF_CHANGES
3210 if (crashOnFailure) // This branch should be optimized out by the compiler.
3211 CRASH();
3212#else
3213 errno = ENOMEM;
3214#endif
3215 }
3216 return ret;
3217}
3218
3219static ALWAYS_INLINE void do_free(void* ptr) {
3220 if (ptr == NULL) return;
3221 ASSERT(pageheap != NULL); // Should not call free() before malloc()
3222 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
3223 Span* span = NULL;
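  // A cached value of 0 means either "not cached yet" or "page-level (large)
  // allocation"; consult the span descriptor to tell the two apart and refresh
  // the cache.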
3224 size_t cl = pageheap->GetSizeClassIfCached(p);
3225
3226 if (cl == 0) {
3227 span = pageheap->GetDescriptor(p);
3228 cl = span->sizeclass;
3229 pageheap->CacheSizeClass(p, cl);
3230 }
3231 if (cl != 0) {
3232#ifndef NO_TCMALLOC_SAMPLES
3233 ASSERT(!pageheap->GetDescriptor(p)->sample);
3234#endif
3235 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCacheIfPresent();
3236 if (heap != NULL) {
3237 heap->Deallocate(ptr, cl);
3238 } else {
3239 // Delete directly into central cache
3240 SLL_SetNext(ptr, NULL);
3241 central_cache[cl].InsertRange(ptr, ptr, 1);
3242 }
3243 } else {
3244 SpinLockHolder h(&pageheap_lock);
3245 ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
3246 ASSERT(span != NULL && span->start == p);
3247#ifndef NO_TCMALLOC_SAMPLES
3248 if (span->sample) {
3249 DLL_Remove(span);
3250 stacktrace_allocator.Delete(reinterpret_cast<StackTrace*>(span->objects));
3251 span->objects = NULL;
3252 }
3253#endif
3254 pageheap->Delete(span);
3255 }
3256}
3257
3258#ifndef WTF_CHANGES
3259// For use by exported routines below that want specific alignments
3260//
3261// Note: this code can be slow, and can significantly fragment memory.
3262// The expectation is that memalign/posix_memalign/valloc/pvalloc will
3263// not be invoked very often. This requirement simplifies our
3264// implementation and allows us to tune for expected allocation
3265// patterns.
3266static void* do_memalign(size_t align, size_t size) {
3267 ASSERT((align & (align - 1)) == 0);
3268 ASSERT(align > 0);
3269 if (pageheap == NULL) TCMalloc_ThreadCache::InitModule();
3270
3271 // Allocate at least one byte to avoid boundary conditions below
3272 if (size == 0) size = 1;
3273
3274 if (size <= kMaxSize && align < kPageSize) {
3275 // Search through acceptable size classes looking for one with
3276 // enough alignment. This depends on the fact that
3277 // InitSizeClasses() currently produces several size classes that
3278 // are aligned at powers of two. We will waste time and space if
3279 // we miss in the size class array, but that is deemed acceptable
3280 // since memalign() should be used rarely.
3281 size_t cl = SizeClass(size);
3282 while (cl < kNumClasses && ((class_to_size[cl] & (align - 1)) != 0)) {
3283 cl++;
3284 }
3285 if (cl < kNumClasses) {
3286 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
3287 return CheckedMallocResult(heap->Allocate(class_to_size[cl]));
3288 }
3289 }
3290
3291 // We will allocate directly from the page heap
3292 SpinLockHolder h(&pageheap_lock);
3293
3294 if (align <= kPageSize) {
3295 // Any page-level allocation will be fine
3296 // TODO: We could put the rest of this page in the appropriate
3297 // TODO: cache but it does not seem worth it.
3298 Span* span = pageheap->New(pages(size));
3299 return span == NULL ? NULL : SpanToMallocResult(span);
3300 }
3301
3302 // Allocate extra pages and carve off an aligned portion
3303 const Length alloc = pages(size + align);
3304 Span* span = pageheap->New(alloc);
3305 if (span == NULL) return NULL;
3306
3307 // Skip starting portion so that we end up aligned
3308 Length skip = 0;
3309 while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) {
3310 skip++;
3311 }
3312 ASSERT(skip < alloc);
3313 if (skip > 0) {
3314 Span* rest = pageheap->Split(span, skip);
3315 pageheap->Delete(span);
3316 span = rest;
3317 }
3318
3319 // Skip trailing portion that we do not need to return
3320 const Length needed = pages(size);
3321 ASSERT(span->length >= needed);
3322 if (span->length > needed) {
3323 Span* trailer = pageheap->Split(span, needed);
3324 pageheap->Delete(trailer);
3325 }
3326 return SpanToMallocResult(span);
3327}
3328#endif
3329
3330// Helpers for use by exported routines below:
3331
3332#ifndef WTF_CHANGES
3333static inline void do_malloc_stats() {
3334 PrintStats(1);
3335}
3336#endif
3337
3338static inline int do_mallopt(int, int) {
3339 return 1; // Indicates error
3340}
3341
3342#ifdef HAVE_STRUCT_MALLINFO // mallinfo isn't defined on freebsd, for instance
3343static inline struct mallinfo do_mallinfo() {
3344 TCMallocStats stats;
3345 ExtractStats(&stats, NULL);
3346
3347 // Just some of the fields are filled in.
3348 struct mallinfo info;
3349 memset(&info, 0, sizeof(info));
3350
3351  // Unfortunately, the struct contains "int" fields, so some of the
3352 // size values will be truncated.
3353 info.arena = static_cast<int>(stats.system_bytes);
3354 info.fsmblks = static_cast<int>(stats.thread_bytes
3355 + stats.central_bytes
3356 + stats.transfer_bytes);
3357 info.fordblks = static_cast<int>(stats.pageheap_bytes);
3358 info.uordblks = static_cast<int>(stats.system_bytes
3359 - stats.thread_bytes
3360 - stats.central_bytes
3361 - stats.transfer_bytes
3362 - stats.pageheap_bytes);
3363
3364 return info;
3365}
3366#endif
3367
3368//-------------------------------------------------------------------
3369// Exported routines
3370//-------------------------------------------------------------------
3371
3372// CAVEAT: The code structure below ensures that MallocHook methods are always
3373// called from the stack frame of the invoked allocation function.
3374// heap-checker.cc depends on this to start a stack trace from
3375// the call to the (de)allocation function.
3376
3377#ifndef WTF_CHANGES
3378extern "C"
3379#else
3380#define do_malloc do_malloc<crashOnFailure>
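// With this #define, the plain do_malloc calls inside the templates below
// resolve to the do_malloc<crashOnFailure> instantiation selected by
// fastMalloc (true) or tryFastMalloc (false).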
3381
3382template <bool crashOnFailure>
3383void* malloc(size_t);
3384
3385void* fastMalloc(size_t size)
3386{
3387 return malloc<true>(size);
3388}
3389
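// Note: tryFastMalloc returns a TryMallocReturnValue wrapper rather than a raw
// pointer; the wrapper (declared in FastMalloc.h) is intended to force callers
// to explicitly extract, and therefore null-check, the result.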
3390TryMallocReturnValue tryFastMalloc(size_t size)
3391{
3392 return malloc<false>(size);
3393}
3394
3395template <bool crashOnFailure>
3396ALWAYS_INLINE
3397#endif
3398void* malloc(size_t size) {
3399#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3400 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= size) // If overflow would occur...
3401 return 0;
3402 size += sizeof(AllocAlignmentInteger);
3403 void* result = do_malloc(size);
3404 if (!result)
3405 return 0;
3406
3407 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
3408 result = static_cast<AllocAlignmentInteger*>(result) + 1;
3409#else
3410 void* result = do_malloc(size);
3411#endif
3412
3413#ifndef WTF_CHANGES
3414 MallocHook::InvokeNewHook(result, size);
3415#endif
3416 return result;
3417}
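// Layout sketch for the FAST_MALLOC_MATCH_VALIDATION path above: the raw block
// returned by do_malloc is
//
//   [ AllocAlignmentInteger tag == AllocTypeMalloc ][ user data ... ]
//   ^ raw allocation                                ^ pointer handed to caller
//
// and free() below steps the user pointer back by one AllocAlignmentInteger,
// checks the tag, and calls fastMallocMatchFailed() on a mismatch before any
// memory is released.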
3418
3419#ifndef WTF_CHANGES
3420extern "C"
3421#endif
3422void free(void* ptr) {
3423#ifndef WTF_CHANGES
3424 MallocHook::InvokeDeleteHook(ptr);
3425#endif
3426
3427#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3428 if (!ptr)
3429 return;
3430
3431 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(ptr);
3432 if (*header != Internal::AllocTypeMalloc)
3433 Internal::fastMallocMatchFailed(ptr);
3434 do_free(header);
3435#else
3436 do_free(ptr);
3437#endif
3438}
3439
3440#ifndef WTF_CHANGES
3441extern "C"
3442#else
3443template <bool crashOnFailure>
3444void* calloc(size_t, size_t);
3445
3446void* fastCalloc(size_t n, size_t elem_size)
3447{
3448 return calloc<true>(n, elem_size);
3449}
3450
3451TryMallocReturnValue tryFastCalloc(size_t n, size_t elem_size)
3452{
3453 return calloc<false>(n, elem_size);
3454}
3455
3456template <bool crashOnFailure>
3457ALWAYS_INLINE
3458#endif
3459void* calloc(size_t n, size_t elem_size) {
3460 size_t totalBytes = n * elem_size;
3461
3462 // Protect against overflow
3463 if (n > 1 && elem_size && (totalBytes / elem_size) != n)
3464 return 0;
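  // Worked example of the overflow check above with a 32-bit size_t:
  // n == 0x10000 and elem_size == 0x10001 have a true product of 0x100010000,
  // which truncates to totalBytes == 0x10000; totalBytes / elem_size is then
  // 0 != n, so the call returns 0 rather than handing back an undersized
  // block. (n <= 1 can never overflow, hence the n > 1 shortcut.)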
3465
3466#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3467 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= totalBytes) // If overflow would occur...
3468 return 0;
3469
3470 totalBytes += sizeof(AllocAlignmentInteger);
3471 void* result = do_malloc(totalBytes);
3472 if (!result)
3473 return 0;
3474
3475 memset(result, 0, totalBytes);
3476 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
3477 result = static_cast<AllocAlignmentInteger*>(result) + 1;
3478#else
3479 void* result = do_malloc(totalBytes);
3480 if (result != NULL) {
3481 memset(result, 0, totalBytes);
3482 }
3483#endif
3484
3485#ifndef WTF_CHANGES
3486 MallocHook::InvokeNewHook(result, totalBytes);
3487#endif
3488 return result;
3489}
3490
3491// Since cfree isn't used anywhere, we don't compile it in.
3492#ifndef WTF_CHANGES
3493#ifndef WTF_CHANGES
3494extern "C"
3495#endif
3496void cfree(void* ptr) {
3497#ifndef WTF_CHANGES
3498 MallocHook::InvokeDeleteHook(ptr);
3499#endif
3500 do_free(ptr);
3501}
3502#endif
3503
3504#ifndef WTF_CHANGES
3505extern "C"
3506#else
3507template <bool crashOnFailure>
3508void* realloc(void*, size_t);
3509
3510void* fastRealloc(void* old_ptr, size_t new_size)
3511{
3512 return realloc<true>(old_ptr, new_size);
3513}
3514
3515TryMallocReturnValue tryFastRealloc(void* old_ptr, size_t new_size)
3516{
3517 return realloc<false>(old_ptr, new_size);
3518}
3519
3520template <bool crashOnFailure>
3521ALWAYS_INLINE
3522#endif
3523void* realloc(void* old_ptr, size_t new_size) {
3524 if (old_ptr == NULL) {
3525#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3526 void* result = malloc(new_size);
3527#else
3528 void* result = do_malloc(new_size);
3529#ifndef WTF_CHANGES
3530 MallocHook::InvokeNewHook(result, new_size);
3531#endif
3532#endif
3533 return result;
3534 }
3535 if (new_size == 0) {
3536#ifndef WTF_CHANGES
3537 MallocHook::InvokeDeleteHook(old_ptr);
3538#endif
3539 free(old_ptr);
3540 return NULL;
3541 }
3542
3543#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3544 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= new_size) // If overflow would occur...
3545 return 0;
3546 new_size += sizeof(AllocAlignmentInteger);
3547 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(old_ptr);
3548 if (*header != Internal::AllocTypeMalloc)
3549 Internal::fastMallocMatchFailed(old_ptr);
3550 old_ptr = header;
3551#endif
3552
3553 // Get the size of the old entry
3554 const PageID p = reinterpret_cast<uintptr_t>(old_ptr) >> kPageShift;
3555 size_t cl = pageheap->GetSizeClassIfCached(p);
3556 Span *span = NULL;
3557 size_t old_size;
3558 if (cl == 0) {
3559 span = pageheap->GetDescriptor(p);
3560 cl = span->sizeclass;
3561 pageheap->CacheSizeClass(p, cl);
3562 }
3563 if (cl != 0) {
3564 old_size = ByteSizeForClass(cl);
3565 } else {
3566 ASSERT(span != NULL);
3567 old_size = span->length << kPageShift;
3568 }
3569
3570 // Reallocate if the new size is larger than the old size,
3571 // or if the new size is significantly smaller than the old size.
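  // ("Significantly smaller" means the new request maps to a strictly smaller
  // size class, or to fewer pages for a large span. For instance, assuming
  // hypothetical 32- and 48-byte classes, shrinking a 40-byte allocation to
  // 36 bytes leaves it in place, while shrinking it to 20 bytes copies it
  // into the 32-byte class.)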
3572 if ((new_size > old_size) || (AllocationSize(new_size) < old_size)) {
3573 // Need to reallocate
3574 void* new_ptr = do_malloc(new_size);
3575 if (new_ptr == NULL) {
3576 return NULL;
3577 }
3578#ifndef WTF_CHANGES
3579 MallocHook::InvokeNewHook(new_ptr, new_size);
3580#endif
3581 memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size));
3582#ifndef WTF_CHANGES
3583 MallocHook::InvokeDeleteHook(old_ptr);
3584#endif
3585 // We could use a variant of do_free() that leverages the fact
3586 // that we already know the sizeclass of old_ptr. The benefit
3587 // would be small, so don't bother.
3588 do_free(old_ptr);
3589#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3590 new_ptr = static_cast<AllocAlignmentInteger*>(new_ptr) + 1;
3591#endif
3592 return new_ptr;
3593 } else {
3594#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3595        old_ptr = static_cast<AllocAlignmentInteger*>(old_ptr) + 1; // Set old_ptr back to the user pointer.
3596#endif
3597 return old_ptr;
3598 }
3599}
3600
3601#ifdef WTF_CHANGES
3602#undef do_malloc
3603#else
3604
3605static SpinLock set_new_handler_lock = SPINLOCK_INITIALIZER;
3606
3607static inline void* cpp_alloc(size_t size, bool nothrow) {
3608 for (;;) {
3609 void* p = do_malloc(size);
3610#ifdef PREANSINEW
3611 return p;
3612#else
3613 if (p == NULL) { // allocation failed
3614 // Get the current new handler. NB: this function is not
3615 // thread-safe. We make a feeble stab at making it so here, but
3616 // this lock only protects against tcmalloc interfering with
3617 // itself, not with other libraries calling set_new_handler.
3618 std::new_handler nh;
3619 {
3620 SpinLockHolder h(&set_new_handler_lock);
3621 nh = std::set_new_handler(0);
3622 (void) std::set_new_handler(nh);
3623 }
3624 // If no new_handler is established, the allocation failed.
3625 if (!nh) {
3626 if (nothrow) return 0;
3627 throw std::bad_alloc();
3628 }
3629 // Otherwise, try the new_handler. If it returns, retry the
3630 // allocation. If it throws std::bad_alloc, fail the allocation.
3631      // If it throws something else, don't interfere.
3632 try {
3633 (*nh)();
3634 } catch (const std::bad_alloc&) {
3635 if (!nothrow) throw;
3636 return p;
3637 }
3638 } else { // allocation success
3639 return p;
3640 }
3641#endif
3642 }
3643}
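// Illustrative sketch of the contract cpp_alloc implements (the handler below
// is hypothetical; the semantics are those of std::set_new_handler): when a
// handler is installed, a failed allocation is retried until the handler
// either frees memory or gives up by throwing std::bad_alloc.
//
//   static void releaseEmergencyReserve() {
//       if (!tryToReleaseSomeMemory())   // hypothetical helper
//           throw std::bad_alloc();      // tells operator new to stop retrying
//   }
//
//   std::set_new_handler(releaseEmergencyReserve);
//   int* p = new int[1024];              // loops through cpp_alloc on failure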
3644
3645void* operator new(size_t size) {
3646 void* p = cpp_alloc(size, false);
3647 // We keep this next instruction out of cpp_alloc for a reason: when
3648 // it's in, and new just calls cpp_alloc, the optimizer may fold the
3649 // new call into cpp_alloc, which messes up our whole section-based
3650 // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc
3651 // isn't the last thing this fn calls, and prevents the folding.
3652 MallocHook::InvokeNewHook(p, size);
3653 return p;
3654}
3655
3656void* operator new(size_t size, const std::nothrow_t&) __THROW {
3657 void* p = cpp_alloc(size, true);
3658 MallocHook::InvokeNewHook(p, size);
3659 return p;
3660}
3661
3662void operator delete(void* p) __THROW {
3663 MallocHook::InvokeDeleteHook(p);
3664 do_free(p);
3665}
3666
3667void operator delete(void* p, const std::nothrow_t&) __THROW {
3668 MallocHook::InvokeDeleteHook(p);
3669 do_free(p);
3670}
3671
3672void* operator new[](size_t size) {
3673 void* p = cpp_alloc(size, false);
3674 // We keep this next instruction out of cpp_alloc for a reason: when
3675 // it's in, and new just calls cpp_alloc, the optimizer may fold the
3676 // new call into cpp_alloc, which messes up our whole section-based
3677 // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc
3678 // isn't the last thing this fn calls, and prevents the folding.
3679 MallocHook::InvokeNewHook(p, size);
3680 return p;
3681}
3682
3683void* operator new[](size_t size, const std::nothrow_t&) __THROW {
3684 void* p = cpp_alloc(size, true);
3685 MallocHook::InvokeNewHook(p, size);
3686 return p;
3687}
3688
3689void operator delete[](void* p) __THROW {
3690 MallocHook::InvokeDeleteHook(p);
3691 do_free(p);
3692}
3693
3694void operator delete[](void* p, const std::nothrow_t&) __THROW {
3695 MallocHook::InvokeDeleteHook(p);
3696 do_free(p);
3697}
3698
3699extern "C" void* memalign(size_t align, size_t size) __THROW {
3700 void* result = do_memalign(align, size);
3701 MallocHook::InvokeNewHook(result, size);
3702 return result;
3703}
3704
3705extern "C" int posix_memalign(void** result_ptr, size_t align, size_t size)
3706 __THROW {
3707 if (((align % sizeof(void*)) != 0) ||
3708 ((align & (align - 1)) != 0) ||
3709 (align == 0)) {
3710 return EINVAL;
3711 }
3712
3713 void* result = do_memalign(align, size);
3714 MallocHook::InvokeNewHook(result, size);
3715 if (result == NULL) {
3716 return ENOMEM;
3717 } else {
3718 *result_ptr = result;
3719 return 0;
3720 }
3721}
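// Examples of the POSIX alignment check above: align == 24 fails (not a power
// of two, caught by align & (align - 1)), align == 2 fails on an LP64 target
// (not a multiple of sizeof(void*) == 8), and align == 64 passes, after which
// the result is either stored through result_ptr or ENOMEM is returned.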
3722
3723static size_t pagesize = 0;
3724
3725extern "C" void* valloc(size_t size) __THROW {
3726 // Allocate page-aligned object of length >= size bytes
3727 if (pagesize == 0) pagesize = getpagesize();
3728 void* result = do_memalign(pagesize, size);
3729 MallocHook::InvokeNewHook(result, size);
3730 return result;
3731}
3732
3733extern "C" void* pvalloc(size_t size) __THROW {
3734 // Round up size to a multiple of pagesize
3735 if (pagesize == 0) pagesize = getpagesize();
3736 size = (size + pagesize - 1) & ~(pagesize - 1);
3737 void* result = do_memalign(pagesize, size);
3738 MallocHook::InvokeNewHook(result, size);
3739 return result;
3740}
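// Worked example of the round-up above, assuming getpagesize() == 4096:
// size == 5000 becomes (5000 + 4095) & ~4095 == 8192, i.e. two full pages,
// before being handed to do_memalign(pagesize, size).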
3741
3742extern "C" void malloc_stats(void) {
3743 do_malloc_stats();
3744}
3745
3746extern "C" int mallopt(int cmd, int value) {
3747 return do_mallopt(cmd, value);
3748}
3749
3750#ifdef HAVE_STRUCT_MALLINFO
3751extern "C" struct mallinfo mallinfo(void) {
3752 return do_mallinfo();
3753}
3754#endif
3755
3756//-------------------------------------------------------------------
3757// Some library routines on RedHat 9 allocate memory using malloc()
3758// and free it using __libc_free() (or vice-versa). Since we provide
3759// our own implementations of malloc/free, we need to make sure that
3760// the __libc_XXX variants (defined as part of glibc) also point to
3761// the same implementations.
3762//-------------------------------------------------------------------
3763
3764#if defined(__GLIBC__)
3765extern "C" {
3766#if COMPILER(GCC) && !defined(__MACH__) && defined(HAVE___ATTRIBUTE__)
3767 // Potentially faster variants that use the gcc alias extension.
3768 // Mach-O (Darwin) does not support weak aliases, hence the __MACH__ check.
3769# define ALIAS(x) __attribute__ ((weak, alias (x)))
3770 void* __libc_malloc(size_t size) ALIAS("malloc");
3771 void __libc_free(void* ptr) ALIAS("free");
3772 void* __libc_realloc(void* ptr, size_t size) ALIAS("realloc");
3773 void* __libc_calloc(size_t n, size_t size) ALIAS("calloc");
3774 void __libc_cfree(void* ptr) ALIAS("cfree");
3775 void* __libc_memalign(size_t align, size_t s) ALIAS("memalign");
3776 void* __libc_valloc(size_t size) ALIAS("valloc");
3777 void* __libc_pvalloc(size_t size) ALIAS("pvalloc");
3778 int __posix_memalign(void** r, size_t a, size_t s) ALIAS("posix_memalign");
3779# undef ALIAS
3780# else /* not __GNUC__ */
3781 // Portable wrappers
3782 void* __libc_malloc(size_t size) { return malloc(size); }
3783 void __libc_free(void* ptr) { free(ptr); }
3784 void* __libc_realloc(void* ptr, size_t size) { return realloc(ptr, size); }
3785 void* __libc_calloc(size_t n, size_t size) { return calloc(n, size); }
3786 void __libc_cfree(void* ptr) { cfree(ptr); }
3787 void* __libc_memalign(size_t align, size_t s) { return memalign(align, s); }
3788 void* __libc_valloc(size_t size) { return valloc(size); }
3789 void* __libc_pvalloc(size_t size) { return pvalloc(size); }
3790 int __posix_memalign(void** r, size_t a, size_t s) {
3791 return posix_memalign(r, a, s);
3792 }
3793# endif /* __GNUC__ */
3794}
3795#endif /* __GLIBC__ */
3796
3797// Override __libc_memalign in libc on linux boxes specially.
3798// They have a bug in libc that causes them to (very rarely) allocate
3799// with __libc_memalign() yet deallocate with free() and the
3800// definitions above don't catch it.
3801// This function is an exception to the rule of calling MallocHook method
3802// from the stack frame of the allocation function;
3803// heap-checker handles this special case explicitly.
3804static void *MemalignOverride(size_t align, size_t size, const void *caller)
3805 __THROW {
3806 void* result = do_memalign(align, size);
3807 MallocHook::InvokeNewHook(result, size);
3808 return result;
3809}
3810void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride;
3811
3812#endif
3813
3814#if defined(WTF_CHANGES) && PLATFORM(DARWIN)
3815
3816class FreeObjectFinder {
3817 const RemoteMemoryReader& m_reader;
3818 HashSet<void*> m_freeObjects;
3819
3820public:
3821 FreeObjectFinder(const RemoteMemoryReader& reader) : m_reader(reader) { }
3822
3823 void visit(void* ptr) { m_freeObjects.add(ptr); }
3824 bool isFreeObject(void* ptr) const { return m_freeObjects.contains(ptr); }
3825 bool isFreeObject(vm_address_t ptr) const { return isFreeObject(reinterpret_cast<void*>(ptr)); }
3826 size_t freeObjectCount() const { return m_freeObjects.size(); }
3827
3828 void findFreeObjects(TCMalloc_ThreadCache* threadCache)
3829 {
3830 for (; threadCache; threadCache = (threadCache->next_ ? m_reader(threadCache->next_) : 0))
3831 threadCache->enumerateFreeObjects(*this, m_reader);
3832 }
3833
3834 void findFreeObjects(TCMalloc_Central_FreeListPadded* centralFreeList, size_t numSizes, TCMalloc_Central_FreeListPadded* remoteCentralFreeList)
3835 {
3836 for (unsigned i = 0; i < numSizes; i++)
3837 centralFreeList[i].enumerateFreeObjects(*this, m_reader, remoteCentralFreeList + i);
3838 }
3839};
3840
3841class PageMapFreeObjectFinder {
3842 const RemoteMemoryReader& m_reader;
3843 FreeObjectFinder& m_freeObjectFinder;
3844
3845public:
3846 PageMapFreeObjectFinder(const RemoteMemoryReader& reader, FreeObjectFinder& freeObjectFinder)
3847 : m_reader(reader)
3848 , m_freeObjectFinder(freeObjectFinder)
3849 { }
3850
3851 int visit(void* ptr) const
3852 {
3853 if (!ptr)
3854 return 1;
3855
3856 Span* span = m_reader(reinterpret_cast<Span*>(ptr));
3857 if (span->free) {
3858 void* ptr = reinterpret_cast<void*>(span->start << kPageShift);
3859 m_freeObjectFinder.visit(ptr);
3860 } else if (span->sizeclass) {
3861 // Walk the free list of the small-object span, keeping track of each object seen
3862 for (void* nextObject = span->objects; nextObject; nextObject = *m_reader(reinterpret_cast<void**>(nextObject)))
3863 m_freeObjectFinder.visit(nextObject);
3864 }
3865 return span->length;
3866 }
3867};
3868
3869class PageMapMemoryUsageRecorder {
3870 task_t m_task;
3871 void* m_context;
3872 unsigned m_typeMask;
3873 vm_range_recorder_t* m_recorder;
3874 const RemoteMemoryReader& m_reader;
3875 const FreeObjectFinder& m_freeObjectFinder;
3876
3877 HashSet<void*> m_seenPointers;
3878 Vector<Span*> m_coalescedSpans;
3879
3880public:
3881 PageMapMemoryUsageRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader, const FreeObjectFinder& freeObjectFinder)
3882 : m_task(task)
3883 , m_context(context)
3884 , m_typeMask(typeMask)
3885 , m_recorder(recorder)
3886 , m_reader(reader)
3887 , m_freeObjectFinder(freeObjectFinder)
3888 { }
3889
3890 ~PageMapMemoryUsageRecorder()
3891 {
3892 ASSERT(!m_coalescedSpans.size());
3893 }
3894
3895 void recordPendingRegions()
3896 {
3897 Span* lastSpan = m_coalescedSpans[m_coalescedSpans.size() - 1];
3898 vm_range_t ptrRange = { m_coalescedSpans[0]->start << kPageShift, 0 };
3899 ptrRange.size = (lastSpan->start << kPageShift) - ptrRange.address + (lastSpan->length * kPageSize);
3900
3901 // Mark the memory region the spans represent as a candidate for containing pointers
3902 if (m_typeMask & MALLOC_PTR_REGION_RANGE_TYPE)
3903 (*m_recorder)(m_task, m_context, MALLOC_PTR_REGION_RANGE_TYPE, &ptrRange, 1);
3904
3905 if (!(m_typeMask & MALLOC_PTR_IN_USE_RANGE_TYPE)) {
3906 m_coalescedSpans.clear();
3907 return;
3908 }
3909
3910 Vector<vm_range_t, 1024> allocatedPointers;
3911 for (size_t i = 0; i < m_coalescedSpans.size(); ++i) {
3912 Span *theSpan = m_coalescedSpans[i];
3913 if (theSpan->free)
3914 continue;
3915
3916 vm_address_t spanStartAddress = theSpan->start << kPageShift;
3917 vm_size_t spanSizeInBytes = theSpan->length * kPageSize;
3918
3919 if (!theSpan->sizeclass) {
3920 // If it's an allocated large object span, mark it as in use
3921 if (!m_freeObjectFinder.isFreeObject(spanStartAddress))
3922 allocatedPointers.append((vm_range_t){spanStartAddress, spanSizeInBytes});
3923 } else {
3924 const size_t objectSize = ByteSizeForClass(theSpan->sizeclass);
3925
3926 // Mark each allocated small object within the span as in use
3927 const vm_address_t endOfSpan = spanStartAddress + spanSizeInBytes;
3928 for (vm_address_t object = spanStartAddress; object + objectSize <= endOfSpan; object += objectSize) {
3929 if (!m_freeObjectFinder.isFreeObject(object))
3930 allocatedPointers.append((vm_range_t){object, objectSize});
3931 }
3932 }
3933 }
3934
3935 (*m_recorder)(m_task, m_context, MALLOC_PTR_IN_USE_RANGE_TYPE, allocatedPointers.data(), allocatedPointers.size());
3936
3937 m_coalescedSpans.clear();
3938 }
3939
3940 int visit(void* ptr)
3941 {
3942 if (!ptr)
3943 return 1;
3944
3945 Span* span = m_reader(reinterpret_cast<Span*>(ptr));
3946 if (!span->start)
3947 return 1;
3948
3949 if (m_seenPointers.contains(ptr))
3950 return span->length;
3951 m_seenPointers.add(ptr);
3952
3953 if (!m_coalescedSpans.size()) {
3954 m_coalescedSpans.append(span);
3955 return span->length;
3956 }
3957
3958 Span* previousSpan = m_coalescedSpans[m_coalescedSpans.size() - 1];
3959 vm_address_t previousSpanStartAddress = previousSpan->start << kPageShift;
3960 vm_size_t previousSpanSizeInBytes = previousSpan->length * kPageSize;
3961
3962 // If the new span is adjacent to the previous span, do nothing for now.
3963 vm_address_t spanStartAddress = span->start << kPageShift;
3964 if (spanStartAddress == previousSpanStartAddress + previousSpanSizeInBytes) {
3965 m_coalescedSpans.append(span);
3966 return span->length;
3967 }
3968
3969 // New span is not adjacent to previous span, so record the spans coalesced so far.
3970 recordPendingRegions();
3971 m_coalescedSpans.append(span);
3972
3973 return span->length;
3974 }
3975};
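// Example of the coalescing behaviour of visit() above: spans covering pages
// [10, 12) and [12, 15) are adjacent, so they accumulate in m_coalescedSpans
// and are later reported as a single 5-page MALLOC_PTR_REGION_RANGE_TYPE
// range; a subsequent span starting at page 20 is not adjacent, so the
// pending group is flushed via recordPendingRegions() before the new run
// begins.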
3976
3977class AdminRegionRecorder {
3978 task_t m_task;
3979 void* m_context;
3980 unsigned m_typeMask;
3981 vm_range_recorder_t* m_recorder;
3982 const RemoteMemoryReader& m_reader;
3983
3984 Vector<vm_range_t, 1024> m_pendingRegions;
3985
3986public:
3987 AdminRegionRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader)
3988 : m_task(task)
3989 , m_context(context)
3990 , m_typeMask(typeMask)
3991 , m_recorder(recorder)
3992 , m_reader(reader)
3993 { }
3994
3995 void recordRegion(vm_address_t ptr, size_t size)
3996 {
3997 if (m_typeMask & MALLOC_ADMIN_REGION_RANGE_TYPE)
3998 m_pendingRegions.append((vm_range_t){ ptr, size });
3999 }
4000
4001 void visit(void *ptr, size_t size)
4002 {
4003 recordRegion(reinterpret_cast<vm_address_t>(ptr), size);
4004 }
4005
4006 void recordPendingRegions()
4007 {
4008 if (m_pendingRegions.size()) {
4009 (*m_recorder)(m_task, m_context, MALLOC_ADMIN_REGION_RANGE_TYPE, m_pendingRegions.data(), m_pendingRegions.size());
4010 m_pendingRegions.clear();
4011 }
4012 }
4013
4014 ~AdminRegionRecorder()
4015 {
4016 ASSERT(!m_pendingRegions.size());
4017 }
4018};
4019
4020kern_return_t FastMallocZone::enumerate(task_t task, void* context, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t reader, vm_range_recorder_t recorder)
4021{
4022 RemoteMemoryReader memoryReader(task, reader);
4023
4024 InitSizeClasses();
4025
4026 FastMallocZone* mzone = memoryReader(reinterpret_cast<FastMallocZone*>(zoneAddress));
4027 TCMalloc_PageHeap* pageHeap = memoryReader(mzone->m_pageHeap);
4028 TCMalloc_ThreadCache** threadHeapsPointer = memoryReader(mzone->m_threadHeaps);
4029 TCMalloc_ThreadCache* threadHeaps = memoryReader(*threadHeapsPointer);
4030
4031 TCMalloc_Central_FreeListPadded* centralCaches = memoryReader(mzone->m_centralCaches, sizeof(TCMalloc_Central_FreeListPadded) * kNumClasses);
4032
4033 FreeObjectFinder finder(memoryReader);
4034 finder.findFreeObjects(threadHeaps);
4035 finder.findFreeObjects(centralCaches, kNumClasses, mzone->m_centralCaches);
4036
4037 TCMalloc_PageHeap::PageMap* pageMap = &pageHeap->pagemap_;
4038 PageMapFreeObjectFinder pageMapFinder(memoryReader, finder);
4039 pageMap->visitValues(pageMapFinder, memoryReader);
4040
4041 PageMapMemoryUsageRecorder usageRecorder(task, context, typeMask, recorder, memoryReader, finder);
4042 pageMap->visitValues(usageRecorder, memoryReader);
4043 usageRecorder.recordPendingRegions();
4044
4045 AdminRegionRecorder adminRegionRecorder(task, context, typeMask, recorder, memoryReader);
4046 pageMap->visitAllocations(adminRegionRecorder, memoryReader);
4047
4048 PageHeapAllocator<Span>* spanAllocator = memoryReader(mzone->m_spanAllocator);
4049 PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator = memoryReader(mzone->m_pageHeapAllocator);
4050
4051 spanAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader);
4052 pageHeapAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader);
4053
4054 adminRegionRecorder.recordPendingRegions();
4055
4056 return 0;
4057}
4058
4059size_t FastMallocZone::size(malloc_zone_t*, const void*)
4060{
4061 return 0;
4062}
4063
4064void* FastMallocZone::zoneMalloc(malloc_zone_t*, size_t)
4065{
4066 return 0;
4067}
4068
4069void* FastMallocZone::zoneCalloc(malloc_zone_t*, size_t, size_t)
4070{
4071 return 0;
4072}
4073
4074void FastMallocZone::zoneFree(malloc_zone_t*, void* ptr)
4075{
4076 // Due to <rdar://problem/5671357> zoneFree may be called by the system free even if the pointer
4077 // is not in this zone. When this happens, the pointer being freed was not allocated by any
4078 // zone so we need to print a useful error for the application developer.
4079 malloc_printf("*** error for object %p: pointer being freed was not allocated\n", ptr);
4080}
4081
4082void* FastMallocZone::zoneRealloc(malloc_zone_t*, void*, size_t)
4083{
4084 return 0;
4085}
4086
4087
4088#undef malloc
4089#undef free
4090#undef realloc
4091#undef calloc
4092
4093extern "C" {
4094malloc_introspection_t jscore_fastmalloc_introspection = { &FastMallocZone::enumerate, &FastMallocZone::goodSize, &FastMallocZone::check, &FastMallocZone::print,
4095 &FastMallocZone::log, &FastMallocZone::forceLock, &FastMallocZone::forceUnlock, &FastMallocZone::statistics
4096
4097#if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !PLATFORM(IPHONE)
4098 , 0 // zone_locked will not be called on the zone unless it advertises itself as version five or higher.
4099#endif
4100
4101 };
4102}
4103
4104FastMallocZone::FastMallocZone(TCMalloc_PageHeap* pageHeap, TCMalloc_ThreadCache** threadHeaps, TCMalloc_Central_FreeListPadded* centralCaches, PageHeapAllocator<Span>* spanAllocator, PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator)
4105 : m_pageHeap(pageHeap)
4106 , m_threadHeaps(threadHeaps)
4107 , m_centralCaches(centralCaches)
4108 , m_spanAllocator(spanAllocator)
4109 , m_pageHeapAllocator(pageHeapAllocator)
4110{
4111 memset(&m_zone, 0, sizeof(m_zone));
4112 m_zone.version = 4;
4113 m_zone.zone_name = "JavaScriptCore FastMalloc";
4114 m_zone.size = &FastMallocZone::size;
4115 m_zone.malloc = &FastMallocZone::zoneMalloc;
4116 m_zone.calloc = &FastMallocZone::zoneCalloc;
4117 m_zone.realloc = &FastMallocZone::zoneRealloc;
4118 m_zone.free = &FastMallocZone::zoneFree;
4119 m_zone.valloc = &FastMallocZone::zoneValloc;
4120 m_zone.destroy = &FastMallocZone::zoneDestroy;
4121 m_zone.introspect = &jscore_fastmalloc_introspection;
4122 malloc_zone_register(&m_zone);
4123}
4124
4125
4126void FastMallocZone::init()
4127{
4128 static FastMallocZone zone(pageheap, &thread_heaps, static_cast<TCMalloc_Central_FreeListPadded*>(central_cache), &span_allocator, &threadheap_allocator);
4129}
4130
4131#endif
4132
4133#if WTF_CHANGES
4134void releaseFastMallocFreeMemory()
4135{
4136 // Flush free pages in the current thread cache back to the page heap.
4137 // Low watermark mechanism in Scavenge() prevents full return on the first pass.
4138 // The second pass flushes everything.
4139 if (TCMalloc_ThreadCache* threadCache = TCMalloc_ThreadCache::GetCacheIfPresent()) {
4140 threadCache->Scavenge();
4141 threadCache->Scavenge();
4142 }
4143
4144 SpinLockHolder h(&pageheap_lock);
4145 pageheap->ReleaseFreePages();
4146}
4147
4148FastMallocStatistics fastMallocStatistics()
4149{
4150 FastMallocStatistics statistics;
4151 {
4152 SpinLockHolder lockHolder(&pageheap_lock);
4153 statistics.heapSize = static_cast<size_t>(pageheap->SystemBytes());
4154 statistics.freeSizeInHeap = static_cast<size_t>(pageheap->FreeBytes());
4155 statistics.returnedSize = pageheap->ReturnedBytes();
4156 statistics.freeSizeInCaches = 0;
4157 for (TCMalloc_ThreadCache* threadCache = thread_heaps; threadCache ; threadCache = threadCache->next_)
4158 statistics.freeSizeInCaches += threadCache->Size();
4159 }
4160 for (unsigned cl = 0; cl < kNumClasses; ++cl) {
4161 const int length = central_cache[cl].length();
4162 const int tc_length = central_cache[cl].tc_length();
4163 statistics.freeSizeInCaches += ByteSizeForClass(cl) * (length + tc_length);
4164 }
4165 return statistics;
4166}
4167
4168} // namespace WTF
4169#endif
4170
4171#endif // FORCE_SYSTEM_MALLOC