source: webkit/trunk/JavaScriptCore/wtf/FastMalloc.cpp@ 46180

Last change on this file since 46180 was 46180, checked in by Adam Roben, 16 years ago

Roll out r46153, r46154, and r46155

These changes were causing build failures and assertion failures on
Windows.

JavaScriptCore:

  • JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.def:
  • JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore_debug.def:
  • JavaScriptCore.xcodeproj/project.pbxproj:
  • runtime/JSArray.cpp:
  • runtime/StringPrototype.cpp:
  • runtime/UString.cpp:
  • runtime/UString.h:
  • wtf/FastMalloc.cpp:
  • wtf/FastMalloc.h:
  • wtf/Platform.h:
  • wtf/PossiblyNull.h: Removed.

WebCore:

  • ForwardingHeaders/wtf/PossiblyNull.h: Removed.
  • platform/graphics/cg/ImageBufferCG.cpp:
1// Copyright (c) 2005, 2007, Google Inc.
2// All rights reserved.
3// Copyright (C) 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// ---
32// Author: Sanjay Ghemawat <[email protected]>
33//
34// A malloc that uses a per-thread cache to satisfy small malloc requests.
35// (The time for malloc/free of a small object drops from 300 ns to 50 ns.)
36//
37// See doc/tcmalloc.html for a high-level
38// description of how this malloc works.
39//
40// SYNCHRONIZATION
41// 1. The thread-specific lists are accessed without acquiring any locks.
42// This is safe because each such list is only accessed by one thread.
43// 2. We have a lock per central free-list, and hold it while manipulating
44// the central free list for a particular size.
45// 3. The central page allocator is protected by "pageheap_lock".
46// 4. The pagemap (which maps from page-number to descriptor),
47// can be read without holding any locks, and written while holding
48// the "pageheap_lock".
49// 5. To improve performance, a subset of the information one can get
50// from the pagemap is cached in a data structure, pagemap_cache_,
51// that atomically reads and writes its entries. This cache can be
52// read and written without locking.
53//
54// This multi-threaded access to the pagemap is safe for fairly
55// subtle reasons. We basically assume that when an object X is
56// allocated by thread A and deallocated by thread B, there must
57// have been appropriate synchronization in the handoff of object
58// X from thread A to thread B. The same logic applies to pagemap_cache_.
59//
60// THE PAGEID-TO-SIZECLASS CACHE
61// Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache
62// returns 0 for a particular PageID then that means "no information," not that
63// the sizeclass is 0. The cache may have stale information for pages that do
64// not hold the beginning of any free()'able object. Staleness is eliminated
65// in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and
66// do_memalign() for all other relevant pages.
67//
68// TODO: Bias reclamation to larger addresses
69// TODO: implement mallinfo/mallopt
70// TODO: Better testing
71//
72// 9/28/2003 (new page-level allocator replaces ptmalloc2):
73// * malloc/free of small objects goes from ~300 ns to ~50 ns.
74// * allocation of a reasonably complicated struct
75// goes from about 1100 ns to about 300 ns.
76
77#include "config.h"
78#include "FastMalloc.h"
79
80#include "Assertions.h"
81#include <limits>
82#if ENABLE(JSC_MULTIPLE_THREADS)
83#include <pthread.h>
84#endif
85
86#ifndef NO_TCMALLOC_SAMPLES
87#ifdef WTF_CHANGES
88#define NO_TCMALLOC_SAMPLES
89#endif
90#endif
91
92#if !defined(USE_SYSTEM_MALLOC) && defined(NDEBUG)
93#define FORCE_SYSTEM_MALLOC 0
94#else
95#define FORCE_SYSTEM_MALLOC 1
96#endif
97
98#define TCMALLOC_TRACK_DECOMMITED_SPANS (HAVE(VIRTUALALLOC) || HAVE(MADV_FREE_REUSE))
99
100#ifndef NDEBUG
101namespace WTF {
102
103#if ENABLE(JSC_MULTIPLE_THREADS)
104static pthread_key_t isForbiddenKey;
105static pthread_once_t isForbiddenKeyOnce = PTHREAD_ONCE_INIT;
106static void initializeIsForbiddenKey()
107{
108 pthread_key_create(&isForbiddenKey, 0);
109}
110
111static bool isForbidden()
112{
113 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
114 return !!pthread_getspecific(isForbiddenKey);
115}
116
117void fastMallocForbid()
118{
119 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
120 pthread_setspecific(isForbiddenKey, &isForbiddenKey);
121}
122
123void fastMallocAllow()
124{
125 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
126 pthread_setspecific(isForbiddenKey, 0);
127}
128
129#else
130
131static bool staticIsForbidden;
132static bool isForbidden()
133{
134 return staticIsForbidden;
135}
136
137void fastMallocForbid()
138{
139 staticIsForbidden = true;
140}
141
142void fastMallocAllow()
143{
144 staticIsForbidden = false;
145}
146#endif // ENABLE(JSC_MULTIPLE_THREADS)
147
148} // namespace WTF
149#endif // NDEBUG
150
151#include <string.h>
152
153namespace WTF {
154
155#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
156
157namespace Internal {
158
159void fastMallocMatchFailed(void*)
160{
161 CRASH();
162}
163
164} // namespace Internal
165
166#endif
167
168void* fastZeroedMalloc(size_t n)
169{
170 void* result = fastMalloc(n);
171 memset(result, 0, n);
172 return result;
173}
174
175void* tryFastZeroedMalloc(size_t n)
176{
177 void* result = tryFastMalloc(n);
178 if (!result)
179 return 0;
180 memset(result, 0, n);
181 return result;
182}
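// Illustrative calling convention (a sketch; the caller below is hypothetical,
// not part of this file): the try* variants report allocation failure by
// returning 0, while the plain variants never return 0 and CRASH() instead.
//
//     void* buffer = WTF::tryFastMalloc(bufferSize);
//     if (!buffer)
//         return false;                     // caller handles the failure
//     // ... use buffer ...
//     WTF::fastFree(buffer);
//
//     char* data = static_cast<char*>(WTF::fastMalloc(dataSize)); // never 0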
183
184} // namespace WTF
185
186#if FORCE_SYSTEM_MALLOC
187
188#include <stdlib.h>
189#if !PLATFORM(WIN_OS)
190 #include <pthread.h>
191#else
192 #include "windows.h"
193#endif
194
195namespace WTF {
196
197void* tryFastMalloc(size_t n)
198{
199 ASSERT(!isForbidden());
200
201#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
202 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= n) // If overflow would occur...
203 return 0;
204
205 void* result = malloc(n + sizeof(AllocAlignmentInteger));
206 if (!result)
207 return 0;
208
209 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
210 result = static_cast<AllocAlignmentInteger*>(result) + 1;
211
212 return result;
213#else
214 return malloc(n);
215#endif
216}
217
218void* fastMalloc(size_t n)
219{
220 ASSERT(!isForbidden());
221
222#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
223 void* result = tryFastMalloc(n);
224#else
225 void* result = malloc(n);
226#endif
227
228 if (!result)
229 CRASH();
230 return result;
231}
232
233void* tryFastCalloc(size_t n_elements, size_t element_size)
234{
235 ASSERT(!isForbidden());
236
237#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
238 size_t totalBytes = n_elements * element_size;
239 if ((n_elements > 1 && element_size && (totalBytes / element_size) != n_elements) || (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= totalBytes))
240 return 0;
241
242 totalBytes += sizeof(AllocAlignmentInteger);
243 void* result = malloc(totalBytes);
244 if (!result)
245 return 0;
246
247 memset(result, 0, totalBytes);
248 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
249 result = static_cast<AllocAlignmentInteger*>(result) + 1;
250 return result;
251#else
252 return calloc(n_elements, element_size);
253#endif
254}
255
256void* fastCalloc(size_t n_elements, size_t element_size)
257{
258 ASSERT(!isForbidden());
259
260#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
261 void* result = tryFastCalloc(n_elements, element_size);
262#else
263 void* result = calloc(n_elements, element_size);
264#endif
265
266 if (!result)
267 CRASH();
268 return result;
269}
270
271void fastFree(void* p)
272{
273 ASSERT(!isForbidden());
274
275#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
276 if (!p)
277 return;
278
279 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(p);
280 if (*header != Internal::AllocTypeMalloc)
281 Internal::fastMallocMatchFailed(p);
282 free(header);
283#else
284 free(p);
285#endif
286}
287
288void* tryFastRealloc(void* p, size_t n)
289{
290 ASSERT(!isForbidden());
291
292#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
293 if (p) {
294 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= n) // If overflow would occur...
295 return 0;
296 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(p);
297 if (*header != Internal::AllocTypeMalloc)
298 Internal::fastMallocMatchFailed(p);
299 void* result = realloc(header, n + sizeof(AllocAlignmentInteger));
300 if (!result)
301 return 0;
302
303 // This should not be needed because the value is already there:
304 // *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
305 result = static_cast<AllocAlignmentInteger*>(result) + 1;
306 return result;
307 } else {
308 return fastMalloc(n);
309 }
310#else
311 return realloc(p, n);
312#endif
313}
314
315void* fastRealloc(void* p, size_t n)
316{
317 ASSERT(!isForbidden());
318
319#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
320 void* result = tryFastRealloc(p, n);
321#else
322 void* result = realloc(p, n);
323#endif
324
325 if (!result)
326 CRASH();
327 return result;
328}
329
330void releaseFastMallocFreeMemory() { }
331
332FastMallocStatistics fastMallocStatistics()
333{
334 FastMallocStatistics statistics = { 0, 0, 0, 0 };
335 return statistics;
336}
337
338} // namespace WTF
339
340#if PLATFORM(DARWIN)
341// This symbol is present in the JavaScriptCore exports file even when FastMalloc is disabled.
342// It will never be used in this case, so its type and value are less interesting than its presence.
343extern "C" const int jscore_fastmalloc_introspection = 0;
344#endif
345
346#else // FORCE_SYSTEM_MALLOC
347
348#if HAVE(STDINT_H)
349#include <stdint.h>
350#elif HAVE(INTTYPES_H)
351#include <inttypes.h>
352#else
353#include <sys/types.h>
354#endif
355
356#include "AlwaysInline.h"
357#include "Assertions.h"
358#include "TCPackedCache.h"
359#include "TCPageMap.h"
360#include "TCSpinLock.h"
361#include "TCSystemAlloc.h"
362#include <algorithm>
363#include <errno.h>
364#include <limits>
365#include <new>
366#include <pthread.h>
367#include <stdarg.h>
368#include <stddef.h>
369#include <stdio.h>
370#if COMPILER(MSVC)
371#ifndef WIN32_LEAN_AND_MEAN
372#define WIN32_LEAN_AND_MEAN
373#endif
374#include <windows.h>
375#endif
376
377#if WTF_CHANGES
378
379#if PLATFORM(DARWIN)
380#include "MallocZoneSupport.h"
381#include <wtf/HashSet.h>
382#endif
383
384#ifndef PRIuS
385#define PRIuS "zu"
386#endif
387
388// Calling pthread_getspecific through a global function pointer is faster than a normal
389// call to the function on Mac OS X, and it's used in performance-critical code. So we
390// use a function pointer. But that's not necessarily faster on other platforms, and we had
391// problems with this technique on Windows, so we'll do this only on Mac OS X.
392#if PLATFORM(DARWIN)
393static void* (*pthread_getspecific_function_pointer)(pthread_key_t) = pthread_getspecific;
394#define pthread_getspecific(key) pthread_getspecific_function_pointer(key)
395#endif
396
397#define DEFINE_VARIABLE(type, name, value, meaning) \
398 namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \
399 type FLAGS_##name(value); \
400 char FLAGS_no##name; \
401 } \
402 using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name
403
404#define DEFINE_int64(name, value, meaning) \
405 DEFINE_VARIABLE(int64_t, name, value, meaning)
406
407#define DEFINE_double(name, value, meaning) \
408 DEFINE_VARIABLE(double, name, value, meaning)
409
410namespace WTF {
411
412#define malloc fastMalloc
413#define calloc fastCalloc
414#define free fastFree
415#define realloc fastRealloc
416
417#define MESSAGE LOG_ERROR
418#define CHECK_CONDITION ASSERT
419
420#if PLATFORM(DARWIN)
421class Span;
422class TCMalloc_Central_FreeListPadded;
423class TCMalloc_PageHeap;
424class TCMalloc_ThreadCache;
425template <typename T> class PageHeapAllocator;
426
427class FastMallocZone {
428public:
429 static void init();
430
431 static kern_return_t enumerate(task_t, void*, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t, vm_range_recorder_t);
432 static size_t goodSize(malloc_zone_t*, size_t size) { return size; }
433 static boolean_t check(malloc_zone_t*) { return true; }
434 static void print(malloc_zone_t*, boolean_t) { }
435 static void log(malloc_zone_t*, void*) { }
436 static void forceLock(malloc_zone_t*) { }
437 static void forceUnlock(malloc_zone_t*) { }
438 static void statistics(malloc_zone_t*, malloc_statistics_t* stats) { memset(stats, 0, sizeof(malloc_statistics_t)); }
439
440private:
441 FastMallocZone(TCMalloc_PageHeap*, TCMalloc_ThreadCache**, TCMalloc_Central_FreeListPadded*, PageHeapAllocator<Span>*, PageHeapAllocator<TCMalloc_ThreadCache>*);
442 static size_t size(malloc_zone_t*, const void*);
443 static void* zoneMalloc(malloc_zone_t*, size_t);
444 static void* zoneCalloc(malloc_zone_t*, size_t numItems, size_t size);
445 static void zoneFree(malloc_zone_t*, void*);
446 static void* zoneRealloc(malloc_zone_t*, void*, size_t);
447 static void* zoneValloc(malloc_zone_t*, size_t) { LOG_ERROR("valloc is not supported"); return 0; }
448 static void zoneDestroy(malloc_zone_t*) { }
449
450 malloc_zone_t m_zone;
451 TCMalloc_PageHeap* m_pageHeap;
452 TCMalloc_ThreadCache** m_threadHeaps;
453 TCMalloc_Central_FreeListPadded* m_centralCaches;
454 PageHeapAllocator<Span>* m_spanAllocator;
455 PageHeapAllocator<TCMalloc_ThreadCache>* m_pageHeapAllocator;
456};
457
458#endif
459
460#endif
461
462#ifndef WTF_CHANGES
463// This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if
464// you're porting to a system where you really can't get a stacktrace.
465#ifdef NO_TCMALLOC_SAMPLES
466// We use #define so code compiles even if you #include stacktrace.h somehow.
467# define GetStackTrace(stack, depth, skip) (0)
468#else
469# include <google/stacktrace.h>
470#endif
471#endif
472
473// Even if we have support for thread-local storage in the compiler
474// and linker, the OS may not support it. We need to check that at
475// runtime. Right now, we have to keep a manual set of "bad" OSes.
476#if defined(HAVE_TLS)
477 static bool kernel_supports_tls = false; // be conservative
478 static inline bool KernelSupportsTLS() {
479 return kernel_supports_tls;
480 }
481# if !HAVE_DECL_UNAME // if too old for uname, probably too old for TLS
482 static void CheckIfKernelSupportsTLS() {
483 kernel_supports_tls = false;
484 }
485# else
486# include <sys/utsname.h> // DECL_UNAME checked for <sys/utsname.h> too
487 static void CheckIfKernelSupportsTLS() {
488 struct utsname buf;
489 if (uname(&buf) != 0) { // should be impossible
490 MESSAGE("uname failed assuming no TLS support (errno=%d)\n", errno);
491 kernel_supports_tls = false;
492 } else if (strcasecmp(buf.sysname, "linux") == 0) {
493 // The linux case: the first kernel to support TLS was 2.6.0
494 if (buf.release[0] < '2' && buf.release[1] == '.') // 0.x or 1.x
495 kernel_supports_tls = false;
496 else if (buf.release[0] == '2' && buf.release[1] == '.' &&
497 buf.release[2] >= '0' && buf.release[2] < '6' &&
498 buf.release[3] == '.') // 2.0 - 2.5
499 kernel_supports_tls = false;
500 else
501 kernel_supports_tls = true;
502 } else { // some other kernel, we'll be optimistic
503 kernel_supports_tls = true;
504 }
505 // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG
506 }
507# endif // HAVE_DECL_UNAME
508#endif // HAVE_TLS
509
510// __THROW is defined in glibc systems. It means, counter-intuitively,
511// "This function will never throw an exception." It's an optional
512// optimization tool, but we may need to use it to match glibc prototypes.
513#ifndef __THROW // I guess we're not on a glibc system
514# define __THROW // __THROW is just an optimization, so ok to make it ""
515#endif
516
517//-------------------------------------------------------------------
518// Configuration
519//-------------------------------------------------------------------
520
521// Not all possible combinations of the following parameters make
522// sense. In particular, if kMaxSize increases, you may have to
523// increase kNumClasses as well.
524static const size_t kPageShift = 12;
525static const size_t kPageSize = 1 << kPageShift;
526static const size_t kMaxSize = 8u * kPageSize;
527static const size_t kAlignShift = 3;
528static const size_t kAlignment = 1 << kAlignShift;
529static const size_t kNumClasses = 68;
530
531// Allocates a big block of memory for the pagemap once we reach more than
532// 128MB
533static const size_t kPageMapBigAllocationThreshold = 128 << 20;
534
535// Minimum number of pages to fetch from system at a time. Must be
536// significantly bigger than kBlockSize to amortize system-call
537// overhead, and also to reduce external fragmentation. Also, we
538// should keep this value big because various incarnations of Linux
539// have small limits on the number of mmap() regions per
540// address-space.
541static const size_t kMinSystemAlloc = 1 << (20 - kPageShift);
542
543// Number of objects to move between a per-thread list and a central
544// list in one shot. We want this to be not too small so we can
545// amortize the lock overhead for accessing the central list. Making
546// it too big may temporarily cause unnecessary memory wastage in the
547// per-thread free list until the scavenger cleans up the list.
548static int num_objects_to_move[kNumClasses];
549
550// Maximum length we allow a per-thread free-list to have before we
551// move objects from it into the corresponding central free-list. We
552// want this big to avoid locking the central free-list too often. It
553// should not hurt to make this list somewhat big because the
554// scavenging code will shrink it down when its contents are not in use.
555static const int kMaxFreeListLength = 256;
556
557// Lower and upper bounds on the per-thread cache sizes
558static const size_t kMinThreadCacheSize = kMaxSize * 2;
559static const size_t kMaxThreadCacheSize = 2 << 20;
560
561// Default bound on the total amount of thread caches
562static const size_t kDefaultOverallThreadCacheSize = 16 << 20;
563
564// For all span-lengths < kMaxPages we keep an exact-size list.
565// REQUIRED: kMaxPages >= kMinSystemAlloc;
566static const size_t kMaxPages = kMinSystemAlloc;
567
568/* The smallest prime > 2^n */
569static int primes_list[] = {
570 // Small values might cause high rates of sampling
571 // and hence are commented out.
572 // 2, 5, 11, 17, 37, 67, 131, 257,
573 // 521, 1031, 2053, 4099, 8209, 16411,
574 32771, 65537, 131101, 262147, 524309, 1048583,
575 2097169, 4194319, 8388617, 16777259, 33554467 };
576
577// Twice the approximate gap between sampling actions.
578// I.e., we take one sample approximately once every
579// tcmalloc_sample_parameter/2
580// bytes of allocation, i.e., ~ once every 128KB.
581// Must be a prime number.
582#ifdef NO_TCMALLOC_SAMPLES
583DEFINE_int64(tcmalloc_sample_parameter, 0,
584 "Unused: code is compiled with NO_TCMALLOC_SAMPLES");
585static size_t sample_period = 0;
586#else
587DEFINE_int64(tcmalloc_sample_parameter, 262147,
588 "Twice the approximate gap between sampling actions."
589 " Must be a prime number. Otherwise will be rounded up to a "
590 " larger prime number");
591static size_t sample_period = 262147;
592#endif
593
594// Protects sample_period above
595static SpinLock sample_period_lock = SPINLOCK_INITIALIZER;
596
597// Parameters for controlling how fast memory is returned to the OS.
598
599DEFINE_double(tcmalloc_release_rate, 1,
600 "Rate at which we release unused memory to the system. "
601 "Zero means we never release memory back to the system. "
602 "Increase this flag to return memory faster; decrease it "
603 "to return memory slower. Reasonable rates are in the "
604 "range [0,10]");
605
606//-------------------------------------------------------------------
607// Mapping from size to size_class and vice versa
608//-------------------------------------------------------------------
609
610// Sizes <= 1024 have an alignment >= 8. So for such sizes we have an
611// array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128.
612// So for these larger sizes we have an array indexed by ceil(size/128).
613//
614// We flatten both logical arrays into one physical array and use
615// arithmetic to compute an appropriate index. The constants used by
616// ClassIndex() were selected to make the flattening work.
617//
618// Examples:
619// Size Expression Index
620// -------------------------------------------------------
621// 0 (0 + 7) / 8 0
622// 1 (1 + 7) / 8 1
623// ...
624// 1024 (1024 + 7) / 8 128
625// 1025 (1025 + 127 + (120<<7)) / 128 129
626// ...
627// 32768 (32768 + 127 + (120<<7)) / 128 376
628static const size_t kMaxSmallSize = 1024;
629static const int shift_amount[2] = { 3, 7 }; // For divides by 8 or 128
630static const int add_amount[2] = { 7, 127 + (120 << 7) };
631static unsigned char class_array[377];
632
633// Compute index of the class_array[] entry for a given size
634static inline int ClassIndex(size_t s) {
635 const int i = (s > kMaxSmallSize);
636 return static_cast<int>((s + add_amount[i]) >> shift_amount[i]);
637}
638
639// Mapping from size class to max size storable in that class
640static size_t class_to_size[kNumClasses];
641
642// Mapping from size class to number of pages to allocate at a time
643static size_t class_to_pages[kNumClasses];
644
645// TransferCache is used to cache transfers of num_objects_to_move[size_class]
646// back and forth between thread caches and the central cache for a given size
647// class.
648struct TCEntry {
649 void *head; // Head of chain of objects.
650 void *tail; // Tail of chain of objects.
651};
652// A central cache freelist can have anywhere from 0 to kNumTransferEntries
653// slots to put linked list chains into. To keep memory usage bounded, the total
654// number of TCEntries across size classes is fixed. Currently each size
655// class is initially given one TCEntry which also means that the maximum any
656// one class can have is kNumClasses.
657static const int kNumTransferEntries = kNumClasses;
658
659// Note: the following only works for "n"s that fit in 32-bits, but
660// that is fine since we only use it for small sizes.
661static inline int LgFloor(size_t n) {
662 int log = 0;
663 for (int i = 4; i >= 0; --i) {
664 int shift = (1 << i);
665 size_t x = n >> shift;
666 if (x != 0) {
667 n = x;
668 log += shift;
669 }
670 }
671 ASSERT(n == 1);
672 return log;
673}
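// Worked example (illustrative): LgFloor(36). 36 is 100100 in binary, so the
// expected result is floor(log2(36)) == 5. The loop peels off shifts of
// 16, 8, 4, 2, 1:
//   i=4: 36 >> 16 == 0, skip    i=3: 36 >> 8 == 0, skip
//   i=2: 36 >> 4 == 2, so n=2, log=4
//   i=1: 2 >> 2 == 0, skip      i=0: 2 >> 1 == 1, so n=1, log=5
// leaving n == 1 (the ASSERT) and log == 5.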
674
675// Some very basic linked list functions for lists that use void * as
676// storage.
677
678static inline void *SLL_Next(void *t) {
679 return *(reinterpret_cast<void**>(t));
680}
681
682static inline void SLL_SetNext(void *t, void *n) {
683 *(reinterpret_cast<void**>(t)) = n;
684}
685
686static inline void SLL_Push(void **list, void *element) {
687 SLL_SetNext(element, *list);
688 *list = element;
689}
690
691static inline void *SLL_Pop(void **list) {
692 void *result = *list;
693 *list = SLL_Next(*list);
694 return result;
695}
696
697
698// Remove N elements from a linked list to which head points. head will be
699// modified to point to the new head. start and end will point to the first
700// and last nodes of the range. Note that end will point to NULL after this
701// function is called.
702static inline void SLL_PopRange(void **head, int N, void **start, void **end) {
703 if (N == 0) {
704 *start = NULL;
705 *end = NULL;
706 return;
707 }
708
709 void *tmp = *head;
710 for (int i = 1; i < N; ++i) {
711 tmp = SLL_Next(tmp);
712 }
713
714 *start = *head;
715 *end = tmp;
716 *head = SLL_Next(tmp);
717 // Unlink range from list.
718 SLL_SetNext(tmp, NULL);
719}
720
721static inline void SLL_PushRange(void **head, void *start, void *end) {
722 if (!start) return;
723 SLL_SetNext(end, *head);
724 *head = start;
725}
726
727static inline size_t SLL_Size(void *head) {
728 int count = 0;
729 while (head) {
730 count++;
731 head = SLL_Next(head);
732 }
733 return count;
734}
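// Illustrative sketch of how the SLL_* helpers are used (blockA/blockB are
// hypothetical free blocks of at least sizeof(void*) bytes): the list lives
// inside the free memory itself, with each block's first word holding the
// "next" pointer, so no separate node allocations are needed.
//
//     void* freeList = NULL;
//     SLL_Push(&freeList, blockA);        // blockA's first word becomes NULL
//     SLL_Push(&freeList, blockB);        // blockB's first word becomes blockA
//     void* reused = SLL_Pop(&freeList);  // returns blockB; head is now blockA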
735
736// Setup helper functions.
737
738static ALWAYS_INLINE size_t SizeClass(size_t size) {
739 return class_array[ClassIndex(size)];
740}
741
742// Get the byte-size for a specified class
743static ALWAYS_INLINE size_t ByteSizeForClass(size_t cl) {
744 return class_to_size[cl];
745}
746static int NumMoveSize(size_t size) {
747 if (size == 0) return 0;
748 // Use approx 64k transfers between thread and central caches.
749 int num = static_cast<int>(64.0 * 1024.0 / size);
750 if (num < 2) num = 2;
751 // Clamp well below kMaxFreeListLength to avoid ping pong between central
752 // and thread caches.
753 if (num > static_cast<int>(0.8 * kMaxFreeListLength))
754 num = static_cast<int>(0.8 * kMaxFreeListLength);
755
756 // Also, avoid bringing in too many objects into small object free
757 // lists. There are lots of such lists, and if we allow each one to
758 // fetch too many at a time, we end up having to scavenge too often
759 // (especially when there are lots of threads and each thread gets a
760 // small allowance for its thread cache).
761 //
762 // TODO: Make thread cache free list sizes dynamic so that we do not
763 // have to equally divide a fixed resource amongst lots of threads.
764 if (num > 32) num = 32;
765
766 return num;
767}
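// Worked examples (illustrative), using the ~64KB transfer target above:
//   size 4096:  64*1024/4096 = 16 objects moved per transfer.
//   size 32:    64*1024/32 = 2048, clamped to 0.8*kMaxFreeListLength (204)
//               and then to the small-object cap of 32.
//   size 32768: 64*1024/32768 = 2, which is already the enforced minimum.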
768
769// Initialize the mapping arrays
770static void InitSizeClasses() {
771 // Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
772 if (ClassIndex(0) < 0) {
773 MESSAGE("Invalid class index %d for size 0\n", ClassIndex(0));
774 CRASH();
775 }
776 if (static_cast<size_t>(ClassIndex(kMaxSize)) >= sizeof(class_array)) {
777 MESSAGE("Invalid class index %d for kMaxSize\n", ClassIndex(kMaxSize));
778 CRASH();
779 }
780
781 // Compute the size classes we want to use
782 size_t sc = 1; // Next size class to assign
783 unsigned char alignshift = kAlignShift;
784 int last_lg = -1;
785 for (size_t size = kAlignment; size <= kMaxSize; size += (1 << alignshift)) {
786 int lg = LgFloor(size);
787 if (lg > last_lg) {
788 // Increase alignment every so often.
789 //
790 // Since we double the alignment every time size doubles and
791 // size >= 128, this means that space wasted due to alignment is
792 // at most 16/128 i.e., 12.5%. Plus we cap the alignment at 256
793 // bytes, so the space wasted as a percentage starts falling for
794 // sizes > 2K.
795 if ((lg >= 7) && (alignshift < 8)) {
796 alignshift++;
797 }
798 last_lg = lg;
799 }
800
801 // Allocate enough pages so leftover is less than 1/8 of total.
802 // This bounds wasted space to at most 12.5%.
803 size_t psize = kPageSize;
804 while ((psize % size) > (psize >> 3)) {
805 psize += kPageSize;
806 }
807 const size_t my_pages = psize >> kPageShift;
808
809 if (sc > 1 && my_pages == class_to_pages[sc-1]) {
810 // See if we can merge this into the previous class without
811 // increasing the fragmentation of the previous class.
812 const size_t my_objects = (my_pages << kPageShift) / size;
813 const size_t prev_objects = (class_to_pages[sc-1] << kPageShift)
814 / class_to_size[sc-1];
815 if (my_objects == prev_objects) {
816 // Adjust last class to include this size
817 class_to_size[sc-1] = size;
818 continue;
819 }
820 }
821
822 // Add new class
823 class_to_pages[sc] = my_pages;
824 class_to_size[sc] = size;
825 sc++;
826 }
827 if (sc != kNumClasses) {
828 MESSAGE("wrong number of size classes: found %" PRIuS " instead of %d\n",
829 sc, int(kNumClasses));
830 CRASH();
831 }
832
833 // Initialize the mapping arrays
834 int next_size = 0;
835 for (unsigned char c = 1; c < kNumClasses; c++) {
836 const size_t max_size_in_class = class_to_size[c];
837 for (size_t s = next_size; s <= max_size_in_class; s += kAlignment) {
838 class_array[ClassIndex(s)] = c;
839 }
840 next_size = static_cast<int>(max_size_in_class + kAlignment);
841 }
842
843 // Double-check sizes just to be safe
844 for (size_t size = 0; size <= kMaxSize; size++) {
845 const size_t sc = SizeClass(size);
846 if (sc == 0) {
847 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
848 CRASH();
849 }
850 if (sc > 1 && size <= class_to_size[sc-1]) {
851 MESSAGE("Allocating unnecessarily large class %" PRIuS " for %" PRIuS
852 "\n", sc, size);
853 CRASH();
854 }
855 if (sc >= kNumClasses) {
856 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
857 CRASH();
858 }
859 const size_t s = class_to_size[sc];
860 if (size > s) {
861 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc);
862 CRASH();
863 }
864 if (s == 0) {
865 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc);
866 CRASH();
867 }
868 }
869
870 // Initialize the num_objects_to_move array.
871 for (size_t cl = 1; cl < kNumClasses; ++cl) {
872 num_objects_to_move[cl] = NumMoveSize(ByteSizeForClass(cl));
873 }
874
875#ifndef WTF_CHANGES
876 if (false) {
877 // Dump class sizes and maximum external wastage per size class
878 for (size_t cl = 1; cl < kNumClasses; ++cl) {
879 const int alloc_size = class_to_pages[cl] << kPageShift;
880 const int alloc_objs = alloc_size / class_to_size[cl];
881 const int min_used = (class_to_size[cl-1] + 1) * alloc_objs;
882 const int max_waste = alloc_size - min_used;
883 MESSAGE("SC %3d [ %8d .. %8d ] from %8d ; %2.0f%% maxwaste\n",
884 int(cl),
885 int(class_to_size[cl-1] + 1),
886 int(class_to_size[cl]),
887 int(class_to_pages[cl] << kPageShift),
888 max_waste * 100.0 / alloc_size
889 );
890 }
891 }
892#endif
893}
894
895// -------------------------------------------------------------------------
896// Simple allocator for objects of a specified type. External locking
897// is required before accessing one of these objects.
898// -------------------------------------------------------------------------
899
900// Metadata allocator -- keeps stats about how many bytes allocated
901static uint64_t metadata_system_bytes = 0;
902static void* MetaDataAlloc(size_t bytes) {
903 void* result = TCMalloc_SystemAlloc(bytes, 0);
904 if (result != NULL) {
905 metadata_system_bytes += bytes;
906 }
907 return result;
908}
909
910template <class T>
911class PageHeapAllocator {
912 private:
913 // How much to allocate from system at a time
914 static const size_t kAllocIncrement = 32 << 10;
915
916 // Aligned size of T
917 static const size_t kAlignedSize
918 = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment);
919
920 // Free area from which to carve new objects
921 char* free_area_;
922 size_t free_avail_;
923
924 // Linked list of all regions allocated by this allocator
925 void* allocated_regions_;
926
927 // Free list of already carved objects
928 void* free_list_;
929
930 // Number of allocated but unfreed objects
931 int inuse_;
932
933 public:
934 void Init() {
935 ASSERT(kAlignedSize <= kAllocIncrement);
936 inuse_ = 0;
937 allocated_regions_ = 0;
938 free_area_ = NULL;
939 free_avail_ = 0;
940 free_list_ = NULL;
941 }
942
943 T* New() {
944 // Consult free list
945 void* result;
946 if (free_list_ != NULL) {
947 result = free_list_;
948 free_list_ = *(reinterpret_cast<void**>(result));
949 } else {
950 if (free_avail_ < kAlignedSize) {
951 // Need more room
952 char* new_allocation = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement));
953 if (!new_allocation)
954 CRASH();
955
956 *(void**)new_allocation = allocated_regions_;
957 allocated_regions_ = new_allocation;
958 free_area_ = new_allocation + kAlignedSize;
959 free_avail_ = kAllocIncrement - kAlignedSize;
960 }
961 result = free_area_;
962 free_area_ += kAlignedSize;
963 free_avail_ -= kAlignedSize;
964 }
965 inuse_++;
966 return reinterpret_cast<T*>(result);
967 }
968
969 void Delete(T* p) {
970 *(reinterpret_cast<void**>(p)) = free_list_;
971 free_list_ = p;
972 inuse_--;
973 }
974
975 int inuse() const { return inuse_; }
976
977#if defined(WTF_CHANGES) && PLATFORM(DARWIN)
978 template <class Recorder>
979 void recordAdministrativeRegions(Recorder& recorder, const RemoteMemoryReader& reader)
980 {
981 vm_address_t adminAllocation = reinterpret_cast<vm_address_t>(allocated_regions_);
982 while (adminAllocation) {
983 recorder.recordRegion(adminAllocation, kAllocIncrement);
984 adminAllocation = *reader(reinterpret_cast<vm_address_t*>(adminAllocation));
985 }
986 }
987#endif
988};
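// Illustrative usage sketch (the allocator instance below is hypothetical;
// span_allocator and stacktrace_allocator later in this file follow the same
// pattern): New() carves kAlignedSize-byte objects out of 32KB chunks obtained
// from MetaDataAlloc(), and Delete() threads them onto an internal free list
// for reuse.
//
//     static PageHeapAllocator<Span> allocator;
//     allocator.Init();
//     Span* s = allocator.New();     // carved from the current chunk
//     allocator.Delete(s);           // s becomes the head of the free list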
989
990// -------------------------------------------------------------------------
991// Span - a contiguous run of pages
992// -------------------------------------------------------------------------
993
994// Type that can hold a page number
995typedef uintptr_t PageID;
996
997// Type that can hold the length of a run of pages
998typedef uintptr_t Length;
999
1000static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;
1001
1002// Convert byte size into pages. This won't overflow, but may return
1003// an unreasonably large value if bytes is huge enough.
1004static inline Length pages(size_t bytes) {
1005 return (bytes >> kPageShift) +
1006 ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
1007}
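// Worked example (illustrative), with kPageShift == 12 (4KB pages):
//   pages(4096) == 1, pages(4097) == 2, pages(12288) == 3.
// i.e. a byte count is rounded up to a whole number of pages.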
1008
1009// Convert a user size into the number of bytes that will actually be
1010// allocated
1011static size_t AllocationSize(size_t bytes) {
1012 if (bytes > kMaxSize) {
1013 // Large object: we allocate an integral number of pages
1014 ASSERT(bytes <= (kMaxValidPages << kPageShift));
1015 return pages(bytes) << kPageShift;
1016 } else {
1017 // Small object: find the size class to which it belongs
1018 return ByteSizeForClass(SizeClass(bytes));
1019 }
1020}
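// Worked example (illustrative): AllocationSize(40000) exceeds kMaxSize, so the
// request is rounded up to whole pages: pages(40000) == 10, giving
// 10 << kPageShift == 40960 bytes. A small request such as AllocationSize(100)
// instead returns the byte size of whichever size class 100-byte objects map to.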
1021
1022// Information kept for a span (a contiguous run of pages).
1023struct Span {
1024 PageID start; // Starting page number
1025 Length length; // Number of pages in span
1026 Span* next; // Used when in linked list
1027 Span* prev; // Used when in linked list
1028 void* objects; // Linked list of free objects
1029 unsigned int free : 1; // Is the span free
1030#ifndef NO_TCMALLOC_SAMPLES
1031 unsigned int sample : 1; // Sampled object?
1032#endif
1033 unsigned int sizeclass : 8; // Size-class for small objects (or 0)
1034 unsigned int refcount : 11; // Number of non-free objects
1035 bool decommitted : 1;
1036
1037#undef SPAN_HISTORY
1038#ifdef SPAN_HISTORY
1039 // For debugging, we can keep a log of events per span
1040 int nexthistory;
1041 char history[64];
1042 int value[64];
1043#endif
1044};
1045
1046#if TCMALLOC_TRACK_DECOMMITED_SPANS
1047#define ASSERT_SPAN_COMMITTED(span) ASSERT(!span->decommitted)
1048#else
1049#define ASSERT_SPAN_COMMITTED(span)
1050#endif
1051
1052#ifdef SPAN_HISTORY
1053void Event(Span* span, char op, int v = 0) {
1054 span->history[span->nexthistory] = op;
1055 span->value[span->nexthistory] = v;
1056 span->nexthistory++;
1057 if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0;
1058}
1059#else
1060#define Event(s,o,v) ((void) 0)
1061#endif
1062
1063// Allocator/deallocator for spans
1064static PageHeapAllocator<Span> span_allocator;
1065static Span* NewSpan(PageID p, Length len) {
1066 Span* result = span_allocator.New();
1067 memset(result, 0, sizeof(*result));
1068 result->start = p;
1069 result->length = len;
1070#ifdef SPAN_HISTORY
1071 result->nexthistory = 0;
1072#endif
1073 return result;
1074}
1075
1076static inline void DeleteSpan(Span* span) {
1077#ifndef NDEBUG
1078 // In debug mode, trash the contents of deleted Spans
1079 memset(span, 0x3f, sizeof(*span));
1080#endif
1081 span_allocator.Delete(span);
1082}
1083
1084// -------------------------------------------------------------------------
1085// Doubly linked list of spans.
1086// -------------------------------------------------------------------------
1087
1088static inline void DLL_Init(Span* list) {
1089 list->next = list;
1090 list->prev = list;
1091}
1092
1093static inline void DLL_Remove(Span* span) {
1094 span->prev->next = span->next;
1095 span->next->prev = span->prev;
1096 span->prev = NULL;
1097 span->next = NULL;
1098}
1099
1100static ALWAYS_INLINE bool DLL_IsEmpty(const Span* list) {
1101 return list->next == list;
1102}
1103
1104static int DLL_Length(const Span* list) {
1105 int result = 0;
1106 for (Span* s = list->next; s != list; s = s->next) {
1107 result++;
1108 }
1109 return result;
1110}
1111
1112#if 0 /* Not needed at the moment -- causes compiler warnings if not used */
1113static void DLL_Print(const char* label, const Span* list) {
1114 MESSAGE("%-10s %p:", label, list);
1115 for (const Span* s = list->next; s != list; s = s->next) {
1116 MESSAGE(" <%p,%u,%u>", s, s->start, s->length);
1117 }
1118 MESSAGE("\n");
1119}
1120#endif
1121
1122static inline void DLL_Prepend(Span* list, Span* span) {
1123 ASSERT(span->next == NULL);
1124 ASSERT(span->prev == NULL);
1125 span->next = list->next;
1126 span->prev = list;
1127 list->next->prev = span;
1128 list->next = span;
1129}
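// Illustrative sketch of how these lists are used: every list is circular with
// a sentinel Span, so an empty list points at itself and no NULL checks are
// needed while linking or unlinking (the sketch below uses a hypothetical,
// freshly zeroed Span* span):
//
//     Span sentinel;                    // list head, not a real span
//     DLL_Init(&sentinel);              // next == prev == &sentinel
//     ASSERT(DLL_IsEmpty(&sentinel));
//     DLL_Prepend(&sentinel, span);     // span becomes sentinel.next
//     DLL_Remove(span);                 // list is empty again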
1130
1131// -------------------------------------------------------------------------
1132// Stack traces kept for sampled allocations
1133// The following state is protected by pageheap_lock_.
1134// -------------------------------------------------------------------------
1135
1136// size/depth are made the same size as a pointer so that some generic
1137// code below can conveniently cast them back and forth to void*.
1138static const int kMaxStackDepth = 31;
1139struct StackTrace {
1140 uintptr_t size; // Size of object
1141 uintptr_t depth; // Number of PC values stored in array below
1142 void* stack[kMaxStackDepth];
1143};
1144static PageHeapAllocator<StackTrace> stacktrace_allocator;
1145static Span sampled_objects;
1146
1147// -------------------------------------------------------------------------
1148// Map from page-id to per-page data
1149// -------------------------------------------------------------------------
1150
1151// We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines.
1152// We also use a simple one-level cache for hot PageID-to-sizeclass mappings,
1153// because sometimes the sizeclass is all the information we need.
1154
1155// Selector class -- general selector uses 3-level map
1156template <int BITS> class MapSelector {
1157 public:
1158 typedef TCMalloc_PageMap3<BITS-kPageShift> Type;
1159 typedef PackedCache<BITS, uint64_t> CacheType;
1160};
1161
1162#if defined(WTF_CHANGES)
1163#if PLATFORM(X86_64)
1164// On all known X86-64 platforms, the upper 16 bits are always unused and therefore
1165// can be excluded from the PageMap key.
1166// See http://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details
1167
1168static const size_t kBitsUnusedOn64Bit = 16;
1169#else
1170static const size_t kBitsUnusedOn64Bit = 0;
1171#endif
1172
1173// A three-level map for 64-bit machines
1174template <> class MapSelector<64> {
1175 public:
1176 typedef TCMalloc_PageMap3<64 - kPageShift - kBitsUnusedOn64Bit> Type;
1177 typedef PackedCache<64, uint64_t> CacheType;
1178};
1179#endif
1180
1181// A two-level map for 32-bit machines
1182template <> class MapSelector<32> {
1183 public:
1184 typedef TCMalloc_PageMap2<32 - kPageShift> Type;
1185 typedef PackedCache<32 - kPageShift, uint16_t> CacheType;
1186};
1187
1188// -------------------------------------------------------------------------
1189// Page-level allocator
1190// * Eager coalescing
1191//
1192// Heap for page-level allocation. We allow allocating and freeing a
1193// contiguous run of pages (called a "span").
1194// -------------------------------------------------------------------------
1195
1196class TCMalloc_PageHeap {
1197 public:
1198 void init();
1199
1200 // Allocate a run of "n" pages. Returns zero if out of memory.
1201 Span* New(Length n);
1202
1203 // Delete the span "[p, p+n-1]".
1204 // REQUIRES: span was returned by earlier call to New() and
1205 // has not yet been deleted.
1206 void Delete(Span* span);
1207
1208 // Mark an allocated span as being used for small objects of the
1209 // specified size-class.
1210 // REQUIRES: span was returned by an earlier call to New()
1211 // and has not yet been deleted.
1212 void RegisterSizeClass(Span* span, size_t sc);
1213
1214 // Split an allocated span into two spans: one of length "n" pages
1215 // followed by another span of length "span->length - n" pages.
1216 // Modifies "*span" to point to the first span of length "n" pages.
1217 // Returns a pointer to the second span.
1218 //
1219 // REQUIRES: "0 < n < span->length"
1220 // REQUIRES: !span->free
1221 // REQUIRES: span->sizeclass == 0
1222 Span* Split(Span* span, Length n);
1223
1224 // Return the descriptor for the specified page.
1225 inline Span* GetDescriptor(PageID p) const {
1226 return reinterpret_cast<Span*>(pagemap_.get(p));
1227 }
1228
1229#ifdef WTF_CHANGES
1230 inline Span* GetDescriptorEnsureSafe(PageID p)
1231 {
1232 pagemap_.Ensure(p, 1);
1233 return GetDescriptor(p);
1234 }
1235
1236 size_t ReturnedBytes() const;
1237#endif
1238
1239 // Dump state to stderr
1240#ifndef WTF_CHANGES
1241 void Dump(TCMalloc_Printer* out);
1242#endif
1243
1244 // Return number of bytes allocated from system
1245 inline uint64_t SystemBytes() const { return system_bytes_; }
1246
1247 // Return number of free bytes in heap
1248 uint64_t FreeBytes() const {
1249 return (static_cast<uint64_t>(free_pages_) << kPageShift);
1250 }
1251
1252 bool Check();
1253 bool CheckList(Span* list, Length min_pages, Length max_pages);
1254
1255 // Release all pages on the free list for reuse by the OS:
1256 void ReleaseFreePages();
1257
1258 // Return 0 if we have no information, or else the correct sizeclass for p.
1259 // Reads and writes to pagemap_cache_ do not require locking.
1260 // The entries are 64 bits on 64-bit hardware and 16 bits on
1261 // 32-bit hardware, and we don't mind raciness as long as each read of
1262 // an entry yields a valid entry, not a partially updated entry.
1263 size_t GetSizeClassIfCached(PageID p) const {
1264 return pagemap_cache_.GetOrDefault(p, 0);
1265 }
1266 void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); }
1267
1268 private:
1269 // Pick the appropriate map and cache types based on pointer size
1270 typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap;
1271 typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache;
1272 PageMap pagemap_;
1273 mutable PageMapCache pagemap_cache_;
1274
1275 // We segregate spans of a given size into two circular linked
1276 // lists: one for normal spans, and one for spans whose memory
1277 // has been returned to the system.
1278 struct SpanList {
1279 Span normal;
1280 Span returned;
1281 };
1282
1283 // List of free spans of length >= kMaxPages
1284 SpanList large_;
1285
1286 // Array mapping from span length to a doubly linked list of free spans
1287 SpanList free_[kMaxPages];
1288
1289 // Number of pages kept in free lists
1290 uintptr_t free_pages_;
1291
1292 // Bytes allocated from system
1293 uint64_t system_bytes_;
1294
1295 bool GrowHeap(Length n);
1296
1297 // REQUIRES span->length >= n
1298 // Remove span from its free list, and move any leftover part of
1299 // span into appropriate free lists. Also update "span" to have
1300 // length exactly "n" and mark it as non-free so it can be returned
1301 // to the client.
1302 //
1303 // "released" is true iff "span" was found on a "returned" list.
1304 void Carve(Span* span, Length n, bool released);
1305
1306 void RecordSpan(Span* span) {
1307 pagemap_.set(span->start, span);
1308 if (span->length > 1) {
1309 pagemap_.set(span->start + span->length - 1, span);
1310 }
1311 }
1312
1313 // Allocate a large span of length == n. If successful, returns a
1314 // span of exactly the specified length. Else, returns NULL.
1315 Span* AllocLarge(Length n);
1316
1317 // Incrementally release some memory to the system.
1318 // IncrementalScavenge(n) is called whenever n pages are freed.
1319 void IncrementalScavenge(Length n);
1320
1321 // Number of pages to deallocate before doing more scavenging
1322 int64_t scavenge_counter_;
1323
1324 // Index of last free list we scavenged
1325 size_t scavenge_index_;
1326
1327#if defined(WTF_CHANGES) && PLATFORM(DARWIN)
1328 friend class FastMallocZone;
1329#endif
1330};
1331
1332void TCMalloc_PageHeap::init()
1333{
1334 pagemap_.init(MetaDataAlloc);
1335 pagemap_cache_ = PageMapCache(0);
1336 free_pages_ = 0;
1337 system_bytes_ = 0;
1338 scavenge_counter_ = 0;
1339 // Start scavenging at kMaxPages list
1340 scavenge_index_ = kMaxPages-1;
1341 COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits);
1342 DLL_Init(&large_.normal);
1343 DLL_Init(&large_.returned);
1344 for (size_t i = 0; i < kMaxPages; i++) {
1345 DLL_Init(&free_[i].normal);
1346 DLL_Init(&free_[i].returned);
1347 }
1348}
1349
1350inline Span* TCMalloc_PageHeap::New(Length n) {
1351 ASSERT(Check());
1352 ASSERT(n > 0);
1353
1354 // Find first size >= n that has a non-empty list
1355 for (Length s = n; s < kMaxPages; s++) {
1356 Span* ll = NULL;
1357 bool released = false;
1358 if (!DLL_IsEmpty(&free_[s].normal)) {
1359 // Found normal span
1360 ll = &free_[s].normal;
1361 } else if (!DLL_IsEmpty(&free_[s].returned)) {
1362 // Found returned span; reallocate it
1363 ll = &free_[s].returned;
1364 released = true;
1365 } else {
1366 // Keep looking in larger classes
1367 continue;
1368 }
1369
1370 Span* result = ll->next;
1371 Carve(result, n, released);
1372#if TCMALLOC_TRACK_DECOMMITED_SPANS
1373 if (result->decommitted) {
1374 TCMalloc_SystemCommit(reinterpret_cast<void*>(result->start << kPageShift), static_cast<size_t>(n << kPageShift));
1375 result->decommitted = false;
1376 }
1377#endif
1378 ASSERT(Check());
1379 free_pages_ -= n;
1380 return result;
1381 }
1382
1383 Span* result = AllocLarge(n);
1384 if (result != NULL) {
1385 ASSERT_SPAN_COMMITTED(result);
1386 return result;
1387 }
1388
1389 // Grow the heap and try again
1390 if (!GrowHeap(n)) {
1391 ASSERT(Check());
1392 return NULL;
1393 }
1394
1395 return AllocLarge(n);
1396}
1397
1398Span* TCMalloc_PageHeap::AllocLarge(Length n) {
1399 // find the best span (closest to n in size).
1400 // The following loops implement address-ordered best-fit.
1401 bool from_released = false;
1402 Span *best = NULL;
1403
1404 // Search through normal list
1405 for (Span* span = large_.normal.next;
1406 span != &large_.normal;
1407 span = span->next) {
1408 if (span->length >= n) {
1409 if ((best == NULL)
1410 || (span->length < best->length)
1411 || ((span->length == best->length) && (span->start < best->start))) {
1412 best = span;
1413 from_released = false;
1414 }
1415 }
1416 }
1417
1418 // Search through released list in case it has a better fit
1419 for (Span* span = large_.returned.next;
1420 span != &large_.returned;
1421 span = span->next) {
1422 if (span->length >= n) {
1423 if ((best == NULL)
1424 || (span->length < best->length)
1425 || ((span->length == best->length) && (span->start < best->start))) {
1426 best = span;
1427 from_released = true;
1428 }
1429 }
1430 }
1431
1432 if (best != NULL) {
1433 Carve(best, n, from_released);
1434#if TCMALLOC_TRACK_DECOMMITED_SPANS
1435 if (best->decommitted) {
1436 TCMalloc_SystemCommit(reinterpret_cast<void*>(best->start << kPageShift), static_cast<size_t>(n << kPageShift));
1437 best->decommitted = false;
1438 }
1439#endif
1440 ASSERT(Check());
1441 free_pages_ -= n;
1442 return best;
1443 }
1444 return NULL;
1445}
1446
1447Span* TCMalloc_PageHeap::Split(Span* span, Length n) {
1448 ASSERT(0 < n);
1449 ASSERT(n < span->length);
1450 ASSERT(!span->free);
1451 ASSERT(span->sizeclass == 0);
1452 Event(span, 'T', n);
1453
1454 const Length extra = span->length - n;
1455 Span* leftover = NewSpan(span->start + n, extra);
1456 Event(leftover, 'U', extra);
1457 RecordSpan(leftover);
1458 pagemap_.set(span->start + n - 1, span); // Update map from pageid to span
1459 span->length = n;
1460
1461 return leftover;
1462}
1463
1464#if !TCMALLOC_TRACK_DECOMMITED_SPANS
1465static ALWAYS_INLINE void propagateDecommittedState(Span*, Span*) { }
1466#else
1467static ALWAYS_INLINE void propagateDecommittedState(Span* destination, Span* source)
1468{
1469 destination->decommitted = source->decommitted;
1470}
1471#endif
1472
1473inline void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) {
1474 ASSERT(n > 0);
1475 DLL_Remove(span);
1476 span->free = 0;
1477 Event(span, 'A', n);
1478
1479 const int extra = static_cast<int>(span->length - n);
1480 ASSERT(extra >= 0);
1481 if (extra > 0) {
1482 Span* leftover = NewSpan(span->start + n, extra);
1483 leftover->free = 1;
1484 propagateDecommittedState(leftover, span);
1485 Event(leftover, 'S', extra);
1486 RecordSpan(leftover);
1487
1488 // Place leftover span on appropriate free list
1489 SpanList* listpair = (static_cast<size_t>(extra) < kMaxPages) ? &free_[extra] : &large_;
1490 Span* dst = released ? &listpair->returned : &listpair->normal;
1491 DLL_Prepend(dst, leftover);
1492
1493 span->length = n;
1494 pagemap_.set(span->start + n - 1, span);
1495 }
1496}
1497
1498#if !TCMALLOC_TRACK_DECOMMITED_SPANS
1499static ALWAYS_INLINE void mergeDecommittedStates(Span*, Span*) { }
1500#else
1501static ALWAYS_INLINE void mergeDecommittedStates(Span* destination, Span* other)
1502{
1503 if (destination->decommitted && !other->decommitted) {
1504 TCMalloc_SystemRelease(reinterpret_cast<void*>(other->start << kPageShift),
1505 static_cast<size_t>(other->length << kPageShift));
1506 } else if (other->decommitted && !destination->decommitted) {
1507 TCMalloc_SystemRelease(reinterpret_cast<void*>(destination->start << kPageShift),
1508 static_cast<size_t>(destination->length << kPageShift));
1509 destination->decommitted = true;
1510 }
1511}
1512#endif
1513
1514inline void TCMalloc_PageHeap::Delete(Span* span) {
1515 ASSERT(Check());
1516 ASSERT(!span->free);
1517 ASSERT(span->length > 0);
1518 ASSERT(GetDescriptor(span->start) == span);
1519 ASSERT(GetDescriptor(span->start + span->length - 1) == span);
1520 span->sizeclass = 0;
1521#ifndef NO_TCMALLOC_SAMPLES
1522 span->sample = 0;
1523#endif
1524
1525 // Coalesce -- we guarantee that "p" != 0, so no bounds checking
1526 // necessary. We do not bother resetting the stale pagemap
1527 // entries for the pieces we are merging together because we only
1528 // care about the pagemap entries for the boundaries.
1529 const PageID p = span->start;
1530 const Length n = span->length;
1531 Span* prev = GetDescriptor(p-1);
1532 if (prev != NULL && prev->free) {
1533 // Merge preceding span into this span
1534 ASSERT(prev->start + prev->length == p);
1535 const Length len = prev->length;
1536 mergeDecommittedStates(span, prev);
1537 DLL_Remove(prev);
1538 DeleteSpan(prev);
1539 span->start -= len;
1540 span->length += len;
1541 pagemap_.set(span->start, span);
1542 Event(span, 'L', len);
1543 }
1544 Span* next = GetDescriptor(p+n);
1545 if (next != NULL && next->free) {
1546 // Merge next span into this span
1547 ASSERT(next->start == p+n);
1548 const Length len = next->length;
1549 mergeDecommittedStates(span, next);
1550 DLL_Remove(next);
1551 DeleteSpan(next);
1552 span->length += len;
1553 pagemap_.set(span->start + span->length - 1, span);
1554 Event(span, 'R', len);
1555 }
1556
1557 Event(span, 'D', span->length);
1558 span->free = 1;
1559#if TCMALLOC_TRACK_DECOMMITED_SPANS
1560 if (span->decommitted) {
1561 if (span->length < kMaxPages)
1562 DLL_Prepend(&free_[span->length].returned, span);
1563 else
1564 DLL_Prepend(&large_.returned, span);
1565 } else
1566#endif
1567 {
1568 if (span->length < kMaxPages)
1569 DLL_Prepend(&free_[span->length].normal, span);
1570 else
1571 DLL_Prepend(&large_.normal, span);
1572 }
1573 free_pages_ += n;
1574
1575 IncrementalScavenge(n);
1576 ASSERT(Check());
1577}
1578
1579void TCMalloc_PageHeap::IncrementalScavenge(Length n) {
1580 // Fast path; not yet time to release memory
1581 scavenge_counter_ -= n;
1582 if (scavenge_counter_ >= 0) return; // Not yet time to scavenge
1583
1584 // If there is nothing to release, wait for so many pages before
1585 // scavenging again. With 4K pages, this comes to 1MB of memory.
1586 static const size_t kDefaultReleaseDelay = 1 << 8;
1587
1588 // Find index of free list to scavenge
1589 size_t index = scavenge_index_ + 1;
1590 for (size_t i = 0; i < kMaxPages+1; i++) {
1591 if (index > kMaxPages) index = 0;
1592 SpanList* slist = (index == kMaxPages) ? &large_ : &free_[index];
1593 if (!DLL_IsEmpty(&slist->normal)) {
1594 // Release the last span on the normal portion of this list
1595 Span* s = slist->normal.prev;
1596 DLL_Remove(s);
1597 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
1598 static_cast<size_t>(s->length << kPageShift));
1599#if TCMALLOC_TRACK_DECOMMITED_SPANS
1600 s->decommitted = true;
1601#endif
1602 DLL_Prepend(&slist->returned, s);
1603
1604 scavenge_counter_ = std::max<size_t>(64UL, std::min<size_t>(kDefaultReleaseDelay, kDefaultReleaseDelay - (free_pages_ / kDefaultReleaseDelay)));
1605
1606 if (index == kMaxPages && !DLL_IsEmpty(&slist->normal))
1607 scavenge_index_ = index - 1;
1608 else
1609 scavenge_index_ = index;
1610 return;
1611 }
1612 index++;
1613 }
1614
1615 // Nothing to scavenge, delay for a while
1616 scavenge_counter_ = kDefaultReleaseDelay;
1617}
1618
1619void TCMalloc_PageHeap::RegisterSizeClass(Span* span, size_t sc) {
1620 // Associate span object with all interior pages as well
1621 ASSERT(!span->free);
1622 ASSERT(GetDescriptor(span->start) == span);
1623 ASSERT(GetDescriptor(span->start+span->length-1) == span);
1624 Event(span, 'C', sc);
1625 span->sizeclass = static_cast<unsigned int>(sc);
1626 for (Length i = 1; i < span->length-1; i++) {
1627 pagemap_.set(span->start+i, span);
1628 }
1629}
1630
1631#ifdef WTF_CHANGES
1632size_t TCMalloc_PageHeap::ReturnedBytes() const {
1633 size_t result = 0;
1634 for (unsigned s = 0; s < kMaxPages; s++) {
1635 const int r_length = DLL_Length(&free_[s].returned);
1636 unsigned r_pages = s * r_length;
1637 result += r_pages << kPageShift;
1638 }
1639
1640 for (Span* s = large_.returned.next; s != &large_.returned; s = s->next)
1641 result += s->length << kPageShift;
1642 return result;
1643}
1644#endif
1645
1646#ifndef WTF_CHANGES
1647static double PagesToMB(uint64_t pages) {
1648 return (pages << kPageShift) / 1048576.0;
1649}
1650
1651void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) {
1652 int nonempty_sizes = 0;
1653 for (int s = 0; s < kMaxPages; s++) {
1654 if (!DLL_IsEmpty(&free_[s].normal) || !DLL_IsEmpty(&free_[s].returned)) {
1655 nonempty_sizes++;
1656 }
1657 }
1658 out->printf("------------------------------------------------\n");
1659 out->printf("PageHeap: %d sizes; %6.1f MB free\n",
1660 nonempty_sizes, PagesToMB(free_pages_));
1661 out->printf("------------------------------------------------\n");
1662 uint64_t total_normal = 0;
1663 uint64_t total_returned = 0;
1664 for (int s = 0; s < kMaxPages; s++) {
1665 const int n_length = DLL_Length(&free_[s].normal);
1666 const int r_length = DLL_Length(&free_[s].returned);
1667 if (n_length + r_length > 0) {
1668 uint64_t n_pages = s * n_length;
1669 uint64_t r_pages = s * r_length;
1670 total_normal += n_pages;
1671 total_returned += r_pages;
1672 out->printf("%6u pages * %6u spans ~ %6.1f MB; %6.1f MB cum"
1673 "; unmapped: %6.1f MB; %6.1f MB cum\n",
1674 s,
1675 (n_length + r_length),
1676 PagesToMB(n_pages + r_pages),
1677 PagesToMB(total_normal + total_returned),
1678 PagesToMB(r_pages),
1679 PagesToMB(total_returned));
1680 }
1681 }
1682
1683 uint64_t n_pages = 0;
1684 uint64_t r_pages = 0;
1685 int n_spans = 0;
1686 int r_spans = 0;
1687 out->printf("Normal large spans:\n");
1688 for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) {
1689 out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n",
1690 s->length, PagesToMB(s->length));
1691 n_pages += s->length;
1692 n_spans++;
1693 }
1694 out->printf("Unmapped large spans:\n");
1695 for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) {
1696 out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n",
1697 s->length, PagesToMB(s->length));
1698 r_pages += s->length;
1699 r_spans++;
1700 }
1701 total_normal += n_pages;
1702 total_returned += r_pages;
1703 out->printf(">255 large * %6u spans ~ %6.1f MB; %6.1f MB cum"
1704 "; unmapped: %6.1f MB; %6.1f MB cum\n",
1705 (n_spans + r_spans),
1706 PagesToMB(n_pages + r_pages),
1707 PagesToMB(total_normal + total_returned),
1708 PagesToMB(r_pages),
1709 PagesToMB(total_returned));
1710}
1711#endif
1712
1713bool TCMalloc_PageHeap::GrowHeap(Length n) {
1714 ASSERT(kMaxPages >= kMinSystemAlloc);
1715 if (n > kMaxValidPages) return false;
1716 Length ask = (n>kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc);
1717 size_t actual_size;
1718 void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
1719 if (ptr == NULL) {
1720 if (n < ask) {
1721 // Try growing just "n" pages
1722 ask = n;
1723 ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
1724 }
1725 if (ptr == NULL) return false;
1726 }
1727 ask = actual_size >> kPageShift;
1728
1729 uint64_t old_system_bytes = system_bytes_;
1730 system_bytes_ += (ask << kPageShift);
1731 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
1732 ASSERT(p > 0);
1733
1734 // If we already have a lot of pages allocated, just preallocate a bunch of
1735 // memory for the page map. This prevents fragmentation by pagemap metadata
1736 // when a program keeps allocating and freeing large blocks.
1737
1738 if (old_system_bytes < kPageMapBigAllocationThreshold
1739 && system_bytes_ >= kPageMapBigAllocationThreshold) {
1740 pagemap_.PreallocateMoreMemory();
1741 }
1742
1743 // Make sure pagemap_ has entries for all of the new pages.
1744 // Plus ensure one before and one after so coalescing code
1745 // does not need bounds-checking.
1746 if (pagemap_.Ensure(p-1, ask+2)) {
1747 // Pretend the new area is allocated and then Delete() it to
1748 // cause any necessary coalescing to occur.
1749 //
1750 // We do not adjust free_pages_ here since Delete() will do it for us.
1751 Span* span = NewSpan(p, ask);
1752 RecordSpan(span);
1753 Delete(span);
1754 ASSERT(Check());
1755 return true;
1756 } else {
1757 // We could not allocate memory within "pagemap_"
1758 // TODO: Once we can return memory to the system, return the new span
1759 return false;
1760 }
1761}
1762
1763bool TCMalloc_PageHeap::Check() {
1764 ASSERT(free_[0].normal.next == &free_[0].normal);
1765 ASSERT(free_[0].returned.next == &free_[0].returned);
1766 CheckList(&large_.normal, kMaxPages, 1000000000);
1767 CheckList(&large_.returned, kMaxPages, 1000000000);
1768 for (Length s = 1; s < kMaxPages; s++) {
1769 CheckList(&free_[s].normal, s, s);
1770 CheckList(&free_[s].returned, s, s);
1771 }
1772 return true;
1773}
1774
1775#if ASSERT_DISABLED
1776bool TCMalloc_PageHeap::CheckList(Span*, Length, Length) {
1777 return true;
1778}
1779#else
1780bool TCMalloc_PageHeap::CheckList(Span* list, Length min_pages, Length max_pages) {
1781 for (Span* s = list->next; s != list; s = s->next) {
1782 CHECK_CONDITION(s->free);
1783 CHECK_CONDITION(s->length >= min_pages);
1784 CHECK_CONDITION(s->length <= max_pages);
1785 CHECK_CONDITION(GetDescriptor(s->start) == s);
1786 CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s);
1787 }
1788 return true;
1789}
1790#endif
1791
1792static void ReleaseFreeList(Span* list, Span* returned) {
1793 // Walk backwards through list so that when we push these
1794 // spans on the "returned" list, we preserve the order.
1795 while (!DLL_IsEmpty(list)) {
1796 Span* s = list->prev;
1797 DLL_Remove(s);
1798 DLL_Prepend(returned, s);
1799 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
1800 static_cast<size_t>(s->length << kPageShift));
1801 }
1802}
1803
1804void TCMalloc_PageHeap::ReleaseFreePages() {
1805 for (Length s = 0; s < kMaxPages; s++) {
1806 ReleaseFreeList(&free_[s].normal, &free_[s].returned);
1807 }
1808 ReleaseFreeList(&large_.normal, &large_.returned);
1809 ASSERT(Check());
1810}
1811
1812//-------------------------------------------------------------------
1813// Free list
1814//-------------------------------------------------------------------
1815
1816class TCMalloc_ThreadCache_FreeList {
1817 private:
1818 void* list_; // Linked list of nodes
1819 uint16_t length_; // Current length
1820 uint16_t lowater_; // Low water mark for list length
1821
1822 public:
1823 void Init() {
1824 list_ = NULL;
1825 length_ = 0;
1826 lowater_ = 0;
1827 }
1828
1829 // Return current length of list
1830 int length() const {
1831 return length_;
1832 }
1833
1834 // Is list empty?
1835 bool empty() const {
1836 return list_ == NULL;
1837 }
1838
1839 // Low-water mark management
1840 int lowwatermark() const { return lowater_; }
1841 void clear_lowwatermark() { lowater_ = length_; }
1842
1843 ALWAYS_INLINE void Push(void* ptr) {
1844 SLL_Push(&list_, ptr);
1845 length_++;
1846 }
1847
1848 void PushRange(int N, void *start, void *end) {
1849 SLL_PushRange(&list_, start, end);
1850 length_ = length_ + static_cast<uint16_t>(N);
1851 }
1852
1853 void PopRange(int N, void **start, void **end) {
1854 SLL_PopRange(&list_, N, start, end);
1855 ASSERT(length_ >= N);
1856 length_ = length_ - static_cast<uint16_t>(N);
1857 if (length_ < lowater_) lowater_ = length_;
1858 }
1859
1860 ALWAYS_INLINE void* Pop() {
1861 ASSERT(list_ != NULL);
1862 length_--;
1863 if (length_ < lowater_) lowater_ = length_;
1864 return SLL_Pop(&list_);
1865 }
1866
1867#ifdef WTF_CHANGES
1868 template <class Finder, class Reader>
1869 void enumerateFreeObjects(Finder& finder, const Reader& reader)
1870 {
1871 for (void* nextObject = list_; nextObject; nextObject = *reader(reinterpret_cast<void**>(nextObject)))
1872 finder.visit(nextObject);
1873 }
1874#endif
1875};
1876
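// Illustrative sketch (hypothetical helpers, not part of the allocator): like
// the SLL_* routines the free list above relies on, a list of this kind
// threads its "next" pointer through the first word of each free object, so
// no storage beyond the free objects themselves is needed.
static inline void ExampleFreeListPush(void** list, void* object)
{
    *reinterpret_cast<void**>(object) = *list;  // store the old head inside the free object
    *list = object;                             // the object becomes the new head
}

static inline void* ExampleFreeListPop(void** list)
{
    void* head = *list;
    *list = *reinterpret_cast<void**>(head);    // read the next pointer back out of the object
    return head;
}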
1877//-------------------------------------------------------------------
1878// Data kept per thread
1879//-------------------------------------------------------------------
1880
1881class TCMalloc_ThreadCache {
1882 private:
1883 typedef TCMalloc_ThreadCache_FreeList FreeList;
1884#if COMPILER(MSVC)
1885 typedef DWORD ThreadIdentifier;
1886#else
1887 typedef pthread_t ThreadIdentifier;
1888#endif
1889
1890 size_t size_; // Combined size of data
1891 ThreadIdentifier tid_; // Which thread owns it
1892 bool in_setspecific_; // Called pthread_setspecific?
1893 FreeList list_[kNumClasses]; // Array indexed by size-class
1894
1895 // We sample allocations, biased by the size of the allocation
1896 uint32_t rnd_; // Cheap random number generator
1897 size_t bytes_until_sample_; // Bytes until we sample next
1898
1899 // Allocate a new heap. REQUIRES: pageheap_lock is held.
1900 static inline TCMalloc_ThreadCache* NewHeap(ThreadIdentifier tid);
1901
1902 // Use only as pthread thread-specific destructor function.
1903 static void DestroyThreadCache(void* ptr);
1904 public:
1905 // All ThreadCache objects are kept in a linked list (for stats collection)
1906 TCMalloc_ThreadCache* next_;
1907 TCMalloc_ThreadCache* prev_;
1908
1909 void Init(ThreadIdentifier tid);
1910 void Cleanup();
1911
1912 // Accessors (mostly just for printing stats)
1913 int freelist_length(size_t cl) const { return list_[cl].length(); }
1914
1915 // Total byte size in cache
1916 size_t Size() const { return size_; }
1917
1918 void* Allocate(size_t size);
1919 void Deallocate(void* ptr, size_t size_class);
1920
1921 void FetchFromCentralCache(size_t cl, size_t allocationSize);
1922 void ReleaseToCentralCache(size_t cl, int N);
1923 void Scavenge();
1924 void Print() const;
1925
1926 // Record allocation of "k" bytes. Return true iff allocation
1927 // should be sampled
1928 bool SampleAllocation(size_t k);
1929
1930 // Pick next sampling point
1931 void PickNextSample(size_t k);
1932
1933 static void InitModule();
1934 static void InitTSD();
1935 static TCMalloc_ThreadCache* GetThreadHeap();
1936 static TCMalloc_ThreadCache* GetCache();
1937 static TCMalloc_ThreadCache* GetCacheIfPresent();
1938 static TCMalloc_ThreadCache* CreateCacheIfNecessary();
1939 static void DeleteCache(TCMalloc_ThreadCache* heap);
1940 static void BecomeIdle();
1941 static void RecomputeThreadCacheSize();
1942
1943#ifdef WTF_CHANGES
1944 template <class Finder, class Reader>
1945 void enumerateFreeObjects(Finder& finder, const Reader& reader)
1946 {
1947 for (unsigned sizeClass = 0; sizeClass < kNumClasses; sizeClass++)
1948 list_[sizeClass].enumerateFreeObjects(finder, reader);
1949 }
1950#endif
1951};
1952
1953//-------------------------------------------------------------------
1954// Data kept per size-class in central cache
1955//-------------------------------------------------------------------
1956
1957class TCMalloc_Central_FreeList {
1958 public:
1959 void Init(size_t cl);
1960
1961 // These methods all do internal locking.
1962
1963 // Insert the specified range into the central freelist. N is the number of
1964 // elements in the range.
1965 void InsertRange(void *start, void *end, int N);
1966
1967 // Stores the actual number of fetched elements back into *N.
1968 void RemoveRange(void **start, void **end, int *N);
1969
1970 // Returns the number of free objects in cache.
1971 size_t length() {
1972 SpinLockHolder h(&lock_);
1973 return counter_;
1974 }
1975
1976 // Returns the number of free objects in the transfer cache.
1977 int tc_length() {
1978 SpinLockHolder h(&lock_);
1979 return used_slots_ * num_objects_to_move[size_class_];
1980 }
1981
1982#ifdef WTF_CHANGES
1983 template <class Finder, class Reader>
1984 void enumerateFreeObjects(Finder& finder, const Reader& reader, TCMalloc_Central_FreeList* remoteCentralFreeList)
1985 {
1986 for (Span* span = &empty_; span && span != &empty_; span = (span->next ? reader(span->next) : 0))
1987 ASSERT(!span->objects);
1988
1989 ASSERT(!nonempty_.objects);
1990 static const ptrdiff_t nonemptyOffset = reinterpret_cast<const char*>(&nonempty_) - reinterpret_cast<const char*>(this);
1991
1992 Span* remoteNonempty = reinterpret_cast<Span*>(reinterpret_cast<char*>(remoteCentralFreeList) + nonemptyOffset);
1993 Span* remoteSpan = nonempty_.next;
1994
1995 for (Span* span = reader(remoteSpan); span && remoteSpan != remoteNonempty; remoteSpan = span->next, span = (span->next ? reader(span->next) : 0)) {
1996 for (void* nextObject = span->objects; nextObject; nextObject = *reader(reinterpret_cast<void**>(nextObject)))
1997 finder.visit(nextObject);
1998 }
1999 }
2000#endif
2001
2002 private:
2003 // REQUIRES: lock_ is held
2004 // Remove object from cache and return.
2005 // Return NULL if no free entries in cache.
2006 void* FetchFromSpans();
2007
2008 // REQUIRES: lock_ is held
2009 // Remove object from cache and return. Fetches
2010 // from pageheap if cache is empty. Only returns
2011 // NULL on allocation failure.
2012 void* FetchFromSpansSafe();
2013
2014 // REQUIRES: lock_ is held
2015 // Release a linked list of objects to spans.
2016 // May temporarily release lock_.
2017 void ReleaseListToSpans(void *start);
2018
2019 // REQUIRES: lock_ is held
2020 // Release an object to spans.
2021 // May temporarily release lock_.
2022 void ReleaseToSpans(void* object);
2023
2024 // REQUIRES: lock_ is held
2025 // Populate cache by fetching from the page heap.
2026 // May temporarily release lock_.
2027 void Populate();
2028
2029 // REQUIRES: lock_ is held.
2030 // Tries to make room for a TCEntry. If the cache is full it will try to
2031 // expand it at the cost of some other cache size. Returns false if there is
2032 // no space.
2033 bool MakeCacheSpace();
2034
2035 // REQUIRES: lock_ for locked_size_class is held.
2036 // Picks a "random" size class to steal TCEntry slot from. In reality it
2037 // just iterates over the sizeclasses but does so without taking a lock.
2038 // Returns true on success.
2039 // May temporarily lock a "random" size class.
2040 static bool EvictRandomSizeClass(size_t locked_size_class, bool force);
2041
2042 // REQUIRES: lock_ is *not* held.
2043 // Tries to shrink the cache. If force is true it will release objects to
2044 // spans if that allows it to shrink the cache. Returns false if it failed
2045 // to shrink the cache. Decrements cache_size_ on success.
2046 // May temporarily take lock_. If it takes lock_, the locked_size_class
2047 // lock is released first, to keep the thread from holding two size class
2048 // locks concurrently, which could lead to a deadlock.
2049 bool ShrinkCache(int locked_size_class, bool force);
2050
2051 // This lock protects all the data members. used_slots_ and cache_size_
2052 // may be looked at without holding the lock.
2053 SpinLock lock_;
2054
2055 // We keep linked lists of empty and non-empty spans.
2056 size_t size_class_; // My size class
2057 Span empty_; // Dummy header for list of empty spans
2058 Span nonempty_; // Dummy header for list of non-empty spans
2059 size_t counter_; // Number of free objects in cache entry
2060
2061 // Here we reserve space for TCEntry cache slots. Since one size class can
2062 // end up getting all the TCEntries quota in the system, we just preallocate
2063 // a sufficient number of entries here.
2064 TCEntry tc_slots_[kNumTransferEntries];
2065
2066 // Number of currently used cached entries in tc_slots_. This variable is
2067 // updated under a lock but can be read without one.
2068 int32_t used_slots_;
2069 // The current number of slots for this size class. This is an
2070 // adaptive value that is increased if there is lots of traffic
2071 // on a given size class.
2072 int32_t cache_size_;
2073};
2074
2075// Pad each CentralCache object to multiple of 64 bytes
2076class TCMalloc_Central_FreeListPadded : public TCMalloc_Central_FreeList {
2077 private:
2078 char pad_[(64 - (sizeof(TCMalloc_Central_FreeList) % 64)) % 64];
2079};
2080
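// Illustrative arithmetic for the padding above (example sizes only): if
// sizeof(TCMalloc_Central_FreeList) were 200 bytes, the pad would be
// (64 - 200 % 64) % 64 = (64 - 8) % 64 = 56 bytes, producing a 256-byte
// object, so adjacent entries of the central_cache array below start on
// different cache lines; the outer "% 64" keeps the pad at zero when the
// size is already a multiple of 64.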
2081//-------------------------------------------------------------------
2082// Global variables
2083//-------------------------------------------------------------------
2084
2085// Central cache -- a collection of free-lists, one per size-class.
2086// We have a separate lock per free-list to reduce contention.
2087static TCMalloc_Central_FreeListPadded central_cache[kNumClasses];
2088
2089// Page-level allocator
2090static SpinLock pageheap_lock = SPINLOCK_INITIALIZER;
2091static void* pageheap_memory[(sizeof(TCMalloc_PageHeap) + sizeof(void*) - 1) / sizeof(void*)];
2092static bool phinited = false;
2093
2094// Avoid extra level of indirection by making "pageheap" be just an alias
2095// of pageheap_memory.
2096typedef union {
2097 void* m_memory;
2098 TCMalloc_PageHeap* m_pageHeap;
2099} PageHeapUnion;
2100
2101static inline TCMalloc_PageHeap* getPageHeap()
2102{
2103 PageHeapUnion u = { &pageheap_memory[0] };
2104 return u.m_pageHeap;
2105}
2106
2107#define pageheap getPageHeap()
2108
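// Illustrative sketch (hypothetical names, same pattern as getPageHeap above):
// keeping a singleton in raw static storage and reading it back through a
// union means no static constructor has to run for it and callers pay no
// extra pointer dereference; the accessor simply reinterprets the address of
// the storage.
struct ExampleSingleton { int value; };
static void* exampleSingletonStorage[(sizeof(ExampleSingleton) + sizeof(void*) - 1) / sizeof(void*)];

static inline ExampleSingleton* getExampleSingleton()
{
    union {
        void* m_memory;
        ExampleSingleton* m_object;
    } u = { &exampleSingletonStorage[0] };
    return u.m_object;
}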
2109// If TLS is available, we also store a copy
2110// of the per-thread object in a __thread variable
2111// since __thread variables are faster to read
2112// than pthread_getspecific(). We still need
2113// pthread_setspecific() because __thread
2114// variables provide no way to run cleanup
2115// code when a thread is destroyed.
2116#ifdef HAVE_TLS
2117static __thread TCMalloc_ThreadCache *threadlocal_heap;
2118#endif
2119// Thread-specific key. Initialization here is somewhat tricky
2120// because some Linux startup code invokes malloc() before it
2121 // is in a good enough state to handle pthread_key_create().
2122// Therefore, we use TSD keys only after tsd_inited is set to true.
2123// Until then, we use a slow path to get the heap object.
2124static bool tsd_inited = false;
2125static pthread_key_t heap_key;
2126#if COMPILER(MSVC)
2127DWORD tlsIndex = TLS_OUT_OF_INDEXES;
2128#endif
2129
2130static ALWAYS_INLINE void setThreadHeap(TCMalloc_ThreadCache* heap)
2131{
2132 // Still do pthread_setspecific() even when using MSVC fast TLS, so
2133 // that we benefit from the delete callback.
2134 pthread_setspecific(heap_key, heap);
2135#if COMPILER(MSVC)
2136 TlsSetValue(tlsIndex, heap);
2137#endif
2138}
2139
2140// Allocator for thread heaps
2141static PageHeapAllocator<TCMalloc_ThreadCache> threadheap_allocator;
2142
2143// Linked list of heap objects. Protected by pageheap_lock.
2144static TCMalloc_ThreadCache* thread_heaps = NULL;
2145static int thread_heap_count = 0;
2146
2147// Overall thread cache size. Protected by pageheap_lock.
2148static size_t overall_thread_cache_size = kDefaultOverallThreadCacheSize;
2149
2150// Global per-thread cache size. Writes are protected by
2151// pageheap_lock. Reads are done without any locking, which should be
2152// fine as long as size_t can be written atomically and we don't place
2153// invariants between this variable and other pieces of state.
2154static volatile size_t per_thread_cache_size = kMaxThreadCacheSize;
2155
2156//-------------------------------------------------------------------
2157// Central cache implementation
2158//-------------------------------------------------------------------
2159
2160void TCMalloc_Central_FreeList::Init(size_t cl) {
2161 lock_.Init();
2162 size_class_ = cl;
2163 DLL_Init(&empty_);
2164 DLL_Init(&nonempty_);
2165 counter_ = 0;
2166
2167 cache_size_ = 1;
2168 used_slots_ = 0;
2169 ASSERT(cache_size_ <= kNumTransferEntries);
2170}
2171
2172void TCMalloc_Central_FreeList::ReleaseListToSpans(void* start) {
2173 while (start) {
2174 void *next = SLL_Next(start);
2175 ReleaseToSpans(start);
2176 start = next;
2177 }
2178}
2179
2180ALWAYS_INLINE void TCMalloc_Central_FreeList::ReleaseToSpans(void* object) {
2181 const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift;
2182 Span* span = pageheap->GetDescriptor(p);
2183 ASSERT(span != NULL);
2184 ASSERT(span->refcount > 0);
2185
2186 // If span is empty, move it to non-empty list
2187 if (span->objects == NULL) {
2188 DLL_Remove(span);
2189 DLL_Prepend(&nonempty_, span);
2190 Event(span, 'N', 0);
2191 }
2192
2193 // The following check is expensive, so it is disabled by default
2194 if (false) {
2195 // Check that object does not occur in list
2196 int got = 0;
2197 for (void* p = span->objects; p != NULL; p = *((void**) p)) {
2198 ASSERT(p != object);
2199 got++;
2200 }
2201 ASSERT(got + span->refcount ==
2202 (span->length<<kPageShift)/ByteSizeForClass(span->sizeclass));
2203 }
2204
2205 counter_++;
2206 span->refcount--;
2207 if (span->refcount == 0) {
2208 Event(span, '#', 0);
2209 counter_ -= (span->length<<kPageShift) / ByteSizeForClass(span->sizeclass);
2210 DLL_Remove(span);
2211
2212 // Release central list lock while operating on pageheap
2213 lock_.Unlock();
2214 {
2215 SpinLockHolder h(&pageheap_lock);
2216 pageheap->Delete(span);
2217 }
2218 lock_.Lock();
2219 } else {
2220 *(reinterpret_cast<void**>(object)) = span->objects;
2221 span->objects = object;
2222 }
2223}
2224
2225ALWAYS_INLINE bool TCMalloc_Central_FreeList::EvictRandomSizeClass(
2226 size_t locked_size_class, bool force) {
2227 static int race_counter = 0;
2228 int t = race_counter++; // Updated without a lock, but who cares.
2229 if (t >= static_cast<int>(kNumClasses)) {
2230 while (t >= static_cast<int>(kNumClasses)) {
2231 t -= kNumClasses;
2232 }
2233 race_counter = t;
2234 }
2235 ASSERT(t >= 0);
2236 ASSERT(t < static_cast<int>(kNumClasses));
2237 if (t == static_cast<int>(locked_size_class)) return false;
2238 return central_cache[t].ShrinkCache(static_cast<int>(locked_size_class), force);
2239}
2240
2241bool TCMalloc_Central_FreeList::MakeCacheSpace() {
2242 // Is there room in the cache?
2243 if (used_slots_ < cache_size_) return true;
2244 // Check whether we can expand this cache.
2245 if (cache_size_ == kNumTransferEntries) return false;
2246 // Ok, we'll try to grab an entry from some other size class.
2247 if (EvictRandomSizeClass(size_class_, false) ||
2248 EvictRandomSizeClass(size_class_, true)) {
2249 // Succeeded in evicting, we're going to make our cache larger.
2250 cache_size_++;
2251 return true;
2252 }
2253 return false;
2254}
2255
2256
2257namespace {
2258class LockInverter {
2259 private:
2260 SpinLock *held_, *temp_;
2261 public:
2262 inline explicit LockInverter(SpinLock* held, SpinLock *temp)
2263 : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); }
2264 inline ~LockInverter() { temp_->Unlock(); held_->Lock(); }
2265};
2266}
2267
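// Illustrative usage (a sketch of how ShrinkCache below employs LockInverter,
// not additional API): while the inverter is in scope the caller's own
// size-class lock is dropped and this free list's lock is held, so a thread
// never holds two size-class locks at once; the destructor restores the
// original lock before control returns to the caller.
//
//   // running with central_cache[locked_size_class].lock_ held:
//   {
//       LockInverter li(&central_cache[locked_size_class].lock_, &lock_);
//       // ... examine and shrink this free list under lock_ ...
//   }   // lock_ released, central_cache[locked_size_class].lock_ re-acquired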
2268bool TCMalloc_Central_FreeList::ShrinkCache(int locked_size_class, bool force) {
2269 // Start with a quick check without taking a lock.
2270 if (cache_size_ == 0) return false;
2271 // We don't evict from a full cache unless we are 'forcing'.
2272 if (force == false && used_slots_ == cache_size_) return false;
2273
2274 // Grab lock, but first release the other lock held by this thread. We use
2275 // the lock inverter to ensure that we never hold two size class locks
2276 // concurrently. That can create a deadlock because there is no well
2277 // defined nesting order.
2278 LockInverter li(&central_cache[locked_size_class].lock_, &lock_);
2279 ASSERT(used_slots_ <= cache_size_);
2280 ASSERT(0 <= cache_size_);
2281 if (cache_size_ == 0) return false;
2282 if (used_slots_ == cache_size_) {
2283 if (force == false) return false;
2284 // ReleaseListToSpans releases the lock, so we have to make all the
2285 // updates to the central list before calling it.
2286 cache_size_--;
2287 used_slots_--;
2288 ReleaseListToSpans(tc_slots_[used_slots_].head);
2289 return true;
2290 }
2291 cache_size_--;
2292 return true;
2293}
2294
2295void TCMalloc_Central_FreeList::InsertRange(void *start, void *end, int N) {
2296 SpinLockHolder h(&lock_);
2297 if (N == num_objects_to_move[size_class_] &&
2298 MakeCacheSpace()) {
2299 int slot = used_slots_++;
2300 ASSERT(slot >= 0);
2301 ASSERT(slot < kNumTransferEntries);
2302 TCEntry *entry = &tc_slots_[slot];
2303 entry->head = start;
2304 entry->tail = end;
2305 return;
2306 }
2307 ReleaseListToSpans(start);
2308}
2309
2310void TCMalloc_Central_FreeList::RemoveRange(void **start, void **end, int *N) {
2311 int num = *N;
2312 ASSERT(num > 0);
2313
2314 SpinLockHolder h(&lock_);
2315 if (num == num_objects_to_move[size_class_] && used_slots_ > 0) {
2316 int slot = --used_slots_;
2317 ASSERT(slot >= 0);
2318 TCEntry *entry = &tc_slots_[slot];
2319 *start = entry->head;
2320 *end = entry->tail;
2321 return;
2322 }
2323
2324 // TODO: Prefetch multiple TCEntries?
2325 void *tail = FetchFromSpansSafe();
2326 if (!tail) {
2327 // We are completely out of memory.
2328 *start = *end = NULL;
2329 *N = 0;
2330 return;
2331 }
2332
2333 SLL_SetNext(tail, NULL);
2334 void *head = tail;
2335 int count = 1;
2336 while (count < num) {
2337 void *t = FetchFromSpans();
2338 if (!t) break;
2339 SLL_Push(&head, t);
2340 count++;
2341 }
2342 *start = head;
2343 *end = tail;
2344 *N = count;
2345}
2346
2347
2348void* TCMalloc_Central_FreeList::FetchFromSpansSafe() {
2349 void *t = FetchFromSpans();
2350 if (!t) {
2351 Populate();
2352 t = FetchFromSpans();
2353 }
2354 return t;
2355}
2356
2357void* TCMalloc_Central_FreeList::FetchFromSpans() {
2358 if (DLL_IsEmpty(&nonempty_)) return NULL;
2359 Span* span = nonempty_.next;
2360
2361 ASSERT(span->objects != NULL);
2362 ASSERT_SPAN_COMMITTED(span);
2363 span->refcount++;
2364 void* result = span->objects;
2365 span->objects = *(reinterpret_cast<void**>(result));
2366 if (span->objects == NULL) {
2367 // Move to empty list
2368 DLL_Remove(span);
2369 DLL_Prepend(&empty_, span);
2370 Event(span, 'E', 0);
2371 }
2372 counter_--;
2373 return result;
2374}
2375
2376// Fetch memory from the system and add to the central cache freelist.
2377ALWAYS_INLINE void TCMalloc_Central_FreeList::Populate() {
2378 // Release central list lock while operating on pageheap
2379 lock_.Unlock();
2380 const size_t npages = class_to_pages[size_class_];
2381
2382 Span* span;
2383 {
2384 SpinLockHolder h(&pageheap_lock);
2385 span = pageheap->New(npages);
2386 if (span) pageheap->RegisterSizeClass(span, size_class_);
2387 }
2388 if (span == NULL) {
2389 MESSAGE("allocation failed: %d\n", errno);
2390 lock_.Lock();
2391 return;
2392 }
2393 ASSERT_SPAN_COMMITTED(span);
2394 ASSERT(span->length == npages);
2395 // Cache sizeclass info eagerly. Locking is not necessary.
2396 // (Instead of being eager, we could just replace any stale info
2397 // about this span, but that seems to be no better in practice.)
2398 for (size_t i = 0; i < npages; i++) {
2399 pageheap->CacheSizeClass(span->start + i, size_class_);
2400 }
2401
2402 // Split the block into pieces and add to the free-list
2403 // TODO: coloring of objects to avoid cache conflicts?
2404 void** tail = &span->objects;
2405 char* ptr = reinterpret_cast<char*>(span->start << kPageShift);
2406 char* limit = ptr + (npages << kPageShift);
2407 const size_t size = ByteSizeForClass(size_class_);
2408 int num = 0;
2409 char* nptr;
2410 while ((nptr = ptr + size) <= limit) {
2411 *tail = ptr;
2412 tail = reinterpret_cast<void**>(ptr);
2413 ptr = nptr;
2414 num++;
2415 }
2416 ASSERT(ptr <= limit);
2417 *tail = NULL;
2418 span->refcount = 0; // No sub-object in use yet
2419
2420 // Add span to list of non-empty spans
2421 lock_.Lock();
2422 DLL_Prepend(&nonempty_, span);
2423 counter_ += num;
2424}
2425
2426//-------------------------------------------------------------------
2427// TCMalloc_ThreadCache implementation
2428//-------------------------------------------------------------------
2429
2430inline bool TCMalloc_ThreadCache::SampleAllocation(size_t k) {
2431 if (bytes_until_sample_ < k) {
2432 PickNextSample(k);
2433 return true;
2434 } else {
2435 bytes_until_sample_ -= k;
2436 return false;
2437 }
2438}
2439
2440void TCMalloc_ThreadCache::Init(ThreadIdentifier tid) {
2441 size_ = 0;
2442 next_ = NULL;
2443 prev_ = NULL;
2444 tid_ = tid;
2445 in_setspecific_ = false;
2446 for (size_t cl = 0; cl < kNumClasses; ++cl) {
2447 list_[cl].Init();
2448 }
2449
2450 // Initialize RNG -- run it for a bit to get to good values
2451 bytes_until_sample_ = 0;
2452 rnd_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this));
2453 for (int i = 0; i < 100; i++) {
2454 PickNextSample(static_cast<size_t>(FLAGS_tcmalloc_sample_parameter * 2));
2455 }
2456}
2457
2458void TCMalloc_ThreadCache::Cleanup() {
2459 // Put unused memory back into central cache
2460 for (size_t cl = 0; cl < kNumClasses; ++cl) {
2461 if (list_[cl].length() > 0) {
2462 ReleaseToCentralCache(cl, list_[cl].length());
2463 }
2464 }
2465}
2466
2467ALWAYS_INLINE void* TCMalloc_ThreadCache::Allocate(size_t size) {
2468 ASSERT(size <= kMaxSize);
2469 const size_t cl = SizeClass(size);
2470 FreeList* list = &list_[cl];
2471 size_t allocationSize = ByteSizeForClass(cl);
2472 if (list->empty()) {
2473 FetchFromCentralCache(cl, allocationSize);
2474 if (list->empty()) return NULL;
2475 }
2476 size_ -= allocationSize;
2477 return list->Pop();
2478}
2479
2480inline void TCMalloc_ThreadCache::Deallocate(void* ptr, size_t cl) {
2481 size_ += ByteSizeForClass(cl);
2482 FreeList* list = &list_[cl];
2483 list->Push(ptr);
2484 // If enough data is free, put back into central cache
2485 if (list->length() > kMaxFreeListLength) {
2486 ReleaseToCentralCache(cl, num_objects_to_move[cl]);
2487 }
2488 if (size_ >= per_thread_cache_size) Scavenge();
2489}
2490
2491// Remove some objects of class "cl" from central cache and add to thread heap
2492ALWAYS_INLINE void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl, size_t allocationSize) {
2493 int fetch_count = num_objects_to_move[cl];
2494 void *start, *end;
2495 central_cache[cl].RemoveRange(&start, &end, &fetch_count);
2496 list_[cl].PushRange(fetch_count, start, end);
2497 size_ += allocationSize * fetch_count;
2498}
2499
2500// Remove some objects of class "cl" from thread heap and add to central cache
2501inline void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) {
2502 ASSERT(N > 0);
2503 FreeList* src = &list_[cl];
2504 if (N > src->length()) N = src->length();
2505 size_ -= N*ByteSizeForClass(cl);
2506
2507 // We return prepackaged chains of the correct size to the central cache.
2508 // TODO: Use the same format internally in the thread caches?
2509 int batch_size = num_objects_to_move[cl];
2510 while (N > batch_size) {
2511 void *tail, *head;
2512 src->PopRange(batch_size, &head, &tail);
2513 central_cache[cl].InsertRange(head, tail, batch_size);
2514 N -= batch_size;
2515 }
2516 void *tail, *head;
2517 src->PopRange(N, &head, &tail);
2518 central_cache[cl].InsertRange(head, tail, N);
2519}
2520
2521// Release idle memory to the central cache
2522inline void TCMalloc_ThreadCache::Scavenge() {
2523 // If the low-water mark for the free list is L, it means we would
2524 // not have had to allocate anything from the central cache even if
2525 // we had reduced the free list size by L. We aim to get closer to
2526 // that situation by dropping L/2 nodes from the free list. This
2527 // may not release much memory, but if so we will call scavenge again
2528 // pretty soon and the low-water marks will be high on that call.
2529 //int64 start = CycleClock::Now();
2530
2531 for (size_t cl = 0; cl < kNumClasses; cl++) {
2532 FreeList* list = &list_[cl];
2533 const int lowmark = list->lowwatermark();
2534 if (lowmark > 0) {
2535 const int drop = (lowmark > 1) ? lowmark/2 : 1;
2536 ReleaseToCentralCache(cl, drop);
2537 }
2538 list->clear_lowwatermark();
2539 }
2540
2541 //int64 finish = CycleClock::Now();
2542 //CycleTimer ct;
2543 //MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0);
2544}
2545
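// Worked example of the heuristic above (illustrative numbers only): if a
// size class's free list never dropped below 8 entries since the previous
// scavenge, its low-water mark is 8, meaning those 8 entries were never
// needed; Scavenge() returns half of them (4) to the central cache and
// clears the mark, keeping the rest in case the allocation pattern changes.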
2546void TCMalloc_ThreadCache::PickNextSample(size_t k) {
2547 // Make next "random" number
2548 // x^32+x^22+x^2+x^1+1 is a primitive polynomial for random numbers
2549 static const uint32_t kPoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0);
2550 uint32_t r = rnd_;
2551 rnd_ = (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly);
2552
2553 // Next point is "rnd_ % (sample_period)". I.e., average
2554 // increment is "sample_period/2".
2555 const int flag_value = static_cast<int>(FLAGS_tcmalloc_sample_parameter);
2556 static int last_flag_value = -1;
2557
2558 if (flag_value != last_flag_value) {
2559 SpinLockHolder h(&sample_period_lock);
2560 int i;
2561 for (i = 0; i < (static_cast<int>(sizeof(primes_list)/sizeof(primes_list[0])) - 1); i++) {
2562 if (primes_list[i] >= flag_value) {
2563 break;
2564 }
2565 }
2566 sample_period = primes_list[i];
2567 last_flag_value = flag_value;
2568 }
2569
2570 bytes_until_sample_ += rnd_ % sample_period;
2571
2572 if (k > (static_cast<size_t>(-1) >> 2)) {
2573 // If the user has asked for a huge allocation then it is possible
2574 // for the code below to loop infinitely. Just return (note that
2575 // this throws off the sampling accuracy somewhat, but a user who
2576 // is allocating more than 1G of memory at a time can live with a
2577 // minor inaccuracy in profiling of small allocations, and also
2578 // would rather not wait for the loop below to terminate).
2579 return;
2580 }
2581
2582 while (bytes_until_sample_ < k) {
2583 // Increase bytes_until_sample_ by enough average sampling periods
2584 // (sample_period >> 1) to allow us to sample past the current
2585 // allocation.
2586 bytes_until_sample_ += (sample_period >> 1);
2587 }
2588
2589 bytes_until_sample_ -= k;
2590}
2591
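// Illustrative sketch (hypothetical helper, not called above): a single step
// of the linear feedback shift register PickNextSample uses for rnd_. When
// the top bit of r is set, the arithmetic right shift produces an all-ones
// mask and the polynomial is XORed in; otherwise the mask is zero and the
// state is just shifted left.
static inline uint32_t ExampleLFSRStep(uint32_t r)
{
    static const uint32_t kExamplePoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0);
    return (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kExamplePoly);
}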
2592void TCMalloc_ThreadCache::InitModule() {
2593 // There is a slight potential race here because of double-checked
2594 // locking idiom. However, as long as the program does a small
2595 // allocation before switching to multi-threaded mode, we will be
2596 // fine. We increase the chances of doing such a small allocation
2597 // by doing one in the constructor of the module_enter_exit_hook
2598 // object declared below.
2599 SpinLockHolder h(&pageheap_lock);
2600 if (!phinited) {
2601#ifdef WTF_CHANGES
2602 InitTSD();
2603#endif
2604 InitSizeClasses();
2605 threadheap_allocator.Init();
2606 span_allocator.Init();
2607 span_allocator.New(); // Reduce cache conflicts
2608 span_allocator.New(); // Reduce cache conflicts
2609 stacktrace_allocator.Init();
2610 DLL_Init(&sampled_objects);
2611 for (size_t i = 0; i < kNumClasses; ++i) {
2612 central_cache[i].Init(i);
2613 }
2614 pageheap->init();
2615 phinited = 1;
2616#if defined(WTF_CHANGES) && PLATFORM(DARWIN)
2617 FastMallocZone::init();
2618#endif
2619 }
2620}
2621
2622inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::NewHeap(ThreadIdentifier tid) {
2623 // Create the heap and add it to the linked list
2624 TCMalloc_ThreadCache *heap = threadheap_allocator.New();
2625 heap->Init(tid);
2626 heap->next_ = thread_heaps;
2627 heap->prev_ = NULL;
2628 if (thread_heaps != NULL) thread_heaps->prev_ = heap;
2629 thread_heaps = heap;
2630 thread_heap_count++;
2631 RecomputeThreadCacheSize();
2632 return heap;
2633}
2634
2635inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetThreadHeap() {
2636#ifdef HAVE_TLS
2637 // __thread is faster, but only when the kernel supports it
2638 if (KernelSupportsTLS())
2639 return threadlocal_heap;
2640#elif COMPILER(MSVC)
2641 return static_cast<TCMalloc_ThreadCache*>(TlsGetValue(tlsIndex));
2642#else
2643 return static_cast<TCMalloc_ThreadCache*>(pthread_getspecific(heap_key));
2644#endif
2645}
2646
2647inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() {
2648 TCMalloc_ThreadCache* ptr = NULL;
2649 if (!tsd_inited) {
2650 InitModule();
2651 } else {
2652 ptr = GetThreadHeap();
2653 }
2654 if (ptr == NULL) ptr = CreateCacheIfNecessary();
2655 return ptr;
2656}
2657
2658// In deletion paths, we do not try to create a thread-cache. This is
2659// because we may be in the thread destruction code and may have
2660// already cleaned up the cache for this thread.
2661inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() {
2662 if (!tsd_inited) return NULL;
2663 void* const p = GetThreadHeap();
2664 return reinterpret_cast<TCMalloc_ThreadCache*>(p);
2665}
2666
2667void TCMalloc_ThreadCache::InitTSD() {
2668 ASSERT(!tsd_inited);
2669 pthread_key_create(&heap_key, DestroyThreadCache);
2670#if COMPILER(MSVC)
2671 tlsIndex = TlsAlloc();
2672#endif
2673 tsd_inited = true;
2674
2675#if !COMPILER(MSVC)
2676 // We may have used a fake pthread_t for the main thread. Fix it.
2677 pthread_t zero;
2678 memset(&zero, 0, sizeof(zero));
2679#endif
2680#ifndef WTF_CHANGES
2681 SpinLockHolder h(&pageheap_lock);
2682#else
2683 ASSERT(pageheap_lock.IsHeld());
2684#endif
2685 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
2686#if COMPILER(MSVC)
2687 if (h->tid_ == 0) {
2688 h->tid_ = GetCurrentThreadId();
2689 }
2690#else
2691 if (pthread_equal(h->tid_, zero)) {
2692 h->tid_ = pthread_self();
2693 }
2694#endif
2695 }
2696}
2697
2698TCMalloc_ThreadCache* TCMalloc_ThreadCache::CreateCacheIfNecessary() {
2699 // Initialize per-thread data if necessary
2700 TCMalloc_ThreadCache* heap = NULL;
2701 {
2702 SpinLockHolder h(&pageheap_lock);
2703
2704#if COMPILER(MSVC)
2705 DWORD me;
2706 if (!tsd_inited) {
2707 me = 0;
2708 } else {
2709 me = GetCurrentThreadId();
2710 }
2711#else
2712 // Early on in glibc's life, we cannot even call pthread_self()
2713 pthread_t me;
2714 if (!tsd_inited) {
2715 memset(&me, 0, sizeof(me));
2716 } else {
2717 me = pthread_self();
2718 }
2719#endif
2720
2721 // This may be a recursive malloc call from pthread_setspecific()
2722 // In that case, the heap for this thread has already been created
2723 // and added to the linked list. So we search for that first.
2724 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
2725#if COMPILER(MSVC)
2726 if (h->tid_ == me) {
2727#else
2728 if (pthread_equal(h->tid_, me)) {
2729#endif
2730 heap = h;
2731 break;
2732 }
2733 }
2734
2735 if (heap == NULL) heap = NewHeap(me);
2736 }
2737
2738 // We call pthread_setspecific() outside the lock because it may
2739 // call malloc() recursively. The recursive call will never get
2740 // here again because it will find the already allocated heap in the
2741 // linked list of heaps.
2742 if (!heap->in_setspecific_ && tsd_inited) {
2743 heap->in_setspecific_ = true;
2744 setThreadHeap(heap);
2745 }
2746 return heap;
2747}
2748
2749void TCMalloc_ThreadCache::BecomeIdle() {
2750 if (!tsd_inited) return; // No caches yet
2751 TCMalloc_ThreadCache* heap = GetThreadHeap();
2752 if (heap == NULL) return; // No thread cache to remove
2753 if (heap->in_setspecific_) return; // Do not disturb the active caller
2754
2755 heap->in_setspecific_ = true;
2756 pthread_setspecific(heap_key, NULL);
2757#ifdef HAVE_TLS
2758 // Also update the copy in __thread
2759 threadlocal_heap = NULL;
2760#endif
2761 heap->in_setspecific_ = false;
2762 if (GetThreadHeap() == heap) {
2763 // Somehow heap got reinstated by a recursive call to malloc
2764 // from pthread_setspecific. We give up in this case.
2765 return;
2766 }
2767
2768 // We can now get rid of the heap
2769 DeleteCache(heap);
2770}
2771
2772void TCMalloc_ThreadCache::DestroyThreadCache(void* ptr) {
2773 // Note that "ptr" cannot be NULL since pthread promises not
2774 // to invoke the destructor on NULL values, but for safety,
2775 // we check anyway.
2776 if (ptr == NULL) return;
2777#ifdef HAVE_TLS
2778 // Prevent fast path of GetThreadHeap() from returning heap.
2779 threadlocal_heap = NULL;
2780#endif
2781 DeleteCache(reinterpret_cast<TCMalloc_ThreadCache*>(ptr));
2782}
2783
2784void TCMalloc_ThreadCache::DeleteCache(TCMalloc_ThreadCache* heap) {
2785 // Remove all memory from heap
2786 heap->Cleanup();
2787
2788 // Remove from linked list
2789 SpinLockHolder h(&pageheap_lock);
2790 if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_;
2791 if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_;
2792 if (thread_heaps == heap) thread_heaps = heap->next_;
2793 thread_heap_count--;
2794 RecomputeThreadCacheSize();
2795
2796 threadheap_allocator.Delete(heap);
2797}
2798
2799void TCMalloc_ThreadCache::RecomputeThreadCacheSize() {
2800 // Divide available space across threads
2801 int n = thread_heap_count > 0 ? thread_heap_count : 1;
2802 size_t space = overall_thread_cache_size / n;
2803
2804 // Limit to allowed range
2805 if (space < kMinThreadCacheSize) space = kMinThreadCacheSize;
2806 if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize;
2807
2808 per_thread_cache_size = space;
2809}
2810
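// Worked example (illustrative numbers only): if the overall budget were 16MB
// and 8 thread heaps were live, each cache would be allotted 16MB / 8 = 2MB,
// which is then clamped into [kMinThreadCacheSize, kMaxThreadCacheSize]
// before being published in per_thread_cache_size.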
2811void TCMalloc_ThreadCache::Print() const {
2812 for (size_t cl = 0; cl < kNumClasses; ++cl) {
2813 MESSAGE(" %5" PRIuS " : %4d len; %4d lo\n",
2814 ByteSizeForClass(cl),
2815 list_[cl].length(),
2816 list_[cl].lowwatermark());
2817 }
2818}
2819
2820// Extract interesting stats
2821struct TCMallocStats {
2822 uint64_t system_bytes; // Bytes alloced from system
2823 uint64_t thread_bytes; // Bytes in thread caches
2824 uint64_t central_bytes; // Bytes in central cache
2825 uint64_t transfer_bytes; // Bytes in central transfer cache
2826 uint64_t pageheap_bytes; // Bytes in page heap
2827 uint64_t metadata_bytes; // Bytes alloced for metadata
2828};
2829
2830#ifndef WTF_CHANGES
2831// Get stats into "r". Also get per-size-class counts if class_count != NULL
2832static void ExtractStats(TCMallocStats* r, uint64_t* class_count) {
2833 r->central_bytes = 0;
2834 r->transfer_bytes = 0;
2835 for (int cl = 0; cl < kNumClasses; ++cl) {
2836 const int length = central_cache[cl].length();
2837 const int tc_length = central_cache[cl].tc_length();
2838 r->central_bytes += static_cast<uint64_t>(ByteSizeForClass(cl)) * length;
2839 r->transfer_bytes +=
2840 static_cast<uint64_t>(ByteSizeForClass(cl)) * tc_length;
2841 if (class_count) class_count[cl] = length + tc_length;
2842 }
2843
2844 // Add stats from per-thread heaps
2845 r->thread_bytes = 0;
2846 { // scope
2847 SpinLockHolder h(&pageheap_lock);
2848 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
2849 r->thread_bytes += h->Size();
2850 if (class_count) {
2851 for (size_t cl = 0; cl < kNumClasses; ++cl) {
2852 class_count[cl] += h->freelist_length(cl);
2853 }
2854 }
2855 }
2856 }
2857
2858 { //scope
2859 SpinLockHolder h(&pageheap_lock);
2860 r->system_bytes = pageheap->SystemBytes();
2861 r->metadata_bytes = metadata_system_bytes;
2862 r->pageheap_bytes = pageheap->FreeBytes();
2863 }
2864}
2865#endif
2866
2867#ifndef WTF_CHANGES
2868// WRITE stats to "out"
2869static void DumpStats(TCMalloc_Printer* out, int level) {
2870 TCMallocStats stats;
2871 uint64_t class_count[kNumClasses];
2872 ExtractStats(&stats, (level >= 2 ? class_count : NULL));
2873
2874 if (level >= 2) {
2875 out->printf("------------------------------------------------\n");
2876 uint64_t cumulative = 0;
2877 for (int cl = 0; cl < kNumClasses; ++cl) {
2878 if (class_count[cl] > 0) {
2879 uint64_t class_bytes = class_count[cl] * ByteSizeForClass(cl);
2880 cumulative += class_bytes;
2881 out->printf("class %3d [ %8" PRIuS " bytes ] : "
2882 "%8" PRIu64 " objs; %5.1f MB; %5.1f cum MB\n",
2883 cl, ByteSizeForClass(cl),
2884 class_count[cl],
2885 class_bytes / 1048576.0,
2886 cumulative / 1048576.0);
2887 }
2888 }
2889
2890 SpinLockHolder h(&pageheap_lock);
2891 pageheap->Dump(out);
2892 }
2893
2894 const uint64_t bytes_in_use = stats.system_bytes
2895 - stats.pageheap_bytes
2896 - stats.central_bytes
2897 - stats.transfer_bytes
2898 - stats.thread_bytes;
2899
2900 out->printf("------------------------------------------------\n"
2901 "MALLOC: %12" PRIu64 " Heap size\n"
2902 "MALLOC: %12" PRIu64 " Bytes in use by application\n"
2903 "MALLOC: %12" PRIu64 " Bytes free in page heap\n"
2904 "MALLOC: %12" PRIu64 " Bytes free in central cache\n"
2905 "MALLOC: %12" PRIu64 " Bytes free in transfer cache\n"
2906 "MALLOC: %12" PRIu64 " Bytes free in thread caches\n"
2907 "MALLOC: %12" PRIu64 " Spans in use\n"
2908 "MALLOC: %12" PRIu64 " Thread heaps in use\n"
2909 "MALLOC: %12" PRIu64 " Metadata allocated\n"
2910 "------------------------------------------------\n",
2911 stats.system_bytes,
2912 bytes_in_use,
2913 stats.pageheap_bytes,
2914 stats.central_bytes,
2915 stats.transfer_bytes,
2916 stats.thread_bytes,
2917 uint64_t(span_allocator.inuse()),
2918 uint64_t(threadheap_allocator.inuse()),
2919 stats.metadata_bytes);
2920}
2921
2922static void PrintStats(int level) {
2923 const int kBufferSize = 16 << 10;
2924 char* buffer = new char[kBufferSize];
2925 TCMalloc_Printer printer(buffer, kBufferSize);
2926 DumpStats(&printer, level);
2927 write(STDERR_FILENO, buffer, strlen(buffer));
2928 delete[] buffer;
2929}
2930
2931static void** DumpStackTraces() {
2932 // Count how much space we need
2933 int needed_slots = 0;
2934 {
2935 SpinLockHolder h(&pageheap_lock);
2936 for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) {
2937 StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects);
2938 needed_slots += 3 + stack->depth;
2939 }
2940 needed_slots += 100; // Slop in case sample grows
2941 needed_slots += needed_slots/8; // An extra 12.5% slop
2942 }
2943
2944 void** result = new void*[needed_slots];
2945 if (result == NULL) {
2946 MESSAGE("tcmalloc: could not allocate %d slots for stack traces\n",
2947 needed_slots);
2948 return NULL;
2949 }
2950
2951 SpinLockHolder h(&pageheap_lock);
2952 int used_slots = 0;
2953 for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) {
2954 ASSERT(used_slots < needed_slots); // Need to leave room for terminator
2955 StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects);
2956 if (used_slots + 3 + stack->depth >= needed_slots) {
2957 // No more room
2958 break;
2959 }
2960
2961 result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1));
2962 result[used_slots+1] = reinterpret_cast<void*>(stack->size);
2963 result[used_slots+2] = reinterpret_cast<void*>(stack->depth);
2964 for (int d = 0; d < stack->depth; d++) {
2965 result[used_slots+3+d] = stack->stack[d];
2966 }
2967 used_slots += 3 + stack->depth;
2968 }
2969 result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0));
2970 return result;
2971}
2972#endif
2973
2974#ifndef WTF_CHANGES
2975
2976// TCMalloc's support for extra malloc interfaces
2977class TCMallocImplementation : public MallocExtension {
2978 public:
2979 virtual void GetStats(char* buffer, int buffer_length) {
2980 ASSERT(buffer_length > 0);
2981 TCMalloc_Printer printer(buffer, buffer_length);
2982
2983 // Print level one stats unless lots of space is available
2984 if (buffer_length < 10000) {
2985 DumpStats(&printer, 1);
2986 } else {
2987 DumpStats(&printer, 2);
2988 }
2989 }
2990
2991 virtual void** ReadStackTraces() {
2992 return DumpStackTraces();
2993 }
2994
2995 virtual bool GetNumericProperty(const char* name, size_t* value) {
2996 ASSERT(name != NULL);
2997
2998 if (strcmp(name, "generic.current_allocated_bytes") == 0) {
2999 TCMallocStats stats;
3000 ExtractStats(&stats, NULL);
3001 *value = stats.system_bytes
3002 - stats.thread_bytes
3003 - stats.central_bytes
3004 - stats.pageheap_bytes;
3005 return true;
3006 }
3007
3008 if (strcmp(name, "generic.heap_size") == 0) {
3009 TCMallocStats stats;
3010 ExtractStats(&stats, NULL);
3011 *value = stats.system_bytes;
3012 return true;
3013 }
3014
3015 if (strcmp(name, "tcmalloc.slack_bytes") == 0) {
3016 // We assume that bytes in the page heap are not fragmented too
3017 // badly, and are therefore available for allocation.
3018 SpinLockHolder l(&pageheap_lock);
3019 *value = pageheap->FreeBytes();
3020 return true;
3021 }
3022
3023 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
3024 SpinLockHolder l(&pageheap_lock);
3025 *value = overall_thread_cache_size;
3026 return true;
3027 }
3028
3029 if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) {
3030 TCMallocStats stats;
3031 ExtractStats(&stats, NULL);
3032 *value = stats.thread_bytes;
3033 return true;
3034 }
3035
3036 return false;
3037 }
3038
3039 virtual bool SetNumericProperty(const char* name, size_t value) {
3040 ASSERT(name != NULL);
3041
3042 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
3043 // Clip the value to a reasonable range
3044 if (value < kMinThreadCacheSize) value = kMinThreadCacheSize;
3045 if (value > (1<<30)) value = (1<<30); // Limit to 1GB
3046
3047 SpinLockHolder l(&pageheap_lock);
3048 overall_thread_cache_size = static_cast<size_t>(value);
3049 TCMalloc_ThreadCache::RecomputeThreadCacheSize();
3050 return true;
3051 }
3052
3053 return false;
3054 }
3055
3056 virtual void MarkThreadIdle() {
3057 TCMalloc_ThreadCache::BecomeIdle();
3058 }
3059
3060 virtual void ReleaseFreeMemory() {
3061 SpinLockHolder h(&pageheap_lock);
3062 pageheap->ReleaseFreePages();
3063 }
3064};
3065#endif
3066
3067// The constructor allocates an object to ensure that initialization
3068// runs before main(), and therefore we do not have a chance to become
3069// multi-threaded before initialization. We also create the TSD key
3070// here. Presumably by the time this constructor runs, glibc is in
3071// good enough shape to handle pthread_key_create().
3072//
3073// The constructor also takes the opportunity to tell STL to use
3074 // tcmalloc. We want to do this early, before static constructors run, so
3075// all user STL allocations go through tcmalloc (which works really
3076// well for STL).
3077//
3078// The destructor prints stats when the program exits.
3079class TCMallocGuard {
3080 public:
3081
3082 TCMallocGuard() {
3083#ifdef HAVE_TLS // this is true if the cc/ld/libc combo support TLS
3084 // Check whether the kernel also supports TLS (needs to happen at runtime)
3085 CheckIfKernelSupportsTLS();
3086#endif
3087#ifndef WTF_CHANGES
3088#ifdef WIN32 // patch the windows VirtualAlloc, etc.
3089 PatchWindowsFunctions(); // defined in windows/patch_functions.cc
3090#endif
3091#endif
3092 free(malloc(1));
3093 TCMalloc_ThreadCache::InitTSD();
3094 free(malloc(1));
3095#ifndef WTF_CHANGES
3096 MallocExtension::Register(new TCMallocImplementation);
3097#endif
3098 }
3099
3100#ifndef WTF_CHANGES
3101 ~TCMallocGuard() {
3102 const char* env = getenv("MALLOCSTATS");
3103 if (env != NULL) {
3104 int level = atoi(env);
3105 if (level < 1) level = 1;
3106 PrintStats(level);
3107 }
3108#ifdef WIN32
3109 UnpatchWindowsFunctions();
3110#endif
3111 }
3112#endif
3113};
3114
3115#ifndef WTF_CHANGES
3116static TCMallocGuard module_enter_exit_hook;
3117#endif
3118
3119
3120//-------------------------------------------------------------------
3121// Helpers for the exported routines below
3122//-------------------------------------------------------------------
3123
3124#ifndef WTF_CHANGES
3125
3126static Span* DoSampledAllocation(size_t size) {
3127
3128 // Grab the stack trace outside the heap lock
3129 StackTrace tmp;
3130 tmp.depth = GetStackTrace(tmp.stack, kMaxStackDepth, 1);
3131 tmp.size = size;
3132
3133 SpinLockHolder h(&pageheap_lock);
3134 // Allocate span
3135 Span *span = pageheap->New(pages(size == 0 ? 1 : size));
3136 if (span == NULL) {
3137 return NULL;
3138 }
3139
3140 // Allocate stack trace
3141 StackTrace *stack = stacktrace_allocator.New();
3142 if (stack == NULL) {
3143 // Sampling failed because of lack of memory
3144 return span;
3145 }
3146
3147 *stack = tmp;
3148 span->sample = 1;
3149 span->objects = stack;
3150 DLL_Prepend(&sampled_objects, span);
3151
3152 return span;
3153}
3154#endif
3155
3156static inline bool CheckCachedSizeClass(void *ptr) {
3157 PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
3158 size_t cached_value = pageheap->GetSizeClassIfCached(p);
3159 return cached_value == 0 ||
3160 cached_value == pageheap->GetDescriptor(p)->sizeclass;
3161}
3162
3163static inline void* CheckedMallocResult(void *result)
3164{
3165 ASSERT(result == 0 || CheckCachedSizeClass(result));
3166 return result;
3167}
3168
3169static inline void* SpanToMallocResult(Span *span) {
3170 ASSERT_SPAN_COMMITTED(span);
3171 pageheap->CacheSizeClass(span->start, 0);
3172 return
3173 CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
3174}
3175
3176#ifdef WTF_CHANGES
3177template <bool crashOnFailure>
3178#endif
3179static ALWAYS_INLINE void* do_malloc(size_t size) {
3180 void* ret = NULL;
3181
3182#ifdef WTF_CHANGES
3183 ASSERT(!isForbidden());
3184#endif
3185
3186 // The following call forces module initialization
3187 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
3188#ifndef WTF_CHANGES
3189 if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
3190 Span* span = DoSampledAllocation(size);
3191 if (span != NULL) {
3192 ret = SpanToMallocResult(span);
3193 }
3194 } else
3195#endif
3196 if (size > kMaxSize) {
3197 // Use page-level allocator
3198 SpinLockHolder h(&pageheap_lock);
3199 Span* span = pageheap->New(pages(size));
3200 if (span != NULL) {
3201 ret = SpanToMallocResult(span);
3202 }
3203 } else {
3204 // The common case, and also the simplest. This just pops the
3205 // size-appropriate freelist, after replenishing it if it's empty.
3206 ret = CheckedMallocResult(heap->Allocate(size));
3207 }
3208 if (!ret) {
3209#ifdef WTF_CHANGES
3210 if (crashOnFailure) // This branch should be optimized out by the compiler.
3211 CRASH();
3212#else
3213 errno = ENOMEM;
3214#endif
3215 }
3216 return ret;
3217}
3218
3219static ALWAYS_INLINE void do_free(void* ptr) {
3220 if (ptr == NULL) return;
3221 ASSERT(pageheap != NULL); // Should not call free() before malloc()
3222 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
3223 Span* span = NULL;
3224 size_t cl = pageheap->GetSizeClassIfCached(p);
3225
3226 if (cl == 0) {
3227 span = pageheap->GetDescriptor(p);
3228 cl = span->sizeclass;
3229 pageheap->CacheSizeClass(p, cl);
3230 }
3231 if (cl != 0) {
3232#ifndef NO_TCMALLOC_SAMPLES
3233 ASSERT(!pageheap->GetDescriptor(p)->sample);
3234#endif
3235 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCacheIfPresent();
3236 if (heap != NULL) {
3237 heap->Deallocate(ptr, cl);
3238 } else {
3239 // Delete directly into central cache
3240 SLL_SetNext(ptr, NULL);
3241 central_cache[cl].InsertRange(ptr, ptr, 1);
3242 }
3243 } else {
3244 SpinLockHolder h(&pageheap_lock);
3245 ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
3246 ASSERT(span != NULL && span->start == p);
3247#ifndef NO_TCMALLOC_SAMPLES
3248 if (span->sample) {
3249 DLL_Remove(span);
3250 stacktrace_allocator.Delete(reinterpret_cast<StackTrace*>(span->objects));
3251 span->objects = NULL;
3252 }
3253#endif
3254 pageheap->Delete(span);
3255 }
3256}
3257
3258#ifndef WTF_CHANGES
3259// For use by exported routines below that want specific alignments
3260//
3261// Note: this code can be slow, and can significantly fragment memory.
3262// The expectation is that memalign/posix_memalign/valloc/pvalloc will
3263// not be invoked very often. This requirement simplifies our
3264// implementation and allows us to tune for expected allocation
3265// patterns.
3266static void* do_memalign(size_t align, size_t size) {
3267 ASSERT((align & (align - 1)) == 0);
3268 ASSERT(align > 0);
3269 if (pageheap == NULL) TCMalloc_ThreadCache::InitModule();
3270
3271 // Allocate at least one byte to avoid boundary conditions below
3272 if (size == 0) size = 1;
3273
3274 if (size <= kMaxSize && align < kPageSize) {
3275 // Search through acceptable size classes looking for one with
3276 // enough alignment. This depends on the fact that
3277 // InitSizeClasses() currently produces several size classes that
3278 // are aligned at powers of two. We will waste time and space if
3279 // we miss in the size class array, but that is deemed acceptable
3280 // since memalign() should be used rarely.
3281 size_t cl = SizeClass(size);
3282 while (cl < kNumClasses && ((class_to_size[cl] & (align - 1)) != 0)) {
3283 cl++;
3284 }
3285 if (cl < kNumClasses) {
3286 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
3287 return CheckedMallocResult(heap->Allocate(class_to_size[cl]));
3288 }
3289 }
3290
3291 // We will allocate directly from the page heap
3292 SpinLockHolder h(&pageheap_lock);
3293
3294 if (align <= kPageSize) {
3295 // Any page-level allocation will be fine
3296 // TODO: We could put the rest of this page in the appropriate
3297 // TODO: cache but it does not seem worth it.
3298 Span* span = pageheap->New(pages(size));
3299 return span == NULL ? NULL : SpanToMallocResult(span);
3300 }
3301
3302 // Allocate extra pages and carve off an aligned portion
3303 const Length alloc = pages(size + align);
3304 Span* span = pageheap->New(alloc);
3305 if (span == NULL) return NULL;
3306
3307 // Skip starting portion so that we end up aligned
3308 Length skip = 0;
3309 while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) {
3310 skip++;
3311 }
3312 ASSERT(skip < alloc);
3313 if (skip > 0) {
3314 Span* rest = pageheap->Split(span, skip);
3315 pageheap->Delete(span);
3316 span = rest;
3317 }
3318
3319 // Skip trailing portion that we do not need to return
3320 const Length needed = pages(size);
3321 ASSERT(span->length >= needed);
3322 if (span->length > needed) {
3323 Span* trailer = pageheap->Split(span, needed);
3324 pageheap->Delete(trailer);
3325 }
3326 return SpanToMallocResult(span);
3327}
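// Illustrative note on the skip loop above (equivalent arithmetic, assuming
// align is a power of two and a multiple of the page size, as it is on this
// path): the loop lands on the same page as rounding the span's start
// address up to the alignment:
//
//   uintptr_t start   = span->start << kPageShift;
//   uintptr_t aligned = (start + align - 1) & ~(align - 1);
//   Length    skip    = (aligned - start) >> kPageShift;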
3328#endif
3329
3330// Helpers for use by exported routines below:
3331
3332#ifndef WTF_CHANGES
3333static inline void do_malloc_stats() {
3334 PrintStats(1);
3335}
3336#endif
3337
3338static inline int do_mallopt(int, int) {
3339 return 1; // Indicates error
3340}
3341
3342#ifdef HAVE_STRUCT_MALLINFO // mallinfo isn't defined on freebsd, for instance
3343static inline struct mallinfo do_mallinfo() {
3344 TCMallocStats stats;
3345 ExtractStats(&stats, NULL);
3346
3347 // Just some of the fields are filled in.
3348 struct mallinfo info;
3349 memset(&info, 0, sizeof(info));
3350
3351 // Unfortunately, the struct contains "int" fields, so some of the
3352 // size values will be truncated.
3353 info.arena = static_cast<int>(stats.system_bytes);
3354 info.fsmblks = static_cast<int>(stats.thread_bytes
3355 + stats.central_bytes
3356 + stats.transfer_bytes);
3357 info.fordblks = static_cast<int>(stats.pageheap_bytes);
3358 info.uordblks = static_cast<int>(stats.system_bytes
3359 - stats.thread_bytes
3360 - stats.central_bytes
3361 - stats.transfer_bytes
3362 - stats.pageheap_bytes);
3363
3364 return info;
3365}
3366#endif
3367
3368//-------------------------------------------------------------------
3369// Exported routines
3370//-------------------------------------------------------------------
3371
3372// CAVEAT: The code structure below ensures that MallocHook methods are always
3373// called from the stack frame of the invoked allocation function.
3374// heap-checker.cc depends on this to start a stack trace from
3375// the call to the (de)allocation function.
3376
3377#ifndef WTF_CHANGES
3378extern "C"
3379#else
3380#define do_malloc do_malloc<crashOnFailure>
3381
3382template <bool crashOnFailure>
3383void* malloc(size_t);
3384
3385void* fastMalloc(size_t size)
3386{
3387 return malloc<true>(size);
3388}
3389
3390void* tryFastMalloc(size_t size)
3391{
3392 return malloc<false>(size);
3393}
3394
3395template <bool crashOnFailure>
3396ALWAYS_INLINE
3397#endif
3398void* malloc(size_t size) {
3399#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3400 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= size) // If overflow would occur...
3401 return 0;
3402 size += sizeof(AllocAlignmentInteger);
3403 void* result = do_malloc(size);
3404 if (!result)
3405 return 0;
3406
3407 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
3408 result = static_cast<AllocAlignmentInteger*>(result) + 1;
3409#else
3410 void* result = do_malloc(size);
3411#endif
3412
3413#ifndef WTF_CHANGES
3414 MallocHook::InvokeNewHook(result, size);
3415#endif
3416 return result;
3417}
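// A minimal illustrative sketch (hypothetical names, not compiled in): with
// FAST_MALLOC_MATCH_VALIDATION enabled, every block is prefixed by one AllocAlignmentInteger
// tag and the caller receives the address just past it. The stand-alone snippet below, using
// the hypothetical names ExampleTag and exampleTaggedMalloc, demonstrates the same
// prefix-and-offset layout on top of plain malloc().
#if 0 // illustration only; not compiled
#include <limits>
#include <stdlib.h>

typedef unsigned long long ExampleTag;               // stand-in for AllocAlignmentInteger
static const ExampleTag exampleMallocTag = 1;        // stand-in for Internal::AllocTypeMalloc

static void* exampleTaggedMalloc(size_t size)
{
    if (std::numeric_limits<size_t>::max() - sizeof(ExampleTag) <= size) // overflow guard, as above
        return 0;
    ExampleTag* header = static_cast<ExampleTag*>(malloc(size + sizeof(ExampleTag)));
    if (!header)
        return 0;
    *header = exampleMallocTag;                       // record how this block was allocated
    return header + 1;                                // hand back the bytes after the tag
}
#endif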
3418
3419#ifndef WTF_CHANGES
3420extern "C"
3421#endif
3422void free(void* ptr) {
3423#ifndef WTF_CHANGES
3424 MallocHook::InvokeDeleteHook(ptr);
3425#endif
3426
3427#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3428 if (!ptr)
3429 return;
3430
3431 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(ptr);
3432 if (*header != Internal::AllocTypeMalloc)
3433 Internal::fastMallocMatchFailed(ptr);
3434 do_free(header);
3435#else
3436 do_free(ptr);
3437#endif
3438}
3439
3440#ifndef WTF_CHANGES
3441extern "C"
3442#else
3443template <bool crashOnFailure>
3444void* calloc(size_t, size_t);
3445
3446void* fastCalloc(size_t n, size_t elem_size)
3447{
3448 return calloc<true>(n, elem_size);
3449}
3450
3451void* tryFastCalloc(size_t n, size_t elem_size)
3452{
3453 return calloc<false>(n, elem_size);
3454}
3455
3456template <bool crashOnFailure>
3457ALWAYS_INLINE
3458#endif
3459void* calloc(size_t n, size_t elem_size) {
3460 size_t totalBytes = n * elem_size;
3461
3462 // Protect against overflow
3463 if (n > 1 && elem_size && (totalBytes / elem_size) != n)
3464 return 0;
3465
3466#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3467 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= totalBytes) // If overflow would occur...
3468 return 0;
3469
3470 totalBytes += sizeof(AllocAlignmentInteger);
3471 void* result = do_malloc(totalBytes);
3472 if (!result)
3473 return 0;
3474
3475 memset(result, 0, totalBytes);
3476 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
3477 result = static_cast<AllocAlignmentInteger*>(result) + 1;
3478#else
3479 void* result = do_malloc(totalBytes);
3480 if (result != NULL) {
3481 memset(result, 0, totalBytes);
3482 }
3483#endif
3484
3485#ifndef WTF_CHANGES
3486 MallocHook::InvokeNewHook(result, totalBytes);
3487#endif
3488 return result;
3489}
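// A minimal illustrative sketch (hypothetical helper, not compiled in): the guard above
// detects n * elem_size overflow by checking whether dividing the product back by elem_size
// recovers n.
#if 0 // illustration only; not compiled
#include <stddef.h>

// Division exactly undoes a non-overflowing multiplication, so a mismatch means the product
// wrapped. n <= 1 or elemSize == 0 can never overflow, hence the extra tests.
static bool exampleCallocWouldOverflow(size_t n, size_t elemSize)
{
    size_t totalBytes = n * elemSize;
    return n > 1 && elemSize && (totalBytes / elemSize) != n;
}

// Example: with a 32-bit size_t, exampleCallocWouldOverflow(0x10000, 0x10001) is true.
#endif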
3490
3491// Since cfree isn't used anywhere, we don't compile it in.
3492#ifndef WTF_CHANGES
3493#ifndef WTF_CHANGES
3494extern "C"
3495#endif
3496void cfree(void* ptr) {
3497#ifndef WTF_CHANGES
3498 MallocHook::InvokeDeleteHook(ptr);
3499#endif
3500 do_free(ptr);
3501}
3502#endif
3503
3504#ifndef WTF_CHANGES
3505extern "C"
3506#else
3507template <bool crashOnFailure>
3508void* realloc(void*, size_t);
3509
3510void* fastRealloc(void* old_ptr, size_t new_size)
3511{
3512 return realloc<true>(old_ptr, new_size);
3513}
3514
3515void* tryFastRealloc(void* old_ptr, size_t new_size)
3516{
3517 return realloc<false>(old_ptr, new_size);
3518}
3519
3520template <bool crashOnFailure>
3521ALWAYS_INLINE
3522#endif
3523void* realloc(void* old_ptr, size_t new_size) {
3524 if (old_ptr == NULL) {
3525#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3526 void* result = malloc(new_size);
3527#else
3528 void* result = do_malloc(new_size);
3529#ifndef WTF_CHANGES
3530 MallocHook::InvokeNewHook(result, new_size);
3531#endif
3532#endif
3533 return result;
3534 }
3535 if (new_size == 0) {
3536#ifndef WTF_CHANGES
3537 MallocHook::InvokeDeleteHook(old_ptr);
3538#endif
3539 free(old_ptr);
3540 return NULL;
3541 }
3542
3543#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3544 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= new_size) // If overflow would occur...
3545 return 0;
3546 new_size += sizeof(AllocAlignmentInteger);
3547 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(old_ptr);
3548 if (*header != Internal::AllocTypeMalloc)
3549 Internal::fastMallocMatchFailed(old_ptr);
3550 old_ptr = header;
3551#endif
3552
3553 // Get the size of the old entry
3554 const PageID p = reinterpret_cast<uintptr_t>(old_ptr) >> kPageShift;
3555 size_t cl = pageheap->GetSizeClassIfCached(p);
3556 Span *span = NULL;
3557 size_t old_size;
3558 if (cl == 0) {
3559 span = pageheap->GetDescriptor(p);
3560 cl = span->sizeclass;
3561 pageheap->CacheSizeClass(p, cl);
3562 }
3563 if (cl != 0) {
3564 old_size = ByteSizeForClass(cl);
3565 } else {
3566 ASSERT(span != NULL);
3567 old_size = span->length << kPageShift;
3568 }
3569
3570 // Reallocate if the new size is larger than the old size,
3571 // or if the new size is significantly smaller than the old size.
3572 if ((new_size > old_size) || (AllocationSize(new_size) < old_size)) {
3573 // Need to reallocate
3574 void* new_ptr = do_malloc(new_size);
3575 if (new_ptr == NULL) {
3576 return NULL;
3577 }
3578#ifndef WTF_CHANGES
3579 MallocHook::InvokeNewHook(new_ptr, new_size);
3580#endif
3581 memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size));
3582#ifndef WTF_CHANGES
3583 MallocHook::InvokeDeleteHook(old_ptr);
3584#endif
3585 // We could use a variant of do_free() that leverages the fact
3586 // that we already know the sizeclass of old_ptr. The benefit
3587 // would be small, so don't bother.
3588 do_free(old_ptr);
3589#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3590 new_ptr = static_cast<AllocAlignmentInteger*>(new_ptr) + 1;
3591#endif
3592 return new_ptr;
3593 } else {
3594#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3595 old_ptr = static_cast<AllocAlignmentInteger*>(old_ptr) + 1; // Set old_ptr back to the user pointer.
3596#endif
3597 return old_ptr;
3598 }
3599}
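// A minimal illustrative sketch (hypothetical predicate, not compiled in): the decision above
// copies only when the block must grow, or when shrinking would move the request into a
// strictly smaller allocation; otherwise the existing block is reused in place. Here
// allocationSizeFor stands in for AllocationSize().
#if 0 // illustration only; not compiled
#include <stddef.h>

static bool exampleNeedsReallocation(size_t newSize, size_t oldSize,
                                     size_t (*allocationSizeFor)(size_t))
{
    if (newSize > oldSize)                            // grow: the old block is too small
        return true;
    return allocationSizeFor(newSize) < oldSize;      // shrink past a size-class boundary
}
#endif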
3600
3601#ifdef WTF_CHANGES
3602#undef do_malloc
3603#else
3604
3605static SpinLock set_new_handler_lock = SPINLOCK_INITIALIZER;
3606
3607static inline void* cpp_alloc(size_t size, bool nothrow) {
3608 for (;;) {
3609 void* p = do_malloc(size);
3610#ifdef PREANSINEW
3611 return p;
3612#else
3613 if (p == NULL) { // allocation failed
3614 // Get the current new handler. NB: this function is not
3615 // thread-safe. We make a feeble stab at making it so here, but
3616 // this lock only protects against tcmalloc interfering with
3617 // itself, not with other libraries calling set_new_handler.
3618 std::new_handler nh;
3619 {
3620 SpinLockHolder h(&set_new_handler_lock);
3621 nh = std::set_new_handler(0);
3622 (void) std::set_new_handler(nh);
3623 }
3624 // If no new_handler is established, the allocation failed.
3625 if (!nh) {
3626 if (nothrow) return 0;
3627 throw std::bad_alloc();
3628 }
3629 // Otherwise, try the new_handler. If it returns, retry the
3630 // allocation. If it throws std::bad_alloc, fail the allocation.
3631 // If it throws something else, don't interfere.
3632 try {
3633 (*nh)();
3634 } catch (const std::bad_alloc&) {
3635 if (!nothrow) throw;
3636 return p;
3637 }
3638 } else { // allocation success
3639 return p;
3640 }
3641#endif
3642 }
3643}
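// A minimal illustrative sketch (hypothetical allocator, not compiled in): the loop above
// follows the standard operator new protocol -- on failure, consult the installed
// std::new_handler, invoke it, and retry; with no handler installed, throw std::bad_alloc or
// return null for the nothrow variant. A compressed version on top of std::malloc:
#if 0 // illustration only; not compiled
#include <cstdlib>
#include <new>

static void* exampleAllocWithNewHandler(size_t size, bool nothrow)
{
    for (;;) {
        if (void* p = std::malloc(size))
            return p;                                 // success
        std::new_handler handler = std::set_new_handler(0);
        std::set_new_handler(handler);                // peek at the current handler (not thread-safe, as above)
        if (!handler) {
            if (nothrow)
                return 0;
            throw std::bad_alloc();
        }
        handler();                                    // let it release memory, then retry
    }
}
#endif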
3644
3645void* operator new(size_t size) {
3646 void* p = cpp_alloc(size, false);
3647 // We keep this next instruction out of cpp_alloc for a reason: when
3648 // it's in, and new just calls cpp_alloc, the optimizer may fold the
3649 // new call into cpp_alloc, which messes up our whole section-based
3650 // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc
3651 // isn't the last thing this fn calls, and prevents the folding.
3652 MallocHook::InvokeNewHook(p, size);
3653 return p;
3654}
3655
3656void* operator new(size_t size, const std::nothrow_t&) __THROW {
3657 void* p = cpp_alloc(size, true);
3658 MallocHook::InvokeNewHook(p, size);
3659 return p;
3660}
3661
3662void operator delete(void* p) __THROW {
3663 MallocHook::InvokeDeleteHook(p);
3664 do_free(p);
3665}
3666
3667void operator delete(void* p, const std::nothrow_t&) __THROW {
3668 MallocHook::InvokeDeleteHook(p);
3669 do_free(p);
3670}
3671
3672void* operator new[](size_t size) {
3673 void* p = cpp_alloc(size, false);
3674 // We keep this next instruction out of cpp_alloc for a reason: when
3675 // it's in, and new just calls cpp_alloc, the optimizer may fold the
3676 // new call into cpp_alloc, which messes up our whole section-based
3677 // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc
3678 // isn't the last thing this fn calls, and prevents the folding.
3679 MallocHook::InvokeNewHook(p, size);
3680 return p;
3681}
3682
3683void* operator new[](size_t size, const std::nothrow_t&) __THROW {
3684 void* p = cpp_alloc(size, true);
3685 MallocHook::InvokeNewHook(p, size);
3686 return p;
3687}
3688
3689void operator delete[](void* p) __THROW {
3690 MallocHook::InvokeDeleteHook(p);
3691 do_free(p);
3692}
3693
3694void operator delete[](void* p, const std::nothrow_t&) __THROW {
3695 MallocHook::InvokeDeleteHook(p);
3696 do_free(p);
3697}
3698
3699extern "C" void* memalign(size_t align, size_t size) __THROW {
3700 void* result = do_memalign(align, size);
3701 MallocHook::InvokeNewHook(result, size);
3702 return result;
3703}
3704
3705extern "C" int posix_memalign(void** result_ptr, size_t align, size_t size)
3706 __THROW {
3707 if (((align % sizeof(void*)) != 0) ||
3708 ((align & (align - 1)) != 0) ||
3709 (align == 0)) {
3710 return EINVAL;
3711 }
3712
3713 void* result = do_memalign(align, size);
3714 MallocHook::InvokeNewHook(result, size);
3715 if (result == NULL) {
3716 return ENOMEM;
3717 } else {
3718 *result_ptr = result;
3719 return 0;
3720 }
3721}
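// A minimal illustrative sketch (hypothetical checker, not compiled in): POSIX requires the
// alignment to be a nonzero power of two that is also a multiple of sizeof(void*); the
// (align & (align - 1)) test above is zero only for powers of two.
#if 0 // illustration only; not compiled
#include <stddef.h>

static bool examplePosixMemalignAlignmentIsValid(size_t align)
{
    return align != 0
        && (align & (align - 1)) == 0                 // exactly one bit set => power of two
        && (align % sizeof(void*)) == 0;
}

// Examples: 16 and 64 are valid; 0 and 24 are not.
#endif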
3722
3723static size_t pagesize = 0;
3724
3725extern "C" void* valloc(size_t size) __THROW {
3726 // Allocate page-aligned object of length >= size bytes
3727 if (pagesize == 0) pagesize = getpagesize();
3728 void* result = do_memalign(pagesize, size);
3729 MallocHook::InvokeNewHook(result, size);
3730 return result;
3731}
3732
3733extern "C" void* pvalloc(size_t size) __THROW {
3734 // Round up size to a multiple of pagesize
3735 if (pagesize == 0) pagesize = getpagesize();
3736 size = (size + pagesize - 1) & ~(pagesize - 1);
3737 void* result = do_memalign(pagesize, size);
3738 MallocHook::InvokeNewHook(result, size);
3739 return result;
3740}
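// A minimal illustrative sketch (hypothetical helper, not compiled in): the mask expression
// above rounds the size up to the next multiple of the page size, which works because the
// page size is a power of two.
#if 0 // illustration only; not compiled
#include <stddef.h>

// Adding pageSize - 1 pushes any partial page over the boundary, and masking with
// ~(pageSize - 1) clears the low bits back down to a page multiple.
static size_t exampleRoundUpToPage(size_t size, size_t pageSize)
{
    return (size + pageSize - 1) & ~(pageSize - 1);
}

// Example with 4096-byte pages: 1 -> 4096, 4096 -> 4096, 4097 -> 8192.
#endif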
3741
3742extern "C" void malloc_stats(void) {
3743 do_malloc_stats();
3744}
3745
3746extern "C" int mallopt(int cmd, int value) {
3747 return do_mallopt(cmd, value);
3748}
3749
3750#ifdef HAVE_STRUCT_MALLINFO
3751extern "C" struct mallinfo mallinfo(void) {
3752 return do_mallinfo();
3753}
3754#endif
3755
3756//-------------------------------------------------------------------
3757// Some library routines on RedHat 9 allocate memory using malloc()
3758// and free it using __libc_free() (or vice-versa). Since we provide
3759// our own implementations of malloc/free, we need to make sure that
3760// the __libc_XXX variants (defined as part of glibc) also point to
3761// the same implementations.
3762//-------------------------------------------------------------------
3763
3764#if defined(__GLIBC__)
3765extern "C" {
3766#if COMPILER(GCC) && !defined(__MACH__) && defined(HAVE___ATTRIBUTE__)
3767 // Potentially faster variants that use the gcc alias extension.
3768 // Mach-O (Darwin) does not support weak aliases, hence the __MACH__ check.
3769# define ALIAS(x) __attribute__ ((weak, alias (x)))
3770 void* __libc_malloc(size_t size) ALIAS("malloc");
3771 void __libc_free(void* ptr) ALIAS("free");
3772 void* __libc_realloc(void* ptr, size_t size) ALIAS("realloc");
3773 void* __libc_calloc(size_t n, size_t size) ALIAS("calloc");
3774 void __libc_cfree(void* ptr) ALIAS("cfree");
3775 void* __libc_memalign(size_t align, size_t s) ALIAS("memalign");
3776 void* __libc_valloc(size_t size) ALIAS("valloc");
3777 void* __libc_pvalloc(size_t size) ALIAS("pvalloc");
3778 int __posix_memalign(void** r, size_t a, size_t s) ALIAS("posix_memalign");
3779# undef ALIAS
3780# else /* not __GNUC__ */
3781 // Portable wrappers
3782 void* __libc_malloc(size_t size) { return malloc(size); }
3783 void __libc_free(void* ptr) { free(ptr); }
3784 void* __libc_realloc(void* ptr, size_t size) { return realloc(ptr, size); }
3785 void* __libc_calloc(size_t n, size_t size) { return calloc(n, size); }
3786 void __libc_cfree(void* ptr) { cfree(ptr); }
3787 void* __libc_memalign(size_t align, size_t s) { return memalign(align, s); }
3788 void* __libc_valloc(size_t size) { return valloc(size); }
3789 void* __libc_pvalloc(size_t size) { return pvalloc(size); }
3790 int __posix_memalign(void** r, size_t a, size_t s) {
3791 return posix_memalign(r, a, s);
3792 }
3793# endif /* __GNUC__ */
3794}
3795#endif /* __GLIBC__ */
3796
3797// Override __libc_memalign in libc on linux boxes specially.
3798// They have a bug in libc that causes them to (very rarely) allocate
3799// with __libc_memalign() yet deallocate with free() and the
3800// definitions above don't catch it.
3801// This function is an exception to the rule of calling MallocHook method
3802// from the stack frame of the allocation function;
3803// heap-checker handles this special case explicitly.
3804static void *MemalignOverride(size_t align, size_t size, const void *caller)
3805 __THROW {
3806 void* result = do_memalign(align, size);
3807 MallocHook::InvokeNewHook(result, size);
3808 return result;
3809}
3810void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride;
3811
3812#endif
3813
3814#if defined(WTF_CHANGES) && PLATFORM(DARWIN)
3815
3816class FreeObjectFinder {
3817 const RemoteMemoryReader& m_reader;
3818 HashSet<void*> m_freeObjects;
3819
3820public:
3821 FreeObjectFinder(const RemoteMemoryReader& reader) : m_reader(reader) { }
3822
3823 void visit(void* ptr) { m_freeObjects.add(ptr); }
3824 bool isFreeObject(void* ptr) const { return m_freeObjects.contains(ptr); }
3825 bool isFreeObject(vm_address_t ptr) const { return isFreeObject(reinterpret_cast<void*>(ptr)); }
3826 size_t freeObjectCount() const { return m_freeObjects.size(); }
3827
3828 void findFreeObjects(TCMalloc_ThreadCache* threadCache)
3829 {
3830 for (; threadCache; threadCache = (threadCache->next_ ? m_reader(threadCache->next_) : 0))
3831 threadCache->enumerateFreeObjects(*this, m_reader);
3832 }
3833
3834 void findFreeObjects(TCMalloc_Central_FreeListPadded* centralFreeList, size_t numSizes, TCMalloc_Central_FreeListPadded* remoteCentralFreeList)
3835 {
3836 for (unsigned i = 0; i < numSizes; i++)
3837 centralFreeList[i].enumerateFreeObjects(*this, m_reader, remoteCentralFreeList + i);
3838 }
3839};
3840
3841class PageMapFreeObjectFinder {
3842 const RemoteMemoryReader& m_reader;
3843 FreeObjectFinder& m_freeObjectFinder;
3844
3845public:
3846 PageMapFreeObjectFinder(const RemoteMemoryReader& reader, FreeObjectFinder& freeObjectFinder)
3847 : m_reader(reader)
3848 , m_freeObjectFinder(freeObjectFinder)
3849 { }
3850
3851 int visit(void* ptr) const
3852 {
3853 if (!ptr)
3854 return 1;
3855
3856 Span* span = m_reader(reinterpret_cast<Span*>(ptr));
3857 if (span->free) {
3858 void* ptr = reinterpret_cast<void*>(span->start << kPageShift);
3859 m_freeObjectFinder.visit(ptr);
3860 } else if (span->sizeclass) {
3861 // Walk the free list of the small-object span, keeping track of each object seen
3862 for (void* nextObject = span->objects; nextObject; nextObject = *m_reader(reinterpret_cast<void**>(nextObject)))
3863 m_freeObjectFinder.visit(nextObject);
3864 }
3865 return span->length;
3866 }
3867};
3868
3869class PageMapMemoryUsageRecorder {
3870 task_t m_task;
3871 void* m_context;
3872 unsigned m_typeMask;
3873 vm_range_recorder_t* m_recorder;
3874 const RemoteMemoryReader& m_reader;
3875 const FreeObjectFinder& m_freeObjectFinder;
3876
3877 HashSet<void*> m_seenPointers;
3878 Vector<Span*> m_coalescedSpans;
3879
3880public:
3881 PageMapMemoryUsageRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader, const FreeObjectFinder& freeObjectFinder)
3882 : m_task(task)
3883 , m_context(context)
3884 , m_typeMask(typeMask)
3885 , m_recorder(recorder)
3886 , m_reader(reader)
3887 , m_freeObjectFinder(freeObjectFinder)
3888 { }
3889
3890 ~PageMapMemoryUsageRecorder()
3891 {
3892 ASSERT(!m_coalescedSpans.size());
3893 }
3894
3895 void recordPendingRegions()
3896 {
3897 Span* lastSpan = m_coalescedSpans[m_coalescedSpans.size() - 1];
3898 vm_range_t ptrRange = { m_coalescedSpans[0]->start << kPageShift, 0 };
3899 ptrRange.size = (lastSpan->start << kPageShift) - ptrRange.address + (lastSpan->length * kPageSize);
3900
3901 // Mark the memory region the spans represent as a candidate for containing pointers
3902 if (m_typeMask & MALLOC_PTR_REGION_RANGE_TYPE)
3903 (*m_recorder)(m_task, m_context, MALLOC_PTR_REGION_RANGE_TYPE, &ptrRange, 1);
3904
3905 if (!(m_typeMask & MALLOC_PTR_IN_USE_RANGE_TYPE)) {
3906 m_coalescedSpans.clear();
3907 return;
3908 }
3909
3910 Vector<vm_range_t, 1024> allocatedPointers;
3911 for (size_t i = 0; i < m_coalescedSpans.size(); ++i) {
3912 Span *theSpan = m_coalescedSpans[i];
3913 if (theSpan->free)
3914 continue;
3915
3916 vm_address_t spanStartAddress = theSpan->start << kPageShift;
3917 vm_size_t spanSizeInBytes = theSpan->length * kPageSize;
3918
3919 if (!theSpan->sizeclass) {
3920 // If it's an allocated large object span, mark it as in use
3921 if (!m_freeObjectFinder.isFreeObject(spanStartAddress))
3922 allocatedPointers.append((vm_range_t){spanStartAddress, spanSizeInBytes});
3923 } else {
3924 const size_t objectSize = ByteSizeForClass(theSpan->sizeclass);
3925
3926 // Mark each allocated small object within the span as in use
3927 const vm_address_t endOfSpan = spanStartAddress + spanSizeInBytes;
3928 for (vm_address_t object = spanStartAddress; object + objectSize <= endOfSpan; object += objectSize) {
3929 if (!m_freeObjectFinder.isFreeObject(object))
3930 allocatedPointers.append((vm_range_t){object, objectSize});
3931 }
3932 }
3933 }
3934
3935 (*m_recorder)(m_task, m_context, MALLOC_PTR_IN_USE_RANGE_TYPE, allocatedPointers.data(), allocatedPointers.size());
3936
3937 m_coalescedSpans.clear();
3938 }
3939
3940 int visit(void* ptr)
3941 {
3942 if (!ptr)
3943 return 1;
3944
3945 Span* span = m_reader(reinterpret_cast<Span*>(ptr));
3946 if (!span->start)
3947 return 1;
3948
3949 if (m_seenPointers.contains(ptr))
3950 return span->length;
3951 m_seenPointers.add(ptr);
3952
3953 if (!m_coalescedSpans.size()) {
3954 m_coalescedSpans.append(span);
3955 return span->length;
3956 }
3957
3958 Span* previousSpan = m_coalescedSpans[m_coalescedSpans.size() - 1];
3959 vm_address_t previousSpanStartAddress = previousSpan->start << kPageShift;
3960 vm_size_t previousSpanSizeInBytes = previousSpan->length * kPageSize;
3961
3962 // If the new span is adjacent to the previous span, do nothing for now.
3963 vm_address_t spanStartAddress = span->start << kPageShift;
3964 if (spanStartAddress == previousSpanStartAddress + previousSpanSizeInBytes) {
3965 m_coalescedSpans.append(span);
3966 return span->length;
3967 }
3968
3969 // New span is not adjacent to previous span, so record the spans coalesced so far.
3970 recordPendingRegions();
3971 m_coalescedSpans.append(span);
3972
3973 return span->length;
3974 }
3975};
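// A minimal illustrative sketch (hypothetical helper, not compiled in): spans are coalesced
// into one reported region only while each span begins exactly where the previous one ends,
// matching the adjacency check in visit() above.
#if 0 // illustration only; not compiled
#include <stddef.h>
#include <stdint.h>

static bool exampleSpansAreAdjacent(uintptr_t previousStartAddress, size_t previousSizeInBytes,
                                    uintptr_t nextStartAddress)
{
    return previousStartAddress + previousSizeInBytes == nextStartAddress;
}
#endif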
3976
3977class AdminRegionRecorder {
3978 task_t m_task;
3979 void* m_context;
3980 unsigned m_typeMask;
3981 vm_range_recorder_t* m_recorder;
3982 const RemoteMemoryReader& m_reader;
3983
3984 Vector<vm_range_t, 1024> m_pendingRegions;
3985
3986public:
3987 AdminRegionRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader)
3988 : m_task(task)
3989 , m_context(context)
3990 , m_typeMask(typeMask)
3991 , m_recorder(recorder)
3992 , m_reader(reader)
3993 { }
3994
3995 void recordRegion(vm_address_t ptr, size_t size)
3996 {
3997 if (m_typeMask & MALLOC_ADMIN_REGION_RANGE_TYPE)
3998 m_pendingRegions.append((vm_range_t){ ptr, size });
3999 }
4000
4001 void visit(void *ptr, size_t size)
4002 {
4003 recordRegion(reinterpret_cast<vm_address_t>(ptr), size);
4004 }
4005
4006 void recordPendingRegions()
4007 {
4008 if (m_pendingRegions.size()) {
4009 (*m_recorder)(m_task, m_context, MALLOC_ADMIN_REGION_RANGE_TYPE, m_pendingRegions.data(), m_pendingRegions.size());
4010 m_pendingRegions.clear();
4011 }
4012 }
4013
4014 ~AdminRegionRecorder()
4015 {
4016 ASSERT(!m_pendingRegions.size());
4017 }
4018};
4019
4020kern_return_t FastMallocZone::enumerate(task_t task, void* context, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t reader, vm_range_recorder_t recorder)
4021{
4022 RemoteMemoryReader memoryReader(task, reader);
4023
4024 InitSizeClasses();
4025
4026 FastMallocZone* mzone = memoryReader(reinterpret_cast<FastMallocZone*>(zoneAddress));
4027 TCMalloc_PageHeap* pageHeap = memoryReader(mzone->m_pageHeap);
4028 TCMalloc_ThreadCache** threadHeapsPointer = memoryReader(mzone->m_threadHeaps);
4029 TCMalloc_ThreadCache* threadHeaps = memoryReader(*threadHeapsPointer);
4030
4031 TCMalloc_Central_FreeListPadded* centralCaches = memoryReader(mzone->m_centralCaches, sizeof(TCMalloc_Central_FreeListPadded) * kNumClasses);
4032
4033 FreeObjectFinder finder(memoryReader);
4034 finder.findFreeObjects(threadHeaps);
4035 finder.findFreeObjects(centralCaches, kNumClasses, mzone->m_centralCaches);
4036
4037 TCMalloc_PageHeap::PageMap* pageMap = &pageHeap->pagemap_;
4038 PageMapFreeObjectFinder pageMapFinder(memoryReader, finder);
4039 pageMap->visitValues(pageMapFinder, memoryReader);
4040
4041 PageMapMemoryUsageRecorder usageRecorder(task, context, typeMask, recorder, memoryReader, finder);
4042 pageMap->visitValues(usageRecorder, memoryReader);
4043 usageRecorder.recordPendingRegions();
4044
4045 AdminRegionRecorder adminRegionRecorder(task, context, typeMask, recorder, memoryReader);
4046 pageMap->visitAllocations(adminRegionRecorder, memoryReader);
4047
4048 PageHeapAllocator<Span>* spanAllocator = memoryReader(mzone->m_spanAllocator);
4049 PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator = memoryReader(mzone->m_pageHeapAllocator);
4050
4051 spanAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader);
4052 pageHeapAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader);
4053
4054 adminRegionRecorder.recordPendingRegions();
4055
4056 return 0;
4057}
4058
4059size_t FastMallocZone::size(malloc_zone_t*, const void*)
4060{
4061 return 0;
4062}
4063
4064void* FastMallocZone::zoneMalloc(malloc_zone_t*, size_t)
4065{
4066 return 0;
4067}
4068
4069void* FastMallocZone::zoneCalloc(malloc_zone_t*, size_t, size_t)
4070{
4071 return 0;
4072}
4073
4074void FastMallocZone::zoneFree(malloc_zone_t*, void* ptr)
4075{
4076 // Due to <rdar://problem/5671357> zoneFree may be called by the system free even if the pointer
4077 // is not in this zone. When this happens, the pointer being freed was not allocated by any
4078 // zone so we need to print a useful error for the application developer.
4079 malloc_printf("*** error for object %p: pointer being freed was not allocated\n", ptr);
4080}
4081
4082void* FastMallocZone::zoneRealloc(malloc_zone_t*, void*, size_t)
4083{
4084 return 0;
4085}
4086
4087
4088#undef malloc
4089#undef free
4090#undef realloc
4091#undef calloc
4092
4093extern "C" {
4094malloc_introspection_t jscore_fastmalloc_introspection = { &FastMallocZone::enumerate, &FastMallocZone::goodSize, &FastMallocZone::check, &FastMallocZone::print,
4095 &FastMallocZone::log, &FastMallocZone::forceLock, &FastMallocZone::forceUnlock, &FastMallocZone::statistics
4096
4097#if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !PLATFORM(IPHONE)
4098 , 0 // zone_locked will not be called on the zone unless it advertises itself as version five or higher.
4099#endif
4100
4101 };
4102}
4103
4104FastMallocZone::FastMallocZone(TCMalloc_PageHeap* pageHeap, TCMalloc_ThreadCache** threadHeaps, TCMalloc_Central_FreeListPadded* centralCaches, PageHeapAllocator<Span>* spanAllocator, PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator)
4105 : m_pageHeap(pageHeap)
4106 , m_threadHeaps(threadHeaps)
4107 , m_centralCaches(centralCaches)
4108 , m_spanAllocator(spanAllocator)
4109 , m_pageHeapAllocator(pageHeapAllocator)
4110{
4111 memset(&m_zone, 0, sizeof(m_zone));
4112 m_zone.version = 4;
4113 m_zone.zone_name = "JavaScriptCore FastMalloc";
4114 m_zone.size = &FastMallocZone::size;
4115 m_zone.malloc = &FastMallocZone::zoneMalloc;
4116 m_zone.calloc = &FastMallocZone::zoneCalloc;
4117 m_zone.realloc = &FastMallocZone::zoneRealloc;
4118 m_zone.free = &FastMallocZone::zoneFree;
4119 m_zone.valloc = &FastMallocZone::zoneValloc;
4120 m_zone.destroy = &FastMallocZone::zoneDestroy;
4121 m_zone.introspect = &jscore_fastmalloc_introspection;
4122 malloc_zone_register(&m_zone);
4123}
4124
4125
4126void FastMallocZone::init()
4127{
4128 static FastMallocZone zone(pageheap, &thread_heaps, static_cast<TCMalloc_Central_FreeListPadded*>(central_cache), &span_allocator, &threadheap_allocator);
4129}
4130
4131#endif
4132
4133#if WTF_CHANGES
4134void releaseFastMallocFreeMemory()
4135{
4136 // Flush free pages in the current thread cache back to the page heap.
4137 // Low watermark mechanism in Scavenge() prevents full return on the first pass.
4138 // The second pass flushes everything.
4139 if (TCMalloc_ThreadCache* threadCache = TCMalloc_ThreadCache::GetCacheIfPresent()) {
4140 threadCache->Scavenge();
4141 threadCache->Scavenge();
4142 }
4143
4144 SpinLockHolder h(&pageheap_lock);
4145 pageheap->ReleaseFreePages();
4146}
4147
4148FastMallocStatistics fastMallocStatistics()
4149{
4150 FastMallocStatistics statistics;
4151 {
4152 SpinLockHolder lockHolder(&pageheap_lock);
4153 statistics.heapSize = static_cast<size_t>(pageheap->SystemBytes());
4154 statistics.freeSizeInHeap = static_cast<size_t>(pageheap->FreeBytes());
4155 statistics.returnedSize = pageheap->ReturnedBytes();
4156 statistics.freeSizeInCaches = 0;
4157 for (TCMalloc_ThreadCache* threadCache = thread_heaps; threadCache ; threadCache = threadCache->next_)
4158 statistics.freeSizeInCaches += threadCache->Size();
4159 }
4160 for (unsigned cl = 0; cl < kNumClasses; ++cl) {
4161 const int length = central_cache[cl].length();
4162 const int tc_length = central_cache[cl].tc_length();
4163 statistics.freeSizeInCaches += ByteSizeForClass(cl) * (length + tc_length);
4164 }
4165 return statistics;
4166}
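// A minimal illustrative sketch (hypothetical helper, not compiled in): the central-cache
// contribution above counts every cached object, whether on the regular free list or in the
// transfer cache, at its size class's object size.
#if 0 // illustration only; not compiled
#include <stddef.h>

static size_t exampleCentralCacheFreeBytes(size_t bytesPerObject, int listLength, int transferCacheLength)
{
    return bytesPerObject * static_cast<size_t>(listLength + transferCacheLength);
}
#endif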
4167
4168} // namespace WTF
4169#endif
4170
4171#endif // FORCE_SYSTEM_MALLOC