source: lib/libjemalloc/malloc.c @ 93a495

Revision 93a495, 138.4 KB checked in by Tollef Fog Heen <tfheen@…>, 7 years ago

Import jemalloc

git-svn-id:  http://www.varnish-cache.org/svn/trunk/varnish-cache@3215 d4fa192b-c00b-0410-8231-f00ffab90ce4

  • Property mode set to 100644
1/*-
2 * Copyright (C) 2006-2008 Jason Evans <jasone@FreeBSD.org>.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice(s), this list of conditions and the following disclaimer as
10 *    the first lines of this file unmodified other than the possible
11 *    addition of one or more copyright notices.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice(s), this list of conditions and the following disclaimer in
14 *    the documentation and/or other materials provided with the
15 *    distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 *******************************************************************************
30 *
31 * This allocator implementation is designed to provide scalable performance
32 * for multi-threaded programs on multi-processor systems.  The following
33 * features are included for this purpose:
34 *
35 *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
36 *     contention and cache sloshing.
37 *
38 *   + Thread-specific caching is used if there are multiple threads, which
39 *     reduces the amount of locking.
40 *
41 *   + Cache line sharing between arenas is avoided for internal data
42 *     structures.
43 *
44 *   + Memory is managed in chunks and runs (chunks can be split into runs),
45 *     rather than as individual pages.  This provides a constant-time
46 *     mechanism for associating allocations with particular arenas.
47 *
48 * Allocation requests are rounded up to the nearest size class, and no record
49 * of the original request size is maintained.  Allocations are broken into
50 * categories according to size class.  Assuming runtime defaults, 4 kB pages
51 * and a 16 byte quantum on a 32-bit system, the size classes in each category
52 * are as follows:
53 *
54 *   |=======================================|
55 *   | Category | Subcategory      |    Size |
56 *   |=======================================|
57 *   | Small    | Tiny             |       2 |
58 *   |          |                  |       4 |
59 *   |          |                  |       8 |
60 *   |          |------------------+---------|
61 *   |          | Quantum-spaced   |      16 |
62 *   |          |                  |      32 |
63 *   |          |                  |      48 |
64 *   |          |                  |     ... |
65 *   |          |                  |      96 |
66 *   |          |                  |     112 |
67 *   |          |                  |     128 |
68 *   |          |------------------+---------|
69 *   |          | Cacheline-spaced |     192 |
70 *   |          |                  |     256 |
71 *   |          |                  |     320 |
72 *   |          |                  |     384 |
73 *   |          |                  |     448 |
74 *   |          |                  |     512 |
75 *   |          |------------------+---------|
76 *   |          | Sub-page         |     768 |
77 *   |          |                  |    1024 |
78 *   |          |                  |    1280 |
79 *   |          |                  |     ... |
80 *   |          |                  |    3328 |
81 *   |          |                  |    3584 |
82 *   |          |                  |    3840 |
83 *   |=======================================|
84 *   | Large                       |    4 kB |
85 *   |                             |    8 kB |
86 *   |                             |   12 kB |
87 *   |                             |     ... |
88 *   |                             | 1012 kB |
89 *   |                             | 1016 kB |
90 *   |                             | 1020 kB |
91 *   |=======================================|
92 *   | Huge                        |    1 MB |
93 *   |                             |    2 MB |
94 *   |                             |    3 MB |
95 *   |                             |     ... |
96 *   |=======================================|
97 *
98 * A different mechanism is used for each category:
99 *
100 *   Small : Each size class is segregated into its own set of runs.  Each run
101 *           maintains a bitmap of which regions are free/allocated.
102 *
103 *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
104 *           in the associated arena chunk header maps.
105 *
106 *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
107 *          Metadata are stored in a separate red-black tree.
108 *
109 *******************************************************************************
110 */
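/*
 * Illustrative sketch (not part of the upstream malloc.c): how a request is
 * routed to the Small/Large/Huge categories in the table above, assuming the
 * quoted runtime defaults (16 byte quantum, 4 kB pages, 1 MB chunks).  The
 * function and its constants are hypothetical and exist only to make the
 * category boundaries concrete.
 */
static const char *
example_size_category(unsigned long size)	/* size in bytes */
{
	const unsigned long example_bin_maxclass = 3840;	/* Largest sub-page class. */
	const unsigned long example_chunksize = 1UL << 20;	/* 1 MB chunks. */
	const unsigned long example_pagesize = 1UL << 12;	/* 4 kB pages. */

	if (size <= example_bin_maxclass)
		return ("small");	/* Serviced from a bin's runs. */
	else if (size <= example_chunksize - example_pagesize)
		return ("large");	/* Dedicated run within a chunk. */
	else
		return ("huge");	/* Dedicated chunk-aligned mapping. */
}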
111
112/*
113 * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
114 * defaults the A and J runtime options to off.  These settings are appropriate
115 * for production systems.
116 */
117/* #define      MALLOC_PRODUCTION */
118
119#ifndef MALLOC_PRODUCTION
120   /*
121    * MALLOC_DEBUG enables assertions and other sanity checks, and disables
122    * inline functions.
123    */
124#  define MALLOC_DEBUG
125
126   /* MALLOC_STATS enables statistics calculation. */
127#  define MALLOC_STATS
128#endif
129
130/*
131 * MALLOC_TINY enables support for tiny objects, which are smaller than one
132 * quantum.
133 */
134#define MALLOC_TINY
135
136/*
137 * MALLOC_MAG enables a magazine-based thread-specific caching layer for small
138 * objects.  This makes it possible to allocate/deallocate objects without any
139 * locking when the cache is in the steady state.
140 */
141#define MALLOC_MAG
142
143/*
144 * MALLOC_BALANCE enables monitoring of arena lock contention and dynamically
145 * re-balances arena load if exponentially averaged contention exceeds a
146 * certain threshold.
147 */
148#define MALLOC_BALANCE
149
150/*
151 * MALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
152 * segment (DSS).  In an ideal world, this functionality would be completely
153 * unnecessary, but we are burdened by history and the lack of resource limits
154 * for anonymous mapped memory.
155 */
156#define MALLOC_DSS
157
158#include <sys/cdefs.h>
159__FBSDID("$FreeBSD: head/lib/libc/stdlib/malloc.c 182225 2008-08-27 02:00:53Z jasone $");
160
161#include "libc_private.h"
162#ifdef MALLOC_DEBUG
163#  define _LOCK_DEBUG
164#endif
165#include "spinlock.h"
166#include "namespace.h"
167#include <sys/mman.h>
168#include <sys/param.h>
169#include <sys/stddef.h>
170#include <sys/time.h>
171#include <sys/types.h>
172#include <sys/sysctl.h>
173#include <sys/uio.h>
174#include <sys/ktrace.h> /* Must come after several other sys/ includes. */
175
176#include <machine/cpufunc.h>
177#include <machine/vmparam.h>
178
179#include <errno.h>
180#include <limits.h>
181#include <pthread.h>
182#include <sched.h>
183#include <stdarg.h>
184#include <stdbool.h>
185#include <stdio.h>
186#include <stdint.h>
187#include <stdlib.h>
188#include <string.h>
189#include <strings.h>
190#include <unistd.h>
191
192#include "un-namespace.h"
193
194#ifdef MALLOC_DEBUG
195#  ifdef NDEBUG
196#    undef NDEBUG
197#  endif
198#else
199#  ifndef NDEBUG
200#    define NDEBUG
201#  endif
202#endif
203#include <assert.h>
204
205#include "rb.h"
206
207#ifdef MALLOC_DEBUG
208   /* Disable inlining to make debugging easier. */
209#  define inline
210#endif
211
212/* Size of stack-allocated buffer passed to strerror_r(). */
213#define STRERROR_BUF            64
214
215/*
216 * The const_size2bin table is sized according to PAGESIZE_2POW, but for
217 * correctness reasons, we never assume that
218 * (pagesize == (1U << PAGESIZE_2POW)).
219 *
220 * Minimum alignment of allocations is 2^QUANTUM_2POW bytes.
221 */
222#ifdef __i386__
223#  define PAGESIZE_2POW         12
224#  define QUANTUM_2POW          4
225#  define SIZEOF_PTR_2POW       2
226#  define CPU_SPINWAIT          __asm__ volatile("pause")
227#endif
228#ifdef __ia64__
229#  define PAGESIZE_2POW         12
230#  define QUANTUM_2POW          4
231#  define SIZEOF_PTR_2POW       3
232#endif
233#ifdef __alpha__
234#  define PAGESIZE_2POW         13
235#  define QUANTUM_2POW          4
236#  define SIZEOF_PTR_2POW       3
237#  define NO_TLS
238#endif
239#ifdef __sparc64__
240#  define PAGESIZE_2POW         13
241#  define QUANTUM_2POW          4
242#  define SIZEOF_PTR_2POW       3
243#  define NO_TLS
244#endif
245#ifdef __amd64__
246#  define PAGESIZE_2POW         12
247#  define QUANTUM_2POW          4
248#  define SIZEOF_PTR_2POW       3
249#  define CPU_SPINWAIT          __asm__ volatile("pause")
250#endif
251#ifdef __arm__
252#  define PAGESIZE_2POW         12
253#  define QUANTUM_2POW          3
254#  define SIZEOF_PTR_2POW       2
255#  define NO_TLS
256#endif
257#ifdef __mips__
258#  define PAGESIZE_2POW         12
259#  define QUANTUM_2POW          3
260#  define SIZEOF_PTR_2POW       2
261#  define NO_TLS
262#endif
263#ifdef __powerpc__
264#  define PAGESIZE_2POW         12
265#  define QUANTUM_2POW          4
266#  define SIZEOF_PTR_2POW       2
267#endif
268
269#define QUANTUM                 ((size_t)(1U << QUANTUM_2POW))
270#define QUANTUM_MASK            (QUANTUM - 1)
271
272#define SIZEOF_PTR              (1U << SIZEOF_PTR_2POW)
273
274/* sizeof(int) == (1U << SIZEOF_INT_2POW). */
275#ifndef SIZEOF_INT_2POW
276#  define SIZEOF_INT_2POW       2
277#endif
278
279/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
280#if (!defined(PIC) && !defined(NO_TLS))
281#  define NO_TLS
282#endif
283
284#ifdef NO_TLS
285   /* MALLOC_MAG requires TLS. */
286#  ifdef MALLOC_MAG
287#    undef MALLOC_MAG
288#  endif
289   /* MALLOC_BALANCE requires TLS. */
290#  ifdef MALLOC_BALANCE
291#    undef MALLOC_BALANCE
292#  endif
293#endif
294
295/*
296 * Size and alignment of memory chunks that are allocated by the OS's virtual
297 * memory system.
298 */
299#define CHUNK_2POW_DEFAULT      20
300
301/* Maximum number of dirty pages per arena. */
302#define DIRTY_MAX_DEFAULT       (1U << 9)
303
304/*
305 * Maximum size of L1 cache line.  This is used to avoid cache line aliasing.
306 * In addition, this controls the spacing of cacheline-spaced size classes.
307 */
308#define CACHELINE_2POW          6
309#define CACHELINE               ((size_t)(1U << CACHELINE_2POW))
310#define CACHELINE_MASK          (CACHELINE - 1)
311
312/*
313 * Subpages are an artificially designated partitioning of pages.  Their only
314 * purpose is to support subpage-spaced size classes.
315 *
316 * There must be at least 4 subpages per page, due to the way size classes are
317 * handled.
318 */
319#define SUBPAGE_2POW            8
320#define SUBPAGE                 ((size_t)(1U << SUBPAGE_2POW))
321#define SUBPAGE_MASK            (SUBPAGE - 1)
322
323#ifdef MALLOC_TINY
324   /* Smallest size class to support. */
325#  define TINY_MIN_2POW         1
326#endif
327
328/*
329 * Maximum size class that is a multiple of the quantum, but not (necessarily)
330 * a power of 2.  Above this size, allocations are rounded up to the nearest
331 * power of 2.
332 */
333#define QSPACE_MAX_2POW_DEFAULT 7
334
335/*
336 * Maximum size class that is a multiple of the cacheline, but not (necessarily)
337 * a power of 2.  Above this size, allocations are rounded up to the nearest
338 * power of 2.
339 */
340#define CSPACE_MAX_2POW_DEFAULT 9
341
342/*
343 * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
344 * as small as possible such that this setting is still honored, without
345 * violating other constraints.  The goal is to make runs as small as possible
346 * without exceeding a per run external fragmentation threshold.
347 *
348 * We use binary fixed point math for overhead computations, where the binary
349 * point is implicitly RUN_BFP bits to the left.
350 *
351 * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
352 * honored for some/all object sizes, since there is one bit of header overhead
353 * per object (plus a constant).  This constraint is relaxed (ignored) for runs
354 * that are so small that the per-region overhead is greater than:
355 *
356 *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
357 */
358#define RUN_BFP                 12
359/*                                    \/   Implicit binary fixed point. */
360#define RUN_MAX_OVRHD           0x0000003dU
361#define RUN_MAX_OVRHD_RELAX     0x00001800U
362
363/* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
364#define RUN_MAX_SMALL   (12 * pagesize)
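
/*
 * Illustrative sketch (not part of the upstream malloc.c): how the binary
 * fixed point above can be read.  With the binary point RUN_BFP bits to the
 * left, "header overhead is at most RUN_MAX_OVRHD/2^RUN_BFP of the run"
 * becomes a pure integer comparison.  example_run_ovrhd_ok() is a
 * hypothetical helper; the actual sizing logic lives in
 * arena_bin_run_size_calc().
 */
static inline bool
example_run_ovrhd_ok(size_t header_size, size_t run_size)
{

	/* header_size/run_size <= RUN_MAX_OVRHD/2^RUN_BFP, in fixed point. */
	return ((header_size << RUN_BFP) <= RUN_MAX_OVRHD * run_size);
}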
365
366/*
367 * Hyper-threaded CPUs may need a special instruction inside spin loops in
368 * order to yield to another virtual CPU.  If no such instruction is defined
369 * above, make CPU_SPINWAIT a no-op.
370 */
371#ifndef CPU_SPINWAIT
372#  define CPU_SPINWAIT
373#endif
374
375/*
376 * Adaptive spinning must eventually switch to blocking, in order to avoid the
377 * potential for priority inversion deadlock.  Backing off past a certain point
378 * can actually waste time.
379 */
380#define SPIN_LIMIT_2POW         11
381
382/*
383 * Conversion from spinning to blocking is expensive; we use (1U <<
384 * BLOCK_COST_2POW) to estimate how many more times costly blocking is than
385 * worst-case spinning.
386 */
387#define BLOCK_COST_2POW         4
388
389#ifdef MALLOC_MAG
390   /*
391    * Default magazine size, in bytes.  max_rounds is calculated to make
392    * optimal use of the space, leaving just enough room for the magazine
393    * header.
394    */
395#  define MAG_SIZE_2POW_DEFAULT 9
396#endif
397
398#ifdef MALLOC_BALANCE
399   /*
400    * We use an exponential moving average to track recent lock contention,
401    * where the size of the history window is N, and alpha=2/(N+1).
402    *
403    * Due to integer math rounding, very small values here can cause
404    * substantial degradation in accuracy, thus making the moving average decay
405    * faster than it would with precise calculation.
406    */
407#  define BALANCE_ALPHA_INV_2POW        9
408
409   /*
410    * Threshold value for the exponential moving contention average at which to
411    * re-assign a thread.
412    */
413#  define BALANCE_THRESHOLD_DEFAULT     (1U << (SPIN_LIMIT_2POW-4))
414#endif
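
#ifdef MALLOC_BALANCE
/*
 * Illustrative sketch (not part of the upstream malloc.c): folding a new
 * contention sample into an exponential moving average using only integer
 * math, as described above.  The previous average gets a weight of
 * (2^BALANCE_ALPHA_INV_2POW - 1) parts and the sample one part.
 * example_contention_ema() is a hypothetical helper, not the allocator's
 * actual update path.
 */
static inline uint32_t
example_contention_ema(uint32_t ema, uint32_t sample)
{

	return ((uint32_t)((((uint64_t)ema *
	    (uint64_t)((1U << BALANCE_ALPHA_INV_2POW) - 1)) + (uint64_t)sample)
	    >> BALANCE_ALPHA_INV_2POW));
}
#endif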
415
416/******************************************************************************/
417
418/*
419 * Mutexes based on spinlocks.  We can't use normal pthread spinlocks in all
420 * places, because they require malloc()ed memory, which causes bootstrapping
421 * issues in some cases.
422 */
423typedef struct {
424        spinlock_t      lock;
425} malloc_mutex_t;
426
427/* Set to true once the allocator has been initialized. */
428static bool malloc_initialized = false;
429
430/* Used to avoid initialization races. */
431static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
432
433/******************************************************************************/
434/*
435 * Statistics data structures.
436 */
437
438#ifdef MALLOC_STATS
439
440typedef struct malloc_bin_stats_s malloc_bin_stats_t;
441struct malloc_bin_stats_s {
442        /*
443         * Number of allocation requests that corresponded to the size of this
444         * bin.
445         */
446        uint64_t        nrequests;
447
448#ifdef MALLOC_MAG
449        /* Number of magazine reloads from this bin. */
450        uint64_t        nmags;
451#endif
452
453        /* Total number of runs created for this bin's size class. */
454        uint64_t        nruns;
455
456        /*
457         * Total number of runs reused by extracting them from the runs tree for
458         * this bin's size class.
459         */
460        uint64_t        reruns;
461
462        /* High-water mark for this bin. */
463        unsigned long   highruns;
464
465        /* Current number of runs in this bin. */
466        unsigned long   curruns;
467};
468
469typedef struct arena_stats_s arena_stats_t;
470struct arena_stats_s {
471        /* Number of bytes currently mapped. */
472        size_t          mapped;
473
474        /*
475         * Total number of purge sweeps, total number of madvise calls made,
476         * and total pages purged in order to keep dirty unused memory under
477         * control.
478         */
479        uint64_t        npurge;
480        uint64_t        nmadvise;
481        uint64_t        purged;
482
483        /* Per-size-category statistics. */
484        size_t          allocated_small;
485        uint64_t        nmalloc_small;
486        uint64_t        ndalloc_small;
487
488        size_t          allocated_large;
489        uint64_t        nmalloc_large;
490        uint64_t        ndalloc_large;
491
492#ifdef MALLOC_BALANCE
493        /* Number of times this arena reassigned a thread due to contention. */
494        uint64_t        nbalance;
495#endif
496};
497
498typedef struct chunk_stats_s chunk_stats_t;
499struct chunk_stats_s {
500        /* Number of chunks that were allocated. */
501        uint64_t        nchunks;
502
503        /* High-water mark for number of chunks allocated. */
504        unsigned long   highchunks;
505
506        /*
507         * Current number of chunks allocated.  This value is not needed for
508         * any other purpose, but it is tracked so that the highchunks
509         * high-water mark can be maintained.
510         */
511        unsigned long   curchunks;
512};
513
514#endif /* #ifdef MALLOC_STATS */
515
516/******************************************************************************/
517/*
518 * Extent data structures.
519 */
520
521/* Tree of extents. */
522typedef struct extent_node_s extent_node_t;
523struct extent_node_s {
524#ifdef MALLOC_DSS
525        /* Linkage for the size/address-ordered tree. */
526        rb_node(extent_node_t) link_szad;
527#endif
528
529        /* Linkage for the address-ordered tree. */
530        rb_node(extent_node_t) link_ad;
531
532        /* Pointer to the extent that this tree node is responsible for. */
533        void    *addr;
534
535        /* Total region size. */
536        size_t  size;
537};
538typedef rb_tree(extent_node_t) extent_tree_t;
539
540/******************************************************************************/
541/*
542 * Arena data structures.
543 */
544
545typedef struct arena_s arena_t;
546typedef struct arena_bin_s arena_bin_t;
547
548/* Each element of the chunk map corresponds to one page within the chunk. */
549typedef struct arena_chunk_map_s arena_chunk_map_t;
550struct arena_chunk_map_s {
551        /*
552         * Linkage for run trees.  There are two disjoint uses:
553         *
554         * 1) arena_t's runs_avail tree.
555         * 2) arena_run_t conceptually uses this linkage for in-use non-full
556         *    runs, rather than directly embedding linkage.
557         */
558        rb_node(arena_chunk_map_t)      link;
559
560        /*
561         * Run address (or size) and various flags are stored together.  The bit
562         * layout looks like (assuming 32-bit system):
563         *
564         *   ???????? ???????? ????---- ---kdzla
565         *
566         * ? : Unallocated: Run address for first/last pages, unset for internal
567         *                  pages.
568         *     Small: Run address.
569         *     Large: Run size for first page, unset for trailing pages.
570         * - : Unused.
571         * k : key?
572         * d : dirty?
573         * z : zeroed?
574         * l : large?
575         * a : allocated?
576         *
577         * Following are example bit patterns for the three types of runs.
578         *
579         * r : run address
580         * s : run size
581         * x : don't care
582         * - : 0
583         * [dzla] : bit set
584         *
585         *   Unallocated:
586         *     ssssssss ssssssss ssss---- --------
587         *     xxxxxxxx xxxxxxxx xxxx---- ----d---
588         *     ssssssss ssssssss ssss---- -----z--
589         *
590         *   Small:
591         *     rrrrrrrr rrrrrrrr rrrr---- -------a
592         *     rrrrrrrr rrrrrrrr rrrr---- -------a
593         *     rrrrrrrr rrrrrrrr rrrr---- -------a
594         *
595         *   Large:
596         *     ssssssss ssssssss ssss---- ------la
597         *     -------- -------- -------- ------la
598         *     -------- -------- -------- ------la
599         */
600        size_t                          bits;
601#define CHUNK_MAP_KEY           ((size_t)0x10U)
602#define CHUNK_MAP_DIRTY         ((size_t)0x08U)
603#define CHUNK_MAP_ZEROED        ((size_t)0x04U)
604#define CHUNK_MAP_LARGE         ((size_t)0x02U)
605#define CHUNK_MAP_ALLOCATED     ((size_t)0x01U)
606};
607typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
608typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
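
/*
 * Illustrative sketch (not part of the upstream malloc.c): decoding the
 * packed "bits" field described above.  Runs are page-aligned and run sizes
 * are page multiples, so the address/size portion survives masking off the
 * low-order flag bits.  These helpers are hypothetical; page_mask stands in
 * for the runtime pagesize_mask declared further down.
 */
static inline size_t
example_mapbits_addr_or_size(size_t bits, size_t page_mask)
{

	return (bits & ~page_mask);
}

static inline bool
example_mapbits_is_large(size_t bits)
{

	return ((bits & (CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED)) ==
	    (CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED));
}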
609
610/* Arena chunk header. */
611typedef struct arena_chunk_s arena_chunk_t;
612struct arena_chunk_s {
613        /* Arena that owns the chunk. */
614        arena_t         *arena;
615
616        /* Linkage for the arena's chunks_dirty tree. */
617        rb_node(arena_chunk_t) link_dirty;
618
619        /* Number of dirty pages. */
620        size_t          ndirty;
621
622        /* Map of pages within chunk that keeps track of free/large/small. */
623        arena_chunk_map_t map[1]; /* Dynamically sized. */
624};
625typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
626
627typedef struct arena_run_s arena_run_t;
628struct arena_run_s {
629#ifdef MALLOC_DEBUG
630        uint32_t        magic;
631#  define ARENA_RUN_MAGIC 0x384adf93
632#endif
633
634        /* Bin this run is associated with. */
635        arena_bin_t     *bin;
636
637        /* Index of first element that might have a free region. */
638        unsigned        regs_minelm;
639
640        /* Number of free regions in run. */
641        unsigned        nfree;
642
643        /* Bitmask of in-use regions (0: in use, 1: free). */
644        unsigned        regs_mask[1]; /* Dynamically sized. */
645};
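
/*
 * Illustrative sketch (not part of the upstream malloc.c): locating a free
 * region via regs_mask, where a set bit means "free".  ffs(3) finds the
 * lowest set bit in an element, and the element index plus bit position give
 * the region index.  example_run_find_free() is hypothetical and ignores the
 * regs_minelm hint.
 */
static inline int
example_run_find_free(const unsigned *regs_mask, unsigned nelms)
{
	unsigned i;

	for (i = 0; i < nelms; i++) {
		if (regs_mask[i] != 0) {
			/* Region index = element * bits-per-element + bit. */
			return ((int)((i << (SIZEOF_INT_2POW + 3)) +
			    (ffs((int)regs_mask[i]) - 1)));
		}
	}
	return (-1);	/* Run is full. */
}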
646
647struct arena_bin_s {
648        /*
649         * Current run being used to service allocations of this bin's size
650         * class.
651         */
652        arena_run_t     *runcur;
653
654        /*
655         * Tree of non-full runs.  This tree is used when looking for an
656         * existing run when runcur is no longer usable.  We choose the
657         * non-full run that is lowest in memory; this policy tends to keep
658         * objects packed well, and it can also help reduce the number of
659         * almost-empty chunks.
660         */
661        arena_run_tree_t runs;
662
663        /* Size of regions in a run for this bin's size class. */
664        size_t          reg_size;
665
666        /* Total size of a run for this bin's size class. */
667        size_t          run_size;
668
669        /* Total number of regions in a run for this bin's size class. */
670        uint32_t        nregs;
671
672        /* Number of elements in a run's regs_mask for this bin's size class. */
673        uint32_t        regs_mask_nelms;
674
675        /* Offset of first region in a run for this bin's size class. */
676        uint32_t        reg0_offset;
677
678#ifdef MALLOC_STATS
679        /* Bin statistics. */
680        malloc_bin_stats_t stats;
681#endif
682};
683
684struct arena_s {
685#ifdef MALLOC_DEBUG
686        uint32_t                magic;
687#  define ARENA_MAGIC 0x947d3d24
688#endif
689
690        /* All operations on this arena require that lock be locked. */
691        pthread_mutex_t         lock;
692
693#ifdef MALLOC_STATS
694        arena_stats_t           stats;
695#endif
696
697        /* Tree of dirty-page-containing chunks this arena manages. */
698        arena_chunk_tree_t      chunks_dirty;
699
700        /*
701         * In order to avoid rapid chunk allocation/deallocation when an arena
702         * oscillates right on the cusp of needing a new chunk, cache the most
703         * recently freed chunk.  The spare is left in the arena's chunk trees
704         * until it is deleted.
705         *
706         * There is one spare chunk per arena, rather than one spare total, in
707         * order to avoid interactions between multiple threads that could make
708         * a single spare inadequate.
709         */
710        arena_chunk_t           *spare;
711
712        /*
713         * Current count of pages within unused runs that are potentially
714         * dirty, and for which madvise(... MADV_FREE) has not been called.  By
715         * tracking this, we can institute a limit on how much dirty unused
716         * memory is mapped for each arena.
717         */
718        size_t                  ndirty;
719
720        /*
721         * Size/address-ordered tree of this arena's available runs.  This tree
722         * is used for first-best-fit run allocation.
723         */
724        arena_avail_tree_t      runs_avail;
725
726#ifdef MALLOC_BALANCE
727        /*
728         * The arena load balancing machinery needs to keep track of how much
729         * lock contention there is.  This value is exponentially averaged.
730         */
731        uint32_t                contention;
732#endif
733
734        /*
735         * bins is used to store runs whose regions are of the following sizes,
736         * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
737         *
738         *   bins[i] | size |
739         *   --------+------+
740         *        0  |    2 |
741         *        1  |    4 |
742         *        2  |    8 |
743         *   --------+------+
744         *        3  |   16 |
745         *           :      :
746         *       10  |  128 |
747         *   --------+------+
748         *       11  |  192 |
749         *           :      :
750         *       16  |  512 |
751         *   --------+------+
752         *       17  |  768 |
753         *           :      :
754         *       29  | 3840 |
755         *   --------+------+
756         */
757        arena_bin_t             bins[1]; /* Dynamically sized. */
758};
759
760/******************************************************************************/
761/*
762 * Magazine data structures.
763 */
764
765#ifdef MALLOC_MAG
766typedef struct mag_s mag_t;
767struct mag_s {
768        size_t          binind; /* Index of associated bin. */
769        size_t          nrounds;
770        void            *rounds[1]; /* Dynamically sized. */
771};
772
773/*
774 * Magazines are lazily allocated, but once created, they remain until the
775 * associated mag_rack is destroyed.
776 */
777typedef struct bin_mags_s bin_mags_t;
778struct bin_mags_s {
779        mag_t   *curmag;
780        mag_t   *sparemag;
781};
782
783typedef struct mag_rack_s mag_rack_t;
784struct mag_rack_s {
785        bin_mags_t      bin_mags[1]; /* Dynamically sized. */
786};
787#endif
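
#ifdef MALLOC_MAG
/*
 * Illustrative sketch (not part of the upstream malloc.c): the lock-free
 * steady-state fast path that magazines make possible.  rounds[] behaves as
 * a per-thread stack of cached objects, so popping one needs no arena lock.
 * example_mag_pop() is a hypothetical helper and omits the slow path that
 * refills an empty magazine (see mag_load()).
 */
static inline void *
example_mag_pop(mag_t *mag)
{

	if (mag->nrounds == 0)
		return (NULL);		/* Empty; caller must reload. */
	mag->nrounds--;
	return (mag->rounds[mag->nrounds]);
}
#endif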
788
789/******************************************************************************/
790/*
791 * Data.
792 */
793
794/* Number of CPUs. */
795static unsigned         ncpus;
796
797/* VM page size. */
798static size_t           pagesize;
799static size_t           pagesize_mask;
800static size_t           pagesize_2pow;
801
802/* Various bin-related settings. */
803#ifdef MALLOC_TINY              /* Number of (2^n)-spaced tiny bins. */
804#  define               ntbins  ((unsigned)(QUANTUM_2POW - TINY_MIN_2POW))
805#else
806#  define               ntbins  0
807#endif
808static unsigned         nqbins; /* Number of quantum-spaced bins. */
809static unsigned         ncbins; /* Number of cacheline-spaced bins. */
810static unsigned         nsbins; /* Number of subpage-spaced bins. */
811static unsigned         nbins;
812#ifdef MALLOC_TINY
813#  define               tspace_max      ((size_t)(QUANTUM >> 1))
814#endif
815#define                 qspace_min      QUANTUM
816static size_t           qspace_max;
817static size_t           cspace_min;
818static size_t           cspace_max;
819static size_t           sspace_min;
820static size_t           sspace_max;
821#define                 bin_maxclass    sspace_max
822
823static uint8_t const    *size2bin;
824/*
825 * const_size2bin is a static constant lookup table that in the common case can
826 * be used as-is for size2bin.  For dynamically linked programs, this avoids
827 * a page of memory overhead per process.
828 */
829#define S2B_1(i)        i,
830#define S2B_2(i)        S2B_1(i) S2B_1(i)
831#define S2B_4(i)        S2B_2(i) S2B_2(i)
832#define S2B_8(i)        S2B_4(i) S2B_4(i)
833#define S2B_16(i)       S2B_8(i) S2B_8(i)
834#define S2B_32(i)       S2B_16(i) S2B_16(i)
835#define S2B_64(i)       S2B_32(i) S2B_32(i)
836#define S2B_128(i)      S2B_64(i) S2B_64(i)
837#define S2B_256(i)      S2B_128(i) S2B_128(i)
838static const uint8_t    const_size2bin[(1U << PAGESIZE_2POW) - 255] = {
839        S2B_1(0xffU)            /*    0 */
840#if (QUANTUM_2POW == 4)
841/* 64-bit system ************************/
842#  ifdef MALLOC_TINY
843        S2B_2(0)                /*    2 */
844        S2B_2(1)                /*    4 */
845        S2B_4(2)                /*    8 */
846        S2B_8(3)                /*   16 */
847#    define S2B_QMIN 3
848#  else
849        S2B_16(0)               /*   16 */
850#    define S2B_QMIN 0
851#  endif
852        S2B_16(S2B_QMIN + 1)    /*   32 */
853        S2B_16(S2B_QMIN + 2)    /*   48 */
854        S2B_16(S2B_QMIN + 3)    /*   64 */
855        S2B_16(S2B_QMIN + 4)    /*   80 */
856        S2B_16(S2B_QMIN + 5)    /*   96 */
857        S2B_16(S2B_QMIN + 6)    /*  112 */
858        S2B_16(S2B_QMIN + 7)    /*  128 */
859#  define S2B_CMIN (S2B_QMIN + 8)
860#else
861/* 32-bit system ************************/
862#  ifdef MALLOC_TINY
863        S2B_2(0)                /*    2 */
864        S2B_2(1)                /*    4 */
865        S2B_4(2)                /*    8 */
866#    define S2B_QMIN 2
867#  else
868        S2B_8(0)                /*    8 */
869#    define S2B_QMIN 0
870#  endif
871        S2B_8(S2B_QMIN + 1)     /*   16 */
872        S2B_8(S2B_QMIN + 2)     /*   24 */
873        S2B_8(S2B_QMIN + 3)     /*   32 */
874        S2B_8(S2B_QMIN + 4)     /*   40 */
875        S2B_8(S2B_QMIN + 5)     /*   48 */
876        S2B_8(S2B_QMIN + 6)     /*   56 */
877        S2B_8(S2B_QMIN + 7)     /*   64 */
878        S2B_8(S2B_QMIN + 8)     /*   72 */
879        S2B_8(S2B_QMIN + 9)     /*   80 */
880        S2B_8(S2B_QMIN + 10)    /*   88 */
881        S2B_8(S2B_QMIN + 11)    /*   96 */
882        S2B_8(S2B_QMIN + 12)    /*  104 */
883        S2B_8(S2B_QMIN + 13)    /*  112 */
884        S2B_8(S2B_QMIN + 14)    /*  120 */
885        S2B_8(S2B_QMIN + 15)    /*  128 */
886#  define S2B_CMIN (S2B_QMIN + 16)
887#endif
888/****************************************/
889        S2B_64(S2B_CMIN + 0)    /*  192 */
890        S2B_64(S2B_CMIN + 1)    /*  256 */
891        S2B_64(S2B_CMIN + 2)    /*  320 */
892        S2B_64(S2B_CMIN + 3)    /*  384 */
893        S2B_64(S2B_CMIN + 4)    /*  448 */
894        S2B_64(S2B_CMIN + 5)    /*  512 */
895#  define S2B_SMIN (S2B_CMIN + 6)
896        S2B_256(S2B_SMIN + 0)   /*  768 */
897        S2B_256(S2B_SMIN + 1)   /* 1024 */
898        S2B_256(S2B_SMIN + 2)   /* 1280 */
899        S2B_256(S2B_SMIN + 3)   /* 1536 */
900        S2B_256(S2B_SMIN + 4)   /* 1792 */
901        S2B_256(S2B_SMIN + 5)   /* 2048 */
902        S2B_256(S2B_SMIN + 6)   /* 2304 */
903        S2B_256(S2B_SMIN + 7)   /* 2560 */
904        S2B_256(S2B_SMIN + 8)   /* 2816 */
905        S2B_256(S2B_SMIN + 9)   /* 3072 */
906        S2B_256(S2B_SMIN + 10)  /* 3328 */
907        S2B_256(S2B_SMIN + 11)  /* 3584 */
908        S2B_256(S2B_SMIN + 12)  /* 3840 */
909#if (PAGESIZE_2POW == 13)
910        S2B_256(S2B_SMIN + 13)  /* 4096 */
911        S2B_256(S2B_SMIN + 14)  /* 4352 */
912        S2B_256(S2B_SMIN + 15)  /* 4608 */
913        S2B_256(S2B_SMIN + 16)  /* 4864 */
914        S2B_256(S2B_SMIN + 17)  /* 5120 */
915        S2B_256(S2B_SMIN + 18)  /* 5376 */
916        S2B_256(S2B_SMIN + 19)  /* 5632 */
917        S2B_256(S2B_SMIN + 20)  /* 5888 */
918        S2B_256(S2B_SMIN + 21)  /* 6144 */
919        S2B_256(S2B_SMIN + 22)  /* 6400 */
920        S2B_256(S2B_SMIN + 23)  /* 6656 */
921        S2B_256(S2B_SMIN + 24)  /* 6912 */
922        S2B_256(S2B_SMIN + 25)  /* 7168 */
923        S2B_256(S2B_SMIN + 26)  /* 7424 */
924        S2B_256(S2B_SMIN + 27)  /* 7680 */
925        S2B_256(S2B_SMIN + 28)  /* 7936 */
926#endif
927};
928#undef S2B_1
929#undef S2B_2
930#undef S2B_4
931#undef S2B_8
932#undef S2B_16
933#undef S2B_32
934#undef S2B_64
935#undef S2B_128
936#undef S2B_256
937#undef S2B_QMIN
938#undef S2B_CMIN
939#undef S2B_SMIN
940
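/*
 * Illustrative sketch (not part of the upstream malloc.c): how the size2bin
 * table is consumed on the small-allocation path.  For a non-zero request no
 * larger than bin_maxclass, size2bin[size] is the bin index, and that bin's
 * reg_size is the rounded-up size class the caller actually receives.
 * example_small_size_class() is a hypothetical helper.
 */
static inline size_t
example_small_size_class(arena_t *arena, size_t size)
{

	assert(size != 0 && size <= bin_maxclass);
	return (arena->bins[size2bin[size]].reg_size);
}
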
941#ifdef MALLOC_MAG
942static size_t           max_rounds;
943#endif
944
945/* Various chunk-related settings. */
946static size_t           chunksize;
947static size_t           chunksize_mask; /* (chunksize - 1). */
948static size_t           chunk_npages;
949static size_t           arena_chunk_header_npages;
950static size_t           arena_maxclass; /* Max size class for arenas. */
951
952/********/
953/*
954 * Chunks.
955 */
956
957/* Protects chunk-related data structures. */
958static malloc_mutex_t   huge_mtx;
959
960/* Tree of chunks that are stand-alone huge allocations. */
961static extent_tree_t    huge;
962
963#ifdef MALLOC_DSS
964/*
965 * Protects sbrk() calls.  This avoids malloc races among threads, though it
966 * does not protect against races with threads that call sbrk() directly.
967 */
968static malloc_mutex_t   dss_mtx;
969/* Base address of the DSS. */
970static void             *dss_base;
971/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
972static void             *dss_prev;
973/* Current upper limit on DSS addresses. */
974static void             *dss_max;
975
976/*
977 * Trees of chunks that were previously allocated (trees differ only in node
978 * ordering).  These are used when allocating chunks, in an attempt to re-use
979 * address space.  Depending on function, different tree orderings are needed,
980 * which is why there are two trees with the same contents.
981 */
982static extent_tree_t    dss_chunks_szad;
983static extent_tree_t    dss_chunks_ad;
984#endif
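
/*
 * Illustrative sketch (not part of the upstream malloc.c): the two orderings
 * referred to above.  The size/address tree supports best-fit searches when
 * recycling chunks, while the address-only tree supports coalescing with
 * neighbors on deallocation.  These comparators are hypothetical
 * simplifications of the ones the red-black tree code is generated from.
 */
static inline int
example_extent_szad_cmp(const extent_node_t *a, const extent_node_t *b)
{

	if (a->size != b->size)
		return ((a->size < b->size) ? -1 : 1);
	return (((uintptr_t)a->addr < (uintptr_t)b->addr) ? -1 :
	    ((uintptr_t)a->addr > (uintptr_t)b->addr) ? 1 : 0);
}

static inline int
example_extent_ad_cmp(const extent_node_t *a, const extent_node_t *b)
{

	return (((uintptr_t)a->addr < (uintptr_t)b->addr) ? -1 :
	    ((uintptr_t)a->addr > (uintptr_t)b->addr) ? 1 : 0);
}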
985
986#ifdef MALLOC_STATS
987/* Huge allocation statistics. */
988static uint64_t         huge_nmalloc;
989static uint64_t         huge_ndalloc;
990static size_t           huge_allocated;
991#endif
992
993/****************************/
994/*
995 * base (internal allocation).
996 */
997
998/*
999 * Current pages that are being used for internal memory allocations.  These
1000 * pages are carved up in cacheline-size quanta, so that there is no chance of
1001 * false cache line sharing.
1002 */
1003static void             *base_pages;
1004static void             *base_next_addr;
1005static void             *base_past_addr; /* Addr immediately past base_pages. */
1006static extent_node_t    *base_nodes;
1007static malloc_mutex_t   base_mtx;
1008#ifdef MALLOC_STATS
1009static size_t           base_mapped;
1010#endif
1011
1012/********/
1013/*
1014 * Arenas.
1015 */
1016
1017/*
1018 * Arenas that are used to service external requests.  Not all elements of the
1019 * arenas array are necessarily used; arenas are created lazily as needed.
1020 */
1021static arena_t          **arenas;
1022static unsigned         narenas;
1023#ifndef NO_TLS
1024#  ifdef MALLOC_BALANCE
1025static unsigned         narenas_2pow;
1026#  else
1027static unsigned         next_arena;
1028#  endif
1029#endif
1030static pthread_mutex_t  arenas_lock; /* Protects arenas initialization. */
1031
1032#ifndef NO_TLS
1033/*
1034 * Map of pthread_self() --> arenas[???], used for selecting an arena to use
1035 * for allocations.
1036 */
1037static __thread arena_t *arenas_map;
1038#endif
1039
1040#ifdef MALLOC_MAG
1041/*
1042 * Map of thread-specific magazine racks, used for thread-specific object
1043 * caching.
1044 */
1045static __thread mag_rack_t      *mag_rack;
1046#endif
1047
1048#ifdef MALLOC_STATS
1049/* Chunk statistics. */
1050static chunk_stats_t    stats_chunks;
1051#endif
1052
1053/*******************************/
1054/*
1055 * Runtime configuration options.
1056 */
1057const char      *_malloc_options;
1058
1059#ifndef MALLOC_PRODUCTION
1060static bool     opt_abort = true;
1061static bool     opt_junk = true;
1062#else
1063static bool     opt_abort = false;
1064static bool     opt_junk = false;
1065#endif
1066#ifdef MALLOC_DSS
1067static bool     opt_dss = true;
1068static bool     opt_mmap = true;
1069#endif
1070#ifdef MALLOC_MAG
1071static bool     opt_mag = true;
1072static size_t   opt_mag_size_2pow = MAG_SIZE_2POW_DEFAULT;
1073#endif
1074static size_t   opt_dirty_max = DIRTY_MAX_DEFAULT;
1075#ifdef MALLOC_BALANCE
1076static uint64_t opt_balance_threshold = BALANCE_THRESHOLD_DEFAULT;
1077#endif
1078static bool     opt_print_stats = false;
1079static size_t   opt_qspace_max_2pow = QSPACE_MAX_2POW_DEFAULT;
1080static size_t   opt_cspace_max_2pow = CSPACE_MAX_2POW_DEFAULT;
1081static size_t   opt_chunk_2pow = CHUNK_2POW_DEFAULT;
1082static bool     opt_utrace = false;
1083static bool     opt_sysv = false;
1084static bool     opt_xmalloc = false;
1085static bool     opt_zero = false;
1086static int      opt_narenas_lshift = 0;
1087
1088typedef struct {
1089        void    *p;
1090        size_t  s;
1091        void    *r;
1092} malloc_utrace_t;
1093
1094#define UTRACE(a, b, c)                                                 \
1095        if (opt_utrace) {                                               \
1096                malloc_utrace_t ut;                                     \
1097                ut.p = (a);                                             \
1098                ut.s = (b);                                             \
1099                ut.r = (c);                                             \
1100                utrace(&ut, sizeof(ut));                                \
1101        }
1102
1103/******************************************************************************/
1104/*
1105 * Begin function prototypes for non-inline static functions.
1106 */
1107
1108static void     malloc_mutex_init(malloc_mutex_t *mutex);
1109static bool     malloc_spin_init(pthread_mutex_t *lock);
1110static void     wrtmessage(const char *p1, const char *p2, const char *p3,
1111                const char *p4);
1112#ifdef MALLOC_STATS
1113static void     malloc_printf(const char *format, ...);
1114#endif
1115static char     *umax2s(uintmax_t x, char *s);
1116#ifdef MALLOC_DSS
1117static bool     base_pages_alloc_dss(size_t minsize);
1118#endif
1119static bool     base_pages_alloc_mmap(size_t minsize);
1120static bool     base_pages_alloc(size_t minsize);
1121static void     *base_alloc(size_t size);
1122static void     *base_calloc(size_t number, size_t size);
1123static extent_node_t *base_node_alloc(void);
1124static void     base_node_dealloc(extent_node_t *node);
1125#ifdef MALLOC_STATS
1126static void     stats_print(arena_t *arena);
1127#endif
1128static void     *pages_map(void *addr, size_t size);
1129static void     pages_unmap(void *addr, size_t size);
1130#ifdef MALLOC_DSS
1131static void     *chunk_alloc_dss(size_t size);
1132static void     *chunk_recycle_dss(size_t size, bool zero);
1133#endif
1134static void     *chunk_alloc_mmap(size_t size);
1135static void     *chunk_alloc(size_t size, bool zero);
1136#ifdef MALLOC_DSS
1137static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size);
1138static bool     chunk_dealloc_dss(void *chunk, size_t size);
1139#endif
1140static void     chunk_dealloc_mmap(void *chunk, size_t size);
1141static void     chunk_dealloc(void *chunk, size_t size);
1142#ifndef NO_TLS
1143static arena_t  *choose_arena_hard(void);
1144#endif
1145static void     arena_run_split(arena_t *arena, arena_run_t *run, size_t size,
1146    bool large, bool zero);
1147static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
1148static void     arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
1149static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large,
1150    bool zero);
1151static void     arena_purge(arena_t *arena);
1152static void     arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
1153static void     arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
1154    arena_run_t *run, size_t oldsize, size_t newsize);
1155static void     arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
1156    arena_run_t *run, size_t oldsize, size_t newsize, bool dirty);
1157static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
1158static void     *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
1159static size_t   arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
1160#ifdef MALLOC_BALANCE
1161static void     arena_lock_balance_hard(arena_t *arena);
1162#endif
1163#ifdef MALLOC_MAG
1164static void     mag_load(mag_t *mag);
1165#endif
1166static void     *arena_malloc_large(arena_t *arena, size_t size, bool zero);
1167static void     *arena_palloc(arena_t *arena, size_t alignment, size_t size,
1168    size_t alloc_size);
1169static size_t   arena_salloc(const void *ptr);
1170#ifdef MALLOC_MAG
1171static void     mag_unload(mag_t *mag);
1172#endif
1173static void     arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk,
1174    void *ptr);
1175static void     arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
1176    void *ptr, size_t size, size_t oldsize);
1177static bool     arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
1178    void *ptr, size_t size, size_t oldsize);
1179static bool     arena_ralloc_large(void *ptr, size_t size, size_t oldsize);
1180static void     *arena_ralloc(void *ptr, size_t size, size_t oldsize);
1181static bool     arena_new(arena_t *arena);
1182static arena_t  *arenas_extend(unsigned ind);
1183#ifdef MALLOC_MAG
1184static mag_t    *mag_create(arena_t *arena, size_t binind);
1185static void     mag_destroy(mag_t *mag);
1186static mag_rack_t *mag_rack_create(arena_t *arena);
1187static void     mag_rack_destroy(mag_rack_t *rack);
1188#endif
1189static void     *huge_malloc(size_t size, bool zero);
1190static void     *huge_palloc(size_t alignment, size_t size);
1191static void     *huge_ralloc(void *ptr, size_t size, size_t oldsize);
1192static void     huge_dalloc(void *ptr);
1193static void     malloc_print_stats(void);
1194#ifdef MALLOC_DEBUG
1195static void     size2bin_validate(void);
1196#endif
1197static bool     size2bin_init(void);
1198static bool     size2bin_init_hard(void);
1199static bool     malloc_init_hard(void);
1200
1201/*
1202 * End function prototypes.
1203 */
1204/******************************************************************************/
1205/*
1206 * Begin mutex.  We can't use normal pthread mutexes in all places, because
1207 * they require malloc()ed memory, which causes bootstrapping issues in some
1208 * cases.
1209 */
1210
1211static void
1212malloc_mutex_init(malloc_mutex_t *mutex)
1213{
1214        static const spinlock_t lock = _SPINLOCK_INITIALIZER;
1215
1216        mutex->lock = lock;
1217}
1218
1219static inline void
1220malloc_mutex_lock(malloc_mutex_t *mutex)
1221{
1222
1223        if (__isthreaded)
1224                _SPINLOCK(&mutex->lock);
1225}
1226
1227static inline void
1228malloc_mutex_unlock(malloc_mutex_t *mutex)
1229{
1230
1231        if (__isthreaded)
1232                _SPINUNLOCK(&mutex->lock);
1233}
1234
1235/*
1236 * End mutex.
1237 */
1238/******************************************************************************/
1239/*
1240 * Begin spin lock.  Spin locks here are actually adaptive mutexes that block
1241 * after a period of spinning, because unbounded spinning would allow for
1242 * priority inversion.
1243 */
1244
1245/*
1246 * We use an unpublished interface to initialize pthread mutexes with an
1247 * allocation callback, in order to avoid infinite recursion.
1248 */
1249int     _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
1250    void *(calloc_cb)(size_t, size_t));
1251
1252__weak_reference(_pthread_mutex_init_calloc_cb_stub,
1253    _pthread_mutex_init_calloc_cb);
1254
1255int
1256_pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex,
1257    void *(calloc_cb)(size_t, size_t))
1258{
1259
1260        return (0);
1261}
1262
1263static bool
1264malloc_spin_init(pthread_mutex_t *lock)
1265{
1266
1267        if (_pthread_mutex_init_calloc_cb(lock, base_calloc) != 0)
1268                return (true);
1269
1270        return (false);
1271}
1272
1273static inline unsigned
1274malloc_spin_lock(pthread_mutex_t *lock)
1275{
1276        unsigned ret = 0;
1277
1278        if (__isthreaded) {
1279                if (_pthread_mutex_trylock(lock) != 0) {
1280                        unsigned i;
1281                        volatile unsigned j;
1282
1283                        /* Exponentially back off. */
1284                        for (i = 1; i <= SPIN_LIMIT_2POW; i++) {
1285                                for (j = 0; j < (1U << i); j++) {
1286                                        ret++;
1287                                        CPU_SPINWAIT;
1288                                }
1289
1290                                if (_pthread_mutex_trylock(lock) == 0)
1291                                        return (ret);
1292                        }
1293
1294                        /*
1295                         * Spinning failed.  Block until the lock becomes
1296                         * available, in order to avoid indefinite priority
1297                         * inversion.
1298                         */
1299                        _pthread_mutex_lock(lock);
1300                        assert((ret << BLOCK_COST_2POW) != 0);
1301                        return (ret << BLOCK_COST_2POW);
1302                }
1303        }
1304
1305        return (ret);
1306}
1307
1308static inline void
1309malloc_spin_unlock(pthread_mutex_t *lock)
1310{
1311
1312        if (__isthreaded)
1313                _pthread_mutex_unlock(lock);
1314}
1315
1316/*
1317 * End spin lock.
1318 */
1319/******************************************************************************/
1320/*
1321 * Begin Utility functions/macros.
1322 */
1323
1324/* Return the chunk address for allocation address a. */
1325#define CHUNK_ADDR2BASE(a)                                              \
1326        ((void *)((uintptr_t)(a) & ~chunksize_mask))
1327
1328/* Return the chunk offset of address a. */
1329#define CHUNK_ADDR2OFFSET(a)                                            \
1330        ((size_t)((uintptr_t)(a) & chunksize_mask))
1331
1332/* Return the smallest chunk multiple that is >= s. */
1333#define CHUNK_CEILING(s)                                                \
1334        (((s) + chunksize_mask) & ~chunksize_mask)
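
/*
 * Illustrative sketch (not part of the upstream malloc.c): the constant-time
 * pointer-to-arena association mentioned in the header comment.  Masking an
 * allocation address with CHUNK_ADDR2BASE() yields its chunk; for non-huge
 * allocations the chunk header names the owning arena, while huge
 * allocations are themselves chunk-aligned.  example_ptr_arena() is a
 * hypothetical helper mirroring the masking done on deallocation.
 */
static inline arena_t *
example_ptr_arena(const void *ptr)
{
	arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);

	if ((void *)chunk == ptr)
		return (NULL);		/* Chunk-aligned: a huge allocation. */
	return (chunk->arena);
}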
1335
1336/* Return the smallest quantum multiple that is >= a. */
1337#define QUANTUM_CEILING(a)                                              \
1338        (((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
1339
1340/* Return the smallest cacheline multiple that is >= s. */
1341#define CACHELINE_CEILING(s)                                            \
1342        (((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
1343
1344/* Return the smallest subpage multiple that is >= s. */
1345#define SUBPAGE_CEILING(s)                                              \
1346        (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)
1347
1348/* Return the smallest pagesize multiple that is >= s. */
1349#define PAGE_CEILING(s)                                                 \
1350        (((s) + pagesize_mask) & ~pagesize_mask)
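
/*
 * Illustrative sketch (not part of the upstream malloc.c): with the default
 * 64 byte cache lines and 256 byte subpages (and a 16 byte quantum where
 * QUANTUM_2POW is 4), the ceiling macros above round as follows.  The helper
 * exists only to show the arithmetic.
 */
static inline void
example_ceiling_checks(void)
{

#if (QUANTUM_2POW == 4)
	assert(QUANTUM_CEILING(17) == 32);	/* 16 byte quantum. */
#endif
	assert(CACHELINE_CEILING(65) == 128);	/* 64 byte cache lines. */
	assert(SUBPAGE_CEILING(257) == 512);	/* 256 byte subpages. */
	/* PAGE_CEILING() depends on the runtime pagesize, e.g. 4097 -> 8192. */
}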
1351
1352#ifdef MALLOC_TINY
1353/* Compute the smallest power of 2 that is >= x. */
1354static inline size_t
1355pow2_ceil(size_t x)
1356{
1357
1358        x--;
1359        x |= x >> 1;
1360        x |= x >> 2;
1361        x |= x >> 4;
1362        x |= x >> 8;
1363        x |= x >> 16;
1364#if (SIZEOF_PTR == 8)
1365        x |= x >> 32;
1366#endif
1367        x++;
1368        return (x);
1369}
1370#endif
1371
1372#ifdef MALLOC_BALANCE
1373/*
1374 * Use a simple linear congruential pseudo-random number generator:
1375 *
1376 *   prn(x) = (a*x + c) % m
1377 *
1378 * where the following constants ensure maximal period:
1379 *
1380 *   a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4.
1381 *   c == Odd number (relatively prime to 2^n).
1382 *   m == 2^32
1383 *
1384 * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
1385 *
1386 * This choice of m has the disadvantage that the quality of the bits is
1387 * proportional to bit position.  For example, the lowest bit has a cycle of 2,
1388 * the next has a cycle of 4, etc.  For this reason, we prefer to use the upper
1389 * bits.
1390 */
1391#  define PRN_DEFINE(suffix, var, a, c)                                 \
1392static inline void                                                      \
1393sprn_##suffix(uint32_t seed)                                            \
1394{                                                                       \
1395        var = seed;                                                     \
1396}                                                                       \
1397                                                                        \
1398static inline uint32_t                                                  \
1399prn_##suffix(uint32_t lg_range)                                         \
1400{                                                                       \
1401        uint32_t ret, x;                                                \
1402                                                                        \
1403        assert(lg_range > 0);                                           \
1404        assert(lg_range <= 32);                                         \
1405                                                                        \
1406        x = (var * (a)) + (c);                                          \
1407        var = x;                                                        \
1408        ret = x >> (32 - lg_range);                                     \
1409                                                                        \
1410        return (ret);                                                   \
1411}
1412#  define SPRN(suffix, seed)    sprn_##suffix(seed)
1413#  define PRN(suffix, lg_range) prn_##suffix(lg_range)
1414#endif
1415
1416#ifdef MALLOC_BALANCE
1417/* Define the PRNG used for arena assignment. */
1418static __thread uint32_t balance_x;
1419PRN_DEFINE(balance, balance_x, 1297, 1301)
1420#endif
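
#ifdef MALLOC_BALANCE
/*
 * Illustrative sketch (not part of the upstream malloc.c): how the generated
 * sprn_balance()/prn_balance() pair is meant to be used.  sprn_balance()
 * seeds the per-thread state once; prn_balance() then yields lg_range of the
 * PRNG's upper (higher quality) bits, e.g. to pick an arena slot.
 * example_pick_arena_index() is a hypothetical helper.
 */
static inline unsigned
example_pick_arena_index(uint32_t seed, unsigned lg_narenas)
{

	if (balance_x == 0)
		SPRN(balance, seed);	/* First use in this thread: seed. */
	return ((unsigned)PRN(balance, lg_narenas));
}
#endif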
1421
1422static void
1423wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
1424{
1425
1426        _write(STDERR_FILENO, p1, strlen(p1));
1427        _write(STDERR_FILENO, p2, strlen(p2));
1428        _write(STDERR_FILENO, p3, strlen(p3));
1429        _write(STDERR_FILENO, p4, strlen(p4));
1430}
1431
1432void    (*_malloc_message)(const char *p1, const char *p2, const char *p3,
1433            const char *p4) = wrtmessage;
1434
1435#ifdef MALLOC_STATS
1436/*
1437 * Print to stderr in such a way as to (hopefully) avoid memory allocation.
1438 */
1439static void
1440malloc_printf(const char *format, ...)
1441{
1442        char buf[4096];
1443        va_list ap;
1444
1445        va_start(ap, format);
1446        vsnprintf(buf, sizeof(buf), format, ap);
1447        va_end(ap);
1448        _malloc_message(buf, "", "", "");
1449}
1450#endif
1451
1452/*
1453 * We don't want to depend on vsnprintf() for production builds, since that can
1454 * cause unnecessary bloat for static binaries.  umax2s() provides minimal
1455 * integer printing functionality, so that malloc_printf() use can be limited to
1456 * MALLOC_STATS code.
1457 */
1458#define UMAX2S_BUFSIZE  21
1459static char *
1460umax2s(uintmax_t x, char *s)
1461{
1462        unsigned i;
1463
1464        /* Make sure UMAX2S_BUFSIZE is large enough. */
1465        assert(sizeof(uintmax_t) <= 8);
1466
1467        i = UMAX2S_BUFSIZE - 1;
1468        s[i] = '\0';
1469        do {
1470                i--;
1471                s[i] = "0123456789"[x % 10];
1472                x /= 10;
1473        } while (x > 0);
1474
1475        return (&s[i]);
1476}
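
/*
 * Illustrative sketch (not part of the upstream malloc.c): umax2s() writes
 * digits backwards from the end of the caller's buffer and returns a pointer
 * into that buffer, so the buffer must outlive the returned string.  A
 * typical (hypothetical) call site pairs it with _malloc_message():
 */
static void
example_print_count(const char *label, uintmax_t n)
{
	char buf[UMAX2S_BUFSIZE];

	_malloc_message(label, umax2s(n, buf), "\n", "");
}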
1477
1478/******************************************************************************/
1479
1480#ifdef MALLOC_DSS
1481static bool
1482base_pages_alloc_dss(size_t minsize)
1483{
1484
1485        /*
1486         * Do special DSS allocation here, since base allocations don't need to
1487         * be chunk-aligned.
1488         */
1489        malloc_mutex_lock(&dss_mtx);
1490        if (dss_prev != (void *)-1) {
1491                intptr_t incr;
1492                size_t csize = CHUNK_CEILING(minsize);
1493
1494                do {
1495                        /* Get the current end of the DSS. */
1496                        dss_max = sbrk(0);
1497
1498                        /*
1499                         * Calculate how much padding is necessary to
1500                         * chunk-align the end of the DSS.  Don't worry about
1501                         * dss_max not being chunk-aligned though.
1502                         */
1503                        incr = (intptr_t)chunksize
1504                            - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
1505                        assert(incr >= 0);
1506                        if ((size_t)incr < minsize)
1507                                incr += csize;
1508
1509                        dss_prev = sbrk(incr);
1510                        if (dss_prev == dss_max) {
1511                                /* Success. */
1512                                dss_max = (void *)((intptr_t)dss_prev + incr);
1513                                base_pages = dss_prev;
1514                                base_next_addr = base_pages;
1515                                base_past_addr = dss_max;
1516#ifdef MALLOC_STATS
1517                                base_mapped += incr;
1518#endif
1519                                malloc_mutex_unlock(&dss_mtx);
1520                                return (false);
1521                        }
1522                } while (dss_prev != (void *)-1);
1523        }
1524        malloc_mutex_unlock(&dss_mtx);
1525
1526        return (true);
1527}
1528#endif
1529
1530static bool
1531base_pages_alloc_mmap(size_t minsize)
1532{
1533        size_t csize;
1534
1535        assert(minsize != 0);
1536        csize = PAGE_CEILING(minsize);
1537        base_pages = pages_map(NULL, csize);
1538        if (base_pages == NULL)
1539                return (true);
1540        base_next_addr = base_pages;
1541        base_past_addr = (void *)((uintptr_t)base_pages + csize);
1542#ifdef MALLOC_STATS
1543        base_mapped += csize;
1544#endif
1545
1546        return (false);
1547}
1548
1549static bool
1550base_pages_alloc(size_t minsize)
1551{
1552
1553#ifdef MALLOC_DSS
1554        if (opt_dss) {
1555                if (base_pages_alloc_dss(minsize) == false)
1556                        return (false);
1557        }
1558
1559        if (opt_mmap && minsize != 0)
1560#endif
1561        {
1562                if (base_pages_alloc_mmap(minsize) == false)
1563                        return (false);
1564        }
1565
1566        return (true);
1567}
1568
1569static void *
1570base_alloc(size_t size)
1571{
1572        void *ret;
1573        size_t csize;
1574
1575        /* Round size up to nearest multiple of the cacheline size. */
1576        csize = CACHELINE_CEILING(size);
1577
1578        malloc_mutex_lock(&base_mtx);
1579        /* Make sure there's enough space for the allocation. */
1580        if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1581                if (base_pages_alloc(csize)) {
1582                        malloc_mutex_unlock(&base_mtx);
1583                        return (NULL);
1584                }
1585        }
1586        /* Allocate. */
1587        ret = base_next_addr;
1588        base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1589        malloc_mutex_unlock(&base_mtx);
1590
1591        return (ret);
1592}
1593
1594static void *
1595base_calloc(size_t number, size_t size)
1596{
1597        void *ret;
1598
1599        ret = base_alloc(number * size);
1600        memset(ret, 0, number * size);
1601
1602        return (ret);
1603}
1604
1605static extent_node_t *
1606base_node_alloc(void)
1607{
1608        extent_node_t *ret;
1609
1610        malloc_mutex_lock(&base_mtx);
1611        if (base_nodes != NULL) {
1612                ret = base_nodes;
1613                base_nodes = *(extent_node_t **)ret;