source: lib/libvarnish/binary_heap.c @ b4f3b04

Revision b4f3b04, 10.9 KB checked in by Poul-Henning Kamp <phk@…>, 4 years ago (diff)

Turn the binary heap into a "B-heap" to reduce the number of pagefaults by
roughly a factor 9.

git-svn-id:  http://www.varnish-cache.org/svn/trunk/varnish-cache@4646 d4fa192b-c00b-0410-8231-f00ffab90ce4

  • Property mode set to 100644
Line 
1/*-
2 * Copyright (c) 2006 Verdens Gang AS
3 * Copyright (c) 2006-2009 Linpro AS
4 * All rights reserved.
5 *
6 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * Implementation of a binary heap API
30 *
31 * We use a malloc(3)/realloc(3) array to store the pointers using the
32 * classical FORTRAN strategy.
33 */
34
35#include "config.h"
36
37#include "svnid.h"
38SVNID("$Id$")
39
40#include <unistd.h>
41#include <stdlib.h>
42
43#include "binary_heap.h"
44#include "libvarnish.h"
45
46/* Parameters --------------------------------------------------------*/
47
48/*
49 * The number of elements in a row has to be a compromise between
50 * wasted space and number of memory allocations.
51 * With 64k objects per row, there will be at least 5...10 seconds
52 * between row additions on a very busy server.
53 * At the same time, the worst case amount of wasted memory is kept
54 * at a reasonable 1 MB -- two rows on 64bit system.
55 * Finally, but without practical significance: 16 bits should be
56 * easier for the compiler to optimize.
57 */
58#define ROW_SHIFT               16
59
60/* Private definitions -----------------------------------------------*/
61
62#define ROOT_IDX                1
63
64#define ROW_WIDTH               (1 << ROW_SHIFT)
65
66/*lint -emacro(572, ROW) shift 0 >> by 16 */
67/*lint -emacro(835, ROW) 0 left of >> */
68/*lint -emacro(778, ROW) const >> evaluates to zero */
69#define ROW(b, n)               ((b)->array[(n) >> ROW_SHIFT])
70
71/*lint -emacro(835, A) 0 left of & */
72#define A(b, n)                 ROW(b, n)[(n) & (ROW_WIDTH - 1)]
73
74struct binheap {
75        unsigned                magic;
76#define BINHEAP_MAGIC           0xf581581aU     /* from /dev/random */
77        void                    *priv;
78        binheap_cmp_t           *cmp;
79        binheap_update_t        *update;
80        void                    ***array;
81        unsigned                rows;
82        unsigned                length;
83        unsigned                next;
84        unsigned                page_size;
85        unsigned                page_mask;
86        unsigned                page_shift;
87};
88
89#define VM_AWARE
90
91#ifdef VM_AWARE
92
93static  unsigned
94parent(const struct binheap *bh, unsigned u)
95{
96        unsigned po;
97        unsigned v;
98
99        po = u & bh->page_mask;
100
101        if (u < bh->page_size || po > 3) {
102                v = (u & ~bh->page_mask) | (po >> 1);
103        } else if (po < 2) {
104                v = (u - bh->page_size) >> bh->page_shift;
105                v += v & ~(bh->page_mask >> 1);
106                v |= bh->page_size / 2;
107        } else {
108                v = u - 2;
109        }
110        return (v);
111}
112
113static void
114child(const struct binheap *bh, unsigned u, unsigned *a, unsigned *b)
115{
116
117        if (u > bh->page_mask && (u & (bh->page_mask - 1)) == 0) {
118                /* First two elements are magical except on the first page */
119                *a = *b = u + 2;
120        } else if (u & (bh->page_size >> 1)) {
121                /* The bottom row is even more magical */
122                *a = (u & ~bh->page_mask) >> 1;
123                *a |= u & (bh->page_mask >> 1);
124                *a += 1;
125                *a <<= bh->page_shift;
126                *b = *a + 1;
127        } else {
128                /* The rest is as usual, only inside the page */
129                *a = u + (u & bh->page_mask);
130                *b += 1;
131        }
132}
133
134
135#else
136
137static unsigned
138parent(const struct binheap *bh, unsigned u)
139{
140
141        (void)bh;
142        return (u / 2);
143}
144
145static void
146child(const struct binheap *bh, unsigned u, unsigned *a, unsigned *b)
147{
148
149        (void)bh;
150        *a = u * 2;
151        *b = *a + 1;
152}
153
154#endif
155
156/* Implementation ----------------------------------------------------*/
157
158static void
159binheap_addrow(struct binheap *bh)
160{
161        unsigned u;
162
163        /* First make sure we have space for another row */
164        if (&ROW(bh, bh->length) >= bh->array + bh->rows) {
165                u = bh->rows * 2;
166                bh->array = realloc(bh->array, sizeof(*bh->array) * u);
167                assert(bh->array != NULL);
168
169                /* NULL out new pointers */
170                while (bh->rows < u)
171                        bh->array[bh->rows++] = NULL;
172        }
173        assert(ROW(bh, bh->length) == NULL);
174        ROW(bh, bh->length) = malloc(sizeof(**bh->array) * ROW_WIDTH);
175        assert(ROW(bh, bh->length));
176        bh->length += ROW_WIDTH;
177}
178
179struct binheap *
180binheap_new(void *priv, binheap_cmp_t *cmp_f, binheap_update_t *update_f)
181{
182        struct binheap *bh;
183        unsigned u;
184
185        bh = calloc(sizeof *bh, 1);
186        if (bh == NULL)
187                return (bh);
188        bh->priv = priv;
189
190        bh->page_size = (unsigned)getpagesize() / sizeof (void *);
191        bh->page_mask = bh->page_size - 1;
192        assert(!(bh->page_size & bh->page_mask));       /* power of two */
193        for (u = 1; (1U << u) != bh->page_size; u++)
194                ;
195        bh->page_shift = u;
196        assert(bh->page_size <= (sizeof(**bh->array) * ROW_WIDTH));
197       
198        bh->cmp = cmp_f;
199        bh->update = update_f;
200        bh->next = ROOT_IDX;
201        bh->rows = 16;          /* A tiny-ish number */
202        bh->array = calloc(sizeof *bh->array, bh->rows);
203        assert(bh->array != NULL);
204        binheap_addrow(bh);
205        A(bh, ROOT_IDX) = NULL;
206        bh->magic = BINHEAP_MAGIC;
207        return (bh);
208}
209
210static void
211binheap_update(const struct binheap *bh, unsigned u)
212{
213        assert(bh->magic == BINHEAP_MAGIC);
214        assert(u < bh->next);
215        assert(A(bh, u) != NULL);
216        if (bh->update == NULL)
217                return;
218        bh->update(bh->priv, A(bh, u), u);
219}
220
221static void
222binhead_swap(const struct binheap *bh, unsigned u, unsigned v)
223{
224        void *p;
225
226        assert(bh->magic == BINHEAP_MAGIC);
227        assert(u < bh->next);
228        assert(v < bh->next);
229        p = A(bh, u);
230        A(bh, u) = A(bh, v);
231        A(bh, v) = p;
232        binheap_update(bh, u);
233        binheap_update(bh, v);
234}
235
236static unsigned
237binheap_trickleup(const struct binheap *bh, unsigned u)
238{
239        unsigned v;
240
241        assert(bh->magic == BINHEAP_MAGIC);
242        while (u > ROOT_IDX) {
243                v = parent(bh, u);
244                if (!bh->cmp(bh->priv, A(bh, u), A(bh, v)))
245                        break;
246                binhead_swap(bh, u, v);
247                u = v;
248        }
249        return (u);
250}
251
252static void
253binheap_trickledown(const struct binheap *bh, unsigned u)
254{
255        unsigned v1, v2;
256
257        assert(bh->magic == BINHEAP_MAGIC);
258        while (1) {
259                child(bh, u, &v1, &v2);
260                if (v1 >= bh->next)
261                        return;
262                if (v2 < bh->next && bh->cmp(bh->priv, A(bh, v2), A(bh, v1)))
263                        v1 = v2;
264                if (bh->cmp(bh->priv, A(bh, u), A(bh, v1)))
265                        return;
266                binhead_swap(bh, u, v1);
267                u = v1;
268        }
269}
270
271void
272binheap_insert(struct binheap *bh, void *p)
273{
274        unsigned u;
275
276        assert(bh != NULL);
277        assert(bh->magic == BINHEAP_MAGIC);
278        assert(bh->length >= bh->next);
279        if (bh->length == bh->next)
280                binheap_addrow(bh);
281        u = bh->next++;
282        A(bh, u) = p;
283        binheap_update(bh, u);
284        (void)binheap_trickleup(bh, u);
285}
286
287void *
288binheap_root(const struct binheap *bh)
289{
290
291        assert(bh != NULL);
292        assert(bh->magic == BINHEAP_MAGIC);
293        return (A(bh, ROOT_IDX));
294}
295
296/*
297 * It may seem counter-intuitive that we delete by replacement with
298 * the tail object. "That's almost certain to not belong there, in
299 * particular when we delete the root ?" is the typical reaction.
300 *
301 * If we tried to trickle up into the empty position, we would,
302 * eventually, end up with a hole in the bottom row, at which point
303 * we would move the tail object there.
304 * But there is no guarantee that the tail object would not need to
305 * trickle up from that position, in fact, it might be the new root
306 * of this half of the subtree.
307 * The total number of operations is guaranteed to be at least
308 * N{height} downward selections, because we have to get the hole
309 * all the way down, but in addition to that, we may get up to
310 * N{height}-1 upward trickles.
311 *
312 * When we fill the hole with the tail object, the worst case is
313 * that it trickles all the way down to become the tail object
314 * again.
315 * In other words worst case is N{height} downward trickles.
316 * But there is a pretty decent chance that it does not make
317 * it all the way down.
318 */
319
320void
321binheap_delete(struct binheap *bh, unsigned idx)
322{
323
324        assert(bh != NULL);
325        assert(bh->magic == BINHEAP_MAGIC);
326        assert(bh->next > ROOT_IDX);
327        assert(idx < bh->next);
328        assert(idx > 0);
329        assert(A(bh, idx) != NULL);
330        bh->update(bh->priv, A(bh, idx), 0);
331        if (idx == --bh->next) {
332                A(bh, bh->next) = NULL;
333                return;
334        }
335        A(bh, idx) = A(bh, bh->next);
336        A(bh, bh->next) = NULL;
337        binheap_update(bh, idx);
338        idx = binheap_trickleup(bh, idx);
339        binheap_trickledown(bh, idx);
340
341        /*
342         * We keep a hysteresis of one full row before we start to
343         * return space to the OS to avoid silly behaviour around
344         * row boundaries.
345         */
346        if (bh->next + 2 * ROW_WIDTH <= bh->length) {
347                free(ROW(bh, bh->length - 1));
348                ROW(bh, bh->length - 1) = NULL;
349                bh->length -= ROW_WIDTH;
350        }
351}
352
353
354#ifdef TEST_DRIVER
355/* Test driver -------------------------------------------------------*/
356#include <stdio.h>
357
358struct foo {
359        unsigned        idx;
360        unsigned        key;
361};
362
363#define M 31011091      /* Number of operations */
364#define N 10313102              /* Number of items */
365#define R -1            /* Random modulus */
366
367struct foo ff[N];
368
369static int
370cmp(void *priv, void *a, void *b)
371{
372        struct foo *fa, *fb;
373
374        fa = a;
375        fb = b;
376        return (fa->key < fb->key);
377}
378
379void
380update(void *priv, void *a, unsigned u)
381{
382        struct foo *fa;
383
384        fa = a;
385        fa->idx = u;
386}
387
388void
389chk(struct binheap *bh)
390{
391        unsigned u, v;
392        struct foo *fa, *fb;
393
394        for (u = 2; u < bh->next; u++) {
395                v = parent(bh, u);
396                fa = A(bh, u);
397                fb = A(bh, v);
398                assert(fa->key >= fb->key);
399        }
400}
401
402int
403main(int argc, char **argv)
404{
405        struct binheap *bh;
406        unsigned u, v, lr;
407        struct foo *fp;
408
409        if (0) {
410                srandomdev();
411                u = random();
412                printf("Seed %u\n", u);
413                srandom(u);
414        }
415        bh = binheap_new(NULL, cmp, update);
416
417        /* First insert our N elements */
418        for (u = 0; u < N; u++) {
419                lr = random() % R;
420                ff[u].key = lr;
421                binheap_insert(bh, &ff[u]);
422
423                fp = binheap_root(bh);
424                assert(fp->idx == 1);
425                assert(fp->key <= lr);
426        }
427        fprintf(stderr, "%d inserts OK\n", N);
428        /* For M cycles, pick the root, insert new */
429        for (u = 0; u < M; u++) {
430                fp = binheap_root(bh);
431                assert(fp->idx == 1);
432
433                /* It cannot possibly be larger than the last value we added */
434                assert(fp->key <= lr);
435                binheap_delete(bh, fp->idx);
436
437                lr = random() % R;
438                fp->key = lr;
439                binheap_insert(bh, fp);
440        }
441        fprintf(stderr, "%d replacements OK\n", M);
442        /* The remove everything */
443        lr = 0;
444        for (u = 0; u < N; u++) {
445                fp = binheap_root(bh);
446                assert(fp->idx == 1);
447                assert(fp->key >= lr);
448                lr = fp->key;
449                binheap_delete(bh, fp->idx);
450        }
451        fprintf(stderr, "%d removes OK\n", N);
452
453        for (u = 0; u < M; u++) {
454                v = random() % N;
455                if (ff[v].idx > 0) {
456                        assert(ff[v].idx != 0);
457                        binheap_delete(bh, ff[v].idx);
458                        assert(ff[v].idx == 0);
459                } else {
460                        assert(ff[v].idx == 0);
461                        ff[v].key = random() % R;
462                        binheap_insert(bh, &ff[v]);
463                        assert(ff[v].idx != 0);
464                }
465                if (0)
466                        chk(bh);
467        }
468        fprintf(stderr, "%d updates OK\n", M);
469        return (0);
470}
471#endif
Note: See TracBrowser for help on using the repository browser.