source: bin/varnishd/cache/cache_acceptor.c @ 4d047a9

Revision 4d047a9, 10.3 KB checked in by Poul-Henning Kamp <phk@…>, 2 years ago (diff)

Accept-filters on FreeBSD have been broken for ages; fix them.

Add accept-filter param which controls if we attempt kernel
filtering.

Apply filters after listen() when we do.

Report failures with VSL(SLT_Error)

Disable filters in pipe-lining test-case.

Fixes #1101

  • Property mode set to 100644
Line 
1/*-
2 * Copyright (c) 2006 Verdens Gang AS
3 * Copyright (c) 2006-2011 Varnish Software AS
4 * All rights reserved.
5 *
6 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * This source file has the various trickery surrounding the accept/listen
30 * sockets.
31 *
32 * The actual acceptance is done from cache_pool.c, by calling
33 * into VCA_Accept() in this file.
34 *
35 * Once the session is allocated we move into it with a call to
36 * VCA_SetupSess().
37 *
38 * If we fail to allocate a session we call VCA_FailSess() to clean up
39 * and initiate pacing.
40 */
41
42#include "config.h"
43
44#include "cache.h"
45#include "common/heritage.h"
46
47#include "vcli.h"
48#include "vcli_priv.h"
49#include "vtcp.h"
50#include "vtim.h"
51
52static pthread_t        VCA_thread;
53static struct timeval   tv_sndtimeo;
54static struct timeval   tv_rcvtimeo;
55static int hack_ready;
56static double vca_pace = 0.0;
57static struct lock pace_mtx;
58
59/*--------------------------------------------------------------------
60 * We want to get out of any kind of trouble-hit TCP connections as fast
61 * as absolutely possible, so we set them LINGER enabled with zero timeout,
62 * so that even if there are outstanding write data on the socket, a close(2)
63 * will return immediately.
64 */
65static const struct linger linger = {
66        .l_onoff        =       0,
67};
68
69static unsigned char    need_sndtimeo, need_rcvtimeo, need_linger, need_test;
70
71/*--------------------------------------------------------------------
72 * Some kernels have bugs/limitations with respect to which options are
73 * inherited from the accept/listen socket, so we have to keep track of
74 * which, if any, sockopts we have to set on the accepted socket.
75 */
76
77static void
78sock_test(int fd)
79{
80        struct linger lin;
81        struct timeval tv;
82        socklen_t l;
83        int i;
84
85        l = sizeof lin;
86        i = getsockopt(fd, SOL_SOCKET, SO_LINGER, &lin, &l);
87        if (i) {
88                VTCP_Assert(i);
89                return;
90        }
91        assert(l == sizeof lin);
92        if (memcmp(&lin, &linger, l))
93                need_linger = 1;
94
95#ifdef SO_SNDTIMEO_WORKS
96        l = sizeof tv;
97        i = getsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, &l);
98        if (i) {
99                VTCP_Assert(i);
100                return;
101        }
102        assert(l == sizeof tv);
103        if (memcmp(&tv, &tv_sndtimeo, l))
104                need_sndtimeo = 1;
105#else
106        (void)tv;
107        (void)tv_sndtimeo;
108        (void)need_sndtimeo;
109#endif
110
111#ifdef SO_RCVTIMEO_WORKS
112        l = sizeof tv;
113        i = getsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, &l);
114        if (i) {
115                VTCP_Assert(i);
116                return;
117        }
118        assert(l == sizeof tv);
119        if (memcmp(&tv, &tv_rcvtimeo, l))
120                need_rcvtimeo = 1;
121#else
122        (void)tv;
123        (void)tv_rcvtimeo;
124        (void)need_rcvtimeo;
125#endif
126
127        need_test = 0;
128}
129
130/*--------------------------------------------------------------------
131 * If accept(2)'ing fails, we pace ourselves to relive any resource
132 * shortage if possible.
133 */
134
135static void
136vca_pace_check(void)
137{
138        double p;
139
140        if (vca_pace == 0.0)
141                return;
142        Lck_Lock(&pace_mtx);
143        p = vca_pace;
144        Lck_Unlock(&pace_mtx);
145        if (p > 0.0)
146                VTIM_sleep(p);
147}
148
149static void
150vca_pace_bad(void)
151{
152
153        Lck_Lock(&pace_mtx);
154        vca_pace += cache_param->acceptor_sleep_incr;
155        if (vca_pace > cache_param->acceptor_sleep_max)
156                vca_pace = cache_param->acceptor_sleep_max;
157        Lck_Unlock(&pace_mtx);
158}
159
160static void
161vca_pace_good(void)
162{
163
164        if (vca_pace == 0.0)
165                return;
166        Lck_Lock(&pace_mtx);
167        vca_pace *= cache_param->acceptor_sleep_decay;
168        if (vca_pace < cache_param->acceptor_sleep_incr)
169                vca_pace = 0.0;
170        Lck_Unlock(&pace_mtx);
171}
172
173/*--------------------------------------------------------------------
174 * Accept on a listen socket, and handle error returns.
175 */
176
177int
178VCA_Accept(struct listen_sock *ls, struct wrk_accept *wa)
179{
180        int i;
181
182        CHECK_OBJ_NOTNULL(ls, LISTEN_SOCK_MAGIC);
183        vca_pace_check();
184
185        while(!hack_ready)
186                (void)usleep(100*1000);
187
188        wa->acceptaddrlen = sizeof wa->acceptaddr;
189        do {
190                i = accept(ls->sock, (void*)&wa->acceptaddr,
191                           &wa->acceptaddrlen);
192        } while (i < 0 && errno == EAGAIN);
193
194        if (i < 0) {
195                switch (errno) {
196                case ECONNABORTED:
197                        break;
198                case EMFILE:
199                        VSL(SLT_Debug, ls->sock, "Too many open files");
200                        vca_pace_bad();
201                        break;
202                default:
203                        VSL(SLT_Debug, ls->sock, "Accept failed: %s",
204                            strerror(errno));
205                        vca_pace_bad();
206                        break;
207                }
208        }
209        wa->acceptlsock = ls;
210        wa->acceptsock = i;
211        return (i);
212}
213
214/*--------------------------------------------------------------------
215 * Fail a session
216 *
217 * This happens if we accept the socket, but cannot get a session
218 * structure.
219 *
220 * We consider this a DoS situation (false positive:  Extremely popular
221 * busy objects) and silently close the connection with minimum effort
222 * and fuzz, rather than try to send an intelligent message back.
223 */
224
225void
226VCA_FailSess(struct worker *wrk)
227{
228        struct wrk_accept *wa;
229
230        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
231        CAST_OBJ_NOTNULL(wa, (void*)wrk->aws->f, WRK_ACCEPT_MAGIC);
232        AZ(close(wa->acceptsock));
233        wrk->stats.sess_drop++;
234        vca_pace_bad();
235        WS_Release(wrk->aws, 0);
236}
237
238/*--------------------------------------------------------------------
239 * We have allocated a session, move our info into it.
240 */
241
242void
243VCA_SetupSess(struct worker *wrk, struct sess *sp)
244{
245        struct wrk_accept *wa;
246
247        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
248        CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
249        CAST_OBJ_NOTNULL(wa, (void*)wrk->aws->f, WRK_ACCEPT_MAGIC);
250        sp->vxid = wa->vxid;
251        sp->vseq = 0;
252        sp->fd = wa->acceptsock;
253        sp->vsl_id = wa->acceptsock | VSL_CLIENTMARKER ;
254        wa->acceptsock = -1;
255        sp->t_open = VTIM_real();
256        sp->mylsock = wa->acceptlsock;
257        CHECK_OBJ_NOTNULL(sp->mylsock, LISTEN_SOCK_MAGIC);
258        assert(wa->acceptaddrlen <= sp->sockaddrlen);
259        memcpy(&sp->sockaddr, &wa->acceptaddr, wa->acceptaddrlen);
260        sp->sockaddrlen = wa->acceptaddrlen;
261        vca_pace_good();
262        wrk->stats.sess_conn++;
263        WS_Release(wrk->aws, 0);
264
265        if (need_test)
266                sock_test(sp->fd);
267        if (need_linger)
268                VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_LINGER,
269                    &linger, sizeof linger));
270#ifdef SO_SNDTIMEO_WORKS
271        if (need_sndtimeo)
272                VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_SNDTIMEO,
273                    &tv_sndtimeo, sizeof tv_sndtimeo));
274#endif
275#ifdef SO_RCVTIMEO_WORKS
276        if (need_rcvtimeo)
277                VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_RCVTIMEO,
278                    &tv_rcvtimeo, sizeof tv_rcvtimeo));
279#endif
280}
281
282/*--------------------------------------------------------------------*/
283
284static void *
285vca_acct(void *arg)
286{
287#ifdef SO_RCVTIMEO_WORKS
288        double timeout_idle = 0;
289#endif
290#ifdef SO_SNDTIMEO_WORKS
291        double send_timeout = 0;
292#endif
293        struct listen_sock *ls;
294        double t0, now;
295        int i;
296
297        THR_SetName("cache-acceptor");
298        (void)arg;
299
300        VTAILQ_FOREACH(ls, &heritage.socks, list) {
301                if (ls->sock < 0)
302                        continue;
303                AZ(listen(ls->sock, cache_param->listen_depth));
304                AZ(setsockopt(ls->sock, SOL_SOCKET, SO_LINGER,
305                    &linger, sizeof linger));
306                if (cache_param->accept_filter) {
307                        i = VTCP_filter_http(ls->sock);
308                        if (i)
309                                VSL(SLT_Error, ls->sock,
310                                    "Kernel filtering: sock=%d, ret=%d %s\n",
311                                    ls->sock, i, strerror(errno));
312                }
313        }
314
315        hack_ready = 1;
316
317        need_test = 1;
318        t0 = VTIM_real();
319        while (1) {
320                (void)sleep(1);
321#ifdef SO_SNDTIMEO_WORKS
322                if (cache_param->idle_send_timeout != send_timeout) {
323                        need_test = 1;
324                        send_timeout = cache_param->idle_send_timeout;
325                        tv_sndtimeo = VTIM_timeval(send_timeout);
326                        VTAILQ_FOREACH(ls, &heritage.socks, list) {
327                                if (ls->sock < 0)
328                                        continue;
329                                AZ(setsockopt(ls->sock, SOL_SOCKET,
330                                    SO_SNDTIMEO,
331                                    &tv_sndtimeo, sizeof tv_sndtimeo));
332                        }
333                }
334#endif
335#ifdef SO_RCVTIMEO_WORKS
336                if (cache_param->timeout_idle != timeout_idle) {
337                        need_test = 1;
338                        timeout_idle = cache_param->timeout_idle;
339                        tv_rcvtimeo = VTIM_timeval(timeout_idle);
340                        VTAILQ_FOREACH(ls, &heritage.socks, list) {
341                                if (ls->sock < 0)
342                                        continue;
343                                AZ(setsockopt(ls->sock, SOL_SOCKET,
344                                    SO_RCVTIMEO,
345                                    &tv_rcvtimeo, sizeof tv_rcvtimeo));
346                        }
347                }
348#endif
349                now = VTIM_real();
350                VSC_C_main->uptime = (uint64_t)(now - t0);
351        }
352        NEEDLESS_RETURN(NULL);
353}
354
355
356/*--------------------------------------------------------------------*/
357
358static void
359ccf_start(struct cli *cli, const char * const *av, void *priv)
360{
361
362        (void)cli;
363        (void)av;
364        (void)priv;
365
366        AZ(pthread_create(&VCA_thread, NULL, vca_acct, NULL));
367}
368
369/*--------------------------------------------------------------------*/
370
371static void
372ccf_listen_address(struct cli *cli, const char * const *av, void *priv)
373{
374        struct listen_sock *ls;
375        char h[32], p[32];
376
377        (void)cli;
378        (void)av;
379        (void)priv;
380
381        /*
382         * This CLI command is primarily used by varnishtest.  Don't
383         * respond until listen(2) has been called, in order to avoid
384         * a race where varnishtest::client would attempt to connect(2)
385         * before listen(2) has been called.
386         */
387        while(!hack_ready)
388                (void)usleep(100*1000);
389
390        VTAILQ_FOREACH(ls, &heritage.socks, list) {
391                if (ls->sock < 0)
392                        continue;
393                VTCP_myname(ls->sock, h, sizeof h, p, sizeof p);
394                VCLI_Out(cli, "%s %s\n", h, p);
395        }
396}
397
398/*--------------------------------------------------------------------*/
399
400static struct cli_proto vca_cmds[] = {
401        { CLI_SERVER_START,     "i", ccf_start },
402        { "debug.listen_address",
403            "debug.listen_address",
404            "Report the actual listen address\n", 0, 0,
405            "d", ccf_listen_address, NULL },
406        { NULL }
407};
408
409void
410VCA_Init(void)
411{
412
413        CLI_AddFuncs(vca_cmds);
414        Lck_New(&pace_mtx, lck_vcapace);
415}
416
417void
418VCA_Shutdown(void)
419{
420        struct listen_sock *ls;
421        int i;
422
423        VTAILQ_FOREACH(ls, &heritage.socks, list) {
424                if (ls->sock < 0)
425                        continue;
426                i = ls->sock;
427                ls->sock = -1;
428                (void)close(i);
429        }
430}
Note: See TracBrowser for help on using the repository browser.