Ah, I wasn't aware of that. Thanks Andrea, I will update the docs to reflect that.<div><br></div><div>-Martin<br><br><div class="gmail_quote">On Mon, Feb 11, 2013 at 3:20 PM, Andrea Campi <span dir="ltr">&lt;<a href="mailto:andrea.campi@zephirworks.com" target="_blank">andrea.campi@zephirworks.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr">FreeBSD seems to also have these socket options since this commit:<div><br></div><div><a href="http://svnweb.freebsd.org/base?view=revision&revision=232945" target="_blank">http://svnweb.freebsd.org/base?view=revision&revision=232945</a><br>


</div><div><br></div><div>At a quick glance, this went in for FreeBSD 9.1.</div></div><div class="gmail_extra"><br><br><div class="gmail_quote"><div><div class="h5">On Mon, Feb 11, 2013 at 3:09 PM, Martin Blix Grydeland <span dir="ltr">&lt;<a href="mailto:martin@varnish-software.com" target="_blank">martin@varnish-software.com</a>&gt;</span> wrote:<br>


</div></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div><div class="h5">This will help in determining remote hang up of the connection for<br>
situations where we still are not able to send any reply, but freeing<br>
the session will reduce resource overhead (e.g. when staying on<br>
waitinglists for extended periods).<br>
<br>
On platforms that support it also add runtime parameters to control<br>
the keep-alive packet settings through socket options. On platforms<br>
that don't support these socket options, the values must be set system<br>
wide.<br>
---<br>
 bin/varnishd/cache/cache_acceptor.c       |  116 +++++++++++++++++++++++++++++<br>
 bin/varnishd/common/params.h              |    5 ++<br>
 bin/varnishd/mgt/mgt_param_tbl.c          |   20 +++++<br>
 <a href="http://configure.ac" target="_blank">configure.ac</a>                              |   32 ++++++++<br>
 doc/sphinx/installation/platformnotes.rst |   15 ++++<br>
 5 files changed, 188 insertions(+)<br>
<br>
diff --git a/bin/varnishd/cache/cache_acceptor.c b/bin/varnishd/cache/cache_acceptor.c<br>
index 62209a5..4e17dfb 100644<br>
--- a/bin/varnishd/cache/cache_acceptor.c<br>
+++ b/bin/varnishd/cache/cache_acceptor.c<br>
@@ -70,8 +70,23 @@ static const struct linger linger = {<br>
        .l_onoff        =       0,<br>
 };<br>
<br>
+/*<br>
+ * We turn on keepalives by default to assist in detecting clients that have<br>
+ * hung up on connections returning from waitinglists<br>
+ */<br>
+static const int keepalive = 1;<br>
+<br>
 static unsigned char   need_sndtimeo, need_rcvtimeo, need_linger, need_test,<br>
                        need_tcpnodelay;<br>
+static unsigned char   need_keepalive = 0;<br>
+#ifdef TCP_KEEP_WORKS<br>
+static unsigned char   need_ka_time = 0;<br>
+static unsigned char   need_ka_probes = 0;<br>
+static unsigned char   need_ka_intvl = 0;<br>
+static int             ka_time = 0;<br>
+static int             ka_probes = 0;<br>
+static int             ka_intvl = 0;<br>
+#endif<br>
<br>
 /*--------------------------------------------------------------------<br>
  * Some kernels have bugs/limitations with respect to which options are<br>
@@ -83,6 +98,10 @@ static void<br>
 sock_test(int fd)<br>
 {<br>
        struct linger lin;<br>
+       int tka;<br>
+#ifdef TCP_KEEP_WORKS<br>
+       int tka_time, tka_probes, tka_intvl;<br>
+#endif<br>
        struct timeval tv;<br>
        socklen_t l;<br>
        int i, tcp_nodelay;<br>
@@ -97,6 +116,48 @@ sock_test(int fd)<br>
        if (memcmp(&lin, &linger, l))<br>
                need_linger = 1;<br>
<br>
+       l = sizeof tka;<br>
+       i = getsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &tka, &l);<br>
+       if (i) {<br>
+               VTCP_Assert(i);<br>
+               return;<br>
+       }<br>
+       assert(l == sizeof tka);<br>
+       if (tka != keepalive)<br>
+               need_keepalive = 1;<br>
+<br>
+#ifdef TCP_KEEP_WORKS<br>
+       l = sizeof tka_time;<br>
+       i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &tka_time, &l);<br>
+       if (i) {<br>
+               VTCP_Assert(i);<br>
+               return;<br>
+       }<br>
+       assert(l == sizeof tka_time);<br>
+       if (tka_time != ka_time)<br>
+               need_ka_time = 1;<br>
+<br>
+       l = sizeof tka_probes;<br>
+       i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &tka_probes, &l);<br>
+       if (i) {<br>
+               VTCP_Assert(i);<br>
+               return;<br>
+       }<br>
+       assert(l == sizeof tka_probes);<br>
+       if (tka_probes != ka_probes)<br>
+               need_ka_probes = 1;<br>
+<br>
+       l = sizeof tka_intvl;<br>
+       i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &tka_intvl, &l);<br>
+       if (i) {<br>
+               VTCP_Assert(i);<br>
+               return;<br>
+       }<br>
+       assert(l == sizeof tka_intvl);<br>
+       if (tka_intvl != ka_intvl)<br>
+               need_ka_intvl = 1;<br>
+#endif<br>
+<br>
 #ifdef SO_SNDTIMEO_WORKS<br>
        l = sizeof tv;<br>
        i = getsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, &l);<br>
@@ -281,6 +342,22 @@ VCA_SetupSess(struct worker *wrk, struct sess *sp)<br>
        if (need_linger)<br>
                VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_LINGER,<br>
                    &linger, sizeof linger));<br>
+       if (need_keepalive)<br>
+               VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_KEEPALIVE,<br>
+                   &keepalive, sizeof keepalive));<br>
+#ifdef TCP_KEEP_WORKS<br>
+       AN(ka_time);<br>
+       if (need_ka_time)<br>
+               VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPIDLE,<br>
+                       &ka_time, sizeof ka_time));<br>
+       if (need_ka_probes)<br>
+               VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPCNT,<br>
+                       &ka_probes, sizeof ka_probes));<br>
+       if (need_ka_intvl)<br>
+               VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPINTVL,<br>
+                       &ka_intvl, sizeof ka_intvl));<br>
+#endif<br>
+<br>
 #ifdef SO_SNDTIMEO_WORKS<br>
        if (need_sndtimeo)<br>
                VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_SNDTIMEO,<br>
@@ -316,6 +393,12 @@ vca_acct(void *arg)<br>
        THR_SetName("cache-acceptor");<br>
        (void)arg;<br>
<br>
+#ifdef TCP_KEEP_WORKS<br>
+       ka_time = cache_param->tcp_keepalive_time;<br>
+       ka_probes = cache_param->tcp_keepalive_probes;<br>
+       ka_intvl = cache_param->tcp_keepalive_intvl;<br>
+#endif<br>
+<br>
        VTAILQ_FOREACH(ls, &heritage.socks, list) {<br>
                if (ls->sock < 0)<br>
                        continue;<br>
@@ -324,6 +407,16 @@ vca_acct(void *arg)<br>
                    &linger, sizeof linger));<br>
                AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_NODELAY,<br>
                    &tcp_nodelay, sizeof tcp_nodelay));<br>
+               AZ(setsockopt(ls->sock, SOL_SOCKET, SO_KEEPALIVE,<br>
+                   &keepalive, sizeof keepalive));<br>
+#ifdef TCP_KEEP_WORKS<br>
+               AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPIDLE,<br>
+                   &ka_time, sizeof ka_time));<br>
+               AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPCNT,<br>
+                   &ka_probes, sizeof ka_probes));<br>
+               AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPINTVL,<br>
+                   &ka_intvl, sizeof ka_intvl));<br>
+#endif<br>
                if (cache_param->accept_filter) {<br>
                        i = VTCP_filter_http(ls->sock);<br>
                        if (i)<br>
@@ -339,6 +432,29 @@ vca_acct(void *arg)<br>
        t0 = VTIM_real();<br>
        while (1) {<br>
                (void)sleep(1);<br>
+#ifdef TCP_KEEP_WORKS<br>
+               if (cache_param->tcp_keepalive_time != ka_time ||<br>
+                   cache_param->tcp_keepalive_probes != ka_probes ||<br>
+                   cache_param->tcp_keepalive_intvl != ka_intvl) {<br>
+                       need_test = 1;<br>
+                       ka_time = cache_param->tcp_keepalive_time;<br>
+                       ka_probes = cache_param->tcp_keepalive_probes;<br>
+                       ka_intvl = cache_param->tcp_keepalive_intvl;<br>
+                       VTAILQ_FOREACH(ls, &heritage.socks, list) {<br>
+                               if (ls->sock < 0)<br>
+                                       continue;<br>
+                               AZ(setsockopt(ls->sock, IPPROTO_TCP,<br>
+                                   TCP_KEEPIDLE,<br>
+                                   &ka_time, sizeof ka_time));<br>
+                               AZ(setsockopt(ls->sock, IPPROTO_TCP,<br>
+                                   TCP_KEEPCNT,<br>
+                                   &ka_probes, sizeof ka_probes));<br>
+                               AZ(setsockopt(ls->sock, IPPROTO_TCP,<br>
+                                   TCP_KEEPINTVL,<br>
+                                   &ka_intvl, sizeof ka_intvl));<br>
+                       }<br>
+               }<br>
+#endif<br>
 #ifdef SO_SNDTIMEO_WORKS<br>
                if (cache_param->idle_send_timeout != send_timeout) {<br>
                        need_test = 1;<br>
diff --git a/bin/varnishd/common/params.h b/bin/varnishd/common/params.h<br>
index a6e881b..6893461 100644<br>
--- a/bin/varnishd/common/params.h<br>
+++ b/bin/varnishd/common/params.h<br>
@@ -110,6 +110,11 @@ struct params {<br>
        unsigned                pipe_timeout;<br>
        unsigned                send_timeout;<br>
        unsigned                idle_send_timeout;<br>
+#ifdef TCP_KEEP_WORKS<br>
+       unsigned                tcp_keepalive_time;<br>
+       unsigned                tcp_keepalive_probes;<br>
+       unsigned                tcp_keepalive_intvl;<br>
+#endif<br>
<br>
        /* Management hints */<br>
        unsigned                auto_restart;<br>
diff --git a/bin/varnishd/mgt/mgt_param_tbl.c b/bin/varnishd/mgt/mgt_param_tbl.c<br>
index 8601bae..0380a02 100644<br>
--- a/bin/varnishd/mgt/mgt_param_tbl.c<br>
+++ b/bin/varnishd/mgt/mgt_param_tbl.c<br>
@@ -205,6 +205,26 @@ const struct parspec mgt_parspec[] = {<br>
                "See setsockopt(2) under SO_SNDTIMEO for more information.",<br>
                DELAYED_EFFECT,<br>
                "60", "seconds" },<br>
+#ifdef TCP_KEEP_WORKS<br>
+       { "tcp_keepalive_time", tweak_timeout, &mgt_param.tcp_keepalive_time,<br>
+               1, 7200,<br>
+               "The number of seconds a connection needs to be idle before "<br>
+               "TCP begins sending out keep-alive probes.",<br>
+               0,<br>
+               "600", "seconds" },<br>
+       { "tcp_keepalive_probes", tweak_uint, &mgt_param.tcp_keepalive_probes,<br>
+               1, 100,<br>
+               "The maximum number of TCP keep-alive probes to send before "<br>
+               "giving up and killing the connection if no response is "<br>
+               "obtained from the other end.",<br>
+               0,<br>
+               "5", "probes" },<br>
+       { "tcp_keepalive_intvl", tweak_timeout, &mgt_param.tcp_keepalive_intvl,<br>
+               1, 100,<br>
+               "The number of seconds between TCP keep-alive probes.",<br>
+               0,<br>
+               "5", "seconds" },<br>
+#endif<br>
        { "auto_restart", tweak_bool, &mgt_param.auto_restart, 0, 0,<br>
                "Restart child process automatically if it dies.\n",<br>
                0,<br>
diff --git a/<a href="http://configure.ac" target="_blank">configure.ac</a> b/<a href="http://configure.ac" target="_blank">configure.ac</a><br>
index a4cd8e8..6613980 100644<br>
--- a/<a href="http://configure.ac" target="_blank">configure.ac</a><br>
+++ b/<a href="http://configure.ac" target="_blank">configure.ac</a><br>
@@ -423,6 +423,38 @@ if test "$ac_cv_so_rcvtimeo_works" = no ||<br>
 fi<br>
 LIBS="${save_LIBS}"<br>
<br>
+# Check if the OS supports TCP_KEEP(CNT|IDLE|INTVL) socket options<br>
+save_LIBS="${LIBS}"<br>
+LIBS="${LIBS} ${NET_LIBS}"<br>
+AC_CACHE_CHECK([for TCP_KEEP(CNT|IDLE|INTVL) socket options],<br>
+  [ac_cv_tcp_keep_works],<br>
+  [AC_RUN_IFELSE(<br>
+    [AC_LANG_PROGRAM([[<br>
+#include &lt;stdio.h&gt;<br>
+#include &lt;sys/types.h&gt;<br>
+#include &lt;sys/socket.h&gt;<br>
+#include &lt;netinet/in.h&gt;<br>
+#include &lt;netinet/tcp.h&gt;<br>
+    ]],[[<br>
+int s = socket(AF_INET, SOCK_STREAM, 0);<br>
+int i;<br>
+i = 5;<br>
+if (setsockopt(s, IPPROTO_TCP, TCP_KEEPCNT, &i, sizeof i))<br>
+  return (1);<br>
+if (setsockopt(s, IPPROTO_TCP, TCP_KEEPIDLE, &i, sizeof i))<br>
+  return (1);<br>
+if (setsockopt(s, IPPROTO_TCP, TCP_KEEPINTVL, &i, sizeof i))<br>
+  return (1);<br>
+return (0);<br>
+    ]])],<br>
+    [ac_cv_tcp_keep_works=yes],<br>
+    [ac_cv_tcp_keep_works=no])<br>
+  ])<br>
+if test "$ac_cv_tcp_keep_works" = yes; then<br>
+   AC_DEFINE([TCP_KEEP_WORKS], [1], [Define if TCP_KEEP* works])<br>
+fi<br>
+LIBS="${save_LIBS}"<br>
+<br>
 # Run-time directory<br>
 VARNISH_STATE_DIR='${localstatedir}/varnish'<br>
 AC_SUBST(VARNISH_STATE_DIR)<br>
diff --git a/doc/sphinx/installation/platformnotes.rst b/doc/sphinx/installation/platformnotes.rst<br>
index 3ad486c..e1720b6 100644<br>
--- a/doc/sphinx/installation/platformnotes.rst<br>
+++ b/doc/sphinx/installation/platformnotes.rst<br>
@@ -35,3 +35,18 @@ Reduce the maximum stack size by running::<br>
<br>
 in the Varnish startup script.<br>
<br>
+TCP keep-alive configuration<br>
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~<br>
+<br>
+On platforms except Linux, Varnish is not able to set the TCP<br>
+keep-alive values per socket, and therefore the tcp_keepalive_* Varnish<br>
+runtime parameters are not available. On these platforms it can be<br>
+beneficial to tune the system wide values for these in order to more<br>
+reliably detect remote close for sessions spending a long time on<br>
+waitinglists. This will help free up resources faster.<br>
+<br>
+On Linux the defaults are set to:<br>
+<br>
+       tcp_keepalive_time = 600 seconds<br>
+       tcp_keepalive_probes = 5<br>
+       tcp_keepalive_intvl = 5 seconds<br>
</div></div><span class="HOEnZb"><font color="#888888"><span><font color="#888888">--<br>
1.7.10.4<br>
<br>
<br>
_______________________________________________<br>
varnish-dev mailing list<br>
<a href="mailto:varnish-dev@varnish-cache.org" target="_blank">varnish-dev@varnish-cache.org</a><br>
<a href="https://www.varnish-cache.org/lists/mailman/listinfo/varnish-dev" target="_blank">https://www.varnish-cache.org/lists/mailman/listinfo/varnish-dev</a><br>
</font></span></font></span></blockquote></div><br></div>
</blockquote></div><br><br clear="all"><div><br></div>-- <br><div><table border="0" cellpadding="0" cellspacing="0" style="font-size:12px;line-height:1.5em;font-family:'Helvetica Neue',Arial,sans-serif;color:rgb(102,102,102);width:550px;border-top-width:1px;border-top-style:solid;border-top-color:rgb(238,238,238);border-bottom-width:1px;border-bottom-style:solid;border-bottom-color:rgb(238,238,238);margin-top:20px;padding-top:5px;padding-bottom:5px">
<tbody><tr><td width="100"><a href="http://varnish-software.com" target="_blank"><img src="http://www.varnish-software.com/static/media/logo-email.png"></a><span></span><span></span></td><td><strong style="font-size:14px;color:rgb(34,34,34)">Martin Blix Grydeland</strong><br>
Senior Developer | Varnish Software AS<br>Cell: +47 21 98 92 60<br><span style="font-weight:bold">We Make Websites Fly!</span></td></tr></tbody></table></div>
</div>