[PATCH 1/2] Turn on SO_KEEPALIVE on all TCP connections.

Martin Blix Grydeland martin at varnish-software.com
Mon Feb 11 15:09:57 CET 2013


This helps detect that the remote end has hung up in situations where
we are not yet able to send a reply; freeing such sessions early
reduces resource overhead (e.g. when sessions stay on waitinglists for
extended periods).

On platforms that support it, also add runtime parameters to control
the keep-alive packet settings through socket options. On platforms
that don't support these socket options, the values must be set
system-wide.
---
 bin/varnishd/cache/cache_acceptor.c       |  116 +++++++++++++++++++++++++++++
 bin/varnishd/common/params.h              |    5 ++
 bin/varnishd/mgt/mgt_param_tbl.c          |   20 +++++
 configure.ac                              |   32 ++++++++
 doc/sphinx/installation/platformnotes.rst |   15 ++++
 5 files changed, 188 insertions(+)

diff --git a/bin/varnishd/cache/cache_acceptor.c b/bin/varnishd/cache/cache_acceptor.c
index 62209a5..4e17dfb 100644
--- a/bin/varnishd/cache/cache_acceptor.c
+++ b/bin/varnishd/cache/cache_acceptor.c
@@ -70,8 +70,23 @@ static const struct linger linger = {
 	.l_onoff	=	0,
 };
 
+/*
+ * We turn on keepalives by default to assist in detecting clients that have
+ * hung up on connections returning from waitinglists
+ */
+static const int keepalive = 1;
+
 static unsigned char	need_sndtimeo, need_rcvtimeo, need_linger, need_test,
 			need_tcpnodelay;
+static unsigned char	need_keepalive = 0;
+#ifdef TCP_KEEP_WORKS
+static unsigned char	need_ka_time = 0;
+static unsigned char	need_ka_probes = 0;
+static unsigned char	need_ka_intvl = 0;
+static int		ka_time = 0;
+static int		ka_probes = 0;
+static int		ka_intvl = 0;
+#endif
 
 /*--------------------------------------------------------------------
  * Some kernels have bugs/limitations with respect to which options are
@@ -83,6 +98,10 @@ static void
 sock_test(int fd)
 {
 	struct linger lin;
+	int tka;
+#ifdef TCP_KEEP_WORKS
+	int tka_time, tka_probes, tka_intvl;
+#endif
 	struct timeval tv;
 	socklen_t l;
 	int i, tcp_nodelay;
@@ -97,6 +116,48 @@ sock_test(int fd)
 	if (memcmp(&lin, &linger, l))
 		need_linger = 1;
 
+	l = sizeof tka;
+	i = getsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &tka, &l);
+	if (i) {
+		VTCP_Assert(i);
+		return;
+	}
+	assert(l == sizeof tka);
+	if (tka != keepalive)
+		need_keepalive = 1;
+
+#ifdef TCP_KEEP_WORKS
+	l = sizeof tka_time;
+	i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &tka_time, &l);
+	if (i) {
+		VTCP_Assert(i);
+		return;
+	}
+	assert(l == sizeof tka_time);
+	if (tka_time != ka_time)
+		need_ka_time = 1;
+
+	l = sizeof tka_probes;
+	i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &tka_probes, &l);
+	if (i) {
+		VTCP_Assert(i);
+		return;
+	}
+	assert(l == sizeof tka_probes);
+	if (tka_probes != ka_probes)
+		need_ka_probes = 1;
+
+	l = sizeof tka_intvl;
+	i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &tka_intvl, &l);
+	if (i) {
+		VTCP_Assert(i);
+		return;
+	}
+	assert(l == sizeof tka_intvl);
+	if (tka_intvl != ka_intvl)
+		need_ka_intvl = 1;
+#endif
+
 #ifdef SO_SNDTIMEO_WORKS
 	l = sizeof tv;
 	i = getsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, &l);
@@ -281,6 +342,22 @@ VCA_SetupSess(struct worker *wrk, struct sess *sp)
 	if (need_linger)
 		VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_LINGER,
 		    &linger, sizeof linger));
+	if (need_keepalive)
+		VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_KEEPALIVE,
+		    &keepalive, sizeof keepalive));
+#ifdef TCP_KEEP_WORKS
+	AN(ka_time);
+	if (need_ka_time)
+		VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPIDLE,
+			&ka_time, sizeof ka_time));
+	if (need_ka_probes)
+		VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPCNT,
+			&ka_probes, sizeof ka_probes));
+	if (need_ka_intvl)
+		VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPINTVL,
+			&ka_intvl, sizeof ka_intvl));
+#endif
+
 #ifdef SO_SNDTIMEO_WORKS
 	if (need_sndtimeo)
 		VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_SNDTIMEO,
@@ -316,6 +393,12 @@ vca_acct(void *arg)
 	THR_SetName("cache-acceptor");
 	(void)arg;
 
+#ifdef TCP_KEEP_WORKS
+	ka_time = cache_param->tcp_keepalive_time;
+	ka_probes = cache_param->tcp_keepalive_probes;
+	ka_intvl = cache_param->tcp_keepalive_intvl;
+#endif
+
 	VTAILQ_FOREACH(ls, &heritage.socks, list) {
 		if (ls->sock < 0)
 			continue;
@@ -324,6 +407,16 @@ vca_acct(void *arg)
 		    &linger, sizeof linger));
 		AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_NODELAY,
 		    &tcp_nodelay, sizeof tcp_nodelay));
+		AZ(setsockopt(ls->sock, SOL_SOCKET, SO_KEEPALIVE,
+		    &keepalive, sizeof keepalive));
+#ifdef TCP_KEEP_WORKS
+		AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPIDLE,
+		    &ka_time, sizeof ka_time));
+		AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPCNT,
+		    &ka_probes, sizeof ka_probes));
+		AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPINTVL,
+		    &ka_intvl, sizeof ka_intvl));
+#endif
 		if (cache_param->accept_filter) {
 			i = VTCP_filter_http(ls->sock);
 			if (i)
@@ -339,6 +432,29 @@ vca_acct(void *arg)
 	t0 = VTIM_real();
 	while (1) {
 		(void)sleep(1);
+#ifdef TCP_KEEP_WORKS
+		if (cache_param->tcp_keepalive_time != ka_time ||
+		    cache_param->tcp_keepalive_probes != ka_probes ||
+		    cache_param->tcp_keepalive_intvl != ka_intvl) {
+			need_test = 1;
+			ka_time = cache_param->tcp_keepalive_time;
+			ka_probes = cache_param->tcp_keepalive_probes;
+			ka_intvl = cache_param->tcp_keepalive_intvl;
+			VTAILQ_FOREACH(ls, &heritage.socks, list) {
+				if (ls->sock < 0)
+					continue;
+				AZ(setsockopt(ls->sock, IPPROTO_TCP,
+				    TCP_KEEPIDLE,
+				    &ka_time, sizeof ka_time));
+				AZ(setsockopt(ls->sock, IPPROTO_TCP,
+				    TCP_KEEPCNT,
+				    &ka_probes, sizeof ka_probes));
+				AZ(setsockopt(ls->sock, IPPROTO_TCP,
+				    TCP_KEEPINTVL,
+				    &ka_intvl, sizeof ka_intvl));
+			}
+		}
+#endif
 #ifdef SO_SNDTIMEO_WORKS
 		if (cache_param->idle_send_timeout != send_timeout) {
 			need_test = 1;
diff --git a/bin/varnishd/common/params.h b/bin/varnishd/common/params.h
index a6e881b..6893461 100644
--- a/bin/varnishd/common/params.h
+++ b/bin/varnishd/common/params.h
@@ -110,6 +110,11 @@ struct params {
 	unsigned		pipe_timeout;
 	unsigned		send_timeout;
 	unsigned		idle_send_timeout;
+#ifdef TCP_KEEP_WORKS
+	unsigned		tcp_keepalive_time;
+	unsigned		tcp_keepalive_probes;
+	unsigned		tcp_keepalive_intvl;
+#endif
 
 	/* Management hints */
 	unsigned		auto_restart;
diff --git a/bin/varnishd/mgt/mgt_param_tbl.c b/bin/varnishd/mgt/mgt_param_tbl.c
index 8601bae..0380a02 100644
--- a/bin/varnishd/mgt/mgt_param_tbl.c
+++ b/bin/varnishd/mgt/mgt_param_tbl.c
@@ -205,6 +205,26 @@ const struct parspec mgt_parspec[] = {
 		"See setsockopt(2) under SO_SNDTIMEO for more information.",
 		DELAYED_EFFECT,
 		"60", "seconds" },
+#ifdef TCP_KEEP_WORKS
+	{ "tcp_keepalive_time", tweak_timeout, &mgt_param.tcp_keepalive_time,
+		1, 7200,
+		"The number of seconds a connection needs to be idle before "
+		"TCP begins sending out keep-alive probes.",
+		0,
+		"600", "seconds" },
+	{ "tcp_keepalive_probes", tweak_uint, &mgt_param.tcp_keepalive_probes,
+		1, 100,
+		"The maximum number of TCP keep-alive probes to send before "
+		"giving up and killing the connection if no response is "
+		"obtained from the other end.",
+		0,
+		"5", "probes" },
+	{ "tcp_keepalive_intvl", tweak_timeout, &mgt_param.tcp_keepalive_intvl,
+		1, 100,
+		"The number of seconds between TCP keep-alive probes.",
+		0,
+		"5", "seconds" },
+#endif
 	{ "auto_restart", tweak_bool, &mgt_param.auto_restart, 0, 0,
 		"Restart child process automatically if it dies.\n",
 		0,
diff --git a/configure.ac b/configure.ac
index a4cd8e8..6613980 100644
--- a/configure.ac
+++ b/configure.ac
@@ -423,6 +423,38 @@ if test "$ac_cv_so_rcvtimeo_works" = no ||
 fi
 LIBS="${save_LIBS}"
 
+# Check if the OS supports TCP_KEEP(CNT|IDLE|INTVL) socket options
+save_LIBS="${LIBS}"
+LIBS="${LIBS} ${NET_LIBS}"
+AC_CACHE_CHECK([for TCP_KEEP(CNT|IDLE|INTVL) socket options],
+  [ac_cv_tcp_keep_works],
+  [AC_RUN_IFELSE(
+    [AC_LANG_PROGRAM([[
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+    ]],[[
+int s = socket(AF_INET, SOCK_STREAM, 0);
+int i;
+i = 5;
+if (setsockopt(s, IPPROTO_TCP, TCP_KEEPCNT, &i, sizeof i))
+  return (1);
+if (setsockopt(s, IPPROTO_TCP, TCP_KEEPIDLE, &i, sizeof i))
+  return (1);
+if (setsockopt(s, IPPROTO_TCP, TCP_KEEPINTVL, &i, sizeof i))
+  return (1);
+return (0);
+    ]])],
+    [ac_cv_tcp_keep_works=yes],
+    [ac_cv_tcp_keep_works=no])
+  ])
+if test "$ac_cv_tcp_keep_works" = yes; then
+   AC_DEFINE([TCP_KEEP_WORKS], [1], [Define if TCP_KEEP* works])
+fi
+LIBS="${save_LIBS}"
+
 # Run-time directory
 VARNISH_STATE_DIR='${localstatedir}/varnish'
 AC_SUBST(VARNISH_STATE_DIR)
diff --git a/doc/sphinx/installation/platformnotes.rst b/doc/sphinx/installation/platformnotes.rst
index 3ad486c..e1720b6 100644
--- a/doc/sphinx/installation/platformnotes.rst
+++ b/doc/sphinx/installation/platformnotes.rst
@@ -35,3 +35,18 @@ Reduce the maximum stack size by running::
 
 in the Varnish startup script.
 
+TCP keep-alive configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+On platforms other than Linux, Varnish is not able to set the TCP
+keep-alive values per socket, and therefore the tcp_keepalive_* Varnish
+runtime parameters are not available. On these platforms it can be
+beneficial to tune the system-wide values for these in order to more
+reliably detect remote close for sessions spending a long time on
+waitinglists. This will help free up resources faster.
+
+On Linux the defaults are set to::
+
+	tcp_keepalive_time = 600 seconds
+	tcp_keepalive_probes = 5
+	tcp_keepalive_intvl = 5 seconds
-- 
1.7.10.4




More information about the varnish-dev mailing list