[master] 5c12ec5 Remember to signal the thread to exit when decimating the flock

Martin Blix Grydeland martin at varnish-software.com
Mon May 5 10:42:25 CEST 2014


commit 5c12ec5f2568b43fc1f4cc68d76e9c9889462595
Author: Martin Blix Grydeland <martin at varnish-software.com>
Date:   Wed Apr 30 13:26:11 2014 +0200

    Remember to signal the thread to exit when decimating the flock
    
    The pool herder didn't signal the thread when decimating the flock,
    causing the thread to be leaked.
    
    Spotted by: xcir
    Fixes: #1490
    
    Also close a couple of other races:
    
    When pthread_cond_timedwait returns ETIMEDOUT, the predicate
    (wrk->task.func == NULL) could still have changed while reacquiring
    the mutex. If so, the signal would've been lost and the thread
    leaked. Changed the idle wait loop in Pool_Work_Thread() to always
    check the predicate before resuming the cond_wait.
    
    The update of the predicate (wrk->task.func) from e.g. Pool_Task() is
    done without holding the mutex. This opens a race on the predicate,
    and the possibility of the worker going back on cond_wait loosing the
    signal. Changed to update the predicate while holding the mutex.

diff --git a/bin/varnishd/cache/cache_pool.c b/bin/varnishd/cache/cache_pool.c
index 75fac47..ce8d526 100644
--- a/bin/varnishd/cache/cache_pool.c
+++ b/bin/varnishd/cache/cache_pool.c
@@ -163,12 +163,12 @@ pool_accept(struct worker *wrk, void *arg)
 		}
 		VTAILQ_REMOVE(&pp->idle_queue, &wrk2->task, list);
 		AZ(wrk2->task.func);
-		Lck_Unlock(&pp->mtx);
 		assert(sizeof *wa2 == WS_Reserve(wrk2->aws, sizeof *wa2));
 		wa2 = (void*)wrk2->aws->f;
 		memcpy(wa2, wa, sizeof *wa);
 		wrk2->task.func = SES_pool_accept_task;
 		wrk2->task.priv = pp->sesspool;
+		Lck_Unlock(&pp->mtx);
 		AZ(pthread_cond_signal(&wrk2->cond));
 
 		/*
@@ -204,9 +204,9 @@ Pool_Task(struct pool *pp, struct pool_task *task, enum pool_how how)
 	if (wrk != NULL) {
 		VTAILQ_REMOVE(&pp->idle_queue, &wrk->task, list);
 		AZ(wrk->task.func);
-		Lck_Unlock(&pp->mtx);
 		wrk->task.func = task->func;
 		wrk->task.priv = task->priv;
+		Lck_Unlock(&pp->mtx);
 		AZ(pthread_cond_signal(&wrk->cond));
 		return (0);
 	}
@@ -237,6 +237,17 @@ Pool_Task(struct pool *pp, struct pool_task *task, enum pool_how how)
 }
 
 /*--------------------------------------------------------------------
+ * Empty function used as a pointer value for the thread exit condition.
+ */
+
+static void
+pool_kiss_of_death(struct worker *wrk, void *priv)
+{
+	(void)wrk;
+	(void)priv;
+}
+
+/*--------------------------------------------------------------------
  * This is the work function for worker threads in the pool.
  */
 
@@ -274,7 +285,6 @@ Pool_Work_Thread(void *priv, struct worker *wrk)
 				wrk->lastused = VTIM_real();
 			wrk->task.func = NULL;
 			wrk->task.priv = wrk;
-			AZ(wrk->task.func);
 			VTAILQ_INSERT_HEAD(&pp->idle_queue, &wrk->task, list);
 			if (!stats_clean)
 				WRK_SumStat(wrk);
@@ -283,12 +293,12 @@ Pool_Work_Thread(void *priv, struct worker *wrk)
 				    wrk->vcl == NULL ?  0 : wrk->lastused+60.);
 				if (i == ETIMEDOUT)
 					VCL_Rel(&wrk->vcl);
-			} while (i);
+			} while (wrk->task.func == NULL);
 			tp = &wrk->task;
 		}
 		Lck_Unlock(&pp->mtx);
 
-		if (tp->func == NULL)
+		if (tp->func == pool_kiss_of_death)
 			break;
 
 		assert(wrk->pool == pp);
@@ -387,23 +397,23 @@ pool_herder(void *priv)
 				CAST_OBJ_NOTNULL(wrk, pt->priv, WORKER_MAGIC);
 
 				if (wrk->lastused < t_idle ||
-				    pp->nthr > cache_param->wthread_max)
+				    pp->nthr > cache_param->wthread_max) {
+					/* Give it a kiss on the cheek... */
 					VTAILQ_REMOVE(&pp->idle_queue,
 					    &wrk->task, list);
-				else
+					wrk->task.func = pool_kiss_of_death;
+					AZ(pthread_cond_signal(&wrk->cond));
+				} else
 					wrk = NULL;
 			}
 			Lck_Unlock(&pp->mtx);
 
-			/* And give it a kiss on the cheek... */
 			if (wrk != NULL) {
 				pp->nthr--;
 				Lck_Lock(&pool_mtx);
 				VSC_C_main->threads--;
 				VSC_C_main->threads_destroyed++;
 				Lck_Unlock(&pool_mtx);
-				wrk->task.func = NULL;
-				wrk->task.priv = NULL;
 				VTIM_sleep(cache_param->wthread_destroy_delay);
 				continue;
 			}
diff --git a/bin/varnishtest/tests/r01490.vtc b/bin/varnishtest/tests/r01490.vtc
new file mode 100644
index 0000000..82ffdb4
--- /dev/null
+++ b/bin/varnishtest/tests/r01490.vtc
@@ -0,0 +1,38 @@
+varnishtest "#1490 - thread destruction"
+
+server s1 {
+} -start
+
+varnish v1 \
+	-arg "-p debug=+syncvsl" \
+	-arg "-p vsl_mask=+WorkThread" \
+	-arg "-p thread_pool_min=2" \
+	-arg "-p thread_pool_max=3" \
+	-arg "-p thread_pools=1" \
+	-vcl+backend {}
+varnish v1 -start
+
+varnish v1 -expect threads == 2
+
+logexpect l1 -v v1 -g raw {
+	expect * 0 WorkThread {^\S+ start$}
+	expect * 0 WorkThread {^\S+ end$}
+} -start
+
+varnish v1 -cliok "param.set thread_pool_min 3"
+
+# Herder thread sleeps 5 seconds. Have to wait longer than that.
+delay 6
+
+varnish v1 -expect threads == 3
+
+varnish v1 -cliok "param.set thread_pool_min 2"
+varnish v1 -cliok "param.set thread_pool_max 2"
+
+# Herder thread sleeps 5 seconds. Have to wait longer than that.
+delay 6
+
+varnish v1 -expect threads == 2
+
+# Use logexpect to see that the thread actually exited
+logexpect l1 -wait



More information about the varnish-commit mailing list