[master] 195fa50 Split the expire lock into expire and per-lru list locks.
Poul-Henning Kamp
phk at varnish-cache.org
Wed Feb 9 13:06:52 CET 2011
commit 195fa50ceba17392dd708f92c6203ea12ddcb72e
Author: Poul-Henning Kamp <phk at FreeBSD.org>
Date: Wed Feb 9 12:04:25 2011 +0000
Split the expire lock into expire and per-lru list locks.
The lock order is lru->exp lock, so that the timer_idx
can be safely examined while holding only the lru lock,
eliminating the need for the separate OC_F_ONLRU flag.
The critical trick is that in EXP_Touch() the lru_lock
is sufficient: we just move the object around on the
lru list; we don't add or delete it.
This hopefully reduces lock contention on these locks.
diff --git a/bin/varnishd/cache.h b/bin/varnishd/cache.h
index 08d10ec..be209a7 100644
--- a/bin/varnishd/cache.h
+++ b/bin/varnishd/cache.h
@@ -100,8 +100,6 @@ struct vsc_lck;
struct waitinglist;
struct vef_priv;
-struct lock { void *priv; }; // Opaque
-
#define DIGEST_LEN 32
/* Name of transient storage */
@@ -375,7 +373,6 @@ struct objcore {
struct objhead *objhead;
double timer_when;
unsigned flags;
-#define OC_F_ONLRU (1<<0)
#define OC_F_BUSY (1<<1)
#define OC_F_PASS (1<<2)
#define OC_F_LRUDONTMOVE (1<<4)
@@ -391,6 +388,9 @@ static inline struct object *
oc_getobj(struct worker *wrk, struct objcore *oc)
{
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ AN(oc->methods);
+ AN(oc->methods->getobj);
return (oc->methods->getobj(wrk, oc));
}
@@ -398,6 +398,8 @@ static inline void
oc_updatemeta(struct objcore *oc)
{
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ AN(oc->methods);
if (oc->methods->updatemeta != NULL)
oc->methods->updatemeta(oc);
}
@@ -406,6 +408,9 @@ static inline void
oc_freeobj(struct objcore *oc)
{
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ AN(oc->methods);
+ AN(oc->methods->freeobj);
oc->methods->freeobj(oc);
}
@@ -413,6 +418,9 @@ static inline struct lru *
oc_getlru(const struct objcore *oc)
{
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ AN(oc->methods);
+ AN(oc->methods->getlru);
return (oc->methods->getlru(oc));
}
diff --git a/bin/varnishd/cache_expire.c b/bin/varnishd/cache_expire.c
index 0d3b1a2..e7eca76 100644
--- a/bin/varnishd/cache_expire.c
+++ b/bin/varnishd/cache_expire.c
@@ -62,7 +62,7 @@ static struct lock exp_mtx;
* so that other users of the object will not stumble trying to change the
* ttl or lru position.
*/
-#define BINHEAP_NOIDX 0
+#define BINHEAP_NOIDX 0 /* XXX: should be in binary_heap.h */
/*--------------------------------------------------------------------
* When & why does the timer fire for this object ?
@@ -99,7 +99,6 @@ exp_insert(struct objcore *oc, struct lru *lru)
binheap_insert(exp_heap, oc);
assert(oc->timer_idx != BINHEAP_NOIDX);
VTAILQ_INSERT_TAIL(&lru->lru_head, oc, lru_list);
- oc->flags |= OC_F_ONLRU;
}
/*--------------------------------------------------------------------
@@ -115,10 +114,12 @@ EXP_Inject(struct objcore *oc, struct lru *lru, double when)
CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
CHECK_OBJ_NOTNULL(lru, LRU_MAGIC);
+ Lck_Lock(&lru->mtx);
Lck_Lock(&exp_mtx);
oc->timer_when = when;
exp_insert(oc, lru);
Lck_Unlock(&exp_mtx);
+ Lck_Unlock(&lru->mtx);
}
/*--------------------------------------------------------------------
@@ -145,10 +146,12 @@ EXP_Insert(struct object *o)
lru = oc_getlru(oc);
CHECK_OBJ_NOTNULL(lru, LRU_MAGIC);
+ Lck_Lock(&lru->mtx);
Lck_Lock(&exp_mtx);
(void)update_object_when(o);
exp_insert(oc, lru);
Lck_Unlock(&exp_mtx);
+ Lck_Unlock(&lru->mtx);
oc_updatemeta(oc);
}
@@ -183,17 +186,23 @@ EXP_Touch(struct object *o, double tnow)
lru = oc_getlru(oc);
CHECK_OBJ_NOTNULL(lru, LRU_MAGIC);
- if (Lck_Trylock(&exp_mtx))
+ /*
+ * We only need the LRU lock here. The locking order is LRU->EXP
+ * so we can trust the content of the oc->timer_idx without the
+ * EXP lock. Since each lru list has its own lock, this should
+ * reduce contention a fair bit
+ */
+ if (Lck_Trylock(&lru->mtx))
return;
- if (oc->flags & OC_F_ONLRU) { /* XXX ?? */
+ if (oc->timer_idx != BINHEAP_NOIDX) {
VTAILQ_REMOVE(&lru->lru_head, oc, lru_list);
VTAILQ_INSERT_TAIL(&lru->lru_head, oc, lru_list);
VSC_main->n_lru_moved++;
o->last_lru = tnow;
}
- Lck_Unlock(&exp_mtx);
+ Lck_Unlock(&lru->mtx);
}
/*--------------------------------------------------------------------
@@ -209,12 +218,15 @@ void
EXP_Rearm(const struct object *o)
{
struct objcore *oc;
+ struct lru *lru;
CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
oc = o->objcore;
if (oc == NULL)
return;
CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ lru = oc_getlru(oc);
+ Lck_Lock(&lru->mtx);
Lck_Lock(&exp_mtx);
/*
* The hang-man might have this object of the binheap while
@@ -226,10 +238,10 @@ EXP_Rearm(const struct object *o)
assert(oc->timer_idx != BINHEAP_NOIDX);
}
Lck_Unlock(&exp_mtx);
+ Lck_Unlock(&lru->mtx);
oc_updatemeta(oc);
}
-
/*--------------------------------------------------------------------
* This thread monitors the root of the binary heap and whenever an
* object expires, accounting also for graceability, it is killed.
@@ -244,27 +256,50 @@ exp_timer(struct sess *sp, void *priv)
(void)priv;
t = TIM_real();
+ oc = NULL;
while (1) {
+ if (oc == NULL) {
+ WSL_Flush(sp->wrk, 0);
+ WRK_SumStat(sp->wrk);
+ TIM_sleep(params->expiry_sleep);
+ t = TIM_real();
+ }
+
Lck_Lock(&exp_mtx);
oc = binheap_root(exp_heap);
- CHECK_OBJ_ORNULL(oc, OBJCORE_MAGIC);
+ if (oc == NULL) {
+ Lck_Unlock(&exp_mtx);
+ continue;
+ }
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+
/*
* We may have expired so many objects that our timestamp
* got out of date, refresh it and check again.
*/
- if (oc != NULL && oc->timer_when > t)
+ if (oc->timer_when > t)
t = TIM_real();
- if (oc == NULL || oc->timer_when > t) { /* XXX: > or >= ? */
+ if (oc->timer_when > t) {
Lck_Unlock(&exp_mtx);
- WSL_Flush(sp->wrk, 0);
- WRK_SumStat(sp->wrk);
- TIM_sleep(params->expiry_sleep);
- t = TIM_real();
+ oc = NULL;
continue;
}
- /* It's time... */
- CHECK_OBJ_NOTNULL(oc->objhead, OBJHEAD_MAGIC);
+ /*
+ * It's time...
+ * Technically we should drop the exp_mtx, get the lru->mtx
+ * get the exp_mtx again and then check that the oc is still
+ * on the binheap. We take the shorter route and try to
+ * get the lru->mtx and punt if we fail.
+ */
+
+ lru = oc_getlru(oc);
+ CHECK_OBJ_NOTNULL(lru, LRU_MAGIC);
+ if (Lck_Trylock(&lru->mtx)) {
+ Lck_Unlock(&exp_mtx);
+ oc = NULL;
+ continue;
+ }
/* Remove from binheap */
assert(oc->timer_idx != BINHEAP_NOIDX);
@@ -272,13 +307,11 @@ exp_timer(struct sess *sp, void *priv)
assert(oc->timer_idx == BINHEAP_NOIDX);
/* And from LRU */
- if (oc->flags & OC_F_ONLRU) {
- lru = oc_getlru(oc);
- VTAILQ_REMOVE(&lru->lru_head, oc, lru_list);
- oc->flags &= ~OC_F_ONLRU;
- }
+ lru = oc_getlru(oc);
+ VTAILQ_REMOVE(&lru->lru_head, oc, lru_list);
Lck_Unlock(&exp_mtx);
+ Lck_Unlock(&lru->mtx);
VSC_main->n_expired++;
@@ -301,26 +334,32 @@ EXP_NukeOne(const struct sess *sp, struct lru *lru)
struct object *o;
/* Find the first currently unused object on the LRU. */
+ Lck_Lock(&lru->mtx);
Lck_Lock(&exp_mtx);
VTAILQ_FOREACH(oc, &lru->lru_head, lru_list) {
CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
- if (oc->timer_idx == BINHEAP_NOIDX) /* exp_timer has it */
- continue;
+ assert (oc->timer_idx != BINHEAP_NOIDX);
+ /*
+ * It wont release any space if we cannot release the last
+ * reference, besides, if somebody else has a reference,
+ * it's a bad idea to nuke this object anyway.
+ */
if (oc->refcnt == 1)
break;
}
if (oc != NULL) {
VTAILQ_REMOVE(&lru->lru_head, oc, lru_list);
- oc->flags &= ~OC_F_ONLRU;
binheap_delete(exp_heap, oc->timer_idx);
assert(oc->timer_idx == BINHEAP_NOIDX);
VSC_main->n_lru_nuked++;
}
Lck_Unlock(&exp_mtx);
+ Lck_Unlock(&lru->mtx);
if (oc == NULL)
return (-1);
+ /* XXX: bad idea for -spersistent */
o = oc_getobj(sp->wrk, oc);
WSL(sp->wrk, SLT_ExpKill, 0, "%u LRU", o->xid);
(void)HSH_Deref(sp->wrk, NULL, &o);
diff --git a/bin/varnishd/cache_lck.c b/bin/varnishd/cache_lck.c
index db0ee2b..831ef7b 100644
--- a/bin/varnishd/cache_lck.c
+++ b/bin/varnishd/cache_lck.c
@@ -162,6 +162,7 @@ Lck__New(struct lock *lck, struct vsc_lck *st, const char *w)
{
struct ilck *ilck;
+ AN(st);
AZ(lck->priv);
ALLOC_OBJ(ilck, ILCK_MAGIC);
AN(ilck);
diff --git a/bin/varnishd/common.h b/bin/varnishd/common.h
index 1c29696..6cbbfd7 100644
--- a/bin/varnishd/common.h
+++ b/bin/varnishd/common.h
@@ -90,3 +90,6 @@ void vsm_iter_n(struct vsm_chunk **pp);
#define VSM_CLASS_PARAM "Params"
#define VSM_CLASS_MARK "MgrCld"
#define VSM_COOL_TIME 5
+
+/* cache_lck.c */
+struct lock { void *priv; }; // Opaque
diff --git a/bin/varnishd/locks.h b/bin/varnishd/locks.h
index 5f8ff63..ec56832 100644
--- a/bin/varnishd/locks.h
+++ b/bin/varnishd/locks.h
@@ -44,6 +44,7 @@ LOCK(herder)
LOCK(wq)
LOCK(objhdr)
LOCK(exp)
+LOCK(lru)
LOCK(cli)
LOCK(ban)
LOCK(vbp)
diff --git a/bin/varnishd/stevedore.c b/bin/varnishd/stevedore.c
index 5b165e1..5dd9982 100644
--- a/bin/varnishd/stevedore.c
+++ b/bin/varnishd/stevedore.c
@@ -71,6 +71,7 @@ LRU_Alloc(void)
ALLOC_OBJ(l, LRU_MAGIC);
AN(l);
VTAILQ_INIT(&l->lru_head);
+ Lck_New(&l->mtx, lck_lru);
return (l);
}
diff --git a/bin/varnishd/stevedore.h b/bin/varnishd/stevedore.h
index 94a0e06..c0c3c31 100644
--- a/bin/varnishd/stevedore.h
+++ b/bin/varnishd/stevedore.h
@@ -56,6 +56,7 @@ struct lru {
unsigned magic;
#define LRU_MAGIC 0x3fec7bb0
VTAILQ_HEAD(,objcore) lru_head;
+ struct lock mtx;
};
/*--------------------------------------------------------------------*/
More information about the varnish-commit
mailing list