[experimental-ims] ace762d Merge with trunk as of 2011-11-22

Geoff Simmons geoff at varnish-cache.org
Mon Jan 9 21:52:42 CET 2012


commit ace762d59b261f22d14849677e297e70e4f6b1d0
Merge: 71ee192 aed74d6
Author: Geoff Simmons <geoff at uplex.de>
Date:   Fri Nov 25 11:17:52 2011 +0100

    Merge with trunk as of 2011-11-22

diff --cc bin/varnishd/cache/cache.h
index 0000000,4b66309..4df2e3d
mode 000000,100644..100644
--- a/bin/varnishd/cache/cache.h
+++ b/bin/varnishd/cache/cache.h
@@@ -1,0 -1,1032 +1,1040 @@@
+ /*-
+  * Copyright (c) 2006 Verdens Gang AS
+  * Copyright (c) 2006-2011 Varnish Software AS
+  * All rights reserved.
+  *
+  * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  *
+  */
+ 
+ /*
+  * This macro can be used in .h files to isolate bits that the manager
+  * should not (need to) see, such as pthread mutexes etc.
+  */
+ #define VARNISH_CACHE_CHILD	1
+ 
+ #include "common/common.h"
+ 
+ #include "vapi/vsc_int.h"
+ #include "vapi/vsl_int.h"
+ 
+ #include <sys/socket.h>
+ 
+ #include <pthread.h>
+ #ifdef HAVE_PTHREAD_NP_H
+ #include <pthread_np.h>
+ #endif
+ #include <stdarg.h>
+ #include <string.h>
+ #include <limits.h>
+ #include <unistd.h>
+ 
+ #if defined(HAVE_EPOLL_CTL)
+ #include <sys/epoll.h>
+ #endif
+ 
+ 
+ #include "common/params.h"
+ 
+ enum body_status {
+ #define BODYSTATUS(U,l)	BS_##U,
+ #include "tbl/body_status.h"
+ #undef BODYSTATUS
+ };
+ 
+ static inline const char *
+ body_status(enum body_status e)
+ {
+ 	switch(e) {
+ #define BODYSTATUS(U,l)	case BS_##U: return (#l);
+ #include "tbl/body_status.h"
+ #undef BODYSTATUS
+ 	default:
+ 		return ("?");
+ 	}
+ }
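(The pair of includes above is the usual X-macro pattern: tbl/body_status.h is just a list of BODYSTATUS(UPPER, lower) entries, expanded once into the BS_* enum and once into the switch cases. Entry shape, with names assumed from this tree:

    /* tbl/body_status.h consists of lines like these: */
    BODYSTATUS(NONE, none)
    BODYSTATUS(LENGTH, length)
    BODYSTATUS(CHUNKED, chunked)
    /* ... giving BS_NONE, BS_LENGTH, ... and body_status() strings "none", ... */
)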
+ 
+ /*
+  * NB: HDR_STATUS is only used in cache_http.c, everybody else uses the
+  * http->status integer field.
+  */
+ 
+ enum {
+ 	/* Fields from the first line of HTTP proto */
+ 	HTTP_HDR_REQ,
+ 	HTTP_HDR_URL,
+ 	HTTP_HDR_PROTO,
+ 	HTTP_HDR_STATUS,
+ 	HTTP_HDR_RESPONSE,
+ 	/* HTTP header lines */
+ 	HTTP_HDR_FIRST,
+ };
+ 
+ struct SHA256Context;
+ struct VSC_C_lck;
+ struct ban;
+ struct busyobj;
+ struct cli;
+ struct cli_proto;
+ struct director;
+ struct iovec;
+ struct objcore;
+ struct object;
+ struct objhead;
+ struct pool;
+ struct sess;
+ struct sesspool;
+ struct vbc;
+ struct vef_priv;
+ struct vrt_backend;
+ struct vsb;
+ struct waitinglist;
+ struct worker;
+ 
+ #define DIGEST_LEN		32
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ typedef struct {
+ 	char			*b;
+ 	char			*e;
+ } txt;
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ enum step {
+ #define STEP(l, u)	STP_##u,
+ #include "tbl/steps.h"
+ #undef STEP
+ };
+ 
+ /*--------------------------------------------------------------------*/
+ struct lock { void *priv; };	// Opaque
+ 
+ /*--------------------------------------------------------------------
+  * Workspace structure for quick memory allocation.
+  */
+ 
+ struct ws {
+ 	unsigned		magic;
+ #define WS_MAGIC		0x35fac554
+ 	unsigned		overflow;	/* workspace overflowed */
+ 	const char		*id;		/* identity */
+ 	char			*s;		/* (S)tart of buffer */
+ 	char			*f;		/* (F)ree pointer */
+ 	char			*r;		/* (R)eserved length */
+ 	char			*e;		/* (E)nd of buffer */
+ };
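(A minimal sketch of the pointer discipline, assuming the WS_* semantics declared further down: s <= f <= e at all times, r non-NULL only while a reservation is open.

    static void
    ws_sketch(struct ws *ws)
    {
        char *p;
        unsigned u;

        p = WS_Alloc(ws, 10);       /* advances f; NULL if it would pass e */
        if (p != NULL)
            memcpy(p, "0123456789", 10);

        u = WS_Reserve(ws, 0);      /* claim all of f..e; r marks the claim */
        if (u >= 10) {
            memcpy(ws->f, "0123456789", 10);
            WS_Release(ws, 10);     /* keep 10 bytes: f advances, r cleared */
        } else
            WS_Release(ws, 0);      /* keep nothing */
    }
)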
+ 
+ /*--------------------------------------------------------------------
+  * HTTP Request/Response/Header handling structure.
+  */
+ 
+ enum httpwhence {
+ 	HTTP_Rx	 = 1,
+ 	HTTP_Tx  = 2,
+ 	HTTP_Obj = 3
+ };
+ 
+ /* NB: remember to update http_Copy() if you add fields */
+ struct http {
+ 	unsigned		magic;
+ #define HTTP_MAGIC		0x6428b5c9
+ 
+ 	enum httpwhence		logtag;
+ 
+ 	struct ws		*ws;
+ 	txt			*hd;
+ 	unsigned char		*hdf;
+ #define HDF_FILTER		(1 << 0)	/* Filtered by Connection */
+ 	uint16_t		shd;		/* Size of hd space */
+ 	uint16_t		nhd;		/* Next free hd */
+ 	uint16_t		status;
+ 	uint8_t			protover;
+ 	uint8_t			conds;		/* If-* headers present */
+ };
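(The hd array carries the start line split across the pseudo-header slots enumerated above, with real header lines from hd[HTTP_HDR_FIRST] on and nhd as the next free slot. A sketch, assuming <stdio.h>:

    static void
    print_req_line(const struct http *hp)
    {
        /* For "GET /foo HTTP/1.1": REQ="GET", URL="/foo", PROTO="HTTP/1.1".
         * Responses use PROTO/STATUS/RESPONSE; "Host: x" lands in
         * hd[HTTP_HDR_FIRST]. */
        printf("%.*s %.*s %.*s\n",
            (int)Tlen(hp->hd[HTTP_HDR_REQ]), hp->hd[HTTP_HDR_REQ].b,
            (int)Tlen(hp->hd[HTTP_HDR_URL]), hp->hd[HTTP_HDR_URL].b,
            (int)Tlen(hp->hd[HTTP_HDR_PROTO]), hp->hd[HTTP_HDR_PROTO].b);
    }
)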
+ 
+ /*--------------------------------------------------------------------
+  * HTTP Protocol connection structure
+  */
+ 
+ struct http_conn {
+ 	unsigned		magic;
+ #define HTTP_CONN_MAGIC		0x3e19edd1
+ 
+ 	int			fd;
+ 	unsigned		vsl_id;
+ 	unsigned		maxbytes;
+ 	unsigned		maxhdr;
+ 	struct ws		*ws;
+ 	txt			rxbuf;
+ 	txt			pipeline;
+ };
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ struct acct {
+ 	double			first;
+ #define ACCT(foo)	uint64_t	foo;
+ #include "tbl/acct_fields.h"
+ #undef ACCT
+ };
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ #define L0(t, n)
+ #define L1(t, n)		t n;
+ #define VSC_F(n, t, l, f, e, d)	L##l(t, n)
+ #define VSC_DO_MAIN
+ struct dstat {
+ #include "tbl/vsc_fields.h"
+ };
+ #undef VSC_F
+ #undef VSC_DO_MAIN
+ #undef L0
+ #undef L1
+ 
+ /* Fetch processors --------------------------------------------------*/
+ 
+ typedef void vfp_begin_f(struct worker *, size_t );
+ typedef int vfp_bytes_f(struct worker *, struct http_conn *, ssize_t);
+ typedef int vfp_end_f(struct worker *);
+ 
+ struct vfp {
+ 	vfp_begin_f	*begin;
+ 	vfp_bytes_f	*bytes;
+ 	vfp_end_f	*end;
+ };
+ 
+ extern struct vfp vfp_gunzip;
+ extern struct vfp vfp_gzip;
+ extern struct vfp vfp_testgzip;
+ extern struct vfp vfp_esi;
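(Each vfp is a begin/bytes/end pipeline: begin receives a size estimate, bytes is called repeatedly as the body arrives on the http_conn, end finalizes the object. Roughly what an identity filter's bytes callback would do, sketched with only the APIs declared in this header, not code from this commit:

    static int
    vfp_identity_bytes(struct worker *w, struct http_conn *htc, ssize_t bytes)
    {
        struct storage *st;
        ssize_t l;

        while (bytes > 0) {
            st = FetchStorage(w, 0);    /* storage segment with room left */
            if (st == NULL)
                return (-1);
            l = st->space - st->len;
            if (l > bytes)
                l = bytes;
            l = HTC_Read(w, htc, st->ptr + st->len, l);
            if (l <= 0)
                return (-1);
            st->len += l;
            w->fetch_obj->len += l;
            bytes -= l;
        }
        return (0);
    }
)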
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ struct exp {
+ 	double			ttl;
+ 	double			grace;
+ 	double			keep;
+ 	double			age;
+ 	double			entered;
+ };
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ struct wrw {
+ 	int			*wfd;
+ 	unsigned		werr;	/* valid after WRW_Flush() */
+ 	struct iovec		*iov;
+ 	unsigned		siov;
+ 	unsigned		niov;
+ 	ssize_t			liov;
+ 	ssize_t			cliov;
+ 	unsigned		ciov;	/* Chunked header marker */
+ };
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ struct stream_ctx {
+ 	unsigned		magic;
+ #define STREAM_CTX_MAGIC	0x8213728b
+ 
+ 	struct vgz		*vgz;
+ 	void			*obuf;
+ 	ssize_t			obuf_len;
+ 	ssize_t			obuf_ptr;
+ 
+ 	/* Next byte we will take from storage */
+ 	ssize_t			stream_next;
+ 
+ 	/* First byte of storage if we free it as we go (pass) */
+ 	ssize_t			stream_front;
+ };
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ struct wrk_accept {
+ 	unsigned		magic;
+ #define WRK_ACCEPT_MAGIC	0x8c4b4d59
+ 
+ 	/* Accept stuff */
+ 	struct sockaddr_storage	acceptaddr;
+ 	socklen_t		acceptaddrlen;
+ 	int			acceptsock;
+ 	struct listen_sock	*acceptlsock;
+ };
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ struct worker {
+ 	unsigned		magic;
+ #define WORKER_MAGIC		0x6391adcf
+ 	struct pool		*pool;
+ 	struct objhead		*nobjhead;
+ 	struct objcore		*nobjcore;
+ 	struct waitinglist	*nwaitinglist;
+ 	struct busyobj		*nbusyobj;
+ 	void			*nhashpriv;
+ 	struct dstat		stats;
+ 
+ 	/* Pool stuff */
+ 	double			lastused;
+ 
+ 	struct wrw		wrw;
+ 
+ 	pthread_cond_t		cond;
+ 
+ 	VTAILQ_ENTRY(worker)	list;
+ 	struct sess		*sp;
+ 
+ 	struct VCL_conf		*vcl;
+ 
+ 	uint32_t		*wlb, *wlp, *wle;
+ 	unsigned		wlr;
+ 
+ 	/* Lookup stuff */
+ 	struct SHA256Context	*sha256ctx;
+ 
+ 	struct http_conn	htc[1];
+ 	struct ws		ws[1];
+ 	struct http		*bereq;
+ 	struct http		*beresp;
+ 	struct http		*resp;
+ 
+ 	struct exp		exp;
+ 
+ 	/* This is only here so VRT can find it */
+ 	const char		*storage_hint;
+ 
+ 	/* Fetch stuff */
+ 	struct vbc		*vbc;
+ 	struct object		*fetch_obj;
+ 	enum body_status	body_status;
+ 	struct vfp		*vfp;
+ 	struct vgz		*vgz_rx;
+ 	struct vef_priv		*vef_priv;
+ 	unsigned		fetch_failed;
+ 	unsigned		do_stream;
+ 	unsigned		do_esi;
+ 	unsigned		do_gzip;
+ 	unsigned		is_gzip;
+ 	unsigned		do_gunzip;
+ 	unsigned		is_gunzip;
+ 	unsigned		do_close;
+ 	char			*h_content_length;
+ 
+ 	/* Stream state */
+ 	struct stream_ctx	*sctx;
+ 
+ 	/* ESI stuff */
+ 	struct vep_state	*vep;
+ 	int			gzip_resp;
+ 	ssize_t			l_crc;
+ 	uint32_t		crc;
+ 
+ 	/* Timeouts */
+ 	double			connect_timeout;
+ 	double			first_byte_timeout;
+ 	double			between_bytes_timeout;
+ 
+ 	/* Delivery mode */
+ 	unsigned		res_mode;
+ #define RES_LEN			(1<<1)
+ #define RES_EOF			(1<<2)
+ #define RES_CHUNKED		(1<<3)
+ #define RES_ESI			(1<<4)
+ #define RES_ESI_CHILD		(1<<5)
+ #define RES_GUNZIP		(1<<6)
+ 
+ 	/* Temporary accounting */
+ 	struct acct		acct_tmp;
+ };
+ 
+ /* LRU ---------------------------------------------------------------*/
+ 
+ struct lru {
+ 	unsigned		magic;
+ #define LRU_MAGIC		0x3fec7bb0
+ 	VTAILQ_HEAD(,objcore)	lru_head;
+ 	struct lock		mtx;
+ };
+ 
+ /* Storage -----------------------------------------------------------*/
+ 
+ struct storage {
+ 	unsigned		magic;
+ #define STORAGE_MAGIC		0x1a4e51c0
+ 
+ #ifdef SENDFILE_WORKS
+ 	int			fd;
+ 	off_t			where;
+ #endif
+ 
+ 	VTAILQ_ENTRY(storage)	list;
+ 	struct stevedore	*stevedore;
+ 	void			*priv;
+ 
+ 	unsigned char		*ptr;
+ 	unsigned		len;
+ 	unsigned		space;
+ };
+ 
+ /* Object core structure ---------------------------------------------
+  * Objects have sideways references in the binary heap and the LRU list
+  * and we want to avoid paging in a lot of objects just to move them up
+  * or down the binheap or to move an unrelated object on the LRU list.
+  * To avoid this we use a proxy object, objcore, to hold the relevant
+  * housekeeping fields of an object.
+  */
+ 
+ typedef struct object *getobj_f(struct worker *wrk, struct objcore *oc);
+ typedef void updatemeta_f(struct objcore *oc);
+ typedef void freeobj_f(struct objcore *oc);
+ typedef struct lru *getlru_f(const struct objcore *oc);
+ 
+ struct objcore_methods {
+ 	getobj_f	*getobj;
+ 	updatemeta_f	*updatemeta;
+ 	freeobj_f	*freeobj;
+ 	getlru_f	*getlru;
+ };
+ 
+ struct objcore {
+ 	unsigned		magic;
+ #define OBJCORE_MAGIC		0x4d301302
+ 	unsigned		refcnt;
+ 	struct objcore_methods	*methods;
+ 	void			*priv;
+ 	unsigned		priv2;
+ 	struct objhead		*objhead;
+ 	struct busyobj		*busyobj;
+ 	double			timer_when;
+ 	unsigned		flags;
+ #define OC_F_BUSY		(1<<1)
+ #define OC_F_PASS		(1<<2)
+ #define OC_F_LRUDONTMOVE	(1<<4)
+ #define OC_F_PRIV		(1<<5)		/* Stevedore private flag */
+ #define OC_F_LURK		(3<<6)		/* Ban-lurker-color */
+ 	unsigned		timer_idx;
+ 	VTAILQ_ENTRY(objcore)	list;
+ 	VTAILQ_ENTRY(objcore)	lru_list;
+ 	VTAILQ_ENTRY(objcore)	ban_list;
+ 	struct ban		*ban;
+ };
+ 
+ static inline struct object *
+ oc_getobj(struct worker *wrk, struct objcore *oc)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 	AZ(oc->flags & OC_F_BUSY);
+ 	AN(oc->methods);
+ 	AN(oc->methods->getobj);
+ 	return (oc->methods->getobj(wrk, oc));
+ }
+ 
+ static inline void
+ oc_updatemeta(struct objcore *oc)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 	AN(oc->methods);
+ 	if (oc->methods->updatemeta != NULL)
+ 		oc->methods->updatemeta(oc);
+ }
+ 
+ static inline void
+ oc_freeobj(struct objcore *oc)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 	AN(oc->methods);
+ 	AN(oc->methods->freeobj);
+ 	oc->methods->freeobj(oc);
+ }
+ 
+ static inline struct lru *
+ oc_getlru(const struct objcore *oc)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 	AN(oc->methods);
+ 	AN(oc->methods->getlru);
+ 	return (oc->methods->getlru(oc));
+ }
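(Each storage/expiry backend supplies the vtable behind these wrappers; a hypothetical stevedore whose objects stay resident might wire it up along these lines, with all smx_* names invented for illustration:

    static struct lru *smx_lru;     /* one shared LRU for this stevedore */

    static struct object *
    smx_getobj(struct worker *wrk, struct objcore *oc)
    {
        (void)wrk;
        return (oc->priv);          /* object is resident; priv points at it */
    }

    static void
    smx_freeobj(struct objcore *oc)
    {
        STV_Freestore(oc->priv);    /* hand the storage back */
    }

    static struct lru *
    smx_getlru(const struct objcore *oc)
    {
        (void)oc;
        return (smx_lru);
    }

    static struct objcore_methods smx_methods = {
        .getobj     = smx_getobj,
        .updatemeta = NULL,         /* optional, cf. oc_updatemeta() */
        .freeobj    = smx_freeobj,
        .getlru     = smx_getlru,
    };

with oc->methods pointed at smx_methods when the objcore is handed over.)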
+ 
+ /* Busy Object structure ---------------------------------------------*/
+ 
+ struct busyobj {
+ 	unsigned		magic;
+ #define BUSYOBJ_MAGIC		0x23b95567
+ 	uint8_t			*vary;
+ };
+ 
+ /* Object structure --------------------------------------------------*/
+ 
+ VTAILQ_HEAD(storagehead, storage);
+ 
+ struct object {
+ 	unsigned		magic;
+ #define OBJECT_MAGIC		0x32851d42
+ 	unsigned		xid;
+ 	struct storage		*objstore;
+ 	struct objcore		*objcore;
+ 
+ 	struct ws		ws_o[1];
+ 
+ 	uint8_t			*vary;
+ 	unsigned		hits;
+ 	uint16_t		response;
+ 
+ 	/* XXX: make bitmap */
+ 	uint8_t			gziped;
+ 	/* Bit positions in the gzip stream */
+ 	ssize_t			gzip_start;
+ 	ssize_t			gzip_last;
+ 	ssize_t			gzip_stop;
+ 
+ 	ssize_t			len;
+ 
+ 	struct exp		exp;
+ 
+ 	double			last_modified;
+ 	double			last_lru;
+ 
+ 	struct http		*http;
+ 
+ 	struct storagehead	store;
+ 
+ 	struct storage		*esidata;
+ 
+ 	double			last_use;
+ 
+ };
+ 
+ /* -------------------------------------------------------------------*/
+ 
+ struct sess {
+ 	unsigned		magic;
+ #define SESS_MAGIC		0x2c2f9c5a
+ 	int			fd;
+ 	unsigned		vsl_id;
+ 	unsigned		xid;
+ 
+ 	int			restarts;
+ 	int			esi_level;
+ 	int			disable_esi;
+ 
+ 	uint8_t			hash_ignore_busy;
+ 	uint8_t			hash_always_miss;
+ 
+ 	struct worker		*wrk;
+ 
+ 	socklen_t		sockaddrlen;
+ 	socklen_t		mysockaddrlen;
+ 	struct sockaddr_storage	sockaddr;
+ 	struct sockaddr_storage	mysockaddr;
+ 	struct listen_sock	*mylsock;
+ 
+ 	/* formatted ascii client address */
+ 	char			*addr;
+ 	char			*port;
+ 	char			*client_identity;
+ 
+ 	/* HTTP request */
+ 	const char		*doclose;
+ 	struct http		*http;
+ 	struct http		*http0;
+ 
+ 	struct ws		ws[1];
+ 	char			*ws_ses;	/* WS above session data */
+ 	char			*ws_req;	/* WS above request data */
+ 
+ 	unsigned char		digest[DIGEST_LEN];
+ 
+ 	/* Built Vary string */
+ 	uint8_t			*vary_b;
+ 	uint8_t			*vary_l;
+ 	uint8_t			*vary_e;
+ 
+ 	struct http_conn	htc[1];
+ 
+ 	/* Timestamps, all on TIM_real() timescale */
+ 	double			t_open;
+ 	double			t_req;
+ 	double			t_resp;
+ 	double			t_end;
+ 
+ 	/* Acceptable grace period */
+ 	struct exp		exp;
+ 
+ 	enum step		step;
+ 	unsigned		cur_method;
+ 	unsigned		handling;
+ 	unsigned char		sendbody;
+ 	unsigned char		wantbody;
+ 	uint16_t		err_code;
+ 	const char		*err_reason;
+ 
+ 	VTAILQ_ENTRY(sess)	list;
+ 
+ 	struct director		*director;
+ 	struct object		*obj;
+ 	struct objcore		*objcore;
+ 	struct VCL_conf		*vcl;
+ 
++	struct object		*stale_obj;
+ 	/* The busy objhead we sleep on */
+ 	struct objhead		*hash_objhead;
+ 
+ 	/* Various internal stuff */
+ 	struct sessmem		*mem;
+ 
+ 	VTAILQ_ENTRY(sess)	poollist;
+ 	uint64_t		req_bodybytes;
+ 	struct acct		acct_ses;
+ 
+ #if defined(HAVE_EPOLL_CTL)
+ 	struct epoll_event ev;
+ #endif
+ };
+ 
+ /* Prototypes etc ----------------------------------------------------*/
+ 
+ /* cache_acceptor.c */
+ void VCA_Prep(struct sess *sp);
+ void VCA_Init(void);
+ void VCA_Shutdown(void);
+ int VCA_Accept(struct listen_sock *ls, struct wrk_accept *wa);
+ void VCA_SetupSess(struct worker *w);
+ void VCA_FailSess(struct worker *w);
+ 
+ /* cache_backend.c */
+ void VBE_UseHealth(const struct director *vdi);
+ 
+ struct vbc *VDI_GetFd(const struct director *, struct sess *sp);
+ int VDI_Healthy(const struct director *, const struct sess *sp);
+ void VDI_CloseFd(struct worker *wrk);
+ void VDI_RecycleFd(struct worker *wrk);
+ void VDI_AddHostHeader(const struct sess *sp);
+ void VBE_Poll(void);
+ 
+ /* cache_backend_cfg.c */
+ void VBE_Init(void);
+ struct backend *VBE_AddBackend(struct cli *cli, const struct vrt_backend *vb);
+ 
+ /* cache_backend_poll.c */
+ void VBP_Init(void);
+ 
+ /* cache_ban.c */
+ struct ban *BAN_New(void);
+ int BAN_AddTest(struct cli *, struct ban *, const char *, const char *,
+     const char *);
+ void BAN_Free(struct ban *b);
+ void BAN_Insert(struct ban *b);
+ void BAN_Init(void);
+ void BAN_NewObjCore(struct objcore *oc);
+ void BAN_DestroyObj(struct objcore *oc);
+ int BAN_CheckObject(struct object *o, const struct sess *sp);
+ void BAN_Reload(const uint8_t *ban, unsigned len);
+ struct ban *BAN_TailRef(void);
+ void BAN_Compile(void);
+ struct ban *BAN_RefBan(struct objcore *oc, double t0, const struct ban *tail);
+ void BAN_TailDeref(struct ban **ban);
+ double BAN_Time(const struct ban *ban);
+ 
+ /* cache_center.c [CNT] */
+ void CNT_Session(struct sess *sp);
+ void CNT_Init(void);
+ 
+ /* cache_cli.c [CLI] */
+ void CLI_Init(void);
+ void CLI_Run(void);
+ void CLI_AddFuncs(struct cli_proto *p);
+ extern pthread_t cli_thread;
+ #define ASSERT_CLI() do {assert(pthread_self() == cli_thread);} while (0)
+ 
+ /* cache_expiry.c */
+ void EXP_Clr(struct exp *e);
+ double EXP_Get_ttl(const struct exp *e);
+ double EXP_Get_grace(const struct exp *e);
+ double EXP_Get_keep(const struct exp *e);
+ void EXP_Set_ttl(struct exp *e, double v);
+ void EXP_Set_grace(struct exp *e, double v);
+ void EXP_Set_keep(struct exp *e, double v);
+ 
+ double EXP_Ttl(const struct sess *, const struct object*);
+ double EXP_Grace(const struct sess *, const struct object*);
++double EXP_Keep(const struct sess *, const struct object*);
+ void EXP_Insert(struct object *o);
+ void EXP_Inject(struct objcore *oc, struct lru *lru, double when);
+ void EXP_Init(void);
+ void EXP_Rearm(const struct object *o);
+ int EXP_Touch(struct objcore *oc);
+ int EXP_NukeOne(struct worker *w, struct lru *lru);
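(The keep field added on this branch is the interesting knob here: it extends how long an expired object lingers as a revalidation candidate, i.e. the stale_obj consumed in cache_center.c below. A rough worked example, assuming the additive reading of the struct exp fields:

    /*
     * entered = 1000.0, ttl = 60, grace = 10, keep = 300 (seconds):
     *   fresh (plain cache hit)       until 1060   (entered + ttl)
     *   deliverable within grace      until 1070   (+ grace)
     *   retained for IMS revalidation until roughly 1370   (+ keep)
     */
)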
+ 
+ /* cache_fetch.c */
+ struct storage *FetchStorage(struct worker *w, ssize_t sz);
+ int FetchError(struct worker *w, const char *error);
+ int FetchError2(struct worker *w, const char *error, const char *more);
+ int FetchHdr(struct sess *sp);
+ int FetchBody(struct worker *w, struct object *obj);
+ int FetchReqBody(struct sess *sp);
+ void Fetch_Init(void);
+ 
+ /* cache_gzip.c */
+ struct vgz;
+ 
+ enum vgz_flag { VGZ_NORMAL, VGZ_ALIGN, VGZ_RESET, VGZ_FINISH };
+ struct vgz *VGZ_NewUngzip(struct worker *wrk, const char *id);
+ struct vgz *VGZ_NewGzip(struct worker *wrk, const char *id);
+ void VGZ_Ibuf(struct vgz *, const void *, ssize_t len);
+ int VGZ_IbufEmpty(const struct vgz *vg);
+ void VGZ_Obuf(struct vgz *, void *, ssize_t len);
+ int VGZ_ObufFull(const struct vgz *vg);
+ int VGZ_ObufStorage(struct worker *w, struct vgz *vg);
+ int VGZ_Gzip(struct vgz *, const void **, size_t *len, enum vgz_flag);
+ int VGZ_Gunzip(struct vgz *, const void **, size_t *len);
+ int VGZ_Destroy(struct vgz **, int vsl_id);
+ void VGZ_UpdateObj(const struct vgz*, struct object *);
+ int VGZ_WrwGunzip(struct worker *w, struct vgz *, const void *ibuf,
+     ssize_t ibufl, char *obuf, ssize_t obufl, ssize_t *obufp);
+ 
+ /* Return values */
+ #define VGZ_ERROR	-1
+ #define VGZ_OK		0
+ #define VGZ_END		1
+ #define VGZ_STUCK	2
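(The VGZ_* calls follow a zlib-shaped loop: load an input buffer, offer an output buffer, crank until VGZ_END; VGZ_STUCK means the output side filled up. A hedged sketch:

    static int
    gunzip_buf(struct vgz *vg, const void *in, ssize_t inl,
        void *out, ssize_t outl)
    {
        const void *dp;
        size_t dl;
        int i;

        VGZ_Ibuf(vg, in, inl);
        do {
            VGZ_Obuf(vg, out, outl);
            i = VGZ_Gunzip(vg, &dp, &dl);   /* dp/dl = bytes produced */
            if (i == VGZ_ERROR)
                return (-1);
            /* consume the dl bytes at dp before the next turn */
        } while (i != VGZ_END && !VGZ_IbufEmpty(vg));
        return (0);
    }
)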
+ 
+ /* cache_http.c */
+ unsigned HTTP_estimate(unsigned nhttp);
+ void HTTP_Copy(struct http *to, const struct http * const fm);
+ struct http *HTTP_create(void *p, uint16_t nhttp);
+ const char *http_StatusMessage(unsigned);
+ unsigned http_EstimateWS(const struct http *fm, unsigned how, uint16_t *nhd);
+ void HTTP_Init(void);
+ void http_ClrHeader(struct http *to);
+ unsigned http_Write(struct worker *w, unsigned vsl_id, const struct http *hp,
+     int resp);
+ void http_CopyResp(struct http *to, const struct http *fm);
+ void http_SetResp(struct http *to, const char *proto, uint16_t status,
+     const char *response);
+ void http_FilterFields(struct worker *w, unsigned vsl_id, struct http *to,
+     const struct http *fm, unsigned how);
+ void http_FilterHeader(const struct sess *sp, unsigned how);
++
++/* Check if a refresh should be done */
++void http_CheckRefresh(struct sess *sp);
++/* Check if we got 304 response */
++void http_Check304(struct sess *sp);
++
+ void http_PutProtocol(struct worker *w, unsigned vsl_id, const struct http *to,
+     const char *protocol);
+ void http_PutStatus(struct http *to, uint16_t status);
+ void http_PutResponse(struct worker *w, unsigned vsl_id, const struct http *to,
+     const char *response);
+ void http_PrintfHeader(struct worker *w, unsigned vsl_id, struct http *to,
+     const char *fmt, ...);
+ void http_SetHeader(struct worker *w, unsigned vsl_id, struct http *to,
+     const char *hdr);
+ void http_SetH(const struct http *to, unsigned n, const char *fm);
+ void http_ForceGet(const struct http *to);
+ void http_Setup(struct http *ht, struct ws *ws);
+ int http_GetHdr(const struct http *hp, const char *hdr, char **ptr);
+ int http_GetHdrData(const struct http *hp, const char *hdr,
+     const char *field, char **ptr);
+ int http_GetHdrField(const struct http *hp, const char *hdr,
+     const char *field, char **ptr);
+ double http_GetHdrQ(const struct http *hp, const char *hdr, const char *field);
+ uint16_t http_GetStatus(const struct http *hp);
+ const char *http_GetReq(const struct http *hp);
+ int http_HdrIs(const struct http *hp, const char *hdr, const char *val);
+ uint16_t http_DissectRequest(struct sess *sp);
+ uint16_t http_DissectResponse(struct worker *w, const struct http_conn *htc,
+     struct http *sp);
+ const char *http_DoConnection(const struct http *hp);
+ void http_CopyHome(struct worker *w, unsigned vsl_id, const struct http *hp);
+ void http_Unset(struct http *hp, const char *hdr);
+ void http_CollectHdr(struct http *hp, const char *hdr);
+ 
+ /* cache_httpconn.c */
+ void HTC_Init(struct http_conn *htc, struct ws *ws, int fd, unsigned vsl_id,
+     unsigned maxbytes, unsigned maxhdr);
+ int HTC_Reinit(struct http_conn *htc);
+ int HTC_Rx(struct http_conn *htc);
+ ssize_t HTC_Read(struct worker *w, struct http_conn *htc, void *d, size_t len);
+ int HTC_Complete(struct http_conn *htc);
+ 
+ #define HTTPH(a, b, c, d, e, f, g) extern char b[];
+ #include "tbl/http_headers.h"
+ #undef HTTPH
+ 
+ /* cache_main.c */
+ extern volatile struct params * cache_param;
+ void THR_SetName(const char *name);
+ const char* THR_GetName(void);
+ void THR_SetSession(const struct sess *sp);
+ const struct sess * THR_GetSession(void);
+ 
+ /* cache_lck.c */
+ 
+ /* Internal functions, call only through macros below */
+ void Lck__Lock(struct lock *lck, const char *p, const char *f, int l);
+ void Lck__Unlock(struct lock *lck, const char *p, const char *f, int l);
+ int Lck__Trylock(struct lock *lck, const char *p, const char *f, int l);
+ void Lck__New(struct lock *lck, struct VSC_C_lck *, const char *);
+ void Lck__Assert(const struct lock *lck, int held);
+ 
+ /* public interface: */
+ void LCK_Init(void);
+ void Lck_Delete(struct lock *lck);
+ int Lck_CondWait(pthread_cond_t *cond, struct lock *lck, struct timespec *ts);
+ 
+ #define Lck_New(a, b) Lck__New(a, b, #b)
+ #define Lck_Lock(a) Lck__Lock(a, __func__, __FILE__, __LINE__)
+ #define Lck_Unlock(a) Lck__Unlock(a, __func__, __FILE__, __LINE__)
+ #define Lck_Trylock(a) Lck__Trylock(a, __func__, __FILE__, __LINE__)
+ #define Lck_AssertHeld(a) Lck__Assert(a, 1)
+ 
+ #define LOCK(nam) extern struct VSC_C_lck *lck_##nam;
+ #include "tbl/locks.h"
+ #undef LOCK
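(The double-underscore functions are not called directly; the macros record the call site so lock statistics and assertions can name it. Typical use, with lck_lru standing in for one of the classes declared via tbl/locks.h:

    struct lock mtx;

    Lck_New(&mtx, lck_lru);     /* contention accounted to the "lru" class */
    Lck_Lock(&mtx);
    Lck_AssertHeld(&mtx);
    /* ... critical section ... */
    Lck_Unlock(&mtx);
    Lck_Delete(&mtx);
)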
+ 
+ /* cache_panic.c */
+ void PAN_Init(void);
+ 
+ /* cache_pipe.c */
+ void PipeSession(struct sess *sp);
+ 
+ /* cache_pool.c */
+ void Pool_Init(void);
+ void Pool_Work_Thread(void *priv, struct worker *w);
+ void Pool_Wait(struct sess *sp);
+ int Pool_Schedule(struct pool *pp, struct sess *sp);
+ 
+ #define WRW_IsReleased(w)	((w)->wrw.wfd == NULL)
+ int WRW_Error(const struct worker *w);
+ void WRW_Chunked(struct worker *w);
+ void WRW_EndChunk(struct worker *w);
+ void WRW_Reserve(struct worker *w, int *fd);
+ unsigned WRW_Flush(struct worker *w);
+ unsigned WRW_FlushRelease(struct worker *w);
+ unsigned WRW_Write(struct worker *w, const void *ptr, int len);
+ unsigned WRW_WriteH(struct worker *w, const txt *hh, const char *suf);
+ #ifdef SENDFILE_WORKS
+ void WRW_Sendfile(struct worker *w, int fd, off_t off, unsigned len);
+ #endif  /* SENDFILE_WORKS */
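(WRW gathers writes into the worker's iovec array and only touches the socket on flush; the usual pattern, as a sketch with hypothetical error handling:

    WRW_Reserve(w, &sp->fd);    /* bind the worker's writer to the fd */
    (void)WRW_Write(w, "HTTP/1.1 200 OK\r\n", -1);  /* -1: use strlen() */
    /* ... more WRW_Write()/WRW_WriteH() calls, all buffered ... */
    if (WRW_FlushRelease(w))    /* non-zero means a write error */
        sp->doclose = "write error";
)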
+ 
+ /* cache_session.c [SES] */
+ struct sess *SES_New(struct worker *wrk, struct sesspool *pp);
+ struct sess *SES_Alloc(void);
+ void SES_Close(struct sess *sp, const char *reason);
+ void SES_Delete(struct sess *sp, const char *reason);
+ void SES_Charge(struct sess *sp);
+ struct sesspool *SES_NewPool(struct pool *pp);
+ void SES_DeletePool(struct sesspool *sp, struct worker *wrk);
+ int SES_Schedule(struct sess *sp);
+ 
+ 
+ /* cache_shmlog.c */
+ extern struct VSC_C_main *VSC_C_main;
+ void VSL_Init(void);
+ void *VSM_Alloc(unsigned size, const char *class, const char *type,
+     const char *ident);
+ void VSM_Free(void *ptr);
+ #ifdef VSL_ENDMARKER
+ void VSL(enum VSL_tag_e tag, int id, const char *fmt, ...);
+ void WSLR(struct worker *w, enum VSL_tag_e tag, int id, txt t);
+ void WSL(struct worker *w, enum VSL_tag_e tag, int id, const char *fmt, ...);
+ void WSLB(struct worker *w, enum VSL_tag_e tag, const char *fmt, ...);
+ 
+ void WSL_Flush(struct worker *w, int overflow);
+ 
+ #define DSL(flag, tag, id, ...)					\
+ 	do {							\
+ 		if (cache_param->diag_bitmap & (flag))		\
+ 			VSL((tag), (id), __VA_ARGS__);		\
+ 	} while (0)
+ 
+ #define WSP(sess, tag, ...)					\
+ 	WSL((sess)->wrk, tag, (sess)->vsl_id, __VA_ARGS__)
+ 
+ #define WSPR(sess, tag, txt)					\
+ 	WSLR((sess)->wrk, tag, (sess)->vsl_id, txt)
+ 
+ #define INCOMPL() do {							\
+ 	VSL(SLT_Debug, 0, "INCOMPLETE AT: %s(%d)", __func__, __LINE__); \
+ 	fprintf(stderr,							\
+ 	    "INCOMPLETE AT: %s(%d)\n",					\
+ 	    (const char *)__func__, __LINE__);				\
+ 	abort();							\
+ 	} while (0)
+ #endif
+ 
+ /* cache_response.c */
+ void RES_BuildHttp(const struct sess *sp);
+ void RES_WriteObj(struct sess *sp);
+ void RES_StreamStart(struct sess *sp);
+ void RES_StreamEnd(struct sess *sp);
+ void RES_StreamPoll(struct worker *);
+ 
+ /* cache_vary.c */
+ struct vsb *VRY_Create(const struct sess *sp, const struct http *hp);
+ int VRY_Match(struct sess *sp, const uint8_t *vary);
+ void VRY_Validate(const uint8_t *vary);
+ 
+ /* cache_vcl.c */
+ void VCL_Init(void);
+ void VCL_Refresh(struct VCL_conf **vcc);
+ void VCL_Rel(struct VCL_conf **vcc);
+ void VCL_Poll(void);
+ const char *VCL_Return_Name(unsigned method);
+ 
+ #define VCL_MET_MAC(l,u,b) void VCL_##l##_method(struct sess *);
+ #include "tbl/vcl_returns.h"
+ #undef VCL_MET_MAC
+ 
+ /* cache_vrt.c */
+ 
+ char *VRT_String(struct ws *ws, const char *h, const char *p, va_list ap);
+ char *VRT_StringList(char *d, unsigned dl, const char *p, va_list ap);
+ 
+ void ESI_Deliver(struct sess *);
+ void ESI_DeliverChild(const struct sess *);
+ 
+ /* cache_vrt_vmod.c */
+ void VMOD_Init(void);
+ 
+ /* cache_wrk.c */
+ 
+ void WRK_Init(void);
+ int WRK_TrySumStat(struct worker *w);
+ void WRK_SumStat(struct worker *w);
+ void *WRK_thread(void *priv);
+ typedef void *bgthread_t(struct sess *, void *priv);
+ void WRK_BgThread(pthread_t *thr, const char *name, bgthread_t *func,
+     void *priv);
+ 
+ /* cache_ws.c */
+ 
+ void WS_Init(struct ws *ws, const char *id, void *space, unsigned len);
+ unsigned WS_Reserve(struct ws *ws, unsigned bytes);
+ void WS_Release(struct ws *ws, unsigned bytes);
+ void WS_ReleaseP(struct ws *ws, char *ptr);
+ void WS_Assert(const struct ws *ws);
+ void WS_Reset(struct ws *ws, char *p);
+ char *WS_Alloc(struct ws *ws, unsigned bytes);
+ char *WS_Dup(struct ws *ws, const char *);
+ char *WS_Snapshot(struct ws *ws);
+ unsigned WS_Free(const struct ws *ws);
+ 
+ /* rfc2616.c */
+ void RFC2616_Ttl(const struct sess *sp);
+ enum body_status RFC2616_Body(const struct sess *sp);
+ unsigned RFC2616_Req_Gzip(const struct sess *sp);
+ int RFC2616_Do_Cond(const struct sess *sp);
+ 
+ /* stevedore.c */
+ struct object *STV_NewObject(struct sess *sp, const char *hint, unsigned len,
+     struct exp *, uint16_t nhttp);
+ struct storage *STV_alloc(struct worker *w, size_t size);
+ void STV_trim(struct storage *st, size_t size);
+ void STV_free(struct storage *st);
+ void STV_open(void);
+ void STV_close(void);
+ void STV_Freestore(struct object *o);
+ 
+ /* storage_synth.c */
+ struct vsb *SMS_Makesynth(struct object *obj);
+ void SMS_Finish(struct object *obj);
+ void SMS_Init(void);
+ 
+ /* storage_persistent.c */
+ void SMP_Init(void);
+ void SMP_Ready(void);
+ void SMP_NewBan(const uint8_t *ban, unsigned len);
+ 
+ /*
+  * A normal pointer difference is signed, but we never want a negative value
+  * so this little tool will make sure we don't get that.
+  */
+ 
+ static inline unsigned
+ pdiff(const void *b, const void *e)
+ {
+ 
+ 	assert(b <= e);
+ 	return
+ 	    ((unsigned)((const unsigned char *)e - (const unsigned char *)b));
+ }
+ 
+ static inline void
+ Tcheck(const txt t)
+ {
+ 
+ 	AN(t.b);
+ 	AN(t.e);
+ 	assert(t.b <= t.e);
+ }
+ 
+ /*
+  * unsigned length of a txt
+  */
+ 
+ static inline unsigned
+ Tlen(const txt t)
+ {
+ 
+ 	Tcheck(t);
+ 	return ((unsigned)(t.e - t.b));
+ }
+ 
+ static inline void
+ Tadd(txt *t, const char *p, int l)
+ {
+ 	Tcheck(*t);
+ 
+ 	if (l <= 0) {
+ 	} else if (t->b + l < t->e) {
+ 		memcpy(t->b, p, l);
+ 		t->b += l;
+ 	} else {
+ 		t->b = t->e;
+ 	}
+ }
+ 
+ static inline void
+ AssertObjBusy(const struct object *o)
+ {
+ 	AN(o->objcore);
+ 	AN (o->objcore->flags & OC_F_BUSY);
+ }
+ 
+ static inline void
+ AssertObjCorePassOrBusy(const struct objcore *oc)
+ {
+ 	if (oc != NULL)
+ 		AN (oc->flags & OC_F_BUSY);
+ }
diff --cc bin/varnishd/cache/cache_center.c
index 0000000,e42fac8..7b8dc8a
mode 000000,100644..100644
--- a/bin/varnishd/cache/cache_center.c
+++ b/bin/varnishd/cache/cache_center.c
@@@ -1,0 -1,1691 +1,1724 @@@
+ /*-
+  * Copyright (c) 2006 Verdens Gang AS
+  * Copyright (c) 2006-2011 Varnish Software AS
+  * All rights reserved.
+  *
+  * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  *
+  * This file contains the central state machine for pushing requests.
+  *
+  * We cannot just use direct calls because it is possible to kick a
+  * request back to the lookup stage (usually after a rewrite).  The
+  * state engine also allows us to break the processing up into some
+  * logical chunks which improves readability a little bit.
+  *
+  * Since the states are rather nasty in detail, I have decided to embed
+  * a dot(1) graph in the source code comments.  So to see the big picture,
+  * extract the DOT lines and run through dot(1), for instance with the
+  * command:
+  *	sed -n '/^DOT/s///p' cache_center.c | dot -Tps > /tmp/_.ps
+  */
+ 
+ /*
+ DOT digraph vcl_center {
+ xDOT	page="8.2,11.5"
+ DOT	size="7.2,10.5"
+ DOT	margin="0.5"
+ DOT	center="1"
+ DOT acceptor [
+ DOT	shape=hexagon
+ DOT	label="Request received"
+ DOT ]
+ DOT ERROR [shape=plaintext]
+ DOT RESTART [shape=plaintext]
+ DOT acceptor -> start [style=bold,color=green]
+  */
+ 
+ #include "config.h"
+ 
+ #include <math.h>
+ #include <poll.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ 
+ #include "cache.h"
+ 
+ #include "hash/hash_slinger.h"
+ #include "vcl.h"
+ #include "vcli_priv.h"
+ #include "vsha256.h"
+ #include "vtcp.h"
+ #include "vtim.h"
+ 
+ #ifndef HAVE_SRANDOMDEV
+ #include "compat/srandomdev.h"
+ #endif
+ 
+ static unsigned xids;
+ 
+ /*--------------------------------------------------------------------
+  * WAIT
+  * Wait (briefly) until we have a full request in our htc.
+  */
+ 
+ static int
+ cnt_wait(struct sess *sp)
+ {
+ 	int i;
+ 	struct pollfd pfd[1];
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	AZ(sp->vcl);
+ 	AZ(sp->obj);
+ 	assert(sp->xid == 0);
+ 
+ 	i = HTC_Complete(sp->htc);
+ 	if (i == 0 && cache_param->session_linger > 0) {
+ 		pfd[0].fd = sp->fd;
+ 		pfd[0].events = POLLIN;
+ 		pfd[0].revents = 0;
+ 		i = poll(pfd, 1, cache_param->session_linger);
+ 		if (i)
+ 			i = HTC_Rx(sp->htc);
+ 	}
+ 	if (i == 0) {
+ 		WSP(sp, SLT_Debug, "herding");
+ 		sp->wrk->stats.sess_herd++;
+ 		SES_Charge(sp);
+ 		sp->wrk = NULL;
+ 		Pool_Wait(sp);
+ 		return (1);
+ 	}
+ 	if (i == 1) {
+ 		sp->step = STP_START;
+ 		return (0);
+ 	}
+ 	if (i == -2) {
+ 		SES_Close(sp, "overflow");
+ 		return (0);
+ 	}
+ 	if (i == -1 && Tlen(sp->htc->rxbuf) == 0 &&
+ 	    (errno == 0 || errno == ECONNRESET))
+ 		SES_Close(sp, "EOF");
+ 	else
+ 		SES_Close(sp, "error");
+ 	sp->step = STP_DONE;
+ 	return (0);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * We have a refcounted object on the session, now deliver it.
+  *
+ DOT subgraph xcluster_prepresp {
+ DOT	prepresp [
+ DOT		shape=ellipse
+ DOT		label="Filter obj.->resp."
+ DOT	]
+ DOT	vcl_deliver [
+ DOT		shape=record
+ DOT		label="vcl_deliver()|resp."
+ DOT	]
+ DOT	prepresp -> vcl_deliver [style=bold,color=green]
+ DOT	prepresp -> vcl_deliver [style=bold,color=cyan]
+ DOT	prepresp -> vcl_deliver [style=bold,color=red]
+ DOT	prepresp -> vcl_deliver [style=bold,color=blue,]
+ DOT	vcl_deliver -> deliver [style=bold,color=green,label=deliver]
+ DOT	vcl_deliver -> deliver [style=bold,color=red]
+ DOT	vcl_deliver -> deliver [style=bold,color=blue]
+ DOT     vcl_deliver -> errdeliver [label="error"]
+ DOT     errdeliver [label="ERROR",shape=plaintext]
+ DOT     vcl_deliver -> rstdeliver [label="restart",color=purple]
+ DOT     rstdeliver [label="RESTART",shape=plaintext]
+ DOT     vcl_deliver -> streambody [style=bold,color=cyan,label="deliver"]
+ DOT }
+  *
+  */
+ 
+ static int
+ cnt_prepresp(struct sess *sp)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->obj, OBJECT_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
+ 
+ 	if (sp->wrk->do_stream)
+ 		AssertObjCorePassOrBusy(sp->obj->objcore);
+ 
+ 	sp->wrk->res_mode = 0;
+ 
+ 	if ((sp->wrk->h_content_length != NULL || !sp->wrk->do_stream) &&
+ 	    !sp->wrk->do_gzip && !sp->wrk->do_gunzip)
+ 		sp->wrk->res_mode |= RES_LEN;
+ 
+ 	if (!sp->disable_esi && sp->obj->esidata != NULL) {
+ 		/* In ESI mode, we don't know the aggregate length */
+ 		sp->wrk->res_mode &= ~RES_LEN;
+ 		sp->wrk->res_mode |= RES_ESI;
+ 	}
+ 
+ 	if (sp->esi_level > 0) {
+ 		sp->wrk->res_mode &= ~RES_LEN;
+ 		sp->wrk->res_mode |= RES_ESI_CHILD;
+ 	}
+ 
+ 	if (cache_param->http_gzip_support && sp->obj->gziped &&
+ 	    !RFC2616_Req_Gzip(sp)) {
+ 		/*
+ 		 * We don't know what it uncompresses to
+ 		 * XXX: we could cache that
+ 		 */
+ 		sp->wrk->res_mode &= ~RES_LEN;
+ 		sp->wrk->res_mode |= RES_GUNZIP;
+ 	}
+ 
+ 	if (!(sp->wrk->res_mode & (RES_LEN|RES_CHUNKED|RES_EOF))) {
+ 		if (sp->obj->len == 0 && !sp->wrk->do_stream)
+ 			/*
+ 			 * If the object is empty, neither ESI nor GUNZIP
+ 			 * can make it any different size
+ 			 */
+ 			sp->wrk->res_mode |= RES_LEN;
+ 		else if (!sp->wantbody) {
+ 			/* Nothing */
+ 		} else if (sp->http->protover >= 11) {
+ 			sp->wrk->res_mode |= RES_CHUNKED;
+ 		} else {
+ 			sp->wrk->res_mode |= RES_EOF;
+ 			sp->doclose = "EOF mode";
+ 		}
+ 	}
+ 
+ 	sp->t_resp = VTIM_real();
+ 	if (sp->obj->objcore != NULL) {
+ 		if ((sp->t_resp - sp->obj->last_lru) > cache_param->lru_timeout &&
+ 		    EXP_Touch(sp->obj->objcore))
+ 			sp->obj->last_lru = sp->t_resp;
+ 		sp->obj->last_use = sp->t_resp;	/* XXX: locking ? */
+ 	}
+ 	http_Setup(sp->wrk->resp, sp->wrk->ws);
+ 	RES_BuildHttp(sp);
+ 	VCL_deliver_method(sp);
+ 	switch (sp->handling) {
+ 	case VCL_RET_DELIVER:
+ 		break;
+ 	case VCL_RET_RESTART:
+ 		if (sp->restarts >= cache_param->max_restarts)
+ 			break;
+ 		if (sp->wrk->do_stream) {
+ 			VDI_CloseFd(sp->wrk);
+ 			HSH_Drop(sp);
+ 		} else {
+ 			(void)HSH_Deref(sp->wrk, NULL, &sp->obj);
+ 		}
+ 		AZ(sp->obj);
+ 		sp->restarts++;
+ 		sp->director = NULL;
+ 		sp->wrk->h_content_length = NULL;
+ 		http_Setup(sp->wrk->bereq, NULL);
+ 		http_Setup(sp->wrk->beresp, NULL);
+ 		http_Setup(sp->wrk->resp, NULL);
+ 		sp->step = STP_RECV;
+ 		return (0);
+ 	default:
+ 		WRONG("Illegal action in vcl_deliver{}");
+ 	}
+ 	if (sp->wrk->do_stream) {
+ 		AssertObjCorePassOrBusy(sp->obj->objcore);
+ 		sp->step = STP_STREAMBODY;
+ 	} else {
+ 		sp->step = STP_DELIVER;
+ 	}
+ 	return (0);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Deliver an already stored object
+  *
+ DOT subgraph xcluster_deliver {
+ DOT	deliver [
+ DOT		shape=ellipse
+ DOT		label="Send body"
+ DOT	]
+ DOT }
+ DOT deliver -> DONE [style=bold,color=green]
+ DOT deliver -> DONE [style=bold,color=red]
+ DOT deliver -> DONE [style=bold,color=blue]
+  *
+  */
+ 
+ static int
+ cnt_deliver(struct sess *sp)
+ {
+ 
+ 	sp->director = NULL;
+ 	sp->restarts = 0;
+ 
+ 	RES_WriteObj(sp);
+ 
+ 	assert(WRW_IsReleased(sp->wrk));
+ 	assert(sp->wrk->wrw.ciov == sp->wrk->wrw.siov);
+ 	(void)HSH_Deref(sp->wrk, NULL, &sp->obj);
+ 	http_Setup(sp->wrk->resp, NULL);
+ 	sp->step = STP_DONE;
+ 	return (0);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * This is the final state, figure out if we should close or recycle
+  * the client connection
+  *
+ DOT	DONE [
+ DOT		shape=hexagon
+ DOT		label="Request completed"
+ DOT	]
+  */
+ 
+ static int
+ cnt_done(struct sess *sp)
+ {
+ 	double dh, dp, da;
+ 	int i;
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	CHECK_OBJ_ORNULL(sp->vcl, VCL_CONF_MAGIC);
+ 
+ 	AZ(sp->obj);
+ 	AZ(sp->wrk->vbc);
+ 	sp->director = NULL;
+ 	sp->restarts = 0;
+ 
+ 	sp->wrk->do_esi = 0;
+ 	sp->wrk->do_gunzip = 0;
+ 	sp->wrk->do_gzip = 0;
+ 	sp->wrk->do_stream = 0;
+ 	sp->wrk->is_gunzip = 0;
+ 	sp->wrk->is_gzip = 0;
+ 
+ 	if (sp->vcl != NULL && sp->esi_level == 0) {
+ 		if (sp->wrk->vcl != NULL)
+ 			VCL_Rel(&sp->wrk->vcl);
+ 		sp->wrk->vcl = sp->vcl;
+ 		sp->vcl = NULL;
+ 	}
+ 
+ 	SES_Charge(sp);
+ 
+ 	sp->t_end = VTIM_real();
+ 	sp->wrk->lastused = sp->t_end;
+ 	if (sp->xid == 0) {
+ 		sp->t_req = sp->t_end;
+ 		sp->t_resp = sp->t_end;
+ 	} else if (sp->esi_level == 0) {
+ 		dp = sp->t_resp - sp->t_req;
+ 		da = sp->t_end - sp->t_resp;
+ 		dh = sp->t_req - sp->t_open;
+ 		/* XXX: Add StatReq == StatSess */
+ 		/* XXX: Workaround for pipe */
+ 		if (sp->fd >= 0) {
+ 			WSP(sp, SLT_Length, "%ju",
+ 			    (uintmax_t)sp->req_bodybytes);
+ 		}
+ 		WSP(sp, SLT_ReqEnd, "%u %.9f %.9f %.9f %.9f %.9f",
+ 		    sp->xid, sp->t_req, sp->t_end, dh, dp, da);
+ 	}
+ 	sp->xid = 0;
+ 	sp->t_open = sp->t_end;
+ 	sp->t_resp = NAN;
+ 	WSL_Flush(sp->wrk, 0);
+ 
+ 	/* If we did an ESI include, don't mess up our state */
+ 	if (sp->esi_level > 0)
+ 		return (1);
+ 
+ 	sp->req_bodybytes = 0;
+ 
+ 	sp->t_req = NAN;
+ 	sp->hash_always_miss = 0;
+ 	sp->hash_ignore_busy = 0;
+ 
+ 	if (sp->fd >= 0 && sp->doclose != NULL) {
+ 		/*
+ 		 * This is an orderly close of the connection; ditch nolinger
+ 		 * before we close, to get queued data transmitted.
+ 		 */
+ 		// XXX: not yet (void)VTCP_linger(sp->fd, 0);
+ 		SES_Close(sp, sp->doclose);
+ 	}
+ 
+ 	if (sp->fd < 0) {
+ 		sp->wrk->stats.sess_closed++;
+ 		SES_Delete(sp, NULL);
+ 		return (1);
+ 	}
+ 
+ 	if (sp->wrk->stats.client_req >= cache_param->wthread_stats_rate)
+ 		WRK_SumStat(sp->wrk);
+ 	/* Reset the workspace to the session-watermark */
+ 	WS_Reset(sp->ws, sp->ws_ses);
+ 	WS_Reset(sp->wrk->ws, NULL);
+ 
+ 	i = HTC_Reinit(sp->htc);
+ 	if (i == 1) {
+ 		sp->wrk->stats.sess_pipeline++;
+ 		sp->step = STP_START;
+ 		return (0);
+ 	}
+ 	if (Tlen(sp->htc->rxbuf)) {
+ 		sp->wrk->stats.sess_readahead++;
+ 		sp->step = STP_WAIT;
+ 		return (0);
+ 	}
+ 	if (cache_param->session_linger > 0) {
+ 		sp->wrk->stats.sess_linger++;
+ 		sp->step = STP_WAIT;
+ 		return (0);
+ 	}
+ 	sp->wrk->stats.sess_herd++;
+ 	sp->wrk = NULL;
+ 	Pool_Wait(sp);
+ 	return (1);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Emit an error
+  *
+ DOT subgraph xcluster_error {
+ DOT	vcl_error [
+ DOT		shape=record
+ DOT		label="vcl_error()|resp."
+ DOT	]
+ DOT	ERROR -> vcl_error
+ DOT	vcl_error-> prepresp [label=deliver]
+ DOT }
+ DOT vcl_error-> rsterr [label="restart",color=purple]
+ DOT rsterr [label="RESTART",shape=plaintext]
+  */
+ 
+ static int
+ cnt_error(struct sess *sp)
+ {
+ 	struct worker *w;
+ 	struct http *h;
+ 	char date[40];
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 
+ 	sp->wrk->do_esi = 0;
+ 	sp->wrk->is_gzip = 0;
+ 	sp->wrk->is_gunzip = 0;
+ 	sp->wrk->do_gzip = 0;
+ 	sp->wrk->do_gunzip = 0;
+ 	sp->wrk->do_stream = 0;
+ 
+ 	w = sp->wrk;
+ 	if (sp->obj == NULL) {
+ 		HSH_Prealloc(sp);
+ 		EXP_Clr(&w->exp);
+ 		sp->obj = STV_NewObject(sp, NULL, cache_param->http_resp_size,
+ 		     &w->exp, (uint16_t)cache_param->http_max_hdr);
+ 		if (sp->obj == NULL)
+ 			sp->obj = STV_NewObject(sp, TRANSIENT_STORAGE,
+ 			    cache_param->http_resp_size, &w->exp,
+ 			    (uint16_t)cache_param->http_max_hdr);
+ 		if (sp->obj == NULL) {
+ 			sp->doclose = "Out of objects";
+ 			sp->director = NULL;
+ 			sp->wrk->h_content_length = NULL;
+ 			http_Setup(sp->wrk->beresp, NULL);
+ 			http_Setup(sp->wrk->bereq, NULL);
+ 			sp->step = STP_DONE;
+ 			return(0);
+ 		}
+ 		AN(sp->obj);
+ 		sp->obj->xid = sp->xid;
+ 		sp->obj->exp.entered = sp->t_req;
+ 	} else {
+ 		/* XXX: Null the headers ? */
+ 	}
+ 	CHECK_OBJ_NOTNULL(sp->obj, OBJECT_MAGIC);
+ 	h = sp->obj->http;
+ 
+ 	if (sp->err_code < 100 || sp->err_code > 999)
+ 		sp->err_code = 501;
+ 
+ 	http_PutProtocol(w, sp->vsl_id, h, "HTTP/1.1");
+ 	http_PutStatus(h, sp->err_code);
+ 	VTIM_format(VTIM_real(), date);
+ 	http_PrintfHeader(w, sp->vsl_id, h, "Date: %s", date);
+ 	http_SetHeader(w, sp->vsl_id, h, "Server: Varnish");
+ 
+ 	if (sp->err_reason != NULL)
+ 		http_PutResponse(w, sp->vsl_id, h, sp->err_reason);
+ 	else
+ 		http_PutResponse(w, sp->vsl_id, h,
+ 		    http_StatusMessage(sp->err_code));
+ 	VCL_error_method(sp);
+ 
+ 	if (sp->handling == VCL_RET_RESTART &&
+ 	    sp->restarts <  cache_param->max_restarts) {
+ 		HSH_Drop(sp);
+ 		sp->director = NULL;
+ 		sp->restarts++;
+ 		sp->step = STP_RECV;
+ 		return (0);
+ 	} else if (sp->handling == VCL_RET_RESTART)
+ 		sp->handling = VCL_RET_DELIVER;
+ 
+ 	/* We always close when we take this path */
+ 	sp->doclose = "error";
+ 	sp->wantbody = 1;
+ 
+ 	assert(sp->handling == VCL_RET_DELIVER);
+ 	sp->err_code = 0;
+ 	sp->err_reason = NULL;
+ 	http_Setup(sp->wrk->bereq, NULL);
+ 	sp->step = STP_PREPRESP;
+ 	return (0);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Fetch response headers from the backend
+  *
+ DOT subgraph xcluster_fetch {
+ DOT	fetch [
+ DOT		shape=ellipse
+ DOT		label="fetch hdr\nfrom backend\n(find obj.ttl)"
+ DOT	]
+ DOT	vcl_fetch [
+ DOT		shape=record
+ DOT		label="vcl_fetch()|req.\nbereq.\nberesp."
+ DOT	]
+ DOT	fetch -> vcl_fetch [style=bold,color=blue]
+ DOT	fetch -> vcl_fetch [style=bold,color=red]
+ DOT	fetch_pass [
+ DOT		shape=ellipse
+ DOT		label="obj.f.pass=true"
+ DOT	]
+ DOT	vcl_fetch -> fetch_pass [label="hit_for_pass",style=bold,color=red]
+ DOT }
+ DOT fetch_pass -> fetchbody [style=bold,color=red]
+ DOT vcl_fetch -> fetchbody [label="deliver",style=bold,color=blue]
+ DOT vcl_fetch -> rstfetch [label="restart",color=purple]
+ DOT rstfetch [label="RESTART",shape=plaintext]
+ DOT fetch -> errfetch
+ DOT vcl_fetch -> errfetch [label="error"]
+ DOT errfetch [label="ERROR",shape=plaintext]
+  */
+ 
+ static int
+ cnt_fetch(struct sess *sp)
+ {
+ 	int i;
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
+ 
+ 	AN(sp->director);
+ 	AZ(sp->wrk->vbc);
+ 	AZ(sp->wrk->h_content_length);
+ 	AZ(sp->wrk->do_close);
+ 	AZ(sp->wrk->storage_hint);
+ 
+ 	http_Setup(sp->wrk->beresp, sp->wrk->ws);
+ 
+ 	i = FetchHdr(sp);
+ 	/*
+ 	 * If we recycle a backend connection, there is a finite chance
+ 	 * that the backend closed it before we get a request to it.
+ 	 * Do a single retry in that case.
+ 	 */
+ 	if (i == 1) {
+ 		VSC_C_main->backend_retry++;
+ 		i = FetchHdr(sp);
+ 	}
+ 
+ 	if (i) {
+ 		sp->handling = VCL_RET_ERROR;
+ 		sp->err_code = 503;
+ 	} else {
+ 		/*
+ 		 * These two headers can be spread over multiple actual headers
+ 		 * and we rely on their content outside of VCL, so collect them
+ 		 * into one line here.
+ 		 */
+ 		http_CollectHdr(sp->wrk->beresp, H_Cache_Control);
+ 		http_CollectHdr(sp->wrk->beresp, H_Vary);
+ 
+ 		/*
+ 		 * Figure out how the fetch is supposed to happen, before the
+ 		 * headers are adulterated by VCL
+ 		 * NB: Also sets other sp->wrk variables
+ 		 */
+ 		sp->wrk->body_status = RFC2616_Body(sp);
+ 
+ 		sp->err_code = http_GetStatus(sp->wrk->beresp);
+ 
+ 		/*
+ 		 * What does RFC2616 think about TTL ?
+ 		 */
+ 		EXP_Clr(&sp->wrk->exp);
+ 		sp->wrk->exp.entered = VTIM_real();
+ 		RFC2616_Ttl(sp);
++		sp->wrk->exp.keep = cache_param->default_keep;
+ 
+ 		/* pass from vclrecv{} has negative TTL */
+ 		if (sp->objcore == NULL)
+ 			sp->wrk->exp.ttl = -1.;
+ 
+ 		AZ(sp->wrk->do_esi);
+ 
+ 		VCL_fetch_method(sp);
+ 
+ 		switch (sp->handling) {
+ 		case VCL_RET_HIT_FOR_PASS:
+ 			if (sp->objcore != NULL)
+ 				sp->objcore->flags |= OC_F_PASS;
+ 			sp->step = STP_FETCHBODY;
+ 			return (0);
+ 		case VCL_RET_DELIVER:
+ 			AssertObjCorePassOrBusy(sp->objcore);
+ 			sp->step = STP_FETCHBODY;
+ 			return (0);
+ 		default:
+ 			break;
+ 		}
+ 
+ 		/* We are not going to fetch the body, Close the connection */
+ 		VDI_CloseFd(sp->wrk);
+ 	}
+ 
+ 	/* Clean up partial fetch */
+ 	AZ(sp->wrk->vbc);
+ 
+ 	if (sp->objcore != NULL) {
+ 		CHECK_OBJ_NOTNULL(sp->objcore, OBJCORE_MAGIC);
+ 		AZ(HSH_Deref(sp->wrk, sp->objcore, NULL));
+ 		sp->objcore = NULL;
+ 	}
+ 	http_Setup(sp->wrk->bereq, NULL);
+ 	http_Setup(sp->wrk->beresp, NULL);
+ 	sp->wrk->h_content_length = NULL;
+ 	sp->director = NULL;
+ 	sp->wrk->storage_hint = NULL;
+ 
+ 	switch (sp->handling) {
+ 	case VCL_RET_RESTART:
+ 		sp->restarts++;
+ 		sp->step = STP_RECV;
+ 		return (0);
+ 	case VCL_RET_ERROR:
+ 		sp->step = STP_ERROR;
+ 		return (0);
+ 	default:
+ 		WRONG("Illegal action in vcl_fetch{}");
+ 	}
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Fetch response body from the backend
+  *
+ DOT subgraph xcluster_body {
+ DOT	fetchbody [
+ DOT		shape=diamond
+ DOT		label="stream ?"
+ DOT	]
+ DOT	fetchbody2 [
+ DOT		shape=ellipse
+ DOT		label="fetch body\nfrom backend\n"
+ DOT	]
+ DOT }
+ DOT fetchbody -> fetchbody2 [label=no,style=bold,color=red]
+ DOT fetchbody -> fetchbody2 [style=bold,color=blue]
+ DOT fetchbody -> prepresp [label=yes,style=bold,color=cyan]
+ DOT fetchbody2 -> prepresp [style=bold,color=red]
+ DOT fetchbody2 -> prepresp [style=bold,color=blue]
+  */
+ 
+ 
+ static int
+ cnt_fetchbody(struct sess *sp)
+ {
+ 	int i;
+ 	struct http *hp, *hp2;
+ 	char *b;
 -	uint16_t nhttp;
++	uint16_t nhttp, stale_nhttp;
+ 	unsigned l;
+ 	struct vsb *vary = NULL;
+ 	int varyl = 0, pass;
+ 
+ 	assert(sp->handling == VCL_RET_HIT_FOR_PASS ||
+ 	    sp->handling == VCL_RET_DELIVER);
+ 
+ 	if (sp->objcore == NULL) {
+ 		/* This is a pass from vcl_recv */
+ 		pass = 1;
+ 		/* VCL may have fiddled this, but that doesn't help */
+ 		sp->wrk->exp.ttl = -1.;
+ 	} else if (sp->handling == VCL_RET_HIT_FOR_PASS) {
+ 		/* pass from vcl_fetch{} -> hit-for-pass */
+ 		/* XXX: the bereq was not filtered pass... */
+ 		pass = 1;
+ 	} else {
+ 		/* regular object */
+ 		pass = 0;
+ 	}
+ 
+ 	/*
+ 	 * The VCL variables beresp.do_g[un]zip tells us how we want the
+ 	 * object processed before it is stored.
+ 	 *
+ 	 * The backend Content-Encoding header tells us what we are going
+ 	 * to receive, which we classify in the following three classes:
+ 	 *
+ 	 *	"Content-Encoding: gzip"	--> object is gzip'ed.
+ 	 *	no Content-Encoding		--> object is not gzip'ed.
+ 	 *	anything else			--> do nothing wrt gzip
+ 	 *
+ 	 */
+ 
+ 	AZ(sp->wrk->vfp);
+ 
+ 	/* We do nothing unless the param is set */
+ 	if (!cache_param->http_gzip_support)
+ 		sp->wrk->do_gzip = sp->wrk->do_gunzip = 0;
+ 
+ 	sp->wrk->is_gzip =
+ 	    http_HdrIs(sp->wrk->beresp, H_Content_Encoding, "gzip");
+ 
+ 	sp->wrk->is_gunzip =
+ 	    !http_GetHdr(sp->wrk->beresp, H_Content_Encoding, NULL);
+ 
+ 	/* It can't be both */
+ 	assert(sp->wrk->is_gzip == 0 || sp->wrk->is_gunzip == 0);
+ 
+ 	/* We won't gunzip unless it is gzip'ed */
+ 	if (sp->wrk->do_gunzip && !sp->wrk->is_gzip)
+ 		sp->wrk->do_gunzip = 0;
+ 
+ 	/* If we do gunzip, remove the C-E header */
+ 	if (sp->wrk->do_gunzip)
+ 		http_Unset(sp->wrk->beresp, H_Content_Encoding);
+ 
+ 	/* We won't gzip unless it is ungzip'ed */
+ 	if (sp->wrk->do_gzip && !sp->wrk->is_gunzip)
+ 		sp->wrk->do_gzip = 0;
+ 
+ 	/* If we do gzip, add the C-E header */
+ 	if (sp->wrk->do_gzip)
+ 		http_SetHeader(sp->wrk, sp->vsl_id, sp->wrk->beresp,
+ 		    "Content-Encoding: gzip");
+ 
+ 	/* But we can't do both at the same time */
+ 	assert(sp->wrk->do_gzip == 0 || sp->wrk->do_gunzip == 0);
+ 
+ 	/* ESI takes precedence and handles gzip/gunzip itself */
+ 	if (sp->wrk->do_esi)
+ 		sp->wrk->vfp = &vfp_esi;
+ 	else if (sp->wrk->do_gunzip)
+ 		sp->wrk->vfp = &vfp_gunzip;
+ 	else if (sp->wrk->do_gzip)
+ 		sp->wrk->vfp = &vfp_gzip;
+ 	else if (sp->wrk->is_gzip)
+ 		sp->wrk->vfp = &vfp_testgzip;
+ 
+ 	if (sp->wrk->do_esi || sp->esi_level > 0)
+ 		sp->wrk->do_stream = 0;
+ 	if (!sp->wantbody)
+ 		sp->wrk->do_stream = 0;
+ 
+ 	l = http_EstimateWS(sp->wrk->beresp,
+ 	    pass ? HTTPH_R_PASS : HTTPH_A_INS, &nhttp);
++	if (sp->stale_obj) {
++		l += http_EstimateWS(sp->stale_obj->http, 0, &stale_nhttp);
++		nhttp += stale_nhttp;
++	}
+ 
+ 	/* Create Vary instructions */
+ 	if (sp->objcore != NULL) {
+ 		CHECK_OBJ_NOTNULL(sp->objcore, OBJCORE_MAGIC);
+ 		vary = VRY_Create(sp, sp->wrk->beresp);
+ 		if (vary != NULL) {
+ 			varyl = VSB_len(vary);
+ 			assert(varyl > 0);
+ 			l += varyl;
+ 		}
+ 	}
+ 
+ 	/*
+ 	 * Space for producing a Content-Length: header including padding
+ 	 * A billion gigabytes is enough for anybody.
+ 	 */
+ 	l += strlen("Content-Length: XxxXxxXxxXxxXxxXxx") + sizeof(void *);
+ 
+ 	if (sp->wrk->exp.ttl < cache_param->shortlived || sp->objcore == NULL)
+ 		sp->wrk->storage_hint = TRANSIENT_STORAGE;
+ 
+ 	sp->obj = STV_NewObject(sp, sp->wrk->storage_hint, l,
+ 	    &sp->wrk->exp, nhttp);
+ 	if (sp->obj == NULL) {
+ 		/*
+ 		 * Try to salvage the transaction by allocating a
+ 		 * shortlived object on Transient storage.
+ 		 */
+ 		sp->obj = STV_NewObject(sp, TRANSIENT_STORAGE, l,
+ 		    &sp->wrk->exp, nhttp);
+ 		if (sp->wrk->exp.ttl > cache_param->shortlived)
+ 			sp->wrk->exp.ttl = cache_param->shortlived;
+ 		sp->wrk->exp.grace = 0.0;
+ 		sp->wrk->exp.keep = 0.0;
+ 	}
+ 	if (sp->obj == NULL) {
+ 		sp->err_code = 503;
+ 		sp->step = STP_ERROR;
+ 		VDI_CloseFd(sp->wrk);
+ 		return (0);
+ 	}
+ 	CHECK_OBJ_NOTNULL(sp->obj, OBJECT_MAGIC);
++	sp->obj->exp.keep = sp->wrk->exp.keep;
+ 
+ 	sp->wrk->storage_hint = NULL;
+ 
+ 	if (sp->wrk->do_gzip || (sp->wrk->is_gzip && !sp->wrk->do_gunzip))
+ 		sp->obj->gziped = 1;
+ 
+ 	if (vary != NULL) {
+ 		sp->obj->vary =
+ 		    (void *)WS_Alloc(sp->obj->http->ws, varyl);
+ 		AN(sp->obj->vary);
+ 		memcpy(sp->obj->vary, VSB_data(vary), varyl);
+ 		VRY_Validate(sp->obj->vary);
+ 		VSB_delete(vary);
+ 	}
+ 
+ 	sp->obj->xid = sp->xid;
+ 	sp->obj->response = sp->err_code;
+ 	WS_Assert(sp->obj->ws_o);
+ 
+ 	/* Filter into object */
+ 	hp = sp->wrk->beresp;
+ 	hp2 = sp->obj->http;
+ 
+ 	hp2->logtag = HTTP_Obj;
+ 	http_CopyResp(hp2, hp);
++
+ 	http_FilterFields(sp->wrk, sp->vsl_id, hp2, hp,
+ 	    pass ? HTTPH_R_PASS : HTTPH_A_INS);
++
++	/*
++	 * If we found a candidate for a conditional backend request, attempt
++	 * it now.  If the backend responds with 304, http_Check304() merges
++	 * stale_obj into sp->obj; any other response is handled as usual.
++	 * In either case the stale_obj is no longer needed in the cache,
++	 * so discard it.
++	 */
++	if (sp->stale_obj) {
++		http_Check304(sp);
++		if (sp->wrk->beresp->status == 304)
++			assert(sp->obj->http->status == 200);
++		EXP_Clr(&sp->stale_obj->exp);
++		EXP_Rearm(sp->stale_obj);
++		(void)HSH_Deref(sp->wrk, NULL, &sp->stale_obj);
++		AZ(sp->stale_obj);
++	}
+ 	http_CopyHome(sp->wrk, sp->vsl_id, hp2);
+ 
 -	if (http_GetHdr(hp, H_Last_Modified, &b))
++	if (http_GetHdr(hp, H_Last_Modified, &b) ||
++	    http_GetHdr(sp->obj->http, H_Last_Modified, &b))
+ 		sp->obj->last_modified = VTIM_parse(b);
+ 	else
+ 		sp->obj->last_modified = floor(sp->wrk->exp.entered);
+ 
+ 	assert(WRW_IsReleased(sp->wrk));
+ 
+ 	/*
+ 	 * If we can deliver a 304 reply, we don't bother streaming.
+ 	 * Notice that vcl_deliver{} could still nuke the headers
+ 	 * that allow the 304, in which case we return 200 non-stream.
+ 	 */
+ 	if (sp->obj->response == 200 &&
+ 	    sp->http->conds &&
+ 	    RFC2616_Do_Cond(sp))
+ 		sp->wrk->do_stream = 0;
+ 
+ 	AssertObjCorePassOrBusy(sp->obj->objcore);
+ 
+ 	if (sp->wrk->do_stream) {
+ 		sp->step = STP_PREPRESP;
+ 		return (0);
+ 	}
+ 
+ 	/* Use unmodified headers */
+ 	i = FetchBody(sp->wrk, sp->obj);
+ 
+ 	sp->wrk->h_content_length = NULL;
+ 
+ 	http_Setup(sp->wrk->bereq, NULL);
+ 	http_Setup(sp->wrk->beresp, NULL);
+ 	sp->wrk->vfp = NULL;
+ 	assert(WRW_IsReleased(sp->wrk));
+ 	AZ(sp->wrk->vbc);
+ 	AN(sp->director);
+ 
+ 	if (i) {
+ 		HSH_Drop(sp);
+ 		AZ(sp->obj);
+ 		sp->err_code = 503;
+ 		sp->step = STP_ERROR;
+ 		return (0);
+ 	}
+ 
+ 	if (sp->obj->objcore != NULL) {
+ 		EXP_Insert(sp->obj);
+ 		AN(sp->obj->objcore);
+ 		AN(sp->obj->objcore->ban);
+ 		HSH_Unbusy(sp);
+ 	}
+ 	sp->wrk->acct_tmp.fetch++;
+ 	sp->step = STP_PREPRESP;
+ 	return (0);
+ }
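(For context: the stale_obj consumed above is prepared on the request side, where http_CheckRefresh(), declared in cache.h, turns a kept stale object into a conditional backend request. Conceptually, and only as a hypothetical sketch of that declared interface:

    /* roughly what the request side does before FetchHdr(): */
    char *p;

    if (sp->stale_obj != NULL &&
        http_GetHdr(sp->stale_obj->http, H_Last_Modified, &p))
        http_PrintfHeader(sp->wrk, sp->vsl_id, sp->wrk->bereq,
            "If-Modified-Since: %s", p);
)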
+ 
+ /*--------------------------------------------------------------------
+  * Stream the body as we fetch it
+ DOT subgraph xstreambody {
+ DOT	streambody [
+ DOT		shape=ellipse
+ DOT		label="streaming\nfetch/deliver"
+ DOT	]
+ DOT }
+ DOT streambody -> DONE [style=bold,color=cyan]
+  */
+ 
+ static int
+ cnt_streambody(struct sess *sp)
+ {
+ 	int i;
+ 	struct stream_ctx sctx;
+ 	uint8_t obuf[sp->wrk->res_mode & RES_GUNZIP ?
+ 	    cache_param->gzip_stack_buffer : 1];
+ 
+ 	memset(&sctx, 0, sizeof sctx);
+ 	sctx.magic = STREAM_CTX_MAGIC;
+ 	AZ(sp->wrk->sctx);
+ 	sp->wrk->sctx = &sctx;
+ 
+ 	if (sp->wrk->res_mode & RES_GUNZIP) {
+ 		sctx.vgz = VGZ_NewUngzip(sp->wrk, "U S -");
+ 		sctx.obuf = obuf;
+ 		sctx.obuf_len = sizeof (obuf);
+ 	}
+ 
+ 	RES_StreamStart(sp);
+ 
+ 	AssertObjCorePassOrBusy(sp->obj->objcore);
+ 
+ 	i = FetchBody(sp->wrk, sp->obj);
+ 
+ 	sp->wrk->h_content_length = NULL;
+ 
+ 	http_Setup(sp->wrk->bereq, NULL);
+ 	http_Setup(sp->wrk->beresp, NULL);
+ 	sp->wrk->vfp = NULL;
+ 	AZ(sp->wrk->vbc);
+ 	AN(sp->director);
+ 
+ 	if (!i && sp->obj->objcore != NULL) {
+ 		EXP_Insert(sp->obj);
+ 		AN(sp->obj->objcore);
+ 		AN(sp->obj->objcore->ban);
+ 		HSH_Unbusy(sp);
+ 	} else {
+ 		sp->doclose = "Stream error";
+ 	}
+ 	sp->wrk->acct_tmp.fetch++;
+ 	sp->director = NULL;
+ 	sp->restarts = 0;
+ 
+ 	RES_StreamEnd(sp);
+ 	if (sp->wrk->res_mode & RES_GUNZIP)
+ 		(void)VGZ_Destroy(&sctx.vgz, sp->vsl_id);
+ 
+ 	sp->wrk->sctx = NULL;
+ 	assert(WRW_IsReleased(sp->wrk));
+ 	assert(sp->wrk->wrw.ciov == sp->wrk->wrw.siov);
+ 	(void)HSH_Deref(sp->wrk, NULL, &sp->obj);
+ 	http_Setup(sp->wrk->resp, NULL);
+ 	sp->step = STP_DONE;
+ 	return (0);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * The very first request
+  */
+ static int
+ cnt_first(struct sess *sp)
+ {
+ 
+ 	/*
+ 	 * XXX: If we don't have acceptfilters we are somewhat subject
+ 	 * XXX: to DoS'ing here.  One remedy would be to set a shorter
+ 	 * XXX: SO_RCVTIMEO and once we have received something here
+ 	 * XXX: increase it to the normal value.
+ 	 */
+ 
+ 	assert(sp->xid == 0);
+ 	assert(sp->restarts == 0);
+ 	VCA_Prep(sp);
+ 
+ 	/* Record the session watermark */
+ 	sp->ws_ses = WS_Snapshot(sp->ws);
+ 
+ 	/* Receive a HTTP protocol request */
+ 	HTC_Init(sp->htc, sp->ws, sp->fd, sp->vsl_id, cache_param->http_req_size,
+ 	    cache_param->http_req_hdr_len);
+ 	sp->wrk->lastused = sp->t_open;
+ 	sp->wrk->acct_tmp.sess++;
+ 
+ 	sp->step = STP_WAIT;
+ 	return (0);
+ }
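+ 
+ /*
+  * Illustration of the SO_RCVTIMEO remedy suggested in the XXX note in
+  * cnt_first(): a minimal sketch, not live code.  The helper name and
+  * the one-second initial timeout are invented for the example.
+  */
+ #if 0
+ #include <sys/socket.h>
+ #include <sys/time.h>
+ 
+ static int
+ example_set_rcvtimeo(int fd, time_t sec)
+ {
+ 	struct timeval tv;
+ 
+ 	tv.tv_sec = sec;
+ 	tv.tv_usec = 0;
+ 	/* 0 on success, -1 with errno set on failure */
+ 	return (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv));
+ }
+ 
+ /*
+  * Use a short timeout until the first bytes arrive, then restore the
+  * normal value:
+  *	(void)example_set_rcvtimeo(sp->fd, 1);
+  *	... first successful read ...
+  *	(void)example_set_rcvtimeo(sp->fd, normal_timeout);
+  */
+ #endif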
+ 
+ /*--------------------------------------------------------------------
+  * HIT
+  * We had a cache hit.  Ask VCL, then march off as instructed.
+  *
+ DOT subgraph xcluster_hit {
+ DOT	hit [
+ DOT		shape=record
+ DOT		label="vcl_hit()|req.\nobj."
+ DOT	]
+ DOT }
+ DOT hit -> err_hit [label="error"]
+ DOT err_hit [label="ERROR",shape=plaintext]
+ DOT hit -> rst_hit [label="restart",color=purple]
+ DOT rst_hit [label="RESTART",shape=plaintext]
+ DOT hit -> pass [label=pass,style=bold,color=red]
+ DOT hit -> prepresp [label="deliver",style=bold,color=green]
+  */
+ 
+ static int
+ cnt_hit(struct sess *sp)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->obj, OBJECT_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
+ 
+ 	assert(!(sp->obj->objcore->flags & OC_F_PASS));
+ 
+ 	AZ(sp->wrk->do_stream);
+ 
+ 	VCL_hit_method(sp);
+ 
+ 	if (sp->handling == VCL_RET_DELIVER) {
+ 		/* Dispose of any body part of the request */
+ 		(void)FetchReqBody(sp);
+ 		AZ(sp->wrk->bereq->ws);
+ 		AZ(sp->wrk->beresp->ws);
+ 		sp->step = STP_PREPRESP;
+ 		return (0);
+ 	}
+ 
+ 	/* Drop our object, we won't need it */
+ 	(void)HSH_Deref(sp->wrk, NULL, &sp->obj);
+ 	sp->objcore = NULL;
+ 
+ 	switch (sp->handling) {
+ 	case VCL_RET_PASS:
+ 		sp->step = STP_PASS;
+ 		return (0);
+ 	case VCL_RET_ERROR:
+ 		sp->step = STP_ERROR;
+ 		return (0);
+ 	case VCL_RET_RESTART:
+ 		sp->director = NULL;
+ 		sp->restarts++;
+ 		sp->step = STP_RECV;
+ 		return (0);
+ 	default:
+ 		WRONG("Illegal action in vcl_hit{}");
+ 	}
+ }
+ 
+ /*--------------------------------------------------------------------
+  * LOOKUP
+  * Hash things together and look object up in hash-table.
+  *
+  * LOOKUP consists of two substates so that we can reenter if we
+  * encounter a busy object.
+  *
+ DOT subgraph xcluster_lookup {
+ DOT	hash [
+ DOT		shape=record
+ DOT		label="vcl_hash()|req."
+ DOT	]
+ DOT	lookup [
+ DOT		shape=diamond
+ DOT		label="obj in cache ?\ncreate if not"
+ DOT	]
+ DOT	lookup2 [
+ DOT		shape=diamond
+ DOT		label="obj.f.pass ?"
+ DOT	]
+ DOT	hash -> lookup [label="hash",style=bold,color=green]
+ DOT	lookup -> lookup2 [label="yes",style=bold,color=green]
+ DOT }
+ DOT lookup2 -> hit [label="no", style=bold,color=green]
+ DOT lookup2 -> pass [label="yes",style=bold,color=red]
+ DOT lookup -> miss [label="no",style=bold,color=blue]
+  */
+ 
+ static int
+ cnt_lookup(struct sess *sp)
+ {
+ 	struct objcore *oc;
+ 	struct object *o;
+ 	struct objhead *oh;
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
+ 
+ 	if (sp->hash_objhead == NULL) {
+ 		/* Not a waiting list return */
+ 		AZ(sp->vary_b);
+ 		AZ(sp->vary_l);
+ 		AZ(sp->vary_e);
+ 		(void)WS_Reserve(sp->ws, 0);
+ 	} else {
+ 		AN(sp->ws->r);
+ 	}
+ 	sp->vary_b = (void*)sp->ws->f;
+ 	sp->vary_e = (void*)sp->ws->r;
+ 	sp->vary_b[2] = '\0';
+ 
+ 	oc = HSH_Lookup(sp, &oh);
+ 
+ 	if (oc == NULL) {
+ 		/*
+ 		 * We lost the session to a busy object; disembark the
+ 		 * worker thread.  The hash code will restart the session,
+ 		 * still in STP_LOOKUP, later, when the busy object no
+ 		 * longer is busy.
+ 		 * NB: Do not access sp any more!
+ 		 */
+ 		return (1);
+ 	}
+ 
+ 	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
+ 
+ 	/* If we inserted a new object it's a miss */
+ 	if (oc->flags & OC_F_BUSY) {
+ 		sp->wrk->stats.cache_miss++;
+ 
+ 		if (sp->vary_l != NULL) {
+ 			assert(oc->busyobj->vary == sp->vary_b);
+ 			VRY_Validate(oc->busyobj->vary);
+ 			WS_ReleaseP(sp->ws, (void*)sp->vary_l);
+ 		} else {
+ 			AZ(oc->busyobj->vary);
+ 			WS_Release(sp->ws, 0);
+ 		}
+ 		sp->vary_b = NULL;
+ 		sp->vary_l = NULL;
+ 		sp->vary_e = NULL;
+ 
+ 		sp->objcore = oc;
+ 		sp->step = STP_MISS;
+ 		return (0);
+ 	}
+ 
+ 	o = oc_getobj(sp->wrk, oc);
+ 	CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ 	sp->obj = o;
+ 
+ 	WS_Release(sp->ws, 0);
+ 	sp->vary_b = NULL;
+ 	sp->vary_l = NULL;
+ 	sp->vary_e = NULL;
+ 
+ 	if (oc->flags & OC_F_PASS) {
+ 		sp->wrk->stats.cache_hitpass++;
+ 		WSP(sp, SLT_HitPass, "%u", sp->obj->xid);
+ 		(void)HSH_Deref(sp->wrk, NULL, &sp->obj);
++                if (sp->stale_obj != NULL)
++                    (void)HSH_Deref(sp->wrk, NULL, &sp->stale_obj);
+ 		sp->objcore = NULL;
+ 		sp->step = STP_PASS;
+ 		return (0);
+ 	}
+ 
+ 	sp->wrk->stats.cache_hit++;
+ 	WSP(sp, SLT_Hit, "%u", sp->obj->xid);
+ 	sp->step = STP_HIT;
+ 	return (0);
+ }
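+ 
+ /*
+  * The Vary handling in cnt_lookup() uses a common workspace pattern:
+  * reserve all free space up front, write into it, then release all but
+  * what was actually used.  A minimal sketch of the pattern; the
+  * payload and its size are invented for the example.
+  */
+ #if 0
+ 	char *b, *e;
+ 
+ 	(void)WS_Reserve(sp->ws, 0);		/* reserve all free space */
+ 	b = sp->ws->f;				/* start of reservation */
+ 	e = sp->ws->r;				/* end of reservation */
+ 	assert(e - b >= 8);			/* invented size check */
+ 	memcpy(b, "example", 8);		/* invented payload + NUL */
+ 	WS_ReleaseP(sp->ws, b + 8);		/* keep only what was used */
+ 	/* or: WS_Release(sp->ws, 0); to keep nothing */
+ #endif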
+ 
+ /*--------------------------------------------------------------------
+  * We had a miss, ask VCL, proceed as instructed
+  *
+ DOT subgraph xcluster_miss {
+ DOT	miss [
+ DOT		shape=ellipse
+ DOT		label="filter req.->bereq."
+ DOT	]
+ DOT	vcl_miss [
+ DOT		shape=record
+ DOT		label="vcl_miss()|req.\nbereq."
+ DOT	]
+ DOT	miss -> vcl_miss [style=bold,color=blue]
+ DOT }
+ DOT vcl_miss -> rst_miss [label="restart",color=purple]
+ DOT rst_miss [label="RESTART",shape=plaintext]
+ DOT vcl_miss -> err_miss [label="error"]
+ DOT err_miss [label="ERROR",shape=plaintext]
+ DOT vcl_miss -> fetch [label="fetch",style=bold,color=blue]
+ DOT vcl_miss -> pass [label="pass",style=bold,color=red]
+ DOT
+  */
+ 
+ static int
+ cnt_miss(struct sess *sp)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
+ 
+ 	AZ(sp->obj);
+ 	AN(sp->objcore);
+ 	WS_Reset(sp->wrk->ws, NULL);
+ 	http_Setup(sp->wrk->bereq, sp->wrk->ws);
+ 	http_FilterHeader(sp, HTTPH_R_FETCH);
+ 	http_ForceGet(sp->wrk->bereq);
+ 	if (cache_param->http_gzip_support) {
+ 		/*
+ 		 * We always ask the backend for gzip, even if the
+ 		 * client doesn't grok it.  We will uncompress for
+ 		 * the minority of clients which don't.
+ 		 */
+ 		http_Unset(sp->wrk->bereq, H_Accept_Encoding);
+ 		http_SetHeader(sp->wrk, sp->vsl_id, sp->wrk->bereq,
+ 		    "Accept-Encoding: gzip");
+ 	}
+ 	sp->wrk->connect_timeout = 0;
+ 	sp->wrk->first_byte_timeout = 0;
+ 	sp->wrk->between_bytes_timeout = 0;
++
++        /* If a candidate for a conditional backend request was found,
++         * add If-Modified-Since and/or If-None-Match to the bereq.
++         */
++        if (sp->stale_obj)
++                http_CheckRefresh(sp);
++
+ 	VCL_miss_method(sp);
+ 	switch (sp->handling) {
+ 	case VCL_RET_ERROR:
+ 		AZ(HSH_Deref(sp->wrk, sp->objcore, NULL));
+ 		sp->objcore = NULL;
+ 		http_Setup(sp->wrk->bereq, NULL);
+ 		sp->step = STP_ERROR;
+ 		return (0);
+ 	case VCL_RET_PASS:
+ 		AZ(HSH_Deref(sp->wrk, sp->objcore, NULL));
+ 		sp->objcore = NULL;
+ 		sp->step = STP_PASS;
+ 		return (0);
+ 	case VCL_RET_FETCH:
+ 		sp->step = STP_FETCH;
+ 		return (0);
+ 	case VCL_RET_RESTART:
+ 		AZ(HSH_Deref(sp->wrk, sp->objcore, NULL));
+ 		sp->objcore = NULL;
+ 		INCOMPL();
+ 	default:
+ 		WRONG("Illegal action in vcl_miss{}");
+ 	}
+ }
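+ 
+ /*
+  * What http_CheckRefresh() arranges, conceptually: copy the stale
+  * object's validators onto the backend request so the backend can
+  * answer 304.  A simplified sketch, not the actual implementation;
+  * H_ETag is assumed to be among the precompiled header constants.
+  */
+ #if 0
+ 	char *p;
+ 
+ 	if (http_GetHdr(sp->stale_obj->http, H_Last_Modified, &p))
+ 		http_PrintfHeader(sp->wrk, sp->vsl_id, sp->wrk->bereq,
+ 		    "If-Modified-Since: %s", p);
+ 	if (http_GetHdr(sp->stale_obj->http, H_ETag, &p))
+ 		http_PrintfHeader(sp->wrk, sp->vsl_id, sp->wrk->bereq,
+ 		    "If-None-Match: %s", p);
+ #endif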
+ 
+ /*--------------------------------------------------------------------
+  * Start pass processing by getting headers from backend, then
+  * continue in passbody.
+  *
+ DOT subgraph xcluster_pass {
+ DOT	pass [
+ DOT		shape=ellipse
+ DOT		label="deref obj."
+ DOT	]
+ DOT	pass2 [
+ DOT		shape=ellipse
+ DOT		label="filter req.->bereq."
+ DOT	]
+ DOT	vcl_pass [
+ DOT		shape=record
+ DOT		label="vcl_pass()|req.\nbereq."
+ DOT	]
+ DOT	pass_do [
+ DOT		shape=ellipse
+ DOT		label="create anon object\n"
+ DOT	]
+ DOT	pass -> pass2 [style=bold, color=red]
+ DOT	pass2 -> vcl_pass [style=bold, color=red]
+ DOT	vcl_pass -> pass_do [label="pass"] [style=bold, color=red]
+ DOT }
+ DOT pass_do -> fetch [style=bold, color=red]
+ DOT vcl_pass -> rst_pass [label="restart",color=purple]
+ DOT rst_pass [label="RESTART",shape=plaintext]
+ DOT vcl_pass -> err_pass [label="error"]
+ DOT err_pass [label="ERROR",shape=plaintext]
+  */
+ 
+ static int
+ cnt_pass(struct sess *sp)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
+ 	AZ(sp->obj);
+ 
+ 	WS_Reset(sp->wrk->ws, NULL);
+ 	http_Setup(sp->wrk->bereq, sp->wrk->ws);
+ 	http_FilterHeader(sp, HTTPH_R_PASS);
+ 
+ 	sp->wrk->connect_timeout = 0;
+ 	sp->wrk->first_byte_timeout = 0;
+ 	sp->wrk->between_bytes_timeout = 0;
+ 	VCL_pass_method(sp);
+ 	if (sp->handling == VCL_RET_ERROR) {
+ 		http_Setup(sp->wrk->bereq, NULL);
+ 		sp->step = STP_ERROR;
+ 		return (0);
+ 	}
+ 	assert(sp->handling == VCL_RET_PASS);
+ 	sp->wrk->acct_tmp.pass++;
+ 	sp->sendbody = 1;
+ 	sp->step = STP_FETCH;
+ 	return (0);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Ship the request header to the backend unchanged, then pipe
+  * until one of the ends close the connection.
+  *
+ DOT subgraph xcluster_pipe {
+ DOT	pipe [
+ DOT		shape=ellipse
+ DOT		label="Filter req.->bereq."
+ DOT	]
+ DOT	vcl_pipe [
+ DOT		shape=record
+ DOT		label="vcl_pipe()|req.\nbereq\."
+ DOT	]
+ DOT	pipe_do [
+ DOT		shape=ellipse
+ DOT		label="send bereq.\npipe until close"
+ DOT	]
+ DOT	vcl_pipe -> pipe_do [label="pipe",style=bold,color=orange]
+ DOT	pipe -> vcl_pipe [style=bold,color=orange]
+ DOT }
+ DOT pipe_do -> DONE [style=bold,color=orange]
+ DOT vcl_pipe -> err_pipe [label="error"]
+ DOT err_pipe [label="ERROR",shape=plaintext]
+  */
+ 
+ static int
+ cnt_pipe(struct sess *sp)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
+ 
+ 	sp->wrk->acct_tmp.pipe++;
+ 	WS_Reset(sp->wrk->ws, NULL);
+ 	http_Setup(sp->wrk->bereq, sp->wrk->ws);
+ 	http_FilterHeader(sp, HTTPH_R_PIPE);
+ 
+ 	VCL_pipe_method(sp);
+ 
+ 	if (sp->handling == VCL_RET_ERROR)
+ 		INCOMPL();
+ 	assert(sp->handling == VCL_RET_PIPE);
+ 
+ 	PipeSession(sp);
+ 	assert(WRW_IsReleased(sp->wrk));
+ 	http_Setup(sp->wrk->bereq, NULL);
+ 	sp->step = STP_DONE;
+ 	return (0);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * RECV
+  * We have a complete request, set everything up and start it.
+  *
+ DOT subgraph xcluster_recv {
+ DOT	recv [
+ DOT		shape=record
+ DOT		label="vcl_recv()|req."
+ DOT	]
+ DOT }
+ DOT RESTART -> recv
+ DOT recv -> pipe [label="pipe",style=bold,color=orange]
+ DOT recv -> pass2 [label="pass",style=bold,color=red]
+ DOT recv -> err_recv [label="error"]
+ DOT err_recv [label="ERROR",shape=plaintext]
+ DOT recv -> hash [label="lookup",style=bold,color=green]
+  */
+ 
+ static int
+ cnt_recv(struct sess *sp)
+ {
+ 	unsigned recv_handling;
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
+ 	AZ(sp->obj);
+ 	assert(sp->wrk->wrw.ciov == sp->wrk->wrw.siov);
+ 
+ 	/* By default we use the first backend */
+ 	AZ(sp->director);
+ 	sp->director = sp->vcl->director[0];
+ 	AN(sp->director);
+ 
+ 	sp->disable_esi = 0;
+ 	sp->hash_always_miss = 0;
+ 	sp->hash_ignore_busy = 0;
+ 	sp->client_identity = NULL;
+ 
+ 	http_CollectHdr(sp->http, H_Cache_Control);
+ 
+ 	VCL_recv_method(sp);
+ 	recv_handling = sp->handling;
+ 
+ 	if (sp->restarts >= cache_param->max_restarts) {
+ 		if (sp->err_code == 0)
+ 			sp->err_code = 503;
+ 		sp->step = STP_ERROR;
+ 		return (0);
+ 	}
+ 
+ 	/* Zap these, in case we came here through restart */
+ 	sp->wrk->do_esi = 0;
+ 	sp->wrk->is_gzip = 0;
+ 	sp->wrk->is_gunzip = 0;
+ 	sp->wrk->do_gzip = 0;
+ 	sp->wrk->do_gunzip = 0;
+ 	sp->wrk->do_stream = 0;
+ 
+ 	if (cache_param->http_gzip_support &&
+ 	     (recv_handling != VCL_RET_PIPE) &&
+ 	     (recv_handling != VCL_RET_PASS)) {
+ 		if (RFC2616_Req_Gzip(sp)) {
+ 			http_Unset(sp->http, H_Accept_Encoding);
+ 			http_SetHeader(sp->wrk, sp->vsl_id, sp->http,
+ 			    "Accept-Encoding: gzip");
+ 		} else {
+ 			http_Unset(sp->http, H_Accept_Encoding);
+ 		}
+ 	}
+ 
+ 	SHA256_Init(sp->wrk->sha256ctx);
+ 	VCL_hash_method(sp);
+ 	assert(sp->handling == VCL_RET_HASH);
+ 	SHA256_Final(sp->digest, sp->wrk->sha256ctx);
+ 
+ 	if (!strcmp(sp->http->hd[HTTP_HDR_REQ].b, "HEAD"))
+ 		sp->wantbody = 0;
+ 	else
+ 		sp->wantbody = 1;
+ 
+ 	sp->sendbody = 0;
+ 	switch (recv_handling) {
+ 	case VCL_RET_LOOKUP:
+ 		/* XXX: discard req body, if any */
+ 		sp->step = STP_LOOKUP;
+ 		return (0);
+ 	case VCL_RET_PIPE:
+ 		if (sp->esi_level > 0) {
+ 			/* XXX: VSL something */
+ 			INCOMPL();
+ 			/* sp->step = STP_DONE; */
+ 			return (1);
+ 		}
+ 		sp->step = STP_PIPE;
+ 		return (0);
+ 	case VCL_RET_PASS:
+ 		sp->step = STP_PASS;
+ 		return (0);
+ 	case VCL_RET_ERROR:
+ 		/* XXX: discard req body, if any */
+ 		sp->step = STP_ERROR;
+ 		return (0);
+ 	default:
+ 		WRONG("Illegal action in vcl_recv{}");
+ 	}
+ }
+ 
+ /*--------------------------------------------------------------------
+  * START
+  * Handle a request, wherever it came from recv/restart.
+  *
+ DOT start [shape=box,label="Dissect request"]
+ DOT start -> recv [style=bold,color=green]
+  */
+ 
+ static int
+ cnt_start(struct sess *sp)
+ {
+ 	uint16_t done;
+ 	char *p;
+ 	const char *r = "HTTP/1.1 100 Continue\r\n\r\n";
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	AZ(sp->restarts);
+ 	AZ(sp->obj);
+ 	AZ(sp->vcl);
+ 
+ 	/* Update stats of various sorts */
+ 	sp->wrk->stats.client_req++;
+ 	sp->t_req = VTIM_real();
+ 	sp->wrk->lastused = sp->t_req;
+ 	sp->wrk->acct_tmp.req++;
+ 
+ 	/* Assign XID and log */
+ 	sp->xid = ++xids;				/* XXX not locked */
+ 	WSP(sp, SLT_ReqStart, "%s %s %u", sp->addr, sp->port, sp->xid);
+ 
+ 	/* Borrow VCL reference from worker thread */
+ 	VCL_Refresh(&sp->wrk->vcl);
+ 	sp->vcl = sp->wrk->vcl;
+ 	sp->wrk->vcl = NULL;
+ 
+ 	http_Setup(sp->http, sp->ws);
+ 	done = http_DissectRequest(sp);
+ 
+ 	/* If we could not even parse the request, just close */
+ 	if (done == 400) {
+ 		sp->step = STP_DONE;
+ 		SES_Close(sp, "junk");
+ 		return (0);
+ 	}
+ 
+ 	/* Catch request snapshot */
+ 	sp->ws_req = WS_Snapshot(sp->ws);
+ 
+ 	/* Catch original request, before modification */
+ 	HTTP_Copy(sp->http0, sp->http);
+ 
+ 	if (done != 0) {
+ 		sp->err_code = done;
+ 		sp->step = STP_ERROR;
+ 		return (0);
+ 	}
+ 
+ 	sp->doclose = http_DoConnection(sp->http);
+ 
+ 	/* XXX: Handle TRACE & OPTIONS of Max-Forwards = 0 */
+ 
+ 	/*
+ 	 * Handle Expect headers
+ 	 */
+ 	if (http_GetHdr(sp->http, H_Expect, &p)) {
+ 		if (strcasecmp(p, "100-continue")) {
+ 			sp->err_code = 417;
+ 			sp->step = STP_ERROR;
+ 			return (0);
+ 		}
+ 
+ 		/* XXX: Don't bother with write failures for now */
+ 		(void)write(sp->fd, r, strlen(r));
+ 		/* XXX: When we do ESI includes, this is not removed
+ 		 * XXX: because we use http0 as our basis.  Believed
+ 		 * XXX: safe, but potentially confusing.
+ 		 */
+ 		http_Unset(sp->http, H_Expect);
+ 	}
+ 
+ 	sp->step = STP_RECV;
+ 	return (0);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Central state engine dispatcher.
+  *
+  * Kick the session around until it has had enough.
+  *
+  */
+ 
+ static void
+ cnt_diag(struct sess *sp, const char *state)
+ {
+ 	if (sp->wrk != NULL) {
+ 		WSP(sp, SLT_Debug, "thr %p STP_%s sp %p obj %p vcl %p",
+ 		    pthread_self(), state, sp, sp->obj, sp->vcl);
+ 		WSL_Flush(sp->wrk, 0);
+ 	} else {
+ 		VSL(SLT_Debug, sp->vsl_id,
+ 		    "thr %p STP_%s sp %p obj %p vcl %p",
+ 		    pthread_self(), state, sp, sp->obj, sp->vcl);
+ 	}
+ }
+ 
+ void
+ CNT_Session(struct sess *sp)
+ {
+ 	int done;
+ 	struct worker *w;
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	w = sp->wrk;
+ 	CHECK_OBJ_NOTNULL(w, WORKER_MAGIC);
+ 
+ 	/*
+ 	 * Possible entrance states
+ 	 */
+ 	assert(
+ 	    sp->step == STP_FIRST ||
+ 	    sp->step == STP_START ||
+ 	    sp->step == STP_LOOKUP ||
+ 	    sp->step == STP_RECV);
+ 
+ 	AZ(w->do_stream);
+ 	AZ(w->is_gzip);
+ 	AZ(w->do_gzip);
+ 	AZ(w->is_gunzip);
+ 	AZ(w->do_gunzip);
+ 	AZ(w->do_esi);
+ 
+ 	/*
+ 	 * Whenever we come in from the acceptor or waiter, we need to set
+ 	 * blocking mode, but there is no point in setting it when we come from
+ 	 * ESI or when a parked session returns.
+ 	 * It would be simpler to do this in the acceptor or waiter, but we'd
+ 	 * rather do the syscall in the worker thread.
+ 	 * On systems which return errors for ioctl, we close early.
+ 	 */
+ 	if ((sp->step == STP_FIRST || sp->step == STP_START) &&
+ 	    VTCP_blocking(sp->fd)) {
+ 		if (errno == ECONNRESET)
+ 			SES_Close(sp, "remote closed");
+ 		else
+ 			SES_Close(sp, "error");
+ 		sp->step = STP_DONE;
+ 	}
+ 
+ 	/*
+ 	 * NB: Once done is set, we can no longer touch sp!
+ 	 */
+ 	for (done = 0; !done; ) {
+ 		assert(sp->wrk == w);
+ 		/*
+ 		 * This is a good place to be paranoid about the various
+ 		 * pointers still pointing to the things we expect.
+ 		 */
+ 		CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 		CHECK_OBJ_ORNULL(sp->obj, OBJECT_MAGIC);
+ 		CHECK_OBJ_NOTNULL(sp->wrk, WORKER_MAGIC);
+ 		CHECK_OBJ_ORNULL(w->nobjhead, OBJHEAD_MAGIC);
+ 		WS_Assert(w->ws);
+ 
+ 		switch (sp->step) {
+ #define STEP(l,u) \
+ 		    case STP_##u: \
+ 			if (cache_param->diag_bitmap & 0x01) \
+ 				cnt_diag(sp, #u); \
+ 			done = cnt_##l(sp); \
+ 		        break;
+ #include "tbl/steps.h"
+ #undef STEP
+ 		default:
+ 			WRONG("State engine misfire");
+ 		}
+ 		WS_Assert(w->ws);
+ 		CHECK_OBJ_ORNULL(w->nobjhead, OBJHEAD_MAGIC);
+ 	}
+ 	WSL_Flush(w, 0);
+ 	AZ(w->do_stream);
+ 	AZ(w->is_gzip);
+ 	AZ(w->do_gzip);
+ 	AZ(w->is_gunzip);
+ 	AZ(w->do_gunzip);
+ 	AZ(w->do_esi);
+ #define ACCT(foo)	AZ(w->acct_tmp.foo);
+ #include "tbl/acct_fields.h"
+ #undef ACCT
+ 	assert(WRW_IsReleased(w));
+ }
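+ 
+ /*
+  * The STEP dispatch in CNT_Session() is the "X-macro" pattern:
+  * tbl/steps.h is a list of STEP(lower, UPPER) entries which each
+  * include site expands differently.  A self-contained sketch of the
+  * idea; the table contents here are invented for the example.
+  */
+ #if 0
+ /* steps_example.h */
+ STEP(first,  FIRST)
+ STEP(start,  START)
+ STEP(lookup, LOOKUP)
+ 
+ /* One expansion builds the enum ... */
+ enum example_step {
+ #define STEP(l, u)	STP_##u,
+ #include "steps_example.h"
+ #undef STEP
+ };
+ 
+ /* ... another builds the switch arms, as in CNT_Session() above. */
+ #endif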
+ 
+ /*
+ DOT }
+ */
+ 
+ /*--------------------------------------------------------------------
+  * Debugging aids
+  */
+ 
+ static void
+ cli_debug_xid(struct cli *cli, const char * const *av, void *priv)
+ {
+ 	(void)priv;
+ 	if (av[2] != NULL)
+ 		xids = strtoul(av[2], NULL, 0);
+ 	VCLI_Out(cli, "XID is %u", xids);
+ }
+ 
+ /*
+  * Default to seed=1; this is the only seed value POSIX guarantees will
+  * result in a reproducible random number sequence.
+  */
+ static void
+ cli_debug_srandom(struct cli *cli, const char * const *av, void *priv)
+ {
+ 	unsigned seed = 1;
+ 
+ 	(void)priv;
+ 
+ 	if (av[2] != NULL)
+ 		seed = strtoul(av[2], NULL, 0);
+ 	srandom(seed);
+ 	srand48(random());
+ 	VCLI_Out(cli, "Random(3) seeded with %u", seed);
+ }
+ 
+ static struct cli_proto debug_cmds[] = {
+ 	{ "debug.xid", "debug.xid",
+ 		"\tExamine or set XID\n", 0, 1, "d", cli_debug_xid },
+ 	{ "debug.srandom", "debug.srandom",
+ 		"\tSeed the random(3) function\n", 0, 1, "d", cli_debug_srandom },
+ 	{ NULL }
+ };
+ 
+ /*--------------------------------------------------------------------
+  *
+  */
+ 
+ void
+ CNT_Init(void)
+ {
+ 
+ 	srandomdev();
+ 	srand48(random());
+ 	xids = random();
+ 	CLI_AddFuncs(debug_cmds);
+ }
+ 
+ 
diff --cc bin/varnishd/cache/cache_expire.c
index 0000000,23e3fc6..31fd41a
mode 000000,100644..100644
--- a/bin/varnishd/cache/cache_expire.c
+++ b/bin/varnishd/cache/cache_expire.c
@@@ -1,0 -1,490 +1,490 @@@
+ /*-
+  * Copyright (c) 2006 Verdens Gang AS
+  * Copyright (c) 2006-2011 Varnish Software AS
+  * All rights reserved.
+  *
+  * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  *
+  * LRU and object timer handling.
+  *
+  * We have two data structures, a LRU-list and a binary heap for the timers
+  * and two ways to kill objects: TTL-timeouts and LRU cleanups.
+  *
+  * Any object on the LRU is also on the binheap and vice versa.
+  *
+  * We hold a single object reference for both data structures.
+  *
+  * An attempted overview:
+  *
+  *	                        EXP_Ttl()      EXP_Grace()   EXP_Keep()
+  *				   |                |            |
+  *      entered                    v                v            |
+  *         |                       +--------------->+            |
+  *         v                       |      grace                  |
+  *         +---------------------->+                             |
+  *                  ttl            |                             v
+  *                                 +---------------------------->+
+  *                                     keep
+  *
+  */
+ 
+ #include "config.h"
+ 
+ #include <math.h>
+ 
+ #include "cache.h"
+ 
+ #include "binary_heap.h"
+ #include "hash/hash_slinger.h"
+ #include "vtim.h"
+ 
+ static pthread_t exp_thread;
+ static struct binheap *exp_heap;
+ static struct lock exp_mtx;
+ 
+ /*--------------------------------------------------------------------
+  * struct exp manipulations
+  *
+  * The Get/Set functions encapsulate the mutual magic between the
+  * fields in one single place.
+  */
+ 
+ void
+ EXP_Clr(struct exp *e)
+ {
+ 
+ 	e->ttl = -1;
+ 	e->grace = -1;
+ 	e->keep = -1;
+ 	e->age = 0;
+ 	e->entered = 0;
+ }
+ 
+ #define EXP_ACCESS(fld, low_val, extra)				\
+ 	double							\
+ 	EXP_Get_##fld(const struct exp *e)			\
+ 	{							\
+ 		return (e->fld > 0. ? e->fld : low_val);	\
+ 	}							\
+ 								\
+ 	void							\
+ 	EXP_Set_##fld(struct exp *e, double v)			\
+ 	{							\
+ 		if (v > 0.)					\
+ 			e->fld = v;				\
+ 		else {						\
+ 			e->fld = -1.;				\
+ 			extra;					\
+ 		}						\
+ 	}							\
+ 
+ EXP_ACCESS(ttl, -1., (e->grace = e->keep = -1.))
+ EXP_ACCESS(grace, 0., )
+ EXP_ACCESS(keep, 0., )
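+ 
+ /*
+  * For reference, EXP_ACCESS(ttl, -1., (e->grace = e->keep = -1.))
+  * expands to (whitespace rearranged):
+  */
+ #if 0
+ double
+ EXP_Get_ttl(const struct exp *e)
+ {
+ 	return (e->ttl > 0. ? e->ttl : -1.);
+ }
+ 
+ void
+ EXP_Set_ttl(struct exp *e, double v)
+ {
+ 	if (v > 0.)
+ 		e->ttl = v;
+ 	else {
+ 		e->ttl = -1.;
+ 		(e->grace = e->keep = -1.);	/* clearing ttl clears both */
+ 	}
+ }
+ #endif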
+ 
+ /*--------------------------------------------------------------------
+  * Calculate an object's effective keep, grace or ttl time, suitably
+  * adjusted for defaults and capped by per-session limits.
+  */
+ 
 -static double
++double
+ EXP_Keep(const struct sess *sp, const struct object *o)
+ {
+ 	double r;
+ 
+ 	r = (double)cache_param->default_keep;
+ 	if (o->exp.keep > 0.)
+ 		r = o->exp.keep;
+ 	if (sp != NULL && sp->exp.keep > 0. && sp->exp.keep < r)
+ 		r = sp->exp.keep;
+ 	return (EXP_Ttl(sp, o) + r);
+ }
+ 
+ double
+ EXP_Grace(const struct sess *sp, const struct object *o)
+ {
+ 	double r;
+ 
+ 	r = (double)cache_param->default_grace;
+ 	if (o->exp.grace >= 0.)
+ 		r = o->exp.grace;
+ 	if (sp != NULL && sp->exp.grace > 0. && sp->exp.grace < r)
+ 		r = sp->exp.grace;
+ 	return (EXP_Ttl(sp, o) + r);
+ }
+ 
+ double
+ EXP_Ttl(const struct sess *sp, const struct object *o)
+ {
+ 	double r;
+ 
+ 	r = o->exp.ttl;
+ 	if (sp != NULL && sp->exp.ttl > 0. && sp->exp.ttl < r)
+ 		r = sp->exp.ttl;
+ 	return (o->exp.entered + r);
+ }
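+ 
+ /*
+  * Worked example of the three deadlines, with invented numbers:
+  * entered = 100, ttl = 60, grace = 10, keep = 30 and no session
+  * limits (sp == NULL) give
+  *
+  *	EXP_Ttl()   = 100 + 60 = 160
+  *	EXP_Grace() = 160 + 10 = 170
+  *	EXP_Keep()  = 160 + 30 = 190
+  *
+  * matching the timeline in the overview comment at the top of this
+  * file.
+  */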
+ 
+ /*--------------------------------------------------------------------
+  * When & why does the timer fire for this object ?
+  */
+ 
+ static int
+ update_object_when(const struct object *o)
+ {
+ 	struct objcore *oc;
+ 	double when, w2;
+ 
+ 	CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ 	oc = o->objcore;
+ 	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 	Lck_AssertHeld(&exp_mtx);
+ 
+ 	when = EXP_Keep(NULL, o);
+ 	w2 = EXP_Grace(NULL, o);
+ 	if (w2 > when)
+ 		when = w2;
+ 	assert(!isnan(when));
+ 	if (when == oc->timer_when)
+ 		return (0);
+ 	oc->timer_when = when;
+ 	return (1);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static void
+ exp_insert(struct objcore *oc, struct lru *lru)
+ {
+ 	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 	CHECK_OBJ_NOTNULL(lru, LRU_MAGIC);
+ 
+ 	Lck_AssertHeld(&lru->mtx);
+ 	Lck_AssertHeld(&exp_mtx);
+ 	assert(oc->timer_idx == BINHEAP_NOIDX);
+ 	binheap_insert(exp_heap, oc);
+ 	assert(oc->timer_idx != BINHEAP_NOIDX);
+ 	VTAILQ_INSERT_TAIL(&lru->lru_head, oc, lru_list);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Object has been added to cache, record in lru & binheap.
+  *
+  * The objcore comes with a reference, which we inherit.
+  */
+ 
+ void
+ EXP_Inject(struct objcore *oc, struct lru *lru, double when)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 	CHECK_OBJ_NOTNULL(lru, LRU_MAGIC);
+ 
+ 	Lck_Lock(&lru->mtx);
+ 	Lck_Lock(&exp_mtx);
+ 	oc->timer_when = when;
+ 	exp_insert(oc, lru);
+ 	Lck_Unlock(&exp_mtx);
+ 	Lck_Unlock(&lru->mtx);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Object has been added to cache, record in lru & binheap.
+  *
+  * We grab a reference to the object, which will keep it around until
+  * we decide its time to let it go.
+  */
+ 
+ void
+ EXP_Insert(struct object *o)
+ {
+ 	struct objcore *oc;
+ 	struct lru *lru;
+ 
+ 	CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ 	oc = o->objcore;
+ 	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 	AssertObjBusy(o);
+ 	HSH_Ref(oc);
+ 
+ 	assert(o->exp.entered != 0 && !isnan(o->exp.entered));
+ 	o->last_lru = o->exp.entered;
+ 
+ 	lru = oc_getlru(oc);
+ 	CHECK_OBJ_NOTNULL(lru, LRU_MAGIC);
+ 	Lck_Lock(&lru->mtx);
+ 	Lck_Lock(&exp_mtx);
+ 	(void)update_object_when(o);
+ 	exp_insert(oc, lru);
+ 	Lck_Unlock(&exp_mtx);
+ 	Lck_Unlock(&lru->mtx);
+ 	oc_updatemeta(oc);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Object was used, move to tail of LRU list.
+  *
+  * To avoid the exp_mtx becoming a hotspot, we only attempt to move
+  * objects if they have not been moved recently and if the lock is available.
+  * This optimization obviously leaves the LRU list imperfectly sorted.
+  */
+ 
+ int
+ EXP_Touch(struct objcore *oc)
+ {
+ 	struct lru *lru;
+ 
+ 	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 
+ 	/*
+ 	 * For -spersistent we don't move objects on the lru list.  Each
+ 	 * segment has its own LRU list, and the order on it is not material
+ 	 * for anything.  The code below would move the objects to the
+ 	 * LRU list of the currently open segment, which would prevent
+ 	 * the cleaner from doing its job.
+ 	 */
+ 	if (oc->flags & OC_F_LRUDONTMOVE)
+ 		return (0);
+ 
+ 	lru = oc_getlru(oc);
+ 	CHECK_OBJ_NOTNULL(lru, LRU_MAGIC);
+ 
+ 	/*
+ 	 * We only need the LRU lock here.  The locking order is LRU->EXP
+ 	 * so we can trust the content of the oc->timer_idx without the
+ 	 * EXP lock.  Since each lru list has its own lock, this should
+ 	 * reduce contention a fair bit.
+ 	 */
+ 	if (Lck_Trylock(&lru->mtx))
+ 		return (0);
+ 
+ 	if (oc->timer_idx != BINHEAP_NOIDX) {
+ 		VTAILQ_REMOVE(&lru->lru_head, oc, lru_list);
+ 		VTAILQ_INSERT_TAIL(&lru->lru_head, oc, lru_list);
+ 		VSC_C_main->n_lru_moved++;
+ 	}
+ 	Lck_Unlock(&lru->mtx);
+ 	return (1);
+ }
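+ 
+ /*
+  * The Lck_Trylock() bail-out above is the standard trylock pattern for
+  * contended hot paths; in plain pthreads the same fragment would read
+  * as below (a sketch, assuming Lck_Trylock() wraps
+  * pthread_mutex_trylock()):
+  */
+ #if 0
+ 	if (pthread_mutex_trylock(&lru_mtx) != 0)
+ 		return (0);	/* lock busy: skip the optimization */
+ 	/* ... brief critical section: move oc to the list tail ... */
+ 	AZ(pthread_mutex_unlock(&lru_mtx));
+ 	return (1);
+ #endif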
+ 
+ /*--------------------------------------------------------------------
+  * We have changed one or more of the object timers, shuffle it
+  * accordingly in the binheap
+  *
+  * The VCL code can send us here on a non-cached object, just return.
+  *
+  * XXX: special case check for ttl = 0 ?
+  */
+ 
+ void
+ EXP_Rearm(const struct object *o)
+ {
+ 	struct objcore *oc;
+ 	struct lru *lru;
+ 
+ 	CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ 	oc = o->objcore;
+ 	if (oc == NULL)
+ 		return;
+ 	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 	lru = oc_getlru(oc);
+ 	Lck_Lock(&lru->mtx);
+ 	Lck_Lock(&exp_mtx);
+ 	/*
+ 	 * The hang-man might have taken this object off the binheap while
+ 	 * tending to a timer.  If so, we do not muck with it here.
+ 	 */
+ 	if (oc->timer_idx != BINHEAP_NOIDX && update_object_when(o)) {
+ 		assert(oc->timer_idx != BINHEAP_NOIDX);
+ 		binheap_reorder(exp_heap, oc->timer_idx);
+ 		assert(oc->timer_idx != BINHEAP_NOIDX);
+ 	}
+ 	Lck_Unlock(&exp_mtx);
+ 	Lck_Unlock(&lru->mtx);
+ 	oc_updatemeta(oc);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * This thread monitors the root of the binary heap; whenever an
+  * object expires, accounting also for grace and keep, it is killed.
+  */
+ 
+ static void * __match_proto__(void *start_routine(void *))
+ exp_timer(struct sess *sp, void *priv)
+ {
+ 	struct objcore *oc;
+ 	struct lru *lru;
+ 	double t;
+ 	struct object *o;
+ 
+ 	(void)priv;
+ 	t = VTIM_real();
+ 	oc = NULL;
+ 	while (1) {
+ 		if (oc == NULL) {
+ 			WSL_Flush(sp->wrk, 0);
+ 			WRK_SumStat(sp->wrk);
+ 			VTIM_sleep(cache_param->expiry_sleep);
+ 			t = VTIM_real();
+ 		}
+ 
+ 		Lck_Lock(&exp_mtx);
+ 		oc = binheap_root(exp_heap);
+ 		if (oc == NULL) {
+ 			Lck_Unlock(&exp_mtx);
+ 			continue;
+ 		}
+ 		CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 
+ 		/*
+ 		 * We may have expired so many objects that our timestamp
+ 		 * got out of date; refresh it and check again.
+ 		 */
+ 		if (oc->timer_when > t)
+ 			t = VTIM_real();
+ 		if (oc->timer_when > t) {
+ 			Lck_Unlock(&exp_mtx);
+ 			oc = NULL;
+ 			continue;
+ 		}
+ 
+ 		/*
+ 		 * It's time...
+ 		 * Technically we should drop the exp_mtx, get the lru->mtx,
+ 		 * get the exp_mtx again and then check that the oc is still
+ 		 * on the binheap.  We take the shorter route and try to
+ 		 * get the lru->mtx and punt if we fail.
+ 		 */
+ 
+ 		lru = oc_getlru(oc);
+ 		CHECK_OBJ_NOTNULL(lru, LRU_MAGIC);
+ 		if (Lck_Trylock(&lru->mtx)) {
+ 			Lck_Unlock(&exp_mtx);
+ 			oc = NULL;
+ 			continue;
+ 		}
+ 
+ 		/* Remove from binheap */
+ 		assert(oc->timer_idx != BINHEAP_NOIDX);
+ 		binheap_delete(exp_heap, oc->timer_idx);
+ 		assert(oc->timer_idx == BINHEAP_NOIDX);
+ 
+ 		/* And from LRU */
+ 		lru = oc_getlru(oc);
+ 		VTAILQ_REMOVE(&lru->lru_head, oc, lru_list);
+ 
+ 		Lck_Unlock(&exp_mtx);
+ 		Lck_Unlock(&lru->mtx);
+ 
+ 		VSC_C_main->n_expired++;
+ 
+ 		CHECK_OBJ_NOTNULL(oc->objhead, OBJHEAD_MAGIC);
+ 		o = oc_getobj(sp->wrk, oc);
+ 		WSL(sp->wrk, SLT_ExpKill, 0, "%u %.0f",
+ 		    o->xid, EXP_Ttl(NULL, o) - t);
+ 		(void)HSH_Deref(sp->wrk, oc, NULL);
+ 	}
+ 	NEEDLESS_RETURN(NULL);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Attempt to make space by nuking the oldest object on the LRU list
+  * which isn't in use.
+  * Returns: 1: did, 0: didn't, -1: can't
+  */
+ 
+ int
+ EXP_NukeOne(struct worker *w, struct lru *lru)
+ {
+ 	struct objcore *oc;
+ 	struct object *o;
+ 
+ 	/* Find the first currently unused object on the LRU.  */
+ 	Lck_Lock(&lru->mtx);
+ 	Lck_Lock(&exp_mtx);
+ 	VTAILQ_FOREACH(oc, &lru->lru_head, lru_list) {
+ 		CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 		assert (oc->timer_idx != BINHEAP_NOIDX);
+ 		/*
+ 		 * It won't release any space if we cannot release the last
+ 		 * reference, besides, if somebody else has a reference,
+ 		 * it's a bad idea to nuke this object anyway.
+ 		 */
+ 		if (oc->refcnt == 1)
+ 			break;
+ 	}
+ 	if (oc != NULL) {
+ 		VTAILQ_REMOVE(&lru->lru_head, oc, lru_list);
+ 		binheap_delete(exp_heap, oc->timer_idx);
+ 		assert(oc->timer_idx == BINHEAP_NOIDX);
+ 		VSC_C_main->n_lru_nuked++;
+ 	}
+ 	Lck_Unlock(&exp_mtx);
+ 	Lck_Unlock(&lru->mtx);
+ 
+ 	if (oc == NULL)
+ 		return (-1);
+ 
+ 	/* XXX: bad idea for -spersistent */
+ 	o = oc_getobj(w, oc);
+ 	WSL(w, SLT_ExpKill, 0, "%u LRU", o->xid);
+ 	(void)HSH_Deref(w, NULL, &o);
+ 	return (1);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * BinHeap helper functions for objcore.
+  */
+ 
+ static int
+ object_cmp(void *priv, void *a, void *b)
+ {
+ 	struct objcore *aa, *bb;
+ 
+ 	(void)priv;
+ 	CAST_OBJ_NOTNULL(aa, a, OBJCORE_MAGIC);
+ 	CAST_OBJ_NOTNULL(bb, b, OBJCORE_MAGIC);
+ 	return (aa->timer_when < bb->timer_when);
+ }
+ 
+ static void
+ object_update(void *priv, void *p, unsigned u)
+ {
+ 	struct objcore *oc;
+ 
+ 	(void)priv;
+ 	CAST_OBJ_NOTNULL(oc, p, OBJCORE_MAGIC);
+ 	oc->timer_idx = u;
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ EXP_Init(void)
+ {
+ 
+ 	Lck_New(&exp_mtx, lck_exp);
+ 	exp_heap = binheap_new(NULL, object_cmp, object_update);
+ 	XXXAN(exp_heap);
+ 	WRK_BgThread(&exp_thread, "cache-timeout", exp_timer, NULL);
+ }
diff --cc bin/varnishd/cache/cache_fetch.c
index 0000000,a5c0323..cc46222
mode 000000,100644..100644
--- a/bin/varnishd/cache/cache_fetch.c
+++ b/bin/varnishd/cache/cache_fetch.c
@@@ -1,0 -1,645 +1,650 @@@
+ /*-
+  * Copyright (c) 2006 Verdens Gang AS
+  * Copyright (c) 2006-2011 Varnish Software AS
+  * All rights reserved.
+  *
+  * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  */
+ 
+ #include "config.h"
+ 
+ #include <inttypes.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ 
+ #include "cache.h"
+ 
+ #include "cache_backend.h"
+ #include "vcli_priv.h"
+ #include "vct.h"
+ #include "vtcp.h"
+ 
+ static unsigned fetchfrag;
+ 
+ /*--------------------------------------------------------------------
+  * We want to issue the first error we encounter on fetching and
+  * suppress the rest.  This function does that.
+  *
+  * Other code is allowed to look at w->fetch_failed to bail out.
+  *
+  * For convenience, always return -1.
+  */
+ 
+ int
+ FetchError2(struct worker *w, const char *error, const char *more)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(w, WORKER_MAGIC);
+ 	if (!w->fetch_failed) {
+ 		if (more == NULL)
+ 			WSLB(w, SLT_FetchError, "%s", error);
+ 		else
+ 			WSLB(w, SLT_FetchError, "%s: %s", error, more);
+ 	}
+ 	w->fetch_failed = 1;
+ 	return (-1);
+ }
+ 
+ int
+ FetchError(struct worker *w, const char *error)
+ {
+ 	return(FetchError2(w, error, NULL));
+ }
+ 
+ /*--------------------------------------------------------------------
+  * VFP_NOP
+  *
+  * This fetch-processor does nothing but store the object.
+  * It also documents the API
+  */
+ 
+ /*--------------------------------------------------------------------
+  * VFP_BEGIN
+  *
+  * Called to set up stuff.
+  *
+  * 'estimate' is the estimate of the number of bytes we expect to receive,
+  * as seen on the socket, or zero if unknown.
+  */
+ static void __match_proto__()
+ vfp_nop_begin(struct worker *w, size_t estimate)
+ {
+ 
+ 	if (estimate > 0)
+ 		(void)FetchStorage(w, estimate);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * VFP_BYTES
+  *
+  * Process (up to) 'bytes' from the socket.
+  *
+  * Return -1 on error and issue FetchError();
+  *	it will not be called again once an error has happened.
+  * Return 0 on EOF on the socket, even if 'bytes' has not been reached.
+  * Return 1 when 'bytes' have been processed.
+  */
+ 
+ static int __match_proto__()
+ vfp_nop_bytes(struct worker *w, struct http_conn *htc, ssize_t bytes)
+ {
+ 	ssize_t l, wl;
+ 	struct storage *st;
+ 
+ 	AZ(w->fetch_failed);
+ 	while (bytes > 0) {
+ 		st = FetchStorage(w, 0);
+ 		if (st == NULL)
+ 			return(-1);
+ 		l = st->space - st->len;
+ 		if (l > bytes)
+ 			l = bytes;
+ 		wl = HTC_Read(w, htc, st->ptr + st->len, l);
+ 		if (wl <= 0)
+ 			return (wl);
+ 		st->len += wl;
+ 		w->fetch_obj->len += wl;
+ 		bytes -= wl;
+ 		if (w->do_stream)
+ 			RES_StreamPoll(w);
+ 	}
+ 	return (1);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * VFP_END
+  *
+  * Finish & cleanup
+  *
+  * Return -1 for error
+  * Return 0 for OK
+  */
+ 
+ static int __match_proto__()
+ vfp_nop_end(struct worker *w)
+ {
+ 	struct storage *st;
+ 
+ 	st = VTAILQ_LAST(&w->fetch_obj->store, storagehead);
+ 	if (st == NULL)
+ 		return (0);
+ 
+ 	if (st->len == 0) {
+ 		VTAILQ_REMOVE(&w->fetch_obj->store, st, list);
+ 		STV_free(st);
+ 		return (0);
+ 	}
+ 	if (st->len < st->space)
+ 		STV_trim(st, st->len);
+ 	return (0);
+ }
+ 
+ static struct vfp vfp_nop = {
+ 	.begin	=	vfp_nop_begin,
+ 	.bytes	=	vfp_nop_bytes,
+ 	.end	=	vfp_nop_end,
+ };
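+ 
+ /*
+  * The three callbacks above are the whole fetch-processor API.  A
+  * minimal sketch of a custom processor which records how many bytes it
+  * was offered and otherwise delegates to the nop callbacks; the
+  * counter is invented for the example.
+  */
+ #if 0
+ static ssize_t example_bytes_offered;
+ 
+ static void __match_proto__()
+ vfp_count_begin(struct worker *w, size_t estimate)
+ {
+ 	example_bytes_offered = 0;
+ 	vfp_nop_begin(w, estimate);
+ }
+ 
+ static int __match_proto__()
+ vfp_count_bytes(struct worker *w, struct http_conn *htc, ssize_t bytes)
+ {
+ 	example_bytes_offered += bytes;
+ 	return (vfp_nop_bytes(w, htc, bytes));
+ }
+ 
+ static struct vfp vfp_count = {
+ 	.begin	=	vfp_count_begin,
+ 	.bytes	=	vfp_count_bytes,
+ 	.end	=	vfp_nop_end,
+ };
+ #endif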
+ 
+ /*--------------------------------------------------------------------
+  * Fetch Storage to put object into.
+  *
+  */
+ 
+ struct storage *
+ FetchStorage(struct worker *w, ssize_t sz)
+ {
+ 	ssize_t l;
+ 	struct storage *st;
+ 	struct object *obj;
+ 
+ 	obj = w->fetch_obj;
+ 	CHECK_OBJ_NOTNULL(obj, OBJECT_MAGIC);
+ 	st = VTAILQ_LAST(&obj->store, storagehead);
+ 	if (st != NULL && st->len < st->space)
+ 		return (st);
+ 
+ 	l = fetchfrag;
+ 	if (l == 0)
+ 		l = sz;
+ 	if (l == 0)
+ 		l = cache_param->fetch_chunksize;
+ 	st = STV_alloc(w, l);
+ 	if (st == NULL) {
+ 		(void)FetchError(w, "Could not get storage");
+ 		return (NULL);
+ 	}
+ 	AZ(st->len);
+ 	VTAILQ_INSERT_TAIL(&obj->store, st, list);
+ 	return (st);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Convert a string to an ssize_t safely
+  */
+ 
+ static ssize_t
+ fetch_number(const char *nbr, int radix)
+ {
+ 	uintmax_t cll;
+ 	ssize_t cl;
+ 	char *q;
+ 
+ 	if (*nbr == '\0')
+ 		return (-1);
+ 	cll = strtoumax(nbr, &q, radix);
+ 	if (q == NULL || *q != '\0')
+ 		return (-1);
+ 
+ 	cl = (ssize_t)cll;
+ 	if ((uintmax_t)cl != cll) /* Protect against bogusly large values */
+ 		return (-1);
+ 	return (cl);
+ }
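+ 
+ /*
+  * Examples of fetch_number() behaviour:
+  *
+  *	fetch_number("1234", 10) == 1234
+  *	fetch_number("ff", 16)   == 255	(chunked headers use radix 16)
+  *	fetch_number("", 10)     == -1	(empty string)
+  *	fetch_number("12x", 10)  == -1	(trailing junk)
+  */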
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static int
+ fetch_straight(struct worker *w, struct http_conn *htc, ssize_t cl)
+ {
+ 	int i;
+ 
+ 	assert(w->body_status == BS_LENGTH);
+ 
+ 	if (cl < 0) {
+ 		return (FetchError(w, "straight length field bogus"));
+ 	} else if (cl == 0)
+ 		return (0);
+ 
+ 	i = w->vfp->bytes(w, htc, cl);
+ 	if (i <= 0)
+ 		return (FetchError(w, "straight insufficient bytes"));
+ 	return (0);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Read a chunked HTTP object.
+  *
+  * XXX: Reading one byte at a time is pretty pessimal.
+  */
+ 
+ static int
+ fetch_chunked(struct worker *w, struct http_conn *htc)
+ {
+ 	int i;
+ 	char buf[20];		/* XXX: 20 is arbitrary */
+ 	unsigned u;
+ 	ssize_t cl;
+ 
+ 	assert(w->body_status == BS_CHUNKED);
+ 	do {
+ 		/* Skip leading whitespace */
+ 		do {
+ 			if (HTC_Read(w, htc, buf, 1) <= 0)
+ 				return (-1);
+ 		} while (vct_islws(buf[0]));
+ 
+ 		if (!vct_ishex(buf[0]))
+ 			return (FetchError(w,"chunked header non-hex"));
+ 
+ 		/* Collect hex digits, skipping leading zeros */
+ 		for (u = 1; u < sizeof buf; u++) {
+ 			do {
+ 				if (HTC_Read(w, htc, buf + u, 1) <= 0)
+ 					return (-1);
+ 			} while (u == 1 && buf[0] == '0' && buf[u] == '0');
+ 			if (!vct_ishex(buf[u]))
+ 				break;
+ 		}
+ 
+ 		if (u >= sizeof buf)
+ 			return (FetchError(w,"chunked header too long"));
+ 
+ 		/* Skip trailing white space */
+ 		while (vct_islws(buf[u]) && buf[u] != '\n')
+ 			if (HTC_Read(w, htc, buf + u, 1) <= 0)
+ 				return (-1);
+ 
+ 		if (buf[u] != '\n')
+ 			return (FetchError(w,"chunked header no NL"));
+ 
+ 		buf[u] = '\0';
+ 		cl = fetch_number(buf, 16);
+ 		if (cl < 0)
+ 			return (FetchError(w,"chunked header number syntax"));
+ 
+ 		if (cl > 0 && w->vfp->bytes(w, htc, cl) <= 0)
+ 			return (-1);
+ 
+ 		i = HTC_Read(w, htc, buf, 1);
+ 		if (i <= 0)
+ 			return (-1);
+ 		if (buf[0] == '\r' && HTC_Read(w, htc, buf, 1) <= 0)
+ 			return (-1);
+ 		if (buf[0] != '\n')
+ 			return (FetchError(w,"chunked tail no NL"));
+ 	} while (cl > 0);
+ 	return (0);
+ }
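+ 
+ /*
+  * For reference, the wire format fetch_chunked() parses, per RFC 2616
+  * section 3.6.1 (CRLF written out):
+  *
+  *	5\r\n		chunk size in hex
+  *	hello\r\n	chunk data, then CRLF
+  *	0\r\n		zero-size chunk terminates the body
+  *	\r\n
+  *
+  * The loop above reads the hex header a byte at a time, hands 'cl'
+  * bytes to the vfp, then consumes the trailing CRLF, until cl == 0.
+  */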
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static int
+ fetch_eof(struct worker *w, struct http_conn *htc)
+ {
+ 	int i;
+ 
+ 	assert(w->body_status == BS_EOF);
+ 	i = w->vfp->bytes(w, htc, SSIZE_MAX);
+ 	if (i < 0)
+ 		return (-1);
+ 	return (0);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Fetch any body attached to the incoming request, and either write it
+  * to the backend (if we pass) or discard it (anything else).
+  * This is mainly a separate function to isolate the stack buffer and
+  * to contain the complexity when we start handling chunked encoding.
+  */
+ 
+ int
+ FetchReqBody(struct sess *sp)
+ {
+ 	unsigned long content_length;
+ 	char buf[8192];
+ 	char *ptr, *endp;
+ 	int rdcnt;
+ 
+ 	if (http_GetHdr(sp->http, H_Content_Length, &ptr)) {
+ 
+ 		content_length = strtoul(ptr, &endp, 10);
+ 		/* XXX should check result of conversion */
+ 		while (content_length) {
+ 			if (content_length > sizeof buf)
+ 				rdcnt = sizeof buf;
+ 			else
+ 				rdcnt = content_length;
+ 			rdcnt = HTC_Read(sp->wrk, sp->htc, buf, rdcnt);
+ 			if (rdcnt <= 0)
+ 				return (1);
+ 			content_length -= rdcnt;
+ 			if (!sp->sendbody)
+ 				continue;
+ 			(void)WRW_Write(sp->wrk, buf, rdcnt); /* XXX: stats ? */
+ 			if (WRW_Flush(sp->wrk))
+ 				return (2);
+ 		}
+ 	}
+ 	if (http_GetHdr(sp->http, H_Transfer_Encoding, NULL)) {
+ 		/* XXX: Handle chunked encoding. */
+ 		WSP(sp, SLT_Debug, "Transfer-Encoding in request");
+ 		return (1);
+ 	}
+ 	return (0);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Send request, and receive the HTTP protocol response, but not the
+  * response body.
+  *
+  * Return value:
+  *	-1 failure, not retryable
+  *	 0 success
+  *	 1 failure which can be retried.
+  */
+ 
+ int
+ FetchHdr(struct sess *sp)
+ {
+ 	struct vbc *vc;
+ 	struct worker *w;
+ 	char *b;
+ 	struct http *hp;
+ 	int retry = -1;
+ 	int i;
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->wrk, WORKER_MAGIC);
+ 	w = sp->wrk;
+ 
+ 	AN(sp->director);
+ 	AZ(sp->obj);
+ 
+ 	if (sp->objcore != NULL) {		/* pass has no objcore */
+ 		CHECK_OBJ_NOTNULL(sp->objcore, OBJCORE_MAGIC);
+ 		AN(sp->objcore->flags & OC_F_BUSY);
+ 	}
+ 
+ 	hp = w->bereq;
+ 
+ 	sp->wrk->vbc = VDI_GetFd(NULL, sp);
+ 	if (sp->wrk->vbc == NULL) {
+ 		WSP(sp, SLT_FetchError, "no backend connection");
+ 		return (-1);
+ 	}
+ 	vc = sp->wrk->vbc;
+ 	if (vc->recycled)
+ 		retry = 1;
+ 
+ 	/*
+ 	 * Now that we know our backend, we can set a default Host:
+ 	 * header if one is necessary.  This cannot be done in the VCL
+ 	 * because the backend may be chosen by a director.
+ 	 */
+ 	if (!http_GetHdr(hp, H_Host, &b))
+ 		VDI_AddHostHeader(sp);
+ 
+ 	(void)VTCP_blocking(vc->fd);	/* XXX: we should timeout instead */
+ 	WRW_Reserve(w, &vc->fd);
+ 	(void)http_Write(w, vc->vsl_id, hp, 0);	/* XXX: stats ? */
+ 
+ 	/* Deal with any message-body the request might have */
+ 	i = FetchReqBody(sp);
+ 	if (WRW_FlushRelease(w) || i > 0) {
+ 		WSP(sp, SLT_FetchError, "backend write error: %d (%s)",
+ 		    errno, strerror(errno));
+ 		VDI_CloseFd(sp->wrk);
+ 		/* XXX: other cleanup ? */
+ 		return (retry);
+ 	}
+ 
+ 	/* Checkpoint the VSL here */
+ 	WSL_Flush(w, 0);
+ 
+ 	/* XXX is this the right place? */
+ 	VSC_C_main->backend_req++;
+ 
+ 	/* Receive response */
+ 
+ 	HTC_Init(w->htc, w->ws, vc->fd, vc->vsl_id, cache_param->http_resp_size,
+ 	    cache_param->http_resp_hdr_len);
+ 
+ 	VTCP_set_read_timeout(vc->fd, vc->first_byte_timeout);
+ 
+ 	i = HTC_Rx(w->htc);
+ 
+ 	if (i < 0) {
+ 		WSP(sp, SLT_FetchError, "http first read error: %d %d (%s)",
+ 		    i, errno, strerror(errno));
+ 		VDI_CloseFd(sp->wrk);
+ 		/* XXX: other cleanup ? */
+ 		/* Retryable if we never received anything */
+ 		return (i == -1 ? retry : -1);
+ 	}
+ 
+ 	VTCP_set_read_timeout(vc->fd, vc->between_bytes_timeout);
+ 
+ 	while (i == 0) {
+ 		i = HTC_Rx(w->htc);
+ 		if (i < 0) {
+ 			WSP(sp, SLT_FetchError,
+ 			    "http first read error: %d %d (%s)",
+ 			    i, errno, strerror(errno));
+ 			VDI_CloseFd(sp->wrk);
+ 			/* XXX: other cleanup ? */
+ 			return (-1);
+ 		}
+ 	}
+ 
+ 	hp = w->beresp;
+ 
+ 	if (http_DissectResponse(w, w->htc, hp)) {
+ 		WSP(sp, SLT_FetchError, "http format error");
+ 		VDI_CloseFd(sp->wrk);
+ 		/* XXX: other cleanup ? */
+ 		return (-1);
+ 	}
+ 	return (0);
+ }
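+ 
+ /*
+  * How a caller is expected to use the three-way return value; a
+  * hypothetical fragment (the real dispatch lives in the fetch step of
+  * the state engine):
+  */
+ #if 0
+ 	i = FetchHdr(sp);
+ 	/*
+ 	 * A recycled connection may have been closed by the backend
+ 	 * before our request reached it; retry exactly once.
+ 	 */
+ 	if (i == 1)
+ 		i = FetchHdr(sp);
+ 	if (i != 0) {
+ 		sp->err_code = 503;
+ 		sp->step = STP_ERROR;
+ 	}
+ #endif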
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ int
+ FetchBody(struct worker *w, struct object *obj)
+ {
+ 	int cls;
+ 	struct storage *st;
+ 	int mklen;
+ 	ssize_t cl;
+ 
+ 	CHECK_OBJ_NOTNULL(w, WORKER_MAGIC);
+ 	AZ(w->fetch_obj);
+ 	CHECK_OBJ_NOTNULL(w->vbc, VBC_MAGIC);
+ 	CHECK_OBJ_NOTNULL(obj, OBJECT_MAGIC);
+ 	CHECK_OBJ_NOTNULL(obj->http, HTTP_MAGIC);
+ 
+ 	if (w->vfp == NULL)
+ 		w->vfp = &vfp_nop;
+ 
+ 	AssertObjCorePassOrBusy(obj->objcore);
+ 
+ 	AZ(w->vgz_rx);
 -	AZ(VTAILQ_FIRST(&obj->store));
++
++        /* If we've freshened from another object and got a "Not Modified"
++         * response, then we have already duped the other object's body.
++         */
++        if (w->beresp->status != 304)
++        	AZ(VTAILQ_FIRST(&obj->store));
+ 
+ 	w->fetch_obj = obj;
+ 	w->fetch_failed = 0;
+ 
+ 	/* XXX: pick up estimate from objdr ? */
+ 	cl = 0;
+ 	switch (w->body_status) {
+ 	case BS_NONE:
+ 		cls = 0;
+ 		mklen = 0;
+ 		break;
+ 	case BS_ZERO:
+ 		cls = 0;
+ 		mklen = 1;
+ 		break;
+ 	case BS_LENGTH:
+ 		cl = fetch_number(w->h_content_length, 10);
+ 		w->vfp->begin(w, cl > 0 ? cl : 0);
+ 		cls = fetch_straight(w, w->htc, cl);
+ 		mklen = 1;
+ 		if (w->vfp->end(w))
+ 			cls = -1;
+ 		break;
+ 	case BS_CHUNKED:
+ 		w->vfp->begin(w, cl);
+ 		cls = fetch_chunked(w, w->htc);
+ 		mklen = 1;
+ 		if (w->vfp->end(w))
+ 			cls = -1;
+ 		break;
+ 	case BS_EOF:
+ 		w->vfp->begin(w, cl);
+ 		cls = fetch_eof(w, w->htc);
+ 		mklen = 1;
+ 		if (w->vfp->end(w))
+ 			cls = -1;
+ 		break;
+ 	case BS_ERROR:
+ 		cls = 1;
+ 		mklen = 0;
+ 		break;
+ 	default:
+ 		cls = 0;
+ 		mklen = 0;
+ 		INCOMPL();
+ 	}
+ 	AZ(w->vgz_rx);
+ 
+ 	/*
+ 	 * It is OK for ->end to just leave the last storage segment
+ 	 * sitting on w->storage, we will always call vfp_nop_end()
+ 	 * to get it trimmed or thrown out if empty.
+ 	 */
+ 	AZ(vfp_nop_end(w));
+ 
+ 	w->fetch_obj = NULL;
+ 
+ 	WSLB(w, SLT_Fetch_Body, "%u(%s) cls %d mklen %u",
+ 	    w->body_status, body_status(w->body_status),
+ 	    cls, mklen);
+ 
+ 	if (w->body_status == BS_ERROR) {
+ 		VDI_CloseFd(w);
+ 		return (__LINE__);
+ 	}
+ 
+ 	if (cls < 0) {
+ 		w->stats.fetch_failed++;
+ 		/* XXX: Wouldn't this store automatically be released ? */
+ 		while (!VTAILQ_EMPTY(&obj->store)) {
+ 			st = VTAILQ_FIRST(&obj->store);
+ 			VTAILQ_REMOVE(&obj->store, st, list);
+ 			STV_free(st);
+ 		}
+ 		VDI_CloseFd(w);
+ 		obj->len = 0;
+ 		return (__LINE__);
+ 	}
+ 	AZ(w->fetch_failed);
+ 
+ 	if (cls == 0 && w->do_close)
+ 		cls = 1;
+ 
+ 	WSLB(w, SLT_Length, "%u", obj->len);
+ 
+ 	{
+ 		/* Sanity check fetch methods accounting */
+ 		ssize_t uu;
+ 
+ 		uu = 0;
+ 		VTAILQ_FOREACH(st, &obj->store, list)
+ 			uu += st->len;
+ 		if (w->do_stream)
+ 			/* Streaming might have started freeing stuff */
+ 			assert(uu <= obj->len);
+ 		else
+ 			assert(uu == obj->len);
+ 	}
+ 
+ 	if (mklen > 0) {
+ 		http_Unset(obj->http, H_Content_Length);
+ 		http_PrintfHeader(w, w->vbc->vsl_id, obj->http,
+ 		    "Content-Length: %jd", (intmax_t)obj->len);
+ 	}
+ 
+ 	if (cls)
+ 		VDI_CloseFd(w);
+ 	else
+ 		VDI_RecycleFd(w);
+ 
+ 	return (0);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Debugging aids
+  */
+ 
+ static void
+ debug_fragfetch(struct cli *cli, const char * const *av, void *priv)
+ {
+ 	(void)priv;
+ 	(void)cli;
+ 	fetchfrag = strtoul(av[2], NULL, 0);
+ }
+ 
+ static struct cli_proto debug_cmds[] = {
+ 	{ "debug.fragfetch", "debug.fragfetch",
+ 		"\tEnable fetch fragmentation\n", 1, 1, "d", debug_fragfetch },
+ 	{ NULL }
+ };
+ 
+ /*--------------------------------------------------------------------
+  *
+  */
+ 
+ void
+ Fetch_Init(void)
+ {
+ 
+ 	CLI_AddFuncs(debug_cmds);
+ }
diff --cc bin/varnishd/cache/cache_hash.c
index 0000000,db865de..5251f6d
mode 000000,100644..100644
--- a/bin/varnishd/cache/cache_hash.c
+++ b/bin/varnishd/cache/cache_hash.c
@@@ -1,0 -1,752 +1,789 @@@
+ /*-
+  * Copyright (c) 2006 Verdens Gang AS
+  * Copyright (c) 2006-2011 Varnish Software AS
+  * All rights reserved.
+  *
+  * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  *
+  * This is the central hash-table code, it relies on a chosen hash
+  * implementation only for the actual hashing, all the housekeeping
+  * happens here.
+  *
+  * We have two kinds of structures, objecthead and object.  An objecthead
+  * corresponds to a given (Host:, URL) tuple, and the objects hung from
+  * the objecthead may represent various variant instances (ie: Vary:
+  * header, different TTL etc) of that web-entity.
+  *
+  * Each objecthead has a mutex which locks both its own fields, the
+  * list of objects and fields in the objects.
+  *
+  * The hash implementation must supply a reference count facility on
+  * the objecthead, and return with a reference held after a lookup.
+  *
+  * Lookups in the hash implementation return with a ref held, and each
+  * object hung from the objhead holds a ref as well.
+  *
+  * Objects have refcounts which are locked by the objecthead mutex.
+  *
+  * New objects are always marked busy, and they can go from busy to
+  * not busy only once.
+  */
+ 
+ #include "config.h"
+ 
+ #include <math.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ 
+ #include "cache.h"
+ 
+ #include "hash/hash_slinger.h"
+ #include "vsha256.h"
+ 
+ static const struct hash_slinger *hash;
+ 
+ /*---------------------------------------------------------------------*/
+ /* Precreate an objhead and object for later use */
+ void
+ HSH_Prealloc(const struct sess *sp)
+ {
+ 	struct worker *w;
+ 	struct objhead *oh;
+ 	struct objcore *oc;
+ 	struct waitinglist *wl;
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->wrk, WORKER_MAGIC);
+ 	w = sp->wrk;
+ 
+ 	if (w->nobjcore == NULL) {
+ 		ALLOC_OBJ(oc, OBJCORE_MAGIC);
+ 		XXXAN(oc);
+ 		w->nobjcore = oc;
+ 		w->stats.n_objectcore++;
+ 		oc->flags |= OC_F_BUSY;
+ 	}
+ 	CHECK_OBJ_NOTNULL(w->nobjcore, OBJCORE_MAGIC);
+ 
+ 	if (w->nobjhead == NULL) {
+ 		ALLOC_OBJ(oh, OBJHEAD_MAGIC);
+ 		XXXAN(oh);
+ 		oh->refcnt = 1;
+ 		VTAILQ_INIT(&oh->objcs);
+ 		Lck_New(&oh->mtx, lck_objhdr);
+ 		w->nobjhead = oh;
+ 		w->stats.n_objecthead++;
+ 	}
+ 	CHECK_OBJ_NOTNULL(w->nobjhead, OBJHEAD_MAGIC);
+ 
+ 	if (w->nwaitinglist == NULL) {
+ 		ALLOC_OBJ(wl, WAITINGLIST_MAGIC);
+ 		XXXAN(wl);
+ 		VTAILQ_INIT(&wl->list);
+ 		w->nwaitinglist = wl;
+ 		w->stats.n_waitinglist++;
+ 	}
+ 	CHECK_OBJ_NOTNULL(w->nwaitinglist, WAITINGLIST_MAGIC);
+ 
+ 	if (w->nbusyobj == NULL) {
+ 		ALLOC_OBJ(w->nbusyobj, BUSYOBJ_MAGIC);
+ 		XXXAN(w->nbusyobj);
+ 	}
+ 
+ 	if (hash->prep != NULL)
+ 		hash->prep(sp);
+ }
+ 
+ void
+ HSH_Cleanup(struct worker *w)
+ {
+ 
+ 	if (w->nobjcore != NULL) {
+ 		FREE_OBJ(w->nobjcore);
+ 		w->stats.n_objectcore--;
+ 		w->nobjcore = NULL;
+ 	}
+ 	if (w->nobjhead != NULL) {
+ 		Lck_Delete(&w->nobjhead->mtx);
+ 		FREE_OBJ(w->nobjhead);
+ 		w->nobjhead = NULL;
+ 		w->stats.n_objecthead--;
+ 	}
+ 	if (w->nwaitinglist != NULL) {
+ 		FREE_OBJ(w->nwaitinglist);
+ 		w->nwaitinglist = NULL;
+ 	}
+ 	if (w->nhashpriv != NULL) {
+ 		/* XXX: If needed, add slinger method for this */
+ 		free(w->nhashpriv);
+ 		w->nhashpriv = NULL;
+ 	}
+ 	if (w->nbusyobj != NULL) {
+ 		FREE_OBJ(w->nbusyobj);
+ 		w->nbusyobj = NULL;
+ 	}
+ }
+ 
+ void
+ HSH_DeleteObjHead(struct worker *w, struct objhead *oh)
+ {
+ 
+ 	AZ(oh->refcnt);
+ 	assert(VTAILQ_EMPTY(&oh->objcs));
+ 	Lck_Delete(&oh->mtx);
+ 	w->stats.n_objecthead--;
+ 	FREE_OBJ(oh);
+ }
+ 
+ void
+ HSH_AddString(const struct sess *sp, const char *str)
+ {
+ 	int l;
+ 
+ 	if (str == NULL)
+ 		str = "";
+ 	l = strlen(str);
+ 
+ 	SHA256_Update(sp->wrk->sha256ctx, str, l);
+ 	SHA256_Update(sp->wrk->sha256ctx, "#", 1);
+ 
+ 	if (cache_param->log_hash)
+ 		WSP(sp, SLT_Hash, "%s", str);
+ }
+ 
+ /*---------------------------------------------------------------------
+  * This is a debugging hack to enable testing of boundary conditions
+  * in the hash algorithm.
+  * We trap the first 9 different digests and translate them to different
+  * digests with edge bit conditions
+  */
+ 
+ static struct hsh_magiclist {
+ 	unsigned char was[SHA256_LEN];
+ 	unsigned char now[SHA256_LEN];
+ } hsh_magiclist[] = {
+ 	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ 	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 } },
+ 	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 } },
+ 	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40 } },
+ 	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 } },
+ 	{ .now = {	0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ 	{ .now = {	0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ 	{ .now = {	0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ 	{ .now = {	0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ };
+ 
+ #define HSH_NMAGIC (sizeof hsh_magiclist / sizeof hsh_magiclist[0])
+ 
+ static void
+ hsh_testmagic(void *result)
+ {
+ 	int i, j;
+ 	static int nused = 0;
+ 
+ 	for (i = 0; i < nused; i++)
+ 		if (!memcmp(hsh_magiclist[i].was, result, SHA256_LEN))
+ 			break;
+ 	if (i == nused && i < HSH_NMAGIC)
+ 		memcpy(hsh_magiclist[nused++].was, result, SHA256_LEN);
+ 	if (i == nused)
+ 		return;
+ 	assert(i < HSH_NMAGIC);
+ 	fprintf(stderr, "HASHMAGIC: <");
+ 	for (j = 0; j < SHA256_LEN; j++)
+ 		fprintf(stderr, "%02x", ((unsigned char*)result)[j]);
+ 	fprintf(stderr, "> -> <");
+ 	memcpy(result, hsh_magiclist[i].now, SHA256_LEN);
+ 	for (j = 0; j < SHA256_LEN; j++)
+ 		fprintf(stderr, "%02x", ((unsigned char*)result)[j]);
+ 	fprintf(stderr, ">\n");
+ }
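
The trap-and-translate mechanics above are easier to follow at reduced
scale.  A self-contained sketch with 4-byte "digests" and a two-entry
table (values assumed for illustration; the real table holds the nine
32-byte edge patterns listed above):

    #include <stdio.h>
    #include <string.h>

    #define DLEN 4
    #define NMAGIC 2

    static struct {
        unsigned char was[DLEN];
        unsigned char now[DLEN];
    } magic[NMAGIC] = {
        { .now = { 0x00, 0x00, 0x00, 0x00 } },
        { .now = { 0x00, 0x00, 0x00, 0x01 } },
    };

    static void
    testmagic(unsigned char *result)
    {
        int i;
        static int nused = 0;

        for (i = 0; i < nused; i++)         /* seen this digest before? */
            if (!memcmp(magic[i].was, result, DLEN))
                break;
        if (i == nused && i < NMAGIC)       /* no: claim a table slot */
            memcpy(magic[nused++].was, result, DLEN);
        if (i == nused)                     /* table full: leave it alone */
            return;
        memcpy(result, magic[i].now, DLEN); /* translate, consistently */
    }

    int
    main(void)
    {
        unsigned char d1[DLEN] = { 0xde, 0xad, 0xbe, 0xef };
        unsigned char d2[DLEN] = { 0xde, 0xad, 0xbe, 0xef };

        testmagic(d1);
        testmagic(d2);  /* same input is remapped to the same edge value */
        printf("%02x%02x%02x%02x\n", d2[0], d2[1], d2[2], d2[3]);
        return (0);
    }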
+ 
+ /*---------------------------------------------------------------------
+  * Insert an object which magically appears out of nowhere or, more likely,
+  * comes off some persistent storage device.
+  * Return it with a reference held.
+  */
+ 
+ struct objcore *
+ HSH_Insert(const struct sess *sp)
+ {
+ 	struct worker *w;
+ 	struct objhead *oh;
+ 	struct objcore *oc;
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->wrk, WORKER_MAGIC);
+ 	AN(hash);
+ 	w = sp->wrk;
+ 
+ 	HSH_Prealloc(sp);
+ 	if (cache_param->diag_bitmap & 0x80000000)
+ 		hsh_testmagic(sp->wrk->nobjhead->digest);
+ 
+ 	AZ(sp->hash_objhead);
+ 	AN(w->nobjhead);
+ 	oh = hash->lookup(sp, w->nobjhead);
+ 	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
+ 	if (oh == w->nobjhead)
+ 		w->nobjhead = NULL;
+ 	Lck_Lock(&oh->mtx);
+ 	assert(oh->refcnt > 0);
+ 
+ 	/* Insert (precreated) objcore in objecthead */
+ 	oc = w->nobjcore;
+ 	w->nobjcore = NULL;
+ 	oc->refcnt = 1;
+ 	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 	AZ(oc->flags & OC_F_BUSY);
+ 
+ 	VTAILQ_INSERT_HEAD(&oh->objcs, oc, list);
+ 	/* NB: do not deref objhead, the new object inherits our reference */
+ 	oc->objhead = oh;
+ 	Lck_Unlock(&oh->mtx);
+ 	sp->wrk->stats.n_vampireobject++;
+ 	return (oc);
+ }
+ 
+ /*---------------------------------------------------------------------
+  */
+ 
+ struct objcore *
+ HSH_Lookup(struct sess *sp, struct objhead **poh)
+ {
+ 	struct worker *w;
+ 	struct objhead *oh;
+ 	struct objcore *oc;
+ 	struct objcore *busy_oc, *grace_oc;
+ 	struct object *o;
 -	double grace_ttl;
++	struct object *stale_o;       /* for freshness check */
++	double grace_ttl, stale_ttl;
++	char *p;
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->wrk, WORKER_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->http, HTTP_MAGIC);
+ 	AN(sp->director);
+ 	AN(hash);
++	AZ(sp->stale_obj);
+ 	w = sp->wrk;
+ 
+ 	HSH_Prealloc(sp);
+ 	memcpy(sp->wrk->nobjhead->digest, sp->digest, sizeof sp->digest);
+ 	if (cache_param->diag_bitmap & 0x80000000)
+ 		hsh_testmagic(sp->wrk->nobjhead->digest);
+ 
+ 	if (sp->hash_objhead != NULL) {
+ 		/*
+ 		 * This sess came off the waiting list, and brings an
+ 		 * oh refcnt with it.
+ 		 */
+ 		CHECK_OBJ_NOTNULL(sp->hash_objhead, OBJHEAD_MAGIC);
+ 		oh = sp->hash_objhead;
+ 		sp->hash_objhead = NULL;
+ 	} else {
+ 		AN(w->nobjhead);
+ 		oh = hash->lookup(sp, w->nobjhead);
+ 		if (oh == w->nobjhead)
+ 			w->nobjhead = NULL;
+ 	}
+ 
+ 	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
+ 	Lck_Lock(&oh->mtx);
+ 	assert(oh->refcnt > 0);
+ 	busy_oc = NULL;
+ 	grace_oc = NULL;
++	stale_o = NULL;       /* for freshness check */
+ 	grace_ttl = NAN;
++	stale_ttl = NAN;
+ 	VTAILQ_FOREACH(oc, &oh->objcs, list) {
+ 		/* Must be at least our own ref + the objcore we examine */
+ 		assert(oh->refcnt > 1);
+ 		CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 		assert(oc->objhead == oh);
+ 
+ 		if (oc->flags & OC_F_BUSY) {
+ 			CHECK_OBJ_NOTNULL(oc->busyobj, BUSYOBJ_MAGIC);
+ 			if (sp->hash_ignore_busy)
+ 				continue;
+ 
+ 			if (oc->busyobj->vary != NULL &&
+ 			    !VRY_Match(sp, oc->busyobj->vary))
+ 				continue;
+ 
+ 			busy_oc = oc;
+ 			continue;
+ 		}
+ 
+ 		o = oc_getobj(sp->wrk, oc);
+ 		CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ 
 -		if (o->exp.ttl <= 0.)
++		if (o->exp.ttl <= 0. && o->exp.grace <= 0.
++		    && o->exp.keep <= 0.)
+ 			continue;
+ 		if (BAN_CheckObject(o, sp))
+ 			continue;
+ 		if (o->vary != NULL && !VRY_Match(sp, o->vary))
+ 			continue;
+ 
+ 		/* If still valid, use it */
+ 		if (EXP_Ttl(sp, o) >= sp->t_req)
+ 			break;
+ 
+ 		/*
+ 		 * Remember any matching objects inside their grace period
+ 		 * and if there are several, use the least expired one.
+ 		 */
+ 		if (EXP_Grace(sp, o) >= sp->t_req) {
+ 			if (grace_oc == NULL ||
+ 			    grace_ttl < o->exp.entered + o->exp.ttl) {
+ 				grace_oc = oc;
+ 				grace_ttl = o->exp.entered + o->exp.ttl;
+ 			}
+ 		}
++
++		/* At this point we know:
++		 * - o's TTL has elapsed,
++		 * - o is neither busy nor banned,
++		 * - o matches Vary (or has no Vary header).
++		 * The object may be used for a conditional backend request
++		 * if:
++		 * - the keep time has not elapsed, and
++		 * - it has a Last-Modified and/or an ETag header.
++		 * If there are several, use the least expired one.
++		 */
++		if (EXP_Keep(sp, o) >= sp->t_req
++		    && (http_GetHdr(o->http, H_Last_Modified, &p)
++		    || http_GetHdr(o->http, H_ETag, &p))) {
++			if (stale_o == NULL ||
++			    stale_ttl < o->exp.entered + o->exp.ttl) {
++				stale_o = o;
++				stale_ttl = o->exp.entered + o->exp.ttl;
++			}
++		}
++
+ 	}
+ 
+ 	/*
+ 	 * If we have seen a busy object or the backend is unhealthy, and
+ 	 * we have an object in grace, use it, if req.grace is also
+ 	 * satisfied.
+ 	 * XXX: Interesting footnote:  The busy object might be for a
+ 	 * XXX: different "Vary:" than we sought.  We have no way of knowing
+ 	 * XXX: this until the object is unbusy'ed, so in practice we
+ 	 * XXX: serialize fetch of all Vary's if grace is possible.
+ 	 */
+ 
+ 	AZ(sp->objcore);
+ 	sp->objcore = grace_oc;		/* XXX: Hack-ish */
+ 	if (oc == NULL			/* We found no live object */
+ 	    && grace_oc != NULL		/* There is a grace candidate */
+ 	    && (busy_oc != NULL		/* Somebody else is already busy */
+ 	    || !VDI_Healthy(sp->director, sp))) {
+ 					/* Or it is impossible to fetch */
+ 		o = oc_getobj(sp->wrk, grace_oc);
+ 		CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ 		oc = grace_oc;
+ 	}
+ 	sp->objcore = NULL;
+ 
+ 	if (oc != NULL && !sp->hash_always_miss) {
+ 		o = oc_getobj(sp->wrk, oc);
+ 		CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ 		assert(oc->objhead == oh);
+ 
+ 		/* We found an object we like */
+ 		oc->refcnt++;
+ 		if (o->hits < INT_MAX)
+ 			o->hits++;
+ 		assert(oh->refcnt > 1);
+ 		Lck_Unlock(&oh->mtx);
+ 		assert(hash->deref(oh));
+ 		*poh = oh;
+ 		return (oc);
+ 	}
+ 
+ 	if (busy_oc != NULL) {
+ 		/* There are one or more busy objects, wait for them */
+ 		if (sp->esi_level == 0) {
+ 			CHECK_OBJ_NOTNULL(sp->wrk->nwaitinglist,
+ 			    WAITINGLIST_MAGIC);
+ 			if (oh->waitinglist == NULL) {
+ 				oh->waitinglist = sp->wrk->nwaitinglist;
+ 				sp->wrk->nwaitinglist = NULL;
+ 			}
+ 			VTAILQ_INSERT_TAIL(&oh->waitinglist->list, sp, list);
+ 		}
+ 		if (cache_param->diag_bitmap & 0x20)
+ 			WSP(sp, SLT_Debug,
+ 				"on waiting list <%p>", oh);
+ 		SES_Charge(sp);
+ 		/*
+ 		 * The objhead reference transfers to the sess, we get it
+ 		 * back when the sess comes off the waiting list and
+ 		 * calls us again
+ 		 */
+ 		sp->hash_objhead = oh;
+ 		sp->wrk = NULL;
+ 		Lck_Unlock(&oh->mtx);
+ 		return (NULL);
+ 	}
+ 
++	/* If we're not serving a valid or graced object and we saved
++	 * stale_o, it is a candidate for the conditional backend request. */
++	AZ(oc && !sp->hash_always_miss);
++	AZ(busy_oc);
++	if (stale_o != NULL) {
++		CHECK_OBJ_NOTNULL(stale_o->objcore, OBJCORE_MAGIC);
++		AZ(stale_o->objcore->flags & OC_F_BUSY);
++		Lck_AssertHeld(&oh->mtx);
++		stale_o->objcore->refcnt++;
++		sp->stale_obj = stale_o;
++	}
++
+ 	/* Insert (precreated) objcore in objecthead */
+ 	oc = w->nobjcore;
+ 	w->nobjcore = NULL;
+ 	AN(oc->flags & OC_F_BUSY);
+ 	oc->refcnt = 1;
+ 
+ 	/* XXX: clear w->nbusyobj before use */
+ 	VRY_Validate(sp->vary_b);
+ 	if (sp->vary_l != NULL)
+ 		w->nbusyobj->vary = sp->vary_b;
+ 	else
+ 		w->nbusyobj->vary = NULL;
+ 	oc->busyobj = w->nbusyobj;
+ 	w->nbusyobj = NULL;
+ 
+ 	/*
+ 	 * Busy objects go on the tail, so they will not trip up searches.
+ 	 * HSH_Unbusy() will move them to the front.
+ 	 */
+ 	VTAILQ_INSERT_TAIL(&oh->objcs, oc, list);
+ 	oc->objhead = oh;
+ 	/* NB: do not deref objhead, the new object inherits our reference */
+ 	Lck_Unlock(&oh->mtx);
+ 	*poh = oh;
+ 	return (oc);
+ }
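
Condensed, the qualification test added to the loop above is a small
predicate: an expired object remains an IMS/INM candidate only while its
keep time has not elapsed and it still carries at least one validator.
A standalone restatement (parameter names are illustrative):

    /* Sketch of the conditional-request candidate test. */
    static int
    qualifies_for_conditional(double keep_until, double t_req,
        int has_last_modified, int has_etag)
    {
        if (keep_until < t_req)                 /* keep time elapsed */
            return (0);
        return (has_last_modified || has_etag); /* needs a validator */
    }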
+ 
+ /*---------------------------------------------------------------------
+  */
+ 
+ static void
+ hsh_rush(struct objhead *oh)
+ {
+ 	unsigned u;
+ 	struct sess *sp;
+ 	struct waitinglist *wl;
+ 
+ 	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
+ 	Lck_AssertHeld(&oh->mtx);
+ 	wl = oh->waitinglist;
+ 	CHECK_OBJ_NOTNULL(wl, WAITINGLIST_MAGIC);
+ 	for (u = 0; u < cache_param->rush_exponent; u++) {
+ 		sp = VTAILQ_FIRST(&wl->list);
+ 		if (sp == NULL)
+ 			break;
+ 		CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 		AZ(sp->wrk);
+ 		VTAILQ_REMOVE(&wl->list, sp, list);
+ 		DSL(0x20, SLT_Debug, sp->vsl_id, "off waiting list");
+ 		if (SES_Schedule(sp)) {
+ 			/*
+ 			 * We could not schedule the session, leave the
+ 			 * rest on the busy list.
+ 			 */
+ 			break;
+ 		}
+ 	}
+ 	if (VTAILQ_EMPTY(&wl->list)) {
+ 		oh->waitinglist = NULL;
+ 		FREE_OBJ(wl);
+ 	}
+ }
+ 
+ /*---------------------------------------------------------------------
+  * Purge an entire objhead
+  */
+ 
+ void
+ HSH_Purge(const struct sess *sp, struct objhead *oh, double ttl, double grace)
+ {
+ 	struct objcore *oc, **ocp;
+ 	unsigned spc, nobj, n;
+ 	struct object *o;
+ 
+ 	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
+ 	spc = WS_Reserve(sp->wrk->ws, 0);
+ 	ocp = (void*)sp->wrk->ws->f;
+ 	Lck_Lock(&oh->mtx);
+ 	assert(oh->refcnt > 0);
+ 	nobj = 0;
+ 	VTAILQ_FOREACH(oc, &oh->objcs, list) {
+ 		CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 		assert(oc->objhead == oh);
+ 		if (oc->flags & OC_F_BUSY) {
+ 			/*
+ 			 * We cannot purge busy objects here, because their
+ 			 * owners have special rights to them, and may nuke
+ 			 * them without concern for the refcount, which by
+ 			 * definition always must be one, so they don't check.
+ 			 */
+ 			continue;
+ 		}
+ 
+ 		(void)oc_getobj(sp->wrk, oc); /* XXX: still needed ? */
+ 
+ 		xxxassert(spc >= sizeof *ocp);
+ 		oc->refcnt++;
+ 		spc -= sizeof *ocp;
+ 		ocp[nobj++] = oc;
+ 	}
+ 	Lck_Unlock(&oh->mtx);
+ 
+ 	/* NB: inverse test to catch NAN also */
+ 	if (!(ttl > 0.))
+ 		ttl = -1.;
+ 	if (!(grace > 0.))
+ 		grace = -1.;
+ 	for (n = 0; n < nobj; n++) {
+ 		oc = ocp[n];
+ 		CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 		o = oc_getobj(sp->wrk, oc);
+ 		if (o == NULL)
+ 			continue;
+ 		CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ 		o->exp.ttl = ttl;
+ 		o->exp.grace = grace;
+ 		EXP_Rearm(o);
+ 		(void)HSH_Deref(sp->wrk, NULL, &o);
+ 	}
+ 	WS_Release(sp->wrk->ws, 0);
+ }
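
The "inverse test" comment deserves a gloss: ttl may be NAN, and every
ordered comparison against NaN is false, so !(ttl > 0.) is true for NaN
where ttl <= 0. would be false.  A two-line demonstration:

    #include <math.h>
    #include <stdio.h>

    int
    main(void)
    {
        double ttl = NAN;

        printf("ttl <= 0.  : %d\n", ttl <= 0.);   /* 0: misses NaN */
        printf("!(ttl > 0.): %d\n", !(ttl > 0.)); /* 1: catches NaN */
        return (0);
    }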
+ 
+ 
+ /*---------------------------------------------------------------------
+  * Kill a busy object we don't need anyway.
+  * There may be sessions on the waiting list, so we cannot just blow
+  * it out of the water.
+  */
+ 
+ void
+ HSH_Drop(struct sess *sp)
+ {
+ 	struct object *o;
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	o = sp->obj;
+ 	CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ 	AssertObjCorePassOrBusy(o->objcore);
+ 	o->exp.ttl = -1.;
+ 	if (o->objcore != NULL)		/* Pass has no objcore */
+ 		HSH_Unbusy(sp);
+ 	(void)HSH_Deref(sp->wrk, NULL, &sp->obj);
+ }
+ 
+ void
+ HSH_Unbusy(const struct sess *sp)
+ {
+ 	struct object *o;
+ 	struct objhead *oh;
+ 	struct objcore *oc;
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	o = sp->obj;
+ 	CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ 	oc = o->objcore;
+ 	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 	oh = oc->objhead;
+ 	CHECK_OBJ(oh, OBJHEAD_MAGIC);
+ 
+ 	AssertObjBusy(o);
+ 	AN(oc->ban);
+ 	assert(oc->refcnt > 0);
+ 	assert(oh->refcnt > 0);
+ 	if (o->ws_o->overflow)
+ 		sp->wrk->stats.n_objoverflow++;
+ 	if (cache_param->diag_bitmap & 0x40)
+ 		WSP(sp, SLT_Debug,
+ 		    "Object %u workspace free %u", o->xid, WS_Free(o->ws_o));
+ 
+ 	/* XXX: pretouch neighbors on oh->objcs to prevent page-on under mtx */
+ 	Lck_Lock(&oh->mtx);
+ 	assert(oh->refcnt > 0);
+ 	/* XXX: strictly speaking, we should sort in Date: order. */
+ 	VTAILQ_REMOVE(&oh->objcs, oc, list);
+ 	VTAILQ_INSERT_HEAD(&oh->objcs, oc, list);
+ 	oc->flags &= ~OC_F_BUSY;
+ 	AZ(sp->wrk->nbusyobj);
+ 	sp->wrk->nbusyobj = oc->busyobj;
+ 	oc->busyobj = NULL;
+ 	if (oh->waitinglist != NULL)
+ 		hsh_rush(oh);
+ 	AN(oc->ban);
+ 	Lck_Unlock(&oh->mtx);
+ 	assert(oc_getobj(sp->wrk, oc) == o);
+ }
+ 
+ void
+ HSH_Ref(struct objcore *oc)
+ {
+ 	struct objhead *oh;
+ 
+ 	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 	oh = oc->objhead;
+ 	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
+ 	Lck_Lock(&oh->mtx);
+ 	assert(oc->refcnt > 0);
+ 	oc->refcnt++;
+ 	Lck_Unlock(&oh->mtx);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Dereference objcore and or object
+  *
+  * Can deal with:
+  *	bare objcore (incomplete fetch)
+  *	bare object (pass)
+  *	object with objcore
+  *	XXX later:  objcore with object (?)
+  *
+  * But you can only supply one of the two arguments at a time.
+  *
+  * Returns zero if target was destroyed.
+  */
+ 
+ int
+ HSH_Deref(struct worker *w, struct objcore *oc, struct object **oo)
+ {
+ 	struct object *o = NULL;
+ 	struct objhead *oh;
+ 	unsigned r;
+ 
+ 	/* Only one arg at a time */
+ 	assert(oc == NULL || oo == NULL);
+ 
+ 	if (oo != NULL) {
+ 		o = *oo;
+ 		*oo = NULL;
+ 		CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ 		oc = o->objcore;
+ 	}
+ 
+ 	if (o != NULL && oc == NULL) {
+ 		/*
+ 		 * A pass object with neither objcore nor objhdr reference.
+ 		 * -> simply free the (Transient) storage
+ 		 */
+ 		STV_Freestore(o);
+ 		STV_free(o->objstore);
+ 		w->stats.n_object--;
+ 		return (0);
+ 	}
+ 
+ 	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 
+ 	oh = oc->objhead;
+ 	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
+ 
+ 	Lck_Lock(&oh->mtx);
+ 	assert(oh->refcnt > 0);
+ 	assert(oc->refcnt > 0);
+ 	r = --oc->refcnt;
+ 	if (!r)
+ 		VTAILQ_REMOVE(&oh->objcs, oc, list);
+ 	else {
+ 		/* Must have an object */
+ 		AN(oc->methods);
+ 	}
+ 	if (oh->waitinglist != NULL)
+ 		hsh_rush(oh);
+ 	Lck_Unlock(&oh->mtx);
+ 	if (r != 0)
+ 		return (r);
+ 
+ 	BAN_DestroyObj(oc);
+ 	AZ(oc->ban);
+ 
+ 	if (oc->flags & OC_F_BUSY) {
+ 		CHECK_OBJ_NOTNULL(oc->busyobj, BUSYOBJ_MAGIC);
+ 		if (w->nbusyobj == NULL)
+ 			w->nbusyobj = oc->busyobj;
+ 		else
+ 			FREE_OBJ(oc->busyobj);
+ 		oc->busyobj = NULL;
+ 	}
+ 	AZ(oc->busyobj);
+ 
+ 	if (oc->methods != NULL) {
+ 		oc_freeobj(oc);
+ 		w->stats.n_object--;
+ 	}
+ 	FREE_OBJ(oc);
+ 
+ 	w->stats.n_objectcore--;
+ 	/* Drop our ref on the objhead */
+ 	assert(oh->refcnt > 0);
+ 	if (hash->deref(oh))
+ 		return (0);
+ 	HSH_DeleteObjHead(w, oh);
+ 	return (0);
+ }
+ 
+ void
+ HSH_Init(const struct hash_slinger *slinger)
+ {
+ 
+ 	assert(DIGEST_LEN == SHA256_LEN);	/* avoid #include pollution */
+ 	hash = slinger;
+ 	if (hash->start != NULL)
+ 		hash->start();
+ }
diff --cc bin/varnishd/cache/cache_http.c
index 0000000,784eb28..b937d64
mode 000000,100644..100644
--- a/bin/varnishd/cache/cache_http.c
+++ b/bin/varnishd/cache/cache_http.c
@@@ -1,0 -1,1119 +1,1236 @@@
+ /*-
+  * Copyright (c) 2006 Verdens Gang AS
+  * Copyright (c) 2006-2011 Varnish Software AS
+  * All rights reserved.
+  *
+  * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  *
+  * HTTP request storage and manipulation
+  */
+ 
+ #include "config.h"
+ 
+ #include <stdio.h>
+ 
+ #include "cache.h"
++#include "storage/storage.h"
+ 
+ #include "vct.h"
+ 
+ #define HTTPH(a, b, c, d, e, f, g) char b[] = "*" a ":";
+ #include "tbl/http_headers.h"
+ #undef HTTPH
+ 
+ /*lint -save -e773 not () */
+ #define LOGMTX2(ax, bx, cx)	[bx] = SLT_##ax##cx
+ 
+ #define LOGMTX1(ax) {					\
+ 	LOGMTX2(ax, HTTP_HDR_REQ,	Request),	\
+ 	LOGMTX2(ax, HTTP_HDR_RESPONSE,	Response),	\
+ 	LOGMTX2(ax, HTTP_HDR_STATUS,	Status),	\
+ 	LOGMTX2(ax, HTTP_HDR_URL,	URL),		\
+ 	LOGMTX2(ax, HTTP_HDR_PROTO,	Protocol),	\
+ 	LOGMTX2(ax, HTTP_HDR_FIRST,	Header),	\
+ 	}
+ 
+ static const enum VSL_tag_e logmtx[][HTTP_HDR_FIRST + 1] = {
+ 	[HTTP_Rx] = LOGMTX1(Rx),
+ 	[HTTP_Tx] = LOGMTX1(Tx),
+ 	[HTTP_Obj] = LOGMTX1(Obj)
+ };
+ /*lint -restore */
+ 
++void http_FilterMissingFields(struct worker *w, int fd, struct http *to,
++    const struct http *fm);
++
+ static enum VSL_tag_e
+ http2shmlog(const struct http *hp, int t)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
+ 	if (t > HTTP_HDR_FIRST)
+ 		t = HTTP_HDR_FIRST;
+ 	assert(hp->logtag >= HTTP_Rx && hp->logtag <= HTTP_Obj); /*lint !e685*/
+ 	assert(t >= HTTP_HDR_REQ && t <= HTTP_HDR_FIRST);
+ 	return (logmtx[hp->logtag][t]);
+ }
+ 
+ static void
+ WSLH(struct worker *w, unsigned vsl_id, const struct http *hp, unsigned hdr)
+ {
+ 
+ 	AN(vsl_id & (VSL_CLIENTMARKER|VSL_BACKENDMARKER));
+ 	WSLR(w, http2shmlog(hp, hdr), vsl_id, hp->hd[hdr]);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ /* List of canonical HTTP response code names from RFC2616 */
+ 
+ static struct http_msg {
+ 	unsigned	nbr;
+ 	const char	*txt;
+ } http_msg[] = {
+ #define HTTP_RESP(n, t)	{ n, t},
+ #include "tbl/http_response.h"
+ 	{ 0, NULL }
+ };
+ 
+ const char *
+ http_StatusMessage(unsigned status)
+ {
+ 	struct http_msg *mp;
+ 
+ 	assert(status >= 100 && status <= 999);
+ 	for (mp = http_msg; mp->nbr != 0 && mp->nbr <= status; mp++)
+ 		if (mp->nbr == status)
+ 			return (mp->txt);
+ 	return ("Unknown Error");
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ unsigned
+ HTTP_estimate(unsigned nhttp)
+ {
+ 
+ 	/* XXX: We trust the structs to be size-aligned as necessary */
+ 	return (sizeof (struct http) + (sizeof (txt) + 1) * nhttp);
+ }
+ 
+ struct http *
+ HTTP_create(void *p, uint16_t nhttp)
+ {
+ 	struct http *hp;
+ 
+ 	hp = p;
+ 	hp->magic = HTTP_MAGIC;
+ 	hp->hd = (void*)(hp + 1);
+ 	hp->shd = nhttp;
+ 	hp->hdf = (void*)(hp->hd + nhttp);
+ 	return (hp);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ http_Setup(struct http *hp, struct ws *ws)
+ {
+ 	uint16_t shd;
+ 	txt *hd;
+ 	unsigned char *hdf;
+ 
+ 	/* XXX: This is not elegant, is it efficient ? */
+ 	shd = hp->shd;
+ 	hd = hp->hd;
+ 	hdf = hp->hdf;
+ 	memset(hp, 0, sizeof *hp);
+ 	memset(hd, 0, sizeof *hd * shd);
+ 	memset(hdf, 0, sizeof *hdf * shd);
+ 	hp->magic = HTTP_MAGIC;
+ 	hp->ws = ws;
+ 	hp->nhd = HTTP_HDR_FIRST;
+ 	hp->shd = shd;
+ 	hp->hd = hd;
+ 	hp->hdf = hdf;
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static int
+ http_IsHdr(const txt *hh, const char *hdr)
+ {
+ 	unsigned l;
+ 
+ 	Tcheck(*hh);
+ 	AN(hdr);
+ 	l = hdr[0];
+ 	assert(l == strlen(hdr + 1));
+ 	assert(hdr[l] == ':');
+ 	hdr++;
+ 	return (!strncasecmp(hdr, hh->b, l));
+ }
+ 
+ /*--------------------------------------------------------------------
+  * This function collapses multiple header lines of the same name.
+  * The lines are joined with a comma, according to [rfc2616, 4.2bot, p32]
+  */
+ 
+ void
+ http_CollectHdr(struct http *hp, const char *hdr)
+ {
+ 	unsigned u, v, ml, f = 0, x;
+ 	char *b = NULL, *e = NULL;
+ 
+ 	for (u = HTTP_HDR_FIRST; u < hp->nhd; u++) {
+ 		while (u < hp->nhd && http_IsHdr(&hp->hd[u], hdr)) {
+ 			Tcheck(hp->hd[u]);
+ 			if (f == 0) {
+ 				/* Found first header, just record the fact */
+ 				f = u;
+ 				break;
+ 			}
+ 			if (b == NULL) {
+ 				/* Found second header, start our collection */
+ 				ml = WS_Reserve(hp->ws, 0);
+ 				b = hp->ws->f;
+ 				e = b + ml;
+ 				x = Tlen(hp->hd[f]);
+ 				if (b + x < e) {
+ 					memcpy(b, hp->hd[f].b, x);
+ 					b += x;
+ 				} else
+ 					b = e;
+ 			}
+ 
+ 			AN(b);
+ 			AN(e);
+ 
+ 			/* Append the Nth header we found */
+ 			if (b < e)
+ 				*b++ = ',';
+ 			x = Tlen(hp->hd[u]) - *hdr;
+ 			if (b + x < e) {
+ 				memcpy(b, hp->hd[u].b + *hdr, x);
+ 				b += x;
+ 			} else
+ 				b = e;
+ 
+ 			/* Shift remaining headers up one slot */
+ 			for (v = u; v < hp->nhd - 1; v++)
+ 				hp->hd[v] = hp->hd[v + 1];
+ 			hp->nhd--;
+ 		}
+ 
+ 	}
+ 	if (b == NULL)
+ 		return;
+ 	AN(e);
+ 	if (b >= e) {
+ 		WS_Release(hp->ws, 0);
+ 		return;
+ 	}
+ 	*b = '\0';
+ 	hp->hd[f].b = hp->ws->f;
+ 	hp->hd[f].e = b;
+ 	WS_ReleaseP(hp->ws, b + 1);
+ }
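
The observable effect of http_CollectHdr is the RFC 2616 4.2 join:
duplicate field lines collapse into the first slot, comma-separated and
with no added whitespace (illustrative headers):

    Cache-Control: max-age=60
    Cache-Control: no-transform

    becomes

    Cache-Control: max-age=60,no-transform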
+ 
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static unsigned
+ http_findhdr(const struct http *hp, unsigned l, const char *hdr)
+ {
+ 	unsigned u;
+ 
+ 	for (u = HTTP_HDR_FIRST; u < hp->nhd; u++) {
+ 		Tcheck(hp->hd[u]);
+ 		if (hp->hd[u].e < hp->hd[u].b + l + 1)
+ 			continue;
+ 		if (hp->hd[u].b[l] != ':')
+ 			continue;
+ 		if (strncasecmp(hdr, hp->hd[u].b, l))
+ 			continue;
+ 		return (u);
+ 	}
+ 	return (0);
+ }
+ 
+ int
+ http_GetHdr(const struct http *hp, const char *hdr, char **ptr)
+ {
+ 	unsigned u, l;
+ 	char *p;
+ 
+ 	l = hdr[0];
+ 	diagnostic(l == strlen(hdr + 1));
+ 	assert(hdr[l] == ':');
+ 	hdr++;
+ 	u = http_findhdr(hp, l - 1, hdr);
+ 	if (u == 0) {
+ 		if (ptr != NULL)
+ 			*ptr = NULL;
+ 		return (0);
+ 	}
+ 	if (ptr != NULL) {
+ 		p = hp->hd[u].b + l;
+ 		while (vct_issp(*p))
+ 			p++;
+ 		*ptr = p;
+ 	}
+ 	return (1);
+ }
+ 
+ 
+ /*--------------------------------------------------------------------
+  * Find a given data element in a header according to RFC2616's #rule
+  * (section 2.1, p15)
+  */
+ 
+ int
+ http_GetHdrData(const struct http *hp, const char *hdr,
+     const char *field, char **ptr)
+ {
+ 	char *h, *e;
+ 	unsigned fl;
+ 
+ 	if (ptr != NULL)
+ 		*ptr = NULL;
+ 	if (!http_GetHdr(hp, hdr, &h))
+ 		return (0);
+ 	AN(h);
+ 	e = strchr(h, '\0');
+ 	fl = strlen(field);
+ 	while (h + fl <= e) {
+ 		/* Skip leading whitespace and commas */
+ 		if (vct_islws(*h) || *h == ',') {
+ 			h++;
+ 			continue;
+ 		}
+ 		/* Check for substrings before memcmp() */
+ 		if ((h + fl == e || vct_issepctl(h[fl])) &&
+ 		    !memcmp(h, field, fl)) {
+ 			if (ptr != NULL) {
+ 				h += fl;
+ 				while (vct_islws(*h))
+ 					h++;
+ 				*ptr = h;
+ 			}
+ 			return (1);
+ 		}
+ 		/* Skip until end of header or comma */
+ 		while (*h && *h != ',')
+ 			h++;
+ 	}
+ 	return (0);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Find a given header field's Q value.
+  */
+ 
+ double
+ http_GetHdrQ(const struct http *hp, const char *hdr, const char *field)
+ {
+ 	char *h;
+ 	int i;
+ 	double a, b;
+ 
+ 	h = NULL;
+ 	i = http_GetHdrData(hp, hdr, field, &h);
+ 	if (!i)
+ 		return (0.);
+ 
+ 	if (h == NULL)
+ 		return (1.);
+ 	/* Skip whitespace, looking for ';' */
+ 	while (*h && vct_issp(*h))
+ 		h++;
+ 	if (*h++ != ';')
+ 		return (1.);
+ 	while (*h && vct_issp(*h))
+ 		h++;
+ 	if (*h++ != 'q')
+ 		return (1.);
+ 	while (*h && vct_issp(*h))
+ 		h++;
+ 	if (*h++ != '=')
+ 		return (1.);
+ 	while (*h && vct_issp(*h))
+ 		h++;
+ 	a = 0.;
+ 	while (vct_isdigit(*h)) {
+ 		a *= 10.;
+ 		a += *h - '0';
+ 		h++;
+ 	}
+ 	if (*h++ != '.')
+ 		return (a);
+ 	b = .1;
+ 	while (vct_isdigit(*h)) {
+ 		a += b * (*h - '0');
+ 		b *= .1;
+ 		h++;
+ 	}
+ 	return (a);
+ }
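
For example, "gzip;q=0.5" yields 0.5, a matched field with no q
parameter yields 1.0, and an absent field yields 0.0 (the
http_GetHdrData miss).  The scan restated as a self-contained function
over the text just past the matched field token:

    #include <ctype.h>
    #include <stdio.h>

    static double
    parse_q(const char *h)
    {
        double a = 0., b;

        while (*h == ' ' || *h == '\t')
            h++;
        if (*h++ != ';')
            return (1.);                    /* no parameters at all */
        while (*h == ' ' || *h == '\t')
            h++;
        if (*h++ != 'q')
            return (1.);
        while (*h == ' ' || *h == '\t')
            h++;
        if (*h++ != '=')
            return (1.);
        while (*h == ' ' || *h == '\t')
            h++;
        while (isdigit((unsigned char)*h)) {    /* integer part */
            a = a * 10. + (*h - '0');
            h++;
        }
        if (*h++ != '.')
            return (a);
        for (b = .1; isdigit((unsigned char)*h); h++, b *= .1)
            a += b * (*h - '0');                /* fractional part */
        return (a);
    }

    int
    main(void)
    {
        printf("%.3f\n", parse_q(";q=0.5"));    /* 0.500 */
        printf("%.3f\n", parse_q(""));          /* 1.000 */
        return (0);
    }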
+ 
+ /*--------------------------------------------------------------------
+  * Find a given header field's value.
+  */
+ 
+ int
+ http_GetHdrField(const struct http *hp, const char *hdr,
+     const char *field, char **ptr)
+ {
+ 	char *h;
+ 	int i;
+ 
+ 	if (ptr != NULL)
+ 		*ptr = NULL;
+ 
+ 	h = NULL;
+ 	i = http_GetHdrData(hp, hdr, field, &h);
+ 	if (!i)
+ 		return (i);
+ 
+ 	if (ptr != NULL && h != NULL) {
+ 		/* Skip whitespace, looking for '=' */
+ 		while (*h && vct_issp(*h))
+ 			h++;
+ 		if (*h == '=') {
+ 			h++;
+ 			while (*h && vct_issp(*h))
+ 				h++;
+ 			*ptr = h;
+ 		}
+ 	}
+ 	return (i);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * XXX: redo with http_GetHdrField() ?
+  */
+ 
+ const char *
+ http_DoConnection(const struct http *hp)
+ {
+ 	char *p, *q;
+ 	const char *ret;
+ 	unsigned u;
+ 
+ 	if (!http_GetHdr(hp, H_Connection, &p)) {
+ 		if (hp->protover < 11)
+ 			return ("not HTTP/1.1");
+ 		return (NULL);
+ 	}
+ 	ret = NULL;
+ 	AN(p);
+ 	for (; *p; p++) {
+ 		if (vct_issp(*p))
+ 			continue;
+ 		if (*p == ',')
+ 			continue;
+ 		for (q = p + 1; *q; q++)
+ 			if (*q == ',' || vct_issp(*q))
+ 				break;
+ 		u = pdiff(p, q);
+ 		if (u == 5 && !strncasecmp(p, "close", u))
+ 			ret = "Connection: close";
+ 		u = http_findhdr(hp, u, p);
+ 		if (u != 0)
+ 			hp->hdf[u] |= HDF_FILTER;
+ 		if (!*q)
+ 			break;
+ 		p = q;
+ 	}
+ 	return (ret);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ int
+ http_HdrIs(const struct http *hp, const char *hdr, const char *val)
+ {
+ 	char *p;
+ 
+ 	if (!http_GetHdr(hp, hdr, &p))
+ 		return (0);
+ 	AN(p);
+ 	if (!strcasecmp(p, val))
+ 		return (1);
+ 	return (0);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ uint16_t
+ http_GetStatus(const struct http *hp)
+ {
+ 
+ 	return (hp->status);
+ }
+ 
+ const char *
+ http_GetReq(const struct http *hp)
+ {
+ 
+ 	Tcheck(hp->hd[HTTP_HDR_REQ]);
+ 	return (hp->hd[HTTP_HDR_REQ].b);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Dissect the headers of the HTTP protocol message.
+  * Detect conditionals (headers which start with '^[Ii][Ff]-')
+  */
+ 
+ static uint16_t
+ http_dissect_hdrs(struct worker *w, struct http *hp, unsigned vsl_id, char *p,
+     const struct http_conn *htc)
+ {
+ 	char *q, *r;
+ 	txt t = htc->rxbuf;
+ 
+ 	if (*p == '\r')
+ 		p++;
+ 
+ 	hp->nhd = HTTP_HDR_FIRST;
+ 	hp->conds = 0;
+ 	r = NULL;		/* For FlexeLint */
+ 	for (; p < t.e; p = r) {
+ 
+ 		/* Find end of next header */
+ 		q = r = p;
+ 		while (r < t.e) {
+ 			if (!vct_iscrlf(*r)) {
+ 				r++;
+ 				continue;
+ 			}
+ 			q = r;
+ 			assert(r < t.e);
+ 			r += vct_skipcrlf(r);
+ 			if (r >= t.e)
+ 				break;
+ 			/* If line does not continue: got it. */
+ 			if (!vct_issp(*r))
+ 				break;
+ 
+ 			/* Clear line continuation LWS to spaces */
+ 			while (vct_islws(*q))
+ 				*q++ = ' ';
+ 		}
+ 
+ 		if (q - p > htc->maxhdr) {
+ 			VSC_C_main->losthdr++;
+ 			WSL(w, SLT_LostHeader, vsl_id, "%.*s",
+ 			    q - p > 20 ? 20 : q - p, p);
+ 			return (413);
+ 		}
+ 
+ 		/* Empty header = end of headers */
+ 		if (p == q)
+ 			break;
+ 
+ 		if ((p[0] == 'i' || p[0] == 'I') &&
+ 		    (p[1] == 'f' || p[1] == 'F') &&
+ 		    p[2] == '-')
+ 			hp->conds = 1;
+ 
+ 		while (q > p && vct_issp(q[-1]))
+ 			q--;
+ 		*q = '\0';
+ 
+ 		if (hp->nhd < hp->shd) {
+ 			hp->hdf[hp->nhd] = 0;
+ 			hp->hd[hp->nhd].b = p;
+ 			hp->hd[hp->nhd].e = q;
+ 			WSLH(w, vsl_id, hp, hp->nhd);
+ 			hp->nhd++;
+ 		} else {
+ 			VSC_C_main->losthdr++;
+ 			WSL(w, SLT_LostHeader, vsl_id, "%.*s",
+ 			    q - p > 20 ? 20 : q - p, p);
+ 			return (413);
+ 		}
+ 	}
+ 	return (0);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Deal with first line of HTTP protocol message.
+  */
+ 
+ static uint16_t
+ http_splitline(struct worker *w, unsigned vsl_id, struct http *hp,
+     const struct http_conn *htc, int h1, int h2, int h3)
+ {
+ 	char *p, *q;
+ 
+ 	CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
+ 	CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
+ 
+ 	/* XXX: Assert a NUL at rx.e ? */
+ 	Tcheck(htc->rxbuf);
+ 
+ 	/* Skip leading LWS */
+ 	for (p = htc->rxbuf.b ; vct_islws(*p); p++)
+ 		continue;
+ 
+ 	/* First field cannot contain SP, CRLF or CTL */
+ 	q = p;
+ 	for (; !vct_issp(*p); p++) {
+ 		if (vct_isctl(*p))
+ 			return (400);
+ 	}
+ 	hp->hd[h1].b = q;
+ 	hp->hd[h1].e = p;
+ 
+ 	/* Skip SP */
+ 	for (; vct_issp(*p); p++) {
+ 		if (vct_isctl(*p))
+ 			return (400);
+ 	}
+ 
+ 	/* Second field cannot contain LWS or CTL */
+ 	q = p;
+ 	for (; !vct_islws(*p); p++) {
+ 		if (vct_isctl(*p))
+ 			return (400);
+ 	}
+ 	hp->hd[h2].b = q;
+ 	hp->hd[h2].e = p;
+ 
+ 	if (!Tlen(hp->hd[h2]))
+ 		return (413);
+ 
+ 	/* Skip SP */
+ 	for (; vct_issp(*p); p++) {
+ 		if (vct_isctl(*p))
+ 			return (400);
+ 	}
+ 
+ 	/* Third field is optional and cannot contain CTL */
+ 	q = p;
+ 	if (!vct_iscrlf(*p)) {
+ 		for (; !vct_iscrlf(*p); p++)
+ 			if (!vct_issep(*p) && vct_isctl(*p))
+ 				return (400);
+ 	}
+ 	hp->hd[h3].b = q;
+ 	hp->hd[h3].e = p;
+ 
+ 	/* Skip CRLF */
+ 	p += vct_skipcrlf(p);
+ 
+ 	*hp->hd[h1].e = '\0';
+ 	WSLH(w, vsl_id, hp, h1);
+ 
+ 	*hp->hd[h2].e = '\0';
+ 	WSLH(w, vsl_id, hp, h2);
+ 
+ 	if (hp->hd[h3].e != NULL) {
+ 		*hp->hd[h3].e = '\0';
+ 		WSLH(w, vsl_id, hp, h3);
+ 	}
+ 
+ 	return (http_dissect_hdrs(w, hp, vsl_id, p, htc));
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static void
+ http_ProtoVer(struct http *hp)
+ {
+ 
+ 	if (!strcasecmp(hp->hd[HTTP_HDR_PROTO].b, "HTTP/1.0"))
+ 		hp->protover = 10;
+ 	else if (!strcasecmp(hp->hd[HTTP_HDR_PROTO].b, "HTTP/1.1"))
+ 		hp->protover = 11;
+ 	else
+ 		hp->protover = 9;
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ uint16_t
+ http_DissectRequest(struct sess *sp)
+ {
+ 	struct http_conn *htc;
+ 	struct http *hp;
+ 	uint16_t retval;
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	htc = sp->htc;
+ 	CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
+ 	hp = sp->http;
+ 	CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
+ 
+ 	hp->logtag = HTTP_Rx;
+ 
+ 	retval = http_splitline(sp->wrk, sp->vsl_id, hp, htc,
+ 	    HTTP_HDR_REQ, HTTP_HDR_URL, HTTP_HDR_PROTO);
+ 	if (retval != 0) {
+ 		WSPR(sp, SLT_HttpGarbage, htc->rxbuf);
+ 		return (retval);
+ 	}
+ 	http_ProtoVer(hp);
+ 	return (retval);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ uint16_t
+ http_DissectResponse(struct worker *w, const struct http_conn *htc,
+     struct http *hp)
+ {
+ 	int j;
+ 	uint16_t retval = 0;
+ 	char *p;
+ 
+ 	CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
+ 	CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
+ 	hp->logtag = HTTP_Rx;
+ 
+ 	if (http_splitline(w, htc->vsl_id, hp, htc,
+ 	    HTTP_HDR_PROTO, HTTP_HDR_STATUS, HTTP_HDR_RESPONSE))
+ 		retval = 503;
+ 
+ 	if (retval == 0 && memcmp(hp->hd[HTTP_HDR_PROTO].b, "HTTP/1.", 7))
+ 		retval = 503;
+ 
+ 	if (retval == 0 && Tlen(hp->hd[HTTP_HDR_STATUS]) != 3)
+ 		retval = 503;
+ 
+ 	if (retval == 0) {
+ 		hp->status = 0;
+ 		p = hp->hd[HTTP_HDR_STATUS].b;
+ 		for (j = 100; j != 0; j /= 10) {
+ 			if (!vct_isdigit(*p)) {
+ 				retval = 503;
+ 				break;
+ 			}
+ 			hp->status += (uint16_t)(j * (*p - '0'));
+ 			p++;
+ 		}
+ 		if (*p != '\0')
+ 			retval = 503;
+ 	}
+ 
+ 	if (retval != 0) {
+ 		WSLR(w, SLT_HttpGarbage, htc->vsl_id, htc->rxbuf);
+ 		assert(retval >= 100 && retval <= 999);
+ 		hp->status = retval;
+ 	} else {
+ 		http_ProtoVer(hp);
+ 	}
+ 
+ 	if (hp->hd[HTTP_HDR_RESPONSE].b == NULL ||
+ 	    !Tlen(hp->hd[HTTP_HDR_RESPONSE])) {
+ 		/* Backend didn't send a response string, use the standard */
+ 		hp->hd[HTTP_HDR_RESPONSE].b =
+ 		    TRUST_ME(http_StatusMessage(hp->status));
+ 		hp->hd[HTTP_HDR_RESPONSE].e =
+ 		    strchr(hp->hd[HTTP_HDR_RESPONSE].b, '\0');
+ 	}
+ 	return (retval);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ http_SetH(const struct http *to, unsigned n, const char *fm)
+ {
+ 
+ 	assert(n < to->shd);
+ 	AN(fm);
+ 	to->hd[n].b = TRUST_ME(fm);
+ 	to->hd[n].e = strchr(to->hd[n].b, '\0');
+ 	to->hdf[n] = 0;
+ }
+ 
+ static void
+ http_copyh(const struct http *to, const struct http *fm, unsigned n)
+ {
+ 
+ 	assert(n < HTTP_HDR_FIRST);
+ 	Tcheck(fm->hd[n]);
+ 	to->hd[n] = fm->hd[n];
+ 	to->hdf[n] = fm->hdf[n];
+ }
+ 
+ void
+ http_ForceGet(const struct http *to)
+ {
+ 	if (strcmp(http_GetReq(to), "GET"))
+ 		http_SetH(to, HTTP_HDR_REQ, "GET");
+ }
+ 
+ void
+ http_CopyResp(struct http *to, const struct http *fm)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(fm, HTTP_MAGIC);
+ 	CHECK_OBJ_NOTNULL(to, HTTP_MAGIC);
+ 	http_SetH(to, HTTP_HDR_PROTO, "HTTP/1.1");
+ 	to->status = fm->status;
+ 	http_copyh(to, fm, HTTP_HDR_RESPONSE);
+ }
+ 
+ void
+ http_SetResp(struct http *to, const char *proto, uint16_t status,
+     const char *response)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(to, HTTP_MAGIC);
+ 	http_SetH(to, HTTP_HDR_PROTO, proto);
+ 	assert(status >= 100 && status <= 999);
+ 	to->status = status;
+ 	http_SetH(to, HTTP_HDR_RESPONSE, response);
+ }
+ 
+ static void
+ http_copyheader(struct worker *w, unsigned vsl_id, struct http *to,
+     const struct http *fm, unsigned n)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(fm, HTTP_MAGIC);
+ 	CHECK_OBJ_NOTNULL(to, HTTP_MAGIC);
+ 	assert(n < fm->shd);
+ 	Tcheck(fm->hd[n]);
+ 	if (to->nhd < to->shd) {
+ 		to->hd[to->nhd] = fm->hd[n];
+ 		to->hdf[to->nhd] = 0;
+ 		to->nhd++;
+ 	} else  {
+ 		VSC_C_main->losthdr++;
+ 		WSLR(w, SLT_LostHeader, vsl_id, fm->hd[n]);
+ 	}
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Estimate how much workspace we need to Filter this header according
+  * to 'how'.
+  */
+ 
+ unsigned
+ http_EstimateWS(const struct http *fm, unsigned how, uint16_t *nhd)
+ {
+ 	unsigned u, l;
+ 
+ 	l = 0;
+ 	*nhd = HTTP_HDR_FIRST;
+ 	CHECK_OBJ_NOTNULL(fm, HTTP_MAGIC);
+ 	for (u = 0; u < fm->nhd; u++) {
+ 		if (fm->hd[u].b == NULL)
+ 			continue;
+ 		if (fm->hdf[u] & HDF_FILTER)
+ 			continue;
+ #define HTTPH(a, b, c, d, e, f, g) \
+ 		if (((e) & how) && http_IsHdr(&fm->hd[u], (b))) \
+ 			continue;
+ #include "tbl/http_headers.h"
+ #undef HTTPH
+ 		l += PRNDUP(Tlen(fm->hd[u]) + 1);
+ 		(*nhd)++;
+ 		// fm->hdf[u] |= HDF_COPY;
+ 	}
+ 	return (l);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ http_FilterFields(struct worker *w, unsigned vsl_id, struct http *to,
+     const struct http *fm, unsigned how)
+ {
+ 	unsigned u;
+ 
+ 	CHECK_OBJ_NOTNULL(fm, HTTP_MAGIC);
+ 	CHECK_OBJ_NOTNULL(to, HTTP_MAGIC);
+ 	to->nhd = HTTP_HDR_FIRST;
+ 	to->status = fm->status;
+ 	for (u = HTTP_HDR_FIRST; u < fm->nhd; u++) {
+ 		if (fm->hd[u].b == NULL)
+ 			continue;
+ 		if (fm->hdf[u] & HDF_FILTER)
+ 			continue;
+ #define HTTPH(a, b, c, d, e, f, g) \
+ 		if (((e) & how) && http_IsHdr(&fm->hd[u], (b))) \
+ 			continue;
+ #include "tbl/http_headers.h"
+ #undef HTTPH
+ 		http_copyheader(w, vsl_id, to, fm, u);
+ 	}
+ }
+ 
++/*---------------------------------------------------------------------
++ * Same as http_FilterFields, but keep any existing hdrs in to.
++ * Furthermore, before copying, check if to already has that hdr, and if
++ * so do not copy.  Used for 304 refresh processing.
++ */
++
++/* XXX: uplex/GS: Also, don't filter according to the "how" bitmap in
++ *      http_headers.h. We only use this to copy from one cached object to
++ *      another, so if a header made it into the first object, we want it.
++ */
++
++void
++http_FilterMissingFields(struct worker *w, int fd, struct http *to,
++    const struct http *fm)
++{
++	unsigned u;
++	unsigned hdrlen;
++
++	CHECK_OBJ_NOTNULL(fm, HTTP_MAGIC);
++	CHECK_OBJ_NOTNULL(to, HTTP_MAGIC);
++	for (u = HTTP_HDR_FIRST; u < fm->nhd; u++) {
++		if (fm->hd[u].b == NULL)
++			continue;
++		hdrlen = strchr(fm->hd[u].b, ':') - fm->hd[u].b;
++		if (http_findhdr(to, hdrlen, fm->hd[u].b))
++			continue;
++		http_copyheader(w, fd, to, fm, u);
++	}
++}
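
The merge semantics in a picture: headers already present in "to" win,
and only the missing ones are copied over from "fm" (illustrative
headers):

    to (the 304 response):  Content-Type: text/html
    fm (the stale object):  Content-Type: text/plain
                            ETag: "abc"

    after the call, to:     Content-Type: text/html
                            ETag: "abc"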
++
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ http_FilterHeader(const struct sess *sp, unsigned how)
+ {
+ 	struct http *hp;
+ 
+ 	hp = sp->wrk->bereq;
+ 	CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
+ 	hp->logtag = HTTP_Tx;
+ 
+ 	http_copyh(hp, sp->http, HTTP_HDR_REQ);
+ 	http_copyh(hp, sp->http, HTTP_HDR_URL);
+ 	if (how == HTTPH_R_FETCH)
+ 		http_SetH(hp, HTTP_HDR_PROTO, "HTTP/1.1");
+ 	else
+ 		http_copyh(hp, sp->http, HTTP_HDR_PROTO);
+ 	http_FilterFields(sp->wrk, sp->vsl_id, hp, sp->http, how);
+ 	http_PrintfHeader(sp->wrk, sp->vsl_id, hp, "X-Varnish: %u", sp->xid);
+ }
+ 
++/*-------------------------------------------------------------------
++ * This function checks for sp->stale_obj.  If present, HSH_Lookup()
++ * found an expired object that qualifies for a refresh check,
++ * so add the appropriate headers.
++ */
++
++void
++http_CheckRefresh(struct sess *sp)
++{
++	struct object *freshen_obj;
++	struct http *obj_hp, *bereq_hp;
++	char *p;
++
++	freshen_obj = sp->stale_obj;
++	CHECK_OBJ_NOTNULL(freshen_obj, OBJECT_MAGIC);
++	bereq_hp = sp->wrk->bereq;
++	CHECK_OBJ_NOTNULL(bereq_hp, HTTP_MAGIC);
++	obj_hp = freshen_obj->http;
++	CHECK_OBJ_NOTNULL(obj_hp, HTTP_MAGIC);
++
++	if(http_GetHdr(obj_hp, H_ETag, &p))
++		http_PrintfHeader(sp->wrk, sp->fd, bereq_hp, "If-None-Match: %s", p);
++
++	if(http_GetHdr(obj_hp, H_Last_Modified, &p))
++		http_PrintfHeader(sp->wrk, sp->fd, bereq_hp, "If-Modified-Since: %s",p);
++}
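
So a stale object carrying both validators turns the backend request
conditional on both (illustrative values):

    stale_obj:    ETag: "xyzzy"
                  Last-Modified: Tue, 22 Nov 2011 12:00:00 GMT

    bereq gains:  If-None-Match: "xyzzy"
                  If-Modified-Since: Tue, 22 Nov 2011 12:00:00 GMT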
++
++/*-------------------------------------------------------------------
++ * Called after fetch and sp->stale_obj present.  Check
++ * response and handle as needed.
++ */
++
++void
++http_Check304(struct sess *sp)
++{
++	struct object *o, *o_stale;
++	char *p;
++
++	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
++	o_stale = sp->stale_obj;
++	CHECK_OBJ_NOTNULL(o_stale, OBJECT_MAGIC);
++	o = sp->obj;
++	CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
++
++	if (sp->wrk->beresp->status != 304) {
++		/*
++		 * IMS/INM headers may have been removed in VCL, so only
++		 * count a non-validating response if they were present
++		 * in the request.
++		 */
++		if (http_GetHdr(sp->wrk->bereq, H_If_Modified_Since, &p)
++		    || http_GetHdr(sp->wrk->bereq, H_If_None_Match, &p))
++			sp->wrk->stats.cond_not_validated++;
++		return;
++	}
++
++	/* 
++	 * Copy headers we need from the stale object into the 304 response
++	 */
++	http_FilterMissingFields(sp->wrk, sp->fd, sp->obj->http,
++				 sp->stale_obj->http);
++
++	/*
++	 * Dup the stale object's storage in to the new object
++	 * and reset Content-Length from the size of the storage.
++	 */
++	STV_dup(sp, o_stale, o);
++	http_Unset(o->http, H_Content_Length);
++	http_PrintfHeader(sp->wrk, sp->fd, o->http, "Content-Length: %u", o->len);
++
++	http_SetResp(o->http, "HTTP/1.1", 200, "Ok Not Modified");
++	http_SetH(o->http, HTTP_HDR_REQ, "GET");
++	http_copyh(o->http, sp->wrk->bereq, HTTP_HDR_URL);
++
++	/*
++	 * XXX: Are we copying all the necessary fields from stale_obj?
++	 *	Should we copy o_stale->hits into o->hits?
++	 */
++	o->response = 200;
++	o->gziped = o_stale->gziped;
++
++	AZ(o_stale->objcore->flags & OC_F_BUSY);
++}
++
+ /*--------------------------------------------------------------------
+  * This function copies any header fields which reference foreign
+  * storage into our own WS.
+  */
+ 
+ void
+ http_CopyHome(struct worker *w, unsigned vsl_id, const struct http *hp)
+ {
+ 	unsigned u, l;
+ 	char *p;
+ 
+ 	for (u = 0; u < hp->nhd; u++) {
+ 		if (hp->hd[u].b == NULL)
+ 			continue;
+ 		if (hp->hd[u].b >= hp->ws->s && hp->hd[u].e <= hp->ws->e) {
+ 			WSLH(w, vsl_id, hp, u);
+ 			continue;
+ 		}
+ 		l = Tlen(hp->hd[u]);
+ 		p = WS_Alloc(hp->ws, l + 1);
+ 		if (p != NULL) {
+ 			WSLH(w, vsl_id, hp, u);
+ 			memcpy(p, hp->hd[u].b, l + 1L);
+ 			hp->hd[u].b = p;
+ 			hp->hd[u].e = p + l;
+ 		} else {
+ 			/* XXX This leaves a slot empty */
+ 			VSC_C_main->losthdr++;
+ 			WSLR(w, SLT_LostHeader, vsl_id, hp->hd[u]);
+ 			hp->hd[u].b = NULL;
+ 			hp->hd[u].e = NULL;
+ 		}
+ 	}
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ http_ClrHeader(struct http *to)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(to, HTTP_MAGIC);
+ 	to->nhd = HTTP_HDR_FIRST;
+ 	to->status = 0;
+ 	to->protover = 0;
+ 	to->conds = 0;
+ 	memset(to->hd, 0, sizeof *to->hd * to->shd);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ http_SetHeader(struct worker *w, unsigned vsl_id, struct http *to,
+     const char *hdr)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(to, HTTP_MAGIC);
+ 	if (to->nhd >= to->shd) {
+ 		VSC_C_main->losthdr++;
+ 		WSL(w, SLT_LostHeader, vsl_id, "%s", hdr);
+ 		return;
+ 	}
+ 	http_SetH(to, to->nhd++, hdr);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static void
+ http_PutField(struct worker *w, unsigned vsl_id, const struct http *to,
+     int field, const char *string)
+ {
+ 	char *p;
+ 	unsigned l;
+ 
+ 	CHECK_OBJ_NOTNULL(to, HTTP_MAGIC);
+ 	l = strlen(string);
+ 	p = WS_Alloc(to->ws, l + 1);
+ 	if (p == NULL) {
+ 		WSL(w, SLT_LostHeader, vsl_id, "%s", string);
+ 		to->hd[field].b = NULL;
+ 		to->hd[field].e = NULL;
+ 		to->hdf[field] = 0;
+ 	} else {
+ 		memcpy(p, string, l + 1L);
+ 		to->hd[field].b = p;
+ 		to->hd[field].e = p + l;
+ 		to->hdf[field] = 0;
+ 	}
+ }
+ 
+ void
+ http_PutProtocol(struct worker *w, unsigned vsl_id, const struct http *to,
+     const char *protocol)
+ {
+ 
+ 	http_PutField(w, vsl_id, to, HTTP_HDR_PROTO, protocol);
+ 	if (to->hd[HTTP_HDR_PROTO].b == NULL)
+ 		http_SetH(to, HTTP_HDR_PROTO, "HTTP/1.1");
+ 	Tcheck(to->hd[HTTP_HDR_PROTO]);
+ }
+ 
+ void
+ http_PutStatus(struct http *to, uint16_t status)
+ {
+ 
+ 	assert(status >= 100 && status <= 999);
+ 	to->status = status;
+ }
+ 
+ void
+ http_PutResponse(struct worker *w, unsigned vsl_id, const struct http *to,
+     const char *response)
+ {
+ 
+ 	http_PutField(w, vsl_id, to, HTTP_HDR_RESPONSE, response);
+ 	if (to->hd[HTTP_HDR_RESPONSE].b == NULL)
+ 		http_SetH(to, HTTP_HDR_RESPONSE, "Lost Response");
+ 	Tcheck(to->hd[HTTP_HDR_RESPONSE]);
+ }
+ 
+ void
+ http_PrintfHeader(struct worker *w, unsigned vsl_id, struct http *to,
+     const char *fmt, ...)
+ {
+ 	va_list ap;
+ 	unsigned l, n;
+ 
+ 	CHECK_OBJ_NOTNULL(to, HTTP_MAGIC);
+ 	l = WS_Reserve(to->ws, 0);
+ 	va_start(ap, fmt);
+ 	n = vsnprintf(to->ws->f, l, fmt, ap);
+ 	va_end(ap);
+ 	if (n + 1 >= l || to->nhd >= to->shd) {
+ 		VSC_C_main->losthdr++;
+ 		WSL(w, SLT_LostHeader, vsl_id, "%s", to->ws->f);
+ 		WS_Release(to->ws, 0);
+ 	} else {
+ 		to->hd[to->nhd].b = to->ws->f;
+ 		to->hd[to->nhd].e = to->ws->f + n;
+ 		to->hdf[to->nhd] = 0;
+ 		WS_Release(to->ws, n + 1);
+ 		to->nhd++;
+ 	}
+ }
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ http_Unset(struct http *hp, const char *hdr)
+ {
+ 	uint16_t u, v;
+ 
+ 	for (v = u = HTTP_HDR_FIRST; u < hp->nhd; u++) {
+ 		if (hp->hd[u].b == NULL)
+ 			continue;
+ 		if (http_IsHdr(&hp->hd[u], hdr))
+ 			continue;
+ 		if (v != u) {
+ 			memcpy(&hp->hd[v], &hp->hd[u], sizeof *hp->hd);
+ 			memcpy(&hp->hdf[v], &hp->hdf[u], sizeof *hp->hdf);
+ 		}
+ 		v++;
+ 	}
+ 	hp->nhd = v;
+ }
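
http_Unset uses the classic two-index in-place filter: u scans, v
writes, and survivors compact toward the front without any extra
allocation.  The same pattern on a plain int array:

    #include <stdio.h>

    int
    main(void)
    {
        int a[] = { 1, 2, 3, 2, 4 };
        unsigned u, v, n = 5;

        for (v = u = 0; u < n; u++) {
            if (a[u] == 2)          /* drop matching entries */
                continue;
            if (v != u)
                a[v] = a[u];
            v++;
        }
        n = v;
        for (u = 0; u < n; u++)
            printf("%d ", a[u]);    /* 1 3 4 */
        printf("\n");
        return (0);
    }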
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ HTTP_Copy(struct http *to, const struct http * const fm)
+ {
+ 
+ 	to->conds = fm->conds;
+ 	to->logtag = fm->logtag;
+ 	to->status = fm->status;
+ 	to->protover = fm->protover;
+ 	to->nhd = fm->nhd;
+ 	assert(fm->nhd <= to->shd);
+ 	memcpy(to->hd, fm->hd, fm->nhd * sizeof *to->hd);
+ 	memcpy(to->hdf, fm->hdf, fm->nhd * sizeof *to->hdf);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ unsigned
+ http_Write(struct worker *w, unsigned vsl_id, const struct http *hp, int resp)
+ {
+ 	unsigned u, l;
+ 
+ 	if (resp) {
+ 		l = WRW_WriteH(w, &hp->hd[HTTP_HDR_PROTO], " ");
+ 		WSLH(w, vsl_id, hp, HTTP_HDR_PROTO);
+ 
+ 		hp->hd[HTTP_HDR_STATUS].b = WS_Alloc(w->ws, 4);
+ 		AN(hp->hd[HTTP_HDR_STATUS].b);
+ 
+ 		sprintf(hp->hd[HTTP_HDR_STATUS].b, "%3d", hp->status);
+ 		hp->hd[HTTP_HDR_STATUS].e = hp->hd[HTTP_HDR_STATUS].b + 3;
+ 
+ 		l += WRW_WriteH(w, &hp->hd[HTTP_HDR_STATUS], " ");
+ 		WSLH(w, vsl_id, hp, HTTP_HDR_STATUS);
+ 
+ 		l += WRW_WriteH(w, &hp->hd[HTTP_HDR_RESPONSE], "\r\n");
+ 		WSLH(w, vsl_id, hp, HTTP_HDR_RESPONSE);
+ 	} else {
+ 		AN(hp->hd[HTTP_HDR_URL].b);
+ 		l = WRW_WriteH(w, &hp->hd[HTTP_HDR_REQ], " ");
+ 		WSLH(w, vsl_id, hp, HTTP_HDR_REQ);
+ 		l += WRW_WriteH(w, &hp->hd[HTTP_HDR_URL], " ");
+ 		WSLH(w, vsl_id, hp, HTTP_HDR_URL);
+ 		l += WRW_WriteH(w, &hp->hd[HTTP_HDR_PROTO], "\r\n");
+ 		WSLH(w, vsl_id, hp, HTTP_HDR_PROTO);
+ 	}
+ 	for (u = HTTP_HDR_FIRST; u < hp->nhd; u++) {
+ 		if (hp->hd[u].b == NULL)
+ 			continue;
+ 		AN(hp->hd[u].b);
+ 		AN(hp->hd[u].e);
+ 		l += WRW_WriteH(w, &hp->hd[u], "\r\n");
+ 		WSLH(w, vsl_id, hp, u);
+ 	}
+ 	l += WRW_Write(w, "\r\n", -1);
+ 	return (l);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ HTTP_Init(void)
+ {
+ 
+ #define HTTPH(a, b, c, d, e, f, g) b[0] = (char)strlen(b + 1);
+ #include "tbl/http_headers.h"
+ #undef HTTPH
+ }
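
A closing gloss on the HTTPH table: each known header is stored as
"*Name:" and HTTP_Init overwrites the leading '*' with strlen("Name:"),
so http_GetHdr and http_IsHdr read the length from hdr[0] instead of
calling strlen on every lookup.  A standalone sketch of the convention:

    #include <stdio.h>
    #include <string.h>

    /* Same layout the HTTPH macro generates: '*', name, ':'. */
    static char H_Foo[] = "*Foo:";

    int
    main(void)
    {
        /* What HTTP_Init does once at startup: */
        H_Foo[0] = (char)strlen(H_Foo + 1);

        /* What the lookup functions then rely on: */
        printf("len=%d name=%s\n", H_Foo[0], H_Foo + 1); /* len=4 name=Foo: */
        return (0);
    }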
diff --cc bin/varnishd/cache/cache_vrt.c
index 0000000,5e19ccc..27964dc
mode 000000,100644..100644
--- a/bin/varnishd/cache/cache_vrt.c
+++ b/bin/varnishd/cache/cache_vrt.c
@@@ -1,0 -1,535 +1,544 @@@
+ /*-
+  * Copyright (c) 2006 Verdens Gang AS
+  * Copyright (c) 2006-2011 Varnish Software AS
+  * All rights reserved.
+  *
+  * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  *
+  * Runtime support for compiled VCL programs
+  */
+ 
+ #include "config.h"
+ 
+ #include <netinet/in.h>
+ #include <arpa/inet.h>
+ 
+ #include <stdio.h>
+ #include <stdlib.h>
+ 
+ #include "cache.h"
+ 
+ #include "cache_backend.h"
+ #include "hash/hash_slinger.h"
+ #include "vav.h"
+ #include "vcl.h"
+ #include "vrt.h"
+ #include "vrt_obj.h"
+ #include "vtim.h"
+ 
+ const void * const vrt_magic_string_end = &vrt_magic_string_end;
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ VRT_error(struct sess *sp, unsigned code, const char *reason)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	WSL(sp->wrk, SLT_Debug, 0, "VCL_error(%u, %s)", code, reason ?
+ 	    reason : "(null)");
+ 	if (code < 100 || code > 999)
+ 		code = 503;
+ 	sp->err_code = (uint16_t)code;
+ 	sp->err_reason = reason ? reason : http_StatusMessage(sp->err_code);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ VRT_count(const struct sess *sp, unsigned u)
+ {
+ 
+ 	if (sp == NULL)
+ 		return;
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	if (cache_param->vcl_trace)
+ 		WSP(sp, SLT_VCL_trace, "%u %d.%d", u,
+ 		    sp->vcl->ref[u].line, sp->vcl->ref[u].pos);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ VRT_acl_log(const struct sess *sp, const char *msg)
+ {
+ 	WSP(sp, SLT_VCL_acl, "%s", msg);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static struct http *
+ vrt_selecthttp(const struct sess *sp, enum gethdr_e where)
+ {
+ 	struct http *hp;
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	switch (where) {
+ 	case HDR_REQ:
+ 		hp = sp->http;
+ 		break;
+ 	case HDR_BEREQ:
+ 		hp = sp->wrk->bereq;
+ 		break;
+ 	case HDR_BERESP:
+ 		hp = sp->wrk->beresp;
+ 		break;
+ 	case HDR_RESP:
+ 		hp = sp->wrk->resp;
+ 		break;
+ 	case HDR_OBJ:
+ 		CHECK_OBJ_NOTNULL(sp->obj, OBJECT_MAGIC);
+ 		hp = sp->obj->http;
+ 		break;
++	case HDR_STALE_OBJ:
++		CHECK_OBJ_NOTNULL(sp->stale_obj, OBJECT_MAGIC);
++		hp = sp->stale_obj->http;
++		break;
+ 	default:
+ 		INCOMPL();
+ 	}
+ 	CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
+ 	return (hp);
+ }
+ 
+ char *
+ VRT_GetHdr(const struct sess *sp, enum gethdr_e where, const char *n)
+ {
+ 	char *p;
+ 	struct http *hp;
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
++	if (where == HDR_STALE_OBJ && sp->stale_obj == NULL) {
++		WSP(sp, SLT_VCL_error,
++		    "stale_obj does not exist (reading header %s)", n);
++		return (NULL);
++	}
+ 	hp = vrt_selecthttp(sp, where);
+ 	if (!http_GetHdr(hp, n, &p))
+ 		return (NULL);
+ 	return (p);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * XXX: Optimize the single element case ?
+  */
+ 
+ char *
+ VRT_StringList(char *d, unsigned dl, const char *p, va_list ap)
+ {
+ 	char *b, *e;
+ 	unsigned x;
+ 
+ 	b = d;
+ 	e = b + dl;
+ 	while (p != vrt_magic_string_end && b < e) {
+ 		if (p != NULL) {
+ 			x = strlen(p);
+ 			if (b + x < e)
+ 				memcpy(b, p, x);
+ 			b += x;
+ 		}
+ 		p = va_arg(ap, const char *);
+ 	}
+ 	if (b >= e)
+ 		return (NULL);
+ 	*b++ = '\0';
+ 	return (b);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * XXX: Optimize the single element case ?
+  */
+ 
+ char *
+ VRT_String(struct ws *ws, const char *h, const char *p, va_list ap)
+ {
+ 	char *b, *e;
+ 	unsigned u, x;
+ 
+ 	u = WS_Reserve(ws, 0);
+ 	e = b = ws->f;
+ 	e += u;
+ 	if (h != NULL) {
+ 		x = strlen(h);
+ 		if (b + x < e)
+ 			memcpy(b, h, x);
+ 		b += x;
+ 		if (b < e)
+ 			*b = ' ';
+ 		b++;
+ 	}
+ 	b = VRT_StringList(b, e > b ? e - b : 0, p, ap);
+ 	if (b == NULL || b == e) {
+ 		WS_Release(ws, 0);
+ 		return (NULL);
+ 	}
+ 	e = b;
+ 	b = ws->f;
+ 	WS_Release(ws, e - b);
+ 	return (b);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Build a string on the worker threads workspace
+  */
+ 
+ const char *
+ VRT_WrkString(const struct sess *sp, const char *p, ...)
+ {
+ 	va_list ap;
+ 	char *b;
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	va_start(ap, p);
+ 	b = VRT_String(sp->wrk->ws, NULL, p, ap);
+ 	va_end(ap);
+ 	return (b);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ VRT_SetHdr(const struct sess *sp , enum gethdr_e where, const char *hdr,
+     const char *p, ...)
+ {
+ 	struct http *hp;
+ 	va_list ap;
+ 	char *b;
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	hp = vrt_selecthttp(sp, where);
+ 	va_start(ap, p);
+ 	if (p == NULL) {
+ 		http_Unset(hp, hdr);
+ 	} else {
+ 		b = VRT_String(hp->ws, hdr + 1, p, ap);
+ 		if (b == NULL) {
+ 			WSP(sp, SLT_LostHeader, "%s", hdr + 1);
+ 		} else {
+ 			http_Unset(hp, hdr);
+ 			http_SetHeader(sp->wrk, sp->vsl_id, hp, b);
+ 		}
+ 	}
+ 	va_end(ap);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ VRT_handling(struct sess *sp, unsigned hand)
+ {
+ 
+ 	if (sp == NULL) {
+ 		assert(hand == VCL_RET_OK);
+ 		return;
+ 	}
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	assert(hand < VCL_RET_MAX);
+ 	sp->handling = hand;
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Add an element to the array/list of hash bits.
+  */
+ 
+ void
+ VRT_hashdata(const struct sess *sp, const char *str, ...)
+ {
+ 	va_list ap;
+ 	const char *p;
+ 
+ 	HSH_AddString(sp, str);
+ 	va_start(ap, str);
+ 	while (1) {
+ 		p = va_arg(ap, const char *);
+ 		if (p == vrt_magic_string_end)
+ 			break;
+ 		HSH_AddString(sp, p);
+ 	}
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ double
+ VRT_r_now(const struct sess *sp)
+ {
+ 
+ 	(void)sp;
+ 	return (VTIM_real());
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ char *
+ VRT_IP_string(const struct sess *sp, const struct sockaddr_storage *sa)
+ {
+ 	char *p;
+ 	const struct sockaddr_in *si4;
+ 	const struct sockaddr_in6 *si6;
+ 	const void *addr;
+ 	int len;
+ 
+ 	switch (sa->ss_family) {
+ 	case AF_INET:
+ 		len = INET_ADDRSTRLEN;
+ 		si4 = (const void *)sa;
+ 		addr = &(si4->sin_addr);
+ 		break;
+ 	case AF_INET6:
+ 		len = INET6_ADDRSTRLEN;
+ 		si6 = (const void *)sa;
+ 		addr = &(si6->sin6_addr);
+ 		break;
+ 	default:
+ 		INCOMPL();
+ 	}
+ 	XXXAN(len);
+ 	AN(p = WS_Alloc(sp->http->ws, len));
+ 	AN(inet_ntop(sa->ss_family, addr, p, len));
+ 	return (p);
+ }
+ 
+ char *
+ VRT_int_string(const struct sess *sp, int num)
+ {
+ 	char *p;
+ 	int size;
+ 
+ 	size = snprintf(NULL, 0, "%d", num) + 1;
+ 	AN(p = WS_Alloc(sp->http->ws, size));
+ 	assert(snprintf(p, size, "%d", num) < size);
+ 	return (p);
+ }
+ 
+ char *
+ VRT_double_string(const struct sess *sp, double num)
+ {
+ 	char *p;
+ 	int size;
+ 
+ 	size = snprintf(NULL, 0, "%.3f", num) + 1;
+ 	AN(p = WS_Alloc(sp->http->ws, size));
+ 	assert(snprintf(p, size, "%.3f", num) < size);
+ 	return (p);
+ }
+ 
+ char *
+ VRT_time_string(const struct sess *sp, double t)
+ {
+ 	char *p;
+ 
+ 	AN(p = WS_Alloc(sp->http->ws, VTIM_FORMAT_SIZE));
+ 	VTIM_format(t, p);
+ 	return (p);
+ }
+ 
+ const char *
+ VRT_backend_string(const struct sess *sp, const struct director *d)
+ {
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	if (d == NULL)
+ 		d = sp->director;
+ 	if (d == NULL)
+ 		return (NULL);
+ 	return (d->vcl_name);
+ }
+ 
+ const char *
+ VRT_bool_string(const struct sess *sp, unsigned val)
+ {
+ 
+ 	(void)sp;
+ 	return (val ? "true" : "false");
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ VRT_Rollback(struct sess *sp)
+ {
+ 
+ 	HTTP_Copy(sp->http, sp->http0);
+ 	WS_Reset(sp->ws, sp->ws_req);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ VRT_panic(const struct sess *sp, const char *str, ...)
+ {
+ 	va_list ap;
+ 	char *b;
+ 
+ 	va_start(ap, str);
+ 	b = VRT_String(sp->http->ws, "PANIC: ", str, ap);
+ 	va_end(ap);
+ 	VAS_Fail("VCL", "", 0, b, 0, 2);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ VRT_synth_page(const struct sess *sp, unsigned flags, const char *str, ...)
+ {
+ 	va_list ap;
+ 	const char *p;
+ 	struct vsb *vsb;
+ 
+ 	(void)flags;
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->obj, OBJECT_MAGIC);
+ 	vsb = SMS_Makesynth(sp->obj);
+ 	AN(vsb);
+ 
+ 	VSB_cat(vsb, str);
+ 	va_start(ap, str);
+ 	p = va_arg(ap, const char *);
+ 	while (p != vrt_magic_string_end) {
+ 		if (p == NULL)
+ 			p = "(null)";
+ 		VSB_cat(vsb, p);
+ 		p = va_arg(ap, const char *);
+ 	}
+ 	va_end(ap);
+ 	SMS_Finish(sp->obj);
+ 	http_Unset(sp->obj->http, H_Content_Length);
+ 	http_PrintfHeader(sp->wrk, sp->vsl_id, sp->obj->http,
+ 	    "Content-Length: %d", sp->obj->len);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ VRT_ban(struct sess *sp, char *cmds, ...)
+ {
+ 	char *a1, *a2, *a3;
+ 	va_list ap;
+ 	struct ban *b;
+ 	int good;
+ 
+ 	(void)sp;
+ 	b = BAN_New();
+ 	va_start(ap, cmds);
+ 	a1 = cmds;
+ 	good = 0;
+ 	while (a1 != NULL) {
+ 		good = 0;
+ 		a2 = va_arg(ap, char *);
+ 		if (a2 == NULL)
+ 			break;
+ 		a3 = va_arg(ap, char *);
+ 		if (a3 == NULL)
+ 			break;
+ 		if (BAN_AddTest(NULL, b, a1, a2, a3))
+ 			break;
+ 		a1 = va_arg(ap, char *);
+ 		good = 1;
+ 	}
+ 	if (!good)
+ 		/* XXX: report error how ? */
+ 		BAN_Free(b);
+ 	else
+ 		BAN_Insert(b);
+ }
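
VRT_ban() consumes its variadic arguments as (expression, operator,
argument) triples, each becoming one BAN_AddTest() call, with NULL
terminating the list.  A hypothetical two-test ban (the test strings
are illustrative):

    VRT_ban(sp, "req.url", "~", "^/old/",
        "obj.http.x-tag", "==", "v1", NULL);
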
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ VRT_ban_string(struct sess *sp, const char *str)
+ {
+ 	char *a1, *a2, *a3;
+ 	char **av;
+ 	struct ban *b;
+ 	int good;
+ 	int i;
+ 
+ 	(void)sp;
+ 	av = VAV_Parse(str, NULL, ARGV_NOESC);
+ 	if (av[0] != NULL) {
+ 		/* XXX: report error how ? */
+ 		VAV_Free(av);
+ 		return;
+ 	}
+ 	b = BAN_New();
+ 	good = 0;
+ 	for (i = 1; ;) {
+ 		a1 = av[i++];
+ 		if (a1 == NULL)
+ 			break;
+ 		good = 0;
+ 		a2 = av[i++];
+ 		if (a2 == NULL)
+ 			break;
+ 		a3 = av[i++];
+ 		if (a3 == NULL)
+ 			break;
+ 		if (BAN_AddTest(NULL, b, a1, a2, a3))
+ 			break;
+ 		good = 1;
+ 		if (av[i] == NULL)
+ 			break;
+ 		good = 0;
+ 		if (strcmp(av[i++], "&&"))
+ 			break;
+ 	}
+ 	if (!good)
+ 		/* XXX: report error how ? */
+ 		BAN_Free(b);
+ 	else
+ 		BAN_Insert(b);
+ 	VAV_Free(av);
+ }
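
VRT_ban_string() accepts the same tests as a single string, tokenized
with VAV_Parse(); consecutive tests must be joined by a literal "&&"
token.  A hypothetical equivalent of the triple form above:

    VRT_ban_string(sp, "req.url ~ ^/old/ && obj.http.x-tag == v1");
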
+ 
+ /*--------------------------------------------------------------------
+  * "real" purges
+  */
+ 
+ void
+ VRT_purge(const struct sess *sp, double ttl, double grace)
+ {
+ 	if (sp->cur_method == VCL_MET_HIT)
+ 		HSH_Purge(sp, sp->obj->objcore->objhead, ttl, grace);
+ 	else if (sp->cur_method == VCL_MET_MISS)
+ 		HSH_Purge(sp, sp->objcore->objhead, ttl, grace);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Simple stuff
+  */
+ 
+ int
+ VRT_strcmp(const char *s1, const char *s2)
+ {
+ 	if (s1 == NULL || s2 == NULL)
+ 		return(1);
+ 	return (strcmp(s1, s2));
+ }
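
Note that VRT_strcmp() treats NULL as unequal to everything, including
another NULL, so a VCL string comparison involving an unset value never
matches.
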
+ 
+ void
+ VRT_memmove(void *dst, const void *src, unsigned len)
+ {
+ 
+ 	(void)memmove(dst, src, len);
+ }
diff --cc bin/varnishd/cache/cache_vrt_var.c
index 0000000,860c7aa..407de1d
mode 000000,100644..100644
--- a/bin/varnishd/cache/cache_vrt_var.c
+++ b/bin/varnishd/cache/cache_vrt_var.c
@@@ -1,0 -1,550 +1,620 @@@
+ /*-
+  * Copyright (c) 2006 Verdens Gang AS
+  * Copyright (c) 2006-2011 Varnish Software AS
+  * All rights reserved.
+  *
+  * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  *
+  * Runtime support for compiled VCL programs
+  */
+ #include "config.h"
+ 
+ #include <stdio.h>
+ #include <stdlib.h>
+ 
+ #include "cache.h"
+ #include "common/heritage.h"
+ 
+ #include "cache_backend.h"
+ #include "vrt_obj.h"
+ #include "vtcp.h"
+ #include "vtim.h"
+ 
++#define ILLEGAL_R(sess, obj, field)                                         \
++WSP(sess, SLT_VCL_error, "%s does not exist (reading field %s)", obj, field)
++
+ static char vrt_hostname[255] = "";
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static void
+ vrt_do_string(struct worker *w, int fd, const struct http *hp, int fld,
+     const char *err, const char *p, va_list ap)
+ {
+ 	char *b;
+ 
+ 	// AN(p);
+ 	AN(hp);
+ 	b = VRT_String(hp->ws, NULL, p, ap);
+ 	if (b == NULL || *b == '\0') {
+ 		WSL(w, SLT_LostHeader, fd, err);
+ 	} else {
+ 		http_SetH(hp, fld, b);
+ 	}
+ 	va_end(ap);
+ }
+ 
 -#define VRT_DO_HDR(obj, hdr, http, fld)				\
++#define VRT_DO_HDR_l(obj, hdr, cont, http, fld)			\
+ void								\
+ VRT_l_##obj##_##hdr(const struct sess *sp, const char *p, ...)	\
+ {								\
+ 	va_list ap;						\
+ 								\
+ 	va_start(ap, p);					\
+ 	vrt_do_string(sp->wrk, sp->fd,				\
 -	    http, fld, #obj "." #hdr, p, ap);			\
++	    cont->http, fld, #obj "." #hdr, p, ap);		\
+ 	va_end(ap);						\
 -}								\
 -								\
++}
++
++#define VRT_DO_HDR_r(obj, hdr, cont, http, fld, nullable)	\
+ const char *							\
+ VRT_r_##obj##_##hdr(const struct sess *sp)			\
+ {								\
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);			\
 -	CHECK_OBJ_NOTNULL(http, HTTP_MAGIC);			\
 -	return (http->hd[fld].b);				\
 -}
 -
 -VRT_DO_HDR(req,   request,	sp->http,		HTTP_HDR_REQ)
 -VRT_DO_HDR(req,   url,		sp->http,		HTTP_HDR_URL)
 -VRT_DO_HDR(req,   proto,	sp->http,		HTTP_HDR_PROTO)
 -VRT_DO_HDR(bereq, request,	sp->wrk->bereq,		HTTP_HDR_REQ)
 -VRT_DO_HDR(bereq, url,		sp->wrk->bereq,		HTTP_HDR_URL)
 -VRT_DO_HDR(bereq, proto,	sp->wrk->bereq,		HTTP_HDR_PROTO)
 -VRT_DO_HDR(obj,   proto,	sp->obj->http,		HTTP_HDR_PROTO)
 -VRT_DO_HDR(obj,   response,	sp->obj->http,		HTTP_HDR_RESPONSE)
 -VRT_DO_HDR(resp,  proto,	sp->wrk->resp,		HTTP_HDR_PROTO)
 -VRT_DO_HDR(resp,  response,	sp->wrk->resp,		HTTP_HDR_RESPONSE)
 -VRT_DO_HDR(beresp,  proto,	sp->wrk->beresp,	HTTP_HDR_PROTO)
 -VRT_DO_HDR(beresp,  response,	sp->wrk->beresp,	HTTP_HDR_RESPONSE)
++	if (!nullable || cont != NULL) {			\
++		CHECK_OBJ_NOTNULL(cont->http, HTTP_MAGIC);	\
++		return (cont->http->hd[fld].b);			\
++	}							\
++	ILLEGAL_R(sp, #obj, #hdr);				\
++	return (NULL);						\
++}								\
++
++#define VRT_DO_HDR(obj, hdr, cont, http, fld, nullable)		\
++VRT_DO_HDR_l(obj, hdr, cont, http, fld)				\
++VRT_DO_HDR_r(obj, hdr, cont, http, fld, nullable)		\
++
++VRT_DO_HDR(req,         request,    sp,             http,	HTTP_HDR_REQ,	    0)
++VRT_DO_HDR(req,         url,        sp,             http,	HTTP_HDR_URL,	    0)
++VRT_DO_HDR(req,         proto,      sp,             http,	HTTP_HDR_PROTO,	    0)
++VRT_DO_HDR(bereq,       request,    sp->wrk,        bereq,	HTTP_HDR_REQ,	    0)
++VRT_DO_HDR(bereq,       url,        sp->wrk,        bereq,	HTTP_HDR_URL,	    0)
++VRT_DO_HDR(bereq,       proto,      sp->wrk,        bereq,	HTTP_HDR_PROTO,	    0)
++VRT_DO_HDR(obj,         proto,      sp->obj,        http,	HTTP_HDR_PROTO,	    0)
++VRT_DO_HDR(obj,         response,   sp->obj,        http,	HTTP_HDR_RESPONSE,  0)
++VRT_DO_HDR(resp,        proto,      sp->wrk,        resp,	HTTP_HDR_PROTO,	    0)
++VRT_DO_HDR(resp,        response,   sp->wrk,        resp,	HTTP_HDR_RESPONSE,  0)
++VRT_DO_HDR(beresp,      proto,      sp->wrk,        beresp,	HTTP_HDR_PROTO,	    0)
++VRT_DO_HDR(beresp,      response,   sp->wrk,        beresp,	HTTP_HDR_RESPONSE,  0)
++VRT_DO_HDR_r(stale_obj,   proto,      sp->stale_obj,  http,	HTTP_HDR_PROTO,	    1)
++VRT_DO_HDR_r(stale_obj,   response,   sp->stale_obj,  http,	HTTP_HDR_RESPONSE,  1)
+ 
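
For the nullable stale_obj variants, VRT_DO_HDR_r expands roughly as
follows (the constant nullable argument folded away, reformatted for
readability): a read accessor that logs a VCL error and returns NULL
instead of asserting when no stale object is present.

    const char *
    VRT_r_stale_obj_proto(const struct sess *sp)
    {
        CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
        if (sp->stale_obj != NULL) {
            CHECK_OBJ_NOTNULL(sp->stale_obj->http, HTTP_MAGIC);
            return (sp->stale_obj->http->hd[HTTP_HDR_PROTO].b);
        }
        ILLEGAL_R(sp, "stale_obj", "proto");
        return (NULL);
    }
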
+ /*--------------------------------------------------------------------*/
+ 
 -#define VRT_DO_STATUS(obj, http)				\
++#define VRT_DO_STATUS_l(obj, cont, http)			\
+ void								\
+ VRT_l_##obj##_status(const struct sess *sp, int num)		\
+ {								\
+ 								\
+ 	assert(num >= 100 && num <= 999);			\
 -	http->status = (uint16_t)num;				\
 -}								\
 -								\
++	cont->http->status = (uint16_t) num;			\
++}
++
++#define VRT_DO_STATUS_r(obj, cont, http, nullable)		\
+ int								\
+ VRT_r_##obj##_status(const struct sess *sp)			\
+ {								\
+ 								\
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);			\
 -	return(http->status);					\
++	if (nullable && cont == NULL) {				\
++		ILLEGAL_R(sp, #obj, "status");			\
++		return (503);					\
++	}							\
++	return(cont->http->status);				\
+ }
+ 
 -VRT_DO_STATUS(obj, sp->obj->http)
 -VRT_DO_STATUS(beresp, sp->wrk->beresp)
 -VRT_DO_STATUS(resp, sp->wrk->resp)
++#define VRT_DO_STATUS(obj, cont, http, nullable)		\
++VRT_DO_STATUS_l(obj, cont, http)				\
++VRT_DO_STATUS_r(obj, cont, http, nullable)			\
++
++VRT_DO_STATUS(obj,          sp->obj,        http,	0)
++VRT_DO_STATUS(beresp,       sp->wrk,        beresp,	0)
++VRT_DO_STATUS(resp,	    sp->wrk,        resp,	0)
++VRT_DO_STATUS_r(stale_obj,  sp->stale_obj,  http,	1)
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ /* XXX: review this */
+ /* Add an objecthead to the saintmode list for the (hopefully) relevant
+  * backend. Some double-up asserting here to avoid assert-errors when there
+  * is no object.
+  */
+ void
+ VRT_l_beresp_saintmode(const struct sess *sp, double a)
+ {
+ 	struct trouble *new;
+ 	struct trouble *tr;
+ 	struct trouble *tr2;
+ 	struct worker *wrk;
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	wrk = sp->wrk;
+ 	if (!wrk->vbc)
+ 		return;
+ 	CHECK_OBJ_NOTNULL(wrk->vbc, VBC_MAGIC);
+ 	if (!wrk->vbc->backend)
+ 		return;
+ 	CHECK_OBJ_NOTNULL(wrk->vbc->backend, BACKEND_MAGIC);
+ 	if (!sp->objcore)
+ 		return;
+ 	CHECK_OBJ_NOTNULL(sp->objcore, OBJCORE_MAGIC);
+ 
+ 	/* Setting a negative holdoff period is a mistake. Detecting this
+ 	 * when compiling the VCL would be better.
+ 	 */
+ 	assert(a > 0);
+ 
+ 	ALLOC_OBJ(new, TROUBLE_MAGIC);
+ 	AN(new);
+ 	new->target = (uintptr_t)(sp->objcore->objhead);
+ 	new->timeout = sp->t_req + a;
+ 
+ 	/* Insert the new item on the list before the first item with a
+ 	 * timeout at a later date (ie: sort by which entry will time out
+ 	 * from the list).
+ 	 */
+ 	Lck_Lock(&wrk->vbc->backend->mtx);
+ 	VTAILQ_FOREACH_SAFE(tr, &wrk->vbc->backend->troublelist, list, tr2) {
+ 		if (tr->timeout < new->timeout) {
+ 			VTAILQ_INSERT_BEFORE(tr, new, list);
+ 			new = NULL;
+ 			break;
+ 		}
+ 	}
+ 
+ 	/* Insert the item at the end if the list is empty or all other
+ 	 * items have a longer timeout.
+ 	 */
+ 	if (new)
+ 		VTAILQ_INSERT_TAIL(&wrk->vbc->backend->troublelist, new, list);
+ 
+ 	Lck_Unlock(&wrk->vbc->backend->mtx);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ #define VBERESP(dir, type, onm, field)					\
+ void									\
+ VRT_l_##dir##_##onm(const struct sess *sp, type a)			\
+ {									\
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);				\
+ 	sp->wrk->field = a;						\
+ }									\
+ 									\
+ type									\
+ VRT_r_##dir##_##onm(const struct sess *sp)				\
+ {									\
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);				\
+ 	return (sp->wrk->field);					\
+ }
+ 
+ VBERESP(beresp, unsigned, do_esi, do_esi)
+ VBERESP(beresp, unsigned, do_gzip, do_gzip)
+ VBERESP(beresp, unsigned, do_gunzip, do_gunzip)
+ VBERESP(beresp, unsigned, do_stream, do_stream)
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ const char * __match_proto__()
+ VRT_r_client_identity(struct sess *sp)
+ {
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	if (sp->client_identity != NULL)
+ 		return (sp->client_identity);
+ 	else
+ 		return (sp->addr);
+ }
+ 
+ void
+ VRT_l_client_identity(struct sess *sp, const char *str, ...)
+ {
+ 	va_list ap;
+ 	char *b;
+ 
+ 	va_start(ap, str);
+ 	b = VRT_String(sp->http->ws, NULL, str, ap);
+ 	va_end(ap);
+ 	sp->client_identity = b;
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ #define BEREQ_TIMEOUT(which)					\
+ void __match_proto__()						\
+ VRT_l_bereq_##which(struct sess *sp, double num)		\
+ {								\
+ 								\
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);			\
+ 	sp->wrk->which = (num > 0.0 ? num : 0.0);		\
+ }								\
+ 								\
+ double __match_proto__()					\
+ VRT_r_bereq_##which(struct sess *sp)				\
+ {								\
+ 								\
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);			\
+ 	return(sp->wrk->which);					\
+ }
+ 
+ BEREQ_TIMEOUT(connect_timeout)
+ BEREQ_TIMEOUT(first_byte_timeout)
+ BEREQ_TIMEOUT(between_bytes_timeout)
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ const char *
+ VRT_r_beresp_backend_name(const struct sess *sp)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->wrk->vbc, VBC_MAGIC);
+ 	return(sp->wrk->vbc->backend->vcl_name);
+ }
+ 
+ struct sockaddr_storage *
+ VRT_r_beresp_backend_ip(const struct sess *sp)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->wrk->vbc, VBC_MAGIC);
+ 	return(sp->wrk->vbc->addr);
+ }
+ 
+ int
+ VRT_r_beresp_backend_port(const struct sess *sp)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->wrk->vbc, VBC_MAGIC);
+ 	return (VTCP_port(sp->wrk->vbc->addr));
+ }
+ 
+ const char * __match_proto__()
+ VRT_r_beresp_storage(struct sess *sp)
+ {
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	if (sp->wrk->storage_hint != NULL)
+ 		return (sp->wrk->storage_hint);
+ 	else
+ 		return (NULL);
+ }
+ 
+ void __match_proto__()
+ VRT_l_beresp_storage(struct sess *sp, const char *str, ...)
+ {
+ 	va_list ap;
+ 	char *b;
+ 
+ 	va_start(ap, str);
+ 	b = VRT_String(sp->wrk->ws, NULL, str, ap);
+ 	va_end(ap);
+ 	sp->wrk->storage_hint = b;
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ VRT_l_req_backend(struct sess *sp, struct director *be)
+ {
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	sp->director = be;
+ }
+ 
+ struct director * __match_proto__()
+ VRT_r_req_backend(struct sess *sp)
+ {
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	return (sp->director);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ VRT_l_req_esi(struct sess *sp, unsigned process_esi)
+ {
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	/*
+ 	 * Only allow ESI to be turned off in the main request,
+ 	 * otherwise everything gets confused.
+ 	 */
+ 	if(sp->esi_level == 0)
+ 		sp->disable_esi = !process_esi;
+ }
+ 
+ unsigned __match_proto__()
+ VRT_r_req_esi(struct sess *sp)
+ {
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	return (!sp->disable_esi);
+ }
+ 
+ int
+ VRT_r_req_esi_level(const struct sess *sp)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	return(sp->esi_level);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ unsigned __match_proto__()
+ VRT_r_req_can_gzip(struct sess *sp)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	return (RFC2616_Req_Gzip(sp));
+ }
+ 
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ int
+ VRT_r_req_restarts(const struct sess *sp)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	return (sp->restarts);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * NB: TTL is relative to when object was created, whereas grace and
+  * keep are relative to ttl.
+  */
+ 
 -#define VRT_DO_EXP(which, exp, fld, offset, extra)		\
 -								\
 -void __match_proto__()						\
 -VRT_l_##which##_##fld(struct sess *sp, double a)		\
 -{								\
 -								\
 -	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);			\
 -	if (a > 0.)						\
 -		a += offset;					\
 -	EXP_Set_##fld(&exp, a);					\
 -	extra;							\
 -}								\
 -								\
 -double __match_proto__()					\
 -VRT_r_##which##_##fld(struct sess *sp)				\
 -{								\
 -								\
 -	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);			\
 -	return(EXP_Get_##fld(&exp) - offset);			\
 -}
++#define VRT_DO_EXP_l(which, cont, fld, offset, extra)		    \
++void __match_proto__()						    \
++VRT_l_##which##_##fld(struct sess *sp, double a)		    \
++{								    \
++								    \
++	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);			    \
++	if (a > 0.)						    \
++		a += offset;					    \
++	EXP_Set_##fld(&cont->exp, a);				    \
++	extra;							    \
++}
++
++#define VRT_DO_EXP_r(which, cont, fld, offset, nullable)	    \
++double __match_proto__()					    \
++VRT_r_##which##_##fld(struct sess *sp)				    \
++{								    \
++								    \
++	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);			    \
++	if (nullable && cont == NULL) {				    \
++	    ILLEGAL_R(sp, #which, #fld);			    \
++	    return (-1);					    \
++	}							    \
++	return(EXP_Get_##fld(&cont->exp) - offset);		    \
++}
++
++#define VRT_DO_EXP(which, cont, fld, offset, nullable, extra)	    \
++VRT_DO_EXP_l(which, cont, fld, offset, extra)  		            \
++VRT_DO_EXP_r(which, cont, fld, offset, nullable)
+ 
+ static void
+ vrt_wsp_exp(const struct sess *sp, unsigned xid, const struct exp *e)
+ {
+ 	WSP(sp, SLT_TTL, "%u VCL %.0f %.0f %.0f %.0f %.0f",
+ 	    xid, e->ttl - (sp->t_req - e->entered), e->grace, e->keep,
+ 	    sp->t_req, e->age + (sp->t_req - e->entered));
+ }
+ 
 -VRT_DO_EXP(req, sp->exp, ttl, 0, )
 -VRT_DO_EXP(req, sp->exp, grace, 0, )
 -VRT_DO_EXP(req, sp->exp, keep, 0, )
++VRT_DO_EXP(req, sp, ttl, 0, 0, )
++VRT_DO_EXP(req, sp, grace, 0, 0, )
++VRT_DO_EXP(req, sp, keep, 0, 0, )
+ 
 -VRT_DO_EXP(obj, sp->obj->exp, grace, 0,
++VRT_DO_EXP(obj, sp->obj, grace, 0, 0,
+    EXP_Rearm(sp->obj);
+    vrt_wsp_exp(sp, sp->obj->xid, &sp->obj->exp);)
 -VRT_DO_EXP(obj, sp->obj->exp, ttl, (sp->t_req - sp->obj->exp.entered),
++VRT_DO_EXP(obj, sp->obj, ttl, (sp->t_req - sp->obj->exp.entered), 0,
+    EXP_Rearm(sp->obj);
+    vrt_wsp_exp(sp, sp->obj->xid, &sp->obj->exp);)
 -VRT_DO_EXP(obj, sp->obj->exp, keep, 0,
++VRT_DO_EXP(obj, sp->obj, keep, 0, 0,
+    EXP_Rearm(sp->obj);
+    vrt_wsp_exp(sp, sp->obj->xid, &sp->obj->exp);)
+ 
 -VRT_DO_EXP(beresp, sp->wrk->exp, grace, 0,
++VRT_DO_EXP(beresp, sp->wrk, grace, 0, 0,
+    vrt_wsp_exp(sp, sp->xid, &sp->wrk->exp);)
 -VRT_DO_EXP(beresp, sp->wrk->exp, ttl, 0,
++VRT_DO_EXP(beresp, sp->wrk, ttl, 0, 0,
+    vrt_wsp_exp(sp, sp->xid, &sp->wrk->exp);)
 -VRT_DO_EXP(beresp, sp->wrk->exp, keep, 0,
++VRT_DO_EXP(beresp, sp->wrk, keep, 0, 0,
+    vrt_wsp_exp(sp, sp->xid, &sp->wrk->exp);)
++
++VRT_DO_EXP_r(stale_obj, sp->stale_obj, grace, 0, 1)
++VRT_DO_EXP_r(stale_obj, sp->stale_obj, ttl, 0, 1)
++VRT_DO_EXP_r(stale_obj, sp->stale_obj, keep, 0, 1)
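
A worked example of the offset handling above: for an object that
entered the cache 30 seconds ago with a stored ttl of 120, reading
obj.ttl returns 120 - 30 = 90 (the remaining lifetime), while setting
obj.ttl = 60 stores 60 + 30 = 90, so the object again expires 60
seconds from now.
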
+ 
+ /*--------------------------------------------------------------------
+  * req.xid
+  */
+ 
+ const char * __match_proto__()
+ VRT_r_req_xid(struct sess *sp)
+ {
+ 	char *p;
+ 	int size;
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 
+ 	size = snprintf(NULL, 0, "%u", sp->xid) + 1;
+ 	AN(p = WS_Alloc(sp->http->ws, size));
+ 	assert(snprintf(p, size, "%u", sp->xid) < size);
+ 	return (p);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ #define REQ_BOOL(which)						\
+ void __match_proto__()						\
+ VRT_l_req_##which(struct sess *sp, unsigned val)		\
+ {								\
+ 								\
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);			\
+ 	sp->which = val ? 1 : 0;				\
+ }								\
+ 								\
+ unsigned __match_proto__()					\
+ VRT_r_req_##which(struct sess *sp)				\
+ {								\
+ 								\
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);			\
+ 	return(sp->which);					\
+ }
+ 
+ REQ_BOOL(hash_ignore_busy)
+ REQ_BOOL(hash_always_miss)
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ struct sockaddr_storage *
+ VRT_r_client_ip(struct sess *sp)
+ {
+ 
+ 	return (&sp->sockaddr);
+ }
+ 
+ struct sockaddr_storage *
+ VRT_r_server_ip(struct sess *sp)
+ {
+ 	int i;
+ 
+ 	if (sp->mysockaddr.ss_family == AF_UNSPEC) {
+ 		i = getsockname(sp->fd,
+ 		    (void*)&sp->mysockaddr, &sp->mysockaddrlen);
+ 		assert(VTCP_Check(i));
+ 	}
+ 
+ 	return (&sp->mysockaddr);
+ }
+ 
+ const char*
+ VRT_r_server_identity(struct sess *sp)
+ {
+ 	(void)sp;
+ 
+ 	if (heritage.identity[0] != '\0')
+ 		return (heritage.identity);
+ 	else
+ 		return (heritage.name);
+ }
+ 
+ 
+ const char*
+ VRT_r_server_hostname(struct sess *sp)
+ {
+ 	(void)sp;
+ 
+ 	if (vrt_hostname[0] == '\0')
+ 		AZ(gethostname(vrt_hostname, sizeof(vrt_hostname)));
+ 
+ 	return (vrt_hostname);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * XXX: This is pessimistically silly
+  */
+ 
+ int
+ VRT_r_server_port(struct sess *sp)
+ {
+ 	int i;
+ 
+ 	if (sp->mysockaddr.ss_family == AF_UNSPEC) {
+ 		i = getsockname(sp->fd,
+ 		    (void*)&sp->mysockaddr, &sp->mysockaddrlen);
+ 		assert(VTCP_Check(i));
+ 	}
+ 	return (VTCP_port(&sp->mysockaddr));
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
++/* XXX: uplex/GS: a nice macro would eliminate the repetition here ... */
++
+ int
+ VRT_r_obj_hits(const struct sess *sp)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->obj, OBJECT_MAGIC);	/* XXX */
+ 	return (sp->obj->hits);
+ }
+ 
++int
++VRT_r_stale_obj_hits(const struct sess *sp)
++{
++
++	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
++	if (sp->stale_obj == NULL) {
++		ILLEGAL_R(sp, "stale_obj", "hits");
++		return (0);
++	}
++	CHECK_OBJ(sp->stale_obj, OBJECT_MAGIC);	/* XXX */
++	return (sp->stale_obj->hits);
++}
++
+ double
+ VRT_r_obj_lastuse(const struct sess *sp)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->obj, OBJECT_MAGIC);	/* XXX */
+ 	return (VTIM_real() - sp->obj->last_use);
+ }
+ 
++double
++VRT_r_stale_obj_lastuse(const struct sess *sp)
++{
++
++	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
++	if (sp->stale_obj == NULL) {
++		ILLEGAL_R(sp, "stale_obj", "lastuse");
++		return (0);
++	}
++	CHECK_OBJ(sp->stale_obj, OBJECT_MAGIC);	/* XXX */
++	return (VTIM_real() - sp->stale_obj->last_use);
++}
++
+ unsigned
+ VRT_r_req_backend_healthy(const struct sess *sp)
+ {
+ 	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ 	CHECK_OBJ_NOTNULL(sp->director, DIRECTOR_MAGIC);
+ 	return (VDI_Healthy(sp->director, sp));
+ }
+ 
++unsigned
++VRT_r_stale_obj(const struct sess *sp)
++{
++	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
++	return (sp->stale_obj != NULL);
++}
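
Taken together, these accessors let compiled VCL guard every use of the
stale object behind a presence test; a hypothetical fragment of
generated code (the particular combination is invented for
illustration):

    /* Only consult the stale object when one was found; the nullable
     * accessors above merely log a VCL error and return a fallback
     * value if it is absent. */
    if (VRT_r_stale_obj(sp) && VRT_r_stale_obj_status(sp) == 200)
        VRT_l_bereq_first_byte_timeout(sp, 5.0);
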
diff --cc bin/varnishd/mgt/mgt_param.c
index 0000000,e4be5d8..991793a
mode 000000,100644..100644
--- a/bin/varnishd/mgt/mgt_param.c
+++ b/bin/varnishd/mgt/mgt_param.c
@@@ -1,0 -1,1368 +1,1368 @@@
+ /*-
+  * Copyright (c) 2006 Verdens Gang AS
+  * Copyright (c) 2006-2011 Varnish Software AS
+  * All rights reserved.
+  *
+  * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  */
+ 
+ #include "config.h"
+ 
+ #include <grp.h>
+ #include <limits.h>
+ #include <math.h>
+ #include <pwd.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <unistd.h>
+ 
+ #include "mgt/mgt.h"
+ #include "common/heritage.h"
+ #include "common/params.h"
+ 
+ #include "mgt/mgt_param.h"
+ #include "waiter/cache_waiter.h"
+ #include "vav.h"
+ #include "vcli.h"
+ #include "vcli_common.h"
+ #include "vcli_priv.h"
+ #include "vnum.h"
+ #include "vss.h"
+ 
+ #include "mgt_cli.h"
+ 
+ #define MAGIC_INIT_STRING	"\001"
+ struct params mgt_param;
+ static int nparspec;
+ static struct parspec const ** parspec;
+ static int margin;
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static const struct parspec *
+ mcf_findpar(const char *name)
+ {
+ 	int i;
+ 
+ 	for (i = 0; i < nparspec; i++)
+ 		if (!strcmp(parspec[i]->name, name))
+ 			return (parspec[i]);
+ 	return (NULL);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static void
+ tweak_generic_timeout(struct cli *cli, volatile unsigned *dst, const char *arg)
+ {
+ 	unsigned u;
+ 
+ 	if (arg != NULL) {
+ 		u = strtoul(arg, NULL, 0);
+ 		if (u == 0) {
+ 			VCLI_Out(cli, "Timeout must be greater than zero\n");
+ 			VCLI_SetResult(cli, CLIS_PARAM);
+ 			return;
+ 		}
+ 		*dst = u;
+ 	} else
+ 		VCLI_Out(cli, "%u", *dst);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ tweak_timeout(struct cli *cli, const struct parspec *par, const char *arg)
+ {
+ 	volatile unsigned *dest;
+ 
+ 	dest = par->priv;
+ 	tweak_generic_timeout(cli, dest, arg);
+ }
+ 
+ static void
+ tweak_timeout_double(struct cli *cli, const struct parspec *par,
+     const char *arg)
+ {
+ 	volatile double *dest;
+ 	double u;
+ 
+ 	dest = par->priv;
+ 	if (arg != NULL) {
+ 		u = strtod(arg, NULL);
+ 		if (u < par->min) {
+ 			VCLI_Out(cli,
+ 			    "Timeout must be greater than or equal to %.g\n",
+ 				 par->min);
+ 			VCLI_SetResult(cli, CLIS_PARAM);
+ 			return;
+ 		}
+ 		if (u > par->max) {
+ 			VCLI_Out(cli,
+ 			    "Timeout must be less than or equal to %.g\n",
+ 				 par->max);
+ 			VCLI_SetResult(cli, CLIS_PARAM);
+ 			return;
+ 		}
+ 		*dest = u;
+ 	} else
+ 		VCLI_Out(cli, "%.6f", *dest);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static void
+ tweak_generic_double(struct cli *cli, const struct parspec *par,
+     const char *arg)
+ {
+ 	volatile double *dest;
+ 	double u;
+ 
+ 	dest = par->priv;
+ 	if (arg != NULL) {
+ 		u = strtod(arg, NULL);
+ 		if (u < par->min) {
+ 			VCLI_Out(cli,
+ 			    "Must be greater than or equal to %.g\n",
+ 				 par->min);
+ 			VCLI_SetResult(cli, CLIS_PARAM);
+ 			return;
+ 		}
+ 		if (u > par->max) {
+ 			VCLI_Out(cli,
+ 			    "Must be less than or equal to %.g\n",
+ 				 par->max);
+ 			VCLI_SetResult(cli, CLIS_PARAM);
+ 			return;
+ 		}
+ 		*dest = u;
+ 	} else
+ 		VCLI_Out(cli, "%f", *dest);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static void
+ tweak_generic_bool(struct cli *cli, volatile unsigned *dest, const char *arg)
+ {
+ 	if (arg != NULL) {
+ 		if (!strcasecmp(arg, "off"))
+ 			*dest = 0;
+ 		else if (!strcasecmp(arg, "disable"))
+ 			*dest = 0;
+ 		else if (!strcasecmp(arg, "no"))
+ 			*dest = 0;
+ 		else if (!strcasecmp(arg, "false"))
+ 			*dest = 0;
+ 		else if (!strcasecmp(arg, "on"))
+ 			*dest = 1;
+ 		else if (!strcasecmp(arg, "enable"))
+ 			*dest = 1;
+ 		else if (!strcasecmp(arg, "yes"))
+ 			*dest = 1;
+ 		else if (!strcasecmp(arg, "true"))
+ 			*dest = 1;
+ 		else {
+ 			VCLI_Out(cli, "use \"on\" or \"off\"\n");
+ 			VCLI_SetResult(cli, CLIS_PARAM);
+ 			return;
+ 		}
+ 	} else
+ 		VCLI_Out(cli, *dest ? "on" : "off");
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static void
+ tweak_bool(struct cli *cli, const struct parspec *par, const char *arg)
+ {
+ 	volatile unsigned *dest;
+ 
+ 	dest = par->priv;
+ 	tweak_generic_bool(cli, dest, arg);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ tweak_generic_uint(struct cli *cli, volatile unsigned *dest, const char *arg,
+     unsigned min, unsigned max)
+ {
+ 	unsigned u;
+ 
+ 	if (arg != NULL) {
+ 		if (!strcasecmp(arg, "unlimited"))
+ 			u = UINT_MAX;
+ 		else
+ 			u = strtoul(arg, NULL, 0);
+ 		if (u < min) {
+ 			VCLI_Out(cli, "Must be at least %u\n", min);
+ 			VCLI_SetResult(cli, CLIS_PARAM);
+ 			return;
+ 		}
+ 		if (u > max) {
+ 			VCLI_Out(cli, "Must be no more than %u\n", max);
+ 			VCLI_SetResult(cli, CLIS_PARAM);
+ 			return;
+ 		}
+ 		*dest = u;
+ 	} else if (*dest == UINT_MAX) {
+ 		VCLI_Out(cli, "unlimited");
+ 	} else {
+ 		VCLI_Out(cli, "%u", *dest);
+ 	}
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ tweak_uint(struct cli *cli, const struct parspec *par, const char *arg)
+ {
+ 	volatile unsigned *dest;
+ 
+ 	dest = par->priv;
+ 	tweak_generic_uint(cli, dest, arg, (uint)par->min, (uint)par->max);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static void
+ fmt_bytes(struct cli *cli, ssize_t t)
+ {
+ 	const char *p;
+ 
+ 	if (t & 0xff) {
+ 		VCLI_Out(cli, "%zub", t);
+ 		return;
+ 	}
+ 	for (p = "kMGTPEZY"; *p; p++) {
+ 		if (t & 0x300) {
+ 			VCLI_Out(cli, "%.2f%c", t / 1024.0, *p);
+ 			return;
+ 		}
+ 		t /= 1024;
+ 		if (t & 0x0ff) {
+ 			VCLI_Out(cli, "%zu%c", t, *p);
+ 			return;
+ 		}
+ 	}
+ 	VCLI_Out(cli, "(bogus number)");
+ }
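
Worked examples for fmt_bytes(): 1000 has nonzero low bits and prints
unscaled as "1000b"; 81920 divides down cleanly and prints as "80k";
1536 trips the 0x300 test on the first pass and prints with a fraction
as "1.50k".
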
+ 
+ static void
+ tweak_generic_bytes(struct cli *cli, volatile ssize_t *dest, const char *arg,
+     double min, double max)
+ {
+ 	uintmax_t r;
+ 	const char *p;
+ 
+ 	if (arg != NULL) {
+ 		p = VNUM_2bytes(arg, &r, 0);
+ 		if (p != NULL) {
+ 			VCLI_Out(cli, "Could not convert to bytes.\n");
+ 			VCLI_Out(cli, "%s\n", p);
+ 			VCLI_Out(cli,
+ 			    "  Try something like '80k' or '120M'\n");
+ 			VCLI_SetResult(cli, CLIS_PARAM);
+ 			return;
+ 		}
+ 		if ((uintmax_t)((ssize_t)r) != r || r > max) {
+ 			VCLI_Out(cli, "Must be no more than ");
+ 			fmt_bytes(cli, (ssize_t)max);
+ 			VCLI_Out(cli, "\n");
+ 			VCLI_SetResult(cli, CLIS_PARAM);
+ 			return;
+ 		}
+ 		if (r < min) {
+ 			VCLI_Out(cli, "Must be at least ");
+ 			fmt_bytes(cli, (ssize_t)min);
+ 			VCLI_Out(cli, "\n");
+ 			VCLI_SetResult(cli, CLIS_PARAM);
+ 			return;
+ 		}
+ 		*dest = r;
+ 	} else {
+ 		fmt_bytes(cli, *dest);
+ 	}
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static void
+ tweak_bytes(struct cli *cli, const struct parspec *par, const char *arg)
+ {
+ 	volatile ssize_t *dest;
+ 
+ 	assert(par->min >= 0);
+ 	dest = par->priv;
+ 	tweak_generic_bytes(cli, dest, arg, par->min, par->max);
+ }
+ 
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static void
+ tweak_bytes_u(struct cli *cli, const struct parspec *par, const char *arg)
+ {
+ 	volatile unsigned *d1;
+ 	volatile ssize_t dest;
+ 
+ 	assert(par->max <= UINT_MAX);
+ 	assert(par->min >= 0);
+ 	d1 = par->priv;
+ 	dest = *d1;
+ 	tweak_generic_bytes(cli, &dest, arg, par->min, par->max);
+ 	*d1 = dest;
+ }
+ 
+ /*--------------------------------------------------------------------
+  * XXX: slightly magic.  We want to initialize to "nobody" (XXX: shouldn't
+  * XXX: that be something autocrap found for us ?) but we don't want to
+  * XXX: fail initialization if that user doesn't exist, even though we
+  * XXX: do want to fail it in subsequent sets.
+  * XXX: The magic init string is a hack for this.
+  */
+ 
+ static void
+ tweak_user(struct cli *cli, const struct parspec *par, const char *arg)
+ {
+ 	struct passwd *pw;
+ 	struct group *gr;
+ 
+ 	(void)par;
+ 	if (arg != NULL) {
+ 		if (!strcmp(arg, MAGIC_INIT_STRING)) {
+ 			pw = getpwnam("nobody");
+ 			if (pw == NULL) {
+ 				mgt_param.uid = getuid();
+ 				return;
+ 			}
+ 		} else
+ 			pw = getpwnam(arg);
+ 		if (pw == NULL) {
+ 			VCLI_Out(cli, "Unknown user");
+ 			VCLI_SetResult(cli, CLIS_PARAM);
+ 			return;
+ 		}
+ 		REPLACE(mgt_param.user, pw->pw_name);
+ 		mgt_param.uid = pw->pw_uid;
+ 		mgt_param.gid = pw->pw_gid;
+ 
+ 		/* set group to user's primary group */
+ 		if ((gr = getgrgid(pw->pw_gid)) != NULL &&
+ 		    (gr = getgrnam(gr->gr_name)) != NULL &&
+ 		    gr->gr_gid == pw->pw_gid)
+ 			REPLACE(mgt_param.group, gr->gr_name);
+ 	} else if (mgt_param.user) {
+ 		VCLI_Out(cli, "%s (%d)", mgt_param.user, (int)mgt_param.uid);
+ 	} else {
+ 		VCLI_Out(cli, "%d", (int)mgt_param.uid);
+ 	}
+ }
+ 
+ /*--------------------------------------------------------------------
+  * XXX: see comment for tweak_user, same thing here.
+  */
+ 
+ static void
+ tweak_group(struct cli *cli, const struct parspec *par, const char *arg)
+ {
+ 	struct group *gr;
+ 
+ 	(void)par;
+ 	if (arg != NULL) {
+ 		if (!strcmp(arg, MAGIC_INIT_STRING)) {
+ 			gr = getgrnam("nogroup");
+ 			if (gr == NULL) {
+ 				/* Only replace if tweak_user didn't */
+ 				if (mgt_param.gid == 0)
+ 					mgt_param.gid = getgid();
+ 				return;
+ 			}
+ 		} else
+ 			gr = getgrnam(arg);
+ 		if (gr == NULL) {
+ 			VCLI_Out(cli, "Unknown group");
+ 			VCLI_SetResult(cli, CLIS_PARAM);
+ 			return;
+ 		}
+ 		REPLACE(mgt_param.group, gr->gr_name);
+ 		mgt_param.gid = gr->gr_gid;
+ 	} else if (mgt_param.group) {
+ 		VCLI_Out(cli, "%s (%d)", mgt_param.group, (int)mgt_param.gid);
+ 	} else {
+ 		VCLI_Out(cli, "%d", (int)mgt_param.gid);
+ 	}
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static void
+ clean_listen_sock_head(struct listen_sock_head *lsh)
+ {
+ 	struct listen_sock *ls, *ls2;
+ 
+ 	VTAILQ_FOREACH_SAFE(ls, lsh, list, ls2) {
+ 		CHECK_OBJ_NOTNULL(ls, LISTEN_SOCK_MAGIC);
+ 		VTAILQ_REMOVE(lsh, ls, list);
+ 		free(ls->name);
+ 		free(ls->addr);
+ 		FREE_OBJ(ls);
+ 	}
+ }
+ 
+ static void
+ tweak_listen_address(struct cli *cli, const struct parspec *par,
+     const char *arg)
+ {
+ 	char **av;
+ 	int i;
+ 	struct listen_sock		*ls;
+ 	struct listen_sock_head		lsh;
+ 
+ 	(void)par;
+ 	if (arg == NULL) {
+ 		VCLI_Quote(cli, mgt_param.listen_address);
+ 		return;
+ 	}
+ 
+ 	av = VAV_Parse(arg, NULL, ARGV_COMMA);
+ 	if (av == NULL) {
+ 		VCLI_Out(cli, "Parse error: out of memory");
+ 		VCLI_SetResult(cli, CLIS_PARAM);
+ 		return;
+ 	}
+ 	if (av[0] != NULL) {
+ 		VCLI_Out(cli, "Parse error: %s", av[0]);
+ 		VCLI_SetResult(cli, CLIS_PARAM);
+ 		VAV_Free(av);
+ 		return;
+ 	}
+ 	if (av[1] == NULL) {
+ 		VCLI_Out(cli, "Empty listen address");
+ 		VCLI_SetResult(cli, CLIS_PARAM);
+ 		VAV_Free(av);
+ 		return;
+ 	}
+ 	VTAILQ_INIT(&lsh);
+ 	for (i = 1; av[i] != NULL; i++) {
+ 		struct vss_addr **ta;
+ 		int j, n;
+ 
+ 		n = VSS_resolve(av[i], "http", &ta);
+ 		if (n == 0) {
+ 			VCLI_Out(cli, "Invalid listen address ");
+ 			VCLI_Quote(cli, av[i]);
+ 			VCLI_SetResult(cli, CLIS_PARAM);
+ 			break;
+ 		}
+ 		for (j = 0; j < n; ++j) {
+ 			ALLOC_OBJ(ls, LISTEN_SOCK_MAGIC);
+ 			AN(ls);
+ 			ls->sock = -1;
+ 			ls->addr = ta[j];
+ 			ls->name = strdup(av[i]);
+ 			AN(ls->name);
+ 			VTAILQ_INSERT_TAIL(&lsh, ls, list);
+ 		}
+ 		free(ta);
+ 	}
+ 	VAV_Free(av);
+ 	if (cli != NULL && cli->result != CLIS_OK) {
+ 		clean_listen_sock_head(&lsh);
+ 		return;
+ 	}
+ 
+ 	REPLACE(mgt_param.listen_address, arg);
+ 
+ 	clean_listen_sock_head(&heritage.socks);
+ 	heritage.nsocks = 0;
+ 
+ 	while (!VTAILQ_EMPTY(&lsh)) {
+ 		ls = VTAILQ_FIRST(&lsh);
+ 		VTAILQ_REMOVE(&lsh, ls, list);
+ 		CHECK_OBJ_NOTNULL(ls, LISTEN_SOCK_MAGIC);
+ 		VTAILQ_INSERT_TAIL(&heritage.socks, ls, list);
+ 		heritage.nsocks++;
+ 	}
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static void
+ tweak_string(struct cli *cli, const struct parspec *par, const char *arg)
+ {
+ 	char **p = TRUST_ME(par->priv);
+ 
+ 	AN(p);
+ 	/* XXX should have tweak_generic_string */
+ 	if (arg == NULL) {
+ 		VCLI_Quote(cli, *p);
+ 	} else {
+ 		REPLACE(*p, arg);
+ 	}
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static void
+ tweak_waiter(struct cli *cli, const struct parspec *par, const char *arg)
+ {
+ 
+ 	/* XXX should have tweak_generic_string */
+ 	(void)par;
+ 	WAIT_tweak_waiter(cli, arg);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static void
+ tweak_diag_bitmap(struct cli *cli, const struct parspec *par, const char *arg)
+ {
+ 	unsigned u;
+ 
+ 	(void)par;
+ 	if (arg != NULL) {
+ 		u = strtoul(arg, NULL, 0);
+ 		mgt_param.diag_bitmap = u;
+ 	} else {
+ 		VCLI_Out(cli, "0x%x", mgt_param.diag_bitmap);
+ 	}
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ /*
+  * Make sure to end all lines with either a space or a newline, or the
+  * formatting will go haywire.
+  */
+ 
+ #define DELAYED_EFFECT_TEXT \
+ 	"\nNB: This parameter may take quite some time to take (full) effect."
+ 
+ #define MUST_RESTART_TEXT \
+ 	"\nNB: This parameter will not take any effect until the " \
+ 	"child process has been restarted."
+ 
+ #define MUST_RELOAD_TEXT \
+ 	"\nNB: This parameter will not take any effect until the " \
+ 	"VCL programs have been reloaded."
+ 
+ #define EXPERIMENTAL_TEXT \
+ 	"\nNB: We do not know yet if it is a good idea to change " \
+ 	"this parameter, or if the default value is even sensible.  " \
+ 	"Caution is advised, and feedback is most welcome."
+ 
+ #define WIZARD_TEXT \
+ 	"\nNB: Do not change this parameter, unless a developer tells " \
+ 	"you to do so."
+ 
+ /*
+  * Remember to update varnishd.1 whenever you add / remove a parameter or
+  * change its default value.
+  * XXX: we should generate the relevant section of varnishd.1 from here.
+  */
+ static const struct parspec input_parspec[] = {
+ 	{ "user", tweak_user, NULL, 0, 0,
+ 		"The unprivileged user to run as.  Setting this will "
+ 		"also set \"group\" to the specified user's primary group.",
+ 		MUST_RESTART,
+ 		MAGIC_INIT_STRING },
+ 	{ "group", tweak_group, NULL, 0, 0,
+ 		"The unprivileged group to run as.",
+ 		MUST_RESTART,
+ 		MAGIC_INIT_STRING },
+ 	{ "default_ttl", tweak_timeout_double, &mgt_param.default_ttl,
+ 		0, UINT_MAX,
+ 		"The TTL assigned to objects if neither the backend nor "
+ 		"the VCL code assigns one.\n"
+ 		"Objects already cached will not be affected by changes "
+ 		"made until they are fetched from the backend again.\n"
+ 		"To force an immediate effect at the expense of a total "
+ 		"flush of the cache use \"ban.url .\"",
+ 		0,
+ 		"120", "seconds" },
+ 	{ "sess_workspace",
+ 		tweak_bytes_u, &mgt_param.sess_workspace, 1024, UINT_MAX,
+ 		"Bytes of HTTP protocol workspace allocated for sessions. "
+ 		"This space must be big enough for the entire HTTP protocol "
+ 		"header and any edits done to it in the VCL code.\n"
+ 		"Minimum is 1024 bytes.",
+ 		DELAYED_EFFECT,
+ 		"64k", "bytes" },
+ 	{ "http_req_hdr_len",
+ 		tweak_bytes_u, &mgt_param.http_req_hdr_len,
+ 		40, UINT_MAX,
+ 		"Maximum length of any HTTP client request header we will "
+ 		"allow.  The limit is inclusive of its continuation lines.\n",
+ 		0,
+ 		"8k", "bytes" },
+ 	{ "http_req_size",
+ 		tweak_bytes_u, &mgt_param.http_req_size,
+ 		256, UINT_MAX,
+ 		"Maximum number of bytes of HTTP client request we will deal "
+ 		"with.  This is a limit on all bytes up to the double blank "
+ 		"line which ends the HTTP request.\n"
+ 		"The memory for the request is allocated from the session "
+ 		"workspace (param: sess_workspace) and this parameter limits "
+ 		"how much of that the request is allowed to take up.",
+ 		0,
+ 		"32k", "bytes" },
+ 	{ "http_resp_hdr_len",
+ 		tweak_bytes_u, &mgt_param.http_resp_hdr_len,
+ 		40, UINT_MAX,
+ 		"Maximum length of any HTTP backend response header we will "
+ 		"allow.  The limit is inclusive of its continuation lines.\n",
+ 		0,
+ 		"8k", "bytes" },
+ 	{ "http_resp_size",
+ 		tweak_bytes_u, &mgt_param.http_resp_size,
+ 		256, UINT_MAX,
+ 		"Maximum number of bytes of HTTP backend response we will deal "
+ 		"with.  This is a limit on all bytes up to the double blank "
+ 		"line which ends the HTTP response headers.\n"
+ 		"The memory for the response is allocated from the worker "
+ 		"workspace (param: sess_workspace) and this parameter limits "
+ 		"how much of that the response is allowed to take up.",
+ 		0,
+ 		"32k", "bytes" },
+ 	{ "http_max_hdr", tweak_uint, &mgt_param.http_max_hdr, 32, 65535,
+ 		"Maximum number of HTTP headers we will deal with in "
+ 		"client requests or backend responses.  "
+ 		"Note that the first line occupies five header fields.\n"
+ 		"This parameter does not influence storage consumption; "
+ 		"objects allocate exact space for the headers they store.\n",
+ 		0,
+ 		"64", "header lines" },
+ 	{ "shm_workspace",
+ 		tweak_bytes_u, &mgt_param.shm_workspace, 4096, UINT_MAX,
+ 		"Bytes of shmlog workspace allocated for worker threads. "
+ 		"If too big, it wastes some RAM; if too small it causes "
+ 		"needless flushes of the SHM workspace.\n"
+ 		"These flushes show up in stats as "
+ 		"\"SHM flushes due to overflow\".\n"
+ 		"Minimum is 4096 bytes.",
+ 		DELAYED_EFFECT,
+ 		"8k", "bytes" },
+ 	{ "shm_reclen",
+ 		tweak_bytes_u, &mgt_param.shm_reclen, 16, 65535,
+ 		"Maximum number of bytes in SHM log record.\n"
+ 		"Maximum is 65535 bytes.",
+ 		0,
+ 		"255", "bytes" },
+ 	{ "default_grace", tweak_timeout_double, &mgt_param.default_grace,
+ 		0, UINT_MAX,
+ 		"Default grace period.  We will deliver an object "
+ 		"this long after it has expired, provided another thread "
+ 		"is attempting to get a new copy.\n"
+ 		"Objects already cached will not be affected by changes "
+ 		"made until they are fetched from the backend again.\n",
+ 		DELAYED_EFFECT,
+ 		"10", "seconds" },
+ 	{ "default_keep", tweak_timeout_double, &mgt_param.default_keep,
+ 		0, UINT_MAX,
 -		"Default keep period.  We will keep a useless object "
++		"Default keep period.  We will keep a stale object "
+ 		"around this long, making it available for conditional "
+ 		"backend fetches.  "
+ 		"That means that the object will be removed from the "
 -		"cache at the end of ttl+grace+keep.",
++		"cache at the end of ttl+max(grace,keep).",
+ 		DELAYED_EFFECT,
 -		"0", "seconds" },
++		"10", "seconds" },
+ 	{ "sess_timeout", tweak_timeout, &mgt_param.sess_timeout, 0, 0,
+ 		"Idle timeout for persistent sessions. "
+ 		"If an HTTP request has not been received in this many "
+ 		"seconds, the session is closed.",
+ 		0,
+ 		"5", "seconds" },
+ 	{ "expiry_sleep", tweak_timeout_double, &mgt_param.expiry_sleep, 0, 60,
+ 		"How long the expiry thread sleeps when there is nothing "
+ 		"for it to do.\n",
+ 		0,
+ 		"1", "seconds" },
+ 	{ "pipe_timeout", tweak_timeout, &mgt_param.pipe_timeout, 0, 0,
+ 		"Idle timeout for PIPE sessions. "
+ 		"If nothing has been received in either direction for "
+ 		"this many seconds, the session is closed.\n",
+ 		0,
+ 		"60", "seconds" },
+ 	{ "send_timeout", tweak_timeout, &mgt_param.send_timeout, 0, 0,
+ 		"Send timeout for client connections. "
+ 		"If the HTTP response hasn't been transmitted in this many "
+ 		"seconds the session is closed.\n"
+ 		"See setsockopt(2) under SO_SNDTIMEO for more information.",
+ 		DELAYED_EFFECT,
+ 		"600", "seconds" },
+ 	{ "idle_send_timeout", tweak_timeout, &mgt_param.idle_send_timeout, 0, 0,
+ 		"Time to wait with no data sent. "
+ 		"If no data has been transmitted in this many "
+ 		"seconds the session is closed.\n"
+ 		"See setsockopt(2) under SO_SNDTIMEO for more information.",
+ 		DELAYED_EFFECT,
+ 		"60", "seconds" },
+ 	{ "auto_restart", tweak_bool, &mgt_param.auto_restart, 0, 0,
+ 		"Restart child process automatically if it dies.\n",
+ 		0,
+ 		"on", "bool" },
+ 	{ "nuke_limit",
+ 		tweak_uint, &mgt_param.nuke_limit, 0, UINT_MAX,
+ 		"Maximum number of objects we attempt to nuke in order "
+ 		"to make space for an object body.",
+ 		EXPERIMENTAL,
+ 		"50", "allocations" },
+ 	{ "fetch_chunksize",
+ 		tweak_bytes_u,
+ 		    &mgt_param.fetch_chunksize, 4 * 1024, UINT_MAX,
+ 		"The default chunksize used by the fetcher. "
+ 		"This should be bigger than the majority of objects with "
+ 		"short TTLs.\n"
+ 		"Internal limits in the storage_file module make increases "
+ 		"above 128kb a dubious idea.",
+ 		EXPERIMENTAL,
+ 		"128k", "bytes" },
+ 	{ "fetch_maxchunksize",
+ 		tweak_bytes_u,
+ 		    &mgt_param.fetch_maxchunksize, 64 * 1024, UINT_MAX,
+ 		"The maximum chunksize we attempt to allocate from storage. "
+ 		"Making this too large may cause delays and storage "
+ 		"fragmentation.\n",
+ 		EXPERIMENTAL,
+ 		"256m", "bytes" },
+ #ifdef SENDFILE_WORKS
+ 	{ "sendfile_threshold",
+ 		tweak_bytes, &mgt_param.sendfile_threshold, 0, HUGE_VAL,
+ 		"The minimum size of objects transmitted with sendfile.",
+ 		EXPERIMENTAL,
+ 		"1E", "bytes" },
+ #endif /* SENDFILE_WORKS */
+ 	{ "vcl_trace", tweak_bool,  &mgt_param.vcl_trace, 0, 0,
+ 		"Trace VCL execution in the shmlog.\n"
+ 		"Enabling this will allow you to see the path each "
+ 		"request has taken through the VCL program.\n"
+ 		"This generates a lot of logrecords so it is off by "
+ 		"default.",
+ 		0,
+ 		"off", "bool" },
+ 	{ "listen_address", tweak_listen_address, NULL, 0, 0,
+ 		"Whitespace separated list of network endpoints where "
+ 		"Varnish will accept requests.\n"
+ 		"Possible formats: host, host:port, :port",
+ 		MUST_RESTART,
+ 		":80" },
+ 	{ "listen_depth", tweak_uint, &mgt_param.listen_depth, 0, UINT_MAX,
+ 		"Listen queue depth.",
+ 		MUST_RESTART,
+ 		"1024", "connections" },
+ 	{ "cli_buffer",
+ 		tweak_bytes_u, &mgt_param.cli_buffer, 4096, UINT_MAX,
+ 		"Size of buffer for CLI command input."
+ 		"\nYou may need to increase this if you have big VCL files "
+ 		"and use the vcl.inline CLI command.\n"
+ 		"NB: Must be specified with -p to have effect.\n",
+ 		0,
+ 		"8k", "bytes" },
+ 	{ "cli_limit",
+ 		tweak_bytes_u, &mgt_param.cli_limit, 128, 99999999,
+ 		"Maximum size of CLI response.  If the response exceeds"
+ 		" this limit, the response code will be 201 instead of"
+ 		" 200 and the last line will indicate the truncation.",
+ 		0,
+ 		"4k", "bytes" },
+ 	{ "cli_timeout", tweak_timeout, &mgt_param.cli_timeout, 0, 0,
+ 		"Timeout for the child's replies to CLI requests from "
+ 		"the management process.",
+ 		0,
+ 		"10", "seconds" },
+ 	{ "ping_interval", tweak_uint, &mgt_param.ping_interval, 0, UINT_MAX,
+ 		"Interval between pings from parent to child.\n"
+ 		"Zero will disable pinging entirely, which makes "
+ 		"it possible to attach a debugger to the child.",
+ 		MUST_RESTART,
+ 		"3", "seconds" },
+ 	{ "lru_interval", tweak_timeout, &mgt_param.lru_timeout, 0, 0,
+ 		"Grace period before object moves on LRU list.\n"
+ 		"Objects are only moved to the front of the LRU "
+ 		"list if they have not been moved there already inside "
+ 		"this timeout period.  This reduces the amount of lock "
+ 		"operations necessary for LRU list access.",
+ 		EXPERIMENTAL,
+ 		"2", "seconds" },
+ 	{ "cc_command", tweak_string, &mgt_cc_cmd, 0, 0,
+ 		"Command used for compiling the C source code to a "
+ 		"dlopen(3) loadable object.  Any occurrence of %s in "
+ 		"the string will be replaced with the source file name, "
+ 		"and %o will be replaced with the output file name.",
+ 		MUST_RELOAD,
+ 		VCC_CC , NULL },
+ 	{ "max_restarts", tweak_uint, &mgt_param.max_restarts, 0, UINT_MAX,
+ 		"Upper limit on how many times a request can restart."
+ 		"\nBe aware that restarts are likely to cause a hit against "
+ 		"the backend, so don't increase thoughtlessly.\n",
+ 		0,
+ 		"4", "restarts" },
+ 	{ "esi_syntax",
+ 		tweak_uint, &mgt_param.esi_syntax, 0, UINT_MAX,
+ 		"Bitmap controlling ESI parsing code:\n"
+ 		"  0x00000001 - Don't check if it looks like XML\n"
+ 		"  0x00000002 - Ignore non-esi elements\n"
+ 		"  0x00000004 - Emit parsing debug records\n"
+ 		"  0x00000008 - Force-split parser input (debugging)\n"
+ 		"\n"
+ 		"Use 0x notation and do the bitor in your head :-)\n",
+ 		0,
+ 		"0", "bitmap" },
+ 	{ "max_esi_depth",
+ 		tweak_uint, &mgt_param.max_esi_depth, 0, UINT_MAX,
+ 		"Maximum depth of esi:include processing.\n",
+ 		0,
+ 		"5", "levels" },
+ 	{ "connect_timeout", tweak_timeout_double,
+ 		&mgt_param.connect_timeout,0, UINT_MAX,
+ 		"Default connection timeout for backend connections. "
+ 		"We only try to connect to the backend for this many "
+ 		"seconds before giving up. "
+ 		"VCL can override this default value for each backend and "
+ 		"backend request.",
+ 		0,
+ 		"0.7", "s" },
+ 	{ "first_byte_timeout", tweak_timeout_double,
+ 		&mgt_param.first_byte_timeout,0, UINT_MAX,
+ 		"Default timeout for receiving first byte from backend. "
+ 		"We only wait for this many seconds for the first "
+ 		"byte before giving up. A value of 0 means it will never time "
+ 		"out. "
+ 		"VCL can override this default value for each backend and "
+ 		"backend request. This parameter does not apply to pipe.",
+ 		0,
+ 		"60", "s" },
+ 	{ "between_bytes_timeout", tweak_timeout_double,
+ 		&mgt_param.between_bytes_timeout,0, UINT_MAX,
+ 		"Default timeout between bytes when receiving data from "
+ 		"backend. "
+ 		"We only wait for this many seconds between bytes "
+ 		"before giving up. A value of 0 means it will never time out. "
+ 		"VCL can override this default value for each backend "
+ 		"and backend request. This parameter does not apply to pipe.",
+ 		0,
+ 		"60", "s" },
+ 	{ "acceptor_sleep_max", tweak_timeout_double,
+ 		&mgt_param.acceptor_sleep_max, 0,  10,
+ 		"If we run out of resources, such as file descriptors or "
+ 		"worker threads, the acceptor will sleep between accepts.\n"
+ 		"This parameter limits how long it can sleep between "
+ 		"attempts to accept new connections.",
+ 		EXPERIMENTAL,
+ 		"0.050", "s" },
+ 	{ "acceptor_sleep_incr", tweak_timeout_double,
+ 		&mgt_param.acceptor_sleep_incr, 0,  1,
+ 		"If we run out of resources, such as file descriptors or "
+ 		"worker threads, the acceptor will sleep between accepts.\n"
+ 		"This parameter controls how much longer we sleep each time "
+ 		"we fail to accept a new connection.",
+ 		EXPERIMENTAL,
+ 		"0.001", "s" },
+ 	{ "acceptor_sleep_decay", tweak_generic_double,
+ 		&mgt_param.acceptor_sleep_decay, 0,  1,
+ 		"If we run out of resources, such as file descriptors or "
+ 		"worker threads, the acceptor will sleep between accepts.\n"
+ 		"This parameter (multiplicatively) reduces the sleep duration "
+ 		"for each successful accept. (ie: 0.9 = reduce by 10%)",
+ 		EXPERIMENTAL,
+ 		"0.900", "" },
+ 	{ "clock_skew", tweak_uint, &mgt_param.clock_skew, 0, UINT_MAX,
+ 		"How much clock skew we are willing to accept between the "
+ 		"backend and our own clock.",
+ 		0,
+ 		"10", "s" },
+ 	{ "prefer_ipv6", tweak_bool, &mgt_param.prefer_ipv6, 0, 0,
+ 		"Prefer IPv6 address when connecting to backends which "
+ 		"have both IPv4 and IPv6 addresses.",
+ 		0,
+ 		"off", "bool" },
+ 	{ "session_max", tweak_uint,
+ 		&mgt_param.max_sess, 1000, UINT_MAX,
+ 		"Maximum number of sessions we will allocate from one pool "
+ 		"before just dropping connections.\n"
+ 		"This is mostly an anti-DoS measure, and setting it plenty "
+ 		"high should not hurt, as long as you have the memory for "
+ 		"it.\n",
+ 		0,
+ 		"100000", "sessions" },
+ 	{ "session_linger", tweak_uint,
+ 		&mgt_param.session_linger,0, UINT_MAX,
+ 		"How long the worker thread lingers on the session "
+ 		"to see if a new request appears right away.\n"
+ 		"If sessions are reused, as much as half of all reuses "
+ 		"happen within the first 100 msec of the previous request "
+ 		"completing.\n"
+ 		"Setting this too high results in worker threads not doing "
+ 		"anything for their keep, setting it too low just means that "
+ 		"more sessions take a detour around the waiter.",
+ 		EXPERIMENTAL,
+ 		"50", "ms" },
+ 	{ "log_hashstring", tweak_bool, &mgt_param.log_hash, 0, 0,
+ 		"Log the hash string components to shared memory log.\n",
+ 		0,
+ 		"on", "bool" },
+ 	{ "log_local_address", tweak_bool, &mgt_param.log_local_addr, 0, 0,
+ 		"Log the local address on the TCP connection in the "
+ 		"SessionOpen shared memory record.\n",
+ 		0,
+ 		"off", "bool" },
+ 	{ "waiter", tweak_waiter, NULL, 0, 0,
+ 		"Select the waiter kernel interface.\n",
+ 		EXPERIMENTAL | MUST_RESTART,
+ 		"default", NULL },
+ 	{ "diag_bitmap", tweak_diag_bitmap, 0, 0, 0,
+ 		"Bitmap controlling diagnostics code:\n"
+ 		"  0x00000001 - CNT_Session states.\n"
+ 		"  0x00000002 - workspace debugging.\n"
+ 		"  0x00000004 - kqueue debugging.\n"
+ 		"  0x00000008 - mutex logging.\n"
+ 		"  0x00000010 - mutex contests.\n"
+ 		"  0x00000020 - waiting list.\n"
+ 		"  0x00000040 - object workspace.\n"
+ 		"  0x00001000 - do not core-dump child process.\n"
+ 		"  0x00002000 - only short panic message.\n"
+ 		"  0x00004000 - panic to stderr.\n"
+ 		"  0x00010000 - synchronize shmlog.\n"
+ 		"  0x00020000 - synchronous start of persistence.\n"
+ 		"  0x00040000 - release VCL early.\n"
+ 		"  0x00080000 - ban-lurker debugging.\n"
+ 		"  0x80000000 - do edge-detection on digest.\n"
+ 		"\n"
+ 		"Use 0x notation and do the bitor in your head :-)\n",
+ 		0,
+ 		"0", "bitmap" },
+ 	{ "ban_dups", tweak_bool, &mgt_param.ban_dups, 0, 0,
+ 		"Detect and eliminate duplicate bans.\n",
+ 		0,
+ 		"on", "bool" },
+ 	{ "syslog_cli_traffic", tweak_bool, &mgt_param.syslog_cli_traffic, 0, 0,
+ 		"Log all CLI traffic to syslog(LOG_INFO).\n",
+ 		0,
+ 		"on", "bool" },
+ 	{ "ban_lurker_sleep", tweak_timeout_double,
+ 		&mgt_param.ban_lurker_sleep, 0, UINT_MAX,
+ 		"How long the ban lurker thread sleeps between "
+ 		"successful attempts to push the last item up the ban "
+ 		"list.  It always sleeps a second when nothing can be done.\n"
+ 		"A value of zero disables the ban lurker.",
+ 		0,
+ 		"0.01", "s" },
+ 	{ "saintmode_threshold", tweak_uint,
+ 		&mgt_param.saintmode_threshold, 0, UINT_MAX,
+ 		"The maximum number of objects held off by saint mode before "
+ 		"no further attempts are made to the backend until one times out.  "
+ 		"A value of 0 disables saintmode.",
+ 		EXPERIMENTAL,
+ 		"10", "objects" },
+ 	{ "http_range_support", tweak_bool, &mgt_param.http_range_support, 0, 0,
+ 		"Enable support for HTTP Range headers.\n",
+ 		EXPERIMENTAL,
+ 		"on", "bool" },
+ 	{ "http_gzip_support", tweak_bool, &mgt_param.http_gzip_support, 0, 0,
+ 		"Enable gzip support. When enabled Varnish will compress "
+ 		"uncompressed objects before they are stored in the cache. "
+ 		"If a client does not support gzip encoding Varnish will "
+ 		"uncompress compressed objects on demand. Varnish will also "
+ 		"rewrite the Accept-Encoding header of clients indicating "
+ 		"support for gzip to:\n"
+ 		"  Accept-Encoding: gzip\n\n"
+ 		"Clients that do not support gzip will have their "
+ 		"Accept-Encoding header removed. For more information on how "
+ 		"gzip is implemented please see the chapter on gzip in the "
+ 		"Varnish reference.",
+ 		EXPERIMENTAL,
+ 		"on", "bool" },
+ 	{ "gzip_tmp_space", tweak_uint, &mgt_param.gzip_tmp_space, 0, 2,
+ 		"Where temporary space for gzip/gunzip is allocated:\n"
+ 		"  0 - malloc\n"
+ 		"  2 - thread workspace\n"
+ 		"\n"
+ 		"If you have much gzip/gunzip activity, it may be an"
+ 		" advantage to use workspace for these allocations to reduce"
+ 		" malloc activity.  Be aware that gzip needs 256+KB and gunzip"
+ 		" needs 32+KB of workspace (64+KB if ESI processing).",
+ 		EXPERIMENTAL,
+ 		"0", "" },
+ 	{ "gzip_level", tweak_uint, &mgt_param.gzip_level, 0, 9,
+ 		"Gzip compression level: 0=debug, 1=fast, 9=best",
+ 		0,
+ 		"6", ""},
+ 	{ "gzip_window", tweak_uint, &mgt_param.gzip_window, 8, 15,
+ 		"Gzip window size 8=least, 15=most compression.\n"
+ 		"Memory impact is 8=1k, 9=2k, ... 15=128k.",
+ 		0,
+ 		"15", ""},
+ 	{ "gzip_memlevel", tweak_uint, &mgt_param.gzip_memlevel, 1, 9,
+ 		"Gzip memory level 1=slow/least, 9=fast/most compression.\n"
+ 		"Memory impact is 1=1k, 2=2k, ... 9=256k.",
+ 		0,
+ 		"8", ""},
+ 	{ "gzip_stack_buffer",
+ 		tweak_bytes_u, &mgt_param.gzip_stack_buffer,
+ 	        2048, UINT_MAX,
+ 		"Size of stack buffer used for gzip processing.\n"
+ 		"The stack buffers are used for in-transit data,"
+ 		" for instance gunzip'ed data being sent to a client.\n"
+ 		"Making this space too small results in more overhead,"
+ 		" writes to sockets etc.; making it too big is probably"
+ 		" just a waste of memory.",
+ 		EXPERIMENTAL,
+ 		"32k", "bytes" },
+ 	{ "shortlived", tweak_timeout_double,
+ 		&mgt_param.shortlived, 0, UINT_MAX,
+ 		"Objects created with TTL shorter than this are always "
+ 		"put in transient storage.\n",
+ 		0,
+ 		"10.0", "s" },
+ 	{ "critbit_cooloff", tweak_timeout_double,
+ 		&mgt_param.critbit_cooloff, 60, 254,
+ 		"How long the critbit hasher keeps deleted objheads "
+ 		"on the cooloff list.\n",
+ 		WIZARD,
+ 		"180.0", "s" },
+ 	{ "vcl_dir", tweak_string, &mgt_vcl_dir, 0, 0,
+ 		"Directory from which relative VCL filenames (vcl.load and "
+ 		"include) are opened.",
+ 		0,
+ #ifdef VARNISH_VCL_DIR
+ 		VARNISH_VCL_DIR,
+ #else
+ 		".",
+ #endif
+ 		NULL },
+ 	{ "vmod_dir", tweak_string, &mgt_vmod_dir, 0, 0,
+ 		"Directory where VCL modules are to be found.",
+ 		0,
+ #ifdef VARNISH_VMOD_DIR
+ 		VARNISH_VMOD_DIR,
+ #else
+ 		".",
+ #endif
+ 		NULL },
+ 	{ "vcc_err_unref", tweak_bool, &mgt_vcc_err_unref, 0, 0,
+ 		"Unreferenced VCL objects result in error.\n",
+ 		0,
+ 		"on", "bool" },
+ 
+ 
+ 	{ "pcre_match_limit", tweak_uint,
+ 		&mgt_param.vre_limits.match,
+ 		1, UINT_MAX,
+ 		"The limit for the  number of internal matching function"
+ 		" calls in a pcre_exec() execution.",
+ 		0,
+ 		"10000", ""},
+ 
+ 	{ "pcre_match_limit_recursion", tweak_uint,
+ 		&mgt_param.vre_limits.match_recursion,
+ 		1, UINT_MAX,
+ 		"The limit for the  number of internal matching function"
+ 		" recursions in a pcre_exec() execution.",
+ 		0,
+ 		"10000", ""},
+ 
+ 	{ "vsl_space", tweak_bytes,
+ 		&mgt_param.vsl_space, 1024*1024, HUGE_VAL,
+ 		"The amount of space to allocate for the VSL fifo buffer"
+ 		" in the VSM memory segment."
+ 		"  If you make this too small, varnish{ncsa|log} etc will"
+ 		" not be able to keep up."
+ 		"  Making it too large just costs memory resources.",
+ 		MUST_RESTART,
+ 		"80M", "bytes"},
+ 
+ 	{ "vsm_space", tweak_bytes,
+ 		&mgt_param.vsm_space, 1024*1024, HUGE_VAL,
+ 		"The amount of space to allocate for stats counters"
+ 		" in the VSM memory segment."
+ 		"  If you make this too small, some counters will be"
+ 		" invisible."
+ 		"  Making it too large just costs memory resources.",
+ 		MUST_RESTART,
+ 		"1M", "bytes"},
+ 
+ 	{ NULL, NULL, NULL }
+ };
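
(Each entry in the table above is, in order: parameter name, tweak
function, destination pointer, min, max, description text, flags,
default value, and units.  The struct parspec definition itself is
outside this hunk, so these field names are inferred from the call
sites, not quoted from it.)
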
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ #define WIDTH 76
+ 
+ static void
+ mcf_wrap(struct cli *cli, const char *text)
+ {
+ 	const char *p, *q;
+ 
+ 	/* Format text to COLUMNS width */
+ 	for (p = text; *p != '\0'; ) {
+ 		q = strchr(p, '\n');
+ 		if (q == NULL)
+ 			q = strchr(p, '\0');
+ 		if (q > p + WIDTH - margin) {
+ 			q = p + WIDTH - margin;
+ 			while (q > p && *q != ' ')
+ 				q--;
+ 			AN(q);
+ 		}
+ 		VCLI_Out(cli, "%*s %.*s\n", margin, "", (int)(q - p), p);
+ 		p = q;
+ 		if (*p == ' ' || *p == '\n')
+ 			p++;
+ 	}
+ }
+ 
+ void
+ mcf_param_show(struct cli *cli, const char * const *av, void *priv)
+ {
+ 	int i;
+ 	const struct parspec *pp;
+ 	int lfmt;
+ 
+ 	(void)priv;
+ 	if (av[2] == NULL || strcmp(av[2], "-l"))
+ 		lfmt = 0;
+ 	else
+ 		lfmt = 1;
+ 	for (i = 0; i < nparspec; i++) {
+ 		pp = parspec[i];
+ 		if (av[2] != NULL && !lfmt && strcmp(pp->name, av[2]))
+ 			continue;
+ 		VCLI_Out(cli, "%-*s ", margin, pp->name);
+ 		if (pp->func == NULL) {
+ 			VCLI_Out(cli, "Not implemented.\n");
+ 			if (av[2] != NULL && !lfmt)
+ 				return;
+ 			else
+ 				continue;
+ 		}
+ 		pp->func(cli, pp, NULL);
+ 		if (pp->units != NULL)
+ 			VCLI_Out(cli, " [%s]\n", pp->units);
+ 		else
+ 			VCLI_Out(cli, "\n");
+ 		if (av[2] != NULL) {
+ 			VCLI_Out(cli, "%-*s Default is %s\n",
+ 			    margin, "", pp->def);
+ 			mcf_wrap(cli, pp->descr);
+ 			if (pp->flags & DELAYED_EFFECT)
+ 				mcf_wrap(cli, DELAYED_EFFECT_TEXT);
+ 			if (pp->flags & EXPERIMENTAL)
+ 				mcf_wrap(cli, EXPERIMENTAL_TEXT);
+ 			if (pp->flags & MUST_RELOAD)
+ 				mcf_wrap(cli, MUST_RELOAD_TEXT);
+ 			if (pp->flags & MUST_RESTART)
+ 				mcf_wrap(cli, MUST_RESTART_TEXT);
+ 			if (pp->flags & WIZARD)
+ 				mcf_wrap(cli, WIZARD_TEXT);
+ 			if (!lfmt)
+ 				return;
+ 			else
+ 				VCLI_Out(cli, "\n");
+ 		}
+ 	}
+ 	if (av[2] != NULL && !lfmt) {
+ 		VCLI_SetResult(cli, CLIS_PARAM);
+ 		VCLI_Out(cli, "Unknown parameter \"%s\".", av[2]);
+ 	}
+ }
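
For reference, this renders param.show output of roughly the following
shape (illustrative; the value text comes from each parameter's tweak
function and the left column is padded to the computed margin):

    param.show gzip_level
    gzip_level                 6 []
                               Default is 6
                               Gzip compression level: 0=debug, 1=fast, 9=best
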
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ MCF_ParamSet(struct cli *cli, const char *param, const char *val)
+ {
+ 	const struct parspec *pp;
+ 
+ 	pp = mcf_findpar(param);
+ 	if (pp == NULL) {
+ 		VCLI_SetResult(cli, CLIS_PARAM);
+ 		VCLI_Out(cli, "Unknown parameter \"%s\".", param);
+ 		return;
+ 	}
+ 	pp->func(cli, pp, val);
+ 
+ 	if (cli->result == CLIS_OK && heritage.param != NULL)
+ 		*heritage.param = mgt_param;
+ 
+ 	if (cli->result != CLIS_OK) {
+ 		VCLI_Out(cli, "(attempting to set param %s to %s)\n",
+ 		    pp->name, val);
+ 	} else if (child_pid >= 0 && pp->flags & MUST_RESTART) {
+ 		VCLI_Out(cli, "Change will take effect"
+ 		    " when child is restarted");
+ 	} else if (pp->flags & MUST_RELOAD) {
+ 		VCLI_Out(cli, "Change will take effect"
+ 		    " when VCL script is reloaded");
+ 	}
+ }
+ 
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ mcf_param_set(struct cli *cli, const char * const *av, void *priv)
+ {
+ 
+ 	(void)priv;
+ 	MCF_ParamSet(cli, av[2], av[3]);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Add a group of parameters to the global set and sort by name.
+  */
+ 
+ static int
+ parspec_cmp(const void *a, const void *b)
+ {
+ 	struct parspec * const * pa = a;
+ 	struct parspec * const * pb = b;
+ 	return (strcmp((*pa)->name, (*pb)->name));
+ }
+ 
+ static void
+ MCF_AddParams(const struct parspec *ps)
+ {
+ 	const struct parspec *pp;
+ 	int n;
+ 
+ 	n = 0;
+ 	for (pp = ps; pp->name != NULL; pp++) {
+ 		if (mcf_findpar(pp->name) != NULL)
+ 			fprintf(stderr, "Duplicate param: %s\n", pp->name);
+ 		if (strlen(pp->name) + 1 > margin)
+ 			margin = strlen(pp->name) + 1;
+ 		n++;
+ 	}
+ 	parspec = realloc(parspec, (1L + nparspec + n) * sizeof *parspec);
+ 	XXXAN(parspec);
+ 	for (pp = ps; pp->name != NULL; pp++)
+ 		parspec[nparspec++] = pp;
+ 	parspec[nparspec] = NULL;
+ 	qsort (parspec, nparspec, sizeof parspec[0], parspec_cmp);
+ }
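
mcf_findpar() is used above but defined outside this hunk; one workable
shape for it -- a minimal sketch, assuming only the parspec/nparspec
globals maintained here -- is a linear scan:

    static const struct parspec *
    mcf_findpar(const char *name)
    {
            int i;

            for (i = 0; i < nparspec; i++)
                    if (!strcmp(parspec[i]->name, name))
                            return (parspec[i]);
            return (NULL);
    }
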
+ 
+ /*--------------------------------------------------------------------
+  * Set defaults for all parameters
+  */
+ 
+ static void
+ MCF_SetDefaults(struct cli *cli)
+ {
+ 	const struct parspec *pp;
+ 	int i;
+ 
+ 	for (i = 0; i < nparspec; i++) {
+ 		pp = parspec[i];
+ 		if (cli != NULL)
+ 			VCLI_Out(cli,
+ 			    "Set Default for %s = %s\n", pp->name, pp->def);
+ 		pp->func(cli, pp, pp->def);
+ 		if (cli != NULL && cli->result != CLIS_OK)
+ 			return;
+ 	}
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ MCF_ParamInit(struct cli *cli)
+ {
+ 
+ 	MCF_AddParams(input_parspec);
+ 	MCF_AddParams(WRK_parspec);
+ 
+ 	/* XXX: We do this twice, to get past any interdependencies */
+ 	MCF_SetDefaults(NULL);
+ 	MCF_SetDefaults(cli);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ MCF_DumpRst(void)
+ {
+ 	const struct parspec *pp;
+ 	const char *p, *q;
+ 	int i;
+ 
+ 	printf("\n.. The following is the autogenerated output from varnishd -x dumprst\n\n");
+ 	for (i = 0; i < nparspec; i++) {
+ 		pp = parspec[i];
+ 		printf("%s\n", pp->name);
+ 		if (pp->units != NULL && *pp->units != '\0')
+ 			printf("\t- Units: %s\n", pp->units);
+ 		printf("\t- Default: %s\n",
+ 		    strcmp(pp->def,MAGIC_INIT_STRING) == 0 ? "magic" : pp->def);
+ 		/*
+ 		 * XXX: we should mark the params with one/two flags
+ 		 * XXX: that say if ->min/->max are valid, so we
+ 		 * XXX: can emit those also in help texts.
+ 		 */
+ 		if (pp->flags) {
+ 			printf("\t- Flags: ");
+ 			q = "";
+ 			if (pp->flags & DELAYED_EFFECT) {
+ 				printf("%sdelayed", q);
+ 				q = ", ";
+ 			}
+ 			if (pp->flags & MUST_RESTART) {
+ 				printf("%smust_restart", q);
+ 				q = ", ";
+ 			}
+ 			if (pp->flags & MUST_RELOAD) {
+ 				printf("%smust_reload", q);
+ 				q = ", ";
+ 			}
+ 			if (pp->flags & EXPERIMENTAL) {
+ 				printf("%sexperimental", q);
+ 				q = ", ";
+ 			}
+ 			printf("\n");
+ 		}
+ 		printf("\n\t");
+ 		for (p = pp->descr; *p; p++) {
+ 			if (*p == '\n' && p[1] =='\0')
+ 				break;
+ 			if (*p == '\n' && p[1] =='\n') {
+ 				printf("\n\n\t");
+ 				p++;
+ 			} else if (*p == '\n') {
+ 				printf("\n\t");
+ 			} else if (*p == ':' && p[1] == '\n') {
+ 				/*
+ 				 * Start of definition list,
+ 				 * use RSTs code mode for this
+ 				 */
+ 				printf("::\n");
+ 			} else {
+ 				printf("%c", *p);
+ 			}
+ 		}
+ 		printf("\n\n");
+ 	}
+ 	printf("\n");
+ }
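
Fed through the printf() calls above, a single entry comes out as RST of
roughly this shape (illustrative; shown for the shortlived parameter):

    shortlived
            - Units: s
            - Default: 10.0

            Objects created with TTL shorter than this are always put in transient storage.
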
diff --cc bin/varnishd/mgt/mgt_sandbox_solaris.c
index 0000000,79f6650..114d6a4
mode 000000,100644..100644
--- a/bin/varnishd/mgt/mgt_sandbox_solaris.c
+++ b/bin/varnishd/mgt/mgt_sandbox_solaris.c
@@@ -1,0 -1,233 +1,234 @@@
+ /*-
+  * Copyright (c) 2006-2011 Varnish Software AS
+  * All rights reserved.
+  *
+  * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+  *	   Nils Goroll <nils.goroll at uplex.de>
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  *
+  * Sandboxing child processes on Solaris
+  *
+  */
+ 
+ #include "config.h"
+ 
+ #ifdef HAVE_SETPPRIV
+ 
+ #ifdef HAVE_PRIV_H
+ #include <priv.h>
+ #endif
+ #include <stdio.h>
+ #include <string.h>
+ #include <syslog.h>
+ #include <unistd.h>
+ 
+ #include "mgt/mgt.h"
+ 
+ #include "common/heritage.h"
++#include "common/params.h"
+ 
+ /*--------------------------------------------------------------------
+  * SOLARIS PRIVILEGES: Note on use of symbolic PRIV_* constants
+  *
+  * For privileges which existed in Solaris 10 FCS, we may use the constants from
+  * sys/priv_names.h
+  *
+  * For privileges which have been added later, we need to use strings in order
+  * not to break builds of varnish on these platforms. To remain binary
+  * compatible, we need to silently ignore errors from priv_addset when using
+  * these strings.
+  *
+  * For optimal build and binary forward compatibility, we could use subtractive
+  * set specs like
+  *
+  *       basic,!file_link_any,!proc_exec,!proc_fork,!proc_info,!proc_session
+  *
+  * but I (Nils) have a preference for making an informed decision about which
+  * privileges the varnish child should have and which it shouldn't.
+  *
+  * Newly introduced privileges should be annotated with their PSARC / commit ID
+  * (as long as Oracle reveals these :/ )
+  *
+  * SOLARIS PRIVILEGES: Note on accidentally setting the SNOCD flag
+  *
+  * When setting privileges, we need to take care not to accidentally set the
+  * SNOCD flag which will disable core dumps unnecessarily. (see
+  * https://www.varnish-cache.org/trac/ticket/671 )
+  *
+  * When changing the logic herein, always check with mdb -k. Replace _PID_ with
+  * the pid of your varnish child, the result should be 0, otherwise a regression
+  * has been introduced.
+  *
+  * > 0t_PID_::pid2proc | ::print proc_t p_flag | >a
+  * > (<a & 0x10000000)=X
+  *                 0
+  *
+  * (a value of 0x10000000 indicates that SNOCD is set)
+  *
+  * NOTE that on Solaris changing the uid will _always_ set SNOCD, so make sure
+  * you run this test with appropriate privileges, but without proc_setid, so
+  * varnish won't setuid(), e.g.
+  *
+  * pfexec ppriv -e -s A=basic,net_privaddr,sys_resource varnish ...
+  *
+  * SOLARIS COREDUMPS with setuid(): See coreadm(1M) - global-setid / proc-setid
+  *
+  */
+ 
+ /* effective during runtime of the child */
+ static inline void
+ mgt_sandbox_solaris_add_effective(priv_set_t *pset)
+ {
+ 	/* PSARC/2009/685 - 8eca52188202 - onnv_132 */
+ 	priv_addset(pset, "net_access");
+ 
+ 	/* PSARC/2009/378 - 63678502e95e - onnv_140 */
+ 	priv_addset(pset, "file_read");
+ 	priv_addset(pset, "file_write");
+ }
+ 
+ /* permitted during runtime of the child - for privilege bracketing */
+ static inline void
+ mgt_sandbox_solaris_add_permitted(priv_set_t *pset)
+ {
+ 	/* for raising limits in cache_waiter_ports.c */
+ 	priv_addset(pset, PRIV_SYS_RESOURCE);
+ }
+ 
+ /* effective during mgt_sandbox */
+ static inline void
+ mgt_sandbox_solaris_add_initial(priv_set_t *pset)
+ {
+ 	/* for setgid/setuid */
+ 	priv_addset(pset, PRIV_PROC_SETID);
+ }
+ 
+ /*
+  * if we are not yet privilege-aware (ie we have been started
+  * non-privilege-aware with euid 0), we need to grab any additional privileges
+  * needed during mgt_sandbox, until we reduce to least privileges in
+  * mgt_sandbox_waive, otherwise we would lose them with setuid()
+  */
+ 
+ void
+ mgt_sandbox_solaris_init(void)
+ {
+ 	priv_set_t *priv_all;
+ 
+ 	if (! (priv_all = priv_allocset())) {
+ 		REPORT(LOG_ERR,
+ 		    "Child start warning: mgt_sandbox_init - priv_allocset failed: errno=%d (%s)",
+ 		    errno, strerror(errno));
+ 		return;
+ 	}
+ 
+ 	priv_emptyset(priv_all);
+ 
+ 	mgt_sandbox_solaris_add_effective(priv_all);
+ 	mgt_sandbox_solaris_add_permitted(priv_all);
+ 	mgt_sandbox_solaris_add_initial(priv_all);
+ 
+ 	setppriv(PRIV_ON, PRIV_PERMITTED, priv_all);
+ 	setppriv(PRIV_ON, PRIV_EFFECTIVE, priv_all);
+ 	setppriv(PRIV_ON, PRIV_INHERITABLE, priv_all);
+ 
+ 	priv_freeset(priv_all);
+ }
+ 
+ void
+ mgt_sandbox_solaris_privsep(void)
+ {
+ 	if (priv_ineffect(PRIV_PROC_SETID)) {
+ 		if (getgid() != mgt_param.gid)
+ 			XXXAZ(setgid(mgt_param.gid));
+ 		if (getuid() != mgt_param.uid)
+ 			XXXAZ(setuid(mgt_param.uid));
+ 	} else {
+ 		REPORT(LOG_INFO,
+ 		    "Privilege %s missing, will not change uid/gid",
+ 		    PRIV_PROC_SETID);
+ 	}
+ }
+ 
+ /*
+  * Waive most privileges in the child
+  *
+  * as of onnv_151a, we should end up with:
+  *
+  * > ppriv -v #pid of varnish child
+  * PID:  .../varnishd ...
+  * flags = PRIV_AWARE
+  *      E: file_read,file_write,net_access
+  *      I: none
+  *      P: file_read,file_write,net_access,sys_resource
+  *      L: file_read,file_write,net_access,sys_resource
+  *
+  * We should keep sys_resource in P in order to adjust our limits if we need to
+  */
+ 
+ void
+ mgt_sandbox_solaris_fini(void)
+ {
+ 	priv_set_t *effective, *inheritable, *permitted;
+ 
+ 	if (!(effective = priv_allocset()) ||
+ 	    !(inheritable = priv_allocset()) ||
+ 	    !(permitted = priv_allocset())) {
+ 		REPORT(LOG_ERR,
+ 		    "Child start warning: mgt_sandbox_waive - priv_allocset failed: errno=%d (%s)",
+ 		    errno, strerror(errno));
+ 		return;
+ 	}
+ 
+ 	priv_emptyset(inheritable);
+ 
+ 	priv_emptyset(effective);
+ 	mgt_sandbox_solaris_add_effective(effective);
+ 
+ 	priv_copyset(effective, permitted);
+ 	mgt_sandbox_solaris_add_permitted(permitted);
+ 
+ 	/*
+ 	 * invert the sets and clear privileges such that setppriv will always
+ 	 * succeed
+ 	 */
+ 	priv_inverse(inheritable);
+ 	priv_inverse(effective);
+ 	priv_inverse(permitted);
+ 
+ #define SETPPRIV(which, set)						\
+ 	if (setppriv(PRIV_OFF, which, set))				\
+ 		REPORT(LOG_ERR,						\
+ 		    "Child start warning: Waiving privileges failed on %s: errno=%d (%s)", \
+ 		    #which, errno, strerror(errno));
+ 
+ 	SETPPRIV(PRIV_LIMIT, permitted);
+ 	SETPPRIV(PRIV_PERMITTED, permitted);
+ 	SETPPRIV(PRIV_EFFECTIVE, effective);
+ 	SETPPRIV(PRIV_INHERITABLE, inheritable);
+ #undef SETPPRIV
+ 
+ 	priv_freeset(inheritable);
+ 	priv_freeset(effective);
+ }
+ 
+ #endif /* HAVE_SETPPRIV */
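
The "privilege bracketing" referred to above keeps a privilege in the
permitted set but turns it on in the effective set only around the
operation that needs it.  A minimal sketch, under the assumption that
PRIV_SYS_RESOURCE sits in P as arranged above (raise_fd_limit is a
hypothetical helper, not part of this commit):

    #include <sys/resource.h>
    #include <priv.h>

    static void
    raise_fd_limit(rlim_t n)
    {
            struct rlimit rl;

            rl.rlim_cur = rl.rlim_max = n;
            /* bracket: effective only for the duration of setrlimit() */
            (void)priv_set(PRIV_ON, PRIV_EFFECTIVE, PRIV_SYS_RESOURCE,
                (char *)NULL);
            (void)setrlimit(RLIMIT_NOFILE, &rl);
            (void)priv_set(PRIV_OFF, PRIV_EFFECTIVE, PRIV_SYS_RESOURCE,
                (char *)NULL);
    }
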
diff --cc bin/varnishd/storage/stevedore.c
index 0000000,860604e..71241f5
mode 000000,100644..100644
--- a/bin/varnishd/storage/stevedore.c
+++ b/bin/varnishd/storage/stevedore.c
@@@ -1,0 -1,466 +1,514 @@@
+ /*-
+  * Copyright (c) 2007-2011 Varnish Software AS
+  * All rights reserved.
+  *
+  * Author: Dag-Erling Smørgav <des at des.no>
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  *
+  * STEVEDORE: one who works at or is responsible for loading and
+  * unloading ships in port.  Example: "on the wharves, stevedores were
+  * unloading cargo from the far corners of the world." Origin: Spanish
+  * estibador, from estibar to pack.  First Known Use: 1788
+  */
+ 
+ #include "config.h"
+ 
+ #include <stdio.h>
+ #include <stdlib.h>
+ 
+ #include "cache/cache.h"
+ 
+ #include "storage/storage.h"
+ #include "vrt.h"
+ #include "vrt_obj.h"
+ 
+ static const struct stevedore * volatile stv_next;
+ 
+ /*---------------------------------------------------------------------
+  * Default objcore methods
+  */
+ 
+ static struct object * __match_proto__(getobj_f)
+ default_oc_getobj(struct worker *wrk, struct objcore *oc)
+ {
+ 	struct object *o;
+ 
+ 	(void)wrk;
+ 	if (oc->priv == NULL)
+ 		return (NULL);
+ 	CAST_OBJ_NOTNULL(o, oc->priv, OBJECT_MAGIC);
+ 	return (o);
+ }
+ 
+ static void
+ default_oc_freeobj(struct objcore *oc)
+ {
+ 	struct object *o;
+ 
+ 	CAST_OBJ_NOTNULL(o, oc->priv, OBJECT_MAGIC);
+ 	oc->priv = NULL;
+ 	oc->methods = NULL;
+ 
+ 	STV_Freestore(o);
+ 	STV_free(o->objstore);
+ }
+ 
+ static struct lru *
+ default_oc_getlru(const struct objcore *oc)
+ {
+ 	struct object *o;
+ 
+ 	CAST_OBJ_NOTNULL(o, oc->priv, OBJECT_MAGIC);
+ 	return (o->objstore->stevedore->lru);
+ }
+ 
+ static struct objcore_methods default_oc_methods = {
+ 	.getobj = default_oc_getobj,
+ 	.freeobj = default_oc_freeobj,
+ 	.getlru = default_oc_getlru,
+ };
+ 
+ 
+ /*--------------------------------------------------------------------
+  */
+ 
+ struct lru *
+ LRU_Alloc(void)
+ {
+ 	struct lru *l;
+ 
+ 	ALLOC_OBJ(l, LRU_MAGIC);
+ 	AN(l);
+ 	VTAILQ_INIT(&l->lru_head);
+ 	Lck_New(&l->mtx, lck_lru);
+ 	return (l);
+ }
+ 
+ void
+ LRU_Free(struct lru *lru)
+ {
+ 	CHECK_OBJ_NOTNULL(lru, LRU_MAGIC);
+ 	Lck_Delete(&lru->mtx);
+ 	FREE_OBJ(lru);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * XXX: trust pointer writes to be atomic
+  */
+ 
+ static struct stevedore *
+ stv_pick_stevedore(const struct sess *sp, const char **hint)
+ {
+ 	struct stevedore *stv;
+ 
+ 	AN(hint);
+ 	if (*hint != NULL && **hint != '\0') {
+ 		VTAILQ_FOREACH(stv, &stv_stevedores, list) {
+ 			if (!strcmp(stv->ident, *hint))
+ 				return (stv);
+ 		}
+ 		if (!strcmp(TRANSIENT_STORAGE, *hint))
+ 			return (stv_transient);
+ 
+ 		/* Hint was not valid, nuke it */
+ 		WSP(sp, SLT_Debug, "Storage hint not usable");
+ 		*hint = NULL;
+ 	}
+ 	/* pick a stevedore and bump the head along */
+ 	stv = VTAILQ_NEXT(stv_next, list);
+ 	if (stv == NULL)
+ 		stv = VTAILQ_FIRST(&stv_stevedores);
+ 	AN(stv);
+ 	AN(stv->name);
+ 	stv_next = stv;
+ 	return (stv);
+ }
+ 
+ /*-------------------------------------------------------------------*/
+ 
+ static struct storage *
+ stv_alloc(struct worker *w, const struct object *obj, size_t size)
+ {
+ 	struct storage *st;
+ 	struct stevedore *stv;
+ 	unsigned fail = 0;
+ 
+ 	/*
+ 	 * Always use the stevedore which allocated the object in order to
+ 	 * keep an object inside the same stevedore.
+ 	 */
+ 	CHECK_OBJ_NOTNULL(obj, OBJECT_MAGIC);
+ 	CHECK_OBJ_NOTNULL(w, WORKER_MAGIC);
+ 	stv = obj->objstore->stevedore;
+ 	CHECK_OBJ_NOTNULL(stv, STEVEDORE_MAGIC);
+ 
+ 	if (size > cache_param->fetch_maxchunksize)
+ 		size = cache_param->fetch_maxchunksize;
+ 
+ 	for (;;) {
+ 		/* try to allocate from it */
+ 		AN(stv->alloc);
+ 		st = stv->alloc(stv, size);
+ 		if (st != NULL)
+ 			break;
+ 
+ 		if (size > cache_param->fetch_chunksize) {
+ 			size >>= 1;
+ 			continue;
+ 		}
+ 
+ 		/* no luck; try to free some space and keep trying */
+ 		if (EXP_NukeOne(w, stv->lru) == -1)
+ 			break;
+ 
+ 		/* Enough is enough: try another if we have one */
+ 		if (++fail >= cache_param->nuke_limit)
+ 			break;
+ 	}
+ 	if (st != NULL)
+ 		CHECK_OBJ_NOTNULL(st, STORAGE_MAGIC);
+ 	return (st);
+ }
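
Worked example of the loop above, assuming defaults of
fetch_chunksize=128k and fetch_maxchunksize=256m (an assumption; the
actual defaults are set in the parameter table): a 10 MB request that
fails its first allocation is halved to 5 MB, then 2.5 MB and so on,
without evicting anything, until it is no larger than 128k; from then on
each failed attempt triggers one EXP_NukeOne() eviction, and the loop
gives up once nuke_limit evictions have not helped or there is nothing
left to nuke.
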
+ 
+ 
+ /*-------------------------------------------------------------------*
+  * Structure used to transport internal knowledge from STV_NewObject()
+  * to STV_MkObject().  Nobody else should mess with this struct.
+  */
+ 
+ struct stv_objsecrets {
+ 	unsigned	magic;
+ #define STV_OBJ_SECRETES_MAGIC	0x78c87247
+ 	uint16_t	nhttp;
+ 	unsigned	lhttp;
+ 	unsigned	wsl;
+ 	struct exp	*exp;
+ };
+ 
+ /*--------------------------------------------------------------------
+  * This function is called by the stevedore's ->allocobj() method, which
+  * very often will be stv_default_allocobj() below, to convert a slab
+  * of storage into an object which the stevedore can then register in its
+  * internal state, before returning it to STV_NewObject().
+  * As you probably guessed: All this for persistence.
+  */
+ 
+ struct object *
+ STV_MkObject(struct sess *sp, void *ptr, unsigned ltot,
+     const struct stv_objsecrets *soc)
+ {
+ 	struct object *o;
+ 	unsigned l;
+ 
+ 	CHECK_OBJ_NOTNULL(soc, STV_OBJ_SECRETES_MAGIC);
+ 
+ 	assert(PAOK(ptr));
+ 	assert(PAOK(soc->wsl));
+ 	assert(PAOK(soc->lhttp));
+ 
+ 	assert(ltot >= sizeof *o + soc->lhttp + soc->wsl);
+ 
+ 	o = ptr;
+ 	memset(o, 0, sizeof *o);
+ 	o->magic = OBJECT_MAGIC;
+ 
+ 	l = PRNDDN(ltot - (sizeof *o + soc->lhttp));
+ 	assert(l >= soc->wsl);
+ 
+ 	o->http = HTTP_create(o + 1, soc->nhttp);
+ 	WS_Init(o->ws_o, "obj", (char *)(o + 1) + soc->lhttp, soc->wsl);
+ 	WS_Assert(o->ws_o);
+ 	assert(o->ws_o->e <= (char*)ptr + ltot);
+ 
+ 	http_Setup(o->http, o->ws_o);
+ 	o->http->magic = HTTP_MAGIC;
+ 	o->exp = *soc->exp;
+ 	VTAILQ_INIT(&o->store);
+ 	sp->wrk->stats.n_object++;
+ 
+ 	if (sp->objcore != NULL) {
+ 		CHECK_OBJ_NOTNULL(sp->objcore, OBJCORE_MAGIC);
+ 
+ 		o->objcore = sp->objcore;
+ 		sp->objcore = NULL;     /* refcnt follows pointer. */
+ 		BAN_NewObjCore(o->objcore);
+ 
+ 		o->objcore->methods = &default_oc_methods;
+ 		o->objcore->priv = o;
+ 	}
+ 	return (o);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * This is the default ->allocobj() which all stevedores who do not
+  * implement persistent storage can rely on.
+  */
+ 
+ struct object *
+ stv_default_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot,
+     const struct stv_objsecrets *soc)
+ {
+ 	struct object *o;
+ 	struct storage *st;
+ 
+ 	CHECK_OBJ_NOTNULL(soc, STV_OBJ_SECRETES_MAGIC);
+ 	st = stv->alloc(stv, ltot);
+ 	if (st == NULL)
+ 		return (NULL);
+ 	if (st->space < ltot) {
+ 		stv->free(st);
+ 		return (NULL);
+ 	}
+ 	ltot = st->len = st->space;
+ 	o = STV_MkObject(sp, st->ptr, ltot, soc);
+ 	CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ 	o->objstore = st;
+ 	return (o);
+ }
+ 
+ /*-------------------------------------------------------------------
+  * Allocate storage for an object, based on the header information.
+  * XXX: If we know (a hint of) the length, we could allocate space
+  * XXX: for the body in the same allocation while we are at it.
+  */
+ 
+ struct object *
+ STV_NewObject(struct sess *sp, const char *hint, unsigned wsl, struct exp *ep,
+     uint16_t nhttp)
+ {
+ 	struct object *o;
+ 	struct stevedore *stv, *stv0;
+ 	unsigned lhttp, ltot;
+ 	struct stv_objsecrets soc;
+ 	int i;
+ 
+ 	assert(wsl > 0);
+ 	wsl = PRNDUP(wsl);
+ 
+ 	lhttp = HTTP_estimate(nhttp);
+ 	lhttp = PRNDUP(lhttp);
+ 
+ 	memset(&soc, 0, sizeof soc);
+ 	soc.magic = STV_OBJ_SECRETES_MAGIC;
+ 	soc.nhttp = nhttp;
+ 	soc.lhttp = lhttp;
+ 	soc.wsl = wsl;
+ 	soc.exp = ep;
+ 
+ 	ltot = sizeof *o + wsl + lhttp;
+ 
+ 	stv = stv0 = stv_pick_stevedore(sp, &hint);
+ 	AN(stv->allocobj);
+ 	o = stv->allocobj(stv, sp, ltot, &soc);
+ 	if (o == NULL && hint == NULL) {
+ 		do {
+ 			stv = stv_pick_stevedore(sp, &hint);
+ 			AN(stv->allocobj);
+ 			o = stv->allocobj(stv, sp, ltot, &soc);
+ 		} while (o == NULL && stv != stv0);
+ 	}
+ 	if (o == NULL) {
+ 		/* no luck; try to free some space and keep trying */
+ 		for (i = 0; o == NULL && i < cache_param->nuke_limit; i++) {
+ 			if (EXP_NukeOne(sp->wrk, stv->lru) == -1)
+ 				break;
+ 			o = stv->allocobj(stv, sp, ltot, &soc);
+ 		}
+ 	}
+ 
+ 	if (o == NULL)
+ 		return (NULL);
+ 	CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ 	CHECK_OBJ_NOTNULL(o->objstore, STORAGE_MAGIC);
+ 	return (o);
+ }
+ 
+ /*-------------------------------------------------------------------*/
+ 
+ void
+ STV_Freestore(struct object *o)
+ {
+ 	struct storage *st, *stn;
+ 
+ 	if (o->esidata != NULL) {
+ 		STV_free(o->esidata);
+ 		o->esidata = NULL;
+ 	}
+ 	VTAILQ_FOREACH_SAFE(st, &o->store, list, stn) {
+ 		CHECK_OBJ_NOTNULL(st, STORAGE_MAGIC);
+ 		VTAILQ_REMOVE(&o->store, st, list);
+ 		STV_free(st);
+ 	}
+ }
+ 
+ /*-------------------------------------------------------------------*/
+ 
+ struct storage *
+ STV_alloc(struct worker *w, size_t size)
+ {
++	struct object *obj = w->fetch_obj;
++	if (obj == NULL)
++		obj = w->sp->obj;
+ 
 -	return (stv_alloc(w, w->fetch_obj, size));
++	return (stv_alloc(w, obj, size));
+ }
+ 
+ void
+ STV_trim(struct storage *st, size_t size)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(st, STORAGE_MAGIC);
+ 	AN(st->stevedore);
+ 	if (st->stevedore->trim)
+ 		st->stevedore->trim(st, size);
+ }
+ 
++/*
++ * Duplicate the object storage (the object body) from src into target,
++ * using the stevedore-specific dup method of src's stevedore.
++ *
++ * Currently, every method simply copies the storage from one object to
++ * the other, but this encapsulation opens the path to future techniques
++ * of sharing storage together with reference counting.
++ */
++void
++STV_dup(const struct sess *sp, struct object *src, struct object *target)
++{
++	struct stevedore *stv;
++
++	CHECK_OBJ_NOTNULL(src, OBJECT_MAGIC);
++	CHECK_OBJ_NOTNULL(target, OBJECT_MAGIC);
++	CHECK_OBJ_NOTNULL(src->objstore, STORAGE_MAGIC);
++	CHECK_OBJ_NOTNULL(src->objstore->stevedore, STEVEDORE_MAGIC);
++
++	stv = src->objstore->stevedore;
++	AN(stv->dup);
++
++	stv->dup(sp, src, target);
++}
++
+ void
+ STV_free(struct storage *st)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(st, STORAGE_MAGIC);
+ 	AN(st->stevedore);
+ 	AN(st->stevedore->free);
+ 	st->stevedore->free(st);
+ }
+ 
+ void
+ STV_open(void)
+ {
+ 	struct stevedore *stv;
+ 
+ 	VTAILQ_FOREACH(stv, &stv_stevedores, list) {
+ 		stv->lru = LRU_Alloc();
+ 		if (stv->open != NULL)
+ 			stv->open(stv);
+ 	}
+ 	stv = stv_transient;
+ 	if (stv->open != NULL) {
+ 		stv->lru = LRU_Alloc();
+ 		stv->open(stv);
+ 	}
+ 	stv_next = VTAILQ_FIRST(&stv_stevedores);
+ }
+ 
+ void
+ STV_close(void)
+ {
+ 	struct stevedore *stv;
+ 
+ 	VTAILQ_FOREACH(stv, &stv_stevedores, list)
+ 		if (stv->close != NULL)
+ 			stv->close(stv);
+ 	stv = stv_transient;
+ 	if (stv->close != NULL)
+ 		stv->close(stv);
+ }
+ 
+ 
+ /*--------------------------------------------------------------------
+  * VRT functions for stevedores
+  */
+ 
+ static const struct stevedore *
+ stv_find(const char *nm)
+ {
+ 	const struct stevedore *stv;
+ 
+ 	VTAILQ_FOREACH(stv, &stv_stevedores, list)
+ 		if (!strcmp(stv->ident, nm))
+ 			return (stv);
+ 	if (!strcmp(TRANSIENT_STORAGE, nm))
+ 		return (stv_transient);
+ 	return (NULL);
+ }
+ 
+ int
+ VRT_Stv(const char *nm)
+ {
+ 
+ 	if (stv_find(nm) != NULL)
+ 		return (1);
+ 	return (0);
+ }
+ 
+ #define VRTSTVVAR(nm, vtype, ctype, dval)	\
+ ctype						\
+ VRT_Stv_##nm(const char *nm)			\
+ {						\
+ 	const struct stevedore *stv;		\
+ 						\
+ 	stv = stv_find(nm);			\
+ 	if (stv == NULL)			\
+ 		return (dval);			\
+ 	if (stv->var_##nm == NULL)		\
+ 		return (dval);			\
+ 	return (stv->var_##nm(stv));		\
+ }
+ 
+ #include "tbl/vrt_stv_var.h"
+ #undef VRTSTVVAR
++
++/*
++ * Default object store dup just copies the storage.
++ */
++void
++default_dup(const struct sess *sp, struct object *src, struct object *target)
++{
++	struct storage *st, *st2;
++	unsigned cl;
++
++	VTAILQ_FOREACH(st2, &src->store, list) {
++		cl = st2->len;
++		st = STV_alloc(sp->wrk, cl);
++		XXXAN(st);
++		assert(st->space >= cl);
++		VTAILQ_INSERT_TAIL(&target->store, st, list);
++		st->len = cl;
++		target->len += cl;
++		memcpy(st->ptr, st2->ptr, cl);
++	}
++}
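
A sketch of how a caller would use the new hook; hedged, since the IMS
revalidation code itself is not in this hunk and stale_obj, wsl, exp and
nhttp are placeholder names:

    /* Revalidation returned 304: make a fresh object, reuse the body. */
    o = STV_NewObject(sp, NULL, wsl, &exp, nhttp);
    XXXAN(o);
    STV_dup(sp, stale_obj, o);      /* copies stale_obj's body into o */
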
diff --cc bin/varnishd/storage/storage.h
index 0000000,a813a36..879a6fb
mode 000000,100644..100644
--- a/bin/varnishd/storage/storage.h
+++ b/bin/varnishd/storage/storage.h
@@@ -1,0 -1,104 +1,110 @@@
+ /*-
+  * Copyright (c) 2006 Verdens Gang AS
+  * Copyright (c) 2006-2011 Varnish Software AS
+  * All rights reserved.
+  *
+  * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  *
+  * This defines the backend interface between the stevedore and the
+  * pluggable storage implementations.
+  *
+  */
+ 
+ struct stv_objsecrets;
+ struct stevedore;
+ struct sess;
+ struct lru;
+ 
+ typedef void storage_init_f(struct stevedore *, int ac, char * const *av);
+ typedef void storage_open_f(const struct stevedore *);
+ typedef struct storage *storage_alloc_f(struct stevedore *, size_t size);
++typedef void storage_dup_f(const struct sess *sp, struct object *src,
++    struct object *target);
+ typedef void storage_trim_f(struct storage *, size_t size);
+ typedef void storage_free_f(struct storage *);
+ typedef struct object *storage_allocobj_f(struct stevedore *, struct sess *sp,
+     unsigned ltot, const struct stv_objsecrets *);
+ typedef void storage_close_f(const struct stevedore *);
+ 
+ /* Prototypes for VCL variable responders */
+ #define VRTSTVTYPE(ct) typedef ct storage_var_##ct(const struct stevedore *);
+ #include "tbl/vrt_stv_var.h"
+ #undef VRTSTVTYPE
+ 
+ extern storage_allocobj_f stv_default_allocobj;
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ struct stevedore {
+ 	unsigned		magic;
+ #define STEVEDORE_MAGIC		0x4baf43db
+ 	const char		*name;
+ 	unsigned		transient;
+ 	storage_init_f		*init;		/* called by mgt process */
+ 	storage_open_f		*open;		/* called by cache process */
+ 	storage_alloc_f		*alloc;		/* --//-- */
+ 	storage_trim_f		*trim;		/* --//-- */
++	storage_dup_f		*dup;		/* --//-- */
+ 	storage_free_f		*free;		/* --//-- */
+ 	storage_close_f		*close;		/* --//-- */
+ 	storage_allocobj_f	*allocobj;	/* --//-- */
+ 
+ 	struct lru		*lru;
+ 
+ #define VRTSTVVAR(nm, vtype, ctype, dval) storage_var_##ctype *var_##nm;
+ #include "tbl/vrt_stv_var.h"
+ #undef VRTSTVVAR
+ 
+ 	/* private fields */
+ 	void			*priv;
+ 
+ 	VTAILQ_ENTRY(stevedore)	list;
+ 	char			ident[16];	/* XXX: match VSM_chunk.ident */
+ };
+ 
+ VTAILQ_HEAD(stevedore_head, stevedore);
+ 
+ extern struct stevedore_head stv_stevedores;
+ extern struct stevedore *stv_transient;
+ 
+ /*--------------------------------------------------------------------*/
+ int STV_GetFile(const char *fn, int *fdp, const char **fnp, const char *ctx);
+ uintmax_t STV_FileSize(int fd, const char *size, unsigned *granularity,
+     const char *ctx);
+ struct object *STV_MkObject(struct sess *sp, void *ptr, unsigned ltot,
+     const struct stv_objsecrets *soc);
+ 
+ struct lru *LRU_Alloc(void);
+ void LRU_Free(struct lru *lru);
+ 
+ /*--------------------------------------------------------------------*/
+ extern const struct stevedore sma_stevedore;
+ extern const struct stevedore smf_stevedore;
+ extern const struct stevedore smp_stevedore;
+ #ifdef HAVE_LIBUMEM
+ extern const struct stevedore smu_stevedore;
+ #endif
++
++/* Default dup method */
++void STV_dup(const struct sess *sp, struct object *src, struct object *target);
++void default_dup(const struct sess *sp, struct object *src, struct object *target);
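
For orientation, each VRTSTVVAR() table line expands into one method slot
in struct stevedore above plus one VRT accessor in stevedore.c.  For the
free_space variable (C type double, as sma_free_space in this commit
confirms) the expansion is, illustratively -- the default value dval comes
from the table and 0 is assumed here:

    /* in struct stevedore: */
    storage_var_double      *var_free_space;

    /* generated in stevedore.c: */
    double
    VRT_Stv_free_space(const char *nm)
    {
            const struct stevedore *stv;

            stv = stv_find(nm);
            if (stv == NULL)
                    return (0);
            if (stv->var_free_space == NULL)
                    return (0);
            return (stv->var_free_space(stv));
    }
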
diff --cc bin/varnishd/storage/storage_file.c
index 0000000,9eb44d9..028563d
mode 000000,100644..100644
--- a/bin/varnishd/storage/storage_file.c
+++ b/bin/varnishd/storage/storage_file.c
@@@ -1,0 -1,616 +1,617 @@@
+ /*-
+  * Copyright (c) 2006 Verdens Gang AS
+  * Copyright (c) 2006-2011 Varnish Software AS
+  * All rights reserved.
+  *
+  * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  *
+  * Storage method based on mmap'ed file
+  */
+ 
+ #include "config.h"
+ 
+ #include <sys/mman.h>
+ 
+ #include <stdio.h>
+ #include <stdlib.h>
+ 
+ #include "cache/cache.h"
+ #include "storage/storage.h"
+ 
+ #include "vnum.h"
+ 
+ #ifndef MAP_NOCORE
+ #define MAP_NOCORE 0 /* XXX Linux */
+ #endif
+ 
+ #ifndef MAP_NOSYNC
+ #define MAP_NOSYNC 0 /* XXX Linux */
+ #endif
+ 
+ #define MINPAGES		128
+ 
+ /*
+  * Number of buckets on free-list.
+  *
+  * Last bucket is "larger than" so choose number so that the second
+  * to last bucket matches the 128k CHUNKSIZE in cache_fetch.c when
+  * using the a 4K minimal page size
+  */
+ #define NBUCKET			(128 / 4 + 1)
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ VTAILQ_HEAD(smfhead, smf);
+ 
+ struct smf {
+ 	unsigned		magic;
+ #define SMF_MAGIC		0x0927a8a0
+ 	struct storage		s;
+ 	struct smf_sc		*sc;
+ 
+ 	int			alloc;
+ 
+ 	off_t			size;
+ 	off_t			offset;
+ 	unsigned char		*ptr;
+ 
+ 	VTAILQ_ENTRY(smf)	order;
+ 	VTAILQ_ENTRY(smf)	status;
+ 	struct smfhead		*flist;
+ };
+ 
+ struct smf_sc {
+ 	unsigned		magic;
+ #define SMF_SC_MAGIC		0x52962ee7
+ 	struct lock		mtx;
+ 	struct VSC_C_smf	*stats;
+ 
+ 	const char		*filename;
+ 	int			fd;
+ 	unsigned		pagesize;
+ 	uintmax_t		filesize;
+ 	struct smfhead		order;
+ 	struct smfhead		free[NBUCKET];
+ 	struct smfhead		used;
+ };
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static void
+ smf_initfile(struct smf_sc *sc, const char *size)
+ {
+ 	sc->filesize = STV_FileSize(sc->fd, size, &sc->pagesize, "-sfile");
+ 
+ 	AZ(ftruncate(sc->fd, (off_t)sc->filesize));
+ 
+ 	/* XXX: force block allocation here or in open ? */
+ }
+ 
+ static const char default_size[] = "100M";
+ static const char default_filename[] = ".";
+ 
+ static void
+ smf_init(struct stevedore *parent, int ac, char * const *av)
+ {
+ 	const char *size, *fn, *r;
+ 	struct smf_sc *sc;
+ 	unsigned u;
+ 	uintmax_t page_size;
+ 
+ 	AZ(av[ac]);
+ 
+ 	fn = default_filename;
+ 	size = default_size;
+ 	page_size = getpagesize();
+ 
+ 	if (ac > 3)
+ 		ARGV_ERR("(-sfile) too many arguments\n");
+ 	if (ac > 0 && *av[0] != '\0')
+ 		fn = av[0];
+ 	if (ac > 1 && *av[1] != '\0')
+ 		size = av[1];
+ 	if (ac > 2 && *av[2] != '\0') {
+ 
+ 		r = VNUM_2bytes(av[2], &page_size, 0);
+ 		if (r != NULL)
+ 			ARGV_ERR("(-sfile) granularity \"%s\": %s\n", av[2], r);
+ 	}
+ 
+ 	AN(fn);
+ 	AN(size);
+ 
+ 	ALLOC_OBJ(sc, SMF_SC_MAGIC);
+ 	XXXAN(sc);
+ 	VTAILQ_INIT(&sc->order);
+ 	for (u = 0; u < NBUCKET; u++)
+ 		VTAILQ_INIT(&sc->free[u]);
+ 	VTAILQ_INIT(&sc->used);
+ 	sc->pagesize = page_size;
+ 
+ 	parent->priv = sc;
+ 
+ 	(void)STV_GetFile(fn, &sc->fd, &sc->filename, "-sfile");
+ 
+ 	mgt_child_inherit(sc->fd, "storage_file");
+ 	smf_initfile(sc, size);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Insert/Remove from correct freelist
+  */
+ 
+ static void
+ insfree(struct smf_sc *sc, struct smf *sp)
+ {
+ 	size_t b;
+ 	struct smf *sp2;
+ 	size_t ns;
+ 
+ 	assert(sp->alloc == 0);
+ 	assert(sp->flist == NULL);
+ 	Lck_AssertHeld(&sc->mtx);
+ 	b = sp->size / sc->pagesize;
+ 	if (b >= NBUCKET) {
+ 		b = NBUCKET - 1;
+ 		sc->stats->g_smf_large++;
+ 	} else {
+ 		sc->stats->g_smf_frag++;
+ 	}
+ 	sp->flist = &sc->free[b];
+ 	ns = b * sc->pagesize;
+ 	VTAILQ_FOREACH(sp2, sp->flist, status) {
+ 		assert(sp2->size >= ns);
+ 		assert(sp2->alloc == 0);
+ 		assert(sp2->flist == sp->flist);
+ 		if (sp->offset < sp2->offset)
+ 			break;
+ 	}
+ 	if (sp2 == NULL)
+ 		VTAILQ_INSERT_TAIL(sp->flist, sp, status);
+ 	else
+ 		VTAILQ_INSERT_BEFORE(sp2, sp, status);
+ }
+ 
+ static void
+ remfree(const struct smf_sc *sc, struct smf *sp)
+ {
+ 	size_t b;
+ 
+ 	assert(sp->alloc == 0);
+ 	assert(sp->flist != NULL);
+ 	Lck_AssertHeld(&sc->mtx);
+ 	b = sp->size / sc->pagesize;
+ 	if (b >= NBUCKET) {
+ 		b = NBUCKET - 1;
+ 		sc->stats->g_smf_large--;
+ 	} else {
+ 		sc->stats->g_smf_frag--;
+ 	}
+ 	assert(sp->flist == &sc->free[b]);
+ 	VTAILQ_REMOVE(sp->flist, sp, status);
+ 	sp->flist = NULL;
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Allocate a range from the first free range that is large enough.
+  */
+ 
+ static struct smf *
+ alloc_smf(struct smf_sc *sc, size_t bytes)
+ {
+ 	struct smf *sp, *sp2;
+ 	size_t b;
+ 
+ 	assert(!(bytes % sc->pagesize));
+ 	b = bytes / sc->pagesize;
+ 	if (b >= NBUCKET)
+ 		b = NBUCKET - 1;
+ 	sp = NULL;
+ 	for (; b < NBUCKET - 1; b++) {
+ 		sp = VTAILQ_FIRST(&sc->free[b]);
+ 		if (sp != NULL)
+ 			break;
+ 	}
+ 	if (sp == NULL) {
+ 		VTAILQ_FOREACH(sp, &sc->free[NBUCKET - 1], status)
+ 			if (sp->size >= bytes)
+ 				break;
+ 	}
+ 	if (sp == NULL)
+ 		return (sp);
+ 
+ 	assert(sp->size >= bytes);
+ 	remfree(sc, sp);
+ 
+ 	if (sp->size == bytes) {
+ 		sp->alloc = 1;
+ 		VTAILQ_INSERT_TAIL(&sc->used, sp, status);
+ 		return (sp);
+ 	}
+ 
+ 	/* Split from front */
+ 	sp2 = malloc(sizeof *sp2);
+ 	XXXAN(sp2);
+ 	sc->stats->g_smf++;
+ 	*sp2 = *sp;
+ 
+ 	sp->offset += bytes;
+ 	sp->ptr += bytes;
+ 	sp->size -= bytes;
+ 
+ 	sp2->size = bytes;
+ 	sp2->alloc = 1;
+ 	VTAILQ_INSERT_BEFORE(sp, sp2, order);
+ 	VTAILQ_INSERT_TAIL(&sc->used, sp2, status);
+ 	insfree(sc, sp);
+ 	return (sp2);
+ }
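
Worked example of the bucket arithmetic, assuming a 4K page size (so
NBUCKET = 128/4 + 1 = 33): a 20K request gives b = 5, and since bucket b
holds only ranges of exactly b pages, the first non-empty bucket found
scanning upward is guaranteed big enough; requests of 128K and above
(b capped at 32) fall through to the first-fit search of the final
"larger than" bucket, and any surplus is split off the front and
re-filed via insfree().
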
+ 
+ /*--------------------------------------------------------------------
+  * Free a range.  Attempt merge forward and backward, then sort into
+  * free list according to age.
+  */
+ 
+ static void
+ free_smf(struct smf *sp)
+ {
+ 	struct smf *sp2;
+ 	struct smf_sc *sc = sp->sc;
+ 
+ 	CHECK_OBJ_NOTNULL(sp, SMF_MAGIC);
+ 	assert(sp->alloc != 0);
+ 	assert(sp->size > 0);
+ 	assert(!(sp->size % sc->pagesize));
+ 	VTAILQ_REMOVE(&sc->used, sp, status);
+ 	sp->alloc = 0;
+ 
+ 	sp2 = VTAILQ_NEXT(sp, order);
+ 	if (sp2 != NULL &&
+ 	    sp2->alloc == 0 &&
+ 	    (sp2->ptr == sp->ptr + sp->size) &&
+ 	    (sp2->offset == sp->offset + sp->size)) {
+ 		sp->size += sp2->size;
+ 		VTAILQ_REMOVE(&sc->order, sp2, order);
+ 		remfree(sc, sp2);
+ 		free(sp2);
+ 		sc->stats->g_smf--;
+ 	}
+ 
+ 	sp2 = VTAILQ_PREV(sp, smfhead, order);
+ 	if (sp2 != NULL &&
+ 	    sp2->alloc == 0 &&
+ 	    (sp->ptr == sp2->ptr + sp2->size) &&
+ 	    (sp->offset == sp2->offset + sp2->size)) {
+ 		remfree(sc, sp2);
+ 		sp2->size += sp->size;
+ 		VTAILQ_REMOVE(&sc->order, sp, order);
+ 		free(sp);
+ 		sc->stats->g_smf--;
+ 		sp = sp2;
+ 	}
+ 
+ 	insfree(sc, sp);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Trim the tail of a range.
+  */
+ 
+ static void
+ trim_smf(struct smf *sp, size_t bytes)
+ {
+ 	struct smf *sp2;
+ 	struct smf_sc *sc = sp->sc;
+ 
+ 	assert(sp->alloc != 0);
+ 	assert(bytes > 0);
+ 	assert(bytes < sp->size);
+ 	assert(!(bytes % sc->pagesize));
+ 	assert(!(sp->size % sc->pagesize));
+ 	CHECK_OBJ_NOTNULL(sp, SMF_MAGIC);
+ 	sp2 = malloc(sizeof *sp2);
+ 	XXXAN(sp2);
+ 	sc->stats->g_smf++;
+ 	*sp2 = *sp;
+ 
+ 	sp2->size -= bytes;
+ 	sp->size = bytes;
+ 	sp2->ptr += bytes;
+ 	sp2->offset += bytes;
+ 	VTAILQ_INSERT_AFTER(&sc->order, sp, sp2, order);
+ 	VTAILQ_INSERT_TAIL(&sc->used, sp2, status);
+ 	free_smf(sp2);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Insert a newly created range as busy, then free it to do any collapses
+  */
+ 
+ static void
+ new_smf(struct smf_sc *sc, unsigned char *ptr, off_t off, size_t len)
+ {
+ 	struct smf *sp, *sp2;
+ 
+ 	assert(!(len % sc->pagesize));
+ 	sp = calloc(sizeof *sp, 1);
+ 	XXXAN(sp);
+ 	sp->magic = SMF_MAGIC;
+ 	sp->s.magic = STORAGE_MAGIC;
+ 	sc->stats->g_smf++;
+ 
+ 	sp->sc = sc;
+ 	sp->size = len;
+ 	sp->ptr = ptr;
+ 	sp->offset = off;
+ 	sp->alloc = 1;
+ 
+ 	VTAILQ_FOREACH(sp2, &sc->order, order) {
+ 		if (sp->ptr < sp2->ptr) {
+ 			VTAILQ_INSERT_BEFORE(sp2, sp, order);
+ 			break;
+ 		}
+ 	}
+ 	if (sp2 == NULL)
+ 		VTAILQ_INSERT_TAIL(&sc->order, sp, order);
+ 
+ 	VTAILQ_INSERT_HEAD(&sc->used, sp, status);
+ 
+ 	free_smf(sp);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ /*
+  * XXX: This may be too aggressive and soak up too much address space.
+  * XXX: On the other hand, the user, directly or implicitly, asked us to
+  * XXX: use this much storage, so we should make a decent effort.
+  * XXX: worst case (I think), malloc will fail.
+  */
+ 
+ static void
+ smf_open_chunk(struct smf_sc *sc, off_t sz, off_t off, off_t *fail, off_t *sum)
+ {
+ 	void *p;
+ 	off_t h;
+ 
+ 	assert(sz != 0);
+ 	assert(!(sz % sc->pagesize));
+ 
+ 	if (*fail < (uintmax_t)sc->pagesize * MINPAGES)
+ 		return;
+ 
+ 	if (sz > 0 && sz < *fail && sz < SSIZE_MAX) {
+ 		p = mmap(NULL, sz, PROT_READ|PROT_WRITE,
+ 		    MAP_NOCORE | MAP_NOSYNC | MAP_SHARED, sc->fd, off);
+ 		if (p != MAP_FAILED) {
+ 			(void) madvise(p, sz, MADV_RANDOM);
+ 			(*sum) += sz;
+ 			new_smf(sc, p, off, sz);
+ 			return;
+ 		}
+ 	}
+ 
+ 	if (sz < *fail)
+ 		*fail = sz;
+ 
+ 	h = sz / 2;
+ 	if (h > SSIZE_MAX)
+ 		h = SSIZE_MAX;
+ 	h -= (h % sc->pagesize);
+ 
+ 	smf_open_chunk(sc, h, off, fail, sum);
+ 	smf_open_chunk(sc, sz - h, off + h, fail, sum);
+ }
+ 
+ static void
+ smf_open(const struct stevedore *st)
+ {
+ 	struct smf_sc *sc;
+ 	off_t fail = 1 << 30;	/* XXX: where is OFF_T_MAX ? */
+ 	off_t sum = 0;
+ 
+ 	CAST_OBJ_NOTNULL(sc, st->priv, SMF_SC_MAGIC);
+ 	sc->stats = VSM_Alloc(sizeof *sc->stats,
+ 	    VSC_CLASS, VSC_TYPE_SMF, st->ident);
+ 	Lck_New(&sc->mtx, lck_smf);
+ 	Lck_Lock(&sc->mtx);
+ 	smf_open_chunk(sc, sc->filesize, 0, &fail, &sum);
+ 	Lck_Unlock(&sc->mtx);
+ 	printf("SMF.%s mmap'ed %ju bytes of %ju\n",
+ 	    st->ident, (uintmax_t)sum, sc->filesize);
+ 
+ 	/* XXX */
+ 	if (sum < MINPAGES * (off_t)getpagesize())
+ 		exit (2);
+ 
+ 	sc->stats->g_space += sc->filesize;
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static struct storage *
+ smf_alloc(struct stevedore *st, size_t size)
+ {
+ 	struct smf *smf;
+ 	struct smf_sc *sc;
+ 
+ 	CAST_OBJ_NOTNULL(sc, st->priv, SMF_SC_MAGIC);
+ 	assert(size > 0);
+ 	size += (sc->pagesize - 1);
+ 	size &= ~(sc->pagesize - 1);
+ 	Lck_Lock(&sc->mtx);
+ 	sc->stats->c_req++;
+ 	smf = alloc_smf(sc, size);
+ 	if (smf == NULL) {
+ 		sc->stats->c_fail++;
+ 		Lck_Unlock(&sc->mtx);
+ 		return (NULL);
+ 	}
+ 	CHECK_OBJ_NOTNULL(smf, SMF_MAGIC);
+ 	sc->stats->g_alloc++;
+ 	sc->stats->c_bytes += smf->size;
+ 	sc->stats->g_bytes += smf->size;
+ 	sc->stats->g_space -= smf->size;
+ 	Lck_Unlock(&sc->mtx);
+ 	CHECK_OBJ_NOTNULL(&smf->s, STORAGE_MAGIC);	/*lint !e774 */
+ 	XXXAN(smf);
+ 	assert(smf->size == size);
+ 	smf->s.space = size;
+ 	smf->s.priv = smf;
+ 	smf->s.ptr = smf->ptr;
+ 	smf->s.len = 0;
+ 	smf->s.stevedore = st;
+ #ifdef SENDFILE_WORKS
+ 	smf->s.fd = smf->sc->fd;
+ 	smf->s.where = smf->offset;
+ #endif
+ 	return (&smf->s);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static void
+ smf_trim(struct storage *s, size_t size)
+ {
+ 	struct smf *smf;
+ 	struct smf_sc *sc;
+ 
+ 	CHECK_OBJ_NOTNULL(s, STORAGE_MAGIC);
+ 	assert(size > 0);
+ 	assert(size <= s->space);
+ 	xxxassert(size > 0);	/* XXX: seen */
+ 	CAST_OBJ_NOTNULL(smf, s->priv, SMF_MAGIC);
+ 	assert(size <= smf->size);
+ 	sc = smf->sc;
+ 	size += (sc->pagesize - 1);
+ 	size &= ~(sc->pagesize - 1);
+ 	if (smf->size > size) {
+ 		Lck_Lock(&sc->mtx);
+ 		sc->stats->c_freed += (smf->size - size);
+ 		sc->stats->g_bytes -= (smf->size - size);
+ 		sc->stats->g_space += (smf->size - size);
+ 		trim_smf(smf, size);
+ 		assert(smf->size == size);
+ 		Lck_Unlock(&sc->mtx);
+ 		s->space = size;
+ 	}
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ static void __match_proto__(storage_free_f)
+ smf_free(struct storage *s)
+ {
+ 	struct smf *smf;
+ 	struct smf_sc *sc;
+ 
+ 	CHECK_OBJ_NOTNULL(s, STORAGE_MAGIC);
+ 	CAST_OBJ_NOTNULL(smf, s->priv, SMF_MAGIC);
+ 	sc = smf->sc;
+ 	Lck_Lock(&sc->mtx);
+ 	sc->stats->g_alloc--;
+ 	sc->stats->c_freed += smf->size;
+ 	sc->stats->g_bytes -= smf->size;
+ 	sc->stats->g_space += smf->size;
+ 	free_smf(smf);
+ 	Lck_Unlock(&sc->mtx);
+ }
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ const struct stevedore smf_stevedore = {
+ 	.magic	=	STEVEDORE_MAGIC,
+ 	.name	=	"file",
+ 	.init	=	smf_init,
+ 	.open	=	smf_open,
+ 	.alloc	=	smf_alloc,
+ 	.trim	=	smf_trim,
+ 	.free	=	smf_free,
++	.dup	=	default_dup,
+ };
+ 
+ #ifdef INCLUDE_TEST_DRIVER
+ 
+ void vca_flush(struct sess *sp) {}
+ 
+ #define N	100
+ #define M	(128*1024)
+ 
+ struct storage *s[N];
+ 
+ static void
+ dumpit(void)
+ {
+ 	struct smf_sc *sc = smf_stevedore.priv;
+ 	struct smf *s;
+ 	unsigned b;
+ 
+ 	return;		/* XXX: disabled; remove this to get the dumps */
+ 	printf("----------------\n");
+ 	printf("Order:\n");
+ 	VTAILQ_FOREACH(s, &sc->order, order) {
+ 		printf("%10p %12ju %12ju %12ju\n",
+ 		    s, s->offset, s->size, s->offset + s->size);
+ 	}
+ 	printf("Used:\n");
+ 	VTAILQ_FOREACH(s, &sc->used, status) {
+ 		printf("%10p %12ju %12ju %12ju\n",
+ 		    s, s->offset, s->size, s->offset + s->size);
+ 	}
+ 	printf("Free:\n");
+ 	for (b = 0; b < NBUCKET; b++)
+ 		VTAILQ_FOREACH(s, &sc->free[b], status) {
+ 			printf("%10p %12ju %12ju %12ju\n",
+ 			    s, s->offset, s->size, s->offset + s->size);
+ 		}
+ 	printf("================\n");
+ }
+ 
+ int
+ main(int argc, char **argv)
+ {
+ 	/* XXX: the stevedore is const; the test driver casts that away */
+ 	struct stevedore *stv = (struct stevedore *)&smf_stevedore;
+ 	char *av[] = { NULL };
+ 	int i, j;
+ 
+ 	(void)argc;
+ 	(void)argv;
+ 	setbuf(stdout, NULL);
+ 	smf_init(stv, 0, av);
+ 	smf_open(stv);
+ 	while (1) {
+ 		dumpit();
+ 		i = random() % N;
+ 		do
+ 			j = random() % M;
+ 		while (j == 0);
+ 		if (s[i] == NULL) {
+ 			s[i] = smf_alloc(stv, j);
+ 			printf("A %10p %12d\n", s[i], j);
+ 		} else if (j < s[i]->space) {
+ 			smf_trim(s[i], j);
+ 			printf("T %10p %12d\n", s[i], j);
+ 		} else {
+ 			smf_free(s[i]);
+ 			printf("D %10p\n", s[i]);
+ 			s[i] = NULL;
+ 		}
+ 	}
+ }
+ 
+ #endif /* INCLUDE_TEST_DRIVER */
diff --cc bin/varnishd/storage/storage_malloc.c
index 0000000,156c832..79fefb6
mode 000000,100644..100644
--- a/bin/varnishd/storage/storage_malloc.c
+++ b/bin/varnishd/storage/storage_malloc.c
@@@ -1,0 -1,256 +1,257 @@@
+ /*-
+  * Copyright (c) 2006 Verdens Gang AS
+  * Copyright (c) 2006-2011 Varnish Software AS
+  * All rights reserved.
+  *
+  * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  *
+  * Storage method based on malloc(3)
+  */
+ 
+ #include "config.h"
+ 
+ #include <stdio.h>
+ #include <stdlib.h>
+ 
+ #include "cache/cache.h"
+ #include "storage/storage.h"
+ 
+ #include "vnum.h"
+ 
+ struct sma_sc {
+ 	unsigned		magic;
+ #define SMA_SC_MAGIC		0x1ac8a345
+ 	struct lock		sma_mtx;
+ 	size_t			sma_max;
+ 	size_t			sma_alloc;
+ 	struct VSC_C_sma	*stats;
+ };
+ 
+ struct sma {
+ 	unsigned		magic;
+ #define SMA_MAGIC		0x69ae9bb9
+ 	struct storage		s;
+ 	size_t			sz;
+ 	struct sma_sc		*sc;
+ };
+ 
+ static struct storage *
+ sma_alloc(struct stevedore *st, size_t size)
+ {
+ 	struct sma_sc *sma_sc;
+ 	struct sma *sma = NULL;
+ 	void *p;
+ 
+ 	CAST_OBJ_NOTNULL(sma_sc, st->priv, SMA_SC_MAGIC);
+ 	Lck_Lock(&sma_sc->sma_mtx);
+ 	sma_sc->stats->c_req++;
+ 	if (sma_sc->sma_alloc + size > sma_sc->sma_max) {
+ 		sma_sc->stats->c_fail += size;
+ 		size = 0;
+ 	} else {
+ 		sma_sc->sma_alloc += size;
+ 		sma_sc->stats->c_bytes += size;
+ 		sma_sc->stats->g_alloc++;
+ 		sma_sc->stats->g_bytes += size;
+ 		if (sma_sc->sma_max != SIZE_MAX)
+ 			sma_sc->stats->g_space -= size;
+ 	}
+ 	Lck_Unlock(&sma_sc->sma_mtx);
+ 
+ 	if (size == 0)
+ 		return (NULL);
+ 
+ 	/*
+ 	 * Do not collapse the sma allocation with sma->s.ptr: it is not
+ 	 * a good idea.  Not only would it make ->trim impossible,
+ 	 * performance-wise it would be a catastrophe with chunksized
+ 	 * allocations growing another full page, just to accommodate the sma.
+ 	 */
+ 
+ 	p = malloc(size);
+ 	if (p != NULL) {
+ 		ALLOC_OBJ(sma, SMA_MAGIC);
+ 		if (sma != NULL)
+ 			sma->s.ptr = p;
+ 		else
+ 			free(p);
+ 	}
+ 	if (sma == NULL) {
+ 		Lck_Lock(&sma_sc->sma_mtx);
+ 		/*
+ 		 * XXX: Not nice to have counters go backwards, but we do
+ 		 * XXX: not want to pick up the lock twice just for stats.
+ 		 */
+ 		sma_sc->stats->c_fail++;
+ 		sma_sc->stats->c_bytes -= size;
+ 		sma_sc->stats->g_alloc--;
+ 		sma_sc->stats->g_bytes -= size;
+ 		if (sma_sc->sma_max != SIZE_MAX)
+ 			sma_sc->stats->g_space += size;
+ 		Lck_Unlock(&sma_sc->sma_mtx);
+ 		return (NULL);
+ 	}
+ 	sma->sc = sma_sc;
+ 	sma->sz = size;
+ 	sma->s.priv = sma;
+ 	sma->s.len = 0;
+ 	sma->s.space = size;
+ #ifdef SENDFILE_WORKS
+ 	sma->s.fd = -1;
+ #endif
+ 	sma->s.stevedore = st;
+ 	sma->s.magic = STORAGE_MAGIC;
+ 	return (&sma->s);
+ }
+ 
+ static void __match_proto__(storage_free_f)
+ sma_free(struct storage *s)
+ {
+ 	struct sma_sc *sma_sc;
+ 	struct sma *sma;
+ 
+ 	CHECK_OBJ_NOTNULL(s, STORAGE_MAGIC);
+ 	CAST_OBJ_NOTNULL(sma, s->priv, SMA_MAGIC);
+ 	sma_sc = sma->sc;
+ 	assert(sma->sz == sma->s.space);
+ 	Lck_Lock(&sma_sc->sma_mtx);
+ 	sma_sc->sma_alloc -= sma->sz;
+ 	sma_sc->stats->g_alloc--;
+ 	sma_sc->stats->g_bytes -= sma->sz;
+ 	sma_sc->stats->c_freed += sma->sz;
+ 	if (sma_sc->sma_max != SIZE_MAX)
+ 		sma_sc->stats->g_space += sma->sz;
+ 	Lck_Unlock(&sma_sc->sma_mtx);
+ 	free(sma->s.ptr);
+ 	free(sma);
+ }
+ 
+ static void
+ sma_trim(struct storage *s, size_t size)
+ {
+ 	struct sma_sc *sma_sc;
+ 	struct sma *sma;
+ 	void *p;
+ 	size_t delta;
+ 
+ 	CHECK_OBJ_NOTNULL(s, STORAGE_MAGIC);
+ 	CAST_OBJ_NOTNULL(sma, s->priv, SMA_MAGIC);
+ 	sma_sc = sma->sc;
+ 
+ 	assert(sma->sz == sma->s.space);
+ 	assert(size < sma->sz);
+ 	delta = sma->sz - size;
+ 	if (delta < 256)
+ 		return;
+ 	if ((p = realloc(sma->s.ptr, size)) != NULL) {
+ 		Lck_Lock(&sma_sc->sma_mtx);
+ 		sma_sc->sma_alloc -= delta;
+ 		sma_sc->stats->g_bytes -= delta;
+ 		sma_sc->stats->c_freed += delta;
+ 		if (sma_sc->sma_max != SIZE_MAX)
+ 			sma_sc->stats->g_space += delta;
+ 		sma->sz = size;
+ 		Lck_Unlock(&sma_sc->sma_mtx);
+ 		sma->s.ptr = p;
+ 		s->space = size;
+ 	}
+ }
+ 
+ static double
+ sma_used_space(const struct stevedore *st)
+ {
+ 	struct sma_sc *sma_sc;
+ 
+ 	CAST_OBJ_NOTNULL(sma_sc, st->priv, SMA_SC_MAGIC);
+ 	return (sma_sc->sma_alloc);
+ }
+ 
+ static double
+ sma_free_space(const struct stevedore *st)
+ {
+ 	struct sma_sc *sma_sc;
+ 
+ 	CAST_OBJ_NOTNULL(sma_sc, st->priv, SMA_SC_MAGIC);
+ 	return (sma_sc->sma_max - sma_sc->sma_alloc);
+ }
+ 
+ static void
+ sma_init(struct stevedore *parent, int ac, char * const *av)
+ {
+ 	const char *e;
+ 	uintmax_t u;
+ 	struct sma_sc *sc;
+ 
+ 	ASSERT_MGT();
+ 	ALLOC_OBJ(sc, SMA_SC_MAGIC);
+ 	AN(sc);
+ 	sc->sma_max = SIZE_MAX;
+ 	assert(sc->sma_max == SIZE_MAX);
+ 	parent->priv = sc;
+ 
+ 	AZ(av[ac]);
+ 	if (ac > 1)
+ 		ARGV_ERR("(-smalloc) too many arguments\n");
+ 
+ 	if (ac == 0 || *av[0] == '\0')
+ 		 return;
+ 
+ 	e = VNUM_2bytes(av[0], &u, 0);
+ 	if (e != NULL)
+ 		ARGV_ERR("(-smalloc) size \"%s\": %s\n", av[0], e);
+ 	if ((u != (uintmax_t)(size_t)u))
+ 		ARGV_ERR("(-smalloc) size \"%s\": too big\n", av[0]);
+ 	if (u < 1024*1024)
+ 		ARGV_ERR("(-smalloc) size \"%s\": too small, "
+ 			 "did you forget to specify M or G?\n", av[0]);
+ 
+ 	sc->sma_max = u;
+ }
+ 
+ static void
+ sma_open(const struct stevedore *st)
+ {
+ 	struct sma_sc *sma_sc;
+ 
+ 	CAST_OBJ_NOTNULL(sma_sc, st->priv, SMA_SC_MAGIC);
+ 	Lck_New(&sma_sc->sma_mtx, lck_sma);
+ 	sma_sc->stats = VSM_Alloc(sizeof *sma_sc->stats,
+ 	    VSC_CLASS, VSC_TYPE_SMA, st->ident);
+ 	memset(sma_sc->stats, 0, sizeof *sma_sc->stats);
+ 	if (sma_sc->sma_max != SIZE_MAX)
+ 		sma_sc->stats->g_space = sma_sc->sma_max;
+ }
+ 
+ const struct stevedore sma_stevedore = {
+ 	.magic	=	STEVEDORE_MAGIC,
+ 	.name	=	"malloc",
+ 	.init	=	sma_init,
+ 	.open	=	sma_open,
+ 	.alloc	=	sma_alloc,
+ 	.free	=	sma_free,
+ 	.trim	=	sma_trim,
+ 	.var_free_space =	sma_free_space,
+ 	.var_used_space =	sma_used_space,
++	.dup	=	default_dup,
+ };
diff --cc bin/varnishd/storage/storage_persistent.c
index 0000000,ded638b..095fcf2
mode 000000,100644..100644
--- a/bin/varnishd/storage/storage_persistent.c
+++ b/bin/varnishd/storage/storage_persistent.c
@@@ -1,0 -1,678 +1,679 @@@
+ /*-
+  * Copyright (c) 2008-2011 Varnish Software AS
+  * All rights reserved.
+  *
+  * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  *
+  * Persistent storage method
+  *
+  * XXX: Before we start the client or maybe after it stops, we should give the
+  * XXX: stevedores a chance to examine their storage for consistency.
+  *
+  * XXX: Do we ever free the LRU-lists ?
+  */
+ 
+ #include "config.h"
+ 
+ #include <sys/param.h>
+ #include <sys/mman.h>
+ 
+ #include <stdint.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ 
+ #include "cache/cache.h"
+ #include "storage/storage.h"
+ 
+ #include "hash/hash_slinger.h"
+ #include "vcli.h"
+ #include "vcli_priv.h"
+ #include "vend.h"
+ #include "vsha256.h"
+ 
+ #include "persistent.h"
+ #include "storage/storage_persistent.h"
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ /*
+  * silos is unlocked; it only changes during startup, when we are
+  * single-threaded
+  */
+ static VTAILQ_HEAD(,smp_sc)	silos = VTAILQ_HEAD_INITIALIZER(silos);
+ 
+ /*--------------------------------------------------------------------
+  * Add bans to silos
+  */
+ 
+ static void
+ smp_appendban(struct smp_sc *sc, struct smp_signctx *ctx,
+     uint32_t len, const uint8_t *ban)
+ {
+ 	uint8_t *ptr, *ptr2;
+ 
+ 	(void)sc;
+ 	ptr = ptr2 = SIGN_END(ctx);
+ 
+ 	memcpy(ptr, "BAN", 4);
+ 	ptr += 4;
+ 
+ 	vbe32enc(ptr, len);
+ 	ptr += 4;
+ 
+ 	memcpy(ptr, ban, len);
+ 	ptr += len;
+ 
+ 	smp_append_sign(ctx, ptr2, ptr - ptr2);
+ }
+ 
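Each record appended above is self-describing: a 4-byte magic (the string "BAN" including its NUL), a 4-byte big-endian length, then the ban bytes; smp_open_bans() below walks this exact layout back. A round-trip sketch using the vend.h encoders:

    /* On-silo ban record layout (sketch):
     *   | 'B' 'A' 'N' '\0' | length, 4 bytes big-endian | length bytes |
     */
    uint8_t rec[8 + 64];
    uint8_t ban[64] = { 0 };		/* stand-in for a real ban */
    uint32_t len = sizeof ban;

    memcpy(rec, "BAN", 4);
    vbe32enc(rec + 4, len);
    memcpy(rec + 8, ban, len);
    assert(vbe32dec(rec + 4) == len);	/* what smp_open_bans() decodes */
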
+ /* Trust that cache_ban.c takes care of locking */
+ 
+ void
+ SMP_NewBan(const uint8_t *ban, unsigned ln)
+ {
+ 	struct smp_sc *sc;
+ 
+ 	VTAILQ_FOREACH(sc, &silos, list) {
+ 		smp_appendban(sc, &sc->ban1, ln, ban);
+ 		smp_appendban(sc, &sc->ban2, ln, ban);
+ 	}
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Attempt to open and read in a ban list
+  */
+ 
+ static int
+ smp_open_bans(struct smp_sc *sc, struct smp_signctx *ctx)
+ {
+ 	uint8_t *ptr, *pe;
+ 	uint32_t length;
+ 	int i, retval = 0;
+ 
+ 	ASSERT_CLI();
+ 	(void)sc;
+ 	i = smp_chk_sign(ctx);
+ 	if (i)
+ 		return (i);
+ 	ptr = SIGN_DATA(ctx);
+ 	pe = ptr + ctx->ss->length;
+ 
+ 	while (ptr < pe) {
+ 		if (memcmp(ptr, "BAN", 4)) {
+ 			retval = 1001;
+ 			break;
+ 		}
+ 		ptr += 4;
+ 
+ 		length = vbe32dec(ptr);
+ 		ptr += 4;
+ 
+ 		if (ptr + length > pe) {
+ 			retval = 1003;
+ 			break;
+ 		}
+ 
+ 		BAN_Reload(ptr, length);
+ 
+ 		ptr += length;
+ 	}
+ 	assert(ptr <= pe);
+ 	return (retval);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Attempt to open and read in a segment list
+  */
+ 
+ static int
+ smp_open_segs(struct smp_sc *sc, struct smp_signctx *ctx)
+ {
+ 	uint64_t length, l;
+ 	struct smp_segptr *ss, *se;
+ 	struct smp_seg *sg, *sg1, *sg2;
+ 	int i, n = 0;
+ 
+ 	ASSERT_CLI();
+ 	i = smp_chk_sign(ctx);
+ 	if (i)
+ 		return (i);
+ 
+ 	ss = SIGN_DATA(ctx);
+ 	length = ctx->ss->length;
+ 
+ 	if (length == 0) {
+ 		/* No segments */
+ 		sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF];
+ 		return (0);
+ 	}
+ 	se = ss + length / sizeof *ss;
+ 	se--;
+ 	assert(ss <= se);
+ 
+ 	/*
+ 	 * Locate the free reserve. There are only two basic cases,
+ 	 * but once we start dropping segments, things get more complicated.
+ 	 */
+ 
+ 	sc->free_offset = se->offset + se->length;
+ 	l = sc->mediasize - sc->free_offset;
+ 	if (se->offset > ss->offset && l >= sc->free_reserve) {
+ 		/*
+ 		 * [__xxxxyyyyzzzz___]
+ 		 * Plenty of space at tail, do nothing.
+ 		 */
+ 	} else if (ss->offset > se->offset) {
+ 		/*
+ 		 * [zzzz____xxxxyyyy_]
+ 		 * (make) space between ends
+ 		 * We might nuke the entire tail end without getting
+ 		 * enough space, in which case we fall through to the
+ 		 * last check.
+ 		 */
+ 		while (ss < se && ss->offset > se->offset) {
+ 			l = ss->offset - (se->offset + se->length);
+ 			if (l > sc->free_reserve)
+ 				break;
+ 			ss++;
+ 			n++;
+ 		}
+ 	}
+ 
+ 	if (l < sc->free_reserve) {
+ 		/*
+ 		 * [__xxxxyyyyzzzz___]
+ 		 * (make) space at front
+ 		 */
+ 		sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF];
+ 		while (ss < se) {
+ 			l = ss->offset - sc->free_offset;
+ 			if (l > sc->free_reserve)
+ 				break;
+ 			ss++;
+ 			n++;
+ 		}
+ 	}
+ 
+ 	assert(l >= sc->free_reserve);
+ 
+ 	sg1 = NULL;
+ 	sg2 = NULL;
+ 	for(; ss <= se; ss++) {
+ 		ALLOC_OBJ(sg, SMP_SEG_MAGIC);
+ 		AN(sg);
+ 		sg->lru = LRU_Alloc();
+ 		CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC);
+ 		sg->p = *ss;
+ 
+ 		sg->flags |= SMP_SEG_MUSTLOAD;
+ 
+ 		/*
+ 		 * HACK: prevent save_segs from nuking segment until we have
+ 		 * HACK: loaded it.
+ 		 */
+ 		sg->nobj = 1;
+ 		if (sg1 != NULL) {
+ 			assert(sg1->p.offset != sg->p.offset);
+ 			if (sg1->p.offset < sg->p.offset)
+ 				assert(smp_segend(sg1) <= sg->p.offset);
+ 			else
+ 				assert(smp_segend(sg) <= sg1->p.offset);
+ 		}
+ 		if (sg2 != NULL) {
+ 			assert(sg2->p.offset != sg->p.offset);
+ 			if (sg2->p.offset < sg->p.offset)
+ 				assert(smp_segend(sg2) <= sg->p.offset);
+ 			else
+ 				assert(smp_segend(sg) <= sg2->p.offset);
+ 		}
+ 
+ 		/* XXX: check that they are inside silo */
+ 		/* XXX: check that they don't overlap */
+ 		/* XXX: check that they are serial */
+ 		sg->sc = sc;
+ 		VTAILQ_INSERT_TAIL(&sc->segments, sg, list);
+ 		sg2 = sg;
+ 		if (sg1 == NULL)
+ 			sg1 = sg;
+ 	}
+ 	printf("Dropped %d segments to make free_reserve\n", n);
+ 	return (0);
+ }
+ 
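A worked example of the wrapped case: with a layout of [zzzz____xxxxyyyy_], ss points at the oldest surviving segment (the x region, written before the silo wrapped) and se at the newest (z, at the front). The middle loop above drops the oldest segments one at a time (ss++, n++) until the gap between the end of z and the first surviving segment exceeds free_reserve; if the whole tail is consumed without reaching that, l remains too small, and the final block resets free_offset to the front of the silo and keeps dropping from there.
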
+ /*--------------------------------------------------------------------
+  * Silo worker thread
+  */
+ 
+ static void *
+ smp_thread(struct sess *sp, void *priv)
+ {
+ 	struct smp_sc	*sc;
+ 	struct smp_seg *sg;
+ 
+ 	(void)sp;
+ 	CAST_OBJ_NOTNULL(sc, priv, SMP_SC_MAGIC);
+ 
+ 	/* First, load all the objects from all segments */
+ 	VTAILQ_FOREACH(sg, &sc->segments, list)
+ 		if (sg->flags & SMP_SEG_MUSTLOAD)
+ 			smp_load_seg(sp, sc, sg);
+ 
+ 	sc->flags |= SMP_SC_LOADED;
+ 	BAN_TailDeref(&sc->tailban);
+ 	AZ(sc->tailban);
+ 	printf("Silo completely loaded\n");
+ 	while (1) {
+ 		(void)sleep(1);
+ 		sg = VTAILQ_FIRST(&sc->segments);
+ 		if (sg != NULL && sg != sc->cur_seg &&
+ 		    sg->nobj == 0) {
+ 			Lck_Lock(&sc->mtx);
+ 			smp_save_segs(sc);
+ 			Lck_Unlock(&sc->mtx);
+ 		}
+ 	}
+ 	NEEDLESS_RETURN(NULL);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Open a silo in the worker process
+  */
+ 
+ static void
+ smp_open(const struct stevedore *st)
+ {
+ 	struct smp_sc	*sc;
+ 
+ 	ASSERT_CLI();
+ 
+ 	CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC);
+ 
+ 	Lck_New(&sc->mtx, lck_smp);
+ 	Lck_Lock(&sc->mtx);
+ 
+ 	sc->stevedore = st;
+ 
+ 	/* We trust the parent to give us a valid silo, for good measure: */
+ 	AZ(smp_valid_silo(sc));
+ 
+ 	AZ(mprotect(sc->base, 4096, PROT_READ));
+ 
+ 	sc->ident = SIGN_DATA(&sc->idn);
+ 
+ 	/* We attempt ban1 first, and if that fails, try ban2 */
+ 	if (smp_open_bans(sc, &sc->ban1))
+ 		AZ(smp_open_bans(sc, &sc->ban2));
+ 
+ 	/* We attempt seg1 first, and if that fails, try seg2 */
+ 	if (smp_open_segs(sc, &sc->seg1))
+ 		AZ(smp_open_segs(sc, &sc->seg2));
+ 
+ 	/*
+ 	 * Grab a reference to the tail of the ban list, until the thread
+ 	 * has loaded all objects, so we can be sure that all of our
+ 	 * proto-bans survive until then.
+ 	 */
+ 	sc->tailban = BAN_TailRef();
+ 	AN(sc->tailban);
+ 
+ 	/* XXX: save segments to ensure consistency between seg1 & seg2 ? */
+ 
+ 	/* XXX: abandon early segments to make sure we have free space ? */
+ 
+ 	/* Open a new segment, so we are ready to write */
+ 	smp_new_seg(sc);
+ 
+ 	/* Start the silo worker thread; it will load the objects */
+ 	WRK_BgThread(&sc->thread, "persistence", smp_thread, sc);
+ 
+ 	VTAILQ_INSERT_TAIL(&silos, sc, list);
+ 	Lck_Unlock(&sc->mtx);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Close a silo
+  */
+ 
+ static void
+ smp_close(const struct stevedore *st)
+ {
+ 	struct smp_sc	*sc;
+ 
+ 	ASSERT_CLI();
+ 
+ 	CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC);
+ 	Lck_Lock(&sc->mtx);
+ 	smp_close_seg(sc, sc->cur_seg);
+ 	Lck_Unlock(&sc->mtx);
+ 
+ 	/* XXX: reap thread */
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Allocate a bite.
+  *
+  * Allocate [min_size...max_size] space from the bottom of the segment,
+  * as is convenient.
+  *
+  * If 'so' + 'idx' is given, also allocate a smp_object from the top
+  * of the segment.
+  *
+  * Return the segment in 'ssg' if given.
+  */
+ 
+ static struct storage *
+ smp_allocx(struct stevedore *st, size_t min_size, size_t max_size,
+     struct smp_object **so, unsigned *idx, struct smp_seg **ssg)
+ {
+ 	struct smp_sc *sc;
+ 	struct storage *ss;
+ 	struct smp_seg *sg;
+ 	unsigned tries;
+ 	uint64_t left, extra;
+ 
+ 	CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC);
+ 	assert(min_size <= max_size);
+ 
+ 	max_size = IRNUP(sc, max_size);
+ 	min_size = IRNUP(sc, min_size);
+ 
+ 	extra = IRNUP(sc, sizeof(*ss));
+ 	if (so != NULL) {
+ 		extra += sizeof(**so);
+ 		AN(idx);
+ 	}
+ 
+ 	Lck_Lock(&sc->mtx);
+ 	sg = NULL;
+ 	ss = NULL;
+ 	for (tries = 0; tries < 3; tries++) {
+ 		left = smp_spaceleft(sc, sc->cur_seg);
+ 		if (left >= extra + min_size)
+ 			break;
+ 		smp_close_seg(sc, sc->cur_seg);
+ 		smp_new_seg(sc);
+ 	}
+ 	if (left >= extra + min_size)  {
+ 		if (left < extra + max_size)
+ 			max_size = IRNDN(sc, left - extra);
+ 
+ 		sg = sc->cur_seg;
+ 		ss = (void*)(sc->base + sc->next_bot);
+ 		sc->next_bot += max_size + IRNUP(sc, sizeof(*ss));
+ 		sg->nalloc++;
+ 		if (so != NULL) {
+ 			sc->next_top -= sizeof(**so);
+ 			*so = (void*)(sc->base + sc->next_top);
+ 			/* Render this smp_object mostly harmless */
+ 			(*so)->ttl = 0.;
+ 			(*so)->ban = 0.;
+ 			(*so)->ptr = 0;
+ 			sg->objs = *so;
+ 			*idx = ++sg->p.lobjlist;
+ 		}
+ 		(void)smp_spaceleft(sc, sg);	/* for the assert */
+ 	}
+ 	Lck_Unlock(&sc->mtx);
+ 
+ 	if (ss == NULL)
+ 		return (ss);
+ 	AN(sg);
+ 	assert(max_size >= min_size);
+ 
+ 	/* Fill the storage structure */
+ 	memset(ss, 0, sizeof *ss);
+ 	ss->magic = STORAGE_MAGIC;
+ 	ss->ptr = PRNUP(sc, ss + 1);
+ 	ss->space = max_size;
+ 	ss->priv = sc;
+ 	ss->stevedore = st;
+ #ifdef SENDFILE_WORKS
+ 	ss->fd = sc->fd;
+ #endif
+ 	if (ssg != NULL)
+ 		*ssg = sg;
+ 	return (ss);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Allocate an object
+  */
+ 
+ static struct object *
+ smp_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot,
+     const struct stv_objsecrets *soc)
+ {
+ 	struct object *o;
+ 	struct storage *st;
+ 	struct smp_sc	*sc;
+ 	struct smp_seg *sg;
+ 	struct smp_object *so;
+ 	struct objcore *oc;
+ 	unsigned objidx;
+ 
+ 	if (sp->objcore == NULL)
+ 		return (NULL);		/* from cnt_error */
+ 	CAST_OBJ_NOTNULL(sc, stv->priv, SMP_SC_MAGIC);
+ 	AN(sp->objcore);
+ 	AN(sp->wrk->exp.ttl > 0.);
+ 
+ 	ltot = IRNUP(sc, ltot);
+ 
+ 	st = smp_allocx(stv, ltot, ltot, &so, &objidx, &sg);
+ 	if (st == NULL)
+ 		return (NULL);
+ 
+ 	assert(st->space >= ltot);
+ 	ltot = st->len = st->space;
+ 
+ 	o = STV_MkObject(sp, st->ptr, ltot, soc);
+ 	CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ 	o->objstore = st;
+ 
+ 	oc = o->objcore;
+ 	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ 	oc->flags |= OC_F_LRUDONTMOVE;
+ 
+ 	Lck_Lock(&sc->mtx);
+ 	sg->nfixed++;
+ 	sg->nobj++;
+ 
+ 	/* We have to do this somewhere, might as well be here... */
+ 	assert(sizeof so->hash == DIGEST_LEN);
+ 	memcpy(so->hash, oc->objhead->digest, DIGEST_LEN);
+ 	so->ttl = EXP_Grace(NULL, o);
+ 	so->ptr = (uint8_t*)o - sc->base;
+ 	so->ban = BAN_Time(oc->ban);
+ 
+ 	smp_init_oc(oc, sg, objidx);
+ 
+ 	Lck_Unlock(&sc->mtx);
+ 	return (o);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Allocate a bite
+  */
+ 
+ static struct storage *
+ smp_alloc(struct stevedore *st, size_t size)
+ {
+ 
+ 	return (smp_allocx(st,
+ 	    size > 4096 ? 4096 : size, size, NULL, NULL, NULL));
+ }
+ 
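The contract of smp_alloc() is thus: the caller is guaranteed at least min(size, 4096) bytes, may get more (up to the rounded-up size), and must cope with short allocations. A hypothetical caller sketch (names are illustrative only; the real fetch-path callers work along these lines):

    static int
    store_body(struct stevedore *stv, const uint8_t *src, size_t need)
    {
    	struct storage *st;
    	size_t l;

    	while (need > 0) {
    		st = stv->alloc(stv, need); /* >= min(need, 4096), or NULL */
    		if (st == NULL)
    			return (-1);	/* silo out of space */
    		l = st->space < need ? st->space : need;
    		memcpy(st->ptr, src, l);
    		st->len = l;
    		src += l;
    		need -= l;
    	}
    	return (0);
    }
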
+ /*--------------------------------------------------------------------
+  * Trim a bite
+  * XXX: We could trim the last allocation.
+  */
+ 
+ static void
+ smp_trim(struct storage *ss, size_t size)
+ {
+ 
+ 	(void)ss;
+ 	(void)size;
+ }
+ 
+ /*--------------------------------------------------------------------
+  * We don't track frees of storage; we track the objects which own the
+  * storage, and when there are no more objects in the first segment,
+  * it can be reclaimed.
+  * XXX: We could free the last allocation, but does that happen ?
+  */
+ 
+ static void __match_proto__(storage_free_f)
+ smp_free(struct storage *st)
+ {
+ 
+ 	/* XXX */
+ 	(void)st;
+ }
+ 
+ 
+ /*--------------------------------------------------------------------*/
+ 
+ const struct stevedore smp_stevedore = {
+ 	.magic	=	STEVEDORE_MAGIC,
+ 	.name	=	"persistent",
+ 	.init	=	smp_mgt_init,
+ 	.open	=	smp_open,
+ 	.close	=	smp_close,
+ 	.alloc	=	smp_alloc,
+ 	.allocobj =	smp_allocobj,
+ 	.free	=	smp_free,
+ 	.trim	=	smp_trim,
++	.dup	=	default_dup,
+ };
+ 
+ /*--------------------------------------------------------------------
+  * Persistence is a bear to test unadulterated, so we cheat by adding
+  * a cli command we can use to make it do tricks for us.
+  */
+ 
+ static void
+ debug_report_silo(struct cli *cli, const struct smp_sc *sc, int objs)
+ {
+ 	struct smp_seg *sg;
+ 	struct objcore *oc;
+ 
+ 	VCLI_Out(cli, "Silo: %s (%s)\n",
+ 	    sc->stevedore->ident, sc->filename);
+ 	VTAILQ_FOREACH(sg, &sc->segments, list) {
+ 		VCLI_Out(cli, "  Seg: [0x%jx ... +0x%jx]\n",
+ 		   (uintmax_t)sg->p.offset, (uintmax_t)sg->p.length);
+ 		if (sg == sc->cur_seg)
+ 			VCLI_Out(cli,
+ 			   "    Alloc: [0x%jx ... 0x%jx] = 0x%jx free\n",
+ 			   (uintmax_t)(sc->next_bot),
+ 			   (uintmax_t)(sc->next_top),
+ 			   (uintmax_t)(sc->next_top - sc->next_bot));
+ 		VCLI_Out(cli, "    %u nobj, %u alloc, %u lobjlist, %u fixed\n",
+ 		    sg->nobj, sg->nalloc, sg->p.lobjlist, sg->nfixed);
+ 		if (objs) {
+ 			VTAILQ_FOREACH(oc, &sg->lru->lru_head, lru_list)
+ 				VCLI_Out(cli, "      OC %p\n", oc);
+ 		}
+ 	}
+ }
+ 
+ static void
+ debug_persistent(struct cli *cli, const char * const * av, void *priv)
+ {
+ 	struct smp_sc *sc;
+ 
+ 	(void)priv;
+ 
+ 	if (av[2] == NULL) {
+ 		VTAILQ_FOREACH(sc, &silos, list)
+ 			debug_report_silo(cli, sc, 0);
+ 		return;
+ 	}
+ 	VTAILQ_FOREACH(sc, &silos, list)
+ 		if (!strcmp(av[2], sc->stevedore->ident))
+ 			break;
+ 	if (sc == NULL) {
+ 		VCLI_Out(cli, "Silo <%s> not found\n", av[2]);
+ 		VCLI_SetResult(cli, CLIS_PARAM);
+ 		return;
+ 	}
+ 	if (av[3] == NULL) {
+ 		debug_report_silo(cli, sc, 0);
+ 		return;
+ 	}
+ 	Lck_Lock(&sc->mtx);
+ 	if (!strcmp(av[3], "sync")) {
+ 		smp_close_seg(sc, sc->cur_seg);
+ 		smp_new_seg(sc);
+ 	} else if (!strcmp(av[3], "dump")) {
+ 		debug_report_silo(cli, sc, 1);
+ 	} else {
+ 		VCLI_Out(cli, "Unknown operation\n");
+ 		VCLI_SetResult(cli, CLIS_PARAM);
+ 	}
+ 	Lck_Unlock(&sc->mtx);
+ }
+ 
+ static struct cli_proto debug_cmds[] = {
+ 	{ "debug.persistent", "debug.persistent",
+ 		"Persistent debugging magic:\n"
+ 		"\tdebug.persistent [stevedore [cmd]]\n"
+ 		"With no cmd arg, a summary of the silo is returned.\n"
+ 		"Possible commands:\n"
+ 		"\tsync\tClose current segment, open a new one\n"
+ 		"\tdump\tInclude objcores in silo summary\n"
+ 		"",
+ 		0, 2, "d", debug_persistent },
+ 	{ NULL }
+ };
+ 
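From varnishadm the command then looks like this (the silo name "s0" is only an example ident):

    debug.persistent             summary of every silo
    debug.persistent s0          summary of the silo with ident "s0"
    debug.persistent s0 sync     close the current segment, open a new one
    debug.persistent s0 dump     summary including the objcores on the LRU
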
+ /*--------------------------------------------------------------------*/
+ 
+ void
+ SMP_Init(void)
+ {
+ 	CLI_AddFuncs(debug_cmds);
+ }
+ 
+ /*--------------------------------------------------------------------
+  * Pause until all silos have loaded.
+  */
+ 
+ void
+ SMP_Ready(void)
+ {
+ 	struct smp_sc *sc;
+ 
+ 	ASSERT_CLI();
+ 	do {
+ 		VTAILQ_FOREACH(sc, &silos, list)
+ 			if (!(sc->flags & SMP_SC_LOADED))
+ 				break;
+ 		if (sc != NULL)
+ 			(void)sleep(1);
+ 	} while (sc != NULL);
+ }
diff --cc bin/varnishd/storage/storage_synth.c
index 0000000,e9e9b2f..062cfba
mode 000000,100644..100644
--- a/bin/varnishd/storage/storage_synth.c
+++ b/bin/varnishd/storage/storage_synth.c
@@@ -1,0 -1,120 +1,121 @@@
+ /*-
+  * Copyright (c) 2008-2011 Varnish Software AS
+  * All rights reserved.
+  *
+  * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  *
+  * Storage method for synthetic content, based on vsb.
+  */
+ 
+ #include "config.h"
+ 
+ #include <stdlib.h>
+ 
+ #include "cache/cache.h"
+ #include "storage/storage.h"
+ 
+ 
+ static struct lock		sms_mtx;
+ 
+ static void
+ sms_free(struct storage *sto)
+ {
+ 
+ 	CHECK_OBJ_NOTNULL(sto, STORAGE_MAGIC);
+ 	Lck_Lock(&sms_mtx);
+ 	VSC_C_main->sms_nobj--;
+ 	VSC_C_main->sms_nbytes -= sto->len;
+ 	VSC_C_main->sms_bfree += sto->len;
+ 	Lck_Unlock(&sms_mtx);
+ 	VSB_delete(sto->priv);
+ 	free(sto);
+ }
+ 
+ void
+ SMS_Init(void)
+ {
+ 
+ 	Lck_New(&sms_mtx, lck_sms);
+ }
+ 
+ static struct stevedore sms_stevedore = {
+ 	.magic	=	STEVEDORE_MAGIC,
+ 	.name	=	"synth",
+ 	.free	=	sms_free,
++	.dup	=	default_dup,
+ };
+ 
+ struct vsb *
+ SMS_Makesynth(struct object *obj)
+ {
+ 	struct storage *sto;
+ 	struct vsb *vsb;
+ 
+ 	CHECK_OBJ_NOTNULL(obj, OBJECT_MAGIC);
+ 	STV_Freestore(obj);
+ 	obj->len = 0;
+ 
+ 	Lck_Lock(&sms_mtx);
+ 	VSC_C_main->sms_nreq++;
+ 	VSC_C_main->sms_nobj++;
+ 	Lck_Unlock(&sms_mtx);
+ 
+ 	sto = calloc(sizeof *sto, 1);
+ 	XXXAN(sto);
+ 	vsb = VSB_new_auto();
+ 	XXXAN(vsb);
+ 	sto->priv = vsb;
+ 	sto->len = 0;
+ 	sto->space = 0;
+ #ifdef SENDFILE_WORKS
+ 	sto->fd = -1;
+ #endif
+ 	sto->stevedore = &sms_stevedore;
+ 	sto->magic = STORAGE_MAGIC;
+ 
+ 	VTAILQ_INSERT_TAIL(&obj->store, sto, list);
+ 	return (vsb);
+ }
+ 
+ void
+ SMS_Finish(struct object *obj)
+ {
+ 	struct storage *sto;
+ 	struct vsb *vsb;
+ 
+ 	CHECK_OBJ_NOTNULL(obj, OBJECT_MAGIC);
+ 	sto = VTAILQ_FIRST(&obj->store);
+ 	assert(sto->stevedore == &sms_stevedore);
+ 	vsb = sto->priv;
+ 	AZ(VSB_finish(vsb));
+ 
+ 	sto->ptr = (void*)VSB_data(vsb);
+ 	sto->len = VSB_len(vsb);
+ 	sto->space = VSB_len(vsb);
+ 	obj->len = sto->len;
+ 	Lck_Lock(&sms_mtx);
+ 	VSC_C_main->sms_nbytes += sto->len;
+ 	VSC_C_main->sms_balloc += sto->len;
+ 	Lck_Unlock(&sms_mtx);
+ }
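Typical use of the synth stevedore is a make/print/finish sequence; a minimal sketch (the error-page generation in the cache code works along these lines):

    struct vsb *vsb;

    vsb = SMS_Makesynth(obj);	/* frees the old body, attaches a vsb */
    AN(vsb);
    VSB_printf(vsb, "<html><body>Guru Meditation</body></html>\n");
    SMS_Finish(obj);		/* finishes the vsb, fixes up len/space */
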
diff --cc bin/varnishd/storage/storage_umem.c
index 0000000,52d238d..add6bd7
mode 000000,100644..100644
--- a/bin/varnishd/storage/storage_umem.c
+++ b/bin/varnishd/storage/storage_umem.c
@@@ -1,0 -1,166 +1,167 @@@
+ /*-
+  * Copyright (c) 2006 Verdens Gang AS
+  * Copyright (c) 2006-2011 Varnish Software AS
+  * All rights reserved.
+  *
+  * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  *
+  * Storage method based on umem_alloc(3MALLOC)
+  */
+ 
+ #include "config.h"
+ 
+ #ifdef HAVE_LIBUMEM
+ 
+ #include <sys/types.h>
+ 
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <umem.h>
+ 
+ #include "cache/cache.h"
+ #include "storage/storage.h"
+ 
+ static size_t			smu_max = SIZE_MAX;
+ static MTX			smu_mtx;
+ 
+ struct smu {
+ 	struct storage		s;
+ 	size_t			sz;
+ };
+ 
+ static struct storage *
+ smu_alloc(struct stevedore *st, size_t size)
+ {
+ 	struct smu *smu;
+ 
+ 	Lck_Lock(&smu_mtx);
+ 	VSC_C_main->sma_nreq++;
+ 	if (VSC_C_main->sma_nbytes + size > smu_max)
+ 		size = 0;
+ 	else {
+ 		VSC_C_main->sma_nobj++;
+ 		VSC_C_main->sma_nbytes += size;
+ 		VSC_C_main->sma_balloc += size;
+ 	}
+ 	Lck_Unlock(&smu_mtx);
+ 
+ 	if (size == 0)
+ 		return (NULL);
+ 
+ 	smu = umem_zalloc(sizeof *smu, UMEM_DEFAULT);
+ 	if (smu == NULL)
+ 		return (NULL);
+ 	smu->sz = size;
+ 	smu->s.priv = smu;
+ 	smu->s.ptr = umem_alloc(size, UMEM_DEFAULT);
+ 	XXXAN(smu->s.ptr);
+ 	smu->s.len = 0;
+ 	smu->s.space = size;
+ 	smu->s.fd = -1;
+ 	smu->s.stevedore = st;
+ 	smu->s.magic = STORAGE_MAGIC;
+ 	return (&smu->s);
+ }
+ 
+ static void
+ smu_free(struct storage *s)
+ {
+ 	struct smu *smu;
+ 
+ 	CHECK_OBJ_NOTNULL(s, STORAGE_MAGIC);
+ 	smu = s->priv;
+ 	assert(smu->sz == smu->s.space);
+ 	Lck_Lock(&smu_mtx);
+ 	VSC_C_main->sma_nobj--;
+ 	VSC_C_main->sma_nbytes -= smu->sz;
+ 	VSC_C_main->sma_bfree += smu->sz;
+ 	Lck_Unlock(&smu_mtx);
+ 	umem_free(smu->s.ptr, smu->s.space);
+ 	umem_free(smu, sizeof *smu);
+ }
+ 
+ static void
+ smu_trim(const struct storage *s, size_t size)
+ {
+ 	struct smu *smu;
+ 	void *p;
+ 
+ 	CHECK_OBJ_NOTNULL(s, STORAGE_MAGIC);
+ 	smu = s->priv;
+ 	assert(smu->sz == smu->s.space);
+ 	if ((p = umem_alloc(size, UMEM_DEFAULT)) != NULL) {
+ 		memcpy(p, smu->s.ptr, size);
+ 		umem_free(smu->s.ptr, smu->s.space);
+ 		Lck_Lock(&smu_mtx);
+ 		VSC_C_main->sma_nbytes -= (smu->sz - size);
+ 		VSC_C_main->sma_bfree += smu->sz - size;
+ 		smu->sz = size;
+ 		Lck_Unlock(&smu_mtx);
+ 		smu->s.ptr = p;
+ 		smu->s.space = size;
+ 	}
+ }
+ 
+ static void
+ smu_init(struct stevedore *parent, int ac, char * const *av)
+ {
+ 	const char *e;
+ 	uintmax_t u;
+ 
+ 	(void)parent;
+ 
+ 	AZ(av[ac]);
+ 	if (ac > 1)
+ 		ARGV_ERR("(-sumem) too many arguments\n");
+ 
+ 	if (ac == 0 || *av[0] == '\0')
+ 		 return;
+ 
+ 	e = VNUM_2bytes(av[0], &u, 0);
+ 	if (e != NULL)
+ 		ARGV_ERR("(-sumem) size \"%s\": %s\n", av[0], e);
+ 	if ((u != (uintmax_t)(size_t)u))
+ 		ARGV_ERR("(-sumem) size \"%s\": too big\n", av[0]);
+ 	smu_max = u;
+ }
+ 
+ static void
+ smu_open(const struct stevedore *st)
+ {
+ 	(void)st;
+ 	AZ(pthread_mutex_init(&smu_mtx, NULL));
+ }
+ 
+ const struct stevedore smu_stevedore = {
+ 	.magic	=	STEVEDORE_MAGIC,
+ 	.name	=	"umem",
+ 	.init	=	smu_init,
+ 	.open	=	smu_open,
+ 	.alloc	=	smu_alloc,
+ 	.free	=	smu_free,
+ 	.trim	=	smu_trim,
++	.dup	=	default_dup,
+ };
+ 
+ #endif /* HAVE_LIBUMEM */
diff --cc include/tbl/vsc_fields.h
index 0000000,738703c..824ba6d
mode 000000,100644..100644
--- a/include/tbl/vsc_fields.h
+++ b/include/tbl/vsc_fields.h
@@@ -1,0 -1,417 +1,419 @@@
+ /*-
+  * Copyright (c) 2006 Verdens Gang AS
+  * Copyright (c) 2006-2011 Varnish Software AS
+  * All rights reserved.
+  *
+  * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  *
+  * Definition of all shared memory statistics below.
+  *
+  * Fields (n, t, l, f, e, d):
+  *    n - Name:		Field name, in C-source and stats programs
+  *    t - Type:		C-type, uint64_t, unless marked in 'f'
+  *    l - Local:	Local counter in worker thread.
+  *    f - Format:	Semantics of the value in this field
+  *				'a' - Accumulator (deprecated, use 'c')
+  *				'b' - Bitmap
+  *				'c' - Counter, never decreases.
+  *				'g' - Gauge, goes up and down
+  *				'i' - Integer (deprecated, use 'g')
+  *    e - Explanation:	Short explanation of field (for screen use)
+  *    d - Description:	Long explanation of field (for doc use)
+  *
+  * Please describe Gauge variables as "Number of..." to indicate that
+  * this is a snapshot, and Counter variables as "Count of..." to indicate
+  * an accumulating count.
+  *
+  * -----------------------
+  * NB: Cleanup in progress
+  * -----------------------
+  *
+  * Insufficient attention has caused this to become a swamp of conflicting
+  * conventions, shorthands and general mumbo-jumbo.  I'm trying to clean
+  * it up as I go over the code in other business.
+  *
+  * Please see the sessmem section for how it should look.
+  *
+  */
+ 
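The VSC_F() entries below are X-macros: each consumer defines VSC_F to pick out the fields it needs, then includes this file. A sketch of the most common expansion, turning the list into a counter struct (the struct name here is hypothetical; the real ones live with the VSC/VSM code):

    #define VSC_DO_MAIN
    #define VSC_F(n, t, l, f, e, d)	t n;
    struct VSC_C_main_sketch {
    #include "tbl/vsc_fields.h"
    };
    #undef VSC_F
    #undef VSC_DO_MAIN
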
+ /**********************************************************************/
+ 
+ #ifdef VSC_DO_MAIN
+ 
+ /*---------------------------------------------------------------------
+  * Sessions
+  *    see: cache_acceptor.c and cache_pool.c
+  */
+ 
+ VSC_F(sess_conn,		uint64_t, 1, 'c',
+     "Sessions accepted",
+ 	"Count of sessions successfully accepted"
+ )
+ VSC_F(sess_drop,		uint64_t, 1, 'c',
+     "Sessions dropped",
+ 	"Count of sessions silently dropped due to lack of session memory."
+ 	"  See parameter 'max_sess'."
+ )
+ 
+ VSC_F(sess_fail,		uint64_t, 1, 'c',
+     "Session accept failures",
+ 	"Count of failures to accept TCP connection."
+ 	"  Either the client changed its mind, or the kernel ran out of"
+ 	" some resource like file descriptors."
+ )
+ 
+ /*---------------------------------------------------------------------*/
+ 
+ VSC_F(client_req,		uint64_t, 1, 'a',
+       "Client requests received",
+       "")
+ 
+ VSC_F(cache_hit,		uint64_t, 1, 'a',
+       "Cache hits",
+       "Count of cache hits. "
+       "  A cache hit indicates that an object has been delivered to a"
+       "  client without fetching it from a backend server."
+ )
+ 
+ VSC_F(cache_hitpass,	uint64_t, 1, 'a',
+       "Cache hits for pass",
+       "Count of hits for pass"
+       "  A cache hit for pass indicates that Varnish is going to"
+       "  pass the request to the backend and this decision has been "
+       "  cached in it self. This counts how many times the cached "
+       "  decision is being used."
+ )
+ VSC_F(cache_miss,		uint64_t, 1, 'a',
+       "Cache misses",
+       "Count of misses"
+       "  A cache miss indicates the object was fetched from the"
+       "  backend before delivering it to the backend.")
+ 
+ VSC_F(backend_conn,	uint64_t, 0, 'a',
+       "Backend conn. success",
+       "")
+ 
+ VSC_F(backend_unhealthy,	uint64_t, 0, 'a',
+       "Backend conn. not attempted",
+       ""
+ )
+ VSC_F(backend_busy,	uint64_t, 0, 'a', "Backend conn. too many", "")
+ VSC_F(backend_fail,	uint64_t, 0, 'a', "Backend conn. failures", "")
+ VSC_F(backend_reuse,	uint64_t, 0, 'a',
+       "Backend conn. reuses",
+       "Count of backend connection reuses"
+       "  This counter is increased whenever we reuse a recycled connection.")
+ VSC_F(backend_toolate,	uint64_t, 0, 'a', "Backend conn. was closed", "")
+ VSC_F(backend_recycle,	uint64_t, 0, 'a',
+       "Backend conn. recycles",
+       "Count of backend connection recycles"
+       "  This counter is increased whenever we have a keep-alive"
+       "  connection that is put back into the pool of connections."
+       "  It has not yet been used, but it might be, unless the backend"
+       "  closes it.")
+ VSC_F(backend_retry,	uint64_t, 0, 'a', "Backend conn. retry", "")
+ 
+ VSC_F(fetch_head,		uint64_t, 1, 'a', "Fetch head", "")
+ VSC_F(fetch_length,		uint64_t, 1, 'a', "Fetch with Length", "")
+ VSC_F(fetch_chunked,		uint64_t, 1, 'a', "Fetch chunked", "")
+ VSC_F(fetch_eof,		uint64_t, 1, 'a', "Fetch EOF", "")
+ VSC_F(fetch_bad,		uint64_t, 1, 'a', "Fetch had bad headers", "")
+ VSC_F(fetch_close,		uint64_t, 1, 'a', "Fetch wanted close", "")
+ VSC_F(fetch_oldhttp,		uint64_t, 1, 'a', "Fetch pre HTTP/1.1 closed", "")
+ VSC_F(fetch_zero,		uint64_t, 1, 'a', "Fetch zero len", "")
+ VSC_F(fetch_failed,		uint64_t, 1, 'a', "Fetch failed", "")
+ VSC_F(fetch_1xx,		uint64_t, 1, 'a', "Fetch no body (1xx)", "")
+ VSC_F(fetch_204,		uint64_t, 1, 'a', "Fetch no body (204)", "")
+ VSC_F(fetch_304,		uint64_t, 1, 'a', "Fetch no body (304)", "")
+ 
+ /*---------------------------------------------------------------------
+  * Session Memory
+  *    see: cache_session.c
+  */
+ 
+ VSC_F(sessmem_size,		uint64_t, 1, 'g',
+     "Session mem size",
+ 	"Bytes of memory allocated for last allocated session."
+ )
+ 
+ VSC_F(sessmem_alloc,		uint64_t, 1, 'c',
+     "Session mem allocated",
+ 	"Count of all allocations of session memory."
+ )
+ 
+ VSC_F(sessmem_free,		uint64_t, 1, 'c',
+     "Session mem freed",
+ 	"Count of all frees of session memory."
+ )
+ 
+ VSC_F(sessmem_fail,		uint64_t, 1, 'c',
+     "Session mem alloc failed",
+ 	"Count of session memory allocation failures."
+ )
+ 
+ VSC_F(sessmem_limit,		uint64_t, 1, 'c',
+     "Session mem alloc limited",
+ 	"Count of session memory allocations blocked by limit (max_sess)."
+ )
+ 
+ /*---------------------------------------------------------------------
+  * Pools, threads, and sessions
+  *    see: cache_pool.c
+  *
+  */
+ 
+ VSC_F(pools,			uint64_t, 1, 'g',
+     "Number of thread pools",
+ 	"Number of thread pools.  See also param thread_pools."
+ 	"  NB: Presently pools cannot be removed once created."
+ )
+ 
+ VSC_F(threads,			uint64_t, 1, 'g',
+     "Total number of threads",
+ 	"Number of threads in all pools."
+ 	"  See also params thread_pools, thread_pool_min & thread_pool_max."
+ )
+ 
+ VSC_F(threads_limited,		uint64_t, 1, 'c',
+     "Threads hit max",
+ 	"Number of times more threads were needed, but limit was reached"
+ 	" in a thread pool."
+ 	"  See also param thread_pool_max."
+ )
+ 
+ VSC_F(threads_created,		uint64_t, 1, 'c',
+     "Threads created",
+ 	"Total number of threads created in all pools."
+ )
+ 
+ VSC_F(threads_destroyed,	uint64_t, 1, 'c',
+     "Threads destoryed",
+ 	"Total number of threads destroyed in all pools."
+ )
+ 
+ VSC_F(threads_failed,		uint64_t, 1, 'c',
+     "Thread creation failed",
+ 	"Number of times creating a thread failed."
+ 	"  See VSL::Debug for diagnostics."
+ 	"  See also param thread_fail_delay."
+ )
+ 
+ VSC_F(thread_queue_len,		uint64_t, 1, 'g',
+     "Length of session queue",
+ 	"Length of session queue waiting for threads."
+ 	"  NB: Only updates once per second."
+ 	"  See also param queue_max."
+ )
+ 
+ VSC_F(sess_queued,		uint64_t, 1, 'c',
+     "Sessions queued for thread",
+ 	"Number of times session was queued waiting for a thread."
+ 	"  See also param queue_max."
+ )
+ 
+ VSC_F(sess_dropped,		uint64_t, 1, 'c',
+     "Sessions dropped for thread",
+ 	"Number of times session was dropped because the queue was"
+ 	" already too long."
+ 	"  See also param queue_max."
+ )
+ 
+ /*---------------------------------------------------------------------*/
+ 
+ VSC_F(n_sess_mem,		uint64_t, 0, 'i', "N struct sess_mem", "")
+ VSC_F(n_sess,			uint64_t, 0, 'i', "N struct sess", "")
+ VSC_F(n_object,			uint64_t, 1, 'i', "N struct object", "")
+ VSC_F(n_vampireobject,		uint64_t, 1, 'i', "N unresurrected objects", "")
+ VSC_F(n_objectcore,		uint64_t, 1, 'i', "N struct objectcore", "")
+ VSC_F(n_objecthead,		uint64_t, 1, 'i', "N struct objecthead", "")
+ VSC_F(n_waitinglist,		uint64_t, 1, 'i', "N struct waitinglist", "")
+ 
+ VSC_F(n_vbc,		uint64_t, 0, 'i', "N struct vbc", "")
+ 
+ VSC_F(n_backend,		uint64_t, 0, 'i', "N backends", "")
+ 
+ VSC_F(n_expired,		uint64_t, 0, 'i', "N expired objects", "")
+ VSC_F(n_lru_nuked,		uint64_t, 0, 'i', "N LRU nuked objects", "")
+ VSC_F(n_lru_moved,		uint64_t, 0, 'i', "N LRU moved objects", "")
+ 
+ VSC_F(losthdr,		uint64_t, 0, 'a', "HTTP header overflows", "")
+ 
+ VSC_F(n_objsendfile,	uint64_t, 0, 'a', "Objects sent with sendfile",
+       "The number of objects sent with the sendfile system call. If enabled "
+       "sendfile will be used on object larger than a certain size.")
+ VSC_F(n_objwrite,		uint64_t, 0, 'a', "Objects sent with write",
+       "The number of objects sent with regular write calls."
+       "Writes are used when the objects are too small for sendfile "
+       "or if the sendfile call has been disabled")
+ VSC_F(n_objoverflow,	uint64_t, 1, 'a',
+ 					"Objects overflowing workspace", "")
+ 
+ VSC_F(s_sess,		uint64_t, 1, 'a', "Total Sessions", "")
+ VSC_F(s_req,		uint64_t, 1, 'a', "Total Requests", "")
+ VSC_F(s_pipe,		uint64_t, 1, 'a', "Total pipe", "")
+ VSC_F(s_pass,		uint64_t, 1, 'a', "Total pass", "")
+ VSC_F(s_fetch,		uint64_t, 1, 'a', "Total fetch", "")
+ VSC_F(s_hdrbytes,		uint64_t, 1, 'a', "Total header bytes", "")
+ VSC_F(s_bodybytes,		uint64_t, 1, 'a', "Total body bytes", "")
+ 
+ VSC_F(sess_closed,		uint64_t, 1, 'a', "Session Closed", "")
+ VSC_F(sess_pipeline,	uint64_t, 1, 'a', "Session Pipeline", "")
+ VSC_F(sess_readahead,	uint64_t, 1, 'a', "Session Read Ahead", "")
+ VSC_F(sess_linger,		uint64_t, 1, 'a', "Session Linger", "")
+ VSC_F(sess_herd,		uint64_t, 1, 'a', "Session herd", "")
+ 
+ VSC_F(shm_records,		uint64_t, 0, 'a', "SHM records", "")
+ VSC_F(shm_writes,		uint64_t, 0, 'a', "SHM writes", "")
+ VSC_F(shm_flushes,		uint64_t, 0, 'a', "SHM flushes due to overflow", "")
+ VSC_F(shm_cont,		uint64_t, 0, 'a', "SHM MTX contention", "")
+ VSC_F(shm_cycles,		uint64_t, 0, 'a', "SHM cycles through buffer", "")
+ 
+ VSC_F(sms_nreq,		uint64_t, 0, 'a', "SMS allocator requests", "")
+ VSC_F(sms_nobj,		uint64_t, 0, 'i', "SMS outstanding allocations", "")
+ VSC_F(sms_nbytes,		uint64_t, 0, 'i', "SMS outstanding bytes", "")
+ VSC_F(sms_balloc,		uint64_t, 0, 'i', "SMS bytes allocated", "")
+ VSC_F(sms_bfree,		uint64_t, 0, 'i', "SMS bytes freed", "")
+ 
+ VSC_F(backend_req,		uint64_t, 0, 'a', "Backend requests made", "")
+ 
+ VSC_F(n_vcl,		uint64_t, 0, 'a', "N vcl total", "")
+ VSC_F(n_vcl_avail,		uint64_t, 0, 'a', "N vcl available", "")
+ VSC_F(n_vcl_discard,	uint64_t, 0, 'a', "N vcl discarded", "")
+ 
+ /**********************************************************************/
+ 
+ VSC_F(bans,			uint64_t, 0, 'g',
+    "Count of bans",
+ 	"Number of all bans in system, including bans superseded"
+ 	" by newer bans and bans already checked by the ban-lurker."
+ )
+ VSC_F(bans_gone,		uint64_t, 0, 'g',
+     "Number of bans marked 'gone'",
+ 	"Number of bans which are no longer active, either because they"
+ 	" got checked by the ban-lurker or superseded by newer identical bans."
+ )
+ VSC_F(bans_req,			uint64_t, 0, 'g',
+     "Number of bans using req.*",
+ 	"Number of bans which use req.* variables.  These bans can not"
+ 	" be washed by the ban-lurker."
+ )
+ VSC_F(bans_added,		uint64_t, 0, 'c',
+     "Bans added",
+ 	"Counter of bans added to ban list."
+ )
+ VSC_F(bans_deleted,		uint64_t, 0, 'c',
+     "Bans deleted",
+ 	"Counter of bans deleted from ban list."
+ )
+ 
+ VSC_F(bans_tested,		uint64_t, 0, 'c',
+     "Bans tested against objects",
+ 	"Count of how many bans and objects have been tested against"
+ 	" each other."
+ )
+ VSC_F(bans_tests_tested,	uint64_t, 0, 'c',
+     "Ban tests tested against objects",
+ 	"Count of how many tests and objects have been tested against"
+ 	" each other.  'ban req.url == foo && req.http.host == bar'"
+ 	" counts as one in 'bans_tested' and as two in 'bans_tests_tested'"
+ )
+ VSC_F(bans_dups,		uint64_t, 0, 'c',
+     "Bans superseded by other bans",
+ 	"Count of bans replaced by later identical bans."
+ )
+ 
+ /**********************************************************************/
+ 
+ VSC_F(hcb_nolock,		uint64_t, 0, 'a', "HCB Lookups without lock", "")
+ VSC_F(hcb_lock,		uint64_t, 0, 'a', "HCB Lookups with lock", "")
+ VSC_F(hcb_insert,		uint64_t, 0, 'a', "HCB Inserts", "")
+ 
+ VSC_F(esi_errors,		uint64_t, 0, 'a', "ESI parse errors (unlock)", "")
+ VSC_F(esi_warnings,		uint64_t, 0, 'a', "ESI parse warnings (unlock)", "")
+ VSC_F(client_drop_late,	uint64_t, 0, 'a', "Connection dropped late", "")
+ VSC_F(uptime,		uint64_t, 0, 'a', "Client uptime", "")
+ 
+ VSC_F(dir_dns_lookups,	uint64_t, 0, 'a', "DNS director lookups", "")
+ VSC_F(dir_dns_failed,	uint64_t, 0, 'a', "DNS director failed lookups", "")
+ VSC_F(dir_dns_hit,		uint64_t, 0, 'a', "DNS director cached lookups hit", "")
+ VSC_F(dir_dns_cache_full,	uint64_t, 0, 'a', "DNS director full dnscache", "")
+ 
+ VSC_F(vmods,		uint64_t, 0, 'i', "Loaded VMODs", "")
+ 
+ VSC_F(n_gzip,			uint64_t, 0, 'a', "Gzip operations", "")
+ VSC_F(n_gunzip,			uint64_t, 0, 'a', "Gunzip operations", "")
+ 
++VSC_F(cond_not_validated,	uint64_t, 1, 'c', "Non-validating responses",
++      "Count of backend responses to conditional requests with status != 304")
+ #endif
+ 
+ /**********************************************************************/
+ 
+ #ifdef VSC_DO_LCK
+ 
+ VSC_F(creat,		uint64_t, 0, 'a', "Created locks", "")
+ VSC_F(destroy,		uint64_t, 0, 'a', "Destroyed locks", "")
+ VSC_F(locks,		uint64_t, 0, 'a', "Lock Operations", "")
+ VSC_F(colls,		uint64_t, 0, 'a', "Collisions", "")
+ 
+ #endif
+ 
+ /**********************************************************************
+  * All Stevedores support these counters
+  */
+ 
+ #if defined(VSC_DO_SMA) || defined (VSC_DO_SMF)
+ VSC_F(c_req,		uint64_t, 0, 'a', "Allocator requests", "")
+ VSC_F(c_fail,		uint64_t, 0, 'a', "Allocator failures", "")
+ VSC_F(c_bytes,		uint64_t, 0, 'a', "Bytes allocated", "")
+ VSC_F(c_freed,		uint64_t, 0, 'a', "Bytes freed", "")
+ VSC_F(g_alloc,		uint64_t, 0, 'i', "Allocations outstanding", "")
+ VSC_F(g_bytes,		uint64_t, 0, 'i', "Bytes outstanding", "")
+ VSC_F(g_space,		uint64_t, 0, 'i', "Bytes available", "")
+ #endif
+ 
+ 
+ /**********************************************************************/
+ 
+ #ifdef VSC_DO_SMA
+ /* No SMA specific counters */
+ #endif
+ 
+ /**********************************************************************/
+ 
+ #ifdef VSC_DO_SMF
+ VSC_F(g_smf,			uint64_t, 0, 'i', "N struct smf", "")
+ VSC_F(g_smf_frag,		uint64_t, 0, 'i', "N small free smf", "")
+ VSC_F(g_smf_large,		uint64_t, 0, 'i', "N large free smf", "")
+ #endif
+ 
+ /**********************************************************************/
+ 
+ #ifdef VSC_DO_VBE
+ 
+ VSC_F(vcls,			uint64_t, 0, 'i', "VCL references", "")
+ VSC_F(happy,		uint64_t, 0, 'b', "Happy health probes", "")
+ 
+ #endif
+ 


