varnish 3 trunk crash with persistent storage enabled
mailing.lists.wam
mailing.lists.wam at gmail.com
Thu Sep 15 19:28:41 CEST 2011
Hello,
We encounter various Varnish panics (the forked child process crashes, won't
restart, and nothing listens on the http/80 port anymore) with persistent
storage (tested with 20/35/40/90G silo sizes) and kernel address-space
randomization both on and off. The same servers are healthy when using the
file or malloc storage backends instead of persistent.
Feel free to contact me to get the full coredump.
All details below :)
Varnish Version : 3 - trunk d56069e Sep 06, 2011
d56069e8ef221310d75455feb9b03483c9caf63b
Ubuntu 10.04 64Bits Linux 2.6.32-33-generic #72-Ubuntu SMP Fri Jul 29
21:07:13 UTC 2011 x86_64 GNU/Linux
48G RAM / two Intel(R) Xeon(R) CPU L5640 @ 2.27GHz
SSD-SATA 90G
2) Startup config :
VARNISH_INSTANCE=default
START=yes
NFILES="131072"
MEMLOCK="82000"
VARNISH_VCL_CONF=/etc/varnish/default/default.vcl
VARNISH_LISTEN_ADDRESS=
VARNISH_LISTEN_PORT=80
VARNISH_ADMIN_LISTEN_ADDRESS=127.0.0.1
VARNISH_ADMIN_LISTEN_PORT=6082
VARNISH_SECRET_FILE=/etc/varnish/default/secret
VARNISH_THREAD_POOLS=12
VARNISH_STORAGE_FILE_1=/mnt/ssd/varnish/cachefile1
VARNISH_STORAGE_SIZE=30G
VARNISH_STORAGE_1="persistent,${VARNISH_STORAGE_FILE_1},${VARNISH_STORAGE_SIZE}"
DAEMON_OPTS=" -n ${VARNISH_INSTANCE} \
-u root \
-a ${VARNISH_LISTEN_ADDRESS}:${VARNISH_LISTEN_PORT} \
-f ${VARNISH_VCL_CONF} \
-T ${VARNISH_ADMIN_LISTEN_ADDRESS}:${VARNISH_ADMIN_LISTEN_PORT} \
-S ${VARNISH_SECRET_FILE} \
-s ${VARNISH_STORAGE_1} \
-s Transient=malloc,1G\
-p first_byte_timeout=5 \
-p between_bytes_timeout=5 \
-p pipe_timeout=5 \
-p send_timeout=2700 \
-p default_grace=240 \
-p default_ttl=3600 \
-p http_gzip_support=off \
-p http_range_support=on \
-p max_restarts=2 \
-p thread_pool_add_delay=2 \
-p thread_pool_max=4000 \
-p thread_pool_min=80 \
-p thread_pool_timeout=120 \
-p thread_pools=12 \
-p thread_stats_rate=50
#### VCL FILE #####
### SECDownMod
### https://github.com/footplus/libvmod-secdown
import secdown;
include "/etc/varnish/backend/director_edge_2xx.vcl";
include "/etc/varnish/acl/purge.vcl";
sub vcl_recv {
set req.backend = origin;
if (req.request !~ "(GET|HEAD|PURGE)") {
error 405 "Not allowed.";
}
if (req.url ~ "^/files") {
set req.url = secdown.check_url(req.url,
"MySecretIsNotYourSecret", "/link-expired.html", "/link-error.html");
}
# Before anything else we need to fix gzip compression
if (req.http.Accept-Encoding) {
if (req.url ~
"\.(jpg|png|gif|gz|tgz|bz2|tbz|mp3|ogg|flv|ts|mp4)$") {
# No point in compressing these
remove req.http.Accept-Encoding;
} else if (req.http.Accept-Encoding ~ "gzip") {
set req.http.Accept-Encoding = "gzip";
} else if (req.http.Accept-Encoding ~ "deflate") {
set req.http.Accept-Encoding = "deflate";
} else {
# unknown algorithm
remove req.http.Accept-Encoding;
}
}
# Allow a PURGE method to clear cache via regular expression.
if (req.request == "PURGE") {
# If the client has not an authorized IP or
# if he comes from the HTTPS proxy on localhost, deny it.
if (!client.ip ~ purge || req.http.X-Forwarded-For) {
error 405 "Not allowed.";
}
ban_url(req.url);
error 200 "Expression " + req.url + " added to ban.list.";
}
}
sub vcl_pipe {
set bereq.http.connection = "close";
}
sub vcl_pass {
# return (pass);
}
sub vcl_hash {
hash_data(req.url);
return (hash);
}
sub vcl_hit {
# return (deliver);
}
sub vcl_miss {
# return (fetch);
}
sub vcl_fetch {
unset beresp.http.expires;
set beresp.http.cache-control = "max-age=86400";
set beresp.ttl = 365d;
if (beresp.status >= 400) {
set beresp.ttl = 1m;
}
if ((beresp.status == 301) || (beresp.status == 302) ||
(beresp.status == 401)) {
return (hit_for_pass);
}
}
sub vcl_deliver {
# Rename Varnish XIDs headers
if (resp.http.X-Varnish)
{
set resp.http.X-Object-ID = resp.http.X-Varnish;
unset resp.http.X-Varnish;
}
remove resp.http.Via;
remove resp.http.X-Powered-By;
# return (deliver);
}
sub vcl_error {
# Do not reveal what's inside the box :)
remove obj.http.Server;
set obj.http.Server = "EdgeCache/1.4";
}
sub vcl_init {
# return (ok);
}
sub vcl_fini {
# return (ok);
}
3) Assert Message (from syslog)
Sep 15 18:21:02 e101 default[18290]: Child (19438) said Out of space in
persistent silo
Sep 15 18:21:02 e101 default[18290]: Child (19438) said Committing
suicide, restart will make space
Sep 15 18:21:02 e101 default[18290]: Child (19438) ended
Sep 15 18:21:02 e101 default[18290]: Child cleanup complete
Sep 15 18:21:02 e101 default[18290]: child (20924) Started
Sep 15 18:21:02 e101 default[18290]: Child (20924) said Child starts
Sep 15 18:21:02 e101 default[18290]: Child (20924) said Dropped 11
segments to make free_reserve
Sep 15 18:21:02 e101 default[18290]: Child (20924) said Silo completely
loaded
Sep 15 18:21:27 e101 default[18290]: Child (20924) died signal=6 (core
dumped)
Sep 15 18:21:27 e101 default[18290]: Child (20924) Panic message: Assert
error in smp_oc_getobj(), storage_persistent_silo.c line 401:#012
Condition((o)->magic == 0x32851d42) not true.#012thread = (ban-lurker)#012ident =
Linux,2.6.32-33-generic,x86_64,-spersistent,-smalloc,-hcritbit,epoll#012Backtrace:#012
0x437e49: pan_backtrace+19#012 0x43811e: pan_ic+1ad#012 0x45da38:
smp_oc_getobj+282#012 0x415407: oc_getobj+14c#012 0x417848:
ban_lurker_work+299#012 0x41793d: ban_lurker+5b#012 0x43ad91:
wrk_bgthread+184#012 0x7ffff6a9c9ca: _end+7ffff6408692#012
0x7ffff67f970d: _end+7ffff61653d5#012
Sep 15 18:21:27 e101 default[18290]: Child cleanup complete
Sep 15 18:21:27 e101 default[18290]: child (21898) Started
Sep 15 18:21:27 e101 default[18290]: Pushing vcls failed: CLI
communication error (hdr)
Sep 15 18:21:27 e101 default[18290]: Stopping Child
Sep 15 18:21:27 e101 default[18290]: Child (21898) died signal=6 (core
dumped)
Sep 15 18:21:27 e101 default[18290]: Child (21898) Panic message: Assert
error in smp_open_segs(), storage_persistent.c line 239:#012
Condition(sg1->p.offset != sg->p.offset) not true.#012thread = (cache-main)#012ident =
Linux,2.6.32-33-generic,x86_64,-spersistent,-smalloc,-hcritbit,no_waiter#012Backtrace:#012
0x437e49: pan_backtrace+19#012 0x43811e: pan_ic+1ad#012 0x45a568:
smp_open_segs+415#012 0x45ab93: smp_open+236#012 0x456391:
STV_open+40#012 0x435fa4: child_main+124#012 0x44d3a7:
start_child+36a#012 0x44ddce: mgt_sigchld+3e7#012
0x7ffff7bd1fec: _end+7ffff753dcb4#012 0x7ffff7bd2348: _end+7ffff753e010#012
Sep 15 18:21:27 e101 default[18290]: Child (-1) said Child starts
Sep 15 18:21:27 e101 default[18290]: Child cleanup complete
4) GDB Core bt
(gdb) bt
#0 0x00007ffff6746a75 in raise () from /lib/libc.so.6
#1 0x00007ffff674a5c0 in abort () from /lib/libc.so.6
#2 0x00000000004381dd in pan_ic (func=0x482dd5 "smp_open_segs",
file=0x4827c4 "storage_persistent.c", line=239,
cond=0x48283f "sg1->p.offset != sg->p.offset", err=0, xxx=0) at
cache_panic.c:374
#3 0x000000000045a568 in smp_open_segs (sc=0x7ffff6433000,
ctx=0x7ffff6433220) at storage_persistent.c:239
#4 0x000000000045ab93 in smp_open (st=0x7ffff64213c0) at
storage_persistent.c:331
#5 0x0000000000456391 in STV_open () at stevedore.c:406
#6 0x0000000000435fa4 in child_main () at cache_main.c:128
#7 0x000000000044d3a7 in start_child (cli=0x0) at mgt_child.c:345
#8 0x000000000044ddce in mgt_sigchld (e=0x7ffff64da1d0, what=-1) at
mgt_child.c:524
#9 0x00007ffff7bd1fec in vev_sched_signal (evb=0x7ffff6408380) at vev.c:435
#10 0x00007ffff7bd2348 in vev_schedule_one (evb=0x7ffff6408380) at vev.c:478
#11 0x00007ffff7bd1d2a in vev_schedule (evb=0x7ffff6408380) at vev.c:363
#12 0x000000000044e1c9 in MGT_Run () at mgt_child.c:602
#13 0x0000000000461a64 in main (argc=0, argv=0x7fffffffebd0) at
varnishd.c:650
5) Last lines of varnishlog
221 SessionOpen c 85.93.199.29 58335 :80
234 SessionOpen c 77.196.147.182 2273 :80
More information about the varnish-bugs
mailing list