r888 - trunk/varnish-cache/bin/varnishd

phk at projects.linpro.no phk at projects.linpro.no
Tue Aug 22 10:55:15 CEST 2006


Author: phk
Date: 2006-08-22 10:55:15 +0200 (Tue, 22 Aug 2006)
New Revision: 888

Modified:
   trunk/varnish-cache/bin/varnishd/hash_classic.c
Log:

Eliminate the MD5 optional code.  There is no sufficiently strong
statistical basis for using MD5 that will outweigh the performance
penalty or "IT USES THE BR0K3N MD5 ALGORITM" cries on slashdot.

The only known artifact in CRC32 is that hashing it with a power
of two is slightly inefficient (a few percent in bucket length
standard deviation) if you have URLs that contain fixed-width
fields of a limited charset (such as numeric).

Avoid this by checking whether the specified hash width is a power
of two and, if so, reducing it by one and telling the user why.

Using a hash width that is a prime number has no provable advantage
over just not using a power-of-two width.  I have heard a mathematician
say that this is inherent in the design of the polynomials chosen
for CRC algorithms (optimized for bit error detection), but
the actual math is way beyond me.

Increase default hash width to 16383, which is probably still
smaller than it should be.



Modified: trunk/varnish-cache/bin/varnishd/hash_classic.c
===================================================================
--- trunk/varnish-cache/bin/varnishd/hash_classic.c	2006-08-22 08:18:15 UTC (rev 887)
+++ trunk/varnish-cache/bin/varnishd/hash_classic.c	2006-08-22 08:55:15 UTC (rev 888)
@@ -14,15 +14,6 @@
 #include <shmlog.h>
 #include <cache.h>
 
-#if defined(HASH_CLASSIC_MD5) && !defined(HAVE_MD5)
-/* MD5 is not available */
-#undef HASH_CLASSIC_MD5
-#endif
-
-#ifdef HASH_CLASSIC_MD5
-#include <md5.h>
-#endif
-
 /*--------------------------------------------------------------------*/
 
 struct hcl_entry {
@@ -45,12 +36,11 @@
 	pthread_mutex_t		mtx;
 };
 
-static unsigned			hcl_nhash = 4096;
+static unsigned			hcl_nhash = 16383;
 static struct hcl_hd		*hcl_head;
 
 /*--------------------------------------------------------------------*/
 
-#ifndef HASH_CLASSIC_MD5
 static uint32_t crc32bits[] = {
     0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
     0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
@@ -113,7 +103,6 @@
 	return (crc ^ ~0U);
 }
 
-#endif /* HASH_CLASSIC_MD5 */
 
 /*--------------------------------------------------------------------
  * The ->init method allows the management process to pass arguments
@@ -128,6 +117,15 @@
 	i = sscanf(p, "%u", &u);
 	if (i <= 0 || u == 0)
 		return (0);
+	if (u > 2 && !(u & (u - 1))) {
+		fprintf(stderr,
+		    "NOTE:\n"
+		    "\tA power of two number of hash buckets is "
+		    "marginally less efficient\n"
+		    "\twith systematic URLs.  Reducing by one"
+		    " hash bucket.\n");
+		u--;
+	}
 	hcl_nhash = u;
 	fprintf(stderr, "Classic hash: %u buckets\n", hcl_nhash);
 	return (0);
@@ -169,23 +167,10 @@
 	struct hcl_hd *hp;
 	unsigned u1, digest, kl1, kl2, kl, r;
 	int i;
-#ifdef HASH_CLASSIC_MD5
-	MD5_CTX c;
-	unsigned char md5[MD5_DIGEST_LENGTH];
-#endif
 
 	CHECK_OBJ_NOTNULL(noh, OBJHEAD_MAGIC);
 
-#ifdef HASH_CLASSIC_MD5
-	MD5Init(&c);
-	MD5Update(&c, key1, strlen(key1));
-	MD5Update(&c, "", 1);
-	MD5Update(&c, key2, strlen(key2));
-	MD5Final(md5, &c);
-	memcpy(&digest, md5, sizeof digest);
-#else
 	digest = crc32(key1, key2);
-#endif
 
 	u1 = digest % hcl_nhash;
 	hp = &hcl_head[u1];




More information about the varnish-commit mailing list