CrapWow

The faster, slightly sketchier version of Crap8. The mixing appears to be quite solid, although the seed avalanching is not perfect. If you test sequential keys instead of random keys, there are some biased bit patterns, but nothing terrible. If you're in a low to mid risk situation and need more speed, this is a great fit. Note that this, like Crap8, still needs to be forceinlined on all MSVC compilers! Its unofficial theme song is CrapWow!

Like Crap8, icc/gcc have unacceptable flaws in the generated code so a hand coded version must be provided. MSVC also needs __forceinline to avoid needless handling of the stack frame. It's possible to unroll CrapWow to do 16 bytes per loop and get even more speed on large keys, but smaller keys slow down a little with the resulting code bloat.

Tests

Performance

Implementation

// CrapWow: hashes the `len` bytes at `key` into a 32-bit value, mixed with `seed`.
//
// Two 32-bit lanes are kept: h starts as len and k as len + seed + n. Each
// 32-bit chunk of input is multiplied by one of the constants m = 0x57559429 or
// n = 0x5052acdb into a 64-bit product; the low half is XORed into one lane and
// the high half into the other. Input is consumed 8 bytes per loop pass, then
// one optional 4-byte word, then the 1-3 tail bytes, followed by a final mix of
// h ^ (k + n). The inline-asm branch and the C branch perform the same sequence.
//
// The 1-3 tail bytes are assembled one byte at a time (first byte in the low
// bits), so nothing at or beyond key + len is ever read. On a little-endian
// target this yields exactly the value a masked 32-bit load would.
//
// NOTE(review): whole words are still fetched with plain 32-bit loads in native
// byte order and with no alignment handling -- this assumes the target tolerates
// unaligned u32 reads; confirm before using on strict-alignment hardware.
finline u32 fastcall CrapWow( const u8 *key, u32 len, u32 seed ) {
#if !defined(__LP64__) && !defined(_MSC_VER) && ( defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) )
	// 32-bit x86, non-MSVC compiler: hand-written GCC-style inline assembly.
	// ecx = bytes remaining, edi = read cursor, esi = k, ebx = h,
	// edx:eax = 64-bit result of each mull.
	u32 hash;
	asm(
		// k = len + seed + n, h = len
		"leal 0x5052acdb(%%ecx,%%esi), %%esi\n"
		"movl %%ecx, %%ebx\n"
		"cmpl $8, %%ecx\n"
		"jb DW%=\n"
	// 8 bytes per pass: word 0 times n (lo -> h, hi -> k), word 1 times m (lo -> k, hi -> h)
	"QW%=:\n"
		"movl $0x5052acdb, %%eax\n"
		"mull (%%edi)\n"
		"addl $-8, %%ecx\n"
		"xorl %%eax, %%ebx\n"
		"xorl %%edx, %%esi\n"
		"movl $0x57559429, %%eax\n"
		"mull 4(%%edi)\n"
		"xorl %%eax, %%esi\n"
		"xorl %%edx, %%ebx\n"
		"addl $8, %%edi\n"
		"cmpl $8, %%ecx\n"
		"jae QW%=\n"
	// one remaining whole word, times n (lo -> h, hi -> k)
	"DW%=:\n"
		"cmpl $4, %%ecx\n"
		"jb B%=\n"
		"movl $0x5052acdb, %%eax\n"
		"mull (%%edi)\n"
		"addl $4, %%edi\n"
		"xorl %%eax, %%ebx\n"
		"addl $-4, %%ecx\n"
		"xorl %%edx, %%esi\n"
	// 1-3 tail bytes: build the value in edx from single-byte loads only
	"B%=:\n"
		"testl %%ecx, %%ecx\n"
		"jz F%=\n"
		"movzbl (%%edi), %%edx\n"
		"cmpl $1, %%ecx\n"
		"je T%=\n"
		"movzbl 1(%%edi), %%eax\n"
		"shll $8, %%eax\n"
		"orl %%eax, %%edx\n"
		"cmpl $2, %%ecx\n"
		"je T%=\n"
		"movzbl 2(%%edi), %%eax\n"
		"shll $16, %%eax\n"
		"orl %%eax, %%edx\n"
	// tail value times m (lo -> k, hi -> h)
	"T%=:\n"
		"movl $0x57559429, %%eax\n"
		"mull %%edx\n"
		"xorl %%eax, %%esi\n"
		"xorl %%edx, %%ebx\n"
	// final mix: (h ^ (k + n)) times n, lo -> h, hi -> k, result = k ^ h
	"F%=:\n"
		"leal 0x5052acdb(%%esi), %%edx\n"
		"xorl %%ebx, %%edx\n"
		"movl $0x5052acdb, %%eax\n"
		"mull %%edx\n"
		"xorl %%ebx, %%eax\n"
		"xorl %%edx, %%esi\n"
		"xorl %%esi, %%eax\n"
		// ecx/esi/edi are consumed and overwritten, so each is a read-write
		// operand bound to the variable it was loaded from.
		: "=a"(hash), "+c"(len), "+S"(seed), "+D"(key)
		:
		// "memory": the asm reads the key buffer through edi, which the
		// operand list alone does not tell the compiler.
		: "%ebx", "%edx", "cc", "memory"
	);
	return hash;
#else
	// cwfold: 32x32 -> 64 multiply of a and b; low half XORed into `lo`, high half into `hi`.
	// cwmixa / cwmixb: fold one input word using m or n, with the lane roles swapped.
	// These macros are not #undef'd and stay defined after this function.
	#define cwfold( a, b, lo, hi ) { p = (u32)(a) * (u64)(b); lo ^= (u32)p; hi ^= (u32)(p >> 32); }
	#define cwmixa( in ) { cwfold( in, m, k, h ); }
	#define cwmixb( in ) { cwfold( in, n, h, k ); }

	const u32 m = 0x57559429, n = 0x5052acdb, *key4 = (const u32 *)key;
	u32 h = len, k = len + seed + n;
	u64 p;

	// 8 bytes per pass, then at most one more whole word
	while ( len >= 8 ) { cwmixb(key4[0]) cwmixa(key4[1]) key4 += 2; len -= 8; }
	if ( len >= 4 ) { cwmixb(key4[0]) key4 += 1; len -= 4; }
	if ( len ) {
		// 1-3 bytes left: read only those bytes, first byte in the low bits
		const u8 *tail = (const u8 *)key4;
		u32 last = tail[0];
		if ( len > 1 ) last |= (u32)tail[1] << 8;
		if ( len > 2 ) last |= (u32)tail[2] << 16;
		cwmixa( last )
	}
	// final mix of both lanes
	cwmixb( h ^ (k + n) )
	return k ^ h;
#endif
}