From 94c63a237d77654bab2ea34146b06fd4b0acfc99 Mon Sep 17 00:00:00 2001 From: Stef Walter Date: Tue, 9 Dec 2008 03:47:01 +0000 Subject: Use better and faster hashing of objects. --- ckcapi-util.c | 138 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 83 insertions(+), 55 deletions(-) (limited to 'ckcapi-util.c') diff --git a/ckcapi-util.c b/ckcapi-util.c index 3bd18a5..80e1b40 100644 --- a/ckcapi-util.c +++ b/ckcapi-util.c @@ -260,7 +260,6 @@ typedef struct _HashEntry struct _HashEntry* next; unsigned int hash; const void* key; - size_t klen; void* val; } HashEntry; @@ -275,6 +274,8 @@ HashEntry; struct _CkCapiHash { HashEntry** array; + CkCapiHashFunc hash_func; + CkCapiHashEqual equal_func; size_t count; size_t max; }; @@ -282,6 +283,12 @@ struct _CkCapiHash #define INITIAL_MAX 15 /* tunable == 2^n - 1 */ +static int +equal_default(const void* a, const void* b) +{ + return a == b; +} + /* * Hash creation functions. */ @@ -293,11 +300,13 @@ alloc_array(CkCapiHash* ht, size_t max) } CkCapiHash* -ckcapi_hash_new() +ckcapi_hash_new(CkCapiHashFunc hash_func, CkCapiHashEqual equal_func) { CkCapiHash* ht = malloc(sizeof(CkCapiHash)); if(ht) { + ht->hash_func = hash_func ? hash_func : ckcapi_hash_pointer; + ht->equal_func = equal_func ? equal_func : equal_default; ht->count = 0; ht->max = INITIAL_MAX; ht->array = alloc_array(ht, ht->max); @@ -379,62 +388,19 @@ expand_array(CkCapiHash* ht) */ static HashEntry** -find_entry(CkCapiHash* ht, const void* key, size_t klen, void* val) +find_entry(CkCapiHash* ht, const void* key, void* val) { HashEntry** hep; HashEntry* he; - const unsigned char* p; unsigned int hash; - size_t i; - - /* - * This is the popular `times 33' hash algorithm which is used by - * perl and also appears in Berkeley DB. This is one of the best - * known hash functions for strings because it is both computed - * very fast and distributes very well. - * - * The originator may be Dan Bernstein but the code in Berkeley DB - * cites Chris Torek as the source. The best citation I have found - * is "Chris Torek, Hash function for text in C, Usenet message - * <27038@mimsy.umd.edu> in comp.lang.c , October, 1990." in Rich - * Salz's USENIX 1992 paper about INN which can be found at - * . - * - * The magic of number 33, i.e. why it works better than many other - * constants, prime or not, has never been adequately explained by - * anyone. So I try an explanation: if one experimentally tests all - * multipliers between 1 and 256 (as I did while writing a low-level - * data structure library some time ago) one detects that even - * numbers are not useable at all. The remaining 128 odd numbers - * (except for the number 1) work more or less all equally well. - * They all distribute in an acceptable way and this way fill a hash - * table with an average percent of approx. 86%. - * - * If one compares the chi^2 values of the variants (see - * Bob Jenkins ``Hashing Frequently Asked Questions'' at - * http://burtleburtle.net/bob/hash/hashfaq.html for a description - * of chi^2), the number 33 not even has the best value. But the - * number 33 and a few other equally good numbers like 17, 31, 63, - * 127 and 129 have nevertheless a great advantage to the remaining - * numbers in the large set of possible multipliers: their multiply - * operation can be replaced by a faster operation based on just one - * shift plus either a single addition or subtraction operation. And - * because a hash function has to both distribute good _and_ has to - * be very fast to compute, those few numbers should be preferred. - * - * -- Ralf S. Engelschall - */ - hash = 0; - for(p = key, i = klen; i; i--, p++) - hash = hash * 33 + *p; + hash = (ht->hash_func)(key); /* scan linked list */ for(hep = &ht->array[hash & ht->max], he = *hep; he; hep = &he->next, he = *hep) { - if(he->hash == hash && he->klen == klen && - memcmp(he->key, key, klen) == 0) + if(he->hash == hash && (ht->equal_func)(he->key, key)) break; } @@ -447,7 +413,6 @@ find_entry(CkCapiHash* ht, const void* key, size_t klen, void* val) { /* Key points to external data */ he->key = key; - he->klen = klen; he->next = NULL; he->hash = hash; @@ -461,9 +426,9 @@ find_entry(CkCapiHash* ht, const void* key, size_t klen, void* val) } void* -ckcapi_hash_get(CkCapiHash* ht, const void *key, size_t klen) +ckcapi_hash_get(CkCapiHash* ht, const void *key) { - HashEntry** he = find_entry(ht, key, klen, NULL); + HashEntry** he = find_entry(ht, key, NULL); if(he && *he) return (void*)((*he)->val); else @@ -471,9 +436,9 @@ ckcapi_hash_get(CkCapiHash* ht, const void *key, size_t klen) } int -ckcapi_hash_set(CkCapiHash* ht, const void* key, size_t klen, void* val) +ckcapi_hash_set(CkCapiHash* ht, const void* key, void* val) { - HashEntry** hep = find_entry(ht, key, klen, val); + HashEntry** hep = find_entry(ht, key, val); if(hep && *hep) { /* replace entry */ @@ -493,9 +458,9 @@ ckcapi_hash_set(CkCapiHash* ht, const void* key, size_t klen, void* val) } void* -ckcapi_hash_rem(CkCapiHash* ht, const void* key, size_t klen) +ckcapi_hash_rem(CkCapiHash* ht, const void* key) { - HashEntry** hep = find_entry(ht, key, klen, NULL); + HashEntry** hep = find_entry(ht, key, NULL); void* val = NULL; if(hep && *hep) @@ -515,3 +480,66 @@ ckcapi_hash_count(CkCapiHash* ht) { return ht->count; } + +unsigned int +ckcapi_hash_pointer(const void* ptr) +{ + return (unsigned int)ptr; +} + +unsigned int +ckcapi_hash_data(const void* data, size_t n_data) +{ + unsigned int hash = 0; + const unsigned char* end; + const unsigned char* p; + + /* + * This is the popular `times 33' hash algorithm which is used by + * perl and also appears in Berkeley DB. This is one of the best + * known hash functions for strings because it is both computed + * very fast and distributes very well. + * + * The originator may be Dan Bernstein but the code in Berkeley DB + * cites Chris Torek as the source. The best citation I have found + * is "Chris Torek, Hash function for text in C, Usenet message + * <27038@mimsy.umd.edu> in comp.lang.c , October, 1990." in Rich + * Salz's USENIX 1992 paper about INN which can be found at + * . + * + * The magic of number 33, i.e. why it works better than many other + * constants, prime or not, has never been adequately explained by + * anyone. So I try an explanation: if one experimentally tests all + * multipliers between 1 and 256 (as I did while writing a low-level + * data structure library some time ago) one detects that even + * numbers are not useable at all. The remaining 128 odd numbers + * (except for the number 1) work more or less all equally well. + * They all distribute in an acceptable way and this way fill a hash + * table with an average percent of approx. 86%. + * + * If one compares the chi^2 values of the variants (see + * Bob Jenkins ``Hashing Frequently Asked Questions'' at + * http://burtleburtle.net/bob/hash/hashfaq.html for a description + * of chi^2), the number 33 not even has the best value. But the + * number 33 and a few other equally good numbers like 17, 31, 63, + * 127 and 129 have nevertheless a great advantage to the remaining + * numbers in the large set of possible multipliers: their multiply + * operation can be replaced by a faster operation based on just one + * shift plus either a single addition or subtraction operation. And + * because a hash function has to both distribute good _and_ has to + * be very fast to compute, those few numbers should be preferred. + * + * -- Ralf S. Engelschall + */ + + for(p = data, end = p + n_data; p != end; ++p) + hash = hash * 33 + *p; + + return hash; +} + +unsigned int +ckcapi_hash_integer(int integer) +{ + return integer; +} -- cgit v1.2.3