summaryrefslogtreecommitdiff
path: root/ckcapi-util.c
diff options
context:
space:
mode:
authorStef Walter <stef@memberwebs.com>2008-12-09 03:47:01 +0000
committerStef Walter <stef@memberwebs.com>2008-12-09 03:47:01 +0000
commit94c63a237d77654bab2ea34146b06fd4b0acfc99 (patch)
treee726b7d285dd9f188ba92830355f64b8cba644ab /ckcapi-util.c
parenta71df0e849b4e286f29ae6e26973961d3412cd83 (diff)
Use better and faster hashing of objects.
Diffstat (limited to 'ckcapi-util.c')
-rw-r--r--ckcapi-util.c138
1 files changed, 83 insertions, 55 deletions
diff --git a/ckcapi-util.c b/ckcapi-util.c
index 3bd18a5..80e1b40 100644
--- a/ckcapi-util.c
+++ b/ckcapi-util.c
@@ -260,7 +260,6 @@ typedef struct _HashEntry
struct _HashEntry* next;
unsigned int hash;
const void* key;
- size_t klen;
void* val;
}
HashEntry;
@@ -275,6 +274,8 @@ HashEntry;
struct _CkCapiHash
{
HashEntry** array;
+ CkCapiHashFunc hash_func;
+ CkCapiHashEqual equal_func;
size_t count;
size_t max;
};
@@ -282,6 +283,12 @@ struct _CkCapiHash
#define INITIAL_MAX 15 /* tunable == 2^n - 1 */
+static int
+equal_default(const void* a, const void* b)
+{
+ return a == b;
+}
+
/*
* Hash creation functions.
*/
@@ -293,11 +300,13 @@ alloc_array(CkCapiHash* ht, size_t max)
}
CkCapiHash*
-ckcapi_hash_new()
+ckcapi_hash_new(CkCapiHashFunc hash_func, CkCapiHashEqual equal_func)
{
CkCapiHash* ht = malloc(sizeof(CkCapiHash));
if(ht)
{
+ ht->hash_func = hash_func ? hash_func : ckcapi_hash_pointer;
+ ht->equal_func = equal_func ? equal_func : equal_default;
ht->count = 0;
ht->max = INITIAL_MAX;
ht->array = alloc_array(ht, ht->max);
@@ -379,62 +388,19 @@ expand_array(CkCapiHash* ht)
*/
static HashEntry**
-find_entry(CkCapiHash* ht, const void* key, size_t klen, void* val)
+find_entry(CkCapiHash* ht, const void* key, void* val)
{
HashEntry** hep;
HashEntry* he;
- const unsigned char* p;
unsigned int hash;
- size_t i;
-
- /*
- * This is the popular `times 33' hash algorithm which is used by
- * perl and also appears in Berkeley DB. This is one of the best
- * known hash functions for strings because it is both computed
- * very fast and distributes very well.
- *
- * The originator may be Dan Bernstein but the code in Berkeley DB
- * cites Chris Torek as the source. The best citation I have found
- * is "Chris Torek, Hash function for text in C, Usenet message
- * <27038@mimsy.umd.edu> in comp.lang.c , October, 1990." in Rich
- * Salz's USENIX 1992 paper about INN which can be found at
- * <http://citeseer.nj.nec.com/salz92internetnews.html>.
- *
- * The magic of number 33, i.e. why it works better than many other
- * constants, prime or not, has never been adequately explained by
- * anyone. So I try an explanation: if one experimentally tests all
- * multipliers between 1 and 256 (as I did while writing a low-level
- * data structure library some time ago) one detects that even
- * numbers are not useable at all. The remaining 128 odd numbers
- * (except for the number 1) work more or less all equally well.
- * They all distribute in an acceptable way and this way fill a hash
- * table with an average percent of approx. 86%.
- *
- * If one compares the chi^2 values of the variants (see
- * Bob Jenkins ``Hashing Frequently Asked Questions'' at
- * http://burtleburtle.net/bob/hash/hashfaq.html for a description
- * of chi^2), the number 33 not even has the best value. But the
- * number 33 and a few other equally good numbers like 17, 31, 63,
- * 127 and 129 have nevertheless a great advantage to the remaining
- * numbers in the large set of possible multipliers: their multiply
- * operation can be replaced by a faster operation based on just one
- * shift plus either a single addition or subtraction operation. And
- * because a hash function has to both distribute good _and_ has to
- * be very fast to compute, those few numbers should be preferred.
- *
- * -- Ralf S. Engelschall <rse@engelschall.com>
- */
- hash = 0;
- for(p = key, i = klen; i; i--, p++)
- hash = hash * 33 + *p;
+ hash = (ht->hash_func)(key);
/* scan linked list */
for(hep = &ht->array[hash & ht->max], he = *hep;
he; hep = &he->next, he = *hep)
{
- if(he->hash == hash && he->klen == klen &&
- memcmp(he->key, key, klen) == 0)
+ if(he->hash == hash && (ht->equal_func)(he->key, key))
break;
}
@@ -447,7 +413,6 @@ find_entry(CkCapiHash* ht, const void* key, size_t klen, void* val)
{
/* Key points to external data */
he->key = key;
- he->klen = klen;
he->next = NULL;
he->hash = hash;
@@ -461,9 +426,9 @@ find_entry(CkCapiHash* ht, const void* key, size_t klen, void* val)
}
void*
-ckcapi_hash_get(CkCapiHash* ht, const void *key, size_t klen)
+ckcapi_hash_get(CkCapiHash* ht, const void *key)
{
- HashEntry** he = find_entry(ht, key, klen, NULL);
+ HashEntry** he = find_entry(ht, key, NULL);
if(he && *he)
return (void*)((*he)->val);
else
@@ -471,9 +436,9 @@ ckcapi_hash_get(CkCapiHash* ht, const void *key, size_t klen)
}
int
-ckcapi_hash_set(CkCapiHash* ht, const void* key, size_t klen, void* val)
+ckcapi_hash_set(CkCapiHash* ht, const void* key, void* val)
{
- HashEntry** hep = find_entry(ht, key, klen, val);
+ HashEntry** hep = find_entry(ht, key, val);
if(hep && *hep)
{
/* replace entry */
@@ -493,9 +458,9 @@ ckcapi_hash_set(CkCapiHash* ht, const void* key, size_t klen, void* val)
}
void*
-ckcapi_hash_rem(CkCapiHash* ht, const void* key, size_t klen)
+ckcapi_hash_rem(CkCapiHash* ht, const void* key)
{
- HashEntry** hep = find_entry(ht, key, klen, NULL);
+ HashEntry** hep = find_entry(ht, key, NULL);
void* val = NULL;
if(hep && *hep)
@@ -515,3 +480,66 @@ ckcapi_hash_count(CkCapiHash* ht)
{
return ht->count;
}
+
+unsigned int
+ckcapi_hash_pointer(const void* ptr)
+{
+ return (unsigned int)ptr;
+}
+
+unsigned int
+ckcapi_hash_data(const void* data, size_t n_data)
+{
+ unsigned int hash = 0;
+ const unsigned char* end;
+ const unsigned char* p;
+
+ /*
+ * This is the popular `times 33' hash algorithm which is used by
+ * perl and also appears in Berkeley DB. This is one of the best
+ * known hash functions for strings because it is both computed
+ * very fast and distributes very well.
+ *
+ * The originator may be Dan Bernstein but the code in Berkeley DB
+ * cites Chris Torek as the source. The best citation I have found
+ * is "Chris Torek, Hash function for text in C, Usenet message
+ * <27038@mimsy.umd.edu> in comp.lang.c , October, 1990." in Rich
+ * Salz's USENIX 1992 paper about INN which can be found at
+ * <http://citeseer.nj.nec.com/salz92internetnews.html>.
+ *
+ * The magic of number 33, i.e. why it works better than many other
+ * constants, prime or not, has never been adequately explained by
+ * anyone. So I try an explanation: if one experimentally tests all
+ * multipliers between 1 and 256 (as I did while writing a low-level
+ * data structure library some time ago) one detects that even
+ * numbers are not useable at all. The remaining 128 odd numbers
+ * (except for the number 1) work more or less all equally well.
+ * They all distribute in an acceptable way and this way fill a hash
+ * table with an average percent of approx. 86%.
+ *
+ * If one compares the chi^2 values of the variants (see
+ * Bob Jenkins ``Hashing Frequently Asked Questions'' at
+ * http://burtleburtle.net/bob/hash/hashfaq.html for a description
+ * of chi^2), the number 33 not even has the best value. But the
+ * number 33 and a few other equally good numbers like 17, 31, 63,
+ * 127 and 129 have nevertheless a great advantage to the remaining
+ * numbers in the large set of possible multipliers: their multiply
+ * operation can be replaced by a faster operation based on just one
+ * shift plus either a single addition or subtraction operation. And
+ * because a hash function has to both distribute good _and_ has to
+ * be very fast to compute, those few numbers should be preferred.
+ *
+ * -- Ralf S. Engelschall <rse@engelschall.com>
+ */
+
+ for(p = data, end = p + n_data; p != end; ++p)
+ hash = hash * 33 + *p;
+
+ return hash;
+}
+
+unsigned int
+ckcapi_hash_integer(int integer)
+{
+ return integer;
+}