Asp Forum - Hash table implementation.

lolzy

8/11/2011 5:43:00 PM

Hello comp.lang.c,

This is a hash table implementation I wrote, please comment. Thanks in
advance.

# START CODE

/*
* Hash table demo.
* (C) Jori Koolstra - 19:40 11-8-2011
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define HASH_TABLE_SIZE 100

#define HASH_TABLE_DELETE_ELEMENT_CHECK_FOR_ERRORS(t, n) if
(hash_table_delete_element(t, n) == 0) hash_table_fatal_error(0, n)

#define HASH_TABLE_GET_STRING_FROM_TABLE_CHECK_FOR_ERRORS(t, n, z) if
((z = hash_table_get_string_from_table(t, n)) == NULL)
hash_table_fatal_error(1, n)

struct table_data
{
char *uid;
char *d;
};

typedef struct table_data table_data;

struct hash_table
{
table_data data;
struct hash_table *x;
};

typedef struct hash_table hash_table;

hash_table ** hash_table_create_table(int);
hash_table * hash_table_get_string_from_table(hash_table **, char *);
int hash_table_hash(char *);
void hash_table_add_element(hash_table **, char *, char *);
int hash_table_delete_element(hash_table **, char *);
void hash_table_fatal_error(int, char *);

int main(void)
{
hash_table **table;
hash_table *y;

table = hash_table_create_table(HASH_TABLE_SIZE);

hash_table_add_element(table, "John", "Hello John!");
hash_table_add_element(table, "Kim", "Hello Kim!");
hash_table_add_element(table, "Jori", "Hello Jori!");
hash_table_add_element(table, "Jack", "Hello Jack!");

HASH_TABLE_DELETE_ELEMENT_CHECK_FOR_ERRORS(table, "Kim");

HASH_TABLE_GET_STRING_FROM_TABLE_CHECK_FOR_ERRORS(table, "Jack", y);

printf("%s\n", y->data.d);

return 0;
}

/*
* Print error and quit.
*/
void hash_table_fatal_error(int t, char *el)
{
switch (t)
{
case 0:
printf("ERROR: Could not delete element '%s' from hash table.\n",
el);
break;
case 1:
printf("ERROR: Could not find key '%s' in hash table.\n", el);
break;
}

exit(0);
}

/*
* Create a hash table with size 'size' and return a hash table array.
*/
hash_table ** hash_table_create_table(int size)
{
register int i = 0;
hash_table **x;

x = (hash_table **) calloc(size, sizeof(hash_table *));

for (; i <= size; ++i)
{
x[i] = (hash_table *) calloc(1, sizeof(hash_table));
}

return x;
}

/*
* Returns a specified hash table element from table 't' with key 's',
* or NULL on failure.
*/
hash_table * hash_table_get_string_from_table(hash_table **t, char *s)
{
int id = hash_table_hash(s);
hash_table *q;

if (t[id]->data.uid != NULL && strcmp(t[id]->data.uid, s) != 0)
{
if (t[id]->x != NULL)
{
/* Move trough linked list */
for (q = t[id]->x; ; q = q->x)
{
if (strcmp(q->data.uid, s) == 0)
{
/* Found */
return q;
}

if (q->x == NULL)
{
break;
}
}
}

return NULL;
}

return (t[id]->data.uid == NULL ? NULL : t[id]);
}

/*
* Add 'data' to table 't' with key 'uid'.
*/
void hash_table_add_element(hash_table **t, char *uid, char *data)
{
int hash = hash_table_hash(uid);
hash_table *q = (hash_table *) calloc(1, sizeof(hash_table));
hash_table *c;

q->data.uid = strdup(uid);
q->data.d = strdup(data);
q->x = NULL;

c = t[hash];

if (c->data.uid != NULL)
{
while (c->x != NULL)
{
c = c->x;
}

c->x = q;
}
else
{
t[hash]->data.uid = strdup(uid);
t[hash]->data.d = strdup(data);
}
}

/*
* Delete element identified by 'uid' from hash table 't'.
* Returns: 1 on success, 0 on failure.
*/
int hash_table_delete_element(hash_table **t, char *uid)
{
int id = hash_table_hash(uid);
hash_table *q;

if (t[id]->data.uid == NULL)
{
return 0;
}

if (t[id]->data.uid != NULL && strcmp(t[id]->data.uid, uid) != 0)
{
/* Move trough linked list */
for (q = t[id]; ; q = q->x)
{
if (strcmp(q->x->data.uid, uid) == 0)
{
if (q->x->x != NULL)
{
free(q->x);
q->x = q->x->x;
}
else
{
free(q->x);
q->x = NULL;
}

return 1;
}

if (q->x == NULL)
{
break;
}
}

return 0;
}

if (t[id]->x == NULL)
{
t[id]->data.uid = NULL;
}
else
{
t[id]->data.uid = t[id]->x->data.uid;
t[id]->data.d = t[id]->x->data.d;
t[id]->x = t[id]->x->x;
}

return 1;
}

/***** NOT UNDER MY COPYRIGHT *****/
/*
* UNIX ELF hash.
* Published hash algorithm used in the UNIX ELF format for object
files.
*/
int hash_table_hash(char *data)
{
int h = 0, g;

while (*data)
{
h = (h << 4) + *data++;

if (g = h & 0xF0000000)
{
h ^= g >> 24;
}

h &= ~g;
}

return h % HASH_TABLE_SIZE;
}

# END CODE

41 Answers

China Blue Veins

8/11/2011 6:26:00 PM

In article <0c914904-6a93-4354-94a2-2bbeb6a53ddb@a17g2000yqk.googlegroups.com>,
lolzy@live.nl wrote:

> Hello comp.lang.c,
>
> This is a hash table implementation I wrote, please comment. Thanks in
> advance.

It's not that much more complicated to make a resizable hash table H.

H fields:
n: number of entries
a: number of allocated entries
t: pointer to entries of type table_data

deleted: variable of type table_data

initialise H:
H.n = 8
H.a = 0
H.t = calloc(H.n,sizeof table_data)

probe H for uid returning matching and firstfree:
matching = -1
firstfree = -1
for (h=hash_table_hash(h) % H.n; H.t[h].uid!=NULL; h=(h+3) % H.n)
if H.t[h].uid==deleted.uid and firstfree<0,
firstfree = h
else if strcmp(uid, H.t[h].uid)==0,
matching = h
if matching<0 then firstfree = h

reorganise H to size m:
n = H.n
t = H.t
H.n = m
H.a = 0
H.t = calloc(H.n,sizeof table_data)
for (k=0; k<n; k++)
if t[k].uid!=NULL and t[k].uid!=deleted.uid,
add table_data t[k] to H
free t

find uid in H returning data d:
matching, firstfree = probe H for uid
if matching>=0,
return H.t[h].d
if matching<0,
return NULL

add table_data e to H:
matching, firstfree = probe H for e.uid
if matching>=0,
H.t[matching].d = copy of e.d
if matching<0,
H.t[matching].uid = copy of e.uid
H.t[matching].d = copy of e.d
H.a++
if H.a/H.n >= 0.75,
reorganise H to size 2*H.n

delete uid from H:
matching, firstfree = probe H for uid
if matching>=0,
free H.t[matching].uid
free H.t[matching].d
H.t[matching] = deleted
H.a--
if H.a/H.n <= 0.25 and H.n > 8,
reorganise H to size H.n/2

> # START CODE
>
> /*
> * Hash table demo.
> * (C) Jori Koolstra - 19:40 11-8-2011
> */
>
>
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
>
>
> #define HASH_TABLE_SIZE 100
>
>
> #define HASH_TABLE_DELETE_ELEMENT_CHECK_FOR_ERRORS(t, n) if
> (hash_table_delete_element(t, n) == 0) hash_table_fatal_error(0, n)
>
> #define HASH_TABLE_GET_STRING_FROM_TABLE_CHECK_FOR_ERRORS(t, n, z) if
> ((z = hash_table_get_string_from_table(t, n)) == NULL)
> hash_table_fatal_error(1, n)
>
>
> struct table_data
> {
> char *uid;
> char *d;
> };
>
>
> typedef struct table_data table_data;
>
>
>
> struct hash_table
> {
> table_data data;
> struct hash_table *x;
> };
>
>
> typedef struct hash_table hash_table;
>
>
>
> hash_table ** hash_table_create_table(int);
> hash_table * hash_table_get_string_from_table(hash_table **, char *);
> int hash_table_hash(char *);
> void hash_table_add_element(hash_table **, char *, char *);
> int hash_table_delete_element(hash_table **, char *);
> void hash_table_fatal_error(int, char *);
>
>
>
> int main(void)
> {
> hash_table **table;
> hash_table *y;
>
> table = hash_table_create_table(HASH_TABLE_SIZE);
>
> hash_table_add_element(table, "John", "Hello John!");
> hash_table_add_element(table, "Kim", "Hello Kim!");
> hash_table_add_element(table, "Jori", "Hello Jori!");
> hash_table_add_element(table, "Jack", "Hello Jack!");
>
> HASH_TABLE_DELETE_ELEMENT_CHECK_FOR_ERRORS(table, "Kim");
>
> HASH_TABLE_GET_STRING_FROM_TABLE_CHECK_FOR_ERRORS(table, "Jack", y);
>
> printf("%s\n", y->data.d);
>
>
>
>
> return 0;
> }
>
>
>
> /*
> * Print error and quit.
> */
> void hash_table_fatal_error(int t, char *el)
> {
> switch (t)
> {
> case 0:
> printf("ERROR: Could not delete element '%s' from hash table.\n",
> el);
> break;
> case 1:
> printf("ERROR: Could not find key '%s' in hash table.\n", el);
> break;
> }
>
>
> exit(0);
> }
>
>
>
> /*
> * Create a hash table with size 'size' and return a hash table array.
> */
> hash_table ** hash_table_create_table(int size)
> {
> register int i = 0;
> hash_table **x;
>
>
> x = (hash_table **) calloc(size, sizeof(hash_table *));
>
> for (; i <= size; ++i)
> {
> x[i] = (hash_table *) calloc(1, sizeof(hash_table));
> }
>
> return x;
> }
>
>
>
> /*
> * Returns a specified hash table element from table 't' with key 's',
> * or NULL on failure.
> */
> hash_table * hash_table_get_string_from_table(hash_table **t, char *s)
> {
> int id = hash_table_hash(s);
> hash_table *q;
>
> if (t[id]->data.uid != NULL && strcmp(t[id]->data.uid, s) != 0)
> {
> if (t[id]->x != NULL)
> {
> /* Move trough linked list */
> for (q = t[id]->x; ; q = q->x)
> {
> if (strcmp(q->data.uid, s) == 0)
> {
> /* Found */
> return q;
> }
>
>
> if (q->x == NULL)
> {
> break;
> }
> }
> }
>
> return NULL;
> }
>
> return (t[id]->data.uid == NULL ? NULL : t[id]);
> }
>
>
>
> /*
> * Add 'data' to table 't' with key 'uid'.
> */
> void hash_table_add_element(hash_table **t, char *uid, char *data)
> {
> int hash = hash_table_hash(uid);
> hash_table *q = (hash_table *) calloc(1, sizeof(hash_table));
> hash_table *c;
>
> q->data.uid = strdup(uid);
> q->data.d = strdup(data);
> q->x = NULL;
>
>
> c = t[hash];
>
>
> if (c->data.uid != NULL)
> {
> while (c->x != NULL)
> {
> c = c->x;
> }
>
> c->x = q;
> }
> else
> {
> t[hash]->data.uid = strdup(uid);
> t[hash]->data.d = strdup(data);
> }
> }
>
>
>
> /*
> * Delete element identified by 'uid' from hash table 't'.
> * Returns: 1 on success, 0 on failure.
> */
> int hash_table_delete_element(hash_table **t, char *uid)
> {
> int id = hash_table_hash(uid);
> hash_table *q;
>
>
> if (t[id]->data.uid == NULL)
> {
> return 0;
> }
>
>
> if (t[id]->data.uid != NULL && strcmp(t[id]->data.uid, uid) != 0)
> {
> /* Move trough linked list */
> for (q = t[id]; ; q = q->x)
> {
> if (strcmp(q->x->data.uid, uid) == 0)
> {
> if (q->x->x != NULL)
> {
> free(q->x);
> q->x = q->x->x;
> }
> else
> {
> free(q->x);
> q->x = NULL;
> }
>
> return 1;
> }
>
>
> if (q->x == NULL)
> {
> break;
> }
> }
>
> return 0;
> }
>
>
> if (t[id]->x == NULL)
> {
> t[id]->data.uid = NULL;
> }
> else
> {
> t[id]->data.uid = t[id]->x->data.uid;
> t[id]->data.d = t[id]->x->data.d;
> t[id]->x = t[id]->x->x;
> }
>
> return 1;
> }
>
>
>
>
> /***** NOT UNDER MY COPYRIGHT *****/
> /*
> * UNIX ELF hash.
> * Published hash algorithm used in the UNIX ELF format for object
> files.
> */
> int hash_table_hash(char *data)
> {
> int h = 0, g;
>
> while (*data)
> {
> h = (h << 4) + *data++;
>
> if (g = h & 0xF0000000)
> {
> h ^= g >> 24;
> }
>
> h &= ~g;
> }
>
> return h % HASH_TABLE_SIZE;
> }
>
> # END CODE

--
I remember finding out about you, | How to loosen bolts with a hammer.
Everyday my mind is all around you,| I'm whoever you want me to be.
Looking out from my lonely room | Annoying Usenet one post at a time.
Day after day. | At least I can stay in character.

ram

8/11/2011 6:29:00 PM

lolzy@live.nl writes:
>x = (hash_table **) calloc(size, sizeof(hash_table *));
> for (; i <= size; ++i)
> {
> x[i] = (hash_table *) calloc(1, sizeof(hash_table));

x might be 0 here.

Keith Thompson

8/11/2011 6:50:00 PM

ram@zedat.fu-berlin.de (Stefan Ram) writes:
> lolzy@live.nl writes:
>>x = (hash_table **) calloc(size, sizeof(hash_table *));
>> for (; i <= size; ++i)
>> {
>> x[i] = (hash_table *) calloc(1, sizeof(hash_table));
>
> x might be 0 here.

And if x isn't null, the elements of x might not be null pointers;
there's no guarantee that a null pointer is represented as
all-bits-zero. If you're satisfied with portability only to
systems where null pointers *are* all-bits-zero, you can do that,
but I'd suggest at least documenting it. Or you can use malloc()
and then set each element to NULL in a loop.

--
Keith Thompson (The_Other_Keith) kst-u@mib.org <http://www.ghoti.ne...
Nokia
"We must do something. This is something. Therefore, we must do this."
-- Antony Jay and Jonathan Lynn, "Yes Minister"

Ralf Damaschke

8/11/2011 8:02:00 PM

Keith Thompson <kst-u@mib.org> wrote:

> ram@zedat.fu-berlin.de (Stefan Ram) writes:
>> lolzy@live.nl writes:
>>>x = (hash_table **) calloc(size, sizeof(hash_table *));
>>> for (; i <= size; ++i)
>>> {
>>> x[i] = (hash_table *) calloc(1, sizeof(hash_table));
>>
>> x might be 0 here.
>
> And if x isn't null, the elements of x might not be null pointers;

You are right, at the place marked "here" for a non-null x these
pointers are either null pointers or point to an object
sufficiently large to hold an instance of hash_table.

-- Ralf

Ike Naar

8/11/2011 10:03:00 PM

On 2011-08-11, lolzy@live.nl <lolzy@live.nl> wrote:
> hash_table ** hash_table_create_table(int size)
> {
> register int i = 0;

It's probably a good idea to drop the 'register' qualification
and leave this kind of optimization to the compiler.
I'd prefer to make 'size' and 'i' of type size_t.

> hash_table **x;
>
>
> x = (hash_table **) calloc(size, sizeof(hash_table *));

The cast is unnecessary; the recommended idiom is

x = calloc(size, sizeof *x);

calloc can fail, and return NULL; you should check for that.
It's not necessary to set the elements of the array to all-bits-zero,
because you immediately overwrite each element in the for loop below.
So you might as well allocate the array using

x = malloc(size * sizeof *x);

> for (; i <= size; ++i)
> {
> x[i] = (hash_table *) calloc(1, sizeof(hash_table));
> }

This looks like an off-by-error; x has only 'size' elements,
numbered 0 to size-1. The last iteration of the for loop assigns
to the nonexisting element x[size].
>
> return x;
> }

Scott Fluhrer

8/11/2011 10:15:00 PM

More comments...

<lolzy@live.nl> wrote in message
news:0c914904-6a93-4354-94a2-2bbeb6a53ddb@a17g2000yqk.googlegroups.com...
> Hello comp.lang.c,
>
> This is a hash table implementation I wrote, please comment. Thanks in
> advance.
>
>
> # START CODE
>
>
> /*
> * Print error and quit.
> */
> void hash_table_fatal_error(int t, char *el)
> {
> switch (t)
> {
> case 0:
> printf("ERROR: Could not delete element '%s' from hash table.\n",
> el);

In case this is used in a utility which produces output which is used in a
pipeline, you might want to make this:
fprintf( stderr, "Error message\n" );

> break;
> case 1:
> printf("ERROR: Could not find key '%s' in hash table.\n", el);
> break;
> }
>
>
> exit(0);
> }
>
>
>
> /*
> * Create a hash table with size 'size' and return a hash table array.
> */
> hash_table ** hash_table_create_table(int size)

What's the point in having the caller specify a hash table size, given that
your hash function always returns values between 0 and 99? If the caller
specifies a value less that 100, you have a potential array overwrite when
you add an element; if the caller specifies a value greater than 100, that's
just wasted space.

> {
> register int i = 0;
> hash_table **x;
>
>
> x = (hash_table **) calloc(size, sizeof(hash_table *));
>
> for (; i <= size; ++i)

Array overflow! You want:
for (; i < size; ++i)

> {
> x[i] = (hash_table *) calloc(1, sizeof(hash_table));
> }

I see you have the convention that you represent an empty list with a dummy
element. While this is neither unethical nor immoral, it is a bit
fattening, as it makes your search/insert/delete code more complex than it
needs to be. Why not represent an empty list with a NULL?

>
> return x;
> }
>
>
>
> /*
> * Returns a specified hash table element from table 't' with key 's',
> * or NULL on failure.
> */
> hash_table * hash_table_get_string_from_table(hash_table **t, char *s)
> {
> int id = hash_table_hash(s);
> hash_table *q;
>
> if (t[id]->data.uid != NULL && strcmp(t[id]->data.uid, s) != 0)
> {
> if (t[id]->x != NULL)
> {
> /* Move trough linked list */
> for (q = t[id]->x; ; q = q->x)
> {
> if (strcmp(q->data.uid, s) == 0)
> {
> /* Found */
> return q;
> }
>
>
> if (q->x == NULL)
> {
> break;
> }
> }
> }
>
> return NULL;
> }
>
> return (t[id]->data.uid == NULL ? NULL : t[id]);
> }

Sigh, this is rather more complex that it really needs to be:

for (q = t[hash_table_hash(s)]; q; q = q->link) {
if (q->data.uid && 0 == strcmp( q->data.uid, s ))
return s;
}
return NULL;

The insert/remove functions can also be simplified a good bit.

>
>
>
> /*
> * Add 'data' to table 't' with key 'uid'.
> */
> void hash_table_add_element(hash_table **t, char *uid, char *data)
> {
> int hash = hash_table_hash(uid);
> hash_table *q = (hash_table *) calloc(1, sizeof(hash_table));
> hash_table *c;
>
> q->data.uid = strdup(uid);
> q->data.d = strdup(data);
> q->x = NULL;
>
>
> c = t[hash];
>
>
> if (c->data.uid != NULL)
> {
> while (c->x != NULL)
> {
> c = c->x;
> }
>
> c->x = q;
> }
> else
> {
> t[hash]->data.uid = strdup(uid);
> t[hash]->data.d = strdup(data);
Don't you leak the q element (and the strings it points to) here?

> }
> }
>
>
>
> /*
> * Delete element identified by 'uid' from hash table 't'.
> * Returns: 1 on success, 0 on failure.
> */
> int hash_table_delete_element(hash_table **t, char *uid)
> {
> int id = hash_table_hash(uid);
> hash_table *q;
>
>
> if (t[id]->data.uid == NULL)
> {
> return 0;
> }
>
>
> if (t[id]->data.uid != NULL && strcmp(t[id]->data.uid, uid) != 0)
> {
> /* Move trough linked list */
> for (q = t[id]; ; q = q->x)
> {
> if (strcmp(q->x->data.uid, uid) == 0)
> {
> if (q->x->x != NULL)
> {
> free(q->x);
> q->x = q->x->x;
> }
> else
> {
> free(q->x);
> q->x = NULL;
> }
>
> return 1;
> }
>
>
> if (q->x == NULL)
> {
> break;
> }
> }
>
> return 0;
> }
>
>
> if (t[id]->x == NULL)
> {
> t[id]->data.uid = NULL;
> }
> else
> {
> t[id]->data.uid = t[id]->x->data.uid;
> t[id]->data.d = t[id]->x->data.d;
> t[id]->x = t[id]->x->x;
> }
>
> return 1;
> }
>
>
>
>
> /***** NOT UNDER MY COPYRIGHT *****/
> /*
> * UNIX ELF hash.
> * Published hash algorithm used in the UNIX ELF format for object
> files.
> */
> int hash_table_hash(char *data)

This hash function has the property that the lower 2 bits of the result will
always be the lower 2 bits of the last character of the string being hashed.
Because the hash table is depending on the hashes to be well distributed
(even if the strings might not be uniformly random), this is less than
desirable.

Also, more generally, if this hash function were for general use as a
utility, it's probably desirable for the user to be able to specify his own
hash function. The best that a generic hash function can do is act
randomly; if the user knows about nonrandomness in his strings being hashed,
he can often exploit that to create a hash function that actually works
(generates a more uniform distribution) better than random (!).

> {
> int h = 0, g;
>
> while (*data)
> {
> h = (h << 4) + *data++;
>
> if (g = h & 0xF0000000)
> {
> h ^= g >> 24;
> }
>
> h &= ~g;
> }
>
> return h % HASH_TABLE_SIZE;
> }

>
> # END CODE

lolzy

8/12/2011 6:56:00 AM

Thanks for all your comments. I still have some questions :) :

* Why do you prefer malloc() over calloc() ?
* Why not cast for C++ compatibility?
* Why shouldn't I use the register keyword for loop variables?

lolzy

8/12/2011 8:39:00 AM

I took a look at the code, and I optimized it to:
(Still looking for answers for my question 1 post up)
Thanks in advance.

/*
* Hash table demo.
* (C) Jori Koolstra - 19:40 11-8-2011
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define HASH_TABLE_SIZE 100

#define HASH_TABLE_DELETE_ELEMENT_CHECK_FOR_ERRORS(t, n) if
(hash_table_delete_element(t, n) == 0) hash_table_fatal_error(0, n)

#define HASH_TABLE_GET_STRING_FROM_TABLE_CHECK_FOR_ERRORS(t, n, z) if
((z = hash_table_get_string_from_table(t, n)) == NULL)
hash_table_fatal_error(1, n)

struct table_data
{
char *uid;
char *d;
};

typedef struct table_data table_data;

struct hash_table
{
table_data data;
struct hash_table *x;
};

typedef struct hash_table hash_table;

hash_table ** hash_table_create_table(int);
hash_table * hash_table_get_string_from_table(hash_table **, char *);
int hash_table_hash(char *);
void hash_table_add_element(hash_table **, char *, char *);
int hash_table_delete_element(hash_table **, char *);
void hash_table_fatal_error(int, char *);
void * hash_table_safe_calloc(size_t, size_t);

int main(void)
{
hash_table **table;
hash_table *y;

table = hash_table_create_table(HASH_TABLE_SIZE);

hash_table_add_element(table, "John", "Hello John!");
hash_table_add_element(table, "Kim", "Hello Kim!");
hash_table_add_element(table, "Jori", "Hello Jori!");
hash_table_add_element(table, "Jack", "Hello Jack!");

HASH_TABLE_DELETE_ELEMENT_CHECK_FOR_ERRORS(table, "Kim");

HASH_TABLE_GET_STRING_FROM_TABLE_CHECK_FOR_ERRORS(table, "Jack", y);

printf("%s\n", y->data.d);

return 0;
}

/*
* Print error and quit.
*/
void hash_table_fatal_error(int t, char *el)
{
switch (t)
{
case 0:
printf("ERROR: Could not delete element '%s' from hash table.\n",
el);
break;
case 1:
printf("ERROR: Could not find key '%s' in hash table.\n", el);
break;
case 2:
printf("ERROR: Could not allocate memory.\n");
break;
}

exit(0);
}

/*
* Create a hash table with size 'size' and return a hash table array.
*/
hash_table ** hash_table_create_table(int size)
{
register int i = 0;
hash_table **x;

x = (hash_table **) hash_table_safe_calloc(size, sizeof(hash_table
*));

for (; i < size; ++i)
{
x[i] = (hash_table *) hash_table_safe_calloc(1, sizeof(hash_table));
}

return x;
}

/*
* Returns a specified hash table element from table 't' with key 's',
* or NULL on failure.
*/
hash_table * hash_table_get_string_from_table(hash_table **t, char *s)
{
hash_table *q;

for (q = t[hash_table_hash(s)]; q; q = q->x)
{
if (q->data.uid && 0 == strcmp(q->data.uid, s))
{
return q;
}
}

return NULL;
}

/*
* Add 'data' to table 't' with key 'uid'.
*/
void hash_table_add_element(hash_table **t, char *uid, char *data)
{
int h = hash_table_hash(uid);
hash_table *q, *c = t[h];

if (c->data.uid != NULL)
{
q = (hash_table *) hash_table_safe_calloc(1, sizeof(hash_table));
q->data.uid = strdup(uid);
q->data.d = strdup(data);

for (; c->x != NULL; c = c->x)

c->x = q;
}
else
{
t[h]->data.uid = strdup(uid);
t[h]->data.d = strdup(data);
}
}

/*
* Delete element identified by 'uid' from hash table 't'.
* Returns: 1 on success, 0 on failure.
*/
int hash_table_delete_element(hash_table **t, char *uid)
{
int h = hash_table_hash(uid);
hash_table *q;

if (t[h]->data.uid != NULL && strcmp(t[h]->data.uid, uid) != 0)
{
/* Move trough linked list */
for (q = t[h]; q; q = q->x)
{
if (strcmp(q->x->data.uid, uid) == 0)
{
free(q->x);
q->x = q->x->x;

return 1;
}
}

return 0;
}

if (t[h]->x == NULL)
{
if (t[h]->data.uid == NULL)
{
return 0;
}
else
{
t[h]->data.uid = NULL;
}
}
else
{
t[h]->data.uid = t[h]->x->data.uid;
t[h]->data.d = t[h]->x->data.d;
t[h]->x = t[h]->x->x;
}

return 1;
}

/*
* Allocate space with error checking.
*/
void * hash_table_safe_calloc(size_t num, size_t size)
{
void *p;

if ((p = calloc(num, size)) == NULL)
{
hash_table_fatal_error(2, "");
}

return p;
}

/***** NOT UNDER MY COPYRIGHT *****/
/*
* UNIX ELF hash.
* Published hash algorithm used in the UNIX ELF format for object
files.
*/
int hash_table_hash(char *data)
{
int h = 0, g;

while (*data)
{
h = (h << 4) + *data++;

if (g = h & 0xF0000000)
{
h ^= g >> 24;
}

h &= ~g;
}

return h % HASH_TABLE_SIZE;
}

John Doe

8/12/2011 8:48:00 AM

On Thu, 11 Aug 2011 23:55:45 -0700, lolzy wrote:

> Thanks for all your comments. I still have some questions :) :
>
> * Why do you prefer malloc() over calloc() ?

calloc() zeros the allocated memory, which is inefficient if you don't
actually need to do so.

> * Why not cast for C++ compatibility?

There are arguments for and against casting. The main argument against is
that casting can prevent a diagnostic being issued if the prototype isn't
in scope.

> * Why shouldn't I use the register keyword for loop variables?

At best, it's a waste of both keystrokes and bytes; the compiler will
probably ignore it when deciding where to store the variable (i.e. the
only effect of a "register" qualifier is likely to be that you get an
error if you attempt to take the variable's address). At worst, the
compiler might actually pay attention to it, and you get less efficient
code than the compiler would have generated without it.

The "register" qualifier originated in an era where compilers performed a
fairly direct translation from source code to object code, without any
significant optimisation. It is somewhere between useless and worse than
useless with a modern compiler.

Michael Angelo Ravera

8/12/2011 9:54:00 AM

One thing that you may want to do is to use the size as more of a suggestion than a firm number. Many hash functions work better when the main size is a prime number. You can pick a prime number that is close to the size requested or pick the first prime that is not less than the size requested. Primacy is not really a requirement when the size of the table gets really large. It would be exceedingly reasonable to keep a list of primes less than 256 and require that the main size have no factors less than 256. This would allow you easily to pick a prime number less 64K and give you a "size that isn't completely stupid" for larger tables. For sizes less than 256, you just scan the prime table upward to find the first prime.

If you don't always use a realy cool hash function, then I agree that you might like to use the "object inspired" approach of storing a function pointer to a hash function.

comp.lang.c

Hash table implementation.

lolzy

China Blue Veins

ram

Keith Thompson

Ralf Damaschke

Ike Naar

Scott Fluhrer

lolzy

lolzy

John Doe

Michael Angelo Ravera

x Login to ForumsZone