Tuesday lecture code

(download)
(back to top)

hash.c


#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <err.h>
#include <sysexits.h>

#ifndef SIZE_T_MAX
#define SIZE_T_MAX 1ULL<<31
#endif

enum approach {
	HASH_PHP,
	HASH_PLUS,
	HASH_PLUSTIMES,
	HASH_PLUSPLUS,
	HASH_UNIVERSAL
};

static size_t key_hash (char *k, size_t N, enum approach approach);

#define wrapping_add(x,y,max) \
	(((x) + (y)) % (max))    
    
int main (int argc, char *argv[])
{
	if (argc != 2 && argc != 3)
		errx (EX_USAGE, "usage: hash <size> [<approach>]");

	size_t ht_size = (size_t) strtol (argv[1], NULL, 10);

	enum approach app = HASH_UNIVERSAL;
	if (argc == 3) {
		long x = strtol (argv[2], NULL, 10);
		assert (HASH_PHP <= x && x <= HASH_UNIVERSAL);
		app = (enum approach) x;
	}

	char buf[BUFSIZ];
	while (fgets (buf, BUFSIZ, stdin) != NULL) {
		*(strrchr (buf, '\n')) = '\0';
		printf ("%zu\t%s\n", key_hash (buf, ht_size, app), buf);
	}
}

static size_t key_hash (char *k, size_t N, enum approach approach)
{
	switch (approach) {
	case HASH_PHP:
		return strlen (k) % N;

	case HASH_PLUS: {
		size_t h = 0;
		for (size_t i = 0; k[i] != '\0'; i++) // h += k[i];
			h = wrapping_add (h, (unsigned) k[i], SIZE_T_MAX);
		return h % N;
	}

	case HASH_PLUSTIMES: {
		size_t h = 0;
		for (size_t i = 0; k[i] != '\0'; i++) // h += k[i] * i;
			h = wrapping_add (h, i * (unsigned) k[i], SIZE_T_MAX);
		return h % N;
	}

	case HASH_PLUSPLUS: {
		size_t h = 0;
		unsigned a = 127; // prime!
		for (size_t i = 0; k[i] != '\0'; i++) {
#if 0
			printf ("at [%zu], a * h + k[i] = %u * %zu + %hhu\n",
					i, a, h, k[i]);
#endif
			h = wrapping_add (a * h, (unsigned) k[i], SIZE_T_MAX) % N;
		}
		return h;
	}

	case HASH_UNIVERSAL: {
		size_t h = 0;
		unsigned a = 31415, b = 21783;
		for (size_t i = 0; k[i] != '\0'; i++) {
			a = (a * b) % (N - 1);
#if 0
			printf ("at [%zu], a * h + k[i] ="
					" %u * %zu + %hhu ="
					" %zu\n",
					i, a, h, k[i], ((a * h) + k[i]) % N);
#endif
			h = wrapping_add (a * h, (unsigned)k[i], SIZE_T_MAX) % N;
		}
		return h;
	}
	}
}


(download)
(back to top)

ht.h

#include <stdlib.h>
#include <limits.h>

#ifndef SIZE_T_MAX
#define SIZE_T_MAX 1ULL<<31
#endif

typedef struct ht ht;

typedef char *key;
#define key_clone(x) (strdup(x))
#define key_drop(x) (free((x)))
#define key_eq(a,b) (strcmp((a), (b)) == 0)

typedef char *value;
#define value_clone(x) (strdup(x))
#define value_drop(x) (free(x))

ht *ht_new (size_t size);
value ht_select (ht *, key);
void ht_insert (ht *, key, value);
void ht_update (ht *, key, value);
void ht_upsert (ht *, key, value);
void ht_delete (ht *, key);
void ht_keys (ht *, key *);
size_t ht_count (ht *);
void ht_drop (ht *);

void ht_show (ht *);

#define HASH_UNIVERSAL

#define wrapping_add(x,y,max) \
	(((x) + (y)) % (max))

#ifdef HASH_PHP
#define key_hash(k,N) \
	({ \
		strlen ((k)) % (N); \
	})
#endif

#ifdef HASH_PLUS
#define key_hash(k,N) \
	({ \
		size_t h = 0; \
		for (size_t i = 0; k[i] != '\0'; i++) \
			h = wrapping_add (h, (unsigned)k[i], SIZE_T_MAX); \
		h % N; \
	})
#endif

#ifdef HASH_PLUSPLUS
#define key_hash(k,N) \
	({ \
		size_t h = 0; \
		unsigned a = 127; \
		for (size_t i = 0; k[i] != '\0'; i++) \
			h = wrapping_add (a * h, (unsigned)k[i], SIZE_T_MAX) % N; \
		h; \
	})
#endif

#ifdef HASH_UNIVERSAL
#define key_hash(k,N) \
	({ \
		size_t h = 0; \
		unsigned a = 31415, b = 21783; \
		for (size_t i = 0; k[i] != '\0'; i++) { \
			a = (a * b) % (N - 1); \
			h = wrapping_add (a * h, (unsigned)k[i], SIZE_T_MAX) % N; \
		} \
		h; \
	})
#endif


(download)
(back to top)

htc.c

#include <string.h>
#include <stdio.h>

#include "ht.h"

int main (void)
{
	size_t ht_size = 17;
	ht *ht = ht_new (ht_size);
	ht_show (ht);

	char buf[BUFSIZ];
	while (fgets (buf, BUFSIZ, stdin) != NULL) {
		// lose the newline
		*(strrchr (buf, '\n')) = '\0';
		// split on tab
		char *val = strchr (buf, '\t');
		*val = '\0';
		val++;

		//printf ("'%s' [%zu]\n", buf, key_hash (buf, ht_size));
		ht_insert (ht, buf, val);
	}

	ht_show (ht);
}


(download)
(back to top)

ht_none.c

#include <assert.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>

#include "ht.h"

typedef struct ht_entry ht_entry;
struct ht {
	size_t nitems, nslots;
	struct ht_entry { key key; value value; } *entries;
};

ht *ht_new (size_t size)
{
	ht *new = malloc (sizeof *new);
	*new = (ht) {
		.nitems = 0,
		.nslots = size,
		.entries = calloc (size, sizeof (ht_entry))
	};
	return new;
}

void ht_insert (ht *ht, key key, value value)
{
	size_t hash = key_hash (key, ht->nslots);
	assert (ht->entries[hash].key == NULL);

	ht->entries[hash] = (ht_entry) {
		.key = key_clone (key),
		.value = value_clone (value),
	};
	ht->nitems++;
}

static void ht_resize (ht *ht, size_t old, size_t new)
{
	ht_entry *old_entries = ht->entries;
	ht->entries = calloc (new, sizeof (ht_entry));
	ht->nslots = new; ht->nitems = 0;
	for (size_t i = 0; i < old; i++) {
		ht_insert (ht, old_entries[i]);
	}
	free (old_entries);
}

void ht_drop (ht *ht)
{
	free (ht->entries);
	free (ht);
}

void ht_show (ht *ht)
{
	assert (ht != NULL);
	printf ("ht<%p>, contains %zu items\n", (void *)ht, ht->nitems);
	for (size_t i = 0; i < ht->nslots; i++) {
		printf ("\t[%zu] =\n", i);
		ht_entry *curr = &ht->entries[i];
		printf ("\t\t{ '%s' => '%s' }\n", curr->key, curr->value);
	}
}


(download)
(back to top)

ht_ll.c

#include <assert.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>

#include "ht.h"

typedef struct ht_entry ht_entry;
struct ht {
	size_t nitems, nslots;
	struct ht_entry {
		key key;
		value value;
		ht_entry *next;
	} **entries;
};

ht *ht_new (size_t size)
{
	ht *new = malloc (sizeof *new);
	*new = (ht) {
		.nitems = 0,
		.nslots = size,
		.entries = calloc (size, sizeof (ht_entry *))
	};
	return new;
}

void ht_insert (ht *ht, key key, value value)
{
	size_t hash = key_hash (key, ht->nslots);
	ht_entry *new = malloc (sizeof *new);
	(*new) = (ht_entry) {
		.key = key_clone (key),
		.value = value_clone (value),
		.next = ht->entries[hash],
	};
	ht->entries[hash] = new;
	ht->nitems++;
}

value ht_select (ht *ht, key key)
{
	size_t hash = key_hash (key, ht->nslots);

	for (ht_entry *curr = ht->entries[hash];
		 curr != NULL; curr = curr->next)
		if (key_eq (key, curr->key))
			return curr->value;

	return NULL;
}

void ht_delete (ht *ht, key key)
{
   	size_t hash = key_hash (key, ht->nslots);
	for (ht_entry *prev, *curr = ht->entries[hash];
		 curr != NULL; curr = curr->next) {
		prev = curr;
		if (! key_eq (key, curr->key)) continue;
		prev->next = curr->next;
		value_drop (curr->value);
		key_drop (curr->key);
		free (curr);
		curr = prev->next;
	}
}

void ht_drop (ht *ht)
{
	free (ht->entries);
	free (ht);
}

void ht_show (ht *ht)
{
	assert (ht != NULL);
	printf ("ht<%p>, contains %zu items\n", (void *)ht, ht->nitems);
	for (size_t i = 0; i < ht->nslots; i++) {
		printf ("\t[%zu] =\n", i);
		for (ht_entry *curr = ht->entries[i];
			 curr != NULL; curr = curr->next)
			printf ("\t\t{ '%s' => '%s' }\n", curr->key, curr->value);
	}
}


(download)
(back to top)

ht.c

#include <assert.h>
#include <err.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>

#include "ht.h"

typedef struct ht_entry ht_entry;
struct ht {
	size_t count;
	size_t size;
	struct ht_entry {
		char *key;
		char *value;
		ht_entry *next;
	} **table;
};

ht *ht_new (size_t size)
{
	ht *new = malloc (sizeof *new);
	if (new == NULL) err (EX_OSERR, "couldn't allocate ht");

	(*new) = (ht){
		.count = 0,
		.size = size,
		.table = calloc (size, sizeof (ht_entry *)),
	};

	if (new->table == NULL)
		err (EX_OSERR, "couldn't allocate hashtable entries");

	return new;
}

value ht_select (ht *ht, key key)
{
	assert (ht != NULL);
	assert (key != NULL);

	size_t hash = key_hash (key, ht->size);
	ht_entry **curr = &ht->table[hash];
	while (*curr != NULL) {
		if (key_eq (key, (*curr)->key))
			return (*curr)->value;
		curr = &((*curr)->next);
	}

	return NULL;
}

void ht_insert (ht *ht, key key, value value)
{
	assert (ht != NULL);
	assert (key != NULL);
	assert (value != NULL);

	// create a new entry
	ht_entry *new = malloc (sizeof *new);
	if (new == NULL) err (EX_OSERR, "couldn't allocate ht_entry");
	(*new) = (ht_entry) {
		.key = key_clone (key),
		.value = value_clone (value),
		.next = NULL
	};

	// tail-insert into the chain
	size_t hash = key_hash (key, ht->size);
	ht_entry **curr = &ht->table[hash];
	while (*curr != NULL)
		curr = &((*curr)->next);
	(*curr) = new;

	ht->count++;
}

void ht_update (ht *ht, key key, value value)
{
	assert (ht != NULL);
	assert (key != NULL);
	assert (value != NULL);

	size_t hash = key_hash (key, ht->size);
	ht_entry **curr = &ht->table[hash];
	while (*curr != NULL) {
		if (key_eq (key, (*curr)->key)) break;
		curr = &((*curr)->next);
	}
	if (*curr == NULL) return;

	value_drop ((*curr)->value);
	(*curr)->value = value_clone (value);
}

void ht_upsert (ht *ht, key key, value value)
{
	assert (ht != NULL);
	assert (key != NULL);
	assert (value != NULL);

	// if exists, update, else insert
	if (ht_select (ht, key) != NULL)
		ht_update (ht, key, value);
	else
		ht_insert (ht, key, value);
}

void ht_delete (ht *ht, key key)
{
	assert (ht != NULL);
	assert (key != NULL);

	size_t hash = key_hash (key, ht->size);
	ht_entry *prev = NULL, *curr = ht->table[hash];

	while (curr != NULL) {
		if (key_eq (key, curr->key)) break;
		prev = curr;
		curr = curr->next;
	}

	if (prev != NULL)
		prev->next = (curr != NULL) ? curr->next : NULL;
	else
		ht->table[hash] = curr->next;

	if (curr != NULL) {
		key_drop (curr->key);
		value_drop (curr->value);
		free (curr);
		ht->count--;
	}

	return;
}

void ht_drop (ht *ht)
{
	if (ht == NULL) return;

	for (size_t i = 0; i < ht->size; i++) {
		for (ht_entry *tmp, *curr = ht->table[i];
			 curr != NULL; curr = tmp) {
			tmp = curr->next;
			key_drop (curr->key);
			value_drop (curr->value);
			free (curr);
		}
	}

	free (ht->table);
	free (ht);
}

void ht_keys (ht *ht, key *dest)
{
	assert (ht != NULL);
	assert (dest != NULL);

	size_t idx = 0;
	for (size_t i = 0; i < ht->size; i++)
		for (ht_entry *curr = ht->table[i];
			 curr != NULL; curr = curr->next)
			dest[idx++] = curr->key;

	assert (idx <= ht->count);
}

size_t ht_count (ht *ht)
{
	assert (ht != NULL);
	return ht->count;
}


void ht_show (ht *ht)
{
	assert (ht != NULL);
	printf ("ht<%p>, contains %zu items\n", (void *)ht, ht->count);
	for (size_t i = 0; i < ht->size; i++) {
		printf ("\t[%zu] =\n", i);
		for (ht_entry *curr = ht->table[i];
			 curr != NULL; curr = curr->next)
			printf ("\t\t{ '%s' => '%s' }\n", curr->key, curr->value);
	}
}