❖ Hashing: Reminder |
Goal is to use keys as indexes, e.g.
courses["COMP3311"] = "Database Systems";
printf("%s\n", courses["COMP3311"]);
Since strings can't be indexes in C, use via a hash function, e.g.
courses[h("COMP3311")] = "Database Systems";
printf("%s\n", courses[h("COMP3311")]);
Hash function h
Problem: collisions, where k ≠ j but hash(k,N) = hash(j,N)
❖ Collision Resolution |
Three approaches to dealing with hash collisions:
Itemhash()❖ Separate Chaining |
Solve collisions by having multiple items per array entry.
Make each element the start of linked-list of Items.
All items in a given list have the same hash()
❖ ... Separate Chaining |
Concrete data structure for hashing via chaining
typedef struct HashTabRep {
List *lists; // array of Lists of Items
int N; // # elements in array
int nitems; // # items stored in HashTable
} HashTabRep;
HashTable newHashTable(int N)
{
HashTabRep *new = malloc(sizeof(HashTabRep));
assert(new != NULL);
new->lists = malloc(N*sizeof(List));
assert(new->lists != NULL);
for (int i = 0; i < N; i++)
new->lists[i] = newList();
new->N = N; new->nitems = 0;
return new;
}
❖ ... Separate Chaining |
Using the List
#include "List.h"
Item *HashGet(HashTable ht, Key k)
{
int i = hash(k, ht->N);
return ListSearch(ht->lists[i], k);
}
Even without List
Using sorted lists gives only small performance gain.
❖ ... Separate Chaining |
Other list operations are also simple:
#include "List.h"
void HashInsert(HashTable ht, Item it) {
Key k = key(it);
int i = hash(k, ht->N);
ListInsert(ht->lists[i], it);
}
void HashDelete(HashTable ht, Key k) {
int i = hash(k, ht->N);
ListDelete(ht->lists[i], k);
}
Essentially: select a list; operate on that list.
❖ ... Separate Chaining |
Cost analysis:
❖ Linear Probing |
Collision resolution by finding a new location for Item
❖ ... Linear Probing |
Concrete data structures for hashing via linear probing:
typedef struct HashTabRep {
Item **items; // array of pointers to Items
int N; // # elements in array
int nitems; // # items stored in HashTable
} HashTabRep;
HashTable newHashTable(int N)
{
HashTabRep *new = malloc(sizeof(HashTabRep));
assert(new != NULL);
new->items = malloc(N*sizeof(Item *));
assert(new->items != NULL);
for (int i = 0; i < N; i++) new->items[i] = NULL;
new->N = N; new->nitems = 0;
return new;
}
❖ ... Linear Probing |
Insert function for linear probing:
void HashInsert(HashTable ht, Item it)
{
assert(ht->nitems < ht->N);
int N = ht->N;
Key k = key(it);
Item **a = ht->items;
int i = hash(k,N);
for (int j = 0; j < N; j++) {
if (a[i] == NULL) break;
if (equal(k,key(*(a[i])))) break;
i = (i+1) % N;
}
if (a[i] == NULL) ht->nitems++;
if (a[i] != NULL) free(a[i]);
a[i] = copy(it);
}
❖ ... Linear Probing |
Search function for linear probing:
Item *HashGet(HashTable ht, Key k)
{
int N = ht->N;
Item **a = ht->items;
int i = hash(k,N);
for (int j = 0; j < N; j++) {
if (a[i] == NULL) break;
if (equal(k,key(*(a[i]))))
return a[i];
i = (i+1) % N;
}
return NULL;
}
❖ ... Linear Probing |
Search cost analysis:
Item| load (α) | 0.50 | 0.67 | 0.75 | 0.90 |
| search hit | 1.5 | 2.0 | 3.0 | 5.5 |
| search miss | 2.5 | 5.0 | 8.5 | 55.5 |
❖ ... Linear Probing |
Deletion slightly tricky for linear probing.
Need to ensure no NULL
(i.e. previously relocated items moved to appropriate location)
❖ ... Linear Probing |
Delete function for linear probing:
void HashDelete(HashTable ht, Key k)
{
int N = ht->N;
Item *a = ht->items;
int i = hash(k,N);
for (int j = 0; j < N; j++) {
if (a[i] == NULL) return; // k not in table
if (equal(k,key(*(a[i])))) break;
i = (i+1) % N;
}
free(a[i]); a[i] = NULL; ht->nitems--;
// clean up probe path
i = (i+1) % N;
while (a[i] != NULL) {
Item it = *(a[i]);
a[i] = NULL; // remove 'it'
ht->nitems--;
HashInsert(ht, it); // insert 'it' again
i = (i+1) % N;
}
}
❖ ... Linear Probing |
A problem with linear probing: clusters
E.g. insert 5, 6, 15, 16, 14, 25, with hash(k) = k%10
❖ Double Hashing |
Double hashing improves on linear probing:
hash2()❖ ... Double Hashing |
Concrete data structures for hashing via double hashing:
typedef struct HashTabRep {
Item **items; // array of pointers to Items
int N; // # elements in array
int nitems; // # items stored in HashTable
int nhash2; // second hash mod
} HashTabRep;
#define hash2(k,N2) (((k)%N2)+1)
HashTable newHashTable(int N)
{
HashTabRep *new = malloc(sizeof(HashTabRep));
assert(new != NULL);
new->items = malloc(N*sizeof(Item *));
assert(new->items != NULL);
for (int i = 0; i < N; i++)
new->items[i] = NULL;
new->N = N; new->nitems = 0;
new->nhash2 = findSuitablePrime(N);
return new;
}
❖ ... Double Hashing |
Search function for double hashing:
Item *HashGet(HashTable ht, Key k)
{
Item **a = ht->items;
int N = ht->N;
int i = hash(k,N);
int incr = hash2(k,ht->nhash2);
for (int j = 0, j < N; j++) {
if (a[i] == NULL) break; // k not found
if (equal(k,key(*(a[i]))) return a[i];
i = (i+incr) % N;
}
return NULL;
}
❖ ... Double Hashing |
Insert function for double hashing:
void HashInsert(HashTable ht, Item it)
{
assert(ht->nitems < ht->N); // table full
Item **a = ht->items;
Key k = key(it);
int N = ht->N;
int i = hash(k,N);
int incr = hash2(k,ht->nhash2);
for (int j = 0, j < N; j++) {
if (a[i] == NULL) break;
if (equal(k,key(*(a[i])))) break;
i = (i+incr) % N;
}
if (a[i] == NULL) ht->nitems++;
if (a[i] != NULL) free(a[i]);
a[i] = copy(it);
}
❖ ... Double Hashing |
Search cost analysis:
Item
| load (α) | 0.5 | 0.67 | 0.75 | 0.90 |
| search hit | 1.4 | 1.6 | 1.8 | 2.6 |
| search miss | 1.5 | 2.0 | 3.0 | 5.5 |
Can be significantly better than linear probing
❖ Hashing Summary |
Collision resolution approaches:
1, complex deletion
1
For arrays, once M exceeds initial choice of N,
Produced: 10 Jul 2020