This is a (more or less) continuation of a previous post I made: Generic implementation of a hashtable with double-linked list.
I got lots of valuable feedback, so I decided to rewrite the entire module from scratch to improve readability, performance and also to add new features.
Specifications:
- The hashmap implementation is able to store any data (generic)
- The hashmap provides collision handling using a single linked list.
- The hashmap dynamically resizes, to allow for good performance
- With many collisions (e.g. because of a small bucket count), access-time per element on average is not O(1)
- Therefore, when a certain threshold is exceeded (150%), the hashmap is resized, to allow for a better distribution of entries, and hence for faster access times.
Review Request:
I’d like to get feedback for the following things:
- Is the API of the hashmap intuitive/easy to use? Is it consistent (Parameters passed, values returned, Naming conventions etc)
- I plan on re-using this module in future projects
- Is there room for performance improvements? (E.g. are there any checks or loops that could be avoided?)
- I am especially thinking about the functions that directly manipulate the buckets, such as
hm_get
,hm_set
,hm_delete
andhm_resize
.
- I am especially thinking about the functions that directly manipulate the buckets, such as
- The test functions in
main.c
don’t need to be reviewed, as they will be replaced with proper unit tests in the future. - Are there any bad practices I didn’t catch? (e.g Redundant code etc)
hashmap.h
#ifndef HASHTABLE_H
#define HASHTABLE_H
#include <stddef.h>
#include <stdbool.h>
typedef struct entry {
void *key;
const void *value;
struct entry *next;
} entry;
typedef struct hashmap {
size_t buckets_count;
size_t entries_count;
struct entry **buckets;
} hashmap;
size_t hm_compute_hash(size_t key, size_t bucket_count);
/**
* Initializes a new hashmap instance with the default size
* @return The hashmap instance
*/
hashmap *hm_new();
/**
* Returns a pointer to the value from the entry in the hashmap given the key, or NULL, if entry doesn't exist.
* @param hm The hashmap instance
* @param key The entry key
* @return The pointer to the value of the matching entry, or NULL, if key is not valid.
*/
const void *hm_get(hashmap *hm, void *key);
/**
* Adds a new entry to the hashmap
* @param phm Pointer to the hashmap instance
* @param key The entry key
* @param value The value associated with the key
* @return Returns 0 if element was added successfully, otherwise a negative value is returned.
*/
int hm_set(hashmap **phm, void *key, const void *value);
/**
* Deletes an entry from the hashmap
* @param hm The hashmap instance
* @param key The key to the entry to delete
*/
void hm_delete(hashmap **phm, void *key);
/**
* Prints the hashtable in a human readable format to stdout
* @param hm The hashmap instance
*/
void hm_print(hashmap *hm);
/**
* Searches an entry by key in a bucket
* @param hm The hashmap instance
* @param key The entry key
* @return Pointer to the entry if found, otherwise NULL
*/
struct entry *hm_entry_search(struct hashmap *hm, void *key);
/**
* Creates a new entry that is accepted by that hashmap
* @param key The entry key
* @param value The value of the entry
* @return Returns a pointer to the created entry, or NULL, if an error occurred.
*/
struct entry *entry_create(void *key, const void *value);
/**
* Checks if the hashmap should be expanded to remain efficient
* @param hm The hashmap instance
* @return Returns the new bucket size, if expanding is recommended, otherwise 0
*/
int hm_should_grow(struct hashmap *hm);
/**
* Checks if the hashmap should be shrinked to remain efficient
* @param hm The hashmap instance
* @return Returns the new bucket size, if shrinking is recommended, otherwise 0
*/
int hm_should_shrink(struct hashmap *hm);
/**
* Creates a new larger or smaller hashtable with the given size.
* All entries get relinked, and the old hashtable instance gets replaced with the new one.
* @param hm_old The hashmap instance that should be expanded
* @param new_size The amount of buckets that the new hashmap instance should have
* @return The new (expanded) hashmap instance
*/
hashmap *hm_resize(struct hashmap *hm_old, size_t new_size);
/**
* Frees all memory allocated by the hashmap
*/
void hm_free(struct hashmap *hm);
#endif
hashmap.c
#include "hashmap.h"
#include <stdlib.h>
#include <stdio.h>
// Sources used as reference/learning material:
// http://pokristensson.com/code/strmap/strmap.c
// https://github.com/Encrylize/hashmap/blob/master/hashmap.c
// https://github.com/goldsborough/hashtable/blob/master/hashtable.c
hashmap *hm_new() {
const size_t DEFAULT_HASHMAP_SIZE = 3;
// Allocate a new hashmap
hashmap *hm = malloc(sizeof(hashmap));
if (hm == NULL)
return NULL;
// Allocate buckets
hm->buckets = malloc(sizeof(entry) * DEFAULT_HASHMAP_SIZE);
if (hm->buckets == NULL) {
free(hm);
return NULL;
}
// Set initial bucket count, preferably to a prime number
hm->buckets_count = DEFAULT_HASHMAP_SIZE;
hm->entries_count = 0;
// Initialize the buckets, might be obsolete if calloc is used
for (size_t i = 0; i < hm->buckets_count; ++i) {
hm->buckets(i) = NULL;
}
return hm;
}
struct entry *hm_entry_search(struct hashmap *hm, void *key) {
// Compute index from key
size_t index = hm_compute_hash((size_t) key, hm->buckets_count);
// Get bucket start
entry *e_curr = hm->buckets(index);
// Iterate bucket until entry is found, or bucket end was reached
while (e_curr != NULL) {
if (e_curr->key == key) {
break;
}
e_curr = e_curr->next;
}
return e_curr;
}
const void *hm_get(hashmap *hm, void *key) {
// Make sure the key is not null
if (key == NULL) return NULL;
// Find entry first
entry *entry_ptr = hm_entry_search(hm, key);
// Return value of entry, or NULL, if not present
return entry_ptr == NULL ? NULL : entry_ptr->value;
}
int hm_set(hashmap **phm, void *key, const void *value) {
struct hashmap *hm = *phm;
// Make sure the key is not null
if (key == NULL) return -1;
/* Search for entry in bucket*/
entry *entry_ptr = hm_entry_search(hm, key);
// If entry is already present, overwrite old value with new one
if (entry_ptr != NULL) {
entry_ptr->value = value;
return 0;
}
// If entry is not present, create a new one
struct entry *new_entry_ptr = entry_create(key, value);
if (new_entry_ptr == NULL)
return -2;
// Add newly created entry to the head of the bucket, and update next pointer, if necessary
size_t index = hm_compute_hash((size_t) key, hm->buckets_count);
struct entry *current_entry_ptr = hm->buckets(index);
new_entry_ptr->key = key;
new_entry_ptr->value = value;
new_entry_ptr->next = current_entry_ptr != NULL ? current_entry_ptr : NULL;
hm->buckets(index) = new_entry_ptr;
hm->entries_count++;
// Check if the hashmap should be resized
int hm_size = hm_should_grow(hm);
if (hm_size != 0) {
*phm = hm_resize(hm, hm_size);
}
return 0;
}
void hm_delete(hashmap **phm, void *key) {
struct hashmap *hm = *phm;
// Make sure the key is not null
if (key == NULL) return;
// Compute index from key
size_t index = hm_compute_hash((size_t) key, hm->buckets_count);
// Iterate through bucket. When entry is found, remove it, and update neighbor entries
entry *e_prev = NULL;
entry *e_curr = hm->buckets(index);
while (e_curr != NULL) {
if (e_curr->key == key) {
if (e_prev == NULL) {
hm->buckets(index) = e_curr->next;
} else {
e_prev->next = e_curr->next;
}
hm->entries_count--;
free(e_curr);
break;
}
e_prev = e_curr;
e_curr = e_curr->next;
}
int hm_size = hm_should_shrink(hm);
if (hm_size != 0) {
*phm = hm_resize(hm, hm_size);
}
return;
}
entry *entry_create(void *key, const void *value) {
entry *e_new = malloc(sizeof(entry));
// Check if malloc failed
if (e_new == NULL) {
return NULL;
}
// Set fields for newly created entry
e_new->key = key;
e_new->value = value;
e_new->next = NULL;
return e_new;
}
int hm_should_grow(struct hashmap *hm) {
if (hm->entries_count >= 1.5 * hm->buckets_count) {
// Double the table size
return 2 * hm->buckets_count;
}
return 0;
}
int hm_should_shrink(struct hashmap *hm) {
if (hm->entries_count <= 0.5 * hm->buckets_count) {
// Half the table size
return hm->buckets_count / 2;
}
return 0;
}
hashmap *hm_resize(struct hashmap *hm_old, size_t new_size) {
// First, create a new hashtable which is bigger in size
hashmap *hm = malloc(sizeof(hashmap));
if (hm == NULL)
return NULL;
// Allocate new amount of buckets
hm->buckets = malloc(sizeof(entry) * new_size);
if (hm->buckets == NULL) {
free(hm);
return NULL;
}
// Set new bucket count and reset entries count to 0
hm->buckets_count = new_size;
hm->entries_count = 0;
// Initialize the buckets, might be obsolete if calloc is used
for (size_t i = 0; i < hm->buckets_count; ++i) {
hm->buckets(i) = NULL;
}
// Traverse old and relink entries to new hashmap. This requires re-hashing
for (size_t i = 0; i < hm_old->buckets_count; ++i) {
struct entry *e_curr = hm_old->buckets(i);
while (e_curr != NULL) {
hm_set(&hm, e_curr->key, e_curr->value);
e_curr = e_curr->next;
}
}
hm_free(hm_old);
return hm;
}
size_t hm_compute_hash(size_t key, size_t bucket_count) {
key = ((key >> 16) ^ key) * 0x45d9f3b;
key = ((key >> 16) ^ key) * 0x45d9f3b;
key = (key >> 16) ^ key;
return key % bucket_count;
}
void hm_print(hashmap *hm) {
for (size_t i = 0; i < hm->buckets_count; ++i) {
printf("(%zu) ", i);
struct entry *e_curr = hm->buckets(i);
for (; e_curr != NULL; e_curr = e_curr->next) {
printf("%zu -> ", (size_t)(e_curr->key));
}
printf("NULLn");
}
}
void hm_free(struct hashmap *hm) {
// Free all buckets
for (size_t i = 0; i < hm->buckets_count; ++i) {
entry *e_curr = hm->buckets(i);
entry *e_prev = NULL;
while (e_curr != NULL) {
e_prev = e_curr;
e_curr = e_curr->next;
free(e_prev);
}
}
// Free hashmap struct
free(hm->buckets);
free(hm);
}
main.c
#include <stdio.h>
#include "hashmap.h"
#include <assert.h>
void test_hm_set_entry() {
hashmap *hm = hm_new(3);
hm_set(&hm, (void *) 1, (void *) 100);
assert(hm_get(hm, (void *) 1) == (void *) 100);
hm_free(hm);
}
void test_hm_get_entry() {
hashmap *hm = hm_new(3);
hm_set(&hm, (void *) 1, (void *) 100);
hm_set(&hm, (void *) 2, (void *) 200);
hm_set(&hm, (void *) 3, (void *) 300);
hm_set(&hm, (void *) 4, (void *) 400);
hm_set(&hm, (void *) 5, (void *) 500);
hm_set(&hm, (void *) 6, (void *) 600);
hm_set(&hm, (void *) 7, (void *) 700);
assert(hm_get(hm, (void *) 1) == (void *) 100);
assert(hm_get(hm, (void *) 2) == (void *) 200);
assert(hm_get(hm, (void *) 3) == (void *) 300);
assert(hm_get(hm, (void *) 4) == (void *) 400);
assert(hm_get(hm, (void *) 5) == (void *) 500);
assert(hm_get(hm, (void *) 6) == (void *) 600);
assert(hm_get(hm, (void *) 7) == (void *) 700);
hm_free(hm);
}
void test_hm_delete_entry() {
hashmap *hm = hm_new(3);
hm_set(&hm, (void *) 1, (void *) 100);
hm_set(&hm, (void *) 2, (void *) 200);
hm_set(&hm, (void *) 3, (void *) 300);
hm_delete(&hm, (void *) 2);
assert(hm_get(hm, (void *) 2) == NULL);
hm_free(hm);
}
void test_hm_resize() {
// Allocate new hashmap (3 buckets by default)
hashmap *hm = hm_new();
// Make sure 3 buckets are present
assert(hm->buckets_count == 3);
/* Add 5 items to trigger resize */
hm_set(&hm, (void *) 1, (void *) 100);
hm_set(&hm, (void *) 2, (void *) 200);
hm_set(&hm, (void *) 3, (void *) 300);
hm_set(&hm, (void *) 4, (void *) 400);
hm_set(&hm, (void *) 5, (void *) 500);
// Check if buckets size was doubled
assert(hm->buckets_count == 6);
// Check if elements can still be obtained correctly
assert(hm_get(hm, (void *) 1) == (void *) 100);
assert(hm_get(hm, (void *) 2) == (void *) 200);
assert(hm_get(hm, (void *) 3) == (void *) 300);
assert(hm_get(hm, (void *) 4) == (void *) 400);
assert(hm_get(hm, (void *) 5) == (void *) 500);
// Delete some entries
hm_delete(&hm, (void *) 1);
hm_delete(&hm, (void *) 2);
// Check if table size shrinked again
assert(hm->buckets_count == 3);
// Fee hashmap
hm_free(hm);
}
int main() {
test_hm_get_entry();
test_hm_set_entry();
test_hm_delete_entry();
test_hm_resize();
return 0;
}
Feel free to be very nit-picky with your review 🙂