c – Generic hashmap with dynamic resizing and collision handling

This is a (more or less) continuation of a previous post I made: Generic implementation of a hashtable with double-linked list.
I got lots of valuable feedback, so I decided to rewrite the entire module from scratch to improve readability, performance and also to add new features.

Specifications:

  • The hashmap implementation is able to store any data (generic)
  • The hashmap provides collision handling using a single linked list.
  • The hashmap dynamically resizes, to allow for good performance
    • With many collisions (e.g. because of a small bucket count), access-time per element on average is not O(1)
    • Therefore, when a certain threshold is exceeded (150%), the hashmap is resized, to allow for a better distribution of entries, and hence for faster access times.

Review Request:

I’d like to get feedback for the following things:

  • Is the API of the hashmap intuitive/easy to use? Is it consistent (Parameters passed, values returned, Naming conventions etc)
    • I plan on re-using this module in future projects
  • Is there room for performance improvements? (E.g. are there any checks or loops that could be avoided?)
    • I am especially thinking about the functions that directly manipulate the buckets, such as hm_get, hm_set, hm_delete and hm_resize.
  • The test functions in main.c don’t need to be reviewed, as they will be replaced with proper unit tests in the future.
  • Are there any bad practices I didn’t catch? (e.g Redundant code etc)

hashmap.h

#ifndef HASHTABLE_H
#define HASHTABLE_H

#include <stddef.h>
#include <stdbool.h>

typedef struct entry {
    void *key;
    const void *value;
    struct entry *next;
} entry;

typedef struct hashmap {
    size_t buckets_count;
    size_t entries_count;
    struct entry **buckets;
} hashmap;


size_t hm_compute_hash(size_t key, size_t bucket_count);

/**
 * Initializes a new hashmap instance with the default size
 * @return The hashmap instance
 */
hashmap *hm_new();

/**
 * Returns a pointer to the value from the entry in the hashmap given the key, or NULL, if entry doesn't exist.
 * @param hm The hashmap instance
 * @param key The entry key
 * @return The pointer to the value of the matching entry, or NULL, if key is not valid.
 */
const void *hm_get(hashmap *hm, void *key);

/**
 * Adds a new entry to the hashmap
 * @param phm Pointer to the hashmap instance
 * @param key The entry key
 * @param value The value associated with the key
 * @return Returns 0 if element was added successfully, otherwise a negative value is returned.
 */
int hm_set(hashmap **phm, void *key, const void *value);

/**
 * Deletes an entry from the hashmap
 * @param hm The hashmap instance
 * @param key The key to the entry to delete
 */
void hm_delete(hashmap **phm, void *key);

/**
 * Prints the hashtable in a human readable format to stdout
 * @param hm The hashmap instance
 */
void hm_print(hashmap *hm);

/**
 * Searches an entry by key in a bucket
 * @param hm The hashmap instance
 * @param key The entry key
 * @return Pointer to the entry if found, otherwise NULL
 */
struct entry *hm_entry_search(struct hashmap *hm, void *key);

/**
 * Creates a new entry that is accepted by that hashmap
 * @param key The entry key
 * @param value The value of the entry
 * @return Returns a pointer to the created entry, or NULL, if an error occurred.
 */
struct entry *entry_create(void *key, const void *value);

/**
 * Checks if the hashmap should be expanded to remain efficient
 * @param hm The hashmap instance
 * @return Returns the new bucket size, if expanding is recommended, otherwise 0
 */
int hm_should_grow(struct hashmap *hm);

/**
 * Checks if the hashmap should be shrinked to remain efficient
 * @param hm The hashmap instance
 * @return Returns the new bucket size, if shrinking is recommended, otherwise 0
 */
int hm_should_shrink(struct hashmap *hm);

/**
 * Creates a new larger or smaller hashtable with the given size.
 * All entries get relinked, and the old hashtable instance gets replaced with the new one.
 * @param hm_old The hashmap instance that should be expanded
 * @param new_size The amount of buckets that the new hashmap instance should have
 * @return The new (expanded) hashmap instance
 */
hashmap *hm_resize(struct hashmap *hm_old, size_t new_size);

/**
 * Frees all memory allocated by the hashmap
 */
void hm_free(struct hashmap *hm);

#endif

hashmap.c

#include "hashmap.h"
#include <stdlib.h>
#include <stdio.h>

// Sources used as reference/learning material:
// http://pokristensson.com/code/strmap/strmap.c
// https://github.com/Encrylize/hashmap/blob/master/hashmap.c
// https://github.com/goldsborough/hashtable/blob/master/hashtable.c

hashmap *hm_new() {

    const size_t DEFAULT_HASHMAP_SIZE = 3;

    // Allocate a new hashmap
    hashmap *hm = malloc(sizeof(hashmap));
    if (hm == NULL)
        return NULL;

    // Allocate buckets
    hm->buckets = malloc(sizeof(entry) * DEFAULT_HASHMAP_SIZE);
    if (hm->buckets == NULL) {
        free(hm);
        return NULL;
    }

    // Set initial bucket count, preferably to a prime number
    hm->buckets_count = DEFAULT_HASHMAP_SIZE;
    hm->entries_count = 0;

    // Initialize the buckets, might be obsolete if calloc is used
    for (size_t i = 0; i < hm->buckets_count; ++i) {
        hm->buckets(i) = NULL;
    }

    return hm;
}

struct entry *hm_entry_search(struct hashmap *hm, void *key) {

    // Compute index from key
    size_t index = hm_compute_hash((size_t) key, hm->buckets_count);

    // Get bucket start
    entry *e_curr = hm->buckets(index);

    // Iterate bucket until entry is found, or bucket end was reached
    while (e_curr != NULL) {
        if (e_curr->key == key) {
            break;
        }
        e_curr = e_curr->next;
    }
    return e_curr;
}

const void *hm_get(hashmap *hm, void *key) {

    // Make sure the key is not null
    if (key == NULL) return NULL;

    // Find entry first
    entry *entry_ptr = hm_entry_search(hm, key);

    // Return value of entry, or NULL, if not present
    return entry_ptr == NULL ? NULL : entry_ptr->value;
}

int hm_set(hashmap **phm, void *key, const void *value) {

    struct hashmap *hm = *phm;

    // Make sure the key is not null
    if (key == NULL) return -1;

    /* Search for entry in bucket*/
    entry *entry_ptr = hm_entry_search(hm, key);

    // If entry is already present, overwrite old value with new one
    if (entry_ptr != NULL) {
        entry_ptr->value = value;
        return 0;
    }

    // If entry is not present, create a new one
    struct entry *new_entry_ptr = entry_create(key, value);
    if (new_entry_ptr == NULL)
        return -2;

    // Add newly created entry to the head of the bucket, and update next pointer, if necessary
    size_t index = hm_compute_hash((size_t) key, hm->buckets_count);
    struct entry *current_entry_ptr = hm->buckets(index);

    new_entry_ptr->key = key;
    new_entry_ptr->value = value;
    new_entry_ptr->next = current_entry_ptr != NULL ? current_entry_ptr : NULL;
    hm->buckets(index) = new_entry_ptr;

    hm->entries_count++;

    // Check if the hashmap should be resized
    int hm_size = hm_should_grow(hm);
    if (hm_size != 0) {
        *phm = hm_resize(hm, hm_size);
    }

    return 0;
}

void hm_delete(hashmap **phm, void *key) {

    struct hashmap *hm = *phm;

    // Make sure the key is not null
    if (key == NULL) return;

    // Compute index from key
    size_t index = hm_compute_hash((size_t) key, hm->buckets_count);

    // Iterate through bucket. When entry is found, remove it, and update neighbor entries
    entry *e_prev = NULL;
    entry *e_curr = hm->buckets(index);

    while (e_curr != NULL) {
        if (e_curr->key == key) {
            if (e_prev == NULL) {
                hm->buckets(index) = e_curr->next;
            } else {
                e_prev->next = e_curr->next;
            }
            hm->entries_count--;
            free(e_curr);
            break;
        }
        e_prev = e_curr;
        e_curr = e_curr->next;
    }

    int hm_size = hm_should_shrink(hm);
    if (hm_size != 0) {
        *phm = hm_resize(hm, hm_size);
    }

    return;
}

entry *entry_create(void *key, const void *value) {
    entry *e_new = malloc(sizeof(entry));

    // Check if malloc failed
    if (e_new == NULL) {
        return NULL;
    }

    // Set fields for newly created entry
    e_new->key = key;
    e_new->value = value;
    e_new->next = NULL;

    return e_new;
}

int hm_should_grow(struct hashmap *hm) {
    if (hm->entries_count >= 1.5 * hm->buckets_count) {
        // Double the table size
        return 2 * hm->buckets_count;
    }
    return 0;
}

int hm_should_shrink(struct hashmap *hm) {
    if (hm->entries_count <= 0.5 * hm->buckets_count) {
        // Half the table size
        return hm->buckets_count / 2;
    }
    return 0;
}

hashmap *hm_resize(struct hashmap *hm_old, size_t new_size) {

    // First, create a new hashtable which is bigger in size
    hashmap *hm = malloc(sizeof(hashmap));
    if (hm == NULL)
        return NULL;

    // Allocate new amount of buckets
    hm->buckets = malloc(sizeof(entry) * new_size);
    if (hm->buckets == NULL) {
        free(hm);
        return NULL;
    }

    // Set new bucket count and reset entries count to 0
    hm->buckets_count = new_size;
    hm->entries_count = 0;

    // Initialize the buckets, might be obsolete if calloc is used
    for (size_t i = 0; i < hm->buckets_count; ++i) {
        hm->buckets(i) = NULL;
    }

    // Traverse old and relink entries to new hashmap. This requires re-hashing
    for (size_t i = 0; i < hm_old->buckets_count; ++i) {
        struct entry *e_curr = hm_old->buckets(i);

        while (e_curr != NULL) {
            hm_set(&hm, e_curr->key, e_curr->value);
            e_curr = e_curr->next;
        }
    }

    hm_free(hm_old);

    return hm;
}


size_t hm_compute_hash(size_t key, size_t bucket_count) {
    key = ((key >> 16) ^ key) * 0x45d9f3b;
    key = ((key >> 16) ^ key) * 0x45d9f3b;
    key = (key >> 16) ^ key;
    return key % bucket_count;
}

void hm_print(hashmap *hm) {
    for (size_t i = 0; i < hm->buckets_count; ++i) {
        printf("(%zu) ", i);
        struct entry *e_curr = hm->buckets(i);

        for (; e_curr != NULL; e_curr = e_curr->next) {
            printf("%zu -> ", (size_t)(e_curr->key));
        }
        printf("NULLn");
    }
}

void hm_free(struct hashmap *hm) {

    // Free all buckets
    for (size_t i = 0; i < hm->buckets_count; ++i) {
        entry *e_curr = hm->buckets(i);
        entry *e_prev = NULL;

        while (e_curr != NULL) {
            e_prev = e_curr;
            e_curr = e_curr->next;
            free(e_prev);
        }
    }

    // Free hashmap struct
    free(hm->buckets);
    free(hm);
}

main.c

#include <stdio.h>
#include "hashmap.h"
#include <assert.h>

void test_hm_set_entry() {
    hashmap *hm = hm_new(3);
    hm_set(&hm, (void *) 1, (void *) 100);
    assert(hm_get(hm, (void *) 1) == (void *) 100);
    hm_free(hm);
}

void test_hm_get_entry() {
    hashmap *hm = hm_new(3);

    hm_set(&hm, (void *) 1, (void *) 100);
    hm_set(&hm, (void *) 2, (void *) 200);
    hm_set(&hm, (void *) 3, (void *) 300);
    hm_set(&hm, (void *) 4, (void *) 400);
    hm_set(&hm, (void *) 5, (void *) 500);
    hm_set(&hm, (void *) 6, (void *) 600);
    hm_set(&hm, (void *) 7, (void *) 700);

    assert(hm_get(hm, (void *) 1) == (void *) 100);
    assert(hm_get(hm, (void *) 2) == (void *) 200);
    assert(hm_get(hm, (void *) 3) == (void *) 300);
    assert(hm_get(hm, (void *) 4) == (void *) 400);
    assert(hm_get(hm, (void *) 5) == (void *) 500);
    assert(hm_get(hm, (void *) 6) == (void *) 600);
    assert(hm_get(hm, (void *) 7) == (void *) 700);

    hm_free(hm);
}

void test_hm_delete_entry() {
    hashmap *hm = hm_new(3);

    hm_set(&hm, (void *) 1, (void *) 100);
    hm_set(&hm, (void *) 2, (void *) 200);
    hm_set(&hm, (void *) 3, (void *) 300);

    hm_delete(&hm, (void *) 2);

    assert(hm_get(hm, (void *) 2) == NULL);
    hm_free(hm);
}

void test_hm_resize() {

    // Allocate new hashmap (3 buckets by default)
    hashmap *hm = hm_new();

    // Make sure 3 buckets are present
    assert(hm->buckets_count == 3);

    /* Add 5 items to trigger resize */
    hm_set(&hm, (void *) 1, (void *) 100);
    hm_set(&hm, (void *) 2, (void *) 200);
    hm_set(&hm, (void *) 3, (void *) 300);
    hm_set(&hm, (void *) 4, (void *) 400);
    hm_set(&hm, (void *) 5, (void *) 500);

    // Check if buckets size was doubled
    assert(hm->buckets_count == 6);

    // Check if elements can still be obtained correctly
    assert(hm_get(hm, (void *) 1) == (void *) 100);
    assert(hm_get(hm, (void *) 2) == (void *) 200);
    assert(hm_get(hm, (void *) 3) == (void *) 300);
    assert(hm_get(hm, (void *) 4) == (void *) 400);
    assert(hm_get(hm, (void *) 5) == (void *) 500);

    // Delete some entries
    hm_delete(&hm, (void *) 1);
    hm_delete(&hm, (void *) 2);

    // Check if table size shrinked again
    assert(hm->buckets_count == 3);

    // Fee hashmap
    hm_free(hm);
}

int main() {
    test_hm_get_entry();
    test_hm_set_entry();
    test_hm_delete_entry();
    test_hm_resize();

    return 0;
}

Feel free to be very nit-picky with your review 🙂