IBM Metal C

Examples

Example 6. Hash Table

A hash table (hash map) is a data structure used to implement an associative array, a structure that can map keys to values. A hash table uses a hash function to compute an index, using the key value, into an array of slots, to find the associated value.

Ideally, the hash function will assign each key to a unique slot; in practice some keys usually hash to the same slot. This duplicated mapping is called a collision, and can be resolved by a number of well documentaed solutions. This implementation resolves the collision through double hashing and open addressing. The average cost for each hash table lookup is independent of the number of elements stored in the table. In many situations, hash tables are more efficient than search trees or any other table lookup structure.

Performance Analysis

With an ideal hash function, a table of size k with open addressing has no collisions and contains up to k elements, with a single comparison for successful lookup. A table of size k with chaining and n keys has the minimum of max(0, n-k) collisions and Ο(1 + n/k) comparisons for lookup. For the worst choice of hash function, every insertion causes a collision, and hash tables degenerate to a linear search, up to n comparisons for a successful lookup.

This hash table implmentation is based upon the Open Scatter Table Search with Collision Resolution by Open Addressing and Double Hashing, found in D.E. Knuth’s The Art of Computer Programming. There are two hash functions employed in this program, the Fowler/Noll/Vo (FNV) hash and MurmurHash3

Type Signature

extern int hcreate(int elements, int elementSize, int keyLength, int keyOffset, ENTRY *__item);
extern ENTRY* data hsearch(ENTRY *__item, ACTION __action);
extern hdestroy(ENTRY *__item);

Signature Details

hcreate – Create a new hash table.: elements – maximum number of hash table entries. Minimum 50 – maximum 1,000,000; elementSize – size in bytes of each table entry.; Key length – length in bytes of the table key.; Key offset – offset in bytes of the table key from the start of an entry.; The action argument is a member of an enumeration type ACTION indicating the disposition of the entry if it cannot be found in the table. ENTER indicates that the item should be inserted in the table at an appropriate point. FIND indicates that no entry should be made.
hsearch() – Add or find a hash table entry: returns a NULL pointer if either the action is FIND and the item could not be found or the action is ENTER and the table is full.

hdestroy – Free the storage associated with a hash table

C Source

 
#ifndef __MVS__
#include <stdio.h>
#include <stdbool.h>
#else
#define true  1
#define false 0
#endif

#include <string.h>
#include <stdlib.h>
#include <stdint.h>

#define NotInUse 'N'
#define InUse    'I'

struct hashStats
{
    int elements;      // Number of table entries
    int inUseElements; // Number of table entries in use
    int loadFactor;    // (in_use / table_size) + 1 * 100 
    int searches;      // total searches performed
    int searchHits;    // total search hits
    int hit1hash;      // search hit on 1st hash
    int hit2hash;      // search hit on/after 2nd hash
    int adds;          // Number of elements added
    int deleted;       // Number of elements deleted
    int keyExists;     // Find but keys already exists;
} ;

struct seeds
{
    unsigned int seed1;
    unsigned int seed2;

} ;

struct hashWork
{
    int tableLength;
    void *table;
    struct seeds _seeds;
    struct hashStats _hashStats;
} ;

/* Action which shall be performed in the call the hsearch.  */
typedef enum
{
    FIND,
    ENTER,
    INDEX
}
ACTION;

typedef struct entry
{
    void *data;
    struct hashWork _hashWork;
}
ENTRY;

extern void* hsearch (ENTRY *__item, ACTION __action);

#define TableLimitSize  1000003 

struct HashTablePrefix
{
    int tableOffset;
    int size;
    int elementSize;
    int entrySize;
    int keyLength;
    int keyOffset;
    double elements;
    double inUseElements;
    double loadFactor;
} ;

struct HashTableEntry
{

    struct
    {
        int  index;
        char inUseFlag;
    } prefix;

    char data;
} ;


/*
 hash_32 - 32 bit Fowler/Noll/Vo FNV-1a hash code
 
 Fowler/Noll/Vo hash
  
 To use the recommended 32 bit FNV-1a hash, pass FNV1_32A_INIT as the
 Fnv32_t hashval argument to fnv_32a_buf() or fnv_32a_str().
  
 Please do not copyright this code.  This code is in the public domain.
  
 To use the recommended 32 bit FNV-1a hash, pass FNV1_32A_INIT as the
 Fnv32_t hashval argument to fnv_32a_buf() or fnv_32a_str().
 
 */

#define FNV_VERSION "5.0.2"	/* @(#) FNV Version */

/* 32 bit FNV-0 hash type  */
typedef unsigned int Fnv32_t;

/* 32 bit FNV-1 and FNV-1a non-zero initial basis

NOTE: The FNV-1a initial basis is the same value as FNV-1 by definition.
 */
#define FNV1_32_INIT ((Fnv32_t)0x811c9dc5)
#define FNV1_32A_INIT FNV1_32_INIT

extern Fnv32_t fnv_32a_buf(void *buf, size_t len, Fnv32_t hashval,
        unsigned int modulus);


/*  32 bit magic FNV-1a prime */
#define FNV_32_PRIME ((Fnv32_t)0x01000193)

/*
   fnv_32a_buf - perform a 32 bit Fowler/Noll/Vo FNV-1a hash on a buffer
 
    input:
    buf	- start of buffer to hash
    len	- length of buffer in octets
    hval- previous hash value or 0 if first call
 
 returns:
 32 bit hash as a static hash type
 
 NOTE: To use the recommended 32 bit FNV-1a hash, use FNV1_32A_INIT as the
 hval arg on the first call to either fnv_32a_buf() or fnv_32a_str().
 */
Fnv32_t fnv_32a_buf(void *buf, size_t len, Fnv32_t hval, unsigned int modulus)
{
    unsigned char *bp = (unsigned char *) buf;	/* start of buffer */
    unsigned char *be = bp + len;		/* beyond end of buffer */

    /* FNV-1a hash each octet in the buffer */
    while (bp < be)
    {

        /* xor the bottom with the current octet */
        hval ^= (Fnv32_t) * bp++;

        /* multiply by the 32 bit FNV magic prime mod 2^32 */
        hval *= FNV_32_PRIME;
    }

    /* return our new hash value */
    hval = 1 + (hval % (modulus - 2));
    return hval;
}

uint32_t rotl32 ( uint32_t x, int8_t r )
{
    return (x << r) | (x >> (32 - r));
}


//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.

#define ROTL32(x,y)     rotl32(x,y)

//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche

uint32_t fmix (uint32_t h)
{
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;

    return h;
}

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

uint32_t getblock ( const uint32_t *p, int i )
{
    return p[i];
}

unsigned int MurmurHash3_32 (const void * key, int len, uint32_t seed, unsigned int modulus)
{
    int i;
    const uint8_t * data = (const uint8_t*) key;
    const int nblocks = len / 4;

    uint32_t h1 = seed;

    uint32_t c1 = 0xcc9e2d51;
    uint32_t c2 = 0x1b873593;

    //----------
    // body

    const uint32_t * blocks = (const uint32_t *) (data + nblocks * 4);

    for (i = -nblocks; i; i++)
    {
        uint32_t k1 = getblock(blocks, i);

        k1 *= c1;
        k1 = ROTL32(k1, 15);
        k1 *= c2;

        h1 ^= k1;
        h1 = ROTL32(h1, 13);
        h1 = h1 * 5 + 0xe6546b64;
    }

    //----------
    // tail

    const uint8_t * tail = (const uint8_t*) (data + nblocks * 4);

    uint32_t k1 = 0;

    switch (len & 3)
    {
        case 3: k1 ^= tail[2] << 16;
        case 2: k1 ^= tail[1] << 8;
        case 1: k1 ^= tail[0];
            k1 *= c1;
            k1 = ROTL32(k1, 15);
            k1 *= c2;
            h1 ^= k1;
    };

    //----------
    // finalization

    h1 ^= len;

    h1 = fmix(h1);
    h1 = h1 % modulus;

    return h1;
}

/*
Open Scatter Table Search with Collision Resolution by Open Addressing and Double Hashing.
From: The Art of Computer Programming, Vol. 3 Sorting and Searching, by D.E. Knuth,  pp. 518-522

This algorithm searches an M-node table, looking for a given key K.
The nodes of the table are denoted by TABLE[i], for 0 <= i < M, and they are
of two types, empty and occupied. An occupied node contains a key
called KEY[i]. An auxiliary variable N is used to keep track of how many nodes
are occupied. It is increased by 1 whenever a new key is inserted.
The algorithm probes the table using two hash functions h1(K) and h2(K).

The hsearch() function is a hash-table search routine. 

    ● It returns a pointer into a hash table indicating the location at which an entry can 
      be found. 

    ● The item argument is a structure of type ENTRY containing a pointer, item.data (a
      void *). 
 
    ● The comparison function used by hsearch() is memcmp(). 
 
    ●  The action argument is a member of an enumeration type ACTION indicating the 
       disposition of the entry if it cannot be found in the table. ENTER indicates 
       that the item should be inserted in the table at an appropriate point. FIND 
       indicates that no entry should be made.
       hsearch() returns a NULL pointer if either the action is FIND and the item could not
       be found or the action is ENTER and the table is full.
 */

void* hsearch(ENTRY *__item, ACTION __action)
{
    int index;
    const unsigned int seed =  1216379;
    unsigned int c;
    char *startOfHashTable;
    char *keyPtr;
    char *entryKeyPtr;
    char *entryElementPtr;
    unsigned char *keyString;
    int modulus;
    int keyLength;
    int keyOffset;
    int entrySize;
    int elementSize;
    struct HashTablePrefix *_HashTablePrefix;
    struct HashTableEntry *_HashTableEntry;

    _HashTablePrefix = __item->_hashWork.table;

    if (__action == FIND)
    {
        __item->_hashWork._hashStats.searches++;
    }

    modulus = _HashTablePrefix->elements;
    keyLength = _HashTablePrefix->keyLength;
    keyOffset = _HashTablePrefix->keyOffset;
    entrySize = _HashTablePrefix->entrySize;
    elementSize = _HashTablePrefix->elementSize;
    __item->_hashWork._seeds.seed1 = seed;

    startOfHashTable =  (char*) _HashTablePrefix;
    startOfHashTable += _HashTablePrefix->tableOffset;

    keyPtr = (char*) __item->data;
    keyPtr +=  keyOffset;
    /* Key to Cache */
    keyString = (unsigned char *) keyPtr;

    /*  D1: Hash: 0 <= index < ENTRIES */

    index = MurmurHash3_32((char*) keyString,  _HashTablePrefix->keyLength,
            __item->_hashWork._seeds.seed1, modulus);

    _HashTableEntry = (struct HashTableEntry *) (startOfHashTable + (index * entrySize));

    entryKeyPtr = (char*) &_HashTableEntry->data;
    entryKeyPtr += keyOffset;

    // First Hash. If Slot is empty, then key can be added.
    if (_HashTableEntry->prefix.inUseFlag == NotInUse)
    {
        goto D6;
    }

    if (memcmp(keyPtr, entryKeyPtr, keyLength) == 0)
    {
        /* Found Entry */
        if (__action == INDEX)
        {
            // intptr_t	integer type capable of holding a pointer

            return (void *) (intptr_t) index;
        }
        __item->_hashWork._hashStats.searchHits++;
        __item->_hashWork._hashStats.hit1hash++;
        return &_HashTableEntry->data;
    }

    /* D3: Keys are not equal, collision condition. Second Hash */
    c = fnv_32a_buf((char*) keyString, keyLength, __item->_hashWork._seeds.seed2, modulus);
    c = c % modulus;

    /* Advance to Next */
D4:
    index = index - c;
    if (index < 0)
    {
        index += modulus;
    }

    _HashTableEntry = (struct HashTableEntry *) (startOfHashTable + (index * entrySize));
    entryKeyPtr = (char*) _HashTableEntry;
    entryKeyPtr += keyOffset;

    /* Compare - D5:
       A Slot Not in Use Means that the Key is Not a Duplicate and Can be Added.
     */
    if (_HashTableEntry->prefix.inUseFlag == NotInUse)
    {
        goto D6;
    }

    if (memcmp(&keyPtr, &entryKeyPtr, keyLength) == 0)
    {
        /* Found Entry */
        if (__action == INDEX)
        {
            // intptr_t	integer type capable of holding a pointer
            return (void *) (intptr_t) index;
        }
        __item->_hashWork._hashStats.hit2hash++;
        __item->_hashWork._hashStats.searchHits++;
        return &_HashTableEntry->data;
    }
        // Continue Linear Search Until Either Empty Slot, or Keys Match.
    else
    {
        goto D4;
    }

    /* ------ Add ------ */
D6:
    // Check for Table Full;
    if (_HashTablePrefix->inUseElements == modulus)
    {
        return NULL;
    }

    if (__action == FIND)
    {
        __item->_hashWork._hashStats.keyExists++;
        return NULL;
    }

    if (__action == INDEX)
    {
        return (void *) (intptr_t) - 8;
    }

    printf("%i\n", index);
    __item->_hashWork._hashStats.adds++;
    __item->_hashWork._hashStats.inUseElements++;
    _HashTableEntry->prefix.index = index;
    entryElementPtr = (char*) &_HashTableEntry->data;
    _HashTableEntry->prefix.inUseFlag = InUse;
    memcpy(entryElementPtr, __item->data, elementSize);
    _HashTablePrefix->inUseElements++;
    _HashTablePrefix->loadFactor = _HashTablePrefix->inUseElements / _HashTablePrefix->elements;
    __item->_hashWork._hashStats.loadFactor = (int) (_HashTablePrefix->loadFactor * 100) + 1;

    return entryElementPtr;
}


// prime numbers are of form 6k+/-1

_Bool isPrime(int n)
{
    int i;

    if (n % 2 == 0 || n % 3 == 0)
    {
        return false;
    }
    else
    {
        for (i = 5; i * i <= n; i += 6)
        {
            if (n % i == 0 || n % (i + 2) == 0)
            {
                return false;
            }
        }
        return true;
    }
}

// Find 1st prime greater than N

int Primes(int N)
{
    int i;
    int root;
    int rem;
    int start;

    if (N <= 50)
    {
        return 53;
    }
    // If N is even start with next odd number
    rem = N % 2;

    if (rem == 0)
    {
        start = N + 1;
    }
    else
    {
        start = N;
    }

    // Start from N until a prime umber is found;

    for (i = start; i < TableLimitSize; i += 2)
    {
        if (isPrime(i))
        {
            return i;
        }
    }
    return -8;
}

#ifndef __MVS__

void* getStorage(int _storageSize)
{
    int rc;
    void* storageAddr;

    storageAddr = malloc(_storageSize);

    return storageAddr;
}

void hdestroy(ENTRY *__item)
{
    free(__item->_hashWork.table);
    return;
}
#else

void* getStorage(int _storageSize)
{
    int rc;
    void* storageAddr;

    __asm( (" STORAGE OBTAIN,LOC=(31,64),STARTBDY=3"
            ",LENGTH=%1"
            ",ADDR=%0")
            : "=m"(storageAddr)
            : "r"(_storageSize)
            : "r0", "r15");   // clobber list 

    __asm(" ST    15,%0   "
            : "=m"(rc)
            :
            : "r0", "r15");

    if (rc != 0)
    {
        return NULL;
    }

    return storageAddr;
}

void hdestroy(ENTRY *__item)
{
    int rc;

    __asm( (" STORAGE RELEASE"
            ",LENGTH=%1"
            ",ADDR=%0")
            : "=m"(__item->_hashWork.table)
            : "r"(__item->_hashWork.tableLength)
            : "r0", "r15");   // clobber list 

    __asm(" ST    15,%0   "
            : "=m"(rc)
            :
            : "r0", "r15");
}

#endif

int allocateHashTable(int _elements, int _elementSize, int _prime, int _keyLength,
        int _keyOffset, ENTRY *__item)
{
    int storageSize;
    void *storageAddr;
    struct HashTablePrefix *_HashTablePrefix;
    struct HashTableEntry *_HashTableEntry;
    int entrySize;
    int i;
    char *wrkPtr;

    if (_elementSize <= 0)
    {
        return -8;
    }

    if (_keyLength <= 0)
    {
        return -8;
    }

    if (_keyOffset < 0)
    {
        return -8;
    }

    entrySize = _elementSize + sizeof(_HashTableEntry->prefix);
    storageSize = (entrySize * _prime) + sizeof(struct HashTablePrefix);
    storageAddr = getStorage(storageSize);

    if (storageAddr == NULL)
    {
        return;
    }

    __item->_hashWork.table =  storageAddr;
    __item->_hashWork.tableLength = storageSize;

    memset(storageAddr, 0, storageSize);

    _HashTablePrefix = storageAddr;

    _HashTablePrefix->elements = _prime;
    _HashTablePrefix->inUseElements = 0;
    _HashTablePrefix->loadFactor = 0;
    _HashTablePrefix->size = storageSize;
    _HashTablePrefix->elementSize = _elementSize;
    _HashTablePrefix->keyLength = _keyLength;
    _HashTablePrefix->keyOffset = _keyOffset;
    _HashTablePrefix->tableOffset = sizeof(struct HashTablePrefix);
    _HashTablePrefix->entrySize = entrySize;

    wrkPtr = (char*) _HashTablePrefix;
    wrkPtr += _HashTablePrefix->tableOffset;
    _HashTableEntry = (struct HashTableEntry *) wrkPtr;

    for (i = 0; i < _prime; i++)
    {
        _HashTableEntry->prefix.index = 0;
        _HashTableEntry->prefix.inUseFlag = NotInUse;
        wrkPtr += entrySize;
        _HashTableEntry = (struct HashTableEntry *) wrkPtr;
    }
}

int getPrime(int _elements)
{
    int prime;

    prime = Primes (_elements);
    if (prime <= 0)
    {
        return -8;
    }
    return prime;
}

int hcreate(int _elements, int _elementSize, int _keyLength, int _keyOffset, ENTRY *__item)
{
    int prime;
    int rc;
    unsigned int seed;

    memset(__item, 0, sizeof(struct entry));

    prime = getPrime(_elements);

    if (prime <= 0)
    {
        return prime;
    }

    rc = allocateHashTable(_elements, _elementSize, prime, _keyLength, _keyOffset, __item);

    if (__item->_hashWork.table == NULL)
    {
        return -8;
    }

    __item->_hashWork._seeds.seed1 = FNV1_32A_INIT;
    __item->_hashWork._seeds.seed2 =
            fnv_32a_buf("2718281828459045235360287471352662497757247093699959574966", 60,
            __item->_hashWork._seeds.seed2, prime);
    __item->_hashWork._hashStats.elements = prime;

    return 0;
};

#ifndef __MVS__

int main()
{
    int elements;
    int elementSize;
    int keyLength;
    int keyOffset;
    int rc;
    ENTRY __item;
    ACTION __action;
    ENTRY* data;
    int i;
    struct HashTablePrefix *_HashTablePrefix;
    struct HashTableEntry *_HashTableEntry;
    char *startOfHashTable;
    char *keyPtr;
    char *entryKeyPtr;
    char *entryElementPtr;
    int entrySize;
    char *wrkPtr;

    char testArray[35][80] = {
        "978076453306Apache Server Administrator's Handbook",
        "978076453320Book - Unix Secrets Second Edition",
        "978079237989Book: Code Optimization Techniques for Embedded Processors",
        "978082420941Violence in American Society",
        "978084998066Adventures of Woof",
        "978087779273Webster's Instant Word Guide",
        "978088079402EXPLORER'S CARD GAME",
        "978088079648NURSERY RHYMES CARD GAME",
        "978094126381Jumble for Kids #1",
        "978096522372Basic Frank Lloyd Wright by Henry J. Michel",
        "978155558199PB Book - Digital U",
        "978155615915Microsoft Visual C++ User's Guide ISBN 1-55615-915-3",
        "978155969971Book Mark (On The Farm by Cheri Blum)",
        "978156592116C++ The Core Language - Nutshell Series",
        "978157054385A Klutz Guide - Glove Compartment Science",
        "978157169058Perl 5 Howto",
        "978184215322Southwater/ITALIAN COOKBOK",
        "978186100268ADO 2.1 Programmer's Reference",
        "978186100404Professional WAP",
        "978188516739How to Get Filthy Stinking Rich, by Herb Key",
        "980480025208BACARDI GOLD",
        "982000154986TGI FRIDAYS ORANGE DREAM",
        "982000157642TGI FRIDAYS MARGARITA",
        "982000157659TGI FRIDAYS PINA COLADA",
        "982000166101JOSE CUERVO LIME MARGARITA MIX",
        "982000190328JOSE CUERVO AUTHENTICS LIME",
        "982000192889JOSE CUERVO TEQUILA GOLD",
        "982000192988E CUERVO TEQUILA GOLD",
        "982000194012JOSE CUERVO 1800 TEQUILA",
        "982000720860TGI FRIDAYS VERY VANILLA",
        "987000201159CAPTAIN MORGAN RUM",
        "987000202057CAPTAIN MORGAN RUM",
        "988076111311GORDON'S GIN 80 PF",
        "988076111441GORDONS VODKA 80 PF",
        "988076114503GORDONS VODKA CITRUS"
    };

    elements = 60;
    elementSize = 100;
    keyLength = 12;
    keyOffset = 0;

    rc = hcreate(elements, elementSize, keyLength, keyOffset, &__item);

    for (i = 0;i < 35; i++)
    {
        __item.data = &testArray[i];
        data = hsearch(&__item,  ENTER);
        // printf("Add for: %s\n", __item.data);
        fflush(stdout);
    }

    printf("Elements: %i\n", __item._hashWork._hashStats.elements);
    printf("In use: %i\n", __item._hashWork._hashStats.inUseElements);
    printf("Added: %i\n", __item._hashWork._hashStats.adds);
    printf("Deleted: %i\n", __item._hashWork._hashStats.deleted);
    printf("Load Factor: %i\n", __item._hashWork._hashStats.loadFactor);
    printf("Searches: %i\n", __item._hashWork._hashStats.searches);
    printf("Key exists: %i\n", __item._hashWork._hashStats.keyExists);
    printf("Hits: %i\n", __item._hashWork._hashStats.searchHits);


    printf("Elements: %i\n", __item._hashWork._hashStats.elements);
    printf("In use: %i\n", __item._hashWork._hashStats.inUseElements);
    printf("Added: %i\n", __item._hashWork._hashStats.adds);
    printf("Deleted: %i\n", __item._hashWork._hashStats.deleted);
    printf("Load Factor: %i\n", __item._hashWork._hashStats.loadFactor);
    printf("Searches: %i\n", __item._hashWork._hashStats.searches);
    printf("Key exists: %i\n", __item._hashWork._hashStats.keyExists);
    printf("Hits: %i\n", __item._hashWork._hashStats.searchHits);

    hdestroy(&__item);

};
#endif

References

Hash function, Wikipedia, 1997
The Art of Computer Programming, Volume 3: Sorting and Searching, Chapter 6.4., Knuth, D. Addison Wesley, 1973
FNV Hash, Landon Curt Noll, 1994-2013
SMHasher & MurmurHash, Google Code