// @ZBS { // *MODULE_NAME Simple Hash // *MASTER_FILE 1 // +DESCRIPTION { // A simple, fast, and decoupled hash table using string keys. // } // *PORTABILITY win32 unix // *REQUIRED_FILES zhashtable.cpp zhashtable.h zhashtyped.h // *VERSION 1.5 // +HISTORY { // 1.2 KLD fixed a bug on setting a key that had previously been deleted // 1.3 ZBS changed memcmp to strncmp to fix a bug that made set read potenitally outside of bounds (causing Bounds Checker to complain) // 1.4 ZBS changed hash function to code borrowed from Bob Jenkins // 1.5 ZBS changed name to standard convention // } // +TODO { // Rename to ZHashTable. Rename TypedHash // Make non-string hashing using typedhash easier / faster. Right now it converts back and forth to strings // } // *SELF_TEST yes console // *PUBLISH yes // } // OPERATING SYSTEM specific includes: #ifdef WIN32 //#include "windows.h" // The following line is copied out of windows.h to avoid the header dependency // If there is a problem, comment it out and uncomment the include above. // This function is only used for the debugging function "debugDump()" // and could easily be removed entirely. extern "C" { __declspec(dllimport) void __stdcall OutputDebugStringA( const char *lpOutputString ); } #define OutputDebugString OutputDebugStringA #endif // STDLIB includes: #include "malloc.h" #include "string.h" #include "stdio.h" #include "stdlib.h" #include "stdarg.h" #include "assert.h" // MODULE includes: #include "zhashtable.h" ///////////////////////////////////////////////////////////// // ZHashTable ///////////////////////////////////////////////////////////// // This hash function generator taken from: // http://burtleburtle.net/bob/hash/doobs.html // By Bob Jenkins, 1996. bob_jenkins -AT- burtleburtle.net. unsigned int ZHashTable::hash( char *k ) { #define mix(a,b,c) \ { \ a -= b; a -= c; a ^= (c>>13); \ b -= c; b -= a; b ^= (a<<8); \ c -= a; c -= b; c ^= (b>>13); \ a -= b; a -= c; a ^= (c>>12); \ b -= c; b -= a; b ^= (a<<16); \ c -= a; c -= b; c ^= (b>>5); \ a -= b; a -= c; a ^= (c>>3); \ b -= c; b -= a; b ^= (a<<10); \ c -= a; c -= b; c ^= (b>>15); \ } unsigned int a,b,c,len; unsigned int length = strlen(k); len = length; a = b = 0x9e3779b9; c = 0; while( len >= 12 ) { a += (k[0] +((unsigned int)k[1]<<8) +((unsigned int)k[2]<<16) +((unsigned int)k[3]<<24)); b += (k[4] +((unsigned int)k[5]<<8) +((unsigned int)k[6]<<16) +((unsigned int)k[7]<<24)); c += (k[8] +((unsigned int)k[9]<<8) +((unsigned int)k[10]<<16)+((unsigned int)k[11]<<24)); mix(a,b,c); k += 12; len -= 12; } // handle the last 11 bytes c += length; switch(len) { case 11: c+=((unsigned int)k[10]<<24); case 10: c+=((unsigned int)k[9]<<16); case 9 : c+=((unsigned int)k[8]<<8); // the first byte of c is reserved for the length case 8 : b+=((unsigned int)k[7]<<24); case 7 : b+=((unsigned int)k[6]<<16); case 6 : b+=((unsigned int)k[5]<<8); case 5 : b+=k[4]; case 4 : a+=((unsigned int)k[3]<<24); case 3 : a+=((unsigned int)k[2]<<16); case 2 : a+=((unsigned int)k[1]<<8); case 1 : a+=k[0]; // case 0: nothing left to add } mix(a,b,c); return c; } int ZHashTable::internalSet( char **_hashTable, int _hashTableSize, char *key, char *_recPtr ) { hasAnyChanged = 1; int keyLen = strlen( key ) + 1; int index = hash( key ) % _hashTableSize; int total = _hashTableSize; int totalDeleted = 0; int foundKey = 0; char *recPtr; int dstIndex = -1; while( total-- ) { recPtr = _hashTable[index]; if( recPtr ) { // Each hash record is actually a small structure // 1st byte is 1 if active, 0 if deleted // Starting at the fifth byte is a nul-terminated key string // Next 4 bytes has val length // Followed by val string of length in previous 4 bytes if( *recPtr & aaDELETED ) { // Found a deleted key. Use this space unless the key is found. // NOTE: This will end up using farthest deleted key from the // head of the chain first. dstIndex = index; totalDeleted++; } else if( !strncmp(key, recPtr + 1, keyLen) ) { // Found already existing key. Use this slot. foundKey = 1; dstIndex = index; break; } else { collisions++; } } else { // Found an empty slot, marking the end of this chain. // Use this slot unless we found a deleted slot above. if( dstIndex == -1 ) { dstIndex = index; } break; } index = (index+1) % _hashTableSize; } assert( total + totalDeleted ); // There should always be some room left since we keep // the list at 50% free all the time to avoid collisions // Here, destIndex contains the index of the found record. The found record value can // be either NULL, a pointer to a deleted entry, or a pointer to the found entry. if( _recPtr == NULL ) { // We are deleting the key if( foundKey ) { // Only can delete it if it already exists *_hashTable[dstIndex] |= aaDELETED; return 1; } return 0; } else { // We are either adding a new key or setting the value of an old key. // Either way, if the destination record has a value, it must be deleted // before the new value is set. if( _hashTable[dstIndex] ) { free( _hashTable[dstIndex] ); } *_recPtr = aaCHANGED; _hashTable[dstIndex] = _recPtr; } return !foundKey; } ZHashTable::ZHashTable( int _initialSize ) { initialSize = _initialSize; hashTable = NULL; clear(); } ZHashTable::~ZHashTable() { if( hashTable ) { for( int i=0; i hashTableSize / 2 ) { // If the table is 50% utilized, then go double its size // and rebuild the whole hash table int _numRecords = 0; int _hashTableSize = hashTableSize * 2; char **_hashTable = (char **)malloc( sizeof(char*) * _hashTableSize ); memset( _hashTable, 0, sizeof(char*) * _hashTableSize ); for( int i=0; i= string ) { break; } } } void ZHashTable::clear() { if( hashTable ) { for( int i=0; i= 0 ) { return hasChanged( i ); } return 0; } void ZHashTable::setChanged( int i ) { hasAnyChanged = 1; if( 0 <= i && i < hashTableSize ) { char *c = hashTable[i]; if( c ) { *c |= aaCHANGED; } } } void ZHashTable::setChanged( char *key ) { hasAnyChanged = 1; int i = lookup( key ); if( i >= 0 ) { setChanged( i ); } } void ZHashTable::clearChanged( int i ) { if( 0 <= i && i < hashTableSize ) { char *c = hashTable[i]; if( c ) { *c &= ~aaCHANGED; } } } void ZHashTable::clearChanged( char *key ) { int i = lookup( key ); if( i >= 0 ) { clearChanged( i ); } } void ZHashTable::clearChangedAll() { for( int i=0; iput( temp ); return h; } #ifdef SELF_TEST #pragma message( "BUILDING SELF_TEST" ) #include "zhashtyped.h" void main() { ZHashTable table; table.putS( "aaa", "this is a test" ); table.putS( "bbb", "this too" ); assert( table.getS( "ccc" ) == NULL ); assert( !strcmp( table.getS( "aaa" ), "this is a test" ) ); table.putS( "aaa", NULL ); assert( table.getS( "aaa" ) == NULL ); table.put( "key1=val1 'key 2'=val2 key3='val 3' 'key\\'4'='val\\'4'" ); assert( !strcmp(table.getS( "key1" ), "val1") ); assert( !strcmp(table.getS( "key 2" ), "val2") ); assert( !strcmp(table.getS( "key3" ), "val 3") ); char *a = table.getS( "key'4" ); assert( !strcmp(a, "val'4") ); table.putI( "inttest", 0xffffffff ); int b = table.getI( "inttest" ); ZHashTyped intTable; intTable.set( "key1", 1 ); intTable.set( "key2", 2 ); int c = intTable.get( "key2" ); } #endif