Main Page | Modules | Alphabetical List | Data Structures | File List | Data Fields | Related Pages

hashtable.c

00001 /* 00002 00003 The BLAH library, a container library 00004 Copyright (C) 1997-2004 The CDG Team <cdg@nats.informatik.uni-hamburg.de> 00005 00006 This program is free software; you can redistribute it and/or modify 00007 it under the terms of the GNU General Public License as published by 00008 the Free Software Foundation; either version 2 of the License, or 00009 (at your option) any later version. 00010 00011 This program is distributed in the hope that it will be useful, 00012 but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 GNU General Public License for more details. 00015 00016 You should have received a copy of the GNU General Public License 00017 along with this program; if not, write to the Free Software 00018 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 00019 00020 Contact: blah@nats.informatik.uni-hamburg.de 00021 00022 $Id: hashtable.c,v 1.6 2004/09/23 15:46:25 micha Exp $ 00023 00024 */ 00025 00026 /* ---------------------------------------------------------------------- 00027 * @defgroup Hashtable Hashtables 00028 * Implementation of hashtables. 00029 * 00030 * A hashtable stores an arbitrary number of objects that are accessed 00031 * using an arbitrary key. The key is converted to an integer value, so 00032 * so-called hash value, by the hash function. A hash function should be 00033 * fast and should map the keys to integers in a highly irregular but 00034 * consistent way, i.~e., the keys should be distributed evenly over the 00035 * whole set of integers. Ideally, objects can then be accessed in 00036 * constant time based on their key. 00037 * @{ 00038 */ 00039 00040 /* ----INCLUDES------------------------------------------------------- */ 00041 #include <stdio.h> 00042 #include <string.h> 00043 #include "blah.h" 00044 00045 #define PRIME_TESTS 10 00046 00047 /* ---------------------------------------------------------------------- 00048 * internal representation of the hash table entry. 00049 */ 00050 struct HashtableEntryStruct { 00051 Pointer key; /**< hash key */ 00052 Pointer value; /**< value */ 00053 struct HashtableEntryStruct *next; /**< pointer to next entry */ 00054 }; 00055 00056 /* ---------------------------------------------------------------------- 00057 * internal representation of the hash table. 00058 */ 00059 struct HashtableStruct { 00060 int count; /**< number of entries in hashtable */ 00061 int threshold; /**< limit when table is rehashed */ 00062 int capacity; /**< capacity of table */ 00063 double loadFactor; /**< ratio when to rehash */ 00064 IntFunction *keyEqualFunction; /**< equality function for key */ 00065 IntFunction *hashFunction; /**< hash function */ 00066 HashtableEntry *entries; /**< table of entries */ 00067 }; 00068 00069 /* ---------------------------------------------------------------------- 00070 * internal representation of the hash iterator. 00071 */ 00072 struct HashIteratorStruct { 00073 Hashtable hashtable; /**< Where do we belong to? */ 00074 int index; /**< index of entry */ 00075 HashtableEntry entry; /**< determines _next_ HashtableEntry */ 00076 }; 00077 00078 /* ---------------------------------------------------------------------- 00079 * rehashes the hashtable. 00080 * 00081 * doubles the capacity and this enlarges the space. 00082 * 00083 * @param ht the hash table that is to be resized 00084 * @returns the new resized hashtable. 00085 */ 00086 void rehashHashtable(Hashtable ht) 00087 { 00088 HashtableEntry *newEntries; 00089 HashtableEntry entry, old; 00090 int i, index; 00091 int newCapacity = primeNext(ht->capacity * 2, PRIME_TESTS); 00092 00093 #ifdef DEBUG 00094 printf("DEBUG: rehashHashtable %d --> %d...", ht->capacity, newCapacity); 00095 #endif 00096 newEntries = 00097 (HashtableEntry *)memMalloc(sizeof(HashtableEntryStruct) * newCapacity); 00098 for (i = 0; i < newCapacity; i++) 00099 newEntries[i] = (HashtableEntry)NULL; 00100 for (i = 0; i < ht->capacity; i++) 00101 for (old = ht->entries[i]; old != NULL; ) { 00102 entry = old; 00103 old = old->next; 00104 00105 index = (((*ht->hashFunction)(entry->key)) & 0x7fffffff) % newCapacity; 00106 00107 entry->next = newEntries[index]; 00108 newEntries[index] = entry; 00109 } 00110 00111 ht->threshold = (int)(newCapacity * ht->loadFactor); 00112 ht->capacity = newCapacity; 00113 memFree(ht->entries); 00114 ht->entries = newEntries; 00115 00116 #ifdef DEBUG 00117 printf("done\n"); 00118 #endif 00119 } 00120 00121 /* ---------------------------------------------------------------------- 00122 * creates a new hashtable with an initial capacity of c. 00123 * 00124 * whenever the number of stored objects exceeds \a loadFactor times the current 00125 * capacity, the hashtable is automatically resized. 00126 * 00127 * @param capacity specifies the capacity of the new hashtable to be created. 00128 * @param loadFactor it is a number that should be chosen between 0.5 and 0.9. 00129 * @param hashFunction it is that function which is called with a key as the only parameter and 00130 * returns the hash value of * that key. 00131 * @param keyEqualFunction It is that function which is called with two keys as 00132 * parameters.It returns 1 if the keys are to be considered equal and 0 otherwise. 00133 * @returns the new hashtable with a capacity of c. 00134 00135 */ 00136 Hashtable hashNew(int capacity, 00137 double loadFactor, 00138 IntFunction *hashFunction, 00139 IntFunction *keyEqualFunction) 00140 { 00141 Hashtable ht; 00142 int i; 00143 00144 ht = (Hashtable)memMalloc(sizeof(HashtableStruct)); 00145 ht->count = 0; 00146 if(capacity < 3) { 00147 capacity = 3; 00148 } 00149 ht->capacity = primeNext(capacity, PRIME_TESTS); 00150 ht->threshold = (int)(ht->capacity * loadFactor); 00151 ht->loadFactor = loadFactor; 00152 ht->keyEqualFunction = keyEqualFunction; 00153 ht->hashFunction = hashFunction; 00154 ht->entries = 00155 (HashtableEntry *)memMalloc(sizeof(HashtableEntryStruct)*ht->capacity); 00156 for (i = 0; i < ht->capacity; i++) 00157 ht->entries[i] = NULL; 00158 00159 return(ht); 00160 } 00161 00162 /* ---------------------------------------------------------------------- 00163 * adds the object value with the key \a key in the hashtable. 00164 * 00165 * rehashes the hashtable if necessary. 00166 * 00167 * @param ht the hashtable in which the object value has to be added with the key. 00168 * @param key the key value that has to be added with the \a value 00169 * @param value the object whose value has to be added to the key. 00170 * @returns the old object that was stored at that key or \a NULL 00171 */ 00172 Pointer hashSet(Hashtable ht, 00173 Pointer key, 00174 Pointer value) 00175 { 00176 HashtableEntry entry; 00177 int index; 00178 Pointer p; 00179 00180 if (ht == NULL || key == NULL || value == NULL) { 00181 fprintf(stderr, "ERROR: hashSet: arguments must not be NULL\n"); 00182 abort(); 00183 } 00184 00185 index = (((*ht->hashFunction)(key)) & 0x7fffffff) % ht->capacity; 00186 for (entry = ht->entries[index]; entry != NULL; entry = entry->next) { 00187 if ((*ht->keyEqualFunction)(entry->key, key)) { 00188 p = entry->value; 00189 entry->key = key; 00190 entry->value = value; 00191 return(p); 00192 } 00193 } 00194 00195 if (ht->count >= ht->threshold) { 00196 /* The number of entries exceeds the thresholds. Rehash the table. 00197 * TODO: 00198 * - touching fresh buckets is counted and punished by rehashing 00199 * - rehashing should depend on collisions not on tabelsize 00200 */ 00201 rehashHashtable(ht); 00202 return(hashSet(ht, key, value)); 00203 } 00204 00205 entry = (HashtableEntry)memMalloc(sizeof(HashtableEntryStruct)); 00206 entry->key = key; 00207 entry->value = value; 00208 entry->next = ht->entries[index]; 00209 ht->entries[index] = entry; 00210 ht->count++; 00211 return(NULL); 00212 } 00213 00214 00215 /* ---------------------------------------------------------------------- 00216 * retrieves value associated with the key. 00217 * 00218 * @param ht the hashtable from which the value at the specified key has to be retrieved. 00219 * @param key the key at which the value of the hashtable has to be retrieved. 00220 * @returns the object that(or \a NULL) that is stored under the key in the hashtable. 00221 */ 00222 Pointer hashGet(Hashtable ht, 00223 Pointer key) 00224 { 00225 HashtableEntry entry; 00226 int index; 00227 00228 if (ht == NULL || key == NULL) { 00229 fprintf(stderr, "ERROR: hashGet: arguments must not be NULL\n"); 00230 abort(); 00231 } 00232 00233 index = (((*ht->hashFunction)(key)) & 0x7fffffff) % ht->capacity; 00234 for (entry = ht->entries[index]; entry != NULL; entry = entry->next) { 00235 if ((*ht->keyEqualFunction)(entry->key, key)) { 00236 return(entry->value); 00237 } 00238 } 00239 00240 return(NULL); 00241 } 00242 00243 /* ---------------------------------------------------------------------- 00244 * retrieves value associated with the key 00245 * 00246 * @param ht the hashtable from which the value at the specified key has to be retrieved. 00247 * @param key the key at which the value of the hashtable has to be retrieved. 00248 * @returns the object that(or \a NULL) that is stored under the key in the hashtable. 00249 */ 00250 Pointer *hashGetPointerToValue(Hashtable ht, Pointer key) 00251 { 00252 HashtableEntry entry; 00253 int index; 00254 00255 if (ht == NULL || key == NULL) { 00256 fprintf(stderr, "ERROR: hashGet: arguments must not be NULL\n"); 00257 abort(); 00258 } 00259 00260 index = (((*ht->hashFunction)(key)) & 0x7fffffff) % ht->capacity; 00261 for (entry = ht->entries[index]; entry != NULL; entry = entry->next) { 00262 if ((*ht->keyEqualFunction)(entry->key, key)) { 00263 return(&entry->value); 00264 } 00265 } 00266 00267 return(NULL); 00268 } 00269 00270 /* ---------------------------------------------------------------------- 00271 * removes key/value pair from hashtable 00272 * 00273 * @param ht the hashtable from which the key/value pair is to be removed. 00274 * @param key this shows the value that has to be deleted in the hashtable ht. 00275 * @returns the object (or \a NULL) that is stored under the key in the hashtable ht. 00276 */ 00277 Pointer hashRemove(Hashtable ht, Pointer key) 00278 { 00279 HashtableEntry entry, prev; 00280 int index; 00281 Pointer value; 00282 00283 if (ht == NULL || key == NULL) { 00284 fprintf(stderr, "ERROR: hashRemove: arguments must not be NULL\n"); 00285 abort(); 00286 } 00287 00288 index = (((*ht->hashFunction)(key)) & 0x7fffffff) % ht->capacity; 00289 for (entry = ht->entries[index], prev = NULL; 00290 entry != NULL; 00291 prev = entry, entry = entry->next) { 00292 if ((*ht->keyEqualFunction)(entry->key, key)) { 00293 if (prev != NULL) { 00294 prev->next = entry->next; 00295 } else { 00296 ht->entries[index] = entry->next; 00297 } 00298 ht->count--; 00299 value = entry->value; 00300 memFree(entry); 00301 return (value); 00302 } 00303 } 00304 return(NULL); 00305 } 00306 00307 /* ---------------------------------------------------------------------- 00308 * retrieves the size of the hashtable 00309 * 00310 * @param ht the hashtable whose size has to be retrieved. 00311 * @returns the number of objects that are currently stored in the hashtable. 00312 */ 00313 int hashSize(Hashtable ht) 00314 { 00315 if (ht == NULL) { 00316 fprintf(stderr, "ERROR: hashSize: argument must not be NULL\n"); 00317 abort(); 00318 } 00319 00320 return(ht->count); 00321 } 00322 00323 /* ---------------------------------------------------------------------- 00324 * checks if the hashtable is empty. 00325 * 00326 * @param ht the hash table whose emptiness is checked. 00327 * @returns \a TRUE if the hashtable is empty and \a False otherwise. 00328 */ 00329 Boolean hashIsEmpty(Hashtable ht) 00330 { 00331 if (ht == NULL) { 00332 fprintf(stderr, "ERROR: hashIsEmpty: argument must not be NULL\n"); 00333 abort(); 00334 } 00335 00336 return(ht->count == 0 ? TRUE : FALSE); 00337 } 00338 00339 /* ---------------------------------------------------------------------- 00340 * checks if the hashtable contains the specified key. 00341 * 00342 * @param ht the hashtable in which the specified key has to be checked. 00343 * @param key the key whose existance in the hashtable is to be checked. 00344 * @returns \a TRUE if the an object under the key exists and \a FALSE otherwise. 00345 */ 00346 Boolean hashContainsKey(Hashtable ht, Pointer key) 00347 { 00348 HashtableEntry entry; 00349 int index; 00350 00351 if (ht == NULL || key == NULL) { 00352 fprintf(stderr, "ERROR: hashConstainsKey: arguments must not be NULL\n"); 00353 abort(); 00354 } 00355 00356 index = (((*ht->hashFunction)(key)) & 0x7fffffff) % ht->capacity; 00357 for (entry = ht->entries[index]; entry != NULL; entry = entry->next) 00358 if ((*ht->keyEqualFunction)(entry->key, key)) 00359 return(TRUE); 00360 00361 return(FALSE); 00362 } 00363 00364 /* ---------------------------------------------------------------------- 00365 * checks if the hashtable contains a specific value. 00366 * 00367 * Objects are compared by the standard C operator == and this method is Expensive!!! 00368 * 00369 * @param ht the hashtable in which the specific value has to be checked. 00370 * @param value the value whose existance in the hashtable has to be checked. 00371 * @returns \a TRUE if the object is contained in the hashtable and \a FALSE otherwise. 00372 */ 00373 Boolean hashContainsValue(Hashtable ht, Pointer value) 00374 { 00375 HashtableEntry entry; 00376 int i; 00377 00378 if (ht == NULL || value == NULL) { 00379 fprintf(stderr, "ERROR: hashConstainsValue: arguments must not be NULL\n"); 00380 abort(); 00381 } 00382 00383 for (i = 0; i < ht->count; i++) 00384 for (entry = ht->entries[i]; entry != NULL; entry = entry->next) 00385 if (value == entry->value) 00386 return(TRUE); 00387 00388 return(FALSE); 00389 } 00390 00391 /* ---------------------------------------------------------------------- 00392 * deletes the hashtable, but can't free the memory for the content. 00393 * 00394 * @param ht the hashtable that has to be deleted. 00395 */ 00396 void hashDelete(Hashtable ht) 00397 { 00398 HashtableEntry entry, next; 00399 int i; 00400 00401 for (i = 0; i < ht->capacity; i++) { 00402 for (entry = ht->entries[i]; entry != NULL; entry = next) { 00403 next = entry->next; 00404 memFree(entry); 00405 } 00406 } 00407 memFree(ht->entries); 00408 memFree(ht); 00409 } 00410 00411 /* ---------------------------------------------------------------------- 00412 * calls the function `f(key,value)' for each item in the hashtable. 00413 * 00414 * @param ht the hashtable in which the function 'f' has to be called. 00415 * @param f the function that has to be called for every object in the hashtable ht. 00416 */ 00417 void hashForEach(Hashtable ht, VoidFunction *f) 00418 { 00419 HashtableEntry entry; 00420 int i; 00421 00422 for (i = 0; i < ht->capacity; i++) { 00423 for (entry = ht->entries[i]; entry != NULL; entry = entry->next) { 00424 (*f)(entry->key, entry->value); 00425 } 00426 } 00427 } 00428 /* ---------------------------------------------------------------------- 00429 * calls the function `f(element,data)' for each object in the hashtable. 00430 * 00431 * @param ht the hashtable in which the function 'f' has to be called. 00432 * @param f the function that has to be called in the hashtable ht. 00433 * @param clientData the data in the function 'f(element,data)'that is called in the hashtable ht. 00434 */ 00435 void hashForEachWithData(Hashtable ht, VoidFunction *f, Pointer clientData) 00436 { 00437 HashtableEntry entry; 00438 int i; 00439 00440 for (i = 0; i < ht->capacity; i++) { 00441 for (entry = ht->entries[i]; entry != NULL; entry = entry->next) { 00442 (*f)(entry->key, entry->value, clientData); 00443 } 00444 } 00445 } 00446 00447 /* ---------------------------------------------------------------------- 00448 * calls the function `f(key, value)' for each item in the Hashtable. 00449 * 00450 * deletes the hashtable and hashtable becomes inaccessible 00451 * 00452 * @param ht the hashtable in which the function 'f' has to be called. 00453 * @param f the function that has to be called for every object in the hashtable ht. 00454 */ 00455 void hashForEachFree(Hashtable ht, VoidFunction *f) 00456 { 00457 HashtableEntry entry, next; 00458 int i; 00459 00460 for (i = 0; i < ht->capacity; i++) { 00461 for (entry = ht->entries[i]; entry != NULL; entry = next) { 00462 next = entry->next; 00463 (*f)(entry->key, entry->value); 00464 memFree(entry); 00465 } 00466 } 00467 memFree(ht->entries); 00468 memFree(ht); 00469 } 00470 00471 /* ---------------------------------------------------------------------- 00472 List hashForEachFree(), but frees only the embedded value. F is applied 00473 to the value only and must be a unariy void function. 00474 */ 00475 void hashForEachFreeValue(Hashtable ht, VoidFunction *f) 00476 { 00477 HashtableEntry entry, next; 00478 int i; 00479 00480 for (i = 0; i < ht->capacity; i++) { 00481 for (entry = ht->entries[i]; entry != NULL; entry = next) { 00482 next = entry->next; 00483 (*f)(entry->value); 00484 memFree(entry); 00485 } 00486 } 00487 memFree(ht->entries); 00488 memFree(ht); 00489 } 00490 00491 00492 /* ---------------------------------------------------------------------- 00493 * retrieves a list of keys of all objects in the hashtable. 00494 * 00495 * @param ht the hashtable from which the list of keys have to be retrieved. 00496 * @returns the list of the keys of all objects in the hashtable. 00497 */ 00498 List hashListOfKeys(Hashtable ht) 00499 { 00500 int i; 00501 List l = NULL; 00502 HashtableEntry entry; 00503 00504 for (i = 0; i < ht->capacity; i++) { 00505 for (entry = ht->entries[i]; entry != NULL; entry = entry->next) { 00506 l = listPrependElement(l, entry->key); 00507 } 00508 } 00509 00510 return(l); 00511 } 00512 00513 /* ---------------------------------------------------------------------- 00514 * returns a new hash iterator object. 00515 * 00516 * Iterators allow to loop through all the elements of a container. 00517 * However the behaviour is undefined if the container changes while the iterator is still looping. 00518 * 00519 * \code hi = hashIteratorNew(ht); 00520 * while (NULL != (key = hashIteratorNextKey(hi))) { 00521 * do something with key; 00522 * } 00523 * hashIteratorDelete(hi); \endcode 00524 * 00525 * @param ht the hashtable for which the new iterator object has to be returned. 00526 * @returns the new hash iterator object for the hashtable ht. 00527 */ 00528 HashIterator hashIteratorNew(Hashtable ht) 00529 { 00530 HashIterator hi; 00531 00532 /* Some checks would be nice but I don't have the time right now. */ 00533 hi = (HashIterator)memMalloc(sizeof(HashIteratorStruct)); 00534 hi->hashtable = ht; 00535 hi->index = -1; 00536 hi->entry = NULL; 00537 while (!hi->entry && ++hi->index < ht->capacity) { 00538 hi->entry=ht->entries[hi->index]; 00539 } 00540 00541 return(hi); 00542 } 00543 00544 /* ---------------------------------------------------------------------- 00545 * returns the next key of a hash-iterator. 00546 * 00547 * Hash iterator points to the following entry afterwards 00548 * 00549 * @param hi the hash iterator whose next key has to be returned. 00550 * @returns the next key of the hash iterator hi 00551 */ 00552 Pointer hashIteratorNextKey(HashIterator hi) 00553 { 00554 HashtableEntry current=hi->entry; 00555 Hashtable ht=hi->hashtable; 00556 00557 if (!current) { 00558 return(NULL); 00559 } 00560 00561 hi->entry = hi->entry->next; 00562 while (!hi->entry && ++hi->index < ht->capacity) { 00563 hi->entry=ht->entries[hi->index]; 00564 } 00565 00566 return(current->key); 00567 } 00568 00569 /* ---------------------------------------------------------------------- 00570 * returns the next value of a hash-iterator. 00571 * 00572 * Hash iterator points to the following entry afterwards. 00573 * 00574 * @param hi the hash iterator whose next value has to be returned. 00575 * @returns the next value of a hash-iterator hi. 00576 */ 00577 Pointer hashIteratorNextValue(HashIterator hi) 00578 { 00579 HashtableEntry current; 00580 Hashtable ht = hi->hashtable; 00581 00582 if (NULL == (current = hi->entry)) { 00583 return(NULL); 00584 } 00585 00586 if (NULL == (hi->entry = hi->entry->next)) { 00587 while (++hi->index < ht->capacity) { 00588 /* for (hi->index++; hi->index < ht->capacity; hi->index++) { */ 00589 if (NULL != (hi->entry = ht->entries[hi->index])) { 00590 return(current->value); 00591 } 00592 } 00593 } 00594 00595 return(current->value); 00596 } 00597 00598 /* ---------------------------------------------------------------------- 00599 * deletes and frees hash iterator object 00600 * 00601 * @param hi the hash iterator object that has to be deleted. 00602 */ 00603 void hashIteratorDelete(HashIterator hi) 00604 { 00605 /* This is easy :-) */ 00606 memFree(hi); 00607 } 00608 00609 /* ---------------------------------------------------------------------- 00610 * is an example has function for C strings that can be used in hashNew. 00611 * 00612 * A bit rotating function by Knuth is used here. 00613 * TODO: 00614 * - try different hash functions 00615 * - strlen should go out - supply length of key. 00616 * 00617 * @param s the string on which the hashStringHashFunction is performed. 00618 * @returns the integer representation of the string. 00619 */ 00620 int hashStringHashFunction(char *s) 00621 { 00622 register int len = strlen(s); 00623 register int value; 00624 00625 #if 1 00626 /* rotating ala knuth */ 00627 value = len; 00628 while(len) 00629 value = (value << 5) ^ (value >> 27) ^ s[--len]; 00630 00631 #else 00632 value = 1; 00633 while(len) 00634 value *= s[--len]; 00635 /* value = (value >> 11) + s[--len] */ 00636 #endif 00637 00638 00639 return (value); 00640 } 00641 00642 /* ---------------------------------------------------------------------- 00643 * is an example equality function for C strings that can be used in hashNew. 00644 * 00645 * @param s the first string that is used in the string comparison function. 00646 * @param t the second string that is used in the string comparison function. 00647 * @returns 0 if they are equal and 1 if not. 00648 */ 00649 int hashStringEqualFunction(char *s, char *t) 00650 { 00651 return(strcmp(s, t) == 0); 00652 } 00653 00654 /* ---------------------------------------------------------------------- */ 00655 /** @} */

BLAH 0.95 (20 Oct 2004)