$treeview $search $mathjax $extrastylesheet
librsync
2.3.1
$projectbrief
|
$projectbrief
|
$searchbox |
00001 /*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*- 00002 * 00003 * hashtable.h -- a generic open addressing hashtable. 00004 * 00005 * Copyright (C) 2003 by Donovan Baarda <abo@minkirri.apana.org.au> 00006 * 00007 * This program is free software; you can redistribute it and/or modify 00008 * it under the terms of the GNU Lesser General Public License as published by 00009 * the Free Software Foundation; either version 2.1 of the License, or 00010 * (at your option) any later version. 00011 * 00012 * This program is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public License 00018 * along with this program; if not, write to the Free Software 00019 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ 00020 #ifndef _HASHTABLE_H_ 00021 # define _HASHTABLE_H_ 00022 00023 # include <assert.h> 00024 # include <stdlib.h> 00025 # include <stdbool.h> 00026 00027 /** \file hashtable.h 00028 * A generic open addressing hashtable. 00029 * 00030 * This is a minimal hashtable containing pointers to arbitrary entries with 00031 * configurable hashtable size and support for custom hash() and cmp() methods. 00032 * The cmp() method can either be a simple comparison between two keys, or can 00033 * be against a special match object containing additional mutable state. This 00034 * allows for things like deferred and cached evaluation of costly comparison 00035 * data. The hash() function doesn't need to avoid clustering behaviour. 00036 * 00037 * It uses open addressing with quadratic probing for collisions. The 00038 * MurmurHash3 finalization function is optionally used on the hash() output to 00039 * avoid clustering and can be disabled by setting HASHTABLE_NMIX32. There is 00040 * no support for removing entries, only adding them. Multiple entries with the 00041 * same key can be added, and you can use a fancy cmp() function to find 00042 * particular entries by more than just their key. There is an iterator for 00043 * iterating through all entries in the hashtable. There are optional 00044 * NAME_find() find/match/hashcmp/entrycmp stats counters that can be disabled 00045 * by defining HASHTABLE_NSTATS. There is an optional simple k=1 bloom filter 00046 * for speed that can be disabled by defining HASHTABLE_NBLOOM. 00047 * 00048 * The types and methods of the hashtable and its contents are specified by 00049 * using \#define parameters set to their basenames (the prefixes for the *_t 00050 * type and *_func() methods) before doing \#include "hashtable.h". This 00051 * produces static inline type-safe methods that are either application 00052 * optimized for speed or wrappers around void* implementation methods for 00053 * compactness. 00054 * 00055 * \param ENTRY - the entry type basename. 00056 * 00057 * \param KEY - optional key type basename (default: ENTRY). 00058 * 00059 * \param MATCH - optional match type basename (default: KEY). 00060 * 00061 * \param NAME - optional hashtable type basename (default: ENTRY_hashtable). 00062 * 00063 * Example: \code 00064 * typedef ... mykey_t; 00065 * int mykey_hash(mykey_t const *e); 00066 * int mykey_cmp(mykey_t *e, mykey_t const *o); 00067 * 00068 * typedef struct myentry { 00069 * mykey_t key; // Inherit from mykey_t. 00070 * ...extra entry value data... 00071 * } myentry_t; 00072 * void myentry_init(myentry_t *e, ...); 00073 * 00074 * #define ENTRY myentry 00075 * #define KEY mykey 00076 * #include "hashtable.h" 00077 * 00078 * hashtable_t *t; 00079 * myentry_t entries[300]; 00080 * mykey_t k; 00081 * myentry_t *e; 00082 * 00083 * t = myentry_hashtable_new(300); 00084 * myentry_init(&entries[5], ...); 00085 * myentry_hashtable_add(t, &entries[5]); 00086 * k = ...; 00087 * e = myentry_hashtable_find(t, &k); 00088 * 00089 * int i; 00090 * for (e = myentry_hashtable_iter(t, &i); e != NULL; 00091 * e = myentry_hashtable_next(t, &i)) 00092 * ... 00093 * 00094 * myentry_hashtable_free(t); 00095 * \endcode 00096 * 00097 * The mykey_hash() and mykey_cmp() fuctions will typically take pointers to 00098 * mykey/myentry instances the same as the pointers stored in the hashtable. 00099 * However it is also possible for them to take "match objects" that are a 00100 * "subclass" of the entry type that contain additional state for complicated 00101 * comparision operations. 00102 * 00103 * Example: \code 00104 * typedef struct mymatch { 00105 * mykey_t key; // Inherit from mykey_t; 00106 * ...extra match criteria and state data... 00107 * } mymatch_t; 00108 * int mymatch_cmp(mymatch_t *m, myentry_t const *e); 00109 * 00110 * #define ENTRY myentry 00111 * #define KEY mykey 00112 * #define MATCH mymatch 00113 * #include "hashtable.h" 00114 * 00115 * ... 00116 * mymatch_t m; 00117 * 00118 * t = myentry_hashtable_new(300); 00119 * ... 00120 * m = ...; 00121 * e = myentry_hashtable_find(t, &m); 00122 * \endcode 00123 * 00124 * The mymatch_cmp() function is only called for finding hashtable entries and 00125 * can mutate the mymatch_t object for doing things like deferred and cached 00126 * evaluation of expensive match data. It can also access the whole myentry_t 00127 * object to match against more than just the key. */ 00128 00129 /** The hashtable type. */ 00130 typedef struct hashtable { 00131 int size; /**< Size of allocated hashtable. */ 00132 int count; /**< Number of entries in hashtable. */ 00133 unsigned tmask; /**< Mask to get the hashtable index. */ 00134 # ifndef HASHTABLE_NBLOOM 00135 unsigned bshift; /**< Shift to get the bloomfilter index. */ 00136 # endif 00137 # ifndef HASHTABLE_NSTATS 00138 /* The following are for accumulating NAME_find() stats. */ 00139 long find_count; /**< The count of finds tried. */ 00140 long match_count; /**< The count of matches found. */ 00141 long hashcmp_count; /**< The count of hash compares done. */ 00142 long entrycmp_count; /**< The count of entry compares done. */ 00143 # endif 00144 # ifndef HASHTABLE_NBLOOM 00145 unsigned char *kbloom; /**< Bloom filter of hash keys with k=1. */ 00146 # endif 00147 void **etable; /**< Table of pointers to entries. */ 00148 unsigned ktable[]; /**< Table of hash keys. */ 00149 } hashtable_t; 00150 00151 /* void* implementations for the type-safe static inline wrappers below. */ 00152 hashtable_t *_hashtable_new(int size); 00153 void _hashtable_free(hashtable_t *t); 00154 00155 # ifndef HASHTABLE_NBLOOM 00156 static inline void hashtable_setbloom(hashtable_t *t, unsigned const h) 00157 { 00158 /* Use upper bits for a "different hash". */ 00159 unsigned const i = h >> t->bshift; 00160 t->kbloom[i / 8] |= 1 << (i % 8); 00161 } 00162 00163 static inline bool hashtable_getbloom(hashtable_t *t, unsigned const h) 00164 { 00165 /* Use upper bits for a "different hash". */ 00166 unsigned const i = h >> t->bshift; 00167 return (t->kbloom[i / 8] >> (i % 8)) & 1; 00168 } 00169 # endif 00170 00171 /** MurmurHash3 finalization mix function. */ 00172 static inline unsigned mix32(unsigned h) 00173 { 00174 h ^= h >> 16; 00175 h *= 0x85ebca6b; 00176 h ^= h >> 13; 00177 h *= 0xc2b2ae35; 00178 h ^= h >> 16; 00179 return h; 00180 } 00181 00182 /** Ensure hash's are never zero. */ 00183 static inline unsigned nozero(unsigned h) 00184 { 00185 return h ? h : -1; 00186 } 00187 00188 #endif /* _HASHTABLE_H_ */ 00189 00190 /* If ENTRY is defined, define type-dependent static inline methods. */ 00191 #ifdef ENTRY 00192 00193 # define _JOIN2(x, y) x##y 00194 # define _JOIN(x, y) _JOIN2(x, y) 00195 00196 # ifndef KEY 00197 # define KEY ENTRY 00198 # endif 00199 00200 # ifndef MATCH 00201 # define MATCH KEY 00202 # endif 00203 00204 # ifndef NAME 00205 # define NAME _JOIN(ENTRY, _hashtable) 00206 # endif 00207 00208 # define ENTRY_t _JOIN(ENTRY, _t) /**< The entry type. */ 00209 # define KEY_t _JOIN(KEY, _t) /**< The key type. */ 00210 # define MATCH_t _JOIN(MATCH, _t) /**< The match type. */ 00211 # define KEY_hash _JOIN(KEY, _hash) /**< The key hash(k) method. */ 00212 # define MATCH_cmp _JOIN(MATCH, _cmp) /**< The match cmp(m, e) method. */ 00213 /* The names for all the hashtable methods. */ 00214 # define NAME_new _JOIN(NAME, _new) 00215 # define NAME_free _JOIN(NAME, _free) 00216 # define NAME_stats_init _JOIN(NAME, _stats_init) 00217 # define NAME_add _JOIN(NAME, _add) 00218 # define NAME_find _JOIN(NAME, _find) 00219 # define NAME_iter _JOIN(NAME, _iter) 00220 # define NAME_next _JOIN(NAME, _next) 00221 00222 /* Modified hash() with/without mix32() and reserving zero for empty buckets. */ 00223 # ifdef HASHTABLE_NMIX32 00224 # define _KEY_HASH(k) nozero(KEY_hash((KEY_t *)k)) 00225 # else 00226 # define _KEY_HASH(k) nozero(mix32(KEY_hash((KEY_t *)k))) 00227 # endif 00228 00229 /* Loop macro for probing table t for key hash hk, iterating with index i and 00230 entry hash h, terminating at an empty bucket. */ 00231 # define _for_probe(t, hk, i, h) \ 00232 unsigned const *const ktable = t->ktable;\ 00233 unsigned const tmask = t->tmask;\ 00234 unsigned i, s, h;\ 00235 for (i = hk & tmask, s = 0; (h = ktable[i]); i = (i + ++s) & tmask) 00236 00237 /* Conditional macro for incrementing stats counters. */ 00238 # ifndef HASHTABLE_NSTATS 00239 # define _stats_inc(c) (c++) 00240 # else 00241 # define _stats_inc(c) 00242 # endif 00243 00244 /** Allocate and initialize a hashtable instance. 00245 * 00246 * The provided size is used as an indication of the number of entries you wish 00247 * to add, but the allocated size will probably be larger depending on the 00248 * implementation to enable optimisations or avoid degraded performance. It may 00249 * be possible to fill the table beyond the requested size, but performance can 00250 * start to degrade badly if it is over filled. 00251 * 00252 * \param size - The desired minimum size of the hash table. 00253 * 00254 * \return The initialized hashtable instance or NULL if it failed. */ 00255 static inline hashtable_t *NAME_new(int size) 00256 { 00257 return _hashtable_new(size); 00258 } 00259 00260 /** Destroy and free a hashtable instance. 00261 * 00262 * This will free the hashtable, but will not free the entries in the 00263 * hashtable. If you want to free the entries too, use a hashtable iterator to 00264 * free the the entries first. 00265 * 00266 * \param *t - The hashtable to destroy and free. */ 00267 static inline void NAME_free(hashtable_t *t) 00268 { 00269 _hashtable_free(t); 00270 } 00271 00272 /** Initialize hashtable stats counters. 00273 * 00274 * This will reset all the stats counters for the hashtable, 00275 * 00276 * \param *t - The hashtable to initializ stats for. */ 00277 static inline void NAME_stats_init(hashtable_t *t) 00278 { 00279 # ifndef HASHTABLE_NSTATS 00280 t->find_count = t->match_count = t->hashcmp_count = t->entrycmp_count = 0; 00281 # endif 00282 } 00283 00284 /** Add an entry to a hashtable. 00285 * 00286 * This doesn't use MATCH_cmp() or do any checks for existing copies or 00287 * instances, so it will add duplicates. If you want to avoid adding 00288 * duplicates, use NAME_find() to check for existing entries first. 00289 * 00290 * \param *t - The hashtable to add to. 00291 * 00292 * \param *e - The entry object to add. 00293 * 00294 * \return The added entry, or NULL if the table is full. */ 00295 static inline ENTRY_t *NAME_add(hashtable_t *t, ENTRY_t *e) 00296 { 00297 unsigned he = _KEY_HASH(e); 00298 00299 assert(e != NULL); 00300 if (t->count + 1 == t->size) 00301 return NULL; 00302 # ifndef HASHTABLE_NBLOOM 00303 hashtable_setbloom(t, he); 00304 # endif 00305 _for_probe(t, he, i, h); 00306 t->count++; 00307 t->ktable[i] = he; 00308 return t->etable[i] = e; 00309 } 00310 00311 /** Find an entry in a hashtable. 00312 * 00313 * Uses MATCH_cmp() to find the first matching entry in the table in the same 00314 * hash() bucket. 00315 * 00316 * \param *t - The hashtable to search. 00317 * 00318 * \param *m - The key or match object to search for. 00319 * 00320 * \return The first found entry, or NULL if nothing was found. */ 00321 static inline ENTRY_t *NAME_find(hashtable_t *t, MATCH_t *m) 00322 { 00323 assert(m != NULL); 00324 unsigned hm = _KEY_HASH(m); 00325 ENTRY_t *e; 00326 00327 _stats_inc(t->find_count); 00328 # ifndef HASHTABLE_NBLOOM 00329 if (!hashtable_getbloom(t, hm)) 00330 return NULL; 00331 # endif 00332 _for_probe(t, hm, i, he) { 00333 _stats_inc(t->hashcmp_count); 00334 if (hm == he) { 00335 _stats_inc(t->entrycmp_count); 00336 if (!MATCH_cmp(m, e = t->etable[i])) { 00337 _stats_inc(t->match_count); 00338 return e; 00339 } 00340 } 00341 } 00342 /* Also count the compare for the empty bucket. */ 00343 _stats_inc(t->hashcmp_count); 00344 return NULL; 00345 } 00346 00347 static inline ENTRY_t *NAME_next(hashtable_t *t, int *i); 00348 00349 /** Initialize a iteration and return the first entry. 00350 * 00351 * This works together with NAME_next() for iterating through all entries in a 00352 * hashtable. 00353 * 00354 * Example: \code 00355 * for (e = NAME_iter(t, &i); e != NULL; e = NAME_next(t, &i)) 00356 * ... 00357 * \endcode 00358 * 00359 * \param *t - the hashtable to iterate over. 00360 * 00361 * \param *i - the int iterator index to initialize. 00362 * 00363 * \return The first entry or NULL if the hashtable is empty. */ 00364 static inline ENTRY_t *NAME_iter(hashtable_t *t, int *i) 00365 { 00366 assert(t != NULL); 00367 assert(i != NULL); 00368 *i = 0; 00369 return NAME_next(t, i); 00370 } 00371 00372 /** Get the next entry from a hashtable iterator or NULL when finished. 00373 * 00374 * This works together with NAME_iter() for iterating through all entries in a 00375 * hashtable. 00376 * 00377 * \param *t - the hashtable to iterate over. 00378 * 00379 * \param *i - the int iterator index to use. 00380 * 00381 * \return The next entry or NULL if the iterator is finished. */ 00382 static inline ENTRY_t *NAME_next(hashtable_t *t, int *i) 00383 { 00384 assert(t != NULL); 00385 assert(i != NULL); 00386 ENTRY_t *e = NULL; 00387 00388 while ((*i < t->size) && !(e = t->etable[(*i)++])) ; 00389 return e; 00390 } 00391 00392 # undef ENTRY 00393 # undef KEY 00394 # undef MATCH 00395 # undef NAME 00396 # undef ENTRY_t 00397 # undef KEY_t 00398 # undef MATCH_t 00399 # undef KEY_hash 00400 # undef MATCH_cmp 00401 # undef NAME_new 00402 # undef NAME_free 00403 # undef NAME_stats_init 00404 # undef NAME_add 00405 # undef NAME_find 00406 # undef NAME_iter 00407 # undef NAME_next 00408 # undef _KEY_HASH 00409 #endif /* ENTRY */