Dictionary.h

Go to the documentation of this file.
00001 /*
00002  * See the dyninst/COPYRIGHT file for copyright information.
00003  * 
00004  * We provide the Paradyn Tools (below described as "Paradyn")
00005  * on an AS IS basis, and do not warrant its validity or performance.
00006  * We reserve the right to update, modify, or discontinue this
00007  * software at any time.  We shall have no obligation to supply such
00008  * updates or modifications or any other form of support to you.
00009  * 
00010  * By your use of Paradyn, you understand and agree that we (or any
00011  * other person or entity with proprietary rights in Paradyn) are
00012  * under no obligation to provide either maintenance services,
00013  * update services, notices of latent defects, or correction of
00014  * defects for Paradyn.
00015  * 
00016  * This library is free software; you can redistribute it and/or
00017  * modify it under the terms of the GNU Lesser General Public
00018  * License as published by the Free Software Foundation; either
00019  * version 2.1 of the License, or (at your option) any later version.
00020  * 
00021  * This library is distributed in the hope that it will be useful,
00022  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00023  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00024  * Lesser General Public License for more details.
00025  * 
00026  * You should have received a copy of the GNU Lesser General Public
00027  * License along with this library; if not, write to the Free Software
00028  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00029  */
00030 
00031 // Dictionary.h
00032 
00033 #ifndef _DICTIONARY_H_
00034 #define _DICTIONARY_H_
00035 
00036 #if (defined(__XLC__) || defined(__xlC__)) && defined(__TEMPINC__)
00037 //#pragma implementation("../src/Dictionary.C")
00038 #endif
00039 
00040 #include "common/h/language.h"
00041 #include "common/h/Vector.h" // takes care of redefining assert as ASSERT for _KERNEL
00042 #include "common/h/Pair.h"
00043 
00044 /************************************************************************
00045  * template<class K, class V> class dictionary_hash
00046  * Ari Tamches
00047  *
00048  * See Stroustrup, The C++ Programming Language, 3d edition, which
00049  * provided inspiration for this hash table class via an extended
00050  * example in section 17.6
00051  *
00052  * This class tries very hard to ensure that there are enough bins s.t.
00053  * hash collisions are few (though if you provide a bad hash function,
00054  * all bets are off, as usual).  Toward that end, the following invariant
00055  * is maintained at all times:
00056  *         #bins * max_bin_load >= total # items
00057  *         Note about "total # items": includes items that are marked as removed
00058  *         (after all, such items do indeed clutter up the bins and so should be
00059  *         taken into account for this performance invariant.)
00060  * max_bin_load is a user-settable parameter (see ctor).
00061  *
00062  * When adding a new hash table item would break the invariant,
00063  * #bins is multiplied by bin_grow_factor (another user-settable parameter in ctor),
00064  * and everything is rehashed (expensive, but not needed often, since growth
00065  * is exponential).  NOTE: since the performance invariant doesn't distinguish
00066  * between items marked as 'removed' and non-removed items, when we clear the
00067  * 'removed' flag of any item, the invariant cannot possibly be broken.
00068  *
00069  * Note: We don't make any effort to keep num_bins always a power of 2.
00070  *       If such an invariant were maintained, then "x % num_bins"
00071  *       simplifies to "x % (1U << lg_num_bins)-1".  We could get away
00072  *       with just storing lg_num_bins, and use "1U << lg_num_bins"
00073  *       to get num_bins at any time.
00074  * 
00075 ************************************************************************/
00076 
00077 template<class K, class V> class dictionary_hash_iter;
00078 
00079 template<class K, class V>
00080 class dictionary_hash {
00081   typedef const V &RET;
00082   friend class dictionary_hash_iter<K,V>;
00083 
00084  public:
00085   typedef K key_type;
00086   typedef V value_type;
00087   typedef RET iter_value_return_type;
00088   typedef V& value_reference_type;
00089 
00090   typedef dictionary_hash_iter<K,V> const_iterator;
00091   typedef dictionary_hash_iter<K,V> iterator;
00092    
00093   dictionary_hash (unsigned (*hashfunc)(const K &),
00094            unsigned nbins=101,
00095            unsigned max_bin_load=70
00096            // we keep #bins*max_bin_load >= total # items * 100, 
00097            // to make sure that there are enough bins s.t. 
00098            // (assuming a good hash function) collisions are few.
00099            );
00100   ~dictionary_hash () {}
00101 
00102   // I'd rather not have provided these methods, but apparantly some code
00103   // in paradynd actually copies entire dictionaries (!)
00104   dictionary_hash (const dictionary_hash<K,V> &);
00105   dictionary_hash<K,V>&  operator= (const dictionary_hash<K,V> &);
00106 
00107   unsigned getMemUsage_exceptObjItself_AndExtraFromKeyOrValue() const {
00108     return all_elems.capacity() * sizeof(entry) + bins.capacity() * sizeof(unsigned);
00109   }
00110 
00111   unsigned size () const;
00112 
00113   V& operator[] (const K &);
00114   // If key doesn't exist, creates a new entry (using the default ctor for the
00115   // value).  Hence this can't be a const method; contrast with get().
00116   const V& operator[] (const K &) const;
00117 
00118   V& get(const K &);
00119   const V& get(const K &) const;
00120   // If key doesn't exist, then barfs.  Contrast with operator[].
00121 
00122   const V& get_and_remove(const K &);
00123   // returns a reference the to value part of the deleted item (possible because
00124   // in reality, deleting an item just sets the 'removed' flag).
00125    
00126   bool find_and_remove(const K &, V&);
00127 
00128   void set(const K &, const V &);
00129   // If key already exists, barfs.  Contrast with operator[].
00130 
00131   bool      find (const K &, V &) const;
00132   bool      find (const K &, const V **) const;
00133 
00134   iterator find(const K &);  // like STL hashmap find
00135   const_iterator find(const K &) const;  // like STL hashmap find
00136 
00137   bool      defines (const K &) const;
00138   void      undef (const K &); 
00139 
00140 #ifndef _KERNEL
00141   // Sun's compiler can't handle the return types
00142   pdvector<K> keys () const;
00143   pdvector<V> values () const;
00144   pdvector< pdpair<K, V> > keysAndValues() const;
00145 #endif
00146 
00147   void      clear ();
00148 
00149   const_iterator begin() const {
00150     //return const_iterator(all_elems.begin(), all_elems.end());
00151     return const_iterator(*this);
00152   }
00153   const_iterator end() const {
00154     //return const_iterator(all_elems.end(), all_elems.end());
00155     return const_iterator(*this).end();
00156   }
00157 
00158   // pdvector<V> linear_filter(bool (*filt_func)(const K&, void *), void *data);
00159  private:
00160   bool enoughBins() const {
00161     //return bins.size() * max_bin_load >= size();
00162     // ABOVE WAS BUGGY: use all_elems.size(), not size()!
00163     return bins.size() * max_bin_load >= all_elems.size();
00164   }
00165   bool enoughBinsIf1MoreItemAdded() const {
00166     return bins.size() * max_bin_load >= all_elems.size() + 1;
00167   }
00168 
00169   unsigned add(const K &, const V &);
00170   // internal routine called by locate_addIfNotFound() and by set()
00171   // Assumes an entry with that key does not exist; adds to the dictionary.
00172   // returns ndx within all_elems[]
00173    
00174   unsigned locate_addIfNotFound(const K &);
00175   // returns ndx within all_elems[]; never returns UINT_MAX
00176 
00177   unsigned locate(const K &, bool even_if_removed) const;
00178   // never adds a new entry if not found.  Okay to call from const member fns
00179 
00180   void grow_numbins(unsigned new_numbins);
00181 
00182   // We keep a ptr to the hash function:
00183   unsigned (*hasher)(const K &);
00184   // NOTE: always squeeze the result into 31 bits before using it, since
00185   // key_hashval, below, is a 31-bit bitfield.
00186 
00187   // Important structure:
00188   struct entry {
00189     K key;
00190     V val;
00191     unsigned key_hashval : 31;
00192     // we could always rehash, since we have 'key', but this
00193     // is faster.
00194     unsigned removed : 1;
00195     // since removal would require a lot of shifting w/in all_elems[],
00196     // we use this flag instead, and only do the actual removal on
00197     // resize.
00198     unsigned next; // an index into 'all_elems', for hash collisions.  UINT_MAX
00199                    // implies end of collision chain for this bin
00200 
00201     entry() {} // needed by pdvector class
00202     entry(const K &ikey, unsigned ikey_hashval, const V &ival, unsigned inext) :
00203           key(ikey), val(ival), key_hashval(ikey_hashval), next(inext) {
00204       removed = false;
00205     }
00206     entry(const entry &src) : key(src.key), val(src.val) {
00207       key_hashval = src.key_hashval;
00208       removed = src.removed;
00209       next = src.next;
00210     }
00211     entry& operator=(const entry &src) {
00212       if (&src == this) return *this;
00213 
00214       key = src.key;
00215       val = src.val;
00216       key_hashval = src.key_hashval;
00217       removed = src.removed;
00218       next = src.next;
00219       return *this;
00220     }
00221   };
00222 
00223   // The actual elements, in one big pdvector.  This enables certain important
00224   // operations, (specifically rehashing the entire dictionary) to be done
00225   // efficiently.  Plus, it's a clean approach to storage.
00226   // Since we use pdvector<>::operator+= on this pdvector, we're glad that class pdvector<>
00227   // preallocates some extra stuff when growing.
00228   pdvector<entry> all_elems;
00229 
00230   // The bins.  Note: since the number of bins doesn't change often (only when
00231   // growing), we use resize with the exact flag set.
00232   pdvector<unsigned> bins;
00233   // each entry in 'bins' gives the index (w/in 'all_elems') of the first element
00234   // in the bin.  In other words, the first element in bin X is all_elems[bin[X]].
00235   // If bin[X] is UINT_MAX, then the bin is empty.
00236 
00237   unsigned num_removed_elems;
00238    
00239   // Notes:
00240   // 1) At any given time, all_elems.size()-num_removed_items gives us the
00241   //    total # of items in the dictionary.
00242   // 2) At any given time, bins.size() gives us the total # of bins in the dictionary.
00243   // 3) We keep the invariant #bins * max_bin_load >= total # items * 100,
00244   //    incrementing #bins (by a factor of bin_grow_factor) if needed to maintain it.
00245 
00246   unsigned max_bin_load;    // percentage of total number of items
00247   static const unsigned bin_grow_factor;
00248 };
00249 
00250 // Typical way to use an iterator for this class:
00251 // for (dictionary_hash<K,V>::iterator iter=thedict.begin();
00252 //      iter != thedict.end(); iter++) {
00253 //    V &v = *iter; // or use iter-> directly to access the value.
00254 //    // You can write to iter-> to change the value.  You can't change or even access
00255 //    // the key right now.
00256 // }
00257 
00258 template<class K, class V>
00259 class dictionary_hash_iter {
00260  private:
00261   typedef const V &RET; // RET: type returned by operator*()
00262   dictionary_hash<K,V> &dict;
00263   TYPENAME pdvector< TYPENAME dictionary_hash<K,V>::entry >::iterator i;
00264   TYPENAME pdvector< TYPENAME dictionary_hash<K,V>::entry >::iterator the_end;
00265 
00266   // too bad we need to store the_end (for make_valid_or_end())
00267    
00268   void move_to_next() {
00269     i++;
00270     make_valid_or_end();
00271   }
00272   void make_valid_or_end() {
00273     while (i != the_end && i->removed)
00274       i++;
00275   }
00276    
00277  public:
00278   //dictionary_hash_iter(pdvector< dictionary_hash<K,V>::entry >::const_iterator ii,
00279   //               pdvector< dictionary_hash<K,V>::entry >::const_iterator iend) :
00280   //                   i(ii), the_end(iend) {
00281   //make_valid_or_end();
00282   //}
00283 
00284   dictionary_hash_iter(dictionary_hash<K,V> &idict) : dict(idict) {
00285     reset();
00286   }
00287 
00288   dictionary_hash_iter(const dictionary_hash<K,V> &idict) : 
00289     dict(const_cast< dictionary_hash<K,V>& >(idict)) {
00290     reset();
00291   }
00292 
00293 #if defined (cap_use_pdvector)
00294   dictionary_hash_iter(dictionary_hash<K,V> &idict,
00295       TYPENAME pdvector< TYPENAME dictionary_hash<K,V>::entry>::iterator curi) 
00296     : dict(idict), i(curi), the_end(dict.all_elems.end()) {
00297   }
00298 
00299   dictionary_hash_iter(const dictionary_hash<K,V> &idict,
00300       TYPENAME pdvector< TYPENAME dictionary_hash<K,V>::entry>::const_iterator curi) 
00301     : dict(const_cast< dictionary_hash<K,V>& >(idict)), 
00302     i(const_cast< TYPENAME pdvector< TYPENAME dictionary_hash<K,V>::entry >::iterator>(curi)), 
00303     the_end(const_cast< TYPENAME pdvector< TYPENAME dictionary_hash<K,V>::entry >::iterator>(
00304                                                                                            dict.all_elems.end()))
00305   {  }
00306 #else
00307   dictionary_hash_iter(dictionary_hash<K,V> &idict,
00308         TYPENAME std::vector< TYPENAME dictionary_hash<K,V>::entry>::iterator curi)
00309      : dict(idict), i(curi), the_end(dict.all_elems.end()) {
00310      }
00311 
00312   dictionary_hash_iter(const dictionary_hash<K,V> &idict,
00313         TYPENAME std::vector< TYPENAME dictionary_hash<K,V>::entry>::const_iterator curi)
00314      : dict( const_cast< dictionary_hash<K,V>& >(idict))
00315      {
00316         TYPENAME std::vector< TYPENAME dictionary_hash<K,V>::entry>::const_iterator *temp = &curi;
00317         TYPENAME std::vector< TYPENAME dictionary_hash<K,V>::entry>::iterator *iptr = NULL;
00318         //iptr =  const_cast< TYPENAME std::vector< TYPENAME dictionary_hash<K,V>::entry >::iterator *>
00319         //      (temp);
00320         iptr =  ( TYPENAME std::vector< TYPENAME dictionary_hash<K,V>::entry >::iterator *)
00321            (temp);
00322         i = *iptr;
00323 
00324         //i =  const_cast< TYPENAME std::vector< TYPENAME dictionary_hash<K,V>::entry >::iterator >
00325         //      (temp);
00326 
00327         //i =  ( TYPENAME std::vector< TYPENAME dictionary_hash<K,V>::entry >::iterator)
00328         //      (temp);
00329         TYPENAME std::vector< TYPENAME dictionary_hash<K,V>::entry >::const_iterator ci = dict.all_elems.end();
00330         TYPENAME std::vector< TYPENAME dictionary_hash<K,V>::entry >::const_iterator *cip = &ci;
00331         TYPENAME std::vector< TYPENAME dictionary_hash<K,V>::entry >::iterator *nip;
00332         nip = (TYPENAME std::vector< TYPENAME dictionary_hash<K,V>::entry >::iterator *) cip;
00333         the_end = *nip;
00334 
00335 
00336         //the_end = const_cast< TYPENAME std::vector< TYPENAME dictionary_hash<K,V>::entry >::iterator>
00337         //  (dict.all_elems.end()) ;
00338      }
00339 
00340 #endif
00341 
00342   dictionary_hash_iter(const dictionary_hash_iter<K,V> &src) :
00343      dict(src.dict), i(src.i), the_end(src.the_end) { }
00344   //dictionary_hash_iter& operator=(const dictionary_hash_iter<K,V> &src) {
00345   //  dict = src.dict;
00346   //  i = src.i;
00347   //  the_end = src.the_end;
00348   //  return *this;
00349   //}
00350 
00351   dictionary_hash_iter operator++(int) {
00352      dictionary_hash_iter result = *this;
00353      move_to_next();
00354      return result;
00355   }
00356   dictionary_hash_iter operator++() { // prefix
00357      move_to_next();
00358      return *this;
00359   }
00360 
00361   bool next(K &k, V &v) {
00362      for (; i != the_end; i++) {
00363       if (!i->removed) {
00364     k = i->key;
00365     v = i->val;
00366     i++;
00367     return true;
00368       }
00369     }
00370     return false;
00371   }
00372 
00373   void reset() {
00374     // start the iterator off at the first non-removed item now:
00375     if (dict.all_elems.size() == 0) {
00376 #if defined (use_pdvector)
00377        i = NULL;
00378        the_end = NULL;
00379 #else
00380        i = dict.all_elems.begin();
00381        the_end = dict.all_elems.end();
00382 #endif
00383 
00384     }
00385     else {
00386       i = dict.all_elems.begin();
00387       the_end = dict.all_elems.end();
00388 
00389       while (i != the_end && i->removed)
00390     i++;
00391     }
00392   }
00393 
00394   dictionary_hash_iter end() {
00395     i = the_end;
00396     return *this;
00397   }
00398 
00399   RET operator*() {
00400     return i->val;
00401   }
00402 //   RET *operator->() {
00403 //      return &i->val;
00404 //   }
00405 
00406   K currkey() {
00407     return i->key;
00408   }
00409   const K &currkey() const {
00410     return i->key;
00411   }
00412   RET currval() const {
00413     return i->val;
00414   }
00415   V &currval() {
00416     return i->val;
00417   }
00418 
00419   bool operator==(const dictionary_hash_iter &other) const {
00420     return i == other.i; // no need to check the_end, right?
00421   }
00422   bool operator!=(const dictionary_hash_iter &other) const {
00423     return i != other.i; // no need to check the_end, right?
00424   }
00425 
00426   operator bool() const {return i < the_end;}
00427 };
00428 
00429 #if defined(__XLC__) || defined(__xlC__) || defined (AUTO_TEMPLATES)
00430 #define DICT_C_IS_HEADER
00431 #include "../src/Dictionary.C"
00432 #endif
00433 
00434 #endif
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated on 12 Jul 2013 for SymtabAPI by  doxygen 1.6.1