Dictionary.C

Go to the documentation of this file.
00001 /*
00002  * See the dyninst/COPYRIGHT file for copyright information.
00003  * 
00004  * We provide the Paradyn Tools (below described as "Paradyn")
00005  * on an AS IS basis, and do not warrant its validity or performance.
00006  * We reserve the right to update, modify, or discontinue this
00007  * software at any time.  We shall have no obligation to supply such
00008  * updates or modifications or any other form of support to you.
00009  * 
00010  * By your use of Paradyn, you understand and agree that we (or any
00011  * other person or entity with proprietary rights in Paradyn) are
00012  * under no obligation to provide either maintenance services,
00013  * update services, notices of latent defects, or correction of
00014  * defects for Paradyn.
00015  * 
00016  * This library is free software; you can redistribute it and/or
00017  * modify it under the terms of the GNU Lesser General Public
00018  * License as published by the Free Software Foundation; either
00019  * version 2.1 of the License, or (at your option) any later version.
00020  * 
00021  * This library is distributed in the hope that it will be useful,
00022  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00023  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00024  * Lesser General Public License for more details.
00025  * 
00026  * You should have received a copy of the GNU Lesser General Public
00027  * License along with this library; if not, write to the Free Software
00028  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00029  */
00030 
00031 // Dictionary.C
00032 
00033 #if (!defined (__XLC__) && !defined(__xlC__)) || defined(DICT_C_IS_HEADER)
00034 
00035 #include "common/h/std_namesp.h"
00036 #include "common/h/Dictionary.h"
00037 //#include "common/h/String.h"
00038 
00039 // methods of this class implemented in .h so compiler has no trouble finding
00040 // them for template instantiation
00041 
00042 #include <limits.h> // UINT_MAX
00043 #include <assert.h>
00044 
00045 template<class K, class V>
00046 const unsigned dictionary_hash<K,V>::bin_grow_factor = 2;
00047 
00048 template<class K, class V>
00049 dictionary_hash<K,V>::dictionary_hash(unsigned (*hf)(const K &),
00050                                       unsigned nbins,
00051                                       unsigned imax_bin_load) {
00052   // we keep #bins*max_bin_load <= total # items * 100
00053   assert(imax_bin_load > 0);
00054   assert(imax_bin_load < 1000); // why would you want to allow so many
00055                                 // collisions per bin?
00056   hasher = hf;
00057 
00058   // Note: all_elems[] starts off as an empty pdvector.
00059 
00060   // Pre-size the # of bins from parameter.  
00061   // Each bins starts off empty (UINT_MAX)
00062   assert(nbins > 0);
00063   bins.resize(nbins);
00064   for (unsigned binlcv=0; binlcv < bins.size(); binlcv++)
00065     bins[binlcv] = UINT_MAX;
00066 
00067   num_removed_elems = 0;
00068 
00069   max_bin_load = imax_bin_load;
00070 
00071   assert(enoughBins());
00072 }
00073 
00074 template<class K, class V>
00075 dictionary_hash<K,V>::dictionary_hash(const dictionary_hash<K,V> &src) :
00076                                       all_elems(src.all_elems), bins(src.bins) {
00077   // copying an entire dictionary should be a rare occurance; we provide it only
00078   // for completeness (I'd prefer to leave it out, actually)
00079   hasher = src.hasher;
00080   num_removed_elems = src.num_removed_elems;
00081   max_bin_load = src.max_bin_load;
00082 
00083   assert(enoughBins());
00084 }
00085 
00086 template<class K, class V>
00087 dictionary_hash<K,V> &
00088 dictionary_hash<K,V>::operator=(const dictionary_hash<K,V> &src) {
00089   // assigning an entire dictionary should be a rare occurance; we provide it only
00090   // for completeness (I'd prefer to leave it out, actually)
00091   if (&src == this) return *this;
00092 
00093   hasher = src.hasher;
00094   all_elems = src.all_elems;
00095   bins = src.bins;
00096 
00097   num_removed_elems = src.num_removed_elems;
00098   max_bin_load = src.max_bin_load;
00099 
00100   assert(enoughBins());
00101 
00102   return *this;
00103 }
00104 
00105 template<class K, class V>
00106 unsigned dictionary_hash<K,V>::size() const {
00107   assert(num_removed_elems <= all_elems.size());
00108   return all_elems.size() - num_removed_elems;
00109 }
00110 
00111 template<class K, class V>
00112 V& dictionary_hash<K,V>::operator[](const K& key) {
00113   const unsigned ndx = locate_addIfNotFound(key);
00114 
00115   //assert(defines(key)); // WARNING: expensive assert!
00116    
00117   return all_elems[ndx].val;
00118 }
00119 
00120 template<class K, class V>
00121 const V& dictionary_hash<K,V>::operator[](const K& key) const {
00122   const unsigned ndx = locate(key, false);
00123   assert(ndx != UINT_MAX);
00124   //assert(defines(key)); // WARNING: expensive assert!
00125    
00126   return all_elems[ndx].val;
00127 }
00128 
00129 template<class K, class V>
00130 const V& dictionary_hash<K,V>::get(const K &key) const {
00131   const unsigned ndx = locate(key, false);
00132   // false: if removed, then it doesn't count
00133 
00134   if (ndx == UINT_MAX)
00135     assert(false && "dictionary_hash get() requires a hit");
00136    
00137   return all_elems[ndx].val;
00138 }
00139 
00140 template<class K, class V>
00141 V& dictionary_hash<K,V>::get(const K &key) {
00142   const unsigned ndx = locate(key, false);
00143   // false: if removed, then it doesn't count
00144 
00145   if (ndx == UINT_MAX)
00146     assert(false && "dictionary_hash get() requires a hit");
00147    
00148   return all_elems[ndx].val;
00149 }
00150 
00151 template<class K, class V>
00152 const V& dictionary_hash<K,V>::get_and_remove(const K &key) {
00153   const unsigned ndx = locate(key, false);
00154   // false: if removed, then it doesn't count
00155 
00156   if (ndx == UINT_MAX)
00157     assert(false && "dictionary_hash get_and_remove() requires a hit");
00158 
00159   const unsigned oldsize = size();
00160 
00161   entry &e = all_elems[ndx];
00162 
00163   assert(!e.removed);
00164   e.removed = true;
00165   num_removed_elems++;
00166   assert(num_removed_elems <= all_elems.size());
00167 
00168   assert(size() + 1 == oldsize);
00169    
00170   return e.val;
00171 }
00172 
00173 template<class K, class V>
00174 bool dictionary_hash<K,V>::find_and_remove(const K &key, V &val) {
00175   const unsigned ndx = locate(key, false);
00176   // false: if removed, then it doesn't count
00177 
00178   if (ndx == UINT_MAX)
00179     return false;
00180 
00181   const unsigned oldsize = size();
00182 
00183   entry &e = all_elems[ndx];
00184 
00185   assert(!e.removed);
00186   e.removed = true;
00187   num_removed_elems++;
00188   assert(num_removed_elems <= all_elems.size());
00189 
00190   assert(size() + 1 == oldsize);
00191 
00192   val = e.val;
00193 
00194   return true;
00195 }
00196 
00197 template<class K, class V>
00198 void dictionary_hash<K,V>::set(const K &key, const V &val) {
00199   const unsigned ndx = locate(key, true); // true --> if removed, count as a find
00200 
00201   if (ndx != UINT_MAX) {
00202     // Found an entry.  If this entry is actually 'removed', then un-remove it.
00203     // Otherwise, barf.
00204     entry &theEntry = all_elems[ndx];
00205     if (theEntry.removed) {
00206       assert(num_removed_elems > 0);
00207       theEntry.val = val;
00208       theEntry.removed = false;
00209       num_removed_elems--;
00210 
00211       // clearing the removed flag can't possibly invalidate the performance
00212       // invariant
00213     }
00214     else
00215       assert(false && "dictionary set(): an entry with that key already exists");
00216   }
00217   else {
00218     // Didn't find that entry.  Good.  Create that entry and add to the dictionary.
00219     add(key, val);
00220       
00221     //assert(defines(key)); // WARNING: expensive assert()
00222   }
00223 }
00224 
00225 template<class K, class V>
00226 TYPENAME dictionary_hash<K,V>::iterator
00227 dictionary_hash<K,V>::find(const K& key) {
00228   const unsigned ndx = locate(key, false); // false -->don't find removed items
00229   if (ndx == UINT_MAX) {
00230     return end();
00231   }
00232   else {
00233      TYPENAME dictionary_hash<K,V>::iterator dictIter(*this,
00234            GET_ITER(all_elems,ndx));
00235         
00236     return dictIter;
00237   }
00238 }
00239 
00240 template<class K, class V>
00241 TYPENAME dictionary_hash<K,V>::const_iterator
00242 dictionary_hash<K,V>::find(const K& key) 
00243      const {
00244   const unsigned ndx = locate(key, false); // false -->don't find removed items
00245   if (ndx == UINT_MAX) {
00246     return end();
00247   }
00248   else {
00249      TYPENAME dictionary_hash<K,V>::const_iterator dictIter(*this,
00250            GET_ITER(all_elems,ndx));
00251 
00252 #if 0
00253     TYPENAME dictionary_hash<K,V>::const_iterator dictIter(*this, 
00254                                                         all_elems.getIter(ndx));
00255 #endif
00256     return dictIter;
00257   }
00258 }
00259 
00260 template<class K, class V>
00261 bool dictionary_hash<K,V>::find(const K& key, V& el) const {
00262   const unsigned ndx = locate(key, false); // false --> don't find removed items
00263   if (ndx == UINT_MAX)
00264     return false;
00265   else {
00266     el = all_elems[ndx].val;
00267     return true;
00268   }
00269 }
00270 
00271 template<class K, class V>
00272 bool dictionary_hash<K,V>::find(const K& key, const V **el) const {
00273   const unsigned ndx = locate(key, false); // false --> don't find removed items
00274   if (ndx == UINT_MAX)
00275     return false;
00276   else {
00277     (*el) = &all_elems[ndx].val;
00278     return true;
00279   }
00280 }
00281 
00282 template<class K, class V>
00283 bool dictionary_hash<K,V>::defines(const K& key) const {
00284   const unsigned ndx = locate(key, false); // false --> don't find removed items
00285   return (ndx != UINT_MAX);
00286 }
00287 
00288 template<class K, class V>
00289 void dictionary_hash<K,V>::undef(const K& key) {
00290   unsigned ndx = locate(key, false); // false --> don't find removed items
00291   if (ndx == UINT_MAX)
00292     return; // nothing to do...either doesn't exist, or already removed
00293 
00294 #ifndef NDEBUG
00295   const unsigned oldsize = size();
00296 #endif
00297 
00298   entry &e = all_elems[ndx];
00299   assert(!e.removed);
00300   e.removed = true;
00301   num_removed_elems++;
00302 
00303 #ifndef NDEBUG
00304   assert(oldsize == size()+1);
00305   assert(num_removed_elems <= all_elems.size());
00306 #endif
00307 }
00308 
00309 #ifndef _KERNEL
00310 // Sun's compiler can't handle the return types
00311 
00312 template<class K, class V>
00313 pdvector<K>
00314 dictionary_hash<K,V>::keys() const {
00315   // One can argue that this method (and values(), below) should be removed in
00316   // favor of using the dictionary iterator class.  I agree; it's here only for
00317   // backwards compatibility.
00318   pdvector<K> result;
00319   result.reserve(size());
00320    
00321   // note that the iterator class automatically skips elems tagged for removal
00322   const_iterator finish = end();
00323   for (const_iterator iter=begin(); iter != finish; iter++)
00324     result.push_back(iter.currkey());
00325 
00326   return result;
00327 }
00328 
00329 template<class K, class V>
00330 pdvector<V>
00331 dictionary_hash<K,V>::values() const {
00332   // One can argue that this method (and keys(), above) should be removed in
00333   // favor of using the dictionary iterator class.  I agree; it's here only for
00334   // backwards compatibility.
00335   pdvector<V> result;
00336   result.reserve(size());
00337    
00338   // note that the iterator class automatically skips elems tagged for removal
00339   const_iterator finish = end();
00340   for (const_iterator iter=begin(); iter != finish; ++iter)
00341     result.push_back(*iter);
00342 
00343   return result;
00344 }
00345 
00346 template<class K, class V>
00347 pdvector< pdpair<K, V> >
00348 dictionary_hash<K,V>::keysAndValues() const {
00349   pdvector< pdpair<K, V> > result;
00350   result.reserve(size());
00351    
00352   const_iterator finish = end();
00353   for (const_iterator iter = begin(); iter != finish; ++iter)
00354     result.push_back( pdpair<K,V>(iter.currkey(), iter.currval()) );
00355 
00356   return result;
00357 }
00358 
00359 #endif //ifndef(_KERNEL)
00360 /*
00361 template<class K, class V>
00362 int dictionary_hash<K,V>::linear_filter(pdvector< pdpair<K, V> > &matches, 
00363                     bool (*filt_func)(const V&)) 
00364 {
00365   const_iterator finish = end();
00366   for (const_iterator iter = begin(); iter != finish; ++iter) {
00367     if ((*filt_func)(*iter))
00368       matches.push_back(pdpair<K,V>(iter.currkey(),iter.currval()));
00369   }
00370   return 0;
00371 }
00372 */
00373 /*
00374 template<class K, class V>
00375 pdvector<V>  dictionary_hash<K,V>::linear_filter(bool (*filt_func)(const K&, void *data),
00376                          void *param) 
00377 {
00378   pdvector< V > result;
00379   result.reserve(size());
00380   const_iterator finish = end();
00381   for (const_iterator iter = begin(); iter != finish; ++iter) {
00382     if ((*filt_func)(iter.currkey(), param)) {
00383       cerr << "adding value" << endl;
00384       result.push_back(iter.currval());
00385     }
00386   }
00387   cerr <<"result.size() = " << result.size() <<endl;
00388   return result;
00389 }
00390 */
00391 template<class K, class V>
00392 void dictionary_hash<K,V>::clear() {
00393   // max_bin_load and bin_grow_factor don't change.
00394   // Also, bins.size() doesn't change; is this best (perhaps we should shrink
00395   // bins down to its original size...not trivial since we didn't set that value
00396   // aside in the ctor.  In any event we don't lose sleep since calling clear() is
00397   // rare.)  Like class pdvector, we could also provide a zap() method which actually
00398   // does free up memory.
00399 
00400   all_elems.clear();
00401 
00402   for (unsigned lcv=0; lcv < bins.size(); lcv++)
00403     bins[lcv] = UINT_MAX;
00404 
00405   num_removed_elems = 0;
00406 
00407   assert(size() == 0);
00408 
00409   assert(enoughBins());
00410 }
00411 
00412 template<class K, class V>
00413 unsigned
00414 dictionary_hash<K,V>::locate(const K& key, bool evenIfRemoved) const {
00415   // An internal routine used by everyone.
00416 
00417   // call hasher(key), but make sure it fits in 31 bits:
00418   if(hasher == NULL) {
00419     cerr << "hasher == NULL\n";
00420     assert(false);
00421   }
00422   unsigned hashval = hasher(key) & 0x7fffffff;
00423 
00424   const unsigned bin = hashval % bins.size();
00425    
00426   unsigned elem_ndx = bins[bin];
00427   while (elem_ndx != UINT_MAX) {
00428     const entry &elem = all_elems[elem_ndx];
00429 
00430     // verify that this elem is in the right bin!
00431     assert(elem.key_hashval % bins.size() == bin);
00432       
00433     if (elem.key_hashval == hashval && elem.key == key) {
00434       // found it...unless it was removed
00435       if (elem.removed && !evenIfRemoved)
00436     elem_ndx = UINT_MAX;
00437 
00438       break;
00439     }
00440     else
00441       elem_ndx = elem.next;
00442   }
00443 
00444   return elem_ndx;
00445 }
00446 
00447 
00448 template<class K, class V>
00449 unsigned
00450 dictionary_hash<K,V>::add(const K& key, const V &val) {
00451   // internal routine called by locate_addIfNotFound() and by set()
00452   // returns ndx (within all_elems[]) of newly added item
00453 
00454   // before the insert, we should have enough bins to fit everything nicely...
00455   assert(enoughBins());
00456 
00457   if (!enoughBinsIf1MoreItemAdded()) {
00458     // ...but adding 1 more element would make things too big.  So, grow (add
00459     // some new bins) before adding.
00460          
00461     const unsigned new_numbins = (unsigned)(bins.size() * bin_grow_factor);
00462     assert(new_numbins > bins.size() && "bin_grow_factor too small");
00463 
00464     grow_numbins(new_numbins);
00465 
00466     // ...verify that we have enough bins after the grow:
00467     assert(enoughBinsIf1MoreItemAdded());
00468 
00469     // fall through...
00470   }
00471       
00472   // We don't need to grow.
00473   const unsigned hashval = hasher(key) & 0x7fffffff; // make sure it fits in 31 bits
00474   const unsigned bin     = hashval % bins.size();
00475 
00476   entry e(key, hashval, val, UINT_MAX);
00477   e.next = bins[bin];
00478   all_elems.push_back(e);
00479   //entry *e = all_elems.append_with_inplace_construction();
00480   //new((void*)e)entry(key, hashval, val, UINT_MAX);
00481   const unsigned new_entry_ndx = all_elems.size()-1;
00482 
00483   // Insert at the head of the bin (we could insert at the tail, but this
00484   // is a tad easier, and besides, position in the bin doesn't matter)
00485   bins[bin] = new_entry_ndx;
00486 
00487   //assert(defines(key)); // WARNING: expensive assert()
00488       
00489   assert(enoughBins());
00490   return new_entry_ndx;
00491 }
00492 
00493 template<class K, class V>
00494 unsigned
00495 dictionary_hash<K,V>::locate_addIfNotFound(const K& key) {
00496   // An internal routine used by everyone.
00497   // Returns ndx (within all_elems[]) of the item.
00498 
00499   unsigned result = locate(key, true); // true --> find even if 'removed' flag set
00500   // UINT_MAX if not found
00501 
00502   if (result == UINT_MAX)
00503     return add(key, V());
00504   else {
00505     // found the item.
00506     entry &e = all_elems[result];
00507     if (e.removed) {
00508       // Item has been removed.  We're gonna un-remove it.
00509 
00510       //assert(!defines(key)); // WARNING: expensive assert
00511       
00512       assert(num_removed_elems > 0);
00513 
00514       e.removed = false;
00515       e.val = V();
00516       num_removed_elems--;
00517 
00518       // Clearing the 'removed' flag of an entry can't possibly invalidate the
00519       // performance assertion
00520     }
00521 
00522     //assert(defines(key)); // WARNING: expensive assert
00523       
00524     return result;
00525   }
00526 }
00527 
00528 template<class K, class V>
00529 void
00530 dictionary_hash<K,V>::grow_numbins(unsigned new_numbins) {
00531   assert(new_numbins > bins.size() && "grow_numbins not adding any bins?");
00532 
00533   bins.resize(new_numbins, true); // true --> exact resize flag
00534   for (unsigned binlcv=0; binlcv < bins.size(); binlcv++)
00535     bins[binlcv] = UINT_MAX;
00536 
00537   // Look for elems to remove; shrink all_elems[] as appropriate
00538   if (num_removed_elems > 0) {
00539     for (unsigned lcv=0; lcv < all_elems.size(); ) {
00540       entry &e = all_elems[lcv];
00541       if (e.removed) {
00542     const unsigned oldsize = all_elems.size();
00543     assert(oldsize > 0);
00544 
00545     // remove item from pdvector by swap with last item & resizing/-1
00546     all_elems[lcv] = all_elems[oldsize-1]; // should be OK if lcv==oldsize-1
00547     all_elems.resize(oldsize-1);
00548 
00549     num_removed_elems--;
00550             
00551     // note: we DON'T bump up lcv in this case
00552       }
00553       else
00554     lcv++;
00555     }
00556 
00557     assert(num_removed_elems == 0);
00558   }
00559 
00560   // Now rehash everything.  Sounds awfully expensive, and I guess it is -- it's
00561   // linear in the total # of items in the hash table.  But take heart: beyond this
00562   // point, we don't need to make any copies of type KEY or VALUE, resize any pdvectors,
00563   // or recalculate any hash values.  We just need to assign to <n> unsigned
00564   // integers.
00565 
00566   const unsigned numbins = bins.size(); // loop invariant
00567   for (unsigned lcv=0; lcv < all_elems.size(); lcv++) {
00568     entry &e = all_elems[lcv];
00569     assert(!e.removed);
00570 
00571     // the entry's key_hashval stays the same, but it may go into a different bin:
00572     const unsigned bin = e.key_hashval % numbins;
00573 
00574     // prepend element to bin:
00575     unsigned &thebin = bins[bin];
00576       
00577     e.next = thebin;
00578     thebin = lcv;
00579   }
00580 
00581   // The invariant might not have held at the top of this routine, but it
00582   // should now hold.
00583   assert(enoughBins());
00584 }
00585 
00586 #endif
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated on 12 Jul 2013 for SymtabAPI by  doxygen 1.6.1