Aleph-w  1.5a.2
Biblioteca general de algoritmos y estructuras de datos
 Todo Clases Archivos Funciones Variables 'typedefs' Enumeraciones Amigas Grupos Páginas
tpl_odhash.H
1 
2 # ifndef TPL_ODHASH_H
3 # define TPL_ODHASH_H
4 
5 # include <iostream>
6 # include <limits>
7 # include <primes.H>
8 # include <htlist.H>
9 # include <ahDry.H>
10 # include <hash-dry.H>
11 # include <hash-fct.H>
12 # include <tpl_dynArray.H>
13 
14 using namespace Aleph;
15 
16 namespace Aleph {
17 
18 # ifdef N
19 # define NBACKUP N
20 # undef N
21 # endif
22 
23 # ifdef M
24 # define MBACKUP M
25 # undef M
26 # endif
27 
28 
52  template <typename Key, class Cmp = Aleph::equal_to<Key>>
54 {
55  public:
56 
57  typedef Key Key_Type;
58 
59  typedef Key Item_Type;
60 
62  typedef size_t (*Hash_Fct)(const Key &);
63 
64  enum Status { EMPTY, BUSY, DELETED };
65 
66  enum Probe { NO_PROBED, FIRST_PROBE, SECOND_PROBE, LINEAR_PROBE };
67 
68  struct Bucket
69  {
70  Key key; // clave
71  unsigned status : 4; // status EMPTY, DELETED o BUSY
72  unsigned probe_type : 4; // FIRST_PROBE SECOND_PROBE LINEAR_PROBE
73  unsigned int probe_counter; // contador de sondeos
74 
75  Bucket() : status(EMPTY), probe_type(NO_PROBED), probe_counter(0)
76  { /* empty */ }
77 
78  void reset() // put all as when constructed
79  {
80  status = EMPTY;
81  probe_type = NO_PROBED;
82  probe_counter = 0;
83  }
84  };
85 
86  Bucket * table = NULL; // arreglo de cubetas
87  Hash_Fct hash_fct = NULL; // primera función hash
88  Hash_Fct second_hash_fct = NULL; // segunda función hash
89 
90 protected:
91 
92  size_t len; // tamaño de la tabla
93  float lower_alpha;
94  float upper_alpha;
95 
96 private:
97 
98  size_t N; // número de cubetas ocupadas
99  bool with_resize;
100 
101  Bucket * allocate_bucket(Bucket & bucket, unsigned char probe_type)
102  {
103  I(bucket.status != BUSY);
104 
105  ++N;
106  bucket.status = BUSY;
107  bucket.probe_type = probe_type;
108  bucket.probe_counter++;
109 
110  return &bucket;
111  }
112 
113  void decrease_probe_counter(Bucket * bucket)
114  {
115  I(bucket->status == BUSY or bucket->status == DELETED);
116 
117  bucket->probe_counter--;
118  if (bucket->probe_counter == 0) // <marcar EMPTY sobre la cubeta?
119  bucket->status = EMPTY;
120  }
121 
122  void deallocate_bucket(Bucket * bucket)
123  {
124  I(bucket->status == BUSY);
125 
126  bucket->status = DELETED;
127  const Key & key = bucket->key;
128 
129  const size_t i_fst = (*hash_fct)(key) % len;
130  if (&table[i_fst] == bucket)
131  {
132  I(Cmp () (table[i_fst].key, key));
133  I(table[i_fst].probe_type == FIRST_PROBE);
134  ;
135  }
136  else
137  {
138  const size_t i_snd = (*second_hash_fct)(key) % len;
139  if (&table[i_snd] == bucket)
140  {
141  I(Cmp () (table[i_snd].key, key));
142  I(table[i_snd].probe_type == SECOND_PROBE);
143  decrease_probe_counter(&table[i_fst]);
144  ;
145  }
146  else
147  {
148  decrease_probe_counter(&table[i_fst]);
149  decrease_probe_counter(&table[i_snd]);
150  size_t i = i_snd;
151  for (index_forward(i); &table[i] != bucket; index_forward(i))
152  {
153  I(table[i].status != EMPTY);
154  decrease_probe_counter(&table[i]);
155  }
156  I(Cmp () (table[i].key, key));
157  I(table[i].probe_type == LINEAR_PROBE);
158  }
159  }
160 
161  decrease_probe_counter(bucket);
162  --N;
163  }
164 
165  size_t index_forward(size_t & i) const
166  {
167  I(i < len);
168  if (++i == len)
169  i = 0;
170  return i;
171  }
172 
173  size_t index_backward(size_t & i) const
174  {
175  I(i < len);
176  if (i-- == 0)
177  i = len - 1;
178  return i;
179  }
180 
181  static Bucket * key_to_bucket(Key * rec)
182  {
183  Bucket * ret_val = 0;
184  const long offset = (long) &ret_val->key;
185  return (Bucket*) ((long) rec - offset);
186  }
187 
188  bool is_valid_bucket(Bucket * bucket)
189  {
190  if (bucket < &table[0] or bucket >= &table[len])
191  return false;
192 
193  const int offset_with_base = (char*) bucket - (char*) &table[0];
194  return offset_with_base % sizeof(*bucket) == 0;
195  }
196 
197  size_t bucket_to_index(Bucket * bucket)
198  {
199  I(is_valid_bucket(bucket));
200  return bucket - &table[0];
201  }
202 
203 public:
204 
205  void swap(ODhashTable & other)
206  {
207  std::swap(table, other.table);
208  std::swap(hash_fct, other.hash_fct);
209  std::swap(second_hash_fct, other.second_hash_fct);
210  std::swap(N, other.N);
211  std::swap(len, other.len);
212  }
213 
224  ODhashTable(Hash_Fct __first_hash_fct = Aleph::dft_hash_fct<Key>,
225  Hash_Fct __second_hash_fct = Aleph::snd_hash_fct<Key>,
226  const size_t __len = Primes::DefaultPrime,
227  const float & __lower_alpha = hash_default_lower_alpha,
228  const float & __upper_alpha = hash_default_upper_alpha,
229  const bool __with_resize = true)
230  : table(NULL), hash_fct(__first_hash_fct),
231  second_hash_fct(__second_hash_fct), len(Primes::next_prime(__len)),
232  lower_alpha(__lower_alpha), upper_alpha(__upper_alpha),
233  N(0), with_resize(__with_resize)
234  {
235  table = new Bucket[len];
236  }
237 
240  {
241  if (table != NULL)
242  delete [] table;
243  }
244 
245  ODhashTable(const ODhashTable & other)
246  : ODhashTable(other.hash_fct, other.second_hash_fct, other.len,
247  other.lower_alpha, other.upper_alpha, other.with_resize)
248  {
249  I(table != NULL);
250  copy_from_table(other);
251  }
252 
253  ODhashTable(ODhashTable && other)
254  : ODhashTable(other.hash_fct, other.second_hash_fct, other.len,
255  other.lower_alpha, other.upper_alpha, other.with_resize)
256  {
257  I(table != NULL);
258  swap(other);
259  }
260 
261  ODhashTable & operator = (const ODhashTable & other)
262  {
263  if (this == &other)
264  return *this;
265 
266  if (len > other.N)
267  clean_table();
268  else
269  {
270  Bucket * new_table = new Bucket [other.len];
271  delete [] table;
272  table = new_table;
273  N = 0;
274  len = other.len;
275  hash_fct = other.hash_fct;
276  second_hash_fct = other.second_hash_fct;
277  lower_alpha = other.lower_alpha;
278  upper_alpha = other.upper_alpha;
279  with_resize = other.with_resize;
280  }
281 
282  copy_from_table(other);
283 
284  return *this;
285  }
286 
287  ODhashTable & operator = (ODhashTable && other)
288  {
289  swap(other);
290  return *this;
291  }
292 
293  OHASH_COMMON(ODhashTable);
294 
295  Generic_Traverse(Key);
296  Functional_Methods(Key);
297  Equal_To_Method(ODhashTable);
298 
301  Key * search(const Key & key) const
302  {
303  const size_t i_fst = (*hash_fct)(key) % len; // 1er sondeo (1ra fun hash)
304  if (table[i_fst].status == EMPTY)
305  return NULL;
306 
307  if (table[i_fst].status == BUSY and Cmp() (table[i_fst].key, key))
308  {
309  I(table[i_fst].probe_type == FIRST_PROBE);
310  I(table[i_fst].probe_counter > 0);
311  return &table[i_fst].key;
312  }
313 
314  const size_t i_snd = (*second_hash_fct)(key) % len; // 2do sondeo
315 
316  if (table[i_snd].status == EMPTY)
317  return NULL;
318 
319  if (table[i_snd].status == BUSY and Cmp() (table[i_snd].key, key))
320  {
321  I(table[i_snd].probe_type == SECOND_PROBE);
322  I(table[i_snd].probe_counter > 0);
323  return &table[i_snd].key;
324  }
325 
326  size_t i = i_snd;
327  // sondeo lineal a partir de índice de 2da función hash
328  for (int count = 0; count < len; ++count)
329  {
330  index_forward(i);
331  switch (table[i].status)
332  {
333  case EMPTY:
334  I(table[i].probe_counter == 0);
335  return NULL;
336  case BUSY:
337  I(table[i].probe_counter > 0);
338  if (Cmp() (table[i].key, key))
339  {
340  I(table[i].probe_type == LINEAR_PROBE);
341  return &table[i].key;
342  }
343  break;
344  case DELETED:
345  I(table[i].probe_counter > 0);
346  break;
347  default: ERROR("ODhashTable search: inconsistent bucket status");
348  }
349  }
350 
351  return NULL;
352  }
353 
354  Hash_Fct get_second_hash_fct() const { return second_hash_fct; }
355 
356  void set_second_hash_fct(Hash_Fct fct)
357  {
358  second_hash_fct = fct;
359  }
360 
361 private:
362 
363  Bucket * allocate_bucket(const Key & key)
364  {
365  if (N >= len)
366  throw std::overflow_error("Hash table is full");
367 
368  const size_t i_fst = (*hash_fct)(key) % len; // sondeo con 1ra función hash
369  if (table[i_fst].status != BUSY) // ¿cubeta disponible?
370  return allocate_bucket(table[i_fst], FIRST_PROBE);
371 
372  if (Cmp () (table[i_fst].key, key)) // test if key is alredy present
373  return NULL;
374 
375  const size_t i_snd = (*second_hash_fct)(key) % len; // 2do hash
376  if (table[i_snd].status != BUSY) // ¿cubeta disponible?
377  {
378  table[i_fst].probe_counter++;
379  return allocate_bucket(table[i_snd], SECOND_PROBE);
380  }
381 
382  if (Cmp () (table[i_snd].key, key)) // test if key is alredy present
383  return NULL;
384 
385  size_t i = i_snd;
386  for (int c = 0; c < len; ++c)
387  {
388  index_forward(i);
389  switch (table[i].status)
390  {
391  case BUSY:
392  if (Cmp() (table[i].key, key))
393  { // duplicated key ==> rollback previous probe_counter increases
394  for (int k = 0; k < c; ++k)
395  {
396  index_backward(i);
397  table[i].probe_counter--;
398  }
399  return NULL;
400  }
401  break;
402  case DELETED:
403  case EMPTY:
404  table[i_fst].probe_counter++;
405  table[i_snd].probe_counter++;
406  return allocate_bucket(table[i], LINEAR_PROBE);
407  default: ERROR("ODhashTable: Invalid bucket status"); break;
408  }
409  table[i].probe_counter++;
410  }
411 
412  return NULL;
413  }
414 
420  void remove_bucket(Bucket * bucket)
421  {
422  if (not is_valid_bucket(bucket))
423  throw std::invalid_argument("key pty does not belong to hash table");
424 
425  if (bucket->status != BUSY)
426  throw std::domain_error("Bucket containing key is not BUSY");
427 
428  deallocate_bucket(bucket);
429  }
430 
431  // returns true if the bucket contains the key
432  bool process_entry_for_remove(Bucket * bucket, const Key & key)
433  {
434  switch (bucket->status)
435  {
436  case EMPTY:
437  throw std::domain_error("Key not in hash table");
438  case BUSY:
439  if (Cmp () (bucket->key, key))
440  {
441  bucket->status = DELETED;
442  decrease_probe_counter(bucket);
443  return true;
444  }
445  case DELETED:
446  decrease_probe_counter(bucket);
447  break;
448  default: ERROR("Inconsistent bucket status");
449  }
450  return false;
451  }
452 
453 public:
454 
455  void remove(const Key & key)
456  {
457  --N;
458  try
459  {
460  const size_t i_fst = (*hash_fct)(key) % len;
461  if (process_entry_for_remove(&table[i_fst], key))
462  return;
463 
464  const size_t i_snd = (*second_hash_fct)(key) % len;
465  if (process_entry_for_remove(&table[i_snd], key))
466  return;
467 
468  size_t i = i_snd;
469  for (int c = 0; i < len; ++c)
470  {
471  index_forward(i);
472  if (process_entry_for_remove(&table[i], key))
473  return;
474  }
475  }
476  catch (...)
477  {
478  ++N;
479  rehash(); // in order to leave the table in a consistent state
480  throw;
481  }
482  }
483 
484  Stats stats() const
485  {
486  DynArray<size_t> lens;
487  size_t num_busy = 0, num_deleted = 0, num_empty = 0;
488  size_t max_len = std::numeric_limits<size_t>::min();
489  for (int i = 0; i < len; ++i)
490  switch (table[i].status)
491  {
492  case BUSY:
493  {
494  ++num_busy;
495  const Key & key = table[i].key;
496  size_t count = 1;
497  const size_t i_fst = (*hash_fct)(key) % len;
498  if (Cmp () (table[i_fst].key, key))
499  {
500  I(table[i_fst].probe_type == FIRST_PROBE);
501  I(table[i_fst].probe_counter > 0);
502  ;
503  }
504  else
505  {
506  ++count;
507  size_t i_snd = (*second_hash_fct)(key) % len;
508  if (Cmp () (table[i_snd].key, key))
509  {
510  I(table[i_snd].probe_type == SECOND_PROBE);
511  I(table[i_snd].probe_counter > 0);
512  ;
513  }
514  else
515  {
516  for (size_t i = index_forward(i_snd); true; index_forward(i))
517  {
518  if (table[i].status == BUSY and
519  Cmp () (table[i].key, key))
520  break;
521  ++count;
522  }
523  }
524  }
525  max_len = std::max(max_len, count);
526  update_stat_len(lens, count);
527  break;
528  }
529  case EMPTY:
530  ++num_empty;
531  update_stat_len(lens, 0);
532  break;
533  case DELETED:
534  ++num_deleted;
535  break;
536  }
537 
538  float avg = 0, sum = 0;
539  for (int i = 0; i < lens.size(); ++i)
540  {
541  avg += lens(i)*i;
542  sum += lens(i);
543  }
544 
545  avg /= sum;
546  float var = 0;
547  for (int i = 0; i < lens.size(); ++i)
548  {
549  float s = i - avg;
550  var += lens(i)*s*s;
551  }
552  var /= sum;
553 
554  Stats stats;
555  stats.num_busy = num_busy;
556  stats.num_deleted = num_deleted;
557  stats.num_empty = num_empty;
558  std::swap(lens, stats.lens);
559  stats.avg = avg;
560  stats.var = var;
561  stats.max_len = max_len;
562 
563  return stats;
564  }
565 };
566 
567 
568 
569 
570 # ifdef NBACKUP
571 # define N NBACKUP
572 # undef NBACKUP
573 # endif
574 
575 # ifdef MBACKUP
576 # define M MBACKUP
577 # undef MBACKUP
578 # endif
579 
580 # undef EMPTY
581 # undef BUSY
582 # undef DELETED
583 # undef NO_PROBED
584 # undef FIRST_PROBE
585 # undef SECOND_PROBE
586 # undef LINEAR_PROBE
587 
588 } // end namespace Aleph
589 
590 # endif // TPL_ODHASH_H
591 
Itor::difference_type count(const Itor &beg, const Itor &end, const T &value)
Definition: ahAlgo.H:127
ODhashTable(Hash_Fct __first_hash_fct=Aleph::dft_hash_fct< Key >, Hash_Fct __second_hash_fct=Aleph::snd_hash_fct< Key >, const size_t __len=Primes::DefaultPrime, const float &__lower_alpha=hash_default_lower_alpha, const float &__upper_alpha=hash_default_upper_alpha, const bool __with_resize=true)
Definition: tpl_odhash.H:224
size_t size() const
Retorna dimensión actual (más lejano índice escrito)
Definition: tpl_dynArray.H:523
Key * search(const Key &key) const
Definition: tpl_odhash.H:301
Definition: tpl_odhash.H:68
~ODhashTable()
Libera toda la tabla hash.
Definition: tpl_odhash.H:239
Definition: tpl_dynArray.H:70
Definition: tpl_odhash.H:53
size_t(* Hash_Fct)(const Key &)
El tipo de función hash.
Definition: tpl_odhash.H:62

Leandro Rabindranath León