Aleph-w  1.9
General library for algorithms and data structures
tpl_odhash.H
1 
2 /* Aleph-w
3 
4  / \ | | ___ _ __ | |__ __ __
5  / _ \ | |/ _ \ '_ \| '_ \ ____\ \ /\ / / Data structures & Algorithms
6  / ___ \| | __/ |_) | | | |_____\ V V / version 1.9b
7  /_/ \_\_|\___| .__/|_| |_| \_/\_/ https://github.com/lrleon/Aleph-w
8  |_|
9 
10  This file is part of Aleph-w library
11 
12  Copyright (c) 2002-2018 Leandro Rabindranath Leon & Alejandro Mujica
13 
14  This program is free software: you can redistribute it and/or modify
15  it under the terms of the GNU General Public License as published by
16  the Free Software Foundation, either version 3 of the License, or
17  (at your option) any later version.
18 
19  This program is distributed in the hope that it will be useful, but
20  WITHOUT ANY WARRANTY; without even the implied warranty of
21  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  General Public License for more details.
23 
24  You should have received a copy of the GNU General Public License
25  along with this program. If not, see <https://www.gnu.org/licenses/>.
26 */
27 
28 # ifndef TPL_ODHASH_H
29 # define TPL_ODHASH_H
30 
31 # include <iostream>
32 
33 # include <primes.H>
34 # include <dlink.H>
35 # include <tpl_dynArray.H>
36 # include <array_it.H>
37 # include <ahDry.H>
38 # include <hash-dry.H>
39 # include <hashDry.H>
40 # include <hash-fct.H>
41 
42 using namespace Aleph;
43 
44 namespace Aleph {
45 
46 # ifdef N
47 # define NBACKUP N
48 # undef N
49 # endif
50 
51 # ifdef M
52 # define MBACKUP M
53 # undef M
54 # endif
55 
56 
80  template <typename Key, class Cmp = Aleph::equal_to<Key>>
82  : public OhashCommon<ODhashTable<Key, Cmp>, Key>,
83  public GenericTraverse<ODhashTable<Key, Cmp>>,
84  public LocateFunctions<ODhashTable<Key, Cmp>, Key>,
85  public FunctionalMethods<ODhashTable<Key, Cmp>, Key>,
86  public EqualToMethod<ODhashTable<Key, Cmp>>,
87  public StlAlephIterator<ODhashTable<Key, Cmp>>
88 {
89  friend class OhashCommon<ODhashTable<Key, Cmp>, Key>;
90 
91  public:
92 
93  using Key_Type = Key;
94 
95  using Item_Type = Key;
96 
97  using Hash_Fct = std::function<size_t(const Key &)>;
98 
99  using Hash_Fct_Ptr = size_t (*) (const Key&);
100 
101  enum Status { EMPTY, BUSY, DELETED };
102 
103  enum Probe { NO_PROBED, FIRST_PROBE, SECOND_PROBE, LINEAR_PROBE };
104 
105  struct Bucket
106  {
107  Key key; // clave
108  unsigned char status = EMPTY; // status EMPTY, DELETED o BUSY
109  unsigned char probe_type = NO_PROBED; // FIRST_PROBE SECOND_PROBE LINEAR_PROBE
110  unsigned int probe_counter = 0; // contador de sondeos
111 
112  Bucket() noexcept
113  : key(), status(EMPTY), probe_type(NO_PROBED), probe_counter(0)
114  { /* empty */ }
115 
116  void reset() noexcept // put all as when constructed
117  {
118  status = EMPTY;
119  probe_type = NO_PROBED;
120  probe_counter = 0;
121  }
122 
123  bool valid() const
124  {
125  return (status == EMPTY or status == DELETED or status == BUSY) and
126  (probe_type == NO_PROBED or probe_type == FIRST_PROBE or
127  probe_type == SECOND_PROBE or probe_type == LINEAR_PROBE);
128  }
129 
130  friend ostream & operator << (ostream & out, const Bucket & bucket)
131  {
132  string status;
133  switch (bucket.status)
134  {
135  case EMPTY: status = "EMPTY"; break;
136  case BUSY: status = "BUSY"; break;
137  case DELETED: status = "DELETED"; break;
138  }
139  string probe_type;
140  switch (bucket.probe_type)
141  {
142  case NO_PROBED: probe_type = "NO_PROBED"; break;
143  case FIRST_PROBE: probe_type = "FIRST_PROBE"; break;
144  case SECOND_PROBE: probe_type = "SECOND_PROBE"; break;
145  case LINEAR_PROBE: probe_type = "LINEAR_PROBE"; break;
146  }
147  return out << "Bucket at " << &bucket << endl
148  << "status = " << status << endl
149  << "probe_type = " << probe_type << endl
150  << "probe_counter = " << bucket.probe_counter;
151  }
152  };
153 
154  Bucket * table = nullptr; // arreglo de cubetas
155  Hash_Fct hash_fct = nullptr; // primera función hash
156  Hash_Fct second_hash_fct = nullptr; // segunda función hash
157 
158  Cmp cmp;
159 
160 protected:
161 
162  size_t len; // tamaño de la tabla
163  float lower_alpha;
164  float upper_alpha;
165 
166 private:
167 
168  size_t N; // número de cubetas ocupadas
169  bool with_resize;
170 
171  Bucket * allocate_bucket(Bucket & bucket, unsigned char probe_type) noexcept
172  {
173  assert(bucket.status != BUSY);
174 
175  ++N;
176  bucket.status = BUSY;
177  bucket.probe_type = probe_type;
178  bucket.probe_counter++;
179 
180  return &bucket;
181  }
182 
183  void decrease_probe_counter(Bucket * bucket) noexcept
184  {
185  assert(bucket->status == BUSY or bucket->status == DELETED);
186 
187  bucket->probe_counter--;
188  if (bucket->probe_counter == 0) // <marcar EMPTY sobre la cubeta?
189  bucket->status = EMPTY;
190  }
191 
192  void deallocate_bucket(Bucket * bucket) noexcept
193  {
194  assert(bucket->status == BUSY);
195 
196  bucket->status = DELETED;
197  const Key & key = bucket->key;
198 
199  const size_t i_fst = hash_fct(key) % len;
200  if (&table[i_fst] == bucket)
201  {
202  assert(Cmp () (table[i_fst].key, key));
203  assert(table[i_fst].probe_type == FIRST_PROBE);
204  }
205  else
206  {
207  const size_t i_snd = second_hash_fct(key) % len;
208  if (&table[i_snd] == bucket)
209  {
210  assert(Cmp () (table[i_snd].key, key));
211  assert(table[i_snd].probe_type == SECOND_PROBE);
212  decrease_probe_counter(&table[i_fst]);
213  }
214  else
215  {
216  decrease_probe_counter(&table[i_fst]);
217  decrease_probe_counter(&table[i_snd]);
218  size_t i = i_snd;
219  for (index_forward(i); &table[i] != bucket; index_forward(i))
220  {
221  assert(table[i].status != EMPTY);
222  decrease_probe_counter(&table[i]);
223  }
224  assert(Cmp () (table[i].key, key));
225  assert(table[i].probe_type == LINEAR_PROBE);
226  }
227  }
228 
229  decrease_probe_counter(bucket);
230  --N;
231  }
232 
233  size_t index_forward(size_t & i) const noexcept
234  {
235  assert(i < len);
236  if (++i == len)
237  i = 0;
238  return i;
239  }
240 
241  size_t index_backward(size_t & i) const noexcept
242  {
243  assert(i < len);
244  if (i-- == 0)
245  i = len - 1;
246  return i;
247  }
248 
249  static Bucket * key_to_bucket(Key * rec) noexcept
250  {
251  Bucket * ret_val = 0;
252  const long offset = (long) &ret_val->key;
253  return (Bucket*) ((long) rec - offset);
254  }
255 
256  bool is_valid_bucket(Bucket * bucket) noexcept
257  {
258  if (bucket < &table[0] or bucket >= &table[len])
259  return false;
260 
261  const int offset_with_base = (char*) bucket - (char*) &table[0];
262  return offset_with_base % sizeof(*bucket) == 0;
263  }
264 
265  size_t bucket_to_index(Bucket * bucket) noexcept
266  {
267  assert(is_valid_bucket(bucket));
268  return bucket - &table[0];
269  }
270 
271 public:
272 
273  const Cmp & get_compare() const { return cmp; }
274 
275  Cmp & get_compare() { return cmp; }
276 
277  void swap(ODhashTable & other) noexcept
278  {
279  std::swap(table, other.table);
280  std::swap(hash_fct, other.hash_fct);
281  std::swap(second_hash_fct, other.second_hash_fct);
282  std::swap(cmp, other.cmp);
283  std::swap(N, other.N);
284  std::swap(len, other.len);
285  }
286 
287  protected:
288 
289  ODhashTable(size_t __len, Hash_Fct __first_hash_fct,
290  Hash_Fct __second_hash_fct, Cmp __cmp,
291  float __lower_alpha, float __upper_alpha, bool __with_resize)
292  : table(nullptr), hash_fct(__first_hash_fct),
293  second_hash_fct(__second_hash_fct), cmp(__cmp),
294  len(Primes::next_prime(__len)),
295  lower_alpha(__lower_alpha), upper_alpha(__upper_alpha),
296  N(0), with_resize(__with_resize)
297  {
298  table = new Bucket[len];
299  }
300 
301  public:
302 
312  ODhashTable(size_t len = Primes::DefaultPrime,
313  Hash_Fct_Ptr first_hash_fct = Aleph::dft_hash_fct<Key>,
314  Hash_Fct_Ptr second_hash_fct = Aleph::snd_hash_fct<Key>,
315  Cmp cmp = Cmp(),
316  float lower_alpha = hash_default_lower_alpha,
317  float upper_alpha = hash_default_upper_alpha,
318  bool with_resize = true)
319  : ODhashTable(len, Hash_Fct(first_hash_fct), Hash_Fct(second_hash_fct),
320  cmp, lower_alpha, upper_alpha, with_resize) {}
321 
324  {
325  if (table != nullptr)
326  delete [] table;
327  }
328 
329  ODhashTable(const ODhashTable & other)
330  : ODhashTable(other.len, other.hash_fct, other.second_hash_fct, other.cmp,
331  other.lower_alpha, other.upper_alpha, other.with_resize)
332  {
333  assert(table != nullptr);
334  this->copy_from_table(other);
335  }
336 
337  ODhashTable(ODhashTable && other) : ODhashTable(other)
338  {
339  assert(table != nullptr);
340  swap(other);
341  }
342 
343  Special_Ctors(ODhashTable, Key);
344 
345  ODhashTable & operator = (const ODhashTable & other)
346  {
347  if (this == &other)
348  return *this;
349 
350  if (len > other.N)
351  this->clean_table();
352  else
353  {
354  Bucket * new_table = new Bucket [other.len];
355  delete [] table;
356  table = new_table;
357  N = 0;
358  len = other.len;
359  hash_fct = other.hash_fct;
360  second_hash_fct = other.second_hash_fct;
361  cmp = other.cmp;
362  lower_alpha = other.lower_alpha;
363  upper_alpha = other.upper_alpha;
364  with_resize = other.with_resize;
365  }
366 
367  this->copy_from_table(other);
368 
369  return *this;
370  }
371 
372  ODhashTable & operator = (ODhashTable && other) noexcept
373  {
374  swap(other);
375  return *this;
376  }
377 
378  OHASH_COMMON(ODhashTable);
379 
380 
383  Key * search(const Key & key) const noexcept
384  {
385  const size_t i_fst = hash_fct(key) % len; // 1er sondeo (1ra fun hash)
386  if (table[i_fst].status == EMPTY)
387  return nullptr;
388 
389  if (table[i_fst].status == BUSY and cmp(table[i_fst].key, key))
390  {
391  assert(table[i_fst].probe_type == FIRST_PROBE);
392  assert(table[i_fst].probe_counter > 0);
393  return &table[i_fst].key;
394  }
395 
396  const size_t i_snd = second_hash_fct(key) % len; // 2do sondeo
397 
398  if (table[i_snd].status == EMPTY)
399  return nullptr;
400 
401  if (table[i_snd].status == BUSY and cmp(table[i_snd].key, key))
402  {
403  assert(table[i_snd].probe_type == SECOND_PROBE);
404  assert(table[i_snd].probe_counter > 0);
405  return &table[i_snd].key;
406  }
407 
408  size_t i = i_snd;
409  // sondeo lineal a partir de índice de 2da función hash
410  for (size_t count = 0; count < len; ++count)
411  {
412  index_forward(i);
413  switch (table[i].status)
414  {
415  case EMPTY:
416  assert(table[i].probe_counter == 0);
417  return nullptr;
418  case BUSY:
419  assert(table[i].probe_counter > 0);
420  if (cmp(table[i].key, key))
421  {
422  assert(table[i].probe_type == LINEAR_PROBE);
423  return &table[i].key;
424  }
425  break;
426  case DELETED:
427  assert(table[i].probe_counter > 0);
428  break;
429  default: AH_ERROR("ODhashTable search: inconsistent bucket status");
430  }
431  }
432 
433  return nullptr;
434  }
435 
436  Hash_Fct get_second_hash_fct() const noexcept { return second_hash_fct; }
437 
438  void set_second_hash_fct(Hash_Fct fct) noexcept
439  {
440  second_hash_fct = fct;
441  }
442 
443  void set_second_hash_fct(Hash_Fct_Ptr fct) noexcept
444  {
445  second_hash_fct = Hash_Fct(fct);
446  }
447 
448 private:
449 
450  Bucket * allocate_bucket(const Key & key) noexcept
451  {
452  assert(N < len);
453 
454  const size_t i_fst = hash_fct(key) % len; // sondeo con 1ra función hash
455  if (table[i_fst].status != BUSY) // ¿cubeta disponible?
456  return allocate_bucket(table[i_fst], FIRST_PROBE);
457 
458  if (cmp(table[i_fst].key, key)) // test if key is alredy present
459  return nullptr;
460 
461  const size_t i_snd = second_hash_fct(key) % len; // 2do hash
462  if (table[i_snd].status != BUSY) // ¿cubeta disponible?
463  {
464  table[i_fst].probe_counter++;
465  return allocate_bucket(table[i_snd], SECOND_PROBE);
466  }
467 
468  if (cmp(table[i_snd].key, key)) // test if key is alredy present
469  return nullptr;
470 
471  size_t i = i_snd;
472  for (size_t c = 0; c < len; ++c)
473  {
474  index_forward(i);
475  switch (table[i].status)
476  {
477  case BUSY:
478  if (cmp(table[i].key, key))
479  { // duplicated key ==> rollback previous probe_counter increases
480  for (size_t k = 0; k < c; ++k)
481  {
482  index_backward(i);
483  table[i].probe_counter--;
484  }
485  return nullptr;
486  }
487  break;
488  case DELETED:
489  case EMPTY:
490  table[i_fst].probe_counter++;
491  table[i_snd].probe_counter++;
492  return allocate_bucket(table[i], LINEAR_PROBE);
493  default: AH_ERROR("ODhashTable: Invalid bucket status"); break;
494  }
495  table[i].probe_counter++;
496  }
497 
498  return nullptr;
499  }
500 
501  // search key. If found return true, otherwise false and allocates the bucket
502  tuple<Bucket*, bool> hard_allocate_bucket(const Key & key) noexcept
503  {
504  assert(N < len);
505 
506  const size_t i_fst = hash_fct(key) % len; // sondeo con 1ra función hash
507  if (table[i_fst].status != BUSY) // ¿cubeta disponible?
508  return tuple<Bucket*, bool>(allocate_bucket(table[i_fst], FIRST_PROBE),
509  false);
510 
511  if (cmp(table[i_fst].key, key)) // test if key is alredy present
512  return tuple<Bucket*, bool>(&table[i_fst], true);
513 
514  const size_t i_snd = second_hash_fct(key) % len; // 2do hash
515  if (table[i_snd].status != BUSY) // ¿cubeta disponible?
516  {
517  table[i_fst].probe_counter++;
518  return tuple<Bucket*, bool>(allocate_bucket(table[i_snd], SECOND_PROBE),
519  false);
520  }
521 
522  if (cmp(table[i_snd].key, key)) // test if key is alredy present
523  return tuple<Bucket*, bool>(&table[i_snd], true);
524 
525  size_t i = i_snd;
526  for (size_t c = 0; c < len; ++c)
527  {
528  index_forward(i);
529  switch (table[i].status)
530  {
531  case BUSY:
532  if (cmp(table[i].key, key))
533  { // duplicated key ==> rollback previous probe_counter increases
534  const size_t idx = i;
535  for (size_t k = 0; k < c; ++k)
536  {
537  index_backward(i);
538  table[i].probe_counter--;
539  }
540  return tuple<Bucket*, bool>(&table[idx], true);
541  }
542  break;
543  case DELETED:
544  case EMPTY:
545  table[i_fst].probe_counter++;
546  table[i_snd].probe_counter++;
547  return make_tuple(allocate_bucket(table[i], LINEAR_PROBE), false);
548  default: AH_ERROR("ODhashTable: Invalid bucket status"); break;
549  }
550  table[i].probe_counter++;
551  }
552  assert(false); // never must reach this point
553  return tuple<Bucket*, bool>((Bucket*)nullptr, false);
554  }
555 
561  void remove_bucket(Bucket * bucket)
562  {
563  if (not is_valid_bucket(bucket))
564  throw std::invalid_argument("key pty does not belong to hash table");
565 
566  if (bucket->status != BUSY)
567  throw std::domain_error("Bucket containing key is not BUSY");
568 
569  deallocate_bucket(bucket);
570  }
571 
572  // returns true if the bucket contains the key
573  bool process_entry_for_remove(Bucket * bucket, const Key & key)
574  {
575  switch (bucket->status)
576  {
577  case EMPTY:
578  throw std::domain_error("Key not in hash table");
579  case BUSY:
580  if (cmp(bucket->key, key))
581  {
582  bucket->status = DELETED;
583  decrease_probe_counter(bucket);
584  return true;
585  }
586  break;
587  case DELETED:
588  decrease_probe_counter(bucket);
589  break;
590  default: AH_ERROR("Inconsistent bucket status");
591  }
592  return false;
593  }
594 
595 public:
596 
597  void remove(const Key & key)
598  {
599  --N;
600  try
601  {
602  const size_t i_fst = hash_fct(key) % len;
603  if (process_entry_for_remove(&table[i_fst], key))
604  return;
605 
606  const size_t i_snd = second_hash_fct(key) % len;
607  if (process_entry_for_remove(&table[i_snd], key))
608  return;
609 
610  size_t i = i_snd;
611  for (size_t c = 0; i < len; ++c)
612  {
613  index_forward(i);
614  if (process_entry_for_remove(&table[i], key))
615  return;
616  }
617  }
618  catch (...)
619  {
620  ++N;
621  this->rehash(); // in order to leave the table in a consistent state
622  throw;
623  }
624  }
625 
626  using Stats = typename OhashCommon<ODhashTable<Key, Cmp>, Key>::Stats;
627 
628  Stats stats() const
629  {
630  DynArray<size_t> lens;
631  size_t num_busy = 0, num_deleted = 0, num_empty = 0;
632  size_t max_len = std::numeric_limits<size_t>::min();
633  for (size_t i = 0; i < len; ++i)
634  switch (table[i].status)
635  {
636  case BUSY:
637  {
638  ++num_busy;
639  const Key & key = table[i].key;
640  size_t count = 1;
641  const size_t i_fst = hash_fct(key) % len;
642  if (cmp(table[i_fst].key, key))
643  {
644  assert(table[i_fst].probe_type == FIRST_PROBE);
645  assert(table[i_fst].probe_counter > 0);
646  ;
647  }
648  else
649  {
650  ++count;
651  size_t i_snd = second_hash_fct(key) % len;
652  if (cmp(table[i_snd].key, key))
653  {
654  assert(table[i_snd].probe_type == SECOND_PROBE);
655  assert(table[i_snd].probe_counter > 0);
656  ;
657  }
658  else
659  {
660  for (size_t i = index_forward(i_snd); true;
661  index_forward(i))
662  {
663  if (table[i].status == BUSY and cmp(table[i].key, key))
664  break;
665  ++count;
666  }
667  }
668  }
669  max_len = std::max(max_len, count);
670  update_stat_len(lens, count);
671  break;
672  }
673  case EMPTY:
674  ++num_empty;
675  update_stat_len(lens, 0);
676  break;
677  case DELETED:
678  ++num_deleted;
679  break;
680  }
681 
682  float avg = 0, sum = 0;
683  for (size_t i = 0; i < lens.size(); ++i)
684  {
685  avg += lens(i)*i;
686  sum += lens(i);
687  }
688 
689  avg /= sum;
690  float var = 0;
691  for (size_t i = 0; i < lens.size(); ++i)
692  {
693  float s = i - avg;
694  var += lens(i)*s*s;
695  }
696  var /= sum;
697 
698  Stats stats;
699  stats.num_busy = num_busy;
700  stats.num_deleted = num_deleted;
701  stats.num_empty = num_empty;
702  std::swap(lens, stats.lens);
703  stats.avg = avg;
704  stats.var = var;
705  stats.max_len = max_len;
706 
707  return stats;
708  }
709 };
710 
711 
712 template <typename Key, class Cmp = Aleph::equal_to<Key>>
714 
715 
716 # ifdef NBACKUP
717 # define N NBACKUP
718 # undef NBACKUP
719 # endif
720 
721 # ifdef MBACKUP
722 # define M MBACKUP
723 # undef MBACKUP
724 # endif
725 
726 # undef EMPTY
727 # undef BUSY
728 # undef DELETED
729 # undef NO_PROBED
730 # undef FIRST_PROBE
731 # undef SECOND_PROBE
732 # undef LINEAR_PROBE
733 
734 } // end namespace Aleph
735 
736 # endif // TPL_ODHASH_H
737 
Definition: htlist.H:450
Definition: htlist.H:133
Definition: htlist.H:45
ODhashTable(size_t len=Primes::DefaultPrime, Hash_Fct_Ptr first_hash_fct=Aleph::dft_hash_fct< Key >, Hash_Fct_Ptr second_hash_fct=Aleph::snd_hash_fct< Key >, Cmp cmp=Cmp(), float lower_alpha=hash_default_lower_alpha, float upper_alpha=hash_default_upper_alpha, bool with_resize=true)
Definition: tpl_odhash.H:312
Definition: ah-comb.H:35
size_t size() const noexcept
Definition: tpl_dynArray.H:641
~ODhashTable()
Libera toda la tabla hash.
Definition: tpl_odhash.H:323
Key * search(const Key &key) const noexcept
Definition: tpl_odhash.H:383
Definition: tpl_odhash.H:81
Definition: htlist.H:1323

Leandro Rabindranath León