Aleph-w  1.9
General library for algorithms and data structures
tpl_linHash.H
1 
2 # ifndef TPL_LINHASH_H
3 # define TPL_LINHASH_H
4 
5 # include <iostream>
6 # include <primes.H>
7 # include <dlink.H>
8 # include <tpl_dynArray.H>
9 # include <tpl_dnode.H>
10 # include <htlist.H>
11 # include <hashDry.H>
12 # include <hash-fct.H>
13 # include <hash-dry.H>
14 
15 
16 # ifdef N
17 # define NBACKUP N
18 # undef N
19 # endif
20 
21 # ifdef M
22 # define MBACKUP M
23 # undef M
24 # endif
25 
26 
27 using namespace Aleph;
28 
29 namespace Aleph {
30 
31 # define LINBUCKET_BODY(BUCKETNAME) \
32  \
33  Dlink link; \
34  \
35 public: \
36  \
37  BUCKETNAME(const BUCKETNAME & bucket) \
38  noexcept(std::is_nothrow_constructible<Dnode<Key>>::value) \
39  : Dnode<Key>(bucket) {} \
40  \
41  BUCKETNAME() noexcept(std::is_nothrow_constructible<Key>::value) {} \
42  \
43  BUCKETNAME(const Key & key) noexcept(noexcept(Dnode<Key>(key))) \
44  : Dnode<Key>(key) {} \
45  \
46  Key & get_key() noexcept { return this->get_data(); } \
47  \
48  Dlink * get_link() noexcept { return &link; } \
49  \
50  DLINK_TO_BASE(BUCKETNAME, link);
51 
52 
60  template <typename Key>
61 class LinHashBucket : public Dnode<Key>
62 {
63  LINBUCKET_BODY(LinHashBucket);
64 };
65 
73  template <typename Key>
74 class LinHashBucketVtl : public Dnode<Key>
75 {
76  LINBUCKET_BODY(LinHashBucketVtl);
77 
79  virtual ~LinHashBucketVtl() {}
80 };
81 
82 
108  template <typename Key, template <class> class BucketType,
109  class Cmp = Aleph::equal_to<Key>>
111  : public HashStats<GenLinearHashTable<Key, BucketType, Cmp>>
112 {
113  friend class HashStats<GenLinearHashTable<Key, BucketType, Cmp>>;
114 
115 public:
116 
119  using Hash_Fct = std::function<size_t(const Key &)>;
120  using Hash_Fct_Ptr = size_t (*) (const Key &);
121 
123  using Bucket = BucketType<Key>;
124 
125  using Key_Type = Key;
126 
127  using Item_Type = Key;
128 
129 private:
130 
131  using BucketList = Dnode<Key>;
132 
133  using BucketItor = typename Dnode<Key>::Iterator;
134 
135  static size_t multiply_by_two(const size_t & n) noexcept { return n << 1; }
136 
137  static size_t divide_by_two(const size_t & n) noexcept { return n >> 1; }
138 
139  DynArray<BucketList> table;
140  Dlink entries_list;
141 
142 protected:
143 
144  Hash_Fct hash_fct;
145  Cmp cmp;
146 
147 private:
148 
149  size_t M; // Tamaño de la tabla
150  size_t N; // Número de elementos que tiene la tabla
151  size_t busy_slots_counter; // Cantidad de entradas ocupadas
152  bool remove_all_buckets; // Indica si destructor debe liberar
153  // las cubetas
154 
155 protected:
156 
157  float upper_alpha; // factor de carga superior
158  float lower_alpha; // factor de carga inferior
159 
160 private:
161 
162  size_t p; // índice de la lista que se particiona (o aumenta)
163  size_t l; // cantidad de veces que se ha duplicado la tabla
164  size_t MP; // guarda el valor p + M
165  size_t MM; // producto 2*M
166 
167 protected:
168 
169  mutable size_t len;
170 
171 private:
172 
173  size_t call_hash_fct(const Key & key) const
174  noexcept(noexcept(hash_fct(key)))
175  {
176  const auto hash = hash_fct(key);
177  const auto i = hash % M;
178  return i < p ? hash % MM : i;
179  }
180 
181  void expand()
182  { // expandir la tabla hasta que la carga esté debajo de upper_alpha
183  for (float alpha = 1.0*N/MP; alpha >= upper_alpha; alpha = 1.0*N/MP)
184  {
185  BucketList * src_list_ptr = table.test(p);
186  if (src_list_ptr != nullptr) // ¿table[p] está escrita?
187  if (not src_list_ptr->is_empty()) // ¿table[p] no está vacía'
188  {
189  BucketList * tgt_list_ptr = nullptr;
190 
191  // recorrer lista colisiones y mover cubetas de table[p+M]
192  for (BucketItor it(*src_list_ptr); it.has_curr(); /* nada */)
193  {
194  Bucket * bucket = static_cast<Bucket*>(it.get_curr_ne());
195 
196  it.next_ne(); // avance al siguiente elemento de la lista
197 
198  const Key & key = bucket->get_key();
199  const int i = hash_fct(key) % MM;
200  if (i == p) // ¿pertenece esta clave a table[p]?
201  continue; // sí ==> clave sigue en table[p] ==> siguiente
202 
203  if (tgt_list_ptr == nullptr)
204  tgt_list_ptr = &table.touch(MP);
205 
206  // bucket no pertenece a table[p] sino a table[p+m] ==>
207  // eliminar bucket de table[i] e insertarlo en table[p+m]
208  bucket->del();
209  tgt_list_ptr->append(bucket);
210  }
211 
212  if (src_list_ptr->is_empty()) // ¿table[p] quedó vacía?
213  --busy_slots_counter; // sí ==> un slot vacío
214 
215  ++busy_slots_counter; // uno nuevo por table[p+M]
216  }
217  ++p;
218  ++MP;
219  if (p == M) // (p == 2*M) ¿debe duplicarse el tamaño de la tabla?
220  { // sí ==> modificar el tamaño de la tabla a 2*M
221  ++l; // Cantidad de veces que se ha duplicado la tabla
222  p = 0;
223  MP = M = MM; // se les asigna 2*M
224  MM = multiply_by_two(MM);
225  }
226  }
227  }
228 
229  void contract() noexcept
230  { // contraer la tabla hasta que carga esté debajo de lower_alpha
231  for (float alpha = (1.0*N)/MP; alpha <= lower_alpha and MP > len;
232  alpha = (1.0*N)/MP)
233  {
234  if (p == 0) // ¿debe dividirse entre 2 el tamaño de la tabla?
235  { // sí ==> actualizar tamaño de la tabla a M/2
236  --l; // Cantidad de veces que se ha duplicado la tabla disminuye
237  MM = M; // se divide entre dos
238  M = divide_by_two(M);
239  p = M - 1;
240  }
241  else
242  --p; // no ==> sólo reducir índice p
243 
244  --MP;
245  if (MP < table.size()) // ¿Existe table[MP]]?
246  {
247  BucketList * src_list_ptr = table.test(MP);
248  if (src_list_ptr != nullptr) // ¿existe entrada para table[p+M]?
249  {
250  if (not src_list_ptr->is_empty()) // ¿table[p+M] está vacía?
251  { // no ==> fusionar las listas
252  BucketList & tgt_list = table.touch(p);// aparta table[p]
253  tgt_list.concat_list(src_list_ptr);
254  --busy_slots_counter; // table[p+M] devino vacía
255  }
256  table.cut_ne(MP); // eventualmente liberar memoria de table[p+M]
257  }
258  }
259  }
260  }
261 
262 public:
263 
265  void set_hash_fct(Hash_Fct fct) noexcept
266  {
267  hash_fct = fct;
268  }
269 
270  void set_hash_fct(Hash_Fct_Ptr fct) noexcept
271  {
272  hash_fct = Hash_Fct(fct);
273  }
274 
275  Hash_Fct get_hash_fct() const noexcept { return hash_fct; }
276 
277  Cmp & get_compare() noexcept { return cmp; }
278 
279  const Cmp & get_compare() const noexcept { return cmp; }
280 
282  float current_alpha() const noexcept { return 1.0*N/MP; }
283 
284  protected:
285 
286  GenLinearHashTable(size_t __len, Hash_Fct __hash_fct, Cmp __cmp,
287  float __lower_alpha, float __upper_alpha,
288  bool __remove_all_buckets, bool /* fake */)
289  : table(__len), hash_fct(__hash_fct), cmp(__cmp), M(__len), N(0),
290  busy_slots_counter(0), remove_all_buckets(__remove_all_buckets),
291  upper_alpha(__upper_alpha), lower_alpha(__lower_alpha),
292  p(0), l(0), MP(M), MM(multiply_by_two(M)), len(__len)
293  {
294  if (M == 0)
295  std::length_error("table's length is zero");
296 
297  if (MM > table.max_size())
298  throw std::length_error("table's length too big");
299 
300  if (upper_alpha <= lower_alpha)
301  throw std::domain_error("lower alpha is greater than lower alpha");
302  }
303 
304  public:
305 
328  GenLinearHashTable(size_t len = Primes::DefaultPrime,
329  Hash_Fct_Ptr hash_fct = Aleph::dft_hash_fct<Key>,
330  Cmp cmp = Cmp(),
331  float lower_alpha = hash_default_lower_alpha,
332  float upper_alpha = hash_default_upper_alpha,
333  bool remove_all_buckets = true,
334  bool with_resize = true)
335  : GenLinearHashTable(len, Hash_Fct(hash_fct), cmp,
336  lower_alpha, upper_alpha,
337  remove_all_buckets, with_resize) {}
338 
339  void swap(GenLinearHashTable & other) noexcept
340  {
341  std::swap(table, other.table);
342  std::swap(hash_fct, other.hash_fct);
343  std::swap(M, other.M);
344  std::swap(N, other.N);
345  std::swap(busy_slots_counter, other.busy_slots_counter);
346  std::swap(remove_all_buckets, other.remove_all_buckets);
347  std::swap(upper_alpha, other.upper_alpha);
348  std::swap(lower_alpha, other.lower_alpha);
349  std::swap(p, other.p);
350  std::swap(l, other.l);
351  std::swap(MP, other.MP);
352  std::swap(MM, other.MM);
353  std::swap(len, other.len);
354  }
355 
370  void empty() noexcept
373  {
374  while (not entries_list.is_empty())
375  {
376  Bucket * bucket = Bucket::dlink_to_base(entries_list.remove_first_ne());
377  bucket->del(); // borra de la lista en el arreglo
378  bucket->get_link()->del(); // borra de entries list
379  delete bucket;
380  }
381 
382  M = MP = len;
383  MM = multiply_by_two(M);
384  N = p = l = 0;
385  table.cut_ne(len);
386  }
387 
388  ~GenLinearHashTable()
389  {
390  if (remove_all_buckets)
391  empty();
392  }
393 
394 private:
395 
396  Bucket *
397  search_in_bucket_list(BucketList * list, const Key & key) const noexcept
398  {
399  for (BucketItor it(*list); it.has_curr(); it.next_ne())
400  {
401  Bucket * bucket = static_cast<Bucket*>(it.get_curr_ne());
402  if (cmp (key, bucket->get_key()))
403  return bucket;
404  }
405 
406  return nullptr;
407  }
408 
409 public:
410 
414  Bucket * search(const Key & key) const noexcept
415  {
416  auto i = call_hash_fct(key);
417  BucketList * list = table.test(i);
418  if (list == nullptr) // ¿Ha sido escrita alguna vez table[i]?
419  return nullptr; // No ==> el elemento no se encuentra en la tabla
420 
421  if (list->is_empty())
422  return nullptr;
423 
424  return search_in_bucket_list(list, key);
425  }
426 
428  const size_t & size() const noexcept { return N; }
429 
431  bool is_empty() const noexcept { return N == 0; }
432 
434  const size_t & capacity() const noexcept { return MP; }
435 
437  const size_t & busy_slots() const noexcept { return busy_slots_counter; }
438 
441  const size_t & expansions() const noexcept { return l; }
442 
445  Bucket * insert(Bucket * bucket)
446  {
447  const int i = call_hash_fct(bucket->get_key());
448  BucketList & list = table.touch(i); // allocates memory for table[i]
449  if (list.is_empty())
450  ++busy_slots_counter;
451 
452  if (search_in_bucket_list(&list, bucket->get_key()) != nullptr)
453  return nullptr; // duplicated key
454 
455  list.append(bucket);
456  entries_list.append(bucket->get_link());
457  ++N;
458  expand();
459 
460  return bucket;
461  }
462 
463  Bucket * search_or_insert(Bucket * bucket)
464  {
465  const int i = call_hash_fct(bucket->get_key());
466  BucketList & list = table.touch(i); // allocates memory for table[i]
467  if (list.is_empty())
468  ++busy_slots_counter;
469 
470  Bucket * p = search_in_bucket_list(&list, bucket->get_key());
471  if (p != nullptr)
472  return p; // duplicated key
473 
474  list.append(bucket);
475  entries_list.append(bucket->get_link());
476  ++N;
477  expand();
478 
479  return bucket;
480  }
481 
483  size_t resize(size_t) noexcept { return MP; }
484 
485 private:
486 
487  // This routine deletes bucket from hash table EXCEPT from
488  // entries_list. The end of this routine is to dry the deletion and to
489  // allow remove from other places; ofor instance, from the del()
490  // method of Iterator class
491  Bucket * remove_bucket(Bucket * bucket) noexcept
492  {
493  assert(bucket != nullptr);
494  assert(search(bucket->get_key()) == bucket);
495 
496  Bucket * next = static_cast<Bucket*>(bucket->get_next());
497 
498  bucket->del(); // elimine de lista de colisiones
499 
500  if (next->is_empty()) // ¿lista de colisiones vacía?
501  --busy_slots_counter; // sí ==> un slot vacío
502 
503  --N;
504  contract();
505 
506  return bucket;
507  }
508 
509 public:
510 
513  Bucket * remove(Bucket * bucket) noexcept
514  {
515  bucket->get_link()->del(); // elimine de entries_list
516  return remove_bucket(bucket);
517  }
518 
519  void print()
520  {
521  for (int i = 0; i < MP; ++i)
522  {
523  cout << "table[" << i << "] = [ ";
524 
525  if (table.exist(i))
526  {
527  BucketList & list = table.access(i);
528 
529  if (not list.is_empty())
530  for (BucketItor it(list); it.has_curr(); it.next_ne())
531  {
532  Bucket * bucket = static_cast<Bucket*>(it.get_curr_ne());
533  const Key & key = bucket->get_key();
534  cout << key << ",";
535  }
536  }
537  cout << "]" << endl;
538  }
539  }
540 
541  class Iterator : public Dlink::Iterator
542  {
543  private:
544 
545  GenLinearHashTable * hash_table = nullptr;
546  long pos = 0;
547 
548  public:
549 
551  using Set_Type = GenLinearHashTable;
552 
554  using Item_Type = Bucket *;
555 
557  Iterator(const GenLinearHashTable & table) noexcept
558  : Dlink::Iterator(const_cast<Dlink &>(table.entries_list)),
559  hash_table(& const_cast<GenLinearHashTable &>(table))
560  {
561  // Empty
562  }
563 
565  Iterator() noexcept { /* Empty */ }
566 
567  Bucket * get_curr_ne() noexcept
568  {
569  return Bucket::dlink_to_base(this->Dlink::Iterator::get_curr_ne());
570  }
571 
573  Bucket * get_curr()
574  {
575  return Bucket::dlink_to_base(this->Dlink::Iterator::get_curr());
576  }
577 
578  Bucket * del()
579  {
580  Bucket * bucket = Bucket::dlink_to_base(this->Dlink::Iterator::del());
581  return (Bucket *) hash_table->remove_bucket(bucket);
582  }
583 
584  void next_ne() noexcept
585  {
586  this->Dlink::Iterator::next_ne();
587  pos++;
588  }
589 
590  void next()
591  {
592  this->Dlink::Iterator::next();
593  pos++;
594  }
595 
596  void prev()
597  {
598  this->Dlink::Iterator::prev();
599  pos--;
600  }
601 
602  long get_pos() const noexcept { return pos; }
603  };
604 };
605 
606 
626 template <typename Key, class Cmp = Aleph::equal_to<Key>>
628 
629 
649  template <typename Key, class Cmp = Aleph::equal_to<Key>>
651 
652 
653 } // end namespace Aleph
654 
655 # ifdef NBACKUP
656 # define N NBACKUP
657 # undef NBACKUP
658 # endif
659 
660 # ifdef MBACKUP
661 # define M MBACKUP
662 # undef MBACKUP
663 # endif
664 
665 # endif // TPL_LINHASH_H
666 
Definition: tpl_linHash.H:74
Dnode & swap(Dnode &p) noexcept(noexcept(std::swap(data, p.data)))
Swap this with p
Definition: tpl_dnode.H:136
T & access(const size_t i) const noexcept
Definition: tpl_dynArray.H:874
T & touch(const size_t i)
Definition: tpl_dynArray.H:958
size_t max_size() const noexcept
Definition: tpl_dynArray.H:647
T & get_key() noexcept
Definition: tpl_dnode.H:187
const size_t & busy_slots() const noexcept
Retorna la cantidad de entradas vacía que tiene la tabla.
Definition: tpl_linHash.H:437
const size_t & size() const noexcept
Retorna la cantidad de elementos que tiene la tabla.
Definition: tpl_linHash.H:428
size_t resize(size_t) noexcept
Provided for generic programming compatibility.
Definition: tpl_linHash.H:483
bool exist(const size_t i) const
Definition: tpl_dynArray.H:895
Bucket * insert(Bucket *bucket)
Definition: tpl_linHash.H:445
const size_t & capacity() const noexcept
Retorna el tamaño de la tabla.
Definition: tpl_linHash.H:434
float current_alpha() const noexcept
return the current table load
Definition: tpl_linHash.H:282
Definition: tpl_linHash.H:61
Definition: ah-comb.H:35
GenLinearHashTable(size_t len=Primes::DefaultPrime, Hash_Fct_Ptr hash_fct=Aleph::dft_hash_fct< Key >, Cmp cmp=Cmp(), float lower_alpha=hash_default_lower_alpha, float upper_alpha=hash_default_upper_alpha, bool remove_all_buckets=true, bool with_resize=true)
Definition: tpl_linHash.H:328
T * test(const size_t i) const noexcept
Definition: tpl_dynArray.H:927
Definition: tpl_linHash.H:110
BucketType< Key > Bucket
El tipo de cubeta.
Definition: tpl_linHash.H:123
Bucket * search(const Key &key) const noexcept
Definition: tpl_linHash.H:414
bool is_empty() const noexcept
return true is table is empty
Definition: tpl_linHash.H:431
std::function< size_t(const Key &)> Hash_Fct
Definition: tpl_linHash.H:119
void set_hash_fct(Hash_Fct fct) noexcept
Set the internal hash function.
Definition: tpl_linHash.H:265
size_t size() const noexcept
Definition: tpl_dynArray.H:641
Definition: tpl_dynArray.H:159
Definition: List.H:49
Definition: dlink.H:37
const size_t & expansions() const noexcept
Definition: tpl_linHash.H:441

Leandro Rabindranath León