Aleph-w  1.9
General library for algorithms and data structures
Huffman.H
1 
2 /* Aleph-w
3 
4  / \ | | ___ _ __ | |__ __ __
5  / _ \ | |/ _ \ '_ \| '_ \ ____\ \ /\ / / Data structures & Algorithms
6  / ___ \| | __/ |_) | | | |_____\ V V / version 1.9b
7  /_/ \_\_|\___| .__/|_| |_| \_/\_/ https://github.com/lrleon/Aleph-w
8  |_|
9 
10  This file is part of Aleph-w library
11 
12  Copyright (c) 2002-2018 Leandro Rabindranath Leon & Alejandro Mujica
13 
14  This program is free software: you can redistribute it and/or modify
15  it under the terms of the GNU General Public License as published by
16  the Free Software Foundation, either version 3 of the License, or
17  (at your option) any later version.
18 
19  This program is distributed in the hope that it will be useful, but
20  WITHOUT ANY WARRANTY; without even the implied warranty of
21  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  General Public License for more details.
23 
24  You should have received a copy of the GNU General Public License
25  along with this program. If not, see <https://www.gnu.org/licenses/>.
26 */
27 # ifndef HUFFMAN_H
28 # define HUFFMAN_H
29 
30 
31 # include <memory>
32 # include <tpl_binNodeUtils.H>
33 # include <tpl_treap.H>
34 # include <tpl_binHeap.H>
35 # include <tpl_dynMapTree.H>
36 # include <bitArray.H>
37 
38 
39 namespace Aleph {
40 
41 struct Huffman_Node;
42 
44 
46 
47 struct Huffman_Node : public BinHeap<size_t>::Node
48 {
49  BinNode<string> * bin_node;
50 
51  Freq_Node * freq_node;
52 
53 public:
54 
55  Huffman_Node()
56  : BinHeap<size_t>::Node(0), bin_node(nullptr), freq_node(nullptr)
57  {
58  /* empty */
59  }
60 
61  Huffman_Node(BinNode<string> * node)
62  : BinHeap<size_t>::Node(0), bin_node(node), freq_node(nullptr)
63  {
64  /* empty */
65  }
66 
67  ~Huffman_Node() { /* No debe liberarse memoria de bin_node */ }
68 
69 };
70 
72 
73 static inline const size_t & get_freq(Huffman_Node * huffman_node) noexcept
74 {
75  return huffman_node->get_key();
76 }
77 
78 
79 static inline void increase_freq(Huffman_Node * huffman_node) noexcept
80 {
81  huffman_node->get_key()++;
82 }
83 
84 
85 static inline void set_freq(Huffman_Node * huffman_node, const size_t & freq)
86  noexcept
87 {
88  huffman_node->get_key() = freq;
89 }
90 
92 static inline bool is_leaf(BinNode<string> * p) noexcept
93 {
94  return LLINK(p) == nullptr and RLINK(p) == nullptr;
95 }
96 
103 {
104  BinNode<string> * root;
105  Huffman_Heap heap;
106  Symbol_Map symbol_map;
107  Code_Map code_map;
108 
109  Freq_Node * freq_root;
110 
111  string end_symbol;
112  size_t text_len;
113 
114  void build_prefix_encoding(BinNode<string> * p, BitArray & array)
115  {
116  if (is_leaf(p))
117  {
118  const string & str = p->get_key();
119  code_map.insert(str, BitArray(array));
120  return;
121  }
122  array.push(0); build_prefix_encoding(LLINK(p), array); array.pop();
123  array.push(1); build_prefix_encoding(RLINK(p), array); array.pop();
124  }
125 
126  void build_encoding_map()
127  {
128  assert(symbol_map.size() > 0);
129 
130  if (root == nullptr)
131  throw domain_error("Huffman encoding tree has not been generated");
132 
133  BitArray array(0);
134  symbol_map.empty();
135  build_prefix_encoding(root, array);
136  }
137 
138  bool test_end(const string & str) const
139  {
140  if (end_symbol == "NO-END")
141  return false;
142  return end_symbol == str;
143  }
144 
145  void update_freq(const string & str)
146  {
147  if (root != nullptr)
148  throw domain_error("Huffman encoding tree has already been generated");
149 
150  if (test_end(str))
151  throw domain_error("End symbol has already been inserted");
152 
153  Huffman_Node * huffman_node = nullptr;
154  auto huffman_node_ptr = symbol_map.search(str);
155  if (huffman_node_ptr == nullptr) // ¿símbolo definido previamente?
156  { // No ==> crear una entrada en symbol_map e insertarlo en heap
157  unique_ptr<BinNode<string> > bin_node_auto(new BinNode<string>(str));
158  huffman_node = static_cast<Huffman_Node*>
159  (heap.insert(new Huffman_Node(bin_node_auto.get())));
160  symbol_map.insert(str, huffman_node);
161  bin_node_auto.release();
162  }
163  else
164  huffman_node = huffman_node_ptr->second; // ya definido, recuperarlo
165 
166  increase_freq(huffman_node);
167  heap.update(huffman_node);
168  }
169 
170  static void append_code(BitArray & bit_stream, const BitArray & symbol_code)
171  {
172  const size_t symbol_code_size = symbol_code.size();
173  for (int i = 0; i < symbol_code_size; i++)
174  bit_stream.push(symbol_code[i]);
175  }
176 
177  public:
178 
181  : root(nullptr), freq_root(nullptr), end_symbol("NO-END"), text_len(0)
182  {
183  // empty
184  }
185 
186  private:
187 
188  struct Get_Key
189  {
190  string operator () (BinNode<string> * p) const noexcept
191  {
192  return is_leaf(p) ? p->get_key() : "";
193  }
194  };
195 
196  struct Load_Key
197  {
198  void operator () (BinNode<string> * p, istream & input) const noexcept
199  {
200  if (is_leaf(p))
201  input >> p->get_key();
202  }
203  };
204 
205  public:
206 
221  void save_tree(ostream & output)
222  {
223  if (root == nullptr)
224  throw std::domain_error("Huffman tree has not been generated");
225 
226  Aleph::save_tree<BinNode<string>> (root, output);
227  }
228 
269  void save_tree_in_array_of_chars(const string & array_name, ostream & output)
270  {
271  if (root == nullptr)
272  throw std::domain_error("Huffman tree has not been generated");
273 
274  Aleph::save_tree_in_array_of_chars<BinNode<string>, Get_Key>
275  (root, array_name, output);
276  }
277 
280  {
281  if (root == nullptr)
282  throw std::domain_error("Huffman tree has not been generated");
283 
284  return root;
285  }
286 
303  BinNode<string> * generate_huffman_tree(const bool & with_freqs = false)
304  {
305  while (heap.size() > 1) // hasta que quede solo un nodo
306  {
307  Huffman_Node * l_huffman_node = (Huffman_Node*) heap.getMin(); // izq
308  Huffman_Node * r_huffman_node = (Huffman_Node*) heap.getMin(); // der
309  BinNode <string> * bin_node = new BinNode <string>;
310  Huffman_Node * huffman_node = new Huffman_Node (bin_node);
311  LLINK(bin_node) = l_huffman_node->bin_node;
312  RLINK(bin_node) = r_huffman_node->bin_node;
313  const size_t new_freq = get_freq(l_huffman_node) +
314  get_freq(r_huffman_node);
315  Aleph::set_freq(huffman_node, new_freq);
316 
317  if (with_freqs)
318  {
319  Freq_Node *& l_freq_node = l_huffman_node->freq_node;
320  if (l_freq_node == nullptr)
321  {
322  l_freq_node = new Freq_Node;
323  l_freq_node->get_key().first =
324  l_huffman_node->bin_node->get_key();
325  l_freq_node->get_key().second = l_huffman_node->get_key();
326  }
327 
328  Freq_Node *& r_freq_node = r_huffman_node->freq_node;
329  if (r_freq_node == nullptr)
330  {
331  r_freq_node = new Freq_Node;
332  r_freq_node->get_key().first =
333  r_huffman_node->bin_node->get_key();
334  r_freq_node->get_key().second = r_huffman_node->get_key();
335  }
336 
337  const string str = to_string(new_freq);
338  Freq_Node *& freq_node = huffman_node->freq_node;
339  freq_node = new Freq_Node;
340  freq_node->get_key().first = str;
341  freq_node->get_key().second = huffman_node->get_key();
342  LLINK(freq_node) = l_freq_node;
343  RLINK(freq_node) = r_freq_node;
344  }
345 
346  delete l_huffman_node;
347  delete r_huffman_node;
348  heap.insert(huffman_node);
349  } // nodo que queda en heap es la raíz del árbol de prefijos
350 
351  Huffman_Node * huffman_root = (Huffman_Node *) heap.getMin();
352  root = huffman_root->bin_node;
353 
354  if (with_freqs)
355  freq_root = huffman_root->freq_node;
356 
357  delete huffman_root;
358  build_encoding_map(); // construir mapeo de códigos
359 
360  return root;
361  }
362 
376  void load_tree(ifstream & input)
377  {
378  if (root != BinNode<string>::NullPtr)
379  destroyRec(root);
380 
381  root = Aleph::load_tree<BinNode<string>>(input);
382  }
383 
386  {
387  if (freq_root == nullptr)
388  throw std::domain_error("Huffman tree has not been generated");
389 
390  return freq_root;
391  }
392 
402  void set_freq(const string & str, const size_t & freq)
403  {
404  if (root != nullptr)
405  throw domain_error("Huffman encoding tree has already been generated");
406  if (test_end(str))
407  throw domain_error("End symbol has already been inserted");
408 
409  // Buscar símbolo str
410  auto huffman_node_ptr = symbol_map.search(str);
411  if (huffman_node_ptr != nullptr) // ¿ya fue definido?
412  {
413  std::string msg = "Frequency for symbol " + str + " has already set";
414  throw std::domain_error(msg); // Sí ==> esto es un error!
415  }
416 
417  unique_ptr<BinNode<string> > bin_node_auto(new BinNode<string>(str));
418  Huffman_Node * huffman_node = new Huffman_Node(bin_node_auto.get());
419  Aleph::set_freq(huffman_node, freq);
420  heap.insert(huffman_node);
421  symbol_map.insert(str, huffman_node);
422  bin_node_auto.release();
423  }
424 
425  private:
426 
427  static const size_t Max_Token_Size = 256;
428 
429  public:
430 
443  void read_input(char * input, const bool & with_freqs = false)
444  {
445  if (root != nullptr)
446  throw domain_error("Huffman encoding tree has already been generated");
447 
448  char * curr_stream = input;
449  char curr_token[Max_Token_Size];
450  curr_token[1] = '\0';
451 
452  while (*curr_stream != '\0')
453  {
454  curr_token[0] = *curr_stream++;
455  update_freq(curr_token);
456  text_len++;
457  }
458 
459  set_end_of_stream("");
460  generate_huffman_tree(with_freqs);
461  }
462 
475  void read_input(ifstream & input, const bool & with_freqs = false)
476  {
477  if (root != nullptr)
478  throw domain_error("Huffman encoding tree has already been generated");
479 
480  char curr_token[2];
481  curr_token[0] = curr_token[1] = '\0';
482 
483  while (not input.eof())
484  {
485  input.read(curr_token, 1);
486  update_freq(curr_token);
487  text_len++;
488  }
489 
490  set_end_of_stream("");
491  generate_huffman_tree(with_freqs);
492  }
493 
495  void set_end_of_stream(const string & str)
496  {
497  if (test_end(str))
498  throw domain_error("End symbol has already been inserted");
499 
500  if (root != nullptr)
501  throw domain_error("Huffman encoding tree has already been generated");
502 
503  unique_ptr<BinNode<string> > bin_node_auto ( new BinNode<string> (str) );
504 
505  Huffman_Node * huffman_node = static_cast<Huffman_Node*>
506  (heap.insert(new Huffman_Node(bin_node_auto.get())));
507  symbol_map.insert(str, huffman_node);
508  bin_node_auto.release();
509  end_symbol = str;
510  }
511 
525  size_t encode(char * input, BitArray & bit_stream)
526  {
527  if (root == nullptr)
528  throw std::domain_error("Huffman tree has not been generated");
529 
530  char * curr_stream = input;
531  char curr_token[Max_Token_Size];
532  curr_token[1] = '\0';
533  while (*curr_stream != '\0')
534  {
535  curr_token[0] = *curr_stream++;
536  append_code(bit_stream, code_map.find(curr_token));
537  }
538  append_code(bit_stream, code_map.find(""));
539 
540  return bit_stream.size();
541  }
542 
556  size_t encode(ifstream & input, BitArray & bit_stream)
557  {
558  if (root == nullptr)
559  throw std::domain_error("Huffman tree has not been generated");
560  char curr_token[2];
561  curr_token[0] = curr_token[1] = '\0';
562  while (not input.eof())
563  {
564  input.read(curr_token, 1);
565  append_code(bit_stream, code_map.find(curr_token));
566  }
567  append_code(bit_stream, code_map.find(""));
568  return bit_stream.size();
569  }
570 };
571 
578 {
579  BinNode<string> * root;
580  string end_symbol;
581  public:
582 
591  Huffman_Decoder_Engine(BinNode<string> * p, const string & end)
592  : root(p), end_symbol(end)
593  {
594  // empty
595  }
596 
599  {
600  if (root == nullptr)
601  throw std::domain_error("Huffman tree has not been generated");
602 
603  return root;
604  }
605 
617  void decode(BitArray & bit_stream, ostream & output)
618  {
619  const size_t & bit_stream_len = bit_stream.size();
620  BinNode<string> * p = root;
621  for (int i = 0; i < bit_stream_len; ++i)
622  {
623  if (bit_stream.read_bit(i) == 0)
624  p = LLINK(p);
625  else
626  p = RLINK(p);
627 
628 
629  if (p == nullptr)
630  throw std::domain_error("Invalid bits sequence");
631 
632  if (is_leaf(p)) // ¿es hoja?
633  { // sí ==> escribir símbolo y reiniciar a raíz
634  const string & symbol = p->get_key();
635  if (symbol == end_symbol) // ¿se alcanza final?
636  break;
637 
638  output << symbol;
639  p = root; // reiniciar a raíz, pues se leerá un nuevo código
640  }
641  }
642  }
643 };
644 
645 } // end namespace Aleph
646 
647 # endif // HUFFMAN_H
int read_bit(const size_t i) const
Definition: bitArray.H:280
Pair * insert(const Key &key, const Data &data)
Definition: tpl_dynMapTree.H:112
Huffman_Encoder_Engine()
Constructor de codificador.
Definition: Huffman.H:180
BinNode< string > *& get_root()
Retorna la raíz del árbol decodificador de Huffman.
Definition: Huffman.H:279
Definition: bitArray.H:135
Freq_Node *& get_freq_root()
Retorna la raíz de un árbol de frecuencias.
Definition: Huffman.H:385
Node * insert(Node *p) noexcept
Definition: tpl_binHeap.H:587
size_t size() const noexcept
Retorna la dimensión del arreglo de bits.
Definition: bitArray.H:241
void read_input(char *input, const bool &with_freqs=false)
Definition: Huffman.H:443
Huffman_Decoder_Engine(BinNode< string > *p, const string &end)
Definition: Huffman.H:591
void save_tree(ostream &output)
Definition: Huffman.H:221
BinNode< string > * generate_huffman_tree(const bool &with_freqs=false)
Definition: Huffman.H:303
Definition: ah-comb.H:35
size_t encode(ifstream &input, BitArray &bit_stream)
Definition: Huffman.H:556
void push(const unsigned int value)
Inserta al final del arreglo el valor value.
Definition: bitArray.H:347
void update(Node *p) noexcept
Definition: tpl_binHeap.H:686
void set_end_of_stream(const string &str)
Define el símbolo fin de entrada.
Definition: Huffman.H:495
Node *& LLINK(Node *p) noexcept
Definition: tpl_binNode.H:298
void read_input(ifstream &input, const bool &with_freqs=false)
Definition: Huffman.H:475
Pair * search(const Key &key) const noexcept
Definition: tpl_dynMapTree.H:214
Node * getMin()
Definition: tpl_binHeap.H:663
Definition: Huffman.H:577
void destroyRec(Node *&root) noexcept
Definition: tpl_binNodeUtils.H:500
void pop()
Elimina el último bit del arreglo.
Definition: bitArray.H:353
Node for binary search tree.
const size_t & size() const
Retorna la cardinalidad del conjunto.
Definition: tpl_dynSetTree.H:502
Definition: Huffman.H:102
Definition: tpl_binHeap.H:940
void load_tree(ifstream &input)
Definition: Huffman.H:376
void set_freq(const string &str, const size_t &freq)
Definition: Huffman.H:402
const size_t & size() const noexcept
Retorna la cardinalidad del heap.
Definition: tpl_binHeap.H:763
size_t encode(char *input, BitArray &bit_stream)
Definition: Huffman.H:525
void save_tree_in_array_of_chars(const string &array_name, ostream &output)
Definition: Huffman.H:269
void decode(BitArray &bit_stream, ostream &output)
Definition: Huffman.H:617
void empty()
Elimina todos los elementos del conjunto.
Definition: tpl_dynSetTree.H:133
Node *& RLINK(Node *p) noexcept
Definition: tpl_binNode.H:307
BinNode< string > *& get_root()
Retorna la raíz del árbol decodificador de Huffman.
Definition: Huffman.H:598
Data & find(const Key &key)
Definition: tpl_dynMapTree.H:242

Leandro Rabindranath León