标签:hoffman
Huffman树(哈夫曼树,本文及代码中沿用了"Hoffman"这一拼写)是由David A. Huffman于1952年在MIT攻读博士学位期间发表的论文《A Method for the Construction of Minimum-Redundancy Codes》中提出的,它的目的是寻找一种用尽可能短的编码来表示信息的方法。Huffman使用自底向上的方法构建了一棵满足要求的树,用这棵树进行的编码叫做Huffman编码。
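用一个节点表示一条信息,每条信息都要知道它的使用频率。构建Huffman树时,将每个节点看做只有一个节点的一棵树,这些树组成了一个森林,算法的流程如下:
1. 从森林中取出根节点使用频率最小的树;
2. 再从森林中取出根节点使用频率第二小的树;
3. 以这两棵树分别作为左、右子树构造一棵新树,新树根节点的使用频率等于两棵子树根节点使用频率之和;
4. 将新树放回森林;
5. 重复以上步骤,直到森林中只剩下一棵树,这棵树就是Huffman树。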
算法流程很简单,证明却很难,我看了几遍原始论文都没弄明白,也许是英文太差了。虽然不能证明,但是实现上面的算法还是比较简单的。实现中用一个数组保存所有信息,另一个数组保存每条信息的使用频率,Huffman树的节点由四项组成:信息在数组中的索引、信息使用的频率、指向节点左孩子和右孩子的指针。这样设计的理由是信息可能很复杂,需要用一个对象表示,如果在节点中直接保存信息,一旦出现赋值,性能就不好。更重要的一个理由是Huffman树中有两类节点,一类包含信息(叶子节点),一类不包含信息(非叶子节点),这两类节点通过信息本身来区分是不方便的,特别是非叶子节点的信息是空的,而C++中没有表示空类型的对象。如果节点中保存的是索引,那么-1就表示该节点是一个非叶子节点,非负索引则可以到数组中取得相应的信息和信息出现的频率。这样设计还能带来另一个好处,就是可以让Huffman树更容易地支持插入操作(虽然这个操作被用到的可能性比较小)。实现算法的1、2步需要用一个最小堆,我的前一篇博文已经实现了堆数据结构,用到的数组结构也是之前博文实现的Vector结构。
#ifndef _HOFFMAN_H_
#define _HOFFMAN_H_
#include "../include/Vector.h"
#include "../include/Heap.h"
#include "../Utilites/type_traits.h"
#include "../include/Functor.h"
namespace MyDataStructure
{
// Node of a Huffman tree.
//
// A leaf refers to its symbol through `index` (a position in the value and
// weight arrays kept by the tree); an internal node carries index -1 and a
// weight equal to the sum of its two children's weights.
//
// Every member has a default initializer so that a freshly allocated node is
// never left with indeterminate pointers or index.
template<typename WeightType>
struct HoffmanNode
{
    int index = -1;               // symbol index, or -1 for an internal node
    WeightType weight{};          // usage frequency of the subtree rooted here
    HoffmanNode* left = nullptr;  // left child, nullptr for a leaf
    HoffmanNode* right = nullptr; // right child, nullptr for a leaf
};
// Partial specialization of the "greater than" functor for pointers to
// HoffmanNode. Nodes are compared by weight instead of by address. The
// min-heap used while building the tree stores HoffmanNode pointers and
// needs this comparator.
template<typename WeightType>
struct greater<HoffmanNode<WeightType>*>
{
    // Returns true when the node behind op1 has a strictly larger weight than
    // the node behind op2. Both pointers must be non-null.
    // The call operator is const so that it can also be invoked through a
    // const comparator object.
    bool operator ()(const HoffmanNode<WeightType>* op1,
                     const HoffmanNode<WeightType>* op2) const
    {
        return op1->weight > op2->weight;
    }
};
template <typename ValueType, typename WeightType>
class HoffmanTree
{
public:
typedef typename ParameterTrait<WeightType>::ParameterType WeightParameterType;
typedef typename HoffmanNode< WeightType>* NodePtr;
typedef typename HoffmanNode< WeightType> NodeType;
typedef typename HoffmanTree<ValueType, WeightType> self;
public:
HoffmanTree(ValueType values[],WeightType weights[],int count);
HoffmanTree(const HoffmanTree& rhs);
self& operator = (const HoffmanTree& rhs);
~HoffmanTree();
void Clear();
NodePtr GetHead(){ return head; }
private:
NodePtr create_node(int index, WeightParameterType weight);
NodePtr copy_node(const NodePtr node);
NodePtr merge_node(NodePtr left, NodePtr right);
void copy(const HoffmanTree& rhs);
void __copy_tree(NodePtr* dst_node,NodePtr start_node);
void __clear_tree(NodePtr start_node);
void create_tree(ValueType values[], WeightType weights[], int count);
void clear();
private:
NodePtr head;
Vector<ValueType> values;
Vector<WeightType> weights;
};
// Builds a Huffman tree from the first `count` elements of `values` and
// `weights`; both arrays must hold at least `count` elements.
//
// head is initialized to nullptr first, so the object is in a well-defined
// (empty) state even if create_tree() produces no node, e.g. for count <= 0.
template<typename ValueType, typename WeightType>
HoffmanTree<ValueType, WeightType>::HoffmanTree(ValueType values[], WeightType weights[], int count)
    : head(nullptr)
{
    create_tree(values, weights, count);
}
// Copy constructor: makes this tree a deep copy of rhs.
//
// A tree under construction owns nothing yet, so there is nothing to clear.
// The previous version called clear() here, which walked and deleted whatever
// garbage address the uninitialized head happened to contain. head now starts
// out as nullptr and copy() fills in the data and the cloned nodes.
template<typename ValueType, typename WeightType>
HoffmanTree<ValueType, WeightType>::HoffmanTree(const HoffmanTree& rhs)
    : head(nullptr)
{
    copy(rhs);
}
template<typename ValueType, typename WeightType>
typename HoffmanTree<ValueType, WeightType>& HoffmanTree<ValueType, WeightType>
::operator =(const HoffmanTree& rhs)
{
clear();
copy(rhs);
return *this;
}
// Destructor: hands all cleanup over to the private clear() helper.
template<typename ValueType, typename WeightType>
HoffmanTree<ValueType, WeightType>::~HoffmanTree()
{
    this->clear();
}
// Public entry point for emptying the tree; forwards to the private clear().
template<typename ValueType, typename WeightType>
void HoffmanTree<ValueType, WeightType>::Clear()
{
    this->clear();
}
// Allocates a new node without children.
//
// index  - value stored in the node's index field
// weight - value stored in the node's weight field
// Returns the newly allocated node; the caller takes ownership.
template<typename ValueType, typename WeightType>
typename HoffmanTree<ValueType, WeightType>::NodePtr
HoffmanTree<ValueType, WeightType>
::create_node(int index, WeightParameterType weight)
{
    NodePtr fresh = new NodeType;
    fresh->index = index;
    fresh->weight = weight;
    fresh->left = nullptr;
    fresh->right = nullptr;
    return fresh;
}
// Creates a childless duplicate of `node`: only index and weight are carried
// over, the children of the new node are left null. `node` must be non-null.
template<typename ValueType, typename WeightType>
typename HoffmanTree<ValueType, WeightType>::NodePtr
HoffmanTree<ValueType, WeightType>
::copy_node(const NodePtr node)
{
    NodePtr duplicate = create_node(node->index, node->weight);
    return duplicate;
}
// Joins two subtrees under a newly allocated parent node.
//
// The parent's weight is the sum of both children's weights and its index is
// -1, which marks it as an internal node. Both arguments must be non-null.
// Returns the new parent; the caller takes ownership.
template<typename ValueType, typename WeightType>
typename HoffmanTree<ValueType, WeightType>::NodePtr
HoffmanTree<ValueType, WeightType>
::merge_node(NodePtr left, NodePtr right)
{
    NodePtr parent = new NodeType;
    // index == -1 identifies an internal (non-leaf) node
    parent->index = -1;
    parent->weight = left->weight + right->weight;
    parent->left = left;
    parent->right = right;
    return parent;
}
// Copies the content of rhs into this object: first the value array, then
// the weight array, and finally a node-by-node clone of the tree whose root
// is written to head. Nodes previously reachable from head are not released
// here.
template<typename ValueType, typename WeightType>
void HoffmanTree<ValueType, WeightType>
::copy(const HoffmanTree& rhs)
{
    this->values = rhs.values;
    this->weights = rhs.weights;
    __copy_tree(&this->head, rhs.head);
}
// Recursively clones the subtree rooted at start_node.
//
// dst_node   - location that receives the root of the clone
// start_node - root of the subtree to clone; may be null, in which case
//              nullptr is written to *dst_node
template<typename ValueType, typename WeightType>
void HoffmanTree<ValueType, WeightType>
::__copy_tree(NodePtr* dst_node, NodePtr start_node)
{
    if (start_node == nullptr)
    {
        *dst_node = nullptr;
        return;
    }

    NodePtr clone = copy_node(start_node);
    *dst_node = clone;
    // Clone the children directly into the child slots of the new node.
    __copy_tree(&(clone->left), start_node->left);
    __copy_tree(&(clone->right), start_node->right);
}
// Releases every node of the tree and empties the stored values and weights,
// leaving the object as an empty tree.
//
// head is reset after the nodes are deleted: __clear_tree() receives the
// pointer by value and cannot null it for us. Leaving head dangling made a
// second clear() (for instance Clear() followed by the destructor) delete the
// same nodes again.
template<typename ValueType, typename WeightType>
void HoffmanTree<ValueType, WeightType>::clear()
{
    __clear_tree(head);
    head = nullptr;
    values.Clear();
    // The weights belong to the same tree as the values and are dropped too.
    weights.Clear();
}
// Deletes the subtree rooted at start_node, children before parent.
// A null start_node is accepted and ignored.
//
// start_node is passed by value, so the caller's own pointer is not modified
// by this function.
template<typename ValueType, typename WeightType>
void HoffmanTree<ValueType, WeightType>
::__clear_tree(NodePtr start_node)
{
    if (start_node == nullptr) return;

    // The recursive calls handle null children themselves.
    __clear_tree(start_node->left);
    __clear_tree(start_node->right);
    delete start_node;
}
// Builds the Huffman tree from the first `count` elements of `values` and
// `weights` (both arrays must hold at least `count` elements) and stores its
// root in head. The values and weights are also appended to the member
// arrays of the same name.
//
// With count <= 0 no node is created and head is set to nullptr; the heap is
// never queried while it is empty.
template<typename ValueType, typename WeightType>
void HoffmanTree<ValueType, WeightType>
::create_tree(ValueType values[], WeightType weights[], int count)
{
    // Start from a well-defined empty tree.
    head = nullptr;

    Heap<NodePtr, greater<NodePtr>> minHeap;
    // Turn every symbol into a single-node tree and put it into the heap.
    for (int i = 0; i < count; ++i)
    {
        NodeType* node = create_node(i, weights[i]);
        // The parameters shadow the members, hence the explicit this->.
        this->values.PushBack(values[i]);
        this->weights.PushBack(weights[i]);
        minHeap.Insert(node);
    }
    // Repeatedly merge the two lightest trees until only one is left.
    while (minHeap.Size() > 1)
    {
        NodePtr node1,node2;
        // Fetch the tree with the smallest weight ...
        minHeap.GetTop(node1);
        // ... and remove it from the heap.
        minHeap.RemoveTop();
        // Fetch the tree with the second smallest weight ...
        minHeap.GetTop(node2);
        // ... and remove it from the heap as well.
        minHeap.RemoveTop();
        // Combine both into a new tree.
        NodePtr new_node = merge_node(node1, node2);
        // Put the new tree back into the heap.
        minHeap.Insert(new_node);
    }
    // For count > 0 exactly one tree remains in the heap: the result.
    if (count > 0)
    {
        minHeap.GetTop(head);
    }
}
}
#endif
测试代码:
// HoffmanTest.cpp : 定义控制台应用程序的入口点。
//
#include "stdafx.h"
#include "../include/Hoffman.h"
#include <iostream>
using namespace MyDataStructure;
using namespace std;
// Entry point of the console test program.
//
// Builds a Huffman tree from ten characters and their weights, makes two
// copies of it, and prints the sum of all weights.
int _tmain(int argc, _TCHAR* argv[])
{
    // Plain ASCII apostrophes are required for character literals; the
    // typographic quotes that were here before do not compile.
    char values[] = { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j' };
    float weights[] = { 0.24f, 0.84f, 0.03f, 0.51f, 0.12f, 0.79f, 0.93f, 0.34f, 0.61f, 0.46f };
    HoffmanTree<char, float> H(values,weights,10);
    // Both statements below invoke the copy constructor: `T b = a;` is
    // copy-initialization, not an assignment.
    HoffmanTree<char, float> H1(H);
    HoffmanTree<char, float> H2 = H1;
    float sum = 0.0f;
    for (int i = 0; i < 10; ++i)
    {
        sum += weights[i];
    }
    cout << sum << endl;
    return 0;
}
看看程序运行结果:
这是我在调试状态下查看构造出来的Hoffman树之后画出来的。本来写了一个打印二叉树的程序,但是由于想做到跨平台、支持多种设备,迟迟没有做出来,主要是卡在打印字符这一步,如果哪位小伙伴有好的点子或资料,还请推荐给我。虽然程序没写完,但是却收获了不少东西,最重要的收获记录在了前一篇博客里面。
标签:hoffman
原文地址:http://blog.csdn.net/liao_jian/article/details/45845589