文件压缩
通过某种特殊的编码方式,将数据信息中存在的重复度、冗余度有效地降低,从而达到数据压缩的目的。这里使用哈夫曼树生成变长前缀编码:出现次数越多的字节,其编码越短。
//compress.h
#pragma once
// Counter type used for byte frequencies and their sums.
typedef unsigned long long longType;

// Bookkeeping for one byte value: the byte itself, how many times it was
// counted, and the Huffman code assigned to it.
//
// All comparison/arithmetic operators look at _count only, so that a
// CharInfo can be used directly as the weight type of a Huffman tree.
struct CharInfo
{
    unsigned char _ch;   // byte value this entry describes
    longType _count;     // number of occurrences
    std::string _code;   // Huffman code, stored as a string of '0'/'1' characters

    // Zero-initialises the entry (the original left _ch/_count indeterminate).
    CharInfo()
        : _ch(0)
        , _count(0)
    {}

    // Builds an entry that only carries a weight (_ch is 0).
    // Deliberately not `explicit`: the sum returned by operator+ is converted
    // back into a CharInfo through this constructor.
    CharInfo(longType count)
        : _ch(0)
        , _count(count)
    {}

    // True when the two entries have different counts (_ch/_code are ignored).
    bool operator!=(const CharInfo& ch) const
    {
        return _count != ch._count;
    }

    // Sum of the two counts.
    longType operator+(const CharInfo& ch) const
    {
        return _count + ch._count;
    }

    // Orders entries by count.
    bool operator<(const CharInfo& ch) const
    {
        return _count < ch._count;
    }
};
class FileCompress
{
public:
FileCompress()
{
for (int i = 0; i < 256; ++i)
{
_infos[i]._ch = i;
_infos[i]._count = 0;
}
}
public:
//压缩
void Compress(const char *filename)
{
assert(filename);
FILE *fout = fopen(filename, "rb");
assert(fout);
unsigned char ch = fgetc(fout);
while (!feof(fout))
{
_infos[ch]._count++;
ch = fgetc(fout);
}
//构造哈夫曼树
CharInfo invalid(0);
HuffmanTree<CharInfo> tree(_infos, 256, invalid);
//生成哈夫曼编码
string code;
GenerateHuffmanCode(tree.GetRootNode(), code);
//压缩文件名称
string compressfilename = filename;
compressfilename += ".compress";
FILE* fin = fopen(compressfilename.c_str(), "wb");
assert(fin);
fseek(fout, 0, SEEK_SET);//文件起始的位置
ch = fgetc(fout);
char value = 0;
int pos = 0;
while (!feof(fout))
{
string& code = _infos[(unsigned char)ch]._code;
for (size_t i= 0; i < code.size(); ++i)
{
value <<= 1;
if (code[i] == ‘1‘)
{
value |= 1;
}
if (++pos == 8)
{
fputc(value, fin);
value = 0;
pos = 0;
}
}
ch = fgetc(fout);
}
if (pos != 0)
{
value <<= (8 - pos);
fputc(value, fin);
}
//写配置文件
string configfilename = filename;
configfilename += ".config";
FILE* finconfig = fopen(configfilename.c_str(), "wb");
assert(finconfig);
char buffer[128];
string str;
for (size_t i = 0; i < 256; ++i)
{
if (_infos[i]._count>0)
{
str += _infos[i]._ch;
str += ‘,‘;
sprintf(buffer,"%d",_infos[i]._count);
str += buffer;
str += ‘\n‘;
fputs(str.c_str(), finconfig);
str = "";
}
}
fclose(finconfig);
fclose(fout);
fclose(fin);
}
bool ReadLine(FILE *&fout, string&str)
{
char ch = fgetc(fout);
if (feof(fout))
return false;
//while (ch != EOF)
while (!feof(fout))
{
str += ch;
ch = fgetc(fout);
if (ch == ‘\n‘)
{
break;
}
}
/*while (ch != ‘\n‘)
{
str += ch;
ch = fgetc(fout);
}*/
return true;
}
//解压
void UnCompress(const char *filename)
{
//从配置文件中读取文件出现的次数
string configfilename = filename;
configfilename += ".config";
FILE* foutconfig = fopen(configfilename.c_str(), "rb");
assert(foutconfig);
string str;
longType count = 0;
while (ReadLine(foutconfig, str))
{
_infos[(unsigned char)str[0]]._count = atoi(str.substr(2).c_str());
count += _infos[(unsigned char)str[0]]._count;
//string a=s.substr(0,5);
//获得字符串s中 从第0位开始的长度为5的字符串//默认时的长度为从开始位置到尾
str.clear();
}
//建立哈夫曼树
CharInfo invalid(0);
HuffmanTree<CharInfo> tree(_infos, 256, invalid);
//读压缩文件
string compressfile = filename;
compressfile += ".compress";
FILE *fout = fopen(compressfile.c_str(), "rb");
assert(fout);
//建立解压文件
string uncompressfile = filename;;
uncompressfile += ".uncompress";
FILE* fin = fopen(uncompressfile.c_str(), "wb");
assert(fout);
unsigned char ch = fgetc(fout);
HuffmanNode<CharInfo>* _root = tree.GetRootNode();
HuffmanNode<CharInfo>* cur = _root;
int pos = 8;
while (!feof(fout))
{
if (ch & 1 << (pos-1))
{
cur = cur->_right;
}
else
{
cur = cur->_left;
}
if (cur->_left == NULL&&cur->_right == NULL)
{
fputc(cur->_weight._ch, fin);
cur = _root;
--count;
if (count == 0)
{
break;
}
}
--pos;
if (pos == 0)
{
pos = 8;
ch = fgetc(fout);
}
}
fclose(foutconfig);
fclose(fout);
fclose(fin);
}
protected:
void GenerateHuffmanCode(const HuffmanNode<CharInfo> *_root, string code)
{
if (_root == NULL)
{
return;
}
if (_root->_left)
GenerateHuffmanCode(_root->_left, code + ‘0‘);
if (_root->_right)
GenerateHuffmanCode(_root->_right, code + ‘1‘);
if (_root->_left == NULL&&_root->_right == NULL)
{
_infos[(_root->_weight)._ch]._code = code;
}
}
protected:
CharInfo _infos[256];
};//heap.h
#pragma once
#include<vector>
// Comparison functor: true when data1 < data2.
// operator() is const so the functor can also be used through a const
// object or reference.
template<class T>
struct small
{
    bool operator()(const T &data1, const T &data2) const
    {
        return data1 < data2;
    }
};
// Comparison functor: true when data1 > data2.
// operator() is const so the functor can also be used through a const
// object or reference.
template<class T>
struct big
{
    bool operator()(const T &data1, const T &data2) const
    {
        return data1 > data2;
    }
};
//template<class T,template<class>class compare=big>
// Binary heap stored in a std::vector.
//
// `compare` decides which element sits on top: com(x, y) == true means x
// must be closer to the top than y. The default, big<T>, keeps the largest
// element on top.
//
// Copying and destruction are left to the compiler: the only member is a
// std::vector, which already copies and releases its elements. (The
// hand-written copy constructor of the original used `h->_a` on a reference
// and could not be instantiated.)
template<class T, class compare = big<T> >
class Heap
{
public:
    // Creates an empty heap.
    Heap()
    {}

    // Builds a heap from the `size` elements starting at `a`.
    // The pointer is taken by value so that arrays and temporaries bind too.
    Heap(const T* a, size_t size)
    {
        _a.reserve(size); // one allocation up front
        for (size_t i = 0; i < size; ++i)
        {
            _a.push_back(a[i]);
        }
        // Sift down every node that has a child, last parent first.
        // Counting i from size/2 down to 1 avoids the unsigned underflow of
        // (size - 2) / 2 when the heap has fewer than two elements.
        for (size_t i = _a.size() / 2; i > 0; --i)
        {
            _AdjustDown(i - 1);
        }
    }

    // Inserts `data` and restores the heap order.
    void Push(const T &data)
    {
        _a.push_back(data);
        _AdjustUp(_a.size() - 1);
    }

    // Removes the top element. Does nothing on an empty heap.
    void Pop()
    {
        if (_a.empty())
        {
            return;
        }
        std::swap(_a[0], _a[_a.size() - 1]);
        _a.pop_back();
        _AdjustDown(0);
    }

    // Writes the elements in storage order to std::cout, then a newline.
    void Print()
    {
        _Print(_a);
        std::cout << std::endl;
    }

    // True when the heap holds no element.
    bool Empty() const
    {
        return _a.empty();
    }

    // Reference to the top element. The heap must not be empty.
    T& Top()
    {
        return _a[0];
    }

    // Number of stored elements.
    size_t Size() const
    {
        return _a.size();
    }
protected:
    // Moves the element at `parent` down until neither child has to be
    // above it.
    void _AdjustDown(size_t parent)
    {
        compare com;
        size_t child = parent * 2 + 1;
        while (child < _a.size())
        {
            // Pick whichever child should be closer to the top.
            if (child + 1 < _a.size() && com(_a[child + 1], _a[child]))
            {
                ++child;
            }
            if (!com(_a[child], _a[parent]))
            {
                break;
            }
            std::swap(_a[parent], _a[child]);
            parent = child;
            child = parent * 2 + 1;
        }
    }

    // Moves the element at `child` up while it has to be above its parent.
    void _AdjustUp(size_t child)
    {
        compare com;
        while (child > 0)
        {
            // Computed inside the loop: (child - 1) would wrap for child == 0.
            const size_t parent = (child - 1) / 2;
            if (!com(_a[child], _a[parent]))
            {
                break;
            }
            std::swap(_a[child], _a[parent]);
            child = parent;
        }
    }

    // Returns a copy of `src`.
    std::vector<T> _Copy(const std::vector<T>& src)
    {
        return std::vector<T>(src);
    }

    // Writes every element of `src` followed by a space to std::cout.
    void _Print(const std::vector<T>& src)
    {
        for (size_t i = 0; i < src.size(); ++i)
        {
            std::cout << src[i] << " ";
        }
    }
protected:
    std::vector<T> _a; // heap elements in array (level) order
}; // huffmanTree.h
#pragma once
// Node of a Huffman tree: two child links plus the weight it carries.
// A freshly constructed node has no children.
template<class T>
struct HuffmanNode
{
    HuffmanNode* _left;   // left child, NULL when absent
    HuffmanNode* _right;  // right child, NULL when absent
    T _weight;            // copy of the value passed to the constructor

    // Creates a childless node holding `weight`.
    HuffmanNode(const T& weight)
        : _left(NULL), _right(NULL), _weight(weight)
    {
    }
};
//template<class T>
template<class T>
class HuffmanTree
{
typedef HuffmanNode<T> Node;
//friend void Clear(Node* _root);
public:
HuffmanTree()
{}
HuffmanTree(const T* a, size_t size, const T& invalid)
{
_root = _CreateTree(a, size, invalid);
}
Node* GetRootNode()
{
return _root;
}
protected:
Node* _CreateTree(const T*&a, size_t size, const T&invalid)
{
struct Compare
{
bool operator()(const HuffmanNode<T>* node1, const HuffmanNode<T>* node2)
{
return node1->_weight < node2->_weight;
}
};
Heap<Node*, Compare> minheap;
for (size_t i = 0; i < size; i++)
{
if (a[i] != invalid)
minheap.Push(new Node(a[i]));
}
while (minheap.Size()>1)
{
Node* left = minheap.Top();
minheap.Pop();
Node* right = minheap.Top();
minheap.Pop();
Node* parent = new Node((left->_weight) + (right->_weight));
parent->_left = left;
parent->_right = right;
minheap.Push(parent);
}
return minheap.Top();
}
protected:
Node* _root;
};运行结果:
本文出自 “学习记录” 博客,转载请与作者联系!
原文地址:http://10794428.blog.51cto.com/10784428/1827252