标签:文件压缩
简介:利用哈夫曼树实现一个文本文档的压缩,以及对压缩文件的解压
思路:在压缩文件时,首先要统计字符出现的次数,构建哈夫曼树,生成哈夫曼编码,压缩到文件。
在解压文件时,读取压缩文件,将编码与字符相对应,最后将字符写到文件中。
在解压文件中,如何将编码与字符相对应?
我们都知道,在解压文件时,我们只有一个压缩文件,其余一概不知。所以在解压时,需要重建哈夫曼树。要想重建哈夫曼树,就需要知道字符以及字符出现的次数。在压缩文件时,已经统计出字符出现的次数。所以,在压缩文件时,应该写配置文件。配置文件中存放字符以及字符出现的次数。在解压时,读取压缩文件和配置文件,重建哈夫曼树,将编码与字符相对应。
//建堆
#pragma once
#include <cassert>
#include <cstdio>
#include <iostream>
#include <utility>
#include <vector>
using namespace std;
/// "Less than" comparison functor.
///
/// @tparam T operand type; must support `operator<`.
template <class T>
struct Less
{
    /// @return true when `l < r`.
    /// Declared const so that const instances of the functor can be invoked.
    bool operator()(const T& l, const T& r) const
    {
        return l < r;
    }
};
/// "Greater than" comparison functor.
///
/// @tparam T operand type; must support `operator>`.
template <class T>
struct Greater
{
    /// @return true when `l > r`.
    /// Declared const so that const instances of the functor can be invoked.
    bool operator()(const T& l, const T& r) const
    {
        return l > r;
    }
};
// The default comparison policy is (re)declared here so that the default template
// argument below always resolves; its definition sits next to Less in this header.
template <class T>
struct Greater;

/// Binary heap stored in a std::vector.
///
/// The element that `Comper` ranks first sits at index 0: with the default Greater<T>
/// that is the largest element (max-heap); with a "less than" policy it is the smallest
/// (min-heap).
///
/// @tparam T       element type; must be copyable.
/// @tparam Comper  default-constructible functor; `Comper()(a, b)` is true when `a`
///                 has to be closer to the top than `b`.
template <class T, class Comper = Greater<T> > // max-heap by default
class Heap
{
public:
    /// Creates an empty heap.
    Heap()
    {}

    /// Builds a heap from the first `size` elements of `a`.
    /// @param a     pointer to `size` readable elements (may be anything when size is 0).
    /// @param size  number of elements to copy.
    Heap(const T* a, size_t size)
    {
        _a.reserve(size);
        for (size_t i = 0; i < size; ++i)
        {
            _a.push_back(a[i]);
        }
        // Sift down every parent, last parent (index size/2 - 1) first. Counting down
        // with an unsigned index avoids the `size() - 2` underflow for 0 or 1 elements.
        for (size_t i = _a.size() / 2; i-- > 0; )
        {
            _ApDown(i);
        }
    }

    /// Inserts `x`: append at the end, then sift it up.
    void Push(const T& x)
    {
        _a.push_back(x);
        _ApHeapUp(_a.size() - 1);
    }

    /// Removes the top element. The heap must not be empty.
    void Pop()
    {
        assert(!_a.empty());
        using std::swap;
        swap(_a[0], _a[_a.size() - 1]); // move the top to the back ...
        _a.pop_back();                  // ... drop it ...
        _ApDown(0);                     // ... and restore the heap order from the root
    }

    /// @return number of stored elements.
    size_t Size() const
    {
        return _a.size();
    }

    /// @return true when the heap holds no element.
    bool Empty() const
    {
        return _a.empty();
    }

    /// @return a copy of the top element. The heap must not be empty.
    T Top() const
    {
        assert(!_a.empty());
        return _a[0];
    }

public:
    /// Sifts the element at index `parent` down until neither child outranks it.
    void _ApDown(size_t parent)
    {
        using std::swap;
        Comper com;
        size_t child = parent * 2 + 1;
        while (child < _a.size())
        {
            // Pick whichever child the comparator ranks first.
            if ((child + 1) < _a.size() && com(_a[child + 1], _a[child]))
            {
                ++child;
            }
            // Stop as soon as that child does not outrank the parent.
            if (!com(_a[child], _a[parent]))
            {
                break;
            }
            swap(_a[child], _a[parent]);
            parent = child;
            child = parent * 2 + 1;
        }
    }

    /// Sifts the element at index `child` up while it outranks its parent.
    void _ApHeapUp(size_t child)
    {
        using std::swap;
        Comper com;
        while (child > 0)
        {
            const size_t parent = (child - 1) / 2;
            if (!com(_a[child], _a[parent]))
            {
                break;
            }
            swap(_a[child], _a[parent]);
            child = parent;
        }
    }

protected:
    std::vector<T> _a; // heap-ordered storage, top element at index 0
};
// Building the Huffman tree: Huffman.h
#include "Heap.h"
/// One node of a binary Huffman tree.
///
/// @tparam T type of the weight stored in the node.
template <class T>
struct HuffmanTreeNode
{
    /// Creates a node carrying weight `x` with no children attached.
    HuffmanTreeNode(const T& x)
        : _left(nullptr), _right(nullptr), _weight(x)
    {}

    HuffmanTreeNode* _left;  // left child, null until one is attached
    HuffmanTreeNode* _right; // right child, null until one is attached
    T _weight;               // weight carried by this node
};
template <class T>
class HuffmanTree
{
typedef HuffmanTreeNode<T> Node;
public:
HuffmanTree(const T* a,size_t n,const T& invalue)
{
struct IsLess
{
bool operator()(const Node* left,const Node* right)
{
return left->_weight < right->_weight;
}
};
Heap<Node*,IsLess> minHeap;
for(size_t i=0;i<n;++i)
{
if(a[i] != invalue)
{
minHeap.Push(new Node(a[i])); //建小堆
}
}
while(minHeap.Size() > 1)
{
Node* left = minHeap.Top();
minHeap.Pop();
Node* right = minHeap.Top();
minHeap.Pop();
Node* parent = new Node(left->_weight+right->_weight);
parent->_left = left;
parent->_right = right;
minHeap.Push(parent);
}
_root = minHeap.Top();
}
Node* GetRoot()
{
return _root;
}
protected:
Node* _root;
};
/// Smoke test: builds a Huffman tree over nine integer weights.
void HuffmanTreeTest()
{
    int a[] = {1,2,3,4,5,6,7,8,9};
    // '#' is the "invalid" sentinel; converted to int it matches none of the weights
    // above, so every weight becomes a leaf.
    HuffmanTree<int> ht(a, sizeof(a)/sizeof(a[0]), '#');
}
// Compression and decompression: FileCompare.h
#define _CRT_SECURE_NO_WARNINGS
#include "HuffmanTree.h"
#include <assert.h>
#include <string>
#include <stdlib.h>
typedef unsigned long LongType;

/// Per-byte bookkeeping for Huffman coding: the byte value, how often it occurs and
/// the code assigned to it. All comparisons and the sum look at `_count` only.
struct CharInfo
{
    unsigned char _ch;  // the byte value
    LongType _count;    // number of occurrences
    std::string _code;  // Huffman code of the byte

    /// Zero byte, zero count, empty code.
    CharInfo() : _ch(0), _count(0)
    {}

    /// Zero byte with the given count, empty code.
    CharInfo(LongType count) : _ch(0), _count(count)
    {}

    /// @return true when the two counts differ.
    bool operator!=(const CharInfo& info) const
    {
        return !(_count == info._count);
    }

    /// @return a CharInfo whose count is the sum of both counts.
    CharInfo operator+(const CharInfo& info) const
    {
        CharInfo sum(_count + info._count);
        return sum;
    }

    /// @return true when this count is smaller than the other one.
    bool operator<(const CharInfo& info) const
    {
        return info._count > _count;
    }
};
class FileCompress
{
public:
FileCompress()
{
for(size_t i=0;i<256;++i)
{
_info[i]._ch = i;
_info[i]._count = 0;
}
}
void GetHuffmanCode(HuffmanTreeNode<CharInfo>* root,string code)//获取哈夫曼编码
{
if(root == NULL)
return;
if(root->_left == NULL && root->_right == NULL)
{
_info[root->_weight._ch]._code = code;
}
GetHuffmanCode(root->_left,code + ‘0‘);//左为0
GetHuffmanCode(root->_right,code + ‘1‘);//右为1
}
bool ReadLine(FILE* fout,string& line)
{
char ch = fgetc(fout);
if(feof(fout)) //若结束返回非零值
return false;
while(!feof(fout) && ch != ‘\n‘)
{
line += ch;
ch = fgetc(fout);
}
return true;
}
void Compress(const char* filename)
{
//统计字符的次数
FILE* fout = fopen(filename,"rb");
assert(fout);
char ch = fgetc(fout);
while(!feof(fout)) //读到文件尾的标志位 若采用ch != EOF 11111111 跳出读取文件
{
_info[(unsigned char)ch]._count++;
ch = fgetc(fout);
}
//构建Huffman树
CharInfo invalue; //非法值
HuffmanTree<CharInfo> tree(_info,256,invalue);
//生成Huffman编码
string code;
GetHuffmanCode(tree.GetRoot(),code);
//压缩
string comFilename = filename;
comFilename += ".compress";
FILE* fin = fopen(comFilename.c_str(),"wb");
assert(fin);
fseek(fout,0,SEEK_SET); //设置文件指针的位置
ch = fgetc(fout);
int size = 0;
int value = 0;
while(!feof(fout)) //feof 来判断文件是否执行结束,若结束,则返回非零值。
{
string code = _info[(unsigned char)ch]._code;
for(size_t i=0;i<code.size();++i)
{
if(code[i] == ‘1‘)
{
value |= 1;
}
++size;
if(size == 8)
{
fputc(value,fin);
size = 0;
value = 0;
}
value <<= 1;
}
ch = fgetc(fout);
}
if(size > 0)
{
value <<= (7-size);
fputc(value,fin);
}
//配置文件
string configfile = filename;
configfile += ".config";
FILE* fconfig = fopen(configfile.c_str(),"wb");//以二进制的形式打开
assert(fconfig);
char buffer[256];
string line;
for(size_t i=0;i<256;++i)
{
if(_info[i]._count > 0)
{
line += _info[i]._ch;
line += ‘,‘;
line += itoa(_info[i]._count,buffer,10);
line += ‘\n‘;
fputs(line.c_str(),fconfig);
}
line.clear();
}
fclose(fout);
fclose(fin);
fclose(fconfig);
}
void Uncompress(const char* filename)
{
//读配置文件
string configfile = filename;
configfile += ".config";
FILE* fconfig = fopen(configfile.c_str(),"rb");//以二进制的形式读取
assert(fconfig);
string str;
while(ReadLine(fconfig,str))
{
if(str.empty()) //处理空行
{
str += ‘\n‘;
}
else
{
_info[(unsigned char)str[0]]._count = atoi(str.substr(2).c_str());//第二个位置即第三个字符为字符的次数
str.clear();
}
}
//构建Huffman树
CharInfo invalue;
HuffmanTree<CharInfo> tree(_info,256,invalue);
//读取压缩文件,进行还原
string comFilename = filename;
comFilename += ".compress";
FILE* fout = fopen(comFilename.c_str(),"rb");
assert(fout);
HuffmanTreeNode<CharInfo>* root = tree.GetRoot();
HuffmanTreeNode<CharInfo>* cur = root;
string uncomFilename = filename;
uncomFilename += ".uncompress";
FILE* fin = fopen(uncomFilename.c_str(),"wb");
assert(fin);
LongType SumCount = tree.GetRoot()->_weight._count; //总数
char ch = fgetc(fout);
int pos = 7;
while(1)
{
if(ch & (1<<pos))
{
cur = cur->_right;
}
else
{
cur = cur->_left;
}
if(cur->_left == NULL && cur->_right == NULL)
{
fputc(cur->_weight._ch,fin);
if(--SumCount == 0)
{
break;
}
cur = root;
}
if(pos-- == 0)
{
ch = fgetc(fout);
pos = 7;
}
}
fclose(fout);
fclose(fin);
}
protected:
CharInfo _info[256];
};
void PressHuffmanTest()
{
FileCompress fh;
fh.Compress("input");
//fh.Compress("project.txt");
}
void UnPressHuffmanTest()
{
FileCompress fh;
fh.Uncompress("input");
//fh.Uncompress("project.txt");
}//测试
#include "FileCompree.h"
#include <windows.h>
int main()
{
//HuffmanTreeTest(); //验证哈弗曼树
int begin1 = GetTickCount();
PressHuffmanTest();
int end1 = GetTickCount();
cout<<"压缩时间为:"<<end1-begin1<<endl;
int begin2 = GetTickCount();
UnPressHuffmanTest();
int end2 = GetTickCount();
cout<<"解压时间为:"<<end2-begin2<<endl;
return 0;
}测试结果:
比较结果:
本文出自 “一起去看星星” 博客,转载请与作者联系!
标签:文件压缩
原文地址:http://10810429.blog.51cto.com/10800429/1828719