标签:
文件压缩与解压思想:
(1)统计字符出现次数,结合最小堆的性质生成哈夫曼树;
(2)对叶节点进行编码,结点左边编0,右边编1;
(3)再次读取文件,把每个字符替换为它的哈夫曼编码,并将编码按位拼接、每满8位作为一个字节写入压缩文件,即压缩;
(4)读取压缩文件,按位从哈夫曼树的根向下走(0向左,1向右),每到达一个叶节点就还原出一个原字符并回到根,即解压;
例如,对以下数据生成哈夫曼树,以及产生相应的哈夫曼编码:
// Hand-written binary heap (written as an exercise, so the STL heap facilities
// are deliberately not used).
#pragma once
#include<cassert>
#include<iostream>
#include<utility>
#include<vector>
using namespace std;

// Default ordering for Heap: T must be pointer-like and the pointee must
// expose a `_weight` member comparable with operator<. With this ordering
// the element with the smallest `_weight` ends up on top (min-heap).
template<class T>
struct Less
{
    bool operator()(const T& left, const T& right) const
    {
        return left->_weight < right->_weight;
    }
};

// Binary heap stored in a vector. Compare()(a, b) returning true means that
// `a` must sit closer to the top than `b`.
template<class T, class Compare = Less<T> >
class Heap
{
public:
    Heap()
    {}

    // Builds a heap from the first `size` elements of `v`, skipping every
    // element that compares equal to the sentinel `valid`.
    Heap(const T* v, int size, int valid)
    {
        for (int i = 0; i < size; i++)
        {
            if (v[i] != valid)
                _v.push_back(v[i]);
        }
        // Sift down every non-leaf node, starting from the last one.
        // For 0 or 1 elements the start index is -1 or 0, both harmless.
        const int count = static_cast<int>(_v.size());
        for (int root = (count - 2) / 2; root >= 0; root--)
        {
            AdjustDown(root);
        }
    }

    // Adds `value` and restores the heap property.
    void Insert(const T& value)
    {
        _v.push_back(value);
        AdjustUp(static_cast<int>(_v.size()) - 1);
    }

    // Removes the top element. Calling it on an empty heap is a no-op
    // (it used to index and pop an empty vector, which is undefined).
    void Remove()
    {
        if (_v.empty())
            return;
        std::swap(_v.front(), _v.back());
        _v.pop_back();
        AdjustDown(0);
    }

    // Returns the top element. The heap must not be empty; this is checked
    // with assert because there is no element to return a reference to.
    T& GetTop()
    {
        assert(!_v.empty() && "Heap::GetTop() called on an empty heap");
        return _v[0];
    }

    bool Empty() const
    {
        return _v.empty();
    }

    int Size() const
    {
        return static_cast<int>(_v.size());
    }

protected:
    // Moves the element at index `root` down until neither child has to be
    // above it.
    void AdjustDown(int root)
    {
        const size_t count = _v.size();
        size_t parent = static_cast<size_t>(root);
        size_t child = parent * 2 + 1;
        while (child < count)
        {
            // Pick whichever child must be closer to the top.
            if (child + 1 < count && Compare()(_v[child + 1], _v[child]))
                ++child;
            if (!Compare()(_v[child], _v[parent]))
                break;
            std::swap(_v[child], _v[parent]);
            parent = child;
            child = parent * 2 + 1;
        }
    }

    // Moves the element at index `leaf` up until its parent no longer has to
    // be below it.
    void AdjustUp(int leaf)
    {
        while (leaf > 0)
        {
            const int parent = (leaf - 1) / 2;
            if (!Compare()(_v[leaf], _v[parent]))
                break;
            std::swap(_v[leaf], _v[parent]);
            leaf = parent;
        }
    }

private:
    vector<T> _v;
};
//哈夫曼树的建立 #pragma once #include<iostream> #include<string> #include"Heap.h" using namespace std; template<class T> struct HuffmanTreeNode { HuffmanTreeNode<T>* _left; HuffmanTreeNode<T>* _right; T _weight; HuffmanTreeNode(const T& weight) :_left(NULL) , _right(NULL) , _weight(weight) {} }; template<class T> class HuffmanTree { typedef HuffmanTreeNode<T> Node; public: HuffmanTree(const T*a, int size, T invaild) { _CreatHuffmanTree(a, size, invaild); } Node* GetRoot() { return _root; } protected: void _CreatHuffmanTree(const T*a,int size,T invaild) { Heap<Node*,Less<Node*>>hp; for (int i = 0; i < size; i++) { if (a[i] != invaild) hp.Insert(new Node(a[i]));//建立小堆 } //当_v中只剩下一个数据时,哈弗曼树建立完成 while (hp.Size()>1) { Node* left = hp.GetTop(); hp.Remove(); Node* right = hp.GetTop(); hp.Remove(); Node*parent = new Node(left->_weight + right->_weight); parent->_left = left; parent->_right = right; hp.Insert(parent); } _root = hp.GetTop(); } private: Node* _root; };
//文件压缩与解压 #pragma once #include<iostream> #include<string> using namespace std; #include"HuffmanTree.h" struct CharInfo { unsigned char _ch; long _count; string _code; CharInfo(const long count = 0) :_count(count) {} CharInfo(const unsigned char ch) :_ch(ch) {} long operator+(const CharInfo& info)const { return _count + info._count; } bool operator!=(const CharInfo& info)const { return _count != info._count; } bool operator<(const CharInfo& info)const { return _count < info._count; } }; class FileCompress { public: //对文件filename进行压缩 void Compress(const char* filename) { FILE* fread = fopen(filename, "rb"); if (fread == NULL) { cout << "打开文件失败..." << endl; return; } //打开文件成功 //1、字符与下标对应,填充结构体的_ch for (int i = 0; i < 256; i++) { _Info[i]._ch = i; } //2、统计各个字符出现的次数 unsigned char ch = fgetc(fread); while (!feof(fread)) { _Info[ch]._count++; ch = fgetc(fread); } //3、建立哈夫曼树 HuffmanTree<CharInfo> hft(_Info, 256, CharInfo()); //4、获取各个字符的哈夫曼编码 //每次都从根节点开始,走到叶节点,即得到叶节点对应字符的编码 HuffmanTreeNode<CharInfo>*root = hft.GetRoot(); string code; GetHuffmanCodeOfChar(root, code); //5、从头开始读文件,把读取字符的编码重新组合写入压缩文件中 fseek(fread, 0, SEEK_SET); //压缩产生的文件的名字 string fcp = filename; fcp += ".compress"; FILE* fwrite = fopen(fcp.c_str(), "wb"); if (fwrite == NULL) { cout << "compress 文件打开失败" << endl; return; } //打开文件成功 unsigned char data = 0;//写入压缩文件的字符 int offset = 7;//偏移量 ch = fgetc(fread); while (!feof(fread)) { const char* str = _Info[ch]._code.c_str();//获取当前字符的编码 while (*str != '\0') { if (offset >= 0) { data = data | ((*str - '0') << offset); offset--; } if (offset < 0) { fputc(data, fwrite); data = 0; offset = 7; } str++; } ch = fgetc(fread); } fputc(data, fwrite); WriteConfig(filename); fclose(fread); fclose(fwrite); } void UnCompress(const char* filename) { string fcp = filename; fcp += ".compress"; FILE* fread = fopen(fcp.c_str(), "rb"); if (fread == NULL) { cout << "打开文件失败..." 
<< endl; return; } //打开文件成功 //3、建立哈夫曼树 CharInfo info[256]; ReadConfig(filename, info); HuffmanTree<CharInfo> hft(info, 256, CharInfo()); string fucp = filename; fucp += ".uncompress"; FILE* fwrite = fopen(fucp.c_str(), "wb"); HuffmanTreeNode<CharInfo>*root = hft.GetRoot(); HuffmanTreeNode<CharInfo>*cur = root;//每次都要从根开始读,读到叶节点即可获取一个原字符 long DataTotal = (root->_weight)._count;//原文件中的字符总数 unsigned char ch = fgetc(fread); while (DataTotal) { int tmp = 1; int offset = 7; while (offset >= 0) { if (ch&(1 << offset))//检验位是否为0 { cur = cur->_right; offset--; } else { cur = cur->_left; offset--; } if (cur->_left == NULL&&cur->_right == NULL) { unsigned char wch = cur->_weight._ch; fputc(wch, fwrite); cur = root; DataTotal--; //最后一个字符的编码在最后两个字节当中的情况 if (!DataTotal) { break; } } } ch = fgetc(fread); } fclose(fread); fclose(fwrite); } protected: void WriteConfig(const char*filename) { string Config = filename; Config += "config"; FILE* fwrite = fopen(Config.c_str(), "wb"); if (fwrite == NULL) { cout << "打开文件失败" << endl; return; } //打开成功 for (int i = 0; i < 256; i++) { if (_Info[i] ._count) { fputc(_Info[i]._ch, fwrite); fputc(',', fwrite); char count[100]; //参数说明:1.要转换的值;2.结果存放区;3.进制 _itoa(_Info[i]._count, count, 10); fputs(count, fwrite); fputc(',', fwrite); fputs(_Info[i]._code.c_str(), fwrite); fputc('\n', fwrite); } } fclose(fwrite); } void ReadConfig(const char*filename,CharInfo *info) { string Config = filename; Config += "config"; FILE* fread = fopen(Config.c_str(), "rb"); if (fread == NULL) { cout << "打开文件失败" << endl; return; } //打开成功 char str[100]; while (fgets(str, 100, fread)) { char*pstr = str; unsigned char ch = (unsigned char)*pstr;//得到字符 info[ch]._ch = ch; if (ch == '\n') { fgets(str, 100, fread); pstr=str; pstr++; } else pstr+=2;//跳过分隔符(逗号) //得到_count的字符串形式 long count = 0; while (*pstr&&*pstr!=',') { count *= 10; count += *pstr - '0'; pstr++; } info[ch]._count = count;//得到出现次数 pstr++; string code(pstr); info[ch]._code = code; } } void 
GetHuffmanCodeOfChar(HuffmanTreeNode<CharInfo>*root, string& code) { if (root == NULL) return; if (root->_left == NULL&&root->_right == NULL) { unsigned char ch = root->_weight._ch; _Info[ch]._code = code; return; } GetHuffmanCodeOfChar(root->_left, code + '0');//左边编码为0 GetHuffmanCodeOfChar(root->_right, code + '1');//右边编码为1 } private: CharInfo _Info[256];//结构体数组 };
标签:
原文地址:http://blog.csdn.net/zdf0414/article/details/51935338