文件压缩
文件压缩是指通过某种特殊的编码方式，有效地降低数据信息中存在的重复度和冗余度，从而达到压缩数据的目的。这里使用哈夫曼树来生成这种编码（即哈夫曼编码）。
//compress.h #pragma once typedef unsigned long long longType; struct CharInfo { unsigned char _ch;//字母信息 longType _count; //出现次数 string _code; //哈夫曼编码 CharInfo(){} CharInfo(longType count) :_count(count) , _ch(0) {} bool operator!=(const CharInfo& ch)const { return _count != ch._count; } longType operator+(const CharInfo& ch)const { return _count + ch._count; } bool operator<(const CharInfo& ch)const { return _count < ch._count; } }; class FileCompress { public: FileCompress() { for (int i = 0; i < 256; ++i) { _infos[i]._ch = i; _infos[i]._count = 0; } } public: //压缩 void Compress(const char *filename) { assert(filename); FILE *fout = fopen(filename, "rb"); assert(fout); unsigned char ch = fgetc(fout); while (!feof(fout)) { _infos[ch]._count++; ch = fgetc(fout); } //构造哈夫曼树 CharInfo invalid(0); HuffmanTree<CharInfo> tree(_infos, 256, invalid); //生成哈夫曼编码 string code; GenerateHuffmanCode(tree.GetRootNode(), code); //压缩文件名称 string compressfilename = filename; compressfilename += ".compress"; FILE* fin = fopen(compressfilename.c_str(), "wb"); assert(fin); fseek(fout, 0, SEEK_SET);//文件起始的位置 ch = fgetc(fout); char value = 0; int pos = 0; while (!feof(fout)) { string& code = _infos[(unsigned char)ch]._code; for (size_t i= 0; i < code.size(); ++i) { value <<= 1; if (code[i] == ‘1‘) { value |= 1; } if (++pos == 8) { fputc(value, fin); value = 0; pos = 0; } } ch = fgetc(fout); } if (pos != 0) { value <<= (8 - pos); fputc(value, fin); } //写配置文件 string configfilename = filename; configfilename += ".config"; FILE* finconfig = fopen(configfilename.c_str(), "wb"); assert(finconfig); char buffer[128]; string str; for (size_t i = 0; i < 256; ++i) { if (_infos[i]._count>0) { str += _infos[i]._ch; str += ‘,‘; sprintf(buffer,"%d",_infos[i]._count); str += buffer; str += ‘\n‘; fputs(str.c_str(), finconfig); str = ""; } } fclose(finconfig); fclose(fout); fclose(fin); } bool ReadLine(FILE *&fout, string&str) { char ch = fgetc(fout); if (feof(fout)) return false; //while (ch != EOF) while 
(!feof(fout)) { str += ch; ch = fgetc(fout); if (ch == ‘\n‘) { break; } } /*while (ch != ‘\n‘) { str += ch; ch = fgetc(fout); }*/ return true; } //解压 void UnCompress(const char *filename) { //从配置文件中读取文件出现的次数 string configfilename = filename; configfilename += ".config"; FILE* foutconfig = fopen(configfilename.c_str(), "rb"); assert(foutconfig); string str; longType count = 0; while (ReadLine(foutconfig, str)) { _infos[(unsigned char)str[0]]._count = atoi(str.substr(2).c_str()); count += _infos[(unsigned char)str[0]]._count; //string a=s.substr(0,5); //获得字符串s中 从第0位开始的长度为5的字符串//默认时的长度为从开始位置到尾 str.clear(); } //建立哈夫曼树 CharInfo invalid(0); HuffmanTree<CharInfo> tree(_infos, 256, invalid); //读压缩文件 string compressfile = filename; compressfile += ".compress"; FILE *fout = fopen(compressfile.c_str(), "rb"); assert(fout); //建立解压文件 string uncompressfile = filename;; uncompressfile += ".uncompress"; FILE* fin = fopen(uncompressfile.c_str(), "wb"); assert(fout); unsigned char ch = fgetc(fout); HuffmanNode<CharInfo>* _root = tree.GetRootNode(); HuffmanNode<CharInfo>* cur = _root; int pos = 8; while (!feof(fout)) { if (ch & 1 << (pos-1)) { cur = cur->_right; } else { cur = cur->_left; } if (cur->_left == NULL&&cur->_right == NULL) { fputc(cur->_weight._ch, fin); cur = _root; --count; if (count == 0) { break; } } --pos; if (pos == 0) { pos = 8; ch = fgetc(fout); } } fclose(foutconfig); fclose(fout); fclose(fin); } protected: void GenerateHuffmanCode(const HuffmanNode<CharInfo> *_root, string code) { if (_root == NULL) { return; } if (_root->_left) GenerateHuffmanCode(_root->_left, code + ‘0‘); if (_root->_right) GenerateHuffmanCode(_root->_right, code + ‘1‘); if (_root->_left == NULL&&_root->_right == NULL) { _infos[(_root->_weight)._ch]._code = code; } } protected: CharInfo _infos[256]; };
// heap.h
#pragma once

#include <cassert>
#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

/// Comparator that puts the smaller element on top (min-heap).
template<class T>
struct small
{
    bool operator()(const T &data1, const T &data2) const
    {
        return data1 < data2;
    }
};

/// Comparator that puts the larger element on top (max-heap).
template<class T>
struct big
{
    bool operator()(const T &data1, const T &data2) const
    {
        return data1 > data2;
    }
};

/// Binary heap stored in a std::vector.
///
/// `compare(a, b)` returning true means `a` belongs closer to the top than
/// `b`. The default comparator is big<T>, i.e. a max-heap.
///
/// Copying, assignment and destruction are left to the compiler-generated
/// members; the only data member is the vector.
template<class T, class compare = big<T>>
class Heap
{
public:
    Heap()
    {}

    /// Builds a heap from the `size` elements starting at `a`.
    /// A null pointer or a size of zero yields an empty heap.
    Heap(const T* a, std::size_t size)
    {
        if (a == nullptr || size == 0)
        {
            return;
        }

        _a.assign(a, a + size);

        // Sift down every node that has at least one child, starting with
        // the last such node (index size/2 - 1) and ending at the root.
        for (std::size_t i = size / 2; i-- > 0;)
        {
            _AdjustDown(i);
        }
    }

    /// Inserts `data` and restores the heap order.
    void Push(const T &data)
    {
        _a.push_back(data);
        _AdjustUp(_a.size() - 1);
    }

    /// Removes the top element. Does nothing if the heap is empty.
    void Pop()
    {
        if (_a.empty())
        {
            return;
        }

        using std::swap;
        swap(_a[0], _a[_a.size() - 1]);
        _a.pop_back();
        _AdjustDown(0);
    }

    /// Writes the elements in storage order to std::cout, followed by a newline.
    void Print()
    {
        _Print(_a);
        std::cout << std::endl;
    }

    bool Empty() const
    {
        return _a.empty();
    }

    /// Returns the top element. The heap must not be empty.
    T& Top()
    {
        assert(!_a.empty());
        return _a[0];
    }

    const T& Top() const
    {
        assert(!_a.empty());
        return _a[0];
    }

    std::size_t Size() const
    {
        return _a.size();
    }

protected:
    // Moves the element at `parent` down until neither child should be above it.
    void _AdjustDown(std::size_t parent)
    {
        using std::swap;
        compare com;   // kept non-const so comparators with a non-const operator() still work
        std::size_t child = parent * 2 + 1;
        while (child < _a.size())
        {
            // Pick whichever child should be closer to the top.
            if (child + 1 < _a.size() && com(_a[child + 1], _a[child]))
            {
                ++child;
            }

            if (!com(_a[child], _a[parent]))
            {
                break;
            }

            swap(_a[parent], _a[child]);
            parent = child;
            child = parent * 2 + 1;
        }
    }

    // Moves the element at `child` up while it should be above its parent.
    void _AdjustUp(std::size_t child)
    {
        using std::swap;
        compare com;
        while (child > 0)
        {
            const std::size_t parent = (child - 1) / 2;
            if (!com(_a[child], _a[parent]))
            {
                break;
            }

            swap(_a[child], _a[parent]);
            child = parent;
        }
    }

    // Returns a copy of `src`.
    std::vector<T> _Copy(const std::vector<T>& src)
    {
        return src;
    }

    // Writes every element of `src` followed by a space to std::cout.
    void _Print(const std::vector<T>& src)
    {
        for (std::size_t i = 0; i < src.size(); ++i)
        {
            std::cout << src[i] << " ";
        }
    }

protected:
    std::vector<T> _a;
};
huffmanTree.h #pragma once template<class T> struct HuffmanNode { HuffmanNode<T>* _left; HuffmanNode<T>* _right; T _weight; HuffmanNode(const T& value) :_left(NULL) , _right(NULL) , _weight(value) {} }; //template<class T> template<class T> class HuffmanTree { typedef HuffmanNode<T> Node; //friend void Clear(Node* _root); public: HuffmanTree() {} HuffmanTree(const T* a, size_t size, const T& invalid) { _root = _CreateTree(a, size, invalid); } Node* GetRootNode() { return _root; } protected: Node* _CreateTree(const T*&a, size_t size, const T&invalid) { struct Compare { bool operator()(const HuffmanNode<T>* node1, const HuffmanNode<T>* node2) { return node1->_weight < node2->_weight; } }; Heap<Node*, Compare> minheap; for (size_t i = 0; i < size; i++) { if (a[i] != invalid) minheap.Push(new Node(a[i])); } while (minheap.Size()>1) { Node* left = minheap.Top(); minheap.Pop(); Node* right = minheap.Top(); minheap.Pop(); Node* parent = new Node((left->_weight) + (right->_weight)); parent->_left = left; parent->_right = right; minheap.Push(parent); } return minheap.Top(); } protected: Node* _root; };
运行结果:
本文出自 “学习记录” 博客,转载请与作者联系!
原文地址:http://10794428.blog.51cto.com/10784428/1827252