标签:
知道有的人比较懒,所以直接贴出全部代码. 一开始一次性写完了压缩部分的代码,只调试了两三次就成功了. 一次性写150行代码而没遇到什么bug,感觉还是蛮爽的. 写解压代码时,才发现压缩代码有些细节问题,即对最后一个字符的处理问题. 比较折腾的一点:构建二叉树时,把本应是叶结点的(有值的)节点放在了左节点,正确的做法是放在右节点;否则生成的编码序列不满足"任意编码都不是其他编码的前缀"这一条件,导致解码失败. 使用方法:
// Encode the text box contents as UTF-8 to obtain the raw bytes to compress.
var srcData = Encoding.UTF8.GetBytes(textBox1.Text);
// Compress the bytes; Compress also fills treeView1 with the generated code tree.
var cpsData = Compress(srcData);
// Expand every node so the whole tree is visible.
treeView1.ExpandAll();
// Decompress again and turn the bytes back into text to check the round trip.
var depData = DeCompress(cpsData);
var depStr = Encoding.UTF8.GetString(depData );
代码中的TreeView(treeView1)用于显示生成的二叉树;使用时需要在窗体上添加该控件,或者删除相关代码.
快速理解:
1.此压缩直接对字节流进行压缩.
2.压缩原理:字节流中各个字节的使用率不平均,所以用变长的编码对256个字节重新编码,以较短的编码表示使用率高的字节,较长的编码表示使用率低的字节.
所以总体来看,用新的编码表示的字节流要比原来的短.(除非字节流特别小,压缩效果就不好)
3.由于二叉树的性质,将使用率低的先加入树,使用率高的后加入作为使用率低的节点的父节点的兄弟节点(因为有值的节点必须是叶结点).从最底下向上构建
二叉树.
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.IO;

namespace 霍夫曼二叉树压缩
{
    /// <summary>
    /// Demo form that Huffman-compresses the UTF-8 bytes of a text box, shows the
    /// resulting code tree in a TreeView and decompresses the result again.
    /// </summary>
    /// <remarks>
    /// The code table (<see cref="dicCode"/>) and the number of meaningful bits are kept
    /// in instance state, so <see cref="DeCompress"/> can only decode the output of the
    /// most recent <see cref="Compress"/> call on the same form instance.
    /// </remarks>
    public partial class Form1 : Form
    {
        // Message used whenever the bit stream cannot be decoded with the current code table.
        const string CorruptDataMessage = "解压出错:发现未能识别的编码,编码表或数据已被破坏!";

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Runs a full compress / decompress round trip on the text box contents.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            var srcData = Encoding.UTF8.GetBytes(textBox1.Text);
            var cpsData = Compress(srcData);
            treeView1.ExpandAll();
            var depData = DeCompress(cpsData);
            var depStr = Encoding.UTF8.GetString(depData);
        }

        // Byte value -> bit string ("0"/"1" characters) produced by the last Compress call.
        Dictionary<int, string> dicCode = new Dictionary<int, string>();

        // Number of meaningful bits emitted by the last Compress call. The final output
        // byte is zero-padded, and the padding cannot be told apart from real trailing
        // 0 bits without this count.
        long validBitCount;

        /// <summary>
        /// Huffman-encodes <paramref name="data"/> and rebuilds the code table and tree view.
        /// </summary>
        /// <param name="data">Raw bytes to compress; may be empty.</param>
        /// <returns>The packed bit stream, zero-padded to a whole number of bytes.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        byte[] Compress(byte[] data)
        {
            if (data == null)
                throw new ArgumentNullException("data");

            // Reset everything left over from a previous run; otherwise the second call
            // fails with a duplicate key in dicCode and the old tree stays on screen.
            dicCode.Clear();
            BNode.nodes.Clear();
            treeView1.Nodes.Clear();
            validBitCount = 0;

            if (data.Length == 0)
                return new byte[0];

            // Count how often every byte value occurs.
            Dictionary<byte, int> everyCount = new Dictionary<byte, int>();
            foreach (var d in data)
            {
                int seen;
                everyCount.TryGetValue(d, out seen);
                everyCount[d] = seen + 1;
            }

            // Rarest symbols first, as required by BuildTree.
            Queue<Count> queCouts = new Queue<Count>();
            foreach (var pair in everyCount.OrderBy(a => a.Value))
                queCouts.Enqueue(new Count { key = pair.Key, count = pair.Value });

            BNode root = BuildTree(ref queCouts);

            // GetCode(BNode) walks leaf -> root, so the path has to be reversed.
            foreach (var leaf in BNode.nodes)
                dicCode.Add(leaf.key, new string(GetCode(leaf).Reverse().ToArray()));

            CreateTreeView(root, treeView1.Nodes);

            StringBuilder pending = new StringBuilder();
            List<byte> outData = new List<byte>();
            foreach (var d in data)
            {
                string code = dicCode[d];
                pending.Append(code);
                validBitCount += code.Length;

                // A single code can be longer than 8 bits, so flush every complete byte,
                // not just one per symbol.
                while (pending.Length >= 8)
                {
                    outData.Add(GetByteByCode(pending.ToString(0, 8)));
                    pending.Remove(0, 8);
                }
            }

            if (pending.Length > 0)
                outData.Add(GetByteByCode(pending.ToString().PadRight(8, '0')));

            return outData.ToArray();
        }

        /// <summary>
        /// Decodes a buffer produced by the most recent <see cref="Compress"/> call.
        /// </summary>
        /// <param name="data">Compressed bytes.</param>
        /// <returns>The original, uncompressed bytes.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        /// <exception cref="InvalidDataException">
        /// The buffer does not match the current code table / bit count.
        /// </exception>
        byte[] DeCompress(byte[] data)
        {
            if (data == null)
                throw new ArgumentNullException("data");

            // The buffer must be exactly as long as the last Compress output.
            if ((validBitCount + 7) / 8 != data.Length)
                throw new InvalidDataException(CorruptDataMessage);

            StringBuilder codes = new StringBuilder(data.Length * 8);
            foreach (var b in data)
                codes.Append(GetCode(b));

            // Cut off the zero padding of the last byte. Trimming '0' characters instead
            // would also remove genuine trailing 0 bits.
            codes.Length = (int)validBitCount;

            return GetCode(codes.ToString());
        }

        /// <summary>Converts a string of up to eight '0'/'1' characters into a byte.</summary>
        byte GetByteByCode(string curCode)
        {
            return Convert.ToByte(curCode, 2);
        }

        /// <summary>
        /// Decodes a string of '0'/'1' characters using the current code table.
        /// </summary>
        /// <param name="code">Bit string without padding.</param>
        /// <returns>The decoded bytes; empty when <paramref name="code"/> is empty.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="code"/> is null.</exception>
        /// <exception cref="InvalidDataException">
        /// A bit sequence does not correspond to any entry of the code table.
        /// </exception>
        byte[] GetCode(string code)
        {
            if (code == null)
                throw new ArgumentNullException("code");

            // Reverse lookup: bit string -> byte value.
            Dictionary<string, int> keyByCode = new Dictionary<string, int>();
            int longest = 0;
            foreach (var pair in dicCode)
            {
                keyByCode[pair.Value] = pair.Key;
                if (pair.Value.Length > longest)
                    longest = pair.Value.Length;
            }

            List<byte> datas = new List<byte>();
            StringBuilder current = new StringBuilder();
            foreach (char bit in code)
            {
                current.Append(bit);

                // No code is a prefix of another one, so the first match is the only match.
                int key;
                if (keyByCode.TryGetValue(current.ToString(), out key))
                {
                    datas.Add((byte)key);
                    current.Length = 0;
                }
                else if (current.Length >= longest)
                {
                    // Already as long as the longest code and still unknown.
                    throw new InvalidDataException(CorruptDataMessage);
                }
            }

            // Left-over bits mean the stream ended in the middle of a code.
            if (current.Length > 0)
                throw new InvalidDataException(CorruptDataMessage);

            return datas.ToArray();
        }

        /// <summary>Returns the 8-character binary representation of a byte.</summary>
        string GetCode(byte b)
        {
            return Convert.ToString(b, 2).PadLeft(8, '0');
        }

        /// <summary>
        /// Returns the branch labels on the path from <paramref name="a"/> up to the root
        /// ("0" for a left child, "1" for a right child), i.e. the code in reverse order.
        /// </summary>
        string GetCode(BNode a)
        {
            if (a.parent != null)
                return (a.isLeft ? "0" : "1") + GetCode(a.parent);
            return "";
        }

        /// <summary>
        /// Builds the Huffman tree from a queue ordered by ascending count and registers
        /// every leaf in <see cref="BNode.nodes"/>.
        /// </summary>
        /// <param name="queCouts">Pending symbols / sub-trees; consumed by this method.</param>
        /// <returns>The root of the finished tree.</returns>
        /// <exception cref="ArgumentException">The queue is null or empty.</exception>
        BNode BuildTree(ref Queue<Count> queCouts)
        {
            if (queCouts == null || queCouts.Count == 0)
                throw new ArgumentException("At least one symbol is required to build the tree.", "queCouts");

            if (queCouts.Count == 1)
            {
                var only = queCouts.Dequeue();
                if (only.node != null)
                    return only.node;

                // A single distinct symbol still needs a one-bit code, so hang the leaf
                // under an artificial root as its right child (code "1").
                var soleLeaf = new BNode { key = only.key, count = only.count, isLeft = false };
                var soleRoot = new BNode { key = -1, count = only.count, right = soleLeaf };
                soleLeaf.parent = soleRoot;
                BNode.nodes.Add(soleLeaf);
                return soleRoot;
            }

            while (true)
            {
                // Merge the two entries with the lowest counts.
                var first = queCouts.Dequeue();
                var second = queCouts.Dequeue();

                var lft = first.node ?? new BNode { key = first.key, count = first.count };
                var rgt = second.node ?? new BNode { key = second.key, count = second.count };

                // Keep an already merged sub-tree (key == -1) on the left side.
                if (rgt.key == -1)
                {
                    var temp = lft;
                    lft = rgt;
                    rgt = temp;
                }

                var pnode = new BNode
                {
                    key = -1,
                    count = first.count + second.count,
                    left = lft,
                    right = rgt
                };
                lft.isLeft = true;
                rgt.isLeft = false;
                lft.parent = pnode;
                rgt.parent = pnode;

                // Only leaves (real byte values) get a code later on.
                if (lft.key != -1)
                    BNode.nodes.Add(lft);
                if (rgt.key != -1)
                    BNode.nodes.Add(rgt);

                if (queCouts.Count == 0)
                    return pnode;

                // Put the merged node back and restore the ascending order.
                queCouts.Enqueue(new Count { count = pnode.count, key = pnode.key, node = pnode });
                var orderQue = queCouts.OrderBy(q => q.count).ToList();
                queCouts.Clear();
                foreach (var a in orderQue)
                    queCouts.Enqueue(a);
            }
        }

        /// <summary>
        /// Recursively mirrors the tree below <paramref name="node"/> into
        /// <paramref name="tnc"/>; leaves are labelled with their key and count.
        /// </summary>
        void CreateTreeView(BNode node, TreeNodeCollection tnc)
        {
            if (node == null) return;
            var newNode = tnc.Add((node.isLeft ? "0" : "1") + (node.key != -1 ? "-" + node.key + ":" + node.count : ""));
            CreateTreeView(node.left, newNode.Nodes);
            CreateTreeView(node.right, newNode.Nodes);
        }

        /// <summary>Queue entry: a plain symbol, or an already merged sub-tree when node is set.</summary>
        class Count
        {
            public int key;
            public int count;
            public BNode node;
        }

        /// <summary>Tree node; key is the byte value for leaves and -1 for inner nodes.</summary>
        class BNode
        {
            public int key;
            public int count;
            public BNode left;
            public BNode right;
            public BNode parent;
            public bool isLeft = false;

            // All leaves of the tree built by the last BuildTree run. Shared by every
            // instance, therefore cleared at the start of Compress.
            public static List<BNode> nodes = new List<BNode>();
        }
    }
}
标签:
原文地址:http://www.cnblogs.com/niconico/p/5170777.html