作为一个通信人,本科时候上过信息论,研究生也继续修过信息编码。面试的时候,面试官说了一个霍夫曼树,作为一个通信人竟然忘了。多少有些说不过去。
Huffman算法的最根本的原则是:累计的(字符的统计数字 × 字符的编码长度)为最小,也就是权值(字符的统计数字 × 字符的编码长度)的和最小。这样编码可以达到压缩的效果。霍夫曼树又名最优二叉树。
具体的可以参考左耳朵耗子的博客:http://coolshell.cn/articles/7459.html 很形象。
主要包括:构造树、编码、解码。show me your code:
package com.zy.huffman;
import java.nio.charset.Charset;
import java.util.*;
/**
 * Thin wrapper around the root {@link Node} of a Huffman tree.
 */
class Tree {

    /** Top-most node of the tree; stays {@code null} until one is assigned. */
    Node root;

    /**
     * @return the node currently stored as the root (may be {@code null})
     */
    public Node getRoot() {
        return this.root;
    }

    /**
     * Replaces the stored root node.
     *
     * @param root the node to use as the new root
     */
    public void setRoot(Node root) {
        this.root = root;
    }
}
class Node implements Comparable<Node> {
String chars = "";
int frequence = 0;
Node parent;
Node leftNode;
Node rightNode;
public int compareTo(Node n) {
return frequence - n.frequence;
}
public boolean isLeaf() {
return chars.length() == 1;
}
public boolean isRoot() {
return parent == null;
}
public boolean isLeftChild() {
return parent != null && this == parent.leftNode;
}
public int getFrequence() {
return frequence;
}
public void setFrequence(int frequence) {
this.frequence = frequence;
}
public String getChars() {
return chars;
}
public void setChars(String chars) {
this.chars = chars;
}
public Node getParent() {
return parent;
}
public void setParent(Node parent) {
this.parent = parent;
}
public Node getLeftNode() {
return leftNode;
}
public void setLeftNode(Node leftNode) {
this.leftNode = leftNode;
}
public Node getRightNode() {
return rightNode;
}
public void setRightNode(Node rightNode) {
this.rightNode = rightNode;
}
}
public class HuffmanTree {
//统计数据字符出现的次数
public static Map<Character, Integer> statistics(char[] charArray) {
Map<Character, Integer> map = new HashMap<Character, Integer>();
for (char c : charArray) {
Character character = new Character(c);
if (map.containsKey(character)) {
map.put(character, map.get(character) + 1);
} else {
map.put(character, 1);
}
}
return map;
}
//构建树
private static Tree buildTree(Map<Character, Integer> statistics,
List<Node> leafs) {
Character[] keys = statistics.keySet().toArray(new Character[0]);
PriorityQueue<Node> priorityQueue = new PriorityQueue<Node>();
for (Character character : keys) {
Node node = new Node();
node.chars = character.toString();
node.frequence = statistics.get(character);
priorityQueue.add(node);
leafs.add(node);
}
int size = priorityQueue.size();
for (int i = 1; i <= size - 1; i++) {
Node node1 = priorityQueue.poll();
Node node2 = priorityQueue.poll();
Node sumNode = new Node();
sumNode.chars = node1.chars + node2.chars;
sumNode.frequence = node1.frequence + node2.frequence;
sumNode.leftNode = node1;
sumNode.rightNode = node2;
node1.parent = sumNode;
node2.parent = sumNode;
priorityQueue.add(sumNode);
}
Tree tree = new Tree();
tree.root = priorityQueue.poll();
return tree;
}
//编码
public static String encode(String originalStr,
Map<Character, Integer> statistics) {
if (originalStr == null || originalStr.equals("")) {
return "";
}
char[] charArray = originalStr.toCharArray();
List<Node> leafNodes = new ArrayList<Node>();
buildTree(statistics, leafNodes);
Map<Character, String> encodInfo = buildEncodingInfo(leafNodes);
StringBuffer buffer = new StringBuffer();
for (char c : charArray) {
Character character = new Character(c);
buffer.append(encodInfo.get(character));
}
return buffer.toString();
}
private static Map<Character, String> buildEncodingInfo(List<Node> leafNodes) {
Map<Character, String> codewords = new HashMap<Character, String>();
for (Node leafNode : leafNodes) {
Character character = new Character(leafNode.getChars().charAt(0));
String codeword = "";
Node currentNode = leafNode;
do {
if (currentNode.isLeftChild()) {
codeword = "0" + codeword;
} else {
codeword = "1" + codeword;
}
currentNode = currentNode.parent;
} while (currentNode.parent != null);
codewords.put(character, codeword);
}
return codewords;
}
//解码
public static String decode(String binaryStr,
Map<Character, Integer> statistics) {
if (binaryStr == null || binaryStr.equals("")) {
return "";
}
char[] binaryCharArray = binaryStr.toCharArray();
LinkedList<Character> binaryList = new LinkedList<Character>();
int size = binaryCharArray.length;
for (int i = 0; i < size; i++) {
binaryList.addLast(new Character(binaryCharArray[i]));
}
List<Node> leafNodes = new ArrayList<Node>();
Tree tree = buildTree(statistics, leafNodes);
StringBuffer buffer = new StringBuffer();
while (binaryList.size() > 0) {
Node node = tree.root;
do {
Character c = binaryList.removeFirst();
if (c.charValue() == ‘0‘) {
node = node.leftNode;
} else {
node = node.rightNode;
}
} while (!node.isLeaf());
buffer.append(node.chars);
}
return buffer.toString();
}
//测试
public static void main(String[] args) {
String oriStr = "i love somebody‘s name is xiaofang";
Map<Character, Integer> statistics = statistics(oriStr.toCharArray());
String encodedBinariStr = encode(oriStr, statistics);
String decodedStr = decode(encodedBinariStr, statistics);
System.out.println("Original sstring: " + oriStr);
System.out.println("Huffman encoed binary string: " + encodedBinariStr);
System.out.println("decoded string from binariy string: " + decodedStr);
System.out.println("binary string of US-ASCII: "
+ getStringOfByte(oriStr, Charset.forName("US-ASCII")));
}
public static String getStringOfByte(String str, Charset charset) {
if (str == null || str.equals("")) {
return "";
}
byte[] byteArray = str.getBytes(charset);
int size = byteArray.length;
StringBuffer buffer = new StringBuffer();
for (int i = 0; i < size; i++) {
byte temp = byteArray[i];
buffer.append(getStringOfByte(temp));
}
return buffer.toString();
}
public static String getStringOfByte(byte b) {
StringBuffer buffer = new StringBuffer();
for (int i = 7; i >= 0; i--) {
byte temp = (byte) ((b >> i) & 0x1);
buffer.append(String.valueOf(temp));
}
return buffer.toString();
}
}
result is:
Original sstring: i love somebody's name is xiaofang
Huffman encoed binary string: 001101110011000111011111011110100011011110111110011011111000110101110101010100001101111101001111010101001001000100010000000101110110
decoded string from binariy string: i love somebody's name is xiaofang
binary string of US-ASCII: 01101001001000000110110001101111011101100110010100100000011100110110111101101101011001010110001001101111011001000111100100100111011100110010000001101110011000010110110101100101001000000110100101110011001000000111100001101001011000010110111101100110011000010110111001100111
程序参考:http://blog.csdn.net/kimylrong/article/details/17022319
版权声明:本文为博主原创文章,未经博主允许不得转载。
原文地址:http://blog.csdn.net/zy416548283/article/details/48058307