码迷,mamicode.com
首页 > 其他好文 > 详细

Huffman Coding 原理与C代码

时间:2015-03-13 16:32:43      阅读:421      评论:0      收藏:0      [点我收藏+]

标签:huffman   霍夫曼编码   linjm   

Huffman编码的代码计划一直躺在Evernote里面。这几天正好是论文初稿的提交间歇,就花两天把这项todolist干掉。


Huffman Coding 原理

Huffman Coding是一种可变长编码的无损压缩方法,在数据压缩、音频编码、图像编码中得到了广泛的应用,例如,MPEG1音频标准的LayerIII、H.263视频编码标准中都使用Huffman Coding来进行数据压缩。它是由Huffman于1952年提出的[1],这个方法完成依据字符出现的概率来构造平均长度最短的码字。
具体过程如下:

  1. 先对各个字符出现的概率进行统计;
  2. 然后按照各个字符出现概率的大小排列,把最小的两个概率相加,作为新的概率和剩余的概率重新排队;
  3. 再把最小的两个概率相加,再重新排队,直到最后变成1。每次相加时都把“0”和“1”赋给相加的两个概率,读出时由该符号开始一直到最后的“1”。

技术分享

Pseudo Code

begin
     count frequencies of each single characters
     sort them to non-decreasing sequence
     create a leaf node (character, frequency c, left son = NULL, right son = NULL) 
     of the tree for each character and put nodes into queue F
     while (|F|>=2) do 
      begin
        pop the first two nodes (u1, u2) with the lowest 
          frequencies from sorted queue
        create a node evaluated with sum of the chosen units, 
          successors are chosen units (eps, c(u1)+c(u2), u1, u2)
        insert new node into queue
      end
     node evaluate with way from root to leaf node (left son 0, right son 1)
     create output from coded intput characters
end

C Code

#include<stdio.h>
#include<stdlib.h>
#include<conio.h>

#define MAX_TREE_HT 100

typedef struct tagNode
{
    char character;
    unsigned frequency;
    struct tagNode *left, *right;
}HNode;

typedef struct tagHeap
{
    unsigned size;
    unsigned space;
    HNode **array;
}HHeap;

HNode* newNode(char character, unsigned frequency)
{
    HNode* temp = (HNode*)malloc(sizeof(HNode));
    temp->left = NULL;
    temp->right = NULL;
    temp->character = character;
    temp->frequency = frequency;
    return temp;
}

HHeap* createHHeap(unsigned space)
{
    HHeap* HHeapX = (HHeap*)malloc(sizeof(HHeap));
    HHeapX->size = 0;
    HHeapX->space = space;
    HHeapX->array = (HNode**)malloc(HHeapX->space * sizeof(HNode*));
    return HHeapX;
}

void swapHNode(HNode** a,HNode** b)
{
    HNode* t = *a;
    *a = *b;
    *b = t;
}

void HHeapify(HHeap* HHeapX, int idx)
{
    int smallest = idx;
    int left = 2*idx + 1;
    int right = 2*idx + 2;

    if ((left < HHeapX->size) && (HHeapX->array[left]->frequency < HHeapX->array[smallest]->frequency) )
    {
        smallest = left;
    }

    if ((right < HHeapX->size)&& (HHeapX->array[right]->frequency < HHeapX->array[smallest]->frequency))
    {
        smallest = right;
    }

    if (smallest != idx)
    {
        swapHNode(&HHeapX->array[smallest], &HHeapX->array[idx]);
        HHeapify(HHeapX, smallest);
    }
}

int isSizeOne(HHeap* HHeapX)
{
    return (HHeapX->size == 1);
}

HNode* extractMin(HHeap* HHeapX)
{
    HNode* temp = HHeapX->array[0];
    HHeapX->array[0] = HHeapX->array[HHeapX->size - 1];
    --HHeapX->size;
    HHeapify(HHeapX,0);
    return temp;
}

void insertHHeap(HHeap* HHeapX, HNode* HNodeX)
{
    //int i = HHeapX->size - 1;
    int i = HHeapX->size; //不减1
    ++HHeapX->size;
    while ((i > 1) && HNodeX->frequency < HHeapX->array[(i-1)/2]->frequency)
    {
        HHeapX->array[i] = HHeapX->array[(i-1)/2];
        i = (i-1)/2;
    }
    HHeapX->array[i] = HNodeX;
}

void buildHHeap(HHeap* HHeapX)
{
    int n = HHeapX->size - 1;
    for (int i = (n-1)/2; i >= 0 ; --i)
    {
        HHeapify(HHeapX, i);
    }
}

void printArr(int arr[],int n)
{
    for (int i = 0; i < n; i++)
    {
        printf("%d", arr[i]);
    }
    printf("\n");
}

int isLeaf(HNode* root)
{
    return !(root->left) && !(root->right) ;
}

HHeap* createAndBuildHHeap(char character[], int frequency[], int size)
{
    int i;
    HHeap* HHeapX = createHHeap(size);
    for (i = 0; i < size; ++i)
        HHeapX->array[i] = newNode(character[i], frequency[i]);
    HHeapX->size = size;
    buildHHeap(HHeapX);
    return HHeapX;
}

HNode* buildHuffmanTree(char character[], int frequency[], int size)
{
    HNode *l, *r, *top;
    HHeap* HHeap = createAndBuildHHeap(character, frequency, size);

    while (!isSizeOne(HHeap))
    {
        l = extractMin(HHeap);
        r = extractMin(HHeap);
        top = newNode(‘$‘, l->frequency + r->frequency);
        top->left = l;
        top->right = r;
        insertHHeap(HHeap, top);
    }
    return extractMin(HHeap);
}

void printCodes(HNode* root, int arr[], int top)
{
    if (root->left)
    {
        arr[top] = 0;
        printCodes(root->left, arr, top + 1);
    }

    if (root->right)
    {
        arr[top] = 1;
        printCodes(root->right, arr, top + 1);
    }

    if (isLeaf(root))
    {
        printf("%c: ", root->character);
        printArr(arr, top);
    }
}

void HuffmanCoding(char character[], int frequency[], int size)
{
    HNode* root = buildHuffmanTree(character, frequency, size);
    int arr[MAX_TREE_HT], top = 0;
    printCodes(root, arr, top);
}

int countStrFreq(const char *s, char character[], int frequency[])
{
    // 用表计算字符出现的频率
    int freq[128] = {0};
    while (*s)
    {
        freq[(int)*s++]++;
        //printf("%c",*s);
    }

    int c = 0;
    for (int i = 0; i < 128; i++)
    {
        if (freq[i] != 0)
        {
            character[c] = char(i);
            frequency[c] = freq[i];
            c++;
        }
    }
    return c;
}

void main()
{
    // 输入的字符串
    const char *str = "this is an example for huffman encoding";

    // ASCII码共包含128个字符,因此初始化大小设为128
    char cha[128];
    int freq[128]={0};

    // 计算字符串中各字符出现的频率
    int val;
    val = countStrFreq(str,cha,freq);

    // 进行Huffman编码
    HuffmanCoding(cha, freq, val);

    system("pause");
}

[1] Huffman, D.A., A method for the construction of minimum redundancy codes. Proceedings of the IRE, 1952. 40(9): p. 1098-1101.
[2] http://scanftree.com/Data_Structure/huffman-code

Huffman Coding 原理与C代码

标签:huffman   霍夫曼编码   linjm   

原文地址:http://blog.csdn.net/linj_m/article/details/44241543

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!