04-3. Huffman Codes (PAT) - 哈弗曼编码问题

时间：2015-02-11 18:21:50 阅读：206 评论：0 收藏：0 [点我收藏+]

标签：

In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both of which compress the string into 14 bits. Another set of codes can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.

Input Specification:

Each input file contains one test case. For each case, the first line gives an integer N (2 <= N <= 63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:

c[1] f[1] c[2] f[2] ... c[N] f[N]

where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (<=1000), followed by M student submissions. Each student submission consists of N lines, each in the format:

c[i] code[i]

where c[i] is the i-th character and code[i] is a string of '0's and '1's.

Output Specification:

For each test case, print in each line either “Yes” if the student’s submission is correct, or “No” if not.

Sample Input:

7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11

Sample Output:

Yes
Yes
No
No

题意：给出一系列的哈弗曼编码，要求判断每组编码是否正确
解题思路：
　　1.对给出字符进行哈弗曼编码，得出哈弗曼树的带权路径长度（WPL），将其与输入的各个哈弗曼编码进行对比（输入的wpl可通过计算总的字符长度得到）
　　2.对于满足条件1的编码，再判断是否任一字符编码都不是其他字符编码的前缀（将编码从短到长排序后再逐一对比）

#include <algorithm>    // std::sort
#include <cstdlib>      // malloc / free
#include <iostream>
#include <map>
#include <queue>
#include <string>
#include <utility>      // std::pair / std::make_pair
#include <vector>
using namespace std;

// Weight written into the heap's sentinel slot (index 0). The sift-up loop in
// InsertMinHeap compares against it, so it is presumably meant to be no larger
// than any real weight.
#define MinData 0
// Capacity requested by CreateMinHeap when it allocates the heap.
#define MaxInputSize 2000

// PAIR is shorthand for pair<char, string>: a character and its code string.
typedef pair<char, string> PAIR;

// Node of the Huffman tree. The same layout is also stored by value inside
// the min-heap's element array.
struct HuffmanTreeNode{
    int weight;                      // weight of this node (leaf frequency or sum of children)
    HuffmanTreeNode* leftweight;     // left child, NULL for a leaf
    HuffmanTreeNode* rightweight;    // right child, NULL for a leaf
};

// Value and pointer aliases used throughout the file.
typedef HuffmanTreeNode nHuffmanTree;
typedef HuffmanTreeNode *pHuffmanTree;

// Min-heap of Huffman tree nodes, keyed on node weight.
struct HeapStruct{
    pHuffmanTree Elememts;    // array holding the heap elements; slot 0 is used as a sentinel
    int Size;                 // number of elements currently in the heap
    int Capacity;             // maximum number of elements the heap may hold
};

// Value and pointer aliases used throughout the file.
typedef HeapStruct nMinHeap;
typedef HeapStruct *pMinHeap;



// Builds a min-heap containing one leaf node per entry of the weight array.
pMinHeap CreateMinHeap( int [], int );
// Allocates an empty min-heap with a sentinel stored at index 0.
pMinHeap CreateEmptyMinHeap( int );
// Inserts a copy of the given node into the heap.
void InsertMinHeap( pMinHeap, nHuffmanTree );
// Removes the minimum-weight node and returns a freshly malloc'ed copy of it.
pHuffmanTree DeleteMinHeap( pMinHeap);
// Repeatedly merges the two lightest nodes and returns the root of the resulting tree.
pHuffmanTree GetHuffmanRootWeight( pMinHeap );
// Accumulates the weighted path length of the tree into *wpl.
void GetWPLValue( pHuffmanTree pHT, int layer, int *wpl);

//比较函数，按编码长度排序
int cmp( const PAIR &x, const PAIR &y )
{
    return x.second.size() < y.second.size();
}

int main()
{
    int charNum;
    cin >> charNum;
    char *arChar = new char[ charNum ];
    int *arWeight = new int[ charNum ];
    int i;
    for( i = 0; i < charNum; i++ )
    {
        cin >> arChar[i] >> arWeight[i];
    }
    pMinHeap minH = CreateMinHeap( arWeight, charNum );
    pHuffmanTree pHT = GetHuffmanRootWeight( minH );
    int originWPL = 0;
    GetWPLValue( pHT, 0, &originWPL );
    int stuNum;
    cin >> stuNum;
    int j;
    char temp;
    string *str = new string[charNum];
    int stuWPL;
    string outputstr = "";
    for ( i = 0; i < stuNum; i++ )
    {
        stuWPL = 0;
        //vector定义
        vector<PAIR> checkVec;
        for ( j = 0; j < charNum; j++ )
        {
            cin >> temp >> str[j];
            //向vector中添加元素
            checkVec.push_back( make_pair( temp, str[j] ) );
            stuWPL += arWeight[j] * str[j].length();
        }
        //按编码长度排序
        sort( checkVec.begin(), checkVec.end(), cmp );
        int cmp1, cmp2;
        if ( stuWPL == originWPL )
        {
            bool flag = true;
            for( cmp1 = 0; cmp1 < charNum; cmp1++ )
            {
                string tmpstr = checkVec[cmp1].second;
                for ( cmp2 = cmp1 + 1; cmp2 < charNum; cmp2++ )
                {
                    if ( checkVec[cmp2].second.substr( 0, tmpstr.size() ) == tmpstr )
                    {
                        flag = false;
                    }
                }
            }
            if ( flag == true )
            {
                cout << "Yes" << endl;
            }
            else
            {
                cout << "No" << endl;
            }
        }
        else
        {
            cout << "No" << endl;
        }
    }
    return 0;
}

// Builds a min-heap holding one leaf node for each of the first len entries
// of weight. The heap is allocated with capacity MaxInputSize, and the nodes
// are inserted one at a time in array order.
pMinHeap CreateMinHeap( int weight[], int len )
{
    pMinHeap heap = CreateEmptyMinHeap( MaxInputSize );
    int idx = 0;
    while ( idx < len )
    {
        nHuffmanTree leaf;
        leaf.weight = weight[idx];
        leaf.leftweight = NULL;     // a leaf has no children
        leaf.rightweight = NULL;
        InsertMinHeap( heap, leaf );
        ++idx;
    }
    return heap;
}
// Allocates an empty min-heap able to hold MaxSize elements.
// Slot 0 of the element array is reserved for a sentinel whose weight is
// MinData, so MaxSize + 1 slots are allocated.
// Returns NULL if either allocation fails. The caller owns the heap and
// releases it with free() on Elememts and on the heap itself.
pMinHeap CreateEmptyMinHeap( int MaxSize )
{
    pMinHeap minH = static_cast<pMinHeap>( malloc( sizeof( nMinHeap ) ) );
    if ( minH == NULL )
    {
        return NULL;
    }
    minH->Elememts = static_cast<pHuffmanTree>( malloc( ( MaxSize + 1 ) * sizeof( nHuffmanTree ) ) );
    if ( minH->Elememts == NULL )
    {
        free( minH );
        return NULL;
    }
    minH->Size = 0;
    minH->Capacity = MaxSize;    // honor the requested capacity, not a fixed constant
    // Fully initialize the sentinel so no field of slot 0 is left indeterminate.
    minH->Elememts[0].weight = MinData;
    minH->Elememts[0].leftweight = NULL;
    minH->Elememts[0].rightweight = NULL;
    return minH;
}

// Inserts a copy of nHT into the min-heap, keeping the smallest weight at index 1.
// If the heap is already at capacity, it prints "Heap Is Full!" and leaves the
// heap unchanged.
void InsertMinHeap( pMinHeap minH, nHuffmanTree nHT )
{
    int i;
    if ( minH->Size >= minH->Capacity )
    {
        cout << "Heap Is Full!" << endl;
        return;
    }
    i = ++minH->Size;    // i is the new last slot, which is currently a hole
    // Sift up: move heavier parents down into the hole until nHT fits.
    // The explicit i > 1 bound stops at the root even when nHT.weight is
    // smaller than the sentinel at index 0. Relying on the sentinel alone
    // would never terminate in that case, because 0 / 2 == 0.
    for( ; i > 1 && minH->Elememts[i/2].weight > nHT.weight; i/=2 )
    {
        minH->Elememts[i] = minH->Elememts[i/2];
    }
    minH->Elememts[i] = nHT;
}

// Removes the minimum-weight element from the heap and returns a malloc'ed
// copy of it, which the caller owns.
// Returns NULL, after printing "Heap Is Empty!", when the heap has no
// elements, and NULL when the copy cannot be allocated. In both cases the
// heap is left unchanged.
pHuffmanTree DeleteMinHeap( pMinHeap minH)
{
    int parentNode, childNode;
    nHuffmanTree temp;
    pHuffmanTree minNode;
    // Removing from an empty heap would read the sentinel and drive Size negative.
    if ( minH->Size <= 0 )
    {
        cout << "Heap Is Empty!" << endl;
        return NULL;
    }
    minNode = static_cast<pHuffmanTree>( malloc( sizeof( nHuffmanTree ) ) );
    if ( minNode == NULL )
    {
        return NULL;
    }
    *minNode = minH->Elememts[1];    // the root holds the minimum
    // Take the last element and sift it down from the root to refill the hole.
    temp = minH->Elememts[ minH->Size-- ];
    for ( parentNode = 1; parentNode * 2 <= minH->Size; parentNode = childNode )
    {
        // Pick the lighter child of parentNode.
        childNode = parentNode * 2;
        // A right child exists when childNode is not the last slot; prefer it if lighter.
        if ( childNode != minH->Size && minH->Elememts[childNode].weight > minH->Elememts[childNode + 1].weight  )
        {
            childNode++;
        }
        if ( temp.weight <= minH->Elememts[childNode].weight )
        {
            break;    // temp fits here
        }
        else
        {
            minH->Elememts[parentNode] = minH->Elememts[childNode];
        }
    }
    minH->Elememts[ parentNode ] = temp;
    return minNode;
}

// Builds a Huffman tree from the nodes in minH and returns its root.
// The two lightest nodes are repeatedly removed and replaced by a parent whose
// weight is their sum, until a single node remains. The heap is empty on return.
// Returns NULL when the heap is empty on entry.
pHuffmanTree GetHuffmanRootWeight( pMinHeap minH)
{
    if ( minH->Size <= 0 )
    {
        return NULL;    // no nodes, so there is no tree to build
    }
    const int merges = minH->Size - 1;    // n nodes need n - 1 merges
    for ( int i = 0; i < merges; i++ )
    {
        // InsertMinHeap stores the node by value, so a stack local is enough.
        // A malloc'ed temporary here would be leaked on every merge.
        nHuffmanTree merged;
        merged.leftweight = DeleteMinHeap( minH );     // lightest remaining node
        merged.rightweight = DeleteMinHeap( minH );    // second lightest
        merged.weight = merged.leftweight->weight + merged.rightweight->weight;
        InsertMinHeap( minH, merged );
    }
    return DeleteMinHeap( minH );    // the single remaining node is the root
}

// Adds the weighted path length of the subtree rooted at pHT to *wpl.
// layer is the depth of pHT, and each leaf contributes depth * weight.
// A node that is not a leaf is assumed to have both children.
void GetWPLValue( pHuffmanTree pHT, int layer, int *wpl)
{
    const bool isLeaf = ( pHT->leftweight == NULL ) && ( pHT->rightweight == NULL );
    if ( !isLeaf )
    {
        const int childLayer = layer + 1;
        GetWPLValue( pHT->leftweight, childLayer, wpl );
        GetWPLValue( pHT->rightweight, childLayer, wpl );
        return;
    }
    *wpl += layer * pHT->weight;
}

04-3. Huffman Codes (PAT) - 哈弗曼编码问题

标签：

原文地址：http://www.cnblogs.com/liangchao/p/4286598.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行