标签:
In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Input Specification:
Each input file contains one test case. For each case, the first line gives an integer N (2 <= N <= 63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (<=1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i] is the i-th character and code[i] is a string of '0's and '1's.
Output Specification:
For each test case, print in each line either “Yes” if the student’s submission is correct, or “No” if not.
Sample Input:
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Sample Output:
Yes
Yes
No
No
题意:给出一系列的哈弗曼编码,要求判断每份编码是否正确
解题思路:
1.对给出字符进行哈弗曼编码,得出哈弗曼树的带权路径长度(WPL),将其与输入的各个哈弗曼编码的WPL进行对比(输入编码的WPL可由各字符的频率乘以其编码长度后求和得到)
2.对于满足条件1的编码,再判断是否存在某个字符的编码是其他字符编码的前缀(将编码从短到长排序后再逐一对比),若存在则该编码不正确
#include <algorithm>  // sort
#include <cstddef>    // NULL, size_t
#include <cstdlib>    // malloc, free
#include <iostream>
#include <map>
#include <queue>
#include <string>
#include <utility>    // pair, make_pair
#include <vector>
using namespace std;

// Weight stored in the sentinel slot Elememts[0] of the min-heap.
#define MinData 0
// Default heap capacity (number of usable slots, excluding the sentinel).
#define MaxInputSize 2000

// (character, code) pair as submitted by a student.
typedef pair<char, string> PAIR;

// Huffman tree node. A node whose two child pointers are both NULL is a leaf.
typedef struct HuffmanTreeNode
{
    int weight;
    HuffmanTreeNode* leftweight;   // left child
    HuffmanTreeNode* rightweight;  // right child
} *pHuffmanTree, nHuffmanTree;

// Array-based binary min-heap of Huffman nodes, keyed on `weight`.
typedef struct HeapStruct
{
    pHuffmanTree Elememts;  // heap storage; index 0 is the sentinel, the root is at index 1
    int Size;               // number of elements currently stored
    int Capacity;           // maximum number of elements the heap can hold
} *pMinHeap, nMinHeap;

pMinHeap CreateMinHeap( int [], int );
pMinHeap CreateEmptyMinHeap( int );
void InsertMinHeap( pMinHeap, nHuffmanTree );
pHuffmanTree DeleteMinHeap( pMinHeap );
pHuffmanTree GetHuffmanRootWeight( pMinHeap );
void GetWPLValue( pHuffmanTree pHT, int layer, int *wpl );

static void FreeHuffmanTree( pHuffmanTree pHT );
static void FreeMinHeap( pMinHeap minH );
static bool IsPrefixFree( vector<PAIR> &codes );

// Ordering predicate for sort(): shorter codes come first.
int cmp( const PAIR &x, const PAIR &y )
{
    return x.second.size() < y.second.size();
}

// Reads the character frequencies and the student submissions from stdin and
// prints "Yes"/"No" for each submission. A submission is accepted when its
// total weighted code length equals the WPL of a Huffman tree built from the
// frequencies AND no code is a prefix of another code.
int main()
{
    int charNum = 0;
    if ( !( cin >> charNum ) || charNum <= 0 )
    {
        return 0;
    }

    vector<char> arChar( charNum );
    vector<int> arWeight( charNum );
    for ( int i = 0; i < charNum; i++ )
    {
        cin >> arChar[i] >> arWeight[i];
    }

    // Build the reference Huffman tree once; only its WPL is needed afterwards,
    // so the tree and the heap are released immediately.
    pMinHeap minH = CreateMinHeap( &arWeight[0], charNum );
    pHuffmanTree pHT = GetHuffmanRootWeight( minH );
    int originWPL = 0;
    GetWPLValue( pHT, 0, &originWPL );
    FreeHuffmanTree( pHT );
    FreeMinHeap( minH );

    int stuNum = 0;
    cin >> stuNum;

    vector<PAIR> checkVec;
    char symbol;
    string code;
    for ( int i = 0; i < stuNum; i++ )
    {
        checkVec.clear();
        int stuWPL = 0;
        for ( int j = 0; j < charNum; j++ )
        {
            if ( !( cin >> symbol >> code ) )
            {
                return 0;  // truncated input: nothing sensible left to judge
            }
            checkVec.push_back( make_pair( symbol, code ) );
            // The j-th submitted line belongs to the j-th input character.
            stuWPL += arWeight[j] * static_cast<int>( code.length() );
        }

        // The whole submission is consumed before judging so that a rejected
        // submission never leaves unread lines behind for the next one.
        const bool accepted = ( stuWPL == originWPL ) && IsPrefixFree( checkVec );
        cout << ( accepted ? "Yes" : "No" ) << '\n';
    }
    return 0;
}

// Returns true when no code in `codes` is a prefix of (or equal to) another.
// `codes` is sorted by code length as a side effect, so every candidate prefix
// only has to be compared against the codes that follow it.
static bool IsPrefixFree( vector<PAIR> &codes )
{
    sort( codes.begin(), codes.end(), cmp );
    for ( size_t shortIdx = 0; shortIdx < codes.size(); ++shortIdx )
    {
        const string &prefix = codes[shortIdx].second;
        for ( size_t longIdx = shortIdx + 1; longIdx < codes.size(); ++longIdx )
        {
            if ( codes[longIdx].second.compare( 0, prefix.size(), prefix ) == 0 )
            {
                return false;
            }
        }
    }
    return true;
}

// Builds a min-heap containing one leaf node per entry of weight[0..len-1].
// Returns NULL if the heap could not be allocated.
pMinHeap CreateMinHeap( int weight[], int len )
{
    // Never allocate fewer slots than there are weights to insert.
    pMinHeap minH = CreateEmptyMinHeap( len > MaxInputSize ? len : MaxInputSize );
    if ( minH == NULL )
    {
        return NULL;
    }

    // Sequential insertion; heapifying a pre-filled array would be cheaper,
    // but the input is tiny.
    nHuffmanTree nHT;
    for ( int i = 0; i < len; i++ )
    {
        nHT.weight = weight[i];
        nHT.leftweight = NULL;
        nHT.rightweight = NULL;
        InsertMinHeap( minH, nHT );
    }
    return minH;
}

// Allocates an empty heap able to hold MaxSize elements (plus the sentinel in
// slot 0). Returns NULL on a negative size or on allocation failure.
pMinHeap CreateEmptyMinHeap( int MaxSize )
{
    if ( MaxSize < 0 )
    {
        return NULL;
    }
    pMinHeap minH = ( pMinHeap )malloc( sizeof( nMinHeap ) );
    if ( minH == NULL )
    {
        return NULL;
    }
    minH->Elememts = ( pHuffmanTree )malloc( ( static_cast<size_t>( MaxSize ) + 1 ) * sizeof( nHuffmanTree ) );
    if ( minH->Elememts == NULL )
    {
        free( minH );
        return NULL;
    }
    minH->Size = 0;
    minH->Capacity = MaxSize;
    // Sentinel element.
    minH->Elememts[0].weight = MinData;
    minH->Elememts[0].leftweight = NULL;
    minH->Elememts[0].rightweight = NULL;
    return minH;
}

// Inserts a copy of nHT into the heap. Does nothing (apart from a diagnostic
// on stderr) when the heap is full.
void InsertMinHeap( pMinHeap minH, nHuffmanTree nHT )
{
    if ( minH == NULL )
    {
        return;
    }
    if ( minH->Size >= minH->Capacity )
    {
        // stderr, because stdout carries the judged answers.
        cerr << "Heap Is Full!" << endl;
        return;
    }

    // `i` starts at the new last slot and moves up while the parent is heavier.
    // The explicit `i > 1` bound keeps the loop finite even for a weight
    // smaller than the sentinel.
    int i = ++minH->Size;
    for ( ; i > 1 && minH->Elememts[i / 2].weight > nHT.weight; i /= 2 )
    {
        minH->Elememts[i] = minH->Elememts[i / 2];
    }
    minH->Elememts[i] = nHT;
}

// Removes the minimum-weight element and returns it in a freshly malloc'ed
// node that the caller owns. Returns NULL when the heap is empty or the node
// cannot be allocated; the heap is left untouched in both cases.
pHuffmanTree DeleteMinHeap( pMinHeap minH )
{
    if ( minH == NULL || minH->Size <= 0 )
    {
        cerr << "Heap Is Empty!" << endl;
        return NULL;
    }

    pHuffmanTree minNode = ( pHuffmanTree )malloc( sizeof( nHuffmanTree ) );
    if ( minNode == NULL )
    {
        return NULL;
    }
    *minNode = minH->Elememts[1];  // the root holds the minimum

    // Take the last element and sift it down from the root.
    nHuffmanTree last = minH->Elememts[minH->Size--];
    int parentNode = 1;
    while ( parentNode * 2 <= minH->Size )
    {
        int childNode = parentNode * 2;
        // Pick the right child when it exists and is lighter than the left one.
        if ( childNode != minH->Size
             && minH->Elememts[childNode].weight > minH->Elememts[childNode + 1].weight )
        {
            childNode++;
        }
        if ( last.weight <= minH->Elememts[childNode].weight )
        {
            break;
        }
        minH->Elememts[parentNode] = minH->Elememts[childNode];
        parentNode = childNode;
    }
    minH->Elememts[parentNode] = last;
    return minNode;
}

// Repeatedly merges the two lightest nodes of the heap until a single tree is
// left, and returns its root (owned by the caller). The heap is empty
// afterwards. Returns NULL for a NULL/empty heap or on allocation failure.
pHuffmanTree GetHuffmanRootWeight( pMinHeap minH )
{
    if ( minH == NULL || minH->Size <= 0 )
    {
        return NULL;
    }

    const int times = minH->Size;
    for ( int i = 1; i < times; i++ )  // initial Size - 1 merges
    {
        // The heap stores nodes by value, so the merged node lives on the
        // stack; only the two children are heap-allocated (by DeleteMinHeap).
        nHuffmanTree merged;
        merged.leftweight = DeleteMinHeap( minH );
        merged.rightweight = DeleteMinHeap( minH );
        if ( merged.leftweight == NULL || merged.rightweight == NULL )
        {
            FreeHuffmanTree( merged.leftweight );
            FreeHuffmanTree( merged.rightweight );
            return NULL;
        }
        merged.weight = merged.leftweight->weight + merged.rightweight->weight;
        InsertMinHeap( minH, merged );
    }
    return DeleteMinHeap( minH );
}

// Adds the weighted path length of the tree rooted at pHT to *wpl.
// `layer` is the depth of pHT (0 for the root). A NULL tree contributes nothing.
void GetWPLValue( pHuffmanTree pHT, int layer, int *wpl )
{
    if ( pHT == NULL || wpl == NULL )
    {
        return;
    }
    if ( pHT->leftweight == NULL && pHT->rightweight == NULL )
    {
        ( *wpl ) += layer * pHT->weight;
        return;
    }
    // In a Huffman tree every internal node has exactly two children.
    GetWPLValue( pHT->leftweight, layer + 1, wpl );
    GetWPLValue( pHT->rightweight, layer + 1, wpl );
}

// Releases every node of the tree rooted at pHT (post-order).
static void FreeHuffmanTree( pHuffmanTree pHT )
{
    if ( pHT == NULL )
    {
        return;
    }
    FreeHuffmanTree( pHT->leftweight );
    FreeHuffmanTree( pHT->rightweight );
    free( pHT );
}

// Releases the heap structure and its element array. Subtrees referenced by
// elements still stored in the heap are not freed.
static void FreeMinHeap( pMinHeap minH )
{
    if ( minH == NULL )
    {
        return;
    }
    free( minH->Elememts );
    free( minH );
}
04-3. Huffman Codes (PAT) - 哈弗曼编码问题
标签:
原文地址:http://www.cnblogs.com/liangchao/p/4286598.html