如图所示,共读出31个不同的字符,但26个小写字母外加“,”、“.”和“ ”应该只有29个呀?
更重要的是,为什么打印至k则停止了,还有部分字母为什么不输出至频数表呢?
代码如下,请多指教!
using namespace std;
class HuffmanTree//构造霍夫曼树的类
{
public:
unsigned int Weight, Parent, lChild, rChild;//四个属性:权重,父节点,左孩子,右孩子
};
typedef char **HuffmanCode;
/*
从结点集合中选出权值最小的两个结点
将值分别赋给s1和s2
*/
void Select(HuffmanTree HT, int Count, int s2, int *s1)//根据权重a[1:n]构造霍夫曼树,count为权重最大值
{
unsigned int temp1 = 0;
unsigned int temp2 = 0;
unsigned int temp3;
for (int i = 1; i <= Count; i++)//从权重小的开始
{
if (HT[i].Parent == 0)
{
if (temp1 == 0)
{
temp1 = HT[i].Weight;//放入当前最小权重
(*s1) = i;
}
else
{
if (temp2 == 0)
{
temp2 = HT[i].Weight;//放入当前次小的权重
(*s2) = i;
if (temp2<temp1)//规定左孩子权重比右孩子大,左temp2>右temp1
{
temp3 = temp2;
temp2 = temp1;
temp1 = temp3;
temp3 = (*s2);
(*s2) = (*s1);
(*s1) = temp3;
}
}
else//第三次及之后的构造
{
if (HT[i].Weight<temp1)//新权重比右孩子的权重还小,存为新的右孩子
{
temp2 = temp1;
temp1 = HT[i].Weight;
(*s2) = (*s1);
(*s1) = i;
}
if (HT[i].Weight>temp1&&HT[i].Weight<temp2)
{
temp2 = HT[i].Weight;
(*s2) = i;
}
}
}
}
}
}
//生成霍夫曼编码
void HuffmanCoding(HuffmanTree * HT,
HuffmanCode * HC,
int *Weight,
int Count)
{
int i;
int s1, s2;
int TotalLength;
char* cd;
unsigned int c;
unsigned int f;
int start;
if (Count <= 1) return;
TotalLength = Count * 2 - 1;//节点个数
HT = new HuffmanTree[(TotalLength + 1)*sizeof(HuffmanTree)];//创建霍夫曼树
for (i = 1; i <= Count; i++)//遍历不同字符,生成叶子节点
{
HT[i].Parent = 0;
HT[i].rChild = 0;
HT[i].lChild = 0;
HT[i].Weight = (*Weight);
Weight++;
}
for (i = Count + 1; i <= TotalLength; i++)//生成非叶子节点
{
HT[i].Weight = 0;
HT[i].Parent = 0;
HT[i].lChild = 0;
HT[i].rChild = 0;
}
//建造霍夫曼树
for (i = Count + 1; i <= TotalLength; ++i)
{
Select(HT, i - 1, &s1, &s2);
HT[s1].Parent = i;
HT[s2].Parent = i;//左右孩子指向同一父节点
HT[i].lChild = s1;//左孩子为s1,权重小的
HT[i].rChild = s2;
HT[i].Weight = HT[s1].Weight + HT[s2].Weight;//父节点权重为左右孩子权重之和
}
//输出霍夫曼编码
(*HC) = (HuffmanCode)malloc((Count + 1)*sizeof(char*));
cd = new char[Count*sizeof(char)];//存放编码
cd[Count - 1] = '\0';
for (i = 1; i <= Count; ++i)//遍历不同字符
{
start = Count - 1;//从下至上编码,从后往前编码
for (c = i, f = HT[i].Parent; f != 0; c = f, f = HT[f].Parent)//循环至根节点终止,每次循环辈分升高一次
{
if (HT[f].lChild == c)//左0右1规则()
cd[--start] = '0';
else
cd[--start] = '1';
(*HC)[i] = new char[(Count - start)*sizeof(char)];//存放霍夫曼编码
strcpy_s((*HC)[i], strlen(&cd[start]) + 1, &cd[start]);//将生成的编码拷贝至霍夫曼编码中存放
}
}
//删除霍夫曼树及编码存放数组
delete[] HT;
delete[] cd;
}
//检测函数
int LookFor(char *str, char letter, int count)//检测字符是否已在频数表中出现过,未出现时返回-1,出线出现过时返回在频数表中的位置
{
int i;
for (i = 0; i<count; i++)
{
if (str[i] == letter) return i;
}
return -1;
}
//生成字符、权重对应表
void OutputWeight(const char *Data, int Length,//Lengh为输入字符串的长度
char **WhatLetter,
int **Weight, int *Count)
{
int i;
//构造频数表
char* Letter = new char[Length];//存放各个字符
int* LetterCount = new int[Length];//存放字符出现频率,即其权重
int AllCount = 0;//统计不同字符的总数
int Index;//索引号用于在频数表中查找字符
int Sum = 0;
float Persent = 0;
for (i = 0; i<Length; i++)//遍历字符串
{
if (i == 0)
{
Letter[0] = Data[i];
LetterCount[0] = 1;
AllCount++;
}
else
{
Index = LookFor(Letter, Data[i], AllCount);
if (Index == -1)//频数表中无该字符,则在频数表中建立新空间存放对应字符
{
Letter[AllCount] = Data[i];//字符存入频数表
LetterCount[AllCount] = 1;//自负对因频数加1
AllCount++;//不同字符数加1
}
else//频数表中已有该字符,其频数加1
{
LetterCount[Index]++;
}
}
}
for (i = 0; i<AllCount; i++)//计算不同字符的总数
{
Sum = Sum + LetterCount[i];
}
//构造频率字符对应表
(*Weight) = new int[AllCount];//存放各字符的频率
(*WhatLetter) = new char[AllCount];//存放各个字符
for (i = 0; i<AllCount; i++)//遍历不同字符
{
Persent = (float)LetterCount[i] / (float)Sum;//计算出现频率
(*Weight)[i] = (int)(100 * Persent);
(*WhatLetter)[i] = Letter[i];
}
(*Count) = AllCount;
//删去频数表
delete[] Letter;
delete[] LetterCount;
}
void main()
{
HuffmanTree * HT = NULL;
HuffmanCode HC;
char *WhatLetter;
int *Weight;
int Count;
ifstream ifle("input.txt");
string s;
while (ifle) s.push_back(ifle.get());
transform(s.begin(), s.end(), s.begin(), tolower);//转小写
const char* str = s.c_str();
cout << str << endl;;
OutputWeight(str, strlen(str), &WhatLetter, &Weight, &Count);//生成字符频率对应表
HuffmanCoding(HT, &HC, Weight, Count);//由频率表生成霍夫曼编码
cout << Count;
cout << "char freq code" << endl;
for (int i = 0; i<Count; i++)
{
cout << WhatLetter[i] << " ";
cout << Weight[i] << "%\t";
cout << HC[i + 1] << endl;
}
cout << Count << endl;
cout << endl;
}
共读出31个不同的字符,但26个小写字母外加“,”、“.”和“ ”应该只有29个呀?
为什么打印至k则停止了