C语言实现数据结构之哈夫曼编码

任务

树的每个结点代表一个集合元素。根据给定的若干权值来构造哈夫曼树,实现对应的哈夫曼编码以及译码。

分析

哈夫曼树就是带权路径长度最小的二叉树。构造的思路是将文件中的每一个字符构造成结点对象,然后把每一个结点当成一棵树,将所有结点当成一片森林。然后每次从所有结点中取两个最小的结点,比较小的作为左子结点,比较大的作为右子结点,它们的和作为父结点构成一颗二叉树,然后把取出的这两个结点删除,将父结点放入剩下的结点中再重复以上过程直到最后只剩下一个结点,该结点即为哈夫曼树的根结点。

存储结果设计
typedef struct{
	int weight;		//权重
	char data; 		//字符
	int parent,Lchild,Rchild;
}HTNode, HuffmanTree[M+1];

int bucket[128];	//记录字符出现频率 
int count; 			//记录出现字符个个数
typedef int SeqElemtype;
typedef struct
{
	SeqElemtype data[MAXSIZE]; 
	int top; 
}SeqStack; 
//新建一个空栈
SeqStack *InitStack (){
	 
	SeqStack *S;
	S=malloc(sizeof(SeqStack));
	 
	S->top =-1; 
	return S;
}


//判断是否为空栈 
int StackEmpty (SeqStack *S){
	if(S->top==-1)
		return TRUE;
	else 
		return FALSE; 
} 

//入栈
int Push (SeqStack *S,SeqElemtype e){
	if(S->top==MAXSIZE-1) 
		return FALSE;
		
	S->top++;
	S->data[S->top]= e;
	return TRUE;
} 

//出栈并返回 
int Pop (SeqStack *S,SeqElemtype *e){
	if(S->top == -1) 
		return FALSE;
		
	*e=S->data[S->top];
	S->top--;	
	return TRUE;
	
}
源代码
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N 54
#define M 2*N-1
#define IO "sourceFile.txt"
#define Code "CodeFile"
#define Decode "DecodeFile"
#define MAXSIZE M*6
#define TRUE 1
#define FALSE 0 
typedef struct{
	int weight;		//权重
	char data; 		//字符
	int parent,Lchild,Rchild;
}HTNode, HuffmanTree[M+1];

int bucket[128];	//记录字符出现频率 
int count; 			//记录出现字符个个数
typedef int SeqElemtype;
typedef struct
{
	SeqElemtype data[MAXSIZE]; 
	int top; 
}SeqStack; 
//新建一个空栈
SeqStack *InitStack (){
	 
	SeqStack *S;
	S=malloc(sizeof(SeqStack));
	 
	S->top =-1; 
	return S;
}


//判断是否为空栈 
int StackEmpty (SeqStack *S){
	if(S->top==-1)
		return TRUE;
	else 
		return FALSE; 
} 

//入栈
int Push (SeqStack *S,SeqElemtype e){
	if(S->top==MAXSIZE-1) 
		return FALSE;
		
	S->top++;
	S->data[S->top]= e;
	return TRUE;
} 

//出栈并返回 
int Pop (SeqStack *S,SeqElemtype *e){
	if(S->top == -1) 
		return FALSE;
		
	*e=S->data[S->top];
	S->top--;	
	return TRUE;
	
} 
/*统计各字符权重*/
void statistics(char *sourcefile){
	count = 0; 		//计数清零
	FILE *fp;
	char ch;
	fp = fopen(sourcefile,"r"); 
	if(fp==NULL)
    {
        printf("文件打开失败
");
        exit(0);
    }
    while((ch=fgetc(fp))!=EOF){
    	bucket[ch]++;				//字符ASCII码作为下标
    }
    int i; 
    //printf("文章字符出现情况如下:
"); 
    for(i=0;i<128;i++){
    	if(bucket[i]){
    		count++;
    		/*if(i==10){
		    	printf("%d,回车 -->%d
",i,bucket[i]);
		    }else if(i==13){
    			printf("%d,换行 -->%d
",i,bucket[i]);
    		}else{
		    	printf("%d,%c -->%d
",i,i,bucket[i]);
		    }*/
	    	
	    } 	
    } 
    fclose(fp);
}
void seek(HuffmanTree ht,int n,int *s1,int *s2){	//寻找ht中i之前最小的两个元素下标,s1<s2; 
	int i,j;
	for(i=0;i<n&&ht[i].parent!=-1;i++); //s1,s2的初始化(不存在双亲结点)
	j=i;
	i++;
	for(;i<n&&ht[i].parent!=-1;i++);	
	*s1 = ht[i].weight<ht[j].weight?i:j;
	*s2 = ht[i].weight<ht[j].weight?j:i;
	i++;
	while(i<n){
		if(ht[i].parent!=-1){			//已有双亲,则继续查找
			
		}else if(ht[i].weight<ht[*s1].weight){		//小于s1,则替换最小值,s1替换s2
			*s2 = *s1;
			*s1 = i;
		}else if(ht[i].weight<ht[*s2].weight){		//仅小于s2,则仅替换s2
			*s2 = i;
		}
		i++; 
	}
}
void CrtHuffmanTree(HuffmanTree ht){ //创建 
	int m=2*count-1;
	int i,j;
	int s1,s2;
	for(i=0,j=0;i<128;i++){				//给初始结点(储存字符)初始化
		if(bucket[i]){
			ht[j].weight=bucket[i];
			ht[j].data=i;
			ht[j].parent=-1;
			ht[j].Lchild=-1;
			ht[j].Rchild=-1;
			j++;
		}
	} 
	
	for(;j<m;j++){						//给后续双亲结点初始化
		ht[j].weight=0;
		ht[j].parent=-1;
		ht[j].Lchild=-1;
		ht[j].Rchild=-1; 
	}	
	for(j=count;j<m;j++)
	{
		seek(ht,j,&s1,&s2);	 //寻找ht中i之前最小的两个元素下标,s1<s2; 
		ht[s1].parent=j;
		ht[s2].parent=j;
		ht[j].weight = ht[s1].weight+ht[s2].weight;
		ht[j].Lchild = s1;
		ht[j].Rchild = s2;
	}
	
	/*for(i=0;i<m;i++){
		printf("%c %d %d %d %d
",ht[i].data,ht[i].weight,ht[i].parent,ht[i].Lchild,ht[i].Rchild);
	}*/
}
void Codinghuffman(HuffmanTree	ht , char *myfile, char *codefile){ //编码 
	FILE *infp,*outfp;
	infp = fopen(myfile,"r"); 
	if(infp==NULL)
    {
        printf("文件打开失败
");
        exit(0);
    }
    
	outfp = fopen(codefile,"wb"); 
	if(outfp==NULL)
    {
        printf("文件打开失败
");
        exit(0);
    }
	 
 
 	int i,j;
	int p,q;
	int str1[200];
	char ch; 
 	while((ch=fgetc(infp))!=EOF){
    	for(p=0;ht
.data!=ch&&p<count;p++); //找到该元素在哈夫曼树中的位置 for(j=0;ht
.parent!=-1;j++){ q=ht
.parent; if(p==ht[q].Lchild){ str1[j]='0'; }else{ str1[j]='1'; } p=ht
.parent; } for(j=j-1;j>=0;j--){ fputc(str1[j],outfp); } } fclose(infp); fclose(outfp); } void DecodeHuffmanTree(HuffmanTree ht, char *codefile, char*decodefile){ //解码 FILE *infp,*outfp; infp = fopen(codefile,"rb"); if(infp==NULL) { printf("文件打开失败 "); exit(0); } outfp = fopen(decodefile,"w"); if(outfp==NULL) { printf("文件打开失败 "); exit(0); } int i,j; int p=2*count-2; // p当前节点 char ch; while((ch=fgetc(infp))!=EOF){ if(ch=='1'){ p = ht
.Rchild; }else if(ch=='0'){ p = ht
.Lchild; } if(ht
.Lchild==-1&&ht
.Rchild==-1){ //当前字段解码完毕 printf("%c",ht
.data); fputc(ht
.data,outfp); p =2*count-2; } } fclose(infp); fclose(outfp); } void OutHuffmanTree(HuffmanTree ht){ int p,q; SeqStack *S; S = InitStack(); p = 2*count-2; int count,i; while(p!=-1||StackEmpty(S)==0){ while(p!=-1){ Push(S,p); //入栈 p=ht
.Lchild; //遍历左子树 } if(StackEmpty(S)==0){ Pop(S,&p);//出栈 count=0; q=p; while(q!=-1){ q=ht[q].parent; count++; } for(i=1;i<count;i++){ printf(" "); } printf("%-4d ",ht
.weight); p=ht
.Rchild; //遍历左子树 } } } void PrintCode(HuffmanTree ht){ //编码所有的叶子节点 int i,j; char str[N]; int p,q; //p当前节点,q是p的双亲节点 for(i=0;i<count;i++){ p=i; for(j=0;ht
.parent!=-1;j++){ q=ht
.parent; if(p==ht[q].Lchild){ //如果是左节点,则为0 str[j]='0'; }else{ //如果是右节点,则为1 str[j]='1'; } p=ht
.parent; } printf("%c: ",ht[i].data); //打印结果 for(j=j-1;j>=0;j--){ printf("%c",str[j],j); } printf(" "); } } int main(){ char myfile[20], codefile[20], decodefile[20]; int code; HuffmanTree ht[M+1]; printf("--------------------哈夫曼树基本操作系统-------------- "); printf(" | 1.建立并输出哈夫曼树 | "); printf("------------------------------------------------------ "); printf(" | 2.建立并输出哈夫曼编码 | "); printf("------------------------------------------------------ "); printf(" | 3.将文本转换成01编码 | "); printf("------------------------------------------------------ "); printf(" | 4.将01编码翻译成文本 | "); printf("------------------------------------------------------ "); printf(" | 0.退出系统 | "); printf("------------------------------------------------------ "); for(;;){ printf(" 请选择操作:"); scanf("%d", &code); fflush(stdin); switch(code){ case 1: printf("请输入需要编码的文件名(包含拓展名):"); scanf("%s", myfile); statistics(myfile); CrtHuffmanTree(ht); //创建哈夫曼树 printf("哈夫曼树创建成功! "); printf("哈夫曼树如下:"); OutHuffmanTree(ht); break; case 2: printf("请输入需要编码的文件名(包含拓展名):"); scanf("%s", myfile); statistics(myfile); CrtHuffmanTree(ht); printf("哈夫曼编码创建成功!"); printf("哈夫曼编码如下:"); PrintCode(ht); break; case 3: printf("请输入编码目标文件名(包含拓展名):"); scanf("%s", codefile); Codinghuffman(ht, myfile, codefile); //对文件进行编码 printf("编码成功! "); break; case 4: printf("请输入解码目标的文件名(包含拓展名):"); scanf("%s", decodefile); DecodeHuffmanTree(ht, codefile, decodefile); printf("解码成功! "); break; case 0: printf("谢谢使用! "); return 0; default: printf("暂时不支持该功能! "); break; } } }
运行结果