哈夫曼编码及译码 wxh zt

wxhwhmanshan · 发表于 2005-7-19 14:09:00

#include <iostream>
#include <fstream>
#include <iomanip>
#include <string>
using namespace std;
//***********************************
// 赫夫曼树和赫夫曼编码的存储表示
//***********************************
typedef struct {
unsigned int weight;
unsigned int parent,lChild,rChild;
}HTNode, *HuffmanTree; // 动态分配数组存储赫夫曼树

typedef struct {
char ch;
char* hufCh;
}HuffmanCode;          // 动态分配数组存储赫夫曼编码表

//***********************************
// 权值字符结点类型
//***********************************
typedef struct {
char ch;
int wt;
} wElem;                // 动态分配数组存储读入字符与权值

//***********************************
//赫夫曼编码函数
//构造赫夫曼树 HT , 并求出 n 个字符的赫夫曼编码 HC
//将结果存入 hufTree.txt
//***********************************
void HuffmanCoding( HuffmanTree &,HuffmanCode * , wElem * , int);
//***********************************
// 赫夫曼译码函数
// 对文件里的代码进行译码
// 将结果存入 textfile.txt
//***********************************
void DeCoding( HuffmanTree, HuffmanCode *, const char*, const int);
//************************************
// 选择两个最小的结点
//************************************
void SelectTwoNode( HuffmanTree , int , int& , int& );

//**************************************************
// 赫夫曼编码函数的实现
// w 存放 n 个字符的权值（均>0 ）
// 构造赫夫曼树 HT , 并求出 n 个字符的赫夫曼编码 HC
//**************************************************
void HuffmanCoding( HuffmanTree &HT , HuffmanCode* HC , wElem* w , int n )
{
if( n <= 1 ) return;
int m = 2 * n - 1;       // 赫夫曼树的结点数目
HT = ( HuffmanTree ) malloc ((m + 1) * sizeof( HTNode ) ); //0 号单元未用
HuffmanTree p = HT;
p++;                   // p 指向 HT 的第 1 号单元
int i=0 , ww=0;       // ww 为 wElem* w 的下标
for( i = 1 ; i <= n ; i++ , p++ , ww++ )
{
  p->weight = w[ww].wt;
  p->parent = p->lChild = p->rChild = 0;       //
}                                              // 初始化
for( ; i <= m ; ++i , ++p )                   //
{
  p->weight = p->parent = p->lChild = p->rChild = 0;
}
//*********建赫夫曼树****************
//在 HT[ 1 .. i-1 ]选择parent 为 0 且 weight 最小的两个结点，其序号分别为 s1 和 s2
int s1=0 , s2=0;
for( i = n + 1 ; i <= m ; ++i )
{
  SelectTwoNode( HT , i-1 , s1 , s2 );
  HT[ s1 ].parent = i; // 标记 parent
  HT[ s2 ].parent = i; // 标记 parent
  HT[ i ].lChild = s1; // 标记左孩子
  HT[ i ].rChild = s2; // 标记右孩子
  HT[ i ].weight = HT[ s1 ].weight + HT[ s2 ].weight; // 新结点的权值
}

// ***********从叶子到根逆向求每个字符的赫夫曼编码 ***********
//HC=(HuffmanCode)malloc((n+1)*sizeof(char *)); //分配n个字符编码
char* cd = new char[ n ];       // 分配求编码的工作空间
cd[n-1] = '\0';             // 编码结束符
int c = 0;
int f = 0;
for( i = 1;i <= n;++i )    // 逐个字符求赫夫曼编码
{
  int start = n - 1;          // 编码结束符位置
  for( c = i , f = HT[ i ].parent ; f != 0 ; c = f , f = HT[ f ].parent ) // 从叶子到根逆向求编码
  {
if( HT[f].lChild== c )
{
cd[--start ] = '0';
}
else
{
cd[ --start ] = '1';
}
  }
  HC[ i ].ch = w[ i-1 ].ch ; // 复制字符
  HC[ i ].hufCh = ( char* ) malloc ( ( n - start ) * sizeof( char ) ); // 为第 i 个字符编码分配空间
  strcpy( HC[ i ].hufCh , &cd[ start ] );             // 从 cd 复制编码到 HC
}
delete [] cd;  // 释放工作空间
} // end HuffmanCoding()

//************************************************
// 选择两个最小的结点的实现
// 选择两个最小的结点 , 序号分别放在 s1 和 s2 中
//************************************************
void SelectTwoNode( HuffmanTree HT , int i , int& s1 , int& s2 )
{
unsigned int sm1 , sm2; // 用于权值的比较
s1 = 1; // 从序号为 1 的权值开始
s2 = 1;
int m=0;  // 最小权值的序号临时变量
for( m = 1 ; m <= i ; m++ ) // 第一遍中第一个 parent 为 0 的结点
{
  if( HT[ m ].parent != 0 ) continue;
  else
  {
sm1 = HT[ m ].weight;
s1=m;
break;
  }
}
for( int j = m+1 ; j <= i ; j++ ) // 第一遍选第一个最小的
{
  if( HT[ j ].parent != 0 ) continue;
  else
  {
if( sm1 > HT[ j ].weight )
{
sm1 = HT[ j ].weight;
s1 = j;
}
  }
}
for( m = 1 ; m <= i ; m++ ) // 第二遍中第一个 parent 为 0 的结点
{
  if( HT[ m ].parent != 0 ) continue;
  else
  {
sm2 = HT[ m ].weight;
s2=m;
if( s2 == s1 ) continue;
else break;
  }
}
for( int k = m+1 ; k <= i ; k++ ) // 第二遍选第二个最小的
{

  if( HT[ k ].parent != 0 ) continue;
  else
  {
if( (HT[ k ].weight < sm2) && ( k != s1 ) ) // 保证sm1 != sm2
{
sm2 = HT[ k ].weight;
s2 = k;
}
  }
}
} // end SelectTwoNode()

//********************************************************************
// DeCoding的译码实现
// 在求子串相应的字符时，用的方法为在 HC 里进行逐字扫描比较
// 参数 n 为字符个数，p = 2*n-1 就为 HT 的结点数目. HT[p]就为 HT 的根.
//*******************************************************************
void DeCoding( HuffmanTree HT , HuffmanCode * HC , const char* iFile , const int n )
{
ifstream fIn( iFile , ios::in );
char inBuf = '1';
ofstream fout( "textfile.txt" , ios:

ut );
char* cd = new char[ n ];  // 储存字串的工作空间
int start = 0;          // 译码开始标记
int p = 2 * n - 1;       // 根下标
int iHC = 1;             // 用于扫描 HC 的循环变
while(fIn.get(inBuf))  // 循环读入 \'0\' 或 \'1\' 或 \'\\n\'
{
  if( inBuf =='0')
  {
if( HT[ p ].lChild != 0 ) // 不是叶子
{
cd[ start++ ] = inBuf;  // 压 inBuf 进 cd*
p = HT[ p ].lChild;    // 进入左子树
continue;
}
else // 左叶子
{
cd[ start++] = '\0'; // 子串结束符
for( iHC = 1 ; iHC <= n ; iHC++ ) // 寻找与子串匹配的字符
{
   if( strcmp( HC[ iHC ].hufCh , cd ) == 0 )
   {
   fout << HC[ iHC ].ch;
   break; // 找到，跳出 for
   }
   else
   {
   continue;
   }
}
start = 0;             //
cd[ start++ ] = inBuf; // 为读下一子串做准备
p = HT[ 2*n - 1 ].lChild; // 注意，这时 p 已指向 root 的 left child
}
  }
  else if( inBuf == '1' )
  {
if( HT[ p ].rChild != 0 ) // 不是叶子
{
cd[ start++ ] = inBuf;  // 压 inBuf 进 cd*
p = HT[ p ].rChild;    // 进入左子树
continue;
}
else // 右叶子
{
cd[ start++] ='\0';    // 子串结束符
for( iHC = 1 ; iHC <= n ; iHC++ ) // 寻找与子串匹配的字符
{
   if( strcmp( HC[ iHC ].hufCh , cd ) == 0 )
   {
   fout << HC[ iHC ].ch;
   break; // 找到，跳出 for
   }
   else
   {
   continue;
   }
}
start = 0;             //
cd[ start++ ] = '1';    // 为读下一子串做准备
p = HT[ 2*n - 1 ].rChild; // 注意，这时 p 已指向 root 的 right child
}
  }
  else if( inBuf == '\n' ) // 有回车符
  {
continue;

  }
}
//最后一个字符？
cd[ start ] = '\0';
  for( iHC =1; iHC <=n; iHC++ ) // 寻找与子串匹配的字符
  {
if( strcmp( HC[ iHC ].hufCh , cd ) == 0 )
{
fout << HC[ iHC ].ch;
break; // 找到，跳出 for
}
else
{
continue;
}

  }
      delete [] cd; // 释放工作空间
  fIn.close();  // 关闭文件
  fout.close(); // 关闭文件
} // end DeCoding()


int main( )
{
//
// 用文件流对象 hufInPut 打开外部文件
// 例如 human.txt 储存字符集大小 n ，以及 n 个字符和 n 个权值

char* wFileName = new char[ 128 ]; // 存放文件名的工作空间
cout << "The CHAR and WEIGHT stores in which file?"
  << endl<< &quot

lease input the file name( such as  hufman.txt ) : ";
cin >> wFileName;
ifstream hufInPut( wFileName,ios::in );
if( !hufInPut ) cerr << "file can not be opened" << endl;
int hufNum = 0;
hufInPut >> hufNum; // 读入字符集大小 n 到 hufNum
// 输出基本信息
cout << "total number of char: " << hufNum << endl;
cout << " char weight" << endl;

wElem* hufW = new wElem[ hufNum ]; // 分配 n 个字符和 n 个权值的储存空间
for( int ii = 0 ; ii < hufNum ;ii++)
{
// 循环读入字符及其对应的权值
  hufInPut >> hufW[ ii ].ch >> hufW[ ii ].wt;
  cout << setw( 4 ) << hufW[ ii ].ch << setw( 8 ) << hufW[ ii ].wt << endl;
}
cout<<endl;
hufInPut.close(); // 关闭存放字符和权值的文件
delete [] wFileName; // 释放工作空间

HuffmanTree hufT; // 空树
HuffmanCode* hufC = ( HuffmanCode* ) malloc ( ( hufNum + 1 ) * sizeof( HuffmanCode ) ); // 分配n个字符编码的头指针向量
HuffmanCoding( hufT , hufC , hufW , hufNum ); // 编码中...
// 用文件流对象 hufTreeOutPut 把赫夫曼树 HT , HC 输出到文件 hufTree.txt 中
ofstream hufTreeOutPut( "hufTree.txt" , ios::out );
hufTreeOutPut << "-- HT ------------------------------- " << endl << setw( 9 ) << "       weight " <<" parent " << " lchild " << " rchild "<< endl;
for( int tOut = 1 ; tOut <= 2*hufNum-1 ; tOut++ )
{
  hufTreeOutPut << setw( 6 ) << tOut<< setw( 8 ) << hufT[ tOut ].weight << setw( 8 ) << hufT[ tOut ].parent << setw( 8 ) << hufT[ tOut ].lChild << setw( 8 ) << hufT[ tOut ].rChild << endl;
}
hufTreeOutPut << "-- end HT --------------------------- " << endl << endl << "-- HC ------------------------------- " << endl;
// 向屏幕输出编码过的编码基本信息
cout << "after EnCoding , the code looks like: " << endl;
cout << " char code" << endl;
for( int cOut = 1 ; cOut <= hufNum ; cOut++ )
{
  hufTreeOutPut << " " << hufC[ cOut ].ch << " ---->> " << hufC[ cOut ].hufCh << endl;
// 向屏幕输出编码过的编码内容
  cout<< setw( 4 )<< hufC[ cOut ].ch<< setw( 8 ) <<hufC[ cOut ].hufCh<<endl;
}
cout<<"****Encoding code in < hufTree.txt >***** "<<endl;
cout<<endl<<endl;
hufTreeOutPut << "-- convert -- ok -------------------- " << endl;
hufTreeOutPut.close(); // 输出完毕，关闭文件

delete [] hufW; // 释放内存

// 向屏幕输出编码过的代码内容

char* cFileName = new char[ 128 ]; // 存放文件名的工作空间
cout << "The code stores in which file?" << endl<< "Please input the file name( such as  Dehufman.txt ) : ";
cin >> cFileName;
ifstream codeIn( cFileName, ios::in );
if( !codeIn ) cerr << "file can not be opened" << endl;
char codeInBuf='0';
cout<<"Decoding information ,the code looks like:"<<endl;
while( codeIn.get( codeInBuf ) )
  cout << codeInBuf;
   cout << endl << endl;

DeCoding( hufT , hufC ,cFileName , hufNum ); // 译码

// 向屏幕输出译码后的内容
cout << "after DeCoding , the code looks like: " << endl;
ifstream dcodeIn( "textfile.txt" , ios::in );
if( !dcodeIn ) cerr << "file can not be opened" << endl;
char dcodeInBuf='0';
while( dcodeIn.get( dcodeInBuf ) )
   cout <<dcodeInBuf;
   cout<<endl;
      cout<<"****Decode code in <textfile.txt>*** "<<endl;
cout << endl<<endl;
cout <<"************************************"<<endl;
cout <<"*       name: wanghaijiang       *"<<endl;
cout <<"*       grade:jsj203          *"<<endl;
cout <<"*       classID:16#          *"<<endl;
cout <<"*Zhejiang University of Technology *"<<endl;
cout <<"************************************"<<endl;
return 0;
}

账号		自动登录	找回密码
密码			立即注册