最近研究了下LZW算法,也看了很多这个方面的资料。LZW适合于文本文件,对于稍稍大点的流文件则出现压缩出来的文件大于源文件的情况。LZW有很多著名的实现程序,下面的程序以动态增加位数为出发点,利用哈希表来实现LZW的压缩。 哈希算法有二个,一个被我注释掉,二个都可以用。具体哪个好,我自己也没有测试。
/**********************************************************************
***********************************************************************/
#include
#include
#include
#define hashsize 4096
#define clear 256 /*清除标志位*/
#define teminate 257
#define not_used -1
#define MAXVAL(n) (( 1 <
#define max_bits 12
FILE *in;
FILE *out;
int bitsize &#61; 9;
int maxcode;
/*字典数据结构*/
typedef struct prex_cha{
int value; /*值*/
unsigned int prefix; /*字符串*/
unsigned int character; /*追加的字母*/
}Hash_Table;
Hash_Table Hash_table[hashsize];
void initial_hashtable ()/*字典初始化*/
{
int i;
for (i&#61;0; i
{
Hash_table[i].value &#61; not_used;
}
}
/*把数据改写成bit流输出*/
void output (unsigned int code)
{
static int count &#61; 0;
static unsigned long buffer &#61; 0L; /*buffer 为定义的存储字节的缓冲区*/
buffer |&#61; (unsigned long) code <<(32 - bitsize - count);
count &#43;&#61; bitsize;
while (count >&#61; 8) /*如果缓冲区大于8则输出里面的前8位*/
{
fputc(buffer >> 24,out);
buffer <<&#61; 8;
count -&#61; 8;
}
}
/*
int find_match(int prefix,unsigned int character)
{
int index;
int offset; /*offset为偏移位*/
/* index &#61; (character <<4) ^ prefix; /*用异或来决定index*/
/* if (index &#61;&#61; 0)
{
offset &#61; 1;
}
else
{
offset &#61; 5021 - index;
}
while (1)
{
if (Hash_table[index].value &#61;&#61; not_used)
{
return (index);
}
if (Hash_table[index].prefix &#61;&#61; prefix && Hash_table[index].character &#61;&#61; character)
{
return (index);
}
index -&#61; offset;
if (index <0)
{
index &#43;&#61; 5021;
}
}
}
*/
int find_match(int prefix,unsigned int character)
{
int index;
index &#61; prefix % 4096;
while (1)
{
if(Hash_table[index].value &#61;&#61; not_used)
{
return (index);
}
if(Hash_table[index].prefix &#61;&#61; prefix && Hash_table[index].character &#61;&#61; character)
{
return (index);
}
index &#61; index &#43; 1;
if(index >&#61; 4096)
{
index &#61; index - 4096;
}
}
}
void lzwcompression ()
{
unsigned int prefix;
unsigned int character;
unsigned int index;
unsigned int next_code &#61; 258; /* 当前字典的标号*/
initial_hashtable ();
prefix &#61; fgetc (in);
while ((character &#61; fgetc(in)) !&#61;(unsigned) EOF)
{
index &#61; find_match (prefix,character);
if (Hash_table[index].value !&#61; not_used)/*能够找到*/
{
prefix &#61; Hash_table[index].value;
}
else
{
if(next_code <&#61; maxcode)/*不能找到&#xff0c;是新的字符串&#xff0c;则添加到表中*/
{
Hash_table[index].value &#61; next_code&#43;&#43;;
Hash_table[index].character &#61; character;
Hash_table[index].prefix &#61; prefix;
}
output(prefix);
prefix &#61; character; /*把后缀给前缀&#xff0c;准备下次输入*/
/*特殊标志&#xff0c;当位数必须增加时*/
if(next_code > maxcode)
{
if(bitsize <12)
{
maxcode &#61; MAXVAL(&#43;&#43;bitsize);
}
else /*达到4096时候&#xff0c;必须清除哈希表&#xff0c;重新开始*/
{
output (256); /*输出清除标志到文件中*/
initial_hashtable();
next_code &#61; 258;
bitsize &#61; 9;
maxcode &#61; MAXVAL(bitsize);/*maxcode 变为511*/
}
}
}/*if&#xff0d;else结束*/
}/*while结束*/
output(prefix); /*输出最后一个*/
if (next_code &#61;&#61; maxcode)
{ /* 如果在最后的哈息表刚好刚好是maxcode&#xff0c;则也必须把位数增加一位 */
&#43;&#43;bitsize;
}
output(257); /* 输出结束标志*/
output(0); /* 解码时要用到 */
output(0);
output(0);
}
int main(int argc,char* argv[])
{
char filename[255];
if (argc <2)
{
printf("usage:the command format is:lzw_compression !");
return(1);
}
if( (in &#61; fopen(argv[1], "rb")) &#61;&#61; NULL)
{
printf ("Cannot open input file - %s/n", argv[1]);
exit (1);
}
strcpy(filename, argv[1]);
/*加上后缀.zzz,表明是压缩文件*/
strncat(filename, ".zzz", 4);
if ((out &#61; fopen(filename, "wb")) &#61;&#61; NULL)
{
printf("Cannot open output file - %s/n", filename);
fclose(in);
exit(1);
}
maxcode &#61; MAXVAL(bitsize);
lzwcompression();
fclose (in);
fclose (out);
return 0;
}