LZW 编码解码代码

从 libtiff 4.0.2 中提取出来并稍加修改的 lzw 的代码，符合 TIFF6 标准中的 LZW 部分。

本人目前对开源协议还不太清楚，不知是否存在侵权问题，如果有，请告知。

代码：

1 /*
2 codec_lzw.h
3
4 LZW 编码解码。
5 基于 TIFF6 标准，不考虑与之前 TIFF 版本兼容。
6 以字节为字符单位进行编码。
7 */
8
9
10 #ifndef  __CODEC_LZW_H_INCLUDED__
11 #define  __CODEC_LZW_H_INCLUDED__
12
13
14 #define MAXCODE(n)      ((1L<<(n))-1)
15
16 #define BITS_MIN        9
17 #define BITS_MAX        12
18
19 #define CODE_CLEAR      256
20 #define CODE_EOI        257
21 #define CODE_FIRST      258
22 #define CODE_MAX        MAXCODE(BITS_MAX)
23
24
25         /*
26         编码函数
27
28         输入参数 input       ：输入数据，即待编码数据
29         输入参数 inputSize   ：输入数据量，单位字节
30         输出参数 output      ：输出缓冲，编码所得数据置于其中
31                 输出数据以 CODE_CLEAR 开始
32                 输出数据以 CODE_EOI   结束
33                 若输出缓冲太小，则编码所得数据将其填满
34         输入参数 outputLimit : 输出缓冲大小，单位字节
35         输出参数 *outputSize ：编码输出数据量，单位字节
36                 若输出缓冲太小，则返回实际完成编码可得到的数据量
37
38         返回值：
39                 非负整数，编码成功，其值表示编码所得数据量，与 (*outputSize) 相同
40                 -1 表示出错失败
41                 -2 表示输出缓冲太小
42         */
43 int  LZWencode( const unsigned char *input, int inputSize,
44                         unsigned char *output, int outputLimit,
45                         int *outputSize );
46
47         /*
48         解码函数
49
50         输入参数 input       ：输入数据，即待解码数据
51                 认为输入数据以 CODE_CLEAR 开始
52                 认为输入数据以 CODE_EOI   结束
53         输入参数 inputSize   ：输入数据量，单位字节
54         输出参数 output      ：输出缓冲，解码所得数据置于其中
55                 若输出缓冲太小，则解码所得数据将其填满
56         输入参数 outputLimit : 输出缓冲大小，单位字节
57         输出参数 *outputSize ：解码输出数据量，单位字节
58                 若输出缓冲太小，则返回实际完成解码可得到的数据量
59
60         返回值：
61                 非负整数，解码成功，其值表示解码所得数据量，与 (*outputSize) 相同
62                 -1 表示出错失败
63                 -2 表示输出缓冲太小
64         */
65 int  LZWdecode( const unsigned char *input, int inputSize,
66                         unsigned char *output, int outputLimit,
67                         int *outputSize );
68
69
70 #endif  /* __CODEC_LZW_H_INCLUDED__ */
71

  1 /*
  2 codec_lzw.c
  3
  4 LZW 编码解码。
  5 */
  6
  7
  8 #include "codec_lzw.h"
  9
10 #include <stdlib.h>
11 #include <string.h>
12
13
14 #define HSIZE           9001L                   /* 91% occupancy */
15 #define HSHIFT          (13-8)
16 #define CSIZE           (MAXCODE(BITS_MAX)+1L)
17 #define CHECK_GAP        10000
18
19 typedef unsigned short hcode_t;                 /* codes fit in 16 bits */
20 typedef struct {
21         int        hash;
22         hcode_t    code;
23 } hash_t;
24
25 typedef struct code_ent {
26         struct code_ent     *next;
27         unsigned short      length;           /* string len, including this token */
28         unsigned char       value;            /* data value */
29         unsigned char       firstchar;        /* first token of string */
30 } code_t;
31
32 /*
33  * Reset encoding hash table.
34  */
35 static void cl_hash( hash_t *hashtab ) {
36         hash_t *hp = &hashtab[HSIZE-1];
37         int    i   = HSIZE-8;
38         do {
39                 i -= 8;
40                 hp[-7].hash = -1;
41                 hp[-6].hash = -1;
42                 hp[-5].hash = -1;
43                 hp[-4].hash = -1;
44                 hp[-3].hash = -1;
45                 hp[-2].hash = -1;
46                 hp[-1].hash = -1;
47                 hp[ 0].hash = -1;
48                 hp -= 8;
49         } while (i >= 0);
50         for (i += 8; i > 0; i--, hp--)
51                 hp->hash = -1;
52 }
53
54
55 int  LZWencode( const unsigned char *input, int inputSize,
56                         unsigned char *output, int outputLimit,
57                         int *outputSize ) {
58         int      fcode;
59         hash_t   *hp;
60         int      h, c;
61         hcode_t  ent           = (hcode_t)(-1);
62         int      disp;
63         int      incount       = 0;
64         int      outcount      = 0;
65         int      checkpoint    = CHECK_GAP;
66         int      nextdata      = 0;
67         int      nextbits      = 0;
68         int      free_ent      = CODE_FIRST;
69         int      maxcode       = MAXCODE(BITS_MIN);
70         int      nbits         = BITS_MIN;
71         int      enc_ratio     = 0;
72         const unsigned char *bp= input;
73         int           cc       = inputSize;
74         unsigned char *op      = output;
75         unsigned char *limit   = output + outputLimit;
76         hash_t        *hashtab;
77
78         if ( (NULL == input) || (0 > inputSize) ||
79                         (NULL == output) || (1 > outputLimit) ) {
80                 return (-1);
81         }
82
83         hashtab = (hash_t*)malloc(HSIZE * sizeof(hash_t));
84         if ( hashtab == NULL ) {
85                 return (-1);
86         }
87         cl_hash( hashtab );
88
89 #define CALCRATIO(rat) {                                            \
90         if (incount > 0x007fffff) { /* NB: shift will overflow */   \
91                 rat = outcount >> 8;                                \
92                 rat = (rat == 0 ? 0x7fffffff : incount/rat);        \
93         } else                                                      \
94                 rat = (incount<<8) / outcount;                      \
95 }
96
97 #define PutNextCode(op, c) {                                           \
98         nextdata = (nextdata << nbits) | c;                            \
99         nextbits += nbits;                                             \
100         if (op < limit)                                                \
101                 *op = (unsigned char)(nextdata >> (nextbits-8));       \
102         op = op + 1;                                                   \
103         nextbits -= 8;                                                 \
104         if (nextbits >= 8) {                                           \
105                 if (op < limit)                                        \
106                        *op = (unsigned char)(nextdata >> (nextbits-8));\
107                 op = op + 1;                                           \
108                 nextbits -= 8;                                         \
109         }                                                              \
110         outcount += nbits;                                             \
111 }
112
113         PutNextCode(op, CODE_CLEAR);
114         if (cc > 0) {
115                 ent = *bp++; cc--; incount++;
116         }
117         while (cc > 0) {
118                 c = *bp++; cc--; incount++;
119                 fcode = ((int)c << BITS_MAX) + ent;
120                 h = (c << HSHIFT) ^ ent;        /* xor hashing */
121                 /*
122                  * Check hash index for an overflow.
123                  */
124                 if (h >= HSIZE)
125                         h -= HSIZE;
126
127                 hp = &hashtab[h];
128                 if (hp->hash == fcode) {
129                         ent = hp->code;
130                         continue;
131                 }
132                 if (hp->hash >= 0) {
133                         /*
134                          * Primary hash failed, check secondary hash.
135                          */
136                         disp = HSIZE - h;
137                         if (h == 0)
138                                 disp = 1;
139                         do {
140                                 /*
141                                  * Avoid pointer arithmetic 'cuz of
142                                  * wraparound problems with segments.
143                                  */
144                                 if ((h -= disp) < 0)
145                                         h += HSIZE;
146                                 hp = &hashtab[h];
147                                 if (hp->hash == fcode) {
148                                         ent = hp->code;
149                                         goto hit;
150                                 }
151                         } while (hp->hash >= 0);
152                 }
153                 /*
154                  * New entry, emit code and add to table.
155                  */
156                 PutNextCode(op, ent);
157                 ent = c;
158                 hp->code = free_ent++;
159                 hp->hash = fcode;
160                 if (free_ent == CODE_MAX-1) {
161                         /* table is full, emit clear code and reset */
162                         cl_hash(hashtab);
163                         enc_ratio = 0;
164                         incount = 0;
165                         outcount = 0;
166                         free_ent = CODE_FIRST;
167                         PutNextCode(op, CODE_CLEAR);
168                         nbits = BITS_MIN;
169                         maxcode = MAXCODE(BITS_MIN);
170                 } else {
171                         /*
172                          * If the next entry is going to be too big for
173                          * the code size, then increase it, if possible.
174                          */
175                         if (free_ent > maxcode) {
176                                 nbits++;
177                                 maxcode = (int) MAXCODE(nbits);
178                         } else if (incount >= checkpoint) {
179                                 int rat;
180                                 /*
181                                  * Check compression ratio and, if things seem
182                                  * to be slipping, clear the hash table and
183                                  * reset state.  The compression ratio is a
184                                  * 24+8-bit fractional number.
185                                  */
186                                 checkpoint = incount + CHECK_GAP;
187                                 CALCRATIO(rat);
188                                 if (rat <= enc_ratio) {
189                                         cl_hash(hashtab);
190                                         enc_ratio = 0;
191                                         incount = 0;
192                                         outcount = 0;
193                                         free_ent = CODE_FIRST;
194                                         PutNextCode(op, CODE_CLEAR);
195                                         nbits = BITS_MIN;
196                                         maxcode = MAXCODE(BITS_MIN);
197                                 } else
198                                         enc_ratio = rat;
199                         }
200                 }
201         hit:
202                 ;
203         }
204         if (ent != (hcode_t) -1) {
205                 PutNextCode(op, ent);
206         }
207         PutNextCode(op, CODE_EOI);
208         if (nextbits > 0)  {
209                 if (op < limit)
210                         *op = (unsigned char)(nextdata << (8-nextbits));
211                 op = op + 1;
212         }
213
214         if ( outputSize != NULL ) {
215                 *outputSize = op - output;
216         }
217
218         return ((op <= limit) ? (op - output) : (-2));
219 }
220
221
222
223
224 int  LZWdecode( const unsigned char *input, int inputSize,
225                         unsigned char *output, int outputLimit,
226                         int *outputSize ) {
227         const unsigned char *bp = input;
228         unsigned char       *op = output;
229         int                 occ = outputLimit;
230         int                 cc  = inputSize;
231
232         unsigned char *tp;
233         hcode_t       code;
234         int           len;
235         int           i;
236
237         int  nbits     = BITS_MIN;
238         int  nextbits  = 0;
239         int  nextdata  = 0;
240         int  nbitsmask = MAXCODE(BITS_MIN);
241
242         code_t *codep;
243         code_t *free_entp;
244         code_t *maxcodep;
245         code_t *oldcodep;
246         code_t *codetab;
247
248         if ( (NULL == input) || (1 > inputSize) ||
249                         (NULL == output) || (1 > outputLimit) ) {
250                 return (-1);
251         }
252
253         codetab = (code_t*)malloc(CSIZE * sizeof (code_t));
254         if ( NULL == codetab ) {
255                 return (-1);
256         }
257
258         i = 255;
259         do {
260                 codetab[i].value = i;
261                 codetab[i].firstchar = i;
262                 codetab[i].length = 1;
263                 codetab[i].next = NULL;
264         } while (i--);
265         memset(&codetab[CODE_CLEAR], 0,
266                 (CODE_FIRST - CODE_CLEAR) * sizeof (code_t));
267
268 #define DecFail   do { free( codetab ); return (-1); } while (0)
269
270 #define GetNextCode(bp, code) {                                         \
271         if (cc <= 0) DecFail;                                           \
272         nextdata = (nextdata<<8) | *(bp)++;                             \
273         nextbits += 8;                                                  \
274         --cc;                                                           \
275         if (nextbits < nbits) {                                         \
276                 if (cc <= 0) DecFail;                                   \
277                 nextdata = (nextdata<<8) | *(bp)++;                     \
278                 nextbits += 8;                                          \
279                 --cc;                                                   \
280         }                                                               \
281         code = (hcode_t)((nextdata >> (nextbits-nbits)) & nbitsmask);   \
282         nextbits -= nbits;                                              \
283 }
284
285         GetNextCode(bp, code);
286         if (code != CODE_CLEAR) {
287                 DecFail;
288         }
289         while (code != CODE_EOI) {
290                 if (code == CODE_CLEAR) {
291                         free_entp = codetab + CODE_FIRST;
292                         memset(free_entp, 0,
293                                     (CSIZE - CODE_FIRST) * sizeof (code_t));
294                         nbits     = BITS_MIN;
295                         nbitsmask = MAXCODE(BITS_MIN);
296                         maxcodep  = codetab + nbitsmask - 1;
297                         GetNextCode(bp, code);
298                         if (code == CODE_EOI)
299                                 break;
300                         if (code >= CODE_CLEAR) {
301                                 DecFail;
302                         }
303                         if ( occ-- > 0 ) {
304                                 *op++ = (unsigned char)code;
305                         }
306                         oldcodep = codetab + code;
307                         GetNextCode(bp, code);
308                         continue;
309                 }
310
311                 codep = codetab + code;
312
313                 if (free_entp < &codetab[0] ||
314                     free_entp >= &codetab[CSIZE]) {
315                         DecFail;
316                 }
317
318                 free_entp->next = oldcodep;
319                 if (free_entp->next < &codetab[0] ||
320                     free_entp->next >= &codetab[CSIZE]) {
321                         DecFail;
322                 }
323
324                 free_entp->firstchar = free_entp->next->firstchar;
325                 free_entp->length    = free_entp->next->length+1;
326                 free_entp->value     = (codep < free_entp) ?
327                                                 codep->firstchar :
328                                                 free_entp->firstchar;
329                 if (++free_entp > maxcodep) {
330                         if (++nbits > BITS_MAX)                /* should not happen */
331                                 DecFail;
332                         nbitsmask = MAXCODE(nbits);
333                         maxcodep  = codetab + nbitsmask - 1;
334                 }
335                 oldcodep = codep;
336
337                 if (code >= 256) {
338                         if(codep->length == 0) {
339                                 DecFail;
340                         }
341                         i = len = codep->length;
342                         while ( (len > 0) && (len > occ) && codep ) {
343                                 codep = codep->next;
344                                 --len;
345                         }
346                         tp = op + len;
347                         while ( codep && (tp > op) ) {
348                                 *(--tp) = codep->value;
349                                 codep   = codep->next;
350                         }
351                         op  += len;
352                         occ -= i;
353                 }
354                 else if ( occ-- > 0 ) {
355                         *op++ = (unsigned char)code;
356                 }
357
358                 GetNextCode(bp, code);
359         }
360
361         free( codetab );
362         if ( outputSize != NULL ) {
363                 *outputSize = outputLimit - occ;
364         }
365         return ((occ >= 0) ? (outputLimit - occ) : (-2));
366 }
367

测试程序：

  1 /*
  2 测试 LZW 编码解码，
  3 并未做到 100％测试。
  4 */
  5
  6 #include <stdio.h>
  7 #include <string.h>
  8 #include <time.h>
  9 #include <stdlib.h>
10
11 #include "codec_lzw.h"
12
13         /* 测试字符串最大长度，确保使字典超出大小 */
14 #define  L  (4096+4096)
15
16 char a[ L + L ];
17 int  na;
18
19 char b[ L + L ];
20 int  nb;
21
22 int res;
23
24 /* 小数据 */
25 char *data[] =
26 {
27         "",       /* 空串 */
28         "a",
29         "aaa",
30         "abcdefgabcdefgabcdefg",
31         "wabba_wabba_wabba_wabba_woo_woo_woo",
32         "abababababababab",   /* 解码时使用未构造好的字典条目 */
33 };
34
35 /* 使字典超出大小的数据 */
36 #define  DAL  5
37 char da[ L + 3 ];
38 void initDa( int i ) {
39         int j;
40         switch ( i ) {
41         case 0 :
42                 for ( j = 0; j < L; ++j ) {
43                         da[ j ] = 'a';
44                 }
45                 break;
46         case 1 :
47                 for ( j = 0; j < L; ++j ) {
48                         da[ j ] = ((j & 1) ? 'b' : 'a' );
49                 }
50                 break;
51         case 2 :
52                 for ( j = 0; j < L; ++j ) {
53                         da[ j ] = (rand() % 26) + 'a';
54                 }
55                 break;
56         case 3 :
57                 for ( j = 0; j < L; ++j ) {
58                         da[ j ] = (j % 26) + 'a';
59                 }
60                 break;
61         case 4 :
62                 for ( j = 0; j < L; ++j ) {
63                         da[ j ] = rand() & 0xFF;
64                 }
65                 break;
66         }
67 }
68
69 char* tobin( char d ) {
70         static char tmp[ 512 ];
71         int i;
72         for ( i = 7; i >= 0; --i ) {
73                 tmp[ 7 - i ] = '0' + ((d >> i) & 0x1);
74         }
75         tmp[ 8 ] = '\0';
76         return tmp;
77 }
78
79
80 int main() {
81         printf( "\n ---- test ---- \n" );
82         int tc = sizeof(data) / sizeof(data[0]);
83         for ( tc = 0; tc < sizeof(data) / sizeof(data[0]); ++tc ) {
84                 int i;
85                 printf( "\n*********************\ntest case %d:\n", tc );
86                 printf( "text : \n\"%s\"\n", data[tc] );
87                 res = LZWencode( data[tc], strlen(data[tc]), a, L, &na );
88                 printf( "encode : \nres = %d, len = %d\n", res, na );
89                 for ( i = 0; i < na; ++i ) {
90                         printf( "%s ", tobin(a[i]) );
91                 }
92                 res = LZWdecode( a, na, b, L, &nb );
93                 printf( "\ndecode : \nres = %d, len = %d\n\"", res, nb );
94                 for ( i = 0; i < nb; ++i ) {
95                         printf( "%c", b[i] );
96                 }
97                 printf( "\"\n" );
98         }
99
100         printf( "\n\n ---- huge ---- \n" );
101         srand( time(NULL) );
102         for ( tc = 0; tc < DAL; ++tc ) {
103                 printf( "\n*********************\nhuge case %d:\n", tc );
104                 initDa( tc );
105                 res = LZWencode( da, L, a, L+L, &na );
106                 printf( "encode : \nres = %d, len = %d\n", res, na );
107                 res = LZWdecode( a, na, b, L+L, &nb );
108                 printf( "decode : \nres = %d, len = %d\n", res, nb );
109                 printf( (0 == memcmp(da, b, nb)) ? "yes\n" : "     no !!!!\n" );
110         }
111
112
113         printf( "\n\n ---- error ---- \n" );
114         for ( tc = 0; tc < DAL; ++tc ) {
115                 printf( "\n*********************\nerror case %d:\n", tc );
116                 initDa( tc );
117                 res = LZWencode( da, L, a, L/3, &na );
118                 printf( "encode : \nres = %d, len = %d\n", res, na );
119                 if ( -2 == res ) {
120                         res = LZWencode( da, L, a, na, &na );
121                         printf( "encode : \nres = %d, len = %d\n", res, na );
122                 }
123                 res = LZWdecode( a, na, b, L/3, &nb );
124                 printf( "decode : \nres = %d, len = %d\n", res, nb );
125                 if ( -2 == res ) {
126                         printf( (0 == memcmp(da, b, L/3)) ? "partly yes\n" : "partly no\n" );
127                         res = LZWdecode( a, na, b, nb, &nb );
128                         printf( "decode : \nres = %d, len = %d\n", res, nb );
129                 }
130                 printf( (0 == memcmp(da, b, nb)) ? "yes\n" : "     no !!!!\n" );
131         }
132         return 0;
133 }
134

测试结果：

---- test ----

*********************
test case 0:
text :
""
encode :
res = 3, len = 3
10000000 01000000 01000000
decode :
res = 0, len = 0
""

*********************
test case 1:
text :
"a"
encode :
res = 4, len = 4
10000000 00011000 01100000 00100000
decode :
res = 1, len = 1
"a"

*********************
test case 2:
text :
"aaa"
encode :
res = 5, len = 5
10000000 00011000 01100000 01010000 00010000
decode :
res = 3, len = 3
"aaa"

*********************
test case 3:
text :
"abcdefgabcdefgabcdefg"
encode :
res = 18, len = 18
10000000 00011000 01001100 01000110 00110011 00100001 10010100 11001100 01100111 10000001 01000001 00100000 11010000 10001000 00011100 00010110 00001111 00000001
decode :
res = 21, len = 21
"abcdefgabcdefgabcdefg"

*********************
test case 4:
text :
"wabba_wabba_wabba_wabba_woo_woo_woo"
encode :
res = 26, len = 26
10000000 00011101 11001100 00100110 00100011 00010001 10000100 10111111 00000010 10000010 01000001 10100001 00010000 01011000 00111100 00001110 00011000 01110111 00110111 10011011 11100000 11110001 00011000 10011001 10111110 00000010
decode :
res = 35, len = 35
"wabba_wabba_wabba_wabba_woo_woo_woo"

*********************
test case 5:
text :
"abababababababab"
encode :
res = 11, len = 11
10000000 00011000 01001100 01010000 00101000 00100100 00001110 00001101 00000101 10000000 10000000
decode :
res = 16, len = 16
"abababababababab"

---- huge ----

*********************
huge case 0:
encode :
res = 147, len = 147
decode :
res = 8192, len = 8192
yes

*********************
huge case 1:
encode :
res = 206, len = 206
decode :
res = 8192, len = 8192
yes

*********************
huge case 2:
encode :
res = 6219, len = 6219
decode :
res = 8192, len = 8192
yes

*********************
huge case 3:
encode :
res = 771, len = 771
decode :
res = 8192, len = 8192
yes

*********************
huge case 4:
encode :
res = 11176, len = 11176
decode :
res = 8192, len = 8192
yes

---- error ----

*********************
error case 0:
encode :
res = 147, len = 147
decode :
res = -2, len = 8192
partly yes
decode :
res = 8192, len = 8192
yes

*********************
error case 1:
encode :
res = 206, len = 206
decode :
res = -2, len = 8192
partly yes
decode :
res = 8192, len = 8192
yes

*********************
error case 2:
encode :
res = -2, len = 6204
encode :
res = 6204, len = 6204
decode :
res = -2, len = 8192
partly yes
decode :
res = 8192, len = 8192
yes

*********************
error case 3:
encode :
res = 771, len = 771
decode :
res = -2, len = 8192
partly yes
decode :
res = 8192, len = 8192
yes

*********************
error case 4:
encode :
res = -2, len = 11161
encode :
res = 11161, len = 11161
decode :
res = -2, len = 8192
partly yes
decode :
res = 8192, len = 8192
yes

posted on 2013-11-04 15:58 coreBugZJ 阅读(1656) 评论(0) 编辑收藏引用所属分类: VideoImage 、Algorithm 、OpenSource

只有注册用户登录后才能发表评论。


相关文章: LZW 编码解码代码数字图像处理上机之五：图像平滑和锐化数字图像处理上机之四：灰度图快速傅里叶变换（ FFT IFFT 一维二维）运动轨迹捕捉数字图像处理上机之三：直方图均衡化数字图像处理上机之二：图像放大缩小旋转和平移处理尝试 OpenCV 摄像头与 Windows 消息机制相似图片搜索的原理（转）图像插值算法（转）数字图像处理上机之一：BMP图像文件读写和图像显示

网站导航: 博客园 IT新闻 BlogJava 博问 Chat2DB 管理

coreBugZJ

My Links

Blog Stats

常用链接

留言簿(10)

随笔分类(458)

随笔档案(268)

相册

ACM

AI

LaTeX

安全

编程语言

好有道理

技术

开源

科学

数学

图形图像

文化

问题（练习＆有趣）

资源

最新随笔

搜索

最新评论

阅读排行榜

评论排行榜

LZW 编码解码代码