View Javadoc
1   /*
2    * Copyright 2014 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.handler.codec.compression;
17  
18  /**
19   * Core of FastLZ compression algorithm.
20   *
21   * This class provides methods for compression and decompression of buffers and saves
22   * constants which use by {@link FastLzFrameEncoder} and {@link FastLzFrameDecoder}.
23   *
24   * This is refactored code of <a href="https://code.google.com/p/jfastlz/">jfastlz</a>
25   * library written by William Kinney.
26   */
27  final class FastLz {
28  
29      private static final int MAX_DISTANCE = 8191;
30      private static final int MAX_FARDISTANCE = 65535 + MAX_DISTANCE - 1;
31  
32      private static final int HASH_LOG = 13;
33      private static final int HASH_SIZE = 1 << HASH_LOG; // 8192
34      private static final int HASH_MASK = HASH_SIZE - 1;
35  
36      private static final int MAX_COPY = 32;
37      private static final int MAX_LEN = 256 + 8;
38  
39      private static final int MIN_RECOMENDED_LENGTH_FOR_LEVEL_2 = 1024 * 64;
40  
41      static final int MAGIC_NUMBER = 'F' << 16 | 'L' << 8 | 'Z';
42  
43      static final byte BLOCK_TYPE_NON_COMPRESSED = 0x00;
44      static final byte     BLOCK_TYPE_COMPRESSED = 0x01;
45      static final byte    BLOCK_WITHOUT_CHECKSUM = 0x00;
46      static final byte       BLOCK_WITH_CHECKSUM = 0x10;
47  
48      static final int OPTIONS_OFFSET = 3;
49      static final int CHECKSUM_OFFSET = 4;
50  
51      static final int MAX_CHUNK_LENGTH = 0xFFFF;
52  
53      /**
54       * Do not call {@link #compress(byte[], int, int, byte[], int, int)} for input buffers
55       * which length less than this value.
56       */
57      static final int MIN_LENGTH_TO_COMPRESSION = 32;
58  
59      /**
60       * In this case {@link #compress(byte[], int, int, byte[], int, int)} will choose level
61       * automatically depending on the length of the input buffer. If length less than
62       * {@link #MIN_RECOMENDED_LENGTH_FOR_LEVEL_2} {@link #LEVEL_1} will be choosen,
63       * otherwise {@link #LEVEL_2}.
64       */
65      static final int LEVEL_AUTO = 0;
66  
67      /**
68       * Level 1 is the fastest compression and generally useful for short data.
69       */
70      static final int LEVEL_1 = 1;
71  
72      /**
73       * Level 2 is slightly slower but it gives better compression ratio.
74       */
75      static final int LEVEL_2 = 2;
76  
77      /**
78       * The output buffer must be at least 6% larger than the input buffer and can not be smaller than 66 bytes.
79       * @param inputLength length of input buffer
80       * @return Maximum output buffer length
81       */
82      static int calculateOutputBufferLength(int inputLength) {
83          final int outputLength = (int) (inputLength * 1.06);
84          return Math.max(outputLength, 66);
85      }
86  
87      /**
88       * Compress a block of data in the input buffer and returns the size of compressed block.
89       * The size of input buffer is specified by length. The minimum input buffer size is 32.
90       *
91       * If the input is not compressible, the return value might be larger than length (input buffer size).
92       */
93      static int compress(final byte[] input, final int inOffset, final int inLength,
94                          final byte[] output, final int outOffset, final int proposedLevel) {
95          final int level;
96          if (proposedLevel == LEVEL_AUTO) {
97              level = inLength < MIN_RECOMENDED_LENGTH_FOR_LEVEL_2 ? LEVEL_1 : LEVEL_2;
98          } else {
99              level = proposedLevel;
100         }
101 
102         int ip = 0;
103         int ipBound = ip + inLength - 2;
104         int ipLimit = ip + inLength - 12;
105 
106         int op = 0;
107 
108         // const flzuint8* htab[HASH_SIZE];
109         int[] htab = new int[HASH_SIZE];
110         // const flzuint8** hslot;
111         int hslot;
112         // flzuint32 hval;
113         // int OK b/c address starting from 0
114         int hval;
115         // flzuint32 copy;
116         // int OK b/c address starting from 0
117         int copy;
118 
119         /* sanity check */
120         if (inLength < 4) {
121             if (inLength != 0) {
122                 // *op++ = length-1;
123                 output[outOffset + op++] = (byte) (inLength - 1);
124                 ipBound++;
125                 while (ip <= ipBound) {
126                     output[outOffset + op++] = input[inOffset + ip++];
127                 }
128                 return inLength + 1;
129             }
130             // else
131             return 0;
132         }
133 
134         /* initializes hash table */
135         //  for (hslot = htab; hslot < htab + HASH_SIZE; hslot++)
136         for (hslot = 0; hslot < HASH_SIZE; hslot++) {
137             //*hslot = ip;
138             htab[hslot] = ip;
139         }
140 
141         /* we start with literal copy */
142         copy = 2;
143         output[outOffset + op++] = MAX_COPY - 1;
144         output[outOffset + op++] = input[inOffset + ip++];
145         output[outOffset + op++] = input[inOffset + ip++];
146 
147         /* main loop */
148         while (ip < ipLimit) {
149             int ref = 0;
150 
151             long distance = 0;
152 
153             /* minimum match length */
154             // flzuint32 len = 3;
155             // int OK b/c len is 0 and octal based
156             int len = 3;
157 
158             /* comparison starting-point */
159             int anchor = ip;
160 
161             boolean matchLabel = false;
162 
163             /* check for a run */
164             if (level == LEVEL_2) {
165                 //if(ip[0] == ip[-1] && FASTLZ_READU16(ip-1)==FASTLZ_READU16(ip+1))
166                 if (input[inOffset + ip] == input[inOffset + ip - 1] &&
167                         readU16(input, inOffset + ip - 1) == readU16(input, inOffset + ip + 1)) {
168                     distance = 1;
169                     ip += 3;
170                     ref = anchor - 1 + 3;
171 
172                     /*
173                      * goto match;
174                      */
175                     matchLabel = true;
176                 }
177             }
178             if (!matchLabel) {
179                 /* find potential match */
180                 // HASH_FUNCTION(hval,ip);
181                 hval = hashFunction(input, inOffset + ip);
182                 // hslot = htab + hval;
183                 hslot = hval;
184                 // ref = htab[hval];
185                 ref = htab[hval];
186 
187                 /* calculate distance to the match */
188                 distance = anchor - ref;
189 
190                 /* update hash table */
191                 //*hslot = anchor;
192                 htab[hslot] = anchor;
193 
194                 /* is this a match? check the first 3 bytes */
195                 if (distance == 0
196                         || (level == LEVEL_1 ? distance >= MAX_DISTANCE : distance >= MAX_FARDISTANCE)
197                         || input[inOffset + ref++] != input[inOffset + ip++]
198                         || input[inOffset + ref++] != input[inOffset + ip++]
199                         || input[inOffset + ref++] != input[inOffset + ip++]) {
200                     /*
201                      * goto literal;
202                      */
203                     output[outOffset + op++] = input[inOffset + anchor++];
204                     ip = anchor;
205                     copy++;
206                     if (copy == MAX_COPY) {
207                         copy = 0;
208                         output[outOffset + op++] = MAX_COPY - 1;
209                     }
210                     continue;
211                 }
212 
213                 if (level == LEVEL_2) {
214                     /* far, needs at least 5-byte match */
215                     if (distance >= MAX_DISTANCE) {
216                         if (input[inOffset + ip++] != input[inOffset + ref++]
217                                 || input[inOffset + ip++] != input[inOffset + ref++]) {
218                             /*
219                              * goto literal;
220                              */
221                             output[outOffset + op++] = input[inOffset + anchor++];
222                             ip = anchor;
223                             copy++;
224                             if (copy == MAX_COPY) {
225                                 copy = 0;
226                                 output[outOffset + op++] = MAX_COPY - 1;
227                             }
228                             continue;
229                         }
230                         len += 2;
231                     }
232                 }
233             } // end if(!matchLabel)
234             /*
235              * match:
236              */
237             /* last matched byte */
238             ip = anchor + len;
239 
240             /* distance is biased */
241             distance--;
242 
243             if (distance == 0) {
244                 /* zero distance means a run */
245                 //flzuint8 x = ip[-1];
246                 byte x = input[inOffset + ip - 1];
247                 while (ip < ipBound) {
248                     if (input[inOffset + ref++] != x) {
249                         break;
250                     } else {
251                         ip++;
252                     }
253                 }
254             } else {
255                 for (;;) {
256                     /* safe because the outer check against ip limit */
257                     if (input[inOffset + ref++] != input[inOffset + ip++]) {
258                         break;
259                     }
260                     if (input[inOffset + ref++] != input[inOffset + ip++]) {
261                         break;
262                     }
263                     if (input[inOffset + ref++] != input[inOffset + ip++]) {
264                         break;
265                     }
266                     if (input[inOffset + ref++] != input[inOffset + ip++]) {
267                         break;
268                     }
269                     if (input[inOffset + ref++] != input[inOffset + ip++]) {
270                         break;
271                     }
272                     if (input[inOffset + ref++] != input[inOffset + ip++]) {
273                         break;
274                     }
275                     if (input[inOffset + ref++] != input[inOffset + ip++]) {
276                         break;
277                     }
278                     if (input[inOffset + ref++] != input[inOffset + ip++]) {
279                         break;
280                     }
281                     while (ip < ipBound) {
282                         if (input[inOffset + ref++] != input[inOffset + ip++]) {
283                             break;
284                         }
285                     }
286                     break;
287                 }
288             }
289 
290             /* if we have copied something, adjust the copy count */
291             if (copy != 0) {
292                 /* copy is biased, '0' means 1 byte copy */
293                 // *(op-copy-1) = copy-1;
294                 output[outOffset + op - copy - 1] = (byte) (copy - 1);
295             } else {
296                 /* back, to overwrite the copy count */
297                 op--;
298             }
299 
300             /* reset literal counter */
301             copy = 0;
302 
303             /* length is biased, '1' means a match of 3 bytes */
304             ip -= 3;
305             len = ip - anchor;
306 
307             /* encode the match */
308             if (level == LEVEL_2) {
309                 if (distance < MAX_DISTANCE) {
310                     if (len < 7) {
311                         output[outOffset + op++] = (byte) ((len << 5) + (distance >>> 8));
312                         output[outOffset + op++] = (byte) (distance & 255);
313                     } else {
314                         output[outOffset + op++] = (byte) ((7 << 5) + (distance >>> 8));
315                         for (len -= 7; len >= 255; len -= 255) {
316                             output[outOffset + op++] = (byte) 255;
317                         }
318                         output[outOffset + op++] = (byte) len;
319                         output[outOffset + op++] = (byte) (distance & 255);
320                     }
321                 } else {
322                     /* far away, but not yet in the another galaxy... */
323                     if (len < 7) {
324                         distance -= MAX_DISTANCE;
325                         output[outOffset + op++] = (byte) ((len << 5) + 31);
326                         output[outOffset + op++] = (byte) 255;
327                         output[outOffset + op++] = (byte) (distance >>> 8);
328                         output[outOffset + op++] = (byte) (distance & 255);
329                     } else {
330                         distance -= MAX_DISTANCE;
331                         output[outOffset + op++] = (byte) ((7 << 5) + 31);
332                         for (len -= 7; len >= 255; len -= 255) {
333                             output[outOffset + op++] = (byte) 255;
334                         }
335                         output[outOffset + op++] = (byte) len;
336                         output[outOffset + op++] = (byte) 255;
337                         output[outOffset + op++] = (byte) (distance >>> 8);
338                         output[outOffset + op++] = (byte) (distance & 255);
339                     }
340                 }
341             } else {
342                 if (len > MAX_LEN - 2) {
343                     while (len > MAX_LEN - 2) {
344                         output[outOffset + op++] = (byte) ((7 << 5) + (distance >>> 8));
345                         output[outOffset + op++] = (byte) (MAX_LEN - 2 - 7 - 2);
346                         output[outOffset + op++] = (byte) (distance & 255);
347                         len -= MAX_LEN - 2;
348                     }
349                 }
350 
351                 if (len < 7) {
352                     output[outOffset + op++] = (byte) ((len << 5) + (distance >>> 8));
353                     output[outOffset + op++] = (byte) (distance & 255);
354                 } else {
355                     output[outOffset + op++] = (byte) ((7 << 5) + (distance >>> 8));
356                     output[outOffset + op++] = (byte) (len - 7);
357                     output[outOffset + op++] = (byte) (distance & 255);
358                 }
359             }
360 
361             /* update the hash at match boundary */
362             //HASH_FUNCTION(hval,ip);
363             hval = hashFunction(input, inOffset + ip);
364             htab[hval] = ip++;
365 
366             //HASH_FUNCTION(hval,ip);
367             hval = hashFunction(input, inOffset + ip);
368             htab[hval] = ip++;
369 
370             /* assuming literal copy */
371             output[outOffset + op++] = MAX_COPY - 1;
372 
373             continue;
374 
375             // Moved to be inline, with a 'continue'
376             /*
377              * literal:
378              *
379               output[outOffset + op++] = input[inOffset + anchor++];
380               ip = anchor;
381               copy++;
382               if(copy == MAX_COPY){
383                 copy = 0;
384                 output[outOffset + op++] = MAX_COPY-1;
385               }
386             */
387         }
388 
389         /* left-over as literal copy */
390         ipBound++;
391         while (ip <= ipBound) {
392             output[outOffset + op++] = input[inOffset + ip++];
393             copy++;
394             if (copy == MAX_COPY) {
395                 copy = 0;
396                 output[outOffset + op++] = MAX_COPY - 1;
397             }
398         }
399 
400         /* if we have copied something, adjust the copy length */
401         if (copy != 0) {
402             //*(op-copy-1) = copy-1;
403             output[outOffset + op - copy - 1] = (byte) (copy - 1);
404         } else {
405             op--;
406         }
407 
408         if (level == LEVEL_2) {
409             /* marker for fastlz2 */
410             output[outOffset] |= 1 << 5;
411         }
412 
413         return op;
414     }
415 
416     /**
417      * Decompress a block of compressed data and returns the size of the decompressed block.
418      * If error occurs, e.g. the compressed data is corrupted or the output buffer is not large
419      * enough, then 0 (zero) will be returned instead.
420      *
421      * Decompression is memory safe and guaranteed not to write the output buffer
422      * more than what is specified in outLength.
423      */
424     static int decompress(final byte[] input, final int inOffset, final int inLength,
425                           final byte[] output, final int outOffset, final int outLength) {
426         //int level = ((*(const flzuint8*)input) >> 5) + 1;
427         final int level = (input[inOffset] >> 5) + 1;
428         if (level != LEVEL_1 && level != LEVEL_2) {
429             throw new DecompressionException(String.format(
430                     "invalid level: %d (expected: %d or %d)", level, LEVEL_1, LEVEL_2
431             ));
432         }
433 
434         // const flzuint8* ip = (const flzuint8*) input;
435         int ip = 0;
436         // flzuint8* op = (flzuint8*) output;
437         int op = 0;
438         // flzuint32 ctrl = (*ip++) & 31;
439         long ctrl = input[inOffset + ip++] & 31;
440 
441         int loop = 1;
442         do {
443             //  const flzuint8* ref = op;
444             int ref = op;
445             // flzuint32 len = ctrl >> 5;
446             long len = ctrl >> 5;
447             // flzuint32 ofs = (ctrl & 31) << 8;
448             long ofs = (ctrl & 31) << 8;
449 
450             if (ctrl >= 32) {
451                 len--;
452                 // ref -= ofs;
453                 ref -= ofs;
454 
455                 int code;
456                 if (len == 6) {
457                     if (level == LEVEL_1) {
458                         // len += *ip++;
459                         len += input[inOffset + ip++] & 0xFF;
460                     } else {
461                         do {
462                             code = input[inOffset + ip++] & 0xFF;
463                             len += code;
464                         } while (code == 255);
465                     }
466                 }
467                 if (level == LEVEL_1) {
468                     //  ref -= *ip++;
469                     ref -= input[inOffset + ip++] & 0xFF;
470                 } else {
471                     code = input[inOffset + ip++] & 0xFF;
472                     ref -= code;
473 
474                     /* match from 16-bit distance */
475                     // if(FASTLZ_UNEXPECT_CONDITIONAL(code==255))
476                     // if(FASTLZ_EXPECT_CONDITIONAL(ofs==(31 << 8)))
477                     if (code == 255 && ofs == 31 << 8) {
478                         ofs = (input[inOffset + ip++] & 0xFF) << 8;
479                         ofs += input[inOffset + ip++] & 0xFF;
480 
481                         ref = (int) (op - ofs - MAX_DISTANCE);
482                     }
483                 }
484 
485                 // if the output index + length of block(?) + 3(?) is over the output limit?
486                 if (op + len + 3 > outLength) {
487                     return 0;
488                 }
489 
490                 // if (FASTLZ_UNEXPECT_CONDITIONAL(ref-1 < (flzuint8 *)output))
491                 // if the address space of ref-1 is < the address of output?
492                 // if we are still at the beginning of the output address?
493                 if (ref - 1 < 0) {
494                     return 0;
495                 }
496 
497                 if (ip < inLength) {
498                     ctrl = input[inOffset + ip++] & 0xFF;
499                 } else {
500                     loop = 0;
501                 }
502 
503                 if (ref == op) {
504                     /* optimize copy for a run */
505                     // flzuint8 b = ref[-1];
506                     byte b = output[outOffset + ref - 1];
507                     output[outOffset + op++] = b;
508                     output[outOffset + op++] = b;
509                     output[outOffset + op++] = b;
510                     while (len != 0) {
511                         output[outOffset + op++] = b;
512                         --len;
513                     }
514                 } else {
515                     /* copy from reference */
516                     ref--;
517 
518                     // *op++ = *ref++;
519                     output[outOffset + op++] = output[outOffset + ref++];
520                     output[outOffset + op++] = output[outOffset + ref++];
521                     output[outOffset + op++] = output[outOffset + ref++];
522 
523                     while (len != 0) {
524                         output[outOffset + op++] = output[outOffset + ref++];
525                         --len;
526                     }
527                 }
528             } else {
529                 ctrl++;
530 
531                 if (op + ctrl > outLength) {
532                     return 0;
533                 }
534                 if (ip + ctrl > inLength) {
535                     return 0;
536                 }
537 
538                 //*op++ = *ip++;
539                 output[outOffset + op++] = input[inOffset + ip++];
540 
541                 for (--ctrl; ctrl != 0; ctrl--) {
542                     // *op++ = *ip++;
543                     output[outOffset + op++] = input[inOffset + ip++];
544                 }
545 
546                 loop = ip < inLength ? 1 : 0;
547                 if (loop != 0) {
548                     //  ctrl = *ip++;
549                     ctrl = input[inOffset + ip++] & 0xFF;
550                 }
551             }
552 
553         // while(FASTLZ_EXPECT_CONDITIONAL(loop));
554         } while (loop != 0);
555 
556         //  return op - (flzuint8*)output;
557         return op;
558     }
559 
560     private static int hashFunction(byte[] p, int offset) {
561         int v = readU16(p, offset);
562         v ^= readU16(p, offset + 1) ^ v >> 16 - HASH_LOG;
563         v &= HASH_MASK;
564         return v;
565     }
566 
567     private static int readU16(byte[] data, int offset) {
568         if (offset + 1 >= data.length) {
569             return data[offset] & 0xff;
570         }
571         return  (data[offset + 1] & 0xff) << 8 | data[offset] & 0xff;
572     }
573 
574     private FastLz() { }
575 }