View Javadoc
1   /*
2    * Copyright 2014 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.handler.codec.compression;
17  
18  /**
19   * Core of FastLZ compression algorithm.
20   *
21   * This class provides methods for compression and decompression of buffers and saves
22   * constants which use by {@link FastLzFrameEncoder} and {@link FastLzFrameDecoder}.
23   *
24   * This is refactored code of <a href="https://code.google.com/p/jfastlz/">jfastlz</a>
25   * library written by William Kinney.
26   */
27  final class FastLz {
28  
29      private static final int MAX_DISTANCE = 8191;
30      private static final int MAX_FARDISTANCE = 65535 + MAX_DISTANCE - 1;
31  
32      private static final int HASH_LOG = 13;
33      private static final int HASH_SIZE = 1 << HASH_LOG; // 8192
34      private static final int HASH_MASK = HASH_SIZE - 1;
35  
36      private static final int MAX_COPY = 32;
37      private static final int MAX_LEN = 256 + 8;
38  
39      private static final int MIN_RECOMENDED_LENGTH_FOR_LEVEL_2 = 1024 * 64;
40  
41      static final int MAGIC_NUMBER = 'F' << 16 | 'L' << 8 | 'Z';
42  
43      static final byte BLOCK_TYPE_NON_COMPRESSED = 0x00;
44      static final byte     BLOCK_TYPE_COMPRESSED = 0x01;
45      static final byte    BLOCK_WITHOUT_CHECKSUM = 0x00;
46      static final byte       BLOCK_WITH_CHECKSUM = 0x10;
47  
48      static final int OPTIONS_OFFSET = 3;
49      static final int CHECKSUM_OFFSET = 4;
50  
51      static final int MAX_CHUNK_LENGTH = 0xFFFF;
52  
53      /**
54       * Do not call {@link #compress(byte[], int, int, byte[], int, int)} for input buffers
55       * which length less than this value.
56       */
57      static final int MIN_LENGTH_TO_COMPRESSION = 32;
58  
59      /**
60       * In this case {@link #compress(byte[], int, int, byte[], int, int)} will choose level
61       * automatically depending on the length of the input buffer. If length less than
62       * {@link #MIN_RECOMENDED_LENGTH_FOR_LEVEL_2} {@link #LEVEL_1} will be chosen,
63       * otherwise {@link #LEVEL_2}.
64       */
65      static final int LEVEL_AUTO = 0;
66  
67      /**
68       * Level 1 is the fastest compression and generally useful for short data.
69       */
70      static final int LEVEL_1 = 1;
71  
72      /**
73       * Level 2 is slightly slower but it gives better compression ratio.
74       */
75      static final int LEVEL_2 = 2;
76  
77      /**
78       * The output buffer must be at least 6% larger than the input buffer and can not be smaller than 66 bytes.
79       * @param inputLength length of input buffer
80       * @return Maximum output buffer length
81       */
82      static int calculateOutputBufferLength(int inputLength) {
83          final int outputLength = (int) (inputLength * 1.06);
84          return Math.max(outputLength, 66);
85      }
86  
87      /**
88       * Compress a block of data in the input buffer and returns the size of compressed block.
89       * The size of input buffer is specified by length. The minimum input buffer size is 32.
90       *
91       * If the input is not compressible, the return value might be larger than length (input buffer size).
92       */
93      @SuppressWarnings("IdentityBinaryExpression")
94      static int compress(final byte[] input, final int inOffset, final int inLength,
95                          final byte[] output, final int outOffset, final int proposedLevel) {
96          final int level;
97          if (proposedLevel == LEVEL_AUTO) {
98              level = inLength < MIN_RECOMENDED_LENGTH_FOR_LEVEL_2 ? LEVEL_1 : LEVEL_2;
99          } else {
100             level = proposedLevel;
101         }
102 
103         int ip = 0;
104         int ipBound = ip + inLength - 2;
105         int ipLimit = ip + inLength - 12;
106 
107         int op = 0;
108 
109         // const flzuint8* htab[HASH_SIZE];
110         int[] htab = new int[HASH_SIZE];
111         // const flzuint8** hslot;
112         int hslot;
113         // flzuint32 hval;
114         // int OK b/c address starting from 0
115         int hval;
116         // flzuint32 copy;
117         // int OK b/c address starting from 0
118         int copy;
119 
120         /* sanity check */
121         if (inLength < 4) {
122             if (inLength != 0) {
123                 // *op++ = length-1;
124                 output[outOffset + op++] = (byte) (inLength - 1);
125                 ipBound++;
126                 while (ip <= ipBound) {
127                     output[outOffset + op++] = input[inOffset + ip++];
128                 }
129                 return inLength + 1;
130             }
131             // else
132             return 0;
133         }
134 
135         /* initializes hash table */
136         //  for (hslot = htab; hslot < htab + HASH_SIZE; hslot++)
137         for (hslot = 0; hslot < HASH_SIZE; hslot++) {
138             //*hslot = ip;
139             htab[hslot] = ip;
140         }
141 
142         /* we start with literal copy */
143         copy = 2;
144         output[outOffset + op++] = MAX_COPY - 1;
145         output[outOffset + op++] = input[inOffset + ip++];
146         output[outOffset + op++] = input[inOffset + ip++];
147 
148         /* main loop */
149         while (ip < ipLimit) {
150             int ref = 0;
151 
152             long distance = 0;
153 
154             /* minimum match length */
155             // flzuint32 len = 3;
156             // int OK b/c len is 0 and octal based
157             int len = 3;
158 
159             /* comparison starting-point */
160             int anchor = ip;
161 
162             boolean matchLabel = false;
163 
164             /* check for a run */
165             if (level == LEVEL_2) {
166                 //if(ip[0] == ip[-1] && FASTLZ_READU16(ip-1)==FASTLZ_READU16(ip+1))
167                 if (input[inOffset + ip] == input[inOffset + ip - 1] &&
168                         readU16(input, inOffset + ip - 1) == readU16(input, inOffset + ip + 1)) {
169                     distance = 1;
170                     ip += 3;
171                     ref = anchor - 1 + 3;
172 
173                     /*
174                      * goto match;
175                      */
176                     matchLabel = true;
177                 }
178             }
179             if (!matchLabel) {
180                 /* find potential match */
181                 // HASH_FUNCTION(hval,ip);
182                 hval = hashFunction(input, inOffset + ip);
183                 // hslot = htab + hval;
184                 hslot = hval;
185                 // ref = htab[hval];
186                 ref = htab[hval];
187 
188                 /* calculate distance to the match */
189                 distance = anchor - ref;
190 
191                 /* update hash table */
192                 //*hslot = anchor;
193                 htab[hslot] = anchor;
194 
195                 /* is this a match? check the first 3 bytes */
196                 if (distance == 0
197                         || (level == LEVEL_1 ? distance >= MAX_DISTANCE : distance >= MAX_FARDISTANCE)
198                         || input[inOffset + ref++] != input[inOffset + ip++]
199                         || input[inOffset + ref++] != input[inOffset + ip++]
200                         || input[inOffset + ref++] != input[inOffset + ip++]) {
201                     /*
202                      * goto literal;
203                      */
204                     output[outOffset + op++] = input[inOffset + anchor++];
205                     ip = anchor;
206                     copy++;
207                     if (copy == MAX_COPY) {
208                         copy = 0;
209                         output[outOffset + op++] = MAX_COPY - 1;
210                     }
211                     continue;
212                 }
213 
214                 if (level == LEVEL_2) {
215                     /* far, needs at least 5-byte match */
216                     if (distance >= MAX_DISTANCE) {
217                         if (input[inOffset + ip++] != input[inOffset + ref++]
218                                 || input[inOffset + ip++] != input[inOffset + ref++]) {
219                             /*
220                              * goto literal;
221                              */
222                             output[outOffset + op++] = input[inOffset + anchor++];
223                             ip = anchor;
224                             copy++;
225                             if (copy == MAX_COPY) {
226                                 copy = 0;
227                                 output[outOffset + op++] = MAX_COPY - 1;
228                             }
229                             continue;
230                         }
231                         len += 2;
232                     }
233                 }
234             } // end if(!matchLabel)
235             /*
236              * match:
237              */
238             /* last matched byte */
239             ip = anchor + len;
240 
241             /* distance is biased */
242             distance--;
243 
244             if (distance == 0) {
245                 /* zero distance means a run */
246                 //flzuint8 x = ip[-1];
247                 byte x = input[inOffset + ip - 1];
248                 while (ip < ipBound) {
249                     if (input[inOffset + ref++] != x) {
250                         break;
251                     } else {
252                         ip++;
253                     }
254                 }
255             } else {
256                 for (;;) {
257                     /* safe because the outer check against ip limit */
258                     if (input[inOffset + ref++] != input[inOffset + ip++]) {
259                         break;
260                     }
261                     if (input[inOffset + ref++] != input[inOffset + ip++]) {
262                         break;
263                     }
264                     if (input[inOffset + ref++] != input[inOffset + ip++]) {
265                         break;
266                     }
267                     if (input[inOffset + ref++] != input[inOffset + ip++]) {
268                         break;
269                     }
270                     if (input[inOffset + ref++] != input[inOffset + ip++]) {
271                         break;
272                     }
273                     if (input[inOffset + ref++] != input[inOffset + ip++]) {
274                         break;
275                     }
276                     if (input[inOffset + ref++] != input[inOffset + ip++]) {
277                         break;
278                     }
279                     if (input[inOffset + ref++] != input[inOffset + ip++]) {
280                         break;
281                     }
282                     while (ip < ipBound) {
283                         if (input[inOffset + ref++] != input[inOffset + ip++]) {
284                             break;
285                         }
286                     }
287                     break;
288                 }
289             }
290 
291             /* if we have copied something, adjust the copy count */
292             if (copy != 0) {
293                 /* copy is biased, '0' means 1 byte copy */
294                 // *(op-copy-1) = copy-1;
295                 output[outOffset + op - copy - 1] = (byte) (copy - 1);
296             } else {
297                 /* back, to overwrite the copy count */
298                 op--;
299             }
300 
301             /* reset literal counter */
302             copy = 0;
303 
304             /* length is biased, '1' means a match of 3 bytes */
305             ip -= 3;
306             len = ip - anchor;
307 
308             /* encode the match */
309             if (level == LEVEL_2) {
310                 if (distance < MAX_DISTANCE) {
311                     if (len < 7) {
312                         output[outOffset + op++] = (byte) ((len << 5) + (distance >>> 8));
313                         output[outOffset + op++] = (byte) (distance & 255);
314                     } else {
315                         output[outOffset + op++] = (byte) ((7 << 5) + (distance >>> 8));
316                         for (len -= 7; len >= 255; len -= 255) {
317                             output[outOffset + op++] = (byte) 255;
318                         }
319                         output[outOffset + op++] = (byte) len;
320                         output[outOffset + op++] = (byte) (distance & 255);
321                     }
322                 } else {
323                     /* far away, but not yet in the another galaxy... */
324                     if (len < 7) {
325                         distance -= MAX_DISTANCE;
326                         output[outOffset + op++] = (byte) ((len << 5) + 31);
327                         output[outOffset + op++] = (byte) 255;
328                         output[outOffset + op++] = (byte) (distance >>> 8);
329                         output[outOffset + op++] = (byte) (distance & 255);
330                     } else {
331                         distance -= MAX_DISTANCE;
332                         output[outOffset + op++] = (byte) ((7 << 5) + 31);
333                         for (len -= 7; len >= 255; len -= 255) {
334                             output[outOffset + op++] = (byte) 255;
335                         }
336                         output[outOffset + op++] = (byte) len;
337                         output[outOffset + op++] = (byte) 255;
338                         output[outOffset + op++] = (byte) (distance >>> 8);
339                         output[outOffset + op++] = (byte) (distance & 255);
340                     }
341                 }
342             } else {
343                 if (len > MAX_LEN - 2) {
344                     while (len > MAX_LEN - 2) {
345                         output[outOffset + op++] = (byte) ((7 << 5) + (distance >>> 8));
346                         output[outOffset + op++] = (byte) (MAX_LEN - 2 - 7 - 2);
347                         output[outOffset + op++] = (byte) (distance & 255);
348                         len -= MAX_LEN - 2;
349                     }
350                 }
351 
352                 if (len < 7) {
353                     output[outOffset + op++] = (byte) ((len << 5) + (distance >>> 8));
354                     output[outOffset + op++] = (byte) (distance & 255);
355                 } else {
356                     output[outOffset + op++] = (byte) ((7 << 5) + (distance >>> 8));
357                     output[outOffset + op++] = (byte) (len - 7);
358                     output[outOffset + op++] = (byte) (distance & 255);
359                 }
360             }
361 
362             /* update the hash at match boundary */
363             //HASH_FUNCTION(hval,ip);
364             hval = hashFunction(input, inOffset + ip);
365             htab[hval] = ip++;
366 
367             //HASH_FUNCTION(hval,ip);
368             hval = hashFunction(input, inOffset + ip);
369             htab[hval] = ip++;
370 
371             /* assuming literal copy */
372             output[outOffset + op++] = MAX_COPY - 1;
373 
374             continue;
375 
376             // Moved to be inline, with a 'continue'
377             /*
378              * literal:
379              *
380               output[outOffset + op++] = input[inOffset + anchor++];
381               ip = anchor;
382               copy++;
383               if(copy == MAX_COPY){
384                 copy = 0;
385                 output[outOffset + op++] = MAX_COPY-1;
386               }
387             */
388         }
389 
390         /* left-over as literal copy */
391         ipBound++;
392         while (ip <= ipBound) {
393             output[outOffset + op++] = input[inOffset + ip++];
394             copy++;
395             if (copy == MAX_COPY) {
396                 copy = 0;
397                 output[outOffset + op++] = MAX_COPY - 1;
398             }
399         }
400 
401         /* if we have copied something, adjust the copy length */
402         if (copy != 0) {
403             //*(op-copy-1) = copy-1;
404             output[outOffset + op - copy - 1] = (byte) (copy - 1);
405         } else {
406             op--;
407         }
408 
409         if (level == LEVEL_2) {
410             /* marker for fastlz2 */
411             output[outOffset] |= 1 << 5;
412         }
413 
414         return op;
415     }
416 
417     /**
418      * Decompress a block of compressed data and returns the size of the decompressed block.
419      * If error occurs, e.g. the compressed data is corrupted or the output buffer is not large
420      * enough, then 0 (zero) will be returned instead.
421      *
422      * Decompression is memory safe and guaranteed not to write the output buffer
423      * more than what is specified in outLength.
424      */
425     static int decompress(final byte[] input, final int inOffset, final int inLength,
426                           final byte[] output, final int outOffset, final int outLength) {
427         //int level = ((*(const flzuint8*)input) >> 5) + 1;
428         final int level = (input[inOffset] >> 5) + 1;
429         if (level != LEVEL_1 && level != LEVEL_2) {
430             throw new DecompressionException(String.format(
431                     "invalid level: %d (expected: %d or %d)", level, LEVEL_1, LEVEL_2
432             ));
433         }
434 
435         // const flzuint8* ip = (const flzuint8*) input;
436         int ip = 0;
437         // flzuint8* op = (flzuint8*) output;
438         int op = 0;
439         // flzuint32 ctrl = (*ip++) & 31;
440         long ctrl = input[inOffset + ip++] & 31;
441 
442         int loop = 1;
443         do {
444             //  const flzuint8* ref = op;
445             int ref = op;
446             // flzuint32 len = ctrl >> 5;
447             long len = ctrl >> 5;
448             // flzuint32 ofs = (ctrl & 31) << 8;
449             long ofs = (ctrl & 31) << 8;
450 
451             if (ctrl >= 32) {
452                 len--;
453                 // ref -= ofs;
454                 ref -= ofs;
455 
456                 int code;
457                 if (len == 6) {
458                     if (level == LEVEL_1) {
459                         // len += *ip++;
460                         len += input[inOffset + ip++] & 0xFF;
461                     } else {
462                         do {
463                             code = input[inOffset + ip++] & 0xFF;
464                             len += code;
465                         } while (code == 255);
466                     }
467                 }
468                 if (level == LEVEL_1) {
469                     //  ref -= *ip++;
470                     ref -= input[inOffset + ip++] & 0xFF;
471                 } else {
472                     code = input[inOffset + ip++] & 0xFF;
473                     ref -= code;
474 
475                     /* match from 16-bit distance */
476                     // if(FASTLZ_UNEXPECT_CONDITIONAL(code==255))
477                     // if(FASTLZ_EXPECT_CONDITIONAL(ofs==(31 << 8)))
478                     if (code == 255 && ofs == 31 << 8) {
479                         ofs = (input[inOffset + ip++] & 0xFF) << 8;
480                         ofs += input[inOffset + ip++] & 0xFF;
481 
482                         ref = (int) (op - ofs - MAX_DISTANCE);
483                     }
484                 }
485 
486                 // if the output index + length of block(?) + 3(?) is over the output limit?
487                 if (op + len + 3 > outLength) {
488                     return 0;
489                 }
490 
491                 // if (FASTLZ_UNEXPECT_CONDITIONAL(ref-1 < (flzuint8 *)output))
492                 // if the address space of ref-1 is < the address of output?
493                 // if we are still at the beginning of the output address?
494                 if (ref - 1 < 0) {
495                     return 0;
496                 }
497 
498                 if (ip < inLength) {
499                     ctrl = input[inOffset + ip++] & 0xFF;
500                 } else {
501                     loop = 0;
502                 }
503 
504                 if (ref == op) {
505                     /* optimize copy for a run */
506                     // flzuint8 b = ref[-1];
507                     byte b = output[outOffset + ref - 1];
508                     output[outOffset + op++] = b;
509                     output[outOffset + op++] = b;
510                     output[outOffset + op++] = b;
511                     while (len != 0) {
512                         output[outOffset + op++] = b;
513                         --len;
514                     }
515                 } else {
516                     /* copy from reference */
517                     ref--;
518 
519                     // *op++ = *ref++;
520                     output[outOffset + op++] = output[outOffset + ref++];
521                     output[outOffset + op++] = output[outOffset + ref++];
522                     output[outOffset + op++] = output[outOffset + ref++];
523 
524                     while (len != 0) {
525                         output[outOffset + op++] = output[outOffset + ref++];
526                         --len;
527                     }
528                 }
529             } else {
530                 ctrl++;
531 
532                 if (op + ctrl > outLength) {
533                     return 0;
534                 }
535                 if (ip + ctrl > inLength) {
536                     return 0;
537                 }
538 
539                 //*op++ = *ip++;
540                 output[outOffset + op++] = input[inOffset + ip++];
541 
542                 for (--ctrl; ctrl != 0; ctrl--) {
543                     // *op++ = *ip++;
544                     output[outOffset + op++] = input[inOffset + ip++];
545                 }
546 
547                 loop = ip < inLength ? 1 : 0;
548                 if (loop != 0) {
549                     //  ctrl = *ip++;
550                     ctrl = input[inOffset + ip++] & 0xFF;
551                 }
552             }
553 
554         // while(FASTLZ_EXPECT_CONDITIONAL(loop));
555         } while (loop != 0);
556 
557         //  return op - (flzuint8*)output;
558         return op;
559     }
560 
561     private static int hashFunction(byte[] p, int offset) {
562         int v = readU16(p, offset);
563         v ^= readU16(p, offset + 1) ^ v >> 16 - HASH_LOG;
564         v &= HASH_MASK;
565         return v;
566     }
567 
568     private static int readU16(byte[] data, int offset) {
569         if (offset + 1 >= data.length) {
570             return data[offset] & 0xff;
571         }
572         return  (data[offset + 1] & 0xff) << 8 | data[offset] & 0xff;
573     }
574 
575     private FastLz() { }
576 }