View Javadoc
1   /*
2    * Copyright 2014 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty5.handler.codec.compression;
17  
18  import io.netty5.buffer.api.Buffer;
19  
20  /**
21   * Core of FastLZ compression algorithm.
22   *
23   * This class provides methods for compression and decompression of buffers and saves
24   * constants which use by {@link FastLzCompressor} and {@link FastLzDecompressor}.
25   *
26   * This is refactored code of <a href="https://code.google.com/p/jfastlz/">jfastlz</a>
27   * library written by William Kinney.
28   */
29  final class FastLz {
30  
31      private static final int MAX_DISTANCE = 8191;
32      private static final int MAX_FARDISTANCE = 65535 + MAX_DISTANCE - 1;
33  
34      private static final int HASH_LOG = 13;
35      private static final int HASH_SIZE = 1 << HASH_LOG; // 8192
36      private static final int HASH_MASK = HASH_SIZE - 1;
37  
38      private static final int MAX_COPY = 32;
39      private static final int MAX_LEN = 256 + 8;
40  
41      private static final int MIN_RECOMENDED_LENGTH_FOR_LEVEL_2 = 1024 * 64;
42  
43      static final int MAGIC_NUMBER = 'F' << 16 | 'L' << 8 | 'Z';
44  
45      static final byte BLOCK_TYPE_NON_COMPRESSED = 0x00;
46      static final byte     BLOCK_TYPE_COMPRESSED = 0x01;
47      static final byte    BLOCK_WITHOUT_CHECKSUM = 0x00;
48      static final byte       BLOCK_WITH_CHECKSUM = 0x10;
49  
50      static final int OPTIONS_OFFSET = 3;
51      static final int CHECKSUM_OFFSET = 4;
52  
53      static final int MAX_CHUNK_LENGTH = 0xFFFF;
54  
55      /**
56       * Do not call {@link #compress(Buffer, int, int, Buffer, int, int)} for input buffers
57       * which length less than this value.
58       */
59      static final int MIN_LENGTH_TO_COMPRESSION = 32;
60  
61      /**
62       * In this case {@link #compress(Buffer, int, int, Buffer, int, int)} will choose level
63       * automatically depending on the length of the input buffer. If length less than
64       * {@link #MIN_RECOMENDED_LENGTH_FOR_LEVEL_2} {@link #LEVEL_1} will be chosen,
65       * otherwise {@link #LEVEL_2}.
66       */
67      static final int LEVEL_AUTO = 0;
68  
69      /**
70       * Level 1 is the fastest compression and generally useful for short data.
71       */
72      static final int LEVEL_1 = 1;
73  
74      /**
75       * Level 2 is slightly slower but it gives better compression ratio.
76       */
77      static final int LEVEL_2 = 2;
78  
79      /**
80       * The output buffer must be at least 6% larger than the input buffer and can not be smaller than 66 bytes.
81       * @param inputLength length of input buffer
82       * @return Maximum output buffer length
83       */
84      static int calculateOutputBufferLength(int inputLength) {
85          final int outputLength = (int) (inputLength * 1.06);
86          return Math.max(outputLength, 66);
87      }
88  
89      /**
90       * Compress a block of data in the input buffer and returns the size of compressed block.
91       * The size of input buffer is specified by length. The minimum input buffer size is 32.
92       *
93       * If the input is not compressible, the return value might be larger than length (input buffer size).
94       */
95      @SuppressWarnings("IdentityBinaryExpression")
96      static int compress(final Buffer input, final int inOffset, final int inLength,
97                          final Buffer output, final int outOffset, final int proposedLevel) {
98          final int level;
99          if (proposedLevel == LEVEL_AUTO) {
100             level = inLength < MIN_RECOMENDED_LENGTH_FOR_LEVEL_2 ? LEVEL_1 : LEVEL_2;
101         } else {
102             level = proposedLevel;
103         }
104 
105         int ip = 0;
106         int ipBound = ip + inLength - 2;
107         int ipLimit = ip + inLength - 12;
108 
109         int op = 0;
110 
111         // const flzuint8* htab[HASH_SIZE];
112         int[] htab = new int[HASH_SIZE];
113         // const flzuint8** hslot;
114         int hslot;
115         // flzuint32 hval;
116         // int OK b/c address starting from 0
117         int hval;
118         // flzuint32 copy;
119         // int OK b/c address starting from 0
120         int copy;
121 
122         /* sanity check */
123         if (inLength < 4) {
124             if (inLength != 0) {
125                 // *op++ = length-1;
126                 output.setByte(outOffset + op++, (byte) (inLength - 1));
127                 ipBound++;
128                 while (ip <= ipBound) {
129                     output.setByte(outOffset + op++, input.getByte(inOffset + ip++));
130                 }
131                 return inLength + 1;
132             }
133             // else
134             return 0;
135         }
136 
137         /* initializes hash table */
138         //  for (hslot = htab; hslot < htab + HASH_SIZE; hslot++)
139         for (hslot = 0; hslot < HASH_SIZE; hslot++) {
140             //*hslot = ip;
141             htab[hslot] = ip;
142         }
143 
144         /* we start with literal copy */
145         copy = 2;
146         output.setByte(outOffset + op++, (byte) (MAX_COPY - 1));
147         output.setByte(outOffset + op++, input.getByte(inOffset + ip++));
148         output.setByte(outOffset + op++, input.getByte(inOffset + ip++));
149 
150         /* main loop */
151         while (ip < ipLimit) {
152             int ref = 0;
153 
154             long distance = 0;
155 
156             /* minimum match length */
157             // flzuint32 len = 3;
158             // int OK b/c len is 0 and octal based
159             int len = 3;
160 
161             /* comparison starting-point */
162             int anchor = ip;
163 
164             boolean matchLabel = false;
165 
166             /* check for a run */
167             if (level == LEVEL_2) {
168                 //if(ip[0] == ip[-1] && FASTLZ_READU16(ip-1)==FASTLZ_READU16(ip+1))
169                 if (input.getByte(inOffset + ip) == input.getByte(inOffset + ip - 1) &&
170                         readU16(input, inOffset + ip - 1) == readU16(input, inOffset + ip + 1)) {
171                     distance = 1;
172                     ip += 3;
173                     ref = anchor + (3 - 1);
174 
175                     /*
176                      * goto match;
177                      */
178                     matchLabel = true;
179                 }
180             }
181             if (!matchLabel) {
182                 /* find potential match */
183                 // HASH_FUNCTION(hval,ip);
184                 hval = hashFunction(input, inOffset + ip);
185                 // hslot = htab + hval;
186                 hslot = hval;
187                 // ref = htab[hval];
188                 ref = htab[hval];
189 
190                 /* calculate distance to the match */
191                 distance = anchor - ref;
192 
193                 /* update hash table */
194                 //*hslot = anchor;
195                 htab[hslot] = anchor;
196 
197                 /* is this a match? check the first 3 bytes */
198                 if (distance == 0
199                         || (level == LEVEL_1 ? distance >= MAX_DISTANCE : distance >= MAX_FARDISTANCE)
200                         || input.getByte(inOffset + ref++) != input.getByte(inOffset + ip++)
201                         || input.getByte(inOffset + ref++) != input.getByte(inOffset + ip++)
202                         || input.getByte(inOffset + ref++) != input.getByte(inOffset + ip++)) {
203                     /*
204                      * goto literal;
205                      */
206                     output.setByte(outOffset + op++, input.getByte(inOffset + anchor++));
207                     ip = anchor;
208                     copy++;
209                     if (copy == MAX_COPY) {
210                         copy = 0;
211                         output.setByte(outOffset + op++, (byte) (MAX_COPY - 1));
212                     }
213                     continue;
214                 }
215 
216                 if (level == LEVEL_2) {
217                     /* far, needs at least 5-byte match */
218                     if (distance >= MAX_DISTANCE) {
219                         if (input.getByte(inOffset + ip++) != input.getByte(inOffset + ref++)
220                                 || input.getByte(inOffset + ip++) != input.getByte(inOffset + ref++)) {
221                             /*
222                              * goto literal;
223                              */
224                             output.setByte(outOffset + op++, input.getByte(inOffset + anchor++));
225                             ip = anchor;
226                             copy++;
227                             if (copy == MAX_COPY) {
228                                 copy = 0;
229                                 output.setByte(outOffset + op++, (byte) (MAX_COPY - 1));
230                             }
231                             continue;
232                         }
233                         len += 2;
234                     }
235                 }
236             } // end if(!matchLabel)
237             /*
238              * match:
239              */
240             /* last matched byte */
241             ip = anchor + len;
242 
243             /* distance is biased */
244             distance--;
245 
246             if (distance == 0) {
247                 /* zero distance means a run */
248                 //flzuint8 x = ip[-1];
249                 byte x = input.getByte(inOffset + ip - 1);
250                 while (ip < ipBound) {
251                     if (input.getByte(inOffset + ref++) != x) {
252                         break;
253                     } else {
254                         ip++;
255                     }
256                 }
257             } else {
258                 /* safe because the outer check against ip limit */
259                 boolean missMatch = false;
260                 for (int i = 0; i < 8; i++) {
261                     if (input.getByte(inOffset + ref++) != input.getByte(inOffset + ip++)) {
262                         missMatch = true;
263                         break;
264                     }
265                 }
266                 if (!missMatch) {
267                     while (ip < ipBound) {
268                         if (input.getByte(inOffset + ref++) != input.getByte(inOffset + ip++)) {
269                             break;
270                         }
271                     }
272                 }
273             }
274 
275             /* if we have copied something, adjust the copy count */
276             if (copy != 0) {
277                 /* copy is biased, '0' means 1 byte copy */
278                 // *(op-copy-1) = copy-1;
279                 output.setByte(outOffset + op - copy - 1, (byte) (copy - 1));
280             } else {
281                 /* back, to overwrite the copy count */
282                 op--;
283             }
284 
285             /* reset literal counter */
286             copy = 0;
287 
288             /* length is biased, '1' means a match of 3 bytes */
289             ip -= 3;
290             len = ip - anchor;
291 
292             /* encode the match */
293             if (level == LEVEL_2) {
294                 if (distance < MAX_DISTANCE) {
295                     if (len < 7) {
296                         output.setByte(outOffset + op++, (byte) ((len << 5) + (distance >>> 8)));
297                         output.setByte(outOffset + op++, (byte) (distance & 255));
298                     } else {
299                         output.setByte(outOffset + op++, (byte) ((7 << 5) + (distance >>> 8)));
300                         for (len -= 7; len >= 255; len -= 255) {
301                             output.setByte(outOffset + op++, (byte) 255);
302                         }
303                         output.setByte(outOffset + op++, (byte) len);
304                         output.setByte(outOffset + op++, (byte) (distance & 255));
305                     }
306                 } else {
307                     /* far away, but not yet in the another galaxy... */
308                     if (len < 7) {
309                         distance -= MAX_DISTANCE;
310                         output.setByte(outOffset + op++, (byte) ((len << 5) + 31));
311                         output.setByte(outOffset + op++, (byte) 255);
312                         output.setByte(outOffset + op++, (byte) (distance >>> 8));
313                         output.setByte(outOffset + op++, (byte) (distance & 255));
314                     } else {
315                         distance -= MAX_DISTANCE;
316                         output.setByte(outOffset + op++, (byte) ((7 << 5) + 31));
317                         for (len -= 7; len >= 255; len -= 255) {
318                             output.setByte(outOffset + op++, (byte) 255);
319                         }
320                         output.setByte(outOffset + op++, (byte) len);
321                         output.setByte(outOffset + op++, (byte) 255);
322                         output.setByte(outOffset + op++, (byte) (distance >>> 8));
323                         output.setByte(outOffset + op++, (byte) (distance & 255));
324                     }
325                 }
326             } else {
327                 if (len > MAX_LEN - 2) {
328                     while (len > MAX_LEN - 2) {
329                         output.setByte(outOffset + op++, (byte) ((7 << 5) + (distance >>> 8)));
330                         output.setByte(outOffset + op++, (byte) (MAX_LEN - 2 - 7 - 2));
331                         output.setByte(outOffset + op++, (byte) (distance & 255));
332                         len -= MAX_LEN - 2;
333                     }
334                 }
335 
336                 if (len < 7) {
337                     output.setByte(outOffset + op++, (byte) ((len << 5) + (distance >>> 8)));
338                     output.setByte(outOffset + op++, (byte) (distance & 255));
339                 } else {
340                     output.setByte(outOffset + op++, (byte) ((7 << 5) + (distance >>> 8)));
341                     output.setByte(outOffset + op++, (byte) (len - 7));
342                     output.setByte(outOffset + op++, (byte) (distance & 255));
343                 }
344             }
345 
346             /* update the hash at match boundary */
347             //HASH_FUNCTION(hval,ip);
348             hval = hashFunction(input, inOffset + ip);
349             htab[hval] = ip++;
350 
351             //HASH_FUNCTION(hval,ip);
352             hval = hashFunction(input, inOffset + ip);
353             htab[hval] = ip++;
354 
355             /* assuming literal copy */
356             output.setByte(outOffset + op++, (byte) (MAX_COPY - 1));
357 
358             continue;
359 
360             // Moved to be inline, with a 'continue'
361             /*
362              * literal:
363              *
364               output[outOffset + op++] = input[inOffset + anchor++];
365               ip = anchor;
366               copy++;
367               if(copy == MAX_COPY){
368                 copy = 0;
369                 output[outOffset + op++] = MAX_COPY-1;
370               }
371             */
372         }
373 
374         /* left-over as literal copy */
375         ipBound++;
376         while (ip <= ipBound) {
377             output.setByte(outOffset + op++, input.getByte(inOffset + ip++));
378             copy++;
379             if (copy == MAX_COPY) {
380                 copy = 0;
381                 output.setByte(outOffset + op++, (byte) (MAX_COPY - 1));
382             }
383         }
384 
385         /* if we have copied something, adjust the copy length */
386         if (copy != 0) {
387             //*(op-copy-1) = copy-1;
388             output.setByte(outOffset + op - copy - 1, (byte) (copy - 1));
389         } else {
390             op--;
391         }
392 
393         if (level == LEVEL_2) {
394             /* marker for fastlz2 */
395             output.setByte(outOffset, (byte) (output.getByte(outOffset) | 1 << 5));
396         }
397 
398         return op;
399     }
400 
401     /**
402      * Decompress a block of compressed data and returns the size of the decompressed block.
403      * If error occurs, e.g. the compressed data is corrupted or the output buffer is not large
404      * enough, then 0 (zero) will be returned instead.
405      *
406      * Decompression is memory safe and guaranteed not to write the output buffer
407      * more than what is specified in outLength.
408      */
409     static int decompress(final Buffer input, final int inOffset, final int inLength,
410                           final Buffer output, final int outOffset, final int outLength) {
411         //int level = ((*(const flzuint8*)input) >> 5) + 1;
412         final int level = (input.getByte(inOffset) >> 5) + 1;
413         if (level != LEVEL_1 && level != LEVEL_2) {
414             throw new DecompressionException(String.format(
415                     "invalid level: %d (expected: %d or %d)", level, LEVEL_1, LEVEL_2
416             ));
417         }
418 
419         // const flzuint8* ip = (const flzuint8*) input;
420         int ip = 0;
421         // flzuint8* op = (flzuint8*) output;
422         int op = 0;
423         // flzuint32 ctrl = (*ip++) & 31;
424         long ctrl = input.getByte(inOffset + ip++) & 31;
425 
426         int loop = 1;
427         do {
428             //  const flzuint8* ref = op;
429             int ref = op;
430             // flzuint32 len = ctrl >> 5;
431             long len = ctrl >> 5;
432             // flzuint32 ofs = (ctrl & 31) << 8;
433             long ofs = (ctrl & 31) << 8;
434 
435             if (ctrl >= 32) {
436                 len--;
437                 // ref -= ofs;
438                 ref -= ofs;
439 
440                 int code;
441                 if (len == 6) {
442                     if (level == LEVEL_1) {
443                         // len += *ip++;
444                         len += input.getUnsignedByte(inOffset + ip++);
445                     } else {
446                         do {
447                             code = input.getUnsignedByte(inOffset + ip++);
448                             len += code;
449                         } while (code == 255);
450                     }
451                 }
452                 if (level == LEVEL_1) {
453                     //  ref -= *ip++;
454                     ref -= input.getUnsignedByte(inOffset + ip++);
455                 } else {
456                     code = input.getUnsignedByte(inOffset + ip++);
457                     ref -= code;
458 
459                     /* match from 16-bit distance */
460                     // if(FASTLZ_UNEXPECT_CONDITIONAL(code==255))
461                     // if(FASTLZ_EXPECT_CONDITIONAL(ofs==(31 << 8)))
462                     if (code == 255 && ofs == 31 << 8) {
463                         ofs = input.getUnsignedByte(inOffset + ip++) << 8;
464                         ofs += input.getUnsignedByte(inOffset + ip++);
465 
466                         ref = (int) (op - ofs - MAX_DISTANCE);
467                     }
468                 }
469 
470                 // if the output index + length of block(?) + 3(?) is over the output limit?
471                 if (op + len + 3 > outLength) {
472                     return 0;
473                 }
474 
475                 // if (FASTLZ_UNEXPECT_CONDITIONAL(ref-1 < (flzuint8 *)output))
476                 // if the address space of ref-1 is < the address of output?
477                 // if we are still at the beginning of the output address?
478                 if (ref - 1 < 0) {
479                     return 0;
480                 }
481 
482                 if (ip < inLength) {
483                     ctrl = input.getUnsignedByte(inOffset + ip++);
484                 } else {
485                     loop = 0;
486                 }
487 
488                 if (ref == op) {
489                     /* optimize copy for a run */
490                     // flzuint8 b = ref[-1];
491                     byte b = output.getByte(outOffset + ref - 1);
492                     output.setByte(outOffset + op++, b);
493                     output.setByte(outOffset + op++, b);
494                     output.setByte(outOffset + op++, b);
495                     while (len != 0) {
496                         output.setByte(outOffset + op++, b);
497                         --len;
498                     }
499                 } else {
500                     /* copy from reference */
501                     ref--;
502 
503                     // *op++ = *ref++;
504                     output.setByte(outOffset + op++, output.getByte(outOffset + ref++));
505                     output.setByte(outOffset + op++, output.getByte(outOffset + ref++));
506                     output.setByte(outOffset + op++, output.getByte(outOffset + ref++));
507 
508                     while (len != 0) {
509                         output.setByte(outOffset + op++, output.getByte(outOffset + ref++));
510                         --len;
511                     }
512                 }
513             } else {
514                 ctrl++;
515 
516                 if (op + ctrl > outLength) {
517                     return 0;
518                 }
519                 if (ip + ctrl > inLength) {
520                     return 0;
521                 }
522 
523                 //*op++ = *ip++;
524                 output.setByte(outOffset + op++, input.getByte(inOffset + ip++));
525 
526                 for (--ctrl; ctrl != 0; ctrl--) {
527                     // *op++ = *ip++;
528                     output.setByte(outOffset + op++, input.getByte(inOffset + ip++));
529                 }
530 
531                 loop = ip < inLength ? 1 : 0;
532                 if (loop != 0) {
533                     //  ctrl = *ip++;
534                     ctrl = input.getUnsignedByte(inOffset + ip++);
535                 }
536             }
537 
538         // while(FASTLZ_EXPECT_CONDITIONAL(loop));
539         } while (loop != 0);
540 
541         //  return op - (flzuint8*)output;
542         return op;
543     }
544 
545     private static int hashFunction(Buffer p, int offset) {
546         int v = readU16(p, offset);
547         v ^= readU16(p, offset + 1) ^ v >> 16 - HASH_LOG;
548         v &= HASH_MASK;
549         return v;
550     }
551 
552     private static int readU16(Buffer data, int offset) {
553         if (offset + 1 >= data.readableBytes()) {
554             return data.getUnsignedByte(offset);
555         }
556         return data.getUnsignedByte(offset + 1) << 8 | data.getUnsignedByte(offset);
557     }
558 
559     private FastLz() { }
560 }