1 /*
2 * Copyright 2014 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package io.netty.handler.codec.compression;
17
18 import io.netty.buffer.ByteBuf;
19
20 /**
21 * Core of FastLZ compression algorithm.
22 *
23 * This class provides methods for compression and decompression of buffers and saves
24 * constants which use by {@link FastLzFrameEncoder} and {@link FastLzFrameDecoder}.
25 *
26 * This is refactored code of <a href="https://code.google.com/p/jfastlz/">jfastlz</a>
27 * library written by William Kinney.
28 */
29 final class FastLz {
30
31 private static final int MAX_DISTANCE = 8191;
32 private static final int MAX_FARDISTANCE = 65535 + MAX_DISTANCE - 1;
33
34 private static final int HASH_LOG = 13;
35 private static final int HASH_SIZE = 1 << HASH_LOG; // 8192
36 private static final int HASH_MASK = HASH_SIZE - 1;
37
38 private static final int MAX_COPY = 32;
39 private static final int MAX_LEN = 256 + 8;
40
41 private static final int MIN_RECOMENDED_LENGTH_FOR_LEVEL_2 = 1024 * 64;
42
43 static final int MAGIC_NUMBER = 'F' << 16 | 'L' << 8 | 'Z';
44
45 static final byte BLOCK_TYPE_NON_COMPRESSED = 0x00;
46 static final byte BLOCK_TYPE_COMPRESSED = 0x01;
47 static final byte BLOCK_WITHOUT_CHECKSUM = 0x00;
48 static final byte BLOCK_WITH_CHECKSUM = 0x10;
49
50 static final int OPTIONS_OFFSET = 3;
51 static final int CHECKSUM_OFFSET = 4;
52
53 static final int MAX_CHUNK_LENGTH = 0xFFFF;
54
55 /**
56 * Do not call {@link #compress(ByteBuf, int, int, ByteBuf, int, int)} for input buffers
57 * which length less than this value.
58 */
59 static final int MIN_LENGTH_TO_COMPRESSION = 32;
60
61 /**
62 * In this case {@link #compress(ByteBuf, int, int, ByteBuf, int, int)} will choose level
63 * automatically depending on the length of the input buffer. If length less than
64 * {@link #MIN_RECOMENDED_LENGTH_FOR_LEVEL_2} {@link #LEVEL_1} will be chosen,
65 * otherwise {@link #LEVEL_2}.
66 */
67 static final int LEVEL_AUTO = 0;
68
69 /**
70 * Level 1 is the fastest compression and generally useful for short data.
71 */
72 static final int LEVEL_1 = 1;
73
74 /**
75 * Level 2 is slightly slower but it gives better compression ratio.
76 */
77 static final int LEVEL_2 = 2;
78
79 /**
80 * The output buffer must be at least 6% larger than the input buffer and can not be smaller than 66 bytes.
81 * @param inputLength length of input buffer
82 * @return Maximum output buffer length
83 */
84 static int calculateOutputBufferLength(int inputLength) {
85 final int outputLength = (int) (inputLength * 1.06);
86 return Math.max(outputLength, 66);
87 }
88
89 /**
90 * Compress a block of data in the input buffer and returns the size of compressed block.
91 * The size of input buffer is specified by length. The minimum input buffer size is 32.
92 *
93 * If the input is not compressible, the return value might be larger than length (input buffer size).
94 */
95 @SuppressWarnings("IdentityBinaryExpression")
96 static int compress(final ByteBuf input, final int inOffset, final int inLength,
97 final ByteBuf output, final int outOffset, final int proposedLevel) {
98 final int level;
99 if (proposedLevel == LEVEL_AUTO) {
100 level = inLength < MIN_RECOMENDED_LENGTH_FOR_LEVEL_2 ? LEVEL_1 : LEVEL_2;
101 } else {
102 level = proposedLevel;
103 }
104
105 int ip = 0;
106 int ipBound = ip + inLength - 2;
107 int ipLimit = ip + inLength - 12;
108
109 int op = 0;
110
111 // const flzuint8* htab[HASH_SIZE];
112 int[] htab = new int[HASH_SIZE];
113 // const flzuint8** hslot;
114 int hslot;
115 // flzuint32 hval;
116 // int OK b/c address starting from 0
117 int hval;
118 // flzuint32 copy;
119 // int OK b/c address starting from 0
120 int copy;
121
122 /* sanity check */
123 if (inLength < 4) {
124 if (inLength != 0) {
125 // *op++ = length-1;
126 output.setByte(outOffset + op++, (byte) (inLength - 1));
127 ipBound++;
128 while (ip <= ipBound) {
129 output.setByte(outOffset + op++, input.getByte(inOffset + ip++));
130 }
131 return inLength + 1;
132 }
133 // else
134 return 0;
135 }
136
137 /* initializes hash table */
138 // for (hslot = htab; hslot < htab + HASH_SIZE; hslot++)
139 for (hslot = 0; hslot < HASH_SIZE; hslot++) {
140 //*hslot = ip;
141 htab[hslot] = ip;
142 }
143
144 /* we start with literal copy */
145 copy = 2;
146 output.setByte(outOffset + op++, MAX_COPY - 1);
147 output.setByte(outOffset + op++, input.getByte(inOffset + ip++));
148 output.setByte(outOffset + op++, input.getByte(inOffset + ip++));
149
150 /* main loop */
151 while (ip < ipLimit) {
152 int ref = 0;
153
154 long distance = 0;
155
156 /* minimum match length */
157 // flzuint32 len = 3;
158 // int OK b/c len is 0 and octal based
159 int len = 3;
160
161 /* comparison starting-point */
162 int anchor = ip;
163
164 boolean matchLabel = false;
165
166 /* check for a run */
167 if (level == LEVEL_2) {
168 //if(ip[0] == ip[-1] && FASTLZ_READU16(ip-1)==FASTLZ_READU16(ip+1))
169 if (input.getByte(inOffset + ip) == input.getByte(inOffset + ip - 1) &&
170 readU16(input, inOffset + ip - 1) == readU16(input, inOffset + ip + 1)) {
171 distance = 1;
172 ip += 3;
173 ref = anchor + (3 - 1);
174
175 /*
176 * goto match;
177 */
178 matchLabel = true;
179 }
180 }
181 if (!matchLabel) {
182 /* find potential match */
183 // HASH_FUNCTION(hval,ip);
184 hval = hashFunction(input, inOffset + ip);
185 // hslot = htab + hval;
186 hslot = hval;
187 // ref = htab[hval];
188 ref = htab[hval];
189
190 /* calculate distance to the match */
191 distance = anchor - ref;
192
193 /* update hash table */
194 //*hslot = anchor;
195 htab[hslot] = anchor;
196
197 /* is this a match? check the first 3 bytes */
198 if (distance == 0
199 || (level == LEVEL_1 ? distance >= MAX_DISTANCE : distance >= MAX_FARDISTANCE)
200 || input.getByte(inOffset + ref++) != input.getByte(inOffset + ip++)
201 || input.getByte(inOffset + ref++) != input.getByte(inOffset + ip++)
202 || input.getByte(inOffset + ref++) != input.getByte(inOffset + ip++)) {
203 /*
204 * goto literal;
205 */
206 output.setByte(outOffset + op++, input.getByte(inOffset + anchor++));
207 ip = anchor;
208 copy++;
209 if (copy == MAX_COPY) {
210 copy = 0;
211 output.setByte(outOffset + op++, MAX_COPY - 1);
212 }
213 continue;
214 }
215
216 if (level == LEVEL_2) {
217 /* far, needs at least 5-byte match */
218 if (distance >= MAX_DISTANCE) {
219 if (input.getByte(inOffset + ip++) != input.getByte(inOffset + ref++)
220 || input.getByte(inOffset + ip++) != input.getByte(inOffset + ref++)) {
221 /*
222 * goto literal;
223 */
224 output.setByte(outOffset + op++, input.getByte(inOffset + anchor++));
225 ip = anchor;
226 copy++;
227 if (copy == MAX_COPY) {
228 copy = 0;
229 output.setByte(outOffset + op++, MAX_COPY - 1);
230 }
231 continue;
232 }
233 len += 2;
234 }
235 }
236 } // end if(!matchLabel)
237 /*
238 * match:
239 */
240 /* last matched byte */
241 ip = anchor + len;
242
243 /* distance is biased */
244 distance--;
245
246 if (distance == 0) {
247 /* zero distance means a run */
248 //flzuint8 x = ip[-1];
249 byte x = input.getByte(inOffset + ip - 1);
250 while (ip < ipBound) {
251 if (input.getByte(inOffset + ref++) != x) {
252 break;
253 } else {
254 ip++;
255 }
256 }
257 } else {
258 /* safe because the outer check against ip limit */
259 boolean missMatch = false;
260 for (int i = 0; i < 8; i++) {
261 if (input.getByte(inOffset + ref++) != input.getByte(inOffset + ip++)) {
262 missMatch = true;
263 break;
264 }
265 }
266 if (!missMatch) {
267 while (ip < ipBound) {
268 if (input.getByte(inOffset + ref++) != input.getByte(inOffset + ip++)) {
269 break;
270 }
271 }
272 }
273 }
274
275 /* if we have copied something, adjust the copy count */
276 if (copy != 0) {
277 /* copy is biased, '0' means 1 byte copy */
278 // *(op-copy-1) = copy-1;
279 output.setByte(outOffset + op - copy - 1, (byte) (copy - 1));
280 } else {
281 /* back, to overwrite the copy count */
282 op--;
283 }
284
285 /* reset literal counter */
286 copy = 0;
287
288 /* length is biased, '1' means a match of 3 bytes */
289 ip -= 3;
290 len = ip - anchor;
291
292 /* encode the match */
293 if (level == LEVEL_2) {
294 if (distance < MAX_DISTANCE) {
295 if (len < 7) {
296 output.setByte(outOffset + op++, (byte) ((len << 5) + (distance >>> 8)));
297 output.setByte(outOffset + op++, (byte) (distance & 255));
298 } else {
299 output.setByte(outOffset + op++, (byte) ((7 << 5) + (distance >>> 8)));
300 for (len -= 7; len >= 255; len -= 255) {
301 output.setByte(outOffset + op++, (byte) 255);
302 }
303 output.setByte(outOffset + op++, (byte) len);
304 output.setByte(outOffset + op++, (byte) (distance & 255));
305 }
306 } else {
307 /* far away, but not yet in the another galaxy... */
308 if (len < 7) {
309 distance -= MAX_DISTANCE;
310 output.setByte(outOffset + op++, (byte) ((len << 5) + 31));
311 output.setByte(outOffset + op++, (byte) 255);
312 output.setByte(outOffset + op++, (byte) (distance >>> 8));
313 output.setByte(outOffset + op++, (byte) (distance & 255));
314 } else {
315 distance -= MAX_DISTANCE;
316 output.setByte(outOffset + op++, (byte) ((7 << 5) + 31));
317 for (len -= 7; len >= 255; len -= 255) {
318 output.setByte(outOffset + op++, (byte) 255);
319 }
320 output.setByte(outOffset + op++, (byte) len);
321 output.setByte(outOffset + op++, (byte) 255);
322 output.setByte(outOffset + op++, (byte) (distance >>> 8));
323 output.setByte(outOffset + op++, (byte) (distance & 255));
324 }
325 }
326 } else {
327 if (len > MAX_LEN - 2) {
328 while (len > MAX_LEN - 2) {
329 output.setByte(outOffset + op++, (byte) ((7 << 5) + (distance >>> 8)));
330 output.setByte(outOffset + op++, (byte) (MAX_LEN - 2 - 7 - 2));
331 output.setByte(outOffset + op++, (byte) (distance & 255));
332 len -= MAX_LEN - 2;
333 }
334 }
335
336 if (len < 7) {
337 output.setByte(outOffset + op++, (byte) ((len << 5) + (distance >>> 8)));
338 output.setByte(outOffset + op++, (byte) (distance & 255));
339 } else {
340 output.setByte(outOffset + op++, (byte) ((7 << 5) + (distance >>> 8)));
341 output.setByte(outOffset + op++, (byte) (len - 7));
342 output.setByte(outOffset + op++, (byte) (distance & 255));
343 }
344 }
345
346 /* update the hash at match boundary */
347 //HASH_FUNCTION(hval,ip);
348 hval = hashFunction(input, inOffset + ip);
349 htab[hval] = ip++;
350
351 //HASH_FUNCTION(hval,ip);
352 hval = hashFunction(input, inOffset + ip);
353 htab[hval] = ip++;
354
355 /* assuming literal copy */
356 output.setByte(outOffset + op++, MAX_COPY - 1);
357
358 continue;
359
360 // Moved to be inline, with a 'continue'
361 /*
362 * literal:
363 *
364 output[outOffset + op++] = input[inOffset + anchor++];
365 ip = anchor;
366 copy++;
367 if(copy == MAX_COPY){
368 copy = 0;
369 output[outOffset + op++] = MAX_COPY-1;
370 }
371 */
372 }
373
374 /* left-over as literal copy */
375 ipBound++;
376 while (ip <= ipBound) {
377 output.setByte(outOffset + op++, input.getByte(inOffset + ip++));
378 copy++;
379 if (copy == MAX_COPY) {
380 copy = 0;
381 output.setByte(outOffset + op++, MAX_COPY - 1);
382 }
383 }
384
385 /* if we have copied something, adjust the copy length */
386 if (copy != 0) {
387 //*(op-copy-1) = copy-1;
388 output.setByte(outOffset + op - copy - 1, (byte) (copy - 1));
389 } else {
390 op--;
391 }
392
393 if (level == LEVEL_2) {
394 /* marker for fastlz2 */
395 output.setByte(outOffset, output.getByte(outOffset) | 1 << 5);
396 }
397
398 return op;
399 }
400
401 /**
402 * Decompress a block of compressed data and returns the size of the decompressed block.
403 * If error occurs, e.g. the compressed data is corrupted or the output buffer is not large
404 * enough, then 0 (zero) will be returned instead.
405 *
406 * Decompression is memory safe and guaranteed not to write the output buffer
407 * more than what is specified in outLength.
408 */
409 static int decompress(final ByteBuf input, final int inOffset, final int inLength,
410 final ByteBuf output, final int outOffset, final int outLength) {
411 //int level = ((*(const flzuint8*)input) >> 5) + 1;
412 final int level = (input.getByte(inOffset) >> 5) + 1;
413 if (level != LEVEL_1 && level != LEVEL_2) {
414 throw new DecompressionException(String.format(
415 "invalid level: %d (expected: %d or %d)", level, LEVEL_1, LEVEL_2
416 ));
417 }
418
419 // const flzuint8* ip = (const flzuint8*) input;
420 int ip = 0;
421 // flzuint8* op = (flzuint8*) output;
422 int op = 0;
423 // flzuint32 ctrl = (*ip++) & 31;
424 long ctrl = input.getByte(inOffset + ip++) & 31;
425
426 int loop = 1;
427 do {
428 // const flzuint8* ref = op;
429 int ref = op;
430 // flzuint32 len = ctrl >> 5;
431 long len = ctrl >> 5;
432 // flzuint32 ofs = (ctrl & 31) << 8;
433 long ofs = (ctrl & 31) << 8;
434
435 if (ctrl >= 32) {
436 len--;
437 // ref -= ofs;
438 ref -= ofs;
439
440 int code;
441 if (len == 6) {
442 if (level == LEVEL_1) {
443 // len += *ip++;
444 len += input.getUnsignedByte(inOffset + ip++);
445 } else {
446 do {
447 code = input.getUnsignedByte(inOffset + ip++);
448 len += code;
449 } while (code == 255);
450 }
451 }
452 if (level == LEVEL_1) {
453 // ref -= *ip++;
454 ref -= input.getUnsignedByte(inOffset + ip++);
455 } else {
456 code = input.getUnsignedByte(inOffset + ip++);
457 ref -= code;
458
459 /* match from 16-bit distance */
460 // if(FASTLZ_UNEXPECT_CONDITIONAL(code==255))
461 // if(FASTLZ_EXPECT_CONDITIONAL(ofs==(31 << 8)))
462 if (code == 255 && ofs == 31 << 8) {
463 ofs = input.getUnsignedByte(inOffset + ip++) << 8;
464 ofs += input.getUnsignedByte(inOffset + ip++);
465
466 ref = (int) (op - ofs - MAX_DISTANCE);
467 }
468 }
469
470 // if the output index + length of block(?) + 3(?) is over the output limit?
471 if (op + len + 3 > outLength) {
472 return 0;
473 }
474
475 // if (FASTLZ_UNEXPECT_CONDITIONAL(ref-1 < (flzuint8 *)output))
476 // if the address space of ref-1 is < the address of output?
477 // if we are still at the beginning of the output address?
478 if (ref - 1 < 0) {
479 return 0;
480 }
481
482 if (ip < inLength) {
483 ctrl = input.getUnsignedByte(inOffset + ip++);
484 } else {
485 loop = 0;
486 }
487
488 if (ref == op) {
489 /* optimize copy for a run */
490 // flzuint8 b = ref[-1];
491 byte b = output.getByte(outOffset + ref - 1);
492 output.setByte(outOffset + op++, b);
493 output.setByte(outOffset + op++, b);
494 output.setByte(outOffset + op++, b);
495 while (len != 0) {
496 output.setByte(outOffset + op++, b);
497 --len;
498 }
499 } else {
500 /* copy from reference */
501 ref--;
502
503 // *op++ = *ref++;
504 output.setByte(outOffset + op++, output.getByte(outOffset + ref++));
505 output.setByte(outOffset + op++, output.getByte(outOffset + ref++));
506 output.setByte(outOffset + op++, output.getByte(outOffset + ref++));
507
508 while (len != 0) {
509 output.setByte(outOffset + op++, output.getByte(outOffset + ref++));
510 --len;
511 }
512 }
513 } else {
514 ctrl++;
515
516 if (op + ctrl > outLength) {
517 return 0;
518 }
519 if (ip + ctrl > inLength) {
520 return 0;
521 }
522
523 //*op++ = *ip++;
524 output.setByte(outOffset + op++, input.getByte(inOffset + ip++));
525
526 for (--ctrl; ctrl != 0; ctrl--) {
527 // *op++ = *ip++;
528 output.setByte(outOffset + op++, input.getByte(inOffset + ip++));
529 }
530
531 loop = ip < inLength ? 1 : 0;
532 if (loop != 0) {
533 // ctrl = *ip++;
534 ctrl = input.getUnsignedByte(inOffset + ip++);
535 }
536 }
537
538 // while(FASTLZ_EXPECT_CONDITIONAL(loop));
539 } while (loop != 0);
540
541 // return op - (flzuint8*)output;
542 return op;
543 }
544
545 private static int hashFunction(ByteBuf p, int offset) {
546 int v = readU16(p, offset);
547 v ^= readU16(p, offset + 1) ^ v >> 16 - HASH_LOG;
548 v &= HASH_MASK;
549 return v;
550 }
551
552 private static int readU16(ByteBuf data, int offset) {
553 if (offset + 1 >= data.readableBytes()) {
554 return data.getUnsignedByte(offset);
555 }
556 return data.getUnsignedByte(offset + 1) << 8 | data.getUnsignedByte(offset);
557 }
558
559 private FastLz() { }
560 }