1 /*
2 * Copyright 2012 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package io.netty5.handler.codec.compression;
17
18 import io.netty5.buffer.BufferUtil;
19 import io.netty5.buffer.api.Buffer;
20
21 /**
22 * Uncompresses an input {@link Buffer} encoded with Snappy compression into an
23 * output {@link Buffer}.
24 *
25 * See <a href="https://github.com/google/snappy/blob/master/format_description.txt">snappy format</a>.
26 */
27 public final class Snappy {
28
29 private static final int MAX_HT_SIZE = 1 << 14;
30 private static final int MIN_COMPRESSIBLE_BYTES = 15;
31
32 // used as a return value to indicate that we haven't yet read our full preamble
33 private static final int PREAMBLE_NOT_FULL = -1;
34 private static final int NOT_ENOUGH_INPUT = -1;
35
36 // constants for the tag types
37 private static final int LITERAL = 0;
38 private static final int COPY_1_BYTE_OFFSET = 1;
39 private static final int COPY_2_BYTE_OFFSET = 2;
40 private static final int COPY_4_BYTE_OFFSET = 3;
41
42 private State state = State.READING_PREAMBLE;
43 private byte tag;
44 private int written;
45
46 private enum State {
47 READING_PREAMBLE,
48 READING_TAG,
49 READING_LITERAL,
50 READING_COPY
51 }
52
53 public void reset() {
54 state = State.READING_PREAMBLE;
55 tag = 0;
56 written = 0;
57 }
58
59 public void encode(final Buffer in, final Buffer out, final int length) {
60 // Write the preamble length to the output buffer
61 for (int i = 0;; i ++) {
62 int b = length >>> i * 7;
63 if ((b & 0xFFFFFF80) != 0) {
64 out.writeByte((byte) (b & 0x7f | 0x80));
65 } else {
66 out.writeByte((byte) b);
67 break;
68 }
69 }
70
71 int inIndex = in.readerOffset();
72 final int baseIndex = inIndex;
73
74 final short[] table = getHashTable(length);
75 final int shift = Integer.numberOfLeadingZeros(table.length) + 1;
76
77 int nextEmit = inIndex;
78
79 if (length - inIndex >= MIN_COMPRESSIBLE_BYTES) {
80 int nextHash = hash(in, ++inIndex, shift);
81 outer: while (true) {
82 int skip = 32;
83
84 int candidate;
85 int nextIndex = inIndex;
86 do {
87 inIndex = nextIndex;
88 int hash = nextHash;
89 int bytesBetweenHashLookups = skip++ >> 5;
90 nextIndex = inIndex + bytesBetweenHashLookups;
91
92 // We need at least 4 remaining bytes to read the hash
93 if (nextIndex > length - 4) {
94 break outer;
95 }
96
97 nextHash = hash(in, nextIndex, shift);
98
99 candidate = baseIndex + table[hash];
100
101 table[hash] = (short) (inIndex - baseIndex);
102 }
103 while (in.getInt(inIndex) != in.getInt(candidate));
104
105 encodeLiteral(in, out, inIndex - nextEmit);
106
107 int insertTail;
108 do {
109 int base = inIndex;
110 int matched = 4 + findMatchingLength(in, candidate + 4, inIndex + 4, length);
111 inIndex += matched;
112 int offset = base - candidate;
113 encodeCopy(out, offset, matched);
114 in.readerOffset(in.readerOffset() + matched);
115 insertTail = inIndex - 1;
116 nextEmit = inIndex;
117 if (inIndex >= length - 4) {
118 break outer;
119 }
120
121 int prevHash = hash(in, insertTail, shift);
122 table[prevHash] = (short) (inIndex - baseIndex - 1);
123 int currentHash = hash(in, insertTail + 1, shift);
124 candidate = baseIndex + table[currentHash];
125 table[currentHash] = (short) (inIndex - baseIndex);
126 }
127 while (in.getInt(insertTail + 1) == in.getInt(candidate));
128
129 nextHash = hash(in, insertTail + 2, shift);
130 ++inIndex;
131 }
132 }
133
134 // If there are any remaining characters, write them out as a literal
135 if (nextEmit < length) {
136 encodeLiteral(in, out, length - nextEmit);
137 }
138 }
139
140 /**
141 * Hashes the 4 bytes located at index, shifting the resulting hash into
142 * the appropriate range for our hash table.
143 *
144 * @param in The input buffer to read 4 bytes from
145 * @param index The index to read at
146 * @param shift The shift value, for ensuring that the resulting value is
147 * within the range of our hash table size
148 * @return A 32-bit hash of 4 bytes located at index
149 */
150 private static int hash(Buffer in, int index, int shift) {
151 return in.getInt(index) * 0x1e35a7bd >>> shift;
152 }
153
154 /**
155 * Creates an appropriately sized hashtable for the given input size
156 *
157 * @param inputSize The size of our input, ie. the number of bytes we need to encode
158 * @return An appropriately sized empty hashtable
159 */
160 private static short[] getHashTable(int inputSize) {
161 int htSize = 256;
162 while (htSize < MAX_HT_SIZE && htSize < inputSize) {
163 htSize <<= 1;
164 }
165 return new short[htSize];
166 }
167
168 /**
169 * Iterates over the supplied input buffer between the supplied minIndex and
170 * maxIndex to find how long our matched copy overlaps with an already-written
171 * literal value.
172 *
173 * @param in The input buffer to scan over
174 * @param minIndex The index in the input buffer to start scanning from
175 * @param inIndex The index of the start of our copy
176 * @param maxIndex The length of our input buffer
177 * @return The number of bytes for which our candidate copy is a repeat of
178 */
179 private static int findMatchingLength(Buffer in, int minIndex, int inIndex, int maxIndex) {
180 int matched = 0;
181
182 while (inIndex <= maxIndex - 4 &&
183 in.getInt(inIndex) == in.getInt(minIndex + matched)) {
184 inIndex += 4;
185 matched += 4;
186 }
187
188 while (inIndex < maxIndex && in.getByte(minIndex + matched) == in.getByte(inIndex)) {
189 ++inIndex;
190 ++matched;
191 }
192
193 return matched;
194 }
195
196 /**
197 * Calculates the minimum number of bits required to encode a value. This can
198 * then in turn be used to calculate the number of septets or octets (as
199 * appropriate) to use to encode a length parameter.
200 *
201 * @param value The value to calculate the minimum number of bits required to encode
202 * @return The minimum number of bits required to encode the supplied value
203 */
204 private static int bitsToEncode(int value) {
205 int highestOneBit = Integer.highestOneBit(value);
206 int bitLength = 0;
207 while ((highestOneBit >>= 1) != 0) {
208 bitLength++;
209 }
210
211 return bitLength;
212 }
213
214 /**
215 * Writes a literal to the supplied output buffer by directly copying from
216 * the input buffer. The literal is taken from the current readerIndex
217 * up to the supplied length.
218 *
219 * @param in The input buffer to copy from
220 * @param out The output buffer to copy to
221 * @param length The length of the literal to copy
222 */
223 static void encodeLiteral(Buffer in, Buffer out, int length) {
224 if (length < 61) {
225 out.writeByte((byte) (length - 1 << 2));
226 } else {
227 int bitLength = bitsToEncode(length - 1);
228 int bytesToEncode = 1 + bitLength / 8;
229 out.writeByte((byte) (59 + bytesToEncode << 2));
230 for (int i = 0; i < bytesToEncode; i++) {
231 out.writeByte((byte) (length - 1 >> i * 8 & 0x0ff));
232 }
233 }
234
235 out.ensureWritable(length);
236 in.copyInto(in.readerOffset(), out, out.writerOffset(), length);
237 in.skipReadableBytes(length);
238 out.skipWritableBytes(length);
239 }
240
241 private static void encodeCopyWithOffset(Buffer out, int offset, int length) {
242 if (length < 12 && offset < 2048) {
243 out.writeByte((byte) (COPY_1_BYTE_OFFSET | length - 4 << 2 | offset >> 8 << 5));
244 out.writeByte((byte) (offset & 0x0ff));
245 } else {
246 out.writeByte((byte) (COPY_2_BYTE_OFFSET | length - 1 << 2));
247 out.writeByte((byte) (offset & 0x0ff));
248 out.writeByte((byte) (offset >> 8 & 0x0ff));
249 }
250 }
251
252 /**
253 * Encodes a series of copies, each at most 64 bytes in length.
254 *
255 * @param out The output buffer to write the copy pointer to
256 * @param offset The offset at which the original instance lies
257 * @param length The length of the original instance
258 */
259 private static void encodeCopy(Buffer out, int offset, int length) {
260 while (length >= 68) {
261 encodeCopyWithOffset(out, offset, 64);
262 length -= 64;
263 }
264
265 if (length > 64) {
266 encodeCopyWithOffset(out, offset, 60);
267 length -= 60;
268 }
269
270 encodeCopyWithOffset(out, offset, length);
271 }
272
273 public void decode(Buffer in, Buffer out) {
274 while (in.readableBytes() > 0) {
275 switch (state) {
276 case READING_PREAMBLE:
277 int uncompressedLength = readPreamble(in);
278 if (uncompressedLength == PREAMBLE_NOT_FULL) {
279 // We've not yet read all of the preamble, so wait until we can
280 return;
281 }
282 if (uncompressedLength == 0) {
283 // Should never happen, but it does mean we have nothing further to do
284 return;
285 }
286 out.ensureWritable(uncompressedLength);
287 state = State.READING_TAG;
288 // fall through
289 case READING_TAG:
290 if (in.readableBytes() == 0) {
291 return;
292 }
293 tag = in.readByte();
294 switch (tag & 0x03) {
295 case LITERAL:
296 state = State.READING_LITERAL;
297 break;
298 case COPY_1_BYTE_OFFSET:
299 case COPY_2_BYTE_OFFSET:
300 case COPY_4_BYTE_OFFSET:
301 state = State.READING_COPY;
302 break;
303 }
304 break;
305 case READING_LITERAL:
306 int literalWritten = decodeLiteral(tag, in, out);
307 if (literalWritten != NOT_ENOUGH_INPUT) {
308 state = State.READING_TAG;
309 written += literalWritten;
310 } else {
311 // Need to wait for more data
312 return;
313 }
314 break;
315 case READING_COPY:
316 int decodeWritten;
317 switch (tag & 0x03) {
318 case COPY_1_BYTE_OFFSET:
319 decodeWritten = decodeCopyWith1ByteOffset(tag, in, out, written);
320 if (decodeWritten != NOT_ENOUGH_INPUT) {
321 state = State.READING_TAG;
322 written += decodeWritten;
323 } else {
324 // Need to wait for more data
325 return;
326 }
327 break;
328 case COPY_2_BYTE_OFFSET:
329 decodeWritten = decodeCopyWith2ByteOffset(tag, in, out, written);
330 if (decodeWritten != NOT_ENOUGH_INPUT) {
331 state = State.READING_TAG;
332 written += decodeWritten;
333 } else {
334 // Need to wait for more data
335 return;
336 }
337 break;
338 case COPY_4_BYTE_OFFSET:
339 decodeWritten = decodeCopyWith4ByteOffset(tag, in, out, written);
340 if (decodeWritten != NOT_ENOUGH_INPUT) {
341 state = State.READING_TAG;
342 written += decodeWritten;
343 } else {
344 // Need to wait for more data
345 return;
346 }
347 break;
348 }
349 }
350 }
351 }
352
353 /**
354 * Reads the length varint (a series of bytes, where the lower 7 bits
355 * are data and the upper bit is a flag to indicate more bytes to be
356 * read).
357 *
358 * @param in The input buffer to read the preamble from
359 * @return The calculated length based on the input buffer, or 0 if
360 * no preamble is able to be calculated
361 */
362 private static int readPreamble(Buffer in) {
363 int length = 0;
364 int byteIndex = 0;
365 while (in.readableBytes() > 0) {
366 int current = in.readUnsignedByte();
367 length |= (current & 0x7f) << byteIndex++ * 7;
368 if ((current & 0x80) == 0) {
369 return length;
370 }
371
372 if (byteIndex >= 4) {
373 throw new DecompressionException("Preamble is greater than 4 bytes");
374 }
375 }
376
377 return 0;
378 }
379
380 /**
381 * Get the length varint (a series of bytes, where the lower 7 bits
382 * are data and the upper bit is a flag to indicate more bytes to be
383 * read).
384 *
385 * @param in The input buffer to get the preamble from
386 * @return The calculated length based on the input buffer, or 0 if
387 * no preamble is able to be calculated
388 */
389 int getPreamble(Buffer in) {
390 if (state == State.READING_PREAMBLE) {
391 int readerIndex = in.readerOffset();
392 try {
393 return readPreamble(in);
394 } finally {
395 in.readerOffset(readerIndex);
396 }
397 }
398 return 0;
399 }
400
401 /**
402 * Reads a literal from the input buffer directly to the output buffer.
403 * A "literal" is an uncompressed segment of data stored directly in the
404 * byte stream.
405 *
406 * @param tag The tag that identified this segment as a literal is also
407 * used to encode part of the length of the data
408 * @param in The input buffer to read the literal from
409 * @param out The output buffer to write the literal to
410 * @return The number of bytes appended to the output buffer, or -1 to indicate "try again later"
411 */
412 static int decodeLiteral(byte tag, Buffer in, Buffer out) {
413 int readerIndex = in.readerOffset();
414 int length;
415 switch(tag >> 2 & 0x3F) {
416 case 60:
417 if (in.readableBytes() == 0) {
418 return NOT_ENOUGH_INPUT;
419 }
420 length = in.readUnsignedByte();
421 break;
422 case 61:
423 if (in.readableBytes() < 2) {
424 return NOT_ENOUGH_INPUT;
425 }
426 length = BufferUtil.reverseUnsignedShort(in.readUnsignedShort());
427 break;
428 case 62:
429 if (in.readableBytes() < 3) {
430 return NOT_ENOUGH_INPUT;
431 }
432 length = BufferUtil.reverseUnsignedMedium(in.readUnsignedMedium());
433 break;
434 case 63:
435 if (in.readableBytes() < 4) {
436 return NOT_ENOUGH_INPUT;
437 }
438 length = Integer.reverseBytes(in.readInt());
439 break;
440 default:
441 length = tag >> 2 & 0x3F;
442 }
443 length += 1;
444
445 if (in.readableBytes() < length) {
446 in.readerOffset(readerIndex);
447 return NOT_ENOUGH_INPUT;
448 }
449
450 out.ensureWritable(length);
451 in.copyInto(in.readerOffset(), out, out.writerOffset(), length);
452 in.skipReadableBytes(length);
453 out.skipWritableBytes(length);
454 return length;
455 }
456
457 /**
458 * Reads a compressed reference offset and length from the supplied input
459 * buffer, seeks back to the appropriate place in the input buffer and
460 * writes the found data to the supplied output stream.
461 *
462 * @param tag The tag used to identify this as a copy is also used to encode
463 * the length and part of the offset
464 * @param in The input buffer to read from
465 * @param out The output buffer to write to
466 * @return The number of bytes appended to the output buffer, or -1 to indicate
467 * "try again later"
468 * @throws DecompressionException If the read offset is invalid
469 */
470 private static int decodeCopyWith1ByteOffset(byte tag, Buffer in, Buffer out, int writtenSoFar) {
471 if (in.readableBytes() == 0) {
472 return NOT_ENOUGH_INPUT;
473 }
474
475 int initialIndex = out.writerOffset();
476 int length = 4 + ((tag & 0x01c) >> 2);
477 int offset = (tag & 0x0e0) << 8 >> 5 | in.readUnsignedByte();
478
479 validateOffset(offset, writtenSoFar);
480
481 int readerIndex = out.readerOffset();
482 if (offset < length) {
483 int copies = length / offset;
484 for (; copies > 0; copies--) {
485 copyRegion(out, initialIndex, offset, offset);
486 }
487 if (length % offset != 0) {
488 copyRegion(out, initialIndex, offset, length % offset);
489 }
490 } else {
491 copyRegion(out, initialIndex, offset, length);
492 }
493 out.readerOffset(readerIndex);
494
495 return length;
496 }
497
498 private static void copyRegion(Buffer out, int initialIndex, int offset, int length) {
499 out.readerOffset(initialIndex - offset);
500 out.copyInto(out.readerOffset(), out, out.writerOffset(), length);
501 out.skipWritableBytes(length).skipReadableBytes(length);
502 }
503
504 /**
505 * Reads a compressed reference offset and length from the supplied input
506 * buffer, seeks back to the appropriate place in the input buffer and
507 * writes the found data to the supplied output stream.
508 *
509 * @param tag The tag used to identify this as a copy is also used to encode
510 * the length and part of the offset
511 * @param in The input buffer to read from
512 * @param out The output buffer to write to
513 * @throws DecompressionException If the read offset is invalid
514 * @return The number of bytes appended to the output buffer, or -1 to indicate
515 * "try again later"
516 */
517 private static int decodeCopyWith2ByteOffset(byte tag, Buffer in, Buffer out, int writtenSoFar) {
518 if (in.readableBytes() < 2) {
519 return NOT_ENOUGH_INPUT;
520 }
521
522 int initialIndex = out.writerOffset();
523 int length = 1 + (tag >> 2 & 0x03f);
524 int offset = BufferUtil.reverseUnsignedShort(in.readUnsignedShort());
525
526 validateOffset(offset, writtenSoFar);
527
528 int readerIndex = out.readerOffset();
529 if (offset < length) {
530 int copies = length / offset;
531 for (; copies > 0; copies--) {
532 copyRegion(out, initialIndex, offset, offset);
533 }
534 if (length % offset != 0) {
535 copyRegion(out, initialIndex, offset, length % offset);
536 }
537 } else {
538 copyRegion(out, initialIndex, offset, length);
539 }
540 out.readerOffset(readerIndex);
541
542 return length;
543 }
544
545 /**
546 * Reads a compressed reference offset and length from the supplied input
547 * buffer, seeks back to the appropriate place in the input buffer and
548 * writes the found data to the supplied output stream.
549 *
550 * @param tag The tag used to identify this as a copy is also used to encode
551 * the length and part of the offset
552 * @param in The input buffer to read from
553 * @param out The output buffer to write to
554 * @return The number of bytes appended to the output buffer, or -1 to indicate
555 * "try again later"
556 * @throws DecompressionException If the read offset is invalid
557 */
558 private static int decodeCopyWith4ByteOffset(byte tag, Buffer in, Buffer out, int writtenSoFar) {
559 if (in.readableBytes() < 4) {
560 return NOT_ENOUGH_INPUT;
561 }
562
563 int initialIndex = out.writerOffset();
564 int length = 1 + (tag >> 2 & 0x03F);
565 int offset = Integer.reverseBytes(in.readInt());
566
567 validateOffset(offset, writtenSoFar);
568
569 int readerIndex = out.readerOffset();
570 if (offset < length) {
571 int copies = length / offset;
572 for (; copies > 0; copies--) {
573 copyRegion(out, initialIndex, offset, offset);
574 }
575 if (length % offset != 0) {
576 copyRegion(out, initialIndex, offset, length % offset);
577 }
578 } else {
579 copyRegion(out, initialIndex, offset, length);
580 }
581 out.readerOffset(readerIndex);
582
583 return length;
584 }
585
586 /**
587 * Validates that the offset extracted from a compressed reference is within
588 * the permissible bounds of an offset (0 < offset < Integer.MAX_VALUE), and does not
589 * exceed the length of the chunk currently read so far.
590 *
591 * @param offset The offset extracted from the compressed reference
592 * @param chunkSizeSoFar The number of bytes read so far from this chunk
593 * @throws DecompressionException if the offset is invalid
594 */
595 private static void validateOffset(int offset, int chunkSizeSoFar) {
596 if (offset == 0) {
597 throw new DecompressionException("Offset is less than minimum permissible value");
598 }
599
600 if (offset < 0) {
601 // Due to arithmetic overflow
602 throw new DecompressionException("Offset is greater than maximum value supported by this implementation");
603 }
604
605 if (offset > chunkSizeSoFar) {
606 throw new DecompressionException("Offset exceeds size of chunk");
607 }
608 }
609
610 /**
611 * Computes the CRC32C checksum of the supplied data and performs the "mask" operation
612 * on the computed checksum
613 *
614 * @param data The input data to calculate the CRC32C checksum of
615 */
616 static int calculateChecksum(Buffer data) {
617 return calculateChecksum(data, data.readerOffset(), data.readableBytes());
618 }
619
620 /**
621 * Computes the CRC32C checksum of the supplied data and performs the "mask" operation
622 * on the computed checksum
623 *
624 * @param data The input data to calculate the CRC32C checksum of
625 */
626 static int calculateChecksum(Buffer data, int offset, int length) {
627 Crc32c crc32 = new Crc32c();
628 try {
629 crc32.update(data, offset, length);
630 return maskChecksum(crc32.getValue());
631 } finally {
632 crc32.reset();
633 }
634 }
635
636 /**
637 * Computes the CRC32C checksum of the supplied data, performs the "mask" operation
638 * on the computed checksum, and then compares the resulting masked checksum to the
639 * supplied checksum.
640 *
641 * @param expectedChecksum The checksum decoded from the stream to compare against
642 * @param data The input data to calculate the CRC32C checksum of
643 * @throws DecompressionException If the calculated and supplied checksums do not match
644 */
645 static void validateChecksum(int expectedChecksum, Buffer data) {
646 validateChecksum(expectedChecksum, data, data.readerOffset(), data.readableBytes());
647 }
648
649 /**
650 * Computes the CRC32C checksum of the supplied data, performs the "mask" operation
651 * on the computed checksum, and then compares the resulting masked checksum to the
652 * supplied checksum.
653 *
654 * @param expectedChecksum The checksum decoded from the stream to compare against
655 * @param data The input data to calculate the CRC32C checksum of
656 * @throws DecompressionException If the calculated and supplied checksums do not match
657 */
658 static void validateChecksum(int expectedChecksum, Buffer data, int offset, int length) {
659 final int actualChecksum = calculateChecksum(data, offset, length);
660 if (actualChecksum != expectedChecksum) {
661 throw new DecompressionException(
662 "mismatching checksum: " + Integer.toHexString(actualChecksum) +
663 " (expected: " + Integer.toHexString(expectedChecksum) + ')');
664 }
665 }
666
667 /**
668 * From the spec:
669 *
670 * "Checksums are not stored directly, but masked, as checksumming data and
671 * then its own checksum can be problematic. The masking is the same as used
672 * in Apache Hadoop: Rotate the checksum by 15 bits, then add the constant
673 * 0xa282ead8 (using wraparound as normal for unsigned integers)."
674 *
675 * @param checksum The actual checksum of the data
676 * @return The masked checksum
677 */
678 static int maskChecksum(long checksum) {
679 return (int) ((checksum >> 15 | checksum << 17) + 0xa282ead8);
680 }
681 }