1 /*
2 * Copyright 2012 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package io.netty.handler.codec.compression;
17
18 import io.netty.buffer.ByteBuf;
19 import io.netty.buffer.ByteBufUtil;
20
21 /**
22 * Uncompresses an input {@link ByteBuf} encoded with Snappy compression into an
23 * output {@link ByteBuf}.
24 *
25 * See <a href="https://github.com/google/snappy/blob/master/format_description.txt">snappy format</a>.
26 */
27 class Snappy {
28
29 private static final int MAX_HT_SIZE = 1 << 14;
30 private static final int MIN_COMPRESSIBLE_BYTES = 15;
31
32 // used as a return value to indicate that we haven't yet read our full preamble
33 private static final int PREAMBLE_NOT_FULL = -1;
34 private static final int NOT_ENOUGH_INPUT = -1;
35
36 // constants for the tag types
37 private static final int LITERAL = 0;
38 private static final int COPY_1_BYTE_OFFSET = 1;
39 private static final int COPY_2_BYTE_OFFSET = 2;
40 private static final int COPY_4_BYTE_OFFSET = 3;
41
42 private State state = State.READY;
43 private byte tag;
44 private int written;
45
46 private enum State {
47 READY,
48 READING_PREAMBLE,
49 READING_TAG,
50 READING_LITERAL,
51 READING_COPY
52 }
53
54 public void reset() {
55 state = State.READY;
56 tag = 0;
57 written = 0;
58 }
59
60 public void encode(final ByteBuf in, final ByteBuf out, final int length) {
61 // Write the preamble length to the output buffer
62 for (int i = 0;; i ++) {
63 int b = length >>> i * 7;
64 if ((b & 0xFFFFFF80) != 0) {
65 out.writeByte(b & 0x7f | 0x80);
66 } else {
67 out.writeByte(b);
68 break;
69 }
70 }
71
72 int inIndex = in.readerIndex();
73 final int baseIndex = inIndex;
74
75 final short[] table = getHashTable(length);
76 final int shift = Integer.numberOfLeadingZeros(table.length) + 1;
77
78 int nextEmit = inIndex;
79
80 if (length - inIndex >= MIN_COMPRESSIBLE_BYTES) {
81 int nextHash = hash(in, ++inIndex, shift);
82 outer: while (true) {
83 int skip = 32;
84
85 int candidate;
86 int nextIndex = inIndex;
87 do {
88 inIndex = nextIndex;
89 int hash = nextHash;
90 int bytesBetweenHashLookups = skip++ >> 5;
91 nextIndex = inIndex + bytesBetweenHashLookups;
92
93 // We need at least 4 remaining bytes to read the hash
94 if (nextIndex > length - 4) {
95 break outer;
96 }
97
98 nextHash = hash(in, nextIndex, shift);
99
100 candidate = baseIndex + table[hash];
101
102 table[hash] = (short) (inIndex - baseIndex);
103 }
104 while (in.getInt(inIndex) != in.getInt(candidate));
105
106 encodeLiteral(in, out, inIndex - nextEmit);
107
108 int insertTail;
109 do {
110 int base = inIndex;
111 int matched = 4 + findMatchingLength(in, candidate + 4, inIndex + 4, length);
112 inIndex += matched;
113 int offset = base - candidate;
114 encodeCopy(out, offset, matched);
115 in.readerIndex(in.readerIndex() + matched);
116 insertTail = inIndex - 1;
117 nextEmit = inIndex;
118 if (inIndex >= length - 4) {
119 break outer;
120 }
121
122 int prevHash = hash(in, insertTail, shift);
123 table[prevHash] = (short) (inIndex - baseIndex - 1);
124 int currentHash = hash(in, insertTail + 1, shift);
125 candidate = baseIndex + table[currentHash];
126 table[currentHash] = (short) (inIndex - baseIndex);
127 }
128 while (in.getInt(insertTail + 1) == in.getInt(candidate));
129
130 nextHash = hash(in, insertTail + 2, shift);
131 ++inIndex;
132 }
133 }
134
135 // If there are any remaining characters, write them out as a literal
136 if (nextEmit < length) {
137 encodeLiteral(in, out, length - nextEmit);
138 }
139 }
140
141 /**
142 * Hashes the 4 bytes located at index, shifting the resulting hash into
143 * the appropriate range for our hash table.
144 *
145 * @param in The input buffer to read 4 bytes from
146 * @param index The index to read at
147 * @param shift The shift value, for ensuring that the resulting value is
148 * withing the range of our hash table size
149 * @return A 32-bit hash of 4 bytes located at index
150 */
151 private static int hash(ByteBuf in, int index, int shift) {
152 return in.getInt(index) * 0x1e35a7bd >>> shift;
153 }
154
155 /**
156 * Creates an appropriately sized hashtable for the given input size
157 *
158 * @param inputSize The size of our input, ie. the number of bytes we need to encode
159 * @return An appropriately sized empty hashtable
160 */
161 private static short[] getHashTable(int inputSize) {
162 int htSize = 256;
163 while (htSize < MAX_HT_SIZE && htSize < inputSize) {
164 htSize <<= 1;
165 }
166 return new short[htSize];
167 }
168
169 /**
170 * Iterates over the supplied input buffer between the supplied minIndex and
171 * maxIndex to find how long our matched copy overlaps with an already-written
172 * literal value.
173 *
174 * @param in The input buffer to scan over
175 * @param minIndex The index in the input buffer to start scanning from
176 * @param inIndex The index of the start of our copy
177 * @param maxIndex The length of our input buffer
178 * @return The number of bytes for which our candidate copy is a repeat of
179 */
180 private static int findMatchingLength(ByteBuf in, int minIndex, int inIndex, int maxIndex) {
181 int matched = 0;
182
183 while (inIndex <= maxIndex - 4 &&
184 in.getInt(inIndex) == in.getInt(minIndex + matched)) {
185 inIndex += 4;
186 matched += 4;
187 }
188
189 while (inIndex < maxIndex && in.getByte(minIndex + matched) == in.getByte(inIndex)) {
190 ++inIndex;
191 ++matched;
192 }
193
194 return matched;
195 }
196
197 /**
198 * Calculates the minimum number of bits required to encode a value. This can
199 * then in turn be used to calculate the number of septets or octets (as
200 * appropriate) to use to encode a length parameter.
201 *
202 * @param value The value to calculate the minimum number of bits required to encode
203 * @return The minimum number of bits required to encode the supplied value
204 */
205 private static int bitsToEncode(int value) {
206 int highestOneBit = Integer.highestOneBit(value);
207 int bitLength = 0;
208 while ((highestOneBit >>= 1) != 0) {
209 bitLength++;
210 }
211
212 return bitLength;
213 }
214
215 /**
216 * Writes a literal to the supplied output buffer by directly copying from
217 * the input buffer. The literal is taken from the current readerIndex
218 * up to the supplied length.
219 *
220 * @param in The input buffer to copy from
221 * @param out The output buffer to copy to
222 * @param length The length of the literal to copy
223 */
224 static void encodeLiteral(ByteBuf in, ByteBuf out, int length) {
225 if (length < 61) {
226 out.writeByte(length - 1 << 2);
227 } else {
228 int bitLength = bitsToEncode(length - 1);
229 int bytesToEncode = 1 + bitLength / 8;
230 out.writeByte(59 + bytesToEncode << 2);
231 for (int i = 0; i < bytesToEncode; i++) {
232 out.writeByte(length - 1 >> i * 8 & 0x0ff);
233 }
234 }
235
236 out.writeBytes(in, length);
237 }
238
239 private static void encodeCopyWithOffset(ByteBuf out, int offset, int length) {
240 if (length < 12 && offset < 2048) {
241 out.writeByte(COPY_1_BYTE_OFFSET | length - 4 << 2 | offset >> 8 << 5);
242 out.writeByte(offset & 0x0ff);
243 } else {
244 out.writeByte(COPY_2_BYTE_OFFSET | length - 1 << 2);
245 out.writeByte(offset & 0x0ff);
246 out.writeByte(offset >> 8 & 0x0ff);
247 }
248 }
249
250 /**
251 * Encodes a series of copies, each at most 64 bytes in length.
252 *
253 * @param out The output buffer to write the copy pointer to
254 * @param offset The offset at which the original instance lies
255 * @param length The length of the original instance
256 */
257 private static void encodeCopy(ByteBuf out, int offset, int length) {
258 while (length >= 68) {
259 encodeCopyWithOffset(out, offset, 64);
260 length -= 64;
261 }
262
263 if (length > 64) {
264 encodeCopyWithOffset(out, offset, 60);
265 length -= 60;
266 }
267
268 encodeCopyWithOffset(out, offset, length);
269 }
270
271 public void decode(ByteBuf in, ByteBuf out) {
272 while (in.isReadable()) {
273 switch (state) {
274 case READY:
275 state = State.READING_PREAMBLE;
276 // fall through
277 case READING_PREAMBLE:
278 int uncompressedLength = readPreamble(in);
279 if (uncompressedLength == PREAMBLE_NOT_FULL) {
280 // We've not yet read all of the preamble, so wait until we can
281 return;
282 }
283 if (uncompressedLength == 0) {
284 // Should never happen, but it does mean we have nothing further to do
285 state = State.READY;
286 return;
287 }
288 out.ensureWritable(uncompressedLength);
289 state = State.READING_TAG;
290 // fall through
291 case READING_TAG:
292 if (!in.isReadable()) {
293 return;
294 }
295 tag = in.readByte();
296 switch (tag & 0x03) {
297 case LITERAL:
298 state = State.READING_LITERAL;
299 break;
300 case COPY_1_BYTE_OFFSET:
301 case COPY_2_BYTE_OFFSET:
302 case COPY_4_BYTE_OFFSET:
303 state = State.READING_COPY;
304 break;
305 }
306 break;
307 case READING_LITERAL:
308 int literalWritten = decodeLiteral(tag, in, out);
309 if (literalWritten != NOT_ENOUGH_INPUT) {
310 state = State.READING_TAG;
311 written += literalWritten;
312 } else {
313 // Need to wait for more data
314 return;
315 }
316 break;
317 case READING_COPY:
318 int decodeWritten;
319 switch (tag & 0x03) {
320 case COPY_1_BYTE_OFFSET:
321 decodeWritten = decodeCopyWith1ByteOffset(tag, in, out, written);
322 if (decodeWritten != NOT_ENOUGH_INPUT) {
323 state = State.READING_TAG;
324 written += decodeWritten;
325 } else {
326 // Need to wait for more data
327 return;
328 }
329 break;
330 case COPY_2_BYTE_OFFSET:
331 decodeWritten = decodeCopyWith2ByteOffset(tag, in, out, written);
332 if (decodeWritten != NOT_ENOUGH_INPUT) {
333 state = State.READING_TAG;
334 written += decodeWritten;
335 } else {
336 // Need to wait for more data
337 return;
338 }
339 break;
340 case COPY_4_BYTE_OFFSET:
341 decodeWritten = decodeCopyWith4ByteOffset(tag, in, out, written);
342 if (decodeWritten != NOT_ENOUGH_INPUT) {
343 state = State.READING_TAG;
344 written += decodeWritten;
345 } else {
346 // Need to wait for more data
347 return;
348 }
349 break;
350 }
351 }
352 }
353 }
354
355 /**
356 * Reads the length varint (a series of bytes, where the lower 7 bits
357 * are data and the upper bit is a flag to indicate more bytes to be
358 * read).
359 *
360 * @param in The input buffer to read the preamble from
361 * @return The calculated length based on the input buffer, or 0 if
362 * no preamble is able to be calculated
363 */
364 private static int readPreamble(ByteBuf in) {
365 int length = 0;
366 int byteIndex = 0;
367 while (in.isReadable()) {
368 int current = in.readUnsignedByte();
369 length |= (current & 0x7f) << byteIndex++ * 7;
370 if ((current & 0x80) == 0) {
371 return length;
372 }
373
374 if (byteIndex >= 4) {
375 throw new DecompressionException("Preamble is greater than 4 bytes");
376 }
377 }
378
379 return 0;
380 }
381
382 /**
383 * Reads a literal from the input buffer directly to the output buffer.
384 * A "literal" is an uncompressed segment of data stored directly in the
385 * byte stream.
386 *
387 * @param tag The tag that identified this segment as a literal is also
388 * used to encode part of the length of the data
389 * @param in The input buffer to read the literal from
390 * @param out The output buffer to write the literal to
391 * @return The number of bytes appended to the output buffer, or -1 to indicate "try again later"
392 */
393 static int decodeLiteral(byte tag, ByteBuf in, ByteBuf out) {
394 in.markReaderIndex();
395 int length;
396 switch(tag >> 2 & 0x3F) {
397 case 60:
398 if (!in.isReadable()) {
399 return NOT_ENOUGH_INPUT;
400 }
401 length = in.readUnsignedByte();
402 break;
403 case 61:
404 if (in.readableBytes() < 2) {
405 return NOT_ENOUGH_INPUT;
406 }
407 length = ByteBufUtil.swapShort(in.readShort());
408 break;
409 case 62:
410 if (in.readableBytes() < 3) {
411 return NOT_ENOUGH_INPUT;
412 }
413 length = ByteBufUtil.swapMedium(in.readUnsignedMedium());
414 break;
415 case 63:
416 if (in.readableBytes() < 4) {
417 return NOT_ENOUGH_INPUT;
418 }
419 length = ByteBufUtil.swapInt(in.readInt());
420 break;
421 default:
422 length = tag >> 2 & 0x3F;
423 }
424 length += 1;
425
426 if (in.readableBytes() < length) {
427 in.resetReaderIndex();
428 return NOT_ENOUGH_INPUT;
429 }
430
431 out.writeBytes(in, length);
432 return length;
433 }
434
435 /**
436 * Reads a compressed reference offset and length from the supplied input
437 * buffer, seeks back to the appropriate place in the input buffer and
438 * writes the found data to the supplied output stream.
439 *
440 * @param tag The tag used to identify this as a copy is also used to encode
441 * the length and part of the offset
442 * @param in The input buffer to read from
443 * @param out The output buffer to write to
444 * @return The number of bytes appended to the output buffer, or -1 to indicate
445 * "try again later"
446 * @throws DecompressionException If the read offset is invalid
447 */
448 private static int decodeCopyWith1ByteOffset(byte tag, ByteBuf in, ByteBuf out, int writtenSoFar) {
449 if (!in.isReadable()) {
450 return NOT_ENOUGH_INPUT;
451 }
452
453 int initialIndex = out.writerIndex();
454 int length = 4 + ((tag & 0x01c) >> 2);
455 int offset = (tag & 0x0e0) << 8 >> 5 | in.readUnsignedByte();
456
457 validateOffset(offset, writtenSoFar);
458
459 out.markReaderIndex();
460 if (offset < length) {
461 int copies = length / offset;
462 for (; copies > 0; copies--) {
463 out.readerIndex(initialIndex - offset);
464 out.readBytes(out, offset);
465 }
466 if (length % offset != 0) {
467 out.readerIndex(initialIndex - offset);
468 out.readBytes(out, length % offset);
469 }
470 } else {
471 out.readerIndex(initialIndex - offset);
472 out.readBytes(out, length);
473 }
474 out.resetReaderIndex();
475
476 return length;
477 }
478
479 /**
480 * Reads a compressed reference offset and length from the supplied input
481 * buffer, seeks back to the appropriate place in the input buffer and
482 * writes the found data to the supplied output stream.
483 *
484 * @param tag The tag used to identify this as a copy is also used to encode
485 * the length and part of the offset
486 * @param in The input buffer to read from
487 * @param out The output buffer to write to
488 * @throws DecompressionException If the read offset is invalid
489 * @return The number of bytes appended to the output buffer, or -1 to indicate
490 * "try again later"
491 */
492 private static int decodeCopyWith2ByteOffset(byte tag, ByteBuf in, ByteBuf out, int writtenSoFar) {
493 if (in.readableBytes() < 2) {
494 return NOT_ENOUGH_INPUT;
495 }
496
497 int initialIndex = out.writerIndex();
498 int length = 1 + (tag >> 2 & 0x03f);
499 int offset = ByteBufUtil.swapShort(in.readShort());
500
501 validateOffset(offset, writtenSoFar);
502
503 out.markReaderIndex();
504 if (offset < length) {
505 int copies = length / offset;
506 for (; copies > 0; copies--) {
507 out.readerIndex(initialIndex - offset);
508 out.readBytes(out, offset);
509 }
510 if (length % offset != 0) {
511 out.readerIndex(initialIndex - offset);
512 out.readBytes(out, length % offset);
513 }
514 } else {
515 out.readerIndex(initialIndex - offset);
516 out.readBytes(out, length);
517 }
518 out.resetReaderIndex();
519
520 return length;
521 }
522
523 /**
524 * Reads a compressed reference offset and length from the supplied input
525 * buffer, seeks back to the appropriate place in the input buffer and
526 * writes the found data to the supplied output stream.
527 *
528 * @param tag The tag used to identify this as a copy is also used to encode
529 * the length and part of the offset
530 * @param in The input buffer to read from
531 * @param out The output buffer to write to
532 * @return The number of bytes appended to the output buffer, or -1 to indicate
533 * "try again later"
534 * @throws DecompressionException If the read offset is invalid
535 */
536 private static int decodeCopyWith4ByteOffset(byte tag, ByteBuf in, ByteBuf out, int writtenSoFar) {
537 if (in.readableBytes() < 4) {
538 return NOT_ENOUGH_INPUT;
539 }
540
541 int initialIndex = out.writerIndex();
542 int length = 1 + (tag >> 2 & 0x03F);
543 int offset = ByteBufUtil.swapInt(in.readInt());
544
545 validateOffset(offset, writtenSoFar);
546
547 out.markReaderIndex();
548 if (offset < length) {
549 int copies = length / offset;
550 for (; copies > 0; copies--) {
551 out.readerIndex(initialIndex - offset);
552 out.readBytes(out, offset);
553 }
554 if (length % offset != 0) {
555 out.readerIndex(initialIndex - offset);
556 out.readBytes(out, length % offset);
557 }
558 } else {
559 out.readerIndex(initialIndex - offset);
560 out.readBytes(out, length);
561 }
562 out.resetReaderIndex();
563
564 return length;
565 }
566
567 /**
568 * Validates that the offset extracted from a compressed reference is within
569 * the permissible bounds of an offset (4 <= offset <= 32768), and does not
570 * exceed the length of the chunk currently read so far.
571 *
572 * @param offset The offset extracted from the compressed reference
573 * @param chunkSizeSoFar The number of bytes read so far from this chunk
574 * @throws DecompressionException if the offset is invalid
575 */
576 private static void validateOffset(int offset, int chunkSizeSoFar) {
577 if (offset > Short.MAX_VALUE) {
578 throw new DecompressionException("Offset exceeds maximum permissible value");
579 }
580
581 if (offset <= 0) {
582 throw new DecompressionException("Offset is less than minimum permissible value");
583 }
584
585 if (offset > chunkSizeSoFar) {
586 throw new DecompressionException("Offset exceeds size of chunk");
587 }
588 }
589
590 /**
591 * Computes the CRC32C checksum of the supplied data and performs the "mask" operation
592 * on the computed checksum
593 *
594 * @param data The input data to calculate the CRC32C checksum of
595 */
596 public static int calculateChecksum(ByteBuf data) {
597 return calculateChecksum(data, data.readerIndex(), data.readableBytes());
598 }
599
600 /**
601 * Computes the CRC32C checksum of the supplied data and performs the "mask" operation
602 * on the computed checksum
603 *
604 * @param data The input data to calculate the CRC32C checksum of
605 */
606 public static int calculateChecksum(ByteBuf data, int offset, int length) {
607 Crc32c crc32 = new Crc32c();
608 try {
609 if (data.hasArray()) {
610 crc32.update(data.array(), data.arrayOffset() + offset, length);
611 } else {
612 byte[] array = new byte[length];
613 data.getBytes(offset, array);
614 crc32.update(array, 0, length);
615 }
616
617 return maskChecksum((int) crc32.getValue());
618 } finally {
619 crc32.reset();
620 }
621 }
622
623 /**
624 * Computes the CRC32C checksum of the supplied data, performs the "mask" operation
625 * on the computed checksum, and then compares the resulting masked checksum to the
626 * supplied checksum.
627 *
628 * @param expectedChecksum The checksum decoded from the stream to compare against
629 * @param data The input data to calculate the CRC32C checksum of
630 * @throws DecompressionException If the calculated and supplied checksums do not match
631 */
632 static void validateChecksum(int expectedChecksum, ByteBuf data) {
633 validateChecksum(expectedChecksum, data, data.readerIndex(), data.readableBytes());
634 }
635
636 /**
637 * Computes the CRC32C checksum of the supplied data, performs the "mask" operation
638 * on the computed checksum, and then compares the resulting masked checksum to the
639 * supplied checksum.
640 *
641 * @param expectedChecksum The checksum decoded from the stream to compare against
642 * @param data The input data to calculate the CRC32C checksum of
643 * @throws DecompressionException If the calculated and supplied checksums do not match
644 */
645 static void validateChecksum(int expectedChecksum, ByteBuf data, int offset, int length) {
646 final int actualChecksum = calculateChecksum(data, offset, length);
647 if (actualChecksum != expectedChecksum) {
648 throw new DecompressionException(
649 "mismatching checksum: " + Integer.toHexString(actualChecksum) +
650 " (expected: " + Integer.toHexString(expectedChecksum) + ')');
651 }
652 }
653
654 /**
655 * From the spec:
656 *
657 * "Checksums are not stored directly, but masked, as checksumming data and
658 * then its own checksum can be problematic. The masking is the same as used
659 * in Apache Hadoop: Rotate the checksum by 15 bits, then add the constant
660 * 0xa282ead8 (using wraparound as normal for unsigned integers)."
661 *
662 * @param checksum The actual checksum of the data
663 * @return The masked checksum
664 */
665 static int maskChecksum(int checksum) {
666 return (checksum >> 15 | checksum << 17) + 0xa282ead8;
667 }
668 }