View Javadoc
1   /*
2    * Copyright 2014 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty5.handler.codec.compression;
17  
18  import io.netty5.buffer.api.Buffer;
19  import io.netty5.buffer.api.BufferAllocator;
20  
21  import java.util.function.Supplier;
22  
23  import static io.netty5.handler.codec.compression.Bzip2Constants.BASE_BLOCK_SIZE;
24  import static io.netty5.handler.codec.compression.Bzip2Constants.BLOCK_HEADER_MAGIC_1;
25  import static io.netty5.handler.codec.compression.Bzip2Constants.BLOCK_HEADER_MAGIC_2;
26  import static io.netty5.handler.codec.compression.Bzip2Constants.END_OF_STREAM_MAGIC_1;
27  import static io.netty5.handler.codec.compression.Bzip2Constants.END_OF_STREAM_MAGIC_2;
28  import static io.netty5.handler.codec.compression.Bzip2Constants.HUFFMAN_MAXIMUM_TABLES;
29  import static io.netty5.handler.codec.compression.Bzip2Constants.HUFFMAN_MAX_ALPHABET_SIZE;
30  import static io.netty5.handler.codec.compression.Bzip2Constants.HUFFMAN_MINIMUM_TABLES;
31  import static io.netty5.handler.codec.compression.Bzip2Constants.HUFFMAN_SELECTOR_LIST_MAX_LENGTH;
32  import static io.netty5.handler.codec.compression.Bzip2Constants.HUFFMAN_SYMBOL_RANGE_SIZE;
33  import static io.netty5.handler.codec.compression.Bzip2Constants.MAGIC_NUMBER;
34  import static io.netty5.handler.codec.compression.Bzip2Constants.MAX_BLOCK_SIZE;
35  import static io.netty5.handler.codec.compression.Bzip2Constants.MAX_SELECTORS;
36  import static io.netty5.handler.codec.compression.Bzip2Constants.MIN_BLOCK_SIZE;
37  
38  /**
39   * Uncompresses a {@link Buffer} encoded with the Bzip2 format.
40   *
41   * See <a href="https://en.wikipedia.org/wiki/Bzip2">Bzip2</a>.
42   */
43  public final class Bzip2Decompressor implements Decompressor {
44      /**
45       * Current state of stream.
46       */
47      private enum State {
48          INIT,
49          INIT_BLOCK,
50          INIT_BLOCK_PARAMS,
51          RECEIVE_HUFFMAN_USED_MAP,
52          RECEIVE_HUFFMAN_USED_BITMAPS,
53          RECEIVE_SELECTORS_NUMBER,
54          RECEIVE_SELECTORS,
55          RECEIVE_HUFFMAN_LENGTH,
56          DECODE_HUFFMAN_DATA,
57          EOF,
58          CLOSED
59      }
60      private State currentState = State.INIT;
61  
62      /**
63       * A reader that provides bit-level reads.
64       */
65      private final Bzip2BitReader reader = new Bzip2BitReader();
66  
67      /**
68       * The decompressor for the current block.
69       */
70      private Bzip2BlockDecompressor blockDecompressor;
71  
72      /**
73       * Bzip2 Huffman coding stage.
74       */
75      private Bzip2HuffmanStageDecoder huffmanStageDecoder;
76  
77      /**
78       * Always: in the range 0 .. 9. The current block size is 100000 * this number.
79       */
80      private int blockSize;
81  
82      /**
83       * The CRC of the current block as read from the block header.
84       */
85      private int blockCRC;
86  
87      /**
88       * The merged CRC of all blocks decompressed so far.
89       */
90      private int streamCRC;
91  
92      private Bzip2Decompressor() { }
93  
94      /**
95       * Returns a factory for {@link Bzip2Decompressor}s.
96       *
97       * @return a factory.
98       */
99      public static Supplier<Bzip2Decompressor> newFactory() {
100         return Bzip2Decompressor::new;
101     }
102 
103     @Override
104     public Buffer decompress(Buffer in, BufferAllocator allocator)
105             throws DecompressionException {
106         switch (currentState) {
107             case CLOSED:
108                 throw new DecompressionException("Decompressor closed");
109             case EOF:
110                 return allocator.allocate(0);
111             default:
112 
113                 if (in.readableBytes() == 0) {
114                     return null;
115                 }
116 
117                 final Bzip2BitReader reader = this.reader;
118                 reader.setBuffer(in);
119 
120                 for (;;) {
121                     switch (currentState) {
122                         case INIT:
123                             if (in.readableBytes() < 4) {
124                                 return null;
125                             }
126                             int magicNumber = in.readUnsignedMedium();
127                             if (magicNumber != MAGIC_NUMBER) {
128                                 currentState = State.EOF;
129                                 throw new DecompressionException("Unexpected stream identifier contents. " +
130                                         "Mismatched bzip2 protocol version?");
131                             }
132                             int blockSize = in.readByte() - '0';
133                             if (blockSize < MIN_BLOCK_SIZE || blockSize > MAX_BLOCK_SIZE) {
134                                 currentState = State.EOF;
135                                 throw new DecompressionException("block size is invalid");
136                             }
137                             this.blockSize = blockSize * BASE_BLOCK_SIZE;
138 
139                             streamCRC = 0;
140                             currentState = State.INIT_BLOCK;
141                             // fall through
142                         case INIT_BLOCK:
143                             if (!reader.hasReadableBytes(10)) {
144                                 return null;
145                             }
146                             // Get the block magic bytes.
147                             final int magic1 = reader.readBits(24);
148                             final int magic2 = reader.readBits(24);
149                             if (magic1 == END_OF_STREAM_MAGIC_1 && magic2 == END_OF_STREAM_MAGIC_2) {
150                                 // End of stream was reached. Check the combined CRC.
151                                 final int storedCombinedCRC = reader.readInt();
152                                 if (storedCombinedCRC != streamCRC) {
153                                     currentState = State.EOF;
154                                     throw new DecompressionException("stream CRC error");
155                                 }
156                                 currentState = State.EOF;
157                                 break;
158                             }
159                             if (magic1 != BLOCK_HEADER_MAGIC_1 || magic2 != BLOCK_HEADER_MAGIC_2) {
160                                 currentState = State.EOF;
161                                 throw new DecompressionException("bad block header");
162                             }
163                             blockCRC = reader.readInt();
164                             currentState = State.INIT_BLOCK_PARAMS;
165                             // fall through
166                         case INIT_BLOCK_PARAMS:
167                             if (!reader.hasReadableBits(25)) {
168                                 return null;
169                             }
170                             final boolean blockRandomised = reader.readBoolean();
171                             final int bwtStartPointer = reader.readBits(24);
172 
173                             blockDecompressor = new Bzip2BlockDecompressor(this.blockSize, blockCRC,
174                                     blockRandomised, bwtStartPointer, reader);
175                             currentState = State.RECEIVE_HUFFMAN_USED_MAP;
176                             // fall through
177                         case RECEIVE_HUFFMAN_USED_MAP:
178                             if (!reader.hasReadableBits(16)) {
179                                 return null;
180                             }
181                             blockDecompressor.huffmanInUse16 = reader.readBits(16);
182                             currentState = State.RECEIVE_HUFFMAN_USED_BITMAPS;
183                             // fall through
184                         case RECEIVE_HUFFMAN_USED_BITMAPS:
185                             Bzip2BlockDecompressor blockDecompressor = this.blockDecompressor;
186                             final int inUse16 = blockDecompressor.huffmanInUse16;
187                             final int bitNumber = Integer.bitCount(inUse16);
188                             final byte[] huffmanSymbolMap = blockDecompressor.huffmanSymbolMap;
189 
190                             if (!reader.hasReadableBits(bitNumber * HUFFMAN_SYMBOL_RANGE_SIZE + 3)) {
191                                 return null;
192                             }
193 
194                             int huffmanSymbolCount = 0;
195                             if (bitNumber > 0) {
196                                 for (int i = 0; i < 16; i++) {
197                                     if ((inUse16 & 1 << 15 >>> i) != 0) {
198                                         for (int j = 0, k = i << 4; j < HUFFMAN_SYMBOL_RANGE_SIZE; j++, k++) {
199                                             if (reader.readBoolean()) {
200                                                 huffmanSymbolMap[huffmanSymbolCount++] = (byte) k;
201                                             }
202                                         }
203                                     }
204                                 }
205                             }
206                             blockDecompressor.huffmanEndOfBlockSymbol = huffmanSymbolCount + 1;
207 
208                             int totalTables = reader.readBits(3);
209                             if (totalTables < HUFFMAN_MINIMUM_TABLES || totalTables > HUFFMAN_MAXIMUM_TABLES) {
210                                 throw new DecompressionException("incorrect huffman groups number");
211                             }
212                             int alphaSize = huffmanSymbolCount + 2;
213                             if (alphaSize > HUFFMAN_MAX_ALPHABET_SIZE) {
214                                 throw new DecompressionException("incorrect alphabet size");
215                             }
216                             huffmanStageDecoder = new Bzip2HuffmanStageDecoder(reader, totalTables, alphaSize);
217                             currentState = State.RECEIVE_SELECTORS_NUMBER;
218                             // fall through
219                         case RECEIVE_SELECTORS_NUMBER:
220                             if (!reader.hasReadableBits(15)) {
221                                 return null;
222                             }
223                             int totalSelectors = reader.readBits(15);
224                             if (totalSelectors < 1 || totalSelectors > MAX_SELECTORS) {
225                                 currentState = State.EOF;
226                                 throw new DecompressionException("incorrect selectors number");
227                             }
228                             huffmanStageDecoder.selectors = new byte[totalSelectors];
229 
230                             currentState = State.RECEIVE_SELECTORS;
231                             // fall through
232                         case RECEIVE_SELECTORS:
233                             Bzip2HuffmanStageDecoder huffmanStageDecoder = this.huffmanStageDecoder;
234                             byte[] selectors = huffmanStageDecoder.selectors;
235                             totalSelectors = selectors.length;
236                             final Bzip2MoveToFrontTable tableMtf = huffmanStageDecoder.tableMTF;
237 
238                             int currSelector;
239                             // Get zero-terminated bit runs (0..62) of MTF'ed Huffman table. length = 1..6
240                             for (currSelector = huffmanStageDecoder.currentSelector;
241                                  currSelector < totalSelectors; currSelector++) {
242                                 if (!reader.hasReadableBits(HUFFMAN_SELECTOR_LIST_MAX_LENGTH)) {
243                                     // Save state if end of current ByteBuf was reached
244                                     huffmanStageDecoder.currentSelector = currSelector;
245                                     return null;
246                                 }
247                                 int index = 0;
248                                 while (reader.readBoolean()) {
249                                     index++;
250                                 }
251                                 selectors[currSelector] = tableMtf.indexToFront(index);
252                             }
253 
254                             currentState = State.RECEIVE_HUFFMAN_LENGTH;
255                             // fall through
256                         case RECEIVE_HUFFMAN_LENGTH:
257                             huffmanStageDecoder = this.huffmanStageDecoder;
258                             totalTables = huffmanStageDecoder.totalTables;
259                             final byte[][] codeLength = huffmanStageDecoder.tableCodeLengths;
260                             alphaSize = huffmanStageDecoder.alphabetSize;
261 
262                             /* Now the coding tables */
263                             int currGroup;
264                             int currLength = huffmanStageDecoder.currentLength;
265                             int currAlpha = 0;
266                             boolean modifyLength = huffmanStageDecoder.modifyLength;
267                             boolean saveStateAndReturn = false;
268                             loop: for (currGroup = huffmanStageDecoder.currentGroup;
269                                        currGroup < totalTables; currGroup++) {
270                                 // start_huffman_length
271                                 if (!reader.hasReadableBits(5)) {
272                                     saveStateAndReturn = true;
273                                     break;
274                                 }
275                                 if (currLength < 0) {
276                                     currLength = reader.readBits(5);
277                                 }
278                                 for (currAlpha = huffmanStageDecoder.currentAlpha; currAlpha < alphaSize; currAlpha++) {
279                                     // delta_bit_length: 1..40
280                                     if (!reader.isReadable()) {
281                                         saveStateAndReturn = true;
282                                         break loop;
283                                     }
284                                     while (modifyLength || reader.readBoolean()) {  // 0=>next symbol; 1=>alter length
285                                         if (!reader.isReadable()) {
286                                             modifyLength = true;
287                                             saveStateAndReturn = true;
288                                             break loop;
289                                         }
290                                         // 1=>decrement length;  0=>increment length
291                                         currLength += reader.readBoolean() ? -1 : 1;
292                                         modifyLength = false;
293                                         if (!reader.isReadable()) {
294                                             saveStateAndReturn = true;
295                                             break loop;
296                                         }
297                                     }
298                                     codeLength[currGroup][currAlpha] = (byte) currLength;
299                                 }
300                                 currLength = -1;
301                                 currAlpha = huffmanStageDecoder.currentAlpha = 0;
302                                 modifyLength = false;
303                             }
304                             if (saveStateAndReturn) {
305                                 // Save state if end of current ByteBuf was reached
306                                 huffmanStageDecoder.currentGroup = currGroup;
307                                 huffmanStageDecoder.currentLength = currLength;
308                                 huffmanStageDecoder.currentAlpha = currAlpha;
309                                 huffmanStageDecoder.modifyLength = modifyLength;
310                                 return null;
311                             }
312 
313                             // Finally create the Huffman tables
314                             huffmanStageDecoder.createHuffmanDecodingTables();
315                             currentState = State.DECODE_HUFFMAN_DATA;
316                             // fall through
317                         case DECODE_HUFFMAN_DATA:
318                             blockDecompressor = this.blockDecompressor;
319                             final int oldReaderIndex = in.readerOffset();
320                             final boolean decoded = blockDecompressor.decodeHuffmanData(this.huffmanStageDecoder);
321                             if (!decoded) {
322                                 return null;
323                             }
324                             // It used to avoid "Bzip2Decoder.decode() did not read anything but decoded a message"
325                             // exception. Because previous operation may read only a few bits from
326                             // Bzip2BitReader.bitBuffer and don't read incoming ByteBuf.
327                             if (in.readerOffset() == oldReaderIndex && in.readableBytes() > 0) {
328                                 reader.refill();
329                             }
330 
331                             final int blockLength = blockDecompressor.blockLength();
332                             Buffer uncompressed = allocator.allocate(blockLength);
333                             try {
334                                 int uncByte;
335                                 while ((uncByte = blockDecompressor.read()) >= 0) {
336                                     uncompressed.writeByte((byte) uncByte);
337                                 }
338                                 // We did read all the data, lets reset the state and do the CRC check.
339                                 currentState = State.INIT_BLOCK;
340                                 int currentBlockCRC = blockDecompressor.checkCRC();
341                                 streamCRC = (streamCRC << 1 | streamCRC >>> 31) ^ currentBlockCRC;
342 
343                                 // Return here so the ByteBuf that was put in the List will be forwarded to the user
344                                 // and so can be released as soon as possible.
345                                 Buffer data = uncompressed;
346                                 uncompressed = null;
347                                 return data;
348                             } finally {
349                                 if (uncompressed != null) {
350                                     uncompressed.close();
351                                 }
352                             }
353                         case EOF:
354                             return allocator.allocate(0);
355                         default:
356                             throw new IllegalStateException();
357                     }
358                 }
359         }
360     }
361 
362     /**
363      * Returns {@code true} if and only if the end of the compressed stream
364      * has been reached.
365      */
366     @Override
367     public boolean isFinished() {
368         return currentState == State.EOF || currentState == State.CLOSED;
369     }
370 
371     @Override
372     public void close() {
373         currentState = State.CLOSED;
374     }
375 
376     @Override
377     public boolean isClosed() {
378         return currentState == State.CLOSED;
379     }
380 }