View Javadoc
1   /*
2    * Copyright 2014 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.handler.codec.compression;
17  
18  import io.netty.buffer.ByteBuf;
19  import io.netty.channel.ChannelHandlerContext;
20  import io.netty.handler.codec.ByteToMessageDecoder;
21  
22  import java.util.List;
23  
24  import static io.netty.handler.codec.compression.Bzip2Constants.BASE_BLOCK_SIZE;
25  import static io.netty.handler.codec.compression.Bzip2Constants.BLOCK_HEADER_MAGIC_1;
26  import static io.netty.handler.codec.compression.Bzip2Constants.BLOCK_HEADER_MAGIC_2;
27  import static io.netty.handler.codec.compression.Bzip2Constants.END_OF_STREAM_MAGIC_1;
28  import static io.netty.handler.codec.compression.Bzip2Constants.END_OF_STREAM_MAGIC_2;
29  import static io.netty.handler.codec.compression.Bzip2Constants.HUFFMAN_MAXIMUM_TABLES;
30  import static io.netty.handler.codec.compression.Bzip2Constants.HUFFMAN_MAX_ALPHABET_SIZE;
31  import static io.netty.handler.codec.compression.Bzip2Constants.HUFFMAN_MINIMUM_TABLES;
32  import static io.netty.handler.codec.compression.Bzip2Constants.HUFFMAN_SELECTOR_LIST_MAX_LENGTH;
33  import static io.netty.handler.codec.compression.Bzip2Constants.HUFFMAN_SYMBOL_RANGE_SIZE;
34  import static io.netty.handler.codec.compression.Bzip2Constants.MAGIC_NUMBER;
35  import static io.netty.handler.codec.compression.Bzip2Constants.MAX_BLOCK_SIZE;
36  import static io.netty.handler.codec.compression.Bzip2Constants.MAX_SELECTORS;
37  import static io.netty.handler.codec.compression.Bzip2Constants.MIN_BLOCK_SIZE;
38  
39  /**
40   * Uncompresses a {@link ByteBuf} encoded with the Bzip2 format.
41   *
42   * See <a href="https://en.wikipedia.org/wiki/Bzip2">Bzip2</a>.
43   */
44  public class Bzip2Decoder extends ByteToMessageDecoder {
45      /**
46       * Current state of stream.
47       */
48      private enum State {
49          INIT,
50          INIT_BLOCK,
51          INIT_BLOCK_PARAMS,
52          RECEIVE_HUFFMAN_USED_MAP,
53          RECEIVE_HUFFMAN_USED_BITMAPS,
54          RECEIVE_SELECTORS_NUMBER,
55          RECEIVE_SELECTORS,
56          RECEIVE_HUFFMAN_LENGTH,
57          DECODE_HUFFMAN_DATA,
58          EOF
59      }
60      private State currentState = State.INIT;
61  
62      /**
63       * A reader that provides bit-level reads.
64       */
65      private final Bzip2BitReader reader = new Bzip2BitReader();
66  
67      /**
68       * The decompressor for the current block.
69       */
70      private Bzip2BlockDecompressor blockDecompressor;
71  
72      /**
73       * Bzip2 Huffman coding stage.
74       */
75      private Bzip2HuffmanStageDecoder huffmanStageDecoder;
76  
77      /**
78       * Always: in the range 0 .. 9. The current block size is 100000 * this number.
79       */
80      private int blockSize;
81  
82      /**
83       * The CRC of the current block as read from the block header.
84       */
85      private int blockCRC;
86  
87      /**
88       * The merged CRC of all blocks decompressed so far.
89       */
90      private int streamCRC;
91  
92      @Override
93      protected void decode(ChannelHandlerContext ctx, ByteBuf in, List<Object> out) throws Exception {
94          if (!in.isReadable()) {
95              return;
96          }
97  
98          final Bzip2BitReader reader = this.reader;
99          reader.setByteBuf(in);
100 
101         for (;;) {
102             switch (currentState) {
103             case INIT:
104                 if (in.readableBytes() < 4) {
105                     return;
106                 }
107                 int magicNumber = in.readUnsignedMedium();
108                 if (magicNumber != MAGIC_NUMBER) {
109                     throw new DecompressionException("Unexpected stream identifier contents. Mismatched bzip2 " +
110                             "protocol version?");
111                 }
112                 int blockSize = in.readByte() - '0';
113                 if (blockSize < MIN_BLOCK_SIZE || blockSize > MAX_BLOCK_SIZE) {
114                     throw new DecompressionException("block size is invalid");
115                 }
116                 this.blockSize = blockSize * BASE_BLOCK_SIZE;
117 
118                 streamCRC = 0;
119                 currentState = State.INIT_BLOCK;
120                  // fall through
121             case INIT_BLOCK:
122                 if (!reader.hasReadableBytes(10)) {
123                     return;
124                 }
125                 // Get the block magic bytes.
126                 final int magic1 = reader.readBits(24);
127                 final int magic2 = reader.readBits(24);
128                 if (magic1 == END_OF_STREAM_MAGIC_1 && magic2 == END_OF_STREAM_MAGIC_2) {
129                     // End of stream was reached. Check the combined CRC.
130                     final int storedCombinedCRC = reader.readInt();
131                     if (storedCombinedCRC != streamCRC) {
132                         throw new DecompressionException("stream CRC error");
133                     }
134                     currentState = State.EOF;
135                     break;
136                 }
137                 if (magic1 != BLOCK_HEADER_MAGIC_1 || magic2 != BLOCK_HEADER_MAGIC_2) {
138                     throw new DecompressionException("bad block header");
139                 }
140                 blockCRC = reader.readInt();
141                 currentState = State.INIT_BLOCK_PARAMS;
142                 // fall through
143             case INIT_BLOCK_PARAMS:
144                 if (!reader.hasReadableBits(25)) {
145                     return;
146                 }
147                 final boolean blockRandomised = reader.readBoolean();
148                 final int bwtStartPointer = reader.readBits(24);
149 
150                 blockDecompressor = new Bzip2BlockDecompressor(this.blockSize, blockCRC,
151                                                                 blockRandomised, bwtStartPointer, reader);
152                 currentState = State.RECEIVE_HUFFMAN_USED_MAP;
153                 // fall through
154             case RECEIVE_HUFFMAN_USED_MAP:
155                 if (!reader.hasReadableBits(16)) {
156                     return;
157                 }
158                 blockDecompressor.huffmanInUse16 = reader.readBits(16);
159                 currentState = State.RECEIVE_HUFFMAN_USED_BITMAPS;
160                 // fall through
161             case RECEIVE_HUFFMAN_USED_BITMAPS:
162                 Bzip2BlockDecompressor blockDecompressor = this.blockDecompressor;
163                 final int inUse16 = blockDecompressor.huffmanInUse16;
164                 final int bitNumber = Integer.bitCount(inUse16);
165                 final byte[] huffmanSymbolMap = blockDecompressor.huffmanSymbolMap;
166 
167                 if (!reader.hasReadableBits(bitNumber * HUFFMAN_SYMBOL_RANGE_SIZE + 3)) {
168                     return;
169                 }
170 
171                 int huffmanSymbolCount = 0;
172                 if (bitNumber > 0) {
173                     for (int i = 0; i < 16; i++) {
174                         if ((inUse16 & 1 << 15 >>> i) != 0) {
175                             for (int j = 0, k = i << 4; j < HUFFMAN_SYMBOL_RANGE_SIZE; j++, k++) {
176                                 if (reader.readBoolean()) {
177                                     huffmanSymbolMap[huffmanSymbolCount++] = (byte) k;
178                                 }
179                             }
180                         }
181                     }
182                 }
183                 blockDecompressor.huffmanEndOfBlockSymbol = huffmanSymbolCount + 1;
184 
185                 int totalTables = reader.readBits(3);
186                 if (totalTables < HUFFMAN_MINIMUM_TABLES || totalTables > HUFFMAN_MAXIMUM_TABLES) {
187                     throw new DecompressionException("incorrect huffman groups number");
188                 }
189                 int alphaSize = huffmanSymbolCount + 2;
190                 if (alphaSize > HUFFMAN_MAX_ALPHABET_SIZE) {
191                     throw new DecompressionException("incorrect alphabet size");
192                 }
193                 huffmanStageDecoder = new Bzip2HuffmanStageDecoder(reader, totalTables, alphaSize);
194                 currentState = State.RECEIVE_SELECTORS_NUMBER;
195                 // fall through
196             case RECEIVE_SELECTORS_NUMBER:
197                 if (!reader.hasReadableBits(15)) {
198                     return;
199                 }
200                 int totalSelectors = reader.readBits(15);
201                 if (totalSelectors < 1 || totalSelectors > MAX_SELECTORS) {
202                     throw new DecompressionException("incorrect selectors number");
203                 }
204                 huffmanStageDecoder.selectors = new byte[totalSelectors];
205 
206                 currentState = State.RECEIVE_SELECTORS;
207                 // fall through
208             case RECEIVE_SELECTORS:
209                 Bzip2HuffmanStageDecoder huffmanStageDecoder = this.huffmanStageDecoder;
210                 byte[] selectors = huffmanStageDecoder.selectors;
211                 totalSelectors = selectors.length;
212                 final Bzip2MoveToFrontTable tableMtf = huffmanStageDecoder.tableMTF;
213 
214                 int currSelector;
215                 // Get zero-terminated bit runs (0..62) of MTF'ed Huffman table. length = 1..6
216                 for (currSelector = huffmanStageDecoder.currentSelector;
217                             currSelector < totalSelectors; currSelector++) {
218                     if (!reader.hasReadableBits(HUFFMAN_SELECTOR_LIST_MAX_LENGTH)) {
219                         // Save state if end of current ByteBuf was reached
220                         huffmanStageDecoder.currentSelector = currSelector;
221                         return;
222                     }
223                     int index = 0;
224                     while (reader.readBoolean()) {
225                         index++;
226                     }
227                     selectors[currSelector] = tableMtf.indexToFront(index);
228                 }
229 
230                 currentState = State.RECEIVE_HUFFMAN_LENGTH;
231                 // fall through
232             case RECEIVE_HUFFMAN_LENGTH:
233                 huffmanStageDecoder = this.huffmanStageDecoder;
234                 totalTables = huffmanStageDecoder.totalTables;
235                 final byte[][] codeLength = huffmanStageDecoder.tableCodeLengths;
236                 alphaSize = huffmanStageDecoder.alphabetSize;
237 
238                 /* Now the coding tables */
239                 int currGroup;
240                 int currLength = huffmanStageDecoder.currentLength;
241                 int currAlpha = 0;
242                 boolean modifyLength = huffmanStageDecoder.modifyLength;
243                 boolean saveStateAndReturn = false;
244                 loop: for (currGroup = huffmanStageDecoder.currentGroup; currGroup < totalTables; currGroup++) {
245                     // start_huffman_length
246                     if (!reader.hasReadableBits(5)) {
247                         saveStateAndReturn = true;
248                         break;
249                     }
250                     if (currLength < 0) {
251                         currLength = reader.readBits(5);
252                     }
253                     for (currAlpha = huffmanStageDecoder.currentAlpha; currAlpha < alphaSize; currAlpha++) {
254                         // delta_bit_length: 1..40
255                         if (!reader.isReadable()) {
256                             saveStateAndReturn = true;
257                             break loop;
258                         }
259                         while (modifyLength || reader.readBoolean()) {  // 0=>next symbol; 1=>alter length
260                             if (!reader.isReadable()) {
261                                 modifyLength = true;
262                                 saveStateAndReturn = true;
263                                 break loop;
264                             }
265                             // 1=>decrement length;  0=>increment length
266                             currLength += reader.readBoolean() ? -1 : 1;
267                             modifyLength = false;
268                             if (!reader.isReadable()) {
269                                 saveStateAndReturn = true;
270                                 break loop;
271                             }
272                         }
273                         codeLength[currGroup][currAlpha] = (byte) currLength;
274                     }
275                     currLength = -1;
276                     currAlpha = huffmanStageDecoder.currentAlpha = 0;
277                     modifyLength = false;
278                 }
279                 if (saveStateAndReturn) {
280                     // Save state if end of current ByteBuf was reached
281                     huffmanStageDecoder.currentGroup = currGroup;
282                     huffmanStageDecoder.currentLength = currLength;
283                     huffmanStageDecoder.currentAlpha = currAlpha;
284                     huffmanStageDecoder.modifyLength = modifyLength;
285                     return;
286                 }
287 
288                 // Finally create the Huffman tables
289                 huffmanStageDecoder.createHuffmanDecodingTables();
290                 currentState = State.DECODE_HUFFMAN_DATA;
291                 // fall through
292             case DECODE_HUFFMAN_DATA:
293                 blockDecompressor = this.blockDecompressor;
294                 final int oldReaderIndex = in.readerIndex();
295                 final boolean decoded = blockDecompressor.decodeHuffmanData(this.huffmanStageDecoder);
296                 if (!decoded) {
297                     return;
298                 }
299                 // It used to avoid "Bzip2Decoder.decode() did not read anything but decoded a message" exception.
300                 // Because previous operation may read only a few bits from Bzip2BitReader.bitBuffer and
301                 // don't read incoming ByteBuf.
302                 if (in.readerIndex() == oldReaderIndex && in.isReadable()) {
303                     reader.refill();
304                 }
305 
306                 final int blockLength = blockDecompressor.blockLength();
307                 ByteBuf uncompressed = ctx.alloc().buffer(blockLength);
308                 try {
309                     int uncByte;
310                     while ((uncByte = blockDecompressor.read()) >= 0) {
311                         uncompressed.writeByte(uncByte);
312                     }
313                     // We did read all the data, lets reset the state and do the CRC check.
314                     currentState = State.INIT_BLOCK;
315                     int currentBlockCRC = blockDecompressor.checkCRC();
316                     streamCRC = (streamCRC << 1 | streamCRC >>> 31) ^ currentBlockCRC;
317 
318                     out.add(uncompressed);
319                     uncompressed = null;
320                 } finally {
321                     if (uncompressed != null) {
322                         uncompressed.release();
323                     }
324                 }
325                 // Return here so the ByteBuf that was put in the List will be forwarded to the user and so can be
326                 // released as soon as possible.
327                 return;
328             case EOF:
329                 in.skipBytes(in.readableBytes());
330                 return;
331             default:
332                 throw new IllegalStateException();
333             }
334         }
335     }
336 
337     /**
338      * Returns {@code true} if and only if the end of the compressed stream
339      * has been reached.
340      */
341     public boolean isClosed() {
342         return currentState == State.EOF;
343     }
344 }