View Javadoc
1   /*
2    * Copyright 2014 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.handler.codec.compression;
17  
18  import static io.netty.handler.codec.compression.Bzip2Constants.HUFFMAN_DECODE_MAX_CODE_LENGTH;
19  import static io.netty.handler.codec.compression.Bzip2Constants.HUFFMAN_GROUP_RUN_LENGTH;
20  import static io.netty.handler.codec.compression.Bzip2Constants.HUFFMAN_MAX_ALPHABET_SIZE;
21  
22  /**
23   * A decoder for the Bzip2 Huffman coding stage.
24   */
25  final class Bzip2HuffmanStageDecoder {
26      /**
27       * A reader that provides bit-level reads.
28       */
29      private final Bzip2BitReader reader;
30  
31      /**
32       * The Huffman table number to use for each group of 50 symbols.
33       */
34      byte[] selectors;
35  
36      /**
37       * The minimum code length for each Huffman table.
38       */
39      private final int[] minimumLengths;
40  
41      /**
42       * An array of values for each Huffman table that must be subtracted from the numerical value of
43       * a Huffman code of a given bit length to give its canonical code index.
44       */
45      private final int[][] codeBases;
46  
47      /**
48       * An array of values for each Huffman table that gives the highest numerical value of a Huffman
49       * code of a given bit length.
50       */
51      private final int[][] codeLimits;
52  
53      /**
54       * A mapping for each Huffman table from canonical code index to output symbol.
55       */
56      private final int[][] codeSymbols;
57  
58      /**
59       * The Huffman table for the current group.
60       */
61      private int currentTable;
62  
63      /**
64       * The index of the current group within the selectors array.
65       */
66      private int groupIndex = -1;
67  
68      /**
69       * The byte position within the current group. A new group is selected every 50 decoded bytes.
70       */
71      private int groupPosition = -1;
72  
73      /**
74       * Total number of used Huffman tables in range 2..6.
75       */
76      final int totalTables;
77  
78      /**
79       * The total number of codes (uniform for each table).
80       */
81      final int alphabetSize;
82  
83      /**
84       * Table for Move To Front transformations.
85       */
86      final Bzip2MoveToFrontTable tableMTF = new Bzip2MoveToFrontTable();
87  
88      // For saving state if end of current ByteBuf was reached
89      int currentSelector;
90  
91      /**
92       * The Canonical Huffman code lengths for each table.
93       */
94      final byte[][] tableCodeLengths;
95  
96      // For saving state if end of current ByteBuf was reached
97      int currentGroup;
98      int currentLength = -1;
99      int currentAlpha;
100     boolean modifyLength;
101 
102     Bzip2HuffmanStageDecoder(final Bzip2BitReader reader, final int totalTables, final int alphabetSize) {
103         this.reader = reader;
104         this.totalTables = totalTables;
105         this.alphabetSize = alphabetSize;
106 
107         minimumLengths = new int[totalTables];
108         codeBases = new int[totalTables][HUFFMAN_DECODE_MAX_CODE_LENGTH + 2];
109         codeLimits = new int[totalTables][HUFFMAN_DECODE_MAX_CODE_LENGTH + 1];
110         codeSymbols = new int[totalTables][HUFFMAN_MAX_ALPHABET_SIZE];
111         tableCodeLengths = new byte[totalTables][HUFFMAN_MAX_ALPHABET_SIZE];
112     }
113 
114     /**
115      * Constructs Huffman decoding tables from lists of Canonical Huffman code lengths.
116      */
117     void createHuffmanDecodingTables() {
118         final int alphabetSize = this.alphabetSize;
119 
120         for (int table = 0; table < tableCodeLengths.length; table++) {
121             final int[] tableBases = codeBases[table];
122             final int[] tableLimits = codeLimits[table];
123             final int[] tableSymbols = codeSymbols[table];
124             final byte[] codeLengths = tableCodeLengths[table];
125 
126             int minimumLength = HUFFMAN_DECODE_MAX_CODE_LENGTH;
127             int maximumLength = 0;
128 
129             // Find the minimum and maximum code length for the table
130             for (int i = 0; i < alphabetSize; i++) {
131                 final byte currLength = codeLengths[i];
132                 maximumLength = Math.max(currLength, maximumLength);
133                 minimumLength = Math.min(currLength, minimumLength);
134             }
135             minimumLengths[table] = minimumLength;
136 
137             // Calculate the first output symbol for each code length
138             for (int i = 0; i < alphabetSize; i++) {
139                 tableBases[codeLengths[i] + 1]++;
140             }
141             for (int i = 1, b = tableBases[0]; i < HUFFMAN_DECODE_MAX_CODE_LENGTH + 2; i++) {
142                 b += tableBases[i];
143                 tableBases[i] = b;
144             }
145 
146             // Calculate the first and last Huffman code for each code length (codes at a given
147             // length are sequential in value)
148             for (int i = minimumLength, code = 0; i <= maximumLength; i++) {
149                 int base = code;
150                 code += tableBases[i + 1] - tableBases[i];
151                 tableBases[i] = base - tableBases[i];
152                 tableLimits[i] = code - 1;
153                 code <<= 1;
154             }
155 
156             // Populate the mapping from canonical code index to output symbol
157             for (int bitLength = minimumLength, codeIndex = 0; bitLength <= maximumLength; bitLength++) {
158                 for (int symbol = 0; symbol < alphabetSize; symbol++) {
159                     if (codeLengths[symbol] == bitLength) {
160                         tableSymbols[codeIndex++] = symbol;
161                     }
162                 }
163             }
164         }
165 
166         currentTable = selectors[0];
167     }
168 
169     /**
170      * Decodes and returns the next symbol.
171      * @return The decoded symbol
172      */
173     int nextSymbol() {
174         // Move to next group selector if required
175         if (++groupPosition % HUFFMAN_GROUP_RUN_LENGTH == 0) {
176             groupIndex++;
177             if (groupIndex == selectors.length) {
178                 throw new DecompressionException("error decoding block");
179             }
180             currentTable = selectors[groupIndex] & 0xff;
181         }
182 
183         final Bzip2BitReader reader = this.reader;
184         final int currentTable = this.currentTable;
185         final int[] tableLimits = codeLimits[currentTable];
186         final int[] tableBases = codeBases[currentTable];
187         final int[] tableSymbols = codeSymbols[currentTable];
188         int codeLength = minimumLengths[currentTable];
189 
190         // Starting with the minimum bit length for the table, read additional bits one at a time
191         // until a complete code is recognised
192         int codeBits = reader.readBits(codeLength);
193         for (; codeLength <= HUFFMAN_DECODE_MAX_CODE_LENGTH; codeLength++) {
194             if (codeBits <= tableLimits[codeLength]) {
195                 // Convert the code to a symbol index and return
196                 return tableSymbols[codeBits - tableBases[codeLength]];
197             }
198             codeBits = codeBits << 1 | reader.readBits(1);
199         }
200 
201         throw new DecompressionException("a valid code was not recognised");
202     }
203 }