View Javadoc
1   /*
2    * Copyright 2014 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.handler.codec.compression;
17  
18  import static io.netty.handler.codec.compression.Bzip2Constants.*;
19  
20  /**
21   * A decoder for the Bzip2 Huffman coding stage.
22   */
23  final class Bzip2HuffmanStageDecoder {
24      /**
25       * A reader that provides bit-level reads.
26       */
27      private final Bzip2BitReader reader;
28  
29      /**
30       * The Huffman table number to use for each group of 50 symbols.
31       */
32      byte[] selectors;
33  
34      /**
35       * The minimum code length for each Huffman table.
36       */
37      private final int[] minimumLengths;
38  
39      /**
40       * An array of values for each Huffman table that must be subtracted from the numerical value of
41       * a Huffman code of a given bit length to give its canonical code index.
42       */
43      private final int[][] codeBases;
44  
45      /**
46       * An array of values for each Huffman table that gives the highest numerical value of a Huffman
47       * code of a given bit length.
48       */
49      private final int[][] codeLimits;
50  
51      /**
52       * A mapping for each Huffman table from canonical code index to output symbol.
53       */
54      private final int[][] codeSymbols;
55  
56      /**
57       * The Huffman table for the current group.
58       */
59      private int currentTable;
60  
61      /**
62       * The index of the current group within the selectors array.
63       */
64      private int groupIndex = -1;
65  
66      /**
67       * The byte position within the current group. A new group is selected every 50 decoded bytes.
68       */
69      private int groupPosition = -1;
70  
71      /**
72       * Total number of used Huffman tables in range 2..6.
73       */
74      final int totalTables;
75  
76      /**
77       * The total number of codes (uniform for each table).
78       */
79      final int alphabetSize;
80  
81      /**
82       * Table for Move To Front transformations.
83       */
84      final Bzip2MoveToFrontTable tableMTF = new Bzip2MoveToFrontTable();
85  
86      // For saving state if end of current ByteBuf was reached
87      int currentSelector;
88  
89      /**
90       * The Canonical Huffman code lengths for each table.
91       */
92      final byte[][] tableCodeLengths;
93  
94      // For saving state if end of current ByteBuf was reached
95      int currentGroup;
96      int currentLength = -1;
97      int currentAlpha;
98      boolean modifyLength;
99  
100     Bzip2HuffmanStageDecoder(final Bzip2BitReader reader, final int totalTables, final int alphabetSize) {
101         this.reader = reader;
102         this.totalTables = totalTables;
103         this.alphabetSize = alphabetSize;
104 
105         minimumLengths = new int[totalTables];
106         codeBases = new int[totalTables][HUFFMAN_DECODE_MAX_CODE_LENGTH + 2];
107         codeLimits = new int[totalTables][HUFFMAN_DECODE_MAX_CODE_LENGTH + 1];
108         codeSymbols = new int[totalTables][HUFFMAN_MAX_ALPHABET_SIZE];
109         tableCodeLengths = new byte[totalTables][HUFFMAN_MAX_ALPHABET_SIZE];
110     }
111 
112     /**
113      * Constructs Huffman decoding tables from lists of Canonical Huffman code lengths.
114      */
115     void createHuffmanDecodingTables() {
116         final int alphabetSize = this.alphabetSize;
117 
118         for (int table = 0; table < tableCodeLengths.length; table++) {
119             final int[] tableBases = codeBases[table];
120             final int[] tableLimits = codeLimits[table];
121             final int[] tableSymbols = codeSymbols[table];
122             final byte[] codeLengths = tableCodeLengths[table];
123 
124             int minimumLength = HUFFMAN_DECODE_MAX_CODE_LENGTH;
125             int maximumLength = 0;
126 
127             // Find the minimum and maximum code length for the table
128             for (int i = 0; i < alphabetSize; i++) {
129                 final byte currLength = codeLengths[i];
130                 maximumLength = Math.max(currLength, maximumLength);
131                 minimumLength = Math.min(currLength, minimumLength);
132             }
133             minimumLengths[table] = minimumLength;
134 
135             // Calculate the first output symbol for each code length
136             for (int i = 0; i < alphabetSize; i++) {
137                 tableBases[codeLengths[i] + 1]++;
138             }
139             for (int i = 1, b = tableBases[0]; i < HUFFMAN_DECODE_MAX_CODE_LENGTH + 2; i++) {
140                 b += tableBases[i];
141                 tableBases[i] = b;
142             }
143 
144             // Calculate the first and last Huffman code for each code length (codes at a given
145             // length are sequential in value)
146             for (int i = minimumLength, code = 0; i <= maximumLength; i++) {
147                 int base = code;
148                 code += tableBases[i + 1] - tableBases[i];
149                 tableBases[i] = base - tableBases[i];
150                 tableLimits[i] = code - 1;
151                 code <<= 1;
152             }
153 
154             // Populate the mapping from canonical code index to output symbol
155             for (int bitLength = minimumLength, codeIndex = 0; bitLength <= maximumLength; bitLength++) {
156                 for (int symbol = 0; symbol < alphabetSize; symbol++) {
157                     if (codeLengths[symbol] == bitLength) {
158                         tableSymbols[codeIndex++] = symbol;
159                     }
160                 }
161             }
162         }
163 
164         currentTable = selectors[0];
165     }
166 
167     /**
168      * Decodes and returns the next symbol.
169      * @return The decoded symbol
170      */
171     int nextSymbol() {
172         // Move to next group selector if required
173         if (++groupPosition % HUFFMAN_GROUP_RUN_LENGTH == 0) {
174             groupIndex++;
175             if (groupIndex == selectors.length) {
176                 throw new DecompressionException("error decoding block");
177             }
178             currentTable = selectors[groupIndex] & 0xff;
179         }
180 
181         final Bzip2BitReader reader = this.reader;
182         final int currentTable = this.currentTable;
183         final int[] tableLimits = codeLimits[currentTable];
184         final int[] tableBases = codeBases[currentTable];
185         final int[] tableSymbols = codeSymbols[currentTable];
186         int codeLength = minimumLengths[currentTable];
187 
188         // Starting with the minimum bit length for the table, read additional bits one at a time
189         // until a complete code is recognised
190         int codeBits = reader.readBits(codeLength);
191         for (; codeLength <= HUFFMAN_DECODE_MAX_CODE_LENGTH; codeLength++) {
192             if (codeBits <= tableLimits[codeLength]) {
193                 // Convert the code to a symbol index and return
194                 return tableSymbols[codeBits - tableBases[codeLength]];
195             }
196             codeBits = codeBits << 1 | reader.readBits(1);
197         }
198 
199         throw new DecompressionException("a valid code was not recognised");
200     }
201 }