1   
2   
3   
4   
5   
6   
7   
8   
9   
10  
11  
12  
13  
14  
15  
16  package io.netty.handler.codec.compression;
17  
18  import io.netty.buffer.ByteBuf;
19  
20  import java.util.Arrays;
21  
22  import static io.netty.handler.codec.compression.Bzip2Constants.HUFFMAN_ENCODE_MAX_CODE_LENGTH;
23  import static io.netty.handler.codec.compression.Bzip2Constants.HUFFMAN_GROUP_RUN_LENGTH;
24  
25  
26  
27  
28  final class Bzip2HuffmanStageEncoder {
29      
30  
31  
32      private static final int HUFFMAN_HIGH_SYMBOL_COST = 15;
33  
34      
35  
36  
37      private final Bzip2BitWriter writer;
38  
39      
40  
41  
42      private final char[] mtfBlock;
43  
44      
45  
46  
47      private final int mtfLength;
48  
49      
50  
51  
52      private final int mtfAlphabetSize;
53  
54      
55  
56  
57      private final int[] mtfSymbolFrequencies;
58  
59      
60  
61  
62      private final int[][] huffmanCodeLengths;
63  
64      
65  
66  
67      private final int[][] huffmanMergedCodeSymbols;
68  
69      
70  
71  
72      private final byte[] selectors;
73  
74      
75  
76  
77  
78  
79  
80  
81      Bzip2HuffmanStageEncoder(final Bzip2BitWriter writer, final char[] mtfBlock,
82                               final int mtfLength, final int mtfAlphabetSize, final int[] mtfSymbolFrequencies) {
83          this.writer = writer;
84          this.mtfBlock = mtfBlock;
85          this.mtfLength = mtfLength;
86          this.mtfAlphabetSize = mtfAlphabetSize;
87          this.mtfSymbolFrequencies = mtfSymbolFrequencies;
88  
89          final int totalTables = selectTableCount(mtfLength);
90  
91          huffmanCodeLengths = new int[totalTables][mtfAlphabetSize];
92          huffmanMergedCodeSymbols = new int[totalTables][mtfAlphabetSize];
93          selectors = new byte[(mtfLength + HUFFMAN_GROUP_RUN_LENGTH - 1) / HUFFMAN_GROUP_RUN_LENGTH];
94      }
95  
96      
97  
98  
99  
100 
101     private static int selectTableCount(final int mtfLength) {
102         if (mtfLength >= 2400) {
103             return 6;
104         }
105         if (mtfLength >= 1200) {
106             return 5;
107         }
108         if (mtfLength >= 600) {
109             return 4;
110         }
111         if (mtfLength >= 200) {
112             return 3;
113         }
114         return 2;
115     }
116 
117     
118 
119 
120 
121 
122 
123     private static void generateHuffmanCodeLengths(final int alphabetSize,
124                                                    final int[] symbolFrequencies, final int[] codeLengths) {
125 
126         final int[] mergedFrequenciesAndIndices = new int[alphabetSize];
127         final int[] sortedFrequencies = new int[alphabetSize];
128 
129         
130         
131 
132         
133         
134         
135         
136         
137         
138         for (int i = 0; i < alphabetSize; i++) {
139             mergedFrequenciesAndIndices[i] = (symbolFrequencies[i] << 9) | i;
140         }
141         Arrays.sort(mergedFrequenciesAndIndices);
142         for (int i = 0; i < alphabetSize; i++) {
143             sortedFrequencies[i] = mergedFrequenciesAndIndices[i] >>> 9;
144         }
145 
146         
147         
148         Bzip2HuffmanAllocator.allocateHuffmanCodeLengths(sortedFrequencies, HUFFMAN_ENCODE_MAX_CODE_LENGTH);
149 
150         
151         for (int i = 0; i < alphabetSize; i++) {
152             codeLengths[mergedFrequenciesAndIndices[i] & 0x1ff] = sortedFrequencies[i];
153         }
154     }
155 
156     
157 
158 
159 
160 
161 
162     private void generateHuffmanOptimisationSeeds() {
163         final int[][] huffmanCodeLengths = this.huffmanCodeLengths;
164         final int[] mtfSymbolFrequencies = this.mtfSymbolFrequencies;
165         final int mtfAlphabetSize = this.mtfAlphabetSize;
166 
167         final int totalTables = huffmanCodeLengths.length;
168 
169         int remainingLength = mtfLength;
170         int lowCostEnd = -1;
171 
172         for (int i = 0; i < totalTables; i++) {
173 
174             final int targetCumulativeFrequency = remainingLength / (totalTables - i);
175             final int lowCostStart = lowCostEnd + 1;
176             int actualCumulativeFrequency = 0;
177 
178             while (actualCumulativeFrequency < targetCumulativeFrequency && lowCostEnd < mtfAlphabetSize - 1) {
179                 actualCumulativeFrequency += mtfSymbolFrequencies[++lowCostEnd];
180             }
181 
182             if (lowCostEnd > lowCostStart && i != 0 && i != totalTables - 1 && (totalTables - i & 1) == 0) {
183                 actualCumulativeFrequency -= mtfSymbolFrequencies[lowCostEnd--];
184             }
185 
186             final int[] tableCodeLengths = huffmanCodeLengths[i];
187             for (int j = 0; j < mtfAlphabetSize; j++) {
188                 if (j < lowCostStart || j > lowCostEnd) {
189                     tableCodeLengths[j] = HUFFMAN_HIGH_SYMBOL_COST;
190                 }
191             }
192 
193             remainingLength -= actualCumulativeFrequency;
194         }
195     }
196 
197     
198 
199 
200 
201 
202 
203 
204 
205     private void optimiseSelectorsAndHuffmanTables(final boolean storeSelectors) {
206         final char[] mtfBlock = this.mtfBlock;
207         final byte[] selectors = this.selectors;
208         final int[][] huffmanCodeLengths = this.huffmanCodeLengths;
209         final int mtfLength = this.mtfLength;
210         final int mtfAlphabetSize = this.mtfAlphabetSize;
211 
212         final int totalTables = huffmanCodeLengths.length;
213         final int[][] tableFrequencies = new int[totalTables][mtfAlphabetSize];
214 
215         int selectorIndex = 0;
216 
217         
218         for (int groupStart = 0; groupStart < mtfLength;) {
219 
220             final int groupEnd = Math.min(groupStart + HUFFMAN_GROUP_RUN_LENGTH, mtfLength) - 1;
221 
222             
223             int[] cost = new int[totalTables];
224             for (int i = groupStart; i <= groupEnd; i++) {
225                 final int value = mtfBlock[i];
226                 for (int j = 0; j < totalTables; j++) {
227                     cost[j] += huffmanCodeLengths[j][value];
228                 }
229             }
230 
231             
232             byte bestTable = 0;
233             int bestCost = cost[0];
234             for (byte i = 1 ; i < totalTables; i++) {
235                 final int tableCost = cost[i];
236                 if (tableCost < bestCost) {
237                     bestCost = tableCost;
238                     bestTable = i;
239                 }
240             }
241 
242             
243             final int[] bestGroupFrequencies = tableFrequencies[bestTable];
244             for (int i = groupStart; i <= groupEnd; i++) {
245                 bestGroupFrequencies[mtfBlock[i]]++;
246             }
247 
248             
249             if (storeSelectors) {
250                 selectors[selectorIndex++] = bestTable;
251             }
252             groupStart = groupEnd + 1;
253         }
254 
255         
256         for (int i = 0; i < totalTables; i++) {
257             generateHuffmanCodeLengths(mtfAlphabetSize, tableFrequencies[i], huffmanCodeLengths[i]);
258         }
259     }
260 
261     
262 
263 
264     private void assignHuffmanCodeSymbols() {
265         final int[][] huffmanMergedCodeSymbols = this.huffmanMergedCodeSymbols;
266         final int[][] huffmanCodeLengths = this.huffmanCodeLengths;
267         final int mtfAlphabetSize = this.mtfAlphabetSize;
268 
269         final int totalTables = huffmanCodeLengths.length;
270 
271         for (int i = 0; i < totalTables; i++) {
272             final int[] tableLengths = huffmanCodeLengths[i];
273 
274             int minimumLength = 32;
275             int maximumLength = 0;
276             for (int j = 0; j < mtfAlphabetSize; j++) {
277                 final int length = tableLengths[j];
278                 if (length > maximumLength) {
279                     maximumLength = length;
280                 }
281                 if (length < minimumLength) {
282                     minimumLength = length;
283                 }
284             }
285 
286             int code = 0;
287             for (int j = minimumLength; j <= maximumLength; j++) {
288                 for (int k = 0; k < mtfAlphabetSize; k++) {
289                     if ((huffmanCodeLengths[i][k] & 0xff) == j) {
290                         huffmanMergedCodeSymbols[i][k] = (j << 24) | code;
291                         code++;
292                     }
293                 }
294                 code <<= 1;
295             }
296         }
297     }
298 
299     
300 
301 
302     private void writeSelectorsAndHuffmanTables(ByteBuf out) {
303         final Bzip2BitWriter writer = this.writer;
304         final byte[] selectors = this.selectors;
305         final int totalSelectors = selectors.length;
306         final int[][] huffmanCodeLengths = this.huffmanCodeLengths;
307         final int totalTables = huffmanCodeLengths.length;
308         final int mtfAlphabetSize = this.mtfAlphabetSize;
309 
310         writer.writeBits(out, 3, totalTables);
311         writer.writeBits(out, 15, totalSelectors);
312 
313         
314         Bzip2MoveToFrontTable selectorMTF = new Bzip2MoveToFrontTable();
315         for (byte selector : selectors) {
316             writer.writeUnary(out, selectorMTF.valueToFront(selector));
317         }
318 
319         
320         for (final int[] tableLengths : huffmanCodeLengths) {
321             int currentLength = tableLengths[0];
322 
323             writer.writeBits(out, 5, currentLength);
324 
325             for (int j = 0; j < mtfAlphabetSize; j++) {
326                 final int codeLength = tableLengths[j];
327                 final int value = currentLength < codeLength ? 2 : 3;
328                 int delta = Math.abs(codeLength - currentLength);
329                 while (delta-- > 0) {
330                     writer.writeBits(out, 2, value);
331                 }
332                 writer.writeBoolean(out, false);
333                 currentLength = codeLength;
334             }
335         }
336     }
337 
338     
339 
340 
341     private void writeBlockData(ByteBuf out) {
342         final Bzip2BitWriter writer = this.writer;
343         final int[][] huffmanMergedCodeSymbols = this.huffmanMergedCodeSymbols;
344         final byte[] selectors = this.selectors;
345         final int mtfLength = this.mtfLength;
346 
347         int selectorIndex = 0;
348         for (int mtfIndex = 0; mtfIndex < mtfLength;) {
349             final int groupEnd = Math.min(mtfIndex + HUFFMAN_GROUP_RUN_LENGTH, mtfLength) - 1;
350             final int[] tableMergedCodeSymbols = huffmanMergedCodeSymbols[selectors[selectorIndex++]];
351 
352             while (mtfIndex <= groupEnd) {
353                 final int mergedCodeSymbol = tableMergedCodeSymbols[mtfBlock[mtfIndex++]];
354                 writer.writeBits(out, mergedCodeSymbol >>> 24, mergedCodeSymbol);
355             }
356         }
357     }
358 
359     
360 
361 
362     void encode(ByteBuf out) {
363         
364         generateHuffmanOptimisationSeeds();
365         for (int i = 3; i >= 0; i--) {
366             optimiseSelectorsAndHuffmanTables(i == 0);
367         }
368         assignHuffmanCodeSymbols();
369 
370         
371         writeSelectorsAndHuffmanTables(out);
372         writeBlockData(out);
373     }
374 }