1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package io.netty5.handler.codec.compression;
17
18 import io.netty5.buffer.api.Buffer;
19
20 import java.util.Arrays;
21
22 import static io.netty5.handler.codec.compression.Bzip2Constants.HUFFMAN_ENCODE_MAX_CODE_LENGTH;
23 import static io.netty5.handler.codec.compression.Bzip2Constants.HUFFMAN_GROUP_RUN_LENGTH;
24
25
26
27
28 final class Bzip2HuffmanStageEncoder {
29
30
31
32 private static final int HUFFMAN_HIGH_SYMBOL_COST = 15;
33
34
35
36
37 private final Bzip2BitWriter writer;
38
39
40
41
42 private final char[] mtfBlock;
43
44
45
46
47 private final int mtfLength;
48
49
50
51
52 private final int mtfAlphabetSize;
53
54
55
56
57 private final int[] mtfSymbolFrequencies;
58
59
60
61
62 private final int[][] huffmanCodeLengths;
63
64
65
66
67 private final int[][] huffmanMergedCodeSymbols;
68
69
70
71
72 private final byte[] selectors;
73
74
75
76
77
78
79
80
81 Bzip2HuffmanStageEncoder(final Bzip2BitWriter writer, final char[] mtfBlock,
82 final int mtfLength, final int mtfAlphabetSize, final int[] mtfSymbolFrequencies) {
83 this.writer = writer;
84 this.mtfBlock = mtfBlock;
85 this.mtfLength = mtfLength;
86 this.mtfAlphabetSize = mtfAlphabetSize;
87 this.mtfSymbolFrequencies = mtfSymbolFrequencies;
88
89 final int totalTables = selectTableCount(mtfLength);
90
91 huffmanCodeLengths = new int[totalTables][mtfAlphabetSize];
92 huffmanMergedCodeSymbols = new int[totalTables][mtfAlphabetSize];
93 selectors = new byte[(mtfLength + HUFFMAN_GROUP_RUN_LENGTH - 1) / HUFFMAN_GROUP_RUN_LENGTH];
94 }
95
96
97
98
99
100
101 private static int selectTableCount(final int mtfLength) {
102 if (mtfLength >= 2400) {
103 return 6;
104 }
105 if (mtfLength >= 1200) {
106 return 5;
107 }
108 if (mtfLength >= 600) {
109 return 4;
110 }
111 if (mtfLength >= 200) {
112 return 3;
113 }
114 return 2;
115 }
116
117
118
119
120
121
122
123 private static void generateHuffmanCodeLengths(final int alphabetSize,
124 final int[] symbolFrequencies, final int[] codeLengths) {
125
126 final int[] mergedFrequenciesAndIndices = new int[alphabetSize];
127 final int[] sortedFrequencies = new int[alphabetSize];
128
129
130
131
132
133
134
135
136
137
138 for (int i = 0; i < alphabetSize; i++) {
139 mergedFrequenciesAndIndices[i] = (symbolFrequencies[i] << 9) | i;
140 }
141 Arrays.sort(mergedFrequenciesAndIndices);
142 for (int i = 0; i < alphabetSize; i++) {
143 sortedFrequencies[i] = mergedFrequenciesAndIndices[i] >>> 9;
144 }
145
146
147
148 Bzip2HuffmanAllocator.allocateHuffmanCodeLengths(sortedFrequencies, HUFFMAN_ENCODE_MAX_CODE_LENGTH);
149
150
151 for (int i = 0; i < alphabetSize; i++) {
152 codeLengths[mergedFrequenciesAndIndices[i] & 0x1ff] = sortedFrequencies[i];
153 }
154 }
155
156
157
158
159
160
161
162 private void generateHuffmanOptimisationSeeds() {
163 final int[][] huffmanCodeLengths = this.huffmanCodeLengths;
164 final int[] mtfSymbolFrequencies = this.mtfSymbolFrequencies;
165 final int mtfAlphabetSize = this.mtfAlphabetSize;
166
167 final int totalTables = huffmanCodeLengths.length;
168
169 int remainingLength = mtfLength;
170 int lowCostEnd = -1;
171
172 for (int i = 0; i < totalTables; i++) {
173
174 final int targetCumulativeFrequency = remainingLength / (totalTables - i);
175 final int lowCostStart = lowCostEnd + 1;
176 int actualCumulativeFrequency = 0;
177
178 while (actualCumulativeFrequency < targetCumulativeFrequency && lowCostEnd < mtfAlphabetSize - 1) {
179 actualCumulativeFrequency += mtfSymbolFrequencies[++lowCostEnd];
180 }
181
182 if (lowCostEnd > lowCostStart && i != 0 && i != totalTables - 1 && (totalTables - i & 1) == 0) {
183 actualCumulativeFrequency -= mtfSymbolFrequencies[lowCostEnd--];
184 }
185
186 final int[] tableCodeLengths = huffmanCodeLengths[i];
187 for (int j = 0; j < mtfAlphabetSize; j++) {
188 if (j < lowCostStart || j > lowCostEnd) {
189 tableCodeLengths[j] = HUFFMAN_HIGH_SYMBOL_COST;
190 }
191 }
192
193 remainingLength -= actualCumulativeFrequency;
194 }
195 }
196
197
198
199
200
201
202
203
204
205 private void optimiseSelectorsAndHuffmanTables(final boolean storeSelectors) {
206 final char[] mtfBlock = this.mtfBlock;
207 final byte[] selectors = this.selectors;
208 final int[][] huffmanCodeLengths = this.huffmanCodeLengths;
209 final int mtfLength = this.mtfLength;
210 final int mtfAlphabetSize = this.mtfAlphabetSize;
211
212 final int totalTables = huffmanCodeLengths.length;
213 final int[][] tableFrequencies = new int[totalTables][mtfAlphabetSize];
214
215 int selectorIndex = 0;
216
217
218 for (int groupStart = 0; groupStart < mtfLength;) {
219
220 final int groupEnd = Math.min(groupStart + HUFFMAN_GROUP_RUN_LENGTH, mtfLength) - 1;
221
222
223 int[] cost = new int[totalTables];
224 for (int i = groupStart; i <= groupEnd; i++) {
225 final int value = mtfBlock[i];
226 for (int j = 0; j < totalTables; j++) {
227 cost[j] += huffmanCodeLengths[j][value];
228 }
229 }
230
231
232 byte bestTable = 0;
233 int bestCost = cost[0];
234 for (byte i = 1 ; i < totalTables; i++) {
235 final int tableCost = cost[i];
236 if (tableCost < bestCost) {
237 bestCost = tableCost;
238 bestTable = i;
239 }
240 }
241
242
243 final int[] bestGroupFrequencies = tableFrequencies[bestTable];
244 for (int i = groupStart; i <= groupEnd; i++) {
245 bestGroupFrequencies[mtfBlock[i]]++;
246 }
247
248
249 if (storeSelectors) {
250 selectors[selectorIndex++] = bestTable;
251 }
252 groupStart = groupEnd + 1;
253 }
254
255
256 for (int i = 0; i < totalTables; i++) {
257 generateHuffmanCodeLengths(mtfAlphabetSize, tableFrequencies[i], huffmanCodeLengths[i]);
258 }
259 }
260
261
262
263
264 private void assignHuffmanCodeSymbols() {
265 final int[][] huffmanMergedCodeSymbols = this.huffmanMergedCodeSymbols;
266 final int[][] huffmanCodeLengths = this.huffmanCodeLengths;
267 final int mtfAlphabetSize = this.mtfAlphabetSize;
268
269 final int totalTables = huffmanCodeLengths.length;
270
271 for (int i = 0; i < totalTables; i++) {
272 final int[] tableLengths = huffmanCodeLengths[i];
273
274 int minimumLength = 32;
275 int maximumLength = 0;
276 for (int j = 0; j < mtfAlphabetSize; j++) {
277 final int length = tableLengths[j];
278 if (length > maximumLength) {
279 maximumLength = length;
280 }
281 if (length < minimumLength) {
282 minimumLength = length;
283 }
284 }
285
286 int code = 0;
287 for (int j = minimumLength; j <= maximumLength; j++) {
288 for (int k = 0; k < mtfAlphabetSize; k++) {
289 if ((huffmanCodeLengths[i][k] & 0xff) == j) {
290 huffmanMergedCodeSymbols[i][k] = (j << 24) | code;
291 code++;
292 }
293 }
294 code <<= 1;
295 }
296 }
297 }
298
299
300
301
302 private void writeSelectorsAndHuffmanTables(Buffer out) {
303 final Bzip2BitWriter writer = this.writer;
304 final byte[] selectors = this.selectors;
305 final int totalSelectors = selectors.length;
306 final int[][] huffmanCodeLengths = this.huffmanCodeLengths;
307 final int totalTables = huffmanCodeLengths.length;
308 final int mtfAlphabetSize = this.mtfAlphabetSize;
309
310 writer.writeBits(out, 3, totalTables);
311 writer.writeBits(out, 15, totalSelectors);
312
313
314 Bzip2MoveToFrontTable selectorMTF = new Bzip2MoveToFrontTable();
315 for (byte selector : selectors) {
316 writer.writeUnary(out, selectorMTF.valueToFront(selector));
317 }
318
319
320 for (final int[] tableLengths : huffmanCodeLengths) {
321 int currentLength = tableLengths[0];
322
323 writer.writeBits(out, 5, currentLength);
324
325 for (int j = 0; j < mtfAlphabetSize; j++) {
326 final int codeLength = tableLengths[j];
327 final int value = currentLength < codeLength ? 2 : 3;
328 int delta = Math.abs(codeLength - currentLength);
329 while (delta-- > 0) {
330 writer.writeBits(out, 2, value);
331 }
332 writer.writeBoolean(out, false);
333 currentLength = codeLength;
334 }
335 }
336 }
337
338
339
340
341 private void writeBlockData(Buffer out) {
342 final Bzip2BitWriter writer = this.writer;
343 final int[][] huffmanMergedCodeSymbols = this.huffmanMergedCodeSymbols;
344 final byte[] selectors = this.selectors;
345 final int mtfLength = this.mtfLength;
346
347 int selectorIndex = 0;
348 for (int mtfIndex = 0; mtfIndex < mtfLength;) {
349 final int groupEnd = Math.min(mtfIndex + HUFFMAN_GROUP_RUN_LENGTH, mtfLength) - 1;
350 final int[] tableMergedCodeSymbols = huffmanMergedCodeSymbols[selectors[selectorIndex++]];
351
352 while (mtfIndex <= groupEnd) {
353 final int mergedCodeSymbol = tableMergedCodeSymbols[mtfBlock[mtfIndex++]];
354 writer.writeBits(out, mergedCodeSymbol >>> 24, mergedCodeSymbol);
355 }
356 }
357 }
358
359
360
361
362 void encode(Buffer out) {
363
364 generateHuffmanOptimisationSeeds();
365 for (int i = 3; i >= 0; i--) {
366 optimiseSelectorsAndHuffmanTables(i == 0);
367 }
368 assignHuffmanCodeSymbols();
369
370
371 writeSelectorsAndHuffmanTables(out);
372 writeBlockData(out);
373 }
374 }