1 /*
2 * Copyright 2014 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package io.netty5.handler.codec.compression;
17
18 import static io.netty5.handler.codec.compression.Bzip2Constants.HUFFMAN_DECODE_MAX_CODE_LENGTH;
19 import static io.netty5.handler.codec.compression.Bzip2Constants.HUFFMAN_SYMBOL_RUNA;
20 import static io.netty5.handler.codec.compression.Bzip2Constants.HUFFMAN_SYMBOL_RUNB;
21 import static io.netty5.handler.codec.compression.Bzip2Constants.MAX_BLOCK_LENGTH;
22
23 /**
24 * Reads and decompresses a single Bzip2 block.<br><br>
25 *
26 * Block decoding consists of the following stages:<br>
27 * 1. Read block header<br>
28 * 2. Read Huffman tables<br>
29 * 3. Read and decode Huffman encoded data - {@link #decodeHuffmanData(Bzip2HuffmanStageDecoder)}<br>
30 * 4. Run-Length Decoding[2] - {@link #decodeHuffmanData(Bzip2HuffmanStageDecoder)}<br>
31 * 5. Inverse Move To Front Transform - {@link #decodeHuffmanData(Bzip2HuffmanStageDecoder)}<br>
32 * 6. Inverse Burrows Wheeler Transform - {@link #initialiseInverseBWT()}<br>
33 * 7. Run-Length Decoding[1] - {@link #read()}<br>
34 * 8. Optional Block De-Randomisation - {@link #read()} (through {@link #decodeNextBWTByte()})
35 */
36 final class Bzip2BlockDecompressor {
37 /**
38 * A reader that provides bit-level reads.
39 */
40 private final Bzip2BitReader reader;
41
42 /**
43 * Calculates the block CRC from the fully decoded bytes of the block.
44 */
45 private final Crc32 crc = new Crc32();
46
47 /**
48 * The CRC of the current block as read from the block header.
49 */
50 private final int blockCRC;
51
52 /**
53 * {@code true} if the current block is randomised, otherwise {@code false}.
54 */
55 private final boolean blockRandomised;
56
57 /* Huffman Decoding stage */
58 /**
59 * The end-of-block Huffman symbol. Decoding of the block ends when this is encountered.
60 */
61 int huffmanEndOfBlockSymbol;
62
63 /**
64 * Bitmap, of ranges of 16 bytes, present/not present.
65 */
66 int huffmanInUse16;
67
68 /**
69 * A map from Huffman symbol index to output character. Some types of data (e.g. ASCII text)
70 * may contain only a limited number of byte values; Huffman symbols are only allocated to
71 * those values that actually occur in the uncompressed data.
72 */
73 final byte[] huffmanSymbolMap = new byte[256];
74
75 /* Move To Front stage */
76 /**
77 * Counts of each byte value within the {@link Bzip2BlockDecompressor#huffmanSymbolMap} data.
78 * Collected at the Move To Front stage, consumed by the Inverse Burrows Wheeler Transform stage.
79 */
80 private final int[] bwtByteCounts = new int[256];
81
82 /**
83 * The Burrows-Wheeler Transform processed data. Read at the Move To Front stage, consumed by the
84 * Inverse Burrows Wheeler Transform stage.
85 */
86 private final byte[] bwtBlock;
87
88 /**
89 * Starting pointer into BWT for after untransform.
90 */
91 private final int bwtStartPointer;
92
93 /* Inverse Burrows-Wheeler Transform stage */
94 /**
95 * At each position contains the union of :-
96 * An output character (8 bits)
97 * A pointer from each position to its successor (24 bits, left shifted 8 bits)
98 * As the pointer cannot exceed the maximum block size of 900k, 24 bits is more than enough to
99 * hold it; Folding the character data into the spare bits while performing the inverse BWT,
100 * when both pieces of information are available, saves a large number of memory accesses in
101 * the final decoding stages.
102 */
103 private int[] bwtMergedPointers;
104
105 /**
106 * The current merged pointer into the Burrow-Wheeler Transform array.
107 */
108 private int bwtCurrentMergedPointer;
109
110 /**
111 * The actual length in bytes of the current block at the Inverse Burrows Wheeler Transform
112 * stage (before final Run-Length Decoding).
113 */
114 private int bwtBlockLength;
115
116 /**
117 * The number of output bytes that have been decoded up to the Inverse Burrows Wheeler Transform stage.
118 */
119 private int bwtBytesDecoded;
120
121 /* Run-Length Encoding and Random Perturbation stage */
122 /**
123 * The most recently RLE decoded byte.
124 */
125 private int rleLastDecodedByte = -1;
126
127 /**
128 * The number of previous identical output bytes decoded. After 4 identical bytes, the next byte
129 * decoded is an RLE repeat count.
130 */
131 private int rleAccumulator;
132
133 /**
134 * The RLE repeat count of the current decoded byte. When this reaches zero, a new byte is decoded.
135 */
136 private int rleRepeat;
137
138 /**
139 * If the current block is randomised, the position within the RNUMS randomisation array.
140 */
141 private int randomIndex;
142
143 /**
144 * If the current block is randomised, the remaining count at the current RNUMS position.
145 */
146 private int randomCount = Bzip2Rand.rNums(0) - 1;
147
148 /**
149 * Table for Move To Front transformations.
150 */
151 private final Bzip2MoveToFrontTable symbolMTF = new Bzip2MoveToFrontTable();
152
153 // This variables is used to save current state if we haven't got enough readable bits
154 private int repeatCount;
155 private int repeatIncrement = 1;
156 private int mtfValue;
157
158 Bzip2BlockDecompressor(final int blockSize, final int blockCRC, final boolean blockRandomised,
159 final int bwtStartPointer, final Bzip2BitReader reader) {
160
161 bwtBlock = new byte[blockSize];
162
163 this.blockCRC = blockCRC;
164 this.blockRandomised = blockRandomised;
165 this.bwtStartPointer = bwtStartPointer;
166
167 this.reader = reader;
168 }
169
170 /**
171 * Reads the Huffman encoded data from the input stream, performs Run-Length Decoding and
172 * applies the Move To Front transform to reconstruct the Burrows-Wheeler Transform array.
173 */
174 boolean decodeHuffmanData(final Bzip2HuffmanStageDecoder huffmanDecoder) {
175 final Bzip2BitReader reader = this.reader;
176 final byte[] bwtBlock = this.bwtBlock;
177 final byte[] huffmanSymbolMap = this.huffmanSymbolMap;
178 final int streamBlockSize = this.bwtBlock.length;
179 final int huffmanEndOfBlockSymbol = this.huffmanEndOfBlockSymbol;
180 final int[] bwtByteCounts = this.bwtByteCounts;
181 final Bzip2MoveToFrontTable symbolMTF = this.symbolMTF;
182
183 int bwtBlockLength = this.bwtBlockLength;
184 int repeatCount = this.repeatCount;
185 int repeatIncrement = this.repeatIncrement;
186 int mtfValue = this.mtfValue;
187
188 for (;;) {
189 if (!reader.hasReadableBits(HUFFMAN_DECODE_MAX_CODE_LENGTH)) {
190 this.bwtBlockLength = bwtBlockLength;
191 this.repeatCount = repeatCount;
192 this.repeatIncrement = repeatIncrement;
193 this.mtfValue = mtfValue;
194 return false;
195 }
196 final int nextSymbol = huffmanDecoder.nextSymbol();
197
198 if (nextSymbol == HUFFMAN_SYMBOL_RUNA) {
199 repeatCount += repeatIncrement;
200 repeatIncrement <<= 1;
201 } else if (nextSymbol == HUFFMAN_SYMBOL_RUNB) {
202 repeatCount += repeatIncrement << 1;
203 repeatIncrement <<= 1;
204 } else {
205 if (repeatCount > 0) {
206 if (bwtBlockLength + repeatCount > streamBlockSize) {
207 throw new DecompressionException("block exceeds declared block size");
208 }
209 final byte nextByte = huffmanSymbolMap[mtfValue];
210 bwtByteCounts[nextByte & 0xff] += repeatCount;
211 while (--repeatCount >= 0) {
212 bwtBlock[bwtBlockLength++] = nextByte;
213 }
214
215 repeatCount = 0;
216 repeatIncrement = 1;
217 }
218
219 if (nextSymbol == huffmanEndOfBlockSymbol) {
220 break;
221 }
222
223 if (bwtBlockLength >= streamBlockSize) {
224 throw new DecompressionException("block exceeds declared block size");
225 }
226
227 mtfValue = symbolMTF.indexToFront(nextSymbol - 1) & 0xff;
228
229 final byte nextByte = huffmanSymbolMap[mtfValue];
230 bwtByteCounts[nextByte & 0xff]++;
231 bwtBlock[bwtBlockLength++] = nextByte;
232 }
233 }
234 if (bwtBlockLength > MAX_BLOCK_LENGTH) {
235 throw new DecompressionException("block length exceeds max block length: "
236 + bwtBlockLength + " > " + MAX_BLOCK_LENGTH);
237 }
238
239 this.bwtBlockLength = bwtBlockLength;
240 initialiseInverseBWT();
241 return true;
242 }
243
244 /**
245 * Set up the Inverse Burrows-Wheeler Transform merged pointer array.
246 */
247 private void initialiseInverseBWT() {
248 final int bwtStartPointer = this.bwtStartPointer;
249 final byte[] bwtBlock = this.bwtBlock;
250 final int[] bwtMergedPointers = new int[bwtBlockLength];
251 final int[] characterBase = new int[256];
252
253 if (bwtStartPointer < 0 || bwtStartPointer >= bwtBlockLength) {
254 throw new DecompressionException("start pointer invalid");
255 }
256
257 // Cumulative character counts
258 System.arraycopy(bwtByteCounts, 0, characterBase, 1, 255);
259 for (int i = 2; i <= 255; i++) {
260 characterBase[i] += characterBase[i - 1];
261 }
262
263 // Merged-Array Inverse Burrows-Wheeler Transform
264 // Combining the output characters and forward pointers into a single array here, where we
265 // have already read both of the corresponding values, cuts down on memory accesses in the
266 // final walk through the array
267 for (int i = 0; i < bwtBlockLength; i++) {
268 int value = bwtBlock[i] & 0xff;
269 bwtMergedPointers[characterBase[value]++] = (i << 8) + value;
270 }
271
272 this.bwtMergedPointers = bwtMergedPointers;
273 bwtCurrentMergedPointer = bwtMergedPointers[bwtStartPointer];
274 }
275
276 /**
277 * Decodes a byte from the final Run-Length Encoding stage, pulling a new byte from the
278 * Burrows-Wheeler Transform stage when required.
279 * @return The decoded byte, or -1 if there are no more bytes
280 */
281 public int read() {
282 while (rleRepeat < 1) {
283 if (bwtBytesDecoded == bwtBlockLength) {
284 return -1;
285 }
286
287 int nextByte = decodeNextBWTByte();
288 if (nextByte != rleLastDecodedByte) {
289 // New byte, restart accumulation
290 rleLastDecodedByte = nextByte;
291 rleRepeat = 1;
292 rleAccumulator = 1;
293 crc.updateCRC(nextByte);
294 } else {
295 if (++rleAccumulator == 4) {
296 // Accumulation complete, start repetition
297 int rleRepeat = decodeNextBWTByte() + 1;
298 this.rleRepeat = rleRepeat;
299 rleAccumulator = 0;
300 crc.updateCRC(nextByte, rleRepeat);
301 } else {
302 rleRepeat = 1;
303 crc.updateCRC(nextByte);
304 }
305 }
306 }
307 rleRepeat--;
308
309 return rleLastDecodedByte;
310 }
311
312 /**
313 * Decodes a byte from the Burrows-Wheeler Transform stage. If the block has randomisation
314 * applied, reverses the randomisation.
315 * @return The decoded byte
316 */
317 private int decodeNextBWTByte() {
318 int mergedPointer = bwtCurrentMergedPointer;
319 int nextDecodedByte = mergedPointer & 0xff;
320 bwtCurrentMergedPointer = bwtMergedPointers[mergedPointer >>> 8];
321
322 if (blockRandomised) {
323 if (--randomCount == 0) {
324 nextDecodedByte ^= 1;
325 randomIndex = (randomIndex + 1) % 512;
326 randomCount = Bzip2Rand.rNums(randomIndex);
327 }
328 }
329 bwtBytesDecoded++;
330
331 return nextDecodedByte;
332 }
333
334 public int blockLength() {
335 return bwtBlockLength;
336 }
337
338 /**
339 * Verify and return the block CRC. This method may only be called
340 * after all of the block's bytes have been read.
341 * @return The block CRC
342 */
343 int checkCRC() {
344 final int computedBlockCRC = crc.getCRC();
345 if (blockCRC != computedBlockCRC) {
346 throw new DecompressionException("block CRC error");
347 }
348 return computedBlockCRC;
349 }
350 }