1 /*
2 * Copyright 2014 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package io.netty5.handler.codec.compression;
17
18 import io.netty5.buffer.api.Buffer;
19 import io.netty5.util.ByteProcessor;
20
21 import static io.netty5.handler.codec.compression.Bzip2Constants.BLOCK_HEADER_MAGIC_1;
22 import static io.netty5.handler.codec.compression.Bzip2Constants.BLOCK_HEADER_MAGIC_2;
23 import static io.netty5.handler.codec.compression.Bzip2Constants.HUFFMAN_SYMBOL_RANGE_SIZE;
24
25 /**
26 * Compresses and writes a single Bzip2 block.<br><br>
27 *
28 * Block encoding consists of the following stages:<br>
29 * 1. Run-Length Encoding[1] - {@link #write(int)}<br>
30 * 2. Burrows Wheeler Transform - {@link #close(Buffer)} (through {@link Bzip2DivSufSort})<br>
31 * 3. Write block header - {@link #close(Buffer)}<br>
32 * 4. Move To Front Transform - {@link #close(Buffer)} (through {@link Bzip2HuffmanStageEncoder})<br>
33 * 5. Run-Length Encoding[2] - {@link #close(Buffer)} (through {@link Bzip2HuffmanStageEncoder})<br>
34 * 6. Create and write Huffman tables - {@link #close(Buffer)} (through {@link Bzip2HuffmanStageEncoder})<br>
35 * 7. Huffman encode and write data - {@link #close(Buffer)} (through {@link Bzip2HuffmanStageEncoder})
36 */
37 final class Bzip2BlockCompressor {
38 private final ByteProcessor writeProcessor = this::write;
39
40 /**
41 * A writer that provides bit-level writes.
42 */
43 private final Bzip2BitWriter writer;
44
45 /**
46 * CRC builder for the block.
47 */
48 private final Crc32 crc = new Crc32();
49
50 /**
51 * The RLE'd block data.
52 */
53 private final byte[] block;
54
55 /**
56 * Current length of the data within the {@link #block} array.
57 */
58 private int blockLength;
59
60 /**
61 * A limit beyond which new data will not be accepted into the block.
62 */
63 private final int blockLengthLimit;
64
65 /**
66 * The values that are present within the RLE'd block data. For each index, {@code true} if that
67 * value is present within the data, otherwise {@code false}.
68 */
69 private final boolean[] blockValuesPresent = new boolean[256];
70
71 /**
72 * The Burrows Wheeler Transformed block data.
73 */
74 private final int[] bwtBlock;
75
76 /**
77 * The current RLE value being accumulated (undefined when {@link #rleLength} is 0).
78 */
79 private int rleCurrentValue = -1;
80
81 /**
82 * The repeat count of the current RLE value.
83 */
84 private int rleLength;
85
86 /**
87 * @param writer The {@link Bzip2BitWriter} which provides bit-level writes
88 * @param blockSize The declared block size in bytes. Up to this many bytes will be accepted
89 * into the block after Run-Length Encoding is applied
90 */
91 Bzip2BlockCompressor(final Bzip2BitWriter writer, final int blockSize) {
92 this.writer = writer;
93
94 // One extra byte is added to allow for the block wrap applied in close()
95 block = new byte[blockSize + 1];
96 bwtBlock = new int[blockSize + 1];
97 blockLengthLimit = blockSize - 6; // 5 bytes for one RLE run plus one byte - see {@link #write(int)}
98 }
99
100 /**
101 * Write the Huffman symbol to output byte map.
102 */
103 private void writeSymbolMap(Buffer out) {
104 Bzip2BitWriter writer = this.writer;
105
106 final boolean[] blockValuesPresent = this.blockValuesPresent;
107 final boolean[] condensedInUse = new boolean[16];
108
109 for (int i = 0; i < condensedInUse.length; i++) {
110 for (int j = 0, k = i << 4; j < HUFFMAN_SYMBOL_RANGE_SIZE; j++, k++) {
111 if (blockValuesPresent[k]) {
112 condensedInUse[i] = true;
113 break;
114 }
115 }
116 }
117
118 for (boolean isCondensedInUse : condensedInUse) {
119 writer.writeBoolean(out, isCondensedInUse);
120 }
121
122 for (int i = 0; i < condensedInUse.length; i++) {
123 if (condensedInUse[i]) {
124 for (int j = 0, k = i << 4; j < HUFFMAN_SYMBOL_RANGE_SIZE; j++, k++) {
125 writer.writeBoolean(out, blockValuesPresent[k]);
126 }
127 }
128 }
129 }
130
131 /**
132 * Writes an RLE run to the block array, updating the block CRC and present values array as required.
133 * @param value The value to write
134 * @param runLength The run length of the value to write
135 */
136 private void writeRun(final int value, int runLength) {
137 final int blockLength = this.blockLength;
138 final byte[] block = this.block;
139
140 blockValuesPresent[value] = true;
141 crc.updateCRC(value, runLength);
142
143 final byte byteValue = (byte) value;
144 switch (runLength) {
145 case 1:
146 block[blockLength] = byteValue;
147 this.blockLength = blockLength + 1;
148 break;
149 case 2:
150 block[blockLength] = byteValue;
151 block[blockLength + 1] = byteValue;
152 this.blockLength = blockLength + 2;
153 break;
154 case 3:
155 block[blockLength] = byteValue;
156 block[blockLength + 1] = byteValue;
157 block[blockLength + 2] = byteValue;
158 this.blockLength = blockLength + 3;
159 break;
160 default:
161 runLength -= 4;
162 blockValuesPresent[runLength] = true;
163 block[blockLength] = byteValue;
164 block[blockLength + 1] = byteValue;
165 block[blockLength + 2] = byteValue;
166 block[blockLength + 3] = byteValue;
167 block[blockLength + 4] = (byte) runLength;
168 this.blockLength = blockLength + 5;
169 break;
170 }
171 }
172
173 /**
174 * Writes a byte to the block, accumulating to an RLE run where possible.
175 * @param value The byte to write
176 * @return {@code true} if the byte was written, or {@code false} if the block is already full
177 */
178 boolean write(final int value) {
179 if (blockLength > blockLengthLimit) {
180 return false;
181 }
182 final int rleCurrentValue = this.rleCurrentValue;
183 final int rleLength = this.rleLength;
184
185 if (rleLength == 0) {
186 this.rleCurrentValue = value;
187 this.rleLength = 1;
188 } else if (rleCurrentValue != value) {
189 // This path commits us to write 6 bytes - one RLE run (5 bytes) plus one extra
190 writeRun(rleCurrentValue & 0xff, rleLength);
191 this.rleCurrentValue = value;
192 this.rleLength = 1;
193 } else {
194 if (rleLength == 254) {
195 writeRun(rleCurrentValue & 0xff, 255);
196 this.rleLength = 0;
197 } else {
198 this.rleLength = rleLength + 1;
199 }
200 }
201 return true;
202 }
203
204 /**
205 * Writes an array to the block.
206 * @param buffer The buffer to write
207 * @param offset The offset within the input data to write from
208 * @param length The number of bytes of input data to write
209 * @return The actual number of input bytes written. May be less than the number requested, or
210 * zero if the block is already full
211 */
212 int write(final Buffer buffer, int offset, int length) {
213 int processed = buffer.openCursor(offset, length).process(writeProcessor);
214 return processed == -1 ? length : processed;
215 }
216
217 /**
218 * Compresses and writes out the block.
219 */
220 void close(Buffer out) {
221 // If an RLE run is in progress, write it out
222 if (rleLength > 0) {
223 writeRun(rleCurrentValue & 0xff, rleLength);
224 }
225
226 // Apply a one byte block wrap required by the BWT implementation
227 block[blockLength] = block[0];
228
229 // Perform the Burrows Wheeler Transform
230 Bzip2DivSufSort divSufSort = new Bzip2DivSufSort(block, bwtBlock, blockLength);
231 int bwtStartPointer = divSufSort.bwt();
232
233 Bzip2BitWriter writer = this.writer;
234
235 // Write out the block header
236 writer.writeBits(out, 24, BLOCK_HEADER_MAGIC_1);
237 writer.writeBits(out, 24, BLOCK_HEADER_MAGIC_2);
238 writer.writeInt(out, crc.getCRC());
239 writer.writeBoolean(out, false); // Randomised block flag. We never create randomised blocks
240 writer.writeBits(out, 24, bwtStartPointer);
241
242 // Write out the symbol map
243 writeSymbolMap(out);
244
245 // Perform the Move To Front Transform and Run-Length Encoding[2] stages
246 Bzip2MTFAndRLE2StageEncoder mtfEncoder = new Bzip2MTFAndRLE2StageEncoder(bwtBlock, blockLength,
247 blockValuesPresent);
248 mtfEncoder.encode();
249
250 // Perform the Huffman Encoding stage and write out the encoded data
251 Bzip2HuffmanStageEncoder huffmanEncoder = new Bzip2HuffmanStageEncoder(writer,
252 mtfEncoder.mtfBlock(),
253 mtfEncoder.mtfLength(),
254 mtfEncoder.mtfAlphabetSize(),
255 mtfEncoder.mtfSymbolFrequencies());
256 huffmanEncoder.encode(out);
257 }
258
259 /**
260 * Gets available size of the current block.
261 * @return Number of available bytes which can be written
262 */
263 int availableSize() {
264 if (blockLength == 0) {
265 return blockLengthLimit + 2;
266 }
267 return blockLengthLimit - blockLength + 1;
268 }
269
270 /**
271 * Determines if the block is full and ready for compression.
272 * @return {@code true} if the block is full, otherwise {@code false}
273 */
274 boolean isFull() {
275 return blockLength > blockLengthLimit;
276 }
277
278 /**
279 * Determines if any bytes have been written to the block.
280 * @return {@code true} if one or more bytes has been written to the block, otherwise {@code false}
281 */
282 boolean isEmpty() {
283 return blockLength == 0 && rleLength == 0;
284 }
285
286 /**
287 * Gets the CRC of the completed block. Only valid after calling {@link #close(Buffer)}.
288 * @return The block's CRC
289 */
290 int crc() {
291 return crc.getCRC();
292 }
293 }