1 /*
2 * Copyright 2014 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package io.netty.handler.codec.compression;
17
18 import io.netty.buffer.ByteBuf;
19 import io.netty.util.ByteProcessor;
20
21 import static io.netty.handler.codec.compression.Bzip2Constants.BLOCK_HEADER_MAGIC_1;
22 import static io.netty.handler.codec.compression.Bzip2Constants.BLOCK_HEADER_MAGIC_2;
23 import static io.netty.handler.codec.compression.Bzip2Constants.HUFFMAN_SYMBOL_RANGE_SIZE;
24
25 /**
26 * Compresses and writes a single Bzip2 block.<br><br>
27 *
28 * Block encoding consists of the following stages:<br>
29 * 1. Run-Length Encoding[1] - {@link #write(int)}<br>
30 * 2. Burrows Wheeler Transform - {@link #close(ByteBuf)} (through {@link Bzip2DivSufSort})<br>
31 * 3. Write block header - {@link #close(ByteBuf)}<br>
32 * 4. Move To Front Transform - {@link #close(ByteBuf)} (through {@link Bzip2HuffmanStageEncoder})<br>
33 * 5. Run-Length Encoding[2] - {@link #close(ByteBuf)} (through {@link Bzip2HuffmanStageEncoder})<br>
34 * 6. Create and write Huffman tables - {@link #close(ByteBuf)} (through {@link Bzip2HuffmanStageEncoder})<br>
35 * 7. Huffman encode and write data - {@link #close(ByteBuf)} (through {@link Bzip2HuffmanStageEncoder})
36 */
37 final class Bzip2BlockCompressor {
38 private final ByteProcessor writeProcessor = new ByteProcessor() {
39 @Override
40 public boolean process(byte value) throws Exception {
41 return write(value);
42 }
43 };
44
45 /**
46 * A writer that provides bit-level writes.
47 */
48 private final Bzip2BitWriter writer;
49
50 /**
51 * CRC builder for the block.
52 */
53 private final Crc32 crc = new Crc32();
54
55 /**
56 * The RLE'd block data.
57 */
58 private final byte[] block;
59
60 /**
61 * Current length of the data within the {@link #block} array.
62 */
63 private int blockLength;
64
65 /**
66 * A limit beyond which new data will not be accepted into the block.
67 */
68 private final int blockLengthLimit;
69
70 /**
71 * The values that are present within the RLE'd block data. For each index, {@code true} if that
72 * value is present within the data, otherwise {@code false}.
73 */
74 private final boolean[] blockValuesPresent = new boolean[256];
75
76 /**
77 * The Burrows Wheeler Transformed block data.
78 */
79 private final int[] bwtBlock;
80
81 /**
82 * The current RLE value being accumulated (undefined when {@link #rleLength} is 0).
83 */
84 private int rleCurrentValue = -1;
85
86 /**
87 * The repeat count of the current RLE value.
88 */
89 private int rleLength;
90
91 /**
92 * @param writer The {@link Bzip2BitWriter} which provides bit-level writes
93 * @param blockSize The declared block size in bytes. Up to this many bytes will be accepted
94 * into the block after Run-Length Encoding is applied
95 */
96 Bzip2BlockCompressor(final Bzip2BitWriter writer, final int blockSize) {
97 this.writer = writer;
98
99 // One extra byte is added to allow for the block wrap applied in close()
100 block = new byte[blockSize + 1];
101 bwtBlock = new int[blockSize + 1];
102 blockLengthLimit = blockSize - 6; // 5 bytes for one RLE run plus one byte - see {@link #write(int)}
103 }
104
105 /**
106 * Write the Huffman symbol to output byte map.
107 */
108 private void writeSymbolMap(ByteBuf out) {
109 Bzip2BitWriter writer = this.writer;
110
111 final boolean[] blockValuesPresent = this.blockValuesPresent;
112 final boolean[] condensedInUse = new boolean[16];
113
114 for (int i = 0; i < condensedInUse.length; i++) {
115 for (int j = 0, k = i << 4; j < HUFFMAN_SYMBOL_RANGE_SIZE; j++, k++) {
116 if (blockValuesPresent[k]) {
117 condensedInUse[i] = true;
118 break;
119 }
120 }
121 }
122
123 for (boolean isCondensedInUse : condensedInUse) {
124 writer.writeBoolean(out, isCondensedInUse);
125 }
126
127 for (int i = 0; i < condensedInUse.length; i++) {
128 if (condensedInUse[i]) {
129 for (int j = 0, k = i << 4; j < HUFFMAN_SYMBOL_RANGE_SIZE; j++, k++) {
130 writer.writeBoolean(out, blockValuesPresent[k]);
131 }
132 }
133 }
134 }
135
136 /**
137 * Writes an RLE run to the block array, updating the block CRC and present values array as required.
138 * @param value The value to write
139 * @param runLength The run length of the value to write
140 */
141 private void writeRun(final int value, int runLength) {
142 final int blockLength = this.blockLength;
143 final byte[] block = this.block;
144
145 blockValuesPresent[value] = true;
146 crc.updateCRC(value, runLength);
147
148 final byte byteValue = (byte) value;
149 switch (runLength) {
150 case 1:
151 block[blockLength] = byteValue;
152 this.blockLength = blockLength + 1;
153 break;
154 case 2:
155 block[blockLength] = byteValue;
156 block[blockLength + 1] = byteValue;
157 this.blockLength = blockLength + 2;
158 break;
159 case 3:
160 block[blockLength] = byteValue;
161 block[blockLength + 1] = byteValue;
162 block[blockLength + 2] = byteValue;
163 this.blockLength = blockLength + 3;
164 break;
165 default:
166 runLength -= 4;
167 blockValuesPresent[runLength] = true;
168 block[blockLength] = byteValue;
169 block[blockLength + 1] = byteValue;
170 block[blockLength + 2] = byteValue;
171 block[blockLength + 3] = byteValue;
172 block[blockLength + 4] = (byte) runLength;
173 this.blockLength = blockLength + 5;
174 break;
175 }
176 }
177
178 /**
179 * Writes a byte to the block, accumulating to an RLE run where possible.
180 * @param value The byte to write
181 * @return {@code true} if the byte was written, or {@code false} if the block is already full
182 */
183 boolean write(final int value) {
184 if (blockLength > blockLengthLimit) {
185 return false;
186 }
187 final int rleCurrentValue = this.rleCurrentValue;
188 final int rleLength = this.rleLength;
189
190 if (rleLength == 0) {
191 this.rleCurrentValue = value;
192 this.rleLength = 1;
193 } else if (rleCurrentValue != value) {
194 // This path commits us to write 6 bytes - one RLE run (5 bytes) plus one extra
195 writeRun(rleCurrentValue & 0xff, rleLength);
196 this.rleCurrentValue = value;
197 this.rleLength = 1;
198 } else {
199 if (rleLength == 254) {
200 writeRun(rleCurrentValue & 0xff, 255);
201 this.rleLength = 0;
202 } else {
203 this.rleLength = rleLength + 1;
204 }
205 }
206 return true;
207 }
208
209 /**
210 * Writes an array to the block.
211 * @param buffer The buffer to write
212 * @param offset The offset within the input data to write from
213 * @param length The number of bytes of input data to write
214 * @return The actual number of input bytes written. May be less than the number requested, or
215 * zero if the block is already full
216 */
217 int write(final ByteBuf buffer, int offset, int length) {
218 int index = buffer.forEachByte(offset, length, writeProcessor);
219 return index == -1 ? length : index - offset;
220 }
221
222 /**
223 * Compresses and writes out the block.
224 */
225 void close(ByteBuf out) {
226 // If an RLE run is in progress, write it out
227 if (rleLength > 0) {
228 writeRun(rleCurrentValue & 0xff, rleLength);
229 }
230
231 // Apply a one byte block wrap required by the BWT implementation
232 block[blockLength] = block[0];
233
234 // Perform the Burrows Wheeler Transform
235 Bzip2DivSufSort divSufSort = new Bzip2DivSufSort(block, bwtBlock, blockLength);
236 int bwtStartPointer = divSufSort.bwt();
237
238 Bzip2BitWriter writer = this.writer;
239
240 // Write out the block header
241 writer.writeBits(out, 24, BLOCK_HEADER_MAGIC_1);
242 writer.writeBits(out, 24, BLOCK_HEADER_MAGIC_2);
243 writer.writeInt(out, crc.getCRC());
244 writer.writeBoolean(out, false); // Randomised block flag. We never create randomised blocks
245 writer.writeBits(out, 24, bwtStartPointer);
246
247 // Write out the symbol map
248 writeSymbolMap(out);
249
250 // Perform the Move To Front Transform and Run-Length Encoding[2] stages
251 Bzip2MTFAndRLE2StageEncoder mtfEncoder = new Bzip2MTFAndRLE2StageEncoder(bwtBlock, blockLength,
252 blockValuesPresent);
253 mtfEncoder.encode();
254
255 // Perform the Huffman Encoding stage and write out the encoded data
256 Bzip2HuffmanStageEncoder huffmanEncoder = new Bzip2HuffmanStageEncoder(writer,
257 mtfEncoder.mtfBlock(),
258 mtfEncoder.mtfLength(),
259 mtfEncoder.mtfAlphabetSize(),
260 mtfEncoder.mtfSymbolFrequencies());
261 huffmanEncoder.encode(out);
262 }
263
264 /**
265 * Gets available size of the current block.
266 * @return Number of available bytes which can be written
267 */
268 int availableSize() {
269 if (blockLength == 0) {
270 return blockLengthLimit + 2;
271 }
272 return blockLengthLimit - blockLength + 1;
273 }
274
275 /**
276 * Determines if the block is full and ready for compression.
277 * @return {@code true} if the block is full, otherwise {@code false}
278 */
279 boolean isFull() {
280 return blockLength > blockLengthLimit;
281 }
282
283 /**
284 * Determines if any bytes have been written to the block.
285 * @return {@code true} if one or more bytes has been written to the block, otherwise {@code false}
286 */
287 boolean isEmpty() {
288 return blockLength == 0 && rleLength == 0;
289 }
290
291 /**
292 * Gets the CRC of the completed block. Only valid after calling {@link #close(ByteBuf)}.
293 * @return The block's CRC
294 */
295 int crc() {
296 return crc.getCRC();
297 }
298 }