View Javadoc
1   /*
2    * Copyright 2014 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.handler.codec.compression;
17  
18  import io.netty.buffer.ByteBuf;
19  import io.netty.util.ByteProcessor;
20  
21  import static io.netty.handler.codec.compression.Bzip2Constants.BLOCK_HEADER_MAGIC_1;
22  import static io.netty.handler.codec.compression.Bzip2Constants.BLOCK_HEADER_MAGIC_2;
23  import static io.netty.handler.codec.compression.Bzip2Constants.HUFFMAN_SYMBOL_RANGE_SIZE;
24  
25  /**
26   * Compresses and writes a single Bzip2 block.<br><br>
27   *
28   * Block encoding consists of the following stages:<br>
29   * 1. Run-Length Encoding[1] - {@link #write(int)}<br>
30   * 2. Burrows Wheeler Transform - {@link #close(ByteBuf)} (through {@link Bzip2DivSufSort})<br>
31   * 3. Write block header - {@link #close(ByteBuf)}<br>
32   * 4. Move To Front Transform - {@link #close(ByteBuf)} (through {@link Bzip2HuffmanStageEncoder})<br>
33   * 5. Run-Length Encoding[2] - {@link #close(ByteBuf)}  (through {@link Bzip2HuffmanStageEncoder})<br>
34   * 6. Create and write Huffman tables - {@link #close(ByteBuf)} (through {@link Bzip2HuffmanStageEncoder})<br>
35   * 7. Huffman encode and write data - {@link #close(ByteBuf)} (through {@link Bzip2HuffmanStageEncoder})
36   */
37  final class Bzip2BlockCompressor {
38      private final ByteProcessor writeProcessor = new ByteProcessor() {
39          @Override
40          public boolean process(byte value) throws Exception {
41              return write(value);
42          }
43      };
44  
45      /**
46       * A writer that provides bit-level writes.
47       */
48      private final Bzip2BitWriter writer;
49  
50      /**
51       * CRC builder for the block.
52       */
53      private final Crc32 crc = new Crc32();
54  
55      /**
56       * The RLE'd block data.
57       */
58      private final byte[] block;
59  
60      /**
61       * Current length of the data within the {@link #block} array.
62       */
63      private int blockLength;
64  
65      /**
66       * A limit beyond which new data will not be accepted into the block.
67       */
68      private final int blockLengthLimit;
69  
70      /**
71       * The values that are present within the RLE'd block data. For each index, {@code true} if that
72       * value is present within the data, otherwise {@code false}.
73       */
74      private final boolean[] blockValuesPresent = new boolean[256];
75  
76      /**
77       * The Burrows Wheeler Transformed block data.
78       */
79      private final int[] bwtBlock;
80  
81      /**
82       * The current RLE value being accumulated (undefined when {@link #rleLength} is 0).
83       */
84      private int rleCurrentValue = -1;
85  
86      /**
87       * The repeat count of the current RLE value.
88       */
89      private int rleLength;
90  
91      /**
92       * @param writer The {@link Bzip2BitWriter} which provides bit-level writes
93       * @param blockSize The declared block size in bytes. Up to this many bytes will be accepted
94       *                  into the block after Run-Length Encoding is applied
95       */
96      Bzip2BlockCompressor(final Bzip2BitWriter writer, final int blockSize) {
97          this.writer = writer;
98  
99          // One extra byte is added to allow for the block wrap applied in close()
100         block = new byte[blockSize + 1];
101         bwtBlock = new int[blockSize + 1];
102         blockLengthLimit = blockSize - 6; // 5 bytes for one RLE run plus one byte - see {@link #write(int)}
103     }
104 
105     /**
106      * Write the Huffman symbol to output byte map.
107      */
108     private void writeSymbolMap(ByteBuf out) {
109         Bzip2BitWriter writer = this.writer;
110 
111         final boolean[] blockValuesPresent = this.blockValuesPresent;
112         final boolean[] condensedInUse = new boolean[16];
113 
114         for (int i = 0; i < condensedInUse.length; i++) {
115             for (int j = 0, k = i << 4; j < HUFFMAN_SYMBOL_RANGE_SIZE; j++, k++) {
116                 if (blockValuesPresent[k]) {
117                     condensedInUse[i] = true;
118                     break;
119                 }
120             }
121         }
122 
123         for (boolean isCondensedInUse : condensedInUse) {
124             writer.writeBoolean(out, isCondensedInUse);
125         }
126 
127         for (int i = 0; i < condensedInUse.length; i++) {
128             if (condensedInUse[i]) {
129                 for (int j = 0, k = i << 4; j < HUFFMAN_SYMBOL_RANGE_SIZE; j++, k++) {
130                     writer.writeBoolean(out, blockValuesPresent[k]);
131                 }
132             }
133         }
134     }
135 
136     /**
137      * Writes an RLE run to the block array, updating the block CRC and present values array as required.
138      * @param value The value to write
139      * @param runLength The run length of the value to write
140      */
141     private void writeRun(final int value, int runLength) {
142         final int blockLength = this.blockLength;
143         final byte[] block = this.block;
144 
145         blockValuesPresent[value] = true;
146         crc.updateCRC(value, runLength);
147 
148         final byte byteValue = (byte) value;
149         switch (runLength) {
150             case 1:
151                 block[blockLength] = byteValue;
152                 this.blockLength = blockLength + 1;
153                 break;
154             case 2:
155                 block[blockLength] = byteValue;
156                 block[blockLength + 1] = byteValue;
157                 this.blockLength = blockLength + 2;
158                 break;
159             case 3:
160                 block[blockLength] = byteValue;
161                 block[blockLength + 1] = byteValue;
162                 block[blockLength + 2] = byteValue;
163                 this.blockLength = blockLength + 3;
164                 break;
165             default:
166                 runLength -= 4;
167                 blockValuesPresent[runLength] = true;
168                 block[blockLength] = byteValue;
169                 block[blockLength + 1] = byteValue;
170                 block[blockLength + 2] = byteValue;
171                 block[blockLength + 3] = byteValue;
172                 block[blockLength + 4] = (byte) runLength;
173                 this.blockLength = blockLength + 5;
174                 break;
175         }
176     }
177 
178     /**
179      * Writes a byte to the block, accumulating to an RLE run where possible.
180      * @param value The byte to write
181      * @return {@code true} if the byte was written, or {@code false} if the block is already full
182      */
183     boolean write(final int value) {
184         if (blockLength > blockLengthLimit) {
185             return false;
186         }
187         final int rleCurrentValue = this.rleCurrentValue;
188         final int rleLength = this.rleLength;
189 
190         if (rleLength == 0) {
191             this.rleCurrentValue = value;
192             this.rleLength = 1;
193         } else if (rleCurrentValue != value) {
194             // This path commits us to write 6 bytes - one RLE run (5 bytes) plus one extra
195             writeRun(rleCurrentValue & 0xff, rleLength);
196             this.rleCurrentValue = value;
197             this.rleLength = 1;
198         } else {
199             if (rleLength == 254) {
200                 writeRun(rleCurrentValue & 0xff, 255);
201                 this.rleLength = 0;
202             } else {
203                 this.rleLength = rleLength + 1;
204             }
205         }
206         return true;
207     }
208 
209     /**
210      * Writes an array to the block.
211      * @param buffer The buffer to write
212      * @param offset The offset within the input data to write from
213      * @param length The number of bytes of input data to write
214      * @return The actual number of input bytes written. May be less than the number requested, or
215      *         zero if the block is already full
216      */
217     int write(final ByteBuf buffer, int offset, int length) {
218         int index = buffer.forEachByte(offset, length, writeProcessor);
219         return index == -1 ? length : index - offset;
220     }
221 
222     /**
223      * Compresses and writes out the block.
224      */
225     void close(ByteBuf out) {
226         // If an RLE run is in progress, write it out
227         if (rleLength > 0) {
228             writeRun(rleCurrentValue & 0xff, rleLength);
229         }
230 
231         // Apply a one byte block wrap required by the BWT implementation
232         block[blockLength] = block[0];
233 
234         // Perform the Burrows Wheeler Transform
235         Bzip2DivSufSort divSufSort = new Bzip2DivSufSort(block, bwtBlock, blockLength);
236         int bwtStartPointer = divSufSort.bwt();
237 
238         Bzip2BitWriter writer = this.writer;
239 
240         // Write out the block header
241         writer.writeBits(out, 24, BLOCK_HEADER_MAGIC_1);
242         writer.writeBits(out, 24, BLOCK_HEADER_MAGIC_2);
243         writer.writeInt(out, crc.getCRC());
244         writer.writeBoolean(out, false); // Randomised block flag. We never create randomised blocks
245         writer.writeBits(out, 24, bwtStartPointer);
246 
247         // Write out the symbol map
248         writeSymbolMap(out);
249 
250         // Perform the Move To Front Transform and Run-Length Encoding[2] stages
251         Bzip2MTFAndRLE2StageEncoder mtfEncoder = new Bzip2MTFAndRLE2StageEncoder(bwtBlock, blockLength,
252                                                                                     blockValuesPresent);
253         mtfEncoder.encode();
254 
255         // Perform the Huffman Encoding stage and write out the encoded data
256         Bzip2HuffmanStageEncoder huffmanEncoder = new Bzip2HuffmanStageEncoder(writer,
257                 mtfEncoder.mtfBlock(),
258                 mtfEncoder.mtfLength(),
259                 mtfEncoder.mtfAlphabetSize(),
260                 mtfEncoder.mtfSymbolFrequencies());
261         huffmanEncoder.encode(out);
262     }
263 
264     /**
265      * Gets available size of the current block.
266      * @return Number of available bytes which can be written
267      */
268     int availableSize() {
269         if (blockLength == 0) {
270             return blockLengthLimit + 2;
271         }
272         return blockLengthLimit - blockLength + 1;
273     }
274 
275     /**
276      * Determines if the block is full and ready for compression.
277      * @return {@code true} if the block is full, otherwise {@code false}
278      */
279     boolean isFull() {
280         return blockLength > blockLengthLimit;
281     }
282 
283     /**
284      * Determines if any bytes have been written to the block.
285      * @return {@code true} if one or more bytes has been written to the block, otherwise {@code false}
286      */
287     boolean isEmpty() {
288         return blockLength == 0 && rleLength == 0;
289     }
290 
291     /**
292      * Gets the CRC of the completed block. Only valid after calling {@link #close(ByteBuf)}.
293      * @return The block's CRC
294      */
295     int crc() {
296         return crc.getCRC();
297     }
298 }