View Javadoc
1   /*
2    * Copyright 2014 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.handler.codec.compression;
17  
18  import io.netty.buffer.ByteBuf;
19  import io.netty.util.ByteProcessor;
20  
21  import static io.netty.handler.codec.compression.Bzip2Constants.*;
22  
23  /**
24   * Compresses and writes a single Bzip2 block.<br><br>
25   *
26   * Block encoding consists of the following stages:<br>
27   * 1. Run-Length Encoding[1] - {@link #write(int)}<br>
28   * 2. Burrows Wheeler Transform - {@link #close(ByteBuf)} (through {@link Bzip2DivSufSort})<br>
29   * 3. Write block header - {@link #close(ByteBuf)}<br>
30   * 4. Move To Front Transform - {@link #close(ByteBuf)} (through {@link Bzip2HuffmanStageEncoder})<br>
31   * 5. Run-Length Encoding[2] - {@link #close(ByteBuf)}  (through {@link Bzip2HuffmanStageEncoder})<br>
32   * 6. Create and write Huffman tables - {@link #close(ByteBuf)} (through {@link Bzip2HuffmanStageEncoder})<br>
33   * 7. Huffman encode and write data - {@link #close(ByteBuf)} (through {@link Bzip2HuffmanStageEncoder})
34   */
35  final class Bzip2BlockCompressor {
36      private final ByteProcessor writeProcessor = new ByteProcessor() {
37          @Override
38          public boolean process(byte value) throws Exception {
39              return write(value);
40          }
41      };
42  
43      /**
44       * A writer that provides bit-level writes.
45       */
46      private final Bzip2BitWriter writer;
47  
48      /**
49       * CRC builder for the block.
50       */
51      private final Crc32 crc = new Crc32();
52  
53      /**
54       * The RLE'd block data.
55       */
56      private final byte[] block;
57  
58      /**
59       * Current length of the data within the {@link #block} array.
60       */
61      private int blockLength;
62  
63      /**
64       * A limit beyond which new data will not be accepted into the block.
65       */
66      private final int blockLengthLimit;
67  
68      /**
69       * The values that are present within the RLE'd block data. For each index, {@code true} if that
70       * value is present within the data, otherwise {@code false}.
71       */
72      private final boolean[] blockValuesPresent = new boolean[256];
73  
74      /**
75       * The Burrows Wheeler Transformed block data.
76       */
77      private final int[] bwtBlock;
78  
79      /**
80       * The current RLE value being accumulated (undefined when {@link #rleLength} is 0).
81       */
82      private int rleCurrentValue = -1;
83  
84      /**
85       * The repeat count of the current RLE value.
86       */
87      private int rleLength;
88  
89      /**
90       * @param writer The {@link Bzip2BitWriter} which provides bit-level writes
91       * @param blockSize The declared block size in bytes. Up to this many bytes will be accepted
92       *                  into the block after Run-Length Encoding is applied
93       */
94      Bzip2BlockCompressor(final Bzip2BitWriter writer, final int blockSize) {
95          this.writer = writer;
96  
97          // One extra byte is added to allow for the block wrap applied in close()
98          block = new byte[blockSize + 1];
99          bwtBlock = new int[blockSize + 1];
100         blockLengthLimit = blockSize - 6; // 5 bytes for one RLE run plus one byte - see {@link #write(int)}
101     }
102 
103     /**
104      * Write the Huffman symbol to output byte map.
105      */
106     private void writeSymbolMap(ByteBuf out) {
107         Bzip2BitWriter writer = this.writer;
108 
109         final boolean[] blockValuesPresent = this.blockValuesPresent;
110         final boolean[] condensedInUse = new boolean[16];
111 
112         for (int i = 0; i < condensedInUse.length; i++) {
113             for (int j = 0, k = i << 4; j < HUFFMAN_SYMBOL_RANGE_SIZE; j++, k++) {
114                 if (blockValuesPresent[k]) {
115                     condensedInUse[i] = true;
116                 }
117             }
118         }
119 
120         for (boolean isCondensedInUse : condensedInUse) {
121             writer.writeBoolean(out, isCondensedInUse);
122         }
123 
124         for (int i = 0; i < condensedInUse.length; i++) {
125             if (condensedInUse[i]) {
126                 for (int j = 0, k = i << 4; j < HUFFMAN_SYMBOL_RANGE_SIZE; j++, k++) {
127                     writer.writeBoolean(out, blockValuesPresent[k]);
128                 }
129             }
130         }
131     }
132 
133     /**
134      * Writes an RLE run to the block array, updating the block CRC and present values array as required.
135      * @param value The value to write
136      * @param runLength The run length of the value to write
137      */
138     private void writeRun(final int value, int runLength) {
139         final int blockLength = this.blockLength;
140         final byte[] block = this.block;
141 
142         blockValuesPresent[value] = true;
143         crc.updateCRC(value, runLength);
144 
145         final byte byteValue = (byte) value;
146         switch (runLength) {
147             case 1:
148                 block[blockLength] = byteValue;
149                 this.blockLength = blockLength + 1;
150                 break;
151             case 2:
152                 block[blockLength] = byteValue;
153                 block[blockLength + 1] = byteValue;
154                 this.blockLength = blockLength + 2;
155                 break;
156             case 3:
157                 block[blockLength] = byteValue;
158                 block[blockLength + 1] = byteValue;
159                 block[blockLength + 2] = byteValue;
160                 this.blockLength = blockLength + 3;
161                 break;
162             default:
163                 runLength -= 4;
164                 blockValuesPresent[runLength] = true;
165                 block[blockLength] = byteValue;
166                 block[blockLength + 1] = byteValue;
167                 block[blockLength + 2] = byteValue;
168                 block[blockLength + 3] = byteValue;
169                 block[blockLength + 4] = (byte) runLength;
170                 this.blockLength = blockLength + 5;
171                 break;
172         }
173     }
174 
175     /**
176      * Writes a byte to the block, accumulating to an RLE run where possible.
177      * @param value The byte to write
178      * @return {@code true} if the byte was written, or {@code false} if the block is already full
179      */
180     boolean write(final int value) {
181         if (blockLength > blockLengthLimit) {
182             return false;
183         }
184         final int rleCurrentValue = this.rleCurrentValue;
185         final int rleLength = this.rleLength;
186 
187         if (rleLength == 0) {
188             this.rleCurrentValue = value;
189             this.rleLength = 1;
190         } else if (rleCurrentValue != value) {
191             // This path commits us to write 6 bytes - one RLE run (5 bytes) plus one extra
192             writeRun(rleCurrentValue & 0xff, rleLength);
193             this.rleCurrentValue = value;
194             this.rleLength = 1;
195         } else {
196             if (rleLength == 254) {
197                 writeRun(rleCurrentValue & 0xff, 255);
198                 this.rleLength = 0;
199             } else {
200                 this.rleLength = rleLength + 1;
201             }
202         }
203         return true;
204     }
205 
206     /**
207      * Writes an array to the block.
208      * @param buffer The buffer to write
209      * @param offset The offset within the input data to write from
210      * @param length The number of bytes of input data to write
211      * @return The actual number of input bytes written. May be less than the number requested, or
212      *         zero if the block is already full
213      */
214     int write(final ByteBuf buffer, int offset, int length) {
215         int index = buffer.forEachByte(offset, length, writeProcessor);
216         return index == -1 ? length : index - offset;
217     }
218 
219     /**
220      * Compresses and writes out the block.
221      */
222     void close(ByteBuf out) {
223         // If an RLE run is in progress, write it out
224         if (rleLength > 0) {
225             writeRun(rleCurrentValue & 0xff, rleLength);
226         }
227 
228         // Apply a one byte block wrap required by the BWT implementation
229         block[blockLength] = block[0];
230 
231         // Perform the Burrows Wheeler Transform
232         Bzip2DivSufSort divSufSort = new Bzip2DivSufSort(block, bwtBlock, blockLength);
233         int bwtStartPointer = divSufSort.bwt();
234 
235         Bzip2BitWriter writer = this.writer;
236 
237         // Write out the block header
238         writer.writeBits(out, 24, BLOCK_HEADER_MAGIC_1);
239         writer.writeBits(out, 24, BLOCK_HEADER_MAGIC_2);
240         writer.writeInt(out, crc.getCRC());
241         writer.writeBoolean(out, false); // Randomised block flag. We never create randomised blocks
242         writer.writeBits(out, 24, bwtStartPointer);
243 
244         // Write out the symbol map
245         writeSymbolMap(out);
246 
247         // Perform the Move To Front Transform and Run-Length Encoding[2] stages
248         Bzip2MTFAndRLE2StageEncoder mtfEncoder = new Bzip2MTFAndRLE2StageEncoder(bwtBlock, blockLength,
249                                                                                     blockValuesPresent);
250         mtfEncoder.encode();
251 
252         // Perform the Huffman Encoding stage and write out the encoded data
253         Bzip2HuffmanStageEncoder huffmanEncoder = new Bzip2HuffmanStageEncoder(writer,
254                 mtfEncoder.mtfBlock(),
255                 mtfEncoder.mtfLength(),
256                 mtfEncoder.mtfAlphabetSize(),
257                 mtfEncoder.mtfSymbolFrequencies());
258         huffmanEncoder.encode(out);
259     }
260 
261     /**
262      * Gets available size of the current block.
263      * @return Number of available bytes which can be written
264      */
265     int availableSize() {
266         if (blockLength == 0) {
267             return blockLengthLimit + 2;
268         }
269         return blockLengthLimit - blockLength + 1;
270     }
271 
272     /**
273      * Determines if the block is full and ready for compression.
274      * @return {@code true} if the block is full, otherwise {@code false}
275      */
276     boolean isFull() {
277         return blockLength > blockLengthLimit;
278     }
279 
280     /**
281      * Determines if any bytes have been written to the block.
282      * @return {@code true} if one or more bytes has been written to the block, otherwise {@code false}
283      */
284     boolean isEmpty() {
285         return blockLength == 0 && rleLength == 0;
286     }
287 
288     /**
289      * Gets the CRC of the completed block. Only valid after calling {@link #close(ByteBuf)}.
290      * @return The block's CRC
291      */
292     int crc() {
293         return crc.getCRC();
294     }
295 }