View Javadoc
1   /*
2    * Copyright 2014 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty5.handler.codec.compression;
17  
18  import io.netty5.buffer.api.Buffer;
19  import io.netty5.util.ByteProcessor;
20  
21  import static io.netty5.handler.codec.compression.Bzip2Constants.BLOCK_HEADER_MAGIC_1;
22  import static io.netty5.handler.codec.compression.Bzip2Constants.BLOCK_HEADER_MAGIC_2;
23  import static io.netty5.handler.codec.compression.Bzip2Constants.HUFFMAN_SYMBOL_RANGE_SIZE;
24  
25  /**
26   * Compresses and writes a single Bzip2 block.<br><br>
27   *
28   * Block encoding consists of the following stages:<br>
29   * 1. Run-Length Encoding[1] - {@link #write(int)}<br>
30   * 2. Burrows Wheeler Transform - {@link #close(Buffer)} (through {@link Bzip2DivSufSort})<br>
31   * 3. Write block header - {@link #close(Buffer)}<br>
32   * 4. Move To Front Transform - {@link #close(Buffer)} (through {@link Bzip2HuffmanStageEncoder})<br>
33   * 5. Run-Length Encoding[2] - {@link #close(Buffer)}  (through {@link Bzip2HuffmanStageEncoder})<br>
34   * 6. Create and write Huffman tables - {@link #close(Buffer)} (through {@link Bzip2HuffmanStageEncoder})<br>
35   * 7. Huffman encode and write data - {@link #close(Buffer)} (through {@link Bzip2HuffmanStageEncoder})
36   */
37  final class Bzip2BlockCompressor {
38      private final ByteProcessor writeProcessor = this::write;
39  
40      /**
41       * A writer that provides bit-level writes.
42       */
43      private final Bzip2BitWriter writer;
44  
45      /**
46       * CRC builder for the block.
47       */
48      private final Crc32 crc = new Crc32();
49  
50      /**
51       * The RLE'd block data.
52       */
53      private final byte[] block;
54  
55      /**
56       * Current length of the data within the {@link #block} array.
57       */
58      private int blockLength;
59  
60      /**
61       * A limit beyond which new data will not be accepted into the block.
62       */
63      private final int blockLengthLimit;
64  
65      /**
66       * The values that are present within the RLE'd block data. For each index, {@code true} if that
67       * value is present within the data, otherwise {@code false}.
68       */
69      private final boolean[] blockValuesPresent = new boolean[256];
70  
71      /**
72       * The Burrows Wheeler Transformed block data.
73       */
74      private final int[] bwtBlock;
75  
76      /**
77       * The current RLE value being accumulated (undefined when {@link #rleLength} is 0).
78       */
79      private int rleCurrentValue = -1;
80  
81      /**
82       * The repeat count of the current RLE value.
83       */
84      private int rleLength;
85  
86      /**
87       * @param writer The {@link Bzip2BitWriter} which provides bit-level writes
88       * @param blockSize The declared block size in bytes. Up to this many bytes will be accepted
89       *                  into the block after Run-Length Encoding is applied
90       */
91      Bzip2BlockCompressor(final Bzip2BitWriter writer, final int blockSize) {
92          this.writer = writer;
93  
94          // One extra byte is added to allow for the block wrap applied in close()
95          block = new byte[blockSize + 1];
96          bwtBlock = new int[blockSize + 1];
97          blockLengthLimit = blockSize - 6; // 5 bytes for one RLE run plus one byte - see {@link #write(int)}
98      }
99  
100     /**
101      * Write the Huffman symbol to output byte map.
102      */
103     private void writeSymbolMap(Buffer out) {
104         Bzip2BitWriter writer = this.writer;
105 
106         final boolean[] blockValuesPresent = this.blockValuesPresent;
107         final boolean[] condensedInUse = new boolean[16];
108 
109         for (int i = 0; i < condensedInUse.length; i++) {
110             for (int j = 0, k = i << 4; j < HUFFMAN_SYMBOL_RANGE_SIZE; j++, k++) {
111                 if (blockValuesPresent[k]) {
112                     condensedInUse[i] = true;
113                     break;
114                 }
115             }
116         }
117 
118         for (boolean isCondensedInUse : condensedInUse) {
119             writer.writeBoolean(out, isCondensedInUse);
120         }
121 
122         for (int i = 0; i < condensedInUse.length; i++) {
123             if (condensedInUse[i]) {
124                 for (int j = 0, k = i << 4; j < HUFFMAN_SYMBOL_RANGE_SIZE; j++, k++) {
125                     writer.writeBoolean(out, blockValuesPresent[k]);
126                 }
127             }
128         }
129     }
130 
131     /**
132      * Writes an RLE run to the block array, updating the block CRC and present values array as required.
133      * @param value The value to write
134      * @param runLength The run length of the value to write
135      */
136     private void writeRun(final int value, int runLength) {
137         final int blockLength = this.blockLength;
138         final byte[] block = this.block;
139 
140         blockValuesPresent[value] = true;
141         crc.updateCRC(value, runLength);
142 
143         final byte byteValue = (byte) value;
144         switch (runLength) {
145             case 1:
146                 block[blockLength] = byteValue;
147                 this.blockLength = blockLength + 1;
148                 break;
149             case 2:
150                 block[blockLength] = byteValue;
151                 block[blockLength + 1] = byteValue;
152                 this.blockLength = blockLength + 2;
153                 break;
154             case 3:
155                 block[blockLength] = byteValue;
156                 block[blockLength + 1] = byteValue;
157                 block[blockLength + 2] = byteValue;
158                 this.blockLength = blockLength + 3;
159                 break;
160             default:
161                 runLength -= 4;
162                 blockValuesPresent[runLength] = true;
163                 block[blockLength] = byteValue;
164                 block[blockLength + 1] = byteValue;
165                 block[blockLength + 2] = byteValue;
166                 block[blockLength + 3] = byteValue;
167                 block[blockLength + 4] = (byte) runLength;
168                 this.blockLength = blockLength + 5;
169                 break;
170         }
171     }
172 
173     /**
174      * Writes a byte to the block, accumulating to an RLE run where possible.
175      * @param value The byte to write
176      * @return {@code true} if the byte was written, or {@code false} if the block is already full
177      */
178     boolean write(final int value) {
179         if (blockLength > blockLengthLimit) {
180             return false;
181         }
182         final int rleCurrentValue = this.rleCurrentValue;
183         final int rleLength = this.rleLength;
184 
185         if (rleLength == 0) {
186             this.rleCurrentValue = value;
187             this.rleLength = 1;
188         } else if (rleCurrentValue != value) {
189             // This path commits us to write 6 bytes - one RLE run (5 bytes) plus one extra
190             writeRun(rleCurrentValue & 0xff, rleLength);
191             this.rleCurrentValue = value;
192             this.rleLength = 1;
193         } else {
194             if (rleLength == 254) {
195                 writeRun(rleCurrentValue & 0xff, 255);
196                 this.rleLength = 0;
197             } else {
198                 this.rleLength = rleLength + 1;
199             }
200         }
201         return true;
202     }
203 
204     /**
205      * Writes an array to the block.
206      * @param buffer The buffer to write
207      * @param offset The offset within the input data to write from
208      * @param length The number of bytes of input data to write
209      * @return The actual number of input bytes written. May be less than the number requested, or
210      *         zero if the block is already full
211      */
212     int write(final Buffer buffer, int offset, int length) {
213         int processed = buffer.openCursor(offset, length).process(writeProcessor);
214         return processed == -1 ? length : processed;
215     }
216 
217     /**
218      * Compresses and writes out the block.
219      */
220     void close(Buffer out) {
221         // If an RLE run is in progress, write it out
222         if (rleLength > 0) {
223             writeRun(rleCurrentValue & 0xff, rleLength);
224         }
225 
226         // Apply a one byte block wrap required by the BWT implementation
227         block[blockLength] = block[0];
228 
229         // Perform the Burrows Wheeler Transform
230         Bzip2DivSufSort divSufSort = new Bzip2DivSufSort(block, bwtBlock, blockLength);
231         int bwtStartPointer = divSufSort.bwt();
232 
233         Bzip2BitWriter writer = this.writer;
234 
235         // Write out the block header
236         writer.writeBits(out, 24, BLOCK_HEADER_MAGIC_1);
237         writer.writeBits(out, 24, BLOCK_HEADER_MAGIC_2);
238         writer.writeInt(out, crc.getCRC());
239         writer.writeBoolean(out, false); // Randomised block flag. We never create randomised blocks
240         writer.writeBits(out, 24, bwtStartPointer);
241 
242         // Write out the symbol map
243         writeSymbolMap(out);
244 
245         // Perform the Move To Front Transform and Run-Length Encoding[2] stages
246         Bzip2MTFAndRLE2StageEncoder mtfEncoder = new Bzip2MTFAndRLE2StageEncoder(bwtBlock, blockLength,
247                                                                                     blockValuesPresent);
248         mtfEncoder.encode();
249 
250         // Perform the Huffman Encoding stage and write out the encoded data
251         Bzip2HuffmanStageEncoder huffmanEncoder = new Bzip2HuffmanStageEncoder(writer,
252                 mtfEncoder.mtfBlock(),
253                 mtfEncoder.mtfLength(),
254                 mtfEncoder.mtfAlphabetSize(),
255                 mtfEncoder.mtfSymbolFrequencies());
256         huffmanEncoder.encode(out);
257     }
258 
259     /**
260      * Gets available size of the current block.
261      * @return Number of available bytes which can be written
262      */
263     int availableSize() {
264         if (blockLength == 0) {
265             return blockLengthLimit + 2;
266         }
267         return blockLengthLimit - blockLength + 1;
268     }
269 
270     /**
271      * Determines if the block is full and ready for compression.
272      * @return {@code true} if the block is full, otherwise {@code false}
273      */
274     boolean isFull() {
275         return blockLength > blockLengthLimit;
276     }
277 
278     /**
279      * Determines if any bytes have been written to the block.
280      * @return {@code true} if one or more bytes has been written to the block, otherwise {@code false}
281      */
282     boolean isEmpty() {
283         return blockLength == 0 && rleLength == 0;
284     }
285 
286     /**
287      * Gets the CRC of the completed block. Only valid after calling {@link #close(Buffer)}.
288      * @return The block's CRC
289      */
290     int crc() {
291         return crc.getCRC();
292     }
293 }