1 /*
2 * Copyright 2014 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package io.netty5.handler.codec.compression;
17
18 import static io.netty5.handler.codec.compression.Bzip2Constants.HUFFMAN_MAX_ALPHABET_SIZE;
19 import static io.netty5.handler.codec.compression.Bzip2Constants.HUFFMAN_SYMBOL_RUNA;
20 import static io.netty5.handler.codec.compression.Bzip2Constants.HUFFMAN_SYMBOL_RUNB;
21
22 /**
23 * An encoder for the Bzip2 Move To Front Transform and Run-Length Encoding[2] stages.<br>
24 * Although conceptually these two stages are separate, it is computationally efficient to perform
25 * them in one pass.
26 */
27 final class Bzip2MTFAndRLE2StageEncoder {
28 /**
29 * The Burrows-Wheeler transformed block.
30 */
31 private final int[] bwtBlock;
32
33 /**
34 * Actual length of the data in the {@link #bwtBlock} array.
35 */
36 private final int bwtLength;
37
38 /**
39 * At each position, {@code true} if the byte value with that index is present within the block,
40 * otherwise {@code false}.
41 */
42 private final boolean[] bwtValuesPresent;
43
44 /**
45 * The output of the Move To Front Transform and Run-Length Encoding[2] stages.
46 */
47 private final char[] mtfBlock;
48
49 /**
50 * The actual number of values contained in the {@link #mtfBlock} array.
51 */
52 private int mtfLength;
53
54 /**
55 * The global frequencies of values within the {@link #mtfBlock} array.
56 */
57 private final int[] mtfSymbolFrequencies = new int[HUFFMAN_MAX_ALPHABET_SIZE];
58
59 /**
60 * The encoded alphabet size.
61 */
62 private int alphabetSize;
63
64 /**
65 * @param bwtBlock The Burrows Wheeler Transformed block data
66 * @param bwtLength The actual length of the BWT data
67 * @param bwtValuesPresent The values that are present within the BWT data. For each index,
68 * {@code true} if that value is present within the data, otherwise {@code false}
69 */
70 Bzip2MTFAndRLE2StageEncoder(final int[] bwtBlock, final int bwtLength, final boolean[] bwtValuesPresent) {
71 this.bwtBlock = bwtBlock;
72 this.bwtLength = bwtLength;
73 this.bwtValuesPresent = bwtValuesPresent;
74 mtfBlock = new char[bwtLength + 1];
75 }
76
77 /**
78 * Performs the Move To Front transform and Run Length Encoding[1] stages.
79 */
80 void encode() {
81 final int bwtLength = this.bwtLength;
82 final boolean[] bwtValuesPresent = this.bwtValuesPresent;
83 final int[] bwtBlock = this.bwtBlock;
84 final char[] mtfBlock = this.mtfBlock;
85 final int[] mtfSymbolFrequencies = this.mtfSymbolFrequencies;
86 final byte[] huffmanSymbolMap = new byte[256];
87 final Bzip2MoveToFrontTable symbolMTF = new Bzip2MoveToFrontTable();
88
89 int totalUniqueValues = 0;
90 for (int i = 0; i < huffmanSymbolMap.length; i++) {
91 if (bwtValuesPresent[i]) {
92 huffmanSymbolMap[i] = (byte) totalUniqueValues++;
93 }
94 }
95 final int endOfBlockSymbol = totalUniqueValues + 1;
96
97 int mtfIndex = 0;
98 int repeatCount = 0;
99 int totalRunAs = 0;
100 int totalRunBs = 0;
101 for (int i = 0; i < bwtLength; i++) {
102 // Move To Front
103 final int mtfPosition = symbolMTF.valueToFront(huffmanSymbolMap[bwtBlock[i] & 0xff]);
104 // Run Length Encode
105 if (mtfPosition == 0) {
106 repeatCount++;
107 } else {
108 if (repeatCount > 0) {
109 repeatCount--;
110 while (true) {
111 if ((repeatCount & 1) == 0) {
112 mtfBlock[mtfIndex++] = HUFFMAN_SYMBOL_RUNA;
113 totalRunAs++;
114 } else {
115 mtfBlock[mtfIndex++] = HUFFMAN_SYMBOL_RUNB;
116 totalRunBs++;
117 }
118
119 if (repeatCount <= 1) {
120 break;
121 }
122 repeatCount = (repeatCount - 2) >>> 1;
123 }
124 repeatCount = 0;
125 }
126 mtfBlock[mtfIndex++] = (char) (mtfPosition + 1);
127 mtfSymbolFrequencies[mtfPosition + 1]++;
128 }
129 }
130
131 if (repeatCount > 0) {
132 repeatCount--;
133 while (true) {
134 if ((repeatCount & 1) == 0) {
135 mtfBlock[mtfIndex++] = HUFFMAN_SYMBOL_RUNA;
136 totalRunAs++;
137 } else {
138 mtfBlock[mtfIndex++] = HUFFMAN_SYMBOL_RUNB;
139 totalRunBs++;
140 }
141
142 if (repeatCount <= 1) {
143 break;
144 }
145 repeatCount = (repeatCount - 2) >>> 1;
146 }
147 }
148
149 mtfBlock[mtfIndex] = (char) endOfBlockSymbol;
150 mtfSymbolFrequencies[endOfBlockSymbol]++;
151 mtfSymbolFrequencies[HUFFMAN_SYMBOL_RUNA] += totalRunAs;
152 mtfSymbolFrequencies[HUFFMAN_SYMBOL_RUNB] += totalRunBs;
153
154 mtfLength = mtfIndex + 1;
155 alphabetSize = endOfBlockSymbol + 1;
156 }
157
158 /**
159 * @return The encoded MTF block
160 */
161 char[] mtfBlock() {
162 return mtfBlock;
163 }
164
165 /**
166 * @return The actual length of the MTF block
167 */
168 int mtfLength() {
169 return mtfLength;
170 }
171
172 /**
173 * @return The size of the MTF block's alphabet
174 */
175 int mtfAlphabetSize() {
176 return alphabetSize;
177 }
178
179 /**
180 * @return The frequencies of the MTF block's symbols
181 */
182 int[] mtfSymbolFrequencies() {
183 return mtfSymbolFrequencies;
184 }
185 }