1 /*
2 * Copyright 2014 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package io.netty5.handler.codec.compression;
17
18 import io.netty5.buffer.BufferInputStream;
19 import io.netty5.buffer.BufferOutputStream;
20 import io.netty5.buffer.api.Buffer;
21 import io.netty5.buffer.api.BufferAllocator;
22 import io.netty5.util.internal.logging.InternalLogger;
23 import io.netty5.util.internal.logging.InternalLoggerFactory;
24 import lzma.sdk.lzma.Base;
25 import lzma.sdk.lzma.Encoder;
26
27 import java.io.IOException;
28 import java.io.InputStream;
29 import java.util.function.Supplier;
30
31 import static lzma.sdk.lzma.Encoder.EMatchFinderTypeBT4;
32
33 /**
34 * Compresses a {@link Buffer} using the LZMA algorithm.
35 *
36 * See <a href="https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Markov_chain_algorithm">LZMA</a>
37 * and <a href="https://svn.python.org/projects/external/xz-5.0.5/doc/lzma-file-format.txt">LZMA format</a>
38 * or documents in <a href="https://www.7-zip.org/sdk.html">LZMA SDK</a> archive.
39 */
40 public final class LzmaCompressor implements Compressor {
41 private static final InternalLogger logger = InternalLoggerFactory.getInstance(LzmaCompressor.class);
42
43 private static final int MEDIUM_DICTIONARY_SIZE = 1 << 16;
44
45 private static final int MIN_FAST_BYTES = 5;
46 private static final int MEDIUM_FAST_BYTES = 0x20;
47 private static final int MAX_FAST_BYTES = Base.kMatchMaxLen;
48
49 private static final int DEFAULT_MATCH_FINDER = EMatchFinderTypeBT4;
50
51 private static final int DEFAULT_LC = 3;
52 private static final int DEFAULT_LP = 0;
53 private static final int DEFAULT_PB = 2;
54
55 /**
56 * Underlying LZMA encoder in use.
57 */
58 private final Encoder encoder;
59
60 /**
61 * The Properties field contains three properties which are encoded using the following formula:
62 *
63 * <p>{@code Properties = (pb * 5 + lp) * 9 + lc}</p>
64 *
65 * The field consists of
66 * <ol>
67 * <li>the number of literal context bits (lc, [0, 8]);</li>
68 * <li>the number of literal position bits (lp, [0, 4]);</li>
69 * <li>the number of position bits (pb, [0, 4]).</li>
70 * </ol>
71 */
72 private final byte properties;
73
74 /**
75 * Dictionary Size is stored as an unsigned 32-bit little endian integer.
76 */
77 private final int littleEndianDictionarySize;
78
79 /**
80 * For log warning only once.
81 */
82 private static boolean warningLogged;
83
84 private enum State {
85 PROCESSING,
86 FINISHED,
87 CLOSED
88 }
89
90 private State state = State.PROCESSING;
91
92 /**
93 * Creates LZMA compressor with specified settings.
94 *
95 * @param lc
96 * the number of "literal context" bits, available values [0, 8], default value {@value #DEFAULT_LC}.
97 * @param lp
98 * the number of "literal position" bits, available values [0, 4], default value {@value #DEFAULT_LP}.
99 * @param pb
100 * the number of "position" bits, available values [0, 4], default value {@value #DEFAULT_PB}.
101 * @param dictionarySize
102 * available values [0, {@link java.lang.Integer#MAX_VALUE}],
103 * default value is {@value #MEDIUM_DICTIONARY_SIZE}.
104 * @param endMarkerMode
105 * indicates should {@link LzmaCompressor} use end of stream marker or not.
106 * Note, that {@link LzmaCompressor} always sets size of uncompressed data
107 * in LZMA header, so EOS marker is unnecessary. But you may use it for
108 * better portability. For full description see "LZMA Decoding modes" section
109 * of LZMA-Specification.txt in official LZMA SDK.
110 * @param numFastBytes
111 * available values [{@value #MIN_FAST_BYTES}, {@value #MAX_FAST_BYTES}].
112 */
113 private LzmaCompressor(int lc, int lp, int pb, int dictionarySize, boolean endMarkerMode, int numFastBytes) {
114 encoder = new Encoder();
115 encoder.setDictionarySize(dictionarySize);
116 encoder.setEndMarkerMode(endMarkerMode);
117 encoder.setMatchFinder(DEFAULT_MATCH_FINDER);
118 encoder.setNumFastBytes(numFastBytes);
119 encoder.setLcLpPb(lc, lp, pb);
120
121 properties = (byte) ((pb * 5 + lp) * 9 + lc);
122 littleEndianDictionarySize = Integer.reverseBytes(dictionarySize);
123 }
124
125 /**
126 * Creates LZMA compressor factory with default settings.
127 *
128 * @return the factory.
129 */
130 public static Supplier<LzmaCompressor> newFactory() {
131 return newFactory(LzmaCompressor.MEDIUM_DICTIONARY_SIZE);
132 }
133
134 /**
135 * Creates LZMA compressor factory with specified {@code lc}, {@code lp}, {@code pb}
136 * values and the medium dictionary size of {@value #MEDIUM_DICTIONARY_SIZE}.
137 *
138 * @return the factory.
139 */
140 public static Supplier<LzmaCompressor> newFactory(int lc, int lp, int pb) {
141 return newFactory(lc, lp, pb, LzmaCompressor.MEDIUM_DICTIONARY_SIZE);
142 }
143
144 /**
145 * Creates LZMA compressor factory with specified dictionary size and default values of
146 * {@code lc} = {@value #DEFAULT_LC},
147 * {@code lp} = {@value #DEFAULT_LP},
148 * {@code pb} = {@value #DEFAULT_PB}.
149 *
150 * @return the factory.
151 */
152 public static Supplier<LzmaCompressor> newFactory(int dictionarySize) {
153 return newFactory(LzmaCompressor.DEFAULT_LC, LzmaCompressor.DEFAULT_LP,
154 LzmaCompressor.DEFAULT_PB, dictionarySize);
155 }
156
157 /**
158 * Creates LZMA compressor factory with specified {@code lc}, {@code lp}, {@code pb} values and custom
159 * dictionary size.
160 *
161 * @return the factory.
162 */
163 public static Supplier<LzmaCompressor> newFactory(int lc, int lp, int pb, int dictionarySize) {
164 return newFactory(lc, lp, pb, dictionarySize, false, LzmaCompressor.MEDIUM_FAST_BYTES);
165 }
166
167 /**
168 * Creates LZMA compressor factory with specified settings.
169 *
170 * @param lc
171 * the number of "literal context" bits, available values [0, 8], default value {@value #DEFAULT_LC}.
172 * @param lp
173 * the number of "literal position" bits, available values [0, 4], default value {@value #DEFAULT_LP}.
174 * @param pb
175 * the number of "position" bits, available values [0, 4], default value {@value #DEFAULT_PB}.
176 * @param dictionarySize
177 * available values [0, {@link java.lang.Integer#MAX_VALUE}],
178 * default value is {@value #MEDIUM_DICTIONARY_SIZE}.
179 * @param endMarkerMode
180 * indicates should {@link LzmaCompressor} use end of stream marker or not.
181 * Note, that {@link LzmaCompressor} always sets size of uncompressed data
182 * in LZMA header, so EOS marker is unnecessary. But you may use it for
183 * better portability. For full description see "LZMA Decoding modes" section
184 * of LZMA-Specification.txt in official LZMA SDK.
185 * @param numFastBytes
186 * available values [{@value #MIN_FAST_BYTES}, {@value #MAX_FAST_BYTES}].
187 * @return the factory.
188 */
189 public static Supplier<LzmaCompressor> newFactory(int lc, int lp, int pb, int dictionarySize,
190 boolean endMarkerMode, int numFastBytes) {
191 if (lc < 0 || lc > 8) {
192 throw new IllegalArgumentException("lc: " + lc + " (expected: 0-8)");
193 }
194 if (lp < 0 || lp > 4) {
195 throw new IllegalArgumentException("lp: " + lp + " (expected: 0-4)");
196 }
197 if (pb < 0 || pb > 4) {
198 throw new IllegalArgumentException("pb: " + pb + " (expected: 0-4)");
199 }
200 if (lc + lp > 4) {
201 if (!warningLogged) {
202 logger.warn("The latest versions of LZMA libraries (for example, XZ Utils) " +
203 "has an additional requirement: lc + lp <= 4. Data which don't follow " +
204 "this requirement cannot be decompressed with this libraries.");
205 warningLogged = true;
206 }
207 }
208 if (dictionarySize < 0) {
209 throw new IllegalArgumentException("dictionarySize: " + dictionarySize + " (expected: 0+)");
210 }
211 if (numFastBytes < MIN_FAST_BYTES || numFastBytes > MAX_FAST_BYTES) {
212 throw new IllegalArgumentException(String.format(
213 "numFastBytes: %d (expected: %d-%d)", numFastBytes, MIN_FAST_BYTES, MAX_FAST_BYTES
214 ));
215 }
216
217 return () -> new LzmaCompressor(lc, lp, pb, dictionarySize, endMarkerMode, numFastBytes);
218 }
219
220 @Override
221 public Buffer compress(Buffer in, BufferAllocator allocator) throws CompressionException {
222 switch (state) {
223 case CLOSED:
224 throw new CompressionException("Compressor closed");
225 case FINISHED:
226 return allocator.allocate(0);
227 case PROCESSING:
228
229 final int length = in.readableBytes();
230 Buffer out = allocateBuffer(in, allocator);
231 try {
232 try (InputStream bbIn = new BufferInputStream(in.send());
233 BufferOutputStream bbOut = new BufferOutputStream(out)) {
234 bbOut.writeByte(properties);
235 bbOut.writeInt(littleEndianDictionarySize);
236 bbOut.writeLong(Long.reverseBytes(length));
237 encoder.code(bbIn, bbOut, -1, -1, null);
238 }
239 } catch (IOException e) {
240 out.close();
241 throw new CompressionException(e);
242 } catch (Throwable cause) {
243 out.close();
244 throw cause;
245 }
246 return out;
247 default:
248 throw new IllegalStateException();
249 }
250 }
251
252 private static Buffer allocateBuffer(Buffer in, BufferAllocator allocator) {
253 final int length = in.readableBytes();
254 final int maxOutputLength = maxOutputBufferLength(length);
255 return allocator.allocate(maxOutputLength);
256 }
257
258 /**
259 * Calculates maximum possible size of output buffer for not compressible data.
260 */
261 private static int maxOutputBufferLength(int inputLength) {
262 double factor;
263 if (inputLength < 200) {
264 factor = 1.5;
265 } else if (inputLength < 500) {
266 factor = 1.2;
267 } else if (inputLength < 1000) {
268 factor = 1.1;
269 } else if (inputLength < 10000) {
270 factor = 1.05;
271 } else {
272 factor = 1.02;
273 }
274 return 13 + (int) (inputLength * factor);
275 }
276
277 @Override
278 public Buffer finish(BufferAllocator allocator) {
279 switch (state) {
280 case CLOSED:
281 throw new CompressionException("Compressor closed");
282 case FINISHED:
283 case PROCESSING:
284 state = State.FINISHED;
285 return allocator.allocate(0);
286 default:
287 throw new IllegalStateException();
288 }
289 }
290
291 @Override
292 public boolean isFinished() {
293 return state != State.PROCESSING;
294 }
295
296 @Override
297 public boolean isClosed() {
298 return state == State.CLOSED;
299 }
300
301 @Override
302 public void close() {
303 state = State.CLOSED;
304 }
305 }