View Javadoc
1   /*
2    * Copyright 2014 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty5.handler.codec.compression;
17  
18  import io.netty5.buffer.BufferInputStream;
19  import io.netty5.buffer.BufferOutputStream;
20  import io.netty5.buffer.api.Buffer;
21  import io.netty5.buffer.api.BufferAllocator;
22  import io.netty5.util.internal.logging.InternalLogger;
23  import io.netty5.util.internal.logging.InternalLoggerFactory;
24  import lzma.sdk.lzma.Base;
25  import lzma.sdk.lzma.Encoder;
26  
27  import java.io.IOException;
28  import java.io.InputStream;
29  import java.util.function.Supplier;
30  
31  import static lzma.sdk.lzma.Encoder.EMatchFinderTypeBT4;
32  
33  /**
34   * Compresses a {@link Buffer} using the LZMA algorithm.
35   *
36   * See <a href="https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Markov_chain_algorithm">LZMA</a>
37   * and <a href="https://svn.python.org/projects/external/xz-5.0.5/doc/lzma-file-format.txt">LZMA format</a>
38   * or documents in <a href="https://www.7-zip.org/sdk.html">LZMA SDK</a> archive.
39   */
40  public final class LzmaCompressor implements Compressor {
41      private static final InternalLogger logger = InternalLoggerFactory.getInstance(LzmaCompressor.class);
42  
43      private static final int MEDIUM_DICTIONARY_SIZE = 1 << 16;
44  
45      private static final int MIN_FAST_BYTES = 5;
46      private static final int MEDIUM_FAST_BYTES = 0x20;
47      private static final int MAX_FAST_BYTES = Base.kMatchMaxLen;
48  
49      private static final int DEFAULT_MATCH_FINDER = EMatchFinderTypeBT4;
50  
51      private static final int DEFAULT_LC = 3;
52      private static final int DEFAULT_LP = 0;
53      private static final int DEFAULT_PB = 2;
54  
55      /**
56       * Underlying LZMA encoder in use.
57       */
58      private final Encoder encoder;
59  
60      /**
61       * The Properties field contains three properties which are encoded using the following formula:
62       *
63       * <p>{@code Properties = (pb * 5 + lp) * 9 + lc}</p>
64       *
65       * The field consists of
66       *  <ol>
67       *      <li>the number of literal context bits (lc, [0, 8]);</li>
68       *      <li>the number of literal position bits (lp, [0, 4]);</li>
69       *      <li>the number of position bits (pb, [0, 4]).</li>
70       *  </ol>
71       */
72      private final byte properties;
73  
74      /**
75       * Dictionary Size is stored as an unsigned 32-bit little endian integer.
76       */
77      private final int littleEndianDictionarySize;
78  
79      /**
80       * For log warning only once.
81       */
82      private static boolean warningLogged;
83  
84      private enum State {
85          PROCESSING,
86          FINISHED,
87          CLOSED
88      }
89  
90      private State state = State.PROCESSING;
91  
92      /**
93       * Creates LZMA compressor with specified settings.
94       *
95       * @param lc
96       *        the number of "literal context" bits, available values [0, 8], default value {@value #DEFAULT_LC}.
97       * @param lp
98       *        the number of "literal position" bits, available values [0, 4], default value {@value #DEFAULT_LP}.
99       * @param pb
100      *        the number of "position" bits, available values [0, 4], default value {@value #DEFAULT_PB}.
101      * @param dictionarySize
102      *        available values [0, {@link java.lang.Integer#MAX_VALUE}],
103      *        default value is {@value #MEDIUM_DICTIONARY_SIZE}.
104      * @param endMarkerMode
105      *        indicates should {@link LzmaCompressor} use end of stream marker or not.
106      *        Note, that {@link LzmaCompressor} always sets size of uncompressed data
107      *        in LZMA header, so EOS marker is unnecessary. But you may use it for
108      *        better portability. For full description see "LZMA Decoding modes" section
109      *        of LZMA-Specification.txt in official LZMA SDK.
110      * @param numFastBytes
111      *        available values [{@value #MIN_FAST_BYTES}, {@value #MAX_FAST_BYTES}].
112      */
113     private LzmaCompressor(int lc, int lp, int pb, int dictionarySize, boolean endMarkerMode, int numFastBytes) {
114         encoder = new Encoder();
115         encoder.setDictionarySize(dictionarySize);
116         encoder.setEndMarkerMode(endMarkerMode);
117         encoder.setMatchFinder(DEFAULT_MATCH_FINDER);
118         encoder.setNumFastBytes(numFastBytes);
119         encoder.setLcLpPb(lc, lp, pb);
120 
121         properties = (byte) ((pb * 5 + lp) * 9 + lc);
122         littleEndianDictionarySize = Integer.reverseBytes(dictionarySize);
123     }
124 
125     /**
126      * Creates LZMA compressor factory with default settings.
127      *
128      * @return the factory.
129      */
130     public static Supplier<LzmaCompressor> newFactory() {
131         return newFactory(LzmaCompressor.MEDIUM_DICTIONARY_SIZE);
132     }
133 
134     /**
135      * Creates LZMA compressor factory with specified {@code lc}, {@code lp}, {@code pb}
136      * values and the medium dictionary size of {@value #MEDIUM_DICTIONARY_SIZE}.
137      *
138      * @return the factory.
139      */
140     public static Supplier<LzmaCompressor> newFactory(int lc, int lp, int pb) {
141         return newFactory(lc, lp, pb, LzmaCompressor.MEDIUM_DICTIONARY_SIZE);
142     }
143 
144     /**
145      * Creates LZMA compressor factory with specified dictionary size and default values of
146      * {@code lc} = {@value #DEFAULT_LC},
147      * {@code lp} = {@value #DEFAULT_LP},
148      * {@code pb} = {@value #DEFAULT_PB}.
149      *
150      * @return the factory.
151      */
152     public static Supplier<LzmaCompressor> newFactory(int dictionarySize) {
153         return newFactory(LzmaCompressor.DEFAULT_LC, LzmaCompressor.DEFAULT_LP,
154                 LzmaCompressor.DEFAULT_PB, dictionarySize);
155     }
156 
157     /**
158      * Creates LZMA compressor factory with specified {@code lc}, {@code lp}, {@code pb} values and custom
159      * dictionary size.
160      *
161      * @return the factory.
162      */
163     public static Supplier<LzmaCompressor> newFactory(int lc, int lp, int pb, int dictionarySize) {
164         return newFactory(lc, lp, pb, dictionarySize, false, LzmaCompressor.MEDIUM_FAST_BYTES);
165     }
166 
167     /**
168      * Creates LZMA compressor factory with specified settings.
169      *
170      * @param lc
171      *        the number of "literal context" bits, available values [0, 8], default value {@value #DEFAULT_LC}.
172      * @param lp
173      *        the number of "literal position" bits, available values [0, 4], default value {@value #DEFAULT_LP}.
174      * @param pb
175      *        the number of "position" bits, available values [0, 4], default value {@value #DEFAULT_PB}.
176      * @param dictionarySize
177      *        available values [0, {@link java.lang.Integer#MAX_VALUE}],
178      *        default value is {@value #MEDIUM_DICTIONARY_SIZE}.
179      * @param endMarkerMode
180      *        indicates should {@link LzmaCompressor} use end of stream marker or not.
181      *        Note, that {@link LzmaCompressor} always sets size of uncompressed data
182      *        in LZMA header, so EOS marker is unnecessary. But you may use it for
183      *        better portability. For full description see "LZMA Decoding modes" section
184      *        of LZMA-Specification.txt in official LZMA SDK.
185      * @param numFastBytes
186      *        available values [{@value #MIN_FAST_BYTES}, {@value #MAX_FAST_BYTES}].
187      * @return the factory.
188      */
189     public static Supplier<LzmaCompressor> newFactory(int lc, int lp, int pb, int dictionarySize,
190                                                       boolean endMarkerMode, int numFastBytes) {
191         if (lc < 0 || lc > 8) {
192             throw new IllegalArgumentException("lc: " + lc + " (expected: 0-8)");
193         }
194         if (lp < 0 || lp > 4) {
195             throw new IllegalArgumentException("lp: " + lp + " (expected: 0-4)");
196         }
197         if (pb < 0 || pb > 4) {
198             throw new IllegalArgumentException("pb: " + pb + " (expected: 0-4)");
199         }
200         if (lc + lp > 4) {
201             if (!warningLogged) {
202                 logger.warn("The latest versions of LZMA libraries (for example, XZ Utils) " +
203                         "has an additional requirement: lc + lp <= 4. Data which don't follow " +
204                         "this requirement cannot be decompressed with this libraries.");
205                 warningLogged = true;
206             }
207         }
208         if (dictionarySize < 0) {
209             throw new IllegalArgumentException("dictionarySize: " + dictionarySize + " (expected: 0+)");
210         }
211         if (numFastBytes < MIN_FAST_BYTES || numFastBytes > MAX_FAST_BYTES) {
212             throw new IllegalArgumentException(String.format(
213                     "numFastBytes: %d (expected: %d-%d)", numFastBytes, MIN_FAST_BYTES, MAX_FAST_BYTES
214             ));
215         }
216 
217         return () -> new LzmaCompressor(lc, lp, pb, dictionarySize, endMarkerMode, numFastBytes);
218     }
219 
220     @Override
221     public Buffer compress(Buffer in, BufferAllocator allocator) throws CompressionException {
222         switch (state) {
223             case CLOSED:
224                 throw new CompressionException("Compressor closed");
225             case FINISHED:
226                 return allocator.allocate(0);
227             case PROCESSING:
228 
229                 final int length = in.readableBytes();
230                 Buffer out = allocateBuffer(in, allocator);
231                 try {
232                     try (InputStream bbIn = new BufferInputStream(in.send());
233                          BufferOutputStream bbOut = new BufferOutputStream(out)) {
234                         bbOut.writeByte(properties);
235                         bbOut.writeInt(littleEndianDictionarySize);
236                         bbOut.writeLong(Long.reverseBytes(length));
237                         encoder.code(bbIn, bbOut, -1, -1, null);
238                     }
239                 } catch (IOException e) {
240                     out.close();
241                     throw new CompressionException(e);
242                 } catch (Throwable cause) {
243                     out.close();
244                     throw cause;
245                 }
246                 return out;
247             default:
248                 throw new IllegalStateException();
249         }
250     }
251 
252     private static Buffer allocateBuffer(Buffer in, BufferAllocator allocator) {
253         final int length = in.readableBytes();
254         final int maxOutputLength = maxOutputBufferLength(length);
255         return allocator.allocate(maxOutputLength);
256     }
257 
258     /**
259      * Calculates maximum possible size of output buffer for not compressible data.
260      */
261     private static int maxOutputBufferLength(int inputLength) {
262         double factor;
263         if (inputLength < 200) {
264             factor = 1.5;
265         } else if (inputLength < 500) {
266             factor = 1.2;
267         } else if (inputLength < 1000) {
268             factor = 1.1;
269         } else if (inputLength < 10000) {
270             factor = 1.05;
271         } else {
272             factor = 1.02;
273         }
274         return 13 + (int) (inputLength * factor);
275     }
276 
277     @Override
278     public Buffer finish(BufferAllocator allocator) {
279         switch (state) {
280             case CLOSED:
281                 throw new CompressionException("Compressor closed");
282             case FINISHED:
283             case PROCESSING:
284                 state = State.FINISHED;
285                 return allocator.allocate(0);
286             default:
287                 throw new IllegalStateException();
288         }
289     }
290 
291     @Override
292     public boolean isFinished() {
293         return state != State.PROCESSING;
294     }
295 
296     @Override
297     public boolean isClosed() {
298         return state == State.CLOSED;
299     }
300 
301     @Override
302     public void close() {
303         state = State.CLOSED;
304     }
305 }