View Javadoc
1   /*
2    * Copyright 2014 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.handler.codec.compression;
17  
18  import io.netty.buffer.ByteBuf;
19  import io.netty.buffer.ByteBufInputStream;
20  import io.netty.buffer.ByteBufOutputStream;
21  import io.netty.channel.ChannelHandlerContext;
22  import io.netty.handler.codec.MessageToByteEncoder;
23  import io.netty.util.internal.logging.InternalLogger;
24  import io.netty.util.internal.logging.InternalLoggerFactory;
25  import lzma.sdk.lzma.Base;
26  import lzma.sdk.lzma.Encoder;
27  
28  import java.io.InputStream;
29  
30  import static lzma.sdk.lzma.Encoder.EMatchFinderTypeBT4;
31  
32  /**
33   * Compresses a {@link ByteBuf} using the LZMA algorithm.
34   *
35   * See <a href="https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Markov_chain_algorithm">LZMA</a>
36   * and <a href="https://svn.python.org/projects/external/xz-5.0.5/doc/lzma-file-format.txt">LZMA format</a>
37   * or documents in <a href="https://www.7-zip.org/sdk.html">LZMA SDK</a> archive.
38   */
39  public class LzmaFrameEncoder extends MessageToByteEncoder<ByteBuf> {
40  
41      private static final InternalLogger logger = InternalLoggerFactory.getInstance(LzmaFrameEncoder.class);
42  
43      private static final int MEDIUM_DICTIONARY_SIZE = 1 << 16;
44  
45      private static final int MIN_FAST_BYTES = 5;
46      private static final int MEDIUM_FAST_BYTES = 0x20;
47      private static final int MAX_FAST_BYTES = Base.kMatchMaxLen;
48  
49      private static final int DEFAULT_MATCH_FINDER = EMatchFinderTypeBT4;
50  
51      private static final int DEFAULT_LC = 3;
52      private static final int DEFAULT_LP = 0;
53      private static final int DEFAULT_PB = 2;
54  
55      /**
56       * Underlying LZMA encoder in use.
57       */
58      private final Encoder encoder;
59  
60      /**
61       * The Properties field contains three properties which are encoded using the following formula:
62       *
63       * <p>{@code Properties = (pb * 5 + lp) * 9 + lc}</p>
64       *
65       * The field consists of
66       *  <ol>
67       *      <li>the number of literal context bits (lc, [0, 8]);</li>
68       *      <li>the number of literal position bits (lp, [0, 4]);</li>
69       *      <li>the number of position bits (pb, [0, 4]).</li>
70       *  </ol>
71       */
72      private final byte properties;
73  
74      /**
75       * Dictionary Size is stored as an unsigned 32-bit little endian integer.
76       */
77      private final int littleEndianDictionarySize;
78  
79      /**
80       * For log warning only once.
81       */
82      private static boolean warningLogged;
83  
84      /**
85       * Creates LZMA encoder with default settings.
86       */
87      public LzmaFrameEncoder() {
88          this(MEDIUM_DICTIONARY_SIZE);
89      }
90  
91      /**
92       * Creates LZMA encoder with specified {@code lc}, {@code lp}, {@code pb}
93       * values and the medium dictionary size of {@value #MEDIUM_DICTIONARY_SIZE}.
94       */
95      public LzmaFrameEncoder(int lc, int lp, int pb) {
96          this(lc, lp, pb, MEDIUM_DICTIONARY_SIZE);
97      }
98  
99      /**
100      * Creates LZMA encoder with specified dictionary size and default values of
101      * {@code lc} = {@value #DEFAULT_LC},
102      * {@code lp} = {@value #DEFAULT_LP},
103      * {@code pb} = {@value #DEFAULT_PB}.
104      */
105     public LzmaFrameEncoder(int dictionarySize) {
106         this(DEFAULT_LC, DEFAULT_LP, DEFAULT_PB, dictionarySize);
107     }
108 
109     /**
110      * Creates LZMA encoder with specified {@code lc}, {@code lp}, {@code pb} values and custom dictionary size.
111      */
112     public LzmaFrameEncoder(int lc, int lp, int pb, int dictionarySize) {
113         this(lc, lp, pb, dictionarySize, false, MEDIUM_FAST_BYTES);
114     }
115 
116     /**
117      * Creates LZMA encoder with specified settings.
118      *
119      * @param lc
120      *        the number of "literal context" bits, available values [0, 8], default value {@value #DEFAULT_LC}.
121      * @param lp
122      *        the number of "literal position" bits, available values [0, 4], default value {@value #DEFAULT_LP}.
123      * @param pb
124      *        the number of "position" bits, available values [0, 4], default value {@value #DEFAULT_PB}.
125      * @param dictionarySize
126      *        available values [0, {@link java.lang.Integer#MAX_VALUE}],
127      *        default value is {@value #MEDIUM_DICTIONARY_SIZE}.
128      * @param endMarkerMode
129      *        indicates should {@link LzmaFrameEncoder} use end of stream marker or not.
130      *        Note, that {@link LzmaFrameEncoder} always sets size of uncompressed data
131      *        in LZMA header, so EOS marker is unnecessary. But you may use it for
132      *        better portability. For full description see "LZMA Decoding modes" section
133      *        of LZMA-Specification.txt in official LZMA SDK.
134      * @param numFastBytes
135      *        available values [{@value #MIN_FAST_BYTES}, {@value #MAX_FAST_BYTES}].
136      */
137     public LzmaFrameEncoder(int lc, int lp, int pb, int dictionarySize, boolean endMarkerMode, int numFastBytes) {
138         if (lc < 0 || lc > 8) {
139             throw new IllegalArgumentException("lc: " + lc + " (expected: 0-8)");
140         }
141         if (lp < 0 || lp > 4) {
142             throw new IllegalArgumentException("lp: " + lp + " (expected: 0-4)");
143         }
144         if (pb < 0 || pb > 4) {
145             throw new IllegalArgumentException("pb: " + pb + " (expected: 0-4)");
146         }
147         if (lc + lp > 4) {
148             if (!warningLogged) {
149                 logger.warn("The latest versions of LZMA libraries (for example, XZ Utils) " +
150                         "has an additional requirement: lc + lp <= 4. Data which don't follow " +
151                         "this requirement cannot be decompressed with this libraries.");
152                 warningLogged = true;
153             }
154         }
155         if (dictionarySize < 0) {
156             throw new IllegalArgumentException("dictionarySize: " + dictionarySize + " (expected: 0+)");
157         }
158         if (numFastBytes < MIN_FAST_BYTES || numFastBytes > MAX_FAST_BYTES) {
159             throw new IllegalArgumentException(String.format(
160                     "numFastBytes: %d (expected: %d-%d)", numFastBytes, MIN_FAST_BYTES, MAX_FAST_BYTES
161             ));
162         }
163 
164         encoder = new Encoder();
165         encoder.setDictionarySize(dictionarySize);
166         encoder.setEndMarkerMode(endMarkerMode);
167         encoder.setMatchFinder(DEFAULT_MATCH_FINDER);
168         encoder.setNumFastBytes(numFastBytes);
169         encoder.setLcLpPb(lc, lp, pb);
170 
171         properties = (byte) ((pb * 5 + lp) * 9 + lc);
172         littleEndianDictionarySize = Integer.reverseBytes(dictionarySize);
173     }
174 
175     @Override
176     protected void encode(ChannelHandlerContext ctx, ByteBuf in, ByteBuf out) throws Exception {
177         final int length = in.readableBytes();
178         InputStream bbIn = null;
179         ByteBufOutputStream bbOut = null;
180         try {
181             bbIn = new ByteBufInputStream(in);
182             bbOut = new ByteBufOutputStream(out);
183             bbOut.writeByte(properties);
184             bbOut.writeInt(littleEndianDictionarySize);
185             bbOut.writeLong(Long.reverseBytes(length));
186             encoder.code(bbIn, bbOut, -1, -1, null);
187         } finally {
188             if (bbIn != null) {
189                 bbIn.close();
190             }
191             if (bbOut != null) {
192                 bbOut.close();
193             }
194         }
195     }
196 
197     @Override
198     protected ByteBuf allocateBuffer(ChannelHandlerContext ctx, ByteBuf in, boolean preferDirect) throws Exception {
199         final int length = in.readableBytes();
200         final int maxOutputLength = maxOutputBufferLength(length);
201         return ctx.alloc().ioBuffer(maxOutputLength);
202     }
203 
204     /**
205      * Calculates maximum possible size of output buffer for not compressible data.
206      */
207     private static int maxOutputBufferLength(int inputLength) {
208         double factor;
209         if (inputLength < 200) {
210             factor = 1.5;
211         } else if (inputLength < 500) {
212             factor = 1.2;
213         } else if (inputLength < 1000) {
214             factor = 1.1;
215         } else if (inputLength < 10000) {
216             factor = 1.05;
217         } else {
218             factor = 1.02;
219         }
220         return 13 + (int) (inputLength * factor);
221     }
222 }