View Javadoc
1   /*
2    * Copyright 2014 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.handler.codec.compression;
17  
18  import static io.netty.handler.codec.compression.Bzip2Constants.HUFFMAN_MAX_ALPHABET_SIZE;
19  import static io.netty.handler.codec.compression.Bzip2Constants.HUFFMAN_SYMBOL_RUNA;
20  import static io.netty.handler.codec.compression.Bzip2Constants.HUFFMAN_SYMBOL_RUNB;
21  
22  /**
23   * An encoder for the Bzip2 Move To Front Transform and Run-Length Encoding[2] stages.<br>
24   * Although conceptually these two stages are separate, it is computationally efficient to perform
25   * them in one pass.
26   */
27  final class Bzip2MTFAndRLE2StageEncoder {
28      /**
29       * The Burrows-Wheeler transformed block.
30       */
31      private final int[] bwtBlock;
32  
33      /**
34       * Actual length of the data in the {@link #bwtBlock} array.
35       */
36      private final int bwtLength;
37  
38      /**
39       * At each position, {@code true} if the byte value with that index is present within the block,
40       * otherwise {@code false}.
41       */
42      private final boolean[] bwtValuesPresent;
43  
44      /**
45       * The output of the Move To Front Transform and Run-Length Encoding[2] stages.
46       */
47      private final char[] mtfBlock;
48  
49      /**
50       * The actual number of values contained in the {@link #mtfBlock} array.
51       */
52      private int mtfLength;
53  
54      /**
55       * The global frequencies of values within the {@link #mtfBlock} array.
56       */
57      private final int[] mtfSymbolFrequencies = new int[HUFFMAN_MAX_ALPHABET_SIZE];
58  
59      /**
60       * The encoded alphabet size.
61       */
62      private int alphabetSize;
63  
64      /**
65       * @param bwtBlock The Burrows Wheeler Transformed block data
66       * @param bwtLength The actual length of the BWT data
67       * @param bwtValuesPresent The values that are present within the BWT data. For each index,
68       *            {@code true} if that value is present within the data, otherwise {@code false}
69       */
70      Bzip2MTFAndRLE2StageEncoder(final int[] bwtBlock, final int bwtLength, final boolean[] bwtValuesPresent) {
71          this.bwtBlock = bwtBlock;
72          this.bwtLength = bwtLength;
73          this.bwtValuesPresent = bwtValuesPresent;
74          mtfBlock = new char[bwtLength + 1];
75      }
76  
77      /**
78       * Performs the Move To Front transform and Run Length Encoding[1] stages.
79       */
80      void encode() {
81          final int bwtLength = this.bwtLength;
82          final boolean[] bwtValuesPresent = this.bwtValuesPresent;
83          final int[] bwtBlock = this.bwtBlock;
84          final char[] mtfBlock = this.mtfBlock;
85          final int[] mtfSymbolFrequencies = this.mtfSymbolFrequencies;
86          final byte[] huffmanSymbolMap = new byte[256];
87          final Bzip2MoveToFrontTable symbolMTF = new Bzip2MoveToFrontTable();
88  
89          int totalUniqueValues = 0;
90          for (int i = 0; i < huffmanSymbolMap.length; i++) {
91              if (bwtValuesPresent[i]) {
92                  huffmanSymbolMap[i] = (byte) totalUniqueValues++;
93              }
94          }
95          final int endOfBlockSymbol = totalUniqueValues + 1;
96  
97          int mtfIndex = 0;
98          int repeatCount = 0;
99          int totalRunAs = 0;
100         int totalRunBs = 0;
101         for (int i = 0; i < bwtLength; i++) {
102             // Move To Front
103             final int mtfPosition = symbolMTF.valueToFront(huffmanSymbolMap[bwtBlock[i] & 0xff]);
104             // Run Length Encode
105             if (mtfPosition == 0) {
106                 repeatCount++;
107             } else {
108                 if (repeatCount > 0) {
109                     repeatCount--;
110                     while (true) {
111                         if ((repeatCount & 1) == 0) {
112                             mtfBlock[mtfIndex++] = HUFFMAN_SYMBOL_RUNA;
113                             totalRunAs++;
114                         } else {
115                             mtfBlock[mtfIndex++] = HUFFMAN_SYMBOL_RUNB;
116                             totalRunBs++;
117                         }
118 
119                         if (repeatCount <= 1) {
120                             break;
121                         }
122                         repeatCount = (repeatCount - 2) >>> 1;
123                     }
124                     repeatCount = 0;
125                 }
126                 mtfBlock[mtfIndex++] = (char) (mtfPosition + 1);
127                 mtfSymbolFrequencies[mtfPosition + 1]++;
128             }
129         }
130 
131         if (repeatCount > 0) {
132             repeatCount--;
133             while (true) {
134                 if ((repeatCount & 1) == 0) {
135                     mtfBlock[mtfIndex++] = HUFFMAN_SYMBOL_RUNA;
136                     totalRunAs++;
137                 } else {
138                     mtfBlock[mtfIndex++] = HUFFMAN_SYMBOL_RUNB;
139                     totalRunBs++;
140                 }
141 
142                 if (repeatCount <= 1) {
143                     break;
144                 }
145                 repeatCount = (repeatCount - 2) >>> 1;
146             }
147         }
148 
149         mtfBlock[mtfIndex] = (char) endOfBlockSymbol;
150         mtfSymbolFrequencies[endOfBlockSymbol]++;
151         mtfSymbolFrequencies[HUFFMAN_SYMBOL_RUNA] += totalRunAs;
152         mtfSymbolFrequencies[HUFFMAN_SYMBOL_RUNB] += totalRunBs;
153 
154         mtfLength = mtfIndex + 1;
155         alphabetSize = endOfBlockSymbol + 1;
156     }
157 
158     /**
159      * @return The encoded MTF block
160      */
161     char[] mtfBlock() {
162         return mtfBlock;
163     }
164 
165     /**
166      * @return The actual length of the MTF block
167      */
168     int mtfLength() {
169         return mtfLength;
170     }
171 
172     /**
173      * @return The size of the MTF block's alphabet
174      */
175     int mtfAlphabetSize() {
176         return alphabetSize;
177     }
178 
179     /**
180      * @return The frequencies of the MTF block's symbols
181      */
182     int[] mtfSymbolFrequencies() {
183         return mtfSymbolFrequencies;
184     }
185 }