View Javadoc
1   /*
2    * Copyright 2014 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.handler.codec.compression;
17  
18  import static io.netty.handler.codec.compression.Bzip2Constants.*;
19  
20  /**
21   * An encoder for the Bzip2 Move To Front Transform and Run-Length Encoding[2] stages.<br>
22   * Although conceptually these two stages are separate, it is computationally efficient to perform
23   * them in one pass.
24   */
25  final class Bzip2MTFAndRLE2StageEncoder {
26      /**
27       * The Burrows-Wheeler transformed block.
28       */
29      private final int[] bwtBlock;
30  
31      /**
32       * Actual length of the data in the {@link #bwtBlock} array.
33       */
34      private final int bwtLength;
35  
36      /**
37       * At each position, {@code true} if the byte value with that index is present within the block,
38       * otherwise {@code false}.
39       */
40      private final boolean[] bwtValuesPresent;
41  
42      /**
43       * The output of the Move To Front Transform and Run-Length Encoding[2] stages.
44       */
45      private final char[] mtfBlock;
46  
47      /**
48       * The actual number of values contained in the {@link #mtfBlock} array.
49       */
50      private int mtfLength;
51  
52      /**
53       * The global frequencies of values within the {@link #mtfBlock} array.
54       */
55      private final int[] mtfSymbolFrequencies = new int[HUFFMAN_MAX_ALPHABET_SIZE];
56  
57      /**
58       * The encoded alphabet size.
59       */
60      private int alphabetSize;
61  
62      /**
63       * @param bwtBlock The Burrows Wheeler Transformed block data
64       * @param bwtLength The actual length of the BWT data
65       * @param bwtValuesPresent The values that are present within the BWT data. For each index,
66       *            {@code true} if that value is present within the data, otherwise {@code false}
67       */
68      Bzip2MTFAndRLE2StageEncoder(final int[] bwtBlock, final int bwtLength, final boolean[] bwtValuesPresent) {
69          this.bwtBlock = bwtBlock;
70          this.bwtLength = bwtLength;
71          this.bwtValuesPresent = bwtValuesPresent;
72          mtfBlock = new char[bwtLength + 1];
73      }
74  
75      /**
76       * Performs the Move To Front transform and Run Length Encoding[1] stages.
77       */
78      void encode() {
79          final int bwtLength = this.bwtLength;
80          final boolean[] bwtValuesPresent = this.bwtValuesPresent;
81          final int[] bwtBlock = this.bwtBlock;
82          final char[] mtfBlock = this.mtfBlock;
83          final int[] mtfSymbolFrequencies = this.mtfSymbolFrequencies;
84          final byte[] huffmanSymbolMap = new byte[256];
85          final Bzip2MoveToFrontTable symbolMTF = new Bzip2MoveToFrontTable();
86  
87          int totalUniqueValues = 0;
88          for (int i = 0; i < huffmanSymbolMap.length; i++) {
89              if (bwtValuesPresent[i]) {
90                  huffmanSymbolMap[i] = (byte) totalUniqueValues++;
91              }
92          }
93          final int endOfBlockSymbol = totalUniqueValues + 1;
94  
95          int mtfIndex = 0;
96          int repeatCount = 0;
97          int totalRunAs = 0;
98          int totalRunBs = 0;
99          for (int i = 0; i < bwtLength; i++) {
100             // Move To Front
101             final int mtfPosition = symbolMTF.valueToFront(huffmanSymbolMap[bwtBlock[i] & 0xff]);
102             // Run Length Encode
103             if (mtfPosition == 0) {
104                 repeatCount++;
105             } else {
106                 if (repeatCount > 0) {
107                     repeatCount--;
108                     while (true) {
109                         if ((repeatCount & 1) == 0) {
110                             mtfBlock[mtfIndex++] = HUFFMAN_SYMBOL_RUNA;
111                             totalRunAs++;
112                         } else {
113                             mtfBlock[mtfIndex++] = HUFFMAN_SYMBOL_RUNB;
114                             totalRunBs++;
115                         }
116 
117                         if (repeatCount <= 1) {
118                             break;
119                         }
120                         repeatCount = (repeatCount - 2) >>> 1;
121                     }
122                     repeatCount = 0;
123                 }
124                 mtfBlock[mtfIndex++] = (char) (mtfPosition + 1);
125                 mtfSymbolFrequencies[mtfPosition + 1]++;
126             }
127         }
128 
129         if (repeatCount > 0) {
130             repeatCount--;
131             while (true) {
132                 if ((repeatCount & 1) == 0) {
133                     mtfBlock[mtfIndex++] = HUFFMAN_SYMBOL_RUNA;
134                     totalRunAs++;
135                 } else {
136                     mtfBlock[mtfIndex++] = HUFFMAN_SYMBOL_RUNB;
137                     totalRunBs++;
138                 }
139 
140                 if (repeatCount <= 1) {
141                     break;
142                 }
143                 repeatCount = (repeatCount - 2) >>> 1;
144             }
145         }
146 
147         mtfBlock[mtfIndex] = (char) endOfBlockSymbol;
148         mtfSymbolFrequencies[endOfBlockSymbol]++;
149         mtfSymbolFrequencies[HUFFMAN_SYMBOL_RUNA] += totalRunAs;
150         mtfSymbolFrequencies[HUFFMAN_SYMBOL_RUNB] += totalRunBs;
151 
152         mtfLength = mtfIndex + 1;
153         alphabetSize = endOfBlockSymbol + 1;
154     }
155 
156     /**
157      * @return The encoded MTF block
158      */
159     char[] mtfBlock() {
160         return mtfBlock;
161     }
162 
163     /**
164      * @return The actual length of the MTF block
165      */
166     int mtfLength() {
167         return mtfLength;
168     }
169 
170     /**
171      * @return The size of the MTF block's alphabet
172      */
173     int mtfAlphabetSize() {
174         return alphabetSize;
175     }
176 
177     /**
178      * @return The frequencies of the MTF block's symbols
179      */
180     int[] mtfSymbolFrequencies() {
181         return mtfSymbolFrequencies;
182     }
183 }