View Javadoc
1   /*
2    * Copyright 2020 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License, version 2.0 (the
5    * "License"); you may not use this file except in compliance with the License. You may obtain a
6    * copy of the License at:
7    *
8    * https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software distributed under the License
11   * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12   * or implied. See the License for the specific language governing permissions and limitations under
13   * the License.
14   */
15  package io.netty.buffer.search;
16  
17  import io.netty.util.internal.PlatformDependent;
18  
19  import java.util.ArrayDeque;
20  import java.util.ArrayList;
21  import java.util.Arrays;
22  import java.util.Queue;
23  
24  /**
25   * Implements <a href="https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm">Aho–Corasick</a>
26   * string search algorithm.
27   * Use static {@link AbstractMultiSearchProcessorFactory#newAhoCorasicSearchProcessorFactory}
28   * to create an instance of this factory.
29   * Use {@link AhoCorasicSearchProcessorFactory#newSearchProcessor} to get an instance of
30   * {@link io.netty.util.ByteProcessor} implementation for performing the actual search.
31   * @see AbstractMultiSearchProcessorFactory
32   */
33  public class AhoCorasicSearchProcessorFactory extends AbstractMultiSearchProcessorFactory {
34  
35      private final int[] jumpTable;
36      private final int[] matchForNeedleId;
37  
38      static final int BITS_PER_SYMBOL = 8;
39      static final int ALPHABET_SIZE = 1 << BITS_PER_SYMBOL;
40  
41      private static class Context {
42          int[] jumpTable;
43          int[] matchForNeedleId;
44      }
45  
46      public static class Processor implements MultiSearchProcessor {
47  
48          private final int[] jumpTable;
49          private final int[] matchForNeedleId;
50          private long currentPosition;
51  
52          Processor(int[] jumpTable, int[] matchForNeedleId) {
53              this.jumpTable = jumpTable;
54              this.matchForNeedleId = matchForNeedleId;
55          }
56  
57          @Override
58          public boolean process(byte value) {
59              currentPosition = PlatformDependent.getInt(jumpTable, currentPosition | (value & 0xffL));
60              if (currentPosition < 0) {
61                  currentPosition = -currentPosition;
62                  return false;
63              }
64              return true;
65          }
66  
67          @Override
68          public int getFoundNeedleId() {
69              return matchForNeedleId[(int) currentPosition >> AhoCorasicSearchProcessorFactory.BITS_PER_SYMBOL];
70          }
71  
72          @Override
73          public void reset() {
74              currentPosition = 0;
75          }
76      }
77  
78      AhoCorasicSearchProcessorFactory(byte[] ...needles) {
79  
80          for (byte[] needle: needles) {
81              if (needle.length == 0) {
82                  throw new IllegalArgumentException("Needle must be non empty");
83              }
84          }
85  
86          Context context = buildTrie(needles);
87          jumpTable = context.jumpTable;
88          matchForNeedleId = context.matchForNeedleId;
89  
90          linkSuffixes();
91  
92          for (int i = 0; i < jumpTable.length; i++) {
93              if (matchForNeedleId[jumpTable[i] >> BITS_PER_SYMBOL] >= 0) {
94                  jumpTable[i] = -jumpTable[i];
95              }
96          }
97      }
98  
99      private static Context buildTrie(byte[][] needles) {
100 
101         ArrayList<Integer> jumpTableBuilder = new ArrayList<Integer>(ALPHABET_SIZE);
102         for (int i = 0; i < ALPHABET_SIZE; i++) {
103             jumpTableBuilder.add(-1);
104         }
105 
106         ArrayList<Integer> matchForBuilder = new ArrayList<Integer>();
107         matchForBuilder.add(-1);
108 
109         for (int needleId = 0; needleId < needles.length; needleId++) {
110             byte[] needle = needles[needleId];
111             int currentPosition = 0;
112 
113             for (byte ch0: needle) {
114 
115                 final int ch = ch0 & 0xff;
116                 final int next = currentPosition + ch;
117 
118                 if (jumpTableBuilder.get(next) == -1) {
119                     jumpTableBuilder.set(next, jumpTableBuilder.size());
120                     for (int i = 0; i < ALPHABET_SIZE; i++) {
121                         jumpTableBuilder.add(-1);
122                     }
123                     matchForBuilder.add(-1);
124                 }
125 
126                 currentPosition = jumpTableBuilder.get(next);
127             }
128 
129             matchForBuilder.set(currentPosition >> BITS_PER_SYMBOL, needleId);
130         }
131 
132         Context context = new Context();
133 
134         context.jumpTable = new int[jumpTableBuilder.size()];
135         for (int i = 0; i < jumpTableBuilder.size(); i++) {
136             context.jumpTable[i] = jumpTableBuilder.get(i);
137         }
138 
139         context.matchForNeedleId = new int[matchForBuilder.size()];
140         for (int i = 0; i < matchForBuilder.size(); i++) {
141             context.matchForNeedleId[i] = matchForBuilder.get(i);
142         }
143 
144         return context;
145     }
146 
147     private void linkSuffixes() {
148 
149         Queue<Integer> queue = new ArrayDeque<Integer>();
150         queue.add(0);
151 
152         int[] suffixLinks = new int[matchForNeedleId.length];
153         Arrays.fill(suffixLinks, -1);
154 
155         while (!queue.isEmpty()) {
156 
157             final int v = queue.remove();
158             int vPosition = v >> BITS_PER_SYMBOL;
159             final int u = suffixLinks[vPosition] == -1 ? 0 : suffixLinks[vPosition];
160 
161             if (matchForNeedleId[vPosition] == -1) {
162                 matchForNeedleId[vPosition] = matchForNeedleId[u >> BITS_PER_SYMBOL];
163             }
164 
165             for (int ch = 0; ch < ALPHABET_SIZE; ch++) {
166 
167                 final int vIndex = v | ch;
168                 final int uIndex = u | ch;
169 
170                 final int jumpV = jumpTable[vIndex];
171                 final int jumpU = jumpTable[uIndex];
172 
173                 if (jumpV != -1) {
174                     suffixLinks[jumpV >> BITS_PER_SYMBOL] = v > 0 && jumpU != -1 ? jumpU : 0;
175                     queue.add(jumpV);
176                 } else {
177                     jumpTable[vIndex] = jumpU != -1 ? jumpU : 0;
178                 }
179             }
180         }
181     }
182 
183     /**
184      * Returns a new {@link Processor}.
185      */
186     @Override
187     public Processor newSearchProcessor() {
188         return new Processor(jumpTable, matchForNeedleId);
189     }
190 
191 }