View Javadoc
1   /*
2    * Copyright 2012 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.handler.codec.http;
17  
18  import io.netty.util.CharsetUtil;
19  import io.netty.util.internal.PlatformDependent;
20  
21  import java.net.URI;
22  import java.net.URLDecoder;
23  import java.nio.charset.Charset;
24  import java.util.ArrayList;
25  import java.util.Collections;
26  import java.util.LinkedHashMap;
27  import java.util.List;
28  import java.util.Map;
29  
30  import static io.netty.util.internal.ObjectUtil.checkNotNull;
31  import static io.netty.util.internal.ObjectUtil.checkPositive;
32  import static io.netty.util.internal.StringUtil.EMPTY_STRING;
33  import static io.netty.util.internal.StringUtil.SPACE;
34  import static io.netty.util.internal.StringUtil.decodeHexByte;
35  
36  /**
37   * Splits an HTTP query string into a path string and key-value parameter pairs.
38   * This decoder is for one time use only.  Create a new instance for each URI:
39   * <pre>
40   * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("/hello?recipient=world&x=1;y=2");
41   * assert decoder.path().equals("/hello");
42   * assert decoder.parameters().get("recipient").get(0).equals("world");
43   * assert decoder.parameters().get("x").get(0).equals("1");
44   * assert decoder.parameters().get("y").get(0).equals("2");
45   * </pre>
46   *
47   * This decoder can also decode the content of an HTTP POST request whose
48   * content type is <tt>application/x-www-form-urlencoded</tt>:
49   * <pre>
50   * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("recipient=world&x=1;y=2", false);
51   * ...
52   * </pre>
53   *
54   * <h3>HashDOS vulnerability fix</h3>
55   *
56   * As a workaround to the <a href="https://netty.io/s/hashdos">HashDOS</a> vulnerability, the decoder
57   * limits the maximum number of decoded key-value parameter pairs, up to {@literal 1024} by
58   * default, and you can configure it when you construct the decoder by passing an additional
59   * integer parameter.
60   *
61   * @see QueryStringEncoder
62   */
63  public class QueryStringDecoder {
64  
65      private static final int DEFAULT_MAX_PARAMS = 1024;
66  
67      private final Charset charset;
68      private final String uri;
69      private final int maxParams;
70      private final boolean semicolonIsNormalChar;
71      private int pathEndIdx;
72      private String path;
73      private Map<String, List<String>> params;
74  
75      /**
76       * Creates a new decoder that decodes the specified URI. The decoder will
77       * assume that the query string is encoded in UTF-8.
78       */
79      public QueryStringDecoder(String uri) {
80          this(uri, HttpConstants.DEFAULT_CHARSET);
81      }
82  
83      /**
84       * Creates a new decoder that decodes the specified URI encoded in the
85       * specified charset.
86       */
87      public QueryStringDecoder(String uri, boolean hasPath) {
88          this(uri, HttpConstants.DEFAULT_CHARSET, hasPath);
89      }
90  
91      /**
92       * Creates a new decoder that decodes the specified URI encoded in the
93       * specified charset.
94       */
95      public QueryStringDecoder(String uri, Charset charset) {
96          this(uri, charset, true);
97      }
98  
99      /**
100      * Creates a new decoder that decodes the specified URI encoded in the
101      * specified charset.
102      */
103     public QueryStringDecoder(String uri, Charset charset, boolean hasPath) {
104         this(uri, charset, hasPath, DEFAULT_MAX_PARAMS);
105     }
106 
107     /**
108      * Creates a new decoder that decodes the specified URI encoded in the
109      * specified charset.
110      */
111     public QueryStringDecoder(String uri, Charset charset, boolean hasPath, int maxParams) {
112         this(uri, charset, hasPath, maxParams, false);
113     }
114 
115     /**
116      * Creates a new decoder that decodes the specified URI encoded in the
117      * specified charset.
118      */
119     public QueryStringDecoder(String uri, Charset charset, boolean hasPath,
120                               int maxParams, boolean semicolonIsNormalChar) {
121         this.uri = checkNotNull(uri, "uri");
122         this.charset = checkNotNull(charset, "charset");
123         this.maxParams = checkPositive(maxParams, "maxParams");
124         this.semicolonIsNormalChar = semicolonIsNormalChar;
125 
126         // `-1` means that path end index will be initialized lazily
127         pathEndIdx = hasPath ? -1 : 0;
128     }
129 
130     /**
131      * Creates a new decoder that decodes the specified URI. The decoder will
132      * assume that the query string is encoded in UTF-8.
133      */
134     public QueryStringDecoder(URI uri) {
135         this(uri, HttpConstants.DEFAULT_CHARSET);
136     }
137 
138     /**
139      * Creates a new decoder that decodes the specified URI encoded in the
140      * specified charset.
141      */
142     public QueryStringDecoder(URI uri, Charset charset) {
143         this(uri, charset, DEFAULT_MAX_PARAMS);
144     }
145 
146     /**
147      * Creates a new decoder that decodes the specified URI encoded in the
148      * specified charset.
149      */
150     public QueryStringDecoder(URI uri, Charset charset, int maxParams) {
151         this(uri, charset, maxParams, false);
152     }
153 
154     /**
155      * Creates a new decoder that decodes the specified URI encoded in the
156      * specified charset.
157      */
158     public QueryStringDecoder(URI uri, Charset charset, int maxParams, boolean semicolonIsNormalChar) {
159         String rawPath = uri.getRawPath();
160         if (rawPath == null) {
161             rawPath = EMPTY_STRING;
162         }
163         String rawQuery = uri.getRawQuery();
164         // Also take care of cut of things like "http://localhost"
165         this.uri = rawQuery == null? rawPath : rawPath + '?' + rawQuery;
166         this.charset = checkNotNull(charset, "charset");
167         this.maxParams = checkPositive(maxParams, "maxParams");
168         this.semicolonIsNormalChar = semicolonIsNormalChar;
169         pathEndIdx = rawPath.length();
170     }
171 
172     @Override
173     public String toString() {
174         return uri();
175     }
176 
177     /**
178      * Returns the uri used to initialize this {@link QueryStringDecoder}.
179      */
180     public String uri() {
181         return uri;
182     }
183 
184     /**
185      * Returns the decoded path string of the URI.
186      */
187     public String path() {
188         if (path == null) {
189             path = decodeComponent(uri, 0, pathEndIdx(), charset, true);
190         }
191         return path;
192     }
193 
194     /**
195      * Returns the decoded key-value parameter pairs of the URI.
196      */
197     public Map<String, List<String>> parameters() {
198         if (params == null) {
199             params = decodeParams(uri, pathEndIdx(), charset, maxParams, semicolonIsNormalChar);
200         }
201         return params;
202     }
203 
204     /**
205      * Returns the raw path string of the URI.
206      */
207     public String rawPath() {
208         return uri.substring(0, pathEndIdx());
209     }
210 
211     /**
212      * Returns raw query string of the URI.
213      */
214     public String rawQuery() {
215         int start = pathEndIdx() + 1;
216         return start < uri.length() ? uri.substring(start) : EMPTY_STRING;
217     }
218 
219     private int pathEndIdx() {
220         if (pathEndIdx == -1) {
221             pathEndIdx = findPathEndIndex(uri);
222         }
223         return pathEndIdx;
224     }
225 
226     private static Map<String, List<String>> decodeParams(String s, int from, Charset charset, int paramsLimit,
227                                                           boolean semicolonIsNormalChar) {
228         int len = s.length();
229         if (from >= len) {
230             return Collections.emptyMap();
231         }
232         if (s.charAt(from) == '?') {
233             from++;
234         }
235         Map<String, List<String>> params = new LinkedHashMap<String, List<String>>();
236         int nameStart = from;
237         int valueStart = -1;
238         int i;
239         loop:
240         for (i = from; i < len; i++) {
241             switch (s.charAt(i)) {
242             case '=':
243                 if (nameStart == i) {
244                     nameStart = i + 1;
245                 } else if (valueStart < nameStart) {
246                     valueStart = i + 1;
247                 }
248                 break;
249             case ';':
250                 if (semicolonIsNormalChar) {
251                     continue;
252                 }
253                 // fall-through
254             case '&':
255                 if (addParam(s, nameStart, valueStart, i, params, charset)) {
256                     paramsLimit--;
257                     if (paramsLimit == 0) {
258                         return params;
259                     }
260                 }
261                 nameStart = i + 1;
262                 break;
263             case '#':
264                 break loop;
265             default:
266                 // continue
267             }
268         }
269         addParam(s, nameStart, valueStart, i, params, charset);
270         return params;
271     }
272 
273     private static boolean addParam(String s, int nameStart, int valueStart, int valueEnd,
274                                     Map<String, List<String>> params, Charset charset) {
275         if (nameStart >= valueEnd) {
276             return false;
277         }
278         if (valueStart <= nameStart) {
279             valueStart = valueEnd + 1;
280         }
281         String name = decodeComponent(s, nameStart, valueStart - 1, charset, false);
282         String value = decodeComponent(s, valueStart, valueEnd, charset, false);
283         List<String> values = params.get(name);
284         if (values == null) {
285             values = new ArrayList<String>(1);  // Often there's only 1 value.
286             params.put(name, values);
287         }
288         values.add(value);
289         return true;
290     }
291 
292     /**
293      * Decodes a bit of a URL encoded by a browser.
294      * <p>
295      * This is equivalent to calling {@link #decodeComponent(String, Charset)}
296      * with the UTF-8 charset (recommended to comply with RFC 3986, Section 2).
297      * @param s The string to decode (can be empty).
298      * @return The decoded string, or {@code s} if there's nothing to decode.
299      * If the string to decode is {@code null}, returns an empty string.
300      * @throws IllegalArgumentException if the string contains a malformed
301      * escape sequence.
302      */
303     public static String decodeComponent(final String s) {
304         return decodeComponent(s, HttpConstants.DEFAULT_CHARSET);
305     }
306 
307     /**
308      * Decodes a bit of a URL encoded by a browser.
309      * <p>
310      * The string is expected to be encoded as per RFC 3986, Section 2.
311      * This is the encoding used by JavaScript functions {@code encodeURI}
312      * and {@code encodeURIComponent}, but not {@code escape}.  For example
313      * in this encoding, &eacute; (in Unicode {@code U+00E9} or in UTF-8
314      * {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
315      * <p>
316      * This is essentially equivalent to calling
317      *   {@link URLDecoder#decode(String, String)}
318      * except that it's over 2x faster and generates less garbage for the GC.
319      * Actually this function doesn't allocate any memory if there's nothing
320      * to decode, the argument itself is returned.
321      * @param s The string to decode (can be empty).
322      * @param charset The charset to use to decode the string (should really
323      * be {@link CharsetUtil#UTF_8}.
324      * @return The decoded string, or {@code s} if there's nothing to decode.
325      * If the string to decode is {@code null}, returns an empty string.
326      * @throws IllegalArgumentException if the string contains a malformed
327      * escape sequence.
328      */
329     public static String decodeComponent(final String s, final Charset charset) {
330         if (s == null) {
331             return EMPTY_STRING;
332         }
333         return decodeComponent(s, 0, s.length(), charset, false);
334     }
335 
336     private static String decodeComponent(String s, int from, int toExcluded, Charset charset, boolean isPath) {
337         int len = toExcluded - from;
338         if (len <= 0) {
339             return EMPTY_STRING;
340         }
341         int firstEscaped = -1;
342         for (int i = from; i < toExcluded; i++) {
343             char c = s.charAt(i);
344             if (c == '%' || c == '+' && !isPath) {
345                 firstEscaped = i;
346                 break;
347             }
348         }
349         if (firstEscaped == -1) {
350             return s.substring(from, toExcluded);
351         }
352 
353         // Each encoded byte takes 3 characters (e.g. "%20")
354         int decodedCapacity = (toExcluded - firstEscaped) / 3;
355         byte[] buf = PlatformDependent.allocateUninitializedArray(decodedCapacity);
356         int bufIdx;
357 
358         StringBuilder strBuf = new StringBuilder(len);
359         strBuf.append(s, from, firstEscaped);
360 
361         for (int i = firstEscaped; i < toExcluded; i++) {
362             char c = s.charAt(i);
363             if (c != '%') {
364                 strBuf.append(c != '+' || isPath? c : SPACE);
365                 continue;
366             }
367 
368             bufIdx = 0;
369             do {
370                 if (i + 3 > toExcluded) {
371                     throw new IllegalArgumentException("unterminated escape sequence at index " + i + " of: " + s);
372                 }
373                 buf[bufIdx++] = decodeHexByte(s, i + 1);
374                 i += 3;
375             } while (i < toExcluded && s.charAt(i) == '%');
376             i--;
377 
378             strBuf.append(new String(buf, 0, bufIdx, charset));
379         }
380         return strBuf.toString();
381     }
382 
383     private static int findPathEndIndex(String uri) {
384         int len = uri.length();
385         for (int i = 0; i < len; i++) {
386             char c = uri.charAt(i);
387             if (c == '?' || c == '#') {
388                 return i;
389             }
390         }
391         return len;
392     }
393 }