/*
 * Copyright 2012 The Netty Project
 *
 * The Netty Project licenses this file to you under the Apache License,
 * version 2.0 (the "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at:
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
16  package io.netty.handler.codec.http;
17  
18  import io.netty.util.CharsetUtil;
19  
20  import java.net.URI;
21  import java.net.URLDecoder;
22  import java.nio.charset.Charset;
23  import java.util.ArrayList;
24  import java.util.Collections;
25  import java.util.LinkedHashMap;
26  import java.util.List;
27  import java.util.Map;
28  
29  /**
30   * Splits an HTTP query string into a path string and key-value parameter pairs.
31   * This decoder is for one time use only.  Create a new instance for each URI:
32   * <pre>
33   * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("/hello?recipient=world&x=1;y=2");
34   * assert decoder.getPath().equals("/hello");
35   * assert decoder.getParameters().get("recipient").get(0).equals("world");
36   * assert decoder.getParameters().get("x").get(0).equals("1");
37   * assert decoder.getParameters().get("y").get(0).equals("2");
38   * </pre>
39   *
40   * This decoder can also decode the content of an HTTP POST request whose
41   * content type is <tt>application/x-www-form-urlencoded</tt>:
42   * <pre>
43   * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("recipient=world&x=1;y=2", false);
44   * ...
45   * </pre>
46   *
47   * <h3>HashDOS vulnerability fix</h3>
48   *
49   * As a workaround to the <a href="http://netty.io/s/hashdos">HashDOS</a> vulnerability, the decoder
50   * limits the maximum number of decoded key-value parameter pairs, up to {@literal 1024} by
51   * default, and you can configure it when you construct the decoder by passing an additional
52   * integer parameter.
53   *
54   * @see QueryStringEncoder
55   */
56  public class QueryStringDecoder {
57  
58      private static final int DEFAULT_MAX_PARAMS = 1024;
59  
60      private final Charset charset;
61      private final String uri;
62      private final boolean hasPath;
63      private final int maxParams;
64      private String path;
65      private Map<String, List<String>> params;
66      private int nParams;
67  
68      /**
69       * Creates a new decoder that decodes the specified URI. The decoder will
70       * assume that the query string is encoded in UTF-8.
71       */
72      public QueryStringDecoder(String uri) {
73          this(uri, HttpConstants.DEFAULT_CHARSET);
74      }
75  
76      /**
77       * Creates a new decoder that decodes the specified URI encoded in the
78       * specified charset.
79       */
80      public QueryStringDecoder(String uri, boolean hasPath) {
81          this(uri, HttpConstants.DEFAULT_CHARSET, hasPath);
82      }
83  
84      /**
85       * Creates a new decoder that decodes the specified URI encoded in the
86       * specified charset.
87       */
88      public QueryStringDecoder(String uri, Charset charset) {
89          this(uri, charset, true);
90      }
91  
92      /**
93       * Creates a new decoder that decodes the specified URI encoded in the
94       * specified charset.
95       */
96      public QueryStringDecoder(String uri, Charset charset, boolean hasPath) {
97          this(uri, charset, hasPath, DEFAULT_MAX_PARAMS);
98      }
99  
100     /**
101      * Creates a new decoder that decodes the specified URI encoded in the
102      * specified charset.
103      */
104     public QueryStringDecoder(String uri, Charset charset, boolean hasPath, int maxParams) {
105         if (uri == null) {
106             throw new NullPointerException("getUri");
107         }
108         if (charset == null) {
109             throw new NullPointerException("charset");
110         }
111         if (maxParams <= 0) {
112             throw new IllegalArgumentException(
113                     "maxParams: " + maxParams + " (expected: a positive integer)");
114         }
115 
116         this.uri = uri;
117         this.charset = charset;
118         this.maxParams = maxParams;
119         this.hasPath = hasPath;
120     }
121 
122     /**
123      * Creates a new decoder that decodes the specified URI. The decoder will
124      * assume that the query string is encoded in UTF-8.
125      */
126     public QueryStringDecoder(URI uri) {
127         this(uri, HttpConstants.DEFAULT_CHARSET);
128     }
129 
130     /**
131      * Creates a new decoder that decodes the specified URI encoded in the
132      * specified charset.
133      */
134     public QueryStringDecoder(URI uri, Charset charset) {
135         this(uri, charset, DEFAULT_MAX_PARAMS);
136     }
137 
138     /**
139      * Creates a new decoder that decodes the specified URI encoded in the
140      * specified charset.
141      */
142     public QueryStringDecoder(URI uri, Charset charset, int maxParams) {
143         if (uri == null) {
144             throw new NullPointerException("getUri");
145         }
146         if (charset == null) {
147             throw new NullPointerException("charset");
148         }
149         if (maxParams <= 0) {
150             throw new IllegalArgumentException(
151                     "maxParams: " + maxParams + " (expected: a positive integer)");
152         }
153 
154         String rawPath = uri.getRawPath();
155         if (rawPath != null) {
156             hasPath = true;
157         } else {
158             rawPath = "";
159             hasPath = false;
160         }
161         // Also take care of cut of things like "http://localhost"
162         this.uri = rawPath + (uri.getRawQuery() == null? "" : '?' + uri.getRawQuery());
163 
164         this.charset = charset;
165         this.maxParams = maxParams;
166     }
167 
168     /**
169      * Returns the uri used to initialize this {@link QueryStringDecoder}.
170      */
171     public String uri() {
172         return uri;
173     }
174 
175     /**
176      * Returns the decoded path string of the URI.
177      */
178     public String path() {
179         if (path == null) {
180             if (!hasPath) {
181                 return path = "";
182             }
183 
184             int pathEndPos = uri.indexOf('?');
185             if (pathEndPos < 0) {
186                 path = uri;
187             } else {
188                 return path = uri.substring(0, pathEndPos);
189             }
190         }
191         return path;
192     }
193 
194     /**
195      * Returns the decoded key-value parameter pairs of the URI.
196      */
197     public Map<String, List<String>> parameters() {
198         if (params == null) {
199             if (hasPath) {
200                 int pathLength = path().length();
201                 if (uri.length() == pathLength) {
202                     return Collections.emptyMap();
203                 }
204                 decodeParams(uri.substring(pathLength + 1));
205             } else {
206                 if (uri.isEmpty()) {
207                     return Collections.emptyMap();
208                 }
209                 decodeParams(uri);
210             }
211         }
212         return params;
213     }
214 
215     private void decodeParams(String s) {
216         Map<String, List<String>> params = this.params = new LinkedHashMap<String, List<String>>();
217         nParams = 0;
218         String name = null;
219         int pos = 0; // Beginning of the unprocessed region
220         int i;       // End of the unprocessed region
221         char c;  // Current character
222         for (i = 0; i < s.length(); i++) {
223             c = s.charAt(i);
224             if (c == '=' && name == null) {
225                 if (pos != i) {
226                     name = decodeComponent(s.substring(pos, i), charset);
227                 }
228                 pos = i + 1;
229             // http://www.w3.org/TR/html401/appendix/notes.html#h-B.2.2
230             } else if (c == '&' || c == ';') {
231                 if (name == null && pos != i) {
232                     // We haven't seen an `=' so far but moved forward.
233                     // Must be a param of the form '&a&' so add it with
234                     // an empty value.
235                     if (!addParam(params, decodeComponent(s.substring(pos, i), charset), "")) {
236                         return;
237                     }
238                 } else if (name != null) {
239                     if (!addParam(params, name, decodeComponent(s.substring(pos, i), charset))) {
240                         return;
241                     }
242                     name = null;
243                 }
244                 pos = i + 1;
245             }
246         }
247 
248         if (pos != i) {  // Are there characters we haven't dealt with?
249             if (name == null) {     // Yes and we haven't seen any `='.
250                 addParam(params, decodeComponent(s.substring(pos, i), charset), "");
251             } else {                // Yes and this must be the last value.
252                 addParam(params, name, decodeComponent(s.substring(pos, i), charset));
253             }
254         } else if (name != null) {  // Have we seen a name without value?
255             addParam(params, name, "");
256         }
257     }
258 
259     private boolean addParam(Map<String, List<String>> params, String name, String value) {
260         if (nParams >= maxParams) {
261             return false;
262         }
263 
264         List<String> values = params.get(name);
265         if (values == null) {
266             values = new ArrayList<String>(1);  // Often there's only 1 value.
267             params.put(name, values);
268         }
269         values.add(value);
270         nParams ++;
271         return true;
272     }
273 
274     /**
275      * Decodes a bit of an URL encoded by a browser.
276      * <p>
277      * This is equivalent to calling {@link #decodeComponent(String, Charset)}
278      * with the UTF-8 charset (recommended to comply with RFC 3986, Section 2).
279      * @param s The string to decode (can be empty).
280      * @return The decoded string, or {@code s} if there's nothing to decode.
281      * If the string to decode is {@code null}, returns an empty string.
282      * @throws IllegalArgumentException if the string contains a malformed
283      * escape sequence.
284      */
285     public static String decodeComponent(final String s) {
286         return decodeComponent(s, HttpConstants.DEFAULT_CHARSET);
287     }
288 
289     /**
290      * Decodes a bit of an URL encoded by a browser.
291      * <p>
292      * The string is expected to be encoded as per RFC 3986, Section 2.
293      * This is the encoding used by JavaScript functions {@code encodeURI}
294      * and {@code encodeURIComponent}, but not {@code escape}.  For example
295      * in this encoding, &eacute; (in Unicode {@code U+00E9} or in UTF-8
296      * {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
297      * <p>
298      * This is essentially equivalent to calling
299      *   {@link URLDecoder#decode(String, String) URLDecoder.decode(s, charset.name())}
300      * except that it's over 2x faster and generates less garbage for the GC.
301      * Actually this function doesn't allocate any memory if there's nothing
302      * to decode, the argument itself is returned.
303      * @param s The string to decode (can be empty).
304      * @param charset The charset to use to decode the string (should really
305      * be {@link CharsetUtil#UTF_8}.
306      * @return The decoded string, or {@code s} if there's nothing to decode.
307      * If the string to decode is {@code null}, returns an empty string.
308      * @throws IllegalArgumentException if the string contains a malformed
309      * escape sequence.
310      */
311     public static String decodeComponent(final String s, final Charset charset) {
312         if (s == null) {
313             return "";
314         }
315         final int size = s.length();
316         boolean modified = false;
317         for (int i = 0; i < size; i++) {
318             final char c = s.charAt(i);
319             if (c == '%' || c == '+') {
320                 modified = true;
321                 break;
322             }
323         }
324         if (!modified) {
325             return s;
326         }
327         final byte[] buf = new byte[size];
328         int pos = 0;  // position in `buf'.
329         for (int i = 0; i < size; i++) {
330             char c = s.charAt(i);
331             switch (c) {
332                 case '+':
333                     buf[pos++] = ' ';  // "+" -> " "
334                     break;
335                 case '%':
336                     if (i == size - 1) {
337                         throw new IllegalArgumentException("unterminated escape"
338                                 + " sequence at end of string: " + s);
339                     }
340                     c = s.charAt(++i);
341                     if (c == '%') {
342                         buf[pos++] = '%';  // "%%" -> "%"
343                         break;
344                     }
345                     if (i == size - 1) {
346                         throw new IllegalArgumentException("partial escape"
347                                 + " sequence at end of string: " + s);
348                     }
349                     c = decodeHexNibble(c);
350                     final char c2 = decodeHexNibble(s.charAt(++i));
351                     if (c == Character.MAX_VALUE || c2 == Character.MAX_VALUE) {
352                         throw new IllegalArgumentException(
353                                 "invalid escape sequence `%" + s.charAt(i - 1)
354                                 + s.charAt(i) + "' at index " + (i - 2)
355                                 + " of: " + s);
356                     }
357                     c = (char) (c * 16 + c2);
358                     // Fall through.
359                 default:
360                     buf[pos++] = (byte) c;
361                     break;
362             }
363         }
364         return new String(buf, 0, pos, charset);
365     }
366 
367     /**
368      * Helper to decode half of a hexadecimal number from a string.
369      * @param c The ASCII character of the hexadecimal number to decode.
370      * Must be in the range {@code [0-9a-fA-F]}.
371      * @return The hexadecimal value represented in the ASCII character
372      * given, or {@link Character#MAX_VALUE} if the character is invalid.
373      */
374     private static char decodeHexNibble(final char c) {
375         if ('0' <= c && c <= '9') {
376             return (char) (c - '0');
377         } else if ('a' <= c && c <= 'f') {
378             return (char) (c - 'a' + 10);
379         } else if ('A' <= c && c <= 'F') {
380             return (char) (c - 'A' + 10);
381         } else {
382             return Character.MAX_VALUE;
383         }
384     }
385 }