View Javadoc

1   /*
2    * Copyright 2012 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package org.jboss.netty.handler.codec.http;
17  
18  import java.io.UnsupportedEncodingException;
19  import java.net.URI;
20  import java.net.URLDecoder;
21  import java.nio.charset.Charset;
22  import java.util.ArrayList;
23  import java.util.Collections;
24  import java.util.LinkedHashMap;
25  import java.util.List;
26  import java.util.Map;
27  
28  import org.jboss.netty.util.CharsetUtil;
29  
30  /**
31   * Splits an HTTP query string into a path string and key-value parameter pairs.
32   * This decoder is for one time use only.  Create a new instance for each URI:
33   * <pre>
34   * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("/hello?recipient=world&x=1;y=2");
35   * assert decoder.getPath().equals("/hello");
36   * assert decoder.getParameters().get("recipient").get(0).equals("world");
37   * assert decoder.getParameters().get("x").get(0).equals("1");
38   * assert decoder.getParameters().get("y").get(0).equals("2");
39   * </pre>
40   *
41   * This decoder can also decode the content of an HTTP POST request whose
42   * content type is <tt>application/x-www-form-urlencoded</tt>:
43   * <pre>
44   * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("recipient=world&x=1;y=2", false);
45   * ...
46   * </pre>
47   *
48   * <h3>HashDOS vulnerability fix</h3>
49   *
50   * As a workaround to the <a href="http://goo.gl/I4Nky">HashDOS</a> vulnerability, the decoder
51   * limits the maximum number of decoded key-value parameter pairs, up to {@literal 1024} by
52   * default, and you can configure it when you construct the decoder by passing an additional
53   * integer parameter.
54   *
55   * @see QueryStringEncoder
56   *
57   * @apiviz.stereotype utility
58   * @apiviz.has        org.jboss.netty.handler.codec.http.HttpRequest oneway - - decodes
59   */
60  public class QueryStringDecoder {
61  
62      private static final int DEFAULT_MAX_PARAMS = 1024;
63  
64      private final Charset charset;
65      private final String uri;
66      private final boolean hasPath;
67      private final int maxParams;
68      private String path;
69      private Map<String, List<String>> params;
70      private int nParams;
71  
72      /**
73       * Creates a new decoder that decodes the specified URI. The decoder will
74       * assume that the query string is encoded in UTF-8.
75       */
76      public QueryStringDecoder(String uri) {
77          this(uri, HttpConstants.DEFAULT_CHARSET);
78      }
79  
80      /**
81       * Creates a new decoder that decodes the specified URI encoded in the
82       * specified charset.
83       */
84      public QueryStringDecoder(String uri, boolean hasPath) {
85          this(uri, HttpConstants.DEFAULT_CHARSET, hasPath);
86      }
87  
88      /**
89       * Creates a new decoder that decodes the specified URI encoded in the
90       * specified charset.
91       */
92      public QueryStringDecoder(String uri, Charset charset) {
93          this(uri, charset, true);
94      }
95  
96      /**
97       * Creates a new decoder that decodes the specified URI encoded in the
98       * specified charset.
99       */
100     public QueryStringDecoder(String uri, Charset charset, boolean hasPath) {
101         this(uri, charset, hasPath, DEFAULT_MAX_PARAMS);
102     }
103 
104     /**
105      * Creates a new decoder that decodes the specified URI encoded in the
106      * specified charset.
107      */
108     public QueryStringDecoder(String uri, Charset charset, boolean hasPath, int maxParams) {
109         if (uri == null) {
110             throw new NullPointerException("uri");
111         }
112         if (charset == null) {
113             throw new NullPointerException("charset");
114         }
115         if (maxParams <= 0) {
116             throw new IllegalArgumentException(
117                     "maxParams: " + maxParams + " (expected: a positive integer)");
118         }
119 
120         // http://en.wikipedia.org/wiki/Query_string
121         this.uri = uri.replace(';', '&');
122         this.charset = charset;
123         this.maxParams = maxParams;
124         this.hasPath = hasPath;
125     }
126 
127     /**
128      * @deprecated Use {@link #QueryStringDecoder(String, Charset)} instead.
129      */
130     @Deprecated
131     public QueryStringDecoder(String uri, String charset) {
132         this(uri, Charset.forName(charset));
133     }
134 
135     /**
136      * Creates a new decoder that decodes the specified URI. The decoder will
137      * assume that the query string is encoded in UTF-8.
138      */
139     public QueryStringDecoder(URI uri) {
140         this(uri, HttpConstants.DEFAULT_CHARSET);
141     }
142 
143     /**
144      * Creates a new decoder that decodes the specified URI encoded in the
145      * specified charset.
146      */
147     public QueryStringDecoder(URI uri, Charset charset) {
148         this(uri, charset, DEFAULT_MAX_PARAMS);
149     }
150 
151     /**
152      * Creates a new decoder that decodes the specified URI encoded in the
153      * specified charset.
154      */
155     public QueryStringDecoder(URI uri, Charset charset, int maxParams) {
156         if (uri == null) {
157             throw new NullPointerException("uri");
158         }
159         if (charset == null) {
160             throw new NullPointerException("charset");
161         }
162         if (maxParams <= 0) {
163             throw new IllegalArgumentException(
164                     "maxParams: " + maxParams + " (expected: a positive integer)");
165         }
166 
167         String rawPath = uri.getRawPath();
168         if (rawPath != null) {
169             hasPath = true;
170         } else {
171             rawPath = "";
172             hasPath = false;
173         }
174         // Also take care of cut of things like "http://localhost"
175         String newUri = rawPath + '?' + uri.getRawQuery();
176 
177         // http://en.wikipedia.org/wiki/Query_string
178         this.uri = newUri.replace(';', '&');
179         this.charset = charset;
180         this.maxParams = maxParams;
181 
182     }
183 
184     /**
185      * @deprecated Use {@link #QueryStringDecoder(URI, Charset)} instead.
186      */
187     @Deprecated
188     public QueryStringDecoder(URI uri, String charset) {
189         this(uri, Charset.forName(charset));
190     }
191 
192     /**
193      * Returns the decoded path string of the URI.
194      */
195     public String getPath() {
196         if (path == null) {
197             if (!hasPath) {
198                 return path = "";
199             }
200 
201             int pathEndPos = uri.indexOf('?');
202             if (pathEndPos < 0) {
203                 path = uri;
204             } else {
205                 return path = uri.substring(0, pathEndPos);
206             }
207         }
208         return path;
209     }
210 
211     /**
212      * Returns the decoded key-value parameter pairs of the URI.
213      */
214     public Map<String, List<String>> getParameters() {
215         if (params == null) {
216             if (hasPath) {
217                 int pathLength = getPath().length();
218                 if (uri.length() == pathLength) {
219                     return Collections.emptyMap();
220                 }
221                 decodeParams(uri.substring(pathLength + 1));
222             } else {
223                 if (uri.length() == 0) {
224                     return Collections.emptyMap();
225                 }
226                 decodeParams(uri);
227             }
228         }
229         return params;
230     }
231 
232     private void decodeParams(String s) {
233         Map<String, List<String>> params = this.params = new LinkedHashMap<String, List<String>>();
234         nParams = 0;
235         String name = null;
236         int pos = 0; // Beginning of the unprocessed region
237         int i;       // End of the unprocessed region
238         char c = 0;  // Current character
239         for (i = 0; i < s.length(); i++) {
240             c = s.charAt(i);
241             if (c == '=' && name == null) {
242                 if (pos != i) {
243                     name = decodeComponent(s.substring(pos, i), charset);
244                 }
245                 pos = i + 1;
246             } else if (c == '&') {
247                 if (name == null && pos != i) {
248                     // We haven't seen an `=' so far but moved forward.
249                     // Must be a param of the form '&a&' so add it with
250                     // an empty value.
251                     if (!addParam(params, decodeComponent(s.substring(pos, i), charset), "")) {
252                         return;
253                     }
254                 } else if (name != null) {
255                     if (!addParam(params, name, decodeComponent(s.substring(pos, i), charset))) {
256                         return;
257                     }
258                     name = null;
259                 }
260                 pos = i + 1;
261             }
262         }
263 
264         if (pos != i) {  // Are there characters we haven't dealt with?
265             if (name == null) {     // Yes and we haven't seen any `='.
266                 if (!addParam(params, decodeComponent(s.substring(pos, i), charset), "")) {
267                     return;
268                 }
269             } else {                // Yes and this must be the last value.
270                 if (!addParam(params, name, decodeComponent(s.substring(pos, i), charset))) {
271                     return;
272                 }
273             }
274         } else if (name != null) {  // Have we seen a name without value?
275             if (!addParam(params, name, "")) {
276                 return;
277             }
278         }
279     }
280 
281     private boolean addParam(Map<String, List<String>> params, String name, String value) {
282         if (nParams >= maxParams) {
283             return false;
284         }
285 
286         List<String> values = params.get(name);
287         if (values == null) {
288             values = new ArrayList<String>(1);  // Often there's only 1 value.
289             params.put(name, values);
290         }
291         values.add(value);
292         nParams ++;
293         return true;
294     }
295 
296     /**
297      * Decodes a bit of an URL encoded by a browser.
298      * <p>
299      * This is equivalent to calling {@link #decodeComponent(String, Charset)}
300      * with the UTF-8 charset (recommended to comply with RFC 3986, Section 2).
301      * @param s The string to decode (can be empty).
302      * @return The decoded string, or {@code s} if there's nothing to decode.
303      * If the string to decode is {@code null}, returns an empty string.
304      * @throws IllegalArgumentException if the string contains a malformed
305      * escape sequence.
306      */
307     public static String decodeComponent(final String s) {
308         return decodeComponent(s, HttpConstants.DEFAULT_CHARSET);
309     }
310 
311     /**
312      * Decodes a bit of an URL encoded by a browser.
313      * <p>
314      * The string is expected to be encoded as per RFC 3986, Section 2.
315      * This is the encoding used by JavaScript functions {@code encodeURI}
316      * and {@code encodeURIComponent}, but not {@code escape}.  For example
317      * in this encoding, &eacute; (in Unicode {@code U+00E9} or in UTF-8
318      * {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
319      * <p>
320      * This is essentially equivalent to calling
321      *   <code>{@link URLDecoder#decode(String, String) URLDecoder.decode}(s, charset.name())</code>
322      * except that it's over 2x faster and generates less garbage for the GC.
323      * Actually this function doesn't allocate any memory if there's nothing
324      * to decode, the argument itself is returned.
325      * @param s The string to decode (can be empty).
326      * @param charset The charset to use to decode the string (should really
327      * be {@link CharsetUtil#UTF_8}.
328      * @return The decoded string, or {@code s} if there's nothing to decode.
329      * If the string to decode is {@code null}, returns an empty string.
330      * @throws IllegalArgumentException if the string contains a malformed
331      * escape sequence.
332      */
333     @SuppressWarnings("fallthrough")
334     public static String decodeComponent(final String s,
335                                          final Charset charset) {
336         if (s == null) {
337             return "";
338         }
339         final int size = s.length();
340         boolean modified = false;
341         for (int i = 0; i < size; i++) {
342             final char c = s.charAt(i);
343             switch (c) {
344                 case '%':
345                     i++;  // We can skip at least one char, e.g. `%%'.
346                     // Fall through.
347                 case '+':
348                     modified = true;
349                     break;
350             }
351         }
352         if (!modified) {
353             return s;
354         }
355         final byte[] buf = new byte[size];
356         int pos = 0;  // position in `buf'.
357         for (int i = 0; i < size; i++) {
358             char c = s.charAt(i);
359             switch (c) {
360                 case '+':
361                     buf[pos++] = ' ';  // "+" -> " "
362                     break;
363                 case '%':
364                     if (i == size - 1) {
365                         throw new IllegalArgumentException("unterminated escape"
366                                 + " sequence at end of string: " + s);
367                     }
368                     c = s.charAt(++i);
369                     if (c == '%') {
370                         buf[pos++] = '%';  // "%%" -> "%"
371                         break;
372                     } else if (i == size - 1) {
373                         throw new IllegalArgumentException("partial escape"
374                                 + " sequence at end of string: " + s);
375                     }
376                     c = decodeHexNibble(c);
377                     final char c2 = decodeHexNibble(s.charAt(++i));
378                     if (c == Character.MAX_VALUE || c2 == Character.MAX_VALUE) {
379                         throw new IllegalArgumentException(
380                                 "invalid escape sequence `%" + s.charAt(i - 1)
381                                 + s.charAt(i) + "' at index " + (i - 2)
382                                 + " of: " + s);
383                     }
384                     c = (char) (c * 16 + c2);
385                     // Fall through.
386                 default:
387                     buf[pos++] = (byte) c;
388                     break;
389             }
390         }
391         try {
392             return new String(buf, 0, pos, charset.name());
393         } catch (UnsupportedEncodingException e) {
394             throw new IllegalArgumentException("unsupported encoding: " + charset.name());
395         }
396     }
397 
398     /**
399      * Helper to decode half of a hexadecimal number from a string.
400      * @param c The ASCII character of the hexadecimal number to decode.
401      * Must be in the range {@code [0-9a-fA-F]}.
402      * @return The hexadecimal value represented in the ASCII character
403      * given, or {@link Character#MAX_VALUE} if the character is invalid.
404      */
405     private static char decodeHexNibble(final char c) {
406         if ('0' <= c && c <= '9') {
407             return (char) (c - '0');
408         } else if ('a' <= c && c <= 'f') {
409             return (char) (c - 'a' + 10);
410         } else if ('A' <= c && c <= 'F') {
411             return (char) (c - 'A' + 10);
412         } else {
413             return Character.MAX_VALUE;
414         }
415     }
416 }