View Javadoc
1   /*
2    * Copyright 2012 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.handler.codec.http;
17  
18  import io.netty.util.CharsetUtil;
19  import io.netty.util.internal.PlatformDependent;
20  
21  import java.net.URI;
22  import java.net.URLDecoder;
23  import java.nio.charset.Charset;
24  import java.util.ArrayList;
25  import java.util.Collections;
26  import java.util.LinkedHashMap;
27  import java.util.List;
28  import java.util.Map;
29  
30  import static io.netty.util.internal.ObjectUtil.checkNotNull;
31  import static io.netty.util.internal.ObjectUtil.checkPositive;
32  import static io.netty.util.internal.StringUtil.EMPTY_STRING;
33  import static io.netty.util.internal.StringUtil.SPACE;
34  import static io.netty.util.internal.StringUtil.decodeHexByte;
35  
36  /**
37   * Splits an HTTP query string into a path string and key-value parameter pairs.
38   * This decoder is for one time use only.  Create a new instance for each URI:
39   * <pre>
40   * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("/hello?recipient=world&x=1;y=2");
41   * assert decoder.path().equals("/hello");
42   * assert decoder.parameters().get("recipient").get(0).equals("world");
43   * assert decoder.parameters().get("x").get(0).equals("1");
44   * assert decoder.parameters().get("y").get(0).equals("2");
45   * </pre>
46   *
47   * This decoder can also decode the content of an HTTP POST request whose
48   * content type is <tt>application/x-www-form-urlencoded</tt>:
49   * <pre>
50   * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("recipient=world&x=1;y=2", false);
51   * ...
52   * </pre>
53   *
54   * <h3>HashDOS vulnerability fix</h3>
55   *
56   * As a workaround to the <a href="https://netty.io/s/hashdos">HashDOS</a> vulnerability, the decoder
57   * limits the maximum number of decoded key-value parameter pairs, up to {@literal 1024} by
58   * default, and you can configure it when you construct the decoder by passing an additional
59   * integer parameter.
60   *
61   * @see QueryStringEncoder
62   */
63  public class QueryStringDecoder {
64  
65      private static final int DEFAULT_MAX_PARAMS = 1024;
66  
67      private final Charset charset;
68      private final String uri;
69      private final int maxParams;
70      private final boolean semicolonIsNormalChar;
71      private final boolean htmlQueryDecoding;
72      private int pathEndIdx;
73      private String path;
74      private Map<String, List<String>> params;
75  
76      /**
77       * Creates a new decoder that decodes the specified URI. The decoder will
78       * assume that the query string is encoded in UTF-8.
79       */
80      public QueryStringDecoder(String uri) {
81          this(builder(), uri);
82      }
83  
84      /**
85       * Creates a new decoder that decodes the specified URI encoded in the
86       * specified charset.
87       */
88      public QueryStringDecoder(String uri, boolean hasPath) {
89          this(builder().hasPath(hasPath), uri);
90      }
91  
92      /**
93       * Creates a new decoder that decodes the specified URI encoded in the
94       * specified charset.
95       */
96      public QueryStringDecoder(String uri, Charset charset) {
97          this(builder().charset(charset), uri);
98      }
99  
100     /**
101      * Creates a new decoder that decodes the specified URI encoded in the
102      * specified charset.
103      */
104     public QueryStringDecoder(String uri, Charset charset, boolean hasPath) {
105         this(builder().hasPath(hasPath).charset(charset), uri);
106     }
107 
108     /**
109      * Creates a new decoder that decodes the specified URI encoded in the
110      * specified charset.
111      */
112     public QueryStringDecoder(String uri, Charset charset, boolean hasPath, int maxParams) {
113         this(builder().hasPath(hasPath).charset(charset).maxParams(maxParams), uri);
114     }
115 
116     /**
117      * Creates a new decoder that decodes the specified URI encoded in the
118      * specified charset.
119      */
120     public QueryStringDecoder(String uri, Charset charset, boolean hasPath,
121                               int maxParams, boolean semicolonIsNormalChar) {
122         this(
123                 builder()
124                         .hasPath(hasPath)
125                         .charset(charset)
126                         .maxParams(maxParams)
127                         .semicolonIsNormalChar(semicolonIsNormalChar),
128                 uri);
129     }
130 
131     /**
132      * Creates a new decoder that decodes the specified URI. The decoder will
133      * assume that the query string is encoded in UTF-8.
134      */
135     public QueryStringDecoder(URI uri) {
136         this(builder(), uri);
137     }
138 
139     /**
140      * Creates a new decoder that decodes the specified URI encoded in the
141      * specified charset.
142      */
143     public QueryStringDecoder(URI uri, Charset charset) {
144         this(builder().charset(charset), uri);
145     }
146 
147     /**
148      * Creates a new decoder that decodes the specified URI encoded in the
149      * specified charset.
150      */
151     public QueryStringDecoder(URI uri, Charset charset, int maxParams) {
152         this(builder().charset(charset).maxParams(maxParams), uri);
153     }
154 
155     /**
156      * Creates a new decoder that decodes the specified URI encoded in the
157      * specified charset.
158      */
159     public QueryStringDecoder(URI uri, Charset charset, int maxParams, boolean semicolonIsNormalChar) {
160         this(builder().charset(charset).maxParams(maxParams).semicolonIsNormalChar(semicolonIsNormalChar), uri);
161     }
162 
163     private QueryStringDecoder(Builder builder, String uri) {
164         this.uri = checkNotNull(uri, "uri");
165         this.charset = checkNotNull(builder.charset, "charset");
166         this.maxParams = checkPositive(builder.maxParams, "maxParams");
167         this.semicolonIsNormalChar = builder.semicolonIsNormalChar;
168         this.htmlQueryDecoding = builder.htmlQueryDecoding;
169 
170         // `-1` means that path end index will be initialized lazily
171         pathEndIdx = builder.hasPath ? -1 : 0;
172     }
173 
174     private QueryStringDecoder(Builder builder, URI uri) {
175         String rawPath = uri.getRawPath();
176         if (rawPath == null) {
177             rawPath = EMPTY_STRING;
178         }
179         String rawQuery = uri.getRawQuery();
180         // Also take care of cut of things like "http://localhost"
181         this.uri = rawQuery == null? rawPath : rawPath + '?' + rawQuery;
182         this.charset = checkNotNull(builder.charset, "charset");
183         this.maxParams = checkPositive(builder.maxParams, "maxParams");
184         this.semicolonIsNormalChar = builder.semicolonIsNormalChar;
185         this.htmlQueryDecoding = builder.htmlQueryDecoding;
186         pathEndIdx = rawPath.length();
187     }
188 
189     @Override
190     public String toString() {
191         return uri();
192     }
193 
194     /**
195      * Returns the uri used to initialize this {@link QueryStringDecoder}.
196      */
197     public String uri() {
198         return uri;
199     }
200 
201     /**
202      * Returns the decoded path string of the URI.
203      */
204     public String path() {
205         if (path == null) {
206             path = decodeComponent(uri, 0, pathEndIdx(), charset, false);
207         }
208         return path;
209     }
210 
211     /**
212      * Returns the decoded key-value parameter pairs of the URI.
213      */
214     public Map<String, List<String>> parameters() {
215         if (params == null) {
216             params = decodeParams(uri, pathEndIdx(), charset, maxParams);
217         }
218         return params;
219     }
220 
221     /**
222      * Returns the raw path string of the URI.
223      */
224     public String rawPath() {
225         return uri.substring(0, pathEndIdx());
226     }
227 
228     /**
229      * Returns raw query string of the URI.
230      */
231     public String rawQuery() {
232         int start = pathEndIdx() + 1;
233         return start < uri.length() ? uri.substring(start) : EMPTY_STRING;
234     }
235 
236     private int pathEndIdx() {
237         if (pathEndIdx == -1) {
238             pathEndIdx = findPathEndIndex(uri);
239         }
240         return pathEndIdx;
241     }
242 
243     private Map<String, List<String>> decodeParams(String s, int from, Charset charset, int paramsLimit) {
244         int len = s.length();
245         if (from >= len) {
246             return Collections.emptyMap();
247         }
248         if (s.charAt(from) == '?') {
249             from++;
250         }
251         Map<String, List<String>> params = new LinkedHashMap<>();
252         int nameStart = from;
253         int valueStart = -1;
254         int i;
255         loop:
256         for (i = from; i < len; i++) {
257             switch (s.charAt(i)) {
258             case '=':
259                 if (nameStart == i) {
260                     nameStart = i + 1;
261                 } else if (valueStart < nameStart) {
262                     valueStart = i + 1;
263                 }
264                 break;
265             case ';':
266                 if (semicolonIsNormalChar) {
267                     continue;
268                 }
269                 // fall-through
270             case '&':
271                 if (addParam(s, nameStart, valueStart, i, params, charset)) {
272                     paramsLimit--;
273                     if (paramsLimit == 0) {
274                         return params;
275                     }
276                 }
277                 nameStart = i + 1;
278                 break;
279             case '#':
280                 break loop;
281             default:
282                 // continue
283             }
284         }
285         addParam(s, nameStart, valueStart, i, params, charset);
286         return params;
287     }
288 
289     private boolean addParam(String s, int nameStart, int valueStart, int valueEnd,
290                                     Map<String, List<String>> params, Charset charset) {
291         if (nameStart >= valueEnd) {
292             return false;
293         }
294         if (valueStart <= nameStart) {
295             valueStart = valueEnd + 1;
296         }
297         String name = decodeComponent(s, nameStart, valueStart - 1, charset, htmlQueryDecoding);
298         String value = decodeComponent(s, valueStart, valueEnd, charset, htmlQueryDecoding);
299         List<String> values = params.get(name);
300         if (values == null) {
301             params.put(name, Collections.singletonList(value));
302         } else if (values instanceof ArrayList) {
303             values.add(value);
304         } else {
305             List<String> newValues = new ArrayList<>(2);
306             newValues.add(values.get(0));
307             newValues.add(value);
308             params.put(name, newValues);
309         }
310         return true;
311     }
312 
313     /**
314      * Decodes a bit of a URL encoded by a browser.
315      * <p>
316      * This is equivalent to calling {@link #decodeComponent(String, Charset)}
317      * with the UTF-8 charset (recommended to comply with RFC 3986, Section 2).
318      * @param s The string to decode (can be empty).
319      * @return The decoded string, or {@code s} if there's nothing to decode.
320      * If the string to decode is {@code null}, returns an empty string.
321      * @throws IllegalArgumentException if the string contains a malformed
322      * escape sequence.
323      */
324     public static String decodeComponent(final String s) {
325         return decodeComponent(s, HttpConstants.DEFAULT_CHARSET);
326     }
327 
328     /**
329      * Decodes a bit of a URL encoded by a browser.
330      * <p>
331      * The string is expected to be encoded as per RFC 3986, Section 2.
332      * This is the encoding used by JavaScript functions {@code encodeURI}
333      * and {@code encodeURIComponent}, but not {@code escape}.  For example
334      * in this encoding, &eacute; (in Unicode {@code U+00E9} or in UTF-8
335      * {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
336      * <p>
337      * This is essentially equivalent to calling
338      *   {@link URLDecoder#decode(String, String)}
339      * except that it's over 2x faster and generates less garbage for the GC.
340      * Actually this function doesn't allocate any memory if there's nothing
341      * to decode, the argument itself is returned.
342      * @param s The string to decode (can be empty).
343      * @param charset The charset to use to decode the string (should really
344      * be {@link CharsetUtil#UTF_8}.
345      * @return The decoded string, or {@code s} if there's nothing to decode.
346      * If the string to decode is {@code null}, returns an empty string.
347      * @throws IllegalArgumentException if the string contains a malformed
348      * escape sequence.
349      */
350     public static String decodeComponent(final String s, final Charset charset) {
351         if (s == null) {
352             return EMPTY_STRING;
353         }
354         return decodeComponent(s, 0, s.length(), charset, true);
355     }
356 
357     private static String decodeComponent(String s, int from, int toExcluded, Charset charset, boolean plusToSpace) {
358         int len = toExcluded - from;
359         if (len <= 0) {
360             return EMPTY_STRING;
361         }
362         int firstEscaped = -1;
363         for (int i = from; i < toExcluded; i++) {
364             char c = s.charAt(i);
365             if (c == '%' || (c == '+' && plusToSpace)) {
366                 firstEscaped = i;
367                 break;
368             }
369         }
370         if (firstEscaped == -1) {
371             return s.substring(from, toExcluded);
372         }
373 
374         // Each encoded byte takes 3 characters (e.g. "%20")
375         int decodedCapacity = (toExcluded - firstEscaped) / 3;
376         byte[] buf = PlatformDependent.allocateUninitializedArray(decodedCapacity);
377         int bufIdx;
378 
379         StringBuilder strBuf = new StringBuilder(len);
380         strBuf.append(s, from, firstEscaped);
381 
382         for (int i = firstEscaped; i < toExcluded; i++) {
383             char c = s.charAt(i);
384             if (c != '%') {
385                 strBuf.append(c != '+' || !plusToSpace ? c : SPACE);
386                 continue;
387             }
388 
389             bufIdx = 0;
390             do {
391                 if (i + 3 > toExcluded) {
392                     throw new IllegalArgumentException("unterminated escape sequence at index " + i + " of: " + s);
393                 }
394                 buf[bufIdx++] = decodeHexByte(s, i + 1);
395                 i += 3;
396             } while (i < toExcluded && s.charAt(i) == '%');
397             i--;
398 
399             strBuf.append(new String(buf, 0, bufIdx, charset));
400         }
401         return strBuf.toString();
402     }
403 
404     private static int findPathEndIndex(String uri) {
405         int len = uri.length();
406         for (int i = 0; i < len; i++) {
407             char c = uri.charAt(i);
408             if (c == '?' || c == '#') {
409                 return i;
410             }
411         }
412         return len;
413     }
414 
415     public static Builder builder() {
416         return new Builder();
417     }
418 
419     public static final class Builder {
420         private boolean hasPath = true;
421         private int maxParams = DEFAULT_MAX_PARAMS;
422         private boolean semicolonIsNormalChar;
423         private Charset charset = HttpConstants.DEFAULT_CHARSET;
424         private boolean htmlQueryDecoding = true;
425 
426         private Builder() {
427         }
428 
429         /**
430          * {@code true} by default. When set to {@code false}, the input string only contains the query component of
431          * the URI.
432          *
433          * @param hasPath Whether the URI contains a path
434          * @return This builder
435          */
436         public Builder hasPath(boolean hasPath) {
437             this.hasPath = hasPath;
438             return this;
439         }
440 
441         /**
442          * Maximum number of query parameters allowed, to mitigate HashDOS. {@value DEFAULT_MAX_PARAMS} by default.
443          *
444          * @param maxParams The maximum number of query parameters
445          * @return This builder
446          */
447         public Builder maxParams(int maxParams) {
448             this.maxParams = maxParams;
449             return this;
450         }
451 
452         /**
453          * {@code false} by default. If set to {@code true}, instead of allowing query parameters to be separated by
454          * semicolons, treat the semicolon as a normal character in a query value.
455          *
456          * @param semicolonIsNormalChar Whether to treat semicolons as a normal character
457          * @return This builder
458          */
459         public Builder semicolonIsNormalChar(boolean semicolonIsNormalChar) {
460             this.semicolonIsNormalChar = semicolonIsNormalChar;
461             return this;
462         }
463 
464         /**
465          * The charset to use for decoding percent escape sequences. {@link HttpConstants#DEFAULT_CHARSET} by default.
466          *
467          * @param charset The charset
468          * @return This builder
469          */
470         public Builder charset(Charset charset) {
471             this.charset = charset;
472             return this;
473         }
474 
475         /**
476          * RFC 3986 (the URI standard) makes no mention of using '+' to encode a space in a URI query component. The
477          * whatwg HTML standard, however, defines the query to be encoded with the
478          * {@code application/x-www-form-urlencoded} serializer defined in the whatwg URL standard, which does use '+'
479          * to encode a space instead of {@code %20}.
480          * <p>This flag controls whether the decoding should happen according to HTML rules, which decodes the '+' to a
481          * space. The default is {@code true}.
482          *
483          * @param htmlQueryDecoding Whether to decode '+' to space
484          * @return This builder
485          */
486         public Builder htmlQueryDecoding(boolean htmlQueryDecoding) {
487             this.htmlQueryDecoding = htmlQueryDecoding;
488             return this;
489         }
490 
491         /**
492          * Create a decoder that will lazily decode the given URI with the settings configured in this builder.
493          *
494          * @param uri The URI in String form
495          * @return The decoder
496          */
497         public QueryStringDecoder build(String uri) {
498             return new QueryStringDecoder(this, uri);
499         }
500 
501         /**
502          * Create a decoder that will lazily decode the given URI with the settings configured in this builder. Note
503          * that {@link #hasPath(boolean)} has no effect when using this method.
504          *
505          * @param uri The already parsed URI
506          * @return The decoder
507          */
508         public QueryStringDecoder build(URI uri) {
509             return new QueryStringDecoder(this, uri);
510         }
511     }
512 }