1 /*
2 * Copyright 2012 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package io.netty.handler.codec.http;
17
18 import io.netty.util.CharsetUtil;
19 import io.netty.util.internal.PlatformDependent;
20
21 import java.net.URI;
22 import java.net.URLDecoder;
23 import java.nio.charset.Charset;
24 import java.util.ArrayList;
25 import java.util.Collections;
26 import java.util.LinkedHashMap;
27 import java.util.List;
28 import java.util.Map;
29
30 import static io.netty.util.internal.ObjectUtil.checkNotNull;
31 import static io.netty.util.internal.ObjectUtil.checkPositive;
32 import static io.netty.util.internal.StringUtil.EMPTY_STRING;
33 import static io.netty.util.internal.StringUtil.SPACE;
34 import static io.netty.util.internal.StringUtil.decodeHexByte;
35
36 /**
37 * Splits an HTTP query string into a path string and key-value parameter pairs.
38 * This decoder is for one time use only. Create a new instance for each URI:
39 * <pre>
40 * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("/hello?recipient=world&x=1;y=2");
41 * assert decoder.path().equals("/hello");
42 * assert decoder.parameters().get("recipient").get(0).equals("world");
43 * assert decoder.parameters().get("x").get(0).equals("1");
44 * assert decoder.parameters().get("y").get(0).equals("2");
45 * </pre>
46 *
47 * This decoder can also decode the content of an HTTP POST request whose
48 * content type is <tt>application/x-www-form-urlencoded</tt>:
49 * <pre>
50 * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("recipient=world&x=1;y=2", false);
51 * ...
52 * </pre>
53 *
54 * <h3>HashDOS vulnerability fix</h3>
55 *
56 * As a workaround to the <a href="https://netty.io/s/hashdos">HashDOS</a> vulnerability, the decoder
57 * limits the maximum number of decoded key-value parameter pairs, up to {@literal 1024} by
58 * default, and you can configure it when you construct the decoder by passing an additional
59 * integer parameter.
60 *
61 * @see QueryStringEncoder
62 */
63 public class QueryStringDecoder {
64
65 private static final int DEFAULT_MAX_PARAMS = 1024;
66
67 private final Charset charset;
68 private final String uri;
69 private final int maxParams;
70 private final boolean semicolonIsNormalChar;
71 private final boolean htmlQueryDecoding;
72 private int pathEndIdx;
73 private String path;
74 private Map<String, List<String>> params;
75
76 /**
77 * Creates a new decoder that decodes the specified URI. The decoder will
78 * assume that the query string is encoded in UTF-8.
79 */
80 public QueryStringDecoder(String uri) {
81 this(builder(), uri);
82 }
83
84 /**
85 * Creates a new decoder that decodes the specified URI encoded in the
86 * specified charset.
87 */
88 public QueryStringDecoder(String uri, boolean hasPath) {
89 this(builder().hasPath(hasPath), uri);
90 }
91
92 /**
93 * Creates a new decoder that decodes the specified URI encoded in the
94 * specified charset.
95 */
96 public QueryStringDecoder(String uri, Charset charset) {
97 this(builder().charset(charset), uri);
98 }
99
100 /**
101 * Creates a new decoder that decodes the specified URI encoded in the
102 * specified charset.
103 */
104 public QueryStringDecoder(String uri, Charset charset, boolean hasPath) {
105 this(builder().hasPath(hasPath).charset(charset), uri);
106 }
107
108 /**
109 * Creates a new decoder that decodes the specified URI encoded in the
110 * specified charset.
111 */
112 public QueryStringDecoder(String uri, Charset charset, boolean hasPath, int maxParams) {
113 this(builder().hasPath(hasPath).charset(charset).maxParams(maxParams), uri);
114 }
115
116 /**
117 * Creates a new decoder that decodes the specified URI encoded in the
118 * specified charset.
119 */
120 public QueryStringDecoder(String uri, Charset charset, boolean hasPath,
121 int maxParams, boolean semicolonIsNormalChar) {
122 this(
123 builder()
124 .hasPath(hasPath)
125 .charset(charset)
126 .maxParams(maxParams)
127 .semicolonIsNormalChar(semicolonIsNormalChar),
128 uri);
129 }
130
131 /**
132 * Creates a new decoder that decodes the specified URI. The decoder will
133 * assume that the query string is encoded in UTF-8.
134 */
135 public QueryStringDecoder(URI uri) {
136 this(builder(), uri);
137 }
138
139 /**
140 * Creates a new decoder that decodes the specified URI encoded in the
141 * specified charset.
142 */
143 public QueryStringDecoder(URI uri, Charset charset) {
144 this(builder().charset(charset), uri);
145 }
146
147 /**
148 * Creates a new decoder that decodes the specified URI encoded in the
149 * specified charset.
150 */
151 public QueryStringDecoder(URI uri, Charset charset, int maxParams) {
152 this(builder().charset(charset).maxParams(maxParams), uri);
153 }
154
155 /**
156 * Creates a new decoder that decodes the specified URI encoded in the
157 * specified charset.
158 */
159 public QueryStringDecoder(URI uri, Charset charset, int maxParams, boolean semicolonIsNormalChar) {
160 this(builder().charset(charset).maxParams(maxParams).semicolonIsNormalChar(semicolonIsNormalChar), uri);
161 }
162
163 private QueryStringDecoder(Builder builder, String uri) {
164 this.uri = checkNotNull(uri, "uri");
165 this.charset = checkNotNull(builder.charset, "charset");
166 this.maxParams = checkPositive(builder.maxParams, "maxParams");
167 this.semicolonIsNormalChar = builder.semicolonIsNormalChar;
168 this.htmlQueryDecoding = builder.htmlQueryDecoding;
169
170 // `-1` means that path end index will be initialized lazily
171 pathEndIdx = builder.hasPath ? -1 : 0;
172 }
173
174 private QueryStringDecoder(Builder builder, URI uri) {
175 String rawPath = uri.getRawPath();
176 if (rawPath == null) {
177 rawPath = EMPTY_STRING;
178 }
179 String rawQuery = uri.getRawQuery();
180 // Also take care of cut of things like "http://localhost"
181 this.uri = rawQuery == null? rawPath : rawPath + '?' + rawQuery;
182 this.charset = checkNotNull(builder.charset, "charset");
183 this.maxParams = checkPositive(builder.maxParams, "maxParams");
184 this.semicolonIsNormalChar = builder.semicolonIsNormalChar;
185 this.htmlQueryDecoding = builder.htmlQueryDecoding;
186 pathEndIdx = rawPath.length();
187 }
188
189 @Override
190 public String toString() {
191 return uri();
192 }
193
194 /**
195 * Returns the uri used to initialize this {@link QueryStringDecoder}.
196 */
197 public String uri() {
198 return uri;
199 }
200
201 /**
202 * Returns the decoded path string of the URI.
203 */
204 public String path() {
205 if (path == null) {
206 path = decodeComponent(uri, 0, pathEndIdx(), charset, false);
207 }
208 return path;
209 }
210
211 /**
212 * Returns the decoded key-value parameter pairs of the URI.
213 */
214 public Map<String, List<String>> parameters() {
215 if (params == null) {
216 params = decodeParams(uri, pathEndIdx(), charset, maxParams);
217 }
218 return params;
219 }
220
221 /**
222 * Returns the raw path string of the URI.
223 */
224 public String rawPath() {
225 return uri.substring(0, pathEndIdx());
226 }
227
228 /**
229 * Returns raw query string of the URI.
230 */
231 public String rawQuery() {
232 int start = pathEndIdx() + 1;
233 return start < uri.length() ? uri.substring(start) : EMPTY_STRING;
234 }
235
236 private int pathEndIdx() {
237 if (pathEndIdx == -1) {
238 pathEndIdx = findPathEndIndex(uri);
239 }
240 return pathEndIdx;
241 }
242
243 private Map<String, List<String>> decodeParams(String s, int from, Charset charset, int paramsLimit) {
244 int len = s.length();
245 if (from >= len) {
246 return Collections.emptyMap();
247 }
248 if (s.charAt(from) == '?') {
249 from++;
250 }
251 Map<String, List<String>> params = new LinkedHashMap<String, List<String>>();
252 int nameStart = from;
253 int valueStart = -1;
254 int i;
255 loop:
256 for (i = from; i < len; i++) {
257 switch (s.charAt(i)) {
258 case '=':
259 if (nameStart == i) {
260 nameStart = i + 1;
261 } else if (valueStart < nameStart) {
262 valueStart = i + 1;
263 }
264 break;
265 case ';':
266 if (semicolonIsNormalChar) {
267 continue;
268 }
269 // fall-through
270 case '&':
271 if (addParam(s, nameStart, valueStart, i, params, charset)) {
272 paramsLimit--;
273 if (paramsLimit == 0) {
274 return params;
275 }
276 }
277 nameStart = i + 1;
278 break;
279 case '#':
280 break loop;
281 default:
282 // continue
283 }
284 }
285 addParam(s, nameStart, valueStart, i, params, charset);
286 return params;
287 }
288
289 private boolean addParam(String s, int nameStart, int valueStart, int valueEnd,
290 Map<String, List<String>> params, Charset charset) {
291 if (nameStart >= valueEnd) {
292 return false;
293 }
294 if (valueStart <= nameStart) {
295 valueStart = valueEnd + 1;
296 }
297 String name = decodeComponent(s, nameStart, valueStart - 1, charset, htmlQueryDecoding);
298 String value = decodeComponent(s, valueStart, valueEnd, charset, htmlQueryDecoding);
299 List<String> values = params.get(name);
300 if (values == null) {
301 values = new ArrayList<String>(1); // Often there's only 1 value.
302 params.put(name, values);
303 }
304 values.add(value);
305 return true;
306 }
307
308 /**
309 * Decodes a bit of a URL encoded by a browser.
310 * <p>
311 * This is equivalent to calling {@link #decodeComponent(String, Charset)}
312 * with the UTF-8 charset (recommended to comply with RFC 3986, Section 2).
313 * @param s The string to decode (can be empty).
314 * @return The decoded string, or {@code s} if there's nothing to decode.
315 * If the string to decode is {@code null}, returns an empty string.
316 * @throws IllegalArgumentException if the string contains a malformed
317 * escape sequence.
318 */
319 public static String decodeComponent(final String s) {
320 return decodeComponent(s, HttpConstants.DEFAULT_CHARSET);
321 }
322
323 /**
324 * Decodes a bit of a URL encoded by a browser.
325 * <p>
326 * The string is expected to be encoded as per RFC 3986, Section 2.
327 * This is the encoding used by JavaScript functions {@code encodeURI}
328 * and {@code encodeURIComponent}, but not {@code escape}. For example
329 * in this encoding, é (in Unicode {@code U+00E9} or in UTF-8
330 * {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
331 * <p>
332 * This is essentially equivalent to calling
333 * {@link URLDecoder#decode(String, String)}
334 * except that it's over 2x faster and generates less garbage for the GC.
335 * Actually this function doesn't allocate any memory if there's nothing
336 * to decode, the argument itself is returned.
337 * @param s The string to decode (can be empty).
338 * @param charset The charset to use to decode the string (should really
339 * be {@link CharsetUtil#UTF_8}.
340 * @return The decoded string, or {@code s} if there's nothing to decode.
341 * If the string to decode is {@code null}, returns an empty string.
342 * @throws IllegalArgumentException if the string contains a malformed
343 * escape sequence.
344 */
345 public static String decodeComponent(final String s, final Charset charset) {
346 if (s == null) {
347 return EMPTY_STRING;
348 }
349 return decodeComponent(s, 0, s.length(), charset, true);
350 }
351
352 private static String decodeComponent(String s, int from, int toExcluded, Charset charset, boolean plusToSpace) {
353 int len = toExcluded - from;
354 if (len <= 0) {
355 return EMPTY_STRING;
356 }
357 int firstEscaped = -1;
358 for (int i = from; i < toExcluded; i++) {
359 char c = s.charAt(i);
360 if (c == '%' || (c == '+' && plusToSpace)) {
361 firstEscaped = i;
362 break;
363 }
364 }
365 if (firstEscaped == -1) {
366 return s.substring(from, toExcluded);
367 }
368
369 // Each encoded byte takes 3 characters (e.g. "%20")
370 int decodedCapacity = (toExcluded - firstEscaped) / 3;
371 byte[] buf = PlatformDependent.allocateUninitializedArray(decodedCapacity);
372 int bufIdx;
373
374 StringBuilder strBuf = new StringBuilder(len);
375 strBuf.append(s, from, firstEscaped);
376
377 for (int i = firstEscaped; i < toExcluded; i++) {
378 char c = s.charAt(i);
379 if (c != '%') {
380 strBuf.append(c != '+' || !plusToSpace ? c : SPACE);
381 continue;
382 }
383
384 bufIdx = 0;
385 do {
386 if (i + 3 > toExcluded) {
387 throw new IllegalArgumentException("unterminated escape sequence at index " + i + " of: " + s);
388 }
389 buf[bufIdx++] = decodeHexByte(s, i + 1);
390 i += 3;
391 } while (i < toExcluded && s.charAt(i) == '%');
392 i--;
393
394 strBuf.append(new String(buf, 0, bufIdx, charset));
395 }
396 return strBuf.toString();
397 }
398
399 private static int findPathEndIndex(String uri) {
400 int len = uri.length();
401 for (int i = 0; i < len; i++) {
402 char c = uri.charAt(i);
403 if (c == '?' || c == '#') {
404 return i;
405 }
406 }
407 return len;
408 }
409
410 public static Builder builder() {
411 return new Builder();
412 }
413
414 public static final class Builder {
415 private boolean hasPath = true;
416 private int maxParams = DEFAULT_MAX_PARAMS;
417 private boolean semicolonIsNormalChar;
418 private Charset charset = HttpConstants.DEFAULT_CHARSET;
419 private boolean htmlQueryDecoding = true;
420
421 private Builder() {
422 }
423
424 /**
425 * {@code true} by default. When set to {@code false}, the input string only contains the query component of
426 * the URI.
427 *
428 * @param hasPath Whether the URI contains a path
429 * @return This builder
430 */
431 public Builder hasPath(boolean hasPath) {
432 this.hasPath = hasPath;
433 return this;
434 }
435
436 /**
437 * Maximum number of query parameters allowed, to mitigate HashDOS. {@value DEFAULT_MAX_PARAMS} by default.
438 *
439 * @param maxParams The maximum number of query parameters
440 * @return This builder
441 */
442 public Builder maxParams(int maxParams) {
443 this.maxParams = maxParams;
444 return this;
445 }
446
447 /**
448 * {@code false} by default. If set to {@code true}, instead of allowing query parameters to be separated by
449 * semicolons, treat the semicolon as a normal character in a query value.
450 *
451 * @param semicolonIsNormalChar Whether to treat semicolons as a normal character
452 * @return This builder
453 */
454 public Builder semicolonIsNormalChar(boolean semicolonIsNormalChar) {
455 this.semicolonIsNormalChar = semicolonIsNormalChar;
456 return this;
457 }
458
459 /**
460 * The charset to use for decoding percent escape sequences. {@link HttpConstants#DEFAULT_CHARSET} by default.
461 *
462 * @param charset The charset
463 * @return This builder
464 */
465 public Builder charset(Charset charset) {
466 this.charset = charset;
467 return this;
468 }
469
470 /**
471 * RFC 3986 (the URI standard) makes no mention of using '+' to encode a space in a URI query component. The
472 * whatwg HTML standard, however, defines the query to be encoded with the
473 * {@code application/x-www-form-urlencoded} serializer defined in the whatwg URL standard, which does use '+'
474 * to encode a space instead of {@code %20}.
475 * <p>This flag controls whether the decoding should happen according to HTML rules, which decodes the '+' to a
476 * space. The default is {@code true}.
477 *
478 * @param htmlQueryDecoding Whether to decode '+' to space
479 * @return This builder
480 */
481 public Builder htmlQueryDecoding(boolean htmlQueryDecoding) {
482 this.htmlQueryDecoding = htmlQueryDecoding;
483 return this;
484 }
485
486 /**
487 * Create a decoder that will lazily decode the given URI with the settings configured in this builder.
488 *
489 * @param uri The URI in String form
490 * @return The decoder
491 */
492 public QueryStringDecoder build(String uri) {
493 return new QueryStringDecoder(this, uri);
494 }
495
496 /**
497 * Create a decoder that will lazily decode the given URI with the settings configured in this builder. Note
498 * that {@link #hasPath(boolean)} has no effect when using this method.
499 *
500 * @param uri The already parsed URI
501 * @return The decoder
502 */
503 public QueryStringDecoder build(URI uri) {
504 return new QueryStringDecoder(this, uri);
505 }
506 }
507 }