1 /*
2 * Copyright 2012 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package io.netty.handler.codec.http;
17
18 import io.netty.util.CharsetUtil;
19 import io.netty.util.internal.PlatformDependent;
20
21 import java.net.URI;
22 import java.net.URLDecoder;
23 import java.nio.charset.Charset;
24 import java.util.ArrayList;
25 import java.util.Collections;
26 import java.util.LinkedHashMap;
27 import java.util.List;
28 import java.util.Map;
29
30 import static io.netty.util.internal.ObjectUtil.checkNotNull;
31 import static io.netty.util.internal.ObjectUtil.checkPositive;
32 import static io.netty.util.internal.StringUtil.EMPTY_STRING;
33 import static io.netty.util.internal.StringUtil.SPACE;
34 import static io.netty.util.internal.StringUtil.decodeHexByte;
35
36 /**
37 * Splits an HTTP query string into a path string and key-value parameter pairs.
38 * This decoder is for one time use only. Create a new instance for each URI:
39 * <pre>
40 * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("/hello?recipient=world&x=1;y=2");
41 * assert decoder.path().equals("/hello");
42 * assert decoder.parameters().get("recipient").get(0).equals("world");
43 * assert decoder.parameters().get("x").get(0).equals("1");
44 * assert decoder.parameters().get("y").get(0).equals("2");
45 * </pre>
46 *
47 * This decoder can also decode the content of an HTTP POST request whose
48 * content type is <tt>application/x-www-form-urlencoded</tt>:
49 * <pre>
50 * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("recipient=world&x=1;y=2", false);
51 * ...
52 * </pre>
53 *
54 * <h3>HashDOS vulnerability fix</h3>
55 *
56 * As a workaround to the <a href="https://netty.io/s/hashdos">HashDOS</a> vulnerability, the decoder
57 * limits the maximum number of decoded key-value parameter pairs, up to {@literal 1024} by
58 * default, and you can configure it when you construct the decoder by passing an additional
59 * integer parameter.
60 *
61 * @see QueryStringEncoder
62 */
63 public class QueryStringDecoder {
64
65 private static final int DEFAULT_MAX_PARAMS = 1024;
66
67 private final Charset charset;
68 private final String uri;
69 private final int maxParams;
70 private final boolean semicolonIsNormalChar;
71 private final boolean htmlQueryDecoding;
72 private int pathEndIdx;
73 private String path;
74 private Map<String, List<String>> params;
75
76 /**
77 * Creates a new decoder that decodes the specified URI. The decoder will
78 * assume that the query string is encoded in UTF-8.
79 */
80 public QueryStringDecoder(String uri) {
81 this(builder(), uri);
82 }
83
84 /**
85 * Creates a new decoder that decodes the specified URI encoded in the
86 * specified charset.
87 */
88 public QueryStringDecoder(String uri, boolean hasPath) {
89 this(builder().hasPath(hasPath), uri);
90 }
91
92 /**
93 * Creates a new decoder that decodes the specified URI encoded in the
94 * specified charset.
95 */
96 public QueryStringDecoder(String uri, Charset charset) {
97 this(builder().charset(charset), uri);
98 }
99
100 /**
101 * Creates a new decoder that decodes the specified URI encoded in the
102 * specified charset.
103 */
104 public QueryStringDecoder(String uri, Charset charset, boolean hasPath) {
105 this(builder().hasPath(hasPath).charset(charset), uri);
106 }
107
108 /**
109 * Creates a new decoder that decodes the specified URI encoded in the
110 * specified charset.
111 */
112 public QueryStringDecoder(String uri, Charset charset, boolean hasPath, int maxParams) {
113 this(builder().hasPath(hasPath).charset(charset).maxParams(maxParams), uri);
114 }
115
116 /**
117 * Creates a new decoder that decodes the specified URI encoded in the
118 * specified charset.
119 */
120 public QueryStringDecoder(String uri, Charset charset, boolean hasPath,
121 int maxParams, boolean semicolonIsNormalChar) {
122 this(
123 builder()
124 .hasPath(hasPath)
125 .charset(charset)
126 .maxParams(maxParams)
127 .semicolonIsNormalChar(semicolonIsNormalChar),
128 uri);
129 }
130
131 /**
132 * Creates a new decoder that decodes the specified URI. The decoder will
133 * assume that the query string is encoded in UTF-8.
134 */
135 public QueryStringDecoder(URI uri) {
136 this(builder(), uri);
137 }
138
139 /**
140 * Creates a new decoder that decodes the specified URI encoded in the
141 * specified charset.
142 */
143 public QueryStringDecoder(URI uri, Charset charset) {
144 this(builder().charset(charset), uri);
145 }
146
147 /**
148 * Creates a new decoder that decodes the specified URI encoded in the
149 * specified charset.
150 */
151 public QueryStringDecoder(URI uri, Charset charset, int maxParams) {
152 this(builder().charset(charset).maxParams(maxParams), uri);
153 }
154
155 /**
156 * Creates a new decoder that decodes the specified URI encoded in the
157 * specified charset.
158 */
159 public QueryStringDecoder(URI uri, Charset charset, int maxParams, boolean semicolonIsNormalChar) {
160 this(builder().charset(charset).maxParams(maxParams).semicolonIsNormalChar(semicolonIsNormalChar), uri);
161 }
162
163 private QueryStringDecoder(Builder builder, String uri) {
164 this.uri = checkNotNull(uri, "uri");
165 this.charset = checkNotNull(builder.charset, "charset");
166 this.maxParams = checkPositive(builder.maxParams, "maxParams");
167 this.semicolonIsNormalChar = builder.semicolonIsNormalChar;
168 this.htmlQueryDecoding = builder.htmlQueryDecoding;
169
170 // `-1` means that path end index will be initialized lazily
171 pathEndIdx = builder.hasPath ? -1 : 0;
172 }
173
174 private QueryStringDecoder(Builder builder, URI uri) {
175 String rawPath = uri.getRawPath();
176 if (rawPath == null) {
177 rawPath = EMPTY_STRING;
178 }
179 String rawQuery = uri.getRawQuery();
180 // Also take care of cut of things like "http://localhost"
181 this.uri = rawQuery == null? rawPath : rawPath + '?' + rawQuery;
182 this.charset = checkNotNull(builder.charset, "charset");
183 this.maxParams = checkPositive(builder.maxParams, "maxParams");
184 this.semicolonIsNormalChar = builder.semicolonIsNormalChar;
185 this.htmlQueryDecoding = builder.htmlQueryDecoding;
186 pathEndIdx = rawPath.length();
187 }
188
189 @Override
190 public String toString() {
191 return uri();
192 }
193
194 /**
195 * Returns the uri used to initialize this {@link QueryStringDecoder}.
196 */
197 public String uri() {
198 return uri;
199 }
200
201 /**
202 * Returns the decoded path string of the URI.
203 */
204 public String path() {
205 if (path == null) {
206 path = decodeComponent(uri, 0, pathEndIdx(), charset, false);
207 }
208 return path;
209 }
210
211 /**
212 * Returns the decoded key-value parameter pairs of the URI.
213 */
214 public Map<String, List<String>> parameters() {
215 if (params == null) {
216 params = decodeParams(uri, pathEndIdx(), charset, maxParams);
217 }
218 return params;
219 }
220
221 /**
222 * Returns the raw path string of the URI.
223 */
224 public String rawPath() {
225 return uri.substring(0, pathEndIdx());
226 }
227
228 /**
229 * Returns raw query string of the URI.
230 */
231 public String rawQuery() {
232 int start = pathEndIdx() + 1;
233 return start < uri.length() ? uri.substring(start) : EMPTY_STRING;
234 }
235
236 private int pathEndIdx() {
237 if (pathEndIdx == -1) {
238 pathEndIdx = findPathEndIndex(uri);
239 }
240 return pathEndIdx;
241 }
242
243 private Map<String, List<String>> decodeParams(String s, int from, Charset charset, int paramsLimit) {
244 int len = s.length();
245 if (from >= len) {
246 return Collections.emptyMap();
247 }
248 if (s.charAt(from) == '?') {
249 from++;
250 }
251 Map<String, List<String>> params = new LinkedHashMap<>();
252 int nameStart = from;
253 int valueStart = -1;
254 int i;
255 loop:
256 for (i = from; i < len; i++) {
257 switch (s.charAt(i)) {
258 case '=':
259 if (nameStart == i) {
260 nameStart = i + 1;
261 } else if (valueStart < nameStart) {
262 valueStart = i + 1;
263 }
264 break;
265 case ';':
266 if (semicolonIsNormalChar) {
267 continue;
268 }
269 // fall-through
270 case '&':
271 if (addParam(s, nameStart, valueStart, i, params, charset)) {
272 paramsLimit--;
273 if (paramsLimit == 0) {
274 return params;
275 }
276 }
277 nameStart = i + 1;
278 break;
279 case '#':
280 break loop;
281 default:
282 // continue
283 }
284 }
285 addParam(s, nameStart, valueStart, i, params, charset);
286 return params;
287 }
288
289 private boolean addParam(String s, int nameStart, int valueStart, int valueEnd,
290 Map<String, List<String>> params, Charset charset) {
291 if (nameStart >= valueEnd) {
292 return false;
293 }
294 if (valueStart <= nameStart) {
295 valueStart = valueEnd + 1;
296 }
297 String name = decodeComponent(s, nameStart, valueStart - 1, charset, htmlQueryDecoding);
298 String value = decodeComponent(s, valueStart, valueEnd, charset, htmlQueryDecoding);
299 List<String> values = params.get(name);
300 if (values == null) {
301 params.put(name, Collections.singletonList(value));
302 } else if (values instanceof ArrayList) {
303 values.add(value);
304 } else {
305 List<String> newValues = new ArrayList<>(2);
306 newValues.add(values.get(0));
307 newValues.add(value);
308 params.put(name, newValues);
309 }
310 return true;
311 }
312
313 /**
314 * Decodes a bit of a URL encoded by a browser.
315 * <p>
316 * This is equivalent to calling {@link #decodeComponent(String, Charset)}
317 * with the UTF-8 charset (recommended to comply with RFC 3986, Section 2).
318 * @param s The string to decode (can be empty).
319 * @return The decoded string, or {@code s} if there's nothing to decode.
320 * If the string to decode is {@code null}, returns an empty string.
321 * @throws IllegalArgumentException if the string contains a malformed
322 * escape sequence.
323 */
324 public static String decodeComponent(final String s) {
325 return decodeComponent(s, HttpConstants.DEFAULT_CHARSET);
326 }
327
328 /**
329 * Decodes a bit of a URL encoded by a browser.
330 * <p>
331 * The string is expected to be encoded as per RFC 3986, Section 2.
332 * This is the encoding used by JavaScript functions {@code encodeURI}
333 * and {@code encodeURIComponent}, but not {@code escape}. For example
334 * in this encoding, é (in Unicode {@code U+00E9} or in UTF-8
335 * {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
336 * <p>
337 * This is essentially equivalent to calling
338 * {@link URLDecoder#decode(String, String)}
339 * except that it's over 2x faster and generates less garbage for the GC.
340 * Actually this function doesn't allocate any memory if there's nothing
341 * to decode, the argument itself is returned.
342 * @param s The string to decode (can be empty).
343 * @param charset The charset to use to decode the string (should really
344 * be {@link CharsetUtil#UTF_8}.
345 * @return The decoded string, or {@code s} if there's nothing to decode.
346 * If the string to decode is {@code null}, returns an empty string.
347 * @throws IllegalArgumentException if the string contains a malformed
348 * escape sequence.
349 */
350 public static String decodeComponent(final String s, final Charset charset) {
351 if (s == null) {
352 return EMPTY_STRING;
353 }
354 return decodeComponent(s, 0, s.length(), charset, true);
355 }
356
357 private static String decodeComponent(String s, int from, int toExcluded, Charset charset, boolean plusToSpace) {
358 int len = toExcluded - from;
359 if (len <= 0) {
360 return EMPTY_STRING;
361 }
362 int firstEscaped = -1;
363 for (int i = from; i < toExcluded; i++) {
364 char c = s.charAt(i);
365 if (c == '%' || (c == '+' && plusToSpace)) {
366 firstEscaped = i;
367 break;
368 }
369 }
370 if (firstEscaped == -1) {
371 return s.substring(from, toExcluded);
372 }
373
374 // Each encoded byte takes 3 characters (e.g. "%20")
375 int decodedCapacity = (toExcluded - firstEscaped) / 3;
376 byte[] buf = PlatformDependent.allocateUninitializedArray(decodedCapacity);
377 int bufIdx;
378
379 StringBuilder strBuf = new StringBuilder(len);
380 strBuf.append(s, from, firstEscaped);
381
382 for (int i = firstEscaped; i < toExcluded; i++) {
383 char c = s.charAt(i);
384 if (c != '%') {
385 strBuf.append(c != '+' || !plusToSpace ? c : SPACE);
386 continue;
387 }
388
389 bufIdx = 0;
390 do {
391 if (i + 3 > toExcluded) {
392 throw new IllegalArgumentException("unterminated escape sequence at index " + i + " of: " + s);
393 }
394 buf[bufIdx++] = decodeHexByte(s, i + 1);
395 i += 3;
396 } while (i < toExcluded && s.charAt(i) == '%');
397 i--;
398
399 strBuf.append(new String(buf, 0, bufIdx, charset));
400 }
401 return strBuf.toString();
402 }
403
404 private static int findPathEndIndex(String uri) {
405 int len = uri.length();
406 for (int i = 0; i < len; i++) {
407 char c = uri.charAt(i);
408 if (c == '?' || c == '#') {
409 return i;
410 }
411 }
412 return len;
413 }
414
415 public static Builder builder() {
416 return new Builder();
417 }
418
419 public static final class Builder {
420 private boolean hasPath = true;
421 private int maxParams = DEFAULT_MAX_PARAMS;
422 private boolean semicolonIsNormalChar;
423 private Charset charset = HttpConstants.DEFAULT_CHARSET;
424 private boolean htmlQueryDecoding = true;
425
426 private Builder() {
427 }
428
429 /**
430 * {@code true} by default. When set to {@code false}, the input string only contains the query component of
431 * the URI.
432 *
433 * @param hasPath Whether the URI contains a path
434 * @return This builder
435 */
436 public Builder hasPath(boolean hasPath) {
437 this.hasPath = hasPath;
438 return this;
439 }
440
441 /**
442 * Maximum number of query parameters allowed, to mitigate HashDOS. {@value DEFAULT_MAX_PARAMS} by default.
443 *
444 * @param maxParams The maximum number of query parameters
445 * @return This builder
446 */
447 public Builder maxParams(int maxParams) {
448 this.maxParams = maxParams;
449 return this;
450 }
451
452 /**
453 * {@code false} by default. If set to {@code true}, instead of allowing query parameters to be separated by
454 * semicolons, treat the semicolon as a normal character in a query value.
455 *
456 * @param semicolonIsNormalChar Whether to treat semicolons as a normal character
457 * @return This builder
458 */
459 public Builder semicolonIsNormalChar(boolean semicolonIsNormalChar) {
460 this.semicolonIsNormalChar = semicolonIsNormalChar;
461 return this;
462 }
463
464 /**
465 * The charset to use for decoding percent escape sequences. {@link HttpConstants#DEFAULT_CHARSET} by default.
466 *
467 * @param charset The charset
468 * @return This builder
469 */
470 public Builder charset(Charset charset) {
471 this.charset = charset;
472 return this;
473 }
474
475 /**
476 * RFC 3986 (the URI standard) makes no mention of using '+' to encode a space in a URI query component. The
477 * whatwg HTML standard, however, defines the query to be encoded with the
478 * {@code application/x-www-form-urlencoded} serializer defined in the whatwg URL standard, which does use '+'
479 * to encode a space instead of {@code %20}.
480 * <p>This flag controls whether the decoding should happen according to HTML rules, which decodes the '+' to a
481 * space. The default is {@code true}.
482 *
483 * @param htmlQueryDecoding Whether to decode '+' to space
484 * @return This builder
485 */
486 public Builder htmlQueryDecoding(boolean htmlQueryDecoding) {
487 this.htmlQueryDecoding = htmlQueryDecoding;
488 return this;
489 }
490
491 /**
492 * Create a decoder that will lazily decode the given URI with the settings configured in this builder.
493 *
494 * @param uri The URI in String form
495 * @return The decoder
496 */
497 public QueryStringDecoder build(String uri) {
498 return new QueryStringDecoder(this, uri);
499 }
500
501 /**
502 * Create a decoder that will lazily decode the given URI with the settings configured in this builder. Note
503 * that {@link #hasPath(boolean)} has no effect when using this method.
504 *
505 * @param uri The already parsed URI
506 * @return The decoder
507 */
508 public QueryStringDecoder build(URI uri) {
509 return new QueryStringDecoder(this, uri);
510 }
511 }
512 }