1 /*
2 * Copyright 2012 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package org.jboss.netty.handler.codec.http;
17
18 import org.jboss.netty.util.CharsetUtil;
19
20 import java.io.UnsupportedEncodingException;
21 import java.net.URI;
22 import java.net.URLDecoder;
23 import java.nio.charset.Charset;
24 import java.util.ArrayList;
25 import java.util.Collections;
26 import java.util.LinkedHashMap;
27 import java.util.List;
28 import java.util.Map;
29
30 /**
31 * Splits an HTTP query string into a path string and key-value parameter pairs.
32 * This decoder is for one time use only. Create a new instance for each URI:
33 * <pre>
34 * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("/hello?recipient=world&x=1;y=2");
35 * assert decoder.getPath().equals("/hello");
36 * assert decoder.getParameters().get("recipient").get(0).equals("world");
37 * assert decoder.getParameters().get("x").get(0).equals("1");
38 * assert decoder.getParameters().get("y").get(0).equals("2");
39 * </pre>
40 *
41 * This decoder can also decode the content of an HTTP POST request whose
42 * content type is <tt>application/x-www-form-urlencoded</tt>:
43 * <pre>
44 * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("recipient=world&x=1;y=2", false);
45 * ...
46 * </pre>
47 *
48 * <h3>HashDOS vulnerability fix</h3>
49 *
50 * As a workaround to the <a href="http://netty.io/s/hashdos">HashDOS</a> vulnerability, the decoder
51 * limits the maximum number of decoded key-value parameter pairs, up to {@literal 1024} by
52 * default, and you can configure it when you construct the decoder by passing an additional
53 * integer parameter.
54 *
55 * @see QueryStringEncoder
56 *
57 * @apiviz.stereotype utility
58 * @apiviz.has org.jboss.netty.handler.codec.http.HttpRequest oneway - - decodes
59 */
60 public class QueryStringDecoder {
61
62 private static final int DEFAULT_MAX_PARAMS = 1024;
63
64 private final Charset charset;
65 private final String uri;
66 private final boolean hasPath;
67 private final int maxParams;
68 private String path;
69 private Map<String, List<String>> params;
70 private int nParams;
71
72 /**
73 * Creates a new decoder that decodes the specified URI. The decoder will
74 * assume that the query string is encoded in UTF-8.
75 */
76 public QueryStringDecoder(String uri) {
77 this(uri, HttpConstants.DEFAULT_CHARSET);
78 }
79
80 /**
81 * Creates a new decoder that decodes the specified URI encoded in the
82 * specified charset.
83 */
84 public QueryStringDecoder(String uri, boolean hasPath) {
85 this(uri, HttpConstants.DEFAULT_CHARSET, hasPath);
86 }
87
88 /**
89 * Creates a new decoder that decodes the specified URI encoded in the
90 * specified charset.
91 */
92 public QueryStringDecoder(String uri, Charset charset) {
93 this(uri, charset, true);
94 }
95
96 /**
97 * Creates a new decoder that decodes the specified URI encoded in the
98 * specified charset.
99 */
100 public QueryStringDecoder(String uri, Charset charset, boolean hasPath) {
101 this(uri, charset, hasPath, DEFAULT_MAX_PARAMS);
102 }
103
104 /**
105 * Creates a new decoder that decodes the specified URI encoded in the
106 * specified charset.
107 */
108 public QueryStringDecoder(String uri, Charset charset, boolean hasPath, int maxParams) {
109 if (uri == null) {
110 throw new NullPointerException("uri");
111 }
112 if (charset == null) {
113 throw new NullPointerException("charset");
114 }
115 if (maxParams <= 0) {
116 throw new IllegalArgumentException(
117 "maxParams: " + maxParams + " (expected: a positive integer)");
118 }
119
120 this.uri = uri;
121 this.charset = charset;
122 this.maxParams = maxParams;
123 this.hasPath = hasPath;
124 }
125
126 /**
127 * @deprecated Use {@link #QueryStringDecoder(String, Charset)} instead.
128 */
129 @Deprecated
130 public QueryStringDecoder(String uri, String charset) {
131 this(uri, Charset.forName(charset));
132 }
133
134 /**
135 * Creates a new decoder that decodes the specified URI. The decoder will
136 * assume that the query string is encoded in UTF-8.
137 */
138 public QueryStringDecoder(URI uri) {
139 this(uri, HttpConstants.DEFAULT_CHARSET);
140 }
141
142 /**
143 * Creates a new decoder that decodes the specified URI encoded in the
144 * specified charset.
145 */
146 public QueryStringDecoder(URI uri, Charset charset) {
147 this(uri, charset, DEFAULT_MAX_PARAMS);
148 }
149
150 /**
151 * Creates a new decoder that decodes the specified URI encoded in the
152 * specified charset.
153 */
154 public QueryStringDecoder(URI uri, Charset charset, int maxParams) {
155 if (uri == null) {
156 throw new NullPointerException("uri");
157 }
158 if (charset == null) {
159 throw new NullPointerException("charset");
160 }
161 if (maxParams <= 0) {
162 throw new IllegalArgumentException(
163 "maxParams: " + maxParams + " (expected: a positive integer)");
164 }
165
166 String rawPath = uri.getRawPath();
167 if (rawPath != null) {
168 hasPath = true;
169 } else {
170 rawPath = "";
171 hasPath = false;
172 }
173 // Also take care of cut of things like "http://localhost"
174 this.uri = rawPath + '?' + uri.getRawQuery();
175 this.charset = charset;
176 this.maxParams = maxParams;
177 }
178
179 /**
180 * @deprecated Use {@link #QueryStringDecoder(URI, Charset)} instead.
181 */
182 @Deprecated
183 public QueryStringDecoder(URI uri, String charset) {
184 this(uri, Charset.forName(charset));
185 }
186
187 /**
188 * Returns the decoded path string of the URI.
189 */
190 public String getPath() {
191 if (path == null) {
192 if (!hasPath) {
193 return path = "";
194 }
195
196 int pathEndPos = uri.indexOf('?');
197 if (pathEndPos < 0) {
198 path = uri;
199 } else {
200 return path = uri.substring(0, pathEndPos);
201 }
202 }
203 return path;
204 }
205
206 /**
207 * Returns the decoded key-value parameter pairs of the URI.
208 */
209 public Map<String, List<String>> getParameters() {
210 if (params == null) {
211 if (hasPath) {
212 int pathLength = getPath().length();
213 if (uri.length() == pathLength) {
214 return Collections.emptyMap();
215 }
216 decodeParams(uri.substring(pathLength + 1));
217 } else {
218 if (uri.length() == 0) {
219 return Collections.emptyMap();
220 }
221 decodeParams(uri);
222 }
223 }
224 return params;
225 }
226
227 private void decodeParams(String s) {
228 Map<String, List<String>> params = this.params = new LinkedHashMap<String, List<String>>();
229 nParams = 0;
230 String name = null;
231 int pos = 0; // Beginning of the unprocessed region
232 int i; // End of the unprocessed region
233 char c; // Current character
234 for (i = 0; i < s.length(); i++) {
235 c = s.charAt(i);
236 if (c == '=' && name == null) {
237 if (pos != i) {
238 name = decodeComponent(s.substring(pos, i), charset);
239 }
240 pos = i + 1;
241 // http://www.w3.org/TR/html401/appendix/notes.html#h-B.2.2
242 } else if (c == '&' || c == ';') {
243 if (name == null && pos != i) {
244 // We haven't seen an `=' so far but moved forward.
245 // Must be a param of the form '&a&' so add it with
246 // an empty value.
247 if (!addParam(params, decodeComponent(s.substring(pos, i), charset), "")) {
248 return;
249 }
250 } else if (name != null) {
251 if (!addParam(params, name, decodeComponent(s.substring(pos, i), charset))) {
252 return;
253 }
254 name = null;
255 }
256 pos = i + 1;
257 }
258 }
259
260 if (pos != i) { // Are there characters we haven't dealt with?
261 if (name == null) { // Yes and we haven't seen any `='.
262 addParam(params, decodeComponent(s.substring(pos, i), charset), "");
263 } else { // Yes and this must be the last value.
264 addParam(params, name, decodeComponent(s.substring(pos, i), charset));
265 }
266 } else if (name != null) { // Have we seen a name without value?
267 addParam(params, name, "");
268 }
269 }
270
271 private boolean addParam(Map<String, List<String>> params, String name, String value) {
272 if (nParams >= maxParams) {
273 return false;
274 }
275
276 List<String> values = params.get(name);
277 if (values == null) {
278 values = new ArrayList<String>(1); // Often there's only 1 value.
279 params.put(name, values);
280 }
281 values.add(value);
282 nParams ++;
283 return true;
284 }
285
286 /**
287 * Decodes a bit of an URL encoded by a browser.
288 * <p>
289 * This is equivalent to calling {@link #decodeComponent(String, Charset)}
290 * with the UTF-8 charset (recommended to comply with RFC 3986, Section 2).
291 * @param s The string to decode (can be empty).
292 * @return The decoded string, or {@code s} if there's nothing to decode.
293 * If the string to decode is {@code null}, returns an empty string.
294 * @throws IllegalArgumentException if the string contains a malformed
295 * escape sequence.
296 */
297 public static String decodeComponent(final String s) {
298 return decodeComponent(s, HttpConstants.DEFAULT_CHARSET);
299 }
300
301 /**
302 * Decodes a bit of an URL encoded by a browser.
303 * <p>
304 * The string is expected to be encoded as per RFC 3986, Section 2.
305 * This is the encoding used by JavaScript functions {@code encodeURI}
306 * and {@code encodeURIComponent}, but not {@code escape}. For example
307 * in this encoding, é (in Unicode {@code U+00E9} or in UTF-8
308 * {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
309 * <p>
310 * This is essentially equivalent to calling
311 * {@link URLDecoder#decode(String, String) URLDecoder.decode(s, charset.name())}
312 * except that it's over 2x faster and generates less garbage for the GC.
313 * Actually this function doesn't allocate any memory if there's nothing
314 * to decode, the argument itself is returned.
315 * @param s The string to decode (can be empty).
316 * @param charset The charset to use to decode the string (should really
317 * be {@link CharsetUtil#UTF_8}.
318 * @return The decoded string, or {@code s} if there's nothing to decode.
319 * If the string to decode is {@code null}, returns an empty string.
320 * @throws IllegalArgumentException if the string contains a malformed
321 * escape sequence.
322 */
323 @SuppressWarnings("fallthrough")
324 public static String decodeComponent(final String s,
325 final Charset charset) {
326 if (s == null) {
327 return "";
328 }
329 final int size = s.length();
330 boolean modified = false;
331 for (int i = 0; i < size; i++) {
332 final char c = s.charAt(i);
333 switch (c) {
334 case '%':
335 i++; // We can skip at least one char, e.g. `%%'.
336 // Fall through.
337 case '+':
338 modified = true;
339 break;
340 }
341 }
342 if (!modified) {
343 return s;
344 }
345 final byte[] buf = new byte[size];
346 int pos = 0; // position in `buf'.
347 for (int i = 0; i < size; i++) {
348 char c = s.charAt(i);
349 switch (c) {
350 case '+':
351 buf[pos++] = ' '; // "+" -> " "
352 break;
353 case '%':
354 if (i == size - 1) {
355 throw new IllegalArgumentException("unterminated escape"
356 + " sequence at end of string: " + s);
357 }
358 c = s.charAt(++i);
359 if (c == '%') {
360 buf[pos++] = '%'; // "%%" -> "%"
361 break;
362 }
363
364 if (i == size - 1) {
365 throw new IllegalArgumentException("partial escape"
366 + " sequence at end of string: " + s);
367 }
368 c = decodeHexNibble(c);
369 final char c2 = decodeHexNibble(s.charAt(++i));
370 if (c == Character.MAX_VALUE || c2 == Character.MAX_VALUE) {
371 throw new IllegalArgumentException(
372 "invalid escape sequence `%" + s.charAt(i - 1)
373 + s.charAt(i) + "' at index " + (i - 2)
374 + " of: " + s);
375 }
376 c = (char) (c * 16 + c2);
377 // Fall through.
378 default:
379 buf[pos++] = (byte) c;
380 break;
381 }
382 }
383 try {
384 return new String(buf, 0, pos, charset.name());
385 } catch (UnsupportedEncodingException e) {
386 throw new IllegalArgumentException("unsupported encoding: " + charset.name(), e);
387 }
388 }
389
390 /**
391 * Helper to decode half of a hexadecimal number from a string.
392 * @param c The ASCII character of the hexadecimal number to decode.
393 * Must be in the range {@code [0-9a-fA-F]}.
394 * @return The hexadecimal value represented in the ASCII character
395 * given, or {@link Character#MAX_VALUE} if the character is invalid.
396 */
397 private static char decodeHexNibble(final char c) {
398 if ('0' <= c && c <= '9') {
399 return (char) (c - '0');
400 } else if ('a' <= c && c <= 'f') {
401 return (char) (c - 'a' + 10);
402 } else if ('A' <= c && c <= 'F') {
403 return (char) (c - 'A' + 10);
404 } else {
405 return Character.MAX_VALUE;
406 }
407 }
408 }