1 /*
2 * Copyright 2012 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package org.jboss.netty.handler.codec.http;
17
18 import org.jboss.netty.util.CharsetUtil;
19
20 import java.io.UnsupportedEncodingException;
21 import java.net.URI;
22 import java.net.URLDecoder;
23 import java.nio.charset.Charset;
24 import java.util.ArrayList;
25 import java.util.Collections;
26 import java.util.LinkedHashMap;
27 import java.util.List;
28 import java.util.Map;
29
30 /**
31 * Splits an HTTP query string into a path string and key-value parameter pairs.
32 * This decoder is for one time use only. Create a new instance for each URI:
33 * <pre>
34 * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("/hello?recipient=world&x=1;y=2");
35 * assert decoder.getPath().equals("/hello");
36 * assert decoder.getParameters().get("recipient").get(0).equals("world");
37 * assert decoder.getParameters().get("x").get(0).equals("1");
38 * assert decoder.getParameters().get("y").get(0).equals("2");
39 * </pre>
40 *
41 * This decoder can also decode the content of an HTTP POST request whose
42 * content type is <tt>application/x-www-form-urlencoded</tt>:
43 * <pre>
44 * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("recipient=world&x=1;y=2", false);
45 * ...
46 * </pre>
47 *
48 * <h3>HashDOS vulnerability fix</h3>
49 *
50 * As a workaround to the <a href="http://goo.gl/I4Nky">HashDOS</a> vulnerability, the decoder
51 * limits the maximum number of decoded key-value parameter pairs, up to {@literal 1024} by
52 * default, and you can configure it when you construct the decoder by passing an additional
53 * integer parameter.
54 *
55 * @see QueryStringEncoder
56 *
57 * @apiviz.stereotype utility
58 * @apiviz.has org.jboss.netty.handler.codec.http.HttpRequest oneway - - decodes
59 */
60 public class QueryStringDecoder {
61
62 private static final int DEFAULT_MAX_PARAMS = 1024;
63
64 private final Charset charset;
65 private final String uri;
66 private final boolean hasPath;
67 private final int maxParams;
68 private String path;
69 private Map<String, List<String>> params;
70 private int nParams;
71
72 /**
73 * Creates a new decoder that decodes the specified URI. The decoder will
74 * assume that the query string is encoded in UTF-8.
75 */
76 public QueryStringDecoder(String uri) {
77 this(uri, HttpConstants.DEFAULT_CHARSET);
78 }
79
80 /**
81 * Creates a new decoder that decodes the specified URI encoded in the
82 * specified charset.
83 */
84 public QueryStringDecoder(String uri, boolean hasPath) {
85 this(uri, HttpConstants.DEFAULT_CHARSET, hasPath);
86 }
87
88 /**
89 * Creates a new decoder that decodes the specified URI encoded in the
90 * specified charset.
91 */
92 public QueryStringDecoder(String uri, Charset charset) {
93 this(uri, charset, true);
94 }
95
96 /**
97 * Creates a new decoder that decodes the specified URI encoded in the
98 * specified charset.
99 */
100 public QueryStringDecoder(String uri, Charset charset, boolean hasPath) {
101 this(uri, charset, hasPath, DEFAULT_MAX_PARAMS);
102 }
103
104 /**
105 * Creates a new decoder that decodes the specified URI encoded in the
106 * specified charset.
107 */
108 public QueryStringDecoder(String uri, Charset charset, boolean hasPath, int maxParams) {
109 if (uri == null) {
110 throw new NullPointerException("uri");
111 }
112 if (charset == null) {
113 throw new NullPointerException("charset");
114 }
115 if (maxParams <= 0) {
116 throw new IllegalArgumentException(
117 "maxParams: " + maxParams + " (expected: a positive integer)");
118 }
119
120 // http://en.wikipedia.org/wiki/Query_string
121 this.uri = uri.replace(';', '&');
122 this.charset = charset;
123 this.maxParams = maxParams;
124 this.hasPath = hasPath;
125 }
126
127 /**
128 * @deprecated Use {@link #QueryStringDecoder(String, Charset)} instead.
129 */
130 @Deprecated
131 public QueryStringDecoder(String uri, String charset) {
132 this(uri, Charset.forName(charset));
133 }
134
135 /**
136 * Creates a new decoder that decodes the specified URI. The decoder will
137 * assume that the query string is encoded in UTF-8.
138 */
139 public QueryStringDecoder(URI uri) {
140 this(uri, HttpConstants.DEFAULT_CHARSET);
141 }
142
143 /**
144 * Creates a new decoder that decodes the specified URI encoded in the
145 * specified charset.
146 */
147 public QueryStringDecoder(URI uri, Charset charset) {
148 this(uri, charset, DEFAULT_MAX_PARAMS);
149 }
150
151 /**
152 * Creates a new decoder that decodes the specified URI encoded in the
153 * specified charset.
154 */
155 public QueryStringDecoder(URI uri, Charset charset, int maxParams) {
156 if (uri == null) {
157 throw new NullPointerException("uri");
158 }
159 if (charset == null) {
160 throw new NullPointerException("charset");
161 }
162 if (maxParams <= 0) {
163 throw new IllegalArgumentException(
164 "maxParams: " + maxParams + " (expected: a positive integer)");
165 }
166
167 String rawPath = uri.getRawPath();
168 if (rawPath != null) {
169 hasPath = true;
170 } else {
171 rawPath = "";
172 hasPath = false;
173 }
174 // Also take care of cut of things like "http://localhost"
175 String newUri = rawPath + '?' + uri.getRawQuery();
176
177 // http://en.wikipedia.org/wiki/Query_string
178 this.uri = newUri.replace(';', '&');
179 this.charset = charset;
180 this.maxParams = maxParams;
181 }
182
183 /**
184 * @deprecated Use {@link #QueryStringDecoder(URI, Charset)} instead.
185 */
186 @Deprecated
187 public QueryStringDecoder(URI uri, String charset) {
188 this(uri, Charset.forName(charset));
189 }
190
191 /**
192 * Returns the decoded path string of the URI.
193 */
194 public String getPath() {
195 if (path == null) {
196 if (!hasPath) {
197 return path = "";
198 }
199
200 int pathEndPos = uri.indexOf('?');
201 if (pathEndPos < 0) {
202 path = uri;
203 } else {
204 return path = uri.substring(0, pathEndPos);
205 }
206 }
207 return path;
208 }
209
210 /**
211 * Returns the decoded key-value parameter pairs of the URI.
212 */
213 public Map<String, List<String>> getParameters() {
214 if (params == null) {
215 if (hasPath) {
216 int pathLength = getPath().length();
217 if (uri.length() == pathLength) {
218 return Collections.emptyMap();
219 }
220 decodeParams(uri.substring(pathLength + 1));
221 } else {
222 if (uri.length() == 0) {
223 return Collections.emptyMap();
224 }
225 decodeParams(uri);
226 }
227 }
228 return params;
229 }
230
231 private void decodeParams(String s) {
232 Map<String, List<String>> params = this.params = new LinkedHashMap<String, List<String>>();
233 nParams = 0;
234 String name = null;
235 int pos = 0; // Beginning of the unprocessed region
236 int i; // End of the unprocessed region
237 char c; // Current character
238 for (i = 0; i < s.length(); i++) {
239 c = s.charAt(i);
240 if (c == '=' && name == null) {
241 if (pos != i) {
242 name = decodeComponent(s.substring(pos, i), charset);
243 }
244 pos = i + 1;
245 } else if (c == '&') {
246 if (name == null && pos != i) {
247 // We haven't seen an `=' so far but moved forward.
248 // Must be a param of the form '&a&' so add it with
249 // an empty value.
250 if (!addParam(params, decodeComponent(s.substring(pos, i), charset), "")) {
251 return;
252 }
253 } else if (name != null) {
254 if (!addParam(params, name, decodeComponent(s.substring(pos, i), charset))) {
255 return;
256 }
257 name = null;
258 }
259 pos = i + 1;
260 }
261 }
262
263 if (pos != i) { // Are there characters we haven't dealt with?
264 if (name == null) { // Yes and we haven't seen any `='.
265 addParam(params, decodeComponent(s.substring(pos, i), charset), "");
266 } else { // Yes and this must be the last value.
267 addParam(params, name, decodeComponent(s.substring(pos, i), charset));
268 }
269 } else if (name != null) { // Have we seen a name without value?
270 addParam(params, name, "");
271 }
272 }
273
274 private boolean addParam(Map<String, List<String>> params, String name, String value) {
275 if (nParams >= maxParams) {
276 return false;
277 }
278
279 List<String> values = params.get(name);
280 if (values == null) {
281 values = new ArrayList<String>(1); // Often there's only 1 value.
282 params.put(name, values);
283 }
284 values.add(value);
285 nParams ++;
286 return true;
287 }
288
289 /**
290 * Decodes a bit of an URL encoded by a browser.
291 * <p>
292 * This is equivalent to calling {@link #decodeComponent(String, Charset)}
293 * with the UTF-8 charset (recommended to comply with RFC 3986, Section 2).
294 * @param s The string to decode (can be empty).
295 * @return The decoded string, or {@code s} if there's nothing to decode.
296 * If the string to decode is {@code null}, returns an empty string.
297 * @throws IllegalArgumentException if the string contains a malformed
298 * escape sequence.
299 */
300 public static String decodeComponent(final String s) {
301 return decodeComponent(s, HttpConstants.DEFAULT_CHARSET);
302 }
303
304 /**
305 * Decodes a bit of an URL encoded by a browser.
306 * <p>
307 * The string is expected to be encoded as per RFC 3986, Section 2.
308 * This is the encoding used by JavaScript functions {@code encodeURI}
309 * and {@code encodeURIComponent}, but not {@code escape}. For example
310 * in this encoding, é (in Unicode {@code U+00E9} or in UTF-8
311 * {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
312 * <p>
313 * This is essentially equivalent to calling
314 * {@link URLDecoder#decode(String, String) URLDecoder.decode(s, charset.name())}
315 * except that it's over 2x faster and generates less garbage for the GC.
316 * Actually this function doesn't allocate any memory if there's nothing
317 * to decode, the argument itself is returned.
318 * @param s The string to decode (can be empty).
319 * @param charset The charset to use to decode the string (should really
320 * be {@link CharsetUtil#UTF_8}.
321 * @return The decoded string, or {@code s} if there's nothing to decode.
322 * If the string to decode is {@code null}, returns an empty string.
323 * @throws IllegalArgumentException if the string contains a malformed
324 * escape sequence.
325 */
326 @SuppressWarnings("fallthrough")
327 public static String decodeComponent(final String s,
328 final Charset charset) {
329 if (s == null) {
330 return "";
331 }
332 final int size = s.length();
333 boolean modified = false;
334 for (int i = 0; i < size; i++) {
335 final char c = s.charAt(i);
336 switch (c) {
337 case '%':
338 i++; // We can skip at least one char, e.g. `%%'.
339 // Fall through.
340 case '+':
341 modified = true;
342 break;
343 }
344 }
345 if (!modified) {
346 return s;
347 }
348 final byte[] buf = new byte[size];
349 int pos = 0; // position in `buf'.
350 for (int i = 0; i < size; i++) {
351 char c = s.charAt(i);
352 switch (c) {
353 case '+':
354 buf[pos++] = ' '; // "+" -> " "
355 break;
356 case '%':
357 if (i == size - 1) {
358 throw new IllegalArgumentException("unterminated escape"
359 + " sequence at end of string: " + s);
360 }
361 c = s.charAt(++i);
362 if (c == '%') {
363 buf[pos++] = '%'; // "%%" -> "%"
364 break;
365 }
366
367 if (i == size - 1) {
368 throw new IllegalArgumentException("partial escape"
369 + " sequence at end of string: " + s);
370 }
371 c = decodeHexNibble(c);
372 final char c2 = decodeHexNibble(s.charAt(++i));
373 if (c == Character.MAX_VALUE || c2 == Character.MAX_VALUE) {
374 throw new IllegalArgumentException(
375 "invalid escape sequence `%" + s.charAt(i - 1)
376 + s.charAt(i) + "' at index " + (i - 2)
377 + " of: " + s);
378 }
379 c = (char) (c * 16 + c2);
380 // Fall through.
381 default:
382 buf[pos++] = (byte) c;
383 break;
384 }
385 }
386 try {
387 return new String(buf, 0, pos, charset.name());
388 } catch (UnsupportedEncodingException e) {
389 throw new IllegalArgumentException("unsupported encoding: " + charset.name(), e);
390 }
391 }
392
393 /**
394 * Helper to decode half of a hexadecimal number from a string.
395 * @param c The ASCII character of the hexadecimal number to decode.
396 * Must be in the range {@code [0-9a-fA-F]}.
397 * @return The hexadecimal value represented in the ASCII character
398 * given, or {@link Character#MAX_VALUE} if the character is invalid.
399 */
400 private static char decodeHexNibble(final char c) {
401 if ('0' <= c && c <= '9') {
402 return (char) (c - '0');
403 } else if ('a' <= c && c <= 'f') {
404 return (char) (c - 'a' + 10);
405 } else if ('A' <= c && c <= 'F') {
406 return (char) (c - 'A' + 10);
407 } else {
408 return Character.MAX_VALUE;
409 }
410 }
411 }