1 /*
2 * Copyright 2012 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package org.jboss.netty.handler.codec.http;
17
18 import java.io.UnsupportedEncodingException;
19 import java.net.URI;
20 import java.net.URLDecoder;
21 import java.nio.charset.Charset;
22 import java.util.ArrayList;
23 import java.util.Collections;
24 import java.util.LinkedHashMap;
25 import java.util.List;
26 import java.util.Map;
27
28 import org.jboss.netty.util.CharsetUtil;
29
30 /**
31 * Splits an HTTP query string into a path string and key-value parameter pairs.
32 * This decoder is for one time use only. Create a new instance for each URI:
33 * <pre>
34 * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("/hello?recipient=world&x=1;y=2");
35 * assert decoder.getPath().equals("/hello");
36 * assert decoder.getParameters().get("recipient").get(0).equals("world");
37 * assert decoder.getParameters().get("x").get(0).equals("1");
38 * assert decoder.getParameters().get("y").get(0).equals("2");
39 * </pre>
40 *
41 * This decoder can also decode the content of an HTTP POST request whose
42 * content type is <tt>application/x-www-form-urlencoded</tt>:
43 * <pre>
44 * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("recipient=world&x=1;y=2", false);
45 * ...
46 * </pre>
47 *
48 * <h3>HashDOS vulnerability fix</h3>
49 *
50 * As a workaround to the <a href="http://goo.gl/I4Nky">HashDOS</a> vulnerability, the decoder
51 * limits the maximum number of decoded key-value parameter pairs, up to {@literal 1024} by
52 * default, and you can configure it when you construct the decoder by passing an additional
53 * integer parameter.
54 *
55 * @see QueryStringEncoder
56 *
57 * @apiviz.stereotype utility
58 * @apiviz.has org.jboss.netty.handler.codec.http.HttpRequest oneway - - decodes
59 */
60 public class QueryStringDecoder {
61
62 private static final int DEFAULT_MAX_PARAMS = 1024;
63
64 private final Charset charset;
65 private final String uri;
66 private final boolean hasPath;
67 private final int maxParams;
68 private String path;
69 private Map<String, List<String>> params;
70 private int nParams;
71
72 /**
73 * Creates a new decoder that decodes the specified URI. The decoder will
74 * assume that the query string is encoded in UTF-8.
75 */
76 public QueryStringDecoder(String uri) {
77 this(uri, HttpConstants.DEFAULT_CHARSET);
78 }
79
80 /**
81 * Creates a new decoder that decodes the specified URI encoded in the
82 * specified charset.
83 */
84 public QueryStringDecoder(String uri, boolean hasPath) {
85 this(uri, HttpConstants.DEFAULT_CHARSET, hasPath);
86 }
87
88 /**
89 * Creates a new decoder that decodes the specified URI encoded in the
90 * specified charset.
91 */
92 public QueryStringDecoder(String uri, Charset charset) {
93 this(uri, charset, true);
94 }
95
96 /**
97 * Creates a new decoder that decodes the specified URI encoded in the
98 * specified charset.
99 */
100 public QueryStringDecoder(String uri, Charset charset, boolean hasPath) {
101 this(uri, charset, hasPath, DEFAULT_MAX_PARAMS);
102 }
103
104 /**
105 * Creates a new decoder that decodes the specified URI encoded in the
106 * specified charset.
107 */
108 public QueryStringDecoder(String uri, Charset charset, boolean hasPath, int maxParams) {
109 if (uri == null) {
110 throw new NullPointerException("uri");
111 }
112 if (charset == null) {
113 throw new NullPointerException("charset");
114 }
115 if (maxParams <= 0) {
116 throw new IllegalArgumentException(
117 "maxParams: " + maxParams + " (expected: a positive integer)");
118 }
119
120 // http://en.wikipedia.org/wiki/Query_string
121 this.uri = uri.replace(';', '&');
122 this.charset = charset;
123 this.maxParams = maxParams;
124 this.hasPath = hasPath;
125 }
126
127 /**
128 * @deprecated Use {@link #QueryStringDecoder(String, Charset)} instead.
129 */
130 @Deprecated
131 public QueryStringDecoder(String uri, String charset) {
132 this(uri, Charset.forName(charset));
133 }
134
135 /**
136 * Creates a new decoder that decodes the specified URI. The decoder will
137 * assume that the query string is encoded in UTF-8.
138 */
139 public QueryStringDecoder(URI uri) {
140 this(uri, HttpConstants.DEFAULT_CHARSET);
141 }
142
143 /**
144 * Creates a new decoder that decodes the specified URI encoded in the
145 * specified charset.
146 */
147 public QueryStringDecoder(URI uri, Charset charset) {
148 this(uri, charset, DEFAULT_MAX_PARAMS);
149 }
150
151 /**
152 * Creates a new decoder that decodes the specified URI encoded in the
153 * specified charset.
154 */
155 public QueryStringDecoder(URI uri, Charset charset, int maxParams) {
156 if (uri == null) {
157 throw new NullPointerException("uri");
158 }
159 if (charset == null) {
160 throw new NullPointerException("charset");
161 }
162 if (maxParams <= 0) {
163 throw new IllegalArgumentException(
164 "maxParams: " + maxParams + " (expected: a positive integer)");
165 }
166
167 String rawPath = uri.getRawPath();
168 if (rawPath != null) {
169 hasPath = true;
170 } else {
171 rawPath = "";
172 hasPath = false;
173 }
174 // Also take care of cut of things like "http://localhost"
175 String newUri = rawPath + '?' + uri.getRawQuery();
176
177 // http://en.wikipedia.org/wiki/Query_string
178 this.uri = newUri.replace(';', '&');
179 this.charset = charset;
180 this.maxParams = maxParams;
181
182 }
183
184 /**
185 * @deprecated Use {@link #QueryStringDecoder(URI, Charset)} instead.
186 */
187 @Deprecated
188 public QueryStringDecoder(URI uri, String charset) {
189 this(uri, Charset.forName(charset));
190 }
191
192 /**
193 * Returns the decoded path string of the URI.
194 */
195 public String getPath() {
196 if (path == null) {
197 if (!hasPath) {
198 return path = "";
199 }
200
201 int pathEndPos = uri.indexOf('?');
202 if (pathEndPos < 0) {
203 path = uri;
204 } else {
205 return path = uri.substring(0, pathEndPos);
206 }
207 }
208 return path;
209 }
210
211 /**
212 * Returns the decoded key-value parameter pairs of the URI.
213 */
214 public Map<String, List<String>> getParameters() {
215 if (params == null) {
216 if (hasPath) {
217 int pathLength = getPath().length();
218 if (uri.length() == pathLength) {
219 return Collections.emptyMap();
220 }
221 decodeParams(uri.substring(pathLength + 1));
222 } else {
223 if (uri.length() == 0) {
224 return Collections.emptyMap();
225 }
226 decodeParams(uri);
227 }
228 }
229 return params;
230 }
231
232 private void decodeParams(String s) {
233 Map<String, List<String>> params = this.params = new LinkedHashMap<String, List<String>>();
234 nParams = 0;
235 String name = null;
236 int pos = 0; // Beginning of the unprocessed region
237 int i; // End of the unprocessed region
238 char c = 0; // Current character
239 for (i = 0; i < s.length(); i++) {
240 c = s.charAt(i);
241 if (c == '=' && name == null) {
242 if (pos != i) {
243 name = decodeComponent(s.substring(pos, i), charset);
244 }
245 pos = i + 1;
246 } else if (c == '&') {
247 if (name == null && pos != i) {
248 // We haven't seen an `=' so far but moved forward.
249 // Must be a param of the form '&a&' so add it with
250 // an empty value.
251 if (!addParam(params, decodeComponent(s.substring(pos, i), charset), "")) {
252 return;
253 }
254 } else if (name != null) {
255 if (!addParam(params, name, decodeComponent(s.substring(pos, i), charset))) {
256 return;
257 }
258 name = null;
259 }
260 pos = i + 1;
261 }
262 }
263
264 if (pos != i) { // Are there characters we haven't dealt with?
265 if (name == null) { // Yes and we haven't seen any `='.
266 if (!addParam(params, decodeComponent(s.substring(pos, i), charset), "")) {
267 return;
268 }
269 } else { // Yes and this must be the last value.
270 if (!addParam(params, name, decodeComponent(s.substring(pos, i), charset))) {
271 return;
272 }
273 }
274 } else if (name != null) { // Have we seen a name without value?
275 if (!addParam(params, name, "")) {
276 return;
277 }
278 }
279 }
280
281 private boolean addParam(Map<String, List<String>> params, String name, String value) {
282 if (nParams >= maxParams) {
283 return false;
284 }
285
286 List<String> values = params.get(name);
287 if (values == null) {
288 values = new ArrayList<String>(1); // Often there's only 1 value.
289 params.put(name, values);
290 }
291 values.add(value);
292 nParams ++;
293 return true;
294 }
295
296 /**
297 * Decodes a bit of an URL encoded by a browser.
298 * <p>
299 * This is equivalent to calling {@link #decodeComponent(String, Charset)}
300 * with the UTF-8 charset (recommended to comply with RFC 3986, Section 2).
301 * @param s The string to decode (can be empty).
302 * @return The decoded string, or {@code s} if there's nothing to decode.
303 * If the string to decode is {@code null}, returns an empty string.
304 * @throws IllegalArgumentException if the string contains a malformed
305 * escape sequence.
306 */
307 public static String decodeComponent(final String s) {
308 return decodeComponent(s, HttpConstants.DEFAULT_CHARSET);
309 }
310
311 /**
312 * Decodes a bit of an URL encoded by a browser.
313 * <p>
314 * The string is expected to be encoded as per RFC 3986, Section 2.
315 * This is the encoding used by JavaScript functions {@code encodeURI}
316 * and {@code encodeURIComponent}, but not {@code escape}. For example
317 * in this encoding, é (in Unicode {@code U+00E9} or in UTF-8
318 * {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
319 * <p>
320 * This is essentially equivalent to calling
321 * <code>{@link URLDecoder#decode(String, String) URLDecoder.decode}(s, charset.name())</code>
322 * except that it's over 2x faster and generates less garbage for the GC.
323 * Actually this function doesn't allocate any memory if there's nothing
324 * to decode, the argument itself is returned.
325 * @param s The string to decode (can be empty).
326 * @param charset The charset to use to decode the string (should really
327 * be {@link CharsetUtil#UTF_8}.
328 * @return The decoded string, or {@code s} if there's nothing to decode.
329 * If the string to decode is {@code null}, returns an empty string.
330 * @throws IllegalArgumentException if the string contains a malformed
331 * escape sequence.
332 */
333 @SuppressWarnings("fallthrough")
334 public static String decodeComponent(final String s,
335 final Charset charset) {
336 if (s == null) {
337 return "";
338 }
339 final int size = s.length();
340 boolean modified = false;
341 for (int i = 0; i < size; i++) {
342 final char c = s.charAt(i);
343 switch (c) {
344 case '%':
345 i++; // We can skip at least one char, e.g. `%%'.
346 // Fall through.
347 case '+':
348 modified = true;
349 break;
350 }
351 }
352 if (!modified) {
353 return s;
354 }
355 final byte[] buf = new byte[size];
356 int pos = 0; // position in `buf'.
357 for (int i = 0; i < size; i++) {
358 char c = s.charAt(i);
359 switch (c) {
360 case '+':
361 buf[pos++] = ' '; // "+" -> " "
362 break;
363 case '%':
364 if (i == size - 1) {
365 throw new IllegalArgumentException("unterminated escape"
366 + " sequence at end of string: " + s);
367 }
368 c = s.charAt(++i);
369 if (c == '%') {
370 buf[pos++] = '%'; // "%%" -> "%"
371 break;
372 } else if (i == size - 1) {
373 throw new IllegalArgumentException("partial escape"
374 + " sequence at end of string: " + s);
375 }
376 c = decodeHexNibble(c);
377 final char c2 = decodeHexNibble(s.charAt(++i));
378 if (c == Character.MAX_VALUE || c2 == Character.MAX_VALUE) {
379 throw new IllegalArgumentException(
380 "invalid escape sequence `%" + s.charAt(i - 1)
381 + s.charAt(i) + "' at index " + (i - 2)
382 + " of: " + s);
383 }
384 c = (char) (c * 16 + c2);
385 // Fall through.
386 default:
387 buf[pos++] = (byte) c;
388 break;
389 }
390 }
391 try {
392 return new String(buf, 0, pos, charset.name());
393 } catch (UnsupportedEncodingException e) {
394 throw new IllegalArgumentException("unsupported encoding: " + charset.name());
395 }
396 }
397
398 /**
399 * Helper to decode half of a hexadecimal number from a string.
400 * @param c The ASCII character of the hexadecimal number to decode.
401 * Must be in the range {@code [0-9a-fA-F]}.
402 * @return The hexadecimal value represented in the ASCII character
403 * given, or {@link Character#MAX_VALUE} if the character is invalid.
404 */
405 private static char decodeHexNibble(final char c) {
406 if ('0' <= c && c <= '9') {
407 return (char) (c - '0');
408 } else if ('a' <= c && c <= 'f') {
409 return (char) (c - 'a' + 10);
410 } else if ('A' <= c && c <= 'F') {
411 return (char) (c - 'A' + 10);
412 } else {
413 return Character.MAX_VALUE;
414 }
415 }
416 }