1 /*
2 * Copyright 2012 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package org.jboss.netty.handler.codec.http;
17
18 import org.jboss.netty.util.CharsetUtil;
19
20 import java.io.UnsupportedEncodingException;
21 import java.net.URI;
22 import java.net.URLDecoder;
23 import java.nio.charset.Charset;
24 import java.util.ArrayList;
25 import java.util.Collections;
26 import java.util.LinkedHashMap;
27 import java.util.List;
28 import java.util.Map;
29
30 /**
31 * Splits an HTTP query string into a path string and key-value parameter pairs.
32 * This decoder is for one time use only. Create a new instance for each URI:
33 * <pre>
34 * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("/hello?recipient=world&x=1;y=2");
35 * assert decoder.getPath().equals("/hello");
36 * assert decoder.getParameters().get("recipient").get(0).equals("world");
37 * assert decoder.getParameters().get("x").get(0).equals("1");
38 * assert decoder.getParameters().get("y").get(0).equals("2");
39 * </pre>
40 *
41 * This decoder can also decode the content of an HTTP POST request whose
42 * content type is <tt>application/x-www-form-urlencoded</tt>:
43 * <pre>
44 * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("recipient=world&x=1;y=2", false);
45 * ...
46 * </pre>
47 *
48 * <h3>HashDOS vulnerability fix</h3>
49 *
50 * As a workaround to the <a href="http://netty.io/s/hashdos">HashDOS</a> vulnerability, the decoder
51 * limits the maximum number of decoded key-value parameter pairs, up to {@literal 1024} by
52 * default, and you can configure it when you construct the decoder by passing an additional
53 * integer parameter.
54 *
55 * @see QueryStringEncoder
56 *
57 * @apiviz.stereotype utility
58 * @apiviz.has org.jboss.netty.handler.codec.http.HttpRequest oneway - - decodes
59 */
60 public class QueryStringDecoder {
61
62 private static final int DEFAULT_MAX_PARAMS = 1024;
63
64 private final Charset charset;
65 private final String uri;
66 private final boolean hasPath;
67 private final int maxParams;
68 private String path;
69 private Map<String, List<String>> params;
70 private int nParams;
71
72 /**
73 * Creates a new decoder that decodes the specified URI. The decoder will
74 * assume that the query string is encoded in UTF-8.
75 */
76 public QueryStringDecoder(String uri) {
77 this(uri, HttpConstants.DEFAULT_CHARSET);
78 }
79
80 /**
81 * Creates a new decoder that decodes the specified URI encoded in the
82 * specified charset.
83 */
84 public QueryStringDecoder(String uri, boolean hasPath) {
85 this(uri, HttpConstants.DEFAULT_CHARSET, hasPath);
86 }
87
88 /**
89 * Creates a new decoder that decodes the specified URI encoded in the
90 * specified charset.
91 */
92 public QueryStringDecoder(String uri, Charset charset) {
93 this(uri, charset, true);
94 }
95
96 /**
97 * Creates a new decoder that decodes the specified URI encoded in the
98 * specified charset.
99 */
100 public QueryStringDecoder(String uri, Charset charset, boolean hasPath) {
101 this(uri, charset, hasPath, DEFAULT_MAX_PARAMS);
102 }
103
104 /**
105 * Creates a new decoder that decodes the specified URI encoded in the
106 * specified charset.
107 */
108 public QueryStringDecoder(String uri, Charset charset, boolean hasPath, int maxParams) {
109 if (uri == null) {
110 throw new NullPointerException("uri");
111 }
112 if (charset == null) {
113 throw new NullPointerException("charset");
114 }
115 if (maxParams <= 0) {
116 throw new IllegalArgumentException(
117 "maxParams: " + maxParams + " (expected: a positive integer)");
118 }
119
120 this.uri = uri;
121 this.charset = charset;
122 this.maxParams = maxParams;
123 this.hasPath = hasPath;
124 }
125
126 /**
127 * Creates a new decoder that decodes the specified URI. The decoder will
128 * assume that the query string is encoded in UTF-8.
129 */
130 public QueryStringDecoder(URI uri) {
131 this(uri, HttpConstants.DEFAULT_CHARSET);
132 }
133
134 /**
135 * Creates a new decoder that decodes the specified URI encoded in the
136 * specified charset.
137 */
138 public QueryStringDecoder(URI uri, Charset charset) {
139 this(uri, charset, DEFAULT_MAX_PARAMS);
140 }
141
142 /**
143 * Creates a new decoder that decodes the specified URI encoded in the
144 * specified charset.
145 */
146 public QueryStringDecoder(URI uri, Charset charset, int maxParams) {
147 if (uri == null) {
148 throw new NullPointerException("uri");
149 }
150 if (charset == null) {
151 throw new NullPointerException("charset");
152 }
153 if (maxParams <= 0) {
154 throw new IllegalArgumentException(
155 "maxParams: " + maxParams + " (expected: a positive integer)");
156 }
157
158 String rawPath = uri.getRawPath();
159 if (rawPath != null) {
160 hasPath = true;
161 } else {
162 rawPath = "";
163 hasPath = false;
164 }
165 // Also take care of cut of things like "http://localhost"
166 this.uri = rawPath + '?' + uri.getRawQuery();
167 this.charset = charset;
168 this.maxParams = maxParams;
169 }
170
171 /**
172 * Returns the decoded path string of the URI.
173 */
174 public String getPath() {
175 if (path == null) {
176 if (!hasPath) {
177 return path = "";
178 }
179
180 int pathEndPos = uri.indexOf('?');
181 if (pathEndPos < 0) {
182 path = uri;
183 } else {
184 return path = uri.substring(0, pathEndPos);
185 }
186 }
187 return path;
188 }
189
190 /**
191 * Returns the decoded key-value parameter pairs of the URI.
192 */
193 public Map<String, List<String>> getParameters() {
194 if (params == null) {
195 if (hasPath) {
196 int pathLength = getPath().length();
197 if (uri.length() == pathLength) {
198 return Collections.emptyMap();
199 }
200 decodeParams(uri.substring(pathLength + 1));
201 } else {
202 if (uri.length() == 0) {
203 return Collections.emptyMap();
204 }
205 decodeParams(uri);
206 }
207 }
208 return params;
209 }
210
211 private void decodeParams(String s) {
212 Map<String, List<String>> params = this.params = new LinkedHashMap<String, List<String>>();
213 nParams = 0;
214 String name = null;
215 int pos = 0; // Beginning of the unprocessed region
216 int i; // End of the unprocessed region
217 char c; // Current character
218 for (i = 0; i < s.length(); i++) {
219 c = s.charAt(i);
220 if (c == '=' && name == null) {
221 if (pos != i) {
222 name = decodeComponent(s.substring(pos, i), charset);
223 }
224 pos = i + 1;
225 // http://www.w3.org/TR/html401/appendix/notes.html#h-B.2.2
226 } else if (c == '&' || c == ';') {
227 if (name == null && pos != i) {
228 // We haven't seen an `=' so far but moved forward.
229 // Must be a param of the form '&a&' so add it with
230 // an empty value.
231 if (!addParam(params, decodeComponent(s.substring(pos, i), charset), "")) {
232 return;
233 }
234 } else if (name != null) {
235 if (!addParam(params, name, decodeComponent(s.substring(pos, i), charset))) {
236 return;
237 }
238 name = null;
239 }
240 pos = i + 1;
241 }
242 }
243
244 if (pos != i) { // Are there characters we haven't dealt with?
245 if (name == null) { // Yes and we haven't seen any `='.
246 addParam(params, decodeComponent(s.substring(pos, i), charset), "");
247 } else { // Yes and this must be the last value.
248 addParam(params, name, decodeComponent(s.substring(pos, i), charset));
249 }
250 } else if (name != null) { // Have we seen a name without value?
251 addParam(params, name, "");
252 }
253 }
254
255 private boolean addParam(Map<String, List<String>> params, String name, String value) {
256 if (nParams >= maxParams) {
257 return false;
258 }
259
260 List<String> values = params.get(name);
261 if (values == null) {
262 values = new ArrayList<String>(1); // Often there's only 1 value.
263 params.put(name, values);
264 }
265 values.add(value);
266 nParams ++;
267 return true;
268 }
269
270 /**
271 * Decodes a bit of an URL encoded by a browser.
272 * <p>
273 * This is equivalent to calling {@link #decodeComponent(String, Charset)}
274 * with the UTF-8 charset (recommended to comply with RFC 3986, Section 2).
275 * @param s The string to decode (can be empty).
276 * @return The decoded string, or {@code s} if there's nothing to decode.
277 * If the string to decode is {@code null}, returns an empty string.
278 * @throws IllegalArgumentException if the string contains a malformed
279 * escape sequence.
280 */
281 public static String decodeComponent(final String s) {
282 return decodeComponent(s, HttpConstants.DEFAULT_CHARSET);
283 }
284
285 /**
286 * Decodes a bit of an URL encoded by a browser.
287 * <p>
288 * The string is expected to be encoded as per RFC 3986, Section 2.
289 * This is the encoding used by JavaScript functions {@code encodeURI}
290 * and {@code encodeURIComponent}, but not {@code escape}. For example
291 * in this encoding, é (in Unicode {@code U+00E9} or in UTF-8
292 * {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
293 * <p>
294 * This is essentially equivalent to calling
295 * {@link URLDecoder#decode(String, String) URLDecoder.decode(s, charset.name())}
296 * except that it's over 2x faster and generates less garbage for the GC.
297 * Actually this function doesn't allocate any memory if there's nothing
298 * to decode, the argument itself is returned.
299 * @param s The string to decode (can be empty).
300 * @param charset The charset to use to decode the string (should really
301 * be {@link CharsetUtil#UTF_8}.
302 * @return The decoded string, or {@code s} if there's nothing to decode.
303 * If the string to decode is {@code null}, returns an empty string.
304 * @throws IllegalArgumentException if the string contains a malformed
305 * escape sequence.
306 */
307 @SuppressWarnings("fallthrough")
308 public static String decodeComponent(final String s,
309 final Charset charset) {
310 if (s == null) {
311 return "";
312 }
313 final int size = s.length();
314 boolean modified = false;
315 for (int i = 0; i < size; i++) {
316 final char c = s.charAt(i);
317 switch (c) {
318 case '%':
319 i++; // We can skip at least one char, e.g. `%%'.
320 // Fall through.
321 case '+':
322 modified = true;
323 break;
324 }
325 }
326 if (!modified) {
327 return s;
328 }
329 final byte[] buf = new byte[size];
330 int pos = 0; // position in `buf'.
331 for (int i = 0; i < size; i++) {
332 char c = s.charAt(i);
333 switch (c) {
334 case '+':
335 buf[pos++] = ' '; // "+" -> " "
336 break;
337 case '%':
338 if (i == size - 1) {
339 throw new IllegalArgumentException("unterminated escape"
340 + " sequence at end of string: " + s);
341 }
342 c = s.charAt(++i);
343 if (c == '%') {
344 buf[pos++] = '%'; // "%%" -> "%"
345 break;
346 }
347
348 if (i == size - 1) {
349 throw new IllegalArgumentException("partial escape"
350 + " sequence at end of string: " + s);
351 }
352 c = decodeHexNibble(c);
353 final char c2 = decodeHexNibble(s.charAt(++i));
354 if (c == Character.MAX_VALUE || c2 == Character.MAX_VALUE) {
355 throw new IllegalArgumentException(
356 "invalid escape sequence `%" + s.charAt(i - 1)
357 + s.charAt(i) + "' at index " + (i - 2)
358 + " of: " + s);
359 }
360 c = (char) (c * 16 + c2);
361 // Fall through.
362 default:
363 buf[pos++] = (byte) c;
364 break;
365 }
366 }
367 try {
368 return new String(buf, 0, pos, charset.name());
369 } catch (UnsupportedEncodingException e) {
370 throw new IllegalArgumentException("unsupported encoding: " + charset.name(), e);
371 }
372 }
373
374 /**
375 * Helper to decode half of a hexadecimal number from a string.
376 * @param c The ASCII character of the hexadecimal number to decode.
377 * Must be in the range {@code [0-9a-fA-F]}.
378 * @return The hexadecimal value represented in the ASCII character
379 * given, or {@link Character#MAX_VALUE} if the character is invalid.
380 */
381 private static char decodeHexNibble(final char c) {
382 if ('0' <= c && c <= '9') {
383 return (char) (c - '0');
384 } else if ('a' <= c && c <= 'f') {
385 return (char) (c - 'a' + 10);
386 } else if ('A' <= c && c <= 'F') {
387 return (char) (c - 'A' + 10);
388 } else {
389 return Character.MAX_VALUE;
390 }
391 }
392 }