View Javadoc

1   /*
2    * Copyright 2012 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.handler.codec.http;
17  
18  import io.netty.util.CharsetUtil;
19  
20  import java.net.URI;
21  import java.net.URLDecoder;
22  import java.nio.ByteBuffer;
23  import java.nio.CharBuffer;
24  import java.nio.charset.CharacterCodingException;
25  import java.nio.charset.Charset;
26  import java.nio.charset.CharsetDecoder;
27  import java.nio.charset.CoderResult;
28  import java.util.ArrayList;
29  import java.util.Collections;
30  import java.util.LinkedHashMap;
31  import java.util.List;
32  import java.util.Map;
33  
34  import static io.netty.util.internal.ObjectUtil.*;
35  import static io.netty.util.internal.StringUtil.*;
36  
37  /**
38   * Splits an HTTP query string into a path string and key-value parameter pairs.
39   * This decoder is for one time use only.  Create a new instance for each URI:
40   * <pre>
41   * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("/hello?recipient=world&x=1;y=2");
42   * assert decoder.path().equals("/hello");
43   * assert decoder.parameters().get("recipient").get(0).equals("world");
44   * assert decoder.parameters().get("x").get(0).equals("1");
45   * assert decoder.parameters().get("y").get(0).equals("2");
46   * </pre>
47   *
48   * This decoder can also decode the content of an HTTP POST request whose
49   * content type is <tt>application/x-www-form-urlencoded</tt>:
50   * <pre>
51   * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("recipient=world&x=1;y=2", false);
52   * ...
53   * </pre>
54   *
55   * <h3>HashDOS vulnerability fix</h3>
56   *
57   * As a workaround to the <a href="http://netty.io/s/hashdos">HashDOS</a> vulnerability, the decoder
58   * limits the maximum number of decoded key-value parameter pairs, up to {@literal 1024} by
59   * default, and you can configure it when you construct the decoder by passing an additional
60   * integer parameter.
61   *
62   * @see QueryStringEncoder
63   */
64  public class QueryStringDecoder {
65  
66      private static final int DEFAULT_MAX_PARAMS = 1024;
67  
68      private final Charset charset;
69      private final String uri;
70      private final int maxParams;
71      private int pathEndIdx;
72      private String path;
73      private Map<String, List<String>> params;
74  
75      /**
76       * Creates a new decoder that decodes the specified URI. The decoder will
77       * assume that the query string is encoded in UTF-8.
78       */
79      public QueryStringDecoder(String uri) {
80          this(uri, HttpConstants.DEFAULT_CHARSET);
81      }
82  
83      /**
84       * Creates a new decoder that decodes the specified URI encoded in the
85       * specified charset.
86       */
87      public QueryStringDecoder(String uri, boolean hasPath) {
88          this(uri, HttpConstants.DEFAULT_CHARSET, hasPath);
89      }
90  
91      /**
92       * Creates a new decoder that decodes the specified URI encoded in the
93       * specified charset.
94       */
95      public QueryStringDecoder(String uri, Charset charset) {
96          this(uri, charset, true);
97      }
98  
99      /**
100      * Creates a new decoder that decodes the specified URI encoded in the
101      * specified charset.
102      */
103     public QueryStringDecoder(String uri, Charset charset, boolean hasPath) {
104         this(uri, charset, hasPath, DEFAULT_MAX_PARAMS);
105     }
106 
107     /**
108      * Creates a new decoder that decodes the specified URI encoded in the
109      * specified charset.
110      */
111     public QueryStringDecoder(String uri, Charset charset, boolean hasPath, int maxParams) {
112         this.uri = checkNotNull(uri, "uri");
113         this.charset = checkNotNull(charset, "charset");
114         this.maxParams = checkPositive(maxParams, "maxParams");
115 
116         // `-1` means that path end index will be initialized lazily
117         pathEndIdx = hasPath ? -1 : 0;
118     }
119 
120     /**
121      * Creates a new decoder that decodes the specified URI. The decoder will
122      * assume that the query string is encoded in UTF-8.
123      */
124     public QueryStringDecoder(URI uri) {
125         this(uri, HttpConstants.DEFAULT_CHARSET);
126     }
127 
128     /**
129      * Creates a new decoder that decodes the specified URI encoded in the
130      * specified charset.
131      */
132     public QueryStringDecoder(URI uri, Charset charset) {
133         this(uri, charset, DEFAULT_MAX_PARAMS);
134     }
135 
136     /**
137      * Creates a new decoder that decodes the specified URI encoded in the
138      * specified charset.
139      */
140     public QueryStringDecoder(URI uri, Charset charset, int maxParams) {
141         String rawPath = uri.getRawPath();
142         if (rawPath == null) {
143             rawPath = EMPTY_STRING;
144         }
145         String rawQuery = uri.getRawQuery();
146         // Also take care of cut of things like "http://localhost"
147         this.uri = rawQuery == null? rawPath : rawPath + '?' + rawQuery;
148         this.charset = checkNotNull(charset, "charset");
149         this.maxParams = checkPositive(maxParams, "maxParams");
150         pathEndIdx = rawPath.length();
151     }
152 
153     @Override
154     public String toString() {
155         return uri();
156     }
157 
158     /**
159      * Returns the uri used to initialize this {@link QueryStringDecoder}.
160      */
161     public String uri() {
162         return uri;
163     }
164 
165     /**
166      * Returns the decoded path string of the URI.
167      */
168     public String path() {
169         if (path == null) {
170             path = decodeComponent(uri, 0, pathEndIdx(), charset, true);
171         }
172         return path;
173     }
174 
175     /**
176      * Returns the decoded key-value parameter pairs of the URI.
177      */
178     public Map<String, List<String>> parameters() {
179         if (params == null) {
180             params = decodeParams(uri, pathEndIdx(), charset, maxParams);
181         }
182         return params;
183     }
184 
185     /**
186      * Returns the raw path string of the URI.
187      */
188     public String rawPath() {
189         return uri.substring(0, pathEndIdx());
190     }
191 
192     /**
193      * Returns raw query string of the URI.
194      */
195     public String rawQuery() {
196         int start = pathEndIdx() + 1;
197         return start < uri.length() ? uri.substring(start) : EMPTY_STRING;
198     }
199 
200     private int pathEndIdx() {
201         if (pathEndIdx == -1) {
202             pathEndIdx = findPathEndIndex(uri);
203         }
204         return pathEndIdx;
205     }
206 
207     private static Map<String, List<String>> decodeParams(String s, int from, Charset charset, int paramsLimit) {
208         int len = s.length();
209         if (from >= len) {
210             return Collections.emptyMap();
211         }
212         if (s.charAt(from) == '?') {
213             from++;
214         }
215         Map<String, List<String>> params = new LinkedHashMap<String, List<String>>();
216         int nameStart = from;
217         int valueStart = -1;
218         int i;
219         loop:
220         for (i = from; i < len; i++) {
221             switch (s.charAt(i)) {
222             case '=':
223                 if (nameStart == i) {
224                     nameStart = i + 1;
225                 } else if (valueStart < nameStart) {
226                     valueStart = i + 1;
227                 }
228                 break;
229             case '&':
230             case ';':
231                 if (addParam(s, nameStart, valueStart, i, params, charset)) {
232                     paramsLimit--;
233                     if (paramsLimit == 0) {
234                         return params;
235                     }
236                 }
237                 nameStart = i + 1;
238                 break;
239             case '#':
240                 break loop;
241             default:
242                 // continue
243             }
244         }
245         addParam(s, nameStart, valueStart, i, params, charset);
246         return params;
247     }
248 
249     private static boolean addParam(String s, int nameStart, int valueStart, int valueEnd,
250                                     Map<String, List<String>> params, Charset charset) {
251         if (nameStart >= valueEnd) {
252             return false;
253         }
254         if (valueStart <= nameStart) {
255             valueStart = valueEnd + 1;
256         }
257         String name = decodeComponent(s, nameStart, valueStart - 1, charset, false);
258         String value = decodeComponent(s, valueStart, valueEnd, charset, false);
259         List<String> values = params.get(name);
260         if (values == null) {
261             values = new ArrayList<String>(1);  // Often there's only 1 value.
262             params.put(name, values);
263         }
264         values.add(value);
265         return true;
266     }
267 
268     /**
269      * Decodes a bit of an URL encoded by a browser.
270      * <p>
271      * This is equivalent to calling {@link #decodeComponent(String, Charset)}
272      * with the UTF-8 charset (recommended to comply with RFC 3986, Section 2).
273      * @param s The string to decode (can be empty).
274      * @return The decoded string, or {@code s} if there's nothing to decode.
275      * If the string to decode is {@code null}, returns an empty string.
276      * @throws IllegalArgumentException if the string contains a malformed
277      * escape sequence.
278      */
279     public static String decodeComponent(final String s) {
280         return decodeComponent(s, HttpConstants.DEFAULT_CHARSET);
281     }
282 
283     /**
284      * Decodes a bit of an URL encoded by a browser.
285      * <p>
286      * The string is expected to be encoded as per RFC 3986, Section 2.
287      * This is the encoding used by JavaScript functions {@code encodeURI}
288      * and {@code encodeURIComponent}, but not {@code escape}.  For example
289      * in this encoding, &eacute; (in Unicode {@code U+00E9} or in UTF-8
290      * {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
291      * <p>
292      * This is essentially equivalent to calling
293      *   {@link URLDecoder#decode(String, String)}
294      * except that it's over 2x faster and generates less garbage for the GC.
295      * Actually this function doesn't allocate any memory if there's nothing
296      * to decode, the argument itself is returned.
297      * @param s The string to decode (can be empty).
298      * @param charset The charset to use to decode the string (should really
299      * be {@link CharsetUtil#UTF_8}.
300      * @return The decoded string, or {@code s} if there's nothing to decode.
301      * If the string to decode is {@code null}, returns an empty string.
302      * @throws IllegalArgumentException if the string contains a malformed
303      * escape sequence.
304      */
305     public static String decodeComponent(final String s, final Charset charset) {
306         if (s == null) {
307             return EMPTY_STRING;
308         }
309         return decodeComponent(s, 0, s.length(), charset, false);
310     }
311 
312     private static String decodeComponent(String s, int from, int toExcluded, Charset charset, boolean isPath) {
313         int len = toExcluded - from;
314         if (len <= 0) {
315             return EMPTY_STRING;
316         }
317         int firstEscaped = -1;
318         for (int i = from; i < toExcluded; i++) {
319             char c = s.charAt(i);
320             if (c == '%' || c == '+' && !isPath) {
321                 firstEscaped = i;
322                 break;
323             }
324         }
325         if (firstEscaped == -1) {
326             return s.substring(from, toExcluded);
327         }
328 
329         CharsetDecoder decoder = CharsetUtil.decoder(charset);
330 
331         // Each encoded byte takes 3 characters (e.g. "%20")
332         int decodedCapacity = (toExcluded - firstEscaped) / 3;
333         ByteBuffer byteBuf = ByteBuffer.allocate(decodedCapacity);
334         CharBuffer charBuf = CharBuffer.allocate(decodedCapacity);
335 
336         StringBuilder strBuf = new StringBuilder(len);
337         strBuf.append(s, from, firstEscaped);
338 
339         for (int i = firstEscaped; i < toExcluded; i++) {
340             char c = s.charAt(i);
341             if (c != '%') {
342                 strBuf.append(c != '+' || isPath? c : ' ');
343                 continue;
344             }
345 
346             byteBuf.clear();
347             do {
348                 if (i + 3 > toExcluded) {
349                     throw new IllegalArgumentException("unterminated escape sequence at index " + i + " of: " + s);
350                 }
351                 byteBuf.put(decodeHexByte(s, i + 1));
352                 i += 3;
353             } while (i < toExcluded && s.charAt(i) == '%');
354             i--;
355 
356             byteBuf.flip();
357             charBuf.clear();
358             CoderResult result = decoder.reset().decode(byteBuf, charBuf, true);
359             try {
360                 if (!result.isUnderflow()) {
361                     result.throwException();
362                 }
363                 result = decoder.flush(charBuf);
364                 if (!result.isUnderflow()) {
365                     result.throwException();
366                 }
367             } catch (CharacterCodingException ex) {
368                 throw new IllegalStateException(ex);
369             }
370             strBuf.append(charBuf.flip());
371         }
372         return strBuf.toString();
373     }
374 
375     private static int findPathEndIndex(String uri) {
376         int len = uri.length();
377         for (int i = 0; i < len; i++) {
378             char c = uri.charAt(i);
379             if (c == '?' || c == '#') {
380                 return i;
381             }
382         }
383         return len;
384     }
385 }