View Javadoc
1   /*
2    * Copyright 2012 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.handler.codec.http;
17  
18  import io.netty.buffer.ByteBufUtil;
19  import io.netty.util.CharsetUtil;
20  import io.netty.util.internal.ObjectUtil;
21  import io.netty.util.internal.StringUtil;
22  
23  import java.net.URI;
24  import java.net.URISyntaxException;
25  import java.net.URLEncoder;
26  import java.nio.charset.Charset;
27  
28  /**
29   * Creates a URL-encoded URI from a path string and key-value parameter pairs.
30   * This encoder is for one time use only.  Create a new instance for each URI.
31   *
32   * <pre>
33   * {@link QueryStringEncoder} encoder = new {@link QueryStringEncoder}("/hello");
34   * encoder.addParam("recipient", "world");
35   * assert encoder.toString().equals("/hello?recipient=world");
36   * </pre>
37   *
38   * @see QueryStringDecoder
39   */
40  public class QueryStringEncoder {
41  
42      private final Charset charset;
43      private final StringBuilder uriBuilder;
44      private boolean hasParams;
45      private static final byte WRITE_UTF_UNKNOWN = (byte) '?';
46      private static final char[] CHAR_MAP = "0123456789ABCDEF".toCharArray();
47  
48      /**
49       * Creates a new encoder that encodes a URI that starts with the specified
50       * path string.  The encoder will encode the URI in UTF-8.
51       */
52      public QueryStringEncoder(String uri) {
53          this(uri, HttpConstants.DEFAULT_CHARSET);
54      }
55  
56      /**
57       * Creates a new encoder that encodes a URI that starts with the specified
58       * path string in the specified charset.
59       */
60      public QueryStringEncoder(String uri, Charset charset) {
61          ObjectUtil.checkNotNull(charset, "charset");
62          uriBuilder = new StringBuilder(uri);
63          this.charset = CharsetUtil.UTF_8.equals(charset) ? null : charset;
64      }
65  
66      /**
67       * Adds a parameter with the specified name and value to this encoder.
68       */
69      public void addParam(String name, String value) {
70          ObjectUtil.checkNotNull(name, "name");
71          if (hasParams) {
72              uriBuilder.append('&');
73          } else {
74              uriBuilder.append('?');
75              hasParams = true;
76          }
77  
78          encodeComponent(name);
79          if (value != null) {
80              uriBuilder.append('=');
81              encodeComponent(value);
82          }
83      }
84  
85      private void encodeComponent(CharSequence s) {
86          if (charset == null) {
87              encodeUtf8Component(s);
88          } else {
89              encodeNonUtf8Component(s);
90          }
91      }
92  
93      /**
94       * Returns the URL-encoded URI object which was created from the path string
95       * specified in the constructor and the parameters added by
96       * {@link #addParam(String, String)} method.
97       */
98      public URI toUri() throws URISyntaxException {
99          return new URI(toString());
100     }
101 
102     /**
103      * Returns the URL-encoded URI which was created from the path string
104      * specified in the constructor and the parameters added by
105      * {@link #addParam(String, String)} method.
106      */
107     @Override
108     public String toString() {
109         return uriBuilder.toString();
110     }
111 
112     /**
113      * Encode the String as per RFC 3986, Section 2.
114      * <p>
115      * There is a little different between the JDK's encode method : {@link URLEncoder#encode(String, String)}.
116      * The JDK's encoder encode the space to {@code +} and this method directly encode the blank to {@code %20}
117      * beyond that , this method reuse the {@link #uriBuilder} in this class rather then create a new one,
118      * thus generates less garbage for the GC.
119      *
120      * @param s The String to encode
121      */
122     private void encodeNonUtf8Component(CharSequence s) {
123         //Don't allocate memory until needed
124         char[] buf = null;
125 
126         for (int i = 0, len = s.length(); i < len;) {
127             char c = s.charAt(i);
128             if (dontNeedEncoding(c)) {
129                 uriBuilder.append(c);
130                 i++;
131             } else {
132                 int index = 0;
133                 if (buf == null) {
134                     buf = new char[s.length() - i];
135                 }
136 
137                 do {
138                     buf[index] = c;
139                     index++;
140                     i++;
141                 } while (i < s.length() && !dontNeedEncoding(c = s.charAt(i)));
142 
143                 byte[] bytes = new String(buf, 0, index).getBytes(charset);
144 
145                 for (byte b : bytes) {
146                     appendEncoded(b);
147                 }
148             }
149         }
150     }
151 
152     /**
153      * @see ByteBufUtil#writeUtf8(io.netty.buffer.ByteBuf, CharSequence, int, int)
154      */
155     private void encodeUtf8Component(CharSequence s) {
156         for (int i = 0, len = s.length(); i < len; i++) {
157             char c = s.charAt(i);
158             if (!dontNeedEncoding(c)) {
159                 encodeUtf8Component(s, i, len);
160                 return;
161             }
162         }
163         uriBuilder.append(s);
164     }
165 
166     private void encodeUtf8Component(CharSequence s, int encodingStart, int len) {
167         if (encodingStart > 0) {
168             // Append non-encoded characters directly first.
169             uriBuilder.append(s, 0, encodingStart);
170         }
171         encodeUtf8ComponentSlow(s, encodingStart, len);
172     }
173 
174     private void encodeUtf8ComponentSlow(CharSequence s, int start, int len) {
175         for (int i = start; i < len; i++) {
176             char c = s.charAt(i);
177             if (c < 0x80) {
178                 if (dontNeedEncoding(c)) {
179                     uriBuilder.append(c);
180                 } else {
181                     appendEncoded(c);
182                 }
183             } else if (c < 0x800) {
184                 appendEncoded(0xc0 | (c >> 6));
185                 appendEncoded(0x80 | (c & 0x3f));
186             } else if (StringUtil.isSurrogate(c)) {
187                 if (!Character.isHighSurrogate(c)) {
188                     appendEncoded(WRITE_UTF_UNKNOWN);
189                     continue;
190                 }
191                 // Surrogate Pair consumes 2 characters.
192                 if (++i == s.length()) {
193                     appendEncoded(WRITE_UTF_UNKNOWN);
194                     break;
195                 }
196                 // Extra method to allow inlining the rest of writeUtf8 which is the most likely code path.
197                 writeUtf8Surrogate(c, s.charAt(i));
198             } else {
199                 appendEncoded(0xe0 | (c >> 12));
200                 appendEncoded(0x80 | ((c >> 6) & 0x3f));
201                 appendEncoded(0x80 | (c & 0x3f));
202             }
203         }
204     }
205 
206     private void writeUtf8Surrogate(char c, char c2) {
207         if (!Character.isLowSurrogate(c2)) {
208             appendEncoded(WRITE_UTF_UNKNOWN);
209             appendEncoded(Character.isHighSurrogate(c2) ? WRITE_UTF_UNKNOWN : c2);
210             return;
211         }
212         int codePoint = Character.toCodePoint(c, c2);
213         // See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G2630.
214         appendEncoded(0xf0 | (codePoint >> 18));
215         appendEncoded(0x80 | ((codePoint >> 12) & 0x3f));
216         appendEncoded(0x80 | ((codePoint >> 6) & 0x3f));
217         appendEncoded(0x80 | (codePoint & 0x3f));
218     }
219 
220     private void appendEncoded(int b) {
221         uriBuilder.append('%').append(forDigit(b >> 4)).append(forDigit(b));
222     }
223 
224     /**
225      * Convert the given digit to a upper hexadecimal char.
226      *
227      * @param digit the number to convert to a character.
228      * @return the {@code char} representation of the specified digit
229      * in hexadecimal.
230      */
231     private static char forDigit(int digit) {
232         return CHAR_MAP[digit & 0xF];
233     }
234 
235     /**
236      * Determines whether the given character is a unreserved character.
237      * <p>
238      * unreserved characters do not need to be encoded, and include uppercase and lowercase
239      * letters, decimal digits, hyphen, period, underscore, and tilde.
240      * <p>
241      * unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~" / "*"
242      *
243      * @param ch the char to be judged whether it need to be encode
244      * @return true or false
245      */
246     private static boolean dontNeedEncoding(char ch) {
247         return ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= '0' && ch <= '9'
248                 || ch == '-' || ch == '_' || ch == '.' || ch == '*' || ch == '~';
249     }
250 }