1 /*
2 * Copyright 2012 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package io.netty.handler.codec.http;
17
18 import io.netty.buffer.ByteBufUtil;
19 import io.netty.util.CharsetUtil;
20 import io.netty.util.internal.ObjectUtil;
21 import io.netty.util.internal.StringUtil;
22
23 import java.net.URI;
24 import java.net.URISyntaxException;
25 import java.net.URLEncoder;
26 import java.nio.charset.Charset;
27
28 /**
29 * Creates a URL-encoded URI from a path string and key-value parameter pairs.
30 * This encoder is for one time use only. Create a new instance for each URI.
31 *
32 * <pre>
33 * {@link QueryStringEncoder} encoder = new {@link QueryStringEncoder}("/hello");
34 * encoder.addParam("recipient", "world");
35 * assert encoder.toString().equals("/hello?recipient=world");
36 * </pre>
37 *
38 * @see QueryStringDecoder
39 */
40 public class QueryStringEncoder {
41
42 private final Charset charset;
43 private final StringBuilder uriBuilder;
44 private boolean hasParams;
45 private static final byte WRITE_UTF_UNKNOWN = (byte) '?';
46 private static final char[] CHAR_MAP = "0123456789ABCDEF".toCharArray();
47
48 /**
49 * Creates a new encoder that encodes a URI that starts with the specified
50 * path string. The encoder will encode the URI in UTF-8.
51 */
52 public QueryStringEncoder(String uri) {
53 this(uri, HttpConstants.DEFAULT_CHARSET);
54 }
55
56 /**
57 * Creates a new encoder that encodes a URI that starts with the specified
58 * path string in the specified charset.
59 */
60 public QueryStringEncoder(String uri, Charset charset) {
61 ObjectUtil.checkNotNull(charset, "charset");
62 uriBuilder = new StringBuilder(uri);
63 this.charset = CharsetUtil.UTF_8.equals(charset) ? null : charset;
64 }
65
66 /**
67 * Adds a parameter with the specified name and value to this encoder.
68 */
69 public void addParam(String name, String value) {
70 ObjectUtil.checkNotNull(name, "name");
71 if (hasParams) {
72 uriBuilder.append('&');
73 } else {
74 uriBuilder.append('?');
75 hasParams = true;
76 }
77
78 encodeComponent(name);
79 if (value != null) {
80 uriBuilder.append('=');
81 encodeComponent(value);
82 }
83 }
84
85 private void encodeComponent(CharSequence s) {
86 if (charset == null) {
87 encodeUtf8Component(s);
88 } else {
89 encodeNonUtf8Component(s);
90 }
91 }
92
93 /**
94 * Returns the URL-encoded URI object which was created from the path string
95 * specified in the constructor and the parameters added by
96 * {@link #addParam(String, String)} method.
97 */
98 public URI toUri() throws URISyntaxException {
99 return new URI(toString());
100 }
101
102 /**
103 * Returns the URL-encoded URI which was created from the path string
104 * specified in the constructor and the parameters added by
105 * {@link #addParam(String, String)} method.
106 */
107 @Override
108 public String toString() {
109 return uriBuilder.toString();
110 }
111
112 /**
113 * Encode the String as per RFC 3986, Section 2.
114 * <p>
115 * There is a little different between the JDK's encode method : {@link URLEncoder#encode(String, String)}.
116 * The JDK's encoder encode the space to {@code +} and this method directly encode the blank to {@code %20}
117 * beyond that , this method reuse the {@link #uriBuilder} in this class rather then create a new one,
118 * thus generates less garbage for the GC.
119 *
120 * @param s The String to encode
121 */
122 private void encodeNonUtf8Component(CharSequence s) {
123 //Don't allocate memory until needed
124 char[] buf = null;
125
126 for (int i = 0, len = s.length(); i < len;) {
127 char c = s.charAt(i);
128 if (dontNeedEncoding(c)) {
129 uriBuilder.append(c);
130 i++;
131 } else {
132 int index = 0;
133 if (buf == null) {
134 buf = new char[s.length() - i];
135 }
136
137 do {
138 buf[index] = c;
139 index++;
140 i++;
141 } while (i < s.length() && !dontNeedEncoding(c = s.charAt(i)));
142
143 byte[] bytes = new String(buf, 0, index).getBytes(charset);
144
145 for (byte b : bytes) {
146 appendEncoded(b);
147 }
148 }
149 }
150 }
151
152 /**
153 * @see ByteBufUtil#writeUtf8(io.netty.buffer.ByteBuf, CharSequence, int, int)
154 */
155 private void encodeUtf8Component(CharSequence s) {
156 for (int i = 0, len = s.length(); i < len; i++) {
157 char c = s.charAt(i);
158 if (!dontNeedEncoding(c)) {
159 encodeUtf8Component(s, i, len);
160 return;
161 }
162 }
163 uriBuilder.append(s);
164 }
165
166 private void encodeUtf8Component(CharSequence s, int encodingStart, int len) {
167 if (encodingStart > 0) {
168 // Append non-encoded characters directly first.
169 uriBuilder.append(s, 0, encodingStart);
170 }
171 encodeUtf8ComponentSlow(s, encodingStart, len);
172 }
173
174 private void encodeUtf8ComponentSlow(CharSequence s, int start, int len) {
175 for (int i = start; i < len; i++) {
176 char c = s.charAt(i);
177 if (c < 0x80) {
178 if (dontNeedEncoding(c)) {
179 uriBuilder.append(c);
180 } else {
181 appendEncoded(c);
182 }
183 } else if (c < 0x800) {
184 appendEncoded(0xc0 | (c >> 6));
185 appendEncoded(0x80 | (c & 0x3f));
186 } else if (StringUtil.isSurrogate(c)) {
187 if (!Character.isHighSurrogate(c)) {
188 appendEncoded(WRITE_UTF_UNKNOWN);
189 continue;
190 }
191 // Surrogate Pair consumes 2 characters.
192 if (++i == s.length()) {
193 appendEncoded(WRITE_UTF_UNKNOWN);
194 break;
195 }
196 // Extra method to allow inlining the rest of writeUtf8 which is the most likely code path.
197 writeUtf8Surrogate(c, s.charAt(i));
198 } else {
199 appendEncoded(0xe0 | (c >> 12));
200 appendEncoded(0x80 | ((c >> 6) & 0x3f));
201 appendEncoded(0x80 | (c & 0x3f));
202 }
203 }
204 }
205
206 private void writeUtf8Surrogate(char c, char c2) {
207 if (!Character.isLowSurrogate(c2)) {
208 appendEncoded(WRITE_UTF_UNKNOWN);
209 appendEncoded(Character.isHighSurrogate(c2) ? WRITE_UTF_UNKNOWN : c2);
210 return;
211 }
212 int codePoint = Character.toCodePoint(c, c2);
213 // See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G2630.
214 appendEncoded(0xf0 | (codePoint >> 18));
215 appendEncoded(0x80 | ((codePoint >> 12) & 0x3f));
216 appendEncoded(0x80 | ((codePoint >> 6) & 0x3f));
217 appendEncoded(0x80 | (codePoint & 0x3f));
218 }
219
220 private void appendEncoded(int b) {
221 uriBuilder.append('%').append(forDigit(b >> 4)).append(forDigit(b));
222 }
223
224 /**
225 * Convert the given digit to a upper hexadecimal char.
226 *
227 * @param digit the number to convert to a character.
228 * @return the {@code char} representation of the specified digit
229 * in hexadecimal.
230 */
231 private static char forDigit(int digit) {
232 return CHAR_MAP[digit & 0xF];
233 }
234
235 /**
236 * Determines whether the given character is a unreserved character.
237 * <p>
238 * unreserved characters do not need to be encoded, and include uppercase and lowercase
239 * letters, decimal digits, hyphen, period, underscore, and tilde.
240 * <p>
241 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / "*"
242 *
243 * @param ch the char to be judged whether it need to be encode
244 * @return true or false
245 */
246 private static boolean dontNeedEncoding(char ch) {
247 return ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= '0' && ch <= '9'
248 || ch == '-' || ch == '_' || ch == '.' || ch == '*' || ch == '~';
249 }
250 }