1 /*
2 * Copyright 2012 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package io.netty5.handler.codec.http;
17
18 import io.netty5.util.CharsetUtil;
19 import io.netty5.util.internal.StringUtil;
20
21 import java.net.URI;
22 import java.net.URISyntaxException;
23 import java.net.URLEncoder;
24 import java.nio.charset.Charset;
25 import java.util.Objects;
26
27 import static java.util.Objects.requireNonNull;
28
29 /**
30 * Creates a URL-encoded URI from a path string and key-value parameter pairs.
31 * This encoder is for one time use only. Create a new instance for each URI.
32 *
33 * <pre>
34 * {@link QueryStringEncoder} encoder = new {@link QueryStringEncoder}("/hello");
35 * encoder.addParam("recipient", "world");
36 * assert encoder.toString().equals("/hello?recipient=world");
37 * </pre>
38 *
39 * @see QueryStringDecoder
40 */
41 public class QueryStringEncoder {
42
43 private final Charset charset;
44 private final StringBuilder uriBuilder;
45 private boolean hasParams;
46 private static final byte WRITE_UTF_UNKNOWN = (byte) '?';
47 private static final char[] CHAR_MAP = "0123456789ABCDEF".toCharArray();
48
49 /**
50 * Creates a new encoder that encodes a URI that starts with the specified
51 * path string. The encoder will encode the URI in UTF-8.
52 */
53 public QueryStringEncoder(String uri) {
54 this(uri, HttpConstants.DEFAULT_CHARSET);
55 }
56
57 /**
58 * Creates a new encoder that encodes a URI that starts with the specified
59 * path string in the specified charset.
60 */
61 public QueryStringEncoder(String uri, Charset charset) {
62 Objects.requireNonNull(charset, "charset");
63 uriBuilder = new StringBuilder(uri);
64 this.charset = CharsetUtil.UTF_8.equals(charset) ? null : charset;
65 }
66
67 /**
68 * Adds a parameter with the specified name and value to this encoder.
69 */
70 public void addParam(String name, String value) {
71 requireNonNull(name, "name");
72 if (hasParams) {
73 uriBuilder.append('&');
74 } else {
75 uriBuilder.append('?');
76 hasParams = true;
77 }
78
79 encodeComponent(name);
80 if (value != null) {
81 uriBuilder.append('=');
82 encodeComponent(value);
83 }
84 }
85
86 private void encodeComponent(CharSequence s) {
87 if (charset == null) {
88 encodeUtf8Component(s);
89 } else {
90 encodeNonUtf8Component(s);
91 }
92 }
93
94 /**
95 * Returns the URL-encoded URI object which was created from the path string
96 * specified in the constructor and the parameters added by
97 * {@link #addParam(String, String)} method.
98 */
99 public URI toUri() throws URISyntaxException {
100 return new URI(toString());
101 }
102
103 /**
104 * Returns the URL-encoded URI which was created from the path string
105 * specified in the constructor and the parameters added by
106 * {@link #addParam(String, String)} method.
107 */
108 @Override
109 public String toString() {
110 return uriBuilder.toString();
111 }
112
113 /**
114 * Encode the String as per RFC 3986, Section 2.
115 * <p>
116 * There is a little different between the JDK's encode method : {@link URLEncoder#encode(String, String)}.
117 * The JDK's encoder encode the space to {@code +} and this method directly encode the blank to {@code %20}
118 * beyond that , this method reuse the {@link #uriBuilder} in this class rather then create a new one,
119 * thus generates less garbage for the GC.
120 *
121 * @param s The String to encode
122 */
123 private void encodeNonUtf8Component(CharSequence s) {
124 //Don't allocate memory until needed
125 char[] buf = null;
126
127 for (int i = 0, len = s.length(); i < len;) {
128 char c = s.charAt(i);
129 if (dontNeedEncoding(c)) {
130 uriBuilder.append(c);
131 i++;
132 } else {
133 int index = 0;
134 if (buf == null) {
135 buf = new char[s.length() - i];
136 }
137
138 do {
139 buf[index] = c;
140 index++;
141 i++;
142 } while (i < s.length() && !dontNeedEncoding(c = s.charAt(i)));
143
144 byte[] bytes = new String(buf, 0, index).getBytes(charset);
145
146 for (byte b : bytes) {
147 appendEncoded(b);
148 }
149 }
150 }
151 }
152
153 private void encodeUtf8Component(CharSequence s) {
154 for (int i = 0, len = s.length(); i < len; i++) {
155 char c = s.charAt(i);
156 if (!dontNeedEncoding(c)) {
157 encodeUtf8Component(s, i, len);
158 return;
159 }
160 }
161 uriBuilder.append(s);
162 }
163
164 private void encodeUtf8Component(CharSequence s, int encodingStart, int len) {
165 if (encodingStart > 0) {
166 // Append non-encoded characters directly first.
167 uriBuilder.append(s, 0, encodingStart);
168 }
169 encodeUtf8ComponentSlow(s, encodingStart, len);
170 }
171
172 private void encodeUtf8ComponentSlow(CharSequence s, int start, int len) {
173 for (int i = start; i < len; i++) {
174 char c = s.charAt(i);
175 if (c < 0x80) {
176 if (dontNeedEncoding(c)) {
177 uriBuilder.append(c);
178 } else {
179 appendEncoded(c);
180 }
181 } else if (c < 0x800) {
182 appendEncoded(0xc0 | (c >> 6));
183 appendEncoded(0x80 | (c & 0x3f));
184 } else if (StringUtil.isSurrogate(c)) {
185 if (!Character.isHighSurrogate(c)) {
186 appendEncoded(WRITE_UTF_UNKNOWN);
187 continue;
188 }
189 // Surrogate Pair consumes 2 characters.
190 if (++i == s.length()) {
191 appendEncoded(WRITE_UTF_UNKNOWN);
192 break;
193 }
194 // Extra method to allow inlining the rest of writeUtf8 which is the most likely code path.
195 writeUtf8Surrogate(c, s.charAt(i));
196 } else {
197 appendEncoded(0xe0 | (c >> 12));
198 appendEncoded(0x80 | ((c >> 6) & 0x3f));
199 appendEncoded(0x80 | (c & 0x3f));
200 }
201 }
202 }
203
204 private void writeUtf8Surrogate(char c, char c2) {
205 if (!Character.isLowSurrogate(c2)) {
206 appendEncoded(WRITE_UTF_UNKNOWN);
207 appendEncoded(Character.isHighSurrogate(c2) ? WRITE_UTF_UNKNOWN : c2);
208 return;
209 }
210 int codePoint = Character.toCodePoint(c, c2);
211 // See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G2630.
212 appendEncoded(0xf0 | (codePoint >> 18));
213 appendEncoded(0x80 | ((codePoint >> 12) & 0x3f));
214 appendEncoded(0x80 | ((codePoint >> 6) & 0x3f));
215 appendEncoded(0x80 | (codePoint & 0x3f));
216 }
217
218 private void appendEncoded(int b) {
219 uriBuilder.append('%').append(forDigit(b >> 4)).append(forDigit(b));
220 }
221
222 /**
223 * Convert the given digit to a upper hexadecimal char.
224 *
225 * @param digit the number to convert to a character.
226 * @return the {@code char} representation of the specified digit
227 * in hexadecimal.
228 */
229 private static char forDigit(int digit) {
230 return CHAR_MAP[digit & 0xF];
231 }
232
233 /**
234 * Determines whether the given character is a unreserved character.
235 * <p>
236 * unreserved characters do not need to be encoded, and include uppercase and lowercase
237 * letters, decimal digits, hyphen, period, underscore, and tilde.
238 * <p>
239 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / "*"
240 *
241 * @param ch the char to be judged whether it need to be encode
242 * @return true or false
243 */
244 private static boolean dontNeedEncoding(char ch) {
245 return ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= '0' && ch <= '9'
246 || ch == '-' || ch == '_' || ch == '.' || ch == '*' || ch == '~';
247 }
248 }