View Javadoc
1   /*
2    * Copyright 2012 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty5.handler.codec.http;
17  
18  import io.netty5.util.CharsetUtil;
19  import io.netty5.util.internal.StringUtil;
20  
21  import java.net.URI;
22  import java.net.URISyntaxException;
23  import java.net.URLEncoder;
24  import java.nio.charset.Charset;
25  import java.util.Objects;
26  
27  import static java.util.Objects.requireNonNull;
28  
29  /**
30   * Creates a URL-encoded URI from a path string and key-value parameter pairs.
31   * This encoder is for one time use only.  Create a new instance for each URI.
32   *
33   * <pre>
34   * {@link QueryStringEncoder} encoder = new {@link QueryStringEncoder}("/hello");
35   * encoder.addParam("recipient", "world");
36   * assert encoder.toString().equals("/hello?recipient=world");
37   * </pre>
38   *
39   * @see QueryStringDecoder
40   */
41  public class QueryStringEncoder {
42  
43      private final Charset charset;
44      private final StringBuilder uriBuilder;
45      private boolean hasParams;
46      private static final byte WRITE_UTF_UNKNOWN = (byte) '?';
47      private static final char[] CHAR_MAP = "0123456789ABCDEF".toCharArray();
48  
49      /**
50       * Creates a new encoder that encodes a URI that starts with the specified
51       * path string.  The encoder will encode the URI in UTF-8.
52       */
53      public QueryStringEncoder(String uri) {
54          this(uri, HttpConstants.DEFAULT_CHARSET);
55      }
56  
57      /**
58       * Creates a new encoder that encodes a URI that starts with the specified
59       * path string in the specified charset.
60       */
61      public QueryStringEncoder(String uri, Charset charset) {
62          Objects.requireNonNull(charset, "charset");
63          uriBuilder = new StringBuilder(uri);
64          this.charset = CharsetUtil.UTF_8.equals(charset) ? null : charset;
65      }
66  
67      /**
68       * Adds a parameter with the specified name and value to this encoder.
69       */
70      public void addParam(String name, String value) {
71          requireNonNull(name, "name");
72          if (hasParams) {
73              uriBuilder.append('&');
74          } else {
75              uriBuilder.append('?');
76              hasParams = true;
77          }
78  
79          encodeComponent(name);
80          if (value != null) {
81              uriBuilder.append('=');
82              encodeComponent(value);
83          }
84      }
85  
86      private void encodeComponent(CharSequence s) {
87          if (charset == null) {
88              encodeUtf8Component(s);
89          } else {
90              encodeNonUtf8Component(s);
91          }
92      }
93  
94      /**
95       * Returns the URL-encoded URI object which was created from the path string
96       * specified in the constructor and the parameters added by
97       * {@link #addParam(String, String)} method.
98       */
99      public URI toUri() throws URISyntaxException {
100         return new URI(toString());
101     }
102 
103     /**
104      * Returns the URL-encoded URI which was created from the path string
105      * specified in the constructor and the parameters added by
106      * {@link #addParam(String, String)} method.
107      */
108     @Override
109     public String toString() {
110         return uriBuilder.toString();
111     }
112 
113     /**
114      * Encode the String as per RFC 3986, Section 2.
115      * <p>
116      * There is a little different between the JDK's encode method : {@link URLEncoder#encode(String, String)}.
117      * The JDK's encoder encode the space to {@code +} and this method directly encode the blank to {@code %20}
118      * beyond that , this method reuse the {@link #uriBuilder} in this class rather then create a new one,
119      * thus generates less garbage for the GC.
120      *
121      * @param s The String to encode
122      */
123     private void encodeNonUtf8Component(CharSequence s) {
124         //Don't allocate memory until needed
125         char[] buf = null;
126 
127         for (int i = 0, len = s.length(); i < len;) {
128             char c = s.charAt(i);
129             if (dontNeedEncoding(c)) {
130                 uriBuilder.append(c);
131                 i++;
132             } else {
133                 int index = 0;
134                 if (buf == null) {
135                     buf = new char[s.length() - i];
136                 }
137 
138                 do {
139                     buf[index] = c;
140                     index++;
141                     i++;
142                 } while (i < s.length() && !dontNeedEncoding(c = s.charAt(i)));
143 
144                 byte[] bytes = new String(buf, 0, index).getBytes(charset);
145 
146                 for (byte b : bytes) {
147                     appendEncoded(b);
148                 }
149             }
150         }
151     }
152 
153     private void encodeUtf8Component(CharSequence s) {
154         for (int i = 0, len = s.length(); i < len; i++) {
155             char c = s.charAt(i);
156             if (!dontNeedEncoding(c)) {
157                 encodeUtf8Component(s, i, len);
158                 return;
159             }
160         }
161         uriBuilder.append(s);
162     }
163 
164     private void encodeUtf8Component(CharSequence s, int encodingStart, int len) {
165         if (encodingStart > 0) {
166             // Append non-encoded characters directly first.
167             uriBuilder.append(s, 0, encodingStart);
168         }
169         encodeUtf8ComponentSlow(s, encodingStart, len);
170     }
171 
172     private void encodeUtf8ComponentSlow(CharSequence s, int start, int len) {
173         for (int i = start; i < len; i++) {
174             char c = s.charAt(i);
175             if (c < 0x80) {
176                 if (dontNeedEncoding(c)) {
177                     uriBuilder.append(c);
178                 } else {
179                     appendEncoded(c);
180                 }
181             } else if (c < 0x800) {
182                 appendEncoded(0xc0 | (c >> 6));
183                 appendEncoded(0x80 | (c & 0x3f));
184             } else if (StringUtil.isSurrogate(c)) {
185                 if (!Character.isHighSurrogate(c)) {
186                     appendEncoded(WRITE_UTF_UNKNOWN);
187                     continue;
188                 }
189                 // Surrogate Pair consumes 2 characters.
190                 if (++i == s.length()) {
191                     appendEncoded(WRITE_UTF_UNKNOWN);
192                     break;
193                 }
194                 // Extra method to allow inlining the rest of writeUtf8 which is the most likely code path.
195                 writeUtf8Surrogate(c, s.charAt(i));
196             } else {
197                 appendEncoded(0xe0 | (c >> 12));
198                 appendEncoded(0x80 | ((c >> 6) & 0x3f));
199                 appendEncoded(0x80 | (c & 0x3f));
200             }
201         }
202     }
203 
204     private void writeUtf8Surrogate(char c, char c2) {
205         if (!Character.isLowSurrogate(c2)) {
206             appendEncoded(WRITE_UTF_UNKNOWN);
207             appendEncoded(Character.isHighSurrogate(c2) ? WRITE_UTF_UNKNOWN : c2);
208             return;
209         }
210         int codePoint = Character.toCodePoint(c, c2);
211         // See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G2630.
212         appendEncoded(0xf0 | (codePoint >> 18));
213         appendEncoded(0x80 | ((codePoint >> 12) & 0x3f));
214         appendEncoded(0x80 | ((codePoint >> 6) & 0x3f));
215         appendEncoded(0x80 | (codePoint & 0x3f));
216     }
217 
218     private void appendEncoded(int b) {
219         uriBuilder.append('%').append(forDigit(b >> 4)).append(forDigit(b));
220     }
221 
222     /**
223      * Convert the given digit to a upper hexadecimal char.
224      *
225      * @param digit the number to convert to a character.
226      * @return the {@code char} representation of the specified digit
227      * in hexadecimal.
228      */
229     private static char forDigit(int digit) {
230         return CHAR_MAP[digit & 0xF];
231     }
232 
233     /**
234      * Determines whether the given character is a unreserved character.
235      * <p>
236      * unreserved characters do not need to be encoded, and include uppercase and lowercase
237      * letters, decimal digits, hyphen, period, underscore, and tilde.
238      * <p>
239      * unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~" / "*"
240      *
241      * @param ch the char to be judged whether it need to be encode
242      * @return true or false
243      */
244     private static boolean dontNeedEncoding(char ch) {
245         return ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= '0' && ch <= '9'
246                 || ch == '-' || ch == '_' || ch == '.' || ch == '*' || ch == '~';
247     }
248 }