1 /*
2 * Copyright 2015 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package io.netty.handler.codec.http;
17
18 import java.net.InetSocketAddress;
19 import java.net.URI;
20 import java.nio.charset.Charset;
21 import java.nio.charset.IllegalCharsetNameException;
22 import java.nio.charset.UnsupportedCharsetException;
23 import java.util.ArrayList;
24 import java.util.Iterator;
25 import java.util.List;
26
27 import io.netty.util.AsciiString;
28 import io.netty.util.CharsetUtil;
29 import io.netty.util.NetUtil;
30 import io.netty.util.internal.ObjectUtil;
31
32 import static io.netty.util.internal.StringUtil.COMMA;
33 import static io.netty.util.internal.ObjectUtil.checkPositiveOrZero;
34
35 /**
36 * Utility methods useful in the HTTP context.
37 */
38 public final class HttpUtil {
39
// "charset=" parameter prefix that is searched for inside Content-Type header values.
private static final AsciiString CHARSET_EQUALS = AsciiString.of(HttpHeaderValues.CHARSET + "=");

// Separator between the media type and its parameters inside a Content-Type header value.
private static final AsciiString SEMICOLON = AsciiString.cached(";");

// String form of the comma separator; used to split list-valued Content-Length fields.
private static final String COMMA_STRING = Character.toString(COMMA);

// Static utility holder: instantiation is not allowed.
private HttpUtil() {
}
45
46 /**
47 * Determine if a uri is in origin-form according to
48 * <a href="https://datatracker.ietf.org/doc/html/rfc9112#section-3.2.1">RFC 9112, 3.2.1</a>.
49 */
50 public static boolean isOriginForm(URI uri) {
51 return isOriginForm(uri.toString());
52 }
53
54 /**
55 * Determine if a string uri is in origin-form according to
56 * <a href="https://datatracker.ietf.org/doc/html/rfc9112#section-3.2.1">RFC 9112, 3.2.1</a>.
57 */
58 public static boolean isOriginForm(String uri) {
59 return uri.startsWith("/");
60 }
61
62 /**
63 * Determine if a uri is in asterisk-form according to
64 * <a href="https://datatracker.ietf.org/doc/html/rfc9112#section-3.2.4">RFC 9112, 3.2.4</a>.
65 */
66 public static boolean isAsteriskForm(URI uri) {
67 return isAsteriskForm(uri.toString());
68 }
69
70 /**
71 * Determine if a string uri is in asterisk-form according to
72 * <a href="https://datatracker.ietf.org/doc/html/rfc9112#section-3.2.4">RFC 9112, 3.2.4</a>.
73 */
74 public static boolean isAsteriskForm(String uri) {
75 return "*".equals(uri);
76 }
77
78 static void validateRequestLineTokens(HttpMethod method, String uri) {
79 // The HttpVersion class does its own validation, and it's not possible for subclasses to circumvent it.
80 // The HttpMethod class does its own validation, but subclasses might circumvent it.
81 if (method.getClass() != HttpMethod.class) {
82 if (!isEncodingSafeStartLineToken(method.asciiName())) {
83 throw new IllegalArgumentException(
84 "The HTTP method name contain illegal characters: " + method.asciiName());
85 }
86 }
87
88 if (!isEncodingSafeStartLineToken(uri)) {
89 throw new IllegalArgumentException("The URI contain illegal characters: " + uri);
90 }
91 }
92
93 /**
94 * Validate that the given request line token is safe for verbatim encoding to the network.
95 * This does not fully check that the token – HTTP method, version, or URI – is valid and formatted correctly.
96 * Only that the token does not contain characters that would break or
97 * desynchronize HTTP message parsing of the start line wherein the token would be included.
98 * <p>
99 * See <a href="https://datatracker.ietf.org/doc/html/rfc9112#name-request-line">RFC 9112, 3.</a>
100 *
101 * @param token The token to check.
102 * @return {@code true} if the token is safe to encode verbatim into the HTTP message output stream,
103 * otherwise {@code false}.
104 */
105 public static boolean isEncodingSafeStartLineToken(CharSequence token) {
106 int lenBytes = token.length();
107 for (int i = 0; i < lenBytes; i++) {
108 char ch = token.charAt(i);
109 // this is to help AOT compiled code which cannot profile the switch
110 if (ch <= ' ') {
111 switch (ch) {
112 case '\n':
113 case '\r':
114 case ' ':
115 return false;
116 }
117 }
118 }
119 return true;
120 }
121
122 /**
123 * Returns {@code true} if and only if the connection can remain open and
124 * thus 'kept alive'. This method respects the value of the
125 * {@code "Connection"} header first and then the return value of
126 * {@link HttpVersion#isKeepAliveDefault()}.
127 */
128 public static boolean isKeepAlive(HttpMessage message) {
129 return !message.headers().containsValue(HttpHeaderNames.CONNECTION, HttpHeaderValues.CLOSE, true) &&
130 (message.protocolVersion().isKeepAliveDefault() ||
131 message.headers().containsValue(HttpHeaderNames.CONNECTION, HttpHeaderValues.KEEP_ALIVE, true));
132 }
133
134 /**
135 * Sets the value of the {@code "Connection"} header depending on the
136 * protocol version of the specified message. This getMethod sets or removes
137 * the {@code "Connection"} header depending on what the default keep alive
138 * mode of the message's protocol version is, as specified by
139 * {@link HttpVersion#isKeepAliveDefault()}.
140 * <ul>
141 * <li>If the connection is kept alive by default:
142 * <ul>
143 * <li>set to {@code "close"} if {@code keepAlive} is {@code false}.</li>
144 * <li>remove otherwise.</li>
145 * </ul></li>
146 * <li>If the connection is closed by default:
147 * <ul>
148 * <li>set to {@code "keep-alive"} if {@code keepAlive} is {@code true}.</li>
149 * <li>remove otherwise.</li>
150 * </ul></li>
151 * </ul>
152 * @see #setKeepAlive(HttpHeaders, HttpVersion, boolean)
153 */
154 public static void setKeepAlive(HttpMessage message, boolean keepAlive) {
155 setKeepAlive(message.headers(), message.protocolVersion(), keepAlive);
156 }
157
158 /**
159 * Sets the value of the {@code "Connection"} header depending on the
160 * protocol version of the specified message. This getMethod sets or removes
161 * the {@code "Connection"} header depending on what the default keep alive
162 * mode of the message's protocol version is, as specified by
163 * {@link HttpVersion#isKeepAliveDefault()}.
164 * <ul>
165 * <li>If the connection is kept alive by default:
166 * <ul>
167 * <li>set to {@code "close"} if {@code keepAlive} is {@code false}.</li>
168 * <li>remove otherwise.</li>
169 * </ul></li>
170 * <li>If the connection is closed by default:
171 * <ul>
172 * <li>set to {@code "keep-alive"} if {@code keepAlive} is {@code true}.</li>
173 * <li>remove otherwise.</li>
174 * </ul></li>
175 * </ul>
176 */
177 public static void setKeepAlive(HttpHeaders h, HttpVersion httpVersion, boolean keepAlive) {
178 if (httpVersion.isKeepAliveDefault()) {
179 if (keepAlive) {
180 h.remove(HttpHeaderNames.CONNECTION);
181 } else {
182 h.set(HttpHeaderNames.CONNECTION, HttpHeaderValues.CLOSE);
183 }
184 } else {
185 if (keepAlive) {
186 h.set(HttpHeaderNames.CONNECTION, HttpHeaderValues.KEEP_ALIVE);
187 } else {
188 h.remove(HttpHeaderNames.CONNECTION);
189 }
190 }
191 }
192
193 /**
194 * Returns the length of the content. Please note that this value is
195 * not retrieved from {@link HttpContent#content()} but from the
196 * {@code "Content-Length"} header, and thus they are independent from each
197 * other.
198 *
199 * @return the content length
200 *
201 * @throws NumberFormatException
202 * if the message does not have the {@code "Content-Length"} header
203 * or its value is not a number
204 */
205 public static long getContentLength(HttpMessage message) {
206 String value = message.headers().get(HttpHeaderNames.CONTENT_LENGTH);
207 if (value != null) {
208 return Long.parseLong(value);
209 }
210
211 // We know the content length if it's a Web Socket message even if
212 // Content-Length header is missing.
213 long webSocketContentLength = getWebSocketContentLength(message);
214 if (webSocketContentLength >= 0) {
215 return webSocketContentLength;
216 }
217
218 // Otherwise we don't.
219 throw new NumberFormatException("header not found: " + HttpHeaderNames.CONTENT_LENGTH);
220 }
221
222 /**
223 * Returns the length of the content or the specified default value if the message does not have the {@code
224 * "Content-Length" header}. Please note that this value is not retrieved from {@link HttpContent#content()} but
225 * from the {@code "Content-Length"} header, and thus they are independent from each other.
226 *
227 * @param message the message
228 * @param defaultValue the default value
229 * @return the content length or the specified default value
230 * @throws NumberFormatException if the {@code "Content-Length"} header does not parse as a long
231 */
232 public static long getContentLength(HttpMessage message, long defaultValue) {
233 String value = message.headers().get(HttpHeaderNames.CONTENT_LENGTH);
234 if (value != null) {
235 return Long.parseLong(value);
236 }
237
238 // We know the content length if it's a Web Socket message even if
239 // Content-Length header is missing.
240 long webSocketContentLength = getWebSocketContentLength(message);
241 if (webSocketContentLength >= 0) {
242 return webSocketContentLength;
243 }
244
245 // Otherwise we don't.
246 return defaultValue;
247 }
248
249 /**
250 * Get an {@code int} representation of {@link #getContentLength(HttpMessage, long)}.
251 *
252 * @return the content length or {@code defaultValue} if this message does
253 * not have the {@code "Content-Length"} header.
254 *
255 * @throws NumberFormatException if the {@code "Content-Length"} header does not parse as an int
256 */
257 public static int getContentLength(HttpMessage message, int defaultValue) {
258 return (int) Math.min(Integer.MAX_VALUE, getContentLength(message, (long) defaultValue));
259 }
260
261 /**
262 * Returns the content length of the specified web socket message. If the
263 * specified message is not a web socket message, {@code -1} is returned.
264 */
265 static int getWebSocketContentLength(HttpMessage message) {
266 // WebSocket messages have constant content-lengths.
267 HttpHeaders h = message.headers();
268 if (message instanceof HttpRequest) {
269 HttpRequest req = (HttpRequest) message;
270 if (HttpMethod.GET.equals(req.method()) &&
271 h.contains(HttpHeaderNames.SEC_WEBSOCKET_KEY1) &&
272 h.contains(HttpHeaderNames.SEC_WEBSOCKET_KEY2)) {
273 return 8;
274 }
275 } else if (message instanceof HttpResponse) {
276 HttpResponse res = (HttpResponse) message;
277 if (res.status().code() == 101 &&
278 h.contains(HttpHeaderNames.SEC_WEBSOCKET_ORIGIN) &&
279 h.contains(HttpHeaderNames.SEC_WEBSOCKET_LOCATION)) {
280 return 16;
281 }
282 }
283
284 // Not a web socket message
285 return -1;
286 }
287
288 /**
289 * Sets the {@code "Content-Length"} header.
290 */
291 public static void setContentLength(HttpMessage message, long length) {
292 message.headers().set(HttpHeaderNames.CONTENT_LENGTH, length);
293 }
294
295 public static boolean isContentLengthSet(HttpMessage m) {
296 return m.headers().contains(HttpHeaderNames.CONTENT_LENGTH);
297 }
298
299 /**
300 * Returns {@code true} if and only if the specified message contains an expect header and the only expectation
301 * present is the 100-continue expectation. Note that this method returns {@code false} if the expect header is
302 * not valid for the message (e.g., the message is a response, or the version on the message is HTTP/1.0).
303 *
304 * @param message the message
305 * @return {@code true} if and only if the expectation 100-continue is present and it is the only expectation
306 * present
307 */
308 public static boolean is100ContinueExpected(HttpMessage message) {
309 return isExpectHeaderValid(message)
310 // unquoted tokens in the expect header are case-insensitive, thus 100-continue is case insensitive
311 && message.headers().contains(HttpHeaderNames.EXPECT, HttpHeaderValues.CONTINUE, true);
312 }
313
314 /**
315 * Returns {@code true} if the specified message contains an expect header specifying an expectation that is not
316 * supported. Note that this method returns {@code false} if the expect header is not valid for the message
317 * (e.g., the message is a response, or the version on the message is HTTP/1.0).
318 *
319 * @param message the message
320 * @return {@code true} if and only if an expectation is present that is not supported
321 */
322 static boolean isUnsupportedExpectation(HttpMessage message) {
323 if (!isExpectHeaderValid(message)) {
324 return false;
325 }
326
327 final String expectValue = message.headers().get(HttpHeaderNames.EXPECT);
328 return expectValue != null && !HttpHeaderValues.CONTINUE.toString().equalsIgnoreCase(expectValue);
329 }
330
331 private static boolean isExpectHeaderValid(final HttpMessage message) {
332 /*
333 * Expect: 100-continue is for requests only and it works only on HTTP/1.1 or later. Note further that RFC 7231
334 * section 5.1.1 says "A server that receives a 100-continue expectation in an HTTP/1.0 request MUST ignore
335 * that expectation."
336 */
337 return message instanceof HttpRequest &&
338 message.protocolVersion().compareTo(HttpVersion.HTTP_1_1) >= 0;
339 }
340
341 /**
342 * Sets or removes the {@code "Expect: 100-continue"} header to / from the
343 * specified message. If {@code expected} is {@code true},
344 * the {@code "Expect: 100-continue"} header is set and all other previous
345 * {@code "Expect"} headers are removed. Otherwise, all {@code "Expect"}
346 * headers are removed completely.
347 */
348 public static void set100ContinueExpected(HttpMessage message, boolean expected) {
349 if (expected) {
350 message.headers().set(HttpHeaderNames.EXPECT, HttpHeaderValues.CONTINUE);
351 } else {
352 message.headers().remove(HttpHeaderNames.EXPECT);
353 }
354 }
355
356 /**
357 * Checks to see if the transfer encoding in a specified {@link HttpMessage} is chunked
358 *
359 * @param message The message to check
360 * @return True if transfer encoding is chunked, otherwise false
361 */
362 public static boolean isTransferEncodingChunked(HttpMessage message) {
363 return message.headers().containsValue(HttpHeaderNames.TRANSFER_ENCODING, HttpHeaderValues.CHUNKED, true);
364 }
365
366 /**
367 * Set the {@link HttpHeaderNames#TRANSFER_ENCODING} to either include {@link HttpHeaderValues#CHUNKED} if
368 * {@code chunked} is {@code true}, or remove {@link HttpHeaderValues#CHUNKED} if {@code chunked} is {@code false}.
369 *
370 * @param m The message which contains the headers to modify.
371 * @param chunked if {@code true} then include {@link HttpHeaderValues#CHUNKED} in the headers. otherwise remove
372 * {@link HttpHeaderValues#CHUNKED} from the headers.
373 */
374 public static void setTransferEncodingChunked(HttpMessage m, boolean chunked) {
375 if (chunked) {
376 m.headers().set(HttpHeaderNames.TRANSFER_ENCODING, HttpHeaderValues.CHUNKED);
377 m.headers().remove(HttpHeaderNames.CONTENT_LENGTH);
378 } else {
379 List<String> encodings = m.headers().getAll(HttpHeaderNames.TRANSFER_ENCODING);
380 if (encodings.isEmpty()) {
381 return;
382 }
383 List<CharSequence> values = new ArrayList<CharSequence>(encodings);
384 Iterator<CharSequence> valuesIt = values.iterator();
385 while (valuesIt.hasNext()) {
386 CharSequence value = valuesIt.next();
387 if (HttpHeaderValues.CHUNKED.contentEqualsIgnoreCase(value)) {
388 valuesIt.remove();
389 }
390 }
391 if (values.isEmpty()) {
392 m.headers().remove(HttpHeaderNames.TRANSFER_ENCODING);
393 } else {
394 m.headers().set(HttpHeaderNames.TRANSFER_ENCODING, values);
395 }
396 }
397 }
398
399 /**
400 * Fetch charset from message's Content-Type header.
401 *
402 * @param message entity to fetch Content-Type header from
403 * @return the charset from message's Content-Type header or {@link CharsetUtil#ISO_8859_1}
404 * if charset is not presented or unparsable
405 */
406 public static Charset getCharset(HttpMessage message) {
407 return getCharset(message, CharsetUtil.ISO_8859_1);
408 }
409
410 /**
411 * Fetch charset from Content-Type header value.
412 *
413 * @param contentTypeValue Content-Type header value to parse
414 * @return the charset from message's Content-Type header or {@link CharsetUtil#ISO_8859_1}
415 * if charset is not presented or unparsable
416 */
417 public static Charset getCharset(CharSequence contentTypeValue) {
418 if (contentTypeValue != null) {
419 return getCharset(contentTypeValue, CharsetUtil.ISO_8859_1);
420 } else {
421 return CharsetUtil.ISO_8859_1;
422 }
423 }
424
425 /**
426 * Fetch charset from message's Content-Type header.
427 *
428 * @param message entity to fetch Content-Type header from
429 * @param defaultCharset result to use in case of empty, incorrect or doesn't contain required part header value
430 * @return the charset from message's Content-Type header or {@code defaultCharset}
431 * if charset is not presented or unparsable
432 */
433 public static Charset getCharset(HttpMessage message, Charset defaultCharset) {
434 CharSequence contentTypeValue = message.headers().get(HttpHeaderNames.CONTENT_TYPE);
435 if (contentTypeValue != null) {
436 return getCharset(contentTypeValue, defaultCharset);
437 } else {
438 return defaultCharset;
439 }
440 }
441
442 /**
443 * Fetch charset from Content-Type header value.
444 *
445 * @param contentTypeValue Content-Type header value to parse
446 * @param defaultCharset result to use in case of empty, incorrect or doesn't contain required part header value
447 * @return the charset from message's Content-Type header or {@code defaultCharset}
448 * if charset is not presented or unparsable
449 */
450 public static Charset getCharset(CharSequence contentTypeValue, Charset defaultCharset) {
451 if (contentTypeValue != null) {
452 CharSequence charsetRaw = getCharsetAsSequence(contentTypeValue);
453 if (charsetRaw != null) {
454 if (charsetRaw.length() > 2) { // at least contains 2 quotes(")
455 if (charsetRaw.charAt(0) == '"' && charsetRaw.charAt(charsetRaw.length() - 1) == '"') {
456 charsetRaw = charsetRaw.subSequence(1, charsetRaw.length() - 1);
457 }
458 }
459 try {
460 return Charset.forName(charsetRaw.toString());
461 } catch (IllegalCharsetNameException ignored) {
462 // just return the default charset
463 } catch (UnsupportedCharsetException ignored) {
464 // just return the default charset
465 }
466 }
467 }
468 return defaultCharset;
469 }
470
471 /**
472 * Fetch charset from message's Content-Type header as a char sequence.
473 *
474 * A lot of sites/possibly clients have charset="CHARSET", for example charset="utf-8". Or "utf8" instead of "utf-8"
475 * This is not according to standard, but this method provide an ability to catch desired mistakes manually in code
476 *
477 * @param message entity to fetch Content-Type header from
478 * @return the {@code CharSequence} with charset from message's Content-Type header
479 * or {@code null} if charset is not presented
480 * @deprecated use {@link #getCharsetAsSequence(HttpMessage)}
481 */
482 @Deprecated
483 public static CharSequence getCharsetAsString(HttpMessage message) {
484 return getCharsetAsSequence(message);
485 }
486
487 /**
488 * Fetch charset from message's Content-Type header as a char sequence.
489 *
490 * A lot of sites/possibly clients have charset="CHARSET", for example charset="utf-8". Or "utf8" instead of "utf-8"
491 * This is not according to standard, but this method provide an ability to catch desired mistakes manually in code
492 *
493 * @return the {@code CharSequence} with charset from message's Content-Type header
494 * or {@code null} if charset is not presented
495 */
496 public static CharSequence getCharsetAsSequence(HttpMessage message) {
497 CharSequence contentTypeValue = message.headers().get(HttpHeaderNames.CONTENT_TYPE);
498 if (contentTypeValue != null) {
499 return getCharsetAsSequence(contentTypeValue);
500 } else {
501 return null;
502 }
503 }
504
505 /**
506 * Fetch charset from Content-Type header value as a char sequence.
507 *
508 * A lot of sites/possibly clients have charset="CHARSET", for example charset="utf-8". Or "utf8" instead of "utf-8"
509 * This is not according to standard, but this method provide an ability to catch desired mistakes manually in code
510 *
511 * @param contentTypeValue Content-Type header value to parse
512 * @return the {@code CharSequence} with charset from message's Content-Type header
513 * or {@code null} if charset is not presented
514 * @throws NullPointerException in case if {@code contentTypeValue == null}
515 */
516 public static CharSequence getCharsetAsSequence(CharSequence contentTypeValue) {
517 ObjectUtil.checkNotNull(contentTypeValue, "contentTypeValue");
518
519 int indexOfCharset = AsciiString.indexOfIgnoreCaseAscii(contentTypeValue, CHARSET_EQUALS, 0);
520 if (indexOfCharset == AsciiString.INDEX_NOT_FOUND) {
521 return null;
522 }
523
524 int indexOfEncoding = indexOfCharset + CHARSET_EQUALS.length();
525 if (indexOfEncoding < contentTypeValue.length()) {
526 CharSequence charsetCandidate = contentTypeValue.subSequence(indexOfEncoding, contentTypeValue.length());
527 int indexOfSemicolon = AsciiString.indexOfIgnoreCaseAscii(charsetCandidate, SEMICOLON, 0);
528 if (indexOfSemicolon == AsciiString.INDEX_NOT_FOUND) {
529 return charsetCandidate;
530 }
531
532 return charsetCandidate.subSequence(0, indexOfSemicolon);
533 }
534
535 return null;
536 }
537
538 /**
539 * Fetch MIME type part from message's Content-Type header as a char sequence.
540 *
541 * @param message entity to fetch Content-Type header from
542 * @return the MIME type as a {@code CharSequence} from message's Content-Type header
543 * or {@code null} if content-type header or MIME type part of this header are not presented
544 * <p/>
545 * "content-type: text/html; charset=utf-8" - "text/html" will be returned <br/>
546 * "content-type: text/html" - "text/html" will be returned <br/>
547 * "content-type: " or no header - {@code null} we be returned
548 */
549 public static CharSequence getMimeType(HttpMessage message) {
550 CharSequence contentTypeValue = message.headers().get(HttpHeaderNames.CONTENT_TYPE);
551 if (contentTypeValue != null) {
552 return getMimeType(contentTypeValue);
553 } else {
554 return null;
555 }
556 }
557
558 /**
559 * Fetch MIME type part from Content-Type header value as a char sequence.
560 *
561 * @param contentTypeValue Content-Type header value to parse
562 * @return the MIME type as a {@code CharSequence} from message's Content-Type header
563 * or {@code null} if content-type header or MIME type part of this header are not presented
564 * <p/>
565 * "content-type: text/html; charset=utf-8" - "text/html" will be returned <br/>
566 * "content-type: text/html" - "text/html" will be returned <br/>
567 * "content-type: empty header - {@code null} we be returned
568 * @throws NullPointerException in case if {@code contentTypeValue == null}
569 */
570 public static CharSequence getMimeType(CharSequence contentTypeValue) {
571 ObjectUtil.checkNotNull(contentTypeValue, "contentTypeValue");
572
573 int indexOfSemicolon = AsciiString.indexOfIgnoreCaseAscii(contentTypeValue, SEMICOLON, 0);
574 if (indexOfSemicolon != AsciiString.INDEX_NOT_FOUND) {
575 return contentTypeValue.subSequence(0, indexOfSemicolon);
576 } else {
577 return contentTypeValue.length() > 0 ? contentTypeValue : null;
578 }
579 }
580
581 /**
582 * Formats the host string of an address so it can be used for computing an HTTP component
583 * such as a URL or a Host header
584 *
585 * @param addr the address
586 * @return the formatted String
587 */
588 public static String formatHostnameForHttp(InetSocketAddress addr) {
589 String hostString = NetUtil.getHostname(addr);
590 if (NetUtil.isValidIpV6Address(hostString)) {
591 if (!addr.isUnresolved()) {
592 hostString = NetUtil.toAddressString(addr.getAddress());
593 } else if (hostString.charAt(0) == '[' && hostString.charAt(hostString.length() - 1) == ']') {
594 // If IPv6 address already contains brackets, let's return as is.
595 return hostString;
596 }
597
598 return '[' + hostString + ']';
599 }
600 return hostString;
601 }
602
603 /**
604 * Validates, and optionally extracts the content length from headers. This method is not intended for
605 * general use, but is here to be shared between HTTP/1 and HTTP/2 parsing.
606 *
607 * @param contentLengthFields the content-length header fields.
608 * @param isHttp10OrEarlier {@code true} if we are handling HTTP/1.0 or earlier
609 * @param allowDuplicateContentLengths {@code true} if multiple, identical-value content lengths should be allowed.
610 * @return the normalized content length from the headers or {@code -1} if the fields were empty.
611 * @throws IllegalArgumentException if the content-length fields are not valid
612 */
613 public static long normalizeAndGetContentLength(
614 List<? extends CharSequence> contentLengthFields, boolean isHttp10OrEarlier,
615 boolean allowDuplicateContentLengths) {
616 if (contentLengthFields.isEmpty()) {
617 return -1;
618 }
619
620 // Guard against multiple Content-Length headers as stated in
621 // https://tools.ietf.org/html/rfc7230#section-3.3.2:
622 //
623 // If a message is received that has multiple Content-Length header
624 // fields with field-values consisting of the same decimal value, or a
625 // single Content-Length header field with a field value containing a
626 // list of identical decimal values (e.g., "Content-Length: 42, 42"),
627 // indicating that duplicate Content-Length header fields have been
628 // generated or combined by an upstream message processor, then the
629 // recipient MUST either reject the message as invalid or replace the
630 // duplicated field-values with a single valid Content-Length field
631 // containing that decimal value prior to determining the message body
632 // length or forwarding the message.
633 String firstField = contentLengthFields.get(0).toString();
634 boolean multipleContentLengths =
635 contentLengthFields.size() > 1 || firstField.indexOf(COMMA) >= 0;
636
637 if (multipleContentLengths && !isHttp10OrEarlier) {
638 if (allowDuplicateContentLengths) {
639 // Find and enforce that all Content-Length values are the same
640 String firstValue = null;
641 for (CharSequence field : contentLengthFields) {
642 String[] tokens = field.toString().split(COMMA_STRING, -1);
643 for (String token : tokens) {
644 String trimmed = token.trim();
645 if (firstValue == null) {
646 firstValue = trimmed;
647 } else if (!trimmed.equals(firstValue)) {
648 throw new IllegalArgumentException(
649 "Multiple Content-Length values found: " + contentLengthFields);
650 }
651 }
652 }
653 // Replace the duplicated field-values with a single valid Content-Length field
654 firstField = firstValue;
655 } else {
656 // Reject the message as invalid
657 throw new IllegalArgumentException(
658 "Multiple Content-Length values found: " + contentLengthFields);
659 }
660 }
661 // Ensure we not allow sign as part of the content-length:
662 // See https://github.com/squid-cache/squid/security/advisories/GHSA-qf3v-rc95-96j5
663 if (firstField.isEmpty() || !Character.isDigit(firstField.charAt(0))) {
664 // Reject the message as invalid
665 throw new IllegalArgumentException(
666 "Content-Length value is not a number: " + firstField);
667 }
668 try {
669 final long value = Long.parseLong(firstField);
670 return checkPositiveOrZero(value, "Content-Length value");
671 } catch (NumberFormatException e) {
672 // Reject the message as invalid
673 throw new IllegalArgumentException(
674 "Content-Length value is not a number: " + firstField, e);
675 }
676 }
677
678 /**
679 * Validate a <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">token</a> contains only allowed
680 * characters.
681 * <p>
682 * The <a href="https://tools.ietf.org/html/rfc2616#section-2.2">token</a> format is used for variety of HTTP
683 * components, like <a href="https://tools.ietf.org/html/rfc6265#section-4.1.1">cookie-name</a>,
684 * <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">field-name</a> of a
685 * <a href="https://tools.ietf.org/html/rfc7230#section-3.2">header-field</a>, or
686 * <a href="https://tools.ietf.org/html/rfc7231#section-4">request method</a>.
687 *
688 * @param token the token to validate.
689 * @return the index of the first invalid token character found, or {@code -1} if there are none.
690 */
691 static int validateToken(CharSequence token) {
692 if (token instanceof AsciiString) {
693 return validateAsciiStringToken((AsciiString) token);
694 }
695 return validateCharSequenceToken(token);
696 }
697
698 /**
699 * Validate that an {@link AsciiString} contain onlu valid
700 * <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">token</a> characters.
701 *
702 * @param token the ascii string to validate.
703 */
704 private static int validateAsciiStringToken(AsciiString token) {
705 byte[] array = token.array();
706 for (int i = token.arrayOffset(), len = token.arrayOffset() + token.length(); i < len; i++) {
707 if (!isValidTokenChar(array[i])) {
708 return i - token.arrayOffset();
709 }
710 }
711 return -1;
712 }
713
714 /**
715 * Validate that a {@link CharSequence} contain onlu valid
716 * <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">token</a> characters.
717 *
718 * @param token the character sequence to validate.
719 */
720 private static int validateCharSequenceToken(CharSequence token) {
721 for (int i = 0, len = token.length(); i < len; i++) {
722 byte value = (byte) token.charAt(i);
723 if (!isValidTokenChar(value)) {
724 return i;
725 }
726 }
727 return -1;
728 }
729
730 // HEADER
731 // header-field = field-name ":" OWS field-value OWS
732 //
733 // field-name = token
734 // token = 1*tchar
735 //
736 // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
737 // / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
738 // / DIGIT / ALPHA
739 // ; any VCHAR, except delimiters.
740 // Delimiters are chosen
741 // from the set of US-ASCII visual characters not allowed in a token
742 // (DQUOTE and "(),/:;<=>?@[\]{}")
743 //
744 // COOKIE
745 // cookie-pair = cookie-name "=" cookie-value
746 // cookie-name = token
747 // token = 1*<any CHAR except CTLs or separators>
748 // CTL = <any US-ASCII control character
749 // (octets 0 - 31) and DEL (127)>
750 // separators = "(" | ")" | "<" | ">" | "@"
751 // | "," | ";" | ":" | "\" | <">
752 // | "/" | "[" | "]" | "?" | "="
753 // | "{" | "}" | SP | HT
754 //
755 // field-name's token is equivalent to cookie-name's token, we can reuse the tchar mask for both:
756
757 // private static final class BitSet128 {
758 // private long high;
759 // private long low;
760 //
761 // BitSet128 range(char fromInc, char toInc) {
762 // for (int bit = fromInc; bit <= toInc; bit++) {
763 // if (bit < 64) {
764 // low |= 1L << bit;
765 // } else {
766 // high |= 1L << bit - 64;
767 // }
768 // }
769 // return this;
770 // }
771 //
772 // BitSet128 bits(char... bits) {
773 // for (char bit : bits) {
774 // if (bit < 64) {
775 // low |= 1L << bit;
776 // } else {
777 // high |= 1L << bit - 64;
778 // }
779 // }
780 // return this;
781 // }
782 //
783 // long high() {
784 // return high;
785 // }
786 //
787 // long low() {
788 // return low;
789 // }
790 //
791 // static boolean contains(byte bit, long high, long low) {
792 // if (bit < 0) {
793 // return false;
794 // }
795 // if (bit < 64) {
796 // return 0 != (low & 1L << bit);
797 // }
798 // return 0 != (high & 1L << bit - 64);
799 // }
800 // }
801
802 // BitSet128 tokenChars = new BitSet128()
803 // .range('0', '9').range('a', 'z').range('A', 'Z') // Alphanumeric.
804 // .bits('-', '.', '_', '~') // Unreserved characters.
805 // .bits('!', '#', '$', '%', '&', '\'', '*', '+', '^', '`', '|'); // Token special characters.
806
807 // This constants calculated by the above code
808 private static final long TOKEN_CHARS_HIGH = 0x57ffffffc7fffffeL;
809 private static final long TOKEN_CHARS_LOW = 0x3ff6cfa00000000L;
810
811 private static boolean isValidTokenChar(byte bit) {
812 if (bit < 0) {
813 return false;
814 }
815 if (bit < 64) {
816 return 0 != (TOKEN_CHARS_LOW & 1L << bit);
817 }
818 return 0 != (TOKEN_CHARS_HIGH & 1L << bit - 64);
819 }
820 }