1 /*
2 * Copyright 2015 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package io.netty.handler.codec.http;
17
18 import java.net.InetSocketAddress;
19 import java.net.URI;
20 import java.nio.charset.Charset;
21 import java.nio.charset.IllegalCharsetNameException;
22 import java.nio.charset.UnsupportedCharsetException;
23 import java.util.ArrayList;
24 import java.util.Iterator;
25 import java.util.List;
26
27 import io.netty.util.AsciiString;
28 import io.netty.util.CharsetUtil;
29 import io.netty.util.NetUtil;
30 import io.netty.util.internal.ObjectUtil;
31
32 import static io.netty.util.internal.StringUtil.COMMA;
33 import static io.netty.util.internal.ObjectUtil.checkPositiveOrZero;
34
35 /**
36 * Utility methods useful in the HTTP context.
37 */
38 public final class HttpUtil {
39
// The HttpHeaderValues.CHARSET value followed by '='; searched for (ignoring ASCII case) when extracting
// the charset parameter from a Content-Type value.
private static final AsciiString CHARSET_EQUALS = AsciiString.of(HttpHeaderValues.CHARSET + "=");
// Delimiter looked up in Content-Type values to cut off the MIME type / charset parameter.
private static final AsciiString SEMICOLON = AsciiString.cached(";");
// String form of COMMA, used as the split pattern for comma-separated Content-Length field values.
private static final String COMMA_STRING = String.valueOf(COMMA);
43
// Private constructor: this class only exposes static helpers and is not meant to be instantiated.
private HttpUtil() { }
45
46 /**
47 * Determine if a uri is in origin-form according to
48 * <a href="https://datatracker.ietf.org/doc/html/rfc9112#section-3.2.1">RFC 9112, 3.2.1</a>.
49 */
50 public static boolean isOriginForm(URI uri) {
51 return isOriginForm(uri.toString());
52 }
53
54 /**
55 * Determine if a string uri is in origin-form according to
56 * <a href="https://datatracker.ietf.org/doc/html/rfc9112#section-3.2.1">RFC 9112, 3.2.1</a>.
57 */
58 public static boolean isOriginForm(String uri) {
59 return uri.startsWith("/");
60 }
61
62 /**
63 * Determine if a uri is in asterisk-form according to
64 * <a href="https://datatracker.ietf.org/doc/html/rfc9112#section-3.2.4">RFC 9112, 3.2.4</a>.
65 */
66 public static boolean isAsteriskForm(URI uri) {
67 return isAsteriskForm(uri.toString());
68 }
69
70 /**
71 * Determine if a string uri is in asterisk-form according to
72 * <a href="https://datatracker.ietf.org/doc/html/rfc9112#section-3.2.4">RFC 9112, 3.2.4</a>.
73 */
74 public static boolean isAsteriskForm(String uri) {
75 return "*".equals(uri);
76 }
77
78 static void validateRequestLineTokens(HttpMethod method, String uri) {
79 // The HttpVersion class does its own validation, and it's not possible for subclasses to circumvent it.
80 // The HttpMethod class does its own validation, but subclasses might circumvent it.
81 if (method.getClass() != HttpMethod.class) {
82 if (!isEncodingSafeStartLineToken(method.asciiName())) {
83 throw new IllegalArgumentException(
84 "The HTTP method name contain illegal characters: " + method.asciiName());
85 }
86 }
87
88 if (!isEncodingSafeStartLineToken(uri)) {
89 throw new IllegalArgumentException("The URI contain illegal characters: " + uri);
90 }
91 }
92
93 /**
94 * Validate that the given request line token is safe for verbatim encoding to the network.
95 * This does not fully check that the token – HTTP method, version, or URI – is valid and formatted correctly.
96 * Only that the token does not contain characters that would break or
97 * desynchronize HTTP message parsing of the start line wherein the token would be included.
98 * <p>
99 * See <a href="https://datatracker.ietf.org/doc/html/rfc9112#name-request-line">RFC 9112, 3.</a>
100 *
101 * @param token The token to check.
102 * @return {@code true} if the token is safe to encode verbatim into the HTTP message output stream,
103 * otherwise {@code false}.
104 */
105 public static boolean isEncodingSafeStartLineToken(CharSequence token) {
106 int lenBytes = token.length();
107 for (int i = 0; i < lenBytes; i++) {
108 char ch = token.charAt(i);
109 // this is to help AOT compiled code which cannot profile the switch
110 if (ch <= ' ') {
111 switch (ch) {
112 case '\n':
113 case '\r':
114 case ' ':
115 return false;
116 }
117 }
118 }
119 return true;
120 }
121
122 /**
123 * Returns {@code true} if and only if the connection can remain open and
124 * thus 'kept alive'. This method respects the value of the
125 * {@code "Connection"} header first and then the return value of
126 * {@link HttpVersion#isKeepAliveDefault()}.
127 */
128 public static boolean isKeepAlive(HttpMessage message) {
129 return !message.headers().containsValue(HttpHeaderNames.CONNECTION, HttpHeaderValues.CLOSE, true) &&
130 (message.protocolVersion().isKeepAliveDefault() ||
131 message.headers().containsValue(HttpHeaderNames.CONNECTION, HttpHeaderValues.KEEP_ALIVE, true));
132 }
133
134 /**
135 * Sets the value of the {@code "Connection"} header depending on the
136 * protocol version of the specified message. This getMethod sets or removes
137 * the {@code "Connection"} header depending on what the default keep alive
138 * mode of the message's protocol version is, as specified by
139 * {@link HttpVersion#isKeepAliveDefault()}.
140 * <ul>
141 * <li>If the connection is kept alive by default:
142 * <ul>
143 * <li>set to {@code "close"} if {@code keepAlive} is {@code false}.</li>
144 * <li>remove otherwise.</li>
145 * </ul></li>
146 * <li>If the connection is closed by default:
147 * <ul>
148 * <li>set to {@code "keep-alive"} if {@code keepAlive} is {@code true}.</li>
149 * <li>remove otherwise.</li>
150 * </ul></li>
151 * </ul>
152 * @see #setKeepAlive(HttpHeaders, HttpVersion, boolean)
153 */
154 public static void setKeepAlive(HttpMessage message, boolean keepAlive) {
155 setKeepAlive(message.headers(), message.protocolVersion(), keepAlive);
156 }
157
158 /**
159 * Sets the value of the {@code "Connection"} header depending on the
160 * protocol version of the specified message. This getMethod sets or removes
161 * the {@code "Connection"} header depending on what the default keep alive
162 * mode of the message's protocol version is, as specified by
163 * {@link HttpVersion#isKeepAliveDefault()}.
164 * <ul>
165 * <li>If the connection is kept alive by default:
166 * <ul>
167 * <li>set to {@code "close"} if {@code keepAlive} is {@code false}.</li>
168 * <li>remove otherwise.</li>
169 * </ul></li>
170 * <li>If the connection is closed by default:
171 * <ul>
172 * <li>set to {@code "keep-alive"} if {@code keepAlive} is {@code true}.</li>
173 * <li>remove otherwise.</li>
174 * </ul></li>
175 * </ul>
176 */
177 public static void setKeepAlive(HttpHeaders h, HttpVersion httpVersion, boolean keepAlive) {
178 if (httpVersion.isKeepAliveDefault()) {
179 if (keepAlive) {
180 h.remove(HttpHeaderNames.CONNECTION);
181 } else {
182 h.set(HttpHeaderNames.CONNECTION, HttpHeaderValues.CLOSE);
183 }
184 } else {
185 if (keepAlive) {
186 h.set(HttpHeaderNames.CONNECTION, HttpHeaderValues.KEEP_ALIVE);
187 } else {
188 h.remove(HttpHeaderNames.CONNECTION);
189 }
190 }
191 }
192
193 /**
194 * Returns the length of the content. Please note that this value is
195 * not retrieved from {@link HttpContent#content()} but from the
196 * {@code "Content-Length"} header, and thus they are independent from each
197 * other.
198 *
199 * @return the content length
200 *
201 * @throws NumberFormatException
202 * if the message does not have the {@code "Content-Length"} header
203 * or its value is not a number
204 */
205 public static long getContentLength(HttpMessage message) {
206 String value = message.headers().get(HttpHeaderNames.CONTENT_LENGTH);
207 if (value != null) {
208 return Long.parseLong(value);
209 }
210
211 // We know the content length if it's a Web Socket message even if
212 // Content-Length header is missing.
213 long webSocketContentLength = getWebSocketContentLength(message);
214 if (webSocketContentLength >= 0) {
215 return webSocketContentLength;
216 }
217
218 // Otherwise we don't.
219 throw new NumberFormatException("header not found: " + HttpHeaderNames.CONTENT_LENGTH);
220 }
221
222 /**
223 * Returns the length of the content or the specified default value if the message does not have the {@code
224 * "Content-Length" header}. Please note that this value is not retrieved from {@link HttpContent#content()} but
225 * from the {@code "Content-Length"} header, and thus they are independent from each other.
226 *
227 * @param message the message
228 * @param defaultValue the default value
229 * @return the content length or the specified default value
230 * @throws NumberFormatException if the {@code "Content-Length"} header does not parse as a long
231 */
232 public static long getContentLength(HttpMessage message, long defaultValue) {
233 String value = message.headers().get(HttpHeaderNames.CONTENT_LENGTH);
234 if (value != null) {
235 return Long.parseLong(value);
236 }
237
238 // We know the content length if it's a Web Socket message even if
239 // Content-Length header is missing.
240 long webSocketContentLength = getWebSocketContentLength(message);
241 if (webSocketContentLength >= 0) {
242 return webSocketContentLength;
243 }
244
245 // Otherwise we don't.
246 return defaultValue;
247 }
248
249 /**
250 * Get an {@code int} representation of {@link #getContentLength(HttpMessage, long)}.
251 *
252 * @return the content length or {@code defaultValue} if this message does
253 * not have the {@code "Content-Length"} header.
254 *
255 * @throws NumberFormatException if the {@code "Content-Length"} header does not parse as an int
256 */
257 public static int getContentLength(HttpMessage message, int defaultValue) {
258 return (int) Math.min(Integer.MAX_VALUE, getContentLength(message, (long) defaultValue));
259 }
260
261 /**
262 * Returns the content length of the specified web socket message. If the
263 * specified message is not a web socket message, {@code -1} is returned.
264 */
265 static int getWebSocketContentLength(HttpMessage message) {
266 // WebSocket messages have constant content-lengths.
267 HttpHeaders h = message.headers();
268 if (message instanceof HttpRequest) {
269 HttpRequest req = (HttpRequest) message;
270 if (HttpMethod.GET.equals(req.method()) &&
271 h.contains(HttpHeaderNames.SEC_WEBSOCKET_KEY1) &&
272 h.contains(HttpHeaderNames.SEC_WEBSOCKET_KEY2)) {
273 return 8;
274 }
275 } else if (message instanceof HttpResponse) {
276 HttpResponse res = (HttpResponse) message;
277 if (res.status().code() == 101 &&
278 h.contains(HttpHeaderNames.SEC_WEBSOCKET_ORIGIN) &&
279 h.contains(HttpHeaderNames.SEC_WEBSOCKET_LOCATION)) {
280 return 16;
281 }
282 }
283
284 // Not a web socket message
285 return -1;
286 }
287
288 /**
289 * Sets the {@code "Content-Length"} header.
290 */
291 public static void setContentLength(HttpMessage message, long length) {
292 message.headers().set(HttpHeaderNames.CONTENT_LENGTH, length);
293 }
294
295 public static boolean isContentLengthSet(HttpMessage m) {
296 return m.headers().contains(HttpHeaderNames.CONTENT_LENGTH);
297 }
298
299 /**
300 * Returns {@code true} if and only if the specified message contains an expect header and the only expectation
301 * present is the 100-continue expectation. Note that this method returns {@code false} if the expect header is
302 * not valid for the message (e.g., the message is a response, or the version on the message is HTTP/1.0).
303 *
304 * @param message the message
305 * @return {@code true} if and only if the expectation 100-continue is present and it is the only expectation
306 * present
307 */
308 public static boolean is100ContinueExpected(HttpMessage message) {
309 return isExpectHeaderValid(message)
310 // unquoted tokens in the expect header are case-insensitive, thus 100-continue is case insensitive
311 && message.headers().contains(HttpHeaderNames.EXPECT, HttpHeaderValues.CONTINUE, true);
312 }
313
314 /**
315 * Returns {@code true} if the specified message contains an expect header specifying an expectation that is not
316 * supported. Note that this method returns {@code false} if the expect header is not valid for the message
317 * (e.g., the message is a response, or the version on the message is HTTP/1.0).
318 *
319 * @param message the message
320 * @return {@code true} if and only if an expectation is present that is not supported
321 */
322 static boolean isUnsupportedExpectation(HttpMessage message) {
323 if (!isExpectHeaderValid(message)) {
324 return false;
325 }
326
327 final String expectValue = message.headers().get(HttpHeaderNames.EXPECT);
328 return expectValue != null && !HttpHeaderValues.CONTINUE.toString().equalsIgnoreCase(expectValue);
329 }
330
331 private static boolean isExpectHeaderValid(final HttpMessage message) {
332 /*
333 * Expect: 100-continue is for requests only and it works only on HTTP/1.1 or later. Note further that RFC 7231
334 * section 5.1.1 says "A server that receives a 100-continue expectation in an HTTP/1.0 request MUST ignore
335 * that expectation."
336 */
337 return message instanceof HttpRequest &&
338 message.protocolVersion().compareTo(HttpVersion.HTTP_1_1) >= 0;
339 }
340
341 /**
342 * Sets or removes the {@code "Expect: 100-continue"} header to / from the
343 * specified message. If {@code expected} is {@code true},
344 * the {@code "Expect: 100-continue"} header is set and all other previous
345 * {@code "Expect"} headers are removed. Otherwise, all {@code "Expect"}
346 * headers are removed completely.
347 */
348 public static void set100ContinueExpected(HttpMessage message, boolean expected) {
349 if (expected) {
350 message.headers().set(HttpHeaderNames.EXPECT, HttpHeaderValues.CONTINUE);
351 } else {
352 message.headers().remove(HttpHeaderNames.EXPECT);
353 }
354 }
355
356 /**
357 * Checks to see if the transfer encoding in a specified {@link HttpMessage} is chunked
358 *
359 * @param message The message to check
360 * @return True if transfer encoding is chunked, otherwise false
361 */
362 public static boolean isTransferEncodingChunked(HttpMessage message) {
363 return message.headers().containsValue(HttpHeaderNames.TRANSFER_ENCODING, HttpHeaderValues.CHUNKED, true);
364 }
365
366 /**
367 * Set the {@link HttpHeaderNames#TRANSFER_ENCODING} to either include {@link HttpHeaderValues#CHUNKED} if
368 * {@code chunked} is {@code true}, or remove {@link HttpHeaderValues#CHUNKED} if {@code chunked} is {@code false}.
369 *
370 * @param m The message which contains the headers to modify.
371 * @param chunked if {@code true} then include {@link HttpHeaderValues#CHUNKED} in the headers. otherwise remove
372 * {@link HttpHeaderValues#CHUNKED} from the headers.
373 */
374 public static void setTransferEncodingChunked(HttpMessage m, boolean chunked) {
375 if (chunked) {
376 m.headers().set(HttpHeaderNames.TRANSFER_ENCODING, HttpHeaderValues.CHUNKED);
377 m.headers().remove(HttpHeaderNames.CONTENT_LENGTH);
378 } else {
379 List<String> encodings = m.headers().getAll(HttpHeaderNames.TRANSFER_ENCODING);
380 if (encodings.isEmpty()) {
381 return;
382 }
383 List<CharSequence> values = new ArrayList<CharSequence>(encodings);
384 Iterator<CharSequence> valuesIt = values.iterator();
385 while (valuesIt.hasNext()) {
386 CharSequence value = valuesIt.next();
387 if (HttpHeaderValues.CHUNKED.contentEqualsIgnoreCase(value)) {
388 valuesIt.remove();
389 }
390 }
391 if (values.isEmpty()) {
392 m.headers().remove(HttpHeaderNames.TRANSFER_ENCODING);
393 } else {
394 m.headers().set(HttpHeaderNames.TRANSFER_ENCODING, values);
395 }
396 }
397 }
398
399 /**
400 * Fetch charset from message's Content-Type header.
401 *
402 * @param message entity to fetch Content-Type header from
403 * @return the charset from message's Content-Type header or {@link CharsetUtil#ISO_8859_1}
404 * if charset is not presented or unparsable
405 */
406 public static Charset getCharset(HttpMessage message) {
407 return getCharset(message, CharsetUtil.ISO_8859_1);
408 }
409
410 /**
411 * Fetch charset from Content-Type header value.
412 *
413 * @param contentTypeValue Content-Type header value to parse
414 * @return the charset from message's Content-Type header or {@link CharsetUtil#ISO_8859_1}
415 * if charset is not presented or unparsable
416 */
417 public static Charset getCharset(CharSequence contentTypeValue) {
418 if (contentTypeValue != null) {
419 return getCharset(contentTypeValue, CharsetUtil.ISO_8859_1);
420 } else {
421 return CharsetUtil.ISO_8859_1;
422 }
423 }
424
425 /**
426 * Fetch charset from message's Content-Type header.
427 *
428 * @param message entity to fetch Content-Type header from
429 * @param defaultCharset result to use in case of empty, incorrect or doesn't contain required part header value
430 * @return the charset from message's Content-Type header or {@code defaultCharset}
431 * if charset is not presented or unparsable
432 */
433 public static Charset getCharset(HttpMessage message, Charset defaultCharset) {
434 CharSequence contentTypeValue = message.headers().get(HttpHeaderNames.CONTENT_TYPE);
435 if (contentTypeValue != null) {
436 return getCharset(contentTypeValue, defaultCharset);
437 } else {
438 return defaultCharset;
439 }
440 }
441
442 /**
443 * Fetch charset from Content-Type header value.
444 *
445 * @param contentTypeValue Content-Type header value to parse
446 * @param defaultCharset result to use in case of empty, incorrect or doesn't contain required part header value
447 * @return the charset from message's Content-Type header or {@code defaultCharset}
448 * if charset is not presented or unparsable
449 */
450 public static Charset getCharset(CharSequence contentTypeValue, Charset defaultCharset) {
451 if (contentTypeValue != null) {
452 CharSequence charsetRaw = getCharsetAsSequence(contentTypeValue);
453 if (charsetRaw != null) {
454 if (charsetRaw.length() > 2) { // at least contains 2 quotes(")
455 if (charsetRaw.charAt(0) == '"' && charsetRaw.charAt(charsetRaw.length() - 1) == '"') {
456 charsetRaw = charsetRaw.subSequence(1, charsetRaw.length() - 1);
457 }
458 }
459 try {
460 return Charset.forName(charsetRaw.toString());
461 } catch (IllegalCharsetNameException | UnsupportedCharsetException ignored) {
462 // just return the default charset
463 }
464 }
465 }
466 return defaultCharset;
467 }
468
469 /**
470 * Fetch charset from message's Content-Type header as a char sequence.
471 *
472 * A lot of sites/possibly clients have charset="CHARSET", for example charset="utf-8". Or "utf8" instead of "utf-8"
473 * This is not according to standard, but this method provide an ability to catch desired mistakes manually in code
474 *
475 * @param message entity to fetch Content-Type header from
476 * @return the {@code CharSequence} with charset from message's Content-Type header
477 * or {@code null} if charset is not presented
478 * @deprecated use {@link #getCharsetAsSequence(HttpMessage)}
479 */
480 @Deprecated
481 public static CharSequence getCharsetAsString(HttpMessage message) {
482 return getCharsetAsSequence(message);
483 }
484
485 /**
486 * Fetch charset from message's Content-Type header as a char sequence.
487 *
488 * A lot of sites/possibly clients have charset="CHARSET", for example charset="utf-8". Or "utf8" instead of "utf-8"
489 * This is not according to standard, but this method provide an ability to catch desired mistakes manually in code
490 *
491 * @return the {@code CharSequence} with charset from message's Content-Type header
492 * or {@code null} if charset is not presented
493 */
494 public static CharSequence getCharsetAsSequence(HttpMessage message) {
495 CharSequence contentTypeValue = message.headers().get(HttpHeaderNames.CONTENT_TYPE);
496 if (contentTypeValue != null) {
497 return getCharsetAsSequence(contentTypeValue);
498 } else {
499 return null;
500 }
501 }
502
503 /**
504 * Fetch charset from Content-Type header value as a char sequence.
505 *
506 * A lot of sites/possibly clients have charset="CHARSET", for example charset="utf-8". Or "utf8" instead of "utf-8"
507 * This is not according to standard, but this method provide an ability to catch desired mistakes manually in code
508 *
509 * @param contentTypeValue Content-Type header value to parse
510 * @return the {@code CharSequence} with charset from message's Content-Type header
511 * or {@code null} if charset is not presented
512 * @throws NullPointerException in case if {@code contentTypeValue == null}
513 */
514 public static CharSequence getCharsetAsSequence(CharSequence contentTypeValue) {
515 ObjectUtil.checkNotNull(contentTypeValue, "contentTypeValue");
516
517 int indexOfCharset = AsciiString.indexOfIgnoreCaseAscii(contentTypeValue, CHARSET_EQUALS, 0);
518 if (indexOfCharset == AsciiString.INDEX_NOT_FOUND) {
519 return null;
520 }
521
522 int indexOfEncoding = indexOfCharset + CHARSET_EQUALS.length();
523 if (indexOfEncoding < contentTypeValue.length()) {
524 CharSequence charsetCandidate = contentTypeValue.subSequence(indexOfEncoding, contentTypeValue.length());
525 int indexOfSemicolon = AsciiString.indexOfIgnoreCaseAscii(charsetCandidate, SEMICOLON, 0);
526 if (indexOfSemicolon == AsciiString.INDEX_NOT_FOUND) {
527 return charsetCandidate;
528 }
529
530 return charsetCandidate.subSequence(0, indexOfSemicolon);
531 }
532
533 return null;
534 }
535
536 /**
537 * Fetch MIME type part from message's Content-Type header as a char sequence.
538 *
539 * @param message entity to fetch Content-Type header from
540 * @return the MIME type as a {@code CharSequence} from message's Content-Type header
541 * or {@code null} if content-type header or MIME type part of this header are not presented
542 * <p/>
543 * "content-type: text/html; charset=utf-8" - "text/html" will be returned <br/>
544 * "content-type: text/html" - "text/html" will be returned <br/>
545 * "content-type: " or no header - {@code null} we be returned
546 */
547 public static CharSequence getMimeType(HttpMessage message) {
548 CharSequence contentTypeValue = message.headers().get(HttpHeaderNames.CONTENT_TYPE);
549 if (contentTypeValue != null) {
550 return getMimeType(contentTypeValue);
551 } else {
552 return null;
553 }
554 }
555
556 /**
557 * Fetch MIME type part from Content-Type header value as a char sequence.
558 *
559 * @param contentTypeValue Content-Type header value to parse
560 * @return the MIME type as a {@code CharSequence} from message's Content-Type header
561 * or {@code null} if content-type header or MIME type part of this header are not presented
562 * <p/>
563 * "content-type: text/html; charset=utf-8" - "text/html" will be returned <br/>
564 * "content-type: text/html" - "text/html" will be returned <br/>
565 * "content-type: empty header - {@code null} we be returned
566 * @throws NullPointerException in case if {@code contentTypeValue == null}
567 */
568 public static CharSequence getMimeType(CharSequence contentTypeValue) {
569 ObjectUtil.checkNotNull(contentTypeValue, "contentTypeValue");
570
571 int indexOfSemicolon = AsciiString.indexOfIgnoreCaseAscii(contentTypeValue, SEMICOLON, 0);
572 if (indexOfSemicolon != AsciiString.INDEX_NOT_FOUND) {
573 return contentTypeValue.subSequence(0, indexOfSemicolon);
574 } else {
575 return contentTypeValue.length() > 0 ? contentTypeValue : null;
576 }
577 }
578
579 /**
580 * Formats the host string of an address so it can be used for computing an HTTP component
581 * such as a URL or a Host header
582 *
583 * @param addr the address
584 * @return the formatted String
585 */
586 public static String formatHostnameForHttp(InetSocketAddress addr) {
587 String hostString = NetUtil.getHostname(addr);
588 if (NetUtil.isValidIpV6Address(hostString)) {
589 if (!addr.isUnresolved()) {
590 hostString = NetUtil.toAddressString(addr.getAddress());
591 } else if (hostString.charAt(0) == '[' && hostString.charAt(hostString.length() - 1) == ']') {
592 // If IPv6 address already contains brackets, let's return as is.
593 return hostString;
594 }
595
596 return '[' + hostString + ']';
597 }
598 return hostString;
599 }
600
601 /**
602 * Validates, and optionally extracts the content length from headers. This method is not intended for
603 * general use, but is here to be shared between HTTP/1 and HTTP/2 parsing.
604 *
605 * @param contentLengthFields the content-length header fields.
606 * @param isHttp10OrEarlier {@code true} if we are handling HTTP/1.0 or earlier
607 * @param allowDuplicateContentLengths {@code true} if multiple, identical-value content lengths should be allowed.
608 * @return the normalized content length from the headers or {@code -1} if the fields were empty.
609 * @throws IllegalArgumentException if the content-length fields are not valid
610 */
611 public static long normalizeAndGetContentLength(
612 List<? extends CharSequence> contentLengthFields, boolean isHttp10OrEarlier,
613 boolean allowDuplicateContentLengths) {
614 if (contentLengthFields.isEmpty()) {
615 return -1;
616 }
617
618 // Guard against multiple Content-Length headers as stated in
619 // https://tools.ietf.org/html/rfc7230#section-3.3.2:
620 //
621 // If a message is received that has multiple Content-Length header
622 // fields with field-values consisting of the same decimal value, or a
623 // single Content-Length header field with a field value containing a
624 // list of identical decimal values (e.g., "Content-Length: 42, 42"),
625 // indicating that duplicate Content-Length header fields have been
626 // generated or combined by an upstream message processor, then the
627 // recipient MUST either reject the message as invalid or replace the
628 // duplicated field-values with a single valid Content-Length field
629 // containing that decimal value prior to determining the message body
630 // length or forwarding the message.
631 String firstField = contentLengthFields.get(0).toString();
632 boolean multipleContentLengths =
633 contentLengthFields.size() > 1 || firstField.indexOf(COMMA) >= 0;
634
635 if (multipleContentLengths && !isHttp10OrEarlier) {
636 if (allowDuplicateContentLengths) {
637 // Find and enforce that all Content-Length values are the same
638 String firstValue = null;
639 for (CharSequence field : contentLengthFields) {
640 String[] tokens = field.toString().split(COMMA_STRING, -1);
641 for (String token : tokens) {
642 String trimmed = token.trim();
643 if (firstValue == null) {
644 firstValue = trimmed;
645 } else if (!trimmed.equals(firstValue)) {
646 throw new IllegalArgumentException(
647 "Multiple Content-Length values found: " + contentLengthFields);
648 }
649 }
650 }
651 // Replace the duplicated field-values with a single valid Content-Length field
652 firstField = firstValue;
653 } else {
654 // Reject the message as invalid
655 throw new IllegalArgumentException(
656 "Multiple Content-Length values found: " + contentLengthFields);
657 }
658 }
659 // Ensure we not allow sign as part of the content-length:
660 // See https://github.com/squid-cache/squid/security/advisories/GHSA-qf3v-rc95-96j5
661 if (firstField.isEmpty() || !Character.isDigit(firstField.charAt(0))) {
662 // Reject the message as invalid
663 throw new IllegalArgumentException(
664 "Content-Length value is not a number: " + firstField);
665 }
666 try {
667 final long value = Long.parseLong(firstField);
668 return checkPositiveOrZero(value, "Content-Length value");
669 } catch (NumberFormatException e) {
670 // Reject the message as invalid
671 throw new IllegalArgumentException(
672 "Content-Length value is not a number: " + firstField, e);
673 }
674 }
675
676 /**
677 * Validate a <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">token</a> contains only allowed
678 * characters.
679 * <p>
680 * The <a href="https://tools.ietf.org/html/rfc2616#section-2.2">token</a> format is used for variety of HTTP
681 * components, like <a href="https://tools.ietf.org/html/rfc6265#section-4.1.1">cookie-name</a>,
682 * <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">field-name</a> of a
683 * <a href="https://tools.ietf.org/html/rfc7230#section-3.2">header-field</a>, or
684 * <a href="https://tools.ietf.org/html/rfc7231#section-4">request method</a>.
685 *
686 * @param token the token to validate.
687 * @return the index of the first invalid token character found, or {@code -1} if there are none.
688 */
689 static int validateToken(CharSequence token) {
690 if (token instanceof AsciiString) {
691 return validateAsciiStringToken((AsciiString) token);
692 }
693 return validateCharSequenceToken(token);
694 }
695
696 /**
697 * Validate that an {@link AsciiString} contain onlu valid
698 * <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">token</a> characters.
699 *
700 * @param token the ascii string to validate.
701 */
702 private static int validateAsciiStringToken(AsciiString token) {
703 byte[] array = token.array();
704 for (int i = token.arrayOffset(), len = token.arrayOffset() + token.length(); i < len; i++) {
705 if (!isValidTokenChar(array[i])) {
706 return i - token.arrayOffset();
707 }
708 }
709 return -1;
710 }
711
712 /**
713 * Validate that a {@link CharSequence} contain onlu valid
714 * <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">token</a> characters.
715 *
716 * @param token the character sequence to validate.
717 */
718 private static int validateCharSequenceToken(CharSequence token) {
719 for (int i = 0, len = token.length(); i < len; i++) {
720 byte value = (byte) token.charAt(i);
721 if (!isValidTokenChar(value)) {
722 return i;
723 }
724 }
725 return -1;
726 }
727
728 // HEADER
729 // header-field = field-name ":" OWS field-value OWS
730 //
731 // field-name = token
732 // token = 1*tchar
733 //
734 // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
735 // / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
736 // / DIGIT / ALPHA
737 // ; any VCHAR, except delimiters.
738 // Delimiters are chosen
739 // from the set of US-ASCII visual characters not allowed in a token
740 // (DQUOTE and "(),/:;<=>?@[\]{}")
741 //
742 // COOKIE
743 // cookie-pair = cookie-name "=" cookie-value
744 // cookie-name = token
745 // token = 1*<any CHAR except CTLs or separators>
746 // CTL = <any US-ASCII control character
747 // (octets 0 - 31) and DEL (127)>
748 // separators = "(" | ")" | "<" | ">" | "@"
749 // | "," | ";" | ":" | "\" | <">
750 // | "/" | "[" | "]" | "?" | "="
751 // | "{" | "}" | SP | HT
752 //
753 // field-name's token is equivalent to cookie-name's token, we can reuse the tchar mask for both:
754
755 // private static final class BitSet128 {
756 // private long high;
757 // private long low;
758 //
759 // BitSet128 range(char fromInc, char toInc) {
760 // for (int bit = fromInc; bit <= toInc; bit++) {
761 // if (bit < 64) {
762 // low |= 1L << bit;
763 // } else {
764 // high |= 1L << bit - 64;
765 // }
766 // }
767 // return this;
768 // }
769 //
770 // BitSet128 bits(char... bits) {
771 // for (char bit : bits) {
772 // if (bit < 64) {
773 // low |= 1L << bit;
774 // } else {
775 // high |= 1L << bit - 64;
776 // }
777 // }
778 // return this;
779 // }
780 //
781 // long high() {
782 // return high;
783 // }
784 //
785 // long low() {
786 // return low;
787 // }
788 //
789 // static boolean contains(byte bit, long high, long low) {
790 // if (bit < 0) {
791 // return false;
792 // }
793 // if (bit < 64) {
794 // return 0 != (low & 1L << bit);
795 // }
796 // return 0 != (high & 1L << bit - 64);
797 // }
798 // }
799
800 // BitSet128 tokenChars = new BitSet128()
801 // .range('0', '9').range('a', 'z').range('A', 'Z') // Alphanumeric.
802 // .bits('-', '.', '_', '~') // Unreserved characters.
803 // .bits('!', '#', '$', '%', '&', '\'', '*', '+', '^', '`', '|'); // Token special characters.
804
// These constants were calculated by the code above.
// Each is a 64-bit mask in which a set bit marks a valid token character:
// TOKEN_CHARS_LOW is tested for byte values 0-63 (bit index = value),
// TOKEN_CHARS_HIGH for byte values 64-127 (bit index = value - 64).
private static final long TOKEN_CHARS_HIGH = 0x57ffffffc7fffffeL;
private static final long TOKEN_CHARS_LOW = 0x3ff6cfa00000000L;
808
809 private static boolean isValidTokenChar(byte bit) {
810 if (bit < 0) {
811 return false;
812 }
813 if (bit < 64) {
814 return 0 != (TOKEN_CHARS_LOW & 1L << bit);
815 }
816 return 0 != (TOKEN_CHARS_HIGH & 1L << bit - 64);
817 }
818 }