1 /*
2 * Copyright 2015 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package io.netty.handler.codec.http;
17
18 import java.net.InetSocketAddress;
19 import java.net.URI;
20 import java.nio.charset.Charset;
21 import java.nio.charset.IllegalCharsetNameException;
22 import java.nio.charset.UnsupportedCharsetException;
23 import java.util.ArrayList;
24 import java.util.Iterator;
25 import java.util.List;
26
27 import io.netty.util.AsciiString;
28 import io.netty.util.CharsetUtil;
29 import io.netty.util.NetUtil;
30 import io.netty.util.internal.ObjectUtil;
31
32 import static io.netty.util.internal.StringUtil.COMMA;
33 import static io.netty.util.internal.ObjectUtil.checkPositiveOrZero;
34
35 /**
36 * Utility methods useful in the HTTP context.
37 */
38 public final class HttpUtil {
39
/**
 * The {@link HttpHeaderValues#CHARSET} token followed by {@code '='}; searched for (ignoring ASCII case)
 * to locate the charset parameter inside a Content-Type value.
 */
private static final AsciiString CHARSET_EQUALS = AsciiString.of(HttpHeaderValues.CHARSET + "=");
/** Separator searched for to find the end of the MIME type or of the charset parameter value. */
private static final AsciiString SEMICOLON = AsciiString.cached(";");
/** String form of the statically imported {@code COMMA}; used as the split pattern for Content-Length lists. */
private static final String COMMA_STRING = String.valueOf(COMMA);

/** Not meant to be instantiated: every member of this class is static. */
private HttpUtil() { }
45
46 /**
47 * Determine if a uri is in origin-form according to
48 * <a href="https://tools.ietf.org/html/rfc7230#section-5.3">rfc7230, 5.3</a>.
49 */
50 public static boolean isOriginForm(URI uri) {
51 return isOriginForm(uri.toString());
52 }
53
54 /**
55 * Determine if a string uri is in origin-form according to
56 * <a href="https://tools.ietf.org/html/rfc7230#section-5.3">rfc7230, 5.3</a>.
57 */
58 public static boolean isOriginForm(String uri) {
59 return uri.startsWith("/");
60 }
61
62 /**
63 * Determine if a uri is in asterisk-form according to
64 * <a href="https://tools.ietf.org/html/rfc7230#section-5.3">rfc7230, 5.3</a>.
65 */
66 public static boolean isAsteriskForm(URI uri) {
67 return isAsteriskForm(uri.toString());
68 }
69
70 /**
71 * Determine if a string uri is in asterisk-form according to
72 * <a href="https://tools.ietf.org/html/rfc7230#section-5.3">rfc7230, 5.3</a>.
73 */
74 public static boolean isAsteriskForm(String uri) {
75 return "*".equals(uri);
76 }
77
78 /**
79 * Returns {@code true} if and only if the connection can remain open and
80 * thus 'kept alive'. This methods respects the value of the.
81 *
82 * {@code "Connection"} header first and then the return value of
83 * {@link HttpVersion#isKeepAliveDefault()}.
84 */
85 public static boolean isKeepAlive(HttpMessage message) {
86 return !message.headers().containsValue(HttpHeaderNames.CONNECTION, HttpHeaderValues.CLOSE, true) &&
87 (message.protocolVersion().isKeepAliveDefault() ||
88 message.headers().containsValue(HttpHeaderNames.CONNECTION, HttpHeaderValues.KEEP_ALIVE, true));
89 }
90
91 /**
92 * Sets the value of the {@code "Connection"} header depending on the
93 * protocol version of the specified message. This getMethod sets or removes
94 * the {@code "Connection"} header depending on what the default keep alive
95 * mode of the message's protocol version is, as specified by
96 * {@link HttpVersion#isKeepAliveDefault()}.
97 * <ul>
98 * <li>If the connection is kept alive by default:
99 * <ul>
100 * <li>set to {@code "close"} if {@code keepAlive} is {@code false}.</li>
101 * <li>remove otherwise.</li>
102 * </ul></li>
103 * <li>If the connection is closed by default:
104 * <ul>
105 * <li>set to {@code "keep-alive"} if {@code keepAlive} is {@code true}.</li>
106 * <li>remove otherwise.</li>
107 * </ul></li>
108 * </ul>
109 * @see #setKeepAlive(HttpHeaders, HttpVersion, boolean)
110 */
111 public static void setKeepAlive(HttpMessage message, boolean keepAlive) {
112 setKeepAlive(message.headers(), message.protocolVersion(), keepAlive);
113 }
114
115 /**
116 * Sets the value of the {@code "Connection"} header depending on the
117 * protocol version of the specified message. This getMethod sets or removes
118 * the {@code "Connection"} header depending on what the default keep alive
119 * mode of the message's protocol version is, as specified by
120 * {@link HttpVersion#isKeepAliveDefault()}.
121 * <ul>
122 * <li>If the connection is kept alive by default:
123 * <ul>
124 * <li>set to {@code "close"} if {@code keepAlive} is {@code false}.</li>
125 * <li>remove otherwise.</li>
126 * </ul></li>
127 * <li>If the connection is closed by default:
128 * <ul>
129 * <li>set to {@code "keep-alive"} if {@code keepAlive} is {@code true}.</li>
130 * <li>remove otherwise.</li>
131 * </ul></li>
132 * </ul>
133 */
134 public static void setKeepAlive(HttpHeaders h, HttpVersion httpVersion, boolean keepAlive) {
135 if (httpVersion.isKeepAliveDefault()) {
136 if (keepAlive) {
137 h.remove(HttpHeaderNames.CONNECTION);
138 } else {
139 h.set(HttpHeaderNames.CONNECTION, HttpHeaderValues.CLOSE);
140 }
141 } else {
142 if (keepAlive) {
143 h.set(HttpHeaderNames.CONNECTION, HttpHeaderValues.KEEP_ALIVE);
144 } else {
145 h.remove(HttpHeaderNames.CONNECTION);
146 }
147 }
148 }
149
150 /**
151 * Returns the length of the content. Please note that this value is
152 * not retrieved from {@link HttpContent#content()} but from the
153 * {@code "Content-Length"} header, and thus they are independent from each
154 * other.
155 *
156 * @return the content length
157 *
158 * @throws NumberFormatException
159 * if the message does not have the {@code "Content-Length"} header
160 * or its value is not a number
161 */
162 public static long getContentLength(HttpMessage message) {
163 String value = message.headers().get(HttpHeaderNames.CONTENT_LENGTH);
164 if (value != null) {
165 return Long.parseLong(value);
166 }
167
168 // We know the content length if it's a Web Socket message even if
169 // Content-Length header is missing.
170 long webSocketContentLength = getWebSocketContentLength(message);
171 if (webSocketContentLength >= 0) {
172 return webSocketContentLength;
173 }
174
175 // Otherwise we don't.
176 throw new NumberFormatException("header not found: " + HttpHeaderNames.CONTENT_LENGTH);
177 }
178
179 /**
180 * Returns the length of the content or the specified default value if the message does not have the {@code
181 * "Content-Length" header}. Please note that this value is not retrieved from {@link HttpContent#content()} but
182 * from the {@code "Content-Length"} header, and thus they are independent from each other.
183 *
184 * @param message the message
185 * @param defaultValue the default value
186 * @return the content length or the specified default value
187 * @throws NumberFormatException if the {@code "Content-Length"} header does not parse as a long
188 */
189 public static long getContentLength(HttpMessage message, long defaultValue) {
190 String value = message.headers().get(HttpHeaderNames.CONTENT_LENGTH);
191 if (value != null) {
192 return Long.parseLong(value);
193 }
194
195 // We know the content length if it's a Web Socket message even if
196 // Content-Length header is missing.
197 long webSocketContentLength = getWebSocketContentLength(message);
198 if (webSocketContentLength >= 0) {
199 return webSocketContentLength;
200 }
201
202 // Otherwise we don't.
203 return defaultValue;
204 }
205
206 /**
207 * Get an {@code int} representation of {@link #getContentLength(HttpMessage, long)}.
208 *
209 * @return the content length or {@code defaultValue} if this message does
210 * not have the {@code "Content-Length"} header.
211 *
212 * @throws NumberFormatException if the {@code "Content-Length"} header does not parse as an int
213 */
214 public static int getContentLength(HttpMessage message, int defaultValue) {
215 return (int) Math.min(Integer.MAX_VALUE, getContentLength(message, (long) defaultValue));
216 }
217
218 /**
219 * Returns the content length of the specified web socket message. If the
220 * specified message is not a web socket message, {@code -1} is returned.
221 */
222 static int getWebSocketContentLength(HttpMessage message) {
223 // WebSocket messages have constant content-lengths.
224 HttpHeaders h = message.headers();
225 if (message instanceof HttpRequest) {
226 HttpRequest req = (HttpRequest) message;
227 if (HttpMethod.GET.equals(req.method()) &&
228 h.contains(HttpHeaderNames.SEC_WEBSOCKET_KEY1) &&
229 h.contains(HttpHeaderNames.SEC_WEBSOCKET_KEY2)) {
230 return 8;
231 }
232 } else if (message instanceof HttpResponse) {
233 HttpResponse res = (HttpResponse) message;
234 if (res.status().code() == 101 &&
235 h.contains(HttpHeaderNames.SEC_WEBSOCKET_ORIGIN) &&
236 h.contains(HttpHeaderNames.SEC_WEBSOCKET_LOCATION)) {
237 return 16;
238 }
239 }
240
241 // Not a web socket message
242 return -1;
243 }
244
245 /**
246 * Sets the {@code "Content-Length"} header.
247 */
248 public static void setContentLength(HttpMessage message, long length) {
249 message.headers().set(HttpHeaderNames.CONTENT_LENGTH, length);
250 }
251
252 public static boolean isContentLengthSet(HttpMessage m) {
253 return m.headers().contains(HttpHeaderNames.CONTENT_LENGTH);
254 }
255
256 /**
257 * Returns {@code true} if and only if the specified message contains an expect header and the only expectation
258 * present is the 100-continue expectation. Note that this method returns {@code false} if the expect header is
259 * not valid for the message (e.g., the message is a response, or the version on the message is HTTP/1.0).
260 *
261 * @param message the message
262 * @return {@code true} if and only if the expectation 100-continue is present and it is the only expectation
263 * present
264 */
265 public static boolean is100ContinueExpected(HttpMessage message) {
266 return isExpectHeaderValid(message)
267 // unquoted tokens in the expect header are case-insensitive, thus 100-continue is case insensitive
268 && message.headers().contains(HttpHeaderNames.EXPECT, HttpHeaderValues.CONTINUE, true);
269 }
270
271 /**
272 * Returns {@code true} if the specified message contains an expect header specifying an expectation that is not
273 * supported. Note that this method returns {@code false} if the expect header is not valid for the message
274 * (e.g., the message is a response, or the version on the message is HTTP/1.0).
275 *
276 * @param message the message
277 * @return {@code true} if and only if an expectation is present that is not supported
278 */
279 static boolean isUnsupportedExpectation(HttpMessage message) {
280 if (!isExpectHeaderValid(message)) {
281 return false;
282 }
283
284 final String expectValue = message.headers().get(HttpHeaderNames.EXPECT);
285 return expectValue != null && !HttpHeaderValues.CONTINUE.toString().equalsIgnoreCase(expectValue);
286 }
287
288 private static boolean isExpectHeaderValid(final HttpMessage message) {
289 /*
290 * Expect: 100-continue is for requests only and it works only on HTTP/1.1 or later. Note further that RFC 7231
291 * section 5.1.1 says "A server that receives a 100-continue expectation in an HTTP/1.0 request MUST ignore
292 * that expectation."
293 */
294 return message instanceof HttpRequest &&
295 message.protocolVersion().compareTo(HttpVersion.HTTP_1_1) >= 0;
296 }
297
298 /**
299 * Sets or removes the {@code "Expect: 100-continue"} header to / from the
300 * specified message. If {@code expected} is {@code true},
301 * the {@code "Expect: 100-continue"} header is set and all other previous
302 * {@code "Expect"} headers are removed. Otherwise, all {@code "Expect"}
303 * headers are removed completely.
304 */
305 public static void set100ContinueExpected(HttpMessage message, boolean expected) {
306 if (expected) {
307 message.headers().set(HttpHeaderNames.EXPECT, HttpHeaderValues.CONTINUE);
308 } else {
309 message.headers().remove(HttpHeaderNames.EXPECT);
310 }
311 }
312
313 /**
314 * Checks to see if the transfer encoding in a specified {@link HttpMessage} is chunked
315 *
316 * @param message The message to check
317 * @return True if transfer encoding is chunked, otherwise false
318 */
319 public static boolean isTransferEncodingChunked(HttpMessage message) {
320 return message.headers().containsValue(HttpHeaderNames.TRANSFER_ENCODING, HttpHeaderValues.CHUNKED, true);
321 }
322
323 /**
324 * Set the {@link HttpHeaderNames#TRANSFER_ENCODING} to either include {@link HttpHeaderValues#CHUNKED} if
325 * {@code chunked} is {@code true}, or remove {@link HttpHeaderValues#CHUNKED} if {@code chunked} is {@code false}.
326 *
327 * @param m The message which contains the headers to modify.
328 * @param chunked if {@code true} then include {@link HttpHeaderValues#CHUNKED} in the headers. otherwise remove
329 * {@link HttpHeaderValues#CHUNKED} from the headers.
330 */
331 public static void setTransferEncodingChunked(HttpMessage m, boolean chunked) {
332 if (chunked) {
333 m.headers().set(HttpHeaderNames.TRANSFER_ENCODING, HttpHeaderValues.CHUNKED);
334 m.headers().remove(HttpHeaderNames.CONTENT_LENGTH);
335 } else {
336 List<String> encodings = m.headers().getAll(HttpHeaderNames.TRANSFER_ENCODING);
337 if (encodings.isEmpty()) {
338 return;
339 }
340 List<CharSequence> values = new ArrayList<CharSequence>(encodings);
341 Iterator<CharSequence> valuesIt = values.iterator();
342 while (valuesIt.hasNext()) {
343 CharSequence value = valuesIt.next();
344 if (HttpHeaderValues.CHUNKED.contentEqualsIgnoreCase(value)) {
345 valuesIt.remove();
346 }
347 }
348 if (values.isEmpty()) {
349 m.headers().remove(HttpHeaderNames.TRANSFER_ENCODING);
350 } else {
351 m.headers().set(HttpHeaderNames.TRANSFER_ENCODING, values);
352 }
353 }
354 }
355
356 /**
357 * Fetch charset from message's Content-Type header.
358 *
359 * @param message entity to fetch Content-Type header from
360 * @return the charset from message's Content-Type header or {@link CharsetUtil#ISO_8859_1}
361 * if charset is not presented or unparsable
362 */
363 public static Charset getCharset(HttpMessage message) {
364 return getCharset(message, CharsetUtil.ISO_8859_1);
365 }
366
367 /**
368 * Fetch charset from Content-Type header value.
369 *
370 * @param contentTypeValue Content-Type header value to parse
371 * @return the charset from message's Content-Type header or {@link CharsetUtil#ISO_8859_1}
372 * if charset is not presented or unparsable
373 */
374 public static Charset getCharset(CharSequence contentTypeValue) {
375 if (contentTypeValue != null) {
376 return getCharset(contentTypeValue, CharsetUtil.ISO_8859_1);
377 } else {
378 return CharsetUtil.ISO_8859_1;
379 }
380 }
381
382 /**
383 * Fetch charset from message's Content-Type header.
384 *
385 * @param message entity to fetch Content-Type header from
386 * @param defaultCharset result to use in case of empty, incorrect or doesn't contain required part header value
387 * @return the charset from message's Content-Type header or {@code defaultCharset}
388 * if charset is not presented or unparsable
389 */
390 public static Charset getCharset(HttpMessage message, Charset defaultCharset) {
391 CharSequence contentTypeValue = message.headers().get(HttpHeaderNames.CONTENT_TYPE);
392 if (contentTypeValue != null) {
393 return getCharset(contentTypeValue, defaultCharset);
394 } else {
395 return defaultCharset;
396 }
397 }
398
399 /**
400 * Fetch charset from Content-Type header value.
401 *
402 * @param contentTypeValue Content-Type header value to parse
403 * @param defaultCharset result to use in case of empty, incorrect or doesn't contain required part header value
404 * @return the charset from message's Content-Type header or {@code defaultCharset}
405 * if charset is not presented or unparsable
406 */
407 public static Charset getCharset(CharSequence contentTypeValue, Charset defaultCharset) {
408 if (contentTypeValue != null) {
409 CharSequence charsetRaw = getCharsetAsSequence(contentTypeValue);
410 if (charsetRaw != null) {
411 if (charsetRaw.length() > 2) { // at least contains 2 quotes(")
412 if (charsetRaw.charAt(0) == '"' && charsetRaw.charAt(charsetRaw.length() - 1) == '"') {
413 charsetRaw = charsetRaw.subSequence(1, charsetRaw.length() - 1);
414 }
415 }
416 try {
417 return Charset.forName(charsetRaw.toString());
418 } catch (IllegalCharsetNameException | UnsupportedCharsetException ignored) {
419 // just return the default charset
420 }
421 }
422 }
423 return defaultCharset;
424 }
425
426 /**
427 * Fetch charset from message's Content-Type header as a char sequence.
428 *
429 * A lot of sites/possibly clients have charset="CHARSET", for example charset="utf-8". Or "utf8" instead of "utf-8"
430 * This is not according to standard, but this method provide an ability to catch desired mistakes manually in code
431 *
432 * @param message entity to fetch Content-Type header from
433 * @return the {@code CharSequence} with charset from message's Content-Type header
434 * or {@code null} if charset is not presented
435 * @deprecated use {@link #getCharsetAsSequence(HttpMessage)}
436 */
437 @Deprecated
438 public static CharSequence getCharsetAsString(HttpMessage message) {
439 return getCharsetAsSequence(message);
440 }
441
442 /**
443 * Fetch charset from message's Content-Type header as a char sequence.
444 *
445 * A lot of sites/possibly clients have charset="CHARSET", for example charset="utf-8". Or "utf8" instead of "utf-8"
446 * This is not according to standard, but this method provide an ability to catch desired mistakes manually in code
447 *
448 * @return the {@code CharSequence} with charset from message's Content-Type header
449 * or {@code null} if charset is not presented
450 */
451 public static CharSequence getCharsetAsSequence(HttpMessage message) {
452 CharSequence contentTypeValue = message.headers().get(HttpHeaderNames.CONTENT_TYPE);
453 if (contentTypeValue != null) {
454 return getCharsetAsSequence(contentTypeValue);
455 } else {
456 return null;
457 }
458 }
459
460 /**
461 * Fetch charset from Content-Type header value as a char sequence.
462 *
463 * A lot of sites/possibly clients have charset="CHARSET", for example charset="utf-8". Or "utf8" instead of "utf-8"
464 * This is not according to standard, but this method provide an ability to catch desired mistakes manually in code
465 *
466 * @param contentTypeValue Content-Type header value to parse
467 * @return the {@code CharSequence} with charset from message's Content-Type header
468 * or {@code null} if charset is not presented
469 * @throws NullPointerException in case if {@code contentTypeValue == null}
470 */
471 public static CharSequence getCharsetAsSequence(CharSequence contentTypeValue) {
472 ObjectUtil.checkNotNull(contentTypeValue, "contentTypeValue");
473
474 int indexOfCharset = AsciiString.indexOfIgnoreCaseAscii(contentTypeValue, CHARSET_EQUALS, 0);
475 if (indexOfCharset == AsciiString.INDEX_NOT_FOUND) {
476 return null;
477 }
478
479 int indexOfEncoding = indexOfCharset + CHARSET_EQUALS.length();
480 if (indexOfEncoding < contentTypeValue.length()) {
481 CharSequence charsetCandidate = contentTypeValue.subSequence(indexOfEncoding, contentTypeValue.length());
482 int indexOfSemicolon = AsciiString.indexOfIgnoreCaseAscii(charsetCandidate, SEMICOLON, 0);
483 if (indexOfSemicolon == AsciiString.INDEX_NOT_FOUND) {
484 return charsetCandidate;
485 }
486
487 return charsetCandidate.subSequence(0, indexOfSemicolon);
488 }
489
490 return null;
491 }
492
493 /**
494 * Fetch MIME type part from message's Content-Type header as a char sequence.
495 *
496 * @param message entity to fetch Content-Type header from
497 * @return the MIME type as a {@code CharSequence} from message's Content-Type header
498 * or {@code null} if content-type header or MIME type part of this header are not presented
499 * <p/>
500 * "content-type: text/html; charset=utf-8" - "text/html" will be returned <br/>
501 * "content-type: text/html" - "text/html" will be returned <br/>
502 * "content-type: " or no header - {@code null} we be returned
503 */
504 public static CharSequence getMimeType(HttpMessage message) {
505 CharSequence contentTypeValue = message.headers().get(HttpHeaderNames.CONTENT_TYPE);
506 if (contentTypeValue != null) {
507 return getMimeType(contentTypeValue);
508 } else {
509 return null;
510 }
511 }
512
513 /**
514 * Fetch MIME type part from Content-Type header value as a char sequence.
515 *
516 * @param contentTypeValue Content-Type header value to parse
517 * @return the MIME type as a {@code CharSequence} from message's Content-Type header
518 * or {@code null} if content-type header or MIME type part of this header are not presented
519 * <p/>
520 * "content-type: text/html; charset=utf-8" - "text/html" will be returned <br/>
521 * "content-type: text/html" - "text/html" will be returned <br/>
522 * "content-type: empty header - {@code null} we be returned
523 * @throws NullPointerException in case if {@code contentTypeValue == null}
524 */
525 public static CharSequence getMimeType(CharSequence contentTypeValue) {
526 ObjectUtil.checkNotNull(contentTypeValue, "contentTypeValue");
527
528 int indexOfSemicolon = AsciiString.indexOfIgnoreCaseAscii(contentTypeValue, SEMICOLON, 0);
529 if (indexOfSemicolon != AsciiString.INDEX_NOT_FOUND) {
530 return contentTypeValue.subSequence(0, indexOfSemicolon);
531 } else {
532 return contentTypeValue.length() > 0 ? contentTypeValue : null;
533 }
534 }
535
536 /**
537 * Formats the host string of an address so it can be used for computing an HTTP component
538 * such as a URL or a Host header
539 *
540 * @param addr the address
541 * @return the formatted String
542 */
543 public static String formatHostnameForHttp(InetSocketAddress addr) {
544 String hostString = NetUtil.getHostname(addr);
545 if (NetUtil.isValidIpV6Address(hostString)) {
546 if (!addr.isUnresolved()) {
547 hostString = NetUtil.toAddressString(addr.getAddress());
548 } else if (hostString.charAt(0) == '[' && hostString.charAt(hostString.length() - 1) == ']') {
549 // If IPv6 address already contains brackets, let's return as is.
550 return hostString;
551 }
552
553 return '[' + hostString + ']';
554 }
555 return hostString;
556 }
557
558 /**
559 * Validates, and optionally extracts the content length from headers. This method is not intended for
560 * general use, but is here to be shared between HTTP/1 and HTTP/2 parsing.
561 *
562 * @param contentLengthFields the content-length header fields.
563 * @param isHttp10OrEarlier {@code true} if we are handling HTTP/1.0 or earlier
564 * @param allowDuplicateContentLengths {@code true} if multiple, identical-value content lengths should be allowed.
565 * @return the normalized content length from the headers or {@code -1} if the fields were empty.
566 * @throws IllegalArgumentException if the content-length fields are not valid
567 */
568 public static long normalizeAndGetContentLength(
569 List<? extends CharSequence> contentLengthFields, boolean isHttp10OrEarlier,
570 boolean allowDuplicateContentLengths) {
571 if (contentLengthFields.isEmpty()) {
572 return -1;
573 }
574
575 // Guard against multiple Content-Length headers as stated in
576 // https://tools.ietf.org/html/rfc7230#section-3.3.2:
577 //
578 // If a message is received that has multiple Content-Length header
579 // fields with field-values consisting of the same decimal value, or a
580 // single Content-Length header field with a field value containing a
581 // list of identical decimal values (e.g., "Content-Length: 42, 42"),
582 // indicating that duplicate Content-Length header fields have been
583 // generated or combined by an upstream message processor, then the
584 // recipient MUST either reject the message as invalid or replace the
585 // duplicated field-values with a single valid Content-Length field
586 // containing that decimal value prior to determining the message body
587 // length or forwarding the message.
588 String firstField = contentLengthFields.get(0).toString();
589 boolean multipleContentLengths =
590 contentLengthFields.size() > 1 || firstField.indexOf(COMMA) >= 0;
591
592 if (multipleContentLengths && !isHttp10OrEarlier) {
593 if (allowDuplicateContentLengths) {
594 // Find and enforce that all Content-Length values are the same
595 String firstValue = null;
596 for (CharSequence field : contentLengthFields) {
597 String[] tokens = field.toString().split(COMMA_STRING, -1);
598 for (String token : tokens) {
599 String trimmed = token.trim();
600 if (firstValue == null) {
601 firstValue = trimmed;
602 } else if (!trimmed.equals(firstValue)) {
603 throw new IllegalArgumentException(
604 "Multiple Content-Length values found: " + contentLengthFields);
605 }
606 }
607 }
608 // Replace the duplicated field-values with a single valid Content-Length field
609 firstField = firstValue;
610 } else {
611 // Reject the message as invalid
612 throw new IllegalArgumentException(
613 "Multiple Content-Length values found: " + contentLengthFields);
614 }
615 }
616 // Ensure we not allow sign as part of the content-length:
617 // See https://github.com/squid-cache/squid/security/advisories/GHSA-qf3v-rc95-96j5
618 if (firstField.isEmpty() || !Character.isDigit(firstField.charAt(0))) {
619 // Reject the message as invalid
620 throw new IllegalArgumentException(
621 "Content-Length value is not a number: " + firstField);
622 }
623 try {
624 final long value = Long.parseLong(firstField);
625 return checkPositiveOrZero(value, "Content-Length value");
626 } catch (NumberFormatException e) {
627 // Reject the message as invalid
628 throw new IllegalArgumentException(
629 "Content-Length value is not a number: " + firstField, e);
630 }
631 }
632
633 /**
634 * Validate a <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">token</a> contains only allowed
635 * characters.
636 * <p>
637 * The <a href="https://tools.ietf.org/html/rfc2616#section-2.2">token</a> format is used for variety of HTTP
638 * components, like <a href="https://tools.ietf.org/html/rfc6265#section-4.1.1">cookie-name</a>,
639 * <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">field-name</a> of a
640 * <a href="https://tools.ietf.org/html/rfc7230#section-3.2">header-field</a>, or
641 * <a href="https://tools.ietf.org/html/rfc7231#section-4">request method</a>.
642 *
643 * @param token the token to validate.
644 * @return the index of the first invalid token character found, or {@code -1} if there are none.
645 */
646 static int validateToken(CharSequence token) {
647 if (token instanceof AsciiString) {
648 return validateAsciiStringToken((AsciiString) token);
649 }
650 return validateCharSequenceToken(token);
651 }
652
653 /**
654 * Validate that an {@link AsciiString} contain onlu valid
655 * <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">token</a> characters.
656 *
657 * @param token the ascii string to validate.
658 */
659 private static int validateAsciiStringToken(AsciiString token) {
660 byte[] array = token.array();
661 for (int i = token.arrayOffset(), len = token.arrayOffset() + token.length(); i < len; i++) {
662 if (!isValidTokenChar(array[i])) {
663 return i - token.arrayOffset();
664 }
665 }
666 return -1;
667 }
668
669 /**
670 * Validate that a {@link CharSequence} contain onlu valid
671 * <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">token</a> characters.
672 *
673 * @param token the character sequence to validate.
674 */
675 private static int validateCharSequenceToken(CharSequence token) {
676 for (int i = 0, len = token.length(); i < len; i++) {
677 byte value = (byte) token.charAt(i);
678 if (!isValidTokenChar(value)) {
679 return i;
680 }
681 }
682 return -1;
683 }
684
685 // HEADER
686 // header-field = field-name ":" OWS field-value OWS
687 //
688 // field-name = token
689 // token = 1*tchar
690 //
691 // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
692 // / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
693 // / DIGIT / ALPHA
694 // ; any VCHAR, except delimiters.
695 // Delimiters are chosen
696 // from the set of US-ASCII visual characters not allowed in a token
697 // (DQUOTE and "(),/:;<=>?@[\]{}")
698 //
699 // COOKIE
700 // cookie-pair = cookie-name "=" cookie-value
701 // cookie-name = token
702 // token = 1*<any CHAR except CTLs or separators>
703 // CTL = <any US-ASCII control character
704 // (octets 0 - 31) and DEL (127)>
705 // separators = "(" | ")" | "<" | ">" | "@"
706 // | "," | ";" | ":" | "\" | <">
707 // | "/" | "[" | "]" | "?" | "="
708 // | "{" | "}" | SP | HT
709 //
710 // field-name's token is equivalent to cookie-name's token, we can reuse the tchar mask for both:
711
712 // private static final class BitSet128 {
713 // private long high;
714 // private long low;
715 //
716 // BitSet128 range(char fromInc, char toInc) {
717 // for (int bit = fromInc; bit <= toInc; bit++) {
718 // if (bit < 64) {
719 // low |= 1L << bit;
720 // } else {
721 // high |= 1L << bit - 64;
722 // }
723 // }
724 // return this;
725 // }
726 //
727 // BitSet128 bits(char... bits) {
728 // for (char bit : bits) {
729 // if (bit < 64) {
730 // low |= 1L << bit;
731 // } else {
732 // high |= 1L << bit - 64;
733 // }
734 // }
735 // return this;
736 // }
737 //
738 // long high() {
739 // return high;
740 // }
741 //
742 // long low() {
743 // return low;
744 // }
745 //
746 // static boolean contains(byte bit, long high, long low) {
747 // if (bit < 0) {
748 // return false;
749 // }
750 // if (bit < 64) {
751 // return 0 != (low & 1L << bit);
752 // }
753 // return 0 != (high & 1L << bit - 64);
754 // }
755 // }
756
757 // BitSet128 tokenChars = new BitSet128()
758 // .range('0', '9').range('a', 'z').range('A', 'Z') // Alphanumeric.
759 // .bits('-', '.', '_', '~') // Unreserved characters.
760 // .bits('!', '#', '$', '%', '&', '\'', '*', '+', '^', '`', '|'); // Token special characters.
761
// These constants were calculated by the commented-out code above. Bit i of TOKEN_CHARS_LOW is set when
// the character with code i (0-63) is a token character; bit (i - 64) of TOKEN_CHARS_HIGH is set when
// the character with code i (64-127) is a token character.
private static final long TOKEN_CHARS_HIGH = 0x57ffffffc7fffffeL;
private static final long TOKEN_CHARS_LOW = 0x3ff6cfa00000000L;
765
766 private static boolean isValidTokenChar(byte bit) {
767 if (bit < 0) {
768 return false;
769 }
770 if (bit < 64) {
771 return 0 != (TOKEN_CHARS_LOW & 1L << bit);
772 }
773 return 0 != (TOKEN_CHARS_HIGH & 1L << bit - 64);
774 }
775
776 }