View Javadoc
1   /*
2    * Copyright 2015 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.handler.codec.http;
17  
18  import java.net.InetSocketAddress;
19  import java.net.URI;
20  import java.nio.charset.Charset;
21  import java.nio.charset.IllegalCharsetNameException;
22  import java.nio.charset.UnsupportedCharsetException;
23  import java.util.ArrayList;
24  import java.util.Iterator;
25  import java.util.List;
26  
27  import io.netty.util.AsciiString;
28  import io.netty.util.CharsetUtil;
29  import io.netty.util.NetUtil;
30  import io.netty.util.internal.ObjectUtil;
31  
32  import static io.netty.util.internal.StringUtil.COMMA;
33  import static io.netty.util.internal.ObjectUtil.checkPositiveOrZero;
34  
35  /**
36   * Utility methods useful in the HTTP context.
37   */
38  public final class HttpUtil {
39  
40      private static final AsciiString CHARSET_EQUALS = AsciiString.of(HttpHeaderValues.CHARSET + "=");
41      private static final AsciiString SEMICOLON = AsciiString.cached(";");
42      private static final String COMMA_STRING = String.valueOf(COMMA);
43  
44      private HttpUtil() { }
45  
46      /**
47       * Determine if a uri is in origin-form according to
48       * <a href="https://tools.ietf.org/html/rfc7230#section-5.3">rfc7230, 5.3</a>.
49       */
50      public static boolean isOriginForm(URI uri) {
51          return isOriginForm(uri.toString());
52      }
53  
54      /**
55       * Determine if a string uri is in origin-form according to
56       * <a href="https://tools.ietf.org/html/rfc7230#section-5.3">rfc7230, 5.3</a>.
57       */
58      public static boolean isOriginForm(String uri) {
59          return uri.startsWith("/");
60      }
61  
62      /**
63       * Determine if a uri is in asterisk-form according to
64       * <a href="https://tools.ietf.org/html/rfc7230#section-5.3">rfc7230, 5.3</a>.
65       */
66      public static boolean isAsteriskForm(URI uri) {
67          return isAsteriskForm(uri.toString());
68      }
69  
70      /**
71       * Determine if a string uri is in asterisk-form according to
72       * <a href="https://tools.ietf.org/html/rfc7230#section-5.3">rfc7230, 5.3</a>.
73       */
74      public static boolean isAsteriskForm(String uri) {
75          return "*".equals(uri);
76      }
77  
78      /**
79       * Returns {@code true} if and only if the connection can remain open and
80       * thus 'kept alive'.  This methods respects the value of the.
81       *
82       * {@code "Connection"} header first and then the return value of
83       * {@link HttpVersion#isKeepAliveDefault()}.
84       */
85      public static boolean isKeepAlive(HttpMessage message) {
86          return !message.headers().containsValue(HttpHeaderNames.CONNECTION, HttpHeaderValues.CLOSE, true) &&
87                 (message.protocolVersion().isKeepAliveDefault() ||
88                  message.headers().containsValue(HttpHeaderNames.CONNECTION, HttpHeaderValues.KEEP_ALIVE, true));
89      }
90  
91      /**
92       * Sets the value of the {@code "Connection"} header depending on the
93       * protocol version of the specified message. This getMethod sets or removes
94       * the {@code "Connection"} header depending on what the default keep alive
95       * mode of the message's protocol version is, as specified by
96       * {@link HttpVersion#isKeepAliveDefault()}.
97       * <ul>
98       * <li>If the connection is kept alive by default:
99       *     <ul>
100      *     <li>set to {@code "close"} if {@code keepAlive} is {@code false}.</li>
101      *     <li>remove otherwise.</li>
102      *     </ul></li>
103      * <li>If the connection is closed by default:
104      *     <ul>
105      *     <li>set to {@code "keep-alive"} if {@code keepAlive} is {@code true}.</li>
106      *     <li>remove otherwise.</li>
107      *     </ul></li>
108      * </ul>
109      * @see #setKeepAlive(HttpHeaders, HttpVersion, boolean)
110      */
111     public static void setKeepAlive(HttpMessage message, boolean keepAlive) {
112         setKeepAlive(message.headers(), message.protocolVersion(), keepAlive);
113     }
114 
115     /**
116      * Sets the value of the {@code "Connection"} header depending on the
117      * protocol version of the specified message. This getMethod sets or removes
118      * the {@code "Connection"} header depending on what the default keep alive
119      * mode of the message's protocol version is, as specified by
120      * {@link HttpVersion#isKeepAliveDefault()}.
121      * <ul>
122      * <li>If the connection is kept alive by default:
123      *     <ul>
124      *     <li>set to {@code "close"} if {@code keepAlive} is {@code false}.</li>
125      *     <li>remove otherwise.</li>
126      *     </ul></li>
127      * <li>If the connection is closed by default:
128      *     <ul>
129      *     <li>set to {@code "keep-alive"} if {@code keepAlive} is {@code true}.</li>
130      *     <li>remove otherwise.</li>
131      *     </ul></li>
132      * </ul>
133      */
134     public static void setKeepAlive(HttpHeaders h, HttpVersion httpVersion, boolean keepAlive) {
135         if (httpVersion.isKeepAliveDefault()) {
136             if (keepAlive) {
137                 h.remove(HttpHeaderNames.CONNECTION);
138             } else {
139                 h.set(HttpHeaderNames.CONNECTION, HttpHeaderValues.CLOSE);
140             }
141         } else {
142             if (keepAlive) {
143                 h.set(HttpHeaderNames.CONNECTION, HttpHeaderValues.KEEP_ALIVE);
144             } else {
145                 h.remove(HttpHeaderNames.CONNECTION);
146             }
147         }
148     }
149 
150     /**
151      * Returns the length of the content. Please note that this value is
152      * not retrieved from {@link HttpContent#content()} but from the
153      * {@code "Content-Length"} header, and thus they are independent from each
154      * other.
155      *
156      * @return the content length
157      *
158      * @throws NumberFormatException
159      *         if the message does not have the {@code "Content-Length"} header
160      *         or its value is not a number
161      */
162     public static long getContentLength(HttpMessage message) {
163         String value = message.headers().get(HttpHeaderNames.CONTENT_LENGTH);
164         if (value != null) {
165             return Long.parseLong(value);
166         }
167 
168         // We know the content length if it's a Web Socket message even if
169         // Content-Length header is missing.
170         long webSocketContentLength = getWebSocketContentLength(message);
171         if (webSocketContentLength >= 0) {
172             return webSocketContentLength;
173         }
174 
175         // Otherwise we don't.
176         throw new NumberFormatException("header not found: " + HttpHeaderNames.CONTENT_LENGTH);
177     }
178 
179     /**
180      * Returns the length of the content or the specified default value if the message does not have the {@code
181      * "Content-Length" header}. Please note that this value is not retrieved from {@link HttpContent#content()} but
182      * from the {@code "Content-Length"} header, and thus they are independent from each other.
183      *
184      * @param message      the message
185      * @param defaultValue the default value
186      * @return the content length or the specified default value
187      * @throws NumberFormatException if the {@code "Content-Length"} header does not parse as a long
188      */
189     public static long getContentLength(HttpMessage message, long defaultValue) {
190         String value = message.headers().get(HttpHeaderNames.CONTENT_LENGTH);
191         if (value != null) {
192             return Long.parseLong(value);
193         }
194 
195         // We know the content length if it's a Web Socket message even if
196         // Content-Length header is missing.
197         long webSocketContentLength = getWebSocketContentLength(message);
198         if (webSocketContentLength >= 0) {
199             return webSocketContentLength;
200         }
201 
202         // Otherwise we don't.
203         return defaultValue;
204     }
205 
206     /**
207      * Get an {@code int} representation of {@link #getContentLength(HttpMessage, long)}.
208      *
209      * @return the content length or {@code defaultValue} if this message does
210      *         not have the {@code "Content-Length"} header.
211      *
212      * @throws NumberFormatException if the {@code "Content-Length"} header does not parse as an int
213      */
214     public static int getContentLength(HttpMessage message, int defaultValue) {
215         return (int) Math.min(Integer.MAX_VALUE, getContentLength(message, (long) defaultValue));
216     }
217 
218     /**
219      * Returns the content length of the specified web socket message. If the
220      * specified message is not a web socket message, {@code -1} is returned.
221      */
222     static int getWebSocketContentLength(HttpMessage message) {
223         // WebSocket messages have constant content-lengths.
224         HttpHeaders h = message.headers();
225         if (message instanceof HttpRequest) {
226             HttpRequest req = (HttpRequest) message;
227             if (HttpMethod.GET.equals(req.method()) &&
228                     h.contains(HttpHeaderNames.SEC_WEBSOCKET_KEY1) &&
229                     h.contains(HttpHeaderNames.SEC_WEBSOCKET_KEY2)) {
230                 return 8;
231             }
232         } else if (message instanceof HttpResponse) {
233             HttpResponse res = (HttpResponse) message;
234             if (res.status().code() == 101 &&
235                     h.contains(HttpHeaderNames.SEC_WEBSOCKET_ORIGIN) &&
236                     h.contains(HttpHeaderNames.SEC_WEBSOCKET_LOCATION)) {
237                 return 16;
238             }
239         }
240 
241         // Not a web socket message
242         return -1;
243     }
244 
245     /**
246      * Sets the {@code "Content-Length"} header.
247      */
248     public static void setContentLength(HttpMessage message, long length) {
249         message.headers().set(HttpHeaderNames.CONTENT_LENGTH, length);
250     }
251 
252     public static boolean isContentLengthSet(HttpMessage m) {
253         return m.headers().contains(HttpHeaderNames.CONTENT_LENGTH);
254     }
255 
256     /**
257      * Returns {@code true} if and only if the specified message contains an expect header and the only expectation
258      * present is the 100-continue expectation. Note that this method returns {@code false} if the expect header is
259      * not valid for the message (e.g., the message is a response, or the version on the message is HTTP/1.0).
260      *
261      * @param message the message
262      * @return {@code true} if and only if the expectation 100-continue is present and it is the only expectation
263      * present
264      */
265     public static boolean is100ContinueExpected(HttpMessage message) {
266         return isExpectHeaderValid(message)
267           // unquoted tokens in the expect header are case-insensitive, thus 100-continue is case insensitive
268           && message.headers().contains(HttpHeaderNames.EXPECT, HttpHeaderValues.CONTINUE, true);
269     }
270 
271     /**
272      * Returns {@code true} if the specified message contains an expect header specifying an expectation that is not
273      * supported. Note that this method returns {@code false} if the expect header is not valid for the message
274      * (e.g., the message is a response, or the version on the message is HTTP/1.0).
275      *
276      * @param message the message
277      * @return {@code true} if and only if an expectation is present that is not supported
278      */
279     static boolean isUnsupportedExpectation(HttpMessage message) {
280         if (!isExpectHeaderValid(message)) {
281             return false;
282         }
283 
284         final String expectValue = message.headers().get(HttpHeaderNames.EXPECT);
285         return expectValue != null && !HttpHeaderValues.CONTINUE.toString().equalsIgnoreCase(expectValue);
286     }
287 
288     private static boolean isExpectHeaderValid(final HttpMessage message) {
289         /*
290          * Expect: 100-continue is for requests only and it works only on HTTP/1.1 or later. Note further that RFC 7231
291          * section 5.1.1 says "A server that receives a 100-continue expectation in an HTTP/1.0 request MUST ignore
292          * that expectation."
293          */
294         return message instanceof HttpRequest &&
295                 message.protocolVersion().compareTo(HttpVersion.HTTP_1_1) >= 0;
296     }
297 
298     /**
299      * Sets or removes the {@code "Expect: 100-continue"} header to / from the
300      * specified message. If {@code expected} is {@code true},
301      * the {@code "Expect: 100-continue"} header is set and all other previous
302      * {@code "Expect"} headers are removed.  Otherwise, all {@code "Expect"}
303      * headers are removed completely.
304      */
305     public static void set100ContinueExpected(HttpMessage message, boolean expected) {
306         if (expected) {
307             message.headers().set(HttpHeaderNames.EXPECT, HttpHeaderValues.CONTINUE);
308         } else {
309             message.headers().remove(HttpHeaderNames.EXPECT);
310         }
311     }
312 
313     /**
314      * Checks to see if the transfer encoding in a specified {@link HttpMessage} is chunked
315      *
316      * @param message The message to check
317      * @return True if transfer encoding is chunked, otherwise false
318      */
319     public static boolean isTransferEncodingChunked(HttpMessage message) {
320         return message.headers().containsValue(HttpHeaderNames.TRANSFER_ENCODING, HttpHeaderValues.CHUNKED, true);
321     }
322 
323     /**
324      * Set the {@link HttpHeaderNames#TRANSFER_ENCODING} to either include {@link HttpHeaderValues#CHUNKED} if
325      * {@code chunked} is {@code true}, or remove {@link HttpHeaderValues#CHUNKED} if {@code chunked} is {@code false}.
326      *
327      * @param m The message which contains the headers to modify.
328      * @param chunked if {@code true} then include {@link HttpHeaderValues#CHUNKED} in the headers. otherwise remove
329      * {@link HttpHeaderValues#CHUNKED} from the headers.
330      */
331     public static void setTransferEncodingChunked(HttpMessage m, boolean chunked) {
332         if (chunked) {
333             m.headers().set(HttpHeaderNames.TRANSFER_ENCODING, HttpHeaderValues.CHUNKED);
334             m.headers().remove(HttpHeaderNames.CONTENT_LENGTH);
335         } else {
336             List<String> encodings = m.headers().getAll(HttpHeaderNames.TRANSFER_ENCODING);
337             if (encodings.isEmpty()) {
338                 return;
339             }
340             List<CharSequence> values = new ArrayList<CharSequence>(encodings);
341             Iterator<CharSequence> valuesIt = values.iterator();
342             while (valuesIt.hasNext()) {
343                 CharSequence value = valuesIt.next();
344                 if (HttpHeaderValues.CHUNKED.contentEqualsIgnoreCase(value)) {
345                     valuesIt.remove();
346                 }
347             }
348             if (values.isEmpty()) {
349                 m.headers().remove(HttpHeaderNames.TRANSFER_ENCODING);
350             } else {
351                 m.headers().set(HttpHeaderNames.TRANSFER_ENCODING, values);
352             }
353         }
354     }
355 
356     /**
357      * Fetch charset from message's Content-Type header.
358      *
359      * @param message entity to fetch Content-Type header from
360      * @return the charset from message's Content-Type header or {@link CharsetUtil#ISO_8859_1}
361      * if charset is not presented or unparsable
362      */
363     public static Charset getCharset(HttpMessage message) {
364         return getCharset(message, CharsetUtil.ISO_8859_1);
365     }
366 
367     /**
368      * Fetch charset from Content-Type header value.
369      *
370      * @param contentTypeValue Content-Type header value to parse
371      * @return the charset from message's Content-Type header or {@link CharsetUtil#ISO_8859_1}
372      * if charset is not presented or unparsable
373      */
374     public static Charset getCharset(CharSequence contentTypeValue) {
375         if (contentTypeValue != null) {
376             return getCharset(contentTypeValue, CharsetUtil.ISO_8859_1);
377         } else {
378             return CharsetUtil.ISO_8859_1;
379         }
380     }
381 
382     /**
383      * Fetch charset from message's Content-Type header.
384      *
385      * @param message        entity to fetch Content-Type header from
386      * @param defaultCharset result to use in case of empty, incorrect or doesn't contain required part header value
387      * @return the charset from message's Content-Type header or {@code defaultCharset}
388      * if charset is not presented or unparsable
389      */
390     public static Charset getCharset(HttpMessage message, Charset defaultCharset) {
391         CharSequence contentTypeValue = message.headers().get(HttpHeaderNames.CONTENT_TYPE);
392         if (contentTypeValue != null) {
393             return getCharset(contentTypeValue, defaultCharset);
394         } else {
395             return defaultCharset;
396         }
397     }
398 
399     /**
400      * Fetch charset from Content-Type header value.
401      *
402      * @param contentTypeValue Content-Type header value to parse
403      * @param defaultCharset   result to use in case of empty, incorrect or doesn't contain required part header value
404      * @return the charset from message's Content-Type header or {@code defaultCharset}
405      * if charset is not presented or unparsable
406      */
407     public static Charset getCharset(CharSequence contentTypeValue, Charset defaultCharset) {
408         if (contentTypeValue != null) {
409             CharSequence charsetRaw = getCharsetAsSequence(contentTypeValue);
410             if (charsetRaw != null) {
411                 if (charsetRaw.length() > 2) { // at least contains 2 quotes(")
412                     if (charsetRaw.charAt(0) == '"' && charsetRaw.charAt(charsetRaw.length() - 1) == '"') {
413                         charsetRaw = charsetRaw.subSequence(1, charsetRaw.length() - 1);
414                     }
415                 }
416                 try {
417                     return Charset.forName(charsetRaw.toString());
418                 } catch (IllegalCharsetNameException ignored) {
419                     // just return the default charset
420                 } catch (UnsupportedCharsetException ignored) {
421                     // just return the default charset
422                 }
423             }
424         }
425         return defaultCharset;
426     }
427 
428     /**
429      * Fetch charset from message's Content-Type header as a char sequence.
430      *
431      * A lot of sites/possibly clients have charset="CHARSET", for example charset="utf-8". Or "utf8" instead of "utf-8"
432      * This is not according to standard, but this method provide an ability to catch desired mistakes manually in code
433      *
434      * @param message entity to fetch Content-Type header from
435      * @return the {@code CharSequence} with charset from message's Content-Type header
436      * or {@code null} if charset is not presented
437      * @deprecated use {@link #getCharsetAsSequence(HttpMessage)}
438      */
439     @Deprecated
440     public static CharSequence getCharsetAsString(HttpMessage message) {
441         return getCharsetAsSequence(message);
442     }
443 
444     /**
445      * Fetch charset from message's Content-Type header as a char sequence.
446      *
447      * A lot of sites/possibly clients have charset="CHARSET", for example charset="utf-8". Or "utf8" instead of "utf-8"
448      * This is not according to standard, but this method provide an ability to catch desired mistakes manually in code
449      *
450      * @return the {@code CharSequence} with charset from message's Content-Type header
451      * or {@code null} if charset is not presented
452      */
453     public static CharSequence getCharsetAsSequence(HttpMessage message) {
454         CharSequence contentTypeValue = message.headers().get(HttpHeaderNames.CONTENT_TYPE);
455         if (contentTypeValue != null) {
456             return getCharsetAsSequence(contentTypeValue);
457         } else {
458             return null;
459         }
460     }
461 
462     /**
463      * Fetch charset from Content-Type header value as a char sequence.
464      *
465      * A lot of sites/possibly clients have charset="CHARSET", for example charset="utf-8". Or "utf8" instead of "utf-8"
466      * This is not according to standard, but this method provide an ability to catch desired mistakes manually in code
467      *
468      * @param contentTypeValue Content-Type header value to parse
469      * @return the {@code CharSequence} with charset from message's Content-Type header
470      * or {@code null} if charset is not presented
471      * @throws NullPointerException in case if {@code contentTypeValue == null}
472      */
473     public static CharSequence getCharsetAsSequence(CharSequence contentTypeValue) {
474         ObjectUtil.checkNotNull(contentTypeValue, "contentTypeValue");
475 
476         int indexOfCharset = AsciiString.indexOfIgnoreCaseAscii(contentTypeValue, CHARSET_EQUALS, 0);
477         if (indexOfCharset == AsciiString.INDEX_NOT_FOUND) {
478             return null;
479         }
480 
481         int indexOfEncoding = indexOfCharset + CHARSET_EQUALS.length();
482         if (indexOfEncoding < contentTypeValue.length()) {
483             CharSequence charsetCandidate = contentTypeValue.subSequence(indexOfEncoding, contentTypeValue.length());
484             int indexOfSemicolon = AsciiString.indexOfIgnoreCaseAscii(charsetCandidate, SEMICOLON, 0);
485             if (indexOfSemicolon == AsciiString.INDEX_NOT_FOUND) {
486                 return charsetCandidate;
487             }
488 
489             return charsetCandidate.subSequence(0, indexOfSemicolon);
490         }
491 
492         return null;
493     }
494 
495     /**
496      * Fetch MIME type part from message's Content-Type header as a char sequence.
497      *
498      * @param message entity to fetch Content-Type header from
499      * @return the MIME type as a {@code CharSequence} from message's Content-Type header
500      * or {@code null} if content-type header or MIME type part of this header are not presented
501      * <p/>
502      * "content-type: text/html; charset=utf-8" - "text/html" will be returned <br/>
503      * "content-type: text/html" - "text/html" will be returned <br/>
504      * "content-type: " or no header - {@code null} we be returned
505      */
506     public static CharSequence getMimeType(HttpMessage message) {
507         CharSequence contentTypeValue = message.headers().get(HttpHeaderNames.CONTENT_TYPE);
508         if (contentTypeValue != null) {
509             return getMimeType(contentTypeValue);
510         } else {
511             return null;
512         }
513     }
514 
515     /**
516      * Fetch MIME type part from Content-Type header value as a char sequence.
517      *
518      * @param contentTypeValue Content-Type header value to parse
519      * @return the MIME type as a {@code CharSequence} from message's Content-Type header
520      * or {@code null} if content-type header or MIME type part of this header are not presented
521      * <p/>
522      * "content-type: text/html; charset=utf-8" - "text/html" will be returned <br/>
523      * "content-type: text/html" - "text/html" will be returned <br/>
524      * "content-type: empty header - {@code null} we be returned
525      * @throws NullPointerException in case if {@code contentTypeValue == null}
526      */
527     public static CharSequence getMimeType(CharSequence contentTypeValue) {
528         ObjectUtil.checkNotNull(contentTypeValue, "contentTypeValue");
529 
530         int indexOfSemicolon = AsciiString.indexOfIgnoreCaseAscii(contentTypeValue, SEMICOLON, 0);
531         if (indexOfSemicolon != AsciiString.INDEX_NOT_FOUND) {
532             return contentTypeValue.subSequence(0, indexOfSemicolon);
533         } else {
534             return contentTypeValue.length() > 0 ? contentTypeValue : null;
535         }
536     }
537 
538     /**
539      * Formats the host string of an address so it can be used for computing an HTTP component
540      * such as a URL or a Host header
541      *
542      * @param addr the address
543      * @return the formatted String
544      */
545     public static String formatHostnameForHttp(InetSocketAddress addr) {
546         String hostString = NetUtil.getHostname(addr);
547         if (NetUtil.isValidIpV6Address(hostString)) {
548             if (!addr.isUnresolved()) {
549                 hostString = NetUtil.toAddressString(addr.getAddress());
550             } else if (hostString.charAt(0) == '[' && hostString.charAt(hostString.length() - 1) == ']') {
551                 // If IPv6 address already contains brackets, let's return as is.
552                 return hostString;
553             }
554 
555             return '[' + hostString + ']';
556         }
557         return hostString;
558     }
559 
560     /**
561      * Validates, and optionally extracts the content length from headers. This method is not intended for
562      * general use, but is here to be shared between HTTP/1 and HTTP/2 parsing.
563      *
564      * @param contentLengthFields the content-length header fields.
565      * @param isHttp10OrEarlier {@code true} if we are handling HTTP/1.0 or earlier
566      * @param allowDuplicateContentLengths {@code true}  if multiple, identical-value content lengths should be allowed.
567      * @return the normalized content length from the headers or {@code -1} if the fields were empty.
568      * @throws IllegalArgumentException if the content-length fields are not valid
569      */
570     public static long normalizeAndGetContentLength(
571             List<? extends CharSequence> contentLengthFields, boolean isHttp10OrEarlier,
572             boolean allowDuplicateContentLengths) {
573         if (contentLengthFields.isEmpty()) {
574             return -1;
575         }
576 
577         // Guard against multiple Content-Length headers as stated in
578         // https://tools.ietf.org/html/rfc7230#section-3.3.2:
579         //
580         // If a message is received that has multiple Content-Length header
581         //   fields with field-values consisting of the same decimal value, or a
582         //   single Content-Length header field with a field value containing a
583         //   list of identical decimal values (e.g., "Content-Length: 42, 42"),
584         //   indicating that duplicate Content-Length header fields have been
585         //   generated or combined by an upstream message processor, then the
586         //   recipient MUST either reject the message as invalid or replace the
587         //   duplicated field-values with a single valid Content-Length field
588         //   containing that decimal value prior to determining the message body
589         //   length or forwarding the message.
590         String firstField = contentLengthFields.get(0).toString();
591         boolean multipleContentLengths =
592                 contentLengthFields.size() > 1 || firstField.indexOf(COMMA) >= 0;
593 
594         if (multipleContentLengths && !isHttp10OrEarlier) {
595             if (allowDuplicateContentLengths) {
596                 // Find and enforce that all Content-Length values are the same
597                 String firstValue = null;
598                 for (CharSequence field : contentLengthFields) {
599                     String[] tokens = field.toString().split(COMMA_STRING, -1);
600                     for (String token : tokens) {
601                         String trimmed = token.trim();
602                         if (firstValue == null) {
603                             firstValue = trimmed;
604                         } else if (!trimmed.equals(firstValue)) {
605                             throw new IllegalArgumentException(
606                                     "Multiple Content-Length values found: " + contentLengthFields);
607                         }
608                     }
609                 }
610                 // Replace the duplicated field-values with a single valid Content-Length field
611                 firstField = firstValue;
612             } else {
613                 // Reject the message as invalid
614                 throw new IllegalArgumentException(
615                         "Multiple Content-Length values found: " + contentLengthFields);
616             }
617         }
618         // Ensure we not allow sign as part of the content-length:
619         // See https://github.com/squid-cache/squid/security/advisories/GHSA-qf3v-rc95-96j5
620         if (firstField.isEmpty() || !Character.isDigit(firstField.charAt(0))) {
621             // Reject the message as invalid
622             throw new IllegalArgumentException(
623                     "Content-Length value is not a number: " + firstField);
624         }
625         try {
626             final long value = Long.parseLong(firstField);
627             return checkPositiveOrZero(value, "Content-Length value");
628         } catch (NumberFormatException e) {
629             // Reject the message as invalid
630             throw new IllegalArgumentException(
631                     "Content-Length value is not a number: " + firstField, e);
632         }
633     }
634 
635     /**
636      * Validate a <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">token</a> contains only allowed
637      * characters.
638      * <p>
639      * The <a href="https://tools.ietf.org/html/rfc2616#section-2.2">token</a> format is used for variety of HTTP
640      * components, like  <a href="https://tools.ietf.org/html/rfc6265#section-4.1.1">cookie-name</a>,
641      * <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">field-name</a> of a
642      * <a href="https://tools.ietf.org/html/rfc7230#section-3.2">header-field</a>, or
643      * <a href="https://tools.ietf.org/html/rfc7231#section-4">request method</a>.
644      *
645      * @param token the token to validate.
646      * @return the index of the first invalid token character found, or {@code -1} if there are none.
647      */
648     static int validateToken(CharSequence token) {
649         if (token instanceof AsciiString) {
650             return validateAsciiStringToken((AsciiString) token);
651         }
652         return validateCharSequenceToken(token);
653     }
654 
655     /**
656      * Validate that an {@link AsciiString} contain onlu valid
657      * <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">token</a> characters.
658      *
659      * @param token the ascii string to validate.
660      */
661     private static int validateAsciiStringToken(AsciiString token) {
662         byte[] array = token.array();
663         for (int i = token.arrayOffset(), len = token.arrayOffset() + token.length(); i < len; i++) {
664             if (!BitSet128.contains(array[i], TOKEN_CHARS_HIGH, TOKEN_CHARS_LOW)) {
665                 return i - token.arrayOffset();
666             }
667         }
668         return -1;
669     }
670 
671     /**
672      * Validate that a {@link CharSequence} contain onlu valid
673      * <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">token</a> characters.
674      *
675      * @param token the character sequence to validate.
676      */
677     private static int validateCharSequenceToken(CharSequence token) {
678         for (int i = 0, len = token.length(); i < len; i++) {
679             byte value = (byte) token.charAt(i);
680             if (!BitSet128.contains(value, TOKEN_CHARS_HIGH, TOKEN_CHARS_LOW)) {
681                 return i;
682             }
683         }
684         return -1;
685     }
686 
687     private static final long TOKEN_CHARS_HIGH;
688     private static final long TOKEN_CHARS_LOW;
689     static {
690         // HEADER
691         // header-field   = field-name ":" OWS field-value OWS
692         //
693         // field-name     = token
694         // token          = 1*tchar
695         //
696         // tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
697         //                    / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
698         //                    / DIGIT / ALPHA
699         //                    ; any VCHAR, except delimiters.
700         //  Delimiters are chosen
701         //   from the set of US-ASCII visual characters not allowed in a token
702         //   (DQUOTE and "(),/:;<=>?@[\]{}")
703         //
704         // COOKIE
705         // cookie-pair       = cookie-name "=" cookie-value
706         // cookie-name       = token
707         // token          = 1*<any CHAR except CTLs or separators>
708         // CTL = <any US-ASCII control character
709         //       (octets 0 - 31) and DEL (127)>
710         // separators     = "(" | ")" | "<" | ">" | "@"
711         //                      | "," | ";" | ":" | "\" | <">
712         //                      | "/" | "[" | "]" | "?" | "="
713         //                      | "{" | "}" | SP | HT
714         //
715         // field-name's token is equivalent to cookie-name's token, we can reuse the tchar mask for both:
716         BitSet128 tokenChars = new BitSet128()
717                 .range('0', '9').range('a', 'z').range('A', 'Z') // Alphanumeric.
718                 .bits('-', '.', '_', '~') // Unreserved characters.
719                 .bits('!', '#', '$', '%', '&', '\'', '*', '+', '^', '`', '|'); // Token special characters.
720         TOKEN_CHARS_HIGH = tokenChars.high();
721         TOKEN_CHARS_LOW = tokenChars.low();
722     }
723 
724     private static final class BitSet128 {
725         private long high;
726         private long low;
727 
728         BitSet128 range(char fromInc, char toInc) {
729             for (int bit = fromInc; bit <= toInc; bit++) {
730                 if (bit < 64) {
731                     low |= 1L << bit;
732                 } else {
733                     high |= 1L << bit - 64;
734                 }
735             }
736             return this;
737         }
738 
739         BitSet128 bits(char... bits) {
740             for (char bit : bits) {
741                 if (bit < 64) {
742                     low |= 1L << bit;
743                 } else {
744                     high |= 1L << bit - 64;
745                 }
746             }
747             return this;
748         }
749 
750         long high() {
751             return high;
752         }
753 
754         long low() {
755             return low;
756         }
757 
758         static boolean contains(byte bit, long high, long low) {
759             if (bit < 0) {
760                 return false;
761             }
762             if (bit < 64) {
763                 return 0 != (low & 1L << bit);
764             }
765             return 0 != (high & 1L << bit - 64);
766         }
767     }
768 }