1 /* 2 * Copyright 2015 The Netty Project 3 * 4 * The Netty Project licenses this file to you under the Apache License, 5 * version 2.0 (the "License"); you may not use this file except in compliance 6 * with the License. You may obtain a copy of the License at: 7 * 8 * https://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations 14 * under the License. 15 */ 16 package io.netty5.handler.codec.http; 17 18 import io.netty5.util.AsciiString; 19 import io.netty5.util.CharsetUtil; 20 import io.netty5.util.NetUtil; 21 import io.netty5.util.internal.UnstableApi; 22 23 import java.net.InetSocketAddress; 24 import java.net.URI; 25 import java.nio.charset.Charset; 26 import java.nio.charset.IllegalCharsetNameException; 27 import java.nio.charset.UnsupportedCharsetException; 28 import java.util.ArrayList; 29 import java.util.List; 30 31 import static io.netty5.util.internal.ObjectUtil.checkPositiveOrZero; 32 import static io.netty5.util.internal.StringUtil.COMMA; 33 import static java.util.Objects.requireNonNull; 34 35 /** 36 * Utility methods useful in the HTTP context. 37 */ 38 public final class HttpUtil { 39 40 private static final AsciiString CHARSET_EQUALS = AsciiString.of(HttpHeaderValues.CHARSET + "="); 41 private static final AsciiString SEMICOLON = AsciiString.cached(";"); 42 private static final String COMMA_STRING = String.valueOf(COMMA); 43 44 private HttpUtil() { } 45 46 /** 47 * Determine if a uri is in origin-form according to 48 * <a href="https://tools.ietf.org/html/rfc7230#section-5.3">rfc7230, 5.3</a>. 49 */ 50 public static boolean isOriginForm(URI uri) { 51 return isOriginForm(uri.toString()); 52 } 53 54 /** 55 * Determine if a string uri is in origin-form according to 56 * <a href="https://tools.ietf.org/html/rfc7230#section-5.3">rfc7230, 5.3</a>. 57 */ 58 public static boolean isOriginForm(String uri) { 59 return uri.startsWith("/"); 60 } 61 62 /** 63 * Determine if a uri is in asterisk-form according to 64 * <a href="https://tools.ietf.org/html/rfc7230#section-5.3">rfc7230, 5.3</a>. 65 */ 66 public static boolean isAsteriskForm(URI uri) { 67 return isAsteriskForm(uri.toString()); 68 } 69 70 /** 71 * Determine if a string uri is in asterisk-form according to 72 * <a href="https://tools.ietf.org/html/rfc7230#section-5.3">rfc7230, 5.3</a>. 73 */ 74 public static boolean isAsteriskForm(String uri) { 75 return "*".equals(uri); 76 } 77 78 /** 79 * Returns {@code true} if and only if the connection can remain open and 80 * thus 'kept alive'. This methods respects the value of the. 81 * 82 * {@code "Connection"} header first and then the return value of 83 * {@link HttpVersion#isKeepAliveDefault()}. 84 */ 85 public static boolean isKeepAlive(HttpMessage message) { 86 return !message.headers().containsValue(HttpHeaderNames.CONNECTION, HttpHeaderValues.CLOSE, true) && 87 (message.protocolVersion().isKeepAliveDefault() || 88 message.headers().containsValue(HttpHeaderNames.CONNECTION, HttpHeaderValues.KEEP_ALIVE, true)); 89 } 90 91 /** 92 * Sets the value of the {@code "Connection"} header depending on the 93 * protocol version of the specified message. This getMethod sets or removes 94 * the {@code "Connection"} header depending on what the default keep alive 95 * mode of the message's protocol version is, as specified by 96 * {@link HttpVersion#isKeepAliveDefault()}. 97 * <ul> 98 * <li>If the connection is kept alive by default: 99 * <ul> 100 * <li>set to {@code "close"} if {@code keepAlive} is {@code false}.</li> 101 * <li>remove otherwise.</li> 102 * </ul></li> 103 * <li>If the connection is closed by default: 104 * <ul> 105 * <li>set to {@code "keep-alive"} if {@code keepAlive} is {@code true}.</li> 106 * <li>remove otherwise.</li> 107 * </ul></li> 108 * </ul> 109 * @see #setKeepAlive(HttpHeaders, HttpVersion, boolean) 110 */ 111 public static void setKeepAlive(HttpMessage message, boolean keepAlive) { 112 setKeepAlive(message.headers(), message.protocolVersion(), keepAlive); 113 } 114 115 /** 116 * Sets the value of the {@code "Connection"} header depending on the 117 * protocol version of the specified message. This getMethod sets or removes 118 * the {@code "Connection"} header depending on what the default keep alive 119 * mode of the message's protocol version is, as specified by 120 * {@link HttpVersion#isKeepAliveDefault()}. 121 * <ul> 122 * <li>If the connection is kept alive by default: 123 * <ul> 124 * <li>set to {@code "close"} if {@code keepAlive} is {@code false}.</li> 125 * <li>remove otherwise.</li> 126 * </ul></li> 127 * <li>If the connection is closed by default: 128 * <ul> 129 * <li>set to {@code "keep-alive"} if {@code keepAlive} is {@code true}.</li> 130 * <li>remove otherwise.</li> 131 * </ul></li> 132 * </ul> 133 */ 134 public static void setKeepAlive(HttpHeaders h, HttpVersion httpVersion, boolean keepAlive) { 135 if (httpVersion.isKeepAliveDefault()) { 136 if (keepAlive) { 137 h.remove(HttpHeaderNames.CONNECTION); 138 } else { 139 h.set(HttpHeaderNames.CONNECTION, HttpHeaderValues.CLOSE); 140 } 141 } else { 142 if (keepAlive) { 143 h.set(HttpHeaderNames.CONNECTION, HttpHeaderValues.KEEP_ALIVE); 144 } else { 145 h.remove(HttpHeaderNames.CONNECTION); 146 } 147 } 148 } 149 150 /** 151 * Returns the length of the content. Please note that this value is 152 * not retrieved from {@link HttpContent#payload()} but from the 153 * {@code "Content-Length"} header, and thus they are independent from each 154 * other. 155 * 156 * @return the content length 157 * 158 * @throws NumberFormatException 159 * if the message does not have the {@code "Content-Length"} header 160 * or its value is not a number 161 */ 162 public static long getContentLength(HttpMessage message) { 163 String value = message.headers().get(HttpHeaderNames.CONTENT_LENGTH); 164 if (value != null) { 165 return Long.parseLong(value); 166 } 167 168 throw new NumberFormatException("header not found: " + HttpHeaderNames.CONTENT_LENGTH); 169 } 170 171 /** 172 * Returns the length of the content or the specified default value if the message does not have the {@code 173 * "Content-Length" header}. Please note that this value is not retrieved from {@link HttpContent#payload()} but 174 * from the {@code "Content-Length"} header, and thus they are independent from each other. 175 * 176 * @param message the message 177 * @param defaultValue the default value 178 * @return the content length or the specified default value 179 * @throws NumberFormatException if the {@code "Content-Length"} header does not parse as a long 180 */ 181 public static long getContentLength(HttpMessage message, long defaultValue) { 182 String value = message.headers().get(HttpHeaderNames.CONTENT_LENGTH); 183 if (value != null) { 184 return Long.parseLong(value); 185 } 186 187 return defaultValue; 188 } 189 190 /** 191 * Get an {@code int} representation of {@link #getContentLength(HttpMessage, long)}. 192 * 193 * @return the content length or {@code defaultValue} if this message does 194 * not have the {@code "Content-Length"} header. 195 * 196 * @throws NumberFormatException if the {@code "Content-Length"} header does not parse as an int 197 */ 198 public static int getContentLength(HttpMessage message, int defaultValue) { 199 return (int) Math.min(Integer.MAX_VALUE, getContentLength(message, (long) defaultValue)); 200 } 201 202 /** 203 * Sets the {@code "Content-Length"} header. 204 */ 205 public static void setContentLength(HttpMessage message, long length) { 206 message.headers().set(HttpHeaderNames.CONTENT_LENGTH, length); 207 } 208 209 public static boolean isContentLengthSet(HttpMessage m) { 210 return m.headers().contains(HttpHeaderNames.CONTENT_LENGTH); 211 } 212 213 /** 214 * Returns {@code true} if and only if the specified message contains an expect header and the only expectation 215 * present is the 100-continue expectation. Note that this method returns {@code false} if the expect header is 216 * not valid for the message (e.g., the message is a response, or the version on the message is HTTP/1.0). 217 * 218 * @param message the message 219 * @return {@code true} if and only if the expectation 100-continue is present and it is the only expectation 220 * present 221 */ 222 public static boolean is100ContinueExpected(HttpMessage message) { 223 return isExpectHeaderValid(message) 224 // unquoted tokens in the expect header are case-insensitive, thus 100-continue is case insensitive 225 && message.headers().contains(HttpHeaderNames.EXPECT, HttpHeaderValues.CONTINUE, true); 226 } 227 228 /** 229 * Returns {@code true} if the specified message contains an expect header specifying an expectation that is not 230 * supported. Note that this method returns {@code false} if the expect header is not valid for the message 231 * (e.g., the message is a response, or the version on the message is HTTP/1.0). 232 * 233 * @param message the message 234 * @return {@code true} if and only if an expectation is present that is not supported 235 */ 236 static boolean isUnsupportedExpectation(HttpMessage message) { 237 if (!isExpectHeaderValid(message)) { 238 return false; 239 } 240 241 final String expectValue = message.headers().get(HttpHeaderNames.EXPECT); 242 return expectValue != null && !HttpHeaderValues.CONTINUE.toString().equalsIgnoreCase(expectValue); 243 } 244 245 private static boolean isExpectHeaderValid(final HttpMessage message) { 246 /* 247 * Expect: 100-continue is for requests only and it works only on HTTP/1.1 or later. Note further that RFC 7231 248 * section 5.1.1 says "A server that receives a 100-continue expectation in an HTTP/1.0 request MUST ignore 249 * that expectation." 250 */ 251 return message instanceof HttpRequest && 252 message.protocolVersion().compareTo(HttpVersion.HTTP_1_1) >= 0; 253 } 254 255 /** 256 * Sets or removes the {@code "Expect: 100-continue"} header to / from the 257 * specified message. If {@code expected} is {@code true}, 258 * the {@code "Expect: 100-continue"} header is set and all other previous 259 * {@code "Expect"} headers are removed. Otherwise, all {@code "Expect"} 260 * headers are removed completely. 261 */ 262 public static void set100ContinueExpected(HttpMessage message, boolean expected) { 263 if (expected) { 264 message.headers().set(HttpHeaderNames.EXPECT, HttpHeaderValues.CONTINUE); 265 } else { 266 message.headers().remove(HttpHeaderNames.EXPECT); 267 } 268 } 269 270 /** 271 * Checks to see if the transfer encoding in a specified {@link HttpMessage} is chunked 272 * 273 * @param message The message to check 274 * @return True if transfer encoding is chunked, otherwise false 275 */ 276 public static boolean isTransferEncodingChunked(HttpMessage message) { 277 return message.headers().containsValue(HttpHeaderNames.TRANSFER_ENCODING, HttpHeaderValues.CHUNKED, true); 278 } 279 280 /** 281 * Set the {@link HttpHeaderNames#TRANSFER_ENCODING} to either include {@link HttpHeaderValues#CHUNKED} if 282 * {@code chunked} is {@code true}, or remove {@link HttpHeaderValues#CHUNKED} if {@code chunked} is {@code false}. 283 * 284 * @param m The message which contains the headers to modify. 285 * @param chunked if {@code true} then include {@link HttpHeaderValues#CHUNKED} in the headers. otherwise remove 286 * {@link HttpHeaderValues#CHUNKED} from the headers. 287 */ 288 public static void setTransferEncodingChunked(HttpMessage m, boolean chunked) { 289 if (chunked) { 290 m.headers().set(HttpHeaderNames.TRANSFER_ENCODING, HttpHeaderValues.CHUNKED); 291 m.headers().remove(HttpHeaderNames.CONTENT_LENGTH); 292 } else { 293 List<String> encodings = m.headers().getAll(HttpHeaderNames.TRANSFER_ENCODING); 294 if (encodings.isEmpty()) { 295 return; 296 } 297 List<CharSequence> values = new ArrayList<>(encodings); 298 values.removeIf(HttpHeaderValues.CHUNKED::contentEqualsIgnoreCase); 299 if (values.isEmpty()) { 300 m.headers().remove(HttpHeaderNames.TRANSFER_ENCODING); 301 } else { 302 m.headers().set(HttpHeaderNames.TRANSFER_ENCODING, values); 303 } 304 } 305 } 306 307 /** 308 * Fetch charset from message's Content-Type header. 309 * 310 * @param message entity to fetch Content-Type header from 311 * @return the charset from message's Content-Type header or {@link CharsetUtil#ISO_8859_1} 312 * if charset is not presented or unparsable 313 */ 314 public static Charset getCharset(HttpMessage message) { 315 return getCharset(message, CharsetUtil.ISO_8859_1); 316 } 317 318 /** 319 * Fetch charset from Content-Type header value. 320 * 321 * @param contentTypeValue Content-Type header value to parse 322 * @return the charset from message's Content-Type header or {@link CharsetUtil#ISO_8859_1} 323 * if charset is not presented or unparsable 324 */ 325 public static Charset getCharset(CharSequence contentTypeValue) { 326 if (contentTypeValue != null) { 327 return getCharset(contentTypeValue, CharsetUtil.ISO_8859_1); 328 } else { 329 return CharsetUtil.ISO_8859_1; 330 } 331 } 332 333 /** 334 * Fetch charset from message's Content-Type header. 335 * 336 * @param message entity to fetch Content-Type header from 337 * @param defaultCharset result to use in case of empty, incorrect or doesn't contain required part header value 338 * @return the charset from message's Content-Type header or {@code defaultCharset} 339 * if charset is not presented or unparsable 340 */ 341 public static Charset getCharset(HttpMessage message, Charset defaultCharset) { 342 CharSequence contentTypeValue = message.headers().get(HttpHeaderNames.CONTENT_TYPE); 343 if (contentTypeValue != null) { 344 return getCharset(contentTypeValue, defaultCharset); 345 } else { 346 return defaultCharset; 347 } 348 } 349 350 /** 351 * Fetch charset from Content-Type header value. 352 * 353 * @param contentTypeValue Content-Type header value to parse 354 * @param defaultCharset result to use in case of empty, incorrect or doesn't contain required part header value 355 * @return the charset from message's Content-Type header or {@code defaultCharset} 356 * if charset is not presented or unparsable 357 */ 358 public static Charset getCharset(CharSequence contentTypeValue, Charset defaultCharset) { 359 if (contentTypeValue != null) { 360 CharSequence charsetRaw = getCharsetAsSequence(contentTypeValue); 361 if (charsetRaw != null) { 362 if (charsetRaw.length() > 2) { // at least contains 2 quotes(") 363 if (charsetRaw.charAt(0) == '"' && charsetRaw.charAt(charsetRaw.length() - 1) == '"') { 364 charsetRaw = charsetRaw.subSequence(1, charsetRaw.length() - 1); 365 } 366 } 367 try { 368 return Charset.forName(charsetRaw.toString()); 369 } catch (UnsupportedCharsetException | IllegalCharsetNameException ignored) { 370 // just return the default charset 371 return defaultCharset; 372 } 373 } else { 374 return defaultCharset; 375 } 376 } else { 377 return defaultCharset; 378 } 379 } 380 381 /** 382 * Fetch charset from message's Content-Type header as a char sequence. 383 * 384 * A lot of sites/possibly clients have charset="CHARSET", for example charset="utf-8". Or "utf8" instead of "utf-8" 385 * This is not according to standard, but this method provide an ability to catch desired mistakes manually in code 386 * 387 * @param message entity to fetch Content-Type header from 388 * @return the {@code CharSequence} with charset from message's Content-Type header 389 * or {@code null} if charset is not presented 390 * @deprecated use {@link #getCharsetAsSequence(HttpMessage)} 391 */ 392 @Deprecated 393 public static CharSequence getCharsetAsString(HttpMessage message) { 394 return getCharsetAsSequence(message); 395 } 396 397 /** 398 * Fetch charset from message's Content-Type header as a char sequence. 399 * 400 * A lot of sites/possibly clients have charset="CHARSET", for example charset="utf-8". Or "utf8" instead of "utf-8" 401 * This is not according to standard, but this method provide an ability to catch desired mistakes manually in code 402 * 403 * @return the {@code CharSequence} with charset from message's Content-Type header 404 * or {@code null} if charset is not presented 405 */ 406 public static CharSequence getCharsetAsSequence(HttpMessage message) { 407 CharSequence contentTypeValue = message.headers().get(HttpHeaderNames.CONTENT_TYPE); 408 if (contentTypeValue != null) { 409 return getCharsetAsSequence(contentTypeValue); 410 } else { 411 return null; 412 } 413 } 414 415 /** 416 * Fetch charset from Content-Type header value as a char sequence. 417 * 418 * A lot of sites/possibly clients have charset="CHARSET", for example charset="utf-8". Or "utf8" instead of "utf-8" 419 * This is not according to standard, but this method provide an ability to catch desired mistakes manually in code 420 * 421 * @param contentTypeValue Content-Type header value to parse 422 * @return the {@code CharSequence} with charset from message's Content-Type header 423 * or {@code null} if charset is not presented 424 * @throws NullPointerException in case if {@code contentTypeValue == null} 425 */ 426 public static CharSequence getCharsetAsSequence(CharSequence contentTypeValue) { 427 requireNonNull(contentTypeValue, "contentTypeValue"); 428 429 int indexOfCharset = AsciiString.indexOfIgnoreCaseAscii(contentTypeValue, CHARSET_EQUALS, 0); 430 if (indexOfCharset == AsciiString.INDEX_NOT_FOUND) { 431 return null; 432 } 433 434 int indexOfEncoding = indexOfCharset + CHARSET_EQUALS.length(); 435 if (indexOfEncoding < contentTypeValue.length()) { 436 CharSequence charsetCandidate = contentTypeValue.subSequence(indexOfEncoding, contentTypeValue.length()); 437 int indexOfSemicolon = AsciiString.indexOfIgnoreCaseAscii(charsetCandidate, SEMICOLON, 0); 438 if (indexOfSemicolon == AsciiString.INDEX_NOT_FOUND) { 439 return charsetCandidate; 440 } 441 442 return charsetCandidate.subSequence(0, indexOfSemicolon); 443 } 444 445 return null; 446 } 447 448 /** 449 * Fetch MIME type part from message's Content-Type header as a char sequence. 450 * 451 * @param message entity to fetch Content-Type header from 452 * @return the MIME type as a {@code CharSequence} from message's Content-Type header 453 * or {@code null} if content-type header or MIME type part of this header are not presented 454 * <p/> 455 * "content-type: text/html; charset=utf-8" - "text/html" will be returned <br/> 456 * "content-type: text/html" - "text/html" will be returned <br/> 457 * "content-type: " or no header - {@code null} we be returned 458 */ 459 public static CharSequence getMimeType(HttpMessage message) { 460 CharSequence contentTypeValue = message.headers().get(HttpHeaderNames.CONTENT_TYPE); 461 if (contentTypeValue != null) { 462 return getMimeType(contentTypeValue); 463 } else { 464 return null; 465 } 466 } 467 468 /** 469 * Fetch MIME type part from Content-Type header value as a char sequence. 470 * 471 * @param contentTypeValue Content-Type header value to parse 472 * @return the MIME type as a {@code CharSequence} from message's Content-Type header 473 * or {@code null} if content-type header or MIME type part of this header are not presented 474 * <p/> 475 * "content-type: text/html; charset=utf-8" - "text/html" will be returned <br/> 476 * "content-type: text/html" - "text/html" will be returned <br/> 477 * "content-type: empty header - {@code null} we be returned 478 * @throws NullPointerException in case if {@code contentTypeValue == null} 479 */ 480 public static CharSequence getMimeType(CharSequence contentTypeValue) { 481 requireNonNull(contentTypeValue, "contentTypeValue"); 482 483 int indexOfSemicolon = AsciiString.indexOfIgnoreCaseAscii(contentTypeValue, SEMICOLON, 0); 484 if (indexOfSemicolon != AsciiString.INDEX_NOT_FOUND) { 485 return contentTypeValue.subSequence(0, indexOfSemicolon); 486 } else { 487 return contentTypeValue.length() > 0 ? contentTypeValue : null; 488 } 489 } 490 491 /** 492 * Formats the host string of an address so it can be used for computing an HTTP component 493 * such as a URL or a Host header 494 * 495 * @param addr the address 496 * @return the formatted String 497 */ 498 public static String formatHostnameForHttp(InetSocketAddress addr) { 499 String hostString = NetUtil.getHostname(addr); 500 if (NetUtil.isValidIpV6Address(hostString)) { 501 if (!addr.isUnresolved()) { 502 hostString = NetUtil.toAddressString(addr.getAddress()); 503 } 504 return '[' + hostString + ']'; 505 } 506 return hostString; 507 } 508 509 /** 510 * Validates, and optionally extracts the content length from headers. This method is not intended for 511 * general use, but is here to be shared between HTTP/1 and HTTP/2 parsing. 512 * 513 * @param contentLengthFields the content-length header fields. 514 * @param isHttp10OrEarlier {@code true} if we are handling HTTP/1.0 or earlier 515 * @param allowDuplicateContentLengths {@code true} if multiple, identical-value content lengths should be allowed. 516 * @return the normalized content length from the headers or {@code -1} if the fields were empty. 517 * @throws IllegalArgumentException if the content-length fields are not valid 518 */ 519 @UnstableApi 520 public static long normalizeAndGetContentLength( 521 List<? extends CharSequence> contentLengthFields, boolean isHttp10OrEarlier, 522 boolean allowDuplicateContentLengths) { 523 if (contentLengthFields.isEmpty()) { 524 return -1; 525 } 526 527 // Guard against multiple Content-Length headers as stated in 528 // https://tools.ietf.org/html/rfc7230#section-3.3.2: 529 // 530 // If a message is received that has multiple Content-Length header 531 // fields with field-values consisting of the same decimal value, or a 532 // single Content-Length header field with a field value containing a 533 // list of identical decimal values (e.g., "Content-Length: 42, 42"), 534 // indicating that duplicate Content-Length header fields have been 535 // generated or combined by an upstream message processor, then the 536 // recipient MUST either reject the message as invalid or replace the 537 // duplicated field-values with a single valid Content-Length field 538 // containing that decimal value prior to determining the message body 539 // length or forwarding the message. 540 String firstField = contentLengthFields.get(0).toString(); 541 boolean multipleContentLengths = 542 contentLengthFields.size() > 1 || firstField.indexOf(COMMA) >= 0; 543 544 if (multipleContentLengths && !isHttp10OrEarlier) { 545 if (allowDuplicateContentLengths) { 546 // Find and enforce that all Content-Length values are the same 547 String firstValue = null; 548 for (CharSequence field : contentLengthFields) { 549 String[] tokens = field.toString().split(COMMA_STRING, -1); 550 for (String token : tokens) { 551 String trimmed = token.trim(); 552 if (firstValue == null) { 553 firstValue = trimmed; 554 } else if (!trimmed.equals(firstValue)) { 555 throw new IllegalArgumentException( 556 "Multiple Content-Length values found: " + contentLengthFields); 557 } 558 } 559 } 560 // Replace the duplicated field-values with a single valid Content-Length field 561 firstField = firstValue; 562 } else { 563 // Reject the message as invalid 564 throw new IllegalArgumentException( 565 "Multiple Content-Length values found: " + contentLengthFields); 566 } 567 } 568 // Ensure we not allow sign as part of the content-length: 569 // See https://github.com/squid-cache/squid/security/advisories/GHSA-qf3v-rc95-96j5 570 if (firstField.isEmpty() || !Character.isDigit(firstField.charAt(0))) { 571 // Reject the message as invalid 572 throw new IllegalArgumentException( 573 "Content-Length value is not a number: " + firstField); 574 } 575 try { 576 final long value = Long.parseLong(firstField); 577 return checkPositiveOrZero(value, "Content-Length value"); 578 } catch (NumberFormatException e) { 579 // Reject the message as invalid 580 throw new IllegalArgumentException( 581 "Content-Length value is not a number: " + firstField, e); 582 } 583 } 584 }