View Javadoc
1   /*
2    * Copyright 2022 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.handler.codec.http;
17  
18  import io.netty.util.AsciiString;
19  import io.netty.util.internal.UnstableApi;
20  
21  import static io.netty.util.AsciiString.contentEqualsIgnoreCase;
22  
23  /**
24   * Functions used to perform various validations of HTTP header names and values.
25   */
26  @UnstableApi
27  public final class HttpHeaderValidationUtil {
28      private HttpHeaderValidationUtil() {
29      }
30  
31      /**
32       * Check if a header name is "connection related".
33       * <p>
34       * The <a href="https://datatracker.ietf.org/doc/html/rfc9110#section-7.6.1">RFC9110</a> only specify an incomplete
35       * list of the following headers:
36       *
37       * <ul>
38       *     <li><tt>Connection</tt></li>
39       *     <li><tt>Proxy-Connection</tt></li>
40       *     <li><tt>Keep-Alive</tt></li>
41       *     <li><tt>TE</tt></li>
42       *     <li><tt>Transfer-Encoding</tt></li>
43       *     <li><tt>Upgrade</tt></li>
44       * </ul>
45       *
46       * @param name the name of the header to check. The check is case-insensitive.
47       * @param ignoreTeHeader {@code true} if the <tt>TE</tt> header should be ignored by this check.
48       * This is relevant for HTTP/2 header validation, where the <tt>TE</tt> header has special rules.
49       * @return {@code true} if the given header name is one of the specified connection-related headers.
50       */
51      @SuppressWarnings("deprecation") // We need to check for deprecated headers as well.
52      public static boolean isConnectionHeader(CharSequence name, boolean ignoreTeHeader) {
53          // These are the known standard and non-standard connection related headers:
54          // - upgrade (7 chars)
55          // - connection (10 chars)
56          // - keep-alive (10 chars)
57          // - proxy-connection (16 chars)
58          // - transfer-encoding (17 chars)
59          //
60          // See https://datatracker.ietf.org/doc/html/rfc9113#section-8.2.2
61          // and https://datatracker.ietf.org/doc/html/rfc9110#section-7.6.1
62          // for the list of connection related headers.
63          //
64          // We scan for these based on the length, then double-check any matching name.
65          int len = name.length();
66          switch (len) {
67              case 2: return ignoreTeHeader? false : contentEqualsIgnoreCase(name, HttpHeaderNames.TE);
68              case 7: return contentEqualsIgnoreCase(name, HttpHeaderNames.UPGRADE);
69              case 10: return contentEqualsIgnoreCase(name, HttpHeaderNames.CONNECTION) ||
70                      contentEqualsIgnoreCase(name, HttpHeaderNames.KEEP_ALIVE);
71              case 16: return contentEqualsIgnoreCase(name, HttpHeaderNames.PROXY_CONNECTION);
72              case 17: return contentEqualsIgnoreCase(name, HttpHeaderNames.TRANSFER_ENCODING);
73              default:
74                  return false;
75          }
76      }
77  
78      /**
79       * If the given header is {@link HttpHeaderNames#TE} and the given header value is <em>not</em>
80       * {@link HttpHeaderValues#TRAILERS}, then return {@code true}. Otherwie, {@code false}.
81       * <p>
82       * The string comparisons are case-insensitive.
83       * <p>
84       * This check is important for HTTP/2 header validation.
85       *
86       * @param name the header name to check if it is <tt>TE</tt> or not.
87       * @param value the header value to check if it is something other than <tt>TRAILERS</tt>.
88       * @return {@code true} only if the header name is <tt>TE</tt>, and the header value is <em>not</em>
89       * <tt>TRAILERS</tt>. Otherwise, {@code false}.
90       */
91      public static boolean isTeNotTrailers(CharSequence name, CharSequence value) {
92          if (name.length() == 2) {
93              return contentEqualsIgnoreCase(name, HttpHeaderNames.TE) &&
94                      !contentEqualsIgnoreCase(value, HttpHeaderValues.TRAILERS);
95          }
96          return false;
97      }
98  
99      /**
100      * Validate the given HTTP header value by searching for any illegal characters.
101      *
102      * @param value the HTTP header value to validate.
103      * @return the index of the first illegal character found, or {@code -1} if there are none and the header value is
104      * valid.
105      */
106     public static int validateValidHeaderValue(CharSequence value) {
107         int length = value.length();
108         if (length == 0) {
109             return -1;
110         }
111         if (value instanceof AsciiString) {
112             return verifyValidHeaderValueAsciiString((AsciiString) value);
113         }
114         return verifyValidHeaderValueCharSequence(value);
115     }
116 
117     private static int verifyValidHeaderValueAsciiString(AsciiString value) {
118         // Validate value to field-content rule.
119         //  field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
120         //  field-vchar    = VCHAR / obs-text
121         //  VCHAR          = %x21-7E ; visible (printing) characters
122         //  obs-text       = %x80-FF
123         //  SP             = %x20
124         //  HTAB           = %x09 ; horizontal tab
125         //  See: https://datatracker.ietf.org/doc/html/rfc7230#section-3.2
126         //  And: https://datatracker.ietf.org/doc/html/rfc5234#appendix-B.1
127         final byte[] array = value.array();
128         final int start = value.arrayOffset();
129         int b = array[start] & 0xFF;
130         if (b < 0x21 || b == 0x7F) {
131             return 0;
132         }
133         int length = value.length();
134         for (int i = start + 1; i < length; i++) {
135             b = array[i] & 0xFF;
136             if (b < 0x20 && b != 0x09 || b == 0x7F) {
137                 return i - start;
138             }
139         }
140         return -1;
141     }
142 
143     private static int verifyValidHeaderValueCharSequence(CharSequence value) {
144         // Validate value to field-content rule.
145         //  field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
146         //  field-vchar    = VCHAR / obs-text
147         //  VCHAR          = %x21-7E ; visible (printing) characters
148         //  obs-text       = %x80-FF
149         //  SP             = %x20
150         //  HTAB           = %x09 ; horizontal tab
151         //  See: https://datatracker.ietf.org/doc/html/rfc7230#section-3.2
152         //  And: https://datatracker.ietf.org/doc/html/rfc5234#appendix-B.1
153         int b = value.charAt(0);
154         if (b < 0x21 || b == 0x7F) {
155             return 0;
156         }
157         int length = value.length();
158         for (int i = 1; i < length; i++) {
159             b = value.charAt(i);
160             if (b < 0x20 && b != 0x09 || b == 0x7F) {
161                 return i;
162             }
163         }
164         return -1;
165     }
166 
167     /**
168      * Validate a <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">token</a> contains only allowed
169      * characters.
170      * <p>
171      * The <a href="https://tools.ietf.org/html/rfc2616#section-2.2">token</a> format is used for variety of HTTP
172      * components, like  <a href="https://tools.ietf.org/html/rfc6265#section-4.1.1">cookie-name</a>,
173      * <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">field-name</a> of a
174      * <a href="https://tools.ietf.org/html/rfc7230#section-3.2">header-field</a>, or
175      * <a href="https://tools.ietf.org/html/rfc7231#section-4">request method</a>.
176      *
177      * @param token the token to validate.
178      * @return the index of the first invalid token character found, or {@code -1} if there are none.
179      */
180     public static int validateToken(CharSequence token) {
181         if (token instanceof AsciiString) {
182             return validateAsciiStringToken((AsciiString) token);
183         }
184         return validateCharSequenceToken(token);
185     }
186 
187     /**
188      * Validate that an {@link AsciiString} contain onlu valid
189      * <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">token</a> characters.
190      *
191      * @param token the ascii string to validate.
192      */
193     private static int validateAsciiStringToken(AsciiString token) {
194         byte[] array = token.array();
195         for (int i = token.arrayOffset(), len = token.arrayOffset() + token.length(); i < len; i++) {
196             if (!BitSet128.contains(array[i], TOKEN_CHARS_HIGH, TOKEN_CHARS_LOW)) {
197                 return i - token.arrayOffset();
198             }
199         }
200         return -1;
201     }
202 
203     /**
204      * Validate that a {@link CharSequence} contain onlu valid
205      * <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">token</a> characters.
206      *
207      * @param token the character sequence to validate.
208      */
209     private static int validateCharSequenceToken(CharSequence token) {
210         for (int i = 0, len = token.length(); i < len; i++) {
211             byte value = (byte) token.charAt(i);
212             if (!BitSet128.contains(value, TOKEN_CHARS_HIGH, TOKEN_CHARS_LOW)) {
213                 return i;
214             }
215         }
216         return -1;
217     }
218 
219     private static final long TOKEN_CHARS_HIGH;
220     private static final long TOKEN_CHARS_LOW;
221     static {
222         // HEADER
223         // header-field   = field-name ":" OWS field-value OWS
224         //
225         // field-name     = token
226         // token          = 1*tchar
227         //
228         // tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
229         //                    / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
230         //                    / DIGIT / ALPHA
231         //                    ; any VCHAR, except delimiters.
232         //  Delimiters are chosen
233         //   from the set of US-ASCII visual characters not allowed in a token
234         //   (DQUOTE and "(),/:;<=>?@[\]{}")
235         //
236         // COOKIE
237         // cookie-pair       = cookie-name "=" cookie-value
238         // cookie-name       = token
239         // token          = 1*<any CHAR except CTLs or separators>
240         // CTL = <any US-ASCII control character
241         //       (octets 0 - 31) and DEL (127)>
242         // separators     = "(" | ")" | "<" | ">" | "@"
243         //                      | "," | ";" | ":" | "\" | <">
244         //                      | "/" | "[" | "]" | "?" | "="
245         //                      | "{" | "}" | SP | HT
246         //
247         // field-name's token is equivalent to cookie-name's token, we can reuse the tchar mask for both:
248         BitSet128 tokenChars = new BitSet128()
249                 .range('0', '9').range('a', 'z').range('A', 'Z') // Alphanumeric.
250                 .bits('-', '.', '_', '~') // Unreserved characters.
251                 .bits('!', '#', '$', '%', '&', '\'', '*', '+', '^', '`', '|'); // Token special characters.
252         TOKEN_CHARS_HIGH = tokenChars.high();
253         TOKEN_CHARS_LOW = tokenChars.low();
254     }
255 
256     private static final class BitSet128 {
257         private long high;
258         private long low;
259 
260         BitSet128 range(char fromInc, char toInc) {
261             for (int bit = fromInc; bit <= toInc; bit++) {
262                 if (bit < 64) {
263                     low |= 1L << bit;
264                 } else {
265                     high |= 1L << bit - 64;
266                 }
267             }
268             return this;
269         }
270 
271         BitSet128 bits(char... bits) {
272             for (char bit : bits) {
273                 if (bit < 64) {
274                     low |= 1L << bit;
275                 } else {
276                     high |= 1L << bit - 64;
277                 }
278             }
279             return this;
280         }
281 
282         long high() {
283             return high;
284         }
285 
286         long low() {
287             return low;
288         }
289 
290         static boolean contains(byte bit, long high, long low) {
291             if (bit < 0) {
292                 return false;
293             }
294             if (bit < 64) {
295                 return 0 != (low & 1L << bit);
296             }
297             return 0 != (high & 1L << bit - 64);
298         }
299     }
300 }