View Javadoc
1   /*
2    * Copyright 2012 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
 *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty5.util.internal;
import io.netty5.util.concurrent.FastThreadLocal;
import io.netty5.util.internal.logging.InternalLogger;
import io.netty5.util.internal.logging.InternalLoggerFactory;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

import static java.util.Objects.requireNonNull;
31  /**
32   * String utility class.
33   */
34  public final class StringUtil {
36      public static final String EMPTY_STRING = "";
37      public static final String NEWLINE = SystemPropertyUtil.get("line.separator", "\n");
39      public static final char DOUBLE_QUOTE = '\"';
40      public static final char COMMA = ',';
41      public static final char LINE_FEED = '\n';
42      public static final char CARRIAGE_RETURN = '\r';
43      public static final char TAB = '\t';
44      public static final char SPACE = 0x20;
46      private static final String[] BYTE2HEX_PAD = new String[256];
47      private static final String[] BYTE2HEX_NOPAD = new String[256];
48      private static final byte[] HEX2B;
50      /**
51       * 2 - Quote character at beginning and end.
52       * 5 - Extra allowance for anticipated escape characters that may be added.
53       */
54      private static final int CSV_NUMBER_ESCAPE_CHARACTERS = 2 + 5;
55      private static final char PACKAGE_SEPARATOR_CHAR = '.';
57      private static final int STRING_BUILDER_INITIAL_SIZE;
58      private static final int STRING_BUILDER_MAX_SIZE;
59      private static final InternalLogger logger = InternalLoggerFactory.getInstance(StringUtil.class);
61      static {
62          // Generate the lookup table that converts a byte into a 2-digit hexadecimal integer.
63          for (int i = 0; i < BYTE2HEX_PAD.length; i++) {
64              String str = Integer.toHexString(i);
65              BYTE2HEX_PAD[i] = i > 0xf ? str : '0' + str;
66              BYTE2HEX_NOPAD[i] = str;
67          }
68          // Generate the lookup table that converts an hex char into its decimal value:
69          // the size of the table is such that the JVM is capable of save any bounds-check
70          // if a char type is used as an index.
71          HEX2B = new byte[Character.MAX_VALUE + 1];
72          Arrays.fill(HEX2B, (byte) -1);
73          HEX2B['0'] = 0;
74          HEX2B['1'] = 1;
75          HEX2B['2'] = 2;
76          HEX2B['3'] = 3;
77          HEX2B['4'] = 4;
78          HEX2B['5'] = 5;
79          HEX2B['6'] = 6;
80          HEX2B['7'] = 7;
81          HEX2B['8'] = 8;
82          HEX2B['9'] = 9;
83          HEX2B['A'] = 10;
84          HEX2B['B'] = 11;
85          HEX2B['C'] = 12;
86          HEX2B['D'] = 13;
87          HEX2B['E'] = 14;
88          HEX2B['F'] = 15;
89          HEX2B['a'] = 10;
90          HEX2B['b'] = 11;
91          HEX2B['c'] = 12;
92          HEX2B['d'] = 13;
93          HEX2B['e'] = 14;
94          HEX2B['f'] = 15;
97                  SystemPropertyUtil.getInt("io.netty5.stringUtil.stringBuilder.initialSize", 1024);
98          logger.debug("-Dio.netty5.stringUtil.stringBuilder.initialSize: {}", STRING_BUILDER_INITIAL_SIZE);
100         STRING_BUILDER_MAX_SIZE = SystemPropertyUtil.getInt("io.netty5.stringUtil.stringBuilder.maxSize", 1024 * 4);
101         logger.debug("-Dio.netty5.stringUtil.stringBuilder.maxSize: {}", STRING_BUILDER_MAX_SIZE);
102     }
104     private static final FastThreadLocal<StringBuilder> STRING_BUILDERS = new FastThreadLocal<>() {
105         @Override
106         protected StringBuilder initialValue() {
107             return new StringBuilder(STRING_BUILDER_INITIAL_SIZE);
108         }
109     };
111     private StringUtil() {
112         // Unused.
113     }
115     public static StringBuilder threadLocalStringBuilder() {
116         StringBuilder sb = STRING_BUILDERS.get();
117         if (sb.capacity() > STRING_BUILDER_MAX_SIZE) {
118             sb.setLength(STRING_BUILDER_INITIAL_SIZE);
119             sb.trimToSize();
120         }
121         sb.setLength(0);
122         return sb;
123     }
125     /**
126      * Get the item after one char delim if the delim is found (else null).
127      * This operation is a simplified and optimized
128      * version of {@link String#split(String, int)}.
129      */
130     public static String substringAfter(String value, char delim) {
131         int pos = value.indexOf(delim);
132         if (pos >= 0) {
133             return value.substring(pos + 1);
134         }
135         return null;
136     }
138     /**
139      * Checks if two strings have the same suffix of specified length
140      *
141      * @param s   string
142      * @param p   string
143      * @param len length of the common suffix
144      * @return true if both s and p are not null and both have the same suffix. Otherwise - false
145      */
146     public static boolean commonSuffixOfLength(String s, String p, int len) {
147         return s != null && p != null && len >= 0 && s.regionMatches(s.length() - len, p, p.length() - len, len);
148     }
150     /**
151      * Converts the specified byte value into a 2-digit hexadecimal integer.
152      */
153     public static String byteToHexStringPadded(int value) {
154         return BYTE2HEX_PAD[value & 0xff];
155     }
157     /**
158      * Converts the specified byte value into a 2-digit hexadecimal integer and appends it to the specified buffer.
159      */
160     public static <T extends Appendable> T byteToHexStringPadded(T buf, int value) {
161         try {
162             buf.append(byteToHexStringPadded(value));
163         } catch (IOException e) {
164             throw new UncheckedIOException(e);
165         }
166         return buf;
167     }
169     /**
170      * Converts the specified byte array into a hexadecimal value.
171      */
172     public static String toHexStringPadded(byte[] src) {
173         return toHexStringPadded(src, 0, src.length);
174     }
176     /**
177      * Converts the specified byte array into a hexadecimal value.
178      */
179     public static String toHexStringPadded(byte[] src, int offset, int length) {
180         return toHexStringPadded(new StringBuilder(length << 1), src, offset, length).toString();
181     }
183     /**
184      * Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
185      */
186     public static <T extends Appendable> T toHexStringPadded(T dst, byte[] src) {
187         return toHexStringPadded(dst, src, 0, src.length);
188     }
190     /**
191      * Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
192      */
193     public static <T extends Appendable> T toHexStringPadded(T dst, byte[] src, int offset, int length) {
194         final int end = offset + length;
195         for (int i = offset; i < end; i++) {
196             byteToHexStringPadded(dst, src[i]);
197         }
198         return dst;
199     }
201     /**
202      * Converts the specified byte value into a hexadecimal integer.
203      */
204     public static String byteToHexString(int value) {
205         return BYTE2HEX_NOPAD[value & 0xff];
206     }
208     /**
209      * Converts the specified byte value into a hexadecimal integer and appends it to the specified buffer.
210      */
211     public static <T extends Appendable> T byteToHexString(T buf, int value) {
212         try {
213             buf.append(byteToHexString(value));
214         } catch (IOException e) {
215             throw new UncheckedIOException(e);
216         }
217         return buf;
218     }
220     /**
221      * Converts the specified byte array into a hexadecimal value.
222      */
223     public static String toHexString(byte[] src) {
224         return toHexString(src, 0, src.length);
225     }
227     /**
228      * Converts the specified byte array into a hexadecimal value.
229      */
230     public static String toHexString(byte[] src, int offset, int length) {
231         return toHexString(new StringBuilder(length << 1), src, offset, length).toString();
232     }
234     /**
235      * Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
236      */
237     public static <T extends Appendable> T toHexString(T dst, byte[] src) {
238         return toHexString(dst, src, 0, src.length);
239     }
241     /**
242      * Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
243      */
244     public static <T extends Appendable> T toHexString(T dst, byte[] src, int offset, int length) {
245         assert length >= 0;
246         if (length == 0) {
247             return dst;
248         }
250         final int end = offset + length;
251         final int endMinusOne = end - 1;
252         int i;
254         // Skip preceding zeroes.
255         for (i = offset; i < endMinusOne; i++) {
256             if (src[i] != 0) {
257                 break;
258             }
259         }
261         byteToHexString(dst, src[i++]);
262         int remaining = end - i;
263         toHexStringPadded(dst, src, i, remaining);
265         return dst;
266     }
268     /**
269      * Helper to decode half of a hexadecimal number from a string.
270      * @param c The ASCII character of the hexadecimal number to decode.
271      * Must be in the range {@code [0-9a-fA-F]}.
272      * @return The hexadecimal value represented in the ASCII character
273      * given, or {@code -1} if the character is invalid.
274      */
275     public static int decodeHexNibble(final char c) {
276         assert HEX2B.length == Character.MAX_VALUE + 1;
277         // Character.digit() is not used here, as it addresses a larger
278         // set of characters (both ASCII and full-width latin letters).
279         return HEX2B[c];
280     }
282     /**
283      * Decode a 2-digit hex byte from within a string.
284      */
285     public static byte decodeHexByte(CharSequence s, int pos) {
286         int hi = decodeHexNibble(s.charAt(pos));
287         int lo = decodeHexNibble(s.charAt(pos + 1));
288         if (hi == -1 || lo == -1) {
289             throw new IllegalArgumentException(String.format(
290                     "invalid hex byte '%s' at index %d of '%s'", s.subSequence(pos, pos + 2), pos, s));
291         }
292         return (byte) ((hi << 4) + lo);
293     }
295     /**
296      * Decodes part of a string with <a href="">hex dump</a>
297      *
298      * @param hexDump a {@link CharSequence} which contains the hex dump
299      * @param fromIndex start of hex dump in {@code hexDump}
300      * @param length hex string length
301      */
302     public static byte[] decodeHexDump(CharSequence hexDump, int fromIndex, int length) {
303         if (length < 0 || (length & 1) != 0) {
304             throw new IllegalArgumentException("length: " + length);
305         }
306         if (length == 0) {
307             return EmptyArrays.EMPTY_BYTES;
308         }
309         byte[] bytes = new byte[length >>> 1];
310         for (int i = 0; i < length; i += 2) {
311             bytes[i >>> 1] = decodeHexByte(hexDump, fromIndex + i);
312         }
313         return bytes;
314     }
316     /**
317      * Decodes a <a href="">hex dump</a>
318      */
319     public static byte[] decodeHexDump(CharSequence hexDump) {
320         return decodeHexDump(hexDump, 0, hexDump.length());
321     }
323     /**
324      * The shortcut to {@link #simpleClassName(Class) simpleClassName(o.getClass())}.
325      */
326     public static String simpleClassName(Object o) {
327         if (o == null) {
328             return "null_object";
329         } else {
330             return simpleClassName(o.getClass());
331         }
332     }
334     /**
335      * Generates a simplified name from a {@link Class}.  Similar to {@link Class#getSimpleName()}, but it works fine
336      * with anonymous classes.
337      */
338     public static String simpleClassName(Class<?> clazz) {
339         String className = requireNonNull(clazz, "clazz").getName();
340         final int lastDotIdx = className.lastIndexOf(PACKAGE_SEPARATOR_CHAR);
341         if (lastDotIdx > -1) {
342             return className.substring(lastDotIdx + 1);
343         }
344         return className;
345     }
347     /**
348      * Escapes the specified value, if necessary according to
349      * <a href="">RFC-4180</a>.
350      *
351      * @param value The value which will be escaped according to
352      *              <a href="">RFC-4180</a>
353      * @return {@link CharSequence} the escaped value if necessary, or the value unchanged
354      */
355     public static CharSequence escapeCsv(CharSequence value) {
356         return escapeCsv(value, false);
357     }
359     /**
360      * Escapes the specified value, if necessary according to
361      * <a href="">RFC-4180</a>.
362      *
363      * @param value          The value which will be escaped according to
364      *                       <a href="">RFC-4180</a>
365      * @param trimWhiteSpace The value will first be trimmed of its optional white-space characters,
366      *                       according to <a href="">RFC-7230</a>
367      * @return {@link CharSequence} the escaped value if necessary, or the value unchanged
368      */
369     public static CharSequence escapeCsv(CharSequence value, boolean trimWhiteSpace) {
370         int length = requireNonNull(value, "value").length();
371         int start;
372         int last;
373         if (trimWhiteSpace) {
374             start = indexOfFirstNonOwsChar(value, length);
375             last = indexOfLastNonOwsChar(value, start, length);
376         } else {
377             start = 0;
378             last = length - 1;
379         }
380         if (start > last) {
381             return EMPTY_STRING;
382         }
384         int firstUnescapedSpecial = -1;
385         boolean quoted = false;
386         if (isDoubleQuote(value.charAt(start))) {
387             quoted = isDoubleQuote(value.charAt(last)) && last > start;
388             if (quoted) {
389                 start++;
390                 last--;
391             } else {
392                 firstUnescapedSpecial = start;
393             }
394         }
396         if (firstUnescapedSpecial < 0) {
397             if (quoted) {
398                 for (int i = start; i <= last; i++) {
399                     if (isDoubleQuote(value.charAt(i))) {
400                         if (i == last || !isDoubleQuote(value.charAt(i + 1))) {
401                             firstUnescapedSpecial = i;
402                             break;
403                         }
404                         i++;
405                     }
406                 }
407             } else {
408                 for (int i = start; i <= last; i++) {
409                     char c = value.charAt(i);
410                     if (c == LINE_FEED || c == CARRIAGE_RETURN || c == COMMA) {
411                         firstUnescapedSpecial = i;
412                         break;
413                     }
414                     if (isDoubleQuote(c)) {
415                         if (i == last || !isDoubleQuote(value.charAt(i + 1))) {
416                             firstUnescapedSpecial = i;
417                             break;
418                         }
419                         i++;
420                     }
421                 }
422             }
424             if (firstUnescapedSpecial < 0) {
425                 // Special characters is not found or all of them already escaped.
426                 // In the most cases returns a same string. New string will be instantiated (via StringBuilder)
427                 // only if it really needed. It's important to prevent GC extra load.
428                 return quoted? value.subSequence(start - 1, last + 2) : value.subSequence(start, last + 1);
429             }
430         }
432         StringBuilder result = new StringBuilder(last - start + 1 + CSV_NUMBER_ESCAPE_CHARACTERS);
433         result.append(DOUBLE_QUOTE).append(value, start, firstUnescapedSpecial);
434         for (int i = firstUnescapedSpecial; i <= last; i++) {
435             char c = value.charAt(i);
436             if (isDoubleQuote(c)) {
437                 result.append(DOUBLE_QUOTE);
438                 if (i < last && isDoubleQuote(value.charAt(i + 1))) {
439                     i++;
440                 }
441             }
442             result.append(c);
443         }
444         return result.append(DOUBLE_QUOTE);
445     }
447     /**
448      * Unescapes the specified escaped CSV field, if necessary according to
449      * <a href="">RFC-4180</a>.
450      *
451      * @param value The escaped CSV field which will be unescaped according to
452      *              <a href="">RFC-4180</a>
453      * @return {@link CharSequence} the unescaped value if necessary, or the value unchanged
454      */
455     public static CharSequence unescapeCsv(CharSequence value) {
456         int length = requireNonNull(value, "value").length();
457         if (length == 0) {
458             return value;
459         }
460         int last = length - 1;
461         boolean quoted = isDoubleQuote(value.charAt(0)) && isDoubleQuote(value.charAt(last)) && length != 1;
462         if (!quoted) {
463             validateCsvFormat(value);
464             return value;
465         }
466         StringBuilder unescaped = threadLocalStringBuilder();
467         for (int i = 1; i < last; i++) {
468             char current = value.charAt(i);
469             if (current == DOUBLE_QUOTE) {
470                 if (isDoubleQuote(value.charAt(i + 1)) && (i + 1) != last) {
471                     // Followed by a double-quote but not the last character
472                     // Just skip the next double-quote
473                     i++;
474                 } else {
475                     // Not followed by a double-quote or the following double-quote is the last character
476                     throw newInvalidEscapedCsvFieldException(value, i);
477                 }
478             }
479             unescaped.append(current);
480         }
481         return unescaped.toString();
482     }
484     /**
485      * Unescapes the specified escaped CSV fields according to
486      * <a href="">RFC-4180</a>.
487      *
488      * @param value A string with multiple CSV escaped fields which will be unescaped according to
489      *              <a href="">RFC-4180</a>
490      * @return {@link List} the list of unescaped fields
491      */
492     public static List<CharSequence> unescapeCsvFields(CharSequence value) {
493         List<CharSequence> unescaped = new ArrayList<>(2);
494         StringBuilder current = threadLocalStringBuilder();
495         boolean quoted = false;
496         int last = value.length() - 1;
497         for (int i = 0; i <= last; i++) {
498             char c = value.charAt(i);
499             if (quoted) {
500                 if (c == DOUBLE_QUOTE) {
501                     if (i == last) {
502                         // Add the last field and return
503                         unescaped.add(current.toString());
504                         return unescaped;
505                     }
506                     char next = value.charAt(++i);
507                     if (next == DOUBLE_QUOTE) {
508                         // 2 double-quotes should be unescaped to one
509                         current.append(DOUBLE_QUOTE);
510                         continue;
511                     }
512                     if (next == COMMA) {
513                         // This is the end of a field. Let's start to parse the next field.
514                         quoted = false;
515                         unescaped.add(current.toString());
516                         current.setLength(0);
517                         continue;
518                     }
519                     // double-quote followed by other character is invalid
520                     throw newInvalidEscapedCsvFieldException(value, i - 1);
521                 } else {
522                     current.append(c);
523                 }
524             } else {
525                 switch (c) {
526                     case COMMA:
527                         // Start to parse the next field
528                         unescaped.add(current.toString());
529                         current.setLength(0);
530                         break;
531                     case DOUBLE_QUOTE:
532                         if (current.length() == 0) {
533                             quoted = true;
534                             break;
535                         }
536                         // double-quote appears without being enclosed with double-quotes
537                         // fall through
538                     case LINE_FEED:
539                         // fall through
540                     case CARRIAGE_RETURN:
541                         // special characters appears without being enclosed with double-quotes
542                         throw newInvalidEscapedCsvFieldException(value, i);
543                     default:
544                         current.append(c);
545                 }
546             }
547         }
548         if (quoted) {
549             throw newInvalidEscapedCsvFieldException(value, last);
550         }
551         unescaped.add(current.toString());
552         return unescaped;
553     }
555     /**
556      * Validate if {@code value} is a valid csv field without double-quotes.
557      *
558      * @throws IllegalArgumentException if {@code value} needs to be encoded with double-quotes.
559      */
560     private static void validateCsvFormat(CharSequence value) {
561         int length = value.length();
562         for (int i = 0; i < length; i++) {
563             switch (value.charAt(i)) {
564                 case DOUBLE_QUOTE:
565                 case LINE_FEED:
566                 case CARRIAGE_RETURN:
567                 case COMMA:
568                     // If value contains any special character, it should be enclosed with double-quotes
569                     throw newInvalidEscapedCsvFieldException(value, i);
570                 default:
571             }
572         }
573     }
575     private static IllegalArgumentException newInvalidEscapedCsvFieldException(CharSequence value, int index) {
576         return new IllegalArgumentException("invalid escaped CSV field: " + value + " index: " + index);
577     }
579     /**
580      * Get the length of a string, {@code null} input is considered {@code 0} length.
581      */
582     public static int length(String s) {
583         return s == null ? 0 : s.length();
584     }
586     /**
587      * Determine if a string is {@code null} or {@link String#isEmpty()} returns {@code true}.
588      */
589     public static boolean isNullOrEmpty(String s) {
590         return s == null || s.isEmpty();
591     }
593     /**
594      * Find the index of the first non-white space character in {@code s} starting at {@code offset}.
595      *
596      * @param seq    The string to search.
597      * @param offset The offset to start searching at.
598      * @return the index of the first non-white space character or &lt;{@code -1} if none was found.
599      */
600     public static int indexOfNonWhiteSpace(CharSequence seq, int offset) {
601         for (; offset < seq.length(); ++offset) {
602             if (!Character.isWhitespace(seq.charAt(offset))) {
603                 return offset;
604             }
605         }
606         return -1;
607     }
609     /**
610      * Find the index of the first white space character in {@code s} starting at {@code offset}.
611      *
612      * @param seq    The string to search.
613      * @param offset The offset to start searching at.
614      * @return the index of the first white space character or &lt;{@code -1} if none was found.
615      */
616     public static int indexOfWhiteSpace(CharSequence seq, int offset) {
617         for (; offset < seq.length(); ++offset) {
618             if (Character.isWhitespace(seq.charAt(offset))) {
619                 return offset;
620             }
621         }
622         return -1;
623     }
625     /**
626      * Determine if {@code c} lies within the range of values defined for
627      * <a href="">Surrogate Code Point</a>.
628      *
629      * @param c the character to check.
630      * @return {@code true} if {@code c} lies within the range of values defined for
631      * <a href="">Surrogate Code Point</a>. {@code false} otherwise.
632      */
633     public static boolean isSurrogate(char c) {
634         return c >= '\uD800' && c <= '\uDFFF';
635     }
637     private static boolean isDoubleQuote(char c) {
638         return c == DOUBLE_QUOTE;
639     }
641     /**
642      * Determine if the string {@code s} ends with the char {@code c}.
643      *
644      * @param s the string to test
645      * @param c the tested char
646      * @return true if {@code s} ends with the char {@code c}
647      */
648     public static boolean endsWith(CharSequence s, char c) {
649         int len = s.length();
650         return len > 0 && s.charAt(len - 1) == c;
651     }
653     /**
654      * Trim optional white-space characters from the specified value,
655      * according to <a href="">RFC-7230</a>.
656      *
657      * @param value the value to trim
658      * @return {@link CharSequence} the trimmed value if necessary, or the value unchanged
659      */
660     public static CharSequence trimOws(CharSequence value) {
661         final int length = value.length();
662         if (length == 0) {
663             return value;
664         }
665         int start = indexOfFirstNonOwsChar(value, length);
666         int end = indexOfLastNonOwsChar(value, start, length);
667         return start == 0 && end == length - 1 ? value : value.subSequence(start, end + 1);
668     }
670     /**
671      * Returns a char sequence that contains all {@code elements} joined by a given separator.
672      *
673      * @param separator for each element
674      * @param elements to join together
675      *
676      * @return a char sequence joined by a given separator.
677      */
678     public static CharSequence join(CharSequence separator, Iterable<? extends CharSequence> elements) {
679         requireNonNull(separator, "separator");
680         requireNonNull(elements, "elements");
682         Iterator<? extends CharSequence> iterator = elements.iterator();
683         if (!iterator.hasNext()) {
684             return EMPTY_STRING;
685         }
687         CharSequence firstElement =;
688         if (!iterator.hasNext()) {
689             return firstElement;
690         }
692         StringBuilder builder = new StringBuilder(firstElement);
693         do {
694             builder.append(separator).append(;
695         } while (iterator.hasNext());
697         return builder;
698     }
700     /**
701      * @return {@code length} if no OWS is found.
702      */
703     private static int indexOfFirstNonOwsChar(CharSequence value, int length) {
704         int i = 0;
705         while (i < length && isOws(value.charAt(i))) {
706             i++;
707         }
708         return i;
709     }
711     /**
712      * @return {@code start} if no OWS is found.
713      */
714     private static int indexOfLastNonOwsChar(CharSequence value, int start, int length) {
715         int i = length - 1;
716         while (i > start && isOws(value.charAt(i))) {
717             i--;
718         }
719         return i;
720     }
722     private static boolean isOws(char c) {
723         return c == SPACE || c == TAB;
724     }
725 }