View Javadoc
1   /*
2    * Copyright 2012 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.util.internal;
17  
18  import java.io.IOException;
19  import java.util.ArrayList;
20  import java.util.List;
21  
22  import static io.netty.util.internal.ObjectUtil.*;
23  
24  /**
25   * String utility class.
26   */
27  public final class StringUtil {
28  
    /** The empty string. */
    public static final String EMPTY_STRING = "";
    /**
     * Result of {@code SystemPropertyUtil.get("line.separator", "\n")}; presumably the platform line separator
     * with {@code "\n"} as the default when the property is unavailable (confirm against SystemPropertyUtil).
     */
    public static final String NEWLINE = SystemPropertyUtil.get("line.separator", "\n");

    /** The double-quote character ({@code "}). */
    public static final char DOUBLE_QUOTE = '\"';
    /** The comma character ({@code ,}). */
    public static final char COMMA = ',';
    /** The line-feed character ({@code \n}). */
    public static final char LINE_FEED = '\n';
    /** The carriage-return character ({@code \r}). */
    public static final char CARRIAGE_RETURN = '\r';
    /** The horizontal-tab character ({@code \t}). */
    public static final char TAB = '\t';
    /** The space character (code point {@code 0x20}). */
    public static final char SPACE = 0x20;

    /** Maps an unsigned byte value (0-255) to its hexadecimal text, left-padded with {@code '0'} to two digits. */
    private static final String[] BYTE2HEX_PAD = new String[256];
    /** Maps an unsigned byte value (0-255) to its hexadecimal text without any padding. */
    private static final String[] BYTE2HEX_NOPAD = new String[256];

    /**
     * Extra capacity reserved in the {@link StringBuilder} that holds an escaped CSV value:
     * 2 - Quote character at beginning and end.
     * 5 - Extra allowance for anticipated escape characters that may be added.
     */
    private static final int CSV_NUMBER_ESCAPE_CHARACTERS = 2 + 5;
    /** Separator between the package name and the class name in a value returned by {@link Class#getName()}. */
    private static final char PACKAGE_SEPARATOR_CHAR = '.';
48  
49      static {
50          // Generate the lookup table that converts a byte into a 2-digit hexadecimal integer.
51          for (int i = 0; i < BYTE2HEX_PAD.length; i++) {
52              String str = Integer.toHexString(i);
53              BYTE2HEX_PAD[i] = i > 0xf ? str : ('0' + str);
54              BYTE2HEX_NOPAD[i] = str;
55          }
56      }
57  
    /**
     * Not meant to be instantiated: this class only exposes static members.
     */
    private StringUtil() {
        // Unused.
    }
61  
62      /**
63       * Get the item after one char delim if the delim is found (else null).
64       * This operation is a simplified and optimized
65       * version of {@link String#split(String, int)}.
66       */
67      public static String substringAfter(String value, char delim) {
68          int pos = value.indexOf(delim);
69          if (pos >= 0) {
70              return value.substring(pos + 1);
71          }
72          return null;
73      }
74  
75      /**
76       * Checks if two strings have the same suffix of specified length
77       *
78       * @param s   string
79       * @param p   string
80       * @param len length of the common suffix
81       * @return true if both s and p are not null and both have the same suffix. Otherwise - false
82       */
83      public static boolean commonSuffixOfLength(String s, String p, int len) {
84          return s != null && p != null && len >= 0 && s.regionMatches(s.length() - len, p, p.length() - len, len);
85      }
86  
87      /**
88       * Converts the specified byte value into a 2-digit hexadecimal integer.
89       */
90      public static String byteToHexStringPadded(int value) {
91          return BYTE2HEX_PAD[value & 0xff];
92      }
93  
94      /**
95       * Converts the specified byte value into a 2-digit hexadecimal integer and appends it to the specified buffer.
96       */
97      public static <T extends Appendable> T byteToHexStringPadded(T buf, int value) {
98          try {
99              buf.append(byteToHexStringPadded(value));
100         } catch (IOException e) {
101             PlatformDependent.throwException(e);
102         }
103         return buf;
104     }
105 
106     /**
107      * Converts the specified byte array into a hexadecimal value.
108      */
109     public static String toHexStringPadded(byte[] src) {
110         return toHexStringPadded(src, 0, src.length);
111     }
112 
113     /**
114      * Converts the specified byte array into a hexadecimal value.
115      */
116     public static String toHexStringPadded(byte[] src, int offset, int length) {
117         return toHexStringPadded(new StringBuilder(length << 1), src, offset, length).toString();
118     }
119 
120     /**
121      * Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
122      */
123     public static <T extends Appendable> T toHexStringPadded(T dst, byte[] src) {
124         return toHexStringPadded(dst, src, 0, src.length);
125     }
126 
127     /**
128      * Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
129      */
130     public static <T extends Appendable> T toHexStringPadded(T dst, byte[] src, int offset, int length) {
131         final int end = offset + length;
132         for (int i = offset; i < end; i++) {
133             byteToHexStringPadded(dst, src[i]);
134         }
135         return dst;
136     }
137 
138     /**
139      * Converts the specified byte value into a hexadecimal integer.
140      */
141     public static String byteToHexString(int value) {
142         return BYTE2HEX_NOPAD[value & 0xff];
143     }
144 
145     /**
146      * Converts the specified byte value into a hexadecimal integer and appends it to the specified buffer.
147      */
148     public static <T extends Appendable> T byteToHexString(T buf, int value) {
149         try {
150             buf.append(byteToHexString(value));
151         } catch (IOException e) {
152             PlatformDependent.throwException(e);
153         }
154         return buf;
155     }
156 
157     /**
158      * Converts the specified byte array into a hexadecimal value.
159      */
160     public static String toHexString(byte[] src) {
161         return toHexString(src, 0, src.length);
162     }
163 
164     /**
165      * Converts the specified byte array into a hexadecimal value.
166      */
167     public static String toHexString(byte[] src, int offset, int length) {
168         return toHexString(new StringBuilder(length << 1), src, offset, length).toString();
169     }
170 
171     /**
172      * Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
173      */
174     public static <T extends Appendable> T toHexString(T dst, byte[] src) {
175         return toHexString(dst, src, 0, src.length);
176     }
177 
178     /**
179      * Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
180      */
181     public static <T extends Appendable> T toHexString(T dst, byte[] src, int offset, int length) {
182         assert length >= 0;
183         if (length == 0) {
184             return dst;
185         }
186 
187         final int end = offset + length;
188         final int endMinusOne = end - 1;
189         int i;
190 
191         // Skip preceding zeroes.
192         for (i = offset; i < endMinusOne; i++) {
193             if (src[i] != 0) {
194                 break;
195             }
196         }
197 
198         byteToHexString(dst, src[i++]);
199         int remaining = end - i;
200         toHexStringPadded(dst, src, i, remaining);
201 
202         return dst;
203     }
204 
205     /**
206      * Helper to decode half of a hexadecimal number from a string.
207      * @param c The ASCII character of the hexadecimal number to decode.
208      * Must be in the range {@code [0-9a-fA-F]}.
209      * @return The hexadecimal value represented in the ASCII character
210      * given, or {@code -1} if the character is invalid.
211      */
212     public static int decodeHexNibble(final char c) {
213         // Character.digit() is not used here, as it addresses a larger
214         // set of characters (both ASCII and full-width latin letters).
215         if (c >= '0' && c <= '9') {
216             return c - '0';
217         }
218         if (c >= 'A' && c <= 'F') {
219             return c - ('A' - 0xA);
220         }
221         if (c >= 'a' && c <= 'f') {
222             return c - ('a' - 0xA);
223         }
224         return -1;
225     }
226 
227     /**
228      * Decode a 2-digit hex byte from within a string.
229      */
230     public static byte decodeHexByte(CharSequence s, int pos) {
231         int hi = decodeHexNibble(s.charAt(pos));
232         int lo = decodeHexNibble(s.charAt(pos + 1));
233         if (hi == -1 || lo == -1) {
234             throw new IllegalArgumentException(String.format(
235                     "invalid hex byte '%s' at index %d of '%s'", s.subSequence(pos, pos + 2), pos, s));
236         }
237         return (byte) ((hi << 4) + lo);
238     }
239 
240     /**
241      * Decodes part of a string with <a href="http://en.wikipedia.org/wiki/Hex_dump">hex dump</a>
242      *
243      * @param hexDump a {@link CharSequence} which contains the hex dump
244      * @param fromIndex start of hex dump in {@code hexDump}
245      * @param length hex string length
246      */
247     public static byte[] decodeHexDump(CharSequence hexDump, int fromIndex, int length) {
248         if (length < 0 || (length & 1) != 0) {
249             throw new IllegalArgumentException("length: " + length);
250         }
251         if (length == 0) {
252             return EmptyArrays.EMPTY_BYTES;
253         }
254         byte[] bytes = new byte[length >>> 1];
255         for (int i = 0; i < length; i += 2) {
256             bytes[i >>> 1] = decodeHexByte(hexDump, fromIndex + i);
257         }
258         return bytes;
259     }
260 
261     /**
262      * Decodes a <a href="http://en.wikipedia.org/wiki/Hex_dump">hex dump</a>
263      */
264     public static byte[] decodeHexDump(CharSequence hexDump) {
265         return decodeHexDump(hexDump, 0, hexDump.length());
266     }
267 
268     /**
269      * The shortcut to {@link #simpleClassName(Class) simpleClassName(o.getClass())}.
270      */
271     public static String simpleClassName(Object o) {
272         if (o == null) {
273             return "null_object";
274         } else {
275             return simpleClassName(o.getClass());
276         }
277     }
278 
279     /**
280      * Generates a simplified name from a {@link Class}.  Similar to {@link Class#getSimpleName()}, but it works fine
281      * with anonymous classes.
282      */
283     public static String simpleClassName(Class<?> clazz) {
284         String className = checkNotNull(clazz, "clazz").getName();
285         final int lastDotIdx = className.lastIndexOf(PACKAGE_SEPARATOR_CHAR);
286         if (lastDotIdx > -1) {
287             return className.substring(lastDotIdx + 1);
288         }
289         return className;
290     }
291 
292     /**
293      * Escapes the specified value, if necessary according to
294      * <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>.
295      *
296      * @param value The value which will be escaped according to
297      *              <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>
298      * @return {@link CharSequence} the escaped value if necessary, or the value unchanged
299      */
300     public static CharSequence escapeCsv(CharSequence value) {
301         return escapeCsv(value, false);
302     }
303 
    /**
     * Escapes the specified value, if necessary according to
     * <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>.
     * <p>
     * A value is rewritten (wrapped in double-quotes, with every lone double-quote doubled) as soon as it
     * contains an unescaped special character. For a value that is not already enclosed in double-quotes the
     * special characters are line-feed, carriage-return, comma and a double-quote that is not part of a pair.
     * For a value that is already enclosed in double-quotes only a double-quote that is not part of a pair
     * counts. Otherwise the (optionally trimmed) value is returned without building a new string.
     *
     * @param value          The value which will be escaped according to
     *                       <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>
     * @param trimWhiteSpace The value will first be trimmed of its optional white-space characters,
     *                       according to <a href="https://tools.ietf.org/html/rfc7230#section-7">RFC-7230</a>
     * @return {@link CharSequence} the escaped value if necessary, or the value unchanged;
     *         the empty string if nothing is left of the value (empty input, or only white-space when trimming)
     */
    public static CharSequence escapeCsv(CharSequence value, boolean trimWhiteSpace) {
        int length = checkNotNull(value, "value").length();
        // [start, last] is the inclusive range of characters that are considered.
        int start;
        int last;
        if (trimWhiteSpace) {
            start = indexOfFirstNonOwsChar(value, length);
            last = indexOfLastNonOwsChar(value, start, length);
        } else {
            start = 0;
            last = length - 1;
        }
        if (start > last) {
            // Nothing to escape: the value is empty or consists of optional white-space only.
            return EMPTY_STRING;
        }

        // Index of the first character that forces a rewrite, or -1 while none has been found.
        int firstUnescapedSpecial = -1;
        boolean quoted = false;
        if (isDoubleQuote(value.charAt(start))) {
            // Enclosed in quotes only if the last character is a quote too and is not the very same character.
            quoted = isDoubleQuote(value.charAt(last)) && last > start;
            if (quoted) {
                // Narrow the range to the content between the enclosing quotes.
                start++;
                last--;
            } else {
                // A leading quote without a matching trailing one has to be escaped.
                firstUnescapedSpecial = start;
            }
        }

        if (firstUnescapedSpecial < 0) {
            if (quoted) {
                // Inside quotes only a double-quote that is not followed by another one is a problem.
                for (int i = start; i <= last; i++) {
                    if (isDoubleQuote(value.charAt(i))) {
                        if (i == last || !isDoubleQuote(value.charAt(i + 1))) {
                            firstUnescapedSpecial = i;
                            break;
                        }
                        // Skip the second quote of an already escaped pair.
                        i++;
                    }
                }
            } else {
                for (int i = start; i <= last; i++) {
                    char c = value.charAt(i);
                    if (c == LINE_FEED || c == CARRIAGE_RETURN || c == COMMA) {
                        firstUnescapedSpecial = i;
                        break;
                    }
                    if (isDoubleQuote(c)) {
                        if (i == last || !isDoubleQuote(value.charAt(i + 1))) {
                            firstUnescapedSpecial = i;
                            break;
                        }
                        // Skip the second quote of an already escaped pair.
                        i++;
                    }
                }
            }

            if (firstUnescapedSpecial < 0) {
                // No special character was found, or all of them are already escaped.
                // Return a view of the input instead of building a new string via StringBuilder, to avoid
                // extra GC load. For a quoted value the range is widened again to include the enclosing quotes.
                return quoted? value.subSequence(start - 1, last + 2) : value.subSequence(start, last + 1);
            }
        }

        StringBuilder result = new StringBuilder(last - start + 1 + CSV_NUMBER_ESCAPE_CHARACTERS);
        // Everything before the first special character can be copied verbatim after the opening quote.
        result.append(DOUBLE_QUOTE).append(value, start, firstUnescapedSpecial);
        for (int i = firstUnescapedSpecial; i <= last; i++) {
            char c = value.charAt(i);
            if (isDoubleQuote(c)) {
                // Every quote is emitted as a pair; an existing pair is consumed as one so it is not doubled again.
                result.append(DOUBLE_QUOTE);
                if (i < last && isDoubleQuote(value.charAt(i + 1))) {
                    i++;
                }
            }
            result.append(c);
        }
        return result.append(DOUBLE_QUOTE);
    }
391 
392     /**
393      * Unescapes the specified escaped CSV field, if necessary according to
394      * <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>.
395      *
396      * @param value The escaped CSV field which will be unescaped according to
397      *              <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>
398      * @return {@link CharSequence} the unescaped value if necessary, or the value unchanged
399      */
400     public static CharSequence unescapeCsv(CharSequence value) {
401         int length = checkNotNull(value, "value").length();
402         if (length == 0) {
403             return value;
404         }
405         int last = length - 1;
406         boolean quoted = isDoubleQuote(value.charAt(0)) && isDoubleQuote(value.charAt(last)) && length != 1;
407         if (!quoted) {
408             validateCsvFormat(value);
409             return value;
410         }
411         StringBuilder unescaped = InternalThreadLocalMap.get().stringBuilder();
412         for (int i = 1; i < last; i++) {
413             char current = value.charAt(i);
414             if (current == DOUBLE_QUOTE) {
415                 if (isDoubleQuote(value.charAt(i + 1)) && (i + 1) != last) {
416                     // Followed by a double-quote but not the last character
417                     // Just skip the next double-quote
418                     i++;
419                 } else {
420                     // Not followed by a double-quote or the following double-quote is the last character
421                     throw newInvalidEscapedCsvFieldException(value, i);
422                 }
423             }
424             unescaped.append(current);
425         }
426         return unescaped.toString();
427     }
428 
    /**
     * Unescapes the specified escaped CSV fields according to
     * <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>.
     * <p>
     * The input is split at every comma found outside of double-quotes. A field that starts with a
     * double-quote is read up to its closing double-quote, with every pair of double-quotes collapsed into one.
     * An empty input yields a list with a single empty field.
     *
     * @param value A string with multiple CSV escaped fields which will be unescaped according to
     *              <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>
     * @return {@link List} the list of unescaped fields
     * @throws IllegalArgumentException if a quoted field is not terminated, if a closing double-quote is
     *         followed by anything other than a comma, or if an unquoted field contains a double-quote,
     *         line-feed or carriage-return
     */
    public static List<CharSequence> unescapeCsvFields(CharSequence value) {
        List<CharSequence> unescaped = new ArrayList<CharSequence>(2);
        // Accumulates the characters of the field that is currently being parsed.
        StringBuilder current = InternalThreadLocalMap.get().stringBuilder();
        // true while inside a field that was opened with a double-quote.
        boolean quoted = false;
        int last = value.length() - 1;
        for (int i = 0; i <= last; i++) {
            char c = value.charAt(i);
            if (quoted) {
                switch (c) {
                    case DOUBLE_QUOTE:
                        if (i == last) {
                            // Add the last field and return
                            unescaped.add(current.toString());
                            return unescaped;
                        }
                        // Look at (and consume) the character following the double-quote.
                        char next = value.charAt(++i);
                        if (next == DOUBLE_QUOTE) {
                            // 2 double-quotes should be unescaped to one
                            current.append(DOUBLE_QUOTE);
                            break;
                        }
                        if (next == COMMA) {
                            // This is the end of a field. Let's start to parse the next field.
                            quoted = false;
                            unescaped.add(current.toString());
                            current.setLength(0);
                            break;
                        }
                        // double-quote followed by other character is invalid
                        // (i was already advanced, so i - 1 is the index of the offending double-quote)
                        throw newInvalidEscapedCsvFieldException(value, i - 1);
                    default:
                        current.append(c);
                }
            } else {
                switch (c) {
                    case COMMA:
                        // Start to parse the next field
                        unescaped.add(current.toString());
                        current.setLength(0);
                        break;
                    case DOUBLE_QUOTE:
                        if (current.length() == 0) {
                            // A double-quote as the first character of a field opens a quoted field.
                            quoted = true;
                            break;
                        }
                        // double-quote appears without being enclosed with double-quotes
                        // fall through
                    case LINE_FEED:
                        // fall through
                    case CARRIAGE_RETURN:
                        // special characters appears without being enclosed with double-quotes
                        throw newInvalidEscapedCsvFieldException(value, i);
                    default:
                        current.append(c);
                }
            }
        }
        if (quoted) {
            // The input ended inside a quoted field that was never closed.
            throw newInvalidEscapedCsvFieldException(value, last);
        }
        unescaped.add(current.toString());
        return unescaped;
    }
500 
501     /**
502      * Validate if {@code value} is a valid csv field without double-quotes.
503      *
504      * @throws IllegalArgumentException if {@code value} needs to be encoded with double-quotes.
505      */
506     private static void validateCsvFormat(CharSequence value) {
507         int length = value.length();
508         for (int i = 0; i < length; i++) {
509             switch (value.charAt(i)) {
510                 case DOUBLE_QUOTE:
511                 case LINE_FEED:
512                 case CARRIAGE_RETURN:
513                 case COMMA:
514                     // If value contains any special character, it should be enclosed with double-quotes
515                     throw newInvalidEscapedCsvFieldException(value, i);
516                 default:
517             }
518         }
519     }
520 
521     private static IllegalArgumentException newInvalidEscapedCsvFieldException(CharSequence value, int index) {
522         return new IllegalArgumentException("invalid escaped CSV field: " + value + " index: " + index);
523     }
524 
525     /**
526      * Get the length of a string, {@code null} input is considered {@code 0} length.
527      */
528     public static int length(String s) {
529         return s == null ? 0 : s.length();
530     }
531 
532     /**
533      * Determine if a string is {@code null} or {@link String#isEmpty()} returns {@code true}.
534      */
535     public static boolean isNullOrEmpty(String s) {
536         return s == null || s.isEmpty();
537     }
538 
539     /**
540      * Find the index of the first non-white space character in {@code s} starting at {@code offset}.
541      *
542      * @param seq    The string to search.
543      * @param offset The offset to start searching at.
544      * @return the index of the first non-white space character or &lt;{@code 0} if none was found.
545      */
546     public static int indexOfNonWhiteSpace(CharSequence seq, int offset) {
547         for (; offset < seq.length(); ++offset) {
548             if (!Character.isWhitespace(seq.charAt(offset))) {
549                 return offset;
550             }
551         }
552         return -1;
553     }
554 
555     /**
556      * Determine if {@code c} lies within the range of values defined for
557      * <a href="http://unicode.org/glossary/#surrogate_code_point">Surrogate Code Point</a>.
558      *
559      * @param c the character to check.
560      * @return {@code true} if {@code c} lies within the range of values defined for
561      * <a href="http://unicode.org/glossary/#surrogate_code_point">Surrogate Code Point</a>. {@code false} otherwise.
562      */
563     public static boolean isSurrogate(char c) {
564         return c >= '\uD800' && c <= '\uDFFF';
565     }
566 
567     private static boolean isDoubleQuote(char c) {
568         return c == DOUBLE_QUOTE;
569     }
570 
571     /**
572      * Determine if the string {@code s} ends with the char {@code c}.
573      *
574      * @param s the string to test
575      * @param c the tested char
576      * @return true if {@code s} ends with the char {@code c}
577      */
578     public static boolean endsWith(CharSequence s, char c) {
579         int len = s.length();
580         return len > 0 && s.charAt(len - 1) == c;
581     }
582 
583     /**
584      * Trim optional white-space characters from the specified value,
585      * according to <a href="https://tools.ietf.org/html/rfc7230#section-7">RFC-7230</a>.
586      *
587      * @param value the value to trim
588      * @return {@link CharSequence} the trimmed value if necessary, or the value unchanged
589      */
590     public static CharSequence trimOws(CharSequence value) {
591         final int length = value.length();
592         if (length == 0) {
593             return value;
594         }
595         int start = indexOfFirstNonOwsChar(value, length);
596         int end = indexOfLastNonOwsChar(value, start, length);
597         return start == 0 && end == length - 1 ? value : value.subSequence(start, end + 1);
598     }
599 
600     /**
601      * @return {@code length} if no OWS is found.
602      */
603     private static int indexOfFirstNonOwsChar(CharSequence value, int length) {
604         int i = 0;
605         while (i < length && isOws(value.charAt(i))) {
606             i++;
607         }
608         return i;
609     }
610 
611     /**
612      * @return {@code start} if no OWS is found.
613      */
614     private static int indexOfLastNonOwsChar(CharSequence value, int start, int length) {
615         int i = length - 1;
616         while (i > start && isOws(value.charAt(i))) {
617             i--;
618         }
619         return i;
620     }
621 
622     private static boolean isOws(char c) {
623         return c == SPACE || c == TAB;
624     }
625 }