View Javadoc
1   /*
2    * Copyright 2012 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.util.internal;
17  
18  import java.io.IOException;
19  import java.util.ArrayList;
20  import java.util.List;
21  
22  import static io.netty.util.internal.ObjectUtil.*;
23  
24  /**
25   * String utility class.
26   */
27  public final class StringUtil {
28  
29      public static final String EMPTY_STRING = "";
30      public static final String NEWLINE = SystemPropertyUtil.get("line.separator", "\n");
31  
32      public static final char DOUBLE_QUOTE = '\"';
33      public static final char COMMA = ',';
34      public static final char LINE_FEED = '\n';
35      public static final char CARRIAGE_RETURN = '\r';
36      public static final char TAB = '\t';
37      public static final char SPACE = 0x20;
38  
39      private static final String[] BYTE2HEX_PAD = new String[256];
40      private static final String[] BYTE2HEX_NOPAD = new String[256];
41  
42      /**
43       * 2 - Quote character at beginning and end.
44       * 5 - Extra allowance for anticipated escape characters that may be added.
45       */
46      private static final int CSV_NUMBER_ESCAPE_CHARACTERS = 2 + 5;
47      private static final char PACKAGE_SEPARATOR_CHAR = '.';
48  
49      static {
50          // Generate the lookup table that converts a byte into a 2-digit hexadecimal integer.
51          int i;
52          for (i = 0; i < 10; i++) {
53              BYTE2HEX_PAD[i] = "0" + i;
54              BYTE2HEX_NOPAD[i] = String.valueOf(i);
55          }
56          for (; i < 16; i++) {
57              char c = (char) ('a' + i - 10);
58              BYTE2HEX_PAD[i] = "0" + c;
59              BYTE2HEX_NOPAD[i] = String.valueOf(c);
60          }
61          for (; i < BYTE2HEX_PAD.length; i++) {
62              String str = Integer.toHexString(i);
63              BYTE2HEX_PAD[i] = str;
64              BYTE2HEX_NOPAD[i] = str;
65          }
66      }
67  
68      private StringUtil() {
69          // Unused.
70      }
71  
72      /**
73       * Get the item after one char delim if the delim is found (else null).
74       * This operation is a simplified and optimized
75       * version of {@link String#split(String, int)}.
76       */
77      public static String substringAfter(String value, char delim) {
78          int pos = value.indexOf(delim);
79          if (pos >= 0) {
80              return value.substring(pos + 1);
81          }
82          return null;
83      }
84  
85      /**
86       * Checks if two strings have the same suffix of specified length
87       *
88       * @param s   string
89       * @param p   string
90       * @param len length of the common suffix
91       * @return true if both s and p are not null and both have the same suffix. Otherwise - false
92       */
93      public static boolean commonSuffixOfLength(String s, String p, int len) {
94          return s != null && p != null && len >= 0 && s.regionMatches(s.length() - len, p, p.length() - len, len);
95      }
96  
97      /**
98       * Converts the specified byte value into a 2-digit hexadecimal integer.
99       */
100     public static String byteToHexStringPadded(int value) {
101         return BYTE2HEX_PAD[value & 0xff];
102     }
103 
104     /**
105      * Converts the specified byte value into a 2-digit hexadecimal integer and appends it to the specified buffer.
106      */
107     public static <T extends Appendable> T byteToHexStringPadded(T buf, int value) {
108         try {
109             buf.append(byteToHexStringPadded(value));
110         } catch (IOException e) {
111             PlatformDependent.throwException(e);
112         }
113         return buf;
114     }
115 
116     /**
117      * Converts the specified byte array into a hexadecimal value.
118      */
119     public static String toHexStringPadded(byte[] src) {
120         return toHexStringPadded(src, 0, src.length);
121     }
122 
123     /**
124      * Converts the specified byte array into a hexadecimal value.
125      */
126     public static String toHexStringPadded(byte[] src, int offset, int length) {
127         return toHexStringPadded(new StringBuilder(length << 1), src, offset, length).toString();
128     }
129 
130     /**
131      * Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
132      */
133     public static <T extends Appendable> T toHexStringPadded(T dst, byte[] src) {
134         return toHexStringPadded(dst, src, 0, src.length);
135     }
136 
137     /**
138      * Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
139      */
140     public static <T extends Appendable> T toHexStringPadded(T dst, byte[] src, int offset, int length) {
141         final int end = offset + length;
142         for (int i = offset; i < end; i++) {
143             byteToHexStringPadded(dst, src[i]);
144         }
145         return dst;
146     }
147 
148     /**
149      * Converts the specified byte value into a hexadecimal integer.
150      */
151     public static String byteToHexString(int value) {
152         return BYTE2HEX_NOPAD[value & 0xff];
153     }
154 
155     /**
156      * Converts the specified byte value into a hexadecimal integer and appends it to the specified buffer.
157      */
158     public static <T extends Appendable> T byteToHexString(T buf, int value) {
159         try {
160             buf.append(byteToHexString(value));
161         } catch (IOException e) {
162             PlatformDependent.throwException(e);
163         }
164         return buf;
165     }
166 
167     /**
168      * Converts the specified byte array into a hexadecimal value.
169      */
170     public static String toHexString(byte[] src) {
171         return toHexString(src, 0, src.length);
172     }
173 
174     /**
175      * Converts the specified byte array into a hexadecimal value.
176      */
177     public static String toHexString(byte[] src, int offset, int length) {
178         return toHexString(new StringBuilder(length << 1), src, offset, length).toString();
179     }
180 
181     /**
182      * Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
183      */
184     public static <T extends Appendable> T toHexString(T dst, byte[] src) {
185         return toHexString(dst, src, 0, src.length);
186     }
187 
188     /**
189      * Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
190      */
191     public static <T extends Appendable> T toHexString(T dst, byte[] src, int offset, int length) {
192         assert length >= 0;
193         if (length == 0) {
194             return dst;
195         }
196 
197         final int end = offset + length;
198         final int endMinusOne = end - 1;
199         int i;
200 
201         // Skip preceding zeroes.
202         for (i = offset; i < endMinusOne; i++) {
203             if (src[i] != 0) {
204                 break;
205             }
206         }
207 
208         byteToHexString(dst, src[i++]);
209         int remaining = end - i;
210         toHexStringPadded(dst, src, i, remaining);
211 
212         return dst;
213     }
214 
215     /**
216      * Helper to decode half of a hexadecimal number from a string.
217      * @param c The ASCII character of the hexadecimal number to decode.
218      * Must be in the range {@code [0-9a-fA-F]}.
219      * @return The hexadecimal value represented in the ASCII character
220      * given, or {@code -1} if the character is invalid.
221      */
222     public static int decodeHexNibble(final char c) {
223         // Character.digit() is not used here, as it addresses a larger
224         // set of characters (both ASCII and full-width latin letters).
225         if (c >= '0' && c <= '9') {
226             return c - '0';
227         }
228         if (c >= 'A' && c <= 'F') {
229             return c - 'A' + 0xA;
230         }
231         if (c >= 'a' && c <= 'f') {
232             return c - 'a' + 0xA;
233         }
234         return -1;
235     }
236 
237     /**
238      * Decode a 2-digit hex byte from within a string.
239      */
240     public static byte decodeHexByte(CharSequence s, int pos) {
241         int hi = decodeHexNibble(s.charAt(pos));
242         int lo = decodeHexNibble(s.charAt(pos + 1));
243         if (hi == -1 || lo == -1) {
244             throw new IllegalArgumentException(String.format(
245                     "invalid hex byte '%s' at index %d of '%s'", s.subSequence(pos, pos + 2), pos, s));
246         }
247         return (byte) ((hi << 4) + lo);
248     }
249 
250     /**
251      * Decodes part of a string with <a href="http://en.wikipedia.org/wiki/Hex_dump">hex dump</a>
252      *
253      * @param hexDump a {@link CharSequence} which contains the hex dump
254      * @param fromIndex start of hex dump in {@code hexDump}
255      * @param length hex string length
256      */
257     public static byte[] decodeHexDump(CharSequence hexDump, int fromIndex, int length) {
258         if (length < 0 || (length & 1) != 0) {
259             throw new IllegalArgumentException("length: " + length);
260         }
261         if (length == 0) {
262             return EmptyArrays.EMPTY_BYTES;
263         }
264         byte[] bytes = new byte[length >>> 1];
265         for (int i = 0; i < length; i += 2) {
266             bytes[i >>> 1] = decodeHexByte(hexDump, fromIndex + i);
267         }
268         return bytes;
269     }
270 
271     /**
272      * Decodes a <a href="http://en.wikipedia.org/wiki/Hex_dump">hex dump</a>
273      */
274     public static byte[] decodeHexDump(CharSequence hexDump) {
275         return decodeHexDump(hexDump, 0, hexDump.length());
276     }
277 
278     /**
279      * The shortcut to {@link #simpleClassName(Class) simpleClassName(o.getClass())}.
280      */
281     public static String simpleClassName(Object o) {
282         if (o == null) {
283             return "null_object";
284         } else {
285             return simpleClassName(o.getClass());
286         }
287     }
288 
289     /**
290      * Generates a simplified name from a {@link Class}.  Similar to {@link Class#getSimpleName()}, but it works fine
291      * with anonymous classes.
292      */
293     public static String simpleClassName(Class<?> clazz) {
294         String className = checkNotNull(clazz, "clazz").getName();
295         final int lastDotIdx = className.lastIndexOf(PACKAGE_SEPARATOR_CHAR);
296         if (lastDotIdx > -1) {
297             return className.substring(lastDotIdx + 1);
298         }
299         return className;
300     }
301 
302     /**
303      * Escapes the specified value, if necessary according to
304      * <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>.
305      *
306      * @param value The value which will be escaped according to
307      *              <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>
308      * @return {@link CharSequence} the escaped value if necessary, or the value unchanged
309      */
310     public static CharSequence escapeCsv(CharSequence value) {
311         return escapeCsv(value, false);
312     }
313 
314     /**
315      * Escapes the specified value, if necessary according to
316      * <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>.
317      *
318      * @param value          The value which will be escaped according to
319      *                       <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>
320      * @param trimWhiteSpace The value will first be trimmed of its optional white-space characters,
321      *                       according to <a href="https://tools.ietf.org/html/rfc7230#section-7">RFC-7230</a>
322      * @return {@link CharSequence} the escaped value if necessary, or the value unchanged
323      */
324     public static CharSequence escapeCsv(CharSequence value, boolean trimWhiteSpace) {
325         int length = checkNotNull(value, "value").length();
326         int start;
327         int last;
328         if (trimWhiteSpace) {
329             start = indexOfFirstNonOwsChar(value, length);
330             last = indexOfLastNonOwsChar(value, start, length);
331         } else {
332             start = 0;
333             last = length - 1;
334         }
335         if (start > last) {
336             return EMPTY_STRING;
337         }
338 
339         int firstUnescapedSpecial = -1;
340         boolean quoted = false;
341         if (isDoubleQuote(value.charAt(start))) {
342             quoted = isDoubleQuote(value.charAt(last)) && last > start;
343             if (quoted) {
344                 start++;
345                 last--;
346             } else {
347                 firstUnescapedSpecial = start;
348             }
349         }
350 
351         if (firstUnescapedSpecial < 0) {
352             if (quoted) {
353                 for (int i = start; i <= last; i++) {
354                     if (isDoubleQuote(value.charAt(i))) {
355                         if (i == last || !isDoubleQuote(value.charAt(i + 1))) {
356                             firstUnescapedSpecial = i;
357                             break;
358                         }
359                         i++;
360                     }
361                 }
362             } else {
363                 for (int i = start; i <= last; i++) {
364                     char c = value.charAt(i);
365                     if (c == LINE_FEED || c == CARRIAGE_RETURN || c == COMMA) {
366                         firstUnescapedSpecial = i;
367                         break;
368                     }
369                     if (isDoubleQuote(c)) {
370                         if (i == last || !isDoubleQuote(value.charAt(i + 1))) {
371                             firstUnescapedSpecial = i;
372                             break;
373                         }
374                         i++;
375                     }
376                 }
377             }
378 
379             if (firstUnescapedSpecial < 0) {
380                 // Special characters is not found or all of them already escaped.
381                 // In the most cases returns a same string. New string will be instantiated (via StringBuilder)
382                 // only if it really needed. It's important to prevent GC extra load.
383                 return quoted? value.subSequence(start - 1, last + 2) : value.subSequence(start, last + 1);
384             }
385         }
386 
387         StringBuilder result = new StringBuilder(last - start + 1 + CSV_NUMBER_ESCAPE_CHARACTERS);
388         result.append(DOUBLE_QUOTE).append(value, start, firstUnescapedSpecial);
389         for (int i = firstUnescapedSpecial; i <= last; i++) {
390             char c = value.charAt(i);
391             if (isDoubleQuote(c)) {
392                 result.append(DOUBLE_QUOTE);
393                 if (i < last && isDoubleQuote(value.charAt(i + 1))) {
394                     i++;
395                 }
396             }
397             result.append(c);
398         }
399         return result.append(DOUBLE_QUOTE);
400     }
401 
402     /**
403      * Unescapes the specified escaped CSV field, if necessary according to
404      * <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>.
405      *
406      * @param value The escaped CSV field which will be unescaped according to
407      *              <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>
408      * @return {@link CharSequence} the unescaped value if necessary, or the value unchanged
409      */
410     public static CharSequence unescapeCsv(CharSequence value) {
411         int length = checkNotNull(value, "value").length();
412         if (length == 0) {
413             return value;
414         }
415         int last = length - 1;
416         boolean quoted = isDoubleQuote(value.charAt(0)) && isDoubleQuote(value.charAt(last)) && length != 1;
417         if (!quoted) {
418             validateCsvFormat(value);
419             return value;
420         }
421         StringBuilder unescaped = InternalThreadLocalMap.get().stringBuilder();
422         for (int i = 1; i < last; i++) {
423             char current = value.charAt(i);
424             if (current == DOUBLE_QUOTE) {
425                 if (isDoubleQuote(value.charAt(i + 1)) && (i + 1) != last) {
426                     // Followed by a double-quote but not the last character
427                     // Just skip the next double-quote
428                     i++;
429                 } else {
430                     // Not followed by a double-quote or the following double-quote is the last character
431                     throw newInvalidEscapedCsvFieldException(value, i);
432                 }
433             }
434             unescaped.append(current);
435         }
436         return unescaped.toString();
437     }
438 
439     /**
440      * Unescapes the specified escaped CSV fields according to
441      * <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>.
442      *
443      * @param value A string with multiple CSV escaped fields which will be unescaped according to
444      *              <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>
445      * @return {@link List} the list of unescaped fields
446      */
447     public static List<CharSequence> unescapeCsvFields(CharSequence value) {
448         List<CharSequence> unescaped = new ArrayList<CharSequence>(2);
449         StringBuilder current = InternalThreadLocalMap.get().stringBuilder();
450         boolean quoted = false;
451         int last = value.length() - 1;
452         for (int i = 0; i <= last; i++) {
453             char c = value.charAt(i);
454             if (quoted) {
455                 switch (c) {
456                     case DOUBLE_QUOTE:
457                         if (i == last) {
458                             // Add the last field and return
459                             unescaped.add(current.toString());
460                             return unescaped;
461                         }
462                         char next = value.charAt(++i);
463                         if (next == DOUBLE_QUOTE) {
464                             // 2 double-quotes should be unescaped to one
465                             current.append(DOUBLE_QUOTE);
466                             break;
467                         }
468                         if (next == COMMA) {
469                             // This is the end of a field. Let's start to parse the next field.
470                             quoted = false;
471                             unescaped.add(current.toString());
472                             current.setLength(0);
473                             break;
474                         }
475                         // double-quote followed by other character is invalid
476                         throw newInvalidEscapedCsvFieldException(value, i - 1);
477                     default:
478                         current.append(c);
479                 }
480             } else {
481                 switch (c) {
482                     case COMMA:
483                         // Start to parse the next field
484                         unescaped.add(current.toString());
485                         current.setLength(0);
486                         break;
487                     case DOUBLE_QUOTE:
488                         if (current.length() == 0) {
489                             quoted = true;
490                             break;
491                         }
492                         // double-quote appears without being enclosed with double-quotes
493                         // fall through
494                     case LINE_FEED:
495                         // fall through
496                     case CARRIAGE_RETURN:
497                         // special characters appears without being enclosed with double-quotes
498                         throw newInvalidEscapedCsvFieldException(value, i);
499                     default:
500                         current.append(c);
501                 }
502             }
503         }
504         if (quoted) {
505             throw newInvalidEscapedCsvFieldException(value, last);
506         }
507         unescaped.add(current.toString());
508         return unescaped;
509     }
510 
511     /**
512      * Validate if {@code value} is a valid csv field without double-quotes.
513      *
514      * @throws IllegalArgumentException if {@code value} needs to be encoded with double-quotes.
515      */
516     private static void validateCsvFormat(CharSequence value) {
517         int length = value.length();
518         for (int i = 0; i < length; i++) {
519             switch (value.charAt(i)) {
520                 case DOUBLE_QUOTE:
521                 case LINE_FEED:
522                 case CARRIAGE_RETURN:
523                 case COMMA:
524                     // If value contains any special character, it should be enclosed with double-quotes
525                     throw newInvalidEscapedCsvFieldException(value, i);
526                 default:
527             }
528         }
529     }
530 
531     private static IllegalArgumentException newInvalidEscapedCsvFieldException(CharSequence value, int index) {
532         return new IllegalArgumentException("invalid escaped CSV field: " + value + " index: " + index);
533     }
534 
535     /**
536      * Get the length of a string, {@code null} input is considered {@code 0} length.
537      */
538     public static int length(String s) {
539         return s == null ? 0 : s.length();
540     }
541 
542     /**
543      * Determine if a string is {@code null} or {@link String#isEmpty()} returns {@code true}.
544      */
545     public static boolean isNullOrEmpty(String s) {
546         return s == null || s.isEmpty();
547     }
548 
549     /**
550      * Find the index of the first non-white space character in {@code s} starting at {@code offset}.
551      *
552      * @param seq    The string to search.
553      * @param offset The offset to start searching at.
554      * @return the index of the first non-white space character or &lt;{@code 0} if none was found.
555      */
556     public static int indexOfNonWhiteSpace(CharSequence seq, int offset) {
557         for (; offset < seq.length(); ++offset) {
558             if (!Character.isWhitespace(seq.charAt(offset))) {
559                 return offset;
560             }
561         }
562         return -1;
563     }
564 
565     /**
566      * Determine if {@code c} lies within the range of values defined for
567      * <a href="http://unicode.org/glossary/#surrogate_code_point">Surrogate Code Point</a>.
568      *
569      * @param c the character to check.
570      * @return {@code true} if {@code c} lies within the range of values defined for
571      * <a href="http://unicode.org/glossary/#surrogate_code_point">Surrogate Code Point</a>. {@code false} otherwise.
572      */
573     public static boolean isSurrogate(char c) {
574         return c >= '\uD800' && c <= '\uDFFF';
575     }
576 
577     private static boolean isDoubleQuote(char c) {
578         return c == DOUBLE_QUOTE;
579     }
580 
581     /**
582      * Determine if the string {@code s} ends with the char {@code c}.
583      *
584      * @param s the string to test
585      * @param c the tested char
586      * @return true if {@code s} ends with the char {@code c}
587      */
588     public static boolean endsWith(CharSequence s, char c) {
589         int len = s.length();
590         return len > 0 && s.charAt(len - 1) == c;
591     }
592 
593     /**
594      * Trim optional white-space characters from the specified value,
595      * according to <a href="https://tools.ietf.org/html/rfc7230#section-7">RFC-7230</a>.
596      *
597      * @param value the value to trim
598      * @return {@link CharSequence} the trimmed value if necessary, or the value unchanged
599      */
600     public static CharSequence trimOws(CharSequence value) {
601         final int length = value.length();
602         if (length == 0) {
603             return value;
604         }
605         int start = indexOfFirstNonOwsChar(value, length);
606         int end = indexOfLastNonOwsChar(value, start, length);
607         return start == 0 && end == length - 1 ? value : value.subSequence(start, end + 1);
608     }
609 
610     /**
611      * @return {@code length} if no OWS is found.
612      */
613     private static int indexOfFirstNonOwsChar(CharSequence value, int length) {
614         int i = 0;
615         while (i < length && isOws(value.charAt(i))) {
616             i++;
617         }
618         return i;
619     }
620 
621     /**
622      * @return {@code start} if no OWS is found.
623      */
624     private static int indexOfLastNonOwsChar(CharSequence value, int start, int length) {
625         int i = length - 1;
626         while (i > start && isOws(value.charAt(i))) {
627             i--;
628         }
629         return i;
630     }
631 
632     private static boolean isOws(char c) {
633         return c == SPACE || c == TAB;
634     }
635 }