View Javadoc

1   /*
2    * Copyright 2012 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.util.internal;
17  
18  import java.io.IOException;
19  
20  import static io.netty.util.internal.ObjectUtil.*;
21  
22  /**
23   * String utility class.
24   */
25  public final class StringUtil {
26  
    /**
     * Line separator obtained through {@code SystemPropertyUtil.get("line.separator", "\n")}.
     * NOTE(review): {@code "\n"} is presumably the fallback when the property is unavailable - confirm
     * against {@code SystemPropertyUtil}.
     */
    public static final String NEWLINE = SystemPropertyUtil.get("line.separator", "\n");
    /** The double-quote character ({@code "}). */
    public static final char DOUBLE_QUOTE = '\"';
    /** The comma character. */
    public static final char COMMA = ',';
    /** The line-feed character ({@code \n}). */
    public static final char LINE_FEED = '\n';
    /** The carriage-return character ({@code \r}). */
    public static final char CARRIAGE_RETURN = '\r';
    /** The empty string. */
    public static final String EMPTY_STRING = "";
    /** Lookup table indexed by unsigned byte value; filled by the static initializer with 2-digit hex strings. */
    private static final String[] BYTE2HEX_PAD = new String[256];
    /** Lookup table indexed by unsigned byte value; filled by the static initializer with unpadded hex strings. */
    private static final String[] BYTE2HEX_NOPAD = new String[256];

    /**
     * Extra capacity reserved, on top of the input length, for the builder used when escaping a CSV value:
     * 2 - Quote character at beginning and end.
     * 5 - Extra allowance for anticipated escape characters that may be added.
     */
    private static final int CSV_NUMBER_ESCAPE_CHARACTERS = 2 + 5;
    /** Separator between package segments in a class name as returned by {@link Class#getName()}. */
    private static final char PACKAGE_SEPARATOR_CHAR = '.';
42  
43      static {
44          // Generate the lookup table that converts a byte into a 2-digit hexadecimal integer.
45          int i;
46          for (i = 0; i < 10; i ++) {
47              BYTE2HEX_PAD[i] = "0" + i;
48              BYTE2HEX_NOPAD[i] = String.valueOf(i);
49          }
50          for (; i < 16; i ++) {
51              char c = (char) ('a' + i - 10);
52              BYTE2HEX_PAD[i] = "0" + c;
53              BYTE2HEX_NOPAD[i] = String.valueOf(c);
54          }
55          for (; i < BYTE2HEX_PAD.length; i ++) {
56              String str = Integer.toHexString(i);
57              BYTE2HEX_PAD[i] = str;
58              BYTE2HEX_NOPAD[i] = str;
59          }
60      }
61  
    /**
     * Private constructor; this class only exposes static members and is not meant to be instantiated.
     */
    private StringUtil() {
        // Unused.
    }
65  
66      /**
67       * Get the item after one char delim if the delim is found (else null).
68       * This operation is a simplified and optimized
69       * version of {@link String#split(String, int)}.
70       */
71      public static String substringAfter(String value, char delim) {
72          int pos = value.indexOf(delim);
73          if (pos >= 0) {
74              return value.substring(pos + 1);
75          }
76          return null;
77      }
78  
79      /**
80       * Checks if two strings have the same suffix of specified length
81       *
82       * @param s            string
83       * @param p            string
84       * @param len length of the common suffix
85       * @return true if both s and p are not null and both have the same suffix. Otherwise - false
86       */
87      public static boolean commonSuffixOfLength(String s, String p, int len) {
88          return s != null && p != null && len >= 0 && s.regionMatches(s.length() - len, p, p.length() - len, len);
89      }
90  
91      /**
92       * Converts the specified byte value into a 2-digit hexadecimal integer.
93       */
94      public static String byteToHexStringPadded(int value) {
95          return BYTE2HEX_PAD[value & 0xff];
96      }
97  
98      /**
99       * Converts the specified byte value into a 2-digit hexadecimal integer and appends it to the specified buffer.
100      */
101     public static <T extends Appendable> T byteToHexStringPadded(T buf, int value) {
102         try {
103             buf.append(byteToHexStringPadded(value));
104         } catch (IOException e) {
105             PlatformDependent.throwException(e);
106         }
107         return buf;
108     }
109 
110     /**
111      * Converts the specified byte array into a hexadecimal value.
112      */
113     public static String toHexStringPadded(byte[] src) {
114         return toHexStringPadded(src, 0, src.length);
115     }
116 
117     /**
118      * Converts the specified byte array into a hexadecimal value.
119      */
120     public static String toHexStringPadded(byte[] src, int offset, int length) {
121         return toHexStringPadded(new StringBuilder(length << 1), src, offset, length).toString();
122     }
123 
124     /**
125      * Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
126      */
127     public static <T extends Appendable> T toHexStringPadded(T dst, byte[] src) {
128         return toHexStringPadded(dst, src, 0, src.length);
129     }
130 
131     /**
132      * Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
133      */
134     public static <T extends Appendable> T toHexStringPadded(T dst, byte[] src, int offset, int length) {
135         final int end = offset + length;
136         for (int i = offset; i < end; i ++) {
137             byteToHexStringPadded(dst, src[i]);
138         }
139         return dst;
140     }
141 
142     /**
143      * Converts the specified byte value into a hexadecimal integer.
144      */
145     public static String byteToHexString(int value) {
146         return BYTE2HEX_NOPAD[value & 0xff];
147     }
148 
149     /**
150      * Converts the specified byte value into a hexadecimal integer and appends it to the specified buffer.
151      */
152     public static <T extends Appendable> T byteToHexString(T buf, int value) {
153         try {
154             buf.append(byteToHexString(value));
155         } catch (IOException e) {
156             PlatformDependent.throwException(e);
157         }
158         return buf;
159     }
160 
161     /**
162      * Converts the specified byte array into a hexadecimal value.
163      */
164     public static String toHexString(byte[] src) {
165         return toHexString(src, 0, src.length);
166     }
167 
168     /**
169      * Converts the specified byte array into a hexadecimal value.
170      */
171     public static String toHexString(byte[] src, int offset, int length) {
172         return toHexString(new StringBuilder(length << 1), src, offset, length).toString();
173     }
174 
175     /**
176      * Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
177      */
178     public static <T extends Appendable> T toHexString(T dst, byte[] src) {
179         return toHexString(dst, src, 0, src.length);
180     }
181 
182     /**
183      * Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
184      */
185     public static <T extends Appendable> T toHexString(T dst, byte[] src, int offset, int length) {
186         assert length >= 0;
187         if (length == 0) {
188             return dst;
189         }
190 
191         final int end = offset + length;
192         final int endMinusOne = end - 1;
193         int i;
194 
195         // Skip preceding zeroes.
196         for (i = offset; i < endMinusOne; i ++) {
197             if (src[i] != 0) {
198                 break;
199             }
200         }
201 
202         byteToHexString(dst, src[i ++]);
203         int remaining = end - i;
204         toHexStringPadded(dst, src, i, remaining);
205 
206         return dst;
207     }
208 
209     /**
210      * Helper to decode half of a hexadecimal number from a string.
211      * @param c The ASCII character of the hexadecimal number to decode.
212      * Must be in the range {@code [0-9a-fA-F]}.
213      * @return The hexadecimal value represented in the ASCII character
214      * given, or {@code -1} if the character is invalid.
215      */
216     public static int decodeHexNibble(final char c) {
217         // Character.digit() is not used here, as it addresses a larger
218         // set of characters (both ASCII and full-width latin letters).
219         if (c >= '0' && c <= '9') {
220             return c - '0';
221         }
222         if (c >= 'A' && c <= 'F') {
223             return c - 'A' + 0xA;
224         }
225         if (c >= 'a' && c <= 'f') {
226             return c - 'a' + 0xA;
227         }
228         return -1;
229     }
230 
231     /**
232      * Decode a 2-digit hex byte from within a string.
233      */
234     public static byte decodeHexByte(CharSequence s, int pos) {
235         int hi = decodeHexNibble(s.charAt(pos));
236         int lo = decodeHexNibble(s.charAt(pos + 1));
237         if (hi == -1 || lo == -1) {
238             throw new IllegalArgumentException(String.format(
239                     "invalid hex byte '%s' at index %d of '%s'", s.subSequence(pos, pos + 2), pos, s));
240         }
241         return (byte) ((hi << 4) + lo);
242     }
243 
244     /**
245      * Decodes part of a string with <a href="http://en.wikipedia.org/wiki/Hex_dump">hex dump</a>
246      *
247      * @param hexDump a {@link CharSequence} which contains the hex dump
248      * @param fromIndex start of hex dump in {@code hexDump}
249      * @param length hex string length
250      */
251     public static byte[] decodeHexDump(CharSequence hexDump, int fromIndex, int length) {
252         if (length < 0 || (length & 1) != 0) {
253             throw new IllegalArgumentException("length: " + length);
254         }
255         if (length == 0) {
256             return EmptyArrays.EMPTY_BYTES;
257         }
258         byte[] bytes = new byte[length >>> 1];
259         for (int i = 0; i < length; i += 2) {
260             bytes[i >>> 1] = decodeHexByte(hexDump, fromIndex + i);
261         }
262         return bytes;
263     }
264 
265     /**
266      * Decodes a <a href="http://en.wikipedia.org/wiki/Hex_dump">hex dump</a>
267      */
268     public static byte[] decodeHexDump(CharSequence hexDump) {
269         return decodeHexDump(hexDump, 0, hexDump.length());
270     }
271 
272     /**
273      * The shortcut to {@link #simpleClassName(Class) simpleClassName(o.getClass())}.
274      */
275     public static String simpleClassName(Object o) {
276         if (o == null) {
277             return "null_object";
278         } else {
279             return simpleClassName(o.getClass());
280         }
281     }
282 
283     /**
284      * Generates a simplified name from a {@link Class}.  Similar to {@link Class#getSimpleName()}, but it works fine
285      * with anonymous classes.
286      */
287     public static String simpleClassName(Class<?> clazz) {
288         String className = ObjectUtil.checkNotNull(clazz, "clazz").getName();
289         final int lastDotIdx = className.lastIndexOf(PACKAGE_SEPARATOR_CHAR);
290         if (lastDotIdx > -1) {
291             return className.substring(lastDotIdx + 1);
292         }
293         return className;
294     }
295 
    /**
     * Escapes the specified value, if necessary according to
     * <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>.
     * <p>
     * A candidate result is built by wrapping the value in double-quotes and doubling double-quotes
     * where the rules below apply. The candidate is returned only if a double-quote in the middle of
     * the value was doubled, or if the value is not already enclosed in double-quotes and contains a
     * comma, line feed, carriage return, or a double-quote at its first or last position.
     * In every other case the original {@code value} instance is returned.
     *
     * @param value The value which will be escaped according to
     *              <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>;
     *              passed through {@code checkNotNull}
     * @return {@link CharSequence} the escaped value if necessary, or the value unchanged
     */
    public static CharSequence escapeCsv(CharSequence value) {
        int length = checkNotNull(value, "value").length();
        if (length == 0) {
            return value;
        }
        int last = length - 1;
        // "quoted" means the value both starts and ends with a double-quote and is longer than one character.
        boolean quoted = isDoubleQuote(value.charAt(0)) && isDoubleQuote(value.charAt(last)) && length != 1;
        boolean foundSpecialCharacter = false;
        boolean escapedDoubleQuote = false;
        // The candidate result always starts with the opening double-quote.
        StringBuilder escaped = new StringBuilder(length + CSV_NUMBER_ESCAPE_CHARACTERS).append(DOUBLE_QUOTE);
        for (int i = 0; i < length; i++) {
            char current = value.charAt(i);
            switch (current) {
                case DOUBLE_QUOTE:
                    if (i == 0 || i == last) {
                        if (!quoted) {
                            // A leading/trailing double-quote of an unquoted value is doubled, then control
                            // falls through (no break) so that it is also recorded as a special character.
                            escaped.append(DOUBLE_QUOTE);
                        } else {
                            // Enclosing double-quotes of an already quoted value are not copied; the candidate
                            // supplies its own opening and closing double-quotes.
                            continue;
                        }
                    } else {
                        boolean isNextCharDoubleQuote = isDoubleQuote(value.charAt(i + 1));
                        // A double-quote in the middle is doubled only when the previous character is not a
                        // double-quote and the next character is either not a double-quote or is the last
                        // character of the value.
                        if (!isDoubleQuote(value.charAt(i - 1)) &&
                                (!isNextCharDoubleQuote || i + 1 == last)) {
                            escaped.append(DOUBLE_QUOTE);
                            escapedDoubleQuote = true;
                        }
                        // Leave the switch without setting foundSpecialCharacter.
                        break;
                    }
                case LINE_FEED:
                case CARRIAGE_RETURN:
                case COMMA:
                    foundSpecialCharacter = true;
            }
            escaped.append(current);
        }
        // '&&' binds tighter than '||': escapedDoubleQuote || (foundSpecialCharacter && !quoted).
        return escapedDoubleQuote || foundSpecialCharacter && !quoted ?
                escaped.append(DOUBLE_QUOTE) : value;
    }
343 
344     /**
345      * Unescapes the specified escaped CSV field, if necessary according to
346      * <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>.
347      *
348      * @param value The escaped CSV field which will be unescaped according to
349      *              <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>
350      * @return {@link CharSequence} the unescaped value if necessary, or the value unchanged
351      */
352     public static CharSequence unescapeCsv(CharSequence value) {
353         int length = checkNotNull(value, "value").length();
354         if (length == 0) {
355             return value;
356         }
357         int last = length - 1;
358         boolean quoted = isDoubleQuote(value.charAt(0)) && isDoubleQuote(value.charAt(last)) && length != 1;
359         if (!quoted) {
360             validateCsvFormat(value);
361             return value;
362         }
363         StringBuilder unescaped = InternalThreadLocalMap.get().stringBuilder();
364         for (int i = 1; i < last; i++) {
365             char current = value.charAt(i);
366             if (current == DOUBLE_QUOTE) {
367                 if (isDoubleQuote(value.charAt(i + 1)) && (i + 1) != last) {
368                     // Followed by a double-quote but not the last character
369                     // Just skip the next double-quote
370                     i++;
371                 } else {
372                     // Not followed by a double-quote or the following double-quote is the last character
373                     throw newInvalidEscapedCsvFieldException(value, i);
374                 }
375             }
376             unescaped.append(current);
377         }
378         return unescaped.toString();
379     }
380 
381     /**
382      * Validate if {@code value} is a valid csv field without double-quotes.
383      *
384      * @throws IllegalArgumentException if {@code value} needs to be encoded with double-quotes.
385      */
386     private static void validateCsvFormat(CharSequence value) {
387         int length = value.length();
388         for (int i = 0; i < length; i++) {
389             switch (value.charAt(i)) {
390                 case DOUBLE_QUOTE:
391                 case LINE_FEED:
392                 case CARRIAGE_RETURN:
393                 case COMMA:
394                     // If value contains any special character, it should be enclosed with double-quotes
395                     throw newInvalidEscapedCsvFieldException(value, i);
396                 default:
397             }
398         }
399     }
400 
401     private static IllegalArgumentException newInvalidEscapedCsvFieldException(CharSequence value, int index) {
402         return new IllegalArgumentException("invalid escaped CSV field: " + value + " index: " + index);
403     }
404 
405     /**
406      * Get the length of a string, {@code null} input is considered {@code 0} length.
407      */
408     public static int length(String s) {
409         return s == null ? 0 : s.length();
410     }
411 
412     /**
413      * Determine if a string is {@code null} or {@link String#isEmpty()} returns {@code true}.
414      */
415     public static boolean isNullOrEmpty(String s) {
416         return s == null || s.isEmpty();
417     }
418 
419     /**
420      * Determine if {@code c} lies within the range of values defined for
421      * <a href="http://unicode.org/glossary/#surrogate_code_point">Surrogate Code Point</a>.
422      * @param c the character to check.
423      * @return {@code true} if {@code c} lies within the range of values defined for
424      * <a href="http://unicode.org/glossary/#surrogate_code_point">Surrogate Code Point</a>. {@code false} otherwise.
425      */
426     public static boolean isSurrogate(char c) {
427         return c >= '\uD800' && c <= '\uDFFF';
428     }
429 
430     private static boolean isDoubleQuote(char c) {
431         return c == DOUBLE_QUOTE;
432     }
433 }