001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.text;
018
019import java.util.HashSet;
020import java.util.Set;
021import java.util.regex.Matcher;
022import java.util.regex.Pattern;
023
024import org.apache.commons.lang3.ArrayUtils;
025import org.apache.commons.lang3.StringUtils;
026import org.apache.commons.lang3.Validate;
027
028/**
029 * <p>
030 * Operations on Strings that contain words.
031 * </p>
032 *
033 * <p>
034 * This class tries to handle {@code null} input gracefully. An exception will not be thrown for a
035 * {@code null} input. Each method documents its behavior in more detail.
036 * </p>
037 *
038 * @since 1.1
039 */
040public class WordUtils {
041
042    /**
043     * <p>{@code WordUtils} instances should NOT be constructed in
044     * standard programming. Instead, the class should be used as
045     * {@code WordUtils.wrap("foo bar", 20);}.</p>
046     *
047     * <p>This constructor is public to permit tools that require a JavaBean
048     * instance to operate.</p>
049     */
050    public WordUtils() {
051      super();
052    }
053
054    // Wrapping
055    //--------------------------------------------------------------------------
056    /**
057     * <p>Wraps a single line of text, identifying words by {@code ' '}.</p>
058     *
059     * <p>New lines will be separated by the system property line separator.
060     * Very long words, such as URLs will <i>not</i> be wrapped.</p>
061     *
062     * <p>Leading spaces on a new line are stripped.
063     * Trailing spaces are not stripped.</p>
064     *
065     * <table border="1">
066     *  <caption>Examples</caption>
067     *  <tr>
068     *   <th>input</th>
069     *   <th>wrapLength</th>
070     *   <th>result</th>
071     *  </tr>
072     *  <tr>
073     *   <td>null</td>
074     *   <td>*</td>
075     *   <td>null</td>
076     *  </tr>
077     *  <tr>
078     *   <td>""</td>
079     *   <td>*</td>
080     *   <td>""</td>
081     *  </tr>
082     *  <tr>
083     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
084     *   <td>20</td>
085     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
086     *  </tr>
087     *  <tr>
088     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
089     *   <td>20</td>
090     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
091     *  </tr>
092     *  <tr>
093     *   <td>"Click here, https://commons.apache.org, to jump to the commons website"</td>
094     *   <td>20</td>
095     *   <td>"Click here,\nhttps://commons.apache.org,\nto jump to the\ncommons website"</td>
096     *  </tr>
097     * </table>
098     *
099     * (assuming that '\n' is the systems line separator)
100     *
101     * @param str  the String to be word wrapped, may be null
102     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
103     * @return a line with newlines inserted, {@code null} if null input
104     */
105    public static String wrap(final String str, final int wrapLength) {
106        return wrap(str, wrapLength, null, false);
107    }
108
109    /**
110     * <p>Wraps a single line of text, identifying words by {@code ' '}.</p>
111     *
112     * <p>Leading spaces on a new line are stripped.
113     * Trailing spaces are not stripped.</p>
114     *
115     * <table border="1">
116     *  <caption>Examples</caption>
117     *  <tr>
118     *   <th>input</th>
119     *   <th>wrapLength</th>
120     *   <th>newLineString</th>
121     *   <th>wrapLongWords</th>
122     *   <th>result</th>
123     *  </tr>
124     *  <tr>
125     *   <td>null</td>
126     *   <td>*</td>
127     *   <td>*</td>
128     *   <td>true/false</td>
129     *   <td>null</td>
130     *  </tr>
131     *  <tr>
132     *   <td>""</td>
133     *   <td>*</td>
134     *   <td>*</td>
135     *   <td>true/false</td>
136     *   <td>""</td>
137     *  </tr>
138     *  <tr>
139     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
140     *   <td>20</td>
141     *   <td>"\n"</td>
142     *   <td>true/false</td>
143     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
144     *  </tr>
145     *  <tr>
146     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
147     *   <td>20</td>
148     *   <td>"&lt;br /&gt;"</td>
149     *   <td>true/false</td>
150     *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;
151     *   br /&gt;to be wrapped after&lt;br /&gt;20 columns."</td>
152     *  </tr>
153     *  <tr>
154     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
155     *   <td>20</td>
156     *   <td>null</td>
157     *   <td>true/false</td>
158     *   <td>"Here is one line of" + systemNewLine + "text that is going"
159     *   + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
160     *  </tr>
161     *  <tr>
162     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
163     *   <td>20</td>
164     *   <td>"\n"</td>
165     *   <td>false</td>
166     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
167     *  </tr>
168     *  <tr>
169     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
170     *   <td>20</td>
171     *   <td>"\n"</td>
172     *   <td>true</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apac\nhe.org"</td>
174     *  </tr>
175     * </table>
176     *
177     * @param str  the String to be word wrapped, may be null
178     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
179     * @param newLineStr  the string to insert for a new line,
180     *  {@code null} uses the system property line separator
181     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
182     * @return a line with newlines inserted, {@code null} if null input
183     */
184    public static String wrap(final String str,
185                              final int wrapLength,
186                              final String newLineStr,
187                              final boolean wrapLongWords) {
188        return wrap(str, wrapLength, newLineStr, wrapLongWords, " ");
189    }
190
191    /**
192     * <p>Wraps a single line of text, identifying words by {@code wrapOn}.</p>
193     *
194     * <p>Leading spaces on a new line are stripped.
195     * Trailing spaces are not stripped.</p>
196     *
197     * <table border="1">
198     *  <caption>Examples</caption>
199     *  <tr>
200     *   <th>input</th>
201     *   <th>wrapLength</th>
202     *   <th>newLineString</th>
203     *   <th>wrapLongWords</th>
204     *   <th>wrapOn</th>
205     *   <th>result</th>
206     *  </tr>
207     *  <tr>
208     *   <td>null</td>
209     *   <td>*</td>
210     *   <td>*</td>
211     *   <td>true/false</td>
212     *   <td>*</td>
213     *   <td>null</td>
214     *  </tr>
215     *  <tr>
216     *   <td>""</td>
217     *   <td>*</td>
218     *   <td>*</td>
219     *   <td>true/false</td>
220     *   <td>*</td>
221     *   <td>""</td>
222     *  </tr>
223     *  <tr>
224     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
225     *   <td>20</td>
226     *   <td>"\n"</td>
227     *   <td>true/false</td>
228     *   <td>" "</td>
229     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
230     *  </tr>
231     *  <tr>
232     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
233     *   <td>20</td>
234     *   <td>"&lt;br /&gt;"</td>
235     *   <td>true/false</td>
236     *   <td>" "</td>
237     *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;
238     *   to be wrapped after&lt;br /&gt;20 columns."</td>
239     *  </tr>
240     *  <tr>
241     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
242     *   <td>20</td>
243     *   <td>null</td>
244     *   <td>true/false</td>
245     *   <td>" "</td>
246     *   <td>"Here is one line of" + systemNewLine + "text that is going"
247     *   + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
248     *  </tr>
249     *  <tr>
250     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
251     *   <td>20</td>
252     *   <td>"\n"</td>
253     *   <td>false</td>
254     *   <td>" "</td>
255     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
256     *  </tr>
257     *  <tr>
258     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
259     *   <td>20</td>
260     *   <td>"\n"</td>
261     *   <td>true</td>
262     *   <td>" "</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apac\nhe.org"</td>
264     *  </tr>
265     *  <tr>
266     *   <td>"flammable/inflammable"</td>
267     *   <td>20</td>
268     *   <td>"\n"</td>
269     *   <td>true</td>
270     *   <td>"/"</td>
271     *   <td>"flammable\ninflammable"</td>
272     *  </tr>
273     * </table>
274     * @param str  the String to be word wrapped, may be null
275     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
276     * @param newLineStr  the string to insert for a new line,
277     *  {@code null} uses the system property line separator
278     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
     * @param wrapOn  regular expression matching the characters the text may be broken on,
     *               if a blank string is provided a space character will be used
281     * @return a line with newlines inserted, {@code null} if null input
282     */
283    public static String wrap(final String str,
284                              int wrapLength,
285                              String newLineStr,
286                              final boolean wrapLongWords,
287                              String wrapOn) {
288        if (str == null) {
289            return null;
290        }
291        if (newLineStr == null) {
292            newLineStr = System.lineSeparator();
293        }
294        if (wrapLength < 1) {
295            wrapLength = 1;
296        }
297        if (StringUtils.isBlank(wrapOn)) {
298            wrapOn = " ";
299        }
300        final Pattern patternToWrapOn = Pattern.compile(wrapOn);
301        final int inputLineLength = str.length();
302        int offset = 0;
303        final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);
304        int matcherSize = -1;
305
306        while (offset < inputLineLength) {
307            int spaceToWrapAt = -1;
308            Matcher matcher = patternToWrapOn.matcher(str.substring(offset,
309                    Math.min((int) Math.min(Integer.MAX_VALUE, offset + wrapLength + 1L), inputLineLength)));
310            if (matcher.find()) {
311                if (matcher.start() == 0) {
312                    matcherSize = matcher.end() - matcher.start();
313                    if (matcherSize != 0) {
314                        offset += matcher.end();
315                        continue;
316                    }
317                    offset += 1;
318                }
319                spaceToWrapAt = matcher.start() + offset;
320            }
321
322            // only last line without leading spaces is left
323            if (inputLineLength - offset <= wrapLength) {
324                break;
325            }
326
327            while (matcher.find()) {
328                spaceToWrapAt = matcher.start() + offset;
329            }
330
331            if (spaceToWrapAt >= offset) {
332                // normal case
333                wrappedLine.append(str, offset, spaceToWrapAt);
334                wrappedLine.append(newLineStr);
335                offset = spaceToWrapAt + 1;
336
337            } else {
338                // really long word or URL
339                if (wrapLongWords) {
340                    if (matcherSize == 0) {
341                        offset--;
342                    }
343                    // wrap really long word one line at a time
344                    wrappedLine.append(str, offset, wrapLength + offset);
345                    wrappedLine.append(newLineStr);
346                    offset += wrapLength;
347                    matcherSize = -1;
348                } else {
349                    // do not wrap really long word, just extend beyond limit
350                    matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength));
351                    if (matcher.find()) {
352                        matcherSize = matcher.end() - matcher.start();
353                        spaceToWrapAt = matcher.start() + offset + wrapLength;
354                    }
355
356                    if (spaceToWrapAt >= 0) {
357                        if (matcherSize == 0 && offset != 0) {
358                            offset--;
359                        }
360                        wrappedLine.append(str, offset, spaceToWrapAt);
361                        wrappedLine.append(newLineStr);
362                        offset = spaceToWrapAt + 1;
363                    } else {
364                        if (matcherSize == 0 && offset != 0) {
365                            offset--;
366                        }
367                        wrappedLine.append(str, offset, str.length());
368                        offset = inputLineLength;
369                        matcherSize = -1;
370                    }
371                }
372            }
373        }
374
375        if (matcherSize == 0 && offset < inputLineLength) {
376            offset--;
377        }
378
379        // Whatever is left in line is short enough to just pass through
380        wrappedLine.append(str, offset, str.length());
381
382        return wrappedLine.toString();
383    }
384
385    // Capitalizing
386    //-----------------------------------------------------------------------
387    /**
388     * <p>Capitalizes all the whitespace separated words in a String.
389     * Only the first character of each word is changed. To convert the
390     * rest of each word to lowercase at the same time,
391     * use {@link #capitalizeFully(String)}.</p>
392     *
393     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
394     * A {@code null} input String returns {@code null}.
395     * Capitalization uses the Unicode title case, normally equivalent to
396     * upper case.</p>
397     *
398     * <pre>
399     * WordUtils.capitalize(null)        = null
400     * WordUtils.capitalize("")          = ""
401     * WordUtils.capitalize("i am FINE") = "I Am FINE"
402     * </pre>
403     *
404     * @param str  the String to capitalize, may be null
405     * @return capitalized String, {@code null} if null String input
406     * @see #uncapitalize(String)
407     * @see #capitalizeFully(String)
408     */
409    public static String capitalize(final String str) {
410        return capitalize(str, null);
411    }
412
413    /**
414     * <p>Capitalizes all the delimiter separated words in a String.
415     * Only the first character of each word is changed. To convert the
416     * rest of each word to lowercase at the same time,
417     * use {@link #capitalizeFully(String, char[])}.</p>
418     *
419     * <p>The delimiters represent a set of characters understood to separate words.
420     * The first string character and the first non-delimiter character after a
421     * delimiter will be capitalized.</p>
422     *
423     * <p>A {@code null} input String returns {@code null}.
424     * Capitalization uses the Unicode title case, normally equivalent to
425     * upper case.</p>
426     *
427     * <pre>
428     * WordUtils.capitalize(null, *)            = null
429     * WordUtils.capitalize("", *)              = ""
430     * WordUtils.capitalize(*, new char[0])     = *
431     * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
432     * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
433     * WordUtils.capitalize("i am fine", new char[]{}) = "I am fine"
434     * </pre>
435     *
436     * @param str  the String to capitalize, may be null
437     * @param delimiters  set of characters to determine capitalization, null means whitespace
438     * @return capitalized String, {@code null} if null String input
439     * @see #uncapitalize(String)
440     * @see #capitalizeFully(String)
441     */
442    public static String capitalize(final String str, final char... delimiters) {
443        if (StringUtils.isEmpty(str)) {
444            return str;
445        }
446        final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
447        final int strLen = str.length();
448        final int[] newCodePoints = new int[strLen];
449        int outOffset = 0;
450
451        boolean capitalizeNext = true;
452        for (int index = 0; index < strLen;) {
453            final int codePoint = str.codePointAt(index);
454
455            if (delimiterSet.contains(codePoint)) {
456                capitalizeNext = true;
457                newCodePoints[outOffset++] = codePoint;
458                index += Character.charCount(codePoint);
459            } else if (capitalizeNext) {
460                final int titleCaseCodePoint = Character.toTitleCase(codePoint);
461                newCodePoints[outOffset++] = titleCaseCodePoint;
462                index += Character.charCount(titleCaseCodePoint);
463                capitalizeNext = false;
464            } else {
465                newCodePoints[outOffset++] = codePoint;
466                index += Character.charCount(codePoint);
467            }
468        }
469        return new String(newCodePoints, 0, outOffset);
470    }
471
472    //-----------------------------------------------------------------------
473    /**
474     * <p>Converts all the whitespace separated words in a String into capitalized words,
475     * that is each word is made up of a titlecase character and then a series of
476     * lowercase characters.</p>
477     *
478     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
479     * A {@code null} input String returns {@code null}.
480     * Capitalization uses the Unicode title case, normally equivalent to
481     * upper case.</p>
482     *
483     * <pre>
484     * WordUtils.capitalizeFully(null)        = null
485     * WordUtils.capitalizeFully("")          = ""
486     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
487     * </pre>
488     *
489     * @param str  the String to capitalize, may be null
490     * @return capitalized String, {@code null} if null String input
491     */
492    public static String capitalizeFully(final String str) {
493        return capitalizeFully(str, null);
494    }
495
496    /**
497     * <p>Converts all the delimiter separated words in a String into capitalized words,
498     * that is each word is made up of a titlecase character and then a series of
499     * lowercase characters.</p>
500     *
501     * <p>The delimiters represent a set of characters understood to separate words.
502     * The first string character and the first non-delimiter character after a
503     * delimiter will be capitalized.</p>
504     *
505     * <p>A {@code null} input String returns {@code null}.
506     * Capitalization uses the Unicode title case, normally equivalent to
507     * upper case.</p>
508     *
509     * <pre>
510     * WordUtils.capitalizeFully(null, *)            = null
511     * WordUtils.capitalizeFully("", *)              = ""
512     * WordUtils.capitalizeFully(*, null)            = *
513     * WordUtils.capitalizeFully(*, new char[0])     = *
514     * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
515     * </pre>
516     *
517     * @param str  the String to capitalize, may be null
518     * @param delimiters  set of characters to determine capitalization, null means whitespace
519     * @return capitalized String, {@code null} if null String input
520     */
521    public static String capitalizeFully(String str, final char... delimiters) {
522        if (StringUtils.isEmpty(str)) {
523            return str;
524        }
525        str = str.toLowerCase();
526        return capitalize(str, delimiters);
527    }
528
529    //-----------------------------------------------------------------------
530    /**
531     * <p>Uncapitalizes all the whitespace separated words in a String.
532     * Only the first character of each word is changed.</p>
533     *
534     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
535     * A {@code null} input String returns {@code null}.</p>
536     *
537     * <pre>
538     * WordUtils.uncapitalize(null)        = null
539     * WordUtils.uncapitalize("")          = ""
540     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
541     * </pre>
542     *
543     * @param str  the String to uncapitalize, may be null
544     * @return uncapitalized String, {@code null} if null String input
545     * @see #capitalize(String)
546     */
547    public static String uncapitalize(final String str) {
548        return uncapitalize(str, null);
549    }
550
551    /**
     * <p>Uncapitalizes all the delimiter separated words in a String.
553     * Only the first character of each word is changed.</p>
554     *
555     * <p>The delimiters represent a set of characters understood to separate words.
556     * The first string character and the first non-delimiter character after a
557     * delimiter will be uncapitalized.</p>
558     *
559     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
560     * A {@code null} input String returns {@code null}.</p>
561     *
562     * <pre>
563     * WordUtils.uncapitalize(null, *)            = null
564     * WordUtils.uncapitalize("", *)              = ""
565     * WordUtils.uncapitalize(*, null)            = *
566     * WordUtils.uncapitalize(*, new char[0])     = *
567     * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
568     * WordUtils.uncapitalize("I am fine", new char[]{}) = "i am fine"
569     * </pre>
570     *
571     * @param str  the String to uncapitalize, may be null
572     * @param delimiters  set of characters to determine uncapitalization, null means whitespace
573     * @return uncapitalized String, {@code null} if null String input
574     * @see #capitalize(String)
575     */
576    public static String uncapitalize(final String str, final char... delimiters) {
577        if (StringUtils.isEmpty(str)) {
578            return str;
579        }
580        final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
581        final int strLen = str.length();
582        final int[] newCodePoints = new int[strLen];
583        int outOffset = 0;
584
585        boolean uncapitalizeNext = true;
586        for (int index = 0; index < strLen;) {
587            final int codePoint = str.codePointAt(index);
588
589            if (delimiterSet.contains(codePoint)) {
590                uncapitalizeNext = true;
591                newCodePoints[outOffset++] = codePoint;
592                index += Character.charCount(codePoint);
593            } else if (uncapitalizeNext) {
594                final int titleCaseCodePoint = Character.toLowerCase(codePoint);
595                newCodePoints[outOffset++] = titleCaseCodePoint;
596                index += Character.charCount(titleCaseCodePoint);
597                uncapitalizeNext = false;
598            } else {
599                newCodePoints[outOffset++] = codePoint;
600                index += Character.charCount(codePoint);
601            }
602        }
603        return new String(newCodePoints, 0, outOffset);
604    }
605
606    //-----------------------------------------------------------------------
607    /**
608     * <p>Swaps the case of a String using a word based algorithm.</p>
609     *
610     * <ul>
611     *  <li>Upper case character converts to Lower case</li>
612     *  <li>Title case character converts to Lower case</li>
613     *  <li>Lower case character after Whitespace or at start converts to Title case</li>
614     *  <li>Other Lower case character converts to Upper case</li>
615     * </ul>
616     *
617     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
618     * A {@code null} input String returns {@code null}.</p>
619     *
620     * <pre>
     * WordUtils.swapCase(null)                 = null
     * WordUtils.swapCase("")                   = ""
     * WordUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
624     * </pre>
625     *
626     * @param str  the String to swap case, may be null
627     * @return The changed String, {@code null} if null String input
628     */
629    public static String swapCase(final String str) {
630        if (StringUtils.isEmpty(str)) {
631            return str;
632        }
633        final int strLen = str.length();
634        final int[] newCodePoints = new int[strLen];
635        int outOffset = 0;
636        boolean whitespace = true;
637        for (int index = 0; index < strLen;) {
638            final int oldCodepoint = str.codePointAt(index);
639            final int newCodePoint;
640            if (Character.isUpperCase(oldCodepoint) || Character.isTitleCase(oldCodepoint)) {
641                newCodePoint = Character.toLowerCase(oldCodepoint);
642                whitespace = false;
643            } else if (Character.isLowerCase(oldCodepoint)) {
644                if (whitespace) {
645                    newCodePoint = Character.toTitleCase(oldCodepoint);
646                    whitespace = false;
647                } else {
648                    newCodePoint = Character.toUpperCase(oldCodepoint);
649                }
650            } else {
651                whitespace = Character.isWhitespace(oldCodepoint);
652                newCodePoint = oldCodepoint;
653            }
654            newCodePoints[outOffset++] = newCodePoint;
655            index += Character.charCount(newCodePoint);
656        }
657        return new String(newCodePoints, 0, outOffset);
658    }
659
660    //-----------------------------------------------------------------------
661    /**
662     * <p>Extracts the initial characters from each word in the String.</p>
663     *
664     * <p>All first characters after whitespace are returned as a new string.
665     * Their case is not changed.</p>
666     *
667     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
668     * A {@code null} input String returns {@code null}.</p>
669     *
670     * <pre>
671     * WordUtils.initials(null)             = null
672     * WordUtils.initials("")               = ""
673     * WordUtils.initials("Ben John Lee")   = "BJL"
674     * WordUtils.initials("Ben J.Lee")      = "BJ"
675     * </pre>
676     *
677     * @param str  the String to get initials from, may be null
678     * @return String of initial letters, {@code null} if null String input
679     * @see #initials(String,char[])
680     */
681    public static String initials(final String str) {
682        return initials(str, null);
683    }
684
685    /**
686     * <p>Extracts the initial characters from each word in the String.</p>
687     *
688     * <p>All first characters after the defined delimiters are returned as a new string.
689     * Their case is not changed.</p>
690     *
691     * <p>If the delimiters array is null, then Whitespace is used.
692     * Whitespace is defined by {@link Character#isWhitespace(char)}.
693     * A {@code null} input String returns {@code null}.
694     * An empty delimiter array returns an empty String.</p>
695     *
696     * <pre>
697     * WordUtils.initials(null, *)                = null
698     * WordUtils.initials("", *)                  = ""
699     * WordUtils.initials("Ben John Lee", null)   = "BJL"
700     * WordUtils.initials("Ben J.Lee", null)      = "BJ"
701     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
702     * WordUtils.initials(*, new char[0])         = ""
703     * </pre>
704     *
705     * @param str  the String to get initials from, may be null
706     * @param delimiters  set of characters to determine words, null means whitespace
707     * @return String of initial characters, {@code null} if null String input
708     * @see #initials(String)
709     */
710    public static String initials(final String str, final char... delimiters) {
711        if (StringUtils.isEmpty(str)) {
712            return str;
713        }
714        if (delimiters != null && delimiters.length == 0) {
715            return StringUtils.EMPTY;
716        }
717        final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
718        final int strLen = str.length();
719        final int[] newCodePoints = new int[strLen / 2 + 1];
720        int count = 0;
721        boolean lastWasGap = true;
722        for (int i = 0; i < strLen;) {
723            final int codePoint = str.codePointAt(i);
724
725            if (delimiterSet.contains(codePoint) || (delimiters == null && Character.isWhitespace(codePoint))) {
726                lastWasGap = true;
727            } else if (lastWasGap) {
728                newCodePoints[count++] = codePoint;
729                lastWasGap = false;
730            }
731
732            i += Character.charCount(codePoint);
733        }
734        return new String(newCodePoints, 0, count);
735    }
736
737    //-----------------------------------------------------------------------
738    /**
739     * <p>Checks if the String contains all words in the given array.</p>
740     *
741     * <p>
742     * A {@code null} String will return {@code false}. A {@code null}, zero
743     * length search array or if one element of array is null will return {@code false}.
744     * </p>
745     *
746     * <pre>
747     * WordUtils.containsAllWords(null, *)            = false
748     * WordUtils.containsAllWords("", *)              = false
749     * WordUtils.containsAllWords(*, null)            = false
750     * WordUtils.containsAllWords(*, [])              = false
751     * WordUtils.containsAllWords("abcd", "ab", "cd") = false
752     * WordUtils.containsAllWords("abc def", "def", "abc") = true
753     * </pre>
754     *
755     * @param word The CharSequence to check, may be null
756     * @param words The array of String words to search for, may be null
757     * @return {@code true} if all search words are found, {@code false} otherwise
758     */
759    public static boolean containsAllWords(final CharSequence word, final CharSequence... words) {
760        if (StringUtils.isEmpty(word) || ArrayUtils.isEmpty(words)) {
761            return false;
762        }
763        for (final CharSequence w : words) {
764            if (StringUtils.isBlank(w)) {
765                return false;
766            }
767            final Pattern p = Pattern.compile(".*\\b" + w + "\\b.*");
768            if (!p.matcher(word).matches()) {
769                return false;
770            }
771        }
772        return true;
773    }
774
775    //-----------------------------------------------------------------------
776    /**
777     * Is the character a delimiter.
778     *
779     * @param ch the character to check
780     * @param delimiters the delimiters
781     * @return true if it is a delimiter
782     * @deprecated as of 1.2 and will be removed in 2.0
783     */
784    @Deprecated
785    public static boolean isDelimiter(final char ch, final char[] delimiters) {
786        if (delimiters == null) {
787            return Character.isWhitespace(ch);
788        }
789        for (final char delimiter : delimiters) {
790            if (ch == delimiter) {
791                return true;
792            }
793        }
794        return false;
795    }
796
797  //-----------------------------------------------------------------------
798    /**
799     * Is the codePoint a delimiter.
800     *
801     * @param codePoint the codePint to check
802     * @param delimiters the delimiters
803     * @return true if it is a delimiter
804     * @deprecated as of 1.2 and will be removed in 2.0
805     */
806    @Deprecated
807    public static boolean isDelimiter(final int codePoint, final char[] delimiters) {
808        if (delimiters == null) {
809            return Character.isWhitespace(codePoint);
810        }
811        for (int index = 0; index < delimiters.length; index++) {
812            final int delimiterCodePoint = Character.codePointAt(delimiters, index);
813            if (delimiterCodePoint == codePoint) {
814                return true;
815            }
816        }
817        return false;
818    }
819
820    //-----------------------------------------------------------------------
821    /**
822     * Abbreviates the words nicely.
823     *
824     * This method searches for the first space after the lower limit and abbreviates
825     * the String there. It will also append any String passed as a parameter
826     * to the end of the String. The upper limit can be specified to forcibly
827     * abbreviate a String.
828     *
829     * @param str         the string to be abbreviated. If null is passed, null is returned.
830     *                    If the empty String is passed, the empty string is returned.
831     * @param lower       the lower limit.
832     * @param upper       the upper limit; specify -1 if no limit is desired.
833     *                    If the upper limit is lower than the lower limit, it will be
834     *                    adjusted to be the same as the lower limit.
835     * @param appendToEnd String to be appended to the end of the abbreviated string.
836     *                    This is appended ONLY if the string was indeed abbreviated.
837     *                    The append does not count towards the lower or upper limits.
838     * @return The abbreviated String.
839     *
840     * <pre>
841     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, null));     = "Now"
842     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, null));    = "Now is the"
843     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, null));    = "Now is the time for all"
844     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, ""));       = "Now"
845     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, ""));      = "Now is the"
846     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, ""));      = "Now is the time for all"
847     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, " ..."));   = "Now ..."
848     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, " ..."));  = "Now is the ..."
849     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, " ..."));  = "Now is the time for all ..."
850     * WordUtils.abbreviate("Now is the time for all good men", 0, -1, ""));       = "Now"
851     * WordUtils.abbreviate("Now is the time for all good men", 10, -1, ""));      = "Now is the"
852     * WordUtils.abbreviate("Now is the time for all good men", 20, -1, ""));      = "Now is the time for all"
853     * WordUtils.abbreviate("Now is the time for all good men", 50, -1, ""));      = "Now is the time for all good men"
854     * WordUtils.abbreviate("Now is the time for all good men", 1000, -1, ""));    = "Now is the time for all good men"
855     * WordUtils.abbreviate("Now is the time for all good men", 9, -10, null));    = IllegalArgumentException
856     * WordUtils.abbreviate("Now is the time for all good men", 10, 5, null));     = IllegalArgumentException
857     * </pre>
858     */
859    public static String abbreviate(final String str, int lower, int upper, final String appendToEnd) {
860        Validate.isTrue(upper >= -1, "upper value cannot be less than -1");
861        Validate.isTrue(upper >= lower || upper == -1, "upper value is less than lower value");
862
863        if (StringUtils.isEmpty(str)) {
864            return str;
865        }
866
867        // if the lower value is greater than the length of the string,
868        // set to the length of the string
869        if (lower > str.length()) {
870            lower = str.length();
871        }
872
873        // if the upper value is -1 (i.e. no limit) or is greater
874        // than the length of the string, set to the length of the string
875        if (upper == -1 || upper > str.length()) {
876            upper = str.length();
877        }
878
879        final StringBuilder result = new StringBuilder();
880        final int index = StringUtils.indexOf(str, " ", lower);
881        if (index == -1) {
882            result.append(str, 0, upper);
883            // only if abbreviation has occurred do we append the appendToEnd value
884            if (upper != str.length()) {
885                result.append(StringUtils.defaultString(appendToEnd));
886            }
887        } else if (index > upper) {
888            result.append(str, 0, upper);
889            result.append(StringUtils.defaultString(appendToEnd));
890        } else {
891            result.append(str, 0, index);
892            result.append(StringUtils.defaultString(appendToEnd));
893        }
894
895        return result.toString();
896    }
897
898    // -----------------------------------------------------------------------
899    /**
900     * <p>
901     * Converts an array of delimiters to a hash set of code points. Code point of space(32) is added as the default
902     * value if delimiters is null. The generated hash set provides O(1) lookup time.
903     * </p>
904     *
905     * @param delimiters set of characters to determine capitalization, null means whitespace
906     * @return Set<Integer>
907     */
908    private static Set<Integer> generateDelimiterSet(final char[] delimiters) {
909        final Set<Integer> delimiterHashSet = new HashSet<>();
910        if (delimiters == null || delimiters.length == 0) {
911            if (delimiters == null) {
912                delimiterHashSet.add(Character.codePointAt(new char[] {' '}, 0));
913            }
914
915            return delimiterHashSet;
916        }
917
918        for (int index = 0; index < delimiters.length; index++) {
919            delimiterHashSet.add(Character.codePointAt(delimiters, index));
920        }
921        return delimiterHashSet;
922    }
923 }