/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text;

import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;

/**
 * <p>
 * Operations on Strings that contain words.
 * </p>
 *
 * <p>
 * This class tries to handle {@code null} input gracefully. An exception will not be thrown for a
 * {@code null} input. Each method documents its behavior in more detail.
 * </p>
 *
 * @since 1.1
 */
public class WordUtils {

    /**
     * <p>{@code WordUtils} instances should NOT be constructed in
     * standard programming. Instead, the class should be used as
     * {@code WordUtils.wrap("foo bar", 20);}.</p>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public WordUtils() {
        // intentionally empty; see Javadoc for why this remains public
    }

    // Wrapping
    //--------------------------------------------------------------------------
    /**
     * <p>Wraps a single line of text, identifying words by {@code ' '}.</p>
     *
     * <p>New lines will be separated by the system property line separator.
     * Very long words, such as URLs will <i>not</i> be wrapped.</p>
     *
     * <p>Leading spaces on a new line are stripped.
     * Trailing spaces are not stripped.</p>
     *
     * <table border="1">
     *  <caption>Examples</caption>
     *  <tr><th>input</th><th>wrapLength</th><th>result</th></tr>
     *  <tr><td>null</td><td>*</td><td>null</td></tr>
     *  <tr><td>""</td><td>*</td><td>""</td></tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
     *   <td>20</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here, https://commons.apache.org, to jump to the commons website"</td>
     *   <td>20</td>
     *   <td>"Click here,\nhttps://commons.apache.org,\nto jump to the\ncommons website"</td>
     *  </tr>
     * </table>
     *
     * (assuming that '\n' is the systems line separator)
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @return a line with newlines inserted, {@code null} if null input
     */
    public static String wrap(final String str, final int wrapLength) {
        return wrap(str, wrapLength, null, false);
    }

    /**
     * <p>Wraps a single line of text, identifying words by {@code ' '}.</p>
     *
     * <p>Leading spaces on a new line are stripped.
     * Trailing spaces are not stripped.</p>
     *
     * <table border="1">
     *  <caption>Examples</caption>
     *  <tr><th>input</th><th>wrapLength</th><th>newLineString</th><th>wrapLongWords</th><th>result</th></tr>
     *  <tr><td>null</td><td>*</td><td>*</td><td>true/false</td><td>null</td></tr>
     *  <tr><td>""</td><td>*</td><td>*</td><td>true/false</td><td>""</td></tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>true/false</td>
     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>"&lt;br /&gt;"</td>
     *   <td>true/false</td>
     *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;to be wrapped
     *       after&lt;br /&gt;20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>null</td>
     *   <td>true/false</td>
     *   <td>"Here is one line of" + systemNewLine + "text that is going"
     *       + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>false</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>true</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apac\nhe.org"</td>
     *  </tr>
     * </table>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @param newLineStr  the string to insert for a new line,
     *  {@code null} uses the system property line separator
     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
     * @return a line with newlines inserted, {@code null} if null input
     */
    public static String wrap(final String str,
                              final int wrapLength,
                              final String newLineStr,
                              final boolean wrapLongWords) {
        return wrap(str, wrapLength, newLineStr, wrapLongWords, " ");
    }

    /**
     * <p>Wraps a single line of text, identifying words by {@code wrapOn}.</p>
     *
     * <p>Leading spaces on a new line are stripped.
     * Trailing spaces are not stripped.</p>
     *
     * <table border="1">
     *  <caption>Examples</caption>
     *  <tr>
     *   <th>input</th><th>wrapLength</th><th>newLineString</th>
     *   <th>wrapLongWords</th><th>wrapOn</th><th>result</th>
     *  </tr>
     *  <tr><td>null</td><td>*</td><td>*</td><td>true/false</td><td>*</td><td>null</td></tr>
     *  <tr><td>""</td><td>*</td><td>*</td><td>true/false</td><td>*</td><td>""</td></tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>true/false</td>
     *   <td>" "</td>
     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>"&lt;br /&gt;"</td>
     *   <td>true/false</td>
     *   <td>" "</td>
     *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;to be wrapped
     *       after&lt;br /&gt;20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>null</td>
     *   <td>true/false</td>
     *   <td>" "</td>
     *   <td>"Here is one line of" + systemNewLine + "text that is going"
     *       + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>false</td>
     *   <td>" "</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>true</td>
     *   <td>" "</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apac\nhe.org"</td>
     *  </tr>
     *  <tr>
     *   <td>"flammable/inflammable"</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>true</td>
     *   <td>"/"</td>
     *   <td>"flammable\ninflammable"</td>
     *  </tr>
     * </table>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @param newLineStr  the string to insert for a new line,
     *  {@code null} uses the system property line separator
     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
     * @param wrapOn  regular expression identifying the breakable positions;
     *  a blank (null, empty or whitespace-only) value is replaced by a single space
     * @return a line with newlines inserted, {@code null} if null input
     * @throws java.util.regex.PatternSyntaxException if {@code wrapOn} is not a valid regular expression
     */
    public static String wrap(final String str,
                              int wrapLength,
                              String newLineStr,
                              final boolean wrapLongWords,
                              String wrapOn) {
        if (str == null) {
            return null;
        }
        if (newLineStr == null) {
            newLineStr = System.lineSeparator();
        }
        if (wrapLength < 1) {
            wrapLength = 1;
        }
        if (StringUtils.isBlank(wrapOn)) {
            wrapOn = " ";
        }
        final Pattern patternToWrapOn = Pattern.compile(wrapOn);
        final int inputLineLength = str.length();
        int offset = 0;
        final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);
        // Length of the last match found at the very start of a window;
        // 0 marks a zero-width match, -1 means "no such match recorded".
        int matcherSize = -1;

        while (offset < inputLineLength) {
            int spaceToWrapAt = -1;
            // Search window: at most wrapLength + 1 chars from offset. The sum is done in long
            // arithmetic (1L) so that a huge wrapLength cannot overflow int.
            Matcher matcher = patternToWrapOn.matcher(str.substring(offset,
                    Math.min((int) Math.min(Integer.MAX_VALUE, offset + wrapLength + 1L), inputLineLength)));
            if (matcher.find()) {
                if (matcher.start() == 0) {
                    matcherSize = matcher.end() - matcher.start();
                    if (matcherSize != 0) {
                        // a leading separator: skip it and rescan from the next position
                        offset += matcher.end();
                        continue;
                    }
                    // zero-width match at the start: step forward one char so the scan progresses;
                    // the branches below step back again (offset--) before copying text
                    offset += 1;
                }
                spaceToWrapAt = matcher.start() + offset;
            }

            // only last line without leading spaces is left
            if (inputLineLength - offset <= wrapLength) {
                break;
            }

            // keep the LAST separator inside the window so the line is as long as possible
            while (matcher.find()) {
                spaceToWrapAt = matcher.start() + offset;
            }

            if (spaceToWrapAt >= offset) {
                // normal case
                wrappedLine.append(str, offset, spaceToWrapAt);
                wrappedLine.append(newLineStr);
                offset = spaceToWrapAt + 1;

            } else {
                // really long word or URL
                if (wrapLongWords) {
                    if (matcherSize == 0) {
                        offset--;
                    }
                    // wrap really long word one line at a time
                    wrappedLine.append(str, offset, wrapLength + offset);
                    wrappedLine.append(newLineStr);
                    offset += wrapLength;
                    matcherSize = -1;
                } else {
                    // do not wrap really long word, just extend beyond limit
                    matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength));
                    if (matcher.find()) {
                        matcherSize = matcher.end() - matcher.start();
                        spaceToWrapAt = matcher.start() + offset + wrapLength;
                    }

                    if (spaceToWrapAt >= 0) {
                        if (matcherSize == 0 && offset != 0) {
                            offset--;
                        }
                        wrappedLine.append(str, offset, spaceToWrapAt);
                        wrappedLine.append(newLineStr);
                        offset = spaceToWrapAt + 1;
                    } else {
                        if (matcherSize == 0 && offset != 0) {
                            offset--;
                        }
                        wrappedLine.append(str, offset, str.length());
                        offset = inputLineLength;
                        matcherSize = -1;
                    }
                }
            }
        }

        if (matcherSize == 0 && offset < inputLineLength) {
            offset--;
        }

        // Whatever is left in line is short enough to just pass through
        wrappedLine.append(str, offset, str.length());

        return wrappedLine.toString();
    }

    // Capitalizing
    //-----------------------------------------------------------------------
    /**
     * <p>Capitalizes all the whitespace separated words in a String.
     * Only the first character of each word is changed. To convert the
     * rest of each word to lowercase at the same time,
     * use {@link #capitalizeFully(String)}.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A {@code null} input String returns {@code null}.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null)        = null
     * WordUtils.capitalize("")          = ""
     * WordUtils.capitalize("i am FINE") = "I Am FINE"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @return capitalized String, {@code null} if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     */
    public static String capitalize(final String str) {
        return capitalize(str, null);
    }

    /**
     * <p>Capitalizes all the delimiter separated words in a String.
     * Only the first character of each word is changed. To convert the
     * rest of each word to lowercase at the same time,
     * use {@link #capitalizeFully(String, char[])}.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. A {@code null} delimiter array means that any
     * character accepted by {@link Character#isWhitespace(int)} separates words; an
     * empty array means there are no delimiters, so only the first character is changed.</p>
     *
     * <p>A {@code null} input String returns {@code null}.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null, *)                    = null
     * WordUtils.capitalize("", *)                      = ""
     * WordUtils.capitalize("i am fine", null)          = "I Am Fine"
     * WordUtils.capitalize("i\tam\nfine", null)        = "I\tAm\nFine"
     * WordUtils.capitalize("i aM.fine", {'.'})         = "I aM.Fine"
     * WordUtils.capitalize("i am fine", new char[]{})  = "I am fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, {@code null} if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     */
    public static String capitalize(final String str, final char... delimiters) {
        if (StringUtils.isEmpty(str)) {
            return str;
        }
        final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
        final int strLen = str.length();
        final int[] newCodePoints = new int[strLen];
        int outOffset = 0;

        boolean capitalizeNext = true;
        for (int index = 0; index < strLen;) {
            final int codePoint = str.codePointAt(index);

            if (isWordDelimiter(codePoint, delimiters, delimiterSet)) {
                capitalizeNext = true;
                newCodePoints[outOffset++] = codePoint;
            } else if (capitalizeNext) {
                newCodePoints[outOffset++] = Character.toTitleCase(codePoint);
                capitalizeNext = false;
            } else {
                newCodePoints[outOffset++] = codePoint;
            }
            // always advance by the width of the code point that was READ from the input
            index += Character.charCount(codePoint);
        }
        return new String(newCodePoints, 0, outOffset);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Converts all the whitespace separated words in a String into capitalized words,
     * that is each word is made up of a titlecase character and then a series of
     * lowercase characters.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A {@code null} input String returns {@code null}.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null)        = null
     * WordUtils.capitalizeFully("")          = ""
     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @return capitalized String, {@code null} if null String input
     */
    public static String capitalizeFully(final String str) {
        return capitalizeFully(str, null);
    }

    /**
     * <p>Converts all the delimiter separated words in a String into capitalized words,
     * that is each word is made up of a titlecase character and then a series of
     * lowercase characters.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized.</p>
     *
     * <p>A {@code null} input String returns {@code null}.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case. The input is first lower-cased with {@link String#toLowerCase()},
     * which applies the rules of the JVM's default locale.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null, *)                   = null
     * WordUtils.capitalizeFully("", *)                     = ""
     * WordUtils.capitalizeFully("i am FINE", null)         = "I Am Fine"
     * WordUtils.capitalizeFully("i aM.fine", new char[0])  = "I am.fine"
     * WordUtils.capitalizeFully("i aM.fine", {'.'})        = "I am.Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, {@code null} if null String input
     */
    public static String capitalizeFully(String str, final char... delimiters) {
        if (StringUtils.isEmpty(str)) {
            return str;
        }
        str = str.toLowerCase();
        return capitalize(str, delimiters);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Uncapitalizes all the whitespace separated words in a String.
     * Only the first character of each word is changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A {@code null} input String returns {@code null}.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null)        = null
     * WordUtils.uncapitalize("")          = ""
     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
     * </pre>
     *
     * @param str  the String to uncapitalize, may be null
     * @return uncapitalized String, {@code null} if null String input
     * @see #capitalize(String)
     */
    public static String uncapitalize(final String str) {
        return uncapitalize(str, null);
    }

    /**
     * <p>Uncapitalizes all the delimiter separated words in a String.
     * Only the first character of each word is changed.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be uncapitalized. A {@code null} delimiter array means that any
     * character accepted by {@link Character#isWhitespace(int)} separates words; an
     * empty array means there are no delimiters, so only the first character is changed.</p>
     *
     * <p>A {@code null} input String returns {@code null}.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null, *)                    = null
     * WordUtils.uncapitalize("", *)                      = ""
     * WordUtils.uncapitalize("I Am FINE", null)          = "i am fINE"
     * WordUtils.uncapitalize("I AM.FINE", {'.'})         = "i AM.fINE"
     * WordUtils.uncapitalize("I am fine", new char[]{})  = "i am fine"
     * </pre>
     *
     * @param str  the String to uncapitalize, may be null
     * @param delimiters  set of characters to determine uncapitalization, null means whitespace
     * @return uncapitalized String, {@code null} if null String input
     * @see #capitalize(String)
     */
    public static String uncapitalize(final String str, final char... delimiters) {
        if (StringUtils.isEmpty(str)) {
            return str;
        }
        final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
        final int strLen = str.length();
        final int[] newCodePoints = new int[strLen];
        int outOffset = 0;

        boolean uncapitalizeNext = true;
        for (int index = 0; index < strLen;) {
            final int codePoint = str.codePointAt(index);

            if (isWordDelimiter(codePoint, delimiters, delimiterSet)) {
                uncapitalizeNext = true;
                newCodePoints[outOffset++] = codePoint;
            } else if (uncapitalizeNext) {
                newCodePoints[outOffset++] = Character.toLowerCase(codePoint);
                uncapitalizeNext = false;
            } else {
                newCodePoints[outOffset++] = codePoint;
            }
            // always advance by the width of the code point that was READ from the input
            index += Character.charCount(codePoint);
        }
        return new String(newCodePoints, 0, outOffset);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Swaps the case of a String using a word based algorithm.</p>
     *
     * <ul>
     *  <li>Upper case character converts to Lower case</li>
     *  <li>Title case character converts to Lower case</li>
     *  <li>Lower case character after Whitespace or at start converts to Title case</li>
     *  <li>Other Lower case character converts to Upper case</li>
     * </ul>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A {@code null} input String returns {@code null}.</p>
     *
     * <pre>
     * WordUtils.swapCase(null)                 = null
     * WordUtils.swapCase("")                   = ""
     * WordUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
     * </pre>
     *
     * @param str  the String to swap case, may be null
     * @return The changed String, {@code null} if null String input
     */
    public static String swapCase(final String str) {
        if (StringUtils.isEmpty(str)) {
            return str;
        }
        final int strLen = str.length();
        final int[] newCodePoints = new int[strLen];
        int outOffset = 0;
        // true while the previous code point was whitespace (or we are at the start)
        boolean whitespace = true;
        for (int index = 0; index < strLen;) {
            final int oldCodepoint = str.codePointAt(index);
            final int newCodePoint;
            if (Character.isUpperCase(oldCodepoint) || Character.isTitleCase(oldCodepoint)) {
                newCodePoint = Character.toLowerCase(oldCodepoint);
                whitespace = false;
            } else if (Character.isLowerCase(oldCodepoint)) {
                if (whitespace) {
                    newCodePoint = Character.toTitleCase(oldCodepoint);
                    whitespace = false;
                } else {
                    newCodePoint = Character.toUpperCase(oldCodepoint);
                }
            } else {
                whitespace = Character.isWhitespace(oldCodepoint);
                newCodePoint = oldCodepoint;
            }
            newCodePoints[outOffset++] = newCodePoint;
            // advance by the width of the code point that was READ, not the one written
            index += Character.charCount(oldCodepoint);
        }
        return new String(newCodePoints, 0, outOffset);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Extracts the initial characters from each word in the String.</p>
     *
     * <p>All first characters after whitespace are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A {@code null} input String returns {@code null}.</p>
     *
     * <pre>
     * WordUtils.initials(null)             = null
     * WordUtils.initials("")               = ""
     * WordUtils.initials("Ben John Lee")   = "BJL"
     * WordUtils.initials("Ben J.Lee")      = "BJ"
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @return String of initial letters, {@code null} if null String input
     * @see #initials(String,char[])
     */
    public static String initials(final String str) {
        return initials(str, null);
    }

    /**
     * <p>Extracts the initial characters from each word in the String.</p>
     *
     * <p>All first characters after the defined delimiters are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>If the delimiters array is null, then Whitespace is used.
     * Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A {@code null} input String returns {@code null}.
     * An empty delimiter array returns an empty String.</p>
     *
     * <pre>
     * WordUtils.initials(null, *)                = null
     * WordUtils.initials("", *)                  = ""
     * WordUtils.initials("Ben John Lee", null)   = "BJL"
     * WordUtils.initials("Ben J.Lee", null)      = "BJ"
     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
     * WordUtils.initials(*, new char[0])         = ""
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @param delimiters  set of characters to determine words, null means whitespace
     * @return String of initial characters, {@code null} if null String input
     * @see #initials(String)
     */
    public static String initials(final String str, final char... delimiters) {
        if (StringUtils.isEmpty(str)) {
            return str;
        }
        if (delimiters != null && delimiters.length == 0) {
            return StringUtils.EMPTY;
        }
        final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
        final int strLen = str.length();
        // two initials are always separated by at least one delimiter,
        // so there can be at most strLen / 2 + 1 of them
        final int[] newCodePoints = new int[strLen / 2 + 1];
        int count = 0;
        boolean lastWasGap = true;
        for (int i = 0; i < strLen;) {
            final int codePoint = str.codePointAt(i);

            if (isWordDelimiter(codePoint, delimiters, delimiterSet)) {
                lastWasGap = true;
            } else if (lastWasGap) {
                newCodePoints[count++] = codePoint;
                lastWasGap = false;
            }

            i += Character.charCount(codePoint);
        }
        return new String(newCodePoints, 0, count);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Checks if the String contains all words in the given array.</p>
     *
     * <p>
     * A {@code null} String will return {@code false}. A {@code null}, zero
     * length search array or if one element of array is null will return {@code false}.
     * </p>
     *
     * <p>
     * Each search word is matched literally (regular-expression metacharacters in it have
     * no special meaning) and must be delimited by word boundaries ({@code \b}) in the
     * checked text.
     * </p>
     *
     * <pre>
     * WordUtils.containsAllWords(null, *)            = false
     * WordUtils.containsAllWords("", *)              = false
     * WordUtils.containsAllWords(*, null)            = false
     * WordUtils.containsAllWords(*, [])              = false
     * WordUtils.containsAllWords("abcd", "ab", "cd") = false
     * WordUtils.containsAllWords("abc def", "def", "abc") = true
     * WordUtils.containsAllWords("abc def", "a.c")   = false
     * </pre>
     *
     * @param word The CharSequence to check, may be null
     * @param words The array of String words to search for, may be null
     * @return {@code true} if all search words are found, {@code false} otherwise
     */
    public static boolean containsAllWords(final CharSequence word, final CharSequence... words) {
        if (StringUtils.isEmpty(word) || ArrayUtils.isEmpty(words)) {
            return false;
        }
        for (final CharSequence w : words) {
            if (StringUtils.isBlank(w)) {
                return false;
            }
            // Quote the word so that characters such as '.', '(' or '*' are matched literally
            // instead of being interpreted (or rejected) as regular-expression syntax.
            // find() is used rather than ".*x.*" + matches() because '.' does not match line
            // terminators, which made the old form fail on multi-line text.
            final Pattern p = Pattern.compile("\\b" + Pattern.quote(w.toString()) + "\\b");
            if (!p.matcher(word).find()) {
                return false;
            }
        }
        return true;
    }

    //-----------------------------------------------------------------------
    /**
     * Is the character a delimiter.
     *
     * @param ch the character to check
     * @param delimiters the delimiters, {@code null} means whitespace
     *  as defined by {@link Character#isWhitespace(char)}
     * @return true if it is a delimiter
     * @deprecated as of 1.2 and will be removed in 2.0
     */
    @Deprecated
    public static boolean isDelimiter(final char ch, final char[] delimiters) {
        if (delimiters == null) {
            return Character.isWhitespace(ch);
        }
        for (final char delimiter : delimiters) {
            if (ch == delimiter) {
                return true;
            }
        }
        return false;
    }

    //-----------------------------------------------------------------------
    /**
     * Is the codePoint a delimiter.
     *
     * @param codePoint the code point to check
     * @param delimiters the delimiters, {@code null} means whitespace
     *  as defined by {@link Character#isWhitespace(int)}
     * @return true if it is a delimiter
     * @deprecated as of 1.2 and will be removed in 2.0
     */
    @Deprecated
    public static boolean isDelimiter(final int codePoint, final char[] delimiters) {
        if (delimiters == null) {
            return Character.isWhitespace(codePoint);
        }
        for (int index = 0; index < delimiters.length; index++) {
            final int delimiterCodePoint = Character.codePointAt(delimiters, index);
            if (delimiterCodePoint == codePoint) {
                return true;
            }
        }
        return false;
    }

    //-----------------------------------------------------------------------
    /**
     * Abbreviates the words nicely.
     *
     * <p>This method searches for the first space at or after the lower limit and abbreviates
     * the String there. It will also append any String passed as a parameter
     * to the end of the String. The upper limit can be specified to forcibly
     * abbreviate a String.</p>
     *
     * <pre>
     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, null)    = "Now"
     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, null)   = "Now is the"
     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, null)   = "Now is the time for all"
     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, "")      = "Now"
     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, "")     = "Now is the"
     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, "")     = "Now is the time for all"
     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, " ...")  = "Now ..."
     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, " ...") = "Now is the ..."
     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, " ...") = "Now is the time for all ..."
     * WordUtils.abbreviate("Now is the time for all good men", 0, -1, "")      = "Now"
     * WordUtils.abbreviate("Now is the time for all good men", 10, -1, "")     = "Now is the"
     * WordUtils.abbreviate("Now is the time for all good men", 20, -1, "")     = "Now is the time for all"
     * WordUtils.abbreviate("Now is the time for all good men", 50, -1, "")     = "Now is the time for all good men"
     * WordUtils.abbreviate("Now is the time for all good men", 1000, -1, "")   = "Now is the time for all good men"
     * WordUtils.abbreviate("Now is the time for all good men", 9, -10, null)   = IllegalArgumentException
     * WordUtils.abbreviate("Now is the time for all good men", 10, 5, null)    = IllegalArgumentException
     * </pre>
     *
     * @param str         the string to be abbreviated. If null is passed, null is returned.
     *                    If the empty String is passed, the empty string is returned.
     * @param lower       the lower limit; values greater than the length of the string are
     *                    reduced to that length.
     * @param upper       the upper limit; specify -1 if no limit is desired.
     *                    Values greater than the length of the string are reduced to that length.
     * @param appendToEnd String to be appended to the end of the abbreviated string.
     *                    This is appended ONLY if the string was indeed abbreviated.
     *                    The append does not count towards the lower or upper limits.
     *                    {@code null} is treated as the empty string.
     * @return The abbreviated String.
     * @throws IllegalArgumentException if {@code upper} is less than -1, or if {@code upper}
     *                    is not -1 and is less than {@code lower}
     */
    public static String abbreviate(final String str, int lower, int upper, final String appendToEnd) {
        Validate.isTrue(upper >= -1, "upper value cannot be less than -1");
        Validate.isTrue(upper >= lower || upper == -1, "upper value is less than lower value");

        if (StringUtils.isEmpty(str)) {
            return str;
        }

        // if the lower value is greater than the length of the string,
        // set to the length of the string
        if (lower > str.length()) {
            lower = str.length();
        }

        // if the upper value is -1 (i.e. no limit) or is greater
        // than the length of the string, set to the length of the string
        if (upper == -1 || upper > str.length()) {
            upper = str.length();
        }

        final StringBuilder result = new StringBuilder();
        final int index = StringUtils.indexOf(str, " ", lower);
        if (index == -1) {
            // no space at or after the lower limit: cut at the upper limit
            result.append(str, 0, upper);
            // only if abbreviation has occurred do we append the appendToEnd value
            if (upper != str.length()) {
                result.append(StringUtils.defaultString(appendToEnd));
            }
        } else if (index > upper) {
            // the next space lies beyond the upper limit: cut forcibly at the upper limit
            result.append(str, 0, upper);
            result.append(StringUtils.defaultString(appendToEnd));
        } else {
            // cut at the first space at or after the lower limit
            result.append(str, 0, index);
            result.append(StringUtils.defaultString(appendToEnd));
        }

        return result.toString();
    }

    // -----------------------------------------------------------------------
    /**
     * <p>
     * Converts an array of delimiters to a hash set of code points. Code point of space(32) is added as the default
     * value if delimiters is null. The generated hash set provides O(1) lookup time.
     * </p>
     *
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return a new mutable set holding the delimiter code points; empty for an empty array
     */
    private static Set<Integer> generateDelimiterSet(final char[] delimiters) {
        final Set<Integer> delimiterHashSet = new HashSet<>();
        if (delimiters == null) {
            delimiterHashSet.add((int) ' ');
            return delimiterHashSet;
        }
        for (int index = 0; index < delimiters.length; index++) {
            delimiterHashSet.add(Character.codePointAt(delimiters, index));
        }
        return delimiterHashSet;
    }

    /**
     * Tells whether a code point separates words for the given delimiter configuration.
     *
     * @param codePoint  the code point to test
     * @param delimiters  the caller supplied delimiters; {@code null} means that every
     *  {@link Character#isWhitespace(int) whitespace} code point is a delimiter
     * @param delimiterSet  the set built from {@code delimiters} by {@link #generateDelimiterSet(char[])}
     * @return {@code true} if the code point is a word delimiter
     */
    private static boolean isWordDelimiter(final int codePoint, final char[] delimiters,
            final Set<Integer> delimiterSet) {
        return delimiterSet.contains(codePoint)
                || delimiters == null && Character.isWhitespace(codePoint);
    }
}