001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.text; 018 019import java.util.HashSet; 020import java.util.Set; 021import java.util.regex.Matcher; 022import java.util.regex.Pattern; 023 024import org.apache.commons.lang3.ArrayUtils; 025import org.apache.commons.lang3.StringUtils; 026import org.apache.commons.lang3.Validate; 027 028/** 029 * <p> 030 * Operations on Strings that contain words. 031 * </p> 032 * 033 * <p> 034 * This class tries to handle <code>null</code> input gracefully. An exception will not be thrown for a 035 * <code>null</code> input. Each method documents its behavior in more detail. 036 * </p> 037 * 038 * @since 1.1 039 */ 040public class WordUtils { 041 042 /** 043 * <p><code>WordUtils</code> instances should NOT be constructed in 044 * standard programming. Instead, the class should be used as 045 * <code>WordUtils.wrap("foo bar", 20);</code>.</p> 046 * 047 * <p>This constructor is public to permit tools that require a JavaBean 048 * instance to operate.</p> 049 */ 050 public WordUtils() { 051 super(); 052 } 053 054 // Wrapping 055 //-------------------------------------------------------------------------- 056 /** 057 * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p> 058 * 059 * <p>New lines will be separated by the system property line separator. 060 * Very long words, such as URLs will <i>not</i> be wrapped.</p> 061 * 062 * <p>Leading spaces on a new line are stripped. 063 * Trailing spaces are not stripped.</p> 064 * 065 * <table border="1"> 066 * <caption>Examples</caption> 067 * <tr> 068 * <th>input</th> 069 * <th>wrapLength</th> 070 * <th>result</th> 071 * </tr> 072 * <tr> 073 * <td>null</td> 074 * <td>*</td> 075 * <td>null</td> 076 * </tr> 077 * <tr> 078 * <td>""</td> 079 * <td>*</td> 080 * <td>""</td> 081 * </tr> 082 * <tr> 083 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 084 * <td>20</td> 085 * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td> 086 * </tr> 087 * <tr> 088 * <td>"Click here to jump to the commons website - http://commons.apache.org"</td> 089 * <td>20</td> 090 * <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td> 091 * </tr> 092 * <tr> 093 * <td>"Click here, http://commons.apache.org, to jump to the commons website"</td> 094 * <td>20</td> 095 * <td>"Click here,\nhttp://commons.apache.org,\nto jump to the\ncommons website"</td> 096 * </tr> 097 * </table> 098 * 099 * (assuming that '\n' is the systems line separator) 100 * 101 * @param str the String to be word wrapped, may be null 102 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 103 * @return a line with newlines inserted, <code>null</code> if null input 104 */ 105 public static String wrap(final String str, final int wrapLength) { 106 return wrap(str, wrapLength, null, false); 107 } 108 109 /** 110 * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p> 111 * 112 * <p>Leading spaces on a new line are stripped. 113 * Trailing spaces are not stripped.</p> 114 * 115 * <table border="1"> 116 * <caption>Examples</caption> 117 * <tr> 118 * <th>input</th> 119 * <th>wrapLength</th> 120 * <th>newLineString</th> 121 * <th>wrapLongWords</th> 122 * <th>result</th> 123 * </tr> 124 * <tr> 125 * <td>null</td> 126 * <td>*</td> 127 * <td>*</td> 128 * <td>true/false</td> 129 * <td>null</td> 130 * </tr> 131 * <tr> 132 * <td>""</td> 133 * <td>*</td> 134 * <td>*</td> 135 * <td>true/false</td> 136 * <td>""</td> 137 * </tr> 138 * <tr> 139 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 140 * <td>20</td> 141 * <td>"\n"</td> 142 * <td>true/false</td> 143 * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td> 144 * </tr> 145 * <tr> 146 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 147 * <td>20</td> 148 * <td>"<br />"</td> 149 * <td>true/false</td> 150 * <td>"Here is one line of<br />text that is going< 151 * br />to be wrapped after<br />20 columns."</td> 152 * </tr> 153 * <tr> 154 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 155 * <td>20</td> 156 * <td>null</td> 157 * <td>true/false</td> 158 * <td>"Here is one line of" + systemNewLine + "text that is going" 159 * + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td> 160 * </tr> 161 * <tr> 162 * <td>"Click here to jump to the commons website - http://commons.apache.org"</td> 163 * <td>20</td> 164 * <td>"\n"</td> 165 * <td>false</td> 166 * <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td> 167 * </tr> 168 * <tr> 169 * <td>"Click here to jump to the commons website - http://commons.apache.org"</td> 170 * <td>20</td> 171 * <td>"\n"</td> 172 * <td>true</td> 173 * <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apach\ne.org"</td> 174 * </tr> 175 * </table> 176 * 177 * @param str the String to be word wrapped, may be null 178 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 179 * @param newLineStr the string to insert for a new line, 180 * <code>null</code> uses the system property line separator 181 * @param wrapLongWords true if long words (such as URLs) should be wrapped 182 * @return a line with newlines inserted, <code>null</code> if null input 183 */ 184 public static String wrap(final String str, 185 final int wrapLength, 186 final String newLineStr, 187 final boolean wrapLongWords) { 188 return wrap(str, wrapLength, newLineStr, wrapLongWords, " "); 189 } 190 191 /** 192 * <p>Wraps a single line of text, identifying words by <code>wrapOn</code>.</p> 193 * 194 * <p>Leading spaces on a new line are stripped. 195 * Trailing spaces are not stripped.</p> 196 * 197 * <table border="1"> 198 * <caption>Examples</caption> 199 * <tr> 200 * <th>input</th> 201 * <th>wrapLength</th> 202 * <th>newLineString</th> 203 * <th>wrapLongWords</th> 204 * <th>wrapOn</th> 205 * <th>result</th> 206 * </tr> 207 * <tr> 208 * <td>null</td> 209 * <td>*</td> 210 * <td>*</td> 211 * <td>true/false</td> 212 * <td>*</td> 213 * <td>null</td> 214 * </tr> 215 * <tr> 216 * <td>""</td> 217 * <td>*</td> 218 * <td>*</td> 219 * <td>true/false</td> 220 * <td>*</td> 221 * <td>""</td> 222 * </tr> 223 * <tr> 224 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 225 * <td>20</td> 226 * <td>"\n"</td> 227 * <td>true/false</td> 228 * <td>" "</td> 229 * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td> 230 * </tr> 231 * <tr> 232 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 233 * <td>20</td> 234 * <td>"<br />"</td> 235 * <td>true/false</td> 236 * <td>" "</td> 237 * <td>"Here is one line of<br />text that is going<br /> 238 * to be wrapped after<br />20 columns."</td> 239 * </tr> 240 * <tr> 241 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 242 * <td>20</td> 243 * <td>null</td> 244 * <td>true/false</td> 245 * <td>" "</td> 246 * <td>"Here is one line of" + systemNewLine + "text that is going" 247 * + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td> 248 * </tr> 249 * <tr> 250 * <td>"Click here to jump to the commons website - http://commons.apache.org"</td> 251 * <td>20</td> 252 * <td>"\n"</td> 253 * <td>false</td> 254 * <td>" "</td> 255 * <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td> 256 * </tr> 257 * <tr> 258 * <td>"Click here to jump to the commons website - http://commons.apache.org"</td> 259 * <td>20</td> 260 * <td>"\n"</td> 261 * <td>true</td> 262 * <td>" "</td> 263 * <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apach\ne.org"</td> 264 * </tr> 265 * <tr> 266 * <td>"flammable/inflammable"</td> 267 * <td>20</td> 268 * <td>"\n"</td> 269 * <td>true</td> 270 * <td>"/"</td> 271 * <td>"flammable\ninflammable"</td> 272 * </tr> 273 * </table> 274 * @param str the String to be word wrapped, may be null 275 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 276 * @param newLineStr the string to insert for a new line, 277 * <code>null</code> uses the system property line separator 278 * @param wrapLongWords true if long words (such as URLs) should be wrapped 279 * @param wrapOn regex expression to be used as a breakable characters, 280 * if blank string is provided a space character will be used 281 * @return a line with newlines inserted, <code>null</code> if null input 282 */ 283 public static String wrap(final String str, 284 int wrapLength, 285 String newLineStr, 286 final boolean wrapLongWords, 287 String wrapOn) { 288 if (str == null) { 289 return null; 290 } 291 if (newLineStr == null) { 292 newLineStr = System.lineSeparator(); 293 } 294 if (wrapLength < 1) { 295 wrapLength = 1; 296 } 297 if (StringUtils.isBlank(wrapOn)) { 298 wrapOn = " "; 299 } 300 final Pattern patternToWrapOn = Pattern.compile(wrapOn); 301 final int inputLineLength = str.length(); 302 int offset = 0; 303 final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32); 304 305 while (offset < inputLineLength) { 306 int spaceToWrapAt = -1; 307 Matcher matcher = patternToWrapOn.matcher(str.substring(offset, 308 Math.min((int) Math.min(Integer.MAX_VALUE, offset + wrapLength + 1L), inputLineLength))); 309 if (matcher.find()) { 310 if (matcher.start() == 0) { 311 offset += matcher.end(); 312 continue; 313 } 314 spaceToWrapAt = matcher.start() + offset; 315 } 316 317 // only last line without leading spaces is left 318 if (inputLineLength - offset <= wrapLength) { 319 break; 320 } 321 322 while (matcher.find()) { 323 spaceToWrapAt = matcher.start() + offset; 324 } 325 326 if (spaceToWrapAt >= offset) { 327 // normal case 328 wrappedLine.append(str, offset, spaceToWrapAt); 329 wrappedLine.append(newLineStr); 330 offset = spaceToWrapAt + 1; 331 332 } else { 333 // really long word or URL 334 if (wrapLongWords) { 335 // wrap really long word one line at a time 336 wrappedLine.append(str, offset, wrapLength + offset); 337 wrappedLine.append(newLineStr); 338 offset += wrapLength; 339 } else { 340 // do not wrap really long word, just extend beyond limit 341 matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength)); 342 if (matcher.find()) { 343 spaceToWrapAt = matcher.start() + offset + wrapLength; 344 } 345 346 if (spaceToWrapAt >= 0) { 347 wrappedLine.append(str, offset, spaceToWrapAt); 348 wrappedLine.append(newLineStr); 349 offset = spaceToWrapAt + 1; 350 } else { 351 wrappedLine.append(str, offset, str.length()); 352 offset = inputLineLength; 353 } 354 } 355 } 356 } 357 358 // Whatever is left in line is short enough to just pass through 359 wrappedLine.append(str, offset, str.length()); 360 361 return wrappedLine.toString(); 362 } 363 364 // Capitalizing 365 //----------------------------------------------------------------------- 366 /** 367 * <p>Capitalizes all the whitespace separated words in a String. 368 * Only the first character of each word is changed. To convert the 369 * rest of each word to lowercase at the same time, 370 * use {@link #capitalizeFully(String)}.</p> 371 * 372 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 373 * A <code>null</code> input String returns <code>null</code>. 374 * Capitalization uses the Unicode title case, normally equivalent to 375 * upper case.</p> 376 * 377 * <pre> 378 * WordUtils.capitalize(null) = null 379 * WordUtils.capitalize("") = "" 380 * WordUtils.capitalize("i am FINE") = "I Am FINE" 381 * </pre> 382 * 383 * @param str the String to capitalize, may be null 384 * @return capitalized String, <code>null</code> if null String input 385 * @see #uncapitalize(String) 386 * @see #capitalizeFully(String) 387 */ 388 public static String capitalize(final String str) { 389 return capitalize(str, null); 390 } 391 392 /** 393 * <p>Capitalizes all the delimiter separated words in a String. 394 * Only the first character of each word is changed. To convert the 395 * rest of each word to lowercase at the same time, 396 * use {@link #capitalizeFully(String, char[])}.</p> 397 * 398 * <p>The delimiters represent a set of characters understood to separate words. 399 * The first string character and the first non-delimiter character after a 400 * delimiter will be capitalized. </p> 401 * 402 * <p>A <code>null</code> input String returns <code>null</code>. 403 * Capitalization uses the Unicode title case, normally equivalent to 404 * upper case.</p> 405 * 406 * <pre> 407 * WordUtils.capitalize(null, *) = null 408 * WordUtils.capitalize("", *) = "" 409 * WordUtils.capitalize(*, new char[0]) = * 410 * WordUtils.capitalize("i am fine", null) = "I Am Fine" 411 * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine" 412 * WordUtils.capitalize("i am fine", new char[]{}) = "I am fine" 413 * </pre> 414 * 415 * @param str the String to capitalize, may be null 416 * @param delimiters set of characters to determine capitalization, null means whitespace 417 * @return capitalized String, <code>null</code> if null String input 418 * @see #uncapitalize(String) 419 * @see #capitalizeFully(String) 420 */ 421 public static String capitalize(final String str, final char... delimiters) { 422 if (StringUtils.isEmpty(str)) { 423 return str; 424 } 425 final Set<Integer> delimiterSet = generateDelimiterSet(delimiters); 426 final int strLen = str.length(); 427 final int[] newCodePoints = new int[strLen]; 428 int outOffset = 0; 429 430 boolean capitalizeNext = true; 431 for (int index = 0; index < strLen;) { 432 final int codePoint = str.codePointAt(index); 433 434 if (delimiterSet.contains(codePoint)) { 435 capitalizeNext = true; 436 newCodePoints[outOffset++] = codePoint; 437 index += Character.charCount(codePoint); 438 } else if (capitalizeNext) { 439 final int titleCaseCodePoint = Character.toTitleCase(codePoint); 440 newCodePoints[outOffset++] = titleCaseCodePoint; 441 index += Character.charCount(titleCaseCodePoint); 442 capitalizeNext = false; 443 } else { 444 newCodePoints[outOffset++] = codePoint; 445 index += Character.charCount(codePoint); 446 } 447 } 448 return new String(newCodePoints, 0, outOffset); 449 } 450 451 //----------------------------------------------------------------------- 452 /** 453 * <p>Converts all the whitespace separated words in a String into capitalized words, 454 * that is each word is made up of a titlecase character and then a series of 455 * lowercase characters. </p> 456 * 457 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 458 * A <code>null</code> input String returns <code>null</code>. 459 * Capitalization uses the Unicode title case, normally equivalent to 460 * upper case.</p> 461 * 462 * <pre> 463 * WordUtils.capitalizeFully(null) = null 464 * WordUtils.capitalizeFully("") = "" 465 * WordUtils.capitalizeFully("i am FINE") = "I Am Fine" 466 * </pre> 467 * 468 * @param str the String to capitalize, may be null 469 * @return capitalized String, <code>null</code> if null String input 470 */ 471 public static String capitalizeFully(final String str) { 472 return capitalizeFully(str, null); 473 } 474 475 /** 476 * <p>Converts all the delimiter separated words in a String into capitalized words, 477 * that is each word is made up of a titlecase character and then a series of 478 * lowercase characters. </p> 479 * 480 * <p>The delimiters represent a set of characters understood to separate words. 481 * The first string character and the first non-delimiter character after a 482 * delimiter will be capitalized. </p> 483 * 484 * <p>A <code>null</code> input String returns <code>null</code>. 485 * Capitalization uses the Unicode title case, normally equivalent to 486 * upper case.</p> 487 * 488 * <pre> 489 * WordUtils.capitalizeFully(null, *) = null 490 * WordUtils.capitalizeFully("", *) = "" 491 * WordUtils.capitalizeFully(*, null) = * 492 * WordUtils.capitalizeFully(*, new char[0]) = * 493 * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine" 494 * </pre> 495 * 496 * @param str the String to capitalize, may be null 497 * @param delimiters set of characters to determine capitalization, null means whitespace 498 * @return capitalized String, <code>null</code> if null String input 499 */ 500 public static String capitalizeFully(String str, final char... delimiters) { 501 if (StringUtils.isEmpty(str)) { 502 return str; 503 } 504 str = str.toLowerCase(); 505 return capitalize(str, delimiters); 506 } 507 508 //----------------------------------------------------------------------- 509 /** 510 * <p>Uncapitalizes all the whitespace separated words in a String. 511 * Only the first character of each word is changed.</p> 512 * 513 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 514 * A <code>null</code> input String returns <code>null</code>.</p> 515 * 516 * <pre> 517 * WordUtils.uncapitalize(null) = null 518 * WordUtils.uncapitalize("") = "" 519 * WordUtils.uncapitalize("I Am FINE") = "i am fINE" 520 * </pre> 521 * 522 * @param str the String to uncapitalize, may be null 523 * @return uncapitalized String, <code>null</code> if null String input 524 * @see #capitalize(String) 525 */ 526 public static String uncapitalize(final String str) { 527 return uncapitalize(str, null); 528 } 529 530 /** 531 * <p>Uncapitalizes all the whitespace separated words in a String. 532 * Only the first character of each word is changed.</p> 533 * 534 * <p>The delimiters represent a set of characters understood to separate words. 535 * The first string character and the first non-delimiter character after a 536 * delimiter will be uncapitalized. </p> 537 * 538 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 539 * A <code>null</code> input String returns <code>null</code>.</p> 540 * 541 * <pre> 542 * WordUtils.uncapitalize(null, *) = null 543 * WordUtils.uncapitalize("", *) = "" 544 * WordUtils.uncapitalize(*, null) = * 545 * WordUtils.uncapitalize(*, new char[0]) = * 546 * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE" 547 * WordUtils.uncapitalize("I am fine", new char[]{}) = "i am fine" 548 * </pre> 549 * 550 * @param str the String to uncapitalize, may be null 551 * @param delimiters set of characters to determine uncapitalization, null means whitespace 552 * @return uncapitalized String, <code>null</code> if null String input 553 * @see #capitalize(String) 554 */ 555 public static String uncapitalize(final String str, final char... delimiters) { 556 if (StringUtils.isEmpty(str)) { 557 return str; 558 } 559 final Set<Integer> delimiterSet = generateDelimiterSet(delimiters); 560 final int strLen = str.length(); 561 final int[] newCodePoints = new int[strLen]; 562 int outOffset = 0; 563 564 boolean uncapitalizeNext = true; 565 for (int index = 0; index < strLen;) { 566 final int codePoint = str.codePointAt(index); 567 568 if (delimiterSet.contains(codePoint)) { 569 uncapitalizeNext = true; 570 newCodePoints[outOffset++] = codePoint; 571 index += Character.charCount(codePoint); 572 } else if (uncapitalizeNext) { 573 final int titleCaseCodePoint = Character.toLowerCase(codePoint); 574 newCodePoints[outOffset++] = titleCaseCodePoint; 575 index += Character.charCount(titleCaseCodePoint); 576 uncapitalizeNext = false; 577 } else { 578 newCodePoints[outOffset++] = codePoint; 579 index += Character.charCount(codePoint); 580 } 581 } 582 return new String(newCodePoints, 0, outOffset); 583 } 584 585 //----------------------------------------------------------------------- 586 /** 587 * <p>Swaps the case of a String using a word based algorithm.</p> 588 * 589 * <ul> 590 * <li>Upper case character converts to Lower case</li> 591 * <li>Title case character converts to Lower case</li> 592 * <li>Lower case character after Whitespace or at start converts to Title case</li> 593 * <li>Other Lower case character converts to Upper case</li> 594 * </ul> 595 * 596 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 597 * A <code>null</code> input String returns <code>null</code>.</p> 598 * 599 * <pre> 600 * StringUtils.swapCase(null) = null 601 * StringUtils.swapCase("") = "" 602 * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone" 603 * </pre> 604 * 605 * @param str the String to swap case, may be null 606 * @return the changed String, <code>null</code> if null String input 607 */ 608 public static String swapCase(final String str) { 609 if (StringUtils.isEmpty(str)) { 610 return str; 611 } 612 final int strLen = str.length(); 613 final int[] newCodePoints = new int[strLen]; 614 int outOffset = 0; 615 boolean whitespace = true; 616 for (int index = 0; index < strLen;) { 617 final int oldCodepoint = str.codePointAt(index); 618 final int newCodePoint; 619 if (Character.isUpperCase(oldCodepoint) || Character.isTitleCase(oldCodepoint)) { 620 newCodePoint = Character.toLowerCase(oldCodepoint); 621 whitespace = false; 622 } else if (Character.isLowerCase(oldCodepoint)) { 623 if (whitespace) { 624 newCodePoint = Character.toTitleCase(oldCodepoint); 625 whitespace = false; 626 } else { 627 newCodePoint = Character.toUpperCase(oldCodepoint); 628 } 629 } else { 630 whitespace = Character.isWhitespace(oldCodepoint); 631 newCodePoint = oldCodepoint; 632 } 633 newCodePoints[outOffset++] = newCodePoint; 634 index += Character.charCount(newCodePoint); 635 } 636 return new String(newCodePoints, 0, outOffset); 637 } 638 639 //----------------------------------------------------------------------- 640 /** 641 * <p>Extracts the initial characters from each word in the String.</p> 642 * 643 * <p>All first characters after whitespace are returned as a new string. 644 * Their case is not changed.</p> 645 * 646 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 647 * A <code>null</code> input String returns <code>null</code>.</p> 648 * 649 * <pre> 650 * WordUtils.initials(null) = null 651 * WordUtils.initials("") = "" 652 * WordUtils.initials("Ben John Lee") = "BJL" 653 * WordUtils.initials("Ben J.Lee") = "BJ" 654 * </pre> 655 * 656 * @param str the String to get initials from, may be null 657 * @return String of initial letters, <code>null</code> if null String input 658 * @see #initials(String,char[]) 659 */ 660 public static String initials(final String str) { 661 return initials(str, null); 662 } 663 664 /** 665 * <p>Extracts the initial characters from each word in the String.</p> 666 * 667 * <p>All first characters after the defined delimiters are returned as a new string. 668 * Their case is not changed.</p> 669 * 670 * <p>If the delimiters array is null, then Whitespace is used. 671 * Whitespace is defined by {@link Character#isWhitespace(char)}. 672 * A <code>null</code> input String returns <code>null</code>. 673 * An empty delimiter array returns an empty String.</p> 674 * 675 * <pre> 676 * WordUtils.initials(null, *) = null 677 * WordUtils.initials("", *) = "" 678 * WordUtils.initials("Ben John Lee", null) = "BJL" 679 * WordUtils.initials("Ben J.Lee", null) = "BJ" 680 * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL" 681 * WordUtils.initials(*, new char[0]) = "" 682 * </pre> 683 * 684 * @param str the String to get initials from, may be null 685 * @param delimiters set of characters to determine words, null means whitespace 686 * @return String of initial characters, <code>null</code> if null String input 687 * @see #initials(String) 688 */ 689 public static String initials(final String str, final char... delimiters) { 690 if (StringUtils.isEmpty(str)) { 691 return str; 692 } 693 if (delimiters != null && delimiters.length == 0) { 694 return ""; 695 } 696 final Set<Integer> delimiterSet = generateDelimiterSet(delimiters); 697 final int strLen = str.length(); 698 final int[] newCodePoints = new int[strLen / 2 + 1]; 699 int count = 0; 700 boolean lastWasGap = true; 701 for (int i = 0; i < strLen;) { 702 final int codePoint = str.codePointAt(i); 703 704 if (delimiterSet.contains(codePoint) || (delimiters == null && Character.isWhitespace(codePoint))) { 705 lastWasGap = true; 706 } else if (lastWasGap) { 707 newCodePoints[count++] = codePoint; 708 lastWasGap = false; 709 } 710 711 i += Character.charCount(codePoint); 712 } 713 return new String(newCodePoints, 0, count); 714 } 715 716 //----------------------------------------------------------------------- 717 /** 718 * <p>Checks if the String contains all words in the given array.</p> 719 * 720 * <p> 721 * A {@code null} String will return {@code false}. A {@code null}, zero 722 * length search array or if one element of array is null will return {@code false}. 723 * </p> 724 * 725 * <pre> 726 * WordUtils.containsAllWords(null, *) = false 727 * WordUtils.containsAllWords("", *) = false 728 * WordUtils.containsAllWords(*, null) = false 729 * WordUtils.containsAllWords(*, []) = false 730 * WordUtils.containsAllWords("abcd", "ab", "cd") = false 731 * WordUtils.containsAllWords("abc def", "def", "abc") = true 732 * </pre> 733 * 734 * @param word The CharSequence to check, may be null 735 * @param words The array of String words to search for, may be null 736 * @return {@code true} if all search words are found, {@code false} otherwise 737 */ 738 public static boolean containsAllWords(final CharSequence word, final CharSequence... words) { 739 if (StringUtils.isEmpty(word) || ArrayUtils.isEmpty(words)) { 740 return false; 741 } 742 for (final CharSequence w : words) { 743 if (StringUtils.isBlank(w)) { 744 return false; 745 } 746 final Pattern p = Pattern.compile(".*\\b" + w + "\\b.*"); 747 if (!p.matcher(word).matches()) { 748 return false; 749 } 750 } 751 return true; 752 } 753 754 //----------------------------------------------------------------------- 755 /** 756 * Is the character a delimiter. 757 * 758 * @param ch the character to check 759 * @param delimiters the delimiters 760 * @return true if it is a delimiter 761 * @deprecated as of 1.2 and will be removed in 2.0 762 */ 763 @Deprecated 764 public static boolean isDelimiter(final char ch, final char[] delimiters) { 765 if (delimiters == null) { 766 return Character.isWhitespace(ch); 767 } 768 for (final char delimiter : delimiters) { 769 if (ch == delimiter) { 770 return true; 771 } 772 } 773 return false; 774 } 775 776 //----------------------------------------------------------------------- 777 /** 778 * Is the codePoint a delimiter. 779 * 780 * @param codePoint the codePint to check 781 * @param delimiters the delimiters 782 * @return true if it is a delimiter 783 * @deprecated as of 1.2 and will be removed in 2.0 784 */ 785 @Deprecated 786 public static boolean isDelimiter(final int codePoint, final char[] delimiters) { 787 if (delimiters == null) { 788 return Character.isWhitespace(codePoint); 789 } 790 for (int index = 0; index < delimiters.length; index++) { 791 final int delimiterCodePoint = Character.codePointAt(delimiters, index); 792 if (delimiterCodePoint == codePoint) { 793 return true; 794 } 795 } 796 return false; 797 } 798 799 //----------------------------------------------------------------------- 800 /** 801 * Abbreviates the words nicely. 802 * 803 * This method searches for the first space after the lower limit and abbreviates 804 * the String there. It will also append any String passed as a parameter 805 * to the end of the String. The upper limit can be specified to forcibly 806 * abbreviate a String. 807 * 808 * @param str the string to be abbreviated. If null is passed, null is returned. 809 * If the empty String is passed, the empty string is returned. 810 * @param lower the lower limit. 811 * @param upper the upper limit; specify -1 if no limit is desired. 812 * If the upper limit is lower than the lower limit, it will be 813 * adjusted to be the same as the lower limit. 814 * @param appendToEnd String to be appended to the end of the abbreviated string. 815 * This is appended ONLY if the string was indeed abbreviated. 816 * The append does not count towards the lower or upper limits. 817 * @return the abbreviated String. 818 * 819 * <pre> 820 * WordUtils.abbreviate("Now is the time for all good men", 0, 40, null)); = "Now" 821 * WordUtils.abbreviate("Now is the time for all good men", 10, 40, null)); = "Now is the" 822 * WordUtils.abbreviate("Now is the time for all good men", 20, 40, null)); = "Now is the time for all" 823 * WordUtils.abbreviate("Now is the time for all good men", 0, 40, "")); = "Now" 824 * WordUtils.abbreviate("Now is the time for all good men", 10, 40, "")); = "Now is the" 825 * WordUtils.abbreviate("Now is the time for all good men", 20, 40, "")); = "Now is the time for all" 826 * WordUtils.abbreviate("Now is the time for all good men", 0, 40, " ...")); = "Now ..." 827 * WordUtils.abbreviate("Now is the time for all good men", 10, 40, " ...")); = "Now is the ..." 828 * WordUtils.abbreviate("Now is the time for all good men", 20, 40, " ...")); = "Now is the time for all ..." 829 * WordUtils.abbreviate("Now is the time for all good men", 0, -1, "")); = "Now" 830 * WordUtils.abbreviate("Now is the time for all good men", 10, -1, "")); = "Now is the" 831 * WordUtils.abbreviate("Now is the time for all good men", 20, -1, "")); = "Now is the time for all" 832 * WordUtils.abbreviate("Now is the time for all good men", 50, -1, "")); = "Now is the time for all good men" 833 * WordUtils.abbreviate("Now is the time for all good men", 1000, -1, "")); = "Now is the time for all good men" 834 * WordUtils.abbreviate("Now is the time for all good men", 9, -10, null)); = IllegalArgumentException 835 * WordUtils.abbreviate("Now is the time for all good men", 10, 5, null)); = IllegalArgumentException 836 * </pre> 837 */ 838 public static String abbreviate(final String str, int lower, int upper, final String appendToEnd) { 839 Validate.isTrue(upper >= -1, "upper value cannot be less than -1"); 840 Validate.isTrue(upper >= lower || upper == -1, "upper value is less than lower value"); 841 842 if (StringUtils.isEmpty(str)) { 843 return str; 844 } 845 846 // if the lower value is greater than the length of the string, 847 // set to the length of the string 848 if (lower > str.length()) { 849 lower = str.length(); 850 } 851 852 // if the upper value is -1 (i.e. no limit) or is greater 853 // than the length of the string, set to the length of the string 854 if (upper == -1 || upper > str.length()) { 855 upper = str.length(); 856 } 857 858 final StringBuilder result = new StringBuilder(); 859 final int index = StringUtils.indexOf(str, " ", lower); 860 if (index == -1) { 861 result.append(str, 0, upper); 862 // only if abbreviation has occured do we append the appendToEnd value 863 if (upper != str.length()) { 864 result.append(StringUtils.defaultString(appendToEnd)); 865 } 866 } else if (index > upper) { 867 result.append(str, 0, upper); 868 result.append(StringUtils.defaultString(appendToEnd)); 869 } else { 870 result.append(str, 0, index); 871 result.append(StringUtils.defaultString(appendToEnd)); 872 } 873 874 return result.toString(); 875 } 876 877 // ----------------------------------------------------------------------- 878 /** 879 * <p> 880 * Converts an array of delimiters to a hash set of code points. Code point of space(32) is added as the default 881 * value if delimiters is null. The generated hash set provides O(1) lookup time. 882 * </p> 883 * 884 * @param delimiters set of characters to determine capitalization, null means whitespace 885 * @return Set<Integer> 886 */ 887 private static Set<Integer> generateDelimiterSet(final char[] delimiters) { 888 final Set<Integer> delimiterHashSet = new HashSet<>(); 889 if (delimiters == null || delimiters.length == 0) { 890 if (delimiters == null) { 891 delimiterHashSet.add(Character.codePointAt(new char[] {' '}, 0)); 892 } 893 894 return delimiterHashSet; 895 } 896 897 for (int index = 0; index < delimiters.length; index++) { 898 delimiterHashSet.add(Character.codePointAt(delimiters, index)); 899 } 900 return delimiterHashSet; 901 } 902 }