001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.text; 018 019import org.apache.commons.lang3.Validate; 020 021import java.util.ArrayList; 022import java.util.HashSet; 023import java.util.List; 024import java.util.Set; 025import java.util.Collections; 026import java.util.concurrent.ThreadLocalRandom; 027 028/** 029 * <p> 030 * Generates random Unicode strings containing the specified number of code points. 031 * Instances are created using a builder class, which allows the 032 * callers to define the properties of the generator. See the documentation for the 033 * {@link Builder} class to see available properties. 034 * </p> 035 * <pre> 036 * // Generates a 20 code point string, using only the letters a-z 037 * RandomStringGenerator generator = new RandomStringGenerator.Builder() 038 * .withinRange('a', 'z').build(); 039 * String randomLetters = generator.generate(20); 040 * </pre> 041 * <pre> 042 * // Using Apache Commons RNG for randomness 043 * UniformRandomProvider rng = RandomSource.create(...); 044 * // Generates a 20 code point string, using only the letters a-z 045 * RandomStringGenerator generator = new RandomStringGenerator.Builder() 046 * .withinRange('a', 'z') 047 * .usingRandom(rng::nextInt) // uses Java 8 syntax 048 * .build(); 049 * String randomLetters = generator.generate(20); 050 * </pre> 051 * <p> 052 * {@code RandomStringBuilder} instances are thread-safe when using the 053 * default random number generator (RNG). If a custom RNG is set by calling the method 054 * {@link Builder#usingRandom(TextRandomProvider) Builder.usingRandom(TextRandomProvider)}, thread-safety 055 * must be ensured externally. 056 * </p> 057 * @since 1.1 058 */ 059public final class RandomStringGenerator { 060 061 /** 062 * The smallest allowed code point (inclusive). 063 */ 064 private final int minimumCodePoint; 065 066 /** 067 * The largest allowed code point (inclusive). 068 */ 069 private final int maximumCodePoint; 070 071 /** 072 * Filters for code points. 073 */ 074 private final Set<CharacterPredicate> inclusivePredicates; 075 076 /** 077 * The source of randomness for this generator. 078 */ 079 private final TextRandomProvider random; 080 081 /** 082 * The source of provided characters. 083 */ 084 private final List<Character> characterList; 085 086 /** 087 * Constructs the generator. 088 * 089 * @param minimumCodePoint 090 * smallest allowed code point (inclusive) 091 * @param maximumCodePoint 092 * largest allowed code point (inclusive) 093 * @param inclusivePredicates 094 * filters for code points 095 * @param random 096 * source of randomness 097 * @param characterList list of predefined set of characters. 098 */ 099 private RandomStringGenerator(final int minimumCodePoint, final int maximumCodePoint, 100 final Set<CharacterPredicate> inclusivePredicates, final TextRandomProvider random, 101 final List<Character> characterList) { 102 this.minimumCodePoint = minimumCodePoint; 103 this.maximumCodePoint = maximumCodePoint; 104 this.inclusivePredicates = inclusivePredicates; 105 this.random = random; 106 this.characterList = characterList; 107 } 108 109 /** 110 * Generates a random number within a range, using a {@link ThreadLocalRandom} instance 111 * or the user-supplied source of randomness. 112 * 113 * @param minInclusive 114 * the minimum value allowed 115 * @param maxInclusive 116 * the maximum value allowed 117 * @return the random number. 118 */ 119 private int generateRandomNumber(final int minInclusive, final int maxInclusive) { 120 if (random != null) { 121 return random.nextInt(maxInclusive - minInclusive + 1) + minInclusive; 122 } 123 return ThreadLocalRandom.current().nextInt(minInclusive, maxInclusive + 1); 124 } 125 126 /** 127 * Generates a random number within a range, using a {@link ThreadLocalRandom} instance 128 * or the user-supplied source of randomness. 129 * 130 * @param characterList predefined char list. 131 * @return the random number. 132 */ 133 private int generateRandomNumber(final List<Character> characterList) { 134 final int listSize = characterList.size(); 135 if (random != null) { 136 return String.valueOf(characterList.get(random.nextInt(listSize))).codePointAt(0); 137 } 138 return String.valueOf(characterList.get(ThreadLocalRandom.current().nextInt(0, listSize))).codePointAt(0); 139 } 140 141 /** 142 * <p> 143 * Generates a random string, containing the specified number of code points. 144 * </p> 145 * <p>Code points are randomly selected between the minimum and maximum values defined 146 * in the generator. 147 * Surrogate and private use characters are not returned, although the 148 * resulting string may contain pairs of surrogates that together encode a 149 * supplementary character. 150 * </p> 151 * <p> 152 * Note: the number of {@code char} code units generated will exceed 153 * {@code length} if the string contains supplementary characters. See the 154 * {@link Character} documentation to understand how Java stores Unicode 155 * values. 156 * </p> 157 * 158 * @param length 159 * the number of code points to generate 160 * @return the generated string 161 * @throws IllegalArgumentException 162 * if {@code length < 0} 163 */ 164 public String generate(final int length) { 165 if (length == 0) { 166 return ""; 167 } 168 Validate.isTrue(length > 0, "Length %d is smaller than zero.", length); 169 170 final StringBuilder builder = new StringBuilder(length); 171 long remaining = length; 172 173 do { 174 int codePoint; 175 if (characterList != null && !characterList.isEmpty()) { 176 codePoint = generateRandomNumber(characterList); 177 } else { 178 codePoint = generateRandomNumber(minimumCodePoint, maximumCodePoint); 179 } 180 switch (Character.getType(codePoint)) { 181 case Character.UNASSIGNED: 182 case Character.PRIVATE_USE: 183 case Character.SURROGATE: 184 continue; 185 default: 186 } 187 188 if (inclusivePredicates != null) { 189 boolean matchedFilter = false; 190 for (final CharacterPredicate predicate : inclusivePredicates) { 191 if (predicate.test(codePoint)) { 192 matchedFilter = true; 193 break; 194 } 195 } 196 if (!matchedFilter) { 197 continue; 198 } 199 } 200 201 builder.appendCodePoint(codePoint); 202 remaining--; 203 204 } while (remaining != 0); 205 206 return builder.toString(); 207 } 208 209 /** 210 * Generates a random string, containing between the minimum (inclusive) and the maximum (inclusive) 211 * number of code points. 212 * 213 * @param minLengthInclusive 214 * the minimum (inclusive) number of code points to generate 215 * @param maxLengthInclusive 216 * the maximum (inclusive) number of code points to generate 217 * @return the generated string 218 * @throws IllegalArgumentException 219 * if {@code minLengthInclusive < 0}, or {@code maxLengthInclusive < minLengthInclusive} 220 * @see RandomStringGenerator#generate(int) 221 * @since 1.2 222 */ 223 public String generate(final int minLengthInclusive, final int maxLengthInclusive) { 224 Validate.isTrue(minLengthInclusive >= 0, "Minimum length %d is smaller than zero.", minLengthInclusive); 225 Validate.isTrue(minLengthInclusive <= maxLengthInclusive, 226 "Maximum length %d is smaller than minimum length %d.", maxLengthInclusive, minLengthInclusive); 227 return generate(generateRandomNumber(minLengthInclusive, maxLengthInclusive)); 228 } 229 230 /** 231 * <p>A builder for generating {@code RandomStringGenerator} instances.</p> 232 * <p>The behaviour of a generator is controlled by properties set by this 233 * builder. Each property has a default value, which can be overridden by 234 * calling the methods defined in this class, prior to calling {@link #build()}.</p> 235 * 236 * <p>All the property setting methods return the {@code Builder} instance to allow for method chaining.</p> 237 * 238 * <p>The minimum and maximum code point values are defined using {@link #withinRange(int, int)}. The 239 * default values are {@code 0} and {@link Character#MAX_CODE_POINT} respectively.</p> 240 * 241 * <p>The source of randomness can be set using {@link #usingRandom(TextRandomProvider)}, 242 * otherwise {@link ThreadLocalRandom} is used.</p> 243 * 244 * <p>The type of code points returned can be filtered using {@link #filteredBy(CharacterPredicate...)}, 245 * which defines a collection of tests that are applied to the randomly generated code points. 246 * The code points will only be included in the result if they pass at least one of the tests. 247 * Some commonly used predicates are provided by the {@link CharacterPredicates} enum.</p> 248 * 249 * <p>This class is not thread safe.</p> 250 * @since 1.1 251 */ 252 public static class Builder implements org.apache.commons.text.Builder<RandomStringGenerator> { 253 254 /** 255 * The default maximum code point allowed: {@link Character#MAX_CODE_POINT} 256 * ({@value}). 257 */ 258 public static final int DEFAULT_MAXIMUM_CODE_POINT = Character.MAX_CODE_POINT; 259 260 /** 261 * The default string length produced by this builder: {@value}. 262 */ 263 public static final int DEFAULT_LENGTH = 0; 264 265 /** 266 * The default minimum code point allowed: {@value}. 267 */ 268 public static final int DEFAULT_MINIMUM_CODE_POINT = 0; 269 270 /** 271 * The minimum code point allowed. 272 */ 273 private int minimumCodePoint = DEFAULT_MINIMUM_CODE_POINT; 274 275 /** 276 * The maximum code point allowed. 277 */ 278 private int maximumCodePoint = DEFAULT_MAXIMUM_CODE_POINT; 279 280 /** 281 * Filters for code points. 282 */ 283 private Set<CharacterPredicate> inclusivePredicates; 284 285 /** 286 * The source of randomness. 287 */ 288 private TextRandomProvider random; 289 290 /** 291 * The source of provided characters. 292 */ 293 private List<Character> characterList; 294 295 /** 296 * <p> 297 * Specifies the minimum and maximum code points allowed in the 298 * generated string. 299 * </p> 300 * 301 * @param minimumCodePoint 302 * the smallest code point allowed (inclusive) 303 * @param maximumCodePoint 304 * the largest code point allowed (inclusive) 305 * @return {@code this}, to allow method chaining 306 * @throws IllegalArgumentException 307 * if {@code maximumCodePoint >} 308 * {@link Character#MAX_CODE_POINT} 309 * @throws IllegalArgumentException 310 * if {@code minimumCodePoint < 0} 311 * @throws IllegalArgumentException 312 * if {@code minimumCodePoint > maximumCodePoint} 313 */ 314 public Builder withinRange(final int minimumCodePoint, final int maximumCodePoint) { 315 Validate.isTrue(minimumCodePoint <= maximumCodePoint, 316 "Minimum code point %d is larger than maximum code point %d", minimumCodePoint, maximumCodePoint); 317 Validate.isTrue(minimumCodePoint >= 0, "Minimum code point %d is negative", minimumCodePoint); 318 Validate.isTrue(maximumCodePoint <= Character.MAX_CODE_POINT, 319 "Value %d is larger than Character.MAX_CODE_POINT.", maximumCodePoint); 320 321 this.minimumCodePoint = minimumCodePoint; 322 this.maximumCodePoint = maximumCodePoint; 323 return this; 324 } 325 326 /** 327 * <p> 328 * Specifies the array of minimum and maximum char allowed in the 329 * generated string. 330 * </p> 331 * 332 * For example: 333 * <pre> 334 * {@code 335 * char [][] pairs = {{'0','9'}}; 336 * char [][] pairs = {{'a','z'}}; 337 * char [][] pairs = {{'a','z'},{'0','9'}}; 338 * } 339 * </pre> 340 * 341 * @param pairs array of characters array, expected is to pass min, max pairs through this arg. 342 * @return {@code this}, to allow method chaining. 343 */ 344 public Builder withinRange(final char[]... pairs) { 345 characterList = new ArrayList<>(); 346 for (final char[] pair : pairs) { 347 Validate.isTrue(pair.length == 2, 348 "Each pair must contain minimum and maximum code point"); 349 final int minimumCodePoint = pair[0]; 350 final int maximumCodePoint = pair[1]; 351 Validate.isTrue(minimumCodePoint <= maximumCodePoint, 352 "Minimum code point %d is larger than maximum code point %d", minimumCodePoint, maximumCodePoint); 353 354 for (int index = minimumCodePoint; index <= maximumCodePoint; index++) { 355 characterList.add((char) index); 356 } 357 } 358 return this; 359 360 } 361 362 /** 363 * <p> 364 * Limits the characters in the generated string to those that match at 365 * least one of the predicates supplied. 366 * </p> 367 * 368 * <p> 369 * Passing {@code null} or an empty array to this method will revert to the 370 * default behaviour of allowing any character. Multiple calls to this 371 * method will replace the previously stored predicates. 372 * </p> 373 * 374 * @param predicates 375 * the predicates, may be {@code null} or empty 376 * @return {@code this}, to allow method chaining 377 */ 378 public Builder filteredBy(final CharacterPredicate... predicates) { 379 if (predicates == null || predicates.length == 0) { 380 inclusivePredicates = null; 381 return this; 382 } 383 384 if (inclusivePredicates == null) { 385 inclusivePredicates = new HashSet<>(); 386 } else { 387 inclusivePredicates.clear(); 388 } 389 390 Collections.addAll(inclusivePredicates, predicates); 391 392 return this; 393 } 394 395 /** 396 * <p> 397 * Overrides the default source of randomness. It is highly 398 * recommended that a random number generator library like 399 * <a href="http://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a> 400 * be used to provide the random number generation. 401 * </p> 402 * 403 * <p> 404 * When using Java 8 or later, {@link TextRandomProvider} is a 405 * functional interface and need not be explicitly implemented: 406 * </p> 407 * <pre> 408 * {@code 409 * UniformRandomProvider rng = RandomSource.create(...); 410 * RandomStringGenerator gen = new RandomStringGenerator.Builder() 411 * .usingRandom(rng::nextInt) 412 * // additional builder calls as needed 413 * .build(); 414 * } 415 * </pre> 416 * 417 * <p> 418 * Passing {@code null} to this method will revert to the default source of 419 * randomness. 420 * </p> 421 * 422 * @param random 423 * the source of randomness, may be {@code null} 424 * @return {@code this}, to allow method chaining 425 */ 426 public Builder usingRandom(final TextRandomProvider random) { 427 this.random = random; 428 return this; 429 } 430 431 /** 432 * <p> 433 * Limits the characters in the generated string to those who match at 434 * supplied list of Character. 435 * </p> 436 * 437 * <p> 438 * Passing {@code null} or an empty array to this method will revert to the 439 * default behaviour of allowing any character. Multiple calls to this 440 * method will replace the previously stored Character. 441 * </p> 442 * 443 * @param chars set of predefined Characters for random string generation 444 * the Character can be, may be {@code null} or empty 445 * @return {@code this}, to allow method chaining 446 * @since 1.2 447 */ 448 public Builder selectFrom(final char... chars) { 449 characterList = new ArrayList<>(); 450 for (final char c : chars) { 451 characterList.add(c); 452 } 453 return this; 454 } 455 456 /** 457 * <p>Builds the {@code RandomStringGenerator} using the properties specified.</p> 458 * @return the configured {@code RandomStringGenerator} 459 */ 460 @Override 461 public RandomStringGenerator build() { 462 return new RandomStringGenerator(minimumCodePoint, maximumCodePoint, inclusivePredicates, 463 random, characterList); 464 } 465 } 466}