import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; import java.util.regex.Matcher; import java.util.regex.Pattern; public class NotStringPerformance { public static final boolean lookahead = true; public static final int mode = 2; private static String repeat(String base, int times) { return new String(new char[times]).replace("\0", base); } /** * Generate a regex accepting any string but the string of concatenated * numbers * lower_bound to upper_bound (each inclusive). * * @param lower_bound * the lower value in x..y * @param lower_bound * the greater value in x..y * @return the regular expression */ public static String generateRegex(int lower_bound, int upper_bound) { // creating not_string String not_string = ""; for (int number = lower_bound; number <= upper_bound; number++) { not_string += Integer.toString(number); } if (lookahead) return "^(?!" + not_string + "$).*$"; // strings with length smaller than not_string String smaller = ""; if (mode == 1) { for (int length = 0; length < not_string.length(); length++) { smaller += repeat(".", length); if (length != not_string.length() - 1) { smaller += "|"; } } } else { smaller += repeat(".?", not_string.length() - 1); } // strings with length equal to not_string String equal = ""; for (int index = 0; index < not_string.length(); index++) { equal += repeat(".", index); equal += "[^"; equal += not_string.charAt(index); equal += "]"; equal += repeat(".", not_string.length() - index - 1); if (index != not_string.length() - 1) { equal += "|"; } } // strings with length greater than not_string String greater = repeat(".", not_string.length()) + ".+"; return "^(" + smaller + "|" + equal + "|" + greater + ")$"; } /** * Generate less than `(upper_bound - lower_bound)^2` input strings. * Returns testsuite map. * * @param lower_bound * @param upper_bound * @return */ public static Map<String, Boolean> generateInputStrings(int lower_bound, int upper_bound) { Map<String, Boolean> testsuite = new HashMap<String, Boolean>(); String not_string = ""; // generate not_string for (int number = lower_bound; number <= upper_bound; number++) { not_string += Integer.toString(number); } for (int begin = lower_bound; begin <= upper_bound; begin++) { for (int end = begin + 1; end <= upper_bound; end++) { String input = not_string.substring(begin, end); boolean matches = (!input.equals(not_string)); testsuite.put(input, new Boolean(matches)); } } return testsuite; } public static void runTest(Pattern pat, Map<String, Boolean> testsuite) { for (Entry<String, Boolean> e : testsuite.entrySet()) { String input = e.getKey(); Matcher m = pat.matcher(input); if (!e.getValue().equals(m.matches())) { String val = "true"; if (!e.getValue()) { val = "false"; } System.err.println("FAIL for '" + input + "' (should be " + val + ")"); } } } public static void main(String[] argv) { int low = 1; int upp = 500; // do not time measure that. // Initialization is not considered computation time. String regex = generateRegex(low, upp); Map<String, Boolean> testsuite = generateInputStrings(low, upp); System.out.print("Generated regex of string length "); System.out.print(regex.length()); System.out.println("."); System.out.print("Generated "); System.out.print(testsuite.size()); System.out.println(" input strings."); Pattern pat = Pattern.compile(regex, Pattern.DOTALL); System.out.println("Regex compilation has finished."); // Measure here long start = System.currentTimeMillis(); runTest(pat, testsuite); long end = System.currentTimeMillis(); long diff = (end - start); System.out.print("It takes "); System.out.print(diff / 1000.0); System.out.println(" seconds to test all input strings."); } }