#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
    Find a regex accepting any arbitrary string
    but the string with numbers 0 to 1000.

    (C) 2015, Public Domain, meisterluk
"""

import re
import sys
import timeit


def _concatenated_numbers(lower_bound, upper_bound):
    """Return the decimal numbers lower_bound..upper_bound (inclusive) glued together."""
    return ''.join(str(number) for number in range(lower_bound, upper_bound + 1))


def generate_regex(lower_bound, upper_bound, mode=('no lookahead', 1)):
    """Generate a regex accepting any string but the string of
    concatenated numbers lower_bound to upper_bound (each inclusive).

    We generate the regex, certainly it would be crazy to write it.

    ``mode`` is a tuple:

    * ``('lookahead',)`` builds ``^(?!<numbers>$).*$``.
    * ``(<anything else>, 1)`` enumerates every shorter length explicitly
      (``|.|..|...``) for the "too short" alternatives.
    * ``(<anything else>, 2)`` uses a run of optional dots (``.?.?.?``)
      for the "too short" alternative instead.

    "^(|.|..|...|....|[^0]....|.[^1]...|..[^2]..|...[^3].|....[^4]|......+)$"
    corresponds to generate_regex(0, 4)

    Raises ValueError if a non-lookahead mode carries a variant other
    than 1 or 2.

    NOTE: ``$`` also matches right before a trailing newline, so whether
    "<numbers>\\n" is accepted depends on the flags used for compilation.
    """
    not_string = _concatenated_numbers(lower_bound, upper_bound)
    length = len(not_string)

    if mode[0] == 'lookahead':
        return '^(?!{}$).*$'.format(not_string)

    variant = mode[1]
    if variant == 1:
        # one alternative per length 0 .. length-1
        alternatives = ['.' * shorter for shorter in range(length)]
    elif variant == 2:
        # a single alternative matching 0 .. length-1 characters; there is
        # no shorter string at all when the forbidden string is empty
        alternatives = ['.?' * (length - 1)] if length else []
    else:
        raise ValueError(
            'unknown regex variant {!r}, expected 1 or 2'.format(variant))

    # strings with length equal to not_string: at least one position differs
    for index, char in enumerate(not_string):
        alternatives.append(
            '.' * index + '[^' + char + ']' + '.' * (length - index - 1))

    # strings with length greater than not_string
    alternatives.append('.' * length + '.+')

    return '^(' + '|'.join(alternatives) + ')$'


def generate_input_strings(lower_bound, upper_bound):
    """Generate the input strings for the benchmark.

    Returns the testsuite dict mapping each input string to the expected
    result: True if the regex has to accept it, False if it has to
    reject it.

    The dict holds at most ``n * (n - 1) / 2 + 1`` entries with
    ``n = upper_bound - lower_bound + 1``: one slice of the concatenated
    number string per ``begin < end`` pair plus the number string itself,
    which is the only string expected to be rejected.
    """
    not_string = _concatenated_numbers(lower_bound, upper_bound)
    testsuite = {}

    # begin/end iterate over the number values and are used as character
    # offsets into not_string
    for begin in range(lower_bound, upper_bound + 1):
        for end in range(begin + 1, upper_bound + 1):
            input_string = not_string[begin:end]
            testsuite[input_string] = (input_string != not_string)

    # The slices above never reach the full string, so add the one input
    # that must be rejected explicitly.
    testsuite[not_string] = False

    return testsuite


def run_test(pat, testsuite):
    """Actually run all pattern searches. Does not use global variables.

    Raises AssertionError if the pattern accepts or rejects an input
    string against the expectation recorded in the testsuite.
    """
    for string, match in testsuite.items():
        # explicit raise instead of `assert`, which `python -O` strips
        # together with the search call we want to measure
        if bool(pat.search(string)) != match:
            raise AssertionError(
                'pattern result for {!r} is not {}'.format(string, match))


# Set by main() before the time measurement starts.
PATTERN = None
TESTSUITE = None


def run_timeit_test():
    """Actually run all pattern searches.
    Uses the global variables PATTERN and TESTSUITE."""
    run_test(PATTERN, TESTSUITE)


def main(low, upp):
    """Main routine.

    Generates the regex and the input strings for the numbers low..upp,
    compiles the regex and prints how long 100 runs over all input
    strings take. Returns the process exit code 0.
    """
    global PATTERN
    global TESTSUITE

    # do not time measure that.
    # Initialization is not considered computation time.
    regex = generate_regex(low, upp, ('lookahead',))
    testsuite = generate_input_strings(low, upp)
    print("Generated regex of string length {}.".format(len(regex)))
    print("Generated {} input strings.".format(len(testsuite)))

    pat = re.compile(regex, flags=re.S | re.U)
    print("Regex compilation has finished.")

    # Measure here
    PATTERN = pat
    TESTSUITE = testsuite
    # Pass the callable itself: importing it from __main__ only works
    # when this file is executed as a script.
    duration = timeit.timeit(run_timeit_test, number=100)
    print('It takes {} seconds to test all input strings.'.format(duration))

    return 0


if __name__ == '__main__':
    low = 1
    upp = int(sys.argv[1], 10) if len(sys.argv) > 1 else 500
    sys.exit(main(low, upp))