#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
    Find a regex accepting any arbitrary string except the one string formed by concatenating the numbers 0 to 1000.

    (C) 2015, Public Domain, meisterluk
"""

import re
import sys
import timeit


def generate_regex(lower_bound, upper_bound, mode=('no lookahead', 1)):
    """
    Generate a regex accepting any string but the string of concatenated numbers
    lower_bound to upper_bound (each inclusive).

    `mode` is a sequence selecting how the regex is built:

    * ``('lookahead',)`` -- a negative look-ahead on the forbidden string.
    * ``(<anything else>, 1)`` -- plain alternation; strings shorter than the
      forbidden string get one alternative per length ('', '.', '..', ...).
    * ``(<anything else>, 2)`` -- plain alternation; strings shorter than the
      forbidden string are covered by one run of optional dots ('.?.?...').

    Raises ValueError if the alternation form is requested and the second
    element of `mode` is missing or is neither 1 nor 2.

    We generate the regex, certainly it would be crazy to write it.
    "^(|.|..|...|....|[^0]....|.[^1]...|..[^2]..|...[^3].|....[^4]|......+)$"
    corresponds to generate_regex(0, 4)
    """
    not_string = ''.join(
        str(number) for number in range(lower_bound, upper_bound + 1)
    )
    length = len(not_string)

    if mode[0] == 'lookahead':
        return '^(?!{}$).*$'.format(not_string)

    variant = mode[1] if len(mode) > 1 else None
    if variant == 1:
        # strings with length smaller than not_string, one alternative each
        shorter = ['.' * size for size in range(length)]
    elif variant == 2:
        # strings with length smaller than not_string, as optional dots;
        # an empty not_string has no shorter strings, so add no alternative
        shorter = ['.?' * (length - 1)] if length else []
    else:
        raise ValueError('unsupported mode: {!r}'.format(mode))

    # strings with length equal to not_string: differ in at least one position
    same_length = [
        '.' * index + '[^' + char + ']' + '.' * (length - index - 1)
        for index, char in enumerate(not_string)
    ]

    # strings with length greater than not_string
    longer = '.' * length + '.+'

    # Joining one flat list avoids stray empty alternatives (which would
    # accept the empty string) when a group has no members.
    return '^(' + '|'.join(shorter + same_length + [longer]) + ')$'


def generate_input_strings(lower_bound, upper_bound):
    """Generate the input strings used to exercise the regex.

    Returns testsuite dict mapping each input string to the expected result:
    True if the regex should accept it, False if it should reject it.

    The inputs are slices ``not_string[begin:end]`` of the forbidden string,
    one per (begin, end) pair with lower_bound <= begin < end <= upper_bound,
    plus the forbidden string itself. Identical slices collapse into one
    entry because they share a dict key.
    """
    not_string = ''.join(
        str(number) for number in range(lower_bound, upper_bound + 1)
    )

    testsuite = {}
    # begin/end are the numeric bounds reused as character indices.
    for begin in range(lower_bound, upper_bound + 1):
        for end in range(begin + 1, upper_bound + 1):
            input_string = not_string[begin:end]
            testsuite[input_string] = (input_string != not_string)

    # The index ranges above never span the whole of not_string, so without
    # this entry the suite would never check that the regex rejects it.
    testsuite[not_string] = False

    return testsuite


def run_test(pat, testsuite):
    """Check `pat` against every entry of `testsuite`.

    `testsuite` maps an input string to the expected outcome; an
    AssertionError is raised as soon as `pat.search` disagrees with it.
    Works purely on its arguments, no global variables involved.
    """
    for candidate in testsuite:
        expected = testsuite[candidate]
        found = pat.search(candidate) is not None
        assert found == expected


# Inputs for run_timeit_test(); main() fills them in before timing starts.
PATTERN = None
TESTSUITE = None


def run_timeit_test():
    """Check the module-level PATTERN against every entry of TESTSUITE.

    Takes no arguments: it reads the compiled pattern and the testsuite dict
    from the globals PATTERN and TESTSUITE. An AssertionError is raised as
    soon as `PATTERN.search` disagrees with the expected outcome.
    """
    for candidate in TESTSUITE:
        expected = TESTSUITE[candidate]
        found = PATTERN.search(candidate) is not None
        assert found == expected


def main(low, upp):
    """Main routine

    Builds the look-ahead regex and the input strings for the numbers
    `low` to `upp`, compiles the regex, then times 100 runs of
    run_timeit_test() and prints the total duration. Returns 0.
    """
    global PATTERN
    global TESTSUITE

    # Initialization is not considered computation time, so everything up to
    # and including compilation happens before the timer is started.
    regex = generate_regex(low, upp, ('lookahead',))
    testsuite = generate_input_strings(low, upp)
    print("Generated regex of string length {}.".format(len(regex)))
    print("Generated {} input strings.".format(len(testsuite)))
    pat = re.compile(regex, flags=re.S | re.U)
    print("Regex compilation has finished.")

    # run_timeit_test() reads its inputs from these module-level names.
    PATTERN = pat
    TESTSUITE = testsuite

    # Hand timeit the callable itself instead of a statement string with a
    # "from __main__ import ..." setup, so the measurement also works when
    # this module is imported rather than executed as a script.
    duration = timeit.timeit(run_timeit_test, number=100)
    print('It takes {} seconds to test all input strings.'.format(duration))

    return 0


if __name__ == '__main__':
    # The optional first command line argument is the upper bound (base 10);
    # it defaults to 500. The lower bound is fixed at 1.
    if len(sys.argv) > 1:
        upp = int(sys.argv[1], 10)
    else:
        upp = 500
    low = 1

    exit_code = main(low, upp)
    sys.exit(exit_code)