from faker import Faker
from faker.providers import internet, date_time
import csv

def generate_indicator(fake, indicator_type):
    """Generate one fake indicator and the CSV row describing it.

    Args:
        fake: Faker-like object exposing ``ipv4``, ``pystr_format``, ``md5``,
            ``lexify`` and ``date_this_century``.
        indicator_type: One of ``"IP"``, ``"Domain"``, ``"URL"`` or ``"MD5"``.

    Returns:
        A ``(value, row)`` tuple where ``row`` is
        ``[value, random description text, date from fake.date_this_century()]``.

    Raises:
        KeyError: If ``indicator_type`` is not one of the supported types.
    """
    letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

    # Map each type to a callable so only the requested generator runs;
    # building the values eagerly would invoke every generator on each call.
    generators = {
        "IP": lambda: fake.ipv4(),
        "Domain": lambda: fake.pystr_format(
            string_format="?????#???????###.com", letters=letters),
        "URL": lambda: fake.pystr_format(
            string_format="https://?????#???????###.com", letters=letters),
        "MD5": lambda: fake.md5(),
    }
    value = generators[indicator_type]()

    description = fake.lexify(text="???? ?????? ?? ?????", letters=letters)
    return value, [value, description, fake.date_this_century()]

def generate_file(filename, generated_type, total=2000000):
    """Write ``total`` rows of unique fake indicators to a CSV file.

    Each row comes from ``generate_indicator``; a row is written only if its
    indicator value has not been produced before in this call.

    Args:
        filename: Path of the CSV file to create (overwritten if it exists).
        generated_type: Indicator type passed through to
            ``generate_indicator``.
        total: Number of unique rows to write. Defaults to 2000000.

    Note:
        The loop keeps drawing until ``total`` distinct values have been
        seen, so it does not terminate if the generator cannot produce that
        many distinct values.
    """
    print("Generating " + filename)

    # Set up the generator before opening the file so a Faker setup failure
    # does not leave a truncated output file behind.
    fake = Faker()
    fake.add_provider(internet)
    fake.add_provider(date_time)

    seen = set()
    with open(filename, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',',
                            quotechar='"', quoting=csv.QUOTE_MINIMAL)
        # `<` rather than `!=` so a zero or negative total ends immediately.
        while len(seen) < total:
            indicator, csv_line = generate_indicator(fake, generated_type)
            if indicator in seen:
                # Duplicate value: draw again without writing a row.
                continue
            seen.add(indicator)
            writer.writerow(csv_line)
            # len(seen) is exactly the number of rows written so far.
            if len(seen) % 10000 == 0:
                print(f"Finished {len(seen)} out of {total}")

    print("Finished " + filename)


# Generate one CSV per indicator type, in this order.
for _filename, _indicator_type in (
    ('ips.csv', 'IP'),
    ('domains.csv', 'Domain'),
    ('hashes.csv', 'MD5'),
):
    generate_file(_filename, _indicator_type)