class Command(MigrateBaseCommand):
    """Import people from the first "30 rokov FKS" CSV export.

    Every CSV row is handed to ``process_person`` together with a synthetic
    id of the form ``30rokovFKS1_<row number>`` (1-based), stored under the
    CSV id property.
    """

    help = "Imports people and their related info from fks_csv."

    def add_arguments(self, parser):
        """Register the base-class options plus the positional CSV file path."""
        super(Command, self).add_arguments(parser)
        parser.add_argument("file", type=str)

    def handle(self, **options):
        """Read the CSV file named by ``options["file"]`` and import each row.

        Expected columns: ``kontaktovany?``, ``Meno``, ``Priezvisko``,
        ``Email``, ``Telefon``, ``Rodne priezvisko`` and ``Prezyvka``.
        """
        super(Command, self).handle(**options)
        participants_file = options["file"]

        # The file is closed as soon as the loop ends, even if importing a
        # row raises.
        with open(participants_file) as csv_file:
            for row_number, row in enumerate(csv.DictReader(csv_file), start=1):
                csv_id = "30rokovFKS1_{0:d}".format(row_number)
                # Rows marked as contacted get 2014 recorded as a contact year.
                if row["kontaktovany?"] == "ano":
                    self.last_contact[csv_id].append(2014)

                user = {
                    "first_name": row["Meno"],
                    "last_name": row["Priezvisko"],
                    "email": row["Email"],
                }
                user_properties = [
                    (self.MOBIL_PROPERTY, row["Telefon"].replace(" ", "").strip()),
                    (self.BIRTH_NAME_PROPERTY, row["Rodne priezvisko"]),
                    (self.NICKNAME_PROPERTY, row["Prezyvka"]),
                ]

                self.process_person(user, user_properties, self.CSV_ID_PROPERTY, csv_id)

        self.print_stats()
class MigrateBaseCommand(NoArgsCommand):
    """Shared machinery for the people-import management commands.

    Provides the ``--wet_run`` / ``--fast`` options, creation of the
    ``UserPropertyKey`` objects used by the importers, school matching and
    creation, person creation with duplicate detection, and small parsing
    helpers. Subclasses call ``handle`` first and then feed their data into
    ``process_school`` / ``process_person``.
    """

    help = "Base class for importing people."
    NUMBER_OF_SCHOOLS_IN_FAST_RUN = 100
    NUMBER_OF_USERS_IN_FAST_RUN = 100

    def add_arguments(self, parser):
        """Register ``--wet_run`` (disables the default dry run) and ``--fast``."""
        parser.add_argument(
            "--wet_run",
            action="store_false",
            dest="dry",
            default=True,
            help="Actually write something to DB",
        )
        parser.add_argument(
            "--fast",
            action="store_true",
            dest="fast",
            default=False,
            help="Create only the first {} users and {} schools".format(
                self.NUMBER_OF_USERS_IN_FAST_RUN, self.NUMBER_OF_SCHOOLS_IN_FAST_RUN
            ),
        )

    def handle(self, **options):
        """Reset the per-run state and look up / create all property keys.

        Sets ``dry``, ``fast``, ``verbosity``, the processed counters,
        ``similar_users``, ``school_id_map`` and ``last_contact`` (a mapping
        from old user id to a list of contact years), plus one
        ``*_PROPERTY`` attribute per user property key.
        """
        self.dry = options["dry"]
        self.fast = options["fast"]
        self.done_users = 0
        self.done_schools = 0
        if self.dry:
            self.stderr.write("Running dry run!")

        self.verbosity = options["verbosity"]
        self.similar_users = []
        self.school_id_map = {}
        self.last_contact = defaultdict(list)

        self.CSV_ID_PROPERTY = self.process_property(
            constants.CSV_ID_PROPERTY_KEY, r"(.{1,20}_)?\d+"
        )
        self.MOBIL_PROPERTY = self.process_property(constants.MOBIL_PROPERTY_KEY, r"\+?\d+\/?\d+")
        self.NICKNAME_PROPERTY = self.process_property(constants.NICKNAME_PROPERTY_KEY, r".{1,30}")
        self.BIRTH_NAME_PROPERTY = self.process_property(
            constants.BIRTH_NAME_PROPERTY_KEY, r".{1,30}"
        )
        # TODO fix False and stupid values
        self.LAST_CONTACT_PROPERTY = self.process_property(
            constants.LAST_CONTACT_PROPERTY_KEY, r"\d\d\d\d"
        )
        self.FKS_ID_PROPERTY = self.process_property(constants.FKS_ID_PROPERTY_KEY, r"\d+")
        self.KMS_ID_PROPERTY = self.process_property(constants.KMS_ID_PROPERTY_KEY, r"\d+")
        self.KMS_CAMPS_PROPERTY = self.process_property(constants.KMS_CAMPS_PROPERTY_KEY, r"\d+")
        self.KASPAR_ID_PROPERTY = self.process_property(constants.KASPAR_ID_PROPERTY_KEY, r"\d+")
        self.KASPAR_NOTE_PROPERTY = self.process_property(constants.KASPAR_NOTE_PROPERTY_KEY, r".*")
        self.KSP_CAMPS_PROPERTY = self.process_property(constants.KSP_CAMPS_PROPERTY_KEY, r"\d+")
        self.MEMORY_PROPERTY = self.process_property(constants.MEMORY_PROPERTY_KEY, r".*")
        self.COMPANY_PROPERTY = self.process_property(constants.COMPANY_PROPERTY_KEY, r".*")
        self.AFFILIATION_PROPERTY = self.process_property(constants.AFFILIATION_PROPERTY_KEY, r".*")

    @transaction.atomic
    def process_school(self, old_id, abbr, name, addr_name, street, city, zip_code):
        """Map the school with id ``old_id`` to a ``School`` in ``school_id_map``.

        Schools are matched case-insensitively by abbreviation (with or
        without the trailing "?" review marker). One match is used directly,
        several matches are resolved by asking the operator, and no match
        creates a new school. Schools without an abbreviation map to ``None``.
        In fast mode, schools past the limit are skipped and not mapped.
        """
        self.done_schools += 1
        if self.fast and self.done_schools > self.NUMBER_OF_SCHOOLS_IN_FAST_RUN:
            return None
        # TODO improve this, do not work with abbreviations
        if not abbr:
            self.school_id_map[old_id] = None
            return None

        candidates = School.objects.filter(
            Q(abbreviation__iexact=abbr) | Q(abbreviation__iexact=abbr + "?")
        )
        row = (abbr, name, addr_name, street, city, self.fix_string(zip_code))
        if len(candidates) == 1:
            if self.verbosity >= 2:
                self.stderr.write("Matched %r to %s" % (row, candidates[0]))
            self.school_id_map[old_id] = candidates[0]
        elif len(candidates) > 1:
            self.stderr.write(
                "Multiple candidates for %r:\n%s"
                % (
                    row,
                    "\n".join(
                        "%02d: %s" % (i, candidate) for i, candidate in enumerate(candidates)
                    ),
                )
            )
            # The prompt promises that an empty or invalid answer creates a
            # new school, so a non-numeric answer must not raise.
            try:
                choice = int(input("Choice (empty or invalid to create new): "))
            except ValueError:
                choice = -1
            # Checking the range up front avoids relying on which exception
            # an out-of-range or negative queryset index raises.
            if 0 <= choice < len(candidates):
                self.school_id_map[old_id] = candidates[choice]
            else:
                self.school_id_map[old_id] = self.create_school(*row)
        else:
            self.school_id_map[old_id] = self.create_school(*row)
        return None

    def create_school(self, abbr, name, addr_name, street, city, zip_code):
        """Build a new ``School`` flagged for review; save it only in a wet run."""
        abbr += "?"  # Question mark denotes schools needing review.
        if len(zip_code) > 10:
            # Swiss zip codes are longer than 10 chars, but our db model does not allow
            # them so we skip them.
            zip_code = 0

        school_fields = {
            "abbreviation": abbr,
            "verbose_name": name,
            "addr_name": addr_name,
            "street": street,
            "city": city,
            "zip_code": zip_code,
        }
        if self.dry:
            school = School(**school_fields)
        else:
            school = School.objects.create(**school_fields)
        if self.verbosity >= 2:
            self.stderr.write("Created new school %s" % school)
        return school

    @transaction.atomic
    def process_person(
        self, user_args, user_properties, old_user_id_field, old_user_id, address=None
    ):
        """Create one inactive user with its properties, unless already imported.

        Args:
            user_args (dict): used for the user constructor as is, except for
                school_id, which is translated through ``school_id_map``.
                May contain first_name, last_name, graduation, email,
                birth_date, school_id.
            user_properties (list(tuple(UserPropertyKey, string))):
                additional user properties to create; ``None`` entries are
                ignored.
            old_user_id_field (UserPropertyKey): property key holding the old
                user id (kaspar id / kms id ...), used for faster
                deduplication.
            old_user_id (int/string): id of the user in the old system.
            address (dict): optional home address with the keys street, town,
                postal_code and country; only used in a wet run.

        Returns:
            The new user (unsaved in a dry run), or ``None`` when the person
            was skipped.
        """
        # If we run in the fast mode and we already processed enough users, we skip this one.
        self.done_users += 1
        if self.fast and self.done_users > self.NUMBER_OF_USERS_IN_FAST_RUN:
            return None

        if old_user_id:
            old_id_property = UserProperty.objects.filter(key=old_user_id_field, value=old_user_id)
        else:
            old_id_property = UserProperty.objects.none()

        first_name = user_args["first_name"]
        last_name = user_args["last_name"]
        if old_id_property.exists():
            if self.verbosity >= 2:
                self.stderr.write("Skipping user %s %s" % (first_name, last_name))
            return None

        # Work on copies so the caller's dict and list are left untouched.
        user_args = dict(user_args)
        user_properties = list(user_properties)

        user_args["is_active"] = False

        if "school_id" in user_args:
            school_id = user_args.pop("school_id")
            user_args["school"] = self.school_id_map.get(school_id)

        if self.verbosity >= 2:
            self.stderr.write("Creating user %s %s" % (first_name, last_name))

        if self.dry:
            # A dry run only builds the instance; nothing is written.
            new_user = User(**user_args)
        else:
            if address:
                user_args["home_address"] = Address.objects.create(
                    street=address["street"],
                    town=address["town"],
                    postal_code=address["postal_code"],
                    country=address["country"],
                )

            new_user = User.objects.create(**user_args)

            new_user.properties.create(key=old_user_id_field, value=old_user_id)

            # TODO last_contacted
            # A list is needed here: on Python 3 ``filter`` returns an
            # iterator that is always truthy, so ``max`` would raise when no
            # year is valid.
            contact_years = [
                year for year in self.last_contact.get(old_user_id, ()) if 1900 < year < 2017
            ]
            if contact_years:
                user_properties.append((self.LAST_CONTACT_PROPERTY, max(contact_years)))

            for prop in user_properties:
                if prop is None:
                    continue
                key, value = prop
                new_user.properties.create(key=key, value=value)

        similar_users = get_similar_users(new_user)
        if similar_users:
            names_of_similar = [(user.first_name, user.last_name) for user in similar_users]
            self.similar_users.append(((first_name, last_name), names_of_similar))
            if self.verbosity >= 2:
                self.stderr.write("Similar users: %s" % str(names_of_similar))
            if not self.dry:
                DuplicateUser.objects.create(user=new_user)

        return new_user

    def print_stats(self):
        """Write every recorded name conflict and their total count to stderr."""
        for conflict in self.similar_users:
            self.stderr.write("Conflicts: %s" % str(conflict))

        self.stderr.write("Conflict users: %d" % len(self.similar_users))

    def parse_dot_date(self, date_string):
        """Parse a ``DD.MM.YYYY`` date; raises ``ValueError`` on any other format."""
        # Remove any whitespace inside the string.
        date_string = date_string.replace(" ", "")
        # Just hope that all dates are in the same format.
        return datetime.strptime(date_string, "%d.%m.%Y")

    def parse_dash_date(self, date_string):
        """Parse a ``YYYY-MM-DD`` date.

        Returns ``None`` for a missing value (``None``, empty string,
        ``0000-00-00`` or ``NULL``); raises ``ValueError`` for anything else
        that does not match the format.
        """
        if not date_string:
            return None
        # Remove any whitespace inside the string.
        date_string = date_string.replace(" ", "")
        if date_string in ("", "0000-00-00", "NULL"):
            return None
        return datetime.strptime(date_string, "%Y-%m-%d")

    def process_property(self, key_name, regexp=None):
        """Return the ``UserPropertyKey`` named ``key_name``, creating it if missing.

        In a dry run a missing key is only instantiated, not saved. ``regexp``
        is used only for a newly created key.
        """
        user_property = UserPropertyKey.objects.filter(key_name=key_name).first()
        if user_property is not None:
            return user_property
        if self.dry:
            return UserPropertyKey(key_name=key_name, regex=regexp)
        return UserPropertyKey.objects.create(key_name=key_name, regex=regexp)

    def fix_string(self, string):
        """Return ``string`` with all spaces removed; ``None`` becomes ``""``."""
        if string is None:
            return ""
        return string.replace(" ", "").strip()
class Command(MigrateBaseCommand):
    """Import schools and people from CSV exports of the old FKS database.

    Reads ``adresa.csv``, ``skola.csv``, ``aktivita.csv``, ``osoba.csv`` and
    ``riesitel.csv`` from the directory given on the command line.
    """

    help = "Imports people and their related info from fks_csv."

    def add_arguments(self, parser):
        """Register the base-class options plus the positional CSV directory."""
        super(Command, self).add_arguments(parser)
        parser.add_argument("csv_directory", type=str, help="Directory containing all csv files.")

    def _read_rows(self, directory, file_name):
        """Return all rows of ``directory/file_name`` as dicts, closing the file."""
        with open(os.path.join(directory, file_name)) as csv_file:
            return list(csv.DictReader(csv_file))

    def handle(self, **options):
        """Import schools first, then activity years, then the participants."""
        super(Command, self).handle(**options)
        base = options["csv_directory"]

        address_by_id = {row["id"]: row for row in self._read_rows(base, "adresa.csv")}

        for school in self._read_rows(base, "skola.csv"):
            abbr = school["skratka"].split(" ", 1)[0]
            addr = address_by_id[school["adresa_id"]]
            street = addr["ulica"]
            addr_name = school["nazov"] + ", " + street
            self.process_school(
                school["id"], abbr, school["nazov"], addr_name, street, addr["mesto"], addr["psc"]
            )

        for act in self._read_rows(base, "aktivita.csv"):
            date = self.parse_dash_date(act["termin"])
            # parse_dash_date returns None for missing dates ("0000-00-00",
            # "NULL"); such activities carry no usable year.
            if date is None:
                continue
            # NOTE(review): activity is keyed by riesitel_id, while people
            # below are keyed by osoba_id - confirm the two ids coincide.
            self.last_contact[act["riesitel_id"]].append(int(date.year))

        people_by_id = {row["id"]: row for row in self._read_rows(base, "osoba.csv")}

        for participant in self._read_rows(base, "riesitel.csv"):
            idd = participant["osoba_id"]
            person = people_by_id[idd]
            matura = participant["rok_maturity"]
            # Graduation year minus three is recorded as a contact year.
            self.last_contact[idd].append(int(matura) - 3)
            address = address_by_id[person["adresa_id"]]
            parsed_address = {
                "street": address["ulica"],
                "town": address["mesto"],
                "postal_code": address["psc"],
                "country": address["stat"],
            }
            user = {
                "first_name": person["meno"],
                "last_name": person["priezvisko"],
                "graduation": matura,
                "email": person["email"],
                "birth_date": self.parse_dash_date(person["datum_narodenia"]),
                "school_id": participant["skola_id"],
            }

            user_properties = [(self.MOBIL_PROPERTY, person["telefon"].replace(" ", "").strip())]
            self.process_person(
                user, user_properties, self.FKS_ID_PROPERTY, idd, address=parsed_address
            )

        self.print_stats()
+ + def add_arguments(self, parser): + super(Command, self).add_arguments(parser) + parser.add_argument("csv_directory", type=str, help="Directory containing all csv files.") + + def handle(self, **options): + super(Command, self).handle(**options) + base = options["csv_directory"] + participants_file = os.path.join(base, "riesitelia.csv") + participants = csv.DictReader(open(participants_file)) + camps_file = os.path.join(base, "sustredenia.csv") + camps = csv.DictReader(open(camps_file)) + camps_survived = defaultdict(int) + for camp in camps: + idd = camp["id_riesitela"].strip() + camps_survived[idd] += 1 + if camp["rok"]: + self.last_contact[idd].append(int(camp["rok"])) + + schools_file = os.path.join(base, "skoly.csv") + schools = csv.DictReader(open(schools_file)) + for school in schools: + abbr = school["skratka"].split(" ", 1)[0] + addr_name = school["nazov"] + ", " + school["ulica"] + self.process_school( + school["id"], + abbr, + school["nazov"], + addr_name, + school["ulica"], + school["mesto"], + school["PSC"], + ) + + for l in participants: + if not l["meno"]: + continue + idd = l["id"] + self.last_contact[idd].append(int(l["matura"]) - 3) + user = { + "first_name": l["meno"], + "last_name": l["priezvisko"], + "graduation": l["matura"], + "email": l["email"], + "birth_date": self.parse_dash_date(l["datnar"]), + "school_id": l["id_skoly"], + } + + # TODO parse addresses from string. 
class Command(MigrateBaseCommand):
    """Import schools and people directly from the "kaspar" database connection."""

    help = "Imports people and their related info from kaspar."

    def process_schools(self):
        """Feed every row of the kaspar ``schools`` table to ``process_school``."""
        # The context manager closes the cursor even if a row fails.
        with self.kaspar.cursor() as cursor:
            cursor.execute(
                """
                SELECT school_id, short, name, addr_name, addr_street,
                       addr_city, addr_zip
                FROM schools;
                """
            )
            for row in cursor:
                self.process_school(*row)

    def process_participations(self):
        """Collect contact years and per-person participation counts.

        For every row of ``participants`` the end year of the matching action
        is appended to ``last_contact`` and ``camps_survived[man_id]`` is
        incremented.
        """
        with self.kaspar.cursor() as cursor:
            cursor.execute(
                """
                SELECT action_id, name, date_start, date_end
                FROM actions
                """
            )
            actions = {
                action_id: {"name": name, "start": start, "end": end}
                for action_id, name, start, end in cursor
            }

            cursor.execute(
                """
                SELECT action_id, man_id, task, note
                FROM participants
                """
            )
            self.camps_survived = {}
            for participant in cursor:
                action_id, man_id = participant[0], participant[1]
                action = actions[action_id]
                self.last_contact[man_id].append(int(action["end"].year))
                self.camps_survived[man_id] = self.camps_survived.get(man_id, 0) + 1

    # Original (misspelled) name, kept so existing callers keep working.
    process_particiaptions = process_participations

    def process_people(self):
        """Create one user per row of the kaspar ``people`` table.

        E-mail and birthday are read from ``people_prop`` with one extra
        query per person.
        """
        fields = ["man_id", "firstname", "lastname", "school_id", "finish", "note"]
        # Two cursors are needed: properties are queried while the people
        # result set is still being iterated.
        with self.kaspar.cursor() as people_cursor, self.kaspar.cursor() as prop_cursor:
            people_cursor.execute(
                """
                SELECT %s
                FROM people;
                """
                % (", ".join(fields))
            )

            for record in people_cursor:
                person = dict(zip(fields, record))
                man_id = person["man_id"]

                user = {
                    "first_name": person["firstname"],
                    "last_name": person["lastname"],
                    "school_id": person["school_id"],
                }
                # The graduation year can be missing; only use it when set.
                if person["finish"]:
                    user["graduation"] = person["finish"]
                    self.last_contact[man_id].append(int(person["finish"]) - 3)

                prop_cursor.execute(
                    """
                    SELECT ppt_id, value
                    FROM people_prop
                    WHERE people_prop.man_id = %s AND ppt_id IN (%s, %s);
                    """,
                    (man_id, EMAIL_PROP, BIRTHDAY_PROP),
                )
                for prop_id, value in prop_cursor:
                    if prop_id == EMAIL_PROP:
                        user["email"] = value
                    elif prop_id == BIRTHDAY_PROP:
                        try:
                            user["birth_date"] = self.parse_dot_date(value)
                        except ValueError:
                            # If we can't parse the date, give up.
                            pass

                user_properties = []
                # Store the note only when there is one.
                if person["note"]:
                    user_properties.append((self.KASPAR_NOTE_PROPERTY, person["note"]))
                user_properties.append(
                    (self.KSP_CAMPS_PROPERTY, self.camps_survived.get(man_id, 0))
                )
                self.process_person(user, user_properties, self.KASPAR_ID_PROPERTY, man_id)

    def handle(self, **options):
        """Run the import: schools, then participations, then people."""
        super(Command, self).handle(**options)
        self.kaspar = connections["kaspar"]

        if self.verbosity >= 1:
            self.stderr.write("Migrating schools...")

        self.process_schools()

        # TODO sustredka

        if self.verbosity >= 1:
            self.stderr.write("Dumping participations")

        self.process_participations()

        if self.verbosity >= 1:
            self.stderr.write("Migrating people...")

        self.process_people()
        self.print_stats()
input - -from trojsten.people.helpers import get_similar_users -from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey - -# Kaspar property IDs -EMAIL_PROP = 1 -BIRTHDAY_PROP = 2 -# Labels for auto-generated properties -KASPAR_ID_LABEL = "kaspar ID" -KASPAR_NOTE_LABEL = "kaspar note" - - -class Command(NoArgsCommand): - help = "Imports people and their related info from kaspar." - - def handle_noargs(self, **options): - self.verbosity = options["verbosity"] - self.kaspar = connections["kaspar"] - c = self.kaspar.cursor() - - if self.verbosity >= 1: - self.stdout.write("Migrating schools...") - - c.execute( - """ - SELECT school_id, short, name, addr_name, addr_street, - addr_city, addr_zip - FROM schools; - """ - ) - self.school_id_map = dict() - for row in c: - self.process_school(*row) - - if self.verbosity >= 1: - self.stdout.write("Creating/retrieving required UserPropertyKeys...") - - self.kaspar_id_key, _ = UserPropertyKey.objects.get_or_create(key_name=KASPAR_ID_LABEL) - self.kaspar_note_key, _ = UserPropertyKey.objects.get_or_create(key_name=KASPAR_NOTE_LABEL) - - if self.verbosity >= 1: - self.stdout.write("Migrating people...") - - c.execute( - """ - SELECT man_id, firstname, lastname, school_id, finish, note - FROM people; - """ - ) - self.man_id_map = dict() - # This loop takes O(N) queries and I don't care -- it's a one-time - # background job anyway. 
- for row in c: - self.process_person(*row) - - @transaction.atomic - def process_school(self, kaspar_id, abbr, name, addr_name, street, city, zip_code): - candidates = School.objects.filter( - Q(abbreviation__iexact=abbr) | Q(abbreviation__iexact=abbr + "?") - ) - row = (kaspar_id, abbr, name, addr_name, street, city, zip_code) - if len(candidates) == 1: - if self.verbosity >= 2: - self.stdout.write("Matched %r to %s" % (row, candidates[0])) - self.school_id_map[kaspar_id] = candidates[0] - elif len(candidates) > 1: - self.stdout.write( - "Multiple candidates for %r:\n%s" - % ( - row, - "\n".join( - "%02d: %s" % (i, candidate) for i, candidate in enumerate(candidates) - ), - ) - ) - try: - choice = int(input("Choice (empty or invalid to create new): ")) - self.school_id_map[kaspar_id] = candidates[choice] - except (ValueError, KeyError): - self.school_id_map[kaspar_id] = self.create_school(*row) - else: - self.school_id_map[kaspar_id] = self.create_school(*row) - - def create_school(self, kaspar_id, abbr, name, addr_name, street, city, zip_code): - abbr += "?" # Question mark denotes schools needing review. - school = School.objects.create( - abbreviation=abbr, - verbose_name=name, - addr_name=addr_name, - street=street, - city=city, - zip_code=zip_code, - ) - if self.verbosity >= 2: - self.stdout.write("Created new school %s" % school) - return school - - @transaction.atomic - def process_person(self, man_id, first_name, last_name, school_id, grad_year, note): - # If the user already exists in our database, skip. - if self.kaspar_id_key.properties.filter(value=man_id).exists(): - if self.verbosity >= 2: - self.stdout.write("Skipping user %s %s" % (first_name, last_name)) - return - - new_user_args = { - "first_name": first_name, - "last_name": last_name, - # The username needs to be unique, thus the ID. 
- "username": "%s%s%d" % (first_name, last_name, man_id), - "is_active": False, - "school": self.school_id_map[school_id], - } - - if grad_year: - new_user_args["graduation"] = grad_year - - c = self.kaspar.cursor() - c.execute( - """ - SELECT ppt_id, value - FROM people_prop - WHERE people_prop.man_id = %s AND ppt_id IN (%s, %s); - """, - (man_id, EMAIL_PROP, BIRTHDAY_PROP), - ) - for prop_id, value in c: - if prop_id == EMAIL_PROP: - new_user_args["email"] = value - elif prop_id == BIRTHDAY_PROP: - try: - new_user_args["birth_date"] = self.parse_date(value) - except ValueError: - # If we can't parse the date, give up. - pass - c.close() - - if self.verbosity >= 2: - self.stdout.write("Creating user %s %s" % (first_name, last_name)) - - new_user = User.objects.create(**new_user_args) - self.man_id_map[man_id] = new_user - - new_user.properties.create(key=self.kaspar_id_key, value=man_id) - if note: - new_user.properties.create(key=self.kaspar_note_key, value=note) - similar_users = get_similar_users(new_user) - if len(similar_users): - if self.verbosity >= 2: - self.stdout.write("Similar users: %s" % str(similar_users)) - DuplicateUser.objects.create(user=new_user) - - def parse_date(self, date_string): - # Remove any whitespace inside the string. - date_string = date_string.replace(" ", "") - # Just hope that all dates are in the same format. - return datetime.strptime(date_string, "%d.%m.%Y")