Skip to content

Commit 7005d33

Browse files
author
Maximilian Karl
committed
define unknown users takes now a dictionary
1 parent 2f1b2b5 commit 7005d33

File tree

4 files changed

+56
-38
lines changed

4 files changed

+56
-38
lines changed

docs/changes.rst

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,4 +59,10 @@ Version 1.1.5 (Mai 27, 2021)
5959
* solve author and committer problem
6060
* add define_unknown_user to Version
6161
* add get unknown_user from commits
62-
* get_repos now accepts multiple whitelist and blacklist patterns, and both are optional
62+
* get_repos now accepts multiple whitelist and blacklist patterns, and both are optional
63+
64+
65+
Version 1.1.6 (Mai 28, 2021)
66+
-----------------------------------
67+
68+
* define_unknown_users now takes a dictionary with the unknown user as key and the real user's node id as value. If that user does not exist yet, a new user will be added.

github2pandas/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '1.1.5'
1+
__version__ = '1.1.6'

github2pandas/utility.py

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
from pathlib import Path
3+
import numpy
34
import pandas as pd
45
import github
56
import pickle
@@ -440,18 +441,14 @@ def extract_user_data(user, users_ids, data_root_dir):
440441
user_data["anonym_uuid"] = generate_id(seed=user.node_id)
441442
users_ids[user.node_id] = user_data["anonym_uuid"]
442443
user_data["id"] = user.node_id
444+
user_data["name"] = user.name
445+
user_data["email"] = user.email
446+
user_data["login"] = user.login
443447
try:
444-
user_data["name"] = user.name
448+
user_data["alias"] = user.alias
445449
except:
446-
user_data["name"] = "exception Name"
447-
try:
448-
user_data["email"] = user.email
449-
except:
450-
user_data["email"] = "exception Email"
451-
try:
452-
user_data["login"] = user.login
453-
except:
454-
user_data["login"] = "exception Login"
450+
pass
451+
455452
users_df = users_df.append(user_data, ignore_index=True)
456453
with open(users_file, "wb") as f:
457454
pickle.dump(users_df, f)
@@ -656,3 +653,32 @@ def extract_comment_data(comment, parent_id, parent_name, users_ids, data_root_d
656653
comment_data["author"] = Utility.extract_user_data(comment.user, users_ids, data_root_dir)
657654
return comment_data
658655

656+
@staticmethod
def define_unknown_user(user_dict, unknown_user, data_root_dir):
    """
    define_unknown_user(user_dict, unknown_user, data_root_dir)

    Assign an unknown user to a real user and return that user's anonym uuid.

    Parameters
    ----------
    user_dict: dict
        Dictionary with the unknown user as key and the real user node id as value.
    unknown_user: str
        Unknown user that should be resolved.
    data_root_dir: str
        Data root directory for the repository.

    Returns
    -------
    uuid or None
        None if unknown_user is not a key of user_dict. If the node id already
        exists in the users table, the "anonym_uuid" of that row. Otherwise the
        value returned by Utility.extract_user_data for the newly added user.

    Notes
    -----
    For an existing user the unknown user is stored in the "alias" column.
    Several aliases are joined with ";". The updated users table is written
    back with pickle to Path(data_root_dir, Utility.USERS).

    """
    users = Utility.get_users(data_root_dir)
    if unknown_user not in user_dict:
        # No mapping supplied; callers check the result against None.
        return None

    real_node_id = user_dict[unknown_user]
    is_real_user = users.id == real_node_id
    matches = users.loc[is_real_user]

    if not matches.empty:
        user = matches.iloc[0]
        previous_alias = user["alias"] if "alias" in user else None
        # pd.isna covers both None and NaN. Comparing with "== numpy.nan" is
        # always False, which let NaN reach the string concatenation below.
        if pd.isna(previous_alias):
            alias = unknown_user
        else:
            alias = previous_alias + ";" + unknown_user
        users.loc[is_real_user, 'alias'] = alias
        pd_file = Path(data_root_dir, Utility.USERS)
        with open(pd_file, "wb") as f:
            pickle.dump(users, f)
        return user["anonym_uuid"]

    # Node id is not in the users table yet: build a minimal stand-in object
    # exposing the attributes that Utility.extract_user_data reads.
    class UserData:
        node_id = real_node_id
        name = numpy.nan  # numpy.NaN / numpy.NAN were removed in NumPy 2.0
        email = numpy.nan
        login = numpy.nan
        alias = unknown_user

    users_ids = Utility.get_users_ids(data_root_dir)
    return Utility.extract_user_data(UserData(), users_ids, data_root_dir)

github2pandas/version.py

Lines changed: 12 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -301,50 +301,36 @@ def generate_version_pandas_tables(repo, data_root_dir):
301301
pickle.dump(pd_Branches, f)
302302

303303
@staticmethod
304-
def define_unknown_users(user_list, data_root_dir):
304+
def define_unknown_users(user_dict, data_root_dir):
305305
"""
306-
define_unknown_users(user_list, data_root_dir)
306+
define_unknown_users(user_dict, data_root_dir)
307307
308308
Define unknown users in commits pandas table
309309
310310
Parameters
311311
----------
312-
user_list: list
313-
List which contains users.
312+
user_dict: dict
313+
Dictionary which contains users.
314314
data_root_dir : str
315315
Data root directory for the repository.
316316
317317
Notes
318318
-----
319-
Example User: {"node_id": "unique_id_0", "email":"mail", "name": "name", "login": "login"}
320-
All keys are optional.
319+
Example User: {"unknown_user": "real user node id"}
320+
If the real user node id does not exist in the users table then a new user will be created
321321
322322
"""
323323
pd_commits = Version.get_version(data_root_dir)
324324
if "unknown_user" in pd_commits:
325325
unknown_user_commits = pd_commits.loc[pd_commits.unknown_user.notna()]
326326
unknown_users = unknown_user_commits.unknown_user.unique()
327327
for unknown_user in unknown_users:
328-
for user in user_list:
329-
if (user["email"] == unknown_user) or (user["name"] == unknown_user) or (user["login"] == unknown_user):
330-
if "node_id" not in user:
331-
user["node_id"] = "node_id"
332-
if "name" not in user:
333-
user["name"] = numpy.NaN
334-
if "email" not in user:
335-
user["email"] = numpy.NaN
336-
if "login" not in user:
337-
user["login"] = numpy.NaN
338-
class UserData:
339-
node_id = user["node_id"]
340-
name = user["name"]
341-
email = user["email"]
342-
login = user["login"]
343-
users_ids = Utility.get_users_ids(data_root_dir)
344-
uuid = Utility.extract_user_data(UserData(),users_ids,data_root_dir)
345-
pd_commits.loc[pd_commits.unknown_user == unknown_user, 'author'] = uuid
346-
pd_commits.loc[pd_commits.unknown_user == unknown_user, 'committer'] = uuid
347-
pd_commits.loc[pd_commits.unknown_user == unknown_user, 'unknown_user'] = numpy.NaN
328+
uuid = Utility.define_unknown_user(user_dict,unknown_user,data_root_dir)
329+
if uuid is not None:
330+
pd_commits.loc[pd_commits.unknown_user == unknown_user, 'author'] = uuid
331+
pd_commits.loc[pd_commits.unknown_user == unknown_user, 'committer'] = uuid
332+
pd_commits.loc[pd_commits.unknown_user == unknown_user, 'unknown_user'] = numpy.NaN
333+
348334
version_folder = Path(data_root_dir, Version.VERSION_DIR)
349335
pd_commits_file = Path(version_folder, Version.VERSION_COMMITS)
350336
with open(pd_commits_file, "wb") as f:

0 commit comments

Comments
 (0)