@@ -43,7 +43,7 @@ class Utility():
4343 Get all assignees as one string.
4444 extract_labels(github_labels)
4545 Get all labels as one string.
46- extract_user_data(user, users_ids, data_root_dir)
46+ extract_user_data(user, users_ids, data_root_dir, node_id_to_anonym_uuid=False )
4747 Extracting general user data.
4848 extract_author_data_from_commit(repo, sha, users_ids, data_root_dir)
4949 Extracting general author data from a commit.
@@ -55,8 +55,8 @@ class Utility():
5555 Extracting general event data from a issue or pull request.
5656 extract_comment_data(comment, parent_id, parent_name, users_ids, data_root_dir)
5757 Extracting general comment data from a pull request or issue.
58- define_unknown_user(user_dict, unknown_user , data_root_dir)
59- Defines a unknown user. Add unknown user to alias.
58+ define_unknown_user(unknown_user_name, uuid , data_root_dir, new_user=False )
59+ Defines an unknown user. Adds the unknown user as an alias of an existing user or creates a new user.
6060
6161 """
6262 USERS = "Users.p"
@@ -407,9 +407,9 @@ def extract_labels(github_labels):
407407 return labels
408408
409409 @staticmethod
410- def extract_user_data (user , users_ids , data_root_dir ):
410+ def extract_user_data (user , users_ids , data_root_dir , node_id_to_anonym_uuid = False ):
411411 """
412- extract_user_data(user, users_ids, data_root_dir)
412+ extract_user_data(user, users_ids, data_root_dir, node_id_to_anonym_uuid=False )
413413
414414 Extracting general user data.
415415
@@ -421,6 +421,8 @@ def extract_user_data(user, users_ids, data_root_dir):
421421 Dict of User Ids as Keys and anonym Ids as Value.
422422 data_root_dir : str
423423 Repo dir of the project.
424+ node_id_to_anonym_uuid : bool, default=False
425+ If True, the user's node_id is used directly as the anonym_uuid instead of generating a new id from it.
424426
425427 Returns
426428 -------
@@ -441,7 +443,10 @@ def extract_user_data(user, users_ids, data_root_dir):
441443 if users_file .is_file ():
442444 users_df = pd .read_pickle (users_file )
443445 user_data = {}
444- user_data ["anonym_uuid" ] = generate_id (seed = user .node_id )
446+ if node_id_to_anonym_uuid :
447+ user_data ["anonym_uuid" ] = user .node_id
448+ else :
449+ user_data ["anonym_uuid" ] = generate_id (seed = user .node_id )
445450 user_data ["id" ] = user .node_id
446451 try :
447452 user_data ["name" ] = user .name
@@ -671,59 +676,57 @@ def extract_comment_data(comment, parent_id, parent_name, users_ids, data_root_d
671676 return comment_data
672677
@staticmethod
def define_unknown_user(unknown_user_name, uuid, data_root_dir, new_user=False):
    """
    define_unknown_user(unknown_user_name, uuid, data_root_dir, new_user=False)

    Defines an unknown user. Adds the name as an alias of an existing user or creates a new user.

    Parameters
    ----------
    unknown_user_name : str
        Name of the unknown user.
    uuid : str
        Anonym uuid that is looked up in the users table. If no user has this
        anonym uuid, it is used as the node id of the user that gets created.
        (The name shadows the stdlib ``uuid`` module inside this function; it is
        kept because callers may pass it by keyword.)
    data_root_dir : str
        Data root directory for the repository.
    new_user : bool, default=False
        Only relevant when no user with ``uuid`` exists. If True, the new user is
        created through ``extract_user_data`` with ``node_id_to_anonym_uuid=False``;
        if False, ``node_id_to_anonym_uuid=True`` is passed so that ``uuid`` itself
        becomes the anonym uuid of the new user.

    Returns
    -------
    str
        Anonym uuid of the matched user, or the result of ``extract_user_data``
        for a newly created user (presumably also the user's uuid -- confirm
        against ``extract_user_data``).

    Notes
    -----
    Aliases are stored as one ``;``-separated string in the ``alias`` column.
    When an existing user is found, the users table is written back to the
    ``Utility.USERS`` pickle file in ``data_root_dir``.

    """
    users = Utility.get_users(data_root_dir)
    # Build the mask once; it is used for both the lookup and the write-back.
    matches_uuid = users.anonym_uuid == uuid
    p_user = users.loc[matches_uuid]
    if not p_user.empty:
        user = p_user.iloc[0]
        current_alias = user["alias"] if "alias" in user else None
        # pd.isnull covers None and NaN; an empty string is treated the same way
        # so the stored alias never starts with a stray ";".
        if pd.isnull(current_alias) or current_alias == "":
            alias = unknown_user_name
        elif unknown_user_name not in current_alias.split(';'):
            alias = current_alias + ";" + unknown_user_name
        else:
            # Name is already registered as an alias; keep the value unchanged.
            alias = current_alias
        users.loc[matches_uuid, 'alias'] = alias
        pd_file = Path(data_root_dir, Utility.USERS)
        with open(pd_file, "wb") as f:
            pickle.dump(users, f)
        return user["anonym_uuid"]

    # No user with this anonym uuid exists: build a minimal stand-in object that
    # carries the attributes extract_user_data reads from a user.
    class UserData:
        node_id = uuid
        name = unknown_user_name
        # numpy.nan instead of numpy.NaN: the NaN alias was removed in NumPy 2.0.
        email = numpy.nan
        login = numpy.nan

    users_ids = Utility.get_users_ids(data_root_dir)
    # new_user=True  -> anonym uuid is generated by extract_user_data.
    # new_user=False -> the given uuid is stored as the anonym uuid.
    return Utility.extract_user_data(
        UserData(),
        users_ids,
        data_root_dir,
        node_id_to_anonym_uuid=not new_user,
    )
0 commit comments