Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion osf/models/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,18 @@ def validate_email(value):


def has_domain_in_user_fields_for_names(user):
name_content = ' '.join([getattr(user, field) or '' for field in user.DOMAIN_VALIDATION_FIELDS])
name_content = ' '.join(getattr(user, field) or '' for field in user.DOMAIN_VALIDATION_FIELDS).strip()

if not name_content:
return False

text = name_content.lower()
if any(suffix in text for suffix in ['m.sc.', 'msc.', 'phd.', 'ph.d.', 'msc.pt', 'pt.', 'prof.', 'dr.', 'md.', 'jd.', 'esq.']):

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is far too specific and only handles the cases we know of, not the potential future cases. I think we need to adjust the DOMAIN_REGEX to be better (it should grab complete URLs), and also maybe if the domain regex finds something, pass the suspected domain through urllib.parse.urlsplit to see if it has a scheme and a netloc. If it doesn't have both of those, we could probably return false, since it won't link in an email to a location.

return False

if re.search(r'\b[a-z]\.[a-z0-9-]+\b', text):
return False

return True if DOMAIN_REGEX.search(name_content) else False


Expand Down
117 changes: 116 additions & 1 deletion osf_tests/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,17 @@
from osf.exceptions import ValidationValueError

from osf.models import validators
from osf_tests.factories import SubjectFactory
from osf.models.validators import has_domain_in_user_fields_for_names
from osf_tests.factories import SubjectFactory, AuthUserFactory


# Ported from tests/framework/test_mongo.py

@pytest.fixture()
def user(user_data):
return AuthUserFactory(**user_data)


def test_string_required_passes_with_string():
assert validators.string_required('Hi!') is True

Expand Down Expand Up @@ -50,3 +57,111 @@ def test_validate_expand_subject_hierarchy():
subject_list = [fruit._id, '12345_bad_id']
with pytest.raises(ValidationValueError):
validators.expand_subject_hierarchy(subject_list)


@pytest.mark.parametrize(
'user_data',
[
{
'fullname': 'Judith Sarah Preuss, M.Sc.',
'given_name': 'Judith',
'family_name': 'Preuss',
'middle_names': 'Sarah',
'suffix': 'M.Sc.',
},
{
'fullname': 'J.H. van Hateren',
'given_name': 'J.H.',
'family_name': 'van Hateren',
'middle_names': '',
},
{
'fullname': 'Sami-Egil Ahonen',
'given_name': 'Sami-Egil',
'family_name': 'Ahonen',
'middle_names': '',
},
{
'fullname': 'Giovanni Luca Ciampaglia',
'given_name': 'Giovanni Luca',
'family_name': 'Ciampaglia',
'middle_names': '',
},
{
'fullname': 'Joseph P.R.O. Orgel',
'given_name': 'Joseph',
'family_name': 'Orgel',
'middle_names': 'P.R.O.',
},
{
'fullname': 'Andrew Daoust',
'given_name': 'Andrew',
'family_name': 'Daoust',
'middle_names': '',
},
{
'fullname': 'Aidan G.C. Wright',
'given_name': 'Aidan',
'family_name': 'Wright',
'middle_names': 'G.C.',
},
{
'fullname': 'Guillermo Perez Algorta',
'given_name': 'Guillermo',
'family_name': 'Perez Algorta',
'middle_names': '',
},
{
'fullname': 'Sarah Wojkowski, MSc.PT, PhD.',
'given_name': 'Sarah',
'family_name': 'Wojkowski',
'middle_names': 'MSc.PT',
},
{
'fullname': 'Brockmann, L.C. (Leon)',
'given_name': 'Leon',
'family_name': 'Brockmann',
'middle_names': 'L.C.',
},
{
'fullname': 'Gragnolati, G.M. (Gaia Mariavittoria)',
'given_name': 'Gaia',
'family_name': 'Gragnolati',
'middle_names': 'G.M.',
},
{
'fullname': 'F.H. Leeuwis',
'given_name': 'F.H.',
'family_name': 'Leeuwis',
'middle_names': '',
},
{
'fullname': 'Grauss, S.E. (Sophie)',
'given_name': 'Sophie',
'family_name': 'Grauss',
'middle_names': 'S.E.',
},
{
'fullname': 'Sandhya N.Sathesh',
'given_name': 'Sandhya',
'family_name': 'N.Sathesh',
'middle_names': '',
},
{
'fullname': 'John Doe',
'given_name': 'John',
'family_name': 'Doe',
'middle_names': '',
}
]
)
def test_has_domain_in_user_fields_for_names_returns_false(user, user_data):
user.DOMAIN_VALIDATION_FIELDS = [
'fullname',
'given_name',
'middle_names',
'family_name',
'suffix',
]

assert has_domain_in_user_fields_for_names(user) is False