-
Notifications
You must be signed in to change notification settings - Fork 4.3k
fix: retirement PII leaks by redacting pending secondary email/name data #38427
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
1e6614b
da2bbfd
4326d69
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -934,14 +934,31 @@ class PendingSecondaryEmailChange(DeletableByUserValue, models.Model): # noqa: | |||||||||
| """ | ||||||||||
| This model keeps track of pending requested changes to a user's secondary email address. | ||||||||||
|
|
||||||||||
| .. pii: Contains new_secondary_email, not currently retired | ||||||||||
| .. pii: Contains new_secondary_email, redacted in `DeactivateLogoutView` | ||||||||||
| .. pii_types: email_address | ||||||||||
| .. pii_retirement: retained | ||||||||||
| .. pii_retirement: local_api | ||||||||||
| """ | ||||||||||
| user = models.OneToOneField(User, unique=True, db_index=True, on_delete=models.CASCADE) | ||||||||||
| new_secondary_email = models.CharField(blank=True, max_length=255, db_index=True) | ||||||||||
| activation_key = models.CharField(('activation key'), max_length=32, unique=True, db_index=True) | ||||||||||
|
|
||||||||||
| @classmethod | ||||||||||
| def redact_and_delete_pending_secondary_email(cls, user_id): | ||||||||||
| """ | ||||||||||
| Redact and delete a pending secondary email change row for a user. | ||||||||||
|
|
||||||||||
| Redacts the email before deletion so any downstream soft-delete mirror does | ||||||||||
| not retain the original secondary email address in the final row image. | ||||||||||
| """ | ||||||||||
| try: | ||||||||||
| pending_secondary_email = cls.objects.get(user_id=user_id) | ||||||||||
| except cls.DoesNotExist: | ||||||||||
| return True | ||||||||||
| pending_secondary_email.new_secondary_email = f"redacted+{user_id}@redacted.com" | ||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But Produces
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||||||||||
| pending_secondary_email.save(update_fields=['new_secondary_email']) | ||||||||||
| pending_secondary_email.delete() | ||||||||||
|
robrap marked this conversation as resolved.
|
||||||||||
| return True | ||||||||||
|
|
||||||||||
|
|
||||||||||
| class LoginFailures(models.Model): | ||||||||||
| """ | ||||||||||
|
|
@@ -1688,7 +1705,7 @@ class AccountRecovery(models.Model): # noqa: DJ008 | |||||||||
| """ | ||||||||||
| Model for storing information for user's account recovery in case of access loss. | ||||||||||
|
|
||||||||||
| .. pii: the field named secondary_email contains pii, retired in the `DeactivateLogoutView` | ||||||||||
| .. pii: the field named secondary_email contains pii, redacted in the `DeactivateLogoutView` | ||||||||||
| .. pii_types: email_address | ||||||||||
| .. pii_retirement: local_api | ||||||||||
| """ | ||||||||||
|
|
@@ -1721,19 +1738,25 @@ def update_recovery_email(self, email): | |||||||||
| @classmethod | ||||||||||
| def retire_recovery_email(cls, user_id): | ||||||||||
| """ | ||||||||||
| Retire user's recovery/secondary email as part of GDPR Phase I. | ||||||||||
| Returns 'True' | ||||||||||
| Redact and delete user's recovery/secondary email as part of GDPR Phase I. | ||||||||||
|
|
||||||||||
| If an AccountRecovery record is found for this user it will be deleted, | ||||||||||
| if it is not found it is assumed this table has no PII for the given user. | ||||||||||
| If an AccountRecovery record is found for this user it will be redacted and | ||||||||||
| deleted. If it is not found it is assumed this table has no PII for the given user. | ||||||||||
|
robrap marked this conversation as resolved.
|
||||||||||
|
|
||||||||||
| Note: "Retire" here refers to retiring this user's data, not the recovery email | ||||||||||
| feature itself, which remains available for new accounts. | ||||||||||
|
Comment on lines
+1746
to
+1747
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure what "the recovery email feature itself" means. Here is my proposed update:
Suggested change
|
||||||||||
|
|
||||||||||
| :param user_id: int | ||||||||||
| :return: bool | ||||||||||
| :return: bool - True on success | ||||||||||
| """ | ||||||||||
| try: | ||||||||||
| cls.objects.get(user_id=user_id).delete() | ||||||||||
| account_recovery = cls.objects.get(user_id=user_id) | ||||||||||
| except cls.DoesNotExist: | ||||||||||
| pass | ||||||||||
| return True | ||||||||||
| account_recovery.secondary_email = f"redacted+{user_id}@redacted.com" | ||||||||||
| account_recovery.is_active = False | ||||||||||
| account_recovery.save(update_fields=['secondary_email', 'is_active']) | ||||||||||
|
Comment on lines
+1757
to
+1758
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Messing with
Suggested change
|
||||||||||
| account_recovery.delete() | ||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ktyagiapphelix2u: Is it true that the recovery email itself is not yet redacted? I thought it was just pending secondary email updates. If so, I think secondary (recovery) email and pending secondary email should be separate PRs. Also, it look like you wish to delete where we weren't previously doing so. @bmedx: Any strong opinions either way? In general are we redacting and leaving things in place?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ktyagiapphelix2u: You did not respond on this comment in particular, but from elsewhere, I think you want this all to stay together, right? Also, I missed the earlier |
||||||||||
|
|
||||||||||
| return True | ||||||||||
|
|
||||||||||
|
|
||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,6 +27,7 @@ | |
| ManualEnrollmentAudit, | ||
| PendingEmailChange, | ||
| PendingNameChange, | ||
| PendingSecondaryEmailChange, | ||
| UserAttribute, | ||
| UserCelebration, | ||
| UserProfile, | ||
|
|
@@ -747,6 +748,27 @@ def test_retire_recovery_email(self): | |
| assert len(AccountRecovery.objects.filter(user_id=user.id)) == 0 | ||
|
|
||
|
|
||
| class TestPendingSecondaryEmailChange(TestCase): | ||
| """Tests for retiring PendingSecondaryEmailChange records.""" | ||
|
|
||
| def test_redact_and_delete_pending_secondary_email(self): | ||
| """Assert that pending secondary email records are deleted for retired users.""" | ||
| user = UserFactory() | ||
| PendingSecondaryEmailChange.objects.create( | ||
| user=user, | ||
| new_secondary_email='new-secondary@example.com', | ||
| activation_key='a' * 32, | ||
| ) | ||
| assert len(PendingSecondaryEmailChange.objects.filter(user_id=user.id)) == 1 | ||
| PendingSecondaryEmailChange.redact_and_delete_pending_secondary_email(user_id=user.id) | ||
| assert len(PendingSecondaryEmailChange.objects.filter(user_id=user.id)) == 0 | ||
|
Comment on lines
+763
to
+764
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Only checks the record was deleted. Doesn't assert:
The test should verify the redacted value was persisted and consistent. |
||
|
|
||
| def test_redact_and_delete_pending_secondary_email_when_no_record(self): | ||
| """Assert retirement cleanup returns True when no pending secondary row exists.""" | ||
| user = UserFactory() | ||
| assert PendingSecondaryEmailChange.redact_and_delete_pending_secondary_email(user_id=user.id) is True | ||
|
|
||
|
|
||
| @ddt.ddt | ||
| class TestUserPostSaveCallback(SharedModuleStoreTestCase): | ||
| """ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -84,6 +84,7 @@ | |
| UserStanding, | ||
| create_comments_service_user, # noqa: F401 | ||
| email_exists_or_retired, # noqa: F401 | ||
| get_retired_email_by_email, | ||
| ) | ||
| from common.djangoapps.student.signals import REFUND_ORDER, USER_EMAIL_CHANGED | ||
| from common.djangoapps.student.toggles import should_redirect_to_courseware_after_enrollment | ||
|
|
@@ -890,6 +891,10 @@ def activate_secondary_email(request, key): | |
| 'secondary_email': pending_secondary_email_change.new_secondary_email | ||
| }) | ||
|
|
||
| pending_secondary_email_change.new_secondary_email = get_retired_email_by_email( | ||
| pending_secondary_email_change.new_secondary_email | ||
| ) | ||
| pending_secondary_email_change.save(update_fields=['new_secondary_email']) | ||
|
Comment on lines
+894
to
+897
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This change does not belong in this view. The soft-delete protection goal is valid, but the redaction function must not be
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. right—activate_secondary_email is a normal user action, not a retirement flow. Using get_retired_email_by_email() here would incorrectly mark a regular user as “retired” in downstream systems, which could cause confusion or false positives. I’ve removed the redaction from this path, so now the pending secondary email is just deleted after activation, with no retirement-style hashing. Redaction only happens in actual retirement flows.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about something like using completed@delete-pending.com? If the user is later retired, this soft-delete would persist, and isn’t that the case we’re trying to fix?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ktyagiapphelix2u: Did you see my comment? Can you acknowledge and respond? Thank you. |
||
| pending_secondary_email_change.delete() | ||
|
|
||
| return render_to_response("secondary_email_change_successful.html") | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,6 +29,7 @@ | |
| ManualEnrollmentAudit, | ||
| PendingEmailChange, | ||
| PendingNameChange, | ||
| PendingSecondaryEmailChange, | ||
| Registration, | ||
| SocialLink, | ||
| UserProfile, | ||
|
|
@@ -233,6 +234,24 @@ def test_user_can_deactivate_secondary_email(self): | |
| # Assert that there is no longer a secondary/recovery email for test user | ||
| assert len(AccountRecovery.objects.filter(user_id=self.test_user.id)) == 0 | ||
|
|
||
| def test_user_can_deactivate_pending_secondary_email_change(self): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm holding off on reviewing tests. I'm getting confused between the pending and actual records, and this will be simpler when the PRs are split. Also, feel free to get an approval from the team first.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Helpful Info This PR fixes a PII leakage issue affecting two related models that store secondary email data at different lifecycle stages. The AccountRecovery model stores confirmed/active recovery emails, while PendingSecondaryEmailChange stores unconfirmed email change requests (before the user clicks the activation link). During user retirement, we must handle both models because a user can have: (1) only a confirmed email, (2) only a pending change request, (3) both (when changing their existing recovery email to a new one), or (4) neither. Both models require the same fix pattern: redact the email field to prevent PII from syncing to downstream systems save the redacted record, then delete it. The methods are idempotent, they return True if no record exists. For review clarity, I recommend examining the AccountRecovery changes first (confirmed emails), then PendingSecondaryEmailChange (pending emails), and finally the retirement flow that calls both methods. |
||
| """ | ||
| Verify that pending secondary email change records are removed when a user retires. | ||
| """ | ||
| PendingSecondaryEmailChange.objects.create( | ||
| user=self.test_user, | ||
| new_secondary_email='pending-secondary@example.com', | ||
| activation_key='b' * 32, | ||
| ) | ||
| assert len(PendingSecondaryEmailChange.objects.filter(user_id=self.test_user.id)) == 1 | ||
|
|
||
| self.client.login(username=self.test_user.username, password=self.test_password) | ||
| headers = build_jwt_headers(self.test_user) | ||
| response = self.client.post(self.url, self.build_post(self.test_password), **headers) | ||
| assert response.status_code == status.HTTP_204_NO_CONTENT | ||
|
|
||
| assert len(PendingSecondaryEmailChange.objects.filter(user_id=self.test_user.id)) == 0 | ||
|
|
||
| def test_password_mismatch(self): | ||
| """ | ||
| Verify that the user submitting a mismatched password results in | ||
|
|
@@ -1293,6 +1312,18 @@ def setUp(self): | |
| UserOrgTagFactory.create(user=self.test_user, key='foo', value='bar') | ||
| UserOrgTagFactory.create(user=self.test_user, key='cat', value='dog') | ||
|
|
||
| # Secondary email setup | ||
| PendingSecondaryEmailChange.objects.create( | ||
| user=self.test_user, | ||
| new_secondary_email='pending_secondary@example.com', | ||
| activation_key='test_activation_key_123' | ||
| ) | ||
| AccountRecovery.objects.create( | ||
| user=self.test_user, | ||
| secondary_email='confirmed_secondary@example.com', | ||
| is_active=True | ||
| ) | ||
|
|
||
| CourseEnrollmentAllowedFactory.create(email=self.original_email) | ||
|
|
||
| self.course_key = CourseKey.from_string('course-v1:edX+DemoX+Demo_Course') | ||
|
|
@@ -1399,6 +1430,10 @@ def test_retire_user(self, mock_remove_profile_images, mock_get_profile_image_na | |
| assert not PendingEmailChange.objects.filter(user=self.test_user).exists() | ||
| assert not UserOrgTag.objects.filter(user=self.test_user).exists() | ||
|
|
||
| # Verify secondary email models were cleaned | ||
| assert not PendingSecondaryEmailChange.objects.filter(user=self.test_user).exists() | ||
| assert not AccountRecovery.objects.filter(user=self.test_user).exists() | ||
|
|
||
| assert not CourseEnrollmentAllowed.objects.filter(email=self.original_email).exists() | ||
| assert not UnregisteredLearnerCohortAssignments.objects.filter(email=self.original_email).exists() | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -37,11 +37,13 @@ | |
|
|
||
| from common.djangoapps.entitlements.models import CourseEntitlement | ||
| from common.djangoapps.student.models import ( # lint-amnesty, pylint: disable=unused-import | ||
| AccountRecovery, | ||
| CourseEnrollmentAllowed, | ||
| LoginFailures, | ||
| ManualEnrollmentAudit, | ||
| PendingEmailChange, | ||
| PendingNameChange, | ||
| PendingSecondaryEmailChange, | ||
| User, | ||
| UserProfile, | ||
| get_potentially_retired_user_by_username, | ||
|
|
@@ -1152,8 +1154,15 @@ def post(self, request): | |
| self.retire_entitlement_support_detail(user) | ||
|
|
||
| # Retire misc. models that may contain PII of this user | ||
| # Redact pending email change before deletion to prevent plaintext sync to Snowflake | ||
| pending_email = PendingEmailChange.objects.filter(user=user).first() | ||
| if pending_email: | ||
| pending_email.new_email = get_retired_email_by_email(pending_email.new_email) | ||
| pending_email.save(update_fields=['new_email']) | ||
|
Comment on lines
+1158
to
+1161
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. PR #38426 adds This inline block must be removed |
||
| PendingEmailChange.delete_by_user_value(user, field="user") | ||
| UserOrgTag.delete_by_user_value(user, field="user") | ||
| PendingSecondaryEmailChange.redact_and_delete_pending_secondary_email(user.id) | ||
| AccountRecovery.retire_recovery_email(user.id) | ||
|
|
||
| # Retire any objects linked to the user via their original email | ||
| CourseEnrollmentAllowed.delete_by_user_value(original_email, field="email") | ||
|
|
@@ -1171,6 +1180,7 @@ def post(self, request): | |
| user.last_name = "" | ||
| user.is_active = False | ||
| user.username = retired_username | ||
| user.email = retired_email | ||
| user.save() | ||
| except UserRetirementStatus.DoesNotExist: | ||
| return Response(status=status.HTTP_404_NOT_FOUND) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,6 +10,7 @@ | |
| from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user | ||
| from django.core.management import CommandError, call_command | ||
|
|
||
| from common.djangoapps.student.models import PendingSecondaryEmailChange | ||
| from common.djangoapps.student.tests.factories import UserFactory # lint-amnesty, pylint: disable=wrong-import-order | ||
| from openedx.core.djangoapps.user_api.accounts.tests.retirement_helpers import ( # lint-amnesty, pylint: disable=unused-import, wrong-import-order | ||
| setup_retirement_states, # noqa: F401 | ||
|
|
@@ -107,3 +108,18 @@ def test_retire_with_username_email_userfile(setup_retirement_states): # lint-a | |
| with pytest.raises(CommandError, match=r'You cannot use userfile option with username and user_email'): | ||
| call_command('retire_user', user_file=user_file, username=username, user_email=user_email) | ||
| remove_user_file() | ||
|
|
||
|
|
||
| @skip_unless_lms | ||
| def test_retire_user_cleans_pending_secondary_email(setup_retirement_states): # lint-amnesty, pylint: disable=redefined-outer-name, unused-argument # noqa: F811 | ||
| user = UserFactory.create(username='user-cleanup', email='user-cleanup@example.com') | ||
| PendingSecondaryEmailChange.objects.create( | ||
| user=user, | ||
| new_secondary_email='pending-secondary@example.com', | ||
| activation_key='c' * 32, | ||
| ) | ||
| assert PendingSecondaryEmailChange.objects.filter(user=user).exists() | ||
|
|
||
| call_command('retire_user', username=user.username, user_email=user.email) | ||
|
|
||
| assert not PendingSecondaryEmailChange.objects.filter(user=user).exists() | ||
|
Comment on lines
+123
to
+125
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Only verifies deletion — does not verify the email was redacted before the delete happened. Use the same |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@bmedx: [inform] This doesn't seem like it would have been intentionally retained, so I'm fine with calling this a bug and just fixing. Any objections?