Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions openedx/core/djangoapps/user_api/accounts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
from django.utils.text import format_lazy
from django.utils.translation import gettext_lazy as _

# Import signals to ensure they are registered
from . import signals # noqa: F401, pylint: disable=unused-import

# The maximum length for the bio ("about me") account field
BIO_MAX_LENGTH = 300

Expand Down
38 changes: 37 additions & 1 deletion openedx/core/djangoapps/user_api/accounts/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,15 @@
Django Signal related functionality for user_api accounts
"""

import logging

from django.dispatch import Signal
from django.db.models.signals import pre_delete
from django.dispatch import Signal, receiver
from social_django.models import UserSocialAuth

from .utils import redact_user_social_auth_pii

logger = logging.getLogger(__name__)

# Signal to retire a user from LMS-initiated mailings (course mailings, etc)
# providing_args=["user"]
Expand All @@ -16,3 +23,32 @@
# Signal to retire LMS misc information
# providing_args=["user"]
USER_RETIRE_LMS_MISC = Signal()


@receiver(pre_delete, sender=UserSocialAuth)
def redact_social_auth_pii_before_deletion(sender, instance, **kwargs): # pylint: disable=unused-argument
"""
Signal handler to redact PII from UserSocialAuth records before deletion.

This ensures that when SSO records are deleted (either via user retirement, manual unlinking,
or any other method), PII is redacted first. This prevents downstream systems that maintain
soft-deleted records from retaining sensitive user information.

Note: We call redact_user_social_auth_pii which saves the redacted data before the actual
deletion happens. This is intentional - downstream systems will capture the redacted
state before marking the record as deleted.

If redaction fails, the exception is re-raised to prevent deletion from proceeding,
ensuring GDPR compliance and preventing PII leaks to downstream systems.
"""
try:
redact_user_social_auth_pii(instance)
except Exception as e: # pylint: disable=broad-except
logger.exception(
"Failed to redact PII for UserSocialAuth before deletion: user_id=%s, provider=%s, error=%s",
instance.user_id,
instance.provider,
str(e)
Comment on lines +46 to +51
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Swallowing the exception here means that if redaction fails, deletion proceeds with PII intact — exactly what this PR is meant to prevent. Either re-raise after logging, or explicitly document "best-effort" as the intended behavior.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done! I've updated the signal handler to re-raise the exception after logging.

Before:
Redaction fails → Exception logged → Deletion proceeds → PII leaked

After:
Redaction fails → Exception logged → Exception re-raised → Deletion blocked → PII protected

)
# Re-raise to prevent deletion from proceeding without redaction
raise
82 changes: 81 additions & 1 deletion openedx/core/djangoapps/user_api/accounts/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,14 @@
from completion.test_utils import CompletionWaffleTestMixin
from django.test import TestCase
from django.test.utils import override_settings
from social_django.models import UserSocialAuth

from common.djangoapps.student.models import CourseEnrollment
from common.djangoapps.student.tests.factories import UserFactory
from openedx.core.djangoapps.user_api.accounts.utils import retrieve_last_sitewide_block_completed
from openedx.core.djangoapps.user_api.accounts.utils import (
redact_user_social_auth_pii,
retrieve_last_sitewide_block_completed,
)
from openedx.core.djangolib.testing.utils import skip_unless_lms
from xmodule.modulestore.tests.django_utils import (
SharedModuleStoreTestCase, # lint-amnesty, pylint: disable=wrong-import-order
Expand Down Expand Up @@ -133,3 +137,79 @@ def test_retrieve_last_sitewide_block_completed(self):
)

assert empty_block_url is None


@skip_unless_lms
class RedactUserSocialAuthPIITest(TestCase):
"""
Tests for SSO PII redaction before deletion.
"""

def setUp(self):
    """Create the user that owns the social auth records built in these tests."""
    super().setUp()
    user_attrs = {'username': 'testuser', 'email': 'testuser@example.com'}
    self.user = UserFactory.create(**user_attrs)

def create_social_auth(self, provider='google-oauth2', uid='user@example.com', extra_data=None):
    """
    Create and return a UserSocialAuth row linked to ``self.user``.

    When ``extra_data`` is None, a default payload containing sample email, name
    and id values is stored instead.
    """
    default_payload = {
        'email': 'user@example.com',
        'name': 'Test User',
        'id': '123456789',
    }
    payload = default_payload if extra_data is None else extra_data
    return UserSocialAuth.objects.create(
        user=self.user,
        provider=provider,
        uid=uid,
        extra_data=payload,
    )

def test_redact_user_social_auth_pii(self):
    """
    Redaction replaces uid with the pk-based placeholder and empties extra_data.
    """
    record = self.create_social_auth()
    expected_uid = f'redacted_{record.pk}@retired.invalid'

    redact_user_social_auth_pii(record)
    # Reload so the assertions check the persisted row, not only the in-memory instance.
    record.refresh_from_db()

    assert record.uid == expected_uid
    assert record.extra_data == {}

def test_redact_user_social_auth_pii_idempotent(self):
"""
Test that calling redact_user_social_auth_pii multiple times is idempotent.
"""
social_auth = self.create_social_auth()

redact_user_social_auth_pii(social_auth)
# Duplicate call to redact user method to validate idempotency
redact_user_social_auth_pii(social_auth)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ktyagiapphelix2u Maybe adding a comment on line #189 would help, e.g. "duplicate call to redact user method to validate idempotency".

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added

social_auth.refresh_from_db()

assert social_auth.uid == f'redacted_{social_auth.pk}@retired.invalid'
assert social_auth.extra_data == {}

def test_redact_multiple_sso_providers(self):
    """
    Records from different SSO providers are each redacted to their own pk-based uid.
    """
    google_auth = self.create_social_auth(
        provider='google-oauth2',
        uid='google@example.com',
        extra_data={'email': 'google@example.com', 'name': 'Google User'},
    )
    saml_auth = self.create_social_auth(
        provider='tpa-saml',
        uid='saml@example.com',
        extra_data={'email': 'saml@example.com', 'name': 'SAML User', 'uid': 'saml-uid'},
    )

    # Redact and verify one record at a time, in creation order.
    for record in (google_auth, saml_auth):
        redact_user_social_auth_pii(record)
        record.refresh_from_db()
        assert record.uid == f'redacted_{record.pk}@retired.invalid'
        assert record.extra_data == {}
37 changes: 35 additions & 2 deletions openedx/core/djangoapps/user_api/accounts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,37 @@ def is_secondary_email_feature_enabled():
return waffle.switch_is_active(ENABLE_SECONDARY_EMAIL_FEATURE_SWITCH)


def redact_user_social_auth_pii(user_social_auth):
    """
    Overwrite the PII-bearing fields of a UserSocialAuth row ahead of its deletion.

    Downstream systems can retain deleted source rows as soft-deleted records, so the
    sensitive values are blanked before the row is removed.

    The replacement UID embeds the row's primary key (redacted_{pk}@retired.invalid) so
    every row gets a distinct value, avoiding an IntegrityError from the unique_together
    constraint on (provider, uid).

    Does nothing when given a falsy object, an object without a primary key, or an
    object whose in-memory ``uid`` and ``extra_data`` already hold the redacted values.
    """
    if not (user_social_auth and user_social_auth.pk):
        return

    record_pk = user_social_auth.pk
    placeholder_uid = f'redacted_{record_pk}@retired.invalid'

    pending_changes = {}
    if user_social_auth.uid != placeholder_uid:
        pending_changes['uid'] = placeholder_uid
    if user_social_auth.extra_data:
        pending_changes['extra_data'] = {}

    if pending_changes:
        # Queryset-level update targeting this single row by primary key.
        UserSocialAuth.objects.filter(pk=record_pk).update(**pending_changes)

        # Mirror the new values onto the instance in case the caller reuses it.
        for attr, new_value in pending_changes.items():
            setattr(user_social_auth, attr, new_value)


def create_retirement_request_and_deactivate_account(user):
"""
Adds user to retirement queue, unlinks social auth accounts, changes user passwords
Expand All @@ -204,8 +235,10 @@ def create_retirement_request_and_deactivate_account(user):
# Add user to retirement queue.
UserRetirementStatus.create_retirement(user)

# Unlink LMS social auth accounts
UserSocialAuth.objects.filter(user_id=user.id).delete()
# Redact and unlink LMS social auth accounts
for social_auth in UserSocialAuth.objects.filter(user_id=user.id):
redact_user_social_auth_pii(social_auth)
social_auth.delete()

# Change LMS password & email
user.email = get_retired_email_by_email(user.email)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from common.djangoapps.student.models import AccountRecovery, Registration, get_retired_email_by_email
from openedx.core.djangolib.oauth2_retirement_utils import retire_dot_oauth2_models

from ...accounts.utils import redact_user_social_auth_pii
from ...models import BulkUserRetirementConfig, UserRetirementStatus

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -144,8 +145,10 @@ def handle(self, *args, **options):
for user in users:
# Add user to retirement queue.
UserRetirementStatus.create_retirement(user)
# Unlink LMS social auth accounts
UserSocialAuth.objects.filter(user_id=user.id).delete()
# Redact and unlink LMS social auth accounts
for social_auth in UserSocialAuth.objects.filter(user_id=user.id):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. In the original PR where we fixed redact before delete, didn't we already establish use of django calls that avoided looping?
  2. Can we avoid the separate method and just add some hard-coded values? What are all the fields we need to update, and do you have example values that have PII? Just curious. From current redact_user_social_auth_pii it looks like there are multiple values you are clearing out.

redact_user_social_auth_pii(social_auth)
social_auth.delete()
# Change LMS password & email
user.email = get_retired_email_by_email(user.email)
user.set_unusable_password()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@

import csv
import os
from unittest import mock

import pytest
from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user
from django.core.management import CommandError, call_command
from social_django.models import UserSocialAuth

from common.djangoapps.student.tests.factories import UserFactory # lint-amnesty, pylint: disable=wrong-import-order
from openedx.core.djangoapps.user_api.accounts.tests.retirement_helpers import ( # lint-amnesty, pylint: disable=unused-import, wrong-import-order
Expand Down Expand Up @@ -107,3 +109,81 @@ def test_retire_with_username_email_userfile(setup_retirement_states): # lint-a
with pytest.raises(CommandError, match=r'You cannot use userfile option with username and user_email'):
call_command('retire_user', user_file=user_file, username=username, user_email=user_email)
remove_user_file()


@skip_unless_lms
def test_retire_user_redacts_sso_pii_before_deletion(setup_retirement_states): # lint-amnesty, pylint: disable=redefined-outer-name, unused-argument # noqa: F811
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test only checks the record was deleted — it doesn't verify redaction happened before deletion. Use a mock.side_effect on delete_by_user_value to assert the redacted state mid-flow.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated the test. Thanks!

"""
Test that SSO PII is redacted before UserSocialAuth records are deleted during retirement.

This test verifies the order of operations by capturing the record's state
at the moment of deletion to ensure it was already redacted.
"""
user = UserFactory.create(username='sso-user', email='sso-user@example.com')
social_auth = UserSocialAuth.objects.create(
user=user,
provider='google-oauth2',
uid='sso-user@example.com',
extra_data={
'email': 'sso-user@example.com',
'name': 'SSO Test User',
'id': '123456789',
}
)
social_auth_id = social_auth.id

# Capture the state at the moment of deletion to verify redaction happened first
captured_state = {}
original_delete = UserSocialAuth.delete

def capture_state_and_delete(self):
"""Wrapper to capture state before deletion."""
# Refresh from database to get the actual current state
self.refresh_from_db()
captured_state['uid'] = self.uid
captured_state['extra_data'] = dict(self.extra_data) if self.extra_data else {}
# Call original delete
return original_delete(self)

with mock.patch.object(UserSocialAuth, 'delete', capture_state_and_delete):
call_command('retire_user', username=user.username, user_email=user.email)

# Verify that at the moment of deletion, the record was already redacted
assert captured_state['uid'] == f'redacted_{social_auth_id}@retired.invalid', \
"UID should be redacted before deletion"
assert captured_state['extra_data'] == {}, \
"extra_data should be empty before deletion"

# Verify deletion completed
assert not UserSocialAuth.objects.filter(id=social_auth_id).exists()

retired_user_status = UserRetirementStatus.objects.filter(original_username=user.username).first()
assert retired_user_status is not None
assert retired_user_status.original_email == 'sso-user@example.com'


@skip_unless_lms
def test_retire_user_calls_redaction_for_each_social_auth(setup_retirement_states):  # lint-amnesty, pylint: disable=redefined-outer-name, unused-argument # noqa: F811
    """
    Retiring a user with two linked SSO accounts invokes redact_user_social_auth_pii twice.
    """
    user = UserFactory.create(username='multi-sso-user', email='multi-sso@example.com')
    linked_accounts = (
        (
            'google-oauth2',
            'google-multi@example.com',
            {'email': 'google-multi@example.com', 'name': 'Google User'},
        ),
        (
            'tpa-saml',
            'saml-multi@example.com',
            {'email': 'saml-multi@example.com', 'name': 'SAML User', 'uid': 'saml-123'},
        ),
    )
    for provider, uid, extra_data in linked_accounts:
        UserSocialAuth.objects.create(user=user, provider=provider, uid=uid, extra_data=extra_data)

    # Patch the function under the retire_user command module's namespace.
    redact_target = 'openedx.core.djangoapps.user_api.management.commands.retire_user.redact_user_social_auth_pii'
    with mock.patch(redact_target) as mock_redact:
        call_command('retire_user', username=user.username, user_email=user.email)

    assert mock_redact.call_count == 2
Loading