diff --git a/openedx/core/djangoapps/user_api/accounts/__init__.py b/openedx/core/djangoapps/user_api/accounts/__init__.py index faed01a6bb8d..e80aabe4169d 100644 --- a/openedx/core/djangoapps/user_api/accounts/__init__.py +++ b/openedx/core/djangoapps/user_api/accounts/__init__.py @@ -6,6 +6,9 @@ from django.utils.text import format_lazy from django.utils.translation import gettext_lazy as _ +# Import signals to ensure they are registered +from . import signals # noqa: F401, pylint: disable=unused-import + # The maximum length for the bio ("about me") account field BIO_MAX_LENGTH = 300 diff --git a/openedx/core/djangoapps/user_api/accounts/signals.py b/openedx/core/djangoapps/user_api/accounts/signals.py index be7bf75d99ee..7c4026cd4d85 100644 --- a/openedx/core/djangoapps/user_api/accounts/signals.py +++ b/openedx/core/djangoapps/user_api/accounts/signals.py @@ -2,8 +2,24 @@ Django Signal related functionality for user_api accounts """ +import logging -from django.dispatch import Signal +from django.db.models.signals import pre_delete +from django.dispatch import Signal, receiver +from social_django.models import UserSocialAuth + +from .utils import REDACTED_SOCIAL_AUTH_UID_PREFIX, REDACTED_SOCIAL_AUTH_UID_SUFFIX, redact_and_delete_social_auth + +logger = logging.getLogger(__name__) + + +def get_redacted_social_auth_uid(pk): + """ + Return the redacted uid for a UserSocialAuth record. + + This must match the format used in redact_and_delete_social_auth. + """ + return f'{REDACTED_SOCIAL_AUTH_UID_PREFIX}{pk}{REDACTED_SOCIAL_AUTH_UID_SUFFIX}' # Signal to retire a user from LMS-initiated mailings (course mailings, etc) # providing_args=["user"] @@ -16,3 +32,24 @@ # Signal to retire LMS misc information # providing_args=["user"] USER_RETIRE_LMS_MISC = Signal() + + +@receiver(pre_delete, sender=UserSocialAuth) +def redact_social_auth_pii_before_deletion(sender, instance, **kwargs): # pylint: disable=unused-argument + """ + Safety-net signal handler that redacts PII on any UserSocialAuth before deletion. + + Records deleted via ``redact_and_delete_social_auth`` will already be redacted; + this handler is a fallback for any other deletion path. + """ + redacted_uid = get_redacted_social_auth_uid(instance.pk) + + # Safety-net in case the record wasn't redacted before delete. + if instance.extra_data or instance.uid != redacted_uid: + logger.warning( + 'Social auth link for user_id=%s, provider=%s was deleted without first being redacted.' + ' Redacting in pre_delete.', + instance.user_id, + instance.provider, + ) + redact_and_delete_social_auth(instance.user_id, skip_delete=True) diff --git a/openedx/core/djangoapps/user_api/accounts/tests/test_utils.py b/openedx/core/djangoapps/user_api/accounts/tests/test_utils.py index 77ac4ab1ba80..d20a86d2ba43 100644 --- a/openedx/core/djangoapps/user_api/accounts/tests/test_utils.py +++ b/openedx/core/djangoapps/user_api/accounts/tests/test_utils.py @@ -6,10 +6,14 @@ from completion.test_utils import CompletionWaffleTestMixin from django.test import TestCase from django.test.utils import override_settings +from social_django.models import UserSocialAuth from common.djangoapps.student.models import CourseEnrollment from common.djangoapps.student.tests.factories import UserFactory -from openedx.core.djangoapps.user_api.accounts.utils import retrieve_last_sitewide_block_completed +from openedx.core.djangoapps.user_api.accounts.signals import get_redacted_social_auth_uid +from openedx.core.djangoapps.user_api.accounts.utils import ( + retrieve_last_sitewide_block_completed, +) from openedx.core.djangolib.testing.utils import skip_unless_lms from xmodule.modulestore.tests.django_utils import ( SharedModuleStoreTestCase, # lint-amnesty, pylint: disable=wrong-import-order @@ -133,3 +137,142 @@ def test_retrieve_last_sitewide_block_completed(self): ) assert empty_block_url is None + + +@skip_unless_lms +class RedactUserSocialAuthPIITest(TestCase): + """ + Tests for SSO PII redaction before deletion. + """ + + def setUp(self): + super().setUp() + self.user = UserFactory.create(username='testuser', email='testuser@example.com') + + def create_social_auth(self, provider='google-oauth2', uid='user@example.com', extra_data=None): + """ + Helper method to create UserSocialAuth instances for testing. + """ + if extra_data is None: + extra_data = { + 'email': 'user@example.com', + 'name': 'Test User', + 'id': '123456789', + } + return UserSocialAuth.objects.create( + user=self.user, + provider=provider, + uid=uid, + extra_data=extra_data, + ) + + def test_get_redacted_social_auth_uid_format(self): + """ + Test that get_redacted_social_auth_uid returns the expected string format. + + This is the single source of truth for the redacted uid format. + If this test breaks, the bulk retirement Concat/Cast in utils.py and + retire_user.py must also be updated to match. + """ + assert get_redacted_social_auth_uid(42) == 'redacted-before-delete-42@safe.com' + assert get_redacted_social_auth_uid(1) == 'redacted-before-delete-1@safe.com' + + def test_delete_redacts_user_social_auth_pii(self): + """ + Test that deleting social auth redacts uid and extra_data before removal. + """ + social_auth = self.create_social_auth() + social_auth_id = social_auth.id + + captured_states = [] + + def capture_state_before_delete(sender, instance, **kwargs): # pylint: disable=unused-argument + instance.refresh_from_db() + captured_states.append({ + 'id': instance.id, + 'uid': instance.uid, + 'extra_data': dict(instance.extra_data) if instance.extra_data else {}, + }) + + from django.db.models.signals import pre_delete + + pre_delete.connect(capture_state_before_delete, sender=UserSocialAuth) + try: + social_auth.delete() + finally: + pre_delete.disconnect(capture_state_before_delete, sender=UserSocialAuth) + + assert captured_states == [{ + 'id': social_auth_id, + 'uid': get_redacted_social_auth_uid(social_auth_id), + 'extra_data': {}, + }] + assert not UserSocialAuth.objects.filter(id=social_auth_id).exists() + + def test_delete_already_redacted_user_social_auth_is_idempotent(self): + """ + Test that deleting an already redacted social auth keeps the redacted state. + """ + social_auth = self.create_social_auth() + social_auth.uid = get_redacted_social_auth_uid(social_auth.pk) + social_auth.extra_data = {} + social_auth.save(update_fields=['uid', 'extra_data']) + social_auth_id = social_auth.id + + captured_states = [] + + def capture_state_before_delete(sender, instance, **kwargs): # pylint: disable=unused-argument + instance.refresh_from_db() + captured_states.append((instance.uid, instance.extra_data)) + + from django.db.models.signals import pre_delete + + pre_delete.connect(capture_state_before_delete, sender=UserSocialAuth) + try: + social_auth.delete() + finally: + pre_delete.disconnect(capture_state_before_delete, sender=UserSocialAuth) + + assert captured_states == [ + (get_redacted_social_auth_uid(social_auth_id), {}), + ] + assert not UserSocialAuth.objects.filter(id=social_auth_id).exists() + + def test_delete_redacts_multiple_sso_providers(self): + """ + Test that deletion redacts multiple SSO providers before removal. + """ + auths = [ + self.create_social_auth( + provider='google-oauth2', + uid='google@example.com', + extra_data={'email': 'google@example.com', 'name': 'Google User'} + ), + self.create_social_auth( + provider='tpa-saml', + uid='saml@example.com', + extra_data={'email': 'saml@example.com', 'name': 'SAML User', 'uid': 'saml-uid'} + ), + ] + # Save IDs before deletion (they become None after delete) + auth_ids = [auth.pk for auth in auths] + + captured_states = [] + + def capture_state_before_delete(sender, instance, **kwargs): # pylint: disable=unused-argument + instance.refresh_from_db() + captured_states.append((instance.provider, instance.uid, instance.extra_data)) + + from django.db.models.signals import pre_delete + + pre_delete.connect(capture_state_before_delete, sender=UserSocialAuth) + try: + for auth in auths: + auth.delete() + finally: + pre_delete.disconnect(capture_state_before_delete, sender=UserSocialAuth) + + assert sorted(captured_states) == sorted([ + ('google-oauth2', get_redacted_social_auth_uid(auth_ids[0]), {}), + ('tpa-saml', get_redacted_social_auth_uid(auth_ids[1]), {}), + ]) diff --git a/openedx/core/djangoapps/user_api/accounts/utils.py b/openedx/core/djangoapps/user_api/accounts/utils.py index ab70b8f15c8d..12a92318fb2b 100644 --- a/openedx/core/djangoapps/user_api/accounts/utils.py +++ b/openedx/core/djangoapps/user_api/accounts/utils.py @@ -11,6 +11,8 @@ from completion.models import BlockCompletion from completion.waffle import ENABLE_COMPLETION_TRACKING_SWITCH from django.conf import settings +from django.db.models import CharField, Value +from django.db.models.functions import Cast, Concat from django.utils.translation import gettext as _ from edx_django_utils.user import generate_password from social_django.models import UserSocialAuth @@ -23,6 +25,10 @@ from ..models import UserRetirementStatus +# Prefix and suffix used to build a per-record redacted uid for UserSocialAuth. +REDACTED_SOCIAL_AUTH_UID_PREFIX = 'redacted-before-delete-' +REDACTED_SOCIAL_AUTH_UID_SUFFIX = '@safe.com' + ENABLE_SECONDARY_EMAIL_FEATURE_SWITCH = 'enable_secondary_email_feature' LOGGER = logging.getLogger(__name__) @@ -196,6 +202,31 @@ def is_secondary_email_feature_enabled(): return waffle.switch_is_active(ENABLE_SECONDARY_EMAIL_FEATURE_SWITCH) +def redact_and_delete_social_auth(user_id, skip_delete=False): + """ + Redact PII from all UserSocialAuth records for the given user, then delete them. + + Redaction happens before deletion so that any observers see only sanitised data. + Downstream copies of data may use soft-deletes, and redacting before deleting + ensures PII for retired users (or future retirements) is not retained. + The uid format matches ``get_redacted_social_auth_uid()``. + + ``skip_delete`` should only be set to True when called from the pre_delete signal + handler, where deletion is already in progress. + """ + social_auth_queryset = UserSocialAuth.objects.filter(user_id=user_id) + social_auth_queryset.update( + uid=Concat( + Value(REDACTED_SOCIAL_AUTH_UID_PREFIX), + Cast('id', output_field=CharField()), + Value(REDACTED_SOCIAL_AUTH_UID_SUFFIX), + ), + extra_data={}, + ) + if not skip_delete: + social_auth_queryset.delete() + + def create_retirement_request_and_deactivate_account(user): """ Adds user to retirement queue, unlinks social auth accounts, changes user passwords @@ -204,8 +235,8 @@ def create_retirement_request_and_deactivate_account(user): # Add user to retirement queue. UserRetirementStatus.create_retirement(user) - # Unlink LMS social auth accounts - UserSocialAuth.objects.filter(user_id=user.id).delete() + # Redact and unlink LMS social auth accounts. + redact_and_delete_social_auth(user.id) # Change LMS password & email user.email = get_retired_email_by_email(user.email) diff --git a/openedx/core/djangoapps/user_api/management/commands/retire_user.py b/openedx/core/djangoapps/user_api/management/commands/retire_user.py index 1b3b497ea5c7..4959bb31eb0f 100644 --- a/openedx/core/djangoapps/user_api/management/commands/retire_user.py +++ b/openedx/core/djangoapps/user_api/management/commands/retire_user.py @@ -5,11 +5,11 @@ from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user from django.core.management.base import BaseCommand, CommandError from django.db import transaction -from social_django.models import UserSocialAuth from common.djangoapps.student.models import AccountRecovery, Registration, get_retired_email_by_email from openedx.core.djangolib.oauth2_retirement_utils import retire_dot_oauth2_models +from ...accounts.utils import redact_and_delete_social_auth from ...models import BulkUserRetirementConfig, UserRetirementStatus logger = logging.getLogger(__name__) @@ -144,8 +144,8 @@ def handle(self, *args, **options): for user in users: # Add user to retirement queue. UserRetirementStatus.create_retirement(user) - # Unlink LMS social auth accounts - UserSocialAuth.objects.filter(user_id=user.id).delete() + # Redact and unlink LMS social auth accounts. + redact_and_delete_social_auth(user.id) # Change LMS password & email user.email = get_retired_email_by_email(user.email) user.set_unusable_password() diff --git a/openedx/core/djangoapps/user_api/management/tests/test_retire_user.py b/openedx/core/djangoapps/user_api/management/tests/test_retire_user.py index 6de2287fc4ba..b791b8a4cd54 100644 --- a/openedx/core/djangoapps/user_api/management/tests/test_retire_user.py +++ b/openedx/core/djangoapps/user_api/management/tests/test_retire_user.py @@ -9,6 +9,8 @@ import pytest from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user from django.core.management import CommandError, call_command +from django.db.models.signals import pre_delete +from social_django.models import UserSocialAuth from common.djangoapps.student.tests.factories import UserFactory # lint-amnesty, pylint: disable=wrong-import-order from openedx.core.djangoapps.user_api.accounts.tests.retirement_helpers import ( # lint-amnesty, pylint: disable=unused-import, wrong-import-order @@ -16,6 +18,7 @@ ) from openedx.core.djangolib.testing.utils import skip_unless_lms # lint-amnesty, pylint: disable=wrong-import-order +from ...accounts.signals import get_redacted_social_auth_uid from ...models import UserRetirementStatus pytestmark = pytest.mark.django_db @@ -107,3 +110,99 @@ def test_retire_with_username_email_userfile(setup_retirement_states): # lint-a with pytest.raises(CommandError, match=r'You cannot use userfile option with username and user_email'): call_command('retire_user', user_file=user_file, username=username, user_email=user_email) remove_user_file() + + +@skip_unless_lms +def test_retire_user_redacts_sso_pii_before_deletion(setup_retirement_states): # lint-amnesty, pylint: disable=redefined-outer-name, unused-argument # noqa: F811 + """ + Test that SSO PII is redacted before UserSocialAuth records are deleted during retirement. + + This test verifies the order of operations by capturing the record's state + at the moment of deletion to ensure it was already redacted. + """ + user = UserFactory.create(username='sso-user', email='sso-user@example.com') + social_auth = UserSocialAuth.objects.create( + user=user, + provider='google-oauth2', + uid='sso-user@example.com', + extra_data={ + 'email': 'sso-user@example.com', + 'name': 'SSO Test User', + 'id': '123456789', + } + ) + social_auth_id = social_auth.id + + captured_states = [] + + def capture_state_before_delete(sender, instance, **kwargs): # pylint: disable=unused-argument + """Capture the database state seen by the pre_delete signal.""" + instance.refresh_from_db() + captured_states.append({ + 'id': instance.id, + 'uid': instance.uid, + 'extra_data': dict(instance.extra_data) if instance.extra_data else {}, + }) + + pre_delete.connect(capture_state_before_delete, sender=UserSocialAuth) + try: + call_command('retire_user', username=user.username, user_email=user.email) + finally: + pre_delete.disconnect(capture_state_before_delete, sender=UserSocialAuth) + + # Verify that at the moment of deletion, the record was already redacted + assert captured_states == [{ + 'id': social_auth_id, + 'uid': get_redacted_social_auth_uid(social_auth_id), + 'extra_data': {}, + }], \ + "SSO records should be redacted before deletion" + + # Verify deletion completed + assert not UserSocialAuth.objects.filter(id=social_auth_id).exists() + + retired_user_status = UserRetirementStatus.objects.filter(original_username=user.username).first() + assert retired_user_status is not None + assert retired_user_status.original_email == 'sso-user@example.com' + + +@skip_unless_lms +def test_retire_user_redacts_each_social_auth_before_bulk_deletion(setup_retirement_states): # lint-amnesty, pylint: disable=redefined-outer-name, unused-argument # noqa: F811 + """ + Test that each UserSocialAuth record is redacted before bulk deletion during retirement. + """ + user = UserFactory.create(username='multi-sso-user', email='multi-sso@example.com') + google_auth = UserSocialAuth.objects.create( + user=user, + provider='google-oauth2', + uid='google-multi@example.com', + extra_data={'email': 'google-multi@example.com', 'name': 'Google User'} + ) + saml_auth = UserSocialAuth.objects.create( + user=user, + provider='tpa-saml', + uid='saml-multi@example.com', + extra_data={'email': 'saml-multi@example.com', 'name': 'SAML User', 'uid': 'saml-123'} + ) + # Save IDs before deletion (they become None after delete) + google_auth_id = google_auth.id + saml_auth_id = saml_auth.id + + captured_states = [] + + def capture_state_before_delete(sender, instance, **kwargs): # pylint: disable=unused-argument + """Capture the database state seen by the pre_delete signal.""" + instance.refresh_from_db() + extra = dict(instance.extra_data) if instance.extra_data else {} + captured_states.append((instance.provider, instance.uid, extra)) + + pre_delete.connect(capture_state_before_delete, sender=UserSocialAuth) + try: + call_command('retire_user', username=user.username, user_email=user.email) + finally: + pre_delete.disconnect(capture_state_before_delete, sender=UserSocialAuth) + + assert sorted(captured_states) == sorted([ + ('google-oauth2', get_redacted_social_auth_uid(google_auth_id), {}), + ('tpa-saml', get_redacted_social_auth_uid(saml_auth_id), {}), + ])