From 905b3811c81352f2e96d7780a07dd56266255086 Mon Sep 17 00:00:00 2001 From: Rohil Surana Date: Thu, 30 Apr 2026 18:32:19 +0530 Subject: [PATCH 1/8] fix: atomic last-owner guard prevents TOCTOU race on role demotion Add DeleteWithMinRoleGuard to the policy repository that atomically checks the owner count within the DELETE statement itself. This eliminates the race where two concurrent demotion requests both pass the application-level owner count check then both proceed. The SQL condition ensures the DELETE only executes if the policy being removed is either not the guarded role, or at least one other policy with that role exists for the same resource. If the condition fails (last owner), zero rows are affected and ErrLastRoleGuard is returned. The existing validateMinOwnerConstraint remains as a fast-path optimization to avoid unnecessary DB round-trips. --- core/membership/mocks/policy_service.go | 50 +++++++++++++++++ core/membership/service.go | 35 +++++++++++- core/membership/service_test.go | 24 ++++---- core/policy/errors.go | 1 + core/policy/mocks/repository.go | 50 +++++++++++++++++ core/policy/policy.go | 1 + core/policy/service.go | 12 ++++ internal/store/postgres/policy_repository.go | 58 ++++++++++++++++++++ 8 files changed, 218 insertions(+), 13 deletions(-) diff --git a/core/membership/mocks/policy_service.go b/core/membership/mocks/policy_service.go index ab81d7b31..834eee3dc 100644 --- a/core/membership/mocks/policy_service.go +++ b/core/membership/mocks/policy_service.go @@ -127,6 +127,56 @@ func (_c *PolicyService_Delete_Call) RunAndReturn(run func(context.Context, stri return _c } +// DeleteWithMinRoleGuard provides a mock function with given fields: ctx, id, guardRoleID, resourceID, resourceType +func (_m *PolicyService) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error { + ret := _m.Called(ctx, id, guardRoleID, resourceID, resourceType) + + if len(ret) == 0 { + panic("no return value specified for DeleteWithMinRoleGuard") + } + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, string, string, string, string) error); ok { + r0 = rf(ctx, id, guardRoleID, resourceID, resourceType) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// PolicyService_DeleteWithMinRoleGuard_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteWithMinRoleGuard' +type PolicyService_DeleteWithMinRoleGuard_Call struct { + *mock.Call +} + +// DeleteWithMinRoleGuard is a helper method to define mock.On call +// - ctx context.Context +// - id string +// - guardRoleID string +// - resourceID string +// - resourceType string +func (_e *PolicyService_Expecter) DeleteWithMinRoleGuard(ctx interface{}, id interface{}, guardRoleID interface{}, resourceID interface{}, resourceType interface{}) *PolicyService_DeleteWithMinRoleGuard_Call { + return &PolicyService_DeleteWithMinRoleGuard_Call{Call: _e.mock.On("DeleteWithMinRoleGuard", ctx, id, guardRoleID, resourceID, resourceType)} +} + +func (_c *PolicyService_DeleteWithMinRoleGuard_Call) Run(run func(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string)) *PolicyService_DeleteWithMinRoleGuard_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].(string), args[2].(string), args[3].(string), args[4].(string)) + }) + return _c +} + +func (_c *PolicyService_DeleteWithMinRoleGuard_Call) Return(_a0 error) *PolicyService_DeleteWithMinRoleGuard_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *PolicyService_DeleteWithMinRoleGuard_Call) RunAndReturn(run func(context.Context, string, string, string, string) error) *PolicyService_DeleteWithMinRoleGuard_Call { + _c.Call.Return(run) + return _c +} + // List provides a mock function with given fields: ctx, flt func (_m *PolicyService) List(ctx context.Context, flt policy.Filter) ([]policy.Policy, error) { ret := _m.Called(ctx, flt) diff --git a/core/membership/service.go b/core/membership/service.go index c81b0ebc1..63d834f92 100644 --- a/core/membership/service.go +++ b/core/membership/service.go @@ -28,6 +28,7 @@ type PolicyService interface { Create(ctx context.Context, pol policy.Policy) (policy.Policy, error) List(ctx context.Context, flt policy.Filter) ([]policy.Policy, error) Delete(ctx context.Context, id string) error + DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error } type RelationService interface { @@ -221,7 +222,11 @@ func (s *Service) SetOrganizationMemberRole(ctx context.Context, orgID, principa return err } - if err := s.replacePolicy(ctx, orgID, schema.OrganizationNamespace, principalID, principalType, resolvedRoleID, existing); err != nil { + ownerRole, err := s.roleService.Get(ctx, schema.RoleOrganizationOwner) + if err != nil { + return fmt.Errorf("get owner role for guard: %w", err) + } + if err := s.replacePolicyWithOwnerGuard(ctx, orgID, schema.OrganizationNamespace, principalID, principalType, resolvedRoleID, existing, ownerRole.ID); err != nil { return err } @@ -448,6 +453,34 @@ func (s *Service) validateMinOwnerConstraint(ctx context.Context, orgID, newRole return nil } +// replacePolicyWithOwnerGuard deletes existing policies using an atomic SQL guard +// that prevents removing the last owner, then creates the new policy. +func (s *Service) replacePolicyWithOwnerGuard(ctx context.Context, resourceID, resourceType, principalID, principalType, roleID string, existing []policy.Policy, ownerRoleID string) error { + for _, p := range existing { + err := s.policyService.DeleteWithMinRoleGuard(ctx, p.ID, ownerRoleID, resourceID, resourceType) + if err != nil { + if errors.Is(err, policy.ErrLastRoleGuard) { + return ErrLastOwnerRole + } + return fmt.Errorf("delete policy %s: %w", p.ID, err) + } + } + + _, err := s.createPolicy(ctx, resourceID, resourceType, principalID, principalType, roleID) + if err != nil { + s.log.ErrorContext(ctx, "membership state inconsistent: old policies deleted but new policy creation failed, needs manual fix", + "resource_id", resourceID, + "resource_type", resourceType, + "principal_id", principalID, + "principal_type", principalType, + "role_id", roleID, + "error", err, + ) + return err + } + return nil +} + // replacePolicy deletes the given existing policies and creates a new one with the new role. func (s *Service) replacePolicy(ctx context.Context, resourceID, resourceType, principalID, principalType, roleID string, existing []policy.Policy) error { for _, p := range existing { diff --git a/core/membership/service_test.go b/core/membership/service_test.go index 960102619..265eac487 100644 --- a/core/membership/service_test.go +++ b/core/membership/service_test.go @@ -412,10 +412,10 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { userSvc.EXPECT().GetByID(ctx, userID).Return(enabledUser, nil) roleSvc.EXPECT().Get(ctx, viewerRoleID).Return(role.Role{ID: viewerRoleID, Scopes: []string{schema.OrganizationNamespace}}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: ownerRoleID}}, nil) - roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) + roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil).Times(2) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, RoleID: ownerRoleID}).Return([]policy.Policy{{ID: "p1", RoleID: ownerRoleID}, {ID: "p2", RoleID: ownerRoleID}}, nil) - // replace policy - policySvc.EXPECT().Delete(ctx, "p1").Return(nil) + // replace policy with owner guard + policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID, orgID, schema.OrganizationNamespace).Return(nil) policySvc.EXPECT().Create(ctx, policy.Policy{ RoleID: viewerRoleID, ResourceID: orgID, ResourceType: schema.OrganizationNamespace, PrincipalID: userID, PrincipalType: schema.UserPrincipal, @@ -437,9 +437,9 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { roleSvc.EXPECT().Get(ctx, ownerRoleID).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner, Scopes: []string{schema.OrganizationNamespace}}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: viewerRoleID}}, nil) // promoting to owner — min-owner constraint doesn't apply - roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) - // replace policy - policySvc.EXPECT().Delete(ctx, "p1").Return(nil) + roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil).Times(2) + // replace policy with owner guard + policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID, orgID, schema.OrganizationNamespace).Return(nil) policySvc.EXPECT().Create(ctx, policy.Policy{ RoleID: ownerRoleID, ResourceID: orgID, ResourceType: schema.OrganizationNamespace, PrincipalID: userID, PrincipalType: schema.UserPrincipal, @@ -460,9 +460,9 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { userSvc.EXPECT().GetByID(ctx, userID).Return(enabledUser, nil) roleSvc.EXPECT().Get(ctx, viewerRoleID).Return(role.Role{ID: viewerRoleID, Scopes: []string{schema.OrganizationNamespace}}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: ownerRoleID}}, nil) - roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) + roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil).Times(2) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, RoleID: ownerRoleID}).Return([]policy.Policy{{ID: "p1"}, {ID: "p2"}}, nil) - policySvc.EXPECT().Delete(ctx, "p1").Return(nil) + policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID, orgID, schema.OrganizationNamespace).Return(nil) policySvc.EXPECT().Create(ctx, mock.Anything).Return(policy.Policy{}, nil) // relation delete fails with a real error — logged, no rollback relSvc.EXPECT().Delete(ctx, orgRelation(schema.OwnerRelationName)).Return(errors.New("spicedb connection error")) @@ -477,8 +477,8 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { userSvc.EXPECT().GetByID(ctx, userID).Return(enabledUser, nil) roleSvc.EXPECT().Get(ctx, viewerRoleID).Return(role.Role{ID: viewerRoleID, Scopes: []string{schema.OrganizationNamespace}}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: managerRoleID}}, nil) - roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) - policySvc.EXPECT().Delete(ctx, "p1").Return(nil) + roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil).Times(2) + policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID, orgID, schema.OrganizationNamespace).Return(nil) policySvc.EXPECT().Create(ctx, mock.Anything).Return(policy.Policy{}, nil) // both relation deletes return not-found — that's fine, should continue relSvc.EXPECT().Delete(ctx, orgRelation(schema.OwnerRelationName)).Return(relation.ErrNotExist) @@ -544,9 +544,9 @@ func TestService_SetOrganizationMemberRole_ServiceUser(t *testing.T) { mockSuSvc.EXPECT().Get(ctx, suID).Return(serviceuser.ServiceUser{ID: suID, OrgID: orgID, Title: "test-su", State: string(serviceuser.Enabled)}, nil) mockRoleSvc.EXPECT().Get(ctx, viewerRoleID).Return(role.Role{ID: viewerRoleID, Scopes: []string{schema.OrganizationNamespace}}, nil) mockPolicySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: suID, PrincipalType: schema.ServiceUserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: ownerRoleID}}, nil) - mockRoleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) + mockRoleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil).Times(2) mockPolicySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, RoleID: ownerRoleID}).Return([]policy.Policy{{ID: "p1"}, {ID: "p2"}}, nil) - mockPolicySvc.EXPECT().Delete(ctx, "p1").Return(nil) + mockPolicySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID, orgID, schema.OrganizationNamespace).Return(nil) mockPolicySvc.EXPECT().Create(ctx, mock.Anything).Return(policy.Policy{}, nil) mockRelSvc.EXPECT().Delete(ctx, mock.Anything).Return(relation.ErrNotExist).Times(2) mockRelSvc.EXPECT().Create(ctx, mock.Anything).Return(relation.Relation{}, nil) diff --git a/core/policy/errors.go b/core/policy/errors.go index e33649d08..236305b23 100644 --- a/core/policy/errors.go +++ b/core/policy/errors.go @@ -8,4 +8,5 @@ var ( ErrInvalidID = errors.New("policy id is invalid") ErrConflict = errors.New("policy already exist") ErrInvalidDetail = errors.New("invalid policy detail") + ErrLastRoleGuard = errors.New("cannot delete: this is the last policy with the guarded role for this resource") ) diff --git a/core/policy/mocks/repository.go b/core/policy/mocks/repository.go index fdd9172ba..eed4b5f65 100644 --- a/core/policy/mocks/repository.go +++ b/core/policy/mocks/repository.go @@ -126,6 +126,56 @@ func (_c *Repository_Delete_Call) RunAndReturn(run func(context.Context, string) return _c } +// DeleteWithMinRoleGuard provides a mock function with given fields: ctx, id, guardRoleID, resourceID, resourceType +func (_m *Repository) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error { + ret := _m.Called(ctx, id, guardRoleID, resourceID, resourceType) + + if len(ret) == 0 { + panic("no return value specified for DeleteWithMinRoleGuard") + } + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, string, string, string, string) error); ok { + r0 = rf(ctx, id, guardRoleID, resourceID, resourceType) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// Repository_DeleteWithMinRoleGuard_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteWithMinRoleGuard' +type Repository_DeleteWithMinRoleGuard_Call struct { + *mock.Call +} + +// DeleteWithMinRoleGuard is a helper method to define mock.On call +// - ctx context.Context +// - id string +// - guardRoleID string +// - resourceID string +// - resourceType string +func (_e *Repository_Expecter) DeleteWithMinRoleGuard(ctx interface{}, id interface{}, guardRoleID interface{}, resourceID interface{}, resourceType interface{}) *Repository_DeleteWithMinRoleGuard_Call { + return &Repository_DeleteWithMinRoleGuard_Call{Call: _e.mock.On("DeleteWithMinRoleGuard", ctx, id, guardRoleID, resourceID, resourceType)} +} + +func (_c *Repository_DeleteWithMinRoleGuard_Call) Run(run func(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string)) *Repository_DeleteWithMinRoleGuard_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].(string), args[2].(string), args[3].(string), args[4].(string)) + }) + return _c +} + +func (_c *Repository_DeleteWithMinRoleGuard_Call) Return(_a0 error) *Repository_DeleteWithMinRoleGuard_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *Repository_DeleteWithMinRoleGuard_Call) RunAndReturn(run func(context.Context, string, string, string, string) error) *Repository_DeleteWithMinRoleGuard_Call { + _c.Call.Return(run) + return _c +} + // Get provides a mock function with given fields: ctx, id func (_m *Repository) Get(ctx context.Context, id string) (policy.Policy, error) { ret := _m.Called(ctx, id) diff --git a/core/policy/policy.go b/core/policy/policy.go index ce2aaecf6..9d13ee764 100644 --- a/core/policy/policy.go +++ b/core/policy/policy.go @@ -13,6 +13,7 @@ type Repository interface { Count(ctx context.Context, f Filter) (int64, error) Upsert(ctx context.Context, pol Policy) (Policy, error) Delete(ctx context.Context, id string) error + DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error GroupMemberCount(ctx context.Context, IDs []string) ([]MemberCount, error) ProjectMemberCount(ctx context.Context, IDs []string) ([]MemberCount, error) OrgMemberCount(ctx context.Context, ID string) (MemberCount, error) diff --git a/core/policy/service.go b/core/policy/service.go index 93075d4c6..62ed62942 100644 --- a/core/policy/service.go +++ b/core/policy/service.go @@ -89,6 +89,18 @@ func (s Service) Delete(ctx context.Context, id string) error { return s.repository.Delete(ctx, id) } +func (s Service) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error { + if err := s.relationService.Delete(ctx, relation.Relation{ + Object: relation.Object{ + ID: id, + Namespace: schema.RoleBindingNamespace, + }, + }); err != nil { + return err + } + return s.repository.DeleteWithMinRoleGuard(ctx, id, guardRoleID, resourceID, resourceType) +} + // AssignRole Note: ideally this should be in a single transaction // read more about how user defined roles work in spicedb https://authzed.com/blog/user-defined-roles func (s Service) AssignRole(ctx context.Context, pol Policy) error { diff --git a/internal/store/postgres/policy_repository.go b/internal/store/postgres/policy_repository.go index cb834a479..26124390d 100644 --- a/internal/store/postgres/policy_repository.go +++ b/internal/store/postgres/policy_repository.go @@ -363,6 +363,64 @@ func (r PolicyRepository) Delete(ctx context.Context, id string) error { return nil } +// DeleteWithMinRoleGuard atomically deletes a policy only if at least one other +// policy with the same guarded role remains for the given resource. This prevents +// the TOCTOU race where concurrent requests both pass a count check then both delete. +func (r PolicyRepository) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error { + existingPolicy, err := r.Get(ctx, id) + if err != nil { + return err + } + + if err := r.dbc.WithTxn(ctx, sql.TxOptions{}, func(tx *sqlx.Tx) error { + return r.dbc.WithTimeout(ctx, TABLE_POLICIES, "DeleteWithMinRoleGuard", func(ctx context.Context) error { + query := `DELETE FROM policies WHERE id = $1 AND ( + role_id != $2 + OR (SELECT COUNT(*) FROM policies + WHERE resource_id = $3 + AND resource_type = $4 + AND role_id = $2 + AND id != $1 + ) > 0 + )` + result, err := tx.ExecContext(ctx, query, id, guardRoleID, resourceID, resourceType) + if err != nil { + return err + } + rowsAffected, err := result.RowsAffected() + if err != nil { + return err + } + if rowsAffected == 0 { + return policy.ErrLastRoleGuard + } + + policyDB := Policy{ + ID: existingPolicy.ID, + RoleID: existingPolicy.RoleID, + ResourceID: existingPolicy.ResourceID, + ResourceType: existingPolicy.ResourceType, + PrincipalID: existingPolicy.PrincipalID, + PrincipalType: existingPolicy.PrincipalType, + } + auditRecord := r.buildPolicyAuditRecord(ctx, tx, auditrecord.PolicyDeletedEvent, policyDB, time.Now(), nil) + return InsertAuditRecordInTx(ctx, tx, auditRecord) + }) + }); err != nil { + if errors.Is(err, policy.ErrLastRoleGuard) { + return err + } + err = checkPostgresError(err) + switch { + case errors.Is(err, sql.ErrNoRows): + return policy.ErrNotExist + default: + return err + } + } + return nil +} + func (r PolicyRepository) GroupMemberCount(ctx context.Context, groupIDs []string) ([]policy.MemberCount, error) { if len(groupIDs) == 0 { return nil, policy.ErrInvalidID From 1b009c2cce6f9859b9662d153649afb8c530ba7f Mon Sep 17 00:00:00 2001 From: Rohil Surana Date: Mon, 4 May 2026 14:51:22 +0530 Subject: [PATCH 2/8] fix: address review feedback on TOCTOU last-owner guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Flip SpiceDB/DB ordering — DB delete first, then SpiceDB relation removal. Prevents inconsistent state if the guard rejects the delete. 2. Use SELECT FOR UPDATE in CTE to serialize concurrent deletions under READ COMMITTED isolation. Two concurrent DELETEs of different owner rows now block on the row lock instead of both proceeding. 3. Use TABLE_POLICIES constant instead of hardcoded "policies" string. 4. Derive resource_id/resource_type from existingPolicy inside the repository instead of trusting caller-supplied values. 5. Fetch owner role once — validateMinOwnerConstraint now returns the resolved ownerRoleID for reuse, eliminating the duplicate Get call. --- core/membership/mocks/policy_service.go | 22 ++++++------- core/membership/service.go | 30 +++++++++--------- core/membership/service_test.go | 20 ++++++------ core/policy/mocks/repository.go | 22 ++++++------- core/policy/policy.go | 2 +- core/policy/service.go | 12 +++---- internal/store/postgres/policy_repository.go | 33 +++++++++++++------- 7 files changed, 72 insertions(+), 69 deletions(-) diff --git a/core/membership/mocks/policy_service.go b/core/membership/mocks/policy_service.go index 834eee3dc..c2e1b4dac 100644 --- a/core/membership/mocks/policy_service.go +++ b/core/membership/mocks/policy_service.go @@ -127,17 +127,17 @@ func (_c *PolicyService_Delete_Call) RunAndReturn(run func(context.Context, stri return _c } -// DeleteWithMinRoleGuard provides a mock function with given fields: ctx, id, guardRoleID, resourceID, resourceType -func (_m *PolicyService) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error { - ret := _m.Called(ctx, id, guardRoleID, resourceID, resourceType) +// DeleteWithMinRoleGuard provides a mock function with given fields: ctx, id, guardRoleID +func (_m *PolicyService) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string) error { + ret := _m.Called(ctx, id, guardRoleID) if len(ret) == 0 { panic("no return value specified for DeleteWithMinRoleGuard") } var r0 error - if rf, ok := ret.Get(0).(func(context.Context, string, string, string, string) error); ok { - r0 = rf(ctx, id, guardRoleID, resourceID, resourceType) + if rf, ok := ret.Get(0).(func(context.Context, string, string) error); ok { + r0 = rf(ctx, id, guardRoleID) } else { r0 = ret.Error(0) } @@ -154,15 +154,13 @@ type PolicyService_DeleteWithMinRoleGuard_Call struct { // - ctx context.Context // - id string // - guardRoleID string -// - resourceID string -// - resourceType string -func (_e *PolicyService_Expecter) DeleteWithMinRoleGuard(ctx interface{}, id interface{}, guardRoleID interface{}, resourceID interface{}, resourceType interface{}) *PolicyService_DeleteWithMinRoleGuard_Call { - return &PolicyService_DeleteWithMinRoleGuard_Call{Call: _e.mock.On("DeleteWithMinRoleGuard", ctx, id, guardRoleID, resourceID, resourceType)} +func (_e *PolicyService_Expecter) DeleteWithMinRoleGuard(ctx interface{}, id interface{}, guardRoleID interface{}) *PolicyService_DeleteWithMinRoleGuard_Call { + return &PolicyService_DeleteWithMinRoleGuard_Call{Call: _e.mock.On("DeleteWithMinRoleGuard", ctx, id, guardRoleID)} } -func (_c *PolicyService_DeleteWithMinRoleGuard_Call) Run(run func(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string)) *PolicyService_DeleteWithMinRoleGuard_Call { +func (_c *PolicyService_DeleteWithMinRoleGuard_Call) Run(run func(ctx context.Context, id string, guardRoleID string)) *PolicyService_DeleteWithMinRoleGuard_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(context.Context), args[1].(string), args[2].(string), args[3].(string), args[4].(string)) + run(args[0].(context.Context), args[1].(string), args[2].(string)) }) return _c } @@ -172,7 +170,7 @@ func (_c *PolicyService_DeleteWithMinRoleGuard_Call) Return(_a0 error) *PolicySe return _c } -func (_c *PolicyService_DeleteWithMinRoleGuard_Call) RunAndReturn(run func(context.Context, string, string, string, string) error) *PolicyService_DeleteWithMinRoleGuard_Call { +func (_c *PolicyService_DeleteWithMinRoleGuard_Call) RunAndReturn(run func(context.Context, string, string) error) *PolicyService_DeleteWithMinRoleGuard_Call { _c.Call.Return(run) return _c } diff --git a/core/membership/service.go b/core/membership/service.go index 63d834f92..a0cfbe4f6 100644 --- a/core/membership/service.go +++ b/core/membership/service.go @@ -28,7 +28,7 @@ type PolicyService interface { Create(ctx context.Context, pol policy.Policy) (policy.Policy, error) List(ctx context.Context, flt policy.Filter) ([]policy.Policy, error) Delete(ctx context.Context, id string) error - DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error + DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string) error } type RelationService interface { @@ -218,15 +218,12 @@ func (s *Service) SetOrganizationMemberRole(ctx context.Context, orgID, principa return nil } - if err := s.validateMinOwnerConstraint(ctx, orgID, resolvedRoleID, existing); err != nil { + ownerRoleID, err := s.validateMinOwnerConstraint(ctx, orgID, resolvedRoleID, existing) + if err != nil { return err } - ownerRole, err := s.roleService.Get(ctx, schema.RoleOrganizationOwner) - if err != nil { - return fmt.Errorf("get owner role for guard: %w", err) - } - if err := s.replacePolicyWithOwnerGuard(ctx, orgID, schema.OrganizationNamespace, principalID, principalType, resolvedRoleID, existing, ownerRole.ID); err != nil { + if err := s.replacePolicyWithOwnerGuard(ctx, orgID, schema.OrganizationNamespace, principalID, principalType, resolvedRoleID, existing, ownerRoleID); err != nil { return err } @@ -276,7 +273,7 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal return ErrNotMember } - if err = s.validateMinOwnerConstraint(ctx, orgID, "", orgPolicies); err != nil { + if _, err = s.validateMinOwnerConstraint(ctx, orgID, "", orgPolicies); err != nil { return err } @@ -416,15 +413,16 @@ func (s *Service) removeRelations(ctx context.Context, resourceID, resourceType, } // validateMinOwnerConstraint ensures the org always has at least one owner after a role change. -func (s *Service) validateMinOwnerConstraint(ctx context.Context, orgID, newRoleID string, existing []policy.Policy) error { +// Returns the resolved owner role ID for reuse by callers. +func (s *Service) validateMinOwnerConstraint(ctx context.Context, orgID, newRoleID string, existing []policy.Policy) (string, error) { ownerRole, err := s.roleService.Get(ctx, schema.RoleOrganizationOwner) if err != nil { - return fmt.Errorf("get owner role: %w", err) + return "", fmt.Errorf("get owner role: %w", err) } // no constraint if promoting to owner if newRoleID == ownerRole.ID { - return nil + return ownerRole.ID, nil } // no constraint if user is not currently an owner @@ -436,7 +434,7 @@ func (s *Service) validateMinOwnerConstraint(ctx context.Context, orgID, newRole } } if !isCurrentlyOwner { - return nil + return ownerRole.ID, nil } // user is owner, being demoted — make sure at least one other owner remains @@ -445,19 +443,19 @@ func (s *Service) validateMinOwnerConstraint(ctx context.Context, orgID, newRole RoleID: ownerRole.ID, }) if err != nil { - return fmt.Errorf("list owner policies: %w", err) + return "", fmt.Errorf("list owner policies: %w", err) } if len(ownerPolicies) <= 1 { - return ErrLastOwnerRole + return "", ErrLastOwnerRole } - return nil + return ownerRole.ID, nil } // replacePolicyWithOwnerGuard deletes existing policies using an atomic SQL guard // that prevents removing the last owner, then creates the new policy. func (s *Service) replacePolicyWithOwnerGuard(ctx context.Context, resourceID, resourceType, principalID, principalType, roleID string, existing []policy.Policy, ownerRoleID string) error { for _, p := range existing { - err := s.policyService.DeleteWithMinRoleGuard(ctx, p.ID, ownerRoleID, resourceID, resourceType) + err := s.policyService.DeleteWithMinRoleGuard(ctx, p.ID, ownerRoleID) if err != nil { if errors.Is(err, policy.ErrLastRoleGuard) { return ErrLastOwnerRole diff --git a/core/membership/service_test.go b/core/membership/service_test.go index 265eac487..beafe2c50 100644 --- a/core/membership/service_test.go +++ b/core/membership/service_test.go @@ -412,10 +412,10 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { userSvc.EXPECT().GetByID(ctx, userID).Return(enabledUser, nil) roleSvc.EXPECT().Get(ctx, viewerRoleID).Return(role.Role{ID: viewerRoleID, Scopes: []string{schema.OrganizationNamespace}}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: ownerRoleID}}, nil) - roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil).Times(2) + roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, RoleID: ownerRoleID}).Return([]policy.Policy{{ID: "p1", RoleID: ownerRoleID}, {ID: "p2", RoleID: ownerRoleID}}, nil) // replace policy with owner guard - policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID, orgID, schema.OrganizationNamespace).Return(nil) + policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID).Return(nil) policySvc.EXPECT().Create(ctx, policy.Policy{ RoleID: viewerRoleID, ResourceID: orgID, ResourceType: schema.OrganizationNamespace, PrincipalID: userID, PrincipalType: schema.UserPrincipal, @@ -437,9 +437,9 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { roleSvc.EXPECT().Get(ctx, ownerRoleID).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner, Scopes: []string{schema.OrganizationNamespace}}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: viewerRoleID}}, nil) // promoting to owner — min-owner constraint doesn't apply - roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil).Times(2) + roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) // replace policy with owner guard - policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID, orgID, schema.OrganizationNamespace).Return(nil) + policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID).Return(nil) policySvc.EXPECT().Create(ctx, policy.Policy{ RoleID: ownerRoleID, ResourceID: orgID, ResourceType: schema.OrganizationNamespace, PrincipalID: userID, PrincipalType: schema.UserPrincipal, @@ -460,9 +460,9 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { userSvc.EXPECT().GetByID(ctx, userID).Return(enabledUser, nil) roleSvc.EXPECT().Get(ctx, viewerRoleID).Return(role.Role{ID: viewerRoleID, Scopes: []string{schema.OrganizationNamespace}}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: ownerRoleID}}, nil) - roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil).Times(2) + roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, RoleID: ownerRoleID}).Return([]policy.Policy{{ID: "p1"}, {ID: "p2"}}, nil) - policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID, orgID, schema.OrganizationNamespace).Return(nil) + policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID).Return(nil) policySvc.EXPECT().Create(ctx, mock.Anything).Return(policy.Policy{}, nil) // relation delete fails with a real error — logged, no rollback relSvc.EXPECT().Delete(ctx, orgRelation(schema.OwnerRelationName)).Return(errors.New("spicedb connection error")) @@ -477,8 +477,8 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { userSvc.EXPECT().GetByID(ctx, userID).Return(enabledUser, nil) roleSvc.EXPECT().Get(ctx, viewerRoleID).Return(role.Role{ID: viewerRoleID, Scopes: []string{schema.OrganizationNamespace}}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: managerRoleID}}, nil) - roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil).Times(2) - policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID, orgID, schema.OrganizationNamespace).Return(nil) + roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) + policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID).Return(nil) policySvc.EXPECT().Create(ctx, mock.Anything).Return(policy.Policy{}, nil) // both relation deletes return not-found — that's fine, should continue relSvc.EXPECT().Delete(ctx, orgRelation(schema.OwnerRelationName)).Return(relation.ErrNotExist) @@ -544,9 +544,9 @@ func TestService_SetOrganizationMemberRole_ServiceUser(t *testing.T) { mockSuSvc.EXPECT().Get(ctx, suID).Return(serviceuser.ServiceUser{ID: suID, OrgID: orgID, Title: "test-su", State: string(serviceuser.Enabled)}, nil) mockRoleSvc.EXPECT().Get(ctx, viewerRoleID).Return(role.Role{ID: viewerRoleID, Scopes: []string{schema.OrganizationNamespace}}, nil) mockPolicySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: suID, PrincipalType: schema.ServiceUserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: ownerRoleID}}, nil) - mockRoleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil).Times(2) + mockRoleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) mockPolicySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, RoleID: ownerRoleID}).Return([]policy.Policy{{ID: "p1"}, {ID: "p2"}}, nil) - mockPolicySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID, orgID, schema.OrganizationNamespace).Return(nil) + mockPolicySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID).Return(nil) mockPolicySvc.EXPECT().Create(ctx, mock.Anything).Return(policy.Policy{}, nil) mockRelSvc.EXPECT().Delete(ctx, mock.Anything).Return(relation.ErrNotExist).Times(2) mockRelSvc.EXPECT().Create(ctx, mock.Anything).Return(relation.Relation{}, nil) diff --git a/core/policy/mocks/repository.go b/core/policy/mocks/repository.go index eed4b5f65..7486bbebe 100644 --- a/core/policy/mocks/repository.go +++ b/core/policy/mocks/repository.go @@ -126,17 +126,17 @@ func (_c *Repository_Delete_Call) RunAndReturn(run func(context.Context, string) return _c } -// DeleteWithMinRoleGuard provides a mock function with given fields: ctx, id, guardRoleID, resourceID, resourceType -func (_m *Repository) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error { - ret := _m.Called(ctx, id, guardRoleID, resourceID, resourceType) +// DeleteWithMinRoleGuard provides a mock function with given fields: ctx, id, guardRoleID +func (_m *Repository) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string) error { + ret := _m.Called(ctx, id, guardRoleID) if len(ret) == 0 { panic("no return value specified for DeleteWithMinRoleGuard") } var r0 error - if rf, ok := ret.Get(0).(func(context.Context, string, string, string, string) error); ok { - r0 = rf(ctx, id, guardRoleID, resourceID, resourceType) + if rf, ok := ret.Get(0).(func(context.Context, string, string) error); ok { + r0 = rf(ctx, id, guardRoleID) } else { r0 = ret.Error(0) } @@ -153,15 +153,13 @@ type Repository_DeleteWithMinRoleGuard_Call struct { // - ctx context.Context // - id string // - guardRoleID string -// - resourceID string -// - resourceType string -func (_e *Repository_Expecter) DeleteWithMinRoleGuard(ctx interface{}, id interface{}, guardRoleID interface{}, resourceID interface{}, resourceType interface{}) *Repository_DeleteWithMinRoleGuard_Call { - return &Repository_DeleteWithMinRoleGuard_Call{Call: _e.mock.On("DeleteWithMinRoleGuard", ctx, id, guardRoleID, resourceID, resourceType)} +func (_e *Repository_Expecter) DeleteWithMinRoleGuard(ctx interface{}, id interface{}, guardRoleID interface{}) *Repository_DeleteWithMinRoleGuard_Call { + return &Repository_DeleteWithMinRoleGuard_Call{Call: _e.mock.On("DeleteWithMinRoleGuard", ctx, id, guardRoleID)} } -func (_c *Repository_DeleteWithMinRoleGuard_Call) Run(run func(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string)) *Repository_DeleteWithMinRoleGuard_Call { +func (_c *Repository_DeleteWithMinRoleGuard_Call) Run(run func(ctx context.Context, id string, guardRoleID string)) *Repository_DeleteWithMinRoleGuard_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(context.Context), args[1].(string), args[2].(string), args[3].(string), args[4].(string)) + run(args[0].(context.Context), args[1].(string), args[2].(string)) }) return _c } @@ -171,7 +169,7 @@ func (_c *Repository_DeleteWithMinRoleGuard_Call) Return(_a0 error) *Repository_ return _c } -func (_c *Repository_DeleteWithMinRoleGuard_Call) RunAndReturn(run func(context.Context, string, string, string, string) error) *Repository_DeleteWithMinRoleGuard_Call { +func (_c *Repository_DeleteWithMinRoleGuard_Call) RunAndReturn(run func(context.Context, string, string) error) *Repository_DeleteWithMinRoleGuard_Call { _c.Call.Return(run) return _c } diff --git a/core/policy/policy.go b/core/policy/policy.go index 9d13ee764..7287d10b6 100644 --- a/core/policy/policy.go +++ b/core/policy/policy.go @@ -13,7 +13,7 @@ type Repository interface { Count(ctx context.Context, f Filter) (int64, error) Upsert(ctx context.Context, pol Policy) (Policy, error) Delete(ctx context.Context, id string) error - DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error + DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string) error GroupMemberCount(ctx context.Context, IDs []string) ([]MemberCount, error) ProjectMemberCount(ctx context.Context, IDs []string) ([]MemberCount, error) OrgMemberCount(ctx context.Context, ID string) (MemberCount, error) diff --git a/core/policy/service.go b/core/policy/service.go index 62ed62942..e1a432d31 100644 --- a/core/policy/service.go +++ b/core/policy/service.go @@ -89,16 +89,16 @@ func (s Service) Delete(ctx context.Context, id string) error { return s.repository.Delete(ctx, id) } -func (s Service) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error { - if err := s.relationService.Delete(ctx, relation.Relation{ +func (s Service) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string) error { + if err := s.repository.DeleteWithMinRoleGuard(ctx, id, guardRoleID); err != nil { + return err + } + return s.relationService.Delete(ctx, relation.Relation{ Object: relation.Object{ ID: id, Namespace: schema.RoleBindingNamespace, }, - }); err != nil { - return err - } - return s.repository.DeleteWithMinRoleGuard(ctx, id, guardRoleID, resourceID, resourceType) + }) } // AssignRole Note: ideally this should be in a single transaction diff --git a/internal/store/postgres/policy_repository.go b/internal/store/postgres/policy_repository.go index 26124390d..8596fba4c 100644 --- a/internal/store/postgres/policy_repository.go +++ b/internal/store/postgres/policy_repository.go @@ -364,9 +364,11 @@ func (r PolicyRepository) Delete(ctx context.Context, id string) error { } // DeleteWithMinRoleGuard atomically deletes a policy only if at least one other -// policy with the same guarded role remains for the given resource. This prevents -// the TOCTOU race where concurrent requests both pass a count check then both delete. -func (r PolicyRepository) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error { +// policy with the same guarded role remains for the resource. Uses SELECT FOR UPDATE +// to serialize concurrent deletions under READ COMMITTED isolation, preventing the +// TOCTOU race where two concurrent requests both pass a count check then both delete. +// Resource ID and type are derived from the existing policy, not from caller input. +func (r PolicyRepository) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string) error { existingPolicy, err := r.Get(ctx, id) if err != nil { return err @@ -374,16 +376,23 @@ func (r PolicyRepository) DeleteWithMinRoleGuard(ctx context.Context, id string, if err := r.dbc.WithTxn(ctx, sql.TxOptions{}, func(tx *sqlx.Tx) error { return r.dbc.WithTimeout(ctx, TABLE_POLICIES, "DeleteWithMinRoleGuard", func(ctx context.Context) error { - query := `DELETE FROM policies WHERE id = $1 AND ( - role_id != $2 - OR (SELECT COUNT(*) FROM policies - WHERE resource_id = $3 - AND resource_type = $4 - AND role_id = $2 - AND id != $1 - ) > 0 + query := `WITH locked AS ( + SELECT id FROM ` + TABLE_POLICIES + ` + WHERE resource_id = $2 + AND resource_type = $3 + AND role_id = $4 + FOR UPDATE + ) + DELETE FROM ` + TABLE_POLICIES + ` WHERE id = $1 AND ( + (SELECT role_id FROM ` + TABLE_POLICIES + ` WHERE id = $1) != $4 + OR (SELECT COUNT(*) FROM locked WHERE id != $1) > 0 )` - result, err := tx.ExecContext(ctx, query, id, guardRoleID, resourceID, resourceType) + result, err := tx.ExecContext(ctx, query, + id, + existingPolicy.ResourceID, + existingPolicy.ResourceType, + guardRoleID, + ) if err != nil { return err } From 0c8d5a2e81e3e021ee74a43f705058c3dda2fe36 Mon Sep 17 00:00:00 2001 From: Rohil Surana Date: Mon, 4 May 2026 21:15:43 +0530 Subject: [PATCH 3/8] fix: address round 2 review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Only use guarded delete for owner policies, plain Delete for non-owner (avoids unnecessary locking/serialization) - Apply guarded delete in RemoveOrganizationMember flow too (same TOCTOU) - Add unit test for ErrLastRoleGuard → ErrLastOwnerRole mapping - Fix test expectations for non-owner policy deletions --- core/membership/service.go | 47 ++++++++++++++++++++++++--------- core/membership/service_test.go | 23 +++++++++++++--- 2 files changed, 54 insertions(+), 16 deletions(-) diff --git a/core/membership/service.go b/core/membership/service.go index a0cfbe4f6..d1c25d0cc 100644 --- a/core/membership/service.go +++ b/core/membership/service.go @@ -273,7 +273,8 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal return ErrNotMember } - if _, err = s.validateMinOwnerConstraint(ctx, orgID, "", orgPolicies); err != nil { + ownerRoleID, err := s.validateMinOwnerConstraint(ctx, orgID, "", orgPolicies) + if err != nil { return err } @@ -307,13 +308,13 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal // delete sub-resource policies first (projects, groups), then relations, // then org policies last — so a retry after partial failure won't hit ErrNotMember - var orgPolicyIDs []string + var orgPoliciesToDelete []policy.Policy var errs error for _, pol := range allPolicies { switch pol.ResourceType { case schema.OrganizationNamespace: if pol.ResourceID == orgID { - orgPolicyIDs = append(orgPolicyIDs, pol.ID) + orgPoliciesToDelete = append(orgPoliciesToDelete, pol) } case schema.ProjectNamespace: if _, ok := orgProjectIDSet[pol.ResourceID]; ok { @@ -376,17 +377,31 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal } } - // delete org-level policies last - for _, policyID := range orgPolicyIDs { - if err := s.policyService.Delete(ctx, policyID); err != nil { + // delete org-level policies last, using guarded delete for owner policies + for _, pol := range orgPoliciesToDelete { + if pol.RoleID == ownerRoleID { + if err := s.policyService.DeleteWithMinRoleGuard(ctx, pol.ID, ownerRoleID); err != nil { + if errors.Is(err, policy.ErrLastRoleGuard) { + return ErrLastOwnerRole + } + s.log.Error("partial failure removing member: org policy deletion failed, manual cleanup may be needed", + "org_id", orgID, + "policy_id", pol.ID, + "principal_id", principalID, + "principal_type", principalType, + "error", err, + ) + return fmt.Errorf("delete org policy %s: %w", pol.ID, err) + } + } else if err := s.policyService.Delete(ctx, pol.ID); err != nil { s.log.Error("partial failure removing member: org policy deletion failed, manual cleanup may be needed", "org_id", orgID, - "policy_id", policyID, + "policy_id", pol.ID, "principal_id", principalID, "principal_type", principalType, "error", err, ) - return fmt.Errorf("delete org policy %s: %w", policyID, err) + return fmt.Errorf("delete org policy %s: %w", pol.ID, err) } } @@ -455,12 +470,18 @@ func (s *Service) validateMinOwnerConstraint(ctx context.Context, orgID, newRole // that prevents removing the last owner, then creates the new policy. func (s *Service) replacePolicyWithOwnerGuard(ctx context.Context, resourceID, resourceType, principalID, principalType, roleID string, existing []policy.Policy, ownerRoleID string) error { for _, p := range existing { - err := s.policyService.DeleteWithMinRoleGuard(ctx, p.ID, ownerRoleID) - if err != nil { - if errors.Is(err, policy.ErrLastRoleGuard) { - return ErrLastOwnerRole + if p.RoleID == ownerRoleID { + err := s.policyService.DeleteWithMinRoleGuard(ctx, p.ID, ownerRoleID) + if err != nil { + if errors.Is(err, policy.ErrLastRoleGuard) { + return ErrLastOwnerRole + } + return fmt.Errorf("delete policy %s: %w", p.ID, err) + } + } else { + if err := s.policyService.Delete(ctx, p.ID); err != nil { + return fmt.Errorf("delete policy %s: %w", p.ID, err) } - return fmt.Errorf("delete policy %s: %w", p.ID, err) } } diff --git a/core/membership/service_test.go b/core/membership/service_test.go index beafe2c50..f3950b5ce 100644 --- a/core/membership/service_test.go +++ b/core/membership/service_test.go @@ -405,6 +405,22 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { roleID: managerRoleID, wantErr: membership.ErrLastOwnerRole, }, + { + name: "should return ErrLastOwnerRole when DB guard rejects concurrent demotion", + setup: func(policySvc *mocks.PolicyService, _ *mocks.RelationService, roleSvc *mocks.RoleService, orgSvc *mocks.OrgService, userSvc *mocks.UserService, _ *mocks.AuditRecordRepository) { + orgSvc.EXPECT().Get(ctx, orgID).Return(enabledOrg, nil) + userSvc.EXPECT().GetByID(ctx, userID).Return(enabledUser, nil) + roleSvc.EXPECT().Get(ctx, viewerRoleID).Return(role.Role{ID: viewerRoleID, Scopes: []string{schema.OrganizationNamespace}}, nil) + policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: ownerRoleID}}, nil) + roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) + // app-level check passes (sees 2 owners) + policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, RoleID: ownerRoleID}).Return([]policy.Policy{{ID: "p1"}, {ID: "p2"}}, nil) + // DB-level guard rejects (concurrent request already deleted the other owner) + policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID).Return(policy.ErrLastRoleGuard) + }, + roleID: viewerRoleID, + wantErr: membership.ErrLastOwnerRole, + }, { name: "should succeed demoting owner to viewer with multiple owners", setup: func(policySvc *mocks.PolicyService, relSvc *mocks.RelationService, roleSvc *mocks.RoleService, orgSvc *mocks.OrgService, userSvc *mocks.UserService, auditRepo *mocks.AuditRecordRepository) { @@ -438,8 +454,8 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: viewerRoleID}}, nil) // promoting to owner — min-owner constraint doesn't apply roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) - // replace policy with owner guard - policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID).Return(nil) + // existing policy is viewer (non-owner), uses plain Delete + policySvc.EXPECT().Delete(ctx, "p1").Return(nil) policySvc.EXPECT().Create(ctx, policy.Policy{ RoleID: ownerRoleID, ResourceID: orgID, ResourceType: schema.OrganizationNamespace, PrincipalID: userID, PrincipalType: schema.UserPrincipal, @@ -478,7 +494,8 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { roleSvc.EXPECT().Get(ctx, viewerRoleID).Return(role.Role{ID: viewerRoleID, Scopes: []string{schema.OrganizationNamespace}}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: managerRoleID}}, nil) roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) - policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID).Return(nil) + // existing policy is manager (non-owner), uses plain Delete + policySvc.EXPECT().Delete(ctx, "p1").Return(nil) policySvc.EXPECT().Create(ctx, mock.Anything).Return(policy.Policy{}, nil) // both relation deletes return not-found — that's fine, should continue relSvc.EXPECT().Delete(ctx, orgRelation(schema.OwnerRelationName)).Return(relation.ErrNotExist) From 4fdd3fb0474e2da33b97c2cae2d21cd753b6441e Mon Sep 17 00:00:00 2001 From: Rohil Surana Date: Tue, 5 May 2026 12:57:11 +0530 Subject: [PATCH 4/8] fix: move guarded org policy delete before relation cleanup in RemoveOrganizationMember If the guard rejects (last owner), we now return early before touching any SpiceDB relations, preventing partial state modification on rejection. --- core/membership/service.go | 61 ++++++++++++++++----------------- core/membership/service_test.go | 8 +++-- 2 files changed, 35 insertions(+), 34 deletions(-) diff --git a/core/membership/service.go b/core/membership/service.go index d1c25d0cc..680f9496a 100644 --- a/core/membership/service.go +++ b/core/membership/service.go @@ -340,7 +340,36 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal return errs } - // remove relations at group level + // delete org-level owner policies first via guarded delete — if the guard rejects + // (last owner), we return early before touching any relations or other policies + for _, pol := range orgPoliciesToDelete { + if pol.RoleID == ownerRoleID { + if err := s.policyService.DeleteWithMinRoleGuard(ctx, pol.ID, ownerRoleID); err != nil { + if errors.Is(err, policy.ErrLastRoleGuard) { + return ErrLastOwnerRole + } + s.log.Error("partial failure removing member: org policy deletion failed, manual cleanup may be needed", + "org_id", orgID, + "policy_id", pol.ID, + "principal_id", principalID, + "principal_type", principalType, + "error", err, + ) + return fmt.Errorf("delete org policy %s: %w", pol.ID, err) + } + } else if err := s.policyService.Delete(ctx, pol.ID); err != nil { + s.log.Error("partial failure removing member: org policy deletion failed, manual cleanup may be needed", + "org_id", orgID, + "policy_id", pol.ID, + "principal_id", principalID, + "principal_type", principalType, + "error", err, + ) + return fmt.Errorf("delete org policy %s: %w", pol.ID, err) + } + } + + // guarded deletes passed — safe to clean up relations for _, g := range orgGroups { if err := s.removeRelations(ctx, g.ID, schema.GroupNamespace, principalID, principalType); err != nil { s.log.Error("partial failure removing member: group relation cleanup failed, manual cleanup may be needed", @@ -354,7 +383,6 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal } } - // remove relations at org level if err := s.removeRelations(ctx, orgID, schema.OrganizationNamespace, principalID, principalType); err != nil { s.log.Error("partial failure removing member: org relation cleanup failed, manual cleanup may be needed", "org_id", orgID, @@ -365,7 +393,6 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal return fmt.Errorf("remove org relations: %w", err) } - // remove identity link for service users (serviceuser#org@organization) if principalType == schema.ServiceUserPrincipal { err := s.relationService.Delete(ctx, relation.Relation{ Object: relation.Object{ID: principalID, Namespace: schema.ServiceUserPrincipal}, @@ -377,34 +404,6 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal } } - // delete org-level policies last, using guarded delete for owner policies - for _, pol := range orgPoliciesToDelete { - if pol.RoleID == ownerRoleID { - if err := s.policyService.DeleteWithMinRoleGuard(ctx, pol.ID, ownerRoleID); err != nil { - if errors.Is(err, policy.ErrLastRoleGuard) { - return ErrLastOwnerRole - } - s.log.Error("partial failure removing member: org policy deletion failed, manual cleanup may be needed", - "org_id", orgID, - "policy_id", pol.ID, - "principal_id", principalID, - "principal_type", principalType, - "error", err, - ) - return fmt.Errorf("delete org policy %s: %w", pol.ID, err) - } - } else if err := s.policyService.Delete(ctx, pol.ID); err != nil { - s.log.Error("partial failure removing member: org policy deletion failed, manual cleanup may be needed", - "org_id", orgID, - "policy_id", pol.ID, - "principal_id", principalID, - "principal_type", principalType, - "error", err, - ) - return fmt.Errorf("delete org policy %s: %w", pol.ID, err) - } - } - s.auditOrgMemberRemoved(ctx, org, principalID, targetAuditType) audit.GetAuditor(ctx, org.ID).Log(audit.OrgMemberDeletedEvent, audit.Target{ ID: principalID, diff --git a/core/membership/service_test.go b/core/membership/service_test.go index f3950b5ce..5f1aad008 100644 --- a/core/membership/service_test.go +++ b/core/membership/service_test.go @@ -772,7 +772,7 @@ func TestService_RemoveOrganizationMember(t *testing.T) { wantErrContain: "delete project policy", }, { - name: "should return error if org relation removal fails without deleting org policies", + name: "should return error if org relation removal fails after org policies deleted", setup: func(d testDeps) { d.orgSvc.EXPECT().Get(ctx, orgID).Return(enabledOrg, nil) d.policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "org-p1", RoleID: viewerRoleID}}, nil) @@ -780,9 +780,11 @@ func TestService_RemoveOrganizationMember(t *testing.T) { d.projSvc.EXPECT().List(ctx, project.Filter{OrgID: orgID}).Return([]project.Project{}, nil) d.grpSvc.EXPECT().List(ctx, group.Filter{OrganizationID: orgID}).Return([]group.Group{}, nil) d.policySvc.EXPECT().List(ctx, policy.Filter{PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{ - {ID: "org-p1", ResourceType: schema.OrganizationNamespace, ResourceID: orgID}, + {ID: "org-p1", ResourceType: schema.OrganizationNamespace, ResourceID: orgID, RoleID: viewerRoleID}, }, nil) - // org policy Delete should NOT be called — relations fail first, org policies are last + // org policy deleted first (viewer, plain Delete) + d.policySvc.EXPECT().Delete(ctx, "org-p1").Return(nil) + // then relation removal fails d.relSvc.EXPECT().Delete(ctx, relation.Relation{Object: orgObj, Subject: userSub, RelationName: schema.OwnerRelationName}).Return(errors.New("spicedb down")) }, wantErrContain: "remove org relations", From 20f5d24c11da976e8fc45153f099894ce063b495 Mon Sep 17 00:00:00 2001 From: Rohil Surana Date: Tue, 5 May 2026 15:23:05 +0530 Subject: [PATCH 5/8] fix: run guarded owner delete before any sub-resource policy deletions Restructured RemoveOrganizationMember into 3 phases: 1. Classify policies by type (no deletions) 2. Guarded org owner delete (rejects early if last owner) 3. Remaining org policies, sub-resource policies, then relations This ensures guard rejection leaves no partial state. --- core/membership/service.go | 66 +++++++++++++++------------------ core/membership/service_test.go | 2 +- 2 files changed, 31 insertions(+), 37 deletions(-) diff --git a/core/membership/service.go b/core/membership/service.go index 680f9496a..b4fc49f93 100644 --- a/core/membership/service.go +++ b/core/membership/service.go @@ -306,10 +306,9 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal return fmt.Errorf("list all principal policies: %w", err) } - // delete sub-resource policies first (projects, groups), then relations, - // then org policies last — so a retry after partial failure won't hit ErrNotMember + // Phase 1: classify policies by type (no deletions yet) var orgPoliciesToDelete []policy.Policy - var errs error + var subResourcePolicies []policy.Policy for _, pol := range allPolicies { switch pol.ResourceType { case schema.OrganizationNamespace: @@ -318,57 +317,52 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal } case schema.ProjectNamespace: if _, ok := orgProjectIDSet[pol.ResourceID]; ok { - if err := s.policyService.Delete(ctx, pol.ID); err != nil { - errs = errors.Join(errs, fmt.Errorf("delete project policy %s: %w", pol.ID, err)) - } + subResourcePolicies = append(subResourcePolicies, pol) } case schema.GroupNamespace: if _, ok := orgGroupIDSet[pol.ResourceID]; ok { - if err := s.policyService.Delete(ctx, pol.ID); err != nil { - errs = errors.Join(errs, fmt.Errorf("delete group policy %s: %w", pol.ID, err)) - } + subResourcePolicies = append(subResourcePolicies, pol) } } } - if errs != nil { - s.log.Error("partial failure removing member: some policies could not be deleted, manual cleanup may be needed", - "org_id", orgID, - "principal_id", principalID, - "principal_type", principalType, - "error", errs, - ) - return errs - } - // delete org-level owner policies first via guarded delete — if the guard rejects - // (last owner), we return early before touching any relations or other policies + // Phase 2: guarded org owner policy delete runs first — if the guard rejects + // (last owner), we return early before touching any other policies or relations for _, pol := range orgPoliciesToDelete { if pol.RoleID == ownerRoleID { if err := s.policyService.DeleteWithMinRoleGuard(ctx, pol.ID, ownerRoleID); err != nil { if errors.Is(err, policy.ErrLastRoleGuard) { return ErrLastOwnerRole } - s.log.Error("partial failure removing member: org policy deletion failed, manual cleanup may be needed", - "org_id", orgID, - "policy_id", pol.ID, - "principal_id", principalID, - "principal_type", principalType, - "error", err, - ) return fmt.Errorf("delete org policy %s: %w", pol.ID, err) } - } else if err := s.policyService.Delete(ctx, pol.ID); err != nil { - s.log.Error("partial failure removing member: org policy deletion failed, manual cleanup may be needed", - "org_id", orgID, - "policy_id", pol.ID, - "principal_id", principalID, - "principal_type", principalType, - "error", err, - ) - return fmt.Errorf("delete org policy %s: %w", pol.ID, err) } } + // Phase 3: guard passed — safe to delete remaining org policies, sub-resource policies + for _, pol := range orgPoliciesToDelete { + if pol.RoleID != ownerRoleID { + if err := s.policyService.Delete(ctx, pol.ID); err != nil { + return fmt.Errorf("delete org policy %s: %w", pol.ID, err) + } + } + } + var errs error + for _, pol := range subResourcePolicies { + if err := s.policyService.Delete(ctx, pol.ID); err != nil { + errs = errors.Join(errs, fmt.Errorf("delete sub-resource policy %s: %w", pol.ID, err)) + } + } + if errs != nil { + s.log.Error("partial failure removing member: some policies could not be deleted, manual cleanup may be needed", + "org_id", orgID, + "principal_id", principalID, + "principal_type", principalType, + "error", errs, + ) + return errs + } + // guarded deletes passed — safe to clean up relations for _, g := range orgGroups { if err := s.removeRelations(ctx, g.ID, schema.GroupNamespace, principalID, principalType); err != nil { diff --git a/core/membership/service_test.go b/core/membership/service_test.go index 5f1aad008..75d58dc44 100644 --- a/core/membership/service_test.go +++ b/core/membership/service_test.go @@ -769,7 +769,7 @@ func TestService_RemoveOrganizationMember(t *testing.T) { }, nil) d.policySvc.EXPECT().Delete(ctx, "proj-p1").Return(errors.New("delete failed")) }, - wantErrContain: "delete project policy", + wantErrContain: "delete sub-resource policy", }, { name: "should return error if org relation removal fails after org policies deleted", From 139c56f2d474e3ed20316ba9fb6e2ec3fd0bbf51 Mon Sep 17 00:00:00 2001 From: Rohil Surana Date: Wed, 6 May 2026 10:42:59 +0530 Subject: [PATCH 6/8] fix: differentiate guard rejection from concurrently deleted policy When rowsAffected==0, re-check existence inside the transaction. If the row is gone (concurrent delete), return sql.ErrNoRows which maps to ErrNotExist. Only return ErrLastRoleGuard when the row still exists but the guard condition prevented deletion. --- internal/store/postgres/policy_repository.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/internal/store/postgres/policy_repository.go b/internal/store/postgres/policy_repository.go index 8596fba4c..2f61f514e 100644 --- a/internal/store/postgres/policy_repository.go +++ b/internal/store/postgres/policy_repository.go @@ -401,6 +401,12 @@ func (r PolicyRepository) DeleteWithMinRoleGuard(ctx context.Context, id string, return err } if rowsAffected == 0 { + var existingID string + if err := tx.QueryRowContext(ctx, + `SELECT id FROM `+TABLE_POLICIES+` WHERE id = $1`, id, + ).Scan(&existingID); errors.Is(err, sql.ErrNoRows) { + return sql.ErrNoRows + } return policy.ErrLastRoleGuard } From cd2b510fa91eb0224c9506f512f59d96122e1135 Mon Sep 17 00:00:00 2001 From: Rohil Surana Date: Wed, 6 May 2026 10:57:30 +0530 Subject: [PATCH 7/8] fix: propagate non-ErrNoRows errors from existence re-check --- internal/store/postgres/policy_repository.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/internal/store/postgres/policy_repository.go b/internal/store/postgres/policy_repository.go index 2f61f514e..144786034 100644 --- a/internal/store/postgres/policy_repository.go +++ b/internal/store/postgres/policy_repository.go @@ -402,11 +402,15 @@ func (r PolicyRepository) DeleteWithMinRoleGuard(ctx context.Context, id string, } if rowsAffected == 0 { var existingID string - if err := tx.QueryRowContext(ctx, + err := tx.QueryRowContext(ctx, `SELECT id FROM `+TABLE_POLICIES+` WHERE id = $1`, id, - ).Scan(&existingID); errors.Is(err, sql.ErrNoRows) { + ).Scan(&existingID) + if errors.Is(err, sql.ErrNoRows) { return sql.ErrNoRows } + if err != nil { + return err + } return policy.ErrLastRoleGuard } From 7ee4792cceb877d2a1c5f1f04566fff8309c0141 Mon Sep 17 00:00:00 2001 From: Rohil Surana Date: Wed, 6 May 2026 13:11:21 +0530 Subject: [PATCH 8/8] fix: add ORDER BY to FOR UPDATE CTE to prevent deadlocks --- internal/store/postgres/policy_repository.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/store/postgres/policy_repository.go b/internal/store/postgres/policy_repository.go index 144786034..aba751860 100644 --- a/internal/store/postgres/policy_repository.go +++ b/internal/store/postgres/policy_repository.go @@ -381,6 +381,7 @@ func (r PolicyRepository) DeleteWithMinRoleGuard(ctx context.Context, id string, WHERE resource_id = $2 AND resource_type = $3 AND role_id = $4 + ORDER BY id FOR UPDATE ) DELETE FROM ` + TABLE_POLICIES + ` WHERE id = $1 AND (