Skip to content

Commit 8332256

Browse files
Merge pull request #1145 from jianzhangbjz/OCPBUGS-63436-4.19
OCPBUGS-64925: Fix TOCTOU race condition in ensureInstallPlan (#3682)
2 parents e42f5c2 + 074e754 commit 8332256

File tree

59 files changed

+9790
-5243
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+9790
-5243
lines changed

Makefile

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,17 @@ vendor:
157157
go mod tidy
158158
go mod vendor
159159
go mod verify
160+
# Copy required CRD manifests for tests
161+
@OPENSHIFT_API_VERSION=$$(go list -m -f '{{.Version}}' github.com/openshift/api) && \
162+
GOMODCACHE=$$(go env GOMODCACHE) && \
163+
OPENSHIFT_API_PATH="$$GOMODCACHE/github.com/openshift/api@$$OPENSHIFT_API_VERSION" && \
164+
if [ -d "$$OPENSHIFT_API_PATH/config/v1/zz_generated.crd-manifests" ]; then \
165+
mkdir -p vendor/github.com/openshift/api/config/v1/zz_generated.crd-manifests && \
166+
cp -f "$$OPENSHIFT_API_PATH/config/v1/zz_generated.crd-manifests/0000_00_cluster-version-operator_01_clusteroperators.crd.yaml" \
167+
vendor/github.com/openshift/api/config/v1/zz_generated.crd-manifests/ 2>/dev/null || true && \
168+
cp -f "$$OPENSHIFT_API_PATH/config/v1/zz_generated.crd-manifests/0000_00_cluster-version-operator_01_clusterversions-Default.crd.yaml" \
169+
vendor/github.com/openshift/api/config/v1/zz_generated.crd-manifests/ 2>/dev/null || true; \
170+
fi
160171

161172
.PHONY: manifests
162173
manifests: ## Generate manifests

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ require (
1414
github.com/maxbrunsfeld/counterfeiter/v6 v6.11.2
1515
github.com/mikefarah/yq/v3 v3.0.0-20201202084205-8846255d1c37
1616
github.com/onsi/ginkgo/v2 v2.23.3
17-
github.com/openshift/api v3.9.0+incompatible
17+
github.com/openshift/api v0.0.0-20250425163235-9b80d67473bc
1818
github.com/operator-framework/api v0.30.0
1919
github.com/operator-framework/operator-lifecycle-manager v0.0.0-00010101000000-000000000000
2020
github.com/operator-framework/operator-registry v1.51.0
@@ -227,7 +227,7 @@ require (
227227

228228
replace (
229229
// controller runtime
230-
github.com/openshift/api => github.com/openshift/api v0.0.0-20210517065120-b325f58df679
230+
github.com/openshift/api => github.com/openshift/api v0.0.0-20250425163235-9b80d67473bc
231231
github.com/openshift/client-go => github.com/openshift/client-go v0.0.0-20200326155132-2a6cd50aedd0 // release-4.5
232232

233233
// use staged repositories

go.sum

Lines changed: 162 additions & 25 deletions
Large diffs are not rendered by default.

staging/operator-lifecycle-manager/go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ require (
2222
github.com/mitchellh/mapstructure v1.5.0
2323
github.com/onsi/ginkgo/v2 v2.23.3
2424
github.com/onsi/gomega v1.36.2
25-
github.com/openshift/api v3.9.0+incompatible
25+
github.com/openshift/api v0.0.0-20250425163235-9b80d67473bc
2626
github.com/openshift/client-go v0.0.0-20220525160904-9e1acff93e4a
2727
github.com/operator-framework/api v0.30.0
2828
github.com/operator-framework/operator-registry v1.51.0
@@ -191,7 +191,7 @@ replace google.golang.org/grpc => google.golang.org/grpc v1.63.2
191191

192192
replace (
193193
// controller runtime
194-
github.com/openshift/api => github.com/openshift/api v0.0.0-20221021112143-4226c2167e40 // release-4.12
194+
github.com/openshift/api => github.com/openshift/api v0.0.0-20250425163235-9b80d67473bc // release-4.19
195195
github.com/openshift/client-go => github.com/openshift/client-go v0.0.0-20221019143426-16aed247da5c // release-4.12
196196
)
197197

staging/operator-lifecycle-manager/go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1823,8 +1823,8 @@ github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQ
18231823
github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
18241824
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
18251825
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
1826-
github.com/openshift/api v0.0.0-20221021112143-4226c2167e40 h1:PxjGCA72RtsdHWToZLkjjeWm7WXXx4cuv0u4gtvLbrk=
1827-
github.com/openshift/api v0.0.0-20221021112143-4226c2167e40/go.mod h1:aQ6LDasvHMvHZXqLHnX2GRmnfTWCF/iIwz8EMTTIE9A=
1826+
github.com/openshift/api v0.0.0-20250425163235-9b80d67473bc h1:BGKjHtYzBweOSu1UwTnNqtPbJZ4VzOTqVFlUDpP+6U8=
1827+
github.com/openshift/api v0.0.0-20250425163235-9b80d67473bc/go.mod h1:yk60tHAmHhtVpJQo3TwVYq2zpuP70iJIFDCmeKMIzPw=
18281828
github.com/openshift/client-go v0.0.0-20221019143426-16aed247da5c h1:CV76yFOTXmq9VciBR3Bve5ZWzSxdft7gaMVB3kS0rwg=
18291829
github.com/openshift/client-go v0.0.0-20221019143426-16aed247da5c/go.mod h1:lFMO8mLHXWFzSdYvGNo8ivF9SfF6zInA8ZGw4phRnUE=
18301830
github.com/operator-framework/api v0.30.0 h1:44hCmGnEnZk/Miol5o44dhSldNH0EToQUG7vZTl29kk=

staging/operator-lifecycle-manager/pkg/controller/operators/catalog/operator.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1635,12 +1635,6 @@ func (o *Operator) ensureInstallPlan(logger *logrus.Entry, namespace string, gen
16351635
return nil, nil
16361636
}
16371637

1638-
// Check if any existing installplans are creating the same resources
1639-
installPlans, err := o.listInstallPlans(namespace)
1640-
if err != nil {
1641-
return nil, err
1642-
}
1643-
16441638
// There are multiple(2) worker threads process the namespaceQueue.
16451639
// Both worker can work at the same time when 2 separate updates are made for the namespace.
16461640
// The following sequence causes 2 installplans are created for a subscription
@@ -1661,6 +1655,13 @@ func (o *Operator) ensureInstallPlan(logger *logrus.Entry, namespace string, gen
16611655
o.muInstallPlan.Lock()
16621656
defer o.muInstallPlan.Unlock()
16631657

1658+
// Check if any existing installplans are creating the same resources
1659+
// This must be done inside the lock to prevent TOCTOU race condition
1660+
installPlans, err := o.listInstallPlans(namespace)
1661+
if err != nil {
1662+
return nil, err
1663+
}
1664+
16641665
for _, installPlan := range installPlans {
16651666
if installPlan.Spec.Generation == gen {
16661667
return reference.GetReference(installPlan)

staging/operator-lifecycle-manager/pkg/controller/operators/catalog/operator_test.go

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"os"
1010
"reflect"
1111
"strings"
12+
"sync"
1213
"testing"
1314
"testing/quick"
1415
"time"
@@ -2462,3 +2463,107 @@ func hasExpectedCondition(ip *v1alpha1.InstallPlan, expectedCondition v1alpha1.I
24622463
}
24632464
return false
24642465
}
2466+
2467+
// TestEnsureInstallPlanConcurrency tests that concurrent calls to ensureInstallPlan
2468+
// do not create duplicate InstallPlans for the same subscription.
2469+
// This test verifies the fix for a TOCTOU race condition where multiple worker threads
2470+
// could create duplicate InstallPlans if they both check for existing plans before either
2471+
// has created one.
2472+
func TestEnsureInstallPlanConcurrency(t *testing.T) {
2473+
namespace := "test-ns"
2474+
gen := 1
2475+
numGoroutines := 10
2476+
2477+
ctx, cancel := context.WithCancel(context.TODO())
2478+
defer cancel()
2479+
2480+
// Create a fake operator
2481+
op, err := NewFakeOperator(ctx, namespace, []string{namespace})
2482+
require.NoError(t, err)
2483+
2484+
// Create a test subscription
2485+
sub := &v1alpha1.Subscription{
2486+
ObjectMeta: metav1.ObjectMeta{
2487+
Name: "test-sub",
2488+
Namespace: namespace,
2489+
UID: types.UID("test-uid"),
2490+
},
2491+
Spec: &v1alpha1.SubscriptionSpec{
2492+
Package: "test-package",
2493+
},
2494+
}
2495+
2496+
// Create test steps for the InstallPlan
2497+
steps := []*v1alpha1.Step{
2498+
{
2499+
Resolving: "test-csv",
2500+
Resource: v1alpha1.StepResource{
2501+
CatalogSource: "test-catalog",
2502+
CatalogSourceNamespace: namespace,
2503+
Group: "operators.coreos.com",
2504+
Version: "v1alpha1",
2505+
Kind: "ClusterServiceVersion",
2506+
Name: "test-csv",
2507+
Manifest: toManifest(t, csv("test-csv", namespace, nil, nil)),
2508+
},
2509+
Status: v1alpha1.StepStatusUnknown,
2510+
},
2511+
}
2512+
2513+
// Use WaitGroup to synchronize goroutines
2514+
var wg sync.WaitGroup
2515+
// Use a channel to collect results
2516+
results := make(chan *corev1.ObjectReference, numGoroutines)
2517+
// Use a sync.Once-like mechanism to start all goroutines at roughly the same time
2518+
startBarrier := make(chan struct{})
2519+
2520+
logger := logrus.NewEntry(logrus.New())
2521+
2522+
// Launch multiple goroutines that will call ensureInstallPlan concurrently
2523+
for i := 0; i < numGoroutines; i++ {
2524+
wg.Add(1)
2525+
go func() {
2526+
defer wg.Done()
2527+
// Wait for the start signal
2528+
<-startBarrier
2529+
2530+
// Call ensureInstallPlan
2531+
ref, err := op.ensureInstallPlan(logger, namespace, gen, []*v1alpha1.Subscription{sub}, v1alpha1.ApprovalAutomatic, steps, nil)
2532+
require.NoError(t, err)
2533+
results <- ref
2534+
}()
2535+
}
2536+
2537+
// Start all goroutines
2538+
close(startBarrier)
2539+
2540+
// Wait for all goroutines to complete
2541+
wg.Wait()
2542+
close(results)
2543+
2544+
// Collect all results
2545+
var refs []*corev1.ObjectReference
2546+
for ref := range results {
2547+
refs = append(refs, ref)
2548+
}
2549+
2550+
// Verify we got the expected number of results
2551+
require.Len(t, refs, numGoroutines, "should have received results from all goroutines")
2552+
2553+
// Verify all refs point to the same InstallPlan
2554+
firstRef := refs[0]
2555+
for i, ref := range refs {
2556+
require.Equal(t, firstRef.Name, ref.Name, "goroutine %d returned different InstallPlan name", i)
2557+
require.Equal(t, firstRef.Namespace, ref.Namespace, "goroutine %d returned different InstallPlan namespace", i)
2558+
require.Equal(t, firstRef.UID, ref.UID, "goroutine %d returned different InstallPlan UID", i)
2559+
}
2560+
2561+
// Verify only one InstallPlan was created in the cluster
2562+
ipList, err := op.client.OperatorsV1alpha1().InstallPlans(namespace).List(ctx, metav1.ListOptions{})
2563+
require.NoError(t, err)
2564+
require.Len(t, ipList.Items, 1, "exactly one InstallPlan should have been created")
2565+
2566+
// Verify the created InstallPlan has the correct generation
2567+
createdIP := &ipList.Items[0]
2568+
require.Equal(t, gen, createdIP.Spec.Generation, "InstallPlan should have the correct generation")
2569+
}

staging/operator-lifecycle-manager/pkg/controller/operators/olm/operator_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5853,6 +5853,12 @@ func RequireObjectsInCache(t *testing.T, lister operatorlister.OperatorLister, n
58535853
fetched, err = lister.RbacV1().RoleBindingLister().RoleBindings(namespace).Get(o.GetName())
58545854
case *v1alpha1.ClusterServiceVersion:
58555855
fetched, err = lister.OperatorsV1alpha1().ClusterServiceVersionLister().ClusterServiceVersions(namespace).Get(o.GetName())
5856+
if err != nil {
5857+
if apierrors.IsNotFound(err) {
5858+
return err
5859+
}
5860+
return errors.Join(err, fmt.Errorf("namespace: %v, error: %v", namespace, err))
5861+
}
58565862
// We don't care about finalizers
58575863
object.(*v1alpha1.ClusterServiceVersion).Finalizers = nil
58585864
fetched.(*v1alpha1.ClusterServiceVersion).Finalizers = nil

staging/operator-lifecycle-manager/pkg/controller/operators/openshift/suite_test.go

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package openshift
22

33
import (
44
"context"
5+
"os"
56
"path/filepath"
67
"testing"
78
"time"
@@ -46,15 +47,25 @@ const (
4647
var _ = BeforeSuite(func() {
4748
logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)))
4849

49-
base := filepath.Join("..", "..", "..", "..", "vendor", "github.com", "openshift", "api", "config", "v1")
50+
// Try to find CRDs in vendor first, fall back to GOMODCACHE if not found
51+
base := filepath.Join("..", "..", "..", "..", "vendor", "github.com", "openshift", "api", "config", "v1", "zz_generated.crd-manifests")
52+
if _, err := os.Stat(base); os.IsNotExist(err) {
53+
// Fall back to GOMODCACHE
54+
gomodcache := os.Getenv("GOMODCACHE")
55+
if gomodcache == "" {
56+
gomodcache = filepath.Join(os.Getenv("HOME"), "go", "pkg", "mod")
57+
}
58+
base = filepath.Join(gomodcache, "github.com", "openshift", "api@v0.0.0-20250425163235-9b80d67473bc", "config", "v1", "zz_generated.crd-manifests")
59+
}
60+
5061
testEnv = &envtest.Environment{
5162
ErrorIfCRDPathMissing: true,
5263
CRDs: []*apiextensionsv1.CustomResourceDefinition{
5364
crds.ClusterServiceVersion(),
5465
},
5566
CRDDirectoryPaths: []string{
56-
filepath.Join(base, "0000_00_cluster-version-operator_01_clusteroperator.crd.yaml"),
57-
filepath.Join(base, "0000_00_cluster-version-operator_01_clusterversion.crd.yaml"),
67+
filepath.Join(base, "0000_00_cluster-version-operator_01_clusteroperators.crd.yaml"),
68+
filepath.Join(base, "0000_00_cluster-version-operator_01_clusterversions-Default.crd.yaml"),
5869
},
5970
}
6071

0 commit comments

Comments
 (0)