Skip to content

Commit 385c310

Browse files
authored
fix: add some delays when creating resourceSnapshot (#97)
Signed-off-by: Zhiying Lin <zhiyingl456@gmail.com>
1 parent 23615ea commit 385c310

File tree

14 files changed

+596
-71
lines changed

14 files changed

+596
-71
lines changed

.github/workflows/ci.yml

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,16 @@ jobs:
8686
HUB_SERVER_URL: 'https://172.19.0.2:6443'
8787

8888
e2e-tests:
89+
strategy:
90+
fail-fast: false
91+
matrix:
92+
customized-settings: [default, custom]
93+
include:
94+
- customized-settings: default
95+
# to shorten the test duration, set the resource snapshot creation interval to 0
96+
resource-snapshot-creation-interval: 0m
97+
- customized-settings: custom
98+
resource-snapshot-creation-interval: 1m
8999
runs-on: ubuntu-latest
90100
needs: [
91101
detect-noop,
@@ -119,7 +129,11 @@ jobs:
119129
120130
- name: Run e2e tests
121131
run: |
122-
make e2e-tests
132+
if [ "${{ matrix.customized-settings }}" = "default" ]; then
133+
make e2e-tests
134+
else
135+
make e2e-tests-custom
136+
fi
123137
env:
124138
KUBECONFIG: '/home/runner/.kube/config'
125139
HUB_SERVER_URL: 'https://172.19.0.2:6443'
@@ -129,4 +143,5 @@ jobs:
129143
# TO-DO (chenyu1): to ensure a vendor-neutral experience, switch to a dummy
130144
# property provider once the AKS one is split out.
131145
PROPERTY_PROVIDER: 'azure'
146+
RESOURCE_SNAPSHOT_CREATION_INTERVAL: ${{ matrix.resource-snapshot-creation-interval }}
132147

Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,10 @@ e2e-tests-v1alpha1: create-kind-cluster run-e2e-v1alpha1
213213

214214
.PHONY: e2e-tests
215215
e2e-tests: setup-clusters
216-
cd ./test/e2e && ginkgo -v -p .
216+
cd ./test/e2e && ginkgo --label-filter="!custom" -v -p .
217+
218+
e2e-tests-custom: setup-clusters
219+
cd ./test/e2e && ginkgo --label-filter="custom" -v -p .
217220

218221
.PHONY: setup-clusters
219222
setup-clusters:

charts/hub-agent/README.md

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -19,24 +19,25 @@ _See [helm install](https://helm.sh/docs/helm/helm_install/) for command documen
1919

2020
## Parameters
2121

22-
| Parameter | Description | Default |
23-
|:------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------|
24-
| replicaCount | The number of hub-agent replicas to deploy | `1` |
25-
| image.repository | Image repository | `ghcr.io/azure/azure/fleet/hub-agent` |
26-
| image.pullPolicy | Image pullPolicy | `Always` |
27-
| image.tag | The image release tag to use | `v0.1.0` |
28-
| namespace | Namespace that this Helm chart is installed on | `fleet-system` |
29-
| serviceAccount.create | Whether to create service account | `true` |
30-
| serviceAccount.name | Service account name | `hub-agent-sa` |
31-
| resources | The resource request/limits for the container image | limits: 500m CPU, 1Gi, requests: 100m CPU, 128Mi |
32-
| affinity | The node affinity to use for hubagent pod | `{}` |
33-
| tolerations | The tolerations to use for hubagent pod | `[]` |
34-
| logVerbosity | Log level. Uses V logs (klog) | `5` |
35-
| enableV1Alpha1APIs | If set, the agents will watch for the v1alpha1 APIs. | `false` |
36-
| enableV1Beta1APIs | If set, the agents will watch for the v1beta1 APIs. | `true` |
37-
| hubAPIQPS | QPS to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags. | `250` |
38-
| hubAPIBurst | Burst to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags. | `1000` |
39-
| MaxConcurrentClusterPlacement | The max number of clusterResourcePlacement to run concurrently this fleet supports. | `100` |
40-
| ConcurrentResourceChangeSyncs | The number of resourceChange reconcilers that are allowed to run concurrently. | `20` |
41-
| logFileMaxSize | Max size of log file before rotation | `1000000` |
42-
| MaxFleetSizeSupported | The max number of member clusters this fleet supports. | `100` |
22+
| Parameter | Description | Default |
23+
|:-----------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------|
24+
| replicaCount | The number of hub-agent replicas to deploy | `1` |
25+
| image.repository | Image repository | `ghcr.io/azure/azure/fleet/hub-agent` |
26+
| image.pullPolicy | Image pullPolicy | `Always` |
27+
| image.tag | The image release tag to use | `v0.1.0` |
28+
| namespace | Namespace that this Helm chart is installed on | `fleet-system` |
29+
| serviceAccount.create | Whether to create service account | `true` |
30+
| serviceAccount.name | Service account name | `hub-agent-sa` |
31+
| resources | The resource request/limits for the container image | limits: 500m CPU, 1Gi, requests: 100m CPU, 128Mi |
32+
| affinity | The node affinity to use for hubagent pod | `{}` |
33+
| tolerations | The tolerations to use for hubagent pod | `[]` |
34+
| logVerbosity | Log level. Uses V logs (klog) | `5` |
35+
| enableV1Alpha1APIs | If set, the agents will watch for the v1alpha1 APIs. | `false` |
36+
| enableV1Beta1APIs | If set, the agents will watch for the v1beta1 APIs. | `true` |
37+
| hubAPIQPS | QPS to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat APIs, whose rate limiting is controlled by a different set of flags. | `250` |
38+
| hubAPIBurst | Burst to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat APIs, whose rate limiting is controlled by a different set of flags. | `1000` |
39+
| MaxConcurrentClusterPlacement | The max number of clusterResourcePlacement to run concurrently this fleet supports. | `100` |
40+
| ConcurrentResourceChangeSyncs | The number of resourceChange reconcilers that are allowed to run concurrently. | `20` |
41+
| logFileMaxSize | Max size of log file before rotation | `1000000` |
42+
| MaxFleetSizeSupported | The max number of member clusters this fleet supports. | `100` |
43+
| resourceSnapshotCreationInterval | The interval at which resource snapshots are created. | `1m0s` |

charts/hub-agent/templates/deployment.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ spec:
4343
- --hub-api-burst={{ .Values.hubAPIBurst }}
4444
- --force-delete-wait-time={{ .Values.forceDeleteWaitTime }}
4545
- --cluster-unhealthy-threshold={{ .Values.clusterUnhealthyThreshold }}
46+
- --resource-snapshot-creation-interval={{ .Values.resourceSnapshotCreationInterval }}
4647
ports:
4748
- name: metrics
4849
containerPort: 8080

charts/hub-agent/values.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ enableGuardRail: true
1818
webhookClientConnectionType: service
1919
forceDeleteWaitTime: 15m0s
2020
clusterUnhealthyThreshold: 3m0s
21+
resourceSnapshotCreationInterval: 1m0s
22+
2123
namespace:
2224
fleet-system
2325

cmd/hubagent/options/options.go

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ type Options struct {
104104
PprofPort int
105105
// DenyModifyMemberClusterLabels indicates if the member cluster labels cannot be modified by groups (excluding system:masters)
106106
DenyModifyMemberClusterLabels bool
107+
// ResourceSnapshotCreationInterval is the interval at which resource snapshots are created.
108+
ResourceSnapshotCreationInterval time.Duration
107109
}
108110

109111
// NewOptions builds an empty options.
@@ -115,14 +117,15 @@ func NewOptions() *Options {
115117
ResourceNamespace: utils.FleetSystemNamespace,
116118
ResourceName: "136224848560.hub.fleet.azure.com",
117119
},
118-
MaxConcurrentClusterPlacement: 10,
119-
ConcurrentResourceChangeSyncs: 1,
120-
MaxFleetSizeSupported: 100,
121-
EnableV1Alpha1APIs: false,
122-
EnableClusterInventoryAPIs: true,
123-
EnableStagedUpdateRunAPIs: true,
124-
EnablePprof: false,
125-
PprofPort: 6065,
120+
MaxConcurrentClusterPlacement: 10,
121+
ConcurrentResourceChangeSyncs: 1,
122+
MaxFleetSizeSupported: 100,
123+
EnableV1Alpha1APIs: false,
124+
EnableClusterInventoryAPIs: true,
125+
EnableStagedUpdateRunAPIs: true,
126+
EnablePprof: false,
127+
PprofPort: 6065,
128+
ResourceSnapshotCreationInterval: 1 * time.Minute,
126129
}
127130
}
128131

@@ -169,6 +172,7 @@ func (o *Options) AddFlags(flags *flag.FlagSet) {
169172
flags.BoolVar(&o.EnablePprof, "enable-pprof", false, "If set, the pprof profiling is enabled.")
170173
flags.IntVar(&o.PprofPort, "pprof-port", 6065, "The port for pprof profiling.")
171174
flags.BoolVar(&o.DenyModifyMemberClusterLabels, "deny-modify-member-cluster-labels", false, "If set, users not in the system:masters cannot modify member cluster labels.")
175+
flags.DurationVar(&o.ResourceSnapshotCreationInterval, "resource-snapshot-creation-interval", 1*time.Minute, "The interval at which resource snapshots are created.")
172176

173177
o.RateLimiterOpts.AddFlags(flags)
174178
}

cmd/hubagent/workload/setup.go

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -153,14 +153,15 @@ func SetupControllers(ctx context.Context, wg *sync.WaitGroup, mgr ctrl.Manager,
153153

154154
// Set up a custom controller to reconcile cluster resource placement
155155
crpc := &clusterresourceplacement.Reconciler{
156-
Client: mgr.GetClient(),
157-
Recorder: mgr.GetEventRecorderFor(crpControllerName),
158-
RestMapper: mgr.GetRESTMapper(),
159-
InformerManager: dynamicInformerManager,
160-
ResourceConfig: resourceConfig,
161-
SkippedNamespaces: skippedNamespaces,
162-
Scheme: mgr.GetScheme(),
163-
UncachedReader: mgr.GetAPIReader(),
156+
Client: mgr.GetClient(),
157+
Recorder: mgr.GetEventRecorderFor(crpControllerName),
158+
RestMapper: mgr.GetRESTMapper(),
159+
InformerManager: dynamicInformerManager,
160+
ResourceConfig: resourceConfig,
161+
SkippedNamespaces: skippedNamespaces,
162+
Scheme: mgr.GetScheme(),
163+
UncachedReader: mgr.GetAPIReader(),
164+
ResourceSnapshotCreationInterval: opts.ResourceSnapshotCreationInterval,
164165
}
165166

166167
rateLimiter := options.DefaultControllerRateLimiter(opts.RateLimiterOpts)

pkg/controllers/clusterinventory/clusterprofile/controller_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -159,13 +159,13 @@ func TestSyncClusterProfileCondition(t *testing.T) {
159159
t.Run(tt.name, func(t *testing.T) {
160160
reconciler.syncClusterProfileCondition(tt.memberCluster, tt.clusterProfile)
161161
condition := meta.FindStatusCondition(tt.clusterProfile.Status.Conditions, clusterinventory.ClusterConditionControlPlaneHealthy)
162-
if condition == nil {
162+
if condition == nil { //nolint: staticcheck // false positive SA5011: possible nil pointer dereference
163163
t.Fatalf("expected condition to be set, but it was not")
164164
}
165-
if condition.Status != tt.expectedConditionStatus {
165+
if condition.Status != tt.expectedConditionStatus { //nolint: staticcheck // false positive SA5011: possible nil pointer dereference
166166
t.Errorf("test case `%s` failed, expected condition status %v, got %v", tt.name, tt.expectedConditionStatus, condition.Status)
167167
}
168-
if condition.Reason != tt.expectedConditionReason {
168+
if condition.Reason != tt.expectedConditionReason { //nolint: staticcheck // false positive SA5011: possible nil pointer dereference
169169
t.Errorf("test case `%s` failed, expected condition reason %v, got %v", tt.name, tt.expectedConditionReason, condition.Reason)
170170
}
171171
})

0 commit comments

Comments
 (0)