Skip to content

Commit cf4b8b9

Browse files
authored
remove status.EndpointConfig* from endpoint CRD (#92)
Description of changes: 1. Remove `status.latestEndpointConfgName` from CRD - This field was added for customers to have a reliable way to wait for a successful update because the endpoint goes back to `InService` even after a failed update and the `ResourceSynced` condition was set to `True`. - A customer could not rely on `ResourceSynced` or `endpointStatus` to get the accurate picture and they needed do multiple checks the update is successful. - e.g. `endpointStatus == InService && ACK.ResourceSynced == true && ACK.Terminal == False && failureReason = nil`. - To simplify this, latestEndpointConfig was added to the status so Cx can wait on just `spec.EndpointConfigName == status.latestEndpointConfigName` - With this change, the `ResourceSynced` condition will not go to `True` on failed update since the desired state cannot be reached. Customers can just wait and timeout on: - `endpointStatus == InService && ACK.ResourceSynced == true` and check for failure reason in case of timeout. The latestEndpointConfig from service will be in the Terminal condition message for convinence 2. Move `status.lastEndpointConfigForUpdate` to metadata - This is purely a metadata field. It was only required by the controller to stop retrying an update and hence has been moved to annotation. - Moving this to annotations gives us a two-way door. In case we find a better heuristic in future to differentiate between update related error and related to service modification, this can be removed easily with having any backward breaking changes to the CRD Testing: ``` pytest tests/test_endpoint.py ``` PR build will pass after #91 is merged since there is a dependency on the error message By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
1 parent ad448b8 commit cf4b8b9

24 files changed

+95
-137
lines changed
Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
ack_generate_info:
2-
build_date: "2021-09-01T00:07:38Z"
3-
build_hash: 6f22b7b568e25b4ee007bb1ab5f9338c16daf172
2+
build_date: "2021-09-01T01:05:16Z"
3+
build_hash: 07d7ea755625b2cac93bbf293bf5a8cab24ecb68
44
go_version: go1.16.4 linux/amd64
55
version: v0.13.0
6-
api_directory_checksum: 13c67d2ac904c5b3c1ddea34aba38d90f06f3adc
6+
api_directory_checksum: a0f6457435f0a16addd9cf7d0acef2830722bd78
77
api_version: v1alpha1
88
aws_sdk_go_version: v1.38.11
99
generator_config_info:
10-
file_checksum: 95bf43b6a68c009a14db1ba7bd91293ca03aee15
10+
file_checksum: 5431bfc09117e092ee89a98edcbad6934363dea2
1111
original_file_name: generator.yaml
1212
last_modification:
1313
reason: API generation
14-
timestamp: 2021-09-01 00:07:46.08290738 +0000 UTC
14+
timestamp: 2021-09-01 01:05:19.682637714 +0000 UTC

apis/v1alpha1/endpoint.go

Lines changed: 0 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apis/v1alpha1/generator.yaml

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ resources:
7373
- EndpointUpdateError
7474
hooks:
7575
sdk_read_one_post_set_output:
76-
code: rm.customDescribeEndpointSetOutput(resp, ko)
76+
code: rm.customDescribeEndpointSetOutput(ko)
7777
sdk_update_pre_build_request:
7878
template_path: endpoint/sdk_update_pre_build_request.go.tpl
7979
sdk_update_post_set_output:
@@ -96,16 +96,6 @@ resources:
9696
from:
9797
operation: DescribeEndpoint
9898
path: FailureReason
99-
LatestEndpointConfigName:
100-
is_read_only: true
101-
from:
102-
operation: DescribeEndpoint
103-
path: EndpointConfigName
104-
LastEndpointConfigNameForUpdate:
105-
is_read_only: true
106-
from:
107-
operation: DescribeEndpointConfig
108-
path: EndpointConfigName
10999
CreationTime:
110100
is_read_only: true
111101
from:

apis/v1alpha1/zz_generated.deepcopy.go

Lines changed: 0 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/sagemaker.services.k8s.aws_endpoints.yaml

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -162,17 +162,10 @@ spec:
162162
description: If the status of the endpoint is Failed, the reason why
163163
it failed.
164164
type: string
165-
lastEndpointConfigNameForUpdate:
166-
description: Name of the Amazon SageMaker endpoint configuration.
167-
type: string
168165
lastModifiedTime:
169166
description: A timestamp that shows when the endpoint was last modified.
170167
format: date-time
171168
type: string
172-
latestEndpointConfigName:
173-
description: The name of the endpoint configuration associated with
174-
this endpoint.
175-
type: string
176169
productionVariants:
177170
description: An array of ProductionVariantSummary objects, one for
178171
each model hosted behind this endpoint.

generator.yaml

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ resources:
7373
- EndpointUpdateError
7474
hooks:
7575
sdk_read_one_post_set_output:
76-
code: rm.customDescribeEndpointSetOutput(resp, ko)
76+
code: rm.customDescribeEndpointSetOutput(ko)
7777
sdk_update_pre_build_request:
7878
template_path: endpoint/sdk_update_pre_build_request.go.tpl
7979
sdk_update_post_set_output:
@@ -96,16 +96,6 @@ resources:
9696
from:
9797
operation: DescribeEndpoint
9898
path: FailureReason
99-
LatestEndpointConfigName:
100-
is_read_only: true
101-
from:
102-
operation: DescribeEndpoint
103-
path: EndpointConfigName
104-
LastEndpointConfigNameForUpdate:
105-
is_read_only: true
106-
from:
107-
operation: DescribeEndpointConfig
108-
path: EndpointConfigName
10999
CreationTime:
110100
is_read_only: true
111101
from:

helm/crds/sagemaker.services.k8s.aws_endpoints.yaml

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -162,17 +162,10 @@ spec:
162162
description: If the status of the endpoint is Failed, the reason why
163163
it failed.
164164
type: string
165-
lastEndpointConfigNameForUpdate:
166-
description: Name of the Amazon SageMaker endpoint configuration.
167-
type: string
168165
lastModifiedTime:
169166
description: A timestamp that shows when the endpoint was last modified.
170167
format: date-time
171168
type: string
172-
latestEndpointConfigName:
173-
description: The name of the endpoint configuration associated with
174-
this endpoint.
175-
type: string
176169
productionVariants:
177170
description: An array of ProductionVariantSummary objects, one for
178171
each model hosted behind this endpoint.

pkg/resource/endpoint/custom_update_conditions.go

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,15 @@
1717
package endpoint
1818

1919
import (
20+
"github.com/aws/aws-sdk-go/aws"
21+
22+
ackcondition "github.com/aws-controllers-k8s/runtime/pkg/condition"
23+
ackerr "github.com/aws-controllers-k8s/runtime/pkg/errors"
24+
corev1 "k8s.io/api/core/v1"
2025
svcapitypes "github.com/aws-controllers-k8s/sagemaker-controller/apis/v1alpha1"
2126
svccommon "github.com/aws-controllers-k8s/sagemaker-controller/pkg/common"
2227
svcsdk "github.com/aws/aws-sdk-go/service/sagemaker"
28+
2329
)
2430

2531
// CustomUpdateConditions sets conditions (terminal) on supplied endpoint.
@@ -36,5 +42,21 @@ func (rm *resourceManager) CustomUpdateConditions(
3642
resourceName := resourceGK.Kind
3743
// If the latestStatus == terminalStatus we will set
3844
// the terminal condition and terminal message.
39-
return svccommon.SetTerminalState(conditionManager, latestStatus, &resourceName, terminalStatus)
45+
updated := svccommon.SetTerminalState(conditionManager, latestStatus, &resourceName, terminalStatus)
46+
47+
// Continue setting ResourceSynced condition to false in case of failed update
48+
// since desired and latest will be different until the issue is fixed.
49+
// Customer can use this condition state and FailureReason to determine
50+
// the correct course of action in case the update to Endpoint fails
51+
// Customer will also have additional information like latest endpointConfig
52+
// in condition message and last endpointconfig used for update in annotations
53+
if err != nil {
54+
awsErr, ok := ackerr.AWSError(err)
55+
if ok && awsErr.Code() == "EndpointUpdateError" {
56+
ackcondition.SetSynced(conditionManager, corev1.ConditionFalse, aws.String(awsErr.Error()), nil)
57+
return true
58+
}
59+
}
60+
61+
return updated
4062
}

pkg/resource/endpoint/hooks.go

Lines changed: 23 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,11 @@
1111
// express or implied. See the License for the specific language governing
1212
// permissions and limitations under the License.
1313

14-
// Use this file if the Status/Spec of the CR needs to be modified after
15-
// create/describe/update operation
16-
1714
package endpoint
1815

1916
import (
2017
"context"
18+
"fmt"
2119
"strings"
2220

2321
ackcompare "github.com/aws-controllers-k8s/runtime/pkg/compare"
@@ -39,39 +37,29 @@ var (
3937

4038
resourceName = resourceGK.Kind
4139

42-
FailUpdateError = awserr.New("EndpointUpdateError", "unable to update endpoint. check FailureReason", nil)
40+
lastEndpointConfigForUpdateAnnotation = fmt.Sprintf("%s/last-endpoint-config-for-update", resourceGK.Group)
4341

4442
FailureReasonInternalServiceErrorPrefix = "Request to service failed"
4543
)
4644

4745
// customDescribeEndpointSetOutput sets the resource ResourceSynced condition to False if endpoint is
4846
// being modified by AWS
49-
func (rm *resourceManager) customDescribeEndpointSetOutput(
50-
resp *svcsdk.DescribeEndpointOutput,
51-
ko *svcapitypes.Endpoint,
52-
) {
53-
// Workaround: Field config for LatestEndpointConfigName of generator config
54-
// does not code generate this correctly since this field is part of Spec
55-
// SageMaker users will need following information:
56-
// - latestEndpointConfig
57-
// - desiredEndpointConfig
58-
// - LastEndpointConfigNameForUpdate
59-
// - FailureReason
60-
// to determine the correct course of action in case of update to Endpoint fails
61-
if resp.EndpointConfigName != nil {
62-
ko.Status.LatestEndpointConfigName = resp.EndpointConfigName
63-
} else {
64-
ko.Status.LatestEndpointConfigName = nil
65-
}
66-
67-
svccommon.SetSyncedCondition(&resource{ko}, resp.EndpointStatus, &resourceName, &modifyingStatuses)
47+
func (rm *resourceManager) customDescribeEndpointSetOutput(ko *svcapitypes.Endpoint) {
48+
svccommon.SetSyncedCondition(&resource{ko}, ko.Status.EndpointStatus, &resourceName, &modifyingStatuses)
6849
}
6950

7051
// customUpdateEndpointSetOutput sets the resource ResourceSynced condition to False if endpoint is
7152
// being updated. At this stage we know call to updateEndpoint was successful.
7253
func (rm *resourceManager) customUpdateEndpointSetOutput(ko *svcapitypes.Endpoint) {
73-
// no nil check present here since Spec.EndpointConfigName is a required field
74-
ko.Status.LastEndpointConfigNameForUpdate = ko.Spec.EndpointConfigName
54+
// set last endpoint config name used for udapte in annotations
55+
// no nil check present since Spec.EndpointConfigName is a required field
56+
annotations := ko.ObjectMeta.GetAnnotations()
57+
if annotations == nil {
58+
annotations = make(map[string]string)
59+
}
60+
annotations[lastEndpointConfigForUpdateAnnotation] = *ko.Spec.EndpointConfigName
61+
ko.ObjectMeta.SetAnnotations(annotations)
62+
7563
// injecting Updating status to keep the Sync condition message and status.endpointStatus in sync
7664
ko.Status.EndpointStatus = aws.String(svcsdk.EndpointStatusUpdating)
7765

@@ -98,7 +86,15 @@ func (rm *resourceManager) customUpdateEndpointPreChecks(
9886

9987
failureReason := latest.ko.Status.FailureReason
10088
desiredEndpointConfig := desired.ko.Spec.EndpointConfigName
101-
lastEndpointConfigForUpdate := desired.ko.Status.LastEndpointConfigNameForUpdate
89+
90+
var lastEndpointConfigForUpdate *string = nil
91+
// get last endpoint config name used for update from annotations
92+
annotations := desired.ko.ObjectMeta.GetAnnotations()
93+
for k, v := range annotations {
94+
if k == lastEndpointConfigForUpdateAnnotation {
95+
lastEndpointConfigForUpdate = &v
96+
}
97+
}
10298

10399
// Case 2 - EndpointStatus == Failed
104100
if *latestStatus == svcsdk.EndpointStatusFailed ||
@@ -118,7 +114,7 @@ func (rm *resourceManager) customUpdateEndpointPreChecks(
118114
// 1 & 2 does not guarantee an update Failed. Hence we need to look at `*latestEndpointConfigName` to determine if the update was unsuccessful
119115
// `*desiredEndpointConfig != *latestEndpointConfig` + `*desiredEndpointConfig == *lastEndpointConfigForUpdate`+ `FailureReason != nil` indicate that an update is needed,
120116
// has already been tried and failed.
121-
return FailUpdateError
117+
return awserr.New("EndpointUpdateError", fmt.Sprintf("unable to update endpoint. check FailureReason. latest EndpointConfigName is %s", *latest.ko.Spec.EndpointConfigName), nil)
122118
}
123119

124120
return nil

pkg/resource/endpoint/sdk.go

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)