Skip to content

Commit 4ff0b9a

Browse files
authored
feat: better handling of cost property calculation corner cases (#83)
1 parent 5d35be5 commit 4ff0b9a

File tree

7 files changed

+1108
-160
lines changed

7 files changed

+1108
-160
lines changed

pkg/propertyprovider/azure/provider.go

Lines changed: 41 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -55,14 +55,13 @@ const (
5555

5656
const (
5757
// The condition-related values in use by the Azure property provider.
58-
59-
// PropertyCollectionSucceededConditionType is a condition type that indicates whether a
60-
// property collection attempt has succeeded.
61-
PropertyCollectionSucceededConditionType = "AKSClusterPropertyCollectionSucceeded"
62-
PropertyCollectionSucceededReason = "AllPropertiesCollectedSuccessfully"
63-
PropertyCollectionFailedCostErrorReason = "FailedToCollectCosts"
64-
PropertyCollectionSucceededMessage = "All properties have been collected successfully"
65-
PropertyCollectionFailedCostErrorMessageTemplate = "An error has occurred when collecting cost properties: %v"
58+
CostPropertiesCollectionSucceededCondType = "AKSClusterCostPropertiesCollectionSucceeded"
59+
CostPropertiesCollectionSucceededReason = "CostsCalculated"
60+
CostPropertiesCollectionDegradedReason = "CostsCalculationDegraded"
61+
CostPropertiesCollectionFailedReason = "CostsCalculationFailed"
62+
CostPropertiesCollectionSucceededMsg = "All cost properties have been collected successfully"
63+
CostPropertiesCollectionDegradedMsgTemplate = "Cost properties are collected in a degraded mode with the following warning(s): %v"
64+
CostPropertiesCollectionFailedMsgTemplate = "An error has occurred when collecting cost properties: %v"
6665
)
6766

6867
// PropertyProvider is the Azure property provider for Fleet.
@@ -203,17 +202,20 @@ func (p *PropertyProvider) Collect(_ context.Context) propertyprovider.PropertyC
203202
ObservationTime: metav1.Now(),
204203
}
205204

206-
perCPUCost, perGBMemoryCost, err := p.nodeTracker.Costs()
207-
if err != nil {
208-
// Note that the last transition time is not tracked here, as the provider does not
209-
// track the previously returned condition. A timestamp will be added in the upper layer.
205+
perCPUCost, perGBMemoryCost, warnings, err := p.nodeTracker.Costs()
206+
switch {
207+
case err != nil:
208+
// An error occurred when calculating costs; do not set the cost properties and
209+
// track the error.
210210
conds = append(conds, metav1.Condition{
211-
Type: PropertyCollectionSucceededConditionType,
211+
Type: CostPropertiesCollectionSucceededCondType,
212212
Status: metav1.ConditionFalse,
213-
Reason: "FailedToCollectCosts",
214-
Message: fmt.Sprintf(PropertyCollectionFailedCostErrorMessageTemplate, err),
213+
Reason: CostPropertiesCollectionFailedReason,
214+
Message: fmt.Sprintf(CostPropertiesCollectionFailedMsgTemplate, err),
215215
})
216-
} else {
216+
case len(warnings) > 0:
217+
// The costs are calculated, but some warnings have been issued; set the cost
218+
// properties and report the warnings as a condition.
217219
properties[PerCPUCoreCostProperty] = clusterv1beta1.PropertyValue{
218220
Value: fmt.Sprintf(CostPrecisionTemplate, perCPUCost),
219221
ObservationTime: metav1.Now(),
@@ -222,6 +224,29 @@ func (p *PropertyProvider) Collect(_ context.Context) propertyprovider.PropertyC
222224
Value: fmt.Sprintf(CostPrecisionTemplate, perGBMemoryCost),
223225
ObservationTime: metav1.Now(),
224226
}
227+
conds = append(conds, metav1.Condition{
228+
Type: CostPropertiesCollectionSucceededCondType,
229+
Status: metav1.ConditionTrue,
230+
Reason: CostPropertiesCollectionDegradedReason,
231+
Message: fmt.Sprintf(CostPropertiesCollectionDegradedMsgTemplate, warnings),
232+
})
233+
default:
234+
// The costs are calculated successfully; set the cost properties and
235+
// report a success as a condition.
236+
properties[PerCPUCoreCostProperty] = clusterv1beta1.PropertyValue{
237+
Value: fmt.Sprintf(CostPrecisionTemplate, perCPUCost),
238+
ObservationTime: metav1.Now(),
239+
}
240+
properties[PerGBMemoryCostProperty] = clusterv1beta1.PropertyValue{
241+
Value: fmt.Sprintf(CostPrecisionTemplate, perGBMemoryCost),
242+
ObservationTime: metav1.Now(),
243+
}
244+
conds = append(conds, metav1.Condition{
245+
Type: CostPropertiesCollectionSucceededCondType,
246+
Status: metav1.ConditionTrue,
247+
Reason: CostPropertiesCollectionSucceededReason,
248+
Message: CostPropertiesCollectionSucceededMsg,
249+
})
225250
}
226251

227252
// Collect the resource properties.
@@ -250,18 +275,6 @@ func (p *PropertyProvider) Collect(_ context.Context) propertyprovider.PropertyC
250275
}
251276
resources.Available = available
252277

253-
// If no errors are found, report a success as a condition.
254-
if len(conds) == 0 {
255-
// Note that the last transition time is not tracked here, as the provider does not
256-
// track the previously returned condition. A timestamp will be added in the upper layer.
257-
conds = append(conds, metav1.Condition{
258-
Type: PropertyCollectionSucceededConditionType,
259-
Status: metav1.ConditionTrue,
260-
Reason: PropertyCollectionSucceededReason,
261-
Message: PropertyCollectionSucceededMessage,
262-
})
263-
}
264-
265278
// Return the collection response.
266279
return propertyprovider.PropertyCollectionResponse{
267280
Properties: properties,

0 commit comments

Comments
 (0)