Skip to content

Commit 421496b

Browse files
authored
Process shutdown event after flushing data (#412)
1 parent b420e16 commit 421496b

File tree

2 files changed

+23
-15
lines changed

2 files changed

+23
-15
lines changed

CHANGELOG.asciidoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ https://github.com/elastic/apm-aws-lambda/compare/v1.4.0...main[View commits]
2525
[float]
2626
===== Bug fixes
2727
- Log a warning, instead of failing a Lambda function, if auth retrieval from AWS Secrets Manager fails. Reporting APM data will not work, but the Lambda function invocations will proceed. {lambda-pull}401[401]
28+
- Fix incorrect proxy transaction handling at shutdown caused by not flushing the data before processing the shutdown event. {lambda-pull}412[412]
2829
2930
[float]
3031
===== Features

app/run.go

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,28 @@ func (app *App) Run(ctx context.Context) error {
9696
backgroundDataSendWg.Wait()
9797
if event.EventType == extension.Shutdown {
9898
app.logger.Infof("Exiting due to shutdown event with reason %s", event.ShutdownReason)
99+
// Since we have waited for the processEvent loop to finish we
100+
// have already received all the data we can from the agent. So, we
101+
// flush all the data to make sure that shutdown can correctly deduce
102+
// any pending transactions.
103+
app.apmClient.FlushAPMData(ctx)
104+
// At shutdown we cannot expect platform.runtimeDone events to be
105+
// reported for the remaining invocations. If we haven't received the
106+
// transaction from agents at this point then it is safe to assume
107+
// that the function failed. We will create proxy transaction for all
108+
// invocations that haven't received a full transaction from the agent
109+
// yet. If the extension doesn't have enough CPU time it is possible that
110+
// the extension might not receive the shutdown signal for timeouts
111+
// or runtime crashes. In these cases we will miss the transaction.
112+
//
113+
// TODO (lahsivjar): Any partial transaction remaining will be added
114+
// to a new batch by OnShutdown and flushed from the defer call to
115+
// flush all data when this function exits. This causes 2 triggers
116+
// of flush; we can optimize this by clearing all buffered channels
117+
// then calling OnShutdown and finally flushing any remaining data.
118+
if err := app.batch.OnShutdown(event.ShutdownReason); err != nil {
119+
app.logger.Errorf("Error finalizing invocation on shutdown: %v", err)
120+
}
99121
return nil
100122
}
101123
if app.apmClient.ShouldFlush() {
@@ -153,21 +175,6 @@ func (app *App) processEvent(
153175
event.Timestamp,
154176
)
155177
case extension.Shutdown:
156-
// At shutdown we can not expect platform.runtimeDone events to be reported
157-
// for the remaining invocations. If we haven't received the transaction
158-
// from agents at this point then it is safe to assume that the function
159-
// failed. We will create proxy transaction for all invocations that
160-
// haven't received a full transaction from the agent yet. If extension
161-
// doesn't have enough CPU time it is possible that the extension might
162-
// not receive the shutdown signal for timeouts or runtime crashes. In
163-
// these cases we will miss the transaction.
164-
app.logger.Debugf("Received shutdown event with reason %s", event.ShutdownReason)
165-
defer func() {
166-
if err := app.batch.OnShutdown(event.ShutdownReason); err != nil {
167-
app.logger.Errorf("Error finalizing invocation on shutdown: %v", err)
168-
}
169-
}()
170-
171178
// platform.report metric (and some other metrics) might not have been
172179
// reported by the logs API even till shutdown. At shutdown we will make
173180
// a last attempt to collect and report these metrics. However, it is

0 commit comments

Comments
 (0)