Skip to content

Commit 9175257

Browse files
aThorp96zakisk
authored andcommitted
feat(perf): cache vcs.GetFiles() to reduce redundant VCS API volume
Calls to `vcs.GetFiles()` only result in one or two API calls (paging aside). However the method is called each time a CEL expression references `"path".pathChanged()` or the `on-path-change` annotation is checked for matches. Because of this, because matching is evaluated several times in a row, and because some users have tens of PipelineRuns in their `.tekton` directory, in some cases a single push event may cause PaC to make hundreds of API requests listing the same files repeatedly.
1 parent 1eabf1c commit 9175257

File tree

9 files changed

+162
-145
lines changed

9 files changed

+162
-145
lines changed

pkg/provider/bitbucketdatacenter/bitbucketdatacenter.go

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ type Provider struct {
4242
projectKey string
4343
repo *v1alpha1.Repository
4444
triggerEvent string
45+
cachedChangedFiles *changedfiles.ChangedFiles
4546
}
4647

4748
func (v Provider) Client() *scm.Client {
@@ -371,13 +372,28 @@ func (v *Provider) GetConfig() *info.ProviderConfig {
371372
}
372373
}
373374

375+
// GetFiles gets and caches the list of files changed by a given event.
374376
func (v *Provider) GetFiles(ctx context.Context, runevent *info.Event) (changedfiles.ChangedFiles, error) {
375-
OrgAndRepo := fmt.Sprintf("%s/%s", runevent.Organization, runevent.Repository)
376-
if runevent.TriggerTarget == triggertype.PullRequest {
377+
if v.cachedChangedFiles == nil {
378+
changes, err := v.fetchChangedFiles(ctx, runevent)
379+
if err != nil {
380+
return changedfiles.ChangedFiles{}, err
381+
}
382+
v.cachedChangedFiles = &changes
383+
}
384+
return *v.cachedChangedFiles, nil
385+
}
386+
387+
func (v *Provider) fetchChangedFiles(ctx context.Context, runevent *info.Event) (changedfiles.ChangedFiles, error) {
388+
changedFiles := changedfiles.ChangedFiles{}
389+
390+
orgAndRepo := fmt.Sprintf("%s/%s", runevent.Organization, runevent.Repository)
391+
392+
switch runevent.TriggerTarget {
393+
case triggertype.PullRequest:
377394
opts := &scm.ListOptions{Page: 1, Size: apiResponseLimit}
378-
changedFiles := changedfiles.ChangedFiles{}
379395
for {
380-
changes, _, err := v.Client().PullRequests.ListChanges(ctx, OrgAndRepo, runevent.PullRequestNumber, opts)
396+
changes, _, err := v.Client().PullRequests.ListChanges(ctx, orgAndRepo, runevent.PullRequestNumber, opts)
381397
if err != nil {
382398
return changedfiles.ChangedFiles{}, fmt.Errorf("failed to list changes for pull request: %w", err)
383399
}
@@ -408,14 +424,10 @@ func (v *Provider) GetFiles(ctx context.Context, runevent *info.Event) (changedf
408424

409425
opts.Page++
410426
}
411-
return changedFiles, nil
412-
}
413-
414-
if runevent.TriggerTarget == triggertype.Push {
427+
case triggertype.Push:
415428
opts := &scm.ListOptions{Page: 1, Size: apiResponseLimit}
416-
changedFiles := changedfiles.ChangedFiles{}
417429
for {
418-
changes, _, err := v.Client().Git.ListChanges(ctx, OrgAndRepo, runevent.SHA, opts)
430+
changes, _, err := v.Client().Git.ListChanges(ctx, orgAndRepo, runevent.SHA, opts)
419431
if err != nil {
420432
return changedfiles.ChangedFiles{}, fmt.Errorf("failed to list changes for commit %s: %w", runevent.SHA, err)
421433
}
@@ -442,9 +454,10 @@ func (v *Provider) GetFiles(ctx context.Context, runevent *info.Event) (changedf
442454

443455
opts.Page++
444456
}
445-
return changedFiles, nil
457+
default:
458+
// No action necessary
446459
}
447-
return changedfiles.ChangedFiles{}, nil
460+
return changedFiles, nil
448461
}
449462

450463
func (v *Provider) CreateToken(_ context.Context, _ []string, _ *info.Event) (string, error) {

pkg/provider/bitbucketdatacenter/bitbucketdatacenter_test.go

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,14 @@ import (
2222
"github.com/openshift-pipelines/pipelines-as-code/pkg/params/triggertype"
2323
"github.com/openshift-pipelines/pipelines-as-code/pkg/provider"
2424
bbtest "github.com/openshift-pipelines/pipelines-as-code/pkg/provider/bitbucketdatacenter/test"
25+
"github.com/openshift-pipelines/pipelines-as-code/pkg/test/metrics"
2526

2627
"github.com/jenkins-x/go-scm/scm"
2728
"go.uber.org/zap"
2829
zapobserver "go.uber.org/zap/zaptest/observer"
2930
"gotest.tools/v3/assert"
31+
"knative.dev/pkg/metrics/metricstest"
32+
_ "knative.dev/pkg/metrics/testing"
3033
rtesting "knative.dev/pkg/reconciler/testing"
3134
)
3235

@@ -796,6 +799,7 @@ func TestGetFiles(t *testing.T) {
796799
ctx, _ := rtesting.SetupFakeContext(t)
797800
client, mux, tearDown, tURL := bbtest.SetupBBDataCenterClient()
798801
defer tearDown()
802+
metrics.ResetMetrics()
799803

800804
stats := &bbtest.DiffStats{
801805
Values: tt.changeFiles,
@@ -821,13 +825,17 @@ func TestGetFiles(t *testing.T) {
821825
}
822826
})
823827
}
824-
v := &Provider{client: client, baseURL: tURL}
828+
829+
metricsTags := map[string]string{"provider": "bitbucket-datacenter", "event-type": string(tt.event.TriggerTarget)}
830+
metricstest.CheckStatsNotReported(t, "pipelines_as_code_git_provider_api_request_count")
831+
832+
v := &Provider{client: client, baseURL: tURL, triggerEvent: string(tt.event.TriggerTarget)}
825833
changedFiles, err := v.GetFiles(ctx, tt.event)
826834
if tt.wantError {
827835
assert.Equal(t, err.Error(), tt.errMsg)
828-
return
836+
} else {
837+
assert.NilError(t, err, nil)
829838
}
830-
assert.NilError(t, err, nil)
831839
assert.Equal(t, tt.wantAddedFilesCount, len(changedFiles.Added))
832840
assert.Equal(t, tt.wantDeletedFilesCount, len(changedFiles.Deleted))
833841
assert.Equal(t, tt.wantModifiedFilesCount, len(changedFiles.Modified))
@@ -844,6 +852,17 @@ func TestGetFiles(t *testing.T) {
844852
assert.Equal(t, tt.changeFiles[i].Path.ToString, changedFiles.All[i])
845853
}
846854
}
855+
856+
// Check caching
857+
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, 1)
858+
_, _ = v.GetFiles(ctx, tt.event)
859+
if tt.wantError {
860+
// No caching on error
861+
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, 2)
862+
} else {
863+
// Cache API results on success
864+
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, 1)
865+
}
847866
})
848867
}
849868
}

pkg/provider/github/github.go

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ type Provider struct {
5555
PaginedNumber int
5656
userType string // The type of user i.e bot or not
5757
skippedRun
58-
triggerEvent string
58+
triggerEvent string
59+
cachedChangedFiles *changedfiles.ChangedFiles
5960
}
6061

6162
type skippedRun struct {
@@ -518,11 +519,24 @@ func (v *Provider) getPullRequest(ctx context.Context, runevent *info.Event) (*i
518519
return runevent, nil
519520
}
520521

521-
// GetFiles get a files from pull request.
522+
// GetFiles gets and caches the list of files changed by a given event.
522523
func (v *Provider) GetFiles(ctx context.Context, runevent *info.Event) (changedfiles.ChangedFiles, error) {
523-
if runevent.TriggerTarget == triggertype.PullRequest {
524+
if v.cachedChangedFiles == nil {
525+
changes, err := v.fetchChangedFiles(ctx, runevent)
526+
if err != nil {
527+
return changedfiles.ChangedFiles{}, err
528+
}
529+
v.cachedChangedFiles = &changes
530+
}
531+
return *v.cachedChangedFiles, nil
532+
}
533+
534+
func (v *Provider) fetchChangedFiles(ctx context.Context, runevent *info.Event) (changedfiles.ChangedFiles, error) {
535+
changedFiles := changedfiles.ChangedFiles{}
536+
537+
switch runevent.TriggerTarget {
538+
case triggertype.PullRequest:
524539
opt := &github.ListOptions{PerPage: v.PaginedNumber}
525-
changedFiles := changedfiles.ChangedFiles{}
526540
for {
527541
repoCommit, resp, err := wrapAPI(v, "list_pull_request_files", func() ([]*github.CommitFile, *github.Response, error) {
528542
return v.Client().PullRequests.ListFiles(ctx, runevent.Organization, runevent.Repository, runevent.PullRequestNumber, opt)
@@ -550,11 +564,7 @@ func (v *Provider) GetFiles(ctx context.Context, runevent *info.Event) (changedf
550564
}
551565
opt.Page = resp.NextPage
552566
}
553-
return changedFiles, nil
554-
}
555-
556-
if runevent.TriggerTarget == "push" {
557-
changedFiles := changedfiles.ChangedFiles{}
567+
case triggertype.Push:
558568
rC, _, err := wrapAPI(v, "get_commit_files", func() (*github.RepositoryCommit, *github.Response, error) {
559569
return v.Client().Repositories.GetCommit(ctx, runevent.Organization, runevent.Repository, runevent.SHA, &github.ListOptions{})
560570
})
@@ -576,9 +586,10 @@ func (v *Provider) GetFiles(ctx context.Context, runevent *info.Event) (changedf
576586
changedFiles.Renamed = append(changedFiles.Renamed, *rC.Files[i].Filename)
577587
}
578588
}
579-
return changedFiles, nil
589+
default:
590+
// No action necessary
580591
}
581-
return changedfiles.ChangedFiles{}, nil
592+
return changedFiles, nil
582593
}
583594

584595
// getObject Get an object from a repository.

pkg/provider/github/github_test.go

Lines changed: 23 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ import (
2020
"github.com/jonboulle/clockwork"
2121
"github.com/openshift-pipelines/pipelines-as-code/pkg/apis/pipelinesascode/keys"
2222
"github.com/openshift-pipelines/pipelines-as-code/pkg/apis/pipelinesascode/v1alpha1"
23-
"github.com/openshift-pipelines/pipelines-as-code/pkg/metrics"
2423
"github.com/openshift-pipelines/pipelines-as-code/pkg/params"
2524
"github.com/openshift-pipelines/pipelines-as-code/pkg/params/clients"
2625
"github.com/openshift-pipelines/pipelines-as-code/pkg/params/info"
@@ -29,6 +28,8 @@ import (
2928
testclient "github.com/openshift-pipelines/pipelines-as-code/pkg/test/clients"
3029
ghtesthelper "github.com/openshift-pipelines/pipelines-as-code/pkg/test/github"
3130
"github.com/openshift-pipelines/pipelines-as-code/pkg/test/logger"
31+
"github.com/openshift-pipelines/pipelines-as-code/pkg/test/metrics"
32+
3233
"go.uber.org/zap"
3334
zapobserver "go.uber.org/zap/zaptest/observer"
3435
"gotest.tools/v3/assert"
@@ -347,7 +348,7 @@ func TestGetTektonDir(t *testing.T) {
347348
}
348349
for _, tt := range testGetTektonDir {
349350
t.Run(tt.name, func(t *testing.T) {
350-
resetMetrics()
351+
metrics.ResetMetrics()
351352
observer, exporter := zapobserver.New(zap.InfoLevel)
352353
fakelogger := zap.New(observer).Sugar()
353354
ctx, _ := rtesting.SetupFakeContext(t)
@@ -949,6 +950,7 @@ func TestGetFiles(t *testing.T) {
949950
wantDeletedFilesCount int
950951
wantModifiedFilesCount int
951952
wantRenamedFilesCount int
953+
wantAPIRequestCount int64
952954
}{
953955
{
954956
name: "pull-request",
@@ -977,6 +979,7 @@ func TestGetFiles(t *testing.T) {
977979
wantDeletedFilesCount: 1,
978980
wantModifiedFilesCount: 1,
979981
wantRenamedFilesCount: 1,
982+
wantAPIRequestCount: 2,
980983
},
981984
{
982985
name: "push",
@@ -1007,51 +1010,23 @@ func TestGetFiles(t *testing.T) {
10071010
wantDeletedFilesCount: 1,
10081011
wantModifiedFilesCount: 1,
10091012
wantRenamedFilesCount: 1,
1013+
wantAPIRequestCount: 1,
10101014
},
10111015
}
10121016
for _, tt := range tests {
10131017
t.Run(tt.name, func(t *testing.T) {
10141018
fakeclient, mux, _, teardown := ghtesthelper.SetupGH()
10151019
defer teardown()
1016-
prCommitFiles := []*github.CommitFile{
1017-
{
1018-
Filename: ptr.String("modified.yaml"),
1019-
Status: ptr.String("modified"),
1020-
}, {
1021-
Filename: ptr.String("added.doc"),
1022-
Status: ptr.String("added"),
1023-
}, {
1024-
Filename: ptr.String("removed.yaml"),
1025-
Status: ptr.String("removed"),
1026-
}, {
1027-
Filename: ptr.String("renamed.doc"),
1028-
Status: ptr.String("renamed"),
1029-
},
1030-
}
1020+
metrics.ResetMetrics()
10311021

1032-
pushCommitFiles := []*github.CommitFile{
1033-
{
1034-
Filename: ptr.String("modified.yaml"),
1035-
Status: ptr.String("modified"),
1036-
}, {
1037-
Filename: ptr.String("added.doc"),
1038-
Status: ptr.String("added"),
1039-
}, {
1040-
Filename: ptr.String("removed.yaml"),
1041-
Status: ptr.String("removed"),
1042-
}, {
1043-
Filename: ptr.String("renamed.doc"),
1044-
Status: ptr.String("renamed"),
1045-
},
1046-
}
10471022
if tt.event.TriggerTarget == "pull_request" {
10481023
mux.HandleFunc(fmt.Sprintf("/repos/%s/%s/pulls/%d/files",
10491024
tt.event.Organization, tt.event.Repository, tt.event.PullRequestNumber), func(rw http.ResponseWriter, r *http.Request) {
10501025
if r.URL.Query().Get("page") == "" || r.URL.Query().Get("page") == "1" {
10511026
rw.Header().Add("Link", fmt.Sprintf("<https://api.github.com/repos/%s/%s/pulls/%d/files?page=2>; rel=\"next\"", tt.event.Organization, tt.event.Repository, tt.event.PullRequestNumber))
10521027
fmt.Fprint(rw, "[]")
10531028
} else {
1054-
b, _ := json.Marshal(prCommitFiles)
1029+
b, _ := json.Marshal(tt.commitFiles)
10551030
fmt.Fprint(rw, string(b))
10561031
}
10571032
})
@@ -1060,17 +1035,26 @@ func TestGetFiles(t *testing.T) {
10601035
mux.HandleFunc(fmt.Sprintf("/repos/%s/%s/commits/%s",
10611036
tt.event.Organization, tt.event.Repository, tt.event.SHA), func(rw http.ResponseWriter, _ *http.Request) {
10621037
c := &github.RepositoryCommit{
1063-
Files: pushCommitFiles,
1038+
Files: tt.commit.Files,
10641039
}
10651040
b, _ := json.Marshal(c)
10661041
fmt.Fprint(rw, string(b))
10671042
})
10681043
}
10691044

1045+
metricsTags := map[string]string{"provider": "github", "event-type": string(tt.event.TriggerTarget)}
1046+
metricstest.CheckStatsNotReported(t, "pipelines_as_code_git_provider_api_request_count")
1047+
1048+
log, _ := logger.GetLogger()
10701049
ctx, _ := rtesting.SetupFakeContext(t)
10711050
provider := &Provider{
10721051
ghClient: fakeclient,
10731052
PaginedNumber: 1,
1053+
1054+
// necessary for metrics
1055+
providerName: "github",
1056+
triggerEvent: string(tt.event.TriggerTarget),
1057+
Logger: log,
10741058
}
10751059
changedFiles, err := provider.GetFiles(ctx, tt.event)
10761060
assert.NilError(t, err, nil)
@@ -1089,6 +1073,11 @@ func TestGetFiles(t *testing.T) {
10891073
assert.Equal(t, *tt.commit.Files[i].Filename, changedFiles.All[i])
10901074
}
10911075
}
1076+
1077+
// Check caching
1078+
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, tt.wantAPIRequestCount)
1079+
_, _ = provider.GetFiles(ctx, tt.event)
1080+
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, tt.wantAPIRequestCount)
10921081
})
10931082
}
10941083
}
@@ -1439,18 +1428,6 @@ func TestIsHeadCommitOfBranch(t *testing.T) {
14391428
}
14401429
}
14411430

1442-
func resetMetrics() {
1443-
metricstest.Unregister(
1444-
"pipelines_as_code_pipelinerun_count",
1445-
"pipelines_as_code_pipelinerun_duration_seconds_sum",
1446-
"pipelines_as_code_running_pipelineruns_count",
1447-
"pipelines_as_code_git_provider_api_request_count",
1448-
)
1449-
1450-
// have to reset sync.Once to allow recreation of Recorder.
1451-
metrics.ResetRecorder()
1452-
}
1453-
14541431
func TestCreateComment(t *testing.T) {
14551432
tests := []struct {
14561433
name string

0 commit comments

Comments
 (0)