Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
214 changes: 214 additions & 0 deletions checksum_benchmark_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
//go:build fsbench
// +build fsbench

package task_test

import (
"bytes"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"testing"
"time"

"github.com/stretchr/testify/require"

"github.com/go-task/task/v3"
)

// Fixture dimensions for the two benchmark scenarios in this file.
const (
// manySmallFileCount is the number of source files created for
// BenchmarkManySmallFiles.
manySmallFileCount = 20_000
// smallFileSize is the size, in bytes, of each file in the
// many-small-files fixture.
smallFileSize = 5
// fewLargeFileCount is the number of source files created for
// BenchmarkFewLargeFiles.
fewLargeFileCount = 4
// largeFileSize is the size, in bytes, of each file in the
// few-large-files fixture (128 MiB).
largeFileSize = 128 * 1024 * 1024
)

func BenchmarkManySmallFiles(b *testing.B) {
dir := b.TempDir()
createBenchmarkFixture(b, dir, manySmallFileCount, smallFileSize)

benchmarkModes(b, dir, manySmallFileCount, smallFileSize)
}

func BenchmarkFewLargeFiles(b *testing.B) {
dir := b.TempDir()
createBenchmarkFixture(b, dir, fewLargeFileCount, largeFileSize)

benchmarkModes(b, dir, fewLargeFileCount, largeFileSize)
}

func benchmarkModes(b *testing.B, dir string, fileCount int, fileSize int64) {
b.Helper()

for _, mode := range []struct {
name string
task string
expectCache bool
nativeMTime bool
}{
{name: "checksum", task: "checksum-yaml", expectCache: true},
{name: "timestamp", task: "timestamp-yaml", expectCache: true},
{name: "native-mtime", nativeMTime: true},
{name: "none", task: "uncached-yaml"},
} {
b.Run(mode.name, func(b *testing.B) {
if mode.nativeMTime {
benchmarkNativeMTime(b, dir, fileCount, fileSize)
return
}
benchmarkTask(b, dir, mode.task, mode.expectCache, fileCount, fileSize)
})
}
}

// benchmarkTask measures running the Taskfile task taskName from dir with a
// freshly constructed executor on every iteration.
//
// When expectCache is true the task is run once before the timer starts so
// that the timed iterations exercise the up-to-date check, and every timed
// iteration must report the task as up to date. In that case throughput
// (b.SetBytes) and the source_files/op and source_MiB/op metrics are reported
// from fileCount and fileSize. When expectCache is false the task is simply
// executed each iteration and no source metrics are reported.
func benchmarkTask(
	b *testing.B,
	dir string,
	taskName string,
	expectCache bool,
	fileCount int,
	fileSize int64,
) {
	b.Helper()

	tempDir := task.TempDir{
		Remote:      filepath.Join(dir, ".task"),
		Fingerprint: filepath.Join(dir, ".task"),
	}

	if expectCache {
		// Warm-up run, outside the timed region, so the timed iterations
		// find existing fingerprint state.
		e := task.NewExecutor(
			task.WithDir(dir),
			task.WithStdout(io.Discard),
			task.WithStderr(io.Discard),
			task.WithTempDir(tempDir),
		)
		require.NoError(b, e.Setup())
		require.NoError(b, e.Run(b.Context(), &task.Call{Task: taskName}))
	}

	// Built once, before the timer is reset: formatting this inside the loop
	// would add the benchmark's own allocations to the reported allocs/op.
	upToDateMsg := fmt.Sprintf(`Task "%s" is up to date`, taskName)

	b.ReportAllocs()
	sourceBytes := int64(fileCount) * fileSize
	if expectCache {
		b.SetBytes(sourceBytes)
	}
	b.ResetTimer()
	for range b.N {
		var buff bytes.Buffer
		e := task.NewExecutor(
			task.WithDir(dir),
			task.WithStdout(&buff),
			task.WithStderr(&buff),
			task.WithTempDir(tempDir),
		)
		require.NoError(b, e.Setup())
		require.NoError(b, e.Run(b.Context(), &task.Call{Task: taskName}))
		if expectCache {
			require.Contains(b, buff.String(), upToDateMsg)
		}
	}
	if expectCache {
		b.ReportMetric(float64(fileCount), "source_files/op")
		b.ReportMetric(float64(sourceBytes)/(1024*1024), "source_MiB/op")
	}
}

// benchmarkNativeMTime measures a plain modification-time comparison that
// does not go through the task executor: each iteration stats a single output
// file and walks the fixture's source tree via nativeMTimeUpToDate.
//
// The output file is written under dir/out and stamped one second into the
// future so that every source file compares as not newer; each iteration
// asserts that result. fileCount and fileSize are used only for the reported
// throughput and source metrics.
func benchmarkNativeMTime(b *testing.B, dir string, fileCount int, fileSize int64) {
	b.Helper()

	var (
		output      = filepath.Join(dir, "out", "native-mtime.txt")
		sourceRoot  = filepath.Join(dir, "path", "to", "folder")
		sourceBytes = int64(fileCount) * fileSize
	)

	require.NoError(b, os.WriteFile(output, []byte("ok"), 0o644))
	stamp := time.Now().Add(time.Second)
	require.NoError(b, os.Chtimes(output, stamp, stamp))

	b.ReportAllocs()
	b.SetBytes(sourceBytes)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		info, statErr := os.Stat(output)
		require.NoError(b, statErr)

		fresh, walkErr := nativeMTimeUpToDate(sourceRoot, info.ModTime())
		require.NoError(b, walkErr)
		require.True(b, fresh)
	}

	const mib = 1024 * 1024
	b.ReportMetric(float64(fileCount), "source_files/op")
	b.ReportMetric(float64(sourceBytes)/mib, "source_MiB/op")
}

// nativeMTimeUpToDate reports whether no ".yaml" file under sourceRoot has a
// modification time strictly after outputTime.
//
// Directories and files with any other extension are ignored. The walk stops
// at the first newer file. Any error from walking the tree or reading a
// file's info is returned as-is.
func nativeMTimeUpToDate(sourceRoot string, outputTime time.Time) (bool, error) {
	stale := false

	visit := func(path string, entry fs.DirEntry, visitErr error) error {
		switch {
		case visitErr != nil:
			return visitErr
		case entry.IsDir(), filepath.Ext(path) != ".yaml":
			return nil
		}

		info, err := entry.Info()
		if err != nil {
			return err
		}
		if !info.ModTime().After(outputTime) {
			return nil
		}

		// One newer source is enough; no need to look at the rest.
		stale = true
		return fs.SkipAll
	}

	walkErr := filepath.WalkDir(sourceRoot, visit)
	return !stale, walkErr
}

// createBenchmarkFixture populates dir with everything the benchmarks need:
// a Taskfile.yml defining the checksum-yaml, timestamp-yaml and uncached-yaml
// tasks, an out/ directory, and fileCount ".yaml" source files of fileSize
// bytes each under path/to/folder/.
//
// Any filesystem error fails tb immediately via require.
func createBenchmarkFixture(tb testing.TB, dir string, fileCount int, fileSize int64) {
tb.Helper()

// NOTE(review): the Taskfile literal below is whitespace-sensitive YAML; it
// appears without nesting indentation in this view — confirm the indentation
// is intact in the real file.
taskfile := `version: '3'

tasks:
checksum-yaml:
sources:
- path/to/folder/**/*.yaml
generates:
- out/checksum.txt
cmds:
- printf ok > out/checksum.txt

timestamp-yaml:
method: timestamp
sources:
- path/to/folder/**/*.yaml
generates:
- out/timestamp.txt
cmds:
- printf ok > out/timestamp.txt

uncached-yaml:
method: none
cmds:
- printf ok > out/uncached.txt
`
require.NoError(tb, os.WriteFile(filepath.Join(dir, "Taskfile.yml"), []byte(taskfile), 0o644))
// The task commands redirect into out/, so the directory is created up front.
require.NoError(tb, os.MkdirAll(filepath.Join(dir, "out"), 0o755))

// Files are numbered from 1 and bucketed by i/100 into zero-padded
// subdirectories, so no subdirectory holds more than 100 files.
for i := 1; i <= fileCount; i++ {
subdir := filepath.Join(dir, "path", "to", "folder", fmt.Sprintf("%04d", i/100))
require.NoError(tb, os.MkdirAll(subdir, 0o755))
name := filepath.Join(subdir, fmt.Sprintf("file-%05d.yaml", i))
createSparseFile(tb, name, fileSize)
}
}

// createSparseFile creates (or truncates) the file at name and sets its
// length to size bytes with Truncate, without writing any data.
// Presumably this yields a sparse file on filesystems that support holes —
// that depends on the platform and is not guaranteed here.
//
// Any error from opening, resizing or closing the file fails tb.
func createSparseFile(tb testing.TB, name string, size int64) {
	tb.Helper()

	const flags = os.O_CREATE | os.O_WRONLY | os.O_TRUNC
	f, err := os.OpenFile(name, flags, 0o644)
	require.NoError(tb, err)
	defer func() {
		closeErr := f.Close()
		require.NoError(tb, closeErr)
	}()

	truncErr := f.Truncate(size)
	require.NoError(tb, truncErr)
}
69 changes: 69 additions & 0 deletions internal/fingerprint/glob.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
package fingerprint

import (
"errors"
"io/fs"
"os"
"path/filepath"
"sort"
"strings"

"github.com/go-task/task/v3/internal/execext"
"github.com/go-task/task/v3/internal/filepathext"
"github.com/go-task/task/v3/taskfile/ast"
)

// errFastGlobFallback is a sentinel returned from the directory-walk callback
// in fastRecursiveGlob to abort the walk. fastRecursiveGlob converts it into a
// "not handled" result rather than surfacing it as an error.
var errFastGlobFallback = errors.New("fast glob fallback")

func Globs(dir string, globs []*ast.Glob) ([]string, error) {
resultMap := make(map[string]bool)
for _, g := range globs {
Expand All @@ -27,6 +32,10 @@ func Globs(dir string, globs []*ast.Glob) ([]string, error) {
func glob(dir string, g string) ([]string, error) {
g = filepathext.SmartJoin(dir, g)

if results, ok, err := fastRecursiveGlob(g); ok {
return results, err
}

fs, err := execext.ExpandFields(g)
if err != nil {
return nil, err
Expand All @@ -47,6 +56,66 @@ func glob(dir string, g string) ([]string, error) {
return collectKeys(results), nil
}

// fastRecursiveGlob is a fast path for patterns of the form
// "<root>/**/<namePattern>" (using the OS path separator), resolved with a
// single directory walk.
//
// The boolean result reports whether the fast path handled the pattern. When
// it is false the slice and error are nil and the caller is expected to fall
// back to the general glob implementation. When it is true, either the
// matches or the error encountered during the walk is returned.
//
// A pattern is handled only if, after cleaning, it contains exactly one
// "/**/" segment, has a non-empty root with no glob metacharacters, and has a
// non-empty name pattern that contains no path separator and no braces.
func fastRecursiveGlob(pattern string) ([]string, bool, error) {
pattern = filepath.Clean(pattern)
separator := string(os.PathSeparator)
marker := separator + "**" + separator

// Decline patterns with no recursive segment, or with more than one.
idx := strings.Index(pattern, marker)
if idx == -1 || strings.Contains(pattern[idx+len(marker):], marker) {
return nil, false, nil
}

root := pattern[:idx]
namePattern := pattern[idx+len(marker):]
// The name pattern must be a single path element, matched against base
// names only.
if root == "" || namePattern == "" || strings.Contains(namePattern, separator) {
return nil, false, nil
}
// The root is passed to WalkDir as a literal directory, so it must not
// contain glob syntax itself.
if strings.Contains(root, "**") || strings.ContainsAny(root, "*?[]{}") {
return nil, false, nil
}
// Braces are not part of filepath.Match syntax; leave such patterns to the
// general implementation.
if strings.ContainsAny(namePattern, "{}") {
return nil, false, nil
}

results := make(map[string]bool)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possibly use a slice rather than map.

var results []string
....
matched, _ := filepath.Match(namePattern, d.Name()) // d.Name() is faster than filepath.Base(path)
if !matched {
return nil
}
results = append(results, path)
return nil
...
return results, true, nil

err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
if d.IsDir() {
return nil
}

// filepath.WalkDir does not follow symbolic links, so a symlink that
// resolves to a directory would not be descended into here. Abort the
// fast path in that case and let the caller use the general glob.
if d.Type()&fs.ModeSymlink != 0 {
info, err := os.Stat(path)
if err != nil {
return err
}
if info.IsDir() {
return errFastGlobFallback
}
}

// Match on the base name only; files directly inside root are visited by
// the walk as well, so they are candidates too.
matched, err := filepath.Match(namePattern, filepath.Base(path))
if err != nil {
return err
}
if !matched {
return nil
}
results[path] = true
return nil
})
// The sentinel means "not handled", not a failure.
if errors.Is(err, errFastGlobFallback) {
return nil, false, nil
}
if err != nil {
return nil, true, err
}
return collectKeys(results), true, nil
}

func collectKeys(m map[string]bool) []string {
keys := make([]string, 0, len(m))
for k, v := range m {
Expand Down