diff --git a/README.md b/README.md index 07188f6fc..dd9eceec9 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,7 @@ Cloudfuse supports clouds with an S3 or Azure interface. - [From Source](#from-source) - [Basic Use](#basic-use) - [Health Monitor](#health-monitor) +- [Offline Access (New)](#offline-access-new) - [Command Line Interface](#command-line-interface) - [Limitations](#limitations) - [License](#license) @@ -307,6 +308,12 @@ Cloudfuse also supports a health monitor. The health monitor allows customers gain more insight into how their Cloudfuse instance is behaving with the rest of their machine. Visit [here](https://github.com/Seagate/cloudfuse/wiki/Health-Monitor) to set it up. +## Offline Access (New) + +Cloudfuse now supports offline access through the `file_cache` component. When cloud storage is unreachable, reads and writes continue against the local cache and are flushed to cloud storage once connectivity is restored. The feature is **enabled by default** and can be disabled via the `block-offline-access` flag. + +> **Note:** Cloudfuse uses eventual consistency with last-writer-wins semantics. Offline access can extend the consistency window indefinitely and **increases the risk of data conflicts in multi-client setups!** See [component/file_cache/OfflineAccess.md](component/file_cache/OfflineAccess.md) for full details and configuration guidance. 
+ ## Limitations ### NOTICE diff --git a/common/lock_map.go b/common/lock_map.go index ec9bb2aff..bfbada53d 100644 --- a/common/lock_map.go +++ b/common/lock_map.go @@ -38,8 +38,7 @@ type LockMapItem struct { mtx sync.RWMutex downloadTime time.Time // track if file is in lazy open state - LazyOpen bool - SyncPending bool + LazyOpen bool } // Map holding locks for all the files diff --git a/common/types.go b/common/types.go index 4c9511e90..751e1a60e 100644 --- a/common/types.go +++ b/common/types.go @@ -123,6 +123,28 @@ func (e CloudUnreachableError) Is(target error) bool { return ok } +type NoCachedDataError struct { + Message string + CacheError error +} + +func NewNoCachedDataError(originalError error) NoCachedDataError { + return NoCachedDataError{ + Message: "Failed to connect to cloud storage", + CacheError: originalError, + } +} +func (e NoCachedDataError) Error() string { + return fmt.Sprintf("%s. Here's why: %v", e.Message, e.CacheError) +} +func (e NoCachedDataError) Unwrap() error { + return e.CacheError +} +func (e NoCachedDataError) Is(target error) bool { + _, ok := target.(*NoCachedDataError) + return ok +} + var DefaultWorkDir string var DefaultLogFilePath string var StatsConfigFilePath string diff --git a/component/attr_cache/attr_cache.go b/component/attr_cache/attr_cache.go index 60167473c..c51b8c233 100644 --- a/component/attr_cache/attr_cache.go +++ b/component/attr_cache/attr_cache.go @@ -27,6 +27,7 @@ package attr_cache import ( "context" + "errors" "fmt" "os" "path" @@ -36,6 +37,7 @@ import ( "syscall" "time" + "github.com/Seagate/cloudfuse/common" "github.com/Seagate/cloudfuse/common/config" "github.com/Seagate/cloudfuse/common/log" "github.com/Seagate/cloudfuse/internal" @@ -403,6 +405,10 @@ func (ac *AttrCache) backgroundCleanup() { // cleanupExpiredEntries: removes expired entries from the cache map // This runs in a background goroutine to prevent memory leaks func (ac *AttrCache) cleanupExpiredEntries() { + // do not cleanup when 
offline + if !ac.NextComponent().CloudConnected() { + return + } // First pass: collect keys to delete under read lock to minimize write lock duration var keysToDelete []string @@ -577,6 +583,25 @@ func (ac *AttrCache) StreamDir( options.Name, numAdded, len(pathList)) } } + } else if errors.Is(err, &common.CloudUnreachableError{}) { + // return expired cachedPathList + if cachedPathList != nil { + pathList = cachedPathList + nextToken = cachedToken + } else { + // return whatever entries we have (but only if the token is empty) + entry, found := ac.cache.get(options.Name) + if options.Token == "" && found { + for _, v := range entry.children { + if v.exists() && v.valid() { + pathList = append(pathList, v.attr) + } + } + } else { + // the cloud is unavailable, and we have nothing to provide + err = common.NewNoCachedDataError(err) + } + } } // values should be returned in ascending order by key, without duplicates // sort @@ -627,7 +652,9 @@ func (ac *AttrCache) fetchCachedDirList( listDirCache, found := ac.cache.get(path) if !found { log.Warn("AttrCache::fetchCachedDirList : %s directory not found in cache", path) - return nil, "", fmt.Errorf("%s directory not found in cache", path) + return nil, "", common.NewNoCachedDataError( + fmt.Errorf("%s directory not found in cache", path), + ) } // is the requested data cached? 
cachedListSegment, found := listDirCache.listCache[token] @@ -640,9 +667,7 @@ func (ac *AttrCache) fetchCachedDirList( // check timeout if time.Since(cachedListSegment.cachedAt).Seconds() >= float64(ac.cacheTimeout) { log.Info("AttrCache::fetchCachedDirList : %s listing segment %s cache expired", path, token) - // drop the invalid segment from the list cache - delete(listDirCache.listCache, token) - return nil, "", fmt.Errorf( + return cachedListSegment.entries, "", fmt.Errorf( "%s directory listing segment %s cache expired", path, token, @@ -714,12 +739,6 @@ func (ac *AttrCache) cacheListSegment( } // add the new entry listDirItem.listCache[token] = newListCacheSegment - // scan the listing cache and remove expired entries - for k, v := range listDirItem.listCache { - if currTime.Sub(v.cachedAt).Seconds() >= float64(ac.cacheTimeout) { - delete(listDirItem.listCache, k) - } - } log.Trace("AttrCache::cacheListSegment : %s cached list entries \"%s\"-\"%s\" (%d items)", listDirPath, token, nextToken, len(pathList)) } @@ -1175,9 +1194,8 @@ func (ac *AttrCache) GetAttr(options internal.GetAttrOptions) (*internal.ObjAttr var errFromCache error ac.cacheLock.RLock() value, found := ac.cache.get(options.Name) - if found && value.valid() && time.Since(value.cachedAt).Seconds() < float64(ac.cacheTimeout) { - // Serve the request from the attribute cache - respondFromCache = true + if found && value.valid() { + // record cache response if !value.exists() { // log.Debug("AttrCache::GetAttr : %s found, (ENOENT) served from cache", options.Name) errFromCache = syscall.ENOENT @@ -1185,10 +1203,14 @@ func (ac *AttrCache) GetAttr(options internal.GetAttrOptions) (*internal.ObjAttr // log.Debug("AttrCache::GetAttr : %s found, served from cache", options.Name) attrFromCache = value.attr } - } else if ac.cacheDirs { + // only serve this response if it's not expired + if time.Since(value.cachedAt).Seconds() < float64(ac.cacheTimeout) { + respondFromCache = true + } + } + if ac.cacheDirs 
&& !respondFromCache { // drill up for the nearest valid parent directory attribute cache - parent, found := ac.cache.getCachedParent(options.Name) - if found && time.Since(parent.cachedAt).Seconds() < float64(ac.cacheTimeout) { + if parent, found := ac.cache.getCachedParent(options.Name); found { // Remember, we have no entry for options.Name // parent is its nearest valid ancestor // So, if parent doesn't exist, options.Name must not exist @@ -1200,8 +1222,9 @@ func (ac *AttrCache) GetAttr(options internal.GetAttrOptions) (*internal.ObjAttr // options.Name, // parent.exists(), // ) - respondFromCache = true errFromCache = syscall.ENOENT + // only serve this response if it's not expired + respondFromCache = time.Since(parent.cachedAt).Seconds() < float64(ac.cacheTimeout) } } } @@ -1213,18 +1236,12 @@ func (ac *AttrCache) GetAttr(options internal.GetAttrOptions) (*internal.ObjAttr // The answer is not cached, or it's expired // Get the attributes from next component pathAttr, err := ac.NextComponent().GetAttr(options) - // return unexpected errors immediately (no valid response to cache) - if err != nil && !os.IsNotExist(err) { - log.Debug("AttrCache::GetAttr : %s encountered error [%v]", options.Name, err) - return pathAttr, err - } - // response is valid - cache it - ac.cacheLock.Lock() - defer ac.cacheLock.Unlock() - switch err { - case nil: - log.Debug("AttrCache::GetAttr : %s got attributes from cloud, caching result", options.Name) + switch { + case err == nil: // Retrieved attributes so cache them + log.Debug("AttrCache::GetAttr : %s Caching record from cloud", options.Name) + ac.cacheLock.Lock() + defer ac.cacheLock.Unlock() ac.cache.insert(insertOptions{ attr: pathAttr, exists: true, @@ -1233,14 +1250,29 @@ func (ac *AttrCache) GetAttr(options internal.GetAttrOptions) (*internal.ObjAttr if ac.cacheDirs { ac.markAncestorsInCloud(getParentDir(options.Name), time.Now()) } - case syscall.ENOENT: - log.Debug("AttrCache::GetAttr : %s not found, caching ENOENT 
result", options.Name) + case err == syscall.ENOENT: // cache this entity not existing + log.Debug("AttrCache::GetAttr : %s Caching ENOENT from cloud", options.Name) + ac.cacheLock.Lock() + defer ac.cacheLock.Unlock() ac.cache.insert(insertOptions{ attr: internal.CreateObjAttr(options.Name, 0, time.Now()), exists: false, cachedAt: time.Now(), }) + case errors.Is(err, &common.CloudUnreachableError{}): + // the cloud connection is down + // do we have an expired response from cache? Let's serve that. + haveExpiredResponse := attrFromCache != nil || errFromCache != nil + if haveExpiredResponse { + log.Warn("AttrCache::GetAttr : %s Serving expired cached data (offline)", options.Name) + return attrFromCache, errors.Join(errFromCache, err) + } else { + log.Err("AttrCache::GetAttr : %s No cached data (offline)", options.Name) + return nil, common.NewNoCachedDataError(err) + } + default: + log.Err("AttrCache::GetAttr : %s encountered error [%v]", options.Name, err) } return pathAttr, err } diff --git a/component/attr_cache/attr_cache_test.go b/component/attr_cache/attr_cache_test.go index f04ab8f45..09f0d5826 100644 --- a/component/attr_cache/attr_cache_test.go +++ b/component/attr_cache/attr_cache_test.go @@ -884,6 +884,52 @@ func (suite *attrCacheTestSuite) TestStreamDirError() { } } +func (suite *attrCacheTestSuite) TestStreamDirOfflineExpired() { + defer suite.cleanupTest() + + dirPath := "dir" + entry := getPathAttr("dir/file", defaultSize, fs.FileMode(defaultMode), true) + oldTime := time.Now().Add( + -(time.Duration(suite.attrCache.cacheTimeout) * time.Second) - time.Minute, + ) + + suite.addPathToCache(dirPath+"/", false) + dirItem, found := suite.attrCache.cache.get(dirPath) + suite.assert.True(found) + dirItem.listCache = map[string]listCacheSegment{ + "": { + entries: []*internal.ObjAttr{entry}, + nextToken: "", + cachedAt: oldTime, + }, + } + + options := internal.StreamDirOptions{Name: dirPath} + cloudErr := common.NewCloudUnreachableError(errors.New("network 
unavailable")) + suite.mock.EXPECT().StreamDir(options).Return(nil, "", cloudErr) + + result, token, err := suite.attrCache.StreamDir(options) + suite.assert.Equal([]*internal.ObjAttr{entry}, result) + suite.assert.Empty(token) + suite.assert.Error(err) + suite.assert.ErrorIs(err, &common.CloudUnreachableError{}) +} + +func (suite *attrCacheTestSuite) TestStreamDirOfflineNoData() { + defer suite.cleanupTest() + + options := internal.StreamDirOptions{Name: "dir"} + cloudErr := common.NewCloudUnreachableError(errors.New("network unavailable")) + suite.mock.EXPECT().StreamDir(options).Return(nil, "", cloudErr) + + result, token, err := suite.attrCache.StreamDir(options) + suite.assert.Nil(result) + suite.assert.Empty(token) + suite.assert.Error(err) + suite.assert.ErrorIs(err, &common.NoCachedDataError{}) + suite.assert.ErrorIs(err, &common.CloudUnreachableError{}) +} + // Test whether the attribute cache correctly tracks which directories are in cloud storage func (suite *attrCacheTestSuite) TestDirInCloud() { defer suite.cleanupTest() @@ -1922,6 +1968,69 @@ func (suite *attrCacheTestSuite) TestGetAttrWithCompleteParentListing() { suite.assert.Nil(result) } +func (suite *attrCacheTestSuite) TestGetAttrOfflineExpired() { + defer suite.cleanupTest() + + path := "file" + options := internal.GetAttrOptions{Name: path} + oldTime := time.Now().Add( + -(time.Duration(suite.attrCache.cacheTimeout) * time.Second) - time.Minute, + ) + + suite.addPathToCache(path, true) + cacheItem, found := suite.attrCache.cache.get(path) + suite.assert.True(found) + cacheItem.cachedAt = oldTime + + cloudErr := common.NewCloudUnreachableError(errors.New("network unavailable")) + suite.mock.EXPECT().GetAttr(options).Return(nil, cloudErr) + + result, err := suite.attrCache.GetAttr(options) + suite.assert.Equal(cacheItem.attr, result) + suite.assert.Error(err) + suite.assert.ErrorIs(err, &common.CloudUnreachableError{}) +} + +func (suite *attrCacheTestSuite) 
TestGetAttrOfflineWithCompleteParentListingExpired() { + defer suite.cleanupTest() + + parentPath := "dir/" + childPath := "dir/missing" + options := internal.GetAttrOptions{Name: childPath} + oldTime := time.Now().Add( + -(time.Duration(suite.attrCache.cacheTimeout) * time.Second) - time.Minute, + ) + + suite.addPathToCache(parentPath, false) + parentItem, found := suite.attrCache.cache.get(internal.TruncateDirName(parentPath)) + suite.assert.True(found) + parentItem.listingComplete = true + parentItem.cachedAt = oldTime + + cloudErr := common.NewCloudUnreachableError(errors.New("network unavailable")) + suite.mock.EXPECT().GetAttr(options).Return(nil, cloudErr) + + result, err := suite.attrCache.GetAttr(options) + suite.assert.Nil(result) + suite.assert.Error(err) + suite.assert.ErrorIs(err, syscall.ENOENT) + suite.assert.ErrorIs(err, &common.CloudUnreachableError{}) +} + +func (suite *attrCacheTestSuite) TestGetAttrOfflineNoData() { + defer suite.cleanupTest() + + options := internal.GetAttrOptions{Name: "missing"} + cloudErr := common.NewCloudUnreachableError(errors.New("network unavailable")) + suite.mock.EXPECT().GetAttr(options).Return(nil, cloudErr) + + result, err := suite.attrCache.GetAttr(options) + suite.assert.Nil(result) + suite.assert.Error(err) + suite.assert.ErrorIs(err, &common.NoCachedDataError{}) + suite.assert.ErrorIs(err, &common.CloudUnreachableError{}) +} + // Tests Cache Timeout func (suite *attrCacheTestSuite) TestCacheTimeout() { defer suite.cleanupTest() @@ -1932,6 +2041,7 @@ func (suite *attrCacheTestSuite) TestCacheTimeout() { config, ) // setup a new attr cache with a custom config (clean up will occur after the test as usual) suite.assert.EqualValues(cacheTimeout, suite.attrCache.cacheTimeout) + suite.mock.EXPECT().CloudConnected().AnyTimes().Return(true) path := "a" options := internal.GetAttrOptions{Name: path} @@ -1964,10 +2074,11 @@ func (suite *attrCacheTestSuite) TestCacheTimeout() { func (suite *attrCacheTestSuite) 
TestCacheCleanupExpiredEntries() { defer suite.cleanupTest() suite.cleanupTest() // clean up the default attr cache generated - cacheTimeout := 2 + cacheTimeout := 1 config := fmt.Sprintf("attr_cache:\n timeout-sec: %d", cacheTimeout) suite.setupTestHelper(config) // setup a new attr cache with a custom config suite.assert.EqualValues(suite.attrCache.cacheTimeout, cacheTimeout) + suite.mock.EXPECT().CloudConnected().AnyTimes().Return(true) path1 := "file1" path2 := "file2" @@ -2019,10 +2130,7 @@ func (suite *attrCacheTestSuite) TestCacheCleanupExpiredEntries() { suite.assertUntouched(childFile) // Wait for cache timeout to expire, plus additional time for background cleanup to run - time.Sleep(time.Second * time.Duration(cacheTimeout+1)) - - // Wait a bit more if cleanup is still in progress - maxWait := 3 * time.Second + maxWait := time.Duration(cacheTimeout*2) * time.Second waitInterval := 100 * time.Millisecond waited := time.Duration(0) @@ -2049,37 +2157,68 @@ func (suite *attrCacheTestSuite) TestCacheCleanupExpiredEntries() { suite.assert.Contains(suite.attrCache.cache.cacheMap, "") } +func (suite *attrCacheTestSuite) TestCacheCleanupExpiredEntriesOffline() { + defer suite.cleanupTest() + suite.cleanupTest() // clean up the default attr cache generated + cacheTimeout := 60 + config := fmt.Sprintf("attr_cache:\n timeout-sec: %d", cacheTimeout) + suite.setupTestHelper(config) + suite.assert.EqualValues(suite.attrCache.cacheTimeout, cacheTimeout) + suite.mock.EXPECT().CloudConnected().Return(false) + + oldTime := time.Now().Add( + -(time.Duration(cacheTimeout) * time.Second) - time.Minute, + ) + suite.addPathToCache("offline-file", true) + suite.addPathToCache("offline-dir/", false) + + fileItem, found := suite.attrCache.cache.get("offline-file") + suite.assert.True(found) + fileItem.cachedAt = oldTime + + dirItem, found := suite.attrCache.cache.get("offline-dir") + suite.assert.True(found) + dirItem.cachedAt = oldTime + + suite.attrCache.cleanupExpiredEntries() 
+ + suite.assert.Contains(suite.attrCache.cache.cacheMap, "") + suite.assert.Contains(suite.attrCache.cache.cacheMap, "offline-file") + suite.assert.Contains(suite.attrCache.cache.cacheMap, "offline-dir") + child, found := suite.attrCache.cache.get("offline-file") + suite.assert.True(found) + suite.assert.Equal(oldTime, child.cachedAt) +} + func (suite *attrCacheTestSuite) TestCacheCleanupDuringBulkCaching() { defer suite.cleanupTest() suite.cleanupTest() // clean up the default attr cache generated - cacheTimeout := 3 // Use a longer timeout for this test + cacheTimeout := 1 config := fmt.Sprintf("attr_cache:\n timeout-sec: %d", cacheTimeout) suite.setupTestHelper(config) // setup a new attr cache with a custom config suite.assert.EqualValues(suite.attrCache.cacheTimeout, cacheTimeout) + suite.mock.EXPECT().CloudConnected().AnyTimes().Return(true) // Add some items to cache manually with old timestamps path1 := "oldfile1" path2 := "oldfile2" oldTime := time.Now().Add(-time.Second * time.Duration(cacheTimeout+1)) - suite.attrCache.cache.cacheMap[path1] = newAttrCacheItem( - getPathAttr(path1, defaultSize, fs.FileMode(defaultMode), true), - true, - oldTime, - ) - suite.attrCache.cache.cacheMap[path2] = newAttrCacheItem( - getPathAttr(path2, defaultSize, fs.FileMode(defaultMode), true), - true, - oldTime, - ) + suite.attrCache.cache.insert(insertOptions{ + attr: getPathAttr(path1, defaultSize, fs.FileMode(defaultMode), true), + exists: true, + cachedAt: oldTime, + }) + suite.attrCache.cache.insert(insertOptions{ + attr: getPathAttr(path2, defaultSize, fs.FileMode(defaultMode), true), + exists: true, + cachedAt: oldTime, + }) // Verify both old items are in cache plus root suite.assert.Len(suite.attrCache.cache.cacheMap, 3) - // Wait a bit for background cleanup to run and remove expired items - time.Sleep(time.Second * time.Duration(cacheTimeout+1)) - // Wait for cleanup to complete - maxWait := 2 * time.Second + maxWait := time.Duration(cacheTimeout*2) * time.Second 
waitInterval := 100 * time.Millisecond waited := time.Duration(0) diff --git a/component/azstorage/block_blob_test.go b/component/azstorage/block_blob_test.go index 721ce274e..ccd78a6fb 100644 --- a/component/azstorage/block_blob_test.go +++ b/component/azstorage/block_blob_test.go @@ -1139,6 +1139,16 @@ func (s *blockBlobTestSuite) TestCreateFile() { name := generateFileName() h, err := s.az.CreateFile(internal.CreateFileOptions{Name: name}) + // log error information to debug log + unwrappedErr := err + for unwrappedErr != nil { + fmt.Printf( + "Uncaught AZ error is of type \"%T\" and value %v.\n", + unwrappedErr, + unwrappedErr, + ) + unwrappedErr = errors.Unwrap(unwrappedErr) + } s.assert.NoError(err) s.assert.NotNil(h) diff --git a/component/file_cache/OfflineAccess.md b/component/file_cache/OfflineAccess.md new file mode 100644 index 000000000..bf00e1c3e --- /dev/null +++ b/component/file_cache/OfflineAccess.md @@ -0,0 +1,65 @@ +# Offline Access + +Cloudfuse includes an offline access feature in the `file_cache` component that allows reads and writes to continue against the local cache when cloud storage is temporarily unreachable. This document describes the feature, its configuration, and its consistency implications. + +## How It Works + +When cloud storage becomes unavailable, Cloudfuse continues to serve file operations from the local file cache. Reads return cached data. Writes are accepted and held in the local cache. When connectivity is restored, all pending changes are written to cloud storage. + +## Enabling and Disabling + +Offline access is **enabled by default**. 
To disable it — causing Cloudfuse to block local file access whenever cloud storage is unreachable — set the `block-offline-access` flag in your `file_cache` configuration: + +```yaml +file_cache: + block-offline-access: true +``` + +When `block-offline-access: true`, any filesystem operation that requires a cloud storage + connection will fail with an error when offline, which is the stricter, previous behavior. + +## Improving Offline Functionality with Attribute Cache Tuning + +When offline, Cloudfuse cannot refresh file metadata from cloud storage. The `attr_cache` component caches this metadata locally. By default its timeout is **120 seconds (2 minutes)**, after which Cloudfuse attempts to revalidate metadata against cloud storage. + +To extend the window during which cached metadata remains valid — reducing the chance of stale-metadata errors while offline — raise the `timeout-sec` value under `attr_cache`: + +```yaml +attr_cache: + timeout-sec: 3600 # cache metadata for 1 hour; adjust to your needs +``` + +Using a longer timeout makes it more likely that metadata will be available offline. + +## Consistency Considerations + +> **Read this section carefully before using offline access in a multi-client or shared-storage environment.** + +### Eventual Consistency and Last-Writer-Wins + +Cloudfuse only supports **eventual consistency**, using **last-writer-wins** semantics. Data is written to cloud storage when a file is *closed*, not when it is written. This means that, under normal operation, there is already a window during which cloud storage does not reflect the latest local changes. + +For this reason, we strongly recommend only connecting to each container with a single client, or splitting the container into separate prefixes, each accessed by a single client using the **subdirectory** option. 
+ +### Offline Access Makes the Consistency Window Indefinitely Long + +The offline access feature is **permissive by design**: it keeps local access open for as long as the client is disconnected. This means: + +- A client may hold unsynchronized writes in its local cache indefinitely — for hours, days, or longer — until it reconnects. +- When the client reconnects, those writes will be uploaded to cloud storage. +- If another client has written to the same objects during that time, ***last-writer-wins semantics may cause the offline client's stale data to overwrite the newer data***. + +One particularly unpleasant example: + +1. A Cloudfuse client loses its network connection, then renames a directory while offline (this is allowed if all directory contents are cached). +2. Using another client, the contents of the source directory are updated. +3. Once the offline client reconnects, it **deletes all the objects within the source directory** and writes stale copies of those objects to the destination directory. + +### Recommendation + +**We never recommend concurrent access to the same objects from multiple clients.** The offline access feature *further increases the risk of consistency issues* in such configurations. If you must use multiple clients, be aware that: + +1. A client returning from an extended offline period may overwrite changes made by other clients during that time. +2. There is no built-in conflict detection or merge — the last synchronization wins, regardless of content freshness. + +Use offline access only in single client, prefix-separated client, or read-only scenarios to minimize the risk of conflicting writes. diff --git a/component/file_cache/async.go b/component/file_cache/async.go new file mode 100644 index 000000000..535447e70 --- /dev/null +++ b/component/file_cache/async.go @@ -0,0 +1,382 @@ +/* + Licensed under the MIT License . 
+ + Copyright © 2023-2025 Seagate Technology LLC and/or its Affiliates + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
+*/ + +package file_cache + +import ( + "context" + "errors" + "os" + "path/filepath" + "time" + + "github.com/Seagate/cloudfuse/common" + "github.com/Seagate/cloudfuse/common/config" + "github.com/Seagate/cloudfuse/common/log" + "github.com/Seagate/cloudfuse/internal" + "github.com/netresearch/go-cron" +) + +type UploadWindow struct { + name string `yaml:"name"` + cronExpr string `yaml:"cron"` + duration time.Duration + cronEntryID int +} + +type Config struct { + Schedule WeeklySchedule `yaml:"schedule"` +} + +type WeeklySchedule []UploadWindow + +type pendingFlags struct { + isDir bool + isDeletion bool +} + +func (fc *FileCache) configureScheduler() error { + // load from config + var rawSchedule []map[string]interface{} + err := config.UnmarshalKey(compName+".schedule", &rawSchedule) + if err != nil { + return err + } + // initialize the scheduler + fc.cronScheduler = cron.New(cron.WithSeconds()) + // create parser for cron expressions + parser := cron.MustNewParser( + cron.Second | cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow | cron.Descriptor, + ) + // Convert raw schedule to WeeklySchedule + fc.schedule = make(WeeklySchedule, 0, len(rawSchedule)) + for _, rawWindow := range rawSchedule { + window := UploadWindow{} + if name, ok := rawWindow["name"].(string); ok { + window.name = name + } + if cronExpr, ok := rawWindow["cron"].(string); ok { + window.cronExpr = cronExpr + } + if duration, ok := rawWindow["duration"].(string); ok { + window.duration, err = time.ParseDuration(duration) + if err != nil { + log.Err( + "FileCache::Configure : %s invalid window duration %s (%v)", + window.name, + duration, + err, + ) + return err + } + } + // Determine if we're joining a window that's already active by + // finding the most recent scheduled start via Prev(). 
+ now := time.Now() + var initialWindowEndTime time.Time + var jobOpts []cron.JobOption + schedule, _ := parser.Parse(window.cronExpr) + if sp, ok := schedule.(cron.ScheduleWithPrev); ok { + prevStart := sp.Prev(now) + if !prevStart.IsZero() && prevStart.Add(window.duration).After(now) { + // We're inside an active window that started at prevStart. + initialWindowEndTime = prevStart.Add(window.duration) + // Run immediately to join the in-progress window with shortened duration. + jobOpts = append(jobOpts, cron.WithRunImmediately()) + log.Info( + "FileCache::scheduleUploads : [%s] joining active window (started %s, ends %s)", + window.name, + prevStart.Format(time.Kitchen), + initialWindowEndTime.Format(time.Kitchen), + ) + } + } + // add cron callback + entryId, err := fc.cronScheduler.AddFunc(window.cronExpr, func() { + // Is this a transition from inactive? + windowCount := fc.activeWindows.Add(1) + if windowCount == 1 { + // transition to active - open the window + close(fc.startScheduledUploads) + log.Info( + "FileCache::SchedulerCronFunc : %s - enabled scheduled uploads", + window.name, + ) + } + log.Info( + "FileCache::SchedulerCronFunc : %s (%s) started (numActive=%d)", + window.name, + window.cronExpr, + windowCount, + ) + // When should the window close? 
+ remainingDuration := window.duration + currentTime := time.Now() + if initialWindowEndTime.After(currentTime) { + remainingDuration = initialWindowEndTime.Sub(currentTime) + } + // Create a context to end the window + ctx, cancel := context.WithTimeout(context.Background(), remainingDuration) + defer cancel() + for { + select { + case <-fc.componentStopping: + log.Info("FileCache::SchedulerCronFunc : %s - stopping cron job", window.name) + return + case <-ctx.Done(): + // Window has completed, update active window count + windowCount = fc.activeWindows.Add(-1) + log.Info( + "FileCache::SchedulerCronFunc : %s (%s) ended (numActive=%d)", + window.name, + window.duration, + windowCount, + ) + // Only close resources when the last window ends + if windowCount == 0 { + fc.startScheduledUploads = make(chan struct{}) + log.Info( + "FileCache::SchedulerCronFunc : %s window ended - deferring uploads", + window.name, + ) + } + return + } + } + }, jobOpts...) + if err != nil { + log.Err( + "FileCache::Configure : Schedule %s invalid cron expression (%v)", + window.name, + err, + ) + return err + } + // save window to fc.schedule + window.cronEntryID = int(entryId) + fc.schedule = append(fc.schedule, window) + log.Info( + "FileCache::Configure : Added schedule %s ('%s', %s)", + window.name, + window.cronExpr, + window.duration, + ) + } + + return nil +} + +func (fc *FileCache) startScheduler() { + // check if any schedules should already be active + for _, window := range fc.schedule { + entry := fc.cronScheduler.Entry(cron.EntryID(window.cronEntryID)) + // check if this entry should already be active + // did this entry have a start time within the last duration? 
+ now := time.Now() + var initialWindowEndTime time.Time + for t := entry.Schedule.Next(now.Add(-window.duration)); now.After(t); t = entry.Schedule.Next(t) { + initialWindowEndTime = t.Add(window.duration) + } + if !initialWindowEndTime.IsZero() { + go entry.Job.Run() + } + } + fc.cronScheduler.Start() +} + +// flock must be locked +func (fc *FileCache) addPendingOp(name string, value pendingFlags) { + log.Trace("FileCache::addPendingOp : %s", name) + fc.pendingOps.Store(name, value) + select { + case fc.pendingOpAdded <- struct{}{}: + default: // do not block + } +} + +// persistent background thread function +func (fc *FileCache) servicePendingOps() { + for { + select { + case <-fc.componentStopping: + log.Crit("FileCache::servicePendingOps : Stopping") + // TODO: Persist pending ops + return + case <-fc.startScheduledUploads: + // check if we're connected + // exponential backoff is implemented inside CloudConnected(), + // so we're safe to call it naively every second like this + if !fc.NextComponent().CloudConnected() { + // we are offline, wait for a while before checking again + select { + case <-time.After(time.Second): + case <-fc.componentStopping: + } + break + } + numFilesProcessed := 0 + // Iterate over pending ops + fc.pendingOps.Range(func(key, value interface{}) bool { + numFilesProcessed++ + select { + case <-fc.componentStopping: + return false + case <-fc.startScheduledUploads: + path := key.(string) + value := value.(pendingFlags) + err := fc.updateObject(path, value) + if isOffline(err) { + return false // connection lost - abort iteration + } + if err != nil { + log.Err("FileCache::servicePendingOps : %s upload failed: %v", path, err) + } + default: + return false // upload window ended + } + return true // Continue the iteration + }) + log.Info( + "FileCache::servicePendingOps : Completed upload cycle, processed %d files", + numFilesProcessed, + ) + if numFilesProcessed == 0 { + // we're online but there's nothing to do + // wait for a task 
to be added + select { + case <-fc.pendingOpAdded: + case <-fc.componentStopping: + } + } + } + } +} + +// synchronize pending operation with cloud storage +func (fc *FileCache) updateObject(name string, flags pendingFlags) error { + log.Trace("FileCache::updateObject : %s", name) + + // lock the file + flock := fc.fileLocks.Get(name) + flock.Lock() + defer flock.Unlock() + + // don't double upload + _, stillPending := fc.pendingOps.Load(name) + if !stillPending { + return nil + } + + // look up file (or folder!) + localPath := filepath.Join(fc.tmpPath, name) + info, localErr := os.Stat(localPath) + localMissing := os.IsNotExist(localErr) + // in case of inconsistency, local state takes precedence (except to prevent incorrect deletions) + if !flags.isDeletion && localErr != nil { + log.Err("FileCache::updateObject : %s stat failed. Here's why: %v", name, localErr) + fc.pendingOps.Delete(name) + return localErr + } + if flags.isDeletion && !localMissing { + log.Err("FileCache::updateObject : %s exists. 
Ignoring deletion flag!", name) + } + if !localMissing && flags.isDir != info.IsDir() { + log.Err("FileCache::updateObject : %s has wrong dir flag (%t)!", name, flags.isDir) + } + + // update cloud + op := "deletion" + objType := "directory" + var cloudErr error + if localMissing { + if flags.isDeletion && fc.notInCloud(name) { + log.Info("FileCache::updateObject : %s skipping cloud deletion (not in cloud)", name) + fc.pendingOps.Delete(name) + return nil + } + if flags.isDir { + // delete folder + options := internal.DeleteDirOptions{Name: name} + cloudErr = fc.NextComponent().DeleteDir(options) + } else { + // delete file + objType = "file" + options := internal.DeleteFileOptions{Name: name} + cloudErr = fc.NextComponent().DeleteFile(options) + } + } else { + op = "creation/update" + if info.IsDir() { + // upload folder + options := internal.CreateDirOptions{Name: name, Mode: info.Mode()} + cloudErr = fc.NextComponent().CreateDir(options) + } else { + // upload file + objType = "file" + cloudErr = fc.uploadFile(name) + } + } + // handle errors + if cloudErr != nil { + log.Err("FileCache::updateObject : %s %s %s failed [%v]", name, objType, op, cloudErr) + return cloudErr + } + + // update state + log.Info("FileCache::updateObject : %s sync successful", name) + fc.pendingOps.Delete(name) + + return nil +} + +// returns true if we *know* that this entity does not exist in cloud storage +// otherwise returns false (including ambiguous cases) +func (fc *FileCache) notInCloud(name string) bool { + cloudStateKnown, existsInCloud, _ := fc.checkCloud(name) + return cloudStateKnown && !existsInCloud +} + +// and getAttrErr is the error returned from GetAttr +func (fc *FileCache) checkCloud( + name string, +) (cloudStateKnown bool, inCloud bool, getAttrErr error) { + _, getAttrErr = fc.NextComponent().GetAttr(internal.GetAttrOptions{Name: name}) + return cachedData(getAttrErr), !errors.Is(getAttrErr, os.ErrNotExist), getAttrErr +} + +// checks if the error returned from 
cloud storage means we're offline +func isOffline(err error) bool { + return errors.Is(err, &common.CloudUnreachableError{}) +} + +// checks whether we have usable metadata, despite being offline +func offlineDataAvailable(err error) bool { + return isOffline(err) && cachedData(err) +} + +// checks whether we have usable metadata, despite being offline +func cachedData(err error) bool { + noCachedData := isOffline(err) && errors.Is(err, &common.NoCachedDataError{}) + return !noCachedData +} diff --git a/component/file_cache/cache_policy.go b/component/file_cache/cache_policy.go index 3e7e70481..0334275c3 100644 --- a/component/file_cache/cache_policy.go +++ b/component/file_cache/cache_policy.go @@ -28,6 +28,7 @@ package file_cache import ( "fmt" "os" + "sync" "github.com/Seagate/cloudfuse/common" "github.com/Seagate/cloudfuse/common/log" @@ -45,7 +46,8 @@ type cachePolicyConfig struct { highThreshold float64 lowThreshold float64 - fileLocks *common.LockMap // uses object name (common.JoinUnixFilepath) + fileLocks *common.LockMap // uses object name (common.JoinUnixFilepath) + pendingOps *sync.Map policyTrace bool } diff --git a/component/file_cache/file_cache.go b/component/file_cache/file_cache.go index f17ac063e..868c275e7 100644 --- a/component/file_cache/file_cache.go +++ b/component/file_cache/file_cache.go @@ -27,15 +27,21 @@ package file_cache import ( "context" + "crypto/rand" + "encoding/base64" + "errors" "fmt" "io" "io/fs" + "math" "os" "path/filepath" "runtime" + "slices" "sort" "strings" "sync" + "sync/atomic" "syscall" "time" @@ -60,11 +66,13 @@ type FileCache struct { allowNonEmpty bool cacheTimeout float64 policyTrace bool - missedChmodList sync.Map // uses object name (common.JoinUnixFilepath) - mountPath string // uses os.Separator (filepath.Join) - scheduleOps sync.Map // uses object name (common.JoinUnixFilepath) + missedChmodList sync.Map // uses object name (common.JoinUnixFilepath) + pendingOps sync.Map // uses object name 
(common.JoinUnixFilepath) + pendingOpAdded chan struct{} // signals when an offline operation is queued + mountPath string // uses os.Separator (filepath.Join) allowOther bool offloadIO bool + offlineAccess bool maxCacheSizeMB float64 defaultPermission os.FileMode @@ -76,14 +84,11 @@ type FileCache struct { lazyWrite bool fileCloseOpt sync.WaitGroup - stopAsyncUpload chan struct{} - schedule WeeklySchedule - uploadNotifyCh chan struct{} - alwaysOn bool - activeWindows int - activeWindowsMutex *sync.Mutex - closeWindowCh chan struct{} - cronScheduler *cron.Cron + componentStopping chan struct{} + schedule WeeklySchedule + activeWindows atomic.Int32 + startScheduledUploads chan struct{} + cronScheduler *cron.Cron } // Structure defining your config parameters @@ -103,8 +108,9 @@ type FileCacheOptions struct { AllowNonEmpty bool `config:"allow-non-empty-temp" yaml:"allow-non-empty-temp,omitempty"` CleanupOnStart bool `config:"cleanup-on-start" yaml:"cleanup-on-start,omitempty"` - EnablePolicyTrace bool `config:"policy-trace" yaml:"policy-trace,omitempty"` - OffloadIO bool `config:"offload-io" yaml:"offload-io,omitempty"` + BlockOfflineAccess bool `config:"block-offline-access" yaml:"block-offline-access,omitempty"` + EnablePolicyTrace bool `config:"policy-trace" yaml:"policy-trace,omitempty"` + OffloadIO bool `config:"offload-io" yaml:"offload-io,omitempty"` RefreshSec uint32 `config:"refresh-sec" yaml:"refresh-sec,omitempty"` HardLimit bool `config:"hard-limit" yaml:"hard-limit,omitempty"` @@ -177,11 +183,18 @@ func (fc *FileCache) Start(ctx context.Context) error { fileCacheStatsCollector = stats_manager.NewStatsCollector(fc.Name()) log.Debug("Starting file cache stats collector") - fc.uploadNotifyCh = make(chan struct{}, 1) - fc.stopAsyncUpload = make(chan struct{}) - err = fc.SetupScheduler() - if err != nil { - log.Warn("FileCache::Start : Failed to setup scheduler [%s]", err.Error()) + // setup async uploads + fc.startScheduledUploads = make(chan struct{}) + 
fc.componentStopping = make(chan struct{}) + if len(fc.schedule) > 0 { + log.Info("FileCache::Start : Scheduler enabled") + fc.startScheduler() + } else { + close(fc.startScheduledUploads) + } + if len(fc.schedule) > 0 || fc.offlineAccess { + fc.pendingOpAdded = make(chan struct{}, 1) + go fc.servicePendingOps() } return nil @@ -191,10 +204,8 @@ func (fc *FileCache) Start(ctx context.Context) error { func (fc *FileCache) Stop() error { log.Trace("Stopping component : %s", fc.Name()) - // Signal active upload windows to stop - if fc.stopAsyncUpload != nil { - close(fc.stopAsyncUpload) - } + // stop async uploads + close(fc.componentStopping) // Stop the cron scheduler and wait for running jobs to complete if fc.cronScheduler != nil { @@ -253,7 +264,7 @@ func (fc *FileCache) Configure(_ bool) error { err := config.UnmarshalKey(compName, &conf) if err != nil { log.Err("FileCache: config error [invalid config attributes]") - return fmt.Errorf("config error in %s [%s]", fc.Name(), err.Error()) + return fmt.Errorf("config error in %s [%w]", fc.Name(), err) } fc.createEmptyFile = conf.CreateEmptyFile @@ -274,13 +285,14 @@ func (fc *FileCache) Configure(_ bool) error { fc.allowNonEmpty = conf.AllowNonEmpty fc.policyTrace = conf.EnablePolicyTrace fc.offloadIO = conf.OffloadIO + fc.offlineAccess = !conf.BlockOfflineAccess fc.refreshSec = conf.RefreshSec fc.hardLimit = conf.HardLimit err = config.UnmarshalKey("lazy-write", &fc.lazyWrite) if err != nil { log.Err("FileCache: config error [unable to obtain lazy-write]") - return fmt.Errorf("config error in %s [%s]", fc.Name(), err.Error()) + return fmt.Errorf("config error in %s [%w]", fc.Name(), err) } fc.tmpPath = filepath.Clean(common.ExpandPath(conf.TmpPath)) @@ -298,7 +310,7 @@ func (fc *FileCache) Configure(_ bool) error { err = config.UnmarshalKey("mount-path", &fc.mountPath) if err != nil { log.Err("FileCache: config error [unable to obtain Mount Path]") - return fmt.Errorf("config error in %s [%s]", fc.Name(), 
err.Error()) + return fmt.Errorf("config error in %s [%w]", fc.Name(), err) } if filepath.Clean(fc.mountPath) == filepath.Clean(fc.tmpPath) { log.Err("FileCache: config error [tmp-path is same as mount path]") @@ -307,12 +319,12 @@ func (fc *FileCache) Configure(_ bool) error { // Extract values from 'conf' and store them as you wish here _, err = os.Stat(fc.tmpPath) - if os.IsNotExist(err) { + if isNotExist(err) { log.Err("FileCache: config error [tmp-path does not exist. attempting to create tmp-path.]") err := os.MkdirAll(fc.tmpPath, os.FileMode(0755)) if err != nil { log.Err("FileCache: config error creating directory after clean [%s]", err.Error()) - return fmt.Errorf("config error in %s [%s]", fc.Name(), err.Error()) + return fmt.Errorf("config error in %s [%w]", fc.Name(), err) } } @@ -325,7 +337,7 @@ func (fc *FileCache) Configure(_ bool) error { ) fc.maxCacheSizeMB = 4192 } else { - fc.maxCacheSizeMB = 0.8 * float64(avail) / (MB) + fc.maxCacheSizeMB = math.Floor(0.8 * float64(avail) / (MB)) } if config.IsSet(compName+".max-size-mb") && conf.MaxSizeMB != 0 { @@ -371,57 +383,18 @@ func (fc *FileCache) Configure(_ bool) error { } if config.IsSet(compName + ".schedule") { - var rawSchedule []map[string]any - err := config.UnmarshalKey(compName+".schedule", &rawSchedule) + err = fc.configureScheduler() if err != nil { log.Err( "FileCache::Configure : Failed to parse schedule configuration [%s]", err.Error(), ) - } else { - // Convert raw schedule to WeeklySchedule - fc.schedule = make(WeeklySchedule, 0, len(rawSchedule)) - for _, rawWindow := range rawSchedule { - window := UploadWindow{} - if name, ok := rawWindow["name"].(string); ok { - window.Name = name - } - if cronStr, ok := rawWindow["cron"].(string); ok { - window.CronExpr = cronStr - } - if durStr, ok := rawWindow["duration"].(string); ok { - window.Duration = durStr - } - if !isValidCronExpression(window.CronExpr) { - log.Err( - "FileCache::Configure : Invalid cron expression '%s' for schedule window 
// isNotExist reports whether err is, or wraps, os.ErrNotExist.
// A nil error is never a "does not exist" error.
func isNotExist(err error) bool {
	if err == nil {
		return false
	}
	return errors.Is(err, os.ErrNotExist)
}
FileCacheOptions) cachePolicyConfig { maxSizeMB: fc.maxCacheSizeMB, fileLocks: fc.fileLocks, policyTrace: conf.EnablePolicyTrace, + pendingOps: &fc.pendingOps, } return cacheConfig @@ -495,7 +474,10 @@ func (fc *FileCache) GetPolicyConfig(conf FileCacheOptions) cachePolicyConfig { func (fc *FileCache) StatFs() (*common.Statfs_t, bool, error) { statfs, populated, err := fc.NextComponent().StatFs() + // TODO: handle offline errors if populated { + // if we are offline, this will return EIO to the system + // TODO: Is this the desired behavior? return statfs, populated, err } @@ -555,22 +537,122 @@ func isLocalDirEmpty(path string) bool { return err == io.EOF } -// Note: The primary purpose of the file cache is to keep track of files that are opened by the user. -// So we do not need to support some APIs like Create Directory since the file cache will manage -// creating local directories as needed. +func (fc *FileCache) CreateDir(options internal.CreateDirOptions) error { + log.Trace("FileCache::CreateDir : %s", options.Name) + + // if offline access is disabled, just pass this call on to the attribute cache + if !fc.offlineAccess { + return fc.NextComponent().CreateDir(options) + } -// DeleteDir: Recursively invalidate the directory and its children + localPath := filepath.Join(fc.tmpPath, options.Name) + + // Do not call nextComponent.CreateDir when we are offline. + // Otherwise the attribute cache could go out of sync with the cloud. + if fc.NextComponent().CloudConnected() { + // we have a cloud connection, so it's safe to call the next component + err := fc.NextComponent().CreateDir(options) + if err == nil || errors.Is(err, os.ErrExist) { + // creating the directory in cloud either worked, or it already exists + // make sure the directory exists in local cache + mkdirErr := os.MkdirAll(localPath, options.Mode.Perm()) + if mkdirErr != nil { + log.Err( + "FileCache::CreateDir : %s failed to create local directory. 
Here's why: %v", + localPath, + mkdirErr, + ) + } + } + return err + } + + // we are offline + // check if the directory exists in cloud storage + cloudStateKnown, inCloud, err := fc.checkCloud(options.Name) + notInCloud := cloudStateKnown && !inCloud + switch { + case notInCloud: + // the directory does not exist in the cloud, so we can create it locally + err = os.Mkdir(localPath, options.Mode.Perm()) + if err != nil { + // report and return the error, since it will rightly return EEXIST when needed, etc + log.Err("FileCache::CreateDir : %s os.Mkdir failed. Here's why: %v", options.Name, err) + } else { + // record this directory to sync to cloud later + // Note: the s3storage component can return success on CreateDir, even without a cloud connection. + // The thread that pushes local changes to the cloud will have to account for this + // to avoid creating an entry for this directory in the attribute cache, + // which would give us the false impression that the directory is in the cloud. + flock := fc.fileLocks.Get(options.Name) + flock.Lock() + defer flock.Unlock() + fc.addPendingOp(options.Name, pendingFlags{isDir: true}) + log.Info( + "FileCache::CreateDir : %s created offline and queued for cloud sync", + options.Name, + ) + } + case err != nil && !isOffline(err): + // we seem to have regained our cloud connection, but GetAttr failed for some reason + // log this and return the error from GetAttr as is + log.Err("FileCache::CreateDir : %s GetAttr failed. Here's why: %v", options.Name, err) + case errors.Is(err, &common.NoCachedDataError{}): + // we are offline and we don't know whether the directory exists in cloud storage + // block directory creation (to protect data consistency) + log.Warn( + "FileCache::CreateDir : %s might exist in cloud storage. 
Creation is blocked.", + options.Name, + ) + default: + // the directory already exists in cloud storage + // use distinct log messages for when the attribute cache entry is valid or expired + if err == nil { // valid + log.Warn("FileCache::CreateDir : %s already exists in cloud storage", options.Name) + } else { // expired + log.Warn( + "FileCache::CreateDir : %s already exists in cloud storage (and we are offline)", + options.Name, + ) + } + // return EEXIST + err = os.ErrExist + } + return err +} + +// DeleteDir: Delete empty directory func (fc *FileCache) DeleteDir(options internal.DeleteDirOptions) error { log.Trace("FileCache::DeleteDir : %s", options.Name) + lockName := internal.ExtendDirName(options.Name) + offlineOkay := false + flock := fc.fileLocks.Get(lockName) + flock.Lock() + defer flock.Unlock() + // The libfuse component only calls DeleteDir on empty directories, so this directory must be empty err := fc.NextComponent().DeleteDir(options) - if err != nil { - log.Err("FileCache::DeleteDir : %s failed", options.Name) - // There is a chance that meta file for directory was not created in which case - // rest api delete will fail while we still need to cleanup the local cache for the same - } else { + // Allow safe offline access + if isOffline(err) && fc.offlineAccess { + if _, statErr := os.Stat(filepath.Join(fc.tmpPath, options.Name)); statErr == nil { + log.Debug("FileCache::DeleteDir : %s local access allowed (offline)", options.Name) + offlineOkay = true + } else if cloudStateKnown, _, _ := fc.checkCloud(options.Name); cloudStateKnown { + log.Debug("FileCache::DeleteDir : %s access allowed (offline)", options.Name) + offlineOkay = true + } + } + // delete locally + if err == nil || offlineOkay { fc.policy.CachePurge(filepath.Join(fc.tmpPath, options.Name)) + } else { + log.Err("FileCache::DeleteDir : %s . 
Here's why: %v", options.Name, err) + } + // record pending op + if offlineOkay { + fc.addPendingOp(options.Name, pendingFlags{isDir: true, isDeletion: true}) + return nil } return err @@ -587,15 +669,19 @@ func (fc *FileCache) StreamDir( // To cover case 1, grab all entries from storage attrs, token, err := fc.NextComponent().StreamDir(options) + if isOffline(err) && fc.offlineAccess && cachedData(err) { + // we have a valid listing for offline access + err = nil + } if err != nil { return attrs, token, err } // Get files from local cache localPath := filepath.Join(fc.tmpPath, options.Name) - dirents, err := os.ReadDir(localPath) - if err != nil { - return attrs, token, nil + dirents, localErr := os.ReadDir(localPath) + if localErr != nil && !isNotExist(localErr) { + log.Err("FileCache::StreamDir : %s os.ReadDir failed [%v]", options.Name, localErr) } i := 0 // Index for cloud @@ -639,32 +725,41 @@ func (fc *FileCache) StreamDir( if token == "" { for _, entry := range dirents { entryPath := common.JoinUnixFilepath(options.Name, entry.Name()) - if !entry.IsDir() { - // This is an overhead for streamdir for now - // As list is paginated we have no way to know whether this particular item exists both in local cache - // and container or not. So we rely on getAttr to tell if entry was cached then it exists in cloud storage too - // If entry does not exists on storage then only return a local item here. 
- _, err := fc.NextComponent().GetAttr(internal.GetAttrOptions{Name: entryPath}) - if err != nil && (err == syscall.ENOENT || os.IsNotExist(err)) { - // get the lock on the file, to allow any pending operation to complete - flock := fc.fileLocks.Get(entryPath) - flock.RLock() - // use os.Stat instead of entry.Info() to be sure we get good info (with flock locked) - info, err := os.Stat( - filepath.Join(localPath, entry.Name()), - ) // Grab local cache attributes - flock.RUnlock() - // If local file is not locked then only use its attributes otherwise rely on container attributes - if err == nil { - // Case 2 (file only in local cache) so create a new attributes and add them to the storage attributes - log.Debug("FileCache::StreamDir : serving %s from local cache", entryPath) - attr := newObjAttr(entryPath, info) - attrs = append(attrs, attr) - } - } + // skip any entries that exist in cloud storage + cloudStateKnown, inCloud, _ := fc.checkCloud(entryPath) + if cloudStateKnown && inCloud { + continue + } + // get the lock on the file, to allow any pending operation to complete + flock := fc.fileLocks.Get(entryPath) + flock.RLock() + // use os.Stat instead of entry.Info() to be sure we get good info (with flock locked) + info, err := os.Stat( + filepath.Join(localPath, entry.Name()), + ) // Grab local cache attributes + flock.RUnlock() + if err == nil { + // Case 2 (file only in local cache) so create a new attributes and add them to the storage attributes + log.Debug("FileCache::StreamDir : serving %s from local cache", entryPath) + attrs = append(attrs, newObjAttr(entryPath, info)) } } } + // values should be returned in ascending order by key, without duplicates + // sort + slices.SortFunc[[]*internal.ObjAttr, *internal.ObjAttr]( + attrs, + func(a, b *internal.ObjAttr) int { + return strings.Compare(a.Path, b.Path) + }, + ) + // remove duplicates + attrs = slices.CompactFunc[[]*internal.ObjAttr, *internal.ObjAttr]( + attrs, + func(a, b *internal.ObjAttr) bool { + 
return a.Path == b.Path + }, + ) return attrs, token, err } @@ -672,17 +767,18 @@ func (fc *FileCache) StreamDir( func (fc *FileCache) IsDirEmpty(options internal.IsDirEmptyOptions) bool { log.Trace("FileCache::IsDirEmpty : %s", options.Name) - // Check if directory is empty at remote or not, if container is not empty then return false + // Check if directory is empty at remote emptyAtRemote := fc.NextComponent().IsDirEmpty(options) - if !emptyAtRemote { + connected := fc.NextComponent().CloudConnected() + if !emptyAtRemote && connected { log.Debug("FileCache::IsDirEmpty : %s is not empty at remote", options.Name) return emptyAtRemote } - // Remote is empty so we need to check for the local directory + // we need to check for the local directory // While checking local directory we need to ensure that we delete all empty directories and then // return the result. - cleanup, err := fc.deleteEmptyDirs(internal.DeleteDirOptions(options)) + emptyAfterCleanup, err := fc.deleteEmptyDirs(internal.DeleteDirOptions(options)) if err != nil { log.Debug( "FileCache::IsDirEmpty : %s failed to delete empty directories [%s]", @@ -691,8 +787,40 @@ func (fc *FileCache) IsDirEmpty(options internal.IsDirEmptyOptions) bool { ) return false } + // if the directory is not empty locally after cleanup, return false + // if we are connected, then the emptyAtRemote is reliable + if !emptyAfterCleanup || connected { + return emptyAfterCleanup && emptyAtRemote + } + // we are offline - is access allowed? 
+ if !fc.offlineAccess { + log.Err("FileCache::IsDirEmpty : %s blocking offline access", options.Name) + return false + } + // offline access is allowed - pull directory contents from attribute cache + sdOptions := internal.StreamDirOptions{Name: options.Name, Count: 1} + attrs, _, err := fc.NextComponent().StreamDir(sdOptions) + if !cachedData(err) { + log.Err("FileCache::IsDirEmpty : %s no attribute cache data (offline)", options.Name) + return false + } + if len(attrs) > 0 { + log.Debug("FileCache::IsDirEmpty : %s not empty in attribute cache (offline)", options.Name) + return false + } + // attribute cache has no entries for this directory, but we need to check if it has a complete listing + // call GetAttr with a bogus random file name to see if the attribute cache listing is complete + // this is janky, but it improves offline access while preserving a simple IsDirEmpty interface + b := make([]byte, 12) + _, _ = rand.Read(b) + bogusFilename := common.JoinUnixFilepath(options.Name, base64.URLEncoding.EncodeToString(b)) + _, err = fc.NextComponent().GetAttr(internal.GetAttrOptions{Name: bogusFilename}) + if isOffline(err) && isNotExist(err) { + log.Info("FileCache::IsDirEmpty : %s proven empty using heroics (offline)", options.Name) + return true + } - return cleanup + return false } // DeleteEmptyDirs: delete empty directories in local cache, return error if directory is not empty @@ -706,7 +834,7 @@ func (fc *FileCache) deleteEmptyDirs(options internal.DeleteDirOptions) (bool, e entries, err := os.ReadDir(localPath) if err != nil { - if err == syscall.ENOENT || os.IsNotExist(err) { + if err == syscall.ENOENT || isNotExist(err) { return true, nil } @@ -759,6 +887,7 @@ func (fc *FileCache) deleteEmptyDirs(options internal.DeleteDirOptions) (bool, e func (fc *FileCache) RenameDir(options internal.RenameDirOptions) error { log.Trace("FileCache::RenameDir : src=%s, dst=%s", options.Src, options.Dst) + // first we need to lock all the files involved // get a list 
of source objects form both cloud and cache // cloud var cloudObjects []string @@ -783,14 +912,15 @@ func (fc *FileCache) RenameDir(options internal.RenameDirOptions) error { return err } // combine the lists - objectNames := combineLists(cloudObjects, localObjects) - - // add object destinations, and sort the result - for _, srcName := range objectNames { + srcObjects := combineLists(cloudObjects, localObjects) + // add destinations + var dstObjects []string + for _, srcName := range srcObjects { dstName := strings.Replace(srcName, options.Src, options.Dst, 1) - objectNames = append(objectNames, dstName) + dstObjects = append(dstObjects, dstName) } - sort.Strings(objectNames) + // combine sources and destinations + objectNames := combineLists(srcObjects, dstObjects) // acquire a file lock on each entry (and defer unlock) flocks := make([]*common.LockMapItem, 0, len(objectNames)) @@ -803,8 +933,21 @@ func (fc *FileCache) RenameDir(options internal.RenameDirOptions) error { // rename the directory in the cloud err = fc.NextComponent().RenameDir(options) - if err != nil { - log.Err("FileCache::RenameDir : error %s [%s]", options.Src, err.Error()) + // if we are offline and offline access is enabled, allow fully cached local directories to be renamed + // we restrict renames to cached data to keep semantics simple (pending ops are only create or delete) + if isOffline(err) && fc.offlineAccess && isSubset(cloudObjects, localObjects) { + log.Warn( + "FileCache::RenameDir : %s -> %s Cloud is unreachable. Proceeding with offline rename.", + options.Src, + options.Dst, + ) + } else if err != nil { + log.Err( + "FileCache::RenameDir : %s -> %s Cloud rename failed. 
Here's why: %v", + options.Src, + options.Dst, + err, + ) return err } @@ -838,11 +981,16 @@ func (fc *FileCache) RenameDir(options internal.RenameDirOptions) error { } // remember to delete the src directory later (after its contents are deleted) directoriesToPurge = append(directoriesToPurge, path) + // update pending cloud ops + fc.renamePendingOp( + fc.getObjectName(path), + fc.getObjectName(newPath), + ) } } else { // stat(localPath) failed. err is the one returned by stat // documentation: https://pkg.go.dev/io/fs#WalkDirFunc - if os.IsNotExist(err) { + if isNotExist(err) { // none of the files that were moved actually exist in local storage log.Info("FileCache::RenameDir : %s does not exist in local cache.", options.Src) } else if err != nil { @@ -879,9 +1027,12 @@ func (fc *FileCache) listCloudObjects(prefix string) (objectNames []string, err var attrSlice []*internal.ObjAttr attrSlice, token, err = fc.NextComponent(). StreamDir(internal.StreamDirOptions{Name: prefix, Token: token}) - if err != nil { + if offlineDataAvailable(err) && fc.offlineAccess { + err = nil + } else if err != nil { return } + // collect the object names for i := len(attrSlice) - 1; i >= 0; i-- { attr := attrSlice[i] if !attr.IsDir() { @@ -914,7 +1065,7 @@ func (fc *FileCache) listCachedObjects(directory string) (objectNames []string, } else { // stat(localPath) failed. 
// combineLists merges two sorted lists of names into one sorted list.
// An item that appears in both lists is added only once.
// Both inputs must already be sorted in ascending order.
func combineLists(listA, listB []string) []string {
	// since both lists are sorted, we can combine the two lists using a double-indexed for loop
	var combinedList []string
	i := 0 // Index for listA
	j := 0 // Index for listB
	// Iterate through both lists, adding entries in order
	for i < len(listA) && j < len(listB) {
		itemA := listA[i]
		itemB := listB[j]
		switch {
		case itemA < itemB:
			combinedList = append(combinedList, itemA)
			i++
		case itemA > itemB:
			combinedList = append(combinedList, itemB)
			j++
		default:
			// the items are the same - just add one
			combinedList = append(combinedList, itemA)
			i++
			j++
		}
	}
	// one list is exhausted, but the other may still have items
	// they are all greater than everything added so far, so append them as is
	combinedList = append(combinedList, listA[i:]...)
	combinedList = append(combinedList, listB[j:]...)

	return combinedList
}

// isSubset reports whether every item of subset also appears in superset.
// Both inputs must already be sorted in ascending order.
// An empty subset is contained in any superset.
func isSubset(subset, superset []string) bool {
	// since both lists are sorted, we can walk the two lists using a double-indexed for loop
	i := 0 // Index for subset
	j := 0 // Index for superset
	for i < len(subset) && j < len(superset) {
		itemA := subset[i]
		itemB := superset[j]
		switch {
		case itemA < itemB:
			// superset is ahead of subset - itemA is not a superset member
			// so subset is not contained in superset
			return false
		case itemA > itemB:
			// superset is behind subset - advance superset index
			j++
		default:
			// match found - move to next subset item
			i++
			j++
		}
	}

	// if superset ran out first, the remaining subset items have no match
	return i == len(subset)
}
+ if isOffline(err) && fc.offlineAccess { + // remember that we're offline + offline = true + // clear the error + err = nil + } if err != nil { log.Err("FileCache::CreateFile : Failed to create file %s", options.Name) return nil, err } - newF.GetFileObject().Close() + } + + // block offline access when disabled or risky + if !fc.NextComponent().CloudConnected() { + if !fc.offlineAccess { + log.Err("FileCache::CreateFile : %s failed (offline access disabled)", options.Name) + return nil, common.CloudUnreachableError{} + } + // if the file is not in the cache and the cloud state is unknown, block access + if _, err := os.Stat(filepath.Join(fc.tmpPath, options.Name)); err != nil { + if cloudStateKnown, _, _ := fc.checkCloud(options.Name); !cloudStateKnown { + log.Err("FileCache::CreateFile : %s failed (no offline metadata)", options.Name) + return nil, common.NewNoCachedDataError(&common.CloudUnreachableError{}) + } + } + offline = true } // Create the file in local cache @@ -1044,76 +1247,92 @@ func (fc *FileCache) CreateFile(options internal.CreateFileOptions) (*handlemap. // update state flock.LazyOpen = false + // if we're offline, record this operation as pending + if offline { + fc.addPendingOp(options.Name, pendingFlags{}) + } + return handle, nil } -// Validate that storage 404 errors truly correspond to Does Not Exist. -// path: the storage path -// err: the storage error -// method: the caller method name -// recoverable: whether or not case 2 is recoverable on flush/close of the file -func (fc *FileCache) validateStorageError( +// resolveCloudNotFoundError handles the case where cloud storage returns "not found" +// but the file may exist in the local cache (not yet flushed to cloud). +// +// This occurs when createEmptyFile is false: files are created locally first, +// then uploaded on close/flush. During this window, cloud operations will return 404. 
+// +// Returns: +// - nil if the file exists locally and canRetryOnClose is true +// - ENOENT if the file doesn't exist in cloud or local cache +// - EIO if the file exists locally but canRetryOnClose is false +// - the original error unchanged for non-404 errors or offline errors +func (fc *FileCache) resolveCloudNotFoundError( path string, err error, method string, - recoverable bool, + canRetryOnClose bool, ) error { - // For methods that take in file name, the goal is to update the path in cloud storage and the local cache. - // See comments in GetAttr for the different situations we can run into. This specifically handles case 2. - if os.IsNotExist(err) { - log.Debug("FileCache::%s : %s does not exist in cloud storage", method, path) - if !fc.createEmptyFile { - // Check if the file exists in the local cache - // (policy might not think the file exists if the file is merely marked for eviction and not actually evicted yet) - localPath := filepath.Join(fc.tmpPath, path) - if _, err := os.Stat(localPath); os.IsNotExist(err) { - // If the file is not in the local cache, then the file does not exist. 
- log.Err("FileCache::%s : %s does not exist in local cache", method, path) - return syscall.ENOENT - } else { - if !recoverable { - log.Err( - "FileCache::%s : %s has not been closed/flushed yet, unable to recover this operation on close", - method, - path, - ) - return syscall.EIO - } else { - log.Info( - "FileCache::%s : %s has not been closed/flushed yet, we can recover this operation on close", - method, - path, - ) - return nil - } - } - } + // Only handle cloud "not found" errors (skip offline or other errors) + if isOffline(err) || !isNotExist(err) { + return err } - return err + + log.Debug("FileCache::%s : %s does not exist in cloud storage", method, path) + + // When createEmptyFile is true, cloud 404 means file truly doesn't exist + if fc.createEmptyFile { + return err + } + + // Check if file exists in local cache (pending upload) + localPath := filepath.Join(fc.tmpPath, path) + if _, statErr := os.Stat(localPath); isNotExist(statErr) { + log.Err("FileCache::%s : %s does not exist in local cache", method, path) + return syscall.ENOENT + } + + // File exists locally but not in cloud - it's pending upload + if canRetryOnClose { + log.Info("FileCache::%s : %s pending upload, cloud will be updated on close", method, path) + return nil + } + + log.Err("FileCache::%s : %s pending upload, operation cannot be retried", method, path) + return syscall.EIO } func (fc *FileCache) DeleteFile(options internal.DeleteFileOptions) error { log.Trace("FileCache::DeleteFile : name=%s", options.Name) + localPath := filepath.Join(fc.tmpPath, options.Name) + offlineOkay := false + _, hadPendingOp := fc.pendingOps.Load(options.Name) flock := fc.fileLocks.Get(options.Name) flock.Lock() defer flock.Unlock() err := fc.NextComponent().DeleteFile(options) - err = fc.validateStorageError(options.Name, err, "DeleteFile", true) - if err != nil { - log.Err("FileCache::DeleteFile : error %s [%s]", options.Name, err.Error()) + err = fc.resolveCloudNotFoundError(options.Name, err, 
"DeleteFile", true) + if isOffline(err) && fc.offlineAccess { + // we are offline, so handle deletion locally + offlineOkay = true + } + if err != nil && !offlineOkay { + log.Err("FileCache::DeleteFile : %s deletion failed. Here's why: %v", options.Name, err) return err } - localPath := filepath.Join(fc.tmpPath, options.Name) + // delete file from cache fc.policy.CachePurge(localPath) - // Delete from scheduleOps if it exists - fc.scheduleOps.Delete(options.Name) - // update file state flock.LazyOpen = false + // update pending ops: + // - offline delete must be deferred + // - deleting a file that already has a deferred op must convert it to deletion + if offlineOkay || hadPendingOp { + fc.addPendingOp(options.Name, pendingFlags{isDeletion: true}) + } return nil } @@ -1142,135 +1361,124 @@ func (fc *FileCache) openFileInternal(handle *handlemap.Handle, flock *common.Lo fileOptions := val.(openFileOptions) flags = fileOptions.flags fMode = fileOptions.fMode + overwrite := flags&os.O_TRUNC != 0 + create := flags&os.O_CREATE != 0 localPath := filepath.Join(fc.tmpPath, handle.Path) - var f *os.File fc.policy.CacheValid(localPath) - downloadRequired, fileExists, attr, err := fc.isDownloadRequired(localPath, handle.Path, flock) - if err != nil && !os.IsNotExist(err) { - log.Err( - "FileCache::openFileInternal : Failed to check if download is required for %s [%s]", - handle.Path, - err.Error(), - ) + downloadRequired, fileExists, attr, _ := fc.isDownloadRequired(localPath, handle.Path, flock) + + // handle offline cases + if !fc.NextComponent().CloudConnected() { + if !fc.offlineAccess { + // offline access is not allowed + if !overwrite && downloadRequired { + // data is unavailable - do not open the file + log.Err("FileCache::OpenFile : %s can't download data (offline)", handle.Path) + return &common.CloudUnreachableError{} + } else { + // the cloud was connected when open was called, + // so try to fulfill the contract by allowing local access + // if the connection 
is not reestablished, flush and close will fail, + // but at least the data will be on disk + log.Err("FileCache::OpenFile : %s Using local file (offline)", handle.Path) + } + } else if !fileExists && !overwrite { + // data is unavailable - do not open the file + log.Err("FileCache::OpenFile : %s data unavailable (offline)", handle.Path) + return &common.CloudUnreachableError{} + } } - fileMode := fc.defaultPermission - if downloadRequired { + if downloadRequired || overwrite { log.Debug("FileCache::openFileInternal : Need to download %s", handle.Path) - fileSize := int64(0) - if attr != nil { - fileSize = int64(attr.Size) - } - - if fileExists { - log.Debug("FileCache::openFileInternal : Delete cached file %s", handle.Path) - - err := deleteFile(localPath) - if err != nil && !os.IsNotExist(err) { - log.Err("FileCache::openFileInternal : Failed to delete old file %s", handle.Path) - } - } else { - // Create the file if if doesn't already exist. - err := os.MkdirAll(filepath.Dir(localPath), fc.defaultPermission) - if err != nil { - log.Err( - "FileCache::openFileInternal : error creating directory structure for file %s [%s]", - handle.Path, - err.Error(), - ) - return err - } - } - - // Open the file in write mode. - f, err = common.OpenFile(localPath, os.O_CREATE|os.O_RDWR, fMode) + // Create the folder if it doesn't already exist. 
+ err := os.MkdirAll(filepath.Dir(localPath), fc.defaultPermission) if err != nil { log.Err( - "FileCache::openFileInternal : error creating new file %s [%s]", + "FileCache::openFileInternal : error creating directory structure for file %s [%s]", handle.Path, err.Error(), ) return err } - if flags&os.O_TRUNC != 0 { - fileSize = 0 + // Open a download handle + downloadHandle, err := common.OpenFile(localPath, os.O_CREATE|os.O_TRUNC|os.O_RDWR, fMode) + if err != nil { + log.Err("FileCache::openFileInternal : %s open dl handle failed [%v]", handle.Path, err) + return err } - if fileSize > 0 { + // download + if attr != nil && !overwrite { // Download/Copy the file from storage to the local file. // We pass a count of 0 to get the entire object - err = fc.NextComponent().CopyToFile( + dlErr := fc.NextComponent().CopyToFile( internal.CopyToFileOptions{ Name: handle.Path, Offset: 0, Count: 0, - File: f, + File: downloadHandle, }) - if err != nil { + if dlErr != nil { // File was created locally and now download has failed so we need to delete it back from local cache - log.Err( - "FileCache::openFileInternal : error downloading file from storage %s [%s]", - handle.Path, - err.Error(), - ) - _ = f.Close() + log.Err("FileCache::openFileInternal : %s download failed [%v]", handle.Path, dlErr) + _ = downloadHandle.Close() err = os.Remove(localPath) if err != nil { - log.Err( - "FileCache::openFileInternal : Failed to remove file %s [%s]", - localPath, - err.Error(), - ) + log.Err("FileCache::openFileInternal : %s delete failed [%v]", localPath, err) } - return err + return dlErr } } // Update the last download time of this file flock.SetDownloadTime() - // update file state - flock.LazyOpen = false - - log.Debug("FileCache::openFileInternal : Download of %s is complete", handle.Path) - f.Close() + downloadHandle.Close() + log.Debug("FileCache::openFileInternal : %s download complete", handle.Path) // After downloading the file, update the modified times and mode of the file. 
- if attr != nil && !attr.IsModeDefault() { - fileMode = attr.Mode + // Only set permissions when creating a new file (O_CREATE flag is set) + if create { + fileMode := fc.defaultPermission + if attr != nil && !attr.IsModeDefault() { + fileMode = attr.Mode + } + + // If user has selected some non default mode in config then every local file shall be created with that mode only + err = os.Chmod(localPath, fileMode) + if err != nil { + log.Err( + "FileCache::openFileInternal : Failed to change mode of file %s [%s]", + handle.Path, + err.Error(), + ) + } } - } - // If user has selected some non default mode in config then every local file shall be created with that mode only - err = os.Chmod(localPath, fileMode) - if err != nil { - log.Err( - "FileCache::openFileInternal : Failed to change mode of file %s [%s]", - handle.Path, - err.Error(), - ) - } - // TODO: When chown is supported should we update that? + // TODO: When chown is supported should we update that? - if attr != nil { - // chtimes shall be the last api otherwise calling chmod/chown will update the last change time - err = os.Chtimes(localPath, attr.Atime, attr.Mtime) - if err != nil { - log.Err( - "FileCache::openFileInternal : Failed to change times of file %s [%s]", - handle.Path, - err.Error(), - ) + // set date + if attr != nil { + // chtimes shall be the last api otherwise calling chmod/chown will update the last change time + err = os.Chtimes(localPath, attr.Atime, attr.Mtime) + if err != nil { + log.Err( + "FileCache::openFileInternal : Failed to change times of file %s [%s]", + handle.Path, + err.Error(), + ) + } } - } + } // end: create & download file fileCacheStatsCollector.UpdateStats(stats_manager.Increment, dlFiles, (int64)(1)) // Open the file and grab a shared lock to prevent deletion by the cache policy. 
- f, err = common.OpenFile(localPath, flags, fMode) + f, err := common.OpenFile(localPath, flags, fMode) if err != nil { log.Err( "FileCache::openFileInternal : error opening cached file %s [%s]", @@ -1281,7 +1489,11 @@ func (fc *FileCache) openFileInternal(handle *handlemap.Handle, flock *common.Lo } if flags&os.O_TRUNC != 0 { - fc.setHandleDirty(handle) + // handle is already locked by openFileInternal; avoid recursive lock in setHandleDirty. + if !handle.Dirty() { + handle.Flags.Set(handlemap.HandleFlagDirty) + fc.fileLocks.Get(handle.Path).IncDirty() + } } inf, err := f.Stat() @@ -1321,39 +1533,22 @@ func (fc *FileCache) OpenFile(options internal.OpenFileOptions) (*handlemap.Hand localPath := filepath.Join(fc.tmpPath, options.Name) downloadRequired, _, cloudAttr, err := fc.isDownloadRequired(localPath, options.Name, flock) + cloudConnected := fc.NextComponent().CloudConnected() + + // block offline open calls when offline access is disabled + if !cloudConnected && !fc.offlineAccess { + log.Err("FileCache::OpenFile : %s Offline access is disabled", options.Name) + return nil, common.CloudUnreachableError{} + } // return err in case of authorization permission mismatch if err != nil && err == syscall.EACCES { return nil, err } - // check if we are running out of space - if cloudAttr != nil { - fileSize := int64(cloudAttr.Size) - if fc.diskHighWaterMark != 0 { - currSize, err := common.GetUsage(fc.tmpPath) - if err != nil { - log.Err( - "FileCache::OpenFile : error getting current usage of cache [%s]", - err.Error(), - ) - } else { - // Subtract existing local file size to avoid double-counting - existingSize := int64(0) - if info, statErr := os.Stat(localPath); statErr == nil { - existingSize = info.Size() - } - additionalSpace := max(int64(0), fileSize-existingSize) - if currSize+float64(additionalSpace) > fc.diskHighWaterMark { - log.Err( - "FileCache::OpenFile : cache size limit reached [%f] failed to open %s", - fc.maxCacheSizeMB, - options.Name, - ) - return 
nil, syscall.ENOSPC - } - } - } + // check cache size hard limit + if cloudAttr != nil && fc.exceedsHardLimit(int64(cloudAttr.Size), options.Name, "OpenFile") { + return nil, syscall.ENOSPC } // create handle and record openFileOptions for later @@ -1368,7 +1563,8 @@ func (fc *FileCache) OpenFile(options internal.OpenFileOptions) (*handlemap.Hand // will opening the file require downloading it? var openErr error - if !downloadRequired { + openOverwrites := options.Flags&os.O_TRUNC != 0 + if !downloadRequired || openOverwrites || !cloudConnected { // use the local file to complete the open operation now // flock is already locked, as required by openFileInternal openErr = fc.openFileInternal(handle, flock) @@ -1381,6 +1577,42 @@ func (fc *FileCache) OpenFile(options internal.OpenFileOptions) (*handlemap.Hand return handle, openErr } +func (fc *FileCache) exceedsHardLimit(newSize int64, name string, requestType string) bool { + // this is only relevant if there is a hard limit + if fc.diskHighWaterMark == 0 { + return false + } + // how much is being added? 
+ existingSize := int64(0) + if info, statErr := os.Stat(filepath.Join(fc.tmpPath, name)); statErr == nil { + existingSize = info.Size() + } + additionalSpace := newSize - existingSize + // don't check usage needlessly + if additionalSpace <= 0 { + return false + } + // get current total cache size + currSize, err := common.GetUsage(fc.tmpPath) + if err != nil { + log.Err("FileCache::exceedsHardLimit : failed to get current cache size [%v]", err) + return false + } + // check if we are running out of space + // Add a buffer to the high water mark to account for any small discrepancies in usage calculations + if currSize+float64(additionalSpace) > (fc.diskHighWaterMark + 4096) { + log.Err( + "FileCache::exceedsHardLimit : %s %s failed - cache size hard limit reached (%f MB)", + name, + requestType, + fc.maxCacheSizeMB, + ) + return true + } + + return false +} + // flock must already be locked before calling this function func (fc *FileCache) isDownloadRequired( localPath string, @@ -1392,7 +1624,7 @@ func (fc *FileCache) isDownloadRequired( lmt := time.Time{} // check if the file exists locally - finfo, statErr := os.Stat(localPath) + info, statErr := os.Stat(localPath) if statErr == nil { // The file does not need to be downloaded as long as it is in the cache policy fileInPolicyCache := fc.policy.IsCached(localPath) @@ -1405,8 +1637,8 @@ func (fc *FileCache) isDownloadRequired( ) } // gather stat details - lmt = finfo.ModTime() - } else if os.IsNotExist(statErr) { + lmt = info.ModTime() + } else if isNotExist(statErr) { // The file does not exist in the local cache so it needs to be downloaded log.Debug("FileCache::isDownloadRequired : %s not present in local cache", localPath) } else { @@ -1424,49 +1656,52 @@ func (fc *FileCache) isDownloadRequired( // get cloud attributes cloudAttr, err := fc.NextComponent().GetAttr(internal.GetAttrOptions{Name: objectPath}) - if err != nil && !os.IsNotExist(err) { - log.Err( - "FileCache::isDownloadRequired : Failed to get 
attr of %s [%s]", - objectPath, - err.Error(), - ) + if cloudAttr == nil && !isNotExist(err) { + log.Err("FileCache::isDownloadRequired : %s GetAttr failed [%v]", objectPath, err) } - if !cached && cloudAttr != nil { + // if no data is cached, and there is (or might be) data in cloud storage, download it + if !cached && !isNotExist(err) { downloadRequired = true } + // check refresh timer if cached && refreshTimerExpired && cloudAttr != nil { // File is not expired, but the user has configured a refresh timer, which has expired. - // Does the cloud have a newer copy? - cloudHasLatestData := cloudAttr.Mtime.After(lmt) || finfo.Size() != cloudAttr.Size + // before deciding to honor the refresh timer, check for exceptions: + switch { + // Don't refresh unless the cloud copy is newer + case !cloudAttr.Mtime.After(lmt) && info.Size() == cloudAttr.Size: + log.Info( + "FileCache::isDownloadRequired : %s Cloud data is not latest, skip redownload [A-%v : L-%v]", + objectPath, + cloudAttr.Mtime, + lmt, + ) // Is the local file open? 
- fileIsOpen := flock.Count() > 0 && !flock.LazyOpen - if cloudHasLatestData && !fileIsOpen { + case flock.Count() > 0 && !flock.LazyOpen: log.Info( - "FileCache::isDownloadRequired : File is modified in container, so forcing redownload %s [A-%v : L-%v] [A-%v : L-%v]", + "FileCache::isDownloadRequired : %s Need to re-download latest data, but skipping as handle is already open", + objectPath, + ) + case !fc.NextComponent().CloudConnected(): + log.Info( + "FileCache::isDownloadRequired : %s Need to re-download, but skipping as cloud is not connected", + objectPath, + ) + default: + log.Info( + "FileCache::isDownloadRequired : %s File is modified in container, so forcing redownload [A-%v : L-%v] [A-%v : L-%v]", objectPath, cloudAttr.Mtime, lmt, cloudAttr.Size, - finfo.Size(), + info.Size(), ) downloadRequired = true - } else { - // log why we decided not to refresh - if !cloudHasLatestData { - log.Info( - "FileCache::isDownloadRequired : File in container is not latest, skip redownload %s [A-%v : L-%v]", - objectPath, - cloudAttr.Mtime, - lmt, - ) - } else if fileIsOpen { - log.Info( - "FileCache::isDownloadRequired : Need to re-download %s, but skipping as handle is already open", - objectPath, - ) - } + } + + if !downloadRequired { // As we have decided to continue using old file, we reset the timer to check again after refresh time interval flock.SetDownloadTime() } @@ -1513,31 +1748,36 @@ func (fc *FileCache) releaseFileInternal( _, noCachedHandle := options.Handle.GetValue("openFileOptions") if !noCachedHandle { - // flock is already locked, as required by flushFileInternal - err := fc.flushFileInternal( - internal.FlushFileOptions{Handle: options.Handle, CloseInProgress: true}, - ) //nolint - if err != nil { - log.Err("FileCache::releaseFileInternal : failed to flush file %s", options.Handle.Path) - return err + // update the cache policy + _ = fc.FileUsed(options.Handle.Path) + // sync + if options.Handle.Dirty() { + // flush the local file + err := 
fc.flushFileLocal(options.Handle) + if err != nil { + return err + } + // upload + // flock is already locked + flushOptions := internal.FlushFileOptions{Handle: options.Handle, CloseInProgress: true} + err = fc.flushFileCloud(flushOptions) + if err != nil { + return err + } } - + // close f := options.Handle.GetFileObject() if f == nil { - log.Err( - "FileCache::releaseFileInternal : error [missing fd in handle object] %s", - options.Handle.Path, - ) + log.Err("FileCache::releaseFileInternal : %s missing fd in handle", options.Handle.Path) return syscall.EBADF } - - err = f.Close() + err := f.Close() if err != nil { log.Err( - "FileCache::releaseFileInternal : error closing file %s(%d) [%s]", + "FileCache::releaseFileInternal : %s (%d) close failed [%v]", options.Handle.Path, int(f.Fd()), - err.Error(), + err, ) return err } @@ -1585,8 +1825,7 @@ func (fc *FileCache) ReadInBuffer(options *internal.ReadInBufferOptions) (int, e options.Handle.OptCnt++ options.Handle.Unlock() if (options.Handle.OptCnt % defaultCacheUpdateCount) == 0 { - localPath := filepath.Join(fc.tmpPath, options.Handle.Path) - fc.policy.CacheValid(localPath) + _ = fc.FileUsed(options.Handle.Path) } // Removing Pread as it is not supported on Windows @@ -1624,27 +1863,9 @@ func (fc *FileCache) WriteFile(options *internal.WriteFileOptions) (int, error) return 0, syscall.EBADF } - if fc.diskHighWaterMark != 0 { - currSize, err := common.GetUsage(fc.tmpPath) - if err != nil { - log.Err("FileCache::WriteFile : error getting current usage of cache [%s]", err.Error()) - } else { - // Calculate additional space needed beyond the file's current size - existingSize := int64(0) - if info, statErr := f.Stat(); statErr == nil { - existingSize = info.Size() - } - newEnd := options.Offset + int64(len(options.Data)) - additionalSpace := max(int64(0), newEnd-existingSize) - if currSize+float64(additionalSpace) > fc.diskHighWaterMark { - log.Err( - "FileCache::WriteFile : cache size limit reached [%f] failed to 
open %s", - fc.maxCacheSizeMB, - options.Handle.Path, - ) - return 0, syscall.ENOSPC - } - } + newSize := options.Offset + int64(len(options.Data)) + if fc.exceedsHardLimit(newSize, options.Handle.Path, "WriteFile") { + return 0, syscall.ENOSPC } // Read and write operations are very frequent so updating cache policy for every read is a costly operation @@ -1653,8 +1874,7 @@ func (fc *FileCache) WriteFile(options *internal.WriteFileOptions) (int, error) options.Handle.OptCnt++ options.Handle.Unlock() if (options.Handle.OptCnt % defaultCacheUpdateCount) == 0 { - localPath := filepath.Join(fc.tmpPath, options.Handle.Path) - fc.policy.CacheValid(localPath) + _ = fc.FileUsed(options.Handle.Path) } // Removing Pwrite as it is not supported on Windows @@ -1705,173 +1925,177 @@ func (fc *FileCache) SyncFile(options internal.SyncFileOptions) error { // FlushFile: Flush the local file to storage func (fc *FileCache) FlushFile(options internal.FlushFileOptions) error { - var flock *common.LockMapItem - // if flush will upload the file, then acquire the file lock - if options.Handle.Dirty() && (!fc.lazyWrite || options.CloseInProgress) { - flock = fc.fileLocks.Get(options.Handle.Path) - flock.Lock() - defer flock.Unlock() + // update the cache policy + _ = fc.FileUsed(options.Handle.Path) + + // ignore clean handles + if !options.Handle.Dirty() { + return nil } - // flock is locked, as required by flushFileInternal - return fc.flushFileInternal(options) + // flush the local file + err := fc.flushFileLocal(options.Handle) + if err != nil { + return err + } + + // If lazy write is enabled, stop here + if fc.lazyWrite && !options.CloseInProgress { + // As lazy-write is enable, upload will be scheduled when file is closed. 
+ log.Info( + "FileCache::FlushFile : %s upload delayed until handle %d closes (lazy write)", + options.Handle.Path, + options.Handle.ID, + ) + return nil + } + + // acquire flock + flock := fc.fileLocks.Get(options.Handle.Path) + flock.Lock() + defer flock.Unlock() + return fc.flushFileCloud(options) } -// file must be locked before calling this function -func (fc *FileCache) flushFileInternal(options internal.FlushFileOptions) error { - //defer exectime.StatTimeCurrentBlock("FileCache::FlushFile")() - log.Trace("FileCache::FlushFile : handle=%d, path=%s", options.Handle.ID, options.Handle.Path) +// flush local file +func (fc *FileCache) flushFileLocal(handle *handlemap.Handle) error { + //defer exectime.StatTimeCurrentBlock("FileCache::flushFileLocal")() + log.Trace("FileCache::flushFileLocal : %s handle=%d", handle.Path, handle.ID) - // The file should already be in the cache since CreateFile/OpenFile was called before and a shared lock was acquired. - localPath := filepath.Join(fc.tmpPath, options.Handle.Path) - fc.policy.CacheValid(localPath) - // if our handle is dirty then that means we wrote to the file - if options.Handle.Dirty() { - if fc.lazyWrite && !options.CloseInProgress { - // As lazy-write is enable, upload will be scheduled when file is closed. - log.Info( - "FileCache::FlushFile : %s will be flushed when handle %d is closed", - options.Handle.Path, - options.Handle.ID, - ) - return nil - } + // ignore clean handles + if !handle.Dirty() { + return nil + } - f := options.Handle.GetFileObject() - if f == nil { - log.Err( - "FileCache::FlushFile : error [couldn't find fd in handle] %s", - options.Handle.Path, - ) - return syscall.EBADF - } + // Flush all data to disk that has been buffered by the kernel. 
+ f := handle.GetFileObject() + if f == nil { + log.Err("FileCache::flushFileLocal : %s couldn't find fd in handle", handle.Path) + return syscall.EBADF + } + err := fc.syncFile(f, handle.Path) + if err != nil { + log.Err("FileCache::flushFileLocal : %s sync failed [%v]", handle.Path, err) + return syscall.EIO + } - // Flush all data to disk that has been buffered by the kernel. - // for scheduled uploads, we use a read-only file handle - if !options.AsyncUpload { - err := fc.syncFile(f, options.Handle.Path) - if err != nil { - log.Err( - "FileCache::FlushFile : error [unable to sync file] %s", - options.Handle.Path, - ) - return syscall.EIO - } - } + return nil +} - // Write to storage - // Create a new handle for the SDK to use to upload (read local file) - // The local handle can still be used for read and write. - var orgMode fs.FileMode - modeChanged := false - notInCloud := fc.notInCloud( +// file must be locked before calling this function +func (fc *FileCache) flushFileCloud(options internal.FlushFileOptions) error { + //defer exectime.StatTimeCurrentBlock("FileCache::flushFileCloud")() + log.Trace("FileCache::flushFileCloud : %s handle=%d", options.Handle.Path, options.Handle.ID) + + // ignore clean handles + if !options.Handle.Dirty() { + return nil + } + + // If lazy write is enabled, stop here + if fc.lazyWrite && !options.CloseInProgress { + // As lazy-write is enable, upload will be scheduled when file is closed. 
+ log.Info( + "FileCache::flushFileCloud : %s will be flushed when handle %d is closed", options.Handle.Path, + options.Handle.ID, ) - // Figure out if we should upload immediately or append to pending OPS - if options.AsyncUpload || !notInCloud || fc.alwaysOn { - uploadHandle, err := common.Open(localPath) - if err != nil { - if os.IsPermission(err) { - info, _ := os.Stat(localPath) - orgMode = info.Mode() - newMode := orgMode | 0444 - err = os.Chmod(localPath, newMode) - if err == nil { - modeChanged = true - uploadHandle, err = common.Open(localPath) - log.Info( - "FileCache::FlushFile : read mode added to file %s", - options.Handle.Path, - ) - } - } + return nil + } - if err != nil { - log.Err( - "FileCache::FlushFile : error [unable to open upload handle] %s [%s]", - options.Handle.Path, - err.Error(), - ) - return err - } - } - err = fc.NextComponent().CopyFromFile( - internal.CopyFromFileOptions{ - Name: options.Handle.Path, - File: uploadHandle, - }) + // decide whether to schedule the upload instead + select { + case <-fc.startScheduledUploads: + // upload now + default: + // schedule is inactive - push to pendingOps + log.Info("FileCache::flushFileCloud : %s upload deferred (scheduled)", options.Handle.Path) + fc.addPendingOp(options.Handle.Path, pendingFlags{}) + fc.clearHandleDirty(options.Handle) + return nil + } - uploadHandle.Close() - if err == nil { - // Clear dirty flag since file was successfully uploaded - fc.clearHandleDirty(options.Handle) - } + // Write to storage + err := fc.uploadFile(options.Handle.Path) + // handle errors and update flags + switch { + case err == nil: + fc.clearHandleDirty(options.Handle) + case isOffline(err) && fc.offlineAccess: + log.Warn("FileCache::flushFileCloud : %s upload delayed (offline)", options.Handle.Path) + // add file to upload queue + fc.addPendingOp(options.Handle.Path, pendingFlags{}) + err = nil + default: + log.Err("FileCache::flushFileCloud : %s upload failed [%v]", options.Handle.Path, err) + } - if 
err != nil { - log.Err( - "FileCache::FlushFile : %s upload failed [%s]", - options.Handle.Path, - err.Error(), - ) - return err - } + return err +} - if modeChanged { - err1 := os.Chmod(localPath, orgMode) - if err1 != nil { - log.Err( - "FileCache::FlushFile : Failed to remove read mode from file %s [%s]", - options.Handle.Path, - err1.Error(), - ) - } - } +// copy local file data to cloud storage +func (fc *FileCache) uploadFile(name string) error { + // Open a new read-only local file handle for the SDK to use to upload + // stat + localPath := filepath.Join(fc.tmpPath, name) + info, err := os.Stat(localPath) + if err != nil { + log.Err("FileCache::FlushFile : %s stat failed [%v]", name, err) + return err + } + origMode := info.Mode() + modeChanged := false + // open + f, openErr := common.Open(localPath) + // fix permissions + if os.IsPermission(openErr) { + newMode := origMode | 0444 + err = os.Chmod(localPath, newMode) + if err == nil { + modeChanged = true + log.Info("FileCache::FlushFile : read mode added to file %s", name) + f, openErr = common.Open(localPath) } else { - //push to scheduleOps as default since we don't want to upload to the cloud - log.Info( - "FileCache::FlushFile : %s upload deferred (Scheduled for upload)", - options.Handle.Path, - ) - _, statErr := os.Stat(localPath) - if statErr == nil { - fc.markFileForUpload(options.Handle.Path) - flock := fc.fileLocks.Get(options.Handle.Path) - flock.SyncPending = true - } - fc.clearHandleDirty(options.Handle) - + log.Err("FileCache::FlushFile : %s unable to add read mode [%v]", name, err) } - + } + if openErr != nil { + log.Err("FileCache::FlushFile : %s unable to open upload handle [%v]", name, openErr) + return openErr + } + // upload file data + uploadErr := fc.NextComponent().CopyFromFile(internal.CopyFromFileOptions{Name: name, File: f}) + f.Close() + // change mode back + if modeChanged { + err := os.Chmod(localPath, origMode) + if err != nil { + log.Err("FileCache::FlushFile : %s Failed to 
remove read mode [%v]", name, err) + } + } + // update the mode as well + if uploadErr == nil { // If chmod was done on the file before it was uploaded to container then setting up mode would have been missed // Such file names are added to this map and here post upload we try to set the mode correctly // Delete the entry from map so that any further flush do not try to update the mode again - _, found := fc.missedChmodList.LoadAndDelete(options.Handle.Path) + _, found := fc.missedChmodList.LoadAndDelete(name) if found { // If file is found in map it means last chmod was missed on this // When chmod on container was missed, local file was updated with correct mode // Here take the mode from local cache and update the container accordingly - localPath := filepath.Join(fc.tmpPath, options.Handle.Path) - info, err := os.Stat(localPath) - if err == nil { - err = fc.chmodInternal( - internal.ChmodOptions{Name: options.Handle.Path, Mode: info.Mode()}, - ) - if err != nil { - // chmod was missed earlier for this file and doing it now also - // resulted in error so ignore this one and proceed for flush handling - log.Err( - "FileCache::FlushFile : %s chmod failed [%s]", - options.Handle.Path, - err.Error(), - ) - } + err = fc.chmodInternal(internal.ChmodOptions{Name: name, Mode: origMode}) + if err != nil { + // chmod was missed earlier for this file and doing it now also + // resulted in error so ignore this one and proceed for flush handling + log.Err("FileCache::FlushFile : %s chmod failed [%v]", name, err) + fc.missedChmodList.LoadOrStore(name, true) } } } - return nil + return uploadErr } // GetAttr: Consolidate attributes from storage and local cache @@ -1887,12 +2111,14 @@ func (fc *FileCache) GetAttr(options internal.GetAttrOptions) (*internal.ObjAttr // If the file is being downloaded or deleted, the size and mod time will be incorrect // wait for download or deletion to complete before getting local file info flock := fc.fileLocks.Get(options.Name) - // TODO: 
should we add RLock and RUnlock to the lock map for GetAttr? flock.RLock() // Path in local cache, open, and dirty so cache is the source of truth for attributes. localPath := filepath.Join(fc.tmpPath, options.Name) info, localErr := os.Stat(localPath) + if localErr != nil && !isNotExist(localErr) { + log.Warn("FileCache::GetAttr : %s unexpected stat error [%v]", options.Name, localErr) + } if flock.Count() > 0 && flock.DirtyCount() > 0 { if localErr == nil && !info.IsDir() { flock.RUnlock() @@ -1900,31 +2126,39 @@ func (fc *FileCache) GetAttr(options internal.GetAttrOptions) (*internal.ObjAttr } } - flock.RUnlock() - // To cover case 1, get attributes from storage - var exists bool + inCloud := false attrs, remoteErr := fc.NextComponent().GetAttr(options) - if remoteErr != nil { - if remoteErr == syscall.ENOENT || os.IsNotExist(remoteErr) { - log.Debug("FileCache::GetAttr : %s does not exist in cloud storage", options.Name) - exists = false - } else { - log.Err( - "FileCache::GetAttr : Failed to get attr of %s [%s]", - options.Name, - remoteErr.Error(), - ) + flock.RUnlock() + switch { + case remoteErr == nil: // object found + inCloud = true + case !isOffline(remoteErr) && isNotExist(remoteErr): // object not found + log.Debug("FileCache::GetAttr : %s does not exist in cloud storage", options.Name) + case fc.offlineAccess && isOffline(remoteErr): // offline access + switch { + case cachedData(remoteErr): // use offline attributes + inCloud = !isNotExist(remoteErr) + case fc.notInCloud(options.Name): // use parent directory attributes + inCloud = false + case localErr == nil: // no attributes, but allow access to local file + log.Warn("FileCache::GetAttr : %s missing attrs, using stat (offline)", options.Name) + default: // cloud state unknown + log.Err("FileCache::GetAttr : %s cloud state unknown (offline)", options.Name) return nil, remoteErr } - } else { - exists = true + log.Debug("FileCache::GetAttr : %s exists=%t (offline)", options.Name, inCloud) + default: 
// other errors + log.Err("FileCache::GetAttr : %s GetAttr failed. Here's why: %v", options.Name, remoteErr) + return nil, remoteErr } // To cover cases 2 and 3, grab the attributes from the local cache - // All directory operations are guaranteed to be synced with storage so they cannot be in a case 2 or 3 state. - if localErr == nil && info != nil && !info.IsDir() { - if exists { // Case 3 (file in cloud storage and in local cache) so update the relevant attributes + if localErr == nil { + if !inCloud { // Case 2 (only in local cache) + log.Debug("FileCache::GetAttr : serving %s attr from local cache", options.Name) + attrs = newObjAttr(options.Name, info) + } else if !info.IsDir() { // Case 3 (file in cloud storage and in local cache) so update the relevant attributes // attrs is a pointer returned by NextComponent // modifying attrs could corrupt cached directory listings // to update properties, we need to make a deep copy first @@ -1932,13 +2166,10 @@ func (fc *FileCache) GetAttr(options internal.GetAttrOptions) (*internal.ObjAttr newAttr.Mtime = info.ModTime() newAttr.Size = info.Size() attrs = &newAttr - } else { // Case 2 (file only in local cache) so create a new attributes and add them to the storage attributes - log.Debug("FileCache::GetAttr : serving %s attr from local cache", options.Name) - exists = true - attrs = newObjAttr(options.Name, info) } } + exists := inCloud || localErr == nil if !exists { return nil, syscall.ENOENT } @@ -1946,6 +2177,7 @@ func (fc *FileCache) GetAttr(options internal.GetAttrOptions) (*internal.ObjAttr return attrs, nil } +// setter func (fc *FileCache) setHandleDirty(handle *handlemap.Handle) { handle.Lock() alreadyDirty := handle.Dirty() @@ -1958,6 +2190,7 @@ func (fc *FileCache) setHandleDirty(handle *handlemap.Handle) { } } +// setter func (fc *FileCache) clearHandleDirty(handle *handlemap.Handle) { handle.Lock() wasDirty := handle.Dirty() @@ -1989,10 +2222,25 @@ func (fc *FileCache) RenameFile(options 
internal.RenameFileOptions) error { defer dflock.Unlock() err := fc.NextComponent().RenameFile(options) - localOnly := os.IsNotExist(err) - err = fc.validateStorageError(options.Src, err, "RenameFile", true) + localOnly := isNotExist(err) + err = fc.resolveCloudNotFoundError(options.Src, err, "RenameFile", true) + if fc.offlineAccess && isOffline(err) { + // offline renames require a cached src, since pendingOps only records uploads and deletes + if _, statErr := os.Stat(filepath.Join(fc.tmpPath, options.Src)); statErr != nil { + log.Err("FileCache::RenameFile : %s Offline rename failed (no cache)", options.Src) + return err + } else { + log.Debug("FileCache::RenameFile : %s Offline rename allowed", options.Src) + // make sure src is in pendingOps so renamePendingOp works correctly + _, srcAlreadyPending := fc.pendingOps.LoadOrStore(options.Src, pendingFlags{}) + if !srcAlreadyPending { + log.Info("FileCache::RenameFile : %s Added src to pendingOps", options.Src) + } + err = nil + } + } if err != nil { - log.Err("FileCache::RenameFile : %s failed to rename file [%s]", options.Src, err.Error()) + log.Err("FileCache::RenameFile : %s rename failed [%v]", options.Src, err) return err } @@ -2018,15 +2266,7 @@ func (fc *FileCache) renameLocalFile( ) fc.policy.CacheValid(localDstPath) - // Transfer entry from scheduleOps if it exists - if _, found := fc.scheduleOps.Load(srcName); found { - fc.scheduleOps.Store(dstName, struct{}{}) - fc.scheduleOps.Delete(srcName) - - // Ensure SyncPending flag is set on destination - dflock.SyncPending = true - } - case os.IsNotExist(err): + case isNotExist(err): if localOnly { // neither cloud nor file cache has this file, so return ENOENT log.Err("FileCache::renameLocalFile : %s source file not found", srcName) @@ -2059,11 +2299,23 @@ func (fc *FileCache) renameLocalFile( // rename open handles fc.renameOpenHandles(srcName, dstName, sflock, dflock) + // update pending cloud ops + fc.renamePendingOp(fc.getObjectName(localSrcPath), 
fc.getObjectName(localDstPath)) return nil } -// files should already be locked before calling this function +// flock must be locked for both src and dst +func (fc *FileCache) renamePendingOp(srcName, dstName string) { + value, operationPending := fc.pendingOps.LoadAndDelete(srcName) + if operationPending { + opFlags := value.(pendingFlags) + fc.addPendingOp(srcName, pendingFlags{isDir: opFlags.isDir, isDeletion: true}) + fc.addPendingOp(dstName, opFlags) + } +} + +// flock must be locked for both files func (fc *FileCache) renameOpenHandles( srcName, dstName string, sflock, dflock *common.LockMapItem, @@ -2073,9 +2325,11 @@ func (fc *FileCache) renameOpenHandles( // update any open handles to the file with its new name handlemap.GetHandles().Range(func(key, value any) bool { handle := value.(*handlemap.Handle) + handle.Lock() if handle.Path == srcName { handle.Path = dstName } + handle.Unlock() return true }) // copy the number of open handles to the new name @@ -2083,6 +2337,8 @@ func (fc *FileCache) renameOpenHandles( sflock.Dec() dflock.Inc() } + // copy flags + dflock.LazyOpen = sflock.LazyOpen } } @@ -2090,31 +2346,8 @@ func (fc *FileCache) renameOpenHandles( func (fc *FileCache) TruncateFile(options internal.TruncateFileOptions) error { log.Trace("FileCache::TruncateFile : name=%s, size=%d", options.Name, options.NewSize) - if fc.diskHighWaterMark != 0 { - currSize, err := common.GetUsage(fc.tmpPath) - if err != nil { - log.Err( - "FileCache::TruncateFile : error getting current usage of cache [%s]", - err.Error(), - ) - } else { - // Only count the additional space beyond the file's current size - localPath := filepath.Join(fc.tmpPath, options.Name) - existingSize := int64(0) - if info, statErr := os.Stat(localPath); statErr == nil { - existingSize = info.Size() - } - additionalSpace := max(int64(0), options.NewSize-existingSize) - // Add a buffer to the high water mark to account for any small discrepancies in usage calculations - if 
currSize+float64(additionalSpace) > (fc.diskHighWaterMark + 4096) { - log.Err( - "FileCache::TruncateFile : cache size limit reached [%f] failed to open %s", - fc.maxCacheSizeMB, - options.Name, - ) - return syscall.ENOSPC - } - } + if fc.exceedsHardLimit(options.NewSize, options.Name, "TruncateFile") { + return syscall.ENOSPC } if options.Handle != nil { @@ -2126,7 +2359,7 @@ func (fc *FileCache) TruncateFile(options internal.TruncateFileOptions) error { err := fc.openFileInternal(options.Handle, flock) flock.Unlock() if err != nil { - return fmt.Errorf("error downloading file for %s [%s]", options.Handle.Path, err) + return fmt.Errorf("error downloading file for %s [%w]", options.Handle.Path, err) } } @@ -2178,32 +2411,66 @@ func (fc *FileCache) TruncateFile(options internal.TruncateFileOptions) error { return nil } + var offlineOkay bool flock := fc.fileLocks.Get(options.Name) flock.Lock() defer flock.Unlock() - err := fc.NextComponent().TruncateFile(options) - err = fc.validateStorageError(options.Name, err, "TruncateFile", true) - if err != nil { - log.Err("FileCache::TruncateFile : %s failed to truncate [%s]", options.Name, err.Error()) - return err + // check local file + localPath := filepath.Join(fc.tmpPath, options.Name) + info, localErr := os.Stat(localPath) + + cloudErr := fc.NextComponent().TruncateFile(options) + cloudErr = fc.resolveCloudNotFoundError(options.Name, cloudErr, "TruncateFile", true) + if isOffline(cloudErr) && fc.offlineAccess { + // is file data needed? 
+ needData := options.NewSize != 0 + haveData := localErr == nil + switch { + case haveData: + log.Debug("FileCache::TruncateFile : %s Offline truncate allowed", options.Name) + offlineOkay = true + case !needData: + log.Debug("FileCache::TruncateFile : %s Creating file (offline)", options.Name) + if f, err := common.OpenFile( + localPath, + os.O_CREATE|os.O_WRONLY|os.O_TRUNC, + fc.defaultPermission, + ); err == nil { + info, localErr = f.Stat() + f.Close() + offlineOkay = true + } else { + log.Err( + "FileCache::TruncateFile : %s Offline create failed [%v]", + options.Name, + err, + ) + } + default: + log.Info("FileCache::TruncateFile : %s Need file data (offline)", options.Name) + } + } + if cloudErr != nil && !offlineOkay { + log.Err("FileCache::TruncateFile : %s failed to truncate [%v]", options.Name, cloudErr) + return cloudErr } // Update the size of the file in the local cache - localPath := filepath.Join(fc.tmpPath, options.Name) - info, err := os.Stat(localPath) - if err == nil || os.IsExist(err) { + if localErr == nil { fc.policy.CacheValid(localPath) - if info.Size() != options.NewSize { - err = os.Truncate(localPath, options.NewSize) + err := os.Truncate(localPath, options.NewSize) if err != nil { log.Err( - "FileCache::TruncateFile : error truncating cached file %s [%s]", + "FileCache::TruncateFile : %s failed to truncate cached file [%v]", localPath, - err.Error(), + err, ) return err + } else if offlineOkay { + fc.addPendingOp(options.Name, pendingFlags{}) + log.Warn("FileCache::TruncateFile : %s operation queued (offline)", options.Name) } } } @@ -2222,38 +2489,58 @@ func (fc *FileCache) Chmod(options internal.ChmodOptions) error { return fc.chmodInternal(options) } -// file must be locked before calling this function +// flock must be locked before calling this function func (fc *FileCache) chmodInternal(options internal.ChmodOptions) error { log.Trace("FileCache::Chmod : Change mode of path %s", options.Name) + var offlineOkay bool + + // check
local file + localPath := filepath.Join(fc.tmpPath, options.Name) + info, localErr := os.Stat(localPath) // Update the file in cloud storage - err := fc.NextComponent().Chmod(options) - err = fc.validateStorageError(options.Name, err, "Chmod", false) - if err != nil { - if err != syscall.EIO { - log.Err("FileCache::Chmod : %s failed to change mode [%s]", options.Name, err.Error()) - return err - } else { - fc.missedChmodList.LoadOrStore(options.Name, true) + cloudErr := fc.NextComponent().Chmod(options) + cloudErr = fc.resolveCloudNotFoundError(options.Name, cloudErr, "Chmod", false) + switch { + // for offline access to work, there needs to be a cached file on which to write the mode + case isOffline(cloudErr) && fc.offlineAccess && localErr == nil: + log.Debug("FileCache::Chmod : %s operating on cache (offline)", options.Name) + offlineOkay = true + // EIO means local-only file (pending upload) + case cloudErr == syscall.EIO: + log.Info("FileCache::Chmod : %s operating on cache (object not found)", options.Name) + // return all other cloud errors + case cloudErr != nil: + log.Err("FileCache::Chmod : %s failed [%v]", options.Name, cloudErr) + return cloudErr + } + + // Cloud succeeded (or offline with local file) + if localErr != nil { + // File not in cache - verify cloud actually has it (protects against nil-returning backends) + cloudStateKnown, inCloud, _ := fc.checkCloud(options.Name) + if cloudStateKnown && inCloud { + // Cloud confirms object exists, chmod succeeded there, nothing local to update + return nil } + // Can't confirm cloud state or object doesn't exist + log.Err("FileCache::Chmod : %s not in cache and cloud state uncertain", options.Name) + return localErr } // Update the mode of the file in the local cache - localPath := filepath.Join(fc.tmpPath, options.Name) - info, err := os.Stat(localPath) - if err == nil { - fc.policy.CacheValid(localPath) - - if info.Mode() != options.Mode { - err = os.Chmod(localPath, options.Mode) - if err != nil { - 
log.Err( - "FileCache::Chmod : error changing mode on the cached path %s [%s]", - localPath, - err.Error(), - ) - return err - } + fc.policy.CacheValid(localPath) + if info.Mode() != options.Mode { + err := os.Chmod(localPath, options.Mode) + if err != nil { + log.Err("FileCache::Chmod : %s local chmod failed [%v]", options.Name, err) + return err + } + // record info for later cloud sync + fc.missedChmodList.LoadOrStore(options.Name, true) + if offlineOkay { + log.Warn("FileCache::Chmod : %s operation queued (offline)", options.Name) + fc.addPendingOp(options.Name, pendingFlags{}) } } @@ -2268,40 +2555,66 @@ func (fc *FileCache) Chown(options internal.ChownOptions) error { flock.Lock() defer flock.Unlock() + // check cache + localPath := filepath.Join(fc.tmpPath, options.Name) + _, localErr := os.Stat(localPath) + // Update the file in cloud storage err := fc.NextComponent().Chown(options) - err = fc.validateStorageError(options.Name, err, "Chown", false) - if err != nil { + err = fc.resolveCloudNotFoundError(options.Name, err, "Chown", false) + switch { + // for offline access to work, there needs to be a cached file on which to write the owner + // note: we don't add to pending ops since we have no mechanism to replay chown to cloud + case isOffline(err) && fc.offlineAccess && localErr == nil: + log.Debug("FileCache::Chown : %s operating on cache (offline)", options.Name) + // EIO means local-only file (pending upload) - we can't sync ownership to cloud + case err == syscall.EIO: + log.Info( + "FileCache::Chown : %s local-only file, cannot sync ownership to cloud", + options.Name, + ) + return err + // return all other cloud errors + case err != nil: log.Err("FileCache::Chown : %s failed to change owner [%s]", options.Name, err.Error()) return err } + // Cloud succeeded (or offline with local file) + if localErr != nil { + // File not in cache - verify cloud actually has it (protects against nil-returning backends) + cloudStateKnown, inCloud, _ := 
fc.checkCloud(options.Name) + if cloudStateKnown && inCloud { + // Cloud confirms object exists, chown succeeded there, nothing local to update + return nil + } + // Can't confirm cloud state or object doesn't exist + log.Err("FileCache::Chown : %s not in cache and cloud state uncertain", options.Name) + return localErr + } + // Update the owner and group of the file in the local cache - localPath := filepath.Join(fc.tmpPath, options.Name) - _, err = os.Stat(localPath) - if err == nil { - fc.policy.CacheValid(localPath) + fc.policy.CacheValid(localPath) - if runtime.GOOS != "windows" { - err = os.Chown(localPath, options.Owner, options.Group) - if err != nil { - log.Err( - "FileCache::Chown : error changing owner on the cached path %s [%s]", - localPath, - err.Error(), - ) - return err - } - } + // locally, do nothing on Windows + if runtime.GOOS == "windows" { + return nil + } + + // update local file + err = os.Chown(localPath, options.Owner, options.Group) + if err != nil { + log.Err("FileCache::Chown : %s owner change failed [%v]", localPath, err) + return err } return nil } +// wrapper for CacheValid which takes (object) name func (fc *FileCache) FileUsed(name string) error { // Update the owner and group of the file in the local cache - localPath := filepath.Join(fc.tmpPath, name) - fc.policy.CacheValid(localPath) + fc.policy.CacheValid(filepath.Join(fc.tmpPath, name)) return nil } @@ -2311,8 +2624,7 @@ func (fc *FileCache) FileUsed(name string) error { // << DO NOT DELETE ANY AUTO GENERATED CODE HERE >> func NewFileCacheComponent() internal.Component { comp := &FileCache{ - fileLocks: common.NewLockMap(), - activeWindowsMutex: &sync.Mutex{}, + fileLocks: common.NewLockMap(), } comp.SetName(compName) config.AddConfigChangeEventListener(comp) diff --git a/component/file_cache/file_cache_test.go b/component/file_cache/file_cache_test.go index a0ac61708..2b33cbd5f 100644 --- a/component/file_cache/file_cache_test.go +++ b/component/file_cache/file_cache_test.go 
@@ -50,6 +50,7 @@ import ( "github.com/Seagate/cloudfuse/component/loopback" "github.com/Seagate/cloudfuse/internal" "github.com/Seagate/cloudfuse/internal/handlemap" + "go.uber.org/mock/gomock" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" @@ -65,6 +66,8 @@ type fileCacheTestSuite struct { cache_path string // uses os.Separator (filepath.Join) fake_storage_path string // uses os.Separator (filepath.Join) useMock bool + mockCtrl *gomock.Controller + mock *internal.MockComponent } func newLoopbackFS() internal.Component { @@ -136,10 +139,18 @@ func (suite *fileCacheTestSuite) setupTestHelper(configuration string) { err := config.ReadConfigFromReader(strings.NewReader(configuration)) suite.assert.NoError(err) - suite.loopback = newLoopbackFS() - suite.fileCache = newTestFileCache(suite.loopback) - err = suite.loopback.Start(context.Background()) - suite.assert.NoError(err) + if suite.useMock { + suite.mockCtrl = gomock.NewController(suite.T()) + suite.mock = internal.NewMockComponent(suite.mockCtrl) + suite.fileCache = newTestFileCache(suite.mock) + // always simulate being offline + suite.mock.EXPECT().CloudConnected().AnyTimes().Return(false) + } else { + suite.loopback = newLoopbackFS() + suite.fileCache = newTestFileCache(suite.loopback) + err = suite.loopback.Start(context.Background()) + suite.assert.NoError(err) + } err = suite.fileCache.Start(context.Background()) if err != nil { panic(fmt.Sprintf("Unable to start file cache [%s]", err.Error())) @@ -148,12 +159,16 @@ func (suite *fileCacheTestSuite) setupTestHelper(configuration string) { } func (suite *fileCacheTestSuite) cleanupTest() { - err := suite.loopback.Stop() - suite.assert.NoError(err) - err = suite.fileCache.Stop() + err := suite.fileCache.Stop() if err != nil { panic(fmt.Sprintf("Unable to stop file cache [%s]", err.Error())) } + if suite.useMock { + suite.mockCtrl.Finish() + } else { + err = suite.loopback.Stop() + suite.assert.NoError(err) + } // Delete the temp 
directories created err = os.RemoveAll(suite.cache_path) @@ -447,12 +462,138 @@ func (suite *fileCacheTestSuite) TestCreateDir() { err := suite.fileCache.CreateDir(options) suite.assert.NoError(err) - // Path should not be added to the file cache - suite.assert.NoDirExists(filepath.Join(suite.cache_path, path)) + // Path should be added to the file cache + suite.assert.DirExists(filepath.Join(suite.cache_path, path)) // Path should be in fake storage suite.assert.DirExists(filepath.Join(suite.fake_storage_path, path)) } +// Tests CreateDir +func (suite *fileCacheTestSuite) TestCreateDirErrExist() { + defer suite.cleanupTest() + path := "a" + options := internal.CreateDirOptions{Name: path} + err := suite.fileCache.CreateDir(options) + suite.assert.NoError(err) + // test + err = suite.fileCache.CreateDir(options) + suite.assert.ErrorIs(err, os.ErrExist) +} + +// Tests CreateDir +func (suite *fileCacheTestSuite) TestCreateDirOffline() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + // setup + path := "a" + options := internal.CreateDirOptions{Name: path} + suite.mock.EXPECT().GetAttr(internal.GetAttrOptions{Name: path}).Return(nil, os.ErrNotExist) + err := suite.fileCache.CreateDir(options) + suite.assert.NoError(err) + + // Path should be added to the file cache + suite.assert.DirExists(filepath.Join(suite.cache_path, path)) +} + +func (suite *fileCacheTestSuite) TestStreamDirShowsOfflineCreatedDirectory() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + dirName := "offline-created-dir" + suite.mock.EXPECT(). + GetAttr(internal.GetAttrOptions{Name: dirName}). 
+ Return(nil, os.ErrNotExist) + + err := suite.fileCache.CreateDir(internal.CreateDirOptions{Name: dirName, Mode: 0777}) + suite.assert.NoError(err) + + suite.mock.EXPECT(). + StreamDir(internal.StreamDirOptions{Name: "", Token: ""}). + Return([]*internal.ObjAttr{}, "", &common.CloudUnreachableError{}) + suite.mock.EXPECT(). + GetAttr(internal.GetAttrOptions{Name: dirName}). + Return(nil, os.ErrNotExist) + + attrs, token, err := suite.fileCache.StreamDir(internal.StreamDirOptions{Name: ""}) + suite.assert.NoError(err) + suite.assert.Empty(token) + + foundDir := false + for _, attr := range attrs { + if attr != nil && attr.Name == dirName && attr.IsDir() { + foundDir = true + break + } + } + suite.assert.True(foundDir, "offline-created directory should appear in StreamDir output") +} + +func (suite *fileCacheTestSuite) TestCreateFileInsideOfflineCreatedDirectory() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + dirName := "offline-parent" + filePath := dirName + "/nested.txt" + + suite.mock.EXPECT(). + GetAttr(internal.GetAttrOptions{Name: dirName}). + Return(nil, os.ErrNotExist) + err := suite.fileCache.CreateDir(internal.CreateDirOptions{Name: dirName, Mode: 0777}) + suite.assert.NoError(err) + + suite.mock.EXPECT(). + GetAttr(internal.GetAttrOptions{Name: filePath}). + Return(nil, os.ErrNotExist) + h, err := suite.fileCache.CreateFile(internal.CreateFileOptions{Name: filePath, Mode: 0777}) + suite.assert.NoError(err) + suite.assert.NotNil(h) + suite.assert.FileExists(filepath.Join(suite.cache_path, filePath)) + + suite.mock.EXPECT(). + StreamDir(internal.StreamDirOptions{Name: dirName, Token: ""}). + Return([]*internal.ObjAttr{}, "", &common.CloudUnreachableError{}) + suite.mock.EXPECT(). + GetAttr(internal.GetAttrOptions{Name: filePath}). 
+ Return(nil, os.ErrNotExist) + + attrs, token, err := suite.fileCache.StreamDir(internal.StreamDirOptions{Name: dirName}) + suite.assert.NoError(err) + suite.assert.Empty(token) + + foundFile := false + for _, attr := range attrs { + if attr != nil && attr.Path == filePath { + foundFile = true + break + } + } + suite.assert.True( + foundFile, + "file in offline-created directory should appear in StreamDir output", + ) +} + func (suite *fileCacheTestSuite) TestDeleteDir() { defer suite.cleanupTest() // Setup @@ -477,6 +618,67 @@ func (suite *fileCacheTestSuite) TestDeleteDir() { suite.assert.NoDirExists(filepath.Join(suite.cache_path, dir)) } +func (suite *fileCacheTestSuite) TestDeleteDirOffline() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + dir := "offline-delete-dir" + err := os.MkdirAll(filepath.Join(suite.cache_path, dir), 0777) + suite.assert.NoError(err) + + suite.mock.EXPECT(). + DeleteDir(internal.DeleteDirOptions{Name: dir}). + Return(&common.CloudUnreachableError{}) + + err = suite.fileCache.DeleteDir(internal.DeleteDirOptions{Name: dir}) + suite.assert.NoError(err) + suite.assert.NoDirExists(filepath.Join(suite.cache_path, dir)) + + op, exists := suite.fileCache.pendingOps.Load(dir) + suite.assert.True(exists, "directory delete should be queued in pendingOps") + if exists { + suite.assert.Equal(pendingFlags{isDir: true, isDeletion: true}, op) + } +} + +func (suite *fileCacheTestSuite) TestOfflineCreateThenDeleteDirKeepsPendingKeyAsProvided() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + dir := "offline-create-delete-dir" + suite.mock.EXPECT(). 
+ GetAttr(internal.GetAttrOptions{Name: dir}). + Return(nil, os.ErrNotExist) + err := suite.fileCache.CreateDir(internal.CreateDirOptions{Name: dir, Mode: 0777}) + suite.assert.NoError(err) + + suite.mock.EXPECT(). + DeleteDir(internal.DeleteDirOptions{Name: dir}). + Return(&common.CloudUnreachableError{}) + err = suite.fileCache.DeleteDir(internal.DeleteDirOptions{Name: dir}) + suite.assert.NoError(err) + + op, found := suite.fileCache.pendingOps.Load(dir) + suite.assert.True(found, "directory key should be present as provided") + if found { + suite.assert.Equal(pendingFlags{isDir: true, isDeletion: true}, op) + } +} + func (suite *fileCacheTestSuite) TestStreamDirError() { defer suite.cleanupTest() // Setup @@ -487,6 +689,62 @@ func (suite *fileCacheTestSuite) TestStreamDirError() { suite.assert.Empty(dir) } +func (suite *fileCacheTestSuite) TestStreamDirOfflineCachedData() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + name := "offline-stream" + attrs := []*internal.ObjAttr{ + {Path: name + "/file1", Name: "file1"}, + } + + suite.mock.EXPECT(). + StreamDir(internal.StreamDirOptions{Name: name, Token: ""}). + Return(attrs, "", &common.CloudUnreachableError{}) + + dir, token, err := suite.fileCache.StreamDir(internal.StreamDirOptions{Name: name}) + suite.assert.NoError(err) + suite.assert.Empty(token) + suite.assert.Len(dir, 1) + suite.assert.Equal(name+"/file1", dir[0].Path) +} + +func (suite *fileCacheTestSuite) TestStreamDirOfflineNoCachedData() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + name := "offline-stream-no-cache" + suite.mock.EXPECT(). 
+ StreamDir(internal.StreamDirOptions{Name: name, Token: ""}). + Return( + nil, + "", + common.NoCachedDataError{ + Message: "no cached metadata", + CacheError: &common.CloudUnreachableError{}, + }, + ) + + dir, token, err := suite.fileCache.StreamDir(internal.StreamDirOptions{Name: name}) + suite.assert.Error(err) + suite.assert.Empty(token) + suite.assert.Empty(dir) +} + func (suite *fileCacheTestSuite) TestStreamDirCase1() { defer suite.cleanupTest() // Setup @@ -550,10 +808,10 @@ func (suite *fileCacheTestSuite) TestStreamDirCase2() { suite.assert.NoError(err) suite.assert.NotEmpty(dir) suite.assert.Len(dir, 4) - suite.assert.Equal(subdir, dir[0].Path) - suite.assert.Equal(file1, dir[1].Path) - suite.assert.Equal(file2, dir[2].Path) - suite.assert.Equal(file3, dir[3].Path) + suite.assert.Equal(file1, dir[0].Path) + suite.assert.Equal(file2, dir[1].Path) + suite.assert.Equal(file3, dir[2].Path) + suite.assert.Equal(subdir, dir[3].Path) } func (suite *fileCacheTestSuite) TestStreamDirCase3() { @@ -728,6 +986,102 @@ func (suite *fileCacheTestSuite) TestIsDirEmptyFalseInCache() { suite.assert.False(empty) } +func (suite *fileCacheTestSuite) TestIsDirEmptyOfflineEmptyDir() { + // Offline: attribute cache has no entries and GetAttr confirms listing is complete -> empty + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + dir := "offline-empty-dir" + suite.mock.EXPECT(). + IsDirEmpty(internal.IsDirEmptyOptions{Name: dir}). + Return(false) + suite.mock.EXPECT(). + StreamDir(internal.StreamDirOptions{Name: dir, Count: 1}). + Return(nil, "", &common.CloudUnreachableError{}) + suite.mock.EXPECT(). + GetAttr(gomock.Any()). 
+ Return(nil, common.NewCloudUnreachableError(os.ErrNotExist)) + + empty := suite.fileCache.IsDirEmpty(internal.IsDirEmptyOptions{Name: dir}) + suite.assert.True(empty) +} + +func (suite *fileCacheTestSuite) TestIsDirEmptyOfflineEmptyDirListingIncomplete() { + // Offline: attribute cache has no entries but GetAttr cannot confirm the listing is complete -> not empty + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + dir := "offline-empty-dir" + suite.mock.EXPECT(). + IsDirEmpty(internal.IsDirEmptyOptions{Name: dir}). + Return(false) + suite.mock.EXPECT(). + StreamDir(internal.StreamDirOptions{Name: dir, Count: 1}). + Return(nil, "", &common.CloudUnreachableError{}) + suite.mock.EXPECT().GetAttr(gomock.Any()). + Return(nil, common.NewNoCachedDataError(common.NewCloudUnreachableError(nil))) + + empty := suite.fileCache.IsDirEmpty(internal.IsDirEmptyOptions{Name: dir}) + suite.assert.False(empty) +} + +func (suite *fileCacheTestSuite) TestIsDirEmptyOfflineEmptyDirBlocked() { + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true\n block-offline-access: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + dir := "offline-empty-dir" + suite.mock.EXPECT(). + IsDirEmpty(internal.IsDirEmptyOptions{Name: dir}).
+ Return(false) + + empty := suite.fileCache.IsDirEmpty(internal.IsDirEmptyOptions{Name: dir}) + suite.assert.False(empty) +} + +func (suite *fileCacheTestSuite) TestIsDirEmptyOfflineNonEmptyDir() { + // Offline: attribute cache has entries -> not empty + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + dir := "offline-nonempty-dir" + attrs := []*internal.ObjAttr{ + {Path: dir + "/file1", Name: "file1"}, + } + suite.mock.EXPECT(). + IsDirEmpty(internal.IsDirEmptyOptions{Name: dir}). + Return(false) + suite.mock.EXPECT(). + StreamDir(internal.StreamDirOptions{Name: dir, Count: 1}). + Return(attrs, "", &common.CloudUnreachableError{}) + + empty := suite.fileCache.IsDirEmpty(internal.IsDirEmptyOptions{Name: dir}) + suite.assert.False(empty) +} + func (suite *fileCacheTestSuite) TestRenameDir() { defer suite.cleanupTest() @@ -761,6 +1115,108 @@ func (suite *fileCacheTestSuite) TestRenameDir() { } } +func (suite *fileCacheTestSuite) TestRenameDirOfflineFullyCached() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + src := "offline-rename-dir-src" + dst := "offline-rename-dir-dst" + file1 := src + "/file1" + file2 := src + "/file2" + + err := os.MkdirAll(filepath.Join(suite.cache_path, src), 0777) + suite.assert.NoError(err) + err = os.WriteFile(filepath.Join(suite.cache_path, file1), []byte("f1"), 0777) + suite.assert.NoError(err) + err = os.WriteFile(filepath.Join(suite.cache_path, file2), []byte("f2"), 0777) + suite.assert.NoError(err) + suite.fileCache.addPendingOp(file1, pendingFlags{}) + + cloudAttrs := []*internal.ObjAttr{ + {Path: file1, Name: "file1"}, + {Path: file2, Name: "file2"}, + } + 
suite.mock.EXPECT(). + StreamDir(internal.StreamDirOptions{Name: src, Token: ""}). + Return(cloudAttrs, "", &common.CloudUnreachableError{}) + suite.mock.EXPECT(). + RenameDir(internal.RenameDirOptions{Src: src, Dst: dst}). + Return(&common.CloudUnreachableError{}) + + err = suite.fileCache.RenameDir(internal.RenameDirOptions{Src: src, Dst: dst}) + suite.assert.NoError(err) + + suite.assert.NoDirExists(filepath.Join(suite.cache_path, src)) + suite.assert.FileExists(filepath.Join(suite.cache_path, dst, "file1")) + suite.assert.FileExists(filepath.Join(suite.cache_path, dst, "file2")) + + opSrc, srcFound := suite.fileCache.pendingOps.Load(file1) + suite.assert.True(srcFound, "Src file deletion should be queued after offline directory rename") + if srcFound { + suite.assert.True(opSrc.(pendingFlags).isDeletion) + } + + opDst, dstFound := suite.fileCache.pendingOps.Load(dst + "/file1") + suite.assert.True(dstFound, "Dst file creation should be queued after offline directory rename") + if dstFound { + suite.assert.False(opDst.(pendingFlags).isDeletion) + } +} + +func (suite *fileCacheTestSuite) TestRenameDirOfflineNotFullyCached() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + src := "offline-rename-dir-incomplete-src" + dst := "offline-rename-dir-incomplete-dst" + localFile := src + "/filez" + file1 := src + "/file1" + file2 := src + "/file2" + + err := os.MkdirAll(filepath.Join(suite.cache_path, src), 0777) + suite.assert.NoError(err) + err = os.WriteFile(filepath.Join(suite.cache_path, localFile), []byte("cached-only-one"), 0777) + suite.assert.NoError(err) + + cloudAttrs := []*internal.ObjAttr{ + {Path: file1, Name: "file1"}, + {Path: file2, Name: "file2"}, + } + suite.mock.EXPECT(). + StreamDir(internal.StreamDirOptions{Name: src, Token: ""}). 
+ Return(cloudAttrs, "", &common.CloudUnreachableError{}) + suite.mock.EXPECT(). + RenameDir(internal.RenameDirOptions{Src: src, Dst: dst}). + Return(&common.CloudUnreachableError{}) + + err = suite.fileCache.RenameDir(internal.RenameDirOptions{Src: src, Dst: dst}) + suite.assert.Error(err) + suite.assert.ErrorIs(err, &common.CloudUnreachableError{}) + + suite.assert.FileExists(filepath.Join(suite.cache_path, localFile)) + suite.assert.NoFileExists(filepath.Join(suite.cache_path, dst, "file1")) + + _, foundDst := suite.fileCache.pendingOps.Load(dst + "/file1") + suite.assert.False( + foundDst, + "No destination pending op should exist when offline directory rename is blocked", + ) +} + // Combined test for all three cases func (suite *fileCacheTestSuite) TestRenameDirOpenFile() { defer suite.cleanupTest() @@ -932,6 +1388,55 @@ func (suite *fileCacheTestSuite) TestCreateFile() { suite.assert.NoFileExists(filepath.Join(suite.fake_storage_path, path)) } +func (suite *fileCacheTestSuite) TestCreateFileOffline() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + path := "offline-create-file" + suite.mock.EXPECT(). + GetAttr(internal.GetAttrOptions{Name: path}). 
+ Return(nil, &common.CloudUnreachableError{}) + + h, err := suite.fileCache.CreateFile(internal.CreateFileOptions{Name: path, Mode: 0777}) + suite.assert.NoError(err) + suite.assert.NotNil(h) + suite.assert.True(h.Dirty(), "Handle should be dirty for local-only file creation") + suite.assert.FileExists(filepath.Join(suite.cache_path, path)) + + op, exists := suite.fileCache.pendingOps.Load(path) + suite.assert.True(exists, "File should be queued in pendingOps while offline") + suite.assert.Equal(pendingFlags{}, op) +} + +func (suite *fileCacheTestSuite) TestCreateFileOfflineBlocked() { + // enable mock component and disable offline access + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true\n block-offline-access: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + path := "offline-blocked-create-file" + h, err := suite.fileCache.CreateFile(internal.CreateFileOptions{Name: path, Mode: 0777}) + suite.assert.Error(err) + suite.assert.Nil(h) + suite.assert.ErrorIs(err, &common.CloudUnreachableError{}) + suite.assert.NoFileExists(filepath.Join(suite.cache_path, path)) + + _, exists := suite.fileCache.pendingOps.Load(path) + suite.assert.False(exists, "File should not be queued when offline access is blocked") +} + func (suite *fileCacheTestSuite) TestCreateFileWithNoPerm() { if runtime.GOOS == "windows" { defer suite.cleanupTest() @@ -1201,11 +1706,71 @@ func (suite *fileCacheTestSuite) TestDeleteFileError() { suite.assert.EqualValues(syscall.ENOENT, err) } -func (suite *fileCacheTestSuite) TestOpenFileNotInCache() { - defer suite.cleanupTest() - path := "file7" - handle, _ := suite.loopback.CreateFile(internal.CreateFileOptions{Name: path, Mode: 0777}) - testData := "test data" +func (suite *fileCacheTestSuite) TestDeleteFileOffline() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n 
offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + path := "offline-delete-file" + localPath := filepath.Join(suite.cache_path, path) + err := os.MkdirAll(filepath.Dir(localPath), 0777) + suite.assert.NoError(err) + err = os.WriteFile(localPath, []byte("cached data"), 0777) + suite.assert.NoError(err) + + suite.mock.EXPECT(). + DeleteFile(internal.DeleteFileOptions{Name: path}). + Return(&common.CloudUnreachableError{}) + err = suite.fileCache.DeleteFile(internal.DeleteFileOptions{Name: path}) + suite.assert.NoError(err) + suite.assert.NoFileExists(localPath) + + op, exists := suite.fileCache.pendingOps.Load(path) + suite.assert.True(exists, "File should be queued in pendingOps for deferred deletion") + suite.assert.Equal(pendingFlags{isDeletion: true}, op) +} + +func (suite *fileCacheTestSuite) TestDeleteFileOfflineBlocked() { + // enable mock component and disable offline access + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true\n block-offline-access: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + path := "offline-blocked-delete-file" + localPath := filepath.Join(suite.cache_path, path) + err := os.MkdirAll(filepath.Dir(localPath), 0777) + suite.assert.NoError(err) + err = os.WriteFile(localPath, []byte("cached data"), 0777) + suite.assert.NoError(err) + + suite.mock.EXPECT(). + DeleteFile(internal.DeleteFileOptions{Name: path}). 
+ Return(&common.CloudUnreachableError{}) + err = suite.fileCache.DeleteFile(internal.DeleteFileOptions{Name: path}) + suite.assert.Error(err) + suite.assert.ErrorIs(err, &common.CloudUnreachableError{}) + suite.assert.FileExists(localPath) + + _, exists := suite.fileCache.pendingOps.Load(path) + suite.assert.False(exists, "File should not be queued when offline access is blocked") +} + +func (suite *fileCacheTestSuite) TestOpenFileNotInCache() { + defer suite.cleanupTest() + path := "file7" + handle, _ := suite.loopback.CreateFile(internal.CreateFileOptions{Name: path, Mode: 0777}) + testData := "test data" data := []byte(testData) _, err := suite.loopback.WriteFile( &internal.WriteFileOptions{Handle: handle, Offset: 0, Data: data}, @@ -1252,6 +1817,113 @@ func (suite *fileCacheTestSuite) TestOpenFileInCache() { suite.assert.FileExists(filepath.Join(suite.cache_path, path)) } +func (suite *fileCacheTestSuite) TestOpenFileOfflineCachedData() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + path := "offline-open-cached" + localPath := filepath.Join(suite.cache_path, path) + err := os.MkdirAll(filepath.Dir(localPath), 0777) + suite.assert.NoError(err) + err = os.WriteFile(localPath, []byte("cached data"), 0777) + suite.assert.NoError(err) + suite.fileCache.policy.CacheValid(localPath) + + suite.mock.EXPECT(). + GetAttr(internal.GetAttrOptions{Name: path}). + Return(nil, &common.CloudUnreachableError{}). 
+ AnyTimes() + + handle, err := suite.fileCache.OpenFile( + internal.OpenFileOptions{Name: path, Flags: os.O_RDWR, Mode: 0777}, + ) + suite.assert.NoError(err) + suite.assert.NotNil(handle) + suite.assert.Equal(path, handle.Path) + + err = suite.fileCache.ReleaseFile(internal.ReleaseFileOptions{Handle: handle}) + suite.assert.NoError(err) +} + +func (suite *fileCacheTestSuite) TestOpenFileOfflineMissingData() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + path := "offline-open-missing" + suite.mock.EXPECT(). + GetAttr(internal.GetAttrOptions{Name: path}). + Return(nil, &common.CloudUnreachableError{}). + AnyTimes() + + handle, err := suite.fileCache.OpenFile( + internal.OpenFileOptions{Name: path, Flags: os.O_RDONLY, Mode: 0777}, + ) + suite.assert.Error(err) + suite.assert.NotNil(handle) + suite.assert.ErrorIs(err, &common.CloudUnreachableError{}) +} + +func (suite *fileCacheTestSuite) TestOpenFileOfflineMissingAttrsWithOverwrite() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + path := "offline-open-overwrite-no-attrs" + noCachedDataErr := common.NoCachedDataError{ + Message: "no cached metadata", + CacheError: &common.CloudUnreachableError{}, + } + suite.mock.EXPECT(). + GetAttr(internal.GetAttrOptions{Name: path}). + Return(nil, noCachedDataErr). 
+ AnyTimes() + + handle, err := suite.fileCache.OpenFile( + internal.OpenFileOptions{Name: path, Flags: os.O_RDWR | os.O_TRUNC, Mode: 0777}, + ) + suite.assert.NoError(err) + suite.assert.NotNil(handle) + + data := []byte("overwrite while offline") + n, err := suite.fileCache.WriteFile( + &internal.WriteFileOptions{Handle: handle, Offset: 0, Data: data}, + ) + suite.assert.NoError(err) + suite.assert.Equal(len(data), n) + + localData, err := os.ReadFile(filepath.Join(suite.cache_path, path)) + suite.assert.NoError(err) + suite.assert.Equal(data, localData) + + // Keep this test focused on branch selection and local write behavior. + // Close the fd directly and avoid ReleaseFile/upload behavior in mock mode. + f := handle.GetFileObject() + if f != nil { + err = f.Close() + suite.assert.NoError(err) + } +} + func (suite *fileCacheTestSuite) TestOpenCreateGetAttr() { defer suite.cleanupTest() path := "file8a" @@ -1474,10 +2146,10 @@ func (suite *fileCacheTestSuite) TestWriteFileErrorBadFd() { // Setup file := "file20" handle := handlemap.NewHandle(file) - bytesWritten, err := suite.fileCache.WriteFile(&internal.WriteFileOptions{Handle: handle}) + bytesWrittength, err := suite.fileCache.WriteFile(&internal.WriteFileOptions{Handle: handle}) suite.assert.Error(err) suite.assert.EqualValues(syscall.EBADF, err) - suite.assert.Equal(0, bytesWritten) + suite.assert.Equal(0, bytesWrittength) } func (suite *fileCacheTestSuite) TestFlushFileEmpty() { @@ -1577,8 +2249,8 @@ loopbackfs: suite.assert.FileExists(filepath.Join(suite.cache_path, file)) suite.assert.NoFileExists(filepath.Join(suite.fake_storage_path, file)) - _, exists := suite.fileCache.scheduleOps.Load(file) - suite.assert.True(exists, "File should be in scheduleOps after creation") + _, exists := suite.fileCache.pendingOps.Load(file) + suite.assert.True(exists, "File should be in pendingOps after creation") // Wait until the cron window starts, then poll for the upload windowStart := 
now.Truncate(time.Minute).Add(time.Duration(startSecond) * time.Second) @@ -1593,22 +2265,12 @@ loopbackfs: _, err = os.Stat(filepath.Join(suite.fake_storage_path, file)) } suite.assert.FileExists(filepath.Join(suite.fake_storage_path, file)) - - // Cloud file visibility can race slightly with scheduleOps cleanup on slower CI workers. - for i := 0; i < 300; i++ { - _, exists = suite.fileCache.scheduleOps.Load(file) - flock := suite.fileCache.fileLocks.Get(file) - if !exists && flock != nil && !flock.SyncPending { - break - } + _, exists = suite.fileCache.pendingOps.Load(file) + for i := 0; i < 300 && exists; i++ { time.Sleep(20 * time.Millisecond) + _, exists = suite.fileCache.pendingOps.Load(file) } - _, exists = suite.fileCache.scheduleOps.Load(file) - suite.assert.False(exists, "File should have been removed from scheduleOps after upload") - suite.assert.False( - suite.fileCache.fileLocks.Get(file).SyncPending, - "SyncPending flag should be cleared after upload", - ) + suite.assert.False(exists, "File should have been removed from pendingOps after upload") } func (suite *fileCacheTestSuite) TestCronOnToOFFUpload() { @@ -1683,10 +2345,8 @@ loopbackfs: suite.assert.NoError(err) suite.assert.FileExists(filepath.Join(suite.cache_path, file2)) suite.assert.NoFileExists(filepath.Join(suite.fake_storage_path, file2)) - _, scheduled := suite.fileCache.scheduleOps.Load(file2) + _, scheduled := suite.fileCache.pendingOps.Load(file2) suite.assert.True(scheduled, "File should be scheduled when scheduler is OFF") - flock := suite.fileCache.fileLocks.Get(file2) - suite.assert.True(flock.SyncPending, "SyncPending flag should be set") } func (suite *fileCacheTestSuite) TestNoScheduleAlwaysOn() { @@ -1716,91 +2376,17 @@ loopbackfs: suite.assert.NoError(err) suite.assert.FileExists(filepath.Join(suite.fake_storage_path, file), "File should be uploaded immediately with no schedule (always-on mode)") - // Poll until scheduleOps is cleared (accounts for slower CI workers) - for i := 
0; i < 300; i++ { - _, exists := suite.fileCache.scheduleOps.Load(file) - flock := suite.fileCache.fileLocks.Get(file) - if !exists && (flock == nil || !flock.SyncPending) { - break - } + _, exists := suite.fileCache.pendingOps.Load(file) + for i := 0; i < 300 && exists; i++ { + _, exists = suite.fileCache.pendingOps.Load(file) time.Sleep(20 * time.Millisecond) } - - _, exists := suite.fileCache.scheduleOps.Load(file) - suite.assert.False(exists, "File should not be in scheduleOps map") + suite.assert.False(exists, "File should not be in pendingOps map") uploadedData, err := os.ReadFile(filepath.Join(suite.fake_storage_path, file)) suite.assert.NoError(err) suite.assert.Equal(data, uploadedData, "Uploaded file content should match original") - - flock := suite.fileCache.fileLocks.Get(file) - if flock != nil { - suite.assert.False(flock.SyncPending, "SyncPending flag should be clear") - } -} - -func (suite *fileCacheTestSuite) TestExistingCloudFileImmediateUpload() { - defer suite.cleanupTest() - - // 1. 
Initialize variables and files / call setuptesthelper - // Set up scheduler with a time far in the future (ensuring we're in OFF state initially) - now := time.Now() - second := (now.Second() + 30) % 60 - cronExpr := fmt.Sprintf("%d * * * * *", second) - - configContent := fmt.Sprintf(`file_cache: - path: %s - offload-io: true - create-empty-file: false - schedule: - - name: "Test" - cron: %s - duration: "5s" - -loopbackfs: - path: %s`, - suite.cache_path, - cronExpr, - suite.fake_storage_path, - ) - - suite.setupTestHelper(configContent) - - // Create a file that will be "already in cloud" - originalFile := "existing_cloud_file.txt" - originalContent := []byte("original cloud content") - - // Create the file in the cloud storage directly - err := os.MkdirAll(suite.fake_storage_path, 0777) - suite.assert.NoError(err) - err = os.WriteFile(filepath.Join(suite.fake_storage_path, originalFile), originalContent, 0777) - suite.assert.NoError(err) - suite.assert.FileExists(filepath.Join(suite.fake_storage_path, originalFile)) - suite.assert.NoFileExists(filepath.Join(suite.cache_path, originalFile)) - - // Write to the file and close the file - handle, err := suite.fileCache.OpenFile(internal.OpenFileOptions{ - Name: originalFile, - Flags: os.O_RDWR, - Mode: 0777, - }) - suite.assert.NoError(err) - // Write new content to the file - modifiedContent := []byte("modified cloud file content") - _, err = suite.fileCache.WriteFile(&internal.WriteFileOptions{ - Handle: handle, - Data: modifiedContent, - Offset: 0, - }) - suite.assert.NoError(err) - err = suite.fileCache.ReleaseFile(internal.ReleaseFileOptions{Handle: handle}) - suite.assert.NoError(err) - - // Confirm cloud storage copy is updated - fInfo, err := os.Stat(filepath.Join(suite.fake_storage_path, originalFile)) - suite.NoError(err) - suite.assert.Len(modifiedContent, int(fInfo.Size())) } func (suite *fileCacheTestSuite) TestCreateFileAndRename() { @@ -1846,9 +2432,9 @@ loopbackfs: 
suite.assert.NoFileExists(filepath.Join(suite.fake_storage_path, srcFile), "File should not exist in cloud storage when scheduler is OFF") - // Check if file is in scheduleOps with original name - _, existsInSchedule := suite.fileCache.scheduleOps.Load(srcFile) - suite.assert.True(existsInSchedule, "File should be in scheduleOps before rename") + // Check if file is in pendingOps with original name + _, existsInSchedule := suite.fileCache.pendingOps.Load(srcFile) + suite.assert.True(existsInSchedule, "File should be in pendingOps before rename") // Rename the file err = suite.fileCache.RenameFile(internal.RenameFileOptions{Src: srcFile, Dst: dstFile}) @@ -1860,24 +2446,17 @@ loopbackfs: suite.assert.FileExists(filepath.Join(suite.cache_path, dstFile), "Destination file should exist in local cache after rename") - // Check if the file has been renamed in scheduleOps - _, existsInScheduleOld := suite.fileCache.scheduleOps.Load(srcFile) - suite.assert.False( - existsInScheduleOld, - "Old file name should not be in scheduleOps after rename", - ) - - _, existsInScheduleNew := suite.fileCache.scheduleOps.Load(dstFile) - suite.assert.True(existsInScheduleNew, "New file name should be in scheduleOps after rename") + // Check if the file has been renamed in pendingOps + opSrc, foundSrc := suite.fileCache.pendingOps.Load(srcFile) + suite.assert.True(foundSrc, "Src deletion should be in pendingOps after rename") + suite.assert.True(opSrc.(pendingFlags).isDeletion, "Src pending op should be deletion") - // Check that file lock status was properly transferred - flock := suite.fileCache.fileLocks.Get(dstFile) - if flock != nil { - suite.assert.True(flock.SyncPending, "SyncPending flag should be set on renamed file") - } + opDst, foundDst := suite.fileCache.pendingOps.Load(dstFile) + suite.assert.True(foundDst, "Dst should be in pendingOps after rename") + suite.assert.False(opDst.(pendingFlags).isDeletion, "Dst pending op should be creation") } -func (suite *fileCacheTestSuite) 
TestDeleteFileAndScheduleOps() { +func (suite *fileCacheTestSuite) TestDeleteScheduledFile() { defer suite.cleanupTest() now := time.Now() @@ -1922,9 +2501,9 @@ loopbackfs: suite.assert.NoFileExists(filepath.Join(suite.fake_storage_path, testFile), "File should not exist in cloud storage when scheduler is OFF") - // Check if file is in scheduleOps before deletion - _, existsInSchedule := suite.fileCache.scheduleOps.Load(testFile) - suite.assert.True(existsInSchedule, "File should be in scheduleOps before deletion") + // Check if file is in pendingOps before deletion + _, existsInSchedule := suite.fileCache.pendingOps.Load(testFile) + suite.assert.True(existsInSchedule, "File should be in pendingOps before deletion") err = suite.fileCache.DeleteFile(internal.DeleteFileOptions{Name: testFile}) suite.assert.NoError(err) @@ -1933,10 +2512,138 @@ loopbackfs: suite.assert.NoFileExists(filepath.Join(suite.cache_path, testFile), "File should not exist in local cache after deletion") - // Check if the file has been deleted in scheduleOps - _, existsInScheduleAfterDelete := suite.fileCache.scheduleOps.Load(testFile) - suite.assert.False(existsInScheduleAfterDelete, - "File should not be in scheduleOps after deletion") + // Check pendingOps tracks deletion for deferred cloud sync. 
+ op, existsInScheduleAfterDelete := suite.fileCache.pendingOps.Load(testFile) + suite.assert.True(existsInScheduleAfterDelete, + "File should remain in pendingOps after deletion") + if existsInScheduleAfterDelete { + suite.assert.True(op.(pendingFlags).isDeletion, + "Pending op should be marked as deletion after deletion") + } +} + +func (suite *fileCacheTestSuite) TestAddPendingOpSignalsChannel() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + name := "async-signal-file" + suite.fileCache.addPendingOp(name, pendingFlags{}) + + _, exists := suite.fileCache.pendingOps.Load(name) + suite.assert.True(exists) + + select { + case <-suite.fileCache.pendingOpAdded: + // expected signal + case <-time.After(200 * time.Millisecond): + suite.Fail("pendingOpAdded should be signaled when adding pending op") + } +} + +func (suite *fileCacheTestSuite) TestUpdateObjectOfflineErrorKeepsPending() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + name := "async-update-offline" + localPath := filepath.Join(suite.cache_path, name) + err := os.MkdirAll(filepath.Dir(localPath), 0777) + suite.assert.NoError(err) + err = os.WriteFile(localPath, []byte("local data"), 0777) + suite.assert.NoError(err) + + suite.fileCache.addPendingOp(name, pendingFlags{}) + suite.mock.EXPECT().CopyFromFile(gomock.Any()).Return(&common.CloudUnreachableError{}) + + err = suite.fileCache.updateObject(name, pendingFlags{}) + suite.assert.Error(err) + suite.assert.ErrorIs(err, &common.CloudUnreachableError{}) + + _, stillPending := suite.fileCache.pendingOps.Load(name) + suite.assert.True( + stillPending, + "pending op 
should remain queued when updateObject fails offline", + ) +} + +func (suite *fileCacheTestSuite) TestUpdateObjectDeletionMissingLocalFile() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + name := "async-delete-missing-local" + suite.fileCache.addPendingOp(name, pendingFlags{isDeletion: true}) + suite.mock.EXPECT().GetAttr(internal.GetAttrOptions{Name: name}).Return(nil, nil) + suite.mock.EXPECT().DeleteFile(internal.DeleteFileOptions{Name: name}).Return(nil) + + err := suite.fileCache.updateObject(name, pendingFlags{isDeletion: true}) + suite.assert.NoError(err) + + _, stillPending := suite.fileCache.pendingOps.Load(name) + suite.assert.False(stillPending, "delete should be synced and removed from pendingOps") +} + +func (suite *fileCacheTestSuite) TestServicePendingOpsOfflineKeepsPending() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + name := "async-service-offline" + suite.fileCache.addPendingOp(name, pendingFlags{}) + + // servicePendingOps should leave queued ops untouched while CloudConnected() is false. 
+ time.Sleep(1200 * time.Millisecond) + _, stillPending := suite.fileCache.pendingOps.Load(name) + suite.assert.True(stillPending) +} + +func (suite *fileCacheTestSuite) TestServicePendingOpsProcessesPendingOnline() { + defer suite.cleanupTest() + + name := "async-service-online" + localPath := filepath.Join(suite.cache_path, name) + err := os.MkdirAll(filepath.Dir(localPath), 0777) + suite.assert.NoError(err) + err = os.WriteFile(localPath, []byte("async online data"), 0777) + suite.assert.NoError(err) + + suite.fileCache.addPendingOp(name, pendingFlags{}) + + for i := 0; i < 50; i++ { + _, pending := suite.fileCache.pendingOps.Load(name) + if !pending { + break + } + time.Sleep(20 * time.Millisecond) + } + + _, pending := suite.fileCache.pendingOps.Load(name) + suite.assert.False(pending, "pending op should be processed by async uploader when online") } func (suite *fileCacheTestSuite) TestCreateEmptyFileEqualTrue() { @@ -1980,10 +2687,10 @@ loopbackfs: "Handle should not be marked as dirty when create-empty-file is true", ) - // The file shouldn't be in scheduleOps because it's already in cloud storage - _, existsInSchedule := suite.fileCache.scheduleOps.Load(testFile) + // The file shouldn't be in pendingOps because it's already in cloud storage + _, existsInSchedule := suite.fileCache.pendingOps.Load(testFile) suite.assert.False(existsInSchedule, - "File should not be in scheduleOps because it's already in cloud storage") + "File should not be in pendingOps because it's already in cloud storage") err = suite.fileCache.ReleaseFile(internal.ReleaseFileOptions{Handle: handle}) suite.assert.NoError(err) @@ -2035,9 +2742,9 @@ loopbackfs: suite.assert.NoFileExists(filepath.Join(suite.fake_storage_path, testFile), "File should not exist in cloud storage when scheduler is OFF") - // Check if file is in scheduleOps initially - _, existsInSchedule := suite.fileCache.scheduleOps.Load(testFile) - suite.assert.True(existsInSchedule, "File should be in scheduleOps after 
creation") + // Check if file is in pendingOps initially + _, existsInSchedule := suite.fileCache.pendingOps.Load(testFile) + suite.assert.True(existsInSchedule, "File should be in pendingOps after creation") // Write to file again with updated content newContent := []byte("updated file content") @@ -2053,9 +2760,9 @@ loopbackfs: err = suite.fileCache.ReleaseFile(internal.ReleaseFileOptions{Handle: handle}) suite.assert.NoError(err) - // Check scheduleOps to verify changes - _, stillInSchedule := suite.fileCache.scheduleOps.Load(testFile) - suite.assert.True(stillInSchedule, "File should remain in scheduleOps after modification") + // Check pendingOps to verify changes + _, stillInSchedule := suite.fileCache.pendingOps.Load(testFile) + suite.assert.True(stillInSchedule, "File should remain in pendingOps after modification") // Verify the local content was updated localData, err := os.ReadFile(filepath.Join(suite.cache_path, testFile)) @@ -2088,30 +2795,9 @@ loopbackfs: suite.cache_path, suite.fake_storage_path, ) - suite.setupTestHelper(configContent) - // The invalid schedule should be skipped but valid one should be there - hasValidSchedule := false - for _, sched := range suite.fileCache.schedule { - if sched.Name == "InvalidTest" { - suite.assert.Fail("Invalid schedule should not be added") - } - if sched.Name == "ValidTest" { - hasValidSchedule = true - } - } - - suite.assert.True(hasValidSchedule, "Valid schedule entry should be processed") - - // Test that operations still work with the valid schedule - file := "test_after_invalid_cron.txt" - handle, err := suite.fileCache.CreateFile(internal.CreateFileOptions{Name: file, Mode: 0777}) - suite.assert.NoError(err) - - err = suite.fileCache.ReleaseFile(internal.ReleaseFileOptions{Handle: handle}) - suite.assert.NoError(err) - suite.assert.FileExists(filepath.Join(suite.cache_path, file), - "File should be created successfully despite invalid cron expression") + // The invalid schedule should crash the
configuration + suite.assert.Panics(func() { suite.setupTestHelper(configContent) }) } func (suite *fileCacheTestSuite) TestOverlappingSchedules() { @@ -2376,6 +3062,65 @@ func (suite *fileCacheTestSuite) TestGetAttrCase4() { suite.assert.NoError(openErr) } +func (suite *fileCacheTestSuite) TestGetAttrOfflineCachedMetadata() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + file := "offline-getattr-metadata" + attrs := &internal.ObjAttr{Path: file, Name: "offline-getattr-metadata", Size: 11} + suite.mock.EXPECT(). + GetAttr(internal.GetAttrOptions{Name: file}). + Return(attrs, &common.CloudUnreachableError{}) + + attr, err := suite.fileCache.GetAttr(internal.GetAttrOptions{Name: file}) + suite.assert.NoError(err) + suite.assert.NotNil(attr) + suite.assert.Equal(file, attr.Path) + suite.assert.EqualValues(11, attr.Size) +} + +func (suite *fileCacheTestSuite) TestGetAttrOfflineLocalFallback() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + file := "offline-getattr-local" + localData := []byte("local cached data") + localPath := filepath.Join(suite.cache_path, file) + err := os.MkdirAll(filepath.Dir(localPath), 0777) + suite.assert.NoError(err) + err = os.WriteFile(localPath, localData, 0777) + suite.assert.NoError(err) + + offlineNoCacheErr := common.NoCachedDataError{ + Message: "no cached metadata", + CacheError: &common.CloudUnreachableError{}, + } + suite.mock.EXPECT(). + GetAttr(internal.GetAttrOptions{Name: file}). + Return(nil, offlineNoCacheErr). 
+ Times(2) + + attr, err := suite.fileCache.GetAttr(internal.GetAttrOptions{Name: file}) + suite.assert.NoError(err) + suite.assert.NotNil(attr) + suite.assert.Equal(file, attr.Path) + suite.assert.EqualValues(len(localData), attr.Size) +} + // func (suite *fileCacheTestSuite) TestGetAttrError() { // defer suite.cleanupTest() // // Setup @@ -2407,6 +3152,68 @@ func (suite *fileCacheTestSuite) TestRenameFileNotInCache() { suite.assert.FileExists(filepath.Join(suite.fake_storage_path, dst)) // Dst does exist } +func (suite *fileCacheTestSuite) TestRenameFileOfflineCachedSrc() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + src := "offline-rename-src" + dst := "offline-rename-dst" + srcLocalPath := filepath.Join(suite.cache_path, src) + err := os.MkdirAll(filepath.Dir(srcLocalPath), 0777) + suite.assert.NoError(err) + err = os.WriteFile(srcLocalPath, []byte("cached data"), 0777) + suite.assert.NoError(err) + + suite.mock.EXPECT(). + RenameFile(internal.RenameFileOptions{Src: src, Dst: dst}). 
+ Return(&common.CloudUnreachableError{}) + + err = suite.fileCache.RenameFile(internal.RenameFileOptions{Src: src, Dst: dst}) + suite.assert.NoError(err) + suite.assert.NoFileExists(filepath.Join(suite.cache_path, src)) + suite.assert.FileExists(filepath.Join(suite.cache_path, dst)) + + opSrc, srcFound := suite.fileCache.pendingOps.Load(src) + suite.assert.True(srcFound, "Src deletion should be in pendingOps after offline rename") + suite.assert.True(opSrc.(pendingFlags).isDeletion) + + opDst, dstFound := suite.fileCache.pendingOps.Load(dst) + suite.assert.True(dstFound, "Dst creation should be in pendingOps after offline rename") + suite.assert.False(opDst.(pendingFlags).isDeletion) +} + +func (suite *fileCacheTestSuite) TestRenameFileOfflineMissingSrc() { + // enable mock component + suite.cleanupTest() + defaultConfig := fmt.Sprintf( + "file_cache:\n path: %s\n offload-io: true", + suite.cache_path, + ) + suite.useMock = true + suite.setupTestHelper(defaultConfig) + defer suite.cleanupTest() + + src := "offline-rename-missing-src" + dst := "offline-rename-missing-dst" + suite.mock.EXPECT(). + RenameFile(internal.RenameFileOptions{Src: src, Dst: dst}). 
+ Return(&common.CloudUnreachableError{}) + + err := suite.fileCache.RenameFile(internal.RenameFileOptions{Src: src, Dst: dst}) + suite.assert.Error(err) + suite.assert.ErrorIs(err, &common.CloudUnreachableError{}) + + _, dstFound := suite.fileCache.pendingOps.Load(dst) + suite.assert.False(dstFound, "Dst should not be queued when offline rename fails") +} + func (suite *fileCacheTestSuite) TestRenameFileInCache() { defer suite.cleanupTest() // Setup diff --git a/component/file_cache/lru_policy.go b/component/file_cache/lru_policy.go index 57fb90a01..29e7bb3d6 100644 --- a/component/file_cache/lru_policy.go +++ b/component/file_cache/lru_policy.go @@ -77,17 +77,21 @@ type lruPolicy struct { // DU utility was found on the path or not duPresent bool +} - // Tracks scheduled files to skip during eviction - schedule *FileCache +type pendingOpSnapshot struct { + IsDir bool + IsDeletion bool } -// LRUPolicySnapshot represents the *persisted state* of lruPolicy. +// lruPolicySnapshot represents the *persisted state* of lruPolicy. // It contains only the fields that need to be saved, and they are exported. 
-type LRUPolicySnapshot struct { - NodeList []string // Just node names, *without their fc.tmp prefix*, in linked list order - CurrMarkerPosition uint64 // Node index of currMarker - LastMarkerPosition uint64 // Node index of lastMarker +type lruPolicySnapshot struct { + NodeList []string // Just node names, *without their fc.tmp prefix*, in linked list order + SyncPendingFlags []bool // whether each file in NodeList belongs in the pendingOps map (kept for backward compat) + CurrMarkerPosition uint64 // Node index of currMarker + LastMarkerPosition uint64 // Node index of lastMarker + PendingOps map[string]pendingOpSnapshot // Complete pendingOps map with full flags } const ( @@ -124,7 +128,8 @@ func (p *lruPolicy) StartPolicy() error { p.lastMarker.prev = p.currMarker p.lastMarker.next = nil p.head = p.currMarker - gob.Register(LRUPolicySnapshot{}) + gob.Register(lruPolicySnapshot{}) + gob.Register(pendingOpSnapshot{}) snapshot, err := readSnapshotFromFile(p.tmpPath) if err == nil && snapshot != nil { p.loadSnapshot(snapshot) @@ -167,14 +172,9 @@ func (p *lruPolicy) ShutdownPolicy() error { return p.createSnapshot().writeToFile(p.tmpPath) } -func (fc *FileCache) IsScheduled(objName string) bool { - _, inSchedule := fc.scheduleOps.Load(objName) - return inSchedule -} - -func (p *lruPolicy) createSnapshot() *LRUPolicySnapshot { +func (p *lruPolicy) createSnapshot() *lruPolicySnapshot { log.Trace("lruPolicy::saveSnapshot") - var snapshot LRUPolicySnapshot + var snapshot lruPolicySnapshot var index uint64 p.Lock() defer p.Unlock() @@ -188,29 +188,61 @@ func (p *lruPolicy) createSnapshot() *LRUPolicySnapshot { case current == p.lastMarker: snapshot.LastMarkerPosition = index case strings.HasPrefix(current.name, p.tmpPath): - snapshot.NodeList = append(snapshot.NodeList, current.name[len(p.tmpPath):]) + relName := current.name[len(p.tmpPath):] + snapshot.NodeList = append(snapshot.NodeList, relName) + objName := common.NormalizeObjectName(relName[1:]) + _, isPending := 
p.pendingOps.Load(objName) + snapshot.SyncPendingFlags = append(snapshot.SyncPendingFlags, isPending) default: log.Err("lruPolicy::saveSnapshot : %s Ignoring unrecognized cache path", current.name) } index++ } + + // Capture complete pendingOps map for reliable restoration + snapshot.PendingOps = make(map[string]pendingOpSnapshot) + p.pendingOps.Range(func(key, value interface{}) bool { + flags := value.(pendingFlags) + snapshot.PendingOps[key.(string)] = pendingOpSnapshot{ + IsDir: flags.isDir, + IsDeletion: flags.isDeletion, + } + return true + }) + return &snapshot } -func (p *lruPolicy) loadSnapshot(snapshot *LRUPolicySnapshot) { +func (p *lruPolicy) loadSnapshot(snapshot *lruPolicySnapshot) { if snapshot == nil { return } p.Lock() defer p.Unlock() + // Restore pendingOps from new field if available, otherwise fall back to old method + loadPendingOps := false + if len(snapshot.PendingOps) > 0 { + for key, value := range snapshot.PendingOps { + p.pendingOps.Store(key, pendingFlags{isDir: value.IsDir, isDeletion: value.IsDeletion}) + } + } else { + // Backward compatibility: use SyncPendingFlags if PendingOps is not available + loadPendingOps = len(snapshot.NodeList) == len(snapshot.SyncPendingFlags) + } + // walk the slice and write the entries into the policy // remember that the markers are actual nodes, with indices preceding the item at the same NodeList index nodeIndex := 0 nextNode := p.head tail := p.lastMarker - for _, v := range snapshot.NodeList { - // recreate the node + for i, v := range snapshot.NodeList { fullPath := filepath.Join(p.tmpPath, v) + // populate pendingOps + if loadPendingOps && snapshot.SyncPendingFlags[i] { + objName := v[1:] + p.pendingOps.Store(objName, pendingFlags{}) + } + // recreate the node newNode := &lruNode{ name: fullPath, next: nil, @@ -254,7 +286,7 @@ func (p *lruPolicy) loadSnapshot(snapshot *LRUPolicySnapshot) { } } -func (ss *LRUPolicySnapshot) writeToFile(tmpPath string) error { +func (ss *lruPolicySnapshot) 
writeToFile(tmpPath string) error { var buf bytes.Buffer enc := gob.NewEncoder(&buf) err := enc.Encode(ss) @@ -265,7 +297,7 @@ func (ss *LRUPolicySnapshot) writeToFile(tmpPath string) error { return os.WriteFile(filepath.Join(tmpPath, snapshotPath), buf.Bytes(), 0644) } -func readSnapshotFromFile(tmpPath string) (*LRUPolicySnapshot, error) { +func readSnapshotFromFile(tmpPath string) (*lruPolicySnapshot, error) { fullSnapshotPath := filepath.Join(tmpPath, snapshotPath) defer os.Remove(fullSnapshotPath) snapshotData, err := os.ReadFile(fullSnapshotPath) @@ -278,7 +310,7 @@ func readSnapshotFromFile(tmpPath string) (*LRUPolicySnapshot, error) { } return nil, err } - var snapshot LRUPolicySnapshot + var snapshot lruPolicySnapshot dec := gob.NewDecoder(bytes.NewReader(snapshotData)) err = dec.Decode(&snapshot) if err != nil { @@ -526,7 +558,7 @@ func (p *lruPolicy) deleteExpiredNodes() { if objName[0] == '/' { objName = objName[1:] } - if p.schedule != nil && p.schedule.IsScheduled(objName) { + if _, syncPending := p.pendingOps.Load(objName); syncPending { continue } @@ -589,12 +621,19 @@ func (p *lruPolicy) deleteItem(name string) { } // Check if there are any open handles to this file or not - if flock.Count() > 0 || flock.SyncPending { + if flock.Count() > 0 { log.Warn("lruPolicy::DeleteItem : File in use %s", name) p.CacheValid(name) return } + // check if the file is pending upload (it was modified offline) + if _, syncPending := p.pendingOps.Load(objName); syncPending { + log.Warn("lruPolicy::DeleteItem : %s File is not synchronized to cloud storage", name) + p.CacheValid(name) + return + } + // There are no open handles for this file so it's safe to remove this // Check if the file exists first, since this is often the second time we're calling deleteFile _, err := os.Stat(name) diff --git a/component/file_cache/lru_policy_test.go b/component/file_cache/lru_policy_test.go index 5b02ed4e6..a856db389 100644 --- a/component/file_cache/lru_policy_test.go +++ 
b/component/file_cache/lru_policy_test.go @@ -30,6 +30,7 @@ import ( "io/fs" "os" "path/filepath" + "sync" "testing" "time" @@ -47,7 +48,7 @@ type lruPolicyTestSuite struct { policy *lruPolicy } -var cache_path = filepath.Join(home_dir, "file_cache") +var cache_path = filepath.Join(home_dir, "file_cache"+randomString(8)) func (suite *lruPolicyTestSuite) SetupTest() { err := log.SetDefaultLogger("silent", common.LogConfig{Level: common.ELogLevel.LOG_DEBUG()}) @@ -67,6 +68,7 @@ func (suite *lruPolicyTestSuite) SetupTest() { highThreshold: defaultMaxThreshold, lowThreshold: defaultMinThreshold, fileLocks: &common.LockMap{}, + pendingOps: &sync.Map{}, } suite.setupTestHelper(config) @@ -179,6 +181,7 @@ func (suite *lruPolicyTestSuite) TestUpdateConfig() { highThreshold: 70, lowThreshold: 20, fileLocks: &common.LockMap{}, + pendingOps: &sync.Map{}, } err := suite.policy.UpdateConfig(config) suite.assert.NoError(err) @@ -214,6 +217,7 @@ func (suite *lruPolicyTestSuite) TestCachePurge() { highThreshold: defaultMaxThreshold, lowThreshold: defaultMinThreshold, fileLocks: &common.LockMap{}, + pendingOps: &sync.Map{}, } suite.setupTestHelper(config) @@ -256,8 +260,7 @@ func (suite *lruPolicyTestSuite) TestDeleteItemSkipsSyncPending() { // Simulate eviction flow where the node has already been removed. 
suite.policy.nodeMap.Delete(localPath) - flock := suite.policy.fileLocks.Get("sync_pending") - flock.SyncPending = true + suite.policy.pendingOps.Store("sync_pending", pendingFlags{}) suite.policy.deleteItem(localPath) @@ -317,6 +320,7 @@ func (suite *lruPolicyTestSuite) TestMaxEviction() { highThreshold: defaultMaxThreshold, lowThreshold: defaultMinThreshold, fileLocks: &common.LockMap{}, + pendingOps: &sync.Map{}, } suite.setupTestHelper(config) @@ -341,6 +345,10 @@ func (suite *lruPolicyTestSuite) verifyPolicy(expectedPolicy, actualPolicy *lruP suite.assert.Same(actualPolicy.lastMarker, actual) default: suite.assert.Equal(expected.name, actual.name) + objName := expected.name[len(suite.policy.tmpPath)+1:] + _, expectedPending := expectedPolicy.pendingOps.Load(objName) + _, actualPending := actualPolicy.pendingOps.Load(objName) + suite.assert.Equal(expectedPending, actualPending) } suite.assert.NotNil(actual, "actual list is shorter than expected") suite.assert.NotNil(expected, "actual list is longer than expected") @@ -363,6 +371,32 @@ func (suite *lruPolicyTestSuite) TestCreateSnapshotEmpty() { suite.verifyPolicy(originalPolicy, suite.policy) } +func (suite *lruPolicyTestSuite) TestCreateSnapshot() { + defer suite.cleanupTest() + // setup + numFiles := 5 + pathPrefix := filepath.Join(cache_path, "temp") + for i := 1; i <= numFiles; i++ { + suite.policy.CacheValid(pathPrefix + fmt.Sprint(i)) + if i > 3 { + suite.policy.pendingOps.Store("temp"+fmt.Sprint(i), pendingFlags{}) + } + } + originalPolicy := suite.policy + // test + snapshot := suite.policy.createSnapshot() + suite.cleanupTest() + suite.setupTestHelper(originalPolicy.cachePolicyConfig) + suite.policy.loadSnapshot(snapshot) + // assert + suite.assert.NotNil(snapshot) + suite.assert.Len(snapshot.NodeList, numFiles) + for i, v := range snapshot.NodeList { + suite.assert.Equal(pathPrefix+fmt.Sprint(numFiles-i), filepath.Join(cache_path, v)) + } + suite.verifyPolicy(originalPolicy, suite.policy) +} + func 
(suite *lruPolicyTestSuite) TestCreateSnapshotWithTrailingMarkers() { defer suite.cleanupTest() // setup @@ -503,10 +537,16 @@ func (suite *lruPolicyTestSuite) TestCreateSnapshotLeadingMarkers() { func (suite *lruPolicyTestSuite) TestSnapshotSerialization() { defer suite.cleanupTest() // setup - snapshot := &LRUPolicySnapshot{ + snapshot := &lruPolicySnapshot{ NodeList: []string{"a", "b", "c"}, CurrMarkerPosition: 1, LastMarkerPosition: 2, + SyncPendingFlags: []bool{true, false, false}, + PendingOps: map[string]pendingOpSnapshot{ + "a": {}, + "deleted-file": {IsDeletion: true}, + "deleted-folder": {IsDir: true, IsDeletion: true}, + }, } // test err := snapshot.writeToFile(cache_path) @@ -517,50 +557,102 @@ func (suite *lruPolicyTestSuite) TestSnapshotSerialization() { suite.assert.Equal(snapshot, snapshotFromFile) // this checks deep equality } -func (suite *lruPolicyTestSuite) TestNoEvictionIfInScheduleOps() { +func (suite *lruPolicyTestSuite) TestLoadSnapshotRestoresExplicitPendingOps() { + defer suite.cleanupTest() + + snapshot := &lruPolicySnapshot{ + NodeList: []string{"/cached-file", "/cached-dir"}, + CurrMarkerPosition: 1, + LastMarkerPosition: 3, + SyncPendingFlags: []bool{false, false}, + PendingOps: map[string]pendingOpSnapshot{ + "cached-file": {IsDeletion: true}, + "cached-dir": {IsDir: true}, + "deleted-dir": {IsDir: true, IsDeletion: true}, + }, + } + + suite.policy.loadSnapshot(snapshot) + + for name, expected := range snapshot.PendingOps { + value, found := suite.policy.pendingOps.Load(name) + suite.assert.True(found, "expected pending op %s to be restored", name) + if found { + suite.assert.Equal( + pendingFlags{isDir: expected.IsDir, isDeletion: expected.IsDeletion}, + value, + ) + } + } + + suite.assert.True(suite.policy.IsCached(filepath.Join(cache_path, "cached-file"))) + suite.assert.True(suite.policy.IsCached(filepath.Join(cache_path, "cached-dir"))) +} + +func (suite *lruPolicyTestSuite) TestLoadSnapshotFallsBackToSyncPendingFlags() { + 
defer suite.cleanupTest() + + snapshot := &lruPolicySnapshot{ + NodeList: []string{"/legacy-file", "/other-file"}, + CurrMarkerPosition: 0, + LastMarkerPosition: 3, + SyncPendingFlags: []bool{true, false}, + } + + suite.policy.loadSnapshot(snapshot) + + value, found := suite.policy.pendingOps.Load("legacy-file") + suite.assert.True(found) + if found { + suite.assert.Equal(pendingFlags{}, value) + } + + _, found = suite.policy.pendingOps.Load("other-file") + suite.assert.False(found) +} + +func (suite *lruPolicyTestSuite) TestNoEvictionIfInPendingOps() { defer suite.cleanupTest() - fileName := filepath.Join(cache_path, "scheduled_file") + name := "pending_file" + fileName := filepath.Join(cache_path, name) suite.policy.CacheValid(fileName) - fakeSchedule := &FileCache{} - fakeSchedule.scheduleOps.Store(common.NormalizeObjectName("scheduled_file"), struct{}{}) - suite.policy.schedule = fakeSchedule + suite.policy.pendingOps.Store(name, pendingFlags{}) time.Sleep(2 * time.Second) - suite.assert.True(suite.policy.IsCached(fileName), "File in scheduleOps should not be evicted") + suite.assert.True(suite.policy.IsCached(fileName), "File in pendingOps should not be evicted") } -func (suite *lruPolicyTestSuite) TestEvictionRespectsScheduleOps() { +func (suite *lruPolicyTestSuite) TestEvictionRespectsPendingOps() { defer suite.cleanupTest() + objNames := []string{"File1", "file2", "file3", "file4"} fileNames := []string{ - filepath.Join(cache_path, "file1"), - filepath.Join(cache_path, "file2"), - filepath.Join(cache_path, "file3"), - filepath.Join(cache_path, "file4"), + filepath.Join(cache_path, objNames[0]), + filepath.Join(cache_path, objNames[1]), + filepath.Join(cache_path, objNames[2]), + filepath.Join(cache_path, objNames[3]), } for _, name := range fileNames { suite.policy.CacheValid(name) } - fakeSchedule := &FileCache{} - fakeSchedule.scheduleOps.Store(common.NormalizeObjectName("file2"), struct{}{}) - 
fakeSchedule.scheduleOps.Store(common.NormalizeObjectName("file4"), struct{}{}) - suite.policy.schedule = fakeSchedule + suite.policy.pendingOps.Store(objNames[1], pendingFlags{}) + suite.policy.pendingOps.Store(objNames[3], pendingFlags{}) time.Sleep(3 * time.Second) suite.assert.False(suite.policy.IsCached(fileNames[0]), "file1 should be evicted") suite.assert.True( suite.policy.IsCached(fileNames[1]), - "file2 should NOT be evicted (in scheduleOps)", + "file2 should NOT be evicted (in pendingOps)", ) suite.assert.False(suite.policy.IsCached(fileNames[2]), "file3 should be evicted") suite.assert.True( suite.policy.IsCached(fileNames[3]), - "file4 should NOT be evicted (in scheduleOps)", + "file4 should NOT be evicted (in pendingOps)", ) } diff --git a/component/file_cache/scheduler.go b/component/file_cache/scheduler.go deleted file mode 100644 index 44106ee6b..000000000 --- a/component/file_cache/scheduler.go +++ /dev/null @@ -1,324 +0,0 @@ -/* - Licensed under the MIT License . - - Copyright © 2023-2026 Seagate Technology LLC and/or its Affiliates - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -package file_cache - -import ( - "context" - "errors" - "os" - "path/filepath" - "time" - - "github.com/Seagate/cloudfuse/common" - "github.com/Seagate/cloudfuse/common/log" - "github.com/Seagate/cloudfuse/internal" - "github.com/Seagate/cloudfuse/internal/handlemap" - "github.com/netresearch/go-cron" -) - -type UploadWindow struct { - Name string `yaml:"name"` - CronExpr string `yaml:"cron"` - Duration string `yaml:"duration"` -} - -type Config struct { - Schedule WeeklySchedule `yaml:"schedule"` -} - -type WeeklySchedule []UploadWindow - -func (fc *FileCache) SetupScheduler() error { - if len(fc.schedule) == 0 { - log.Info( - "FileCache::SetupScheduler : Empty schedule configuration, defaulting to always-on mode", - ) - fc.alwaysOn = true - return nil - } - - // Setup the cron scheduler - cronScheduler := cron.New(cron.WithSeconds()) - fc.scheduleUploads(cronScheduler, fc.schedule) - cronScheduler.Start() - fc.cronScheduler = cronScheduler - - log.Info("FileCache::SetupScheduler : Scheduler started successfully") - return nil -} - -func isValidCronExpression(expr string) bool { - parser := cron.MustNewParser( - cron.Second | cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow | cron.Descriptor, - ) - _, err := parser.Parse(expr) - return err == nil -} - -func (fc *FileCache) scheduleUploads(c *cron.Cron, sched WeeklySchedule) { - // define callbacks to activate and disable uploads - startFunc := func() { - log.Info("FileCache::SetupScheduler : Starting scheduled upload window") - fc.closeWindowCh = make(chan struct{}) - } - endFunc := func() { - log.Info("FileCache::SetupScheduler : Upload window ended") - close(fc.closeWindowCh) - } - - parser := cron.MustNewParser( - cron.Second | 
cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow | cron.Descriptor, - ) - - // start up the schedules - for _, config := range sched { - windowName := config.Name - duration, err := time.ParseDuration(config.Duration) - if err != nil { - log.Info("[%s] Invalid duration '%s': %v\n", windowName, config.Duration, err) - continue - } - - // Determine if we're joining a window that's already active by - // finding the most recent scheduled start via Prev(). - now := time.Now() - var initialWindowEndTime time.Time - var jobOpts []cron.JobOption - - schedule, _ := parser.Parse(config.CronExpr) - if sp, ok := schedule.(cron.ScheduleWithPrev); ok { - prevStart := sp.Prev(now) - if !prevStart.IsZero() && prevStart.Add(duration).After(now) { - // We're inside an active window that started at prevStart. - initialWindowEndTime = prevStart.Add(duration) - // Run immediately to join the in-progress window with shortened duration. - jobOpts = append(jobOpts, cron.WithRunImmediately()) - log.Info( - "FileCache::scheduleUploads : [%s] joining active window (started %s, ends %s)", - windowName, - prevStart.Format(time.Kitchen), - initialWindowEndTime.Format(time.Kitchen), - ) - } - } - - _, err = c.AddFunc(config.CronExpr, func() { - // Start a new window and track it - fc.activeWindowsMutex.Lock() - isFirstWindow := fc.activeWindows == 0 - fc.activeWindows++ - windowCount := fc.activeWindows - fc.activeWindowsMutex.Unlock() - - // activate uploads - if isFirstWindow { - // open the window - startFunc() - } - - log.Info( - "schedule [%s] (%s) starting (active windows=%d)", - windowName, - config.CronExpr, - windowCount, - ) - fc.servicePendingOps() - - // When should the window close? 
- remainingDuration := duration - currentTime := time.Now() - if initialWindowEndTime.After(currentTime) { - remainingDuration = initialWindowEndTime.Sub(currentTime) - } - // Create a context to end the window - window, cancel := context.WithTimeout(context.Background(), remainingDuration) - defer cancel() - - for { - select { - case <-fc.stopAsyncUpload: - log.Info("Shutting down upload scheduler") - return - case <-window.Done(): - // Window has completed, update active window count - fc.activeWindowsMutex.Lock() - fc.activeWindows-- - isLastWindow := fc.activeWindows == 0 - windowCount := fc.activeWindows - fc.activeWindowsMutex.Unlock() - - log.Info("[%s] Upload window ended at %s (remaining windows: %d)\n", - windowName, time.Now().Format(time.Kitchen), windowCount) - - // Only close resources when the last window ends - if isLastWindow { - endFunc() - } - return - case <-fc.uploadNotifyCh: - log.Debug("[%s] File change detected, processing pending uploads at %s\n", - windowName, time.Now().Format(time.Kitchen)) - fc.servicePendingOps() - } - } - }, jobOpts...) 
- if err != nil { - log.Err("[%s] Failed to schedule cron job with expression '%s': %v\n", - windowName, config.CronExpr, err) - continue - } - } -} - -func (fc *FileCache) markFileForUpload(path string) { - fc.scheduleOps.Store(path, struct{}{}) - select { - case fc.uploadNotifyCh <- struct{}{}: - // Successfully notified - log.Info( - "FileCache::markFileForUpload : Notified upload window about new file: %s", - path, - ) - default: - // Channel buffer is full, which means notifications are already pending - // No need to block here as uploads will be processed soon - log.Info( - "FileCache::markFileForUpload : Upload window notification channel full, skipping notify for: %s", - path, - ) - } -} - -func (fc *FileCache) servicePendingOps() { - log.Info("FileCache::servicePendingOps : Servicing pending uploads") - - // Process pending operations - numFilesProcessed := 0 - fc.scheduleOps.Range(func(key, value any) bool { - numFilesProcessed++ - select { - case <-fc.stopAsyncUpload: - log.Info("FileCache::servicePendingOps : Upload processing interrupted") - return false - case <-fc.closeWindowCh: - return false - default: - path := key.(string) - err := fc.uploadPendingFile(path) - if err != nil { - log.Err( - "FileCache::servicePendingOps : %s upload failed: %v", - path, - err, - ) - } - } - return true - }) - - log.Info( - "FileCache::servicePendingOps : Completed upload cycle, processed %d files", - numFilesProcessed, - ) -} - -func (fc *FileCache) uploadPendingFile(name string) error { - log.Trace("FileCache::uploadPendingFile : %s", name) - - // lock the file - flock := fc.fileLocks.Get(name) - flock.Lock() - defer flock.Unlock() - - // don't double upload - if !flock.SyncPending { - return nil - } - - // look up file (or folder!) - localPath := filepath.Join(fc.tmpPath, name) - info, err := os.Stat(localPath) - if err != nil { - log.Err("FileCache::uploadPendingFile : %s failed to stat file. 
Here's why: %v", name, err) - return err - } - if info.IsDir() { - // upload folder - options := internal.CreateDirOptions{Name: name, Mode: info.Mode()} - err = fc.NextComponent().CreateDir(options) - if err != nil && !os.IsExist(err) { - return err - } - } else { - // this is a file - // prepare a handle - handle := handlemap.NewHandle(name) - // open the cached file - f, err := common.OpenFile(localPath, os.O_RDONLY, fc.defaultPermission) - if err != nil { - log.Err( - "FileCache::uploadPendingFile : %s failed to open file. Here's why: %v", - name, - err, - ) - return err - } - // write handle attributes - inf, err := f.Stat() - if err == nil { - handle.Size = inf.Size() - } - handle.UnixFD = uint64(f.Fd()) - handle.SetFileObject(f) - fc.setHandleDirty(handle) - - // upload the file - err = fc.flushFileInternal( - internal.FlushFileOptions{Handle: handle, CloseInProgress: true, AsyncUpload: true}, - ) - f.Close() - if err != nil { - log.Err("FileCache::uploadPendingFile : %s Upload failed. 
Cause: %v", name, err) - return err - } - } - // update state - flock.SyncPending = false - fc.scheduleOps.Delete(name) - log.Info("FileCache::uploadPendingFile : File uploaded: %s", name) - - return nil -} - -func (fc *FileCache) notInCloud(name string) bool { - notInCloud, _ := fc.checkCloud(name) - return notInCloud -} - -func (fc *FileCache) checkCloud(name string) (notInCloud bool, getAttrErr error) { - _, getAttrErr = fc.NextComponent().GetAttr(internal.GetAttrOptions{Name: name}) - notInCloud = errors.Is(getAttrErr, os.ErrNotExist) - return notInCloud, getAttrErr -} diff --git a/component/s3storage/utils.go b/component/s3storage/utils.go index 49eaef2a8..8b9b08976 100644 --- a/component/s3storage/utils.go +++ b/component/s3storage/utils.go @@ -155,9 +155,9 @@ func parseS3Err(err error, attemptedAction string) error { } } - var maErr *retry.MaxAttemptsError - qeErr := &ratelimit.QuotaExceededError{} - if errors.As(err, &maErr) || errors.As(err, qeErr) || errors.Is(err, context.Canceled) || + var maerr *retry.MaxAttemptsError + qeerr := &ratelimit.QuotaExceededError{} + if errors.As(err, &maerr) || errors.As(err, qeerr) || errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { log.Err( "%s : Failed to %s because cloud storage is unreachable", diff --git a/internal/component_options.go b/internal/component_options.go index 35a6d0702..7d18f6bb0 100644 --- a/internal/component_options.go +++ b/internal/component_options.go @@ -140,7 +140,6 @@ type CopyFromFileOptions struct { type FlushFileOptions struct { Handle *handlemap.Handle CloseInProgress bool - AsyncUpload bool } type SyncFileOptions struct { diff --git a/setup/baseConfig.yaml b/setup/baseConfig.yaml index ced9cae75..b692fed5b 100644 --- a/setup/baseConfig.yaml +++ b/setup/baseConfig.yaml @@ -116,6 +116,7 @@ file_cache: create-empty-file: true|false allow-non-empty-temp: true|false cleanup-on-start: true|false + block-offline-access: true|false policy-trace: true|false offload-io: 
true|false refresh-sec: diff --git a/test-scripts/block.sh b/test-scripts/block.sh new file mode 100755 index 000000000..181d8cf96 --- /dev/null +++ b/test-scripts/block.sh @@ -0,0 +1,4 @@ +#!/bin/bash +#command to block outgoing calls to Lyve Cloud and Azure Blob Storage +sudo iptables -I OUTPUT 1 -d 192.55.0.0/16 -j REJECT +sudo iptables -I OUTPUT 1 -d 20.60.0.0/16 -j REJECT diff --git a/test-scripts/connect.sh b/test-scripts/connect.sh new file mode 100755 index 000000000..c2345be9b --- /dev/null +++ b/test-scripts/connect.sh @@ -0,0 +1,4 @@ +#!/bin/bash +#command to accept outgoing calls to Lyve Cloud and Azure Blob Storage +sudo iptables -D OUTPUT -d 192.55.0.0/16 -j REJECT +sudo iptables -D OUTPUT -d 20.60.0.0/16 -j REJECT