@@ -23,6 +23,7 @@ package tests
2323import (
2424 "context"
2525 "encoding/json"
26+ "fmt"
2627 "os"
2728 "strings"
2829 "testing"
@@ -31,6 +32,7 @@ import (
3132 "github.com/stretchr/testify/require"
3233
3334 "github.com/arangodb/go-driver/v2/arangodb"
35+ "github.com/arangodb/go-driver/v2/arangodb/shared"
3436)
3537
3638func Test_CreateBackupSimple (t * testing.T ) {
@@ -132,6 +134,8 @@ func Test_RestoreBackupSimple(t *testing.T) {
132134
133135 err = client .BackupDelete (ctx , backup .ID )
134136 require .NoError (t , err , "DeleteBackup failed" )
137+
138+ waitForSync (t , ctx , client )
135139 })
136140 })
137141 })
@@ -140,6 +144,40 @@ func Test_RestoreBackupSimple(t *testing.T) {
140144 })
141145}
142146
147+ /*
148+ Sometimes after restore, we observe the following error during db creation:
149+
150+ ```
151+ Could not create database: executing createSystemCollectionsAndIndices
152+ (creates all system collections including their indices) failed.
153+ ```
154+
155+ Looks like not all DB servers have reported ready in Current/DBServers in the agency when the database creation attempt runs.
156+ This could be the case if all DB servers are restarted (as after a hot backup) and each starts responding to liveness probes,
157+ but the coordinator has not yet fetched the latest agency state.
158+ There may be a small window of time in which a DB server already responds to `/_admin/server/availability`,
159+ but has not reported ready to the agency, or the coordinator has not yet fetched the latest state from the agency and
160+ does not yet see the server available in Current/DBServers.
161+ */
162+ func waitForSync (t * testing.T , ctx context.Context , client arangodb.Client ) {
163+ NewTimeout (func () error {
164+ name := GenerateUUID ("test-backup-DB" )
165+
166+ db , err := client .CreateDatabase (ctx , name , nil )
167+ if err != nil {
168+ if ok , arangoErr := shared .IsArangoError (err ); ok {
169+ t .Logf ("waitForSync ERROR: errorNum: %d, errCode: %d, msg: %s" , arangoErr .ErrorNum , arangoErr .Code , arangoErr .ErrorMessage )
170+ if strings .Contains (arangoErr .ErrorMessage , "executing createSystemCollectionsAndIndices (creates all system collections including their indices) failed" ) {
171+ return err
172+ }
173+ }
174+ }
175+ require .NoError (t , err , fmt .Sprintf ("waitForSync Failed to create DB %s" , name ))
176+ require .NoError (t , db .Remove (ctx ))
177+ return Interrupt {}
178+ }).TimeoutT (t , 2 * time .Minute , 125 * time .Millisecond )
179+ }
180+
143181func Test_BackupFullFlow (t * testing.T ) {
144182 requireClusterMode (t )
145183
@@ -184,6 +222,7 @@ func Test_BackupFullFlow(t *testing.T) {
184222 WaitForHealthyCluster (t , client , time .Minute , true )
185223 })
186224
225+ waitForSync (t , ctx , client )
187226 })
188227 }, WrapOptions {
189228 Parallel : newBool (false ),
0 commit comments