diff --git a/bundle/manifests/oadp-operator.clusterserviceversion.yaml b/bundle/manifests/oadp-operator.clusterserviceversion.yaml index a0e19c2fca..c128c36f14 100644 --- a/bundle/manifests/oadp-operator.clusterserviceversion.yaml +++ b/bundle/manifests/oadp-operator.clusterserviceversion.yaml @@ -1116,7 +1116,7 @@ spec: - name: FS_PV_HOSTPATH - name: PLUGINS_HOSTPATH - name: RELATED_IMAGE_VELERO - value: quay.io/konveyor/velero:latest + value: ghcr.io/kaovilai/velero:fix-backup-repo-connect-oadp-b74fb03-ubi9 - name: RELATED_IMAGE_OPENSHIFT_VELERO_PLUGIN value: quay.io/konveyor/openshift-velero-plugin:latest - name: RELATED_IMAGE_VELERO_PLUGIN_FOR_AWS @@ -1277,7 +1277,7 @@ spec: provider: name: Red Hat relatedImages: - - image: quay.io/konveyor/velero:latest + - image: ghcr.io/kaovilai/velero:fix-backup-repo-connect-oadp-b74fb03-ubi9 name: velero - image: quay.io/konveyor/openshift-velero-plugin:latest name: openshift-velero-plugin diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 1304283022..0f23a2809d 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -69,7 +69,7 @@ spec: - name: PLUGINS_HOSTPATH value: "" - name: RELATED_IMAGE_VELERO - value: quay.io/konveyor/velero:latest + value: ghcr.io/kaovilai/velero:fix-backup-repo-connect-oadp-b74fb03-ubi9 - name: RELATED_IMAGE_OPENSHIFT_VELERO_PLUGIN value: quay.io/konveyor/openshift-velero-plugin:latest - name: RELATED_IMAGE_VELERO_PLUGIN_FOR_AWS diff --git a/docs/developer/PROW_CI.md b/docs/developer/PROW_CI.md index 072951dc27..3d004ef823 100644 --- a/docs/developer/PROW_CI.md +++ b/docs/developer/PROW_CI.md @@ -259,7 +259,7 @@ Update `config/manager/manager.yaml`, changing the following (if release branch ```diff ... 
- name: RELATED_IMAGE_VELERO -- value: quay.io/konveyor/velero:latest +- value: ghcr.io/kaovilai/velero:fix-backup-repo-connect-08fc1b9 + value: quay.io/konveyor/velero:oadp-1.4 - name: RELATED_IMAGE_OPENSHIFT_VELERO_PLUGIN - value: quay.io/konveyor/openshift-velero-plugin:latest @@ -285,7 +285,7 @@ Update `pkg/common/common.go`, changing the following (if release branch was cre ... // Images const ( -- VeleroImage = "quay.io/konveyor/velero:latest" +- VeleroImage = "ghcr.io/kaovilai/velero:fix-backup-repo-connect-08fc1b9" + VeleroImage = "quay.io/konveyor/velero:oadp-1.4" - OpenshiftPluginImage = "quay.io/konveyor/openshift-velero-plugin:latest" + OpenshiftPluginImage = "quay.io/konveyor/openshift-velero-plugin:oadp-1.4" diff --git a/internal/controller/velero_test.go b/internal/controller/velero_test.go index 03b7e49141..0317c35241 100644 --- a/internal/controller/velero_test.go +++ b/internal/controller/velero_test.go @@ -2911,7 +2911,7 @@ func TestDPAReconciler_getVeleroImage(t *testing.T) { }, }, pluginName: common.Velero, - wantImage: "quay.io/konveyor/velero:latest", + wantImage: "ghcr.io/kaovilai/velero:fix-backup-repo-connect-oadp-b74fb03-ubi9", setEnvVars: map[string]string{ "REGISTRY": "quay.io", "PROJECT": "konveyor", diff --git a/pkg/common/common.go b/pkg/common/common.go index c42734fd3f..2c6c6478ba 100644 --- a/pkg/common/common.go +++ b/pkg/common/common.go @@ -66,7 +66,7 @@ var DefaultRestoreResourcePriorities = types.Priorities{ // Images const ( - VeleroImage = "quay.io/konveyor/velero:latest" + VeleroImage = "ghcr.io/kaovilai/velero:fix-backup-repo-connect-oadp-b74fb03-ubi9" OpenshiftPluginImage = "quay.io/konveyor/openshift-velero-plugin:latest" AWSPluginImage = "quay.io/konveyor/velero-plugin-for-aws:latest" LegacyAWSPluginImage = "quay.io/konveyor/velero-plugin-for-legacy-aws:latest" diff --git a/tests/e2e/hcp_backup_restore_suite_test.go b/tests/e2e/hcp_backup_restore_suite_test.go index 73bd764336..28d881abca 100644 --- 
a/tests/e2e/hcp_backup_restore_suite_test.go +++ b/tests/e2e/hcp_backup_restore_suite_test.go @@ -288,6 +288,13 @@ func runHCPBackup(brCase BackupRestoreCase, backupName string, h *libhcp.HCHandl gomega.Expect(succeeded).To(gomega.Equal(true)) log.Printf("Backup for case %s succeeded", brCase.Name) + // Print diagnostics to help identify PrepareRepo bug + log.Println("\n========== DIAGNOSTIC INFORMATION (PrepareRepo Bug Detection) ==========") + lib.PrintBackupRepositoryDiagnostics(h.Client, namespace) + lib.PrintDataUploadDiagnostics(h.Client, namespace, backupName) + lib.PrintNodeAgentDiagnostics(kubernetesClientForSuiteRun, namespace) + log.Println("========================================================================") + if brCase.BackupRestoreType == lib.CSI { // wait for volume snapshot to be Ready gomega.Eventually(lib.AreVolumeSnapshotsReady(h.Client, backupName), time.Minute*4, time.Second*10).Should(gomega.BeTrue()) diff --git a/tests/e2e/lib/backup.go b/tests/e2e/lib/backup.go index 2836b5e98b..fd6cf9260f 100755 --- a/tests/e2e/lib/backup.go +++ b/tests/e2e/lib/backup.go @@ -8,6 +8,7 @@ import ( "time" velero "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + velerov2alpha1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v2alpha1" pkgbackup "github.com/vmware-tanzu/velero/pkg/backup" "github.com/vmware-tanzu/velero/pkg/cmd/util/downloadrequest" "github.com/vmware-tanzu/velero/pkg/cmd/util/output" @@ -217,3 +218,125 @@ func DeleteBackupRepositories(c client.Client, namespace string) error { return nil } + +// PrintBackupRepositoryDiagnostics prints detailed information about BackupRepositories +// to help diagnose PrepareRepo bugs where local Kopia config files are missing +func PrintBackupRepositoryDiagnostics(c client.Client, namespace string) { + log.Println("===== BackupRepository Diagnostics (PrepareRepo Bug Detection) =====") + + backupRepos, err := GetBackupRepositoryList(c, namespace) + if err != nil { + log.Printf("ERROR: Failed to get 
BackupRepository list: %v", err) + return + } + + if len(backupRepos.Items) == 0 { + log.Println("INFO: No BackupRepositories found") + return + } + + for _, repo := range backupRepos.Items { + log.Printf("\n--- BackupRepository: %s ---", repo.Name) + log.Printf(" UID: %s", repo.UID) + log.Printf(" Created: %s", repo.CreationTimestamp) + log.Printf(" Repository Type: %s", repo.Spec.RepositoryType) + log.Printf(" Volume Namespace: %s", repo.Spec.VolumeNamespace) + log.Printf(" Backup Storage Location: %s", repo.Spec.BackupStorageLocation) + log.Printf(" Phase: %s", repo.Status.Phase) + log.Printf(" Message: %s", repo.Status.Message) + + if repo.Status.Phase == velero.BackupRepositoryPhaseReady { + log.Printf(" ⚠️ EVIDENCE: Repository exists in backend storage") + log.Printf(" ⚠️ If DataUpload fails with 'config file not found', this confirms PrepareRepo bug") + } + } + log.Println("================================================================") +} + +// PrintDataUploadDiagnostics prints detailed information about DataUpload resources +// to help diagnose PrepareRepo bugs by showing config file errors +func PrintDataUploadDiagnostics(c client.Client, namespace string, backupName string) { + log.Println("===== DataUpload Diagnostics (PrepareRepo Bug Detection) =====") + + dataUploadList := &velerov2alpha1.DataUploadList{} + err := c.List(context.Background(), dataUploadList, client.InNamespace(namespace)) + if err != nil { + log.Printf("ERROR: Failed to get DataUpload list: %v", err) + return + } + + if len(dataUploadList.Items) == 0 { + log.Println("INFO: No DataUpload resources found") + log.Println(" ⚠️ This might indicate volume backup didn't run") + log.Println("================================================================") + return + } + + // Filter for DataUploads related to this backup + relevantUploads := []velerov2alpha1.DataUpload{} + for _, du := range dataUploadList.Items { + if du.Labels != nil && du.Labels[velero.BackupNameLabel] == backupName { 
// containsIgnoreCase reports whether substr occurs anywhere in s, ignoring
// ASCII letter case. An empty substr is contained in every string.
//
// The check is delegated entirely to indexIgnoreCase so that inputs of equal
// length that differ only in case (e.g. "CONFIG" and "config") match.
func containsIgnoreCase(s, substr string) bool {
	return indexIgnoreCase(s, substr) >= 0
}

// indexIgnoreCase returns the byte index of the first occurrence of substr in
// s, ignoring ASCII letter case, or -1 if substr does not occur in s.
func indexIgnoreCase(s, substr string) int {
	// ASCII lowering never changes byte length, so an index into the lowered
	// copy is also a valid index into the original string.
	s, substr = toLower(s), toLower(substr)
	for i := 0; i+len(substr) <= len(s); i++ {
		if s[i:i+len(substr)] == substr {
			return i
		}
	}
	return -1
}

// toLower returns s with the ASCII letters 'A'-'Z' mapped to 'a'-'z'. All other
// bytes, including those of multi-byte UTF-8 sequences, are left unchanged.
func toLower(s string) string {
	b := []byte(s)
	for i, c := range b {
		if 'A' <= c && c <= 'Z' {
			b[i] = c + ('a' - 'A')
		}
	}
	return string(b)
}
return string(b) +} diff --git a/tests/e2e/lib/nodeagent_helpers.go b/tests/e2e/lib/nodeagent_helpers.go index e232727ff8..1d29feefd1 100644 --- a/tests/e2e/lib/nodeagent_helpers.go +++ b/tests/e2e/lib/nodeagent_helpers.go @@ -1,6 +1,7 @@ package lib import ( + "bytes" "context" "fmt" "log" @@ -77,3 +78,131 @@ func NodeAgentDaemonSetHasNodeSelector(c *kubernetes.Clientset, namespace, key, return ds.Spec.Template.Spec.NodeSelector[key] == value, nil } } + +// PrintNodeAgentDiagnostics prints node-agent pod logs filtered for PrepareRepo-related messages +// to help diagnose PrepareRepo bugs where local Kopia config files are missing +func PrintNodeAgentDiagnostics(c *kubernetes.Clientset, namespace string) { + log.Println("===== Node-Agent Pod Diagnostics (PrepareRepo Bug Detection) =====") + + // Get all node-agent pods + podList, err := GetAllPodsWithLabel(c, namespace, "name="+common.NodeAgent) + if err != nil { + log.Printf("ERROR: Failed to get node-agent pods: %v", err) + return + } + + if len(podList.Items) == 0 { + log.Println("INFO: No node-agent pods found") + return + } + + for _, pod := range podList.Items { + log.Printf("\n--- Node-Agent Pod: %s (Node: %s) ---", pod.Name, pod.Spec.NodeName) + log.Printf(" Phase: %s", pod.Status.Phase) + log.Printf(" Created: %s", pod.CreationTimestamp) + + // Get pod logs + req := c.CoreV1().Pods(namespace).GetLogs(pod.Name, &corev1.PodLogOptions{ + TailLines: int64Ptr(500), // Last 500 lines + }) + podLogs, err := req.Stream(context.Background()) + if err != nil { + log.Printf(" ERROR: Failed to get logs: %v", err) + continue + } + defer podLogs.Close() + + // Read and filter logs + buf := new(bytes.Buffer) + _, err = buf.ReadFrom(podLogs) + if err != nil { + log.Printf(" ERROR: Failed to read logs: %v", err) + continue + } + + logsStr := buf.String() + lines := splitLines(logsStr) + + // Look for PrepareRepo-related log lines + foundRepoInit := false + foundConnect := false + foundConfigError := false + + log.Println(" 
// Helper functions for PrintNodeAgentDiagnostics

// int64Ptr returns a pointer to a copy of i, for optional *int64 API fields.
func int64Ptr(i int64) *int64 {
	return &i
}

// splitLines splits s on '\n' and returns the lines without their terminators.
// A '\r' immediately before the line end is dropped as well, so CRLF-terminated
// input does not leave stray carriage returns in the output. A trailing newline
// does not produce a final empty element, and an empty s yields a nil slice.
func splitLines(s string) []string {
	var lines []string
	start := 0
	for i := 0; i < len(s); i++ {
		if s[i] == '\n' {
			lines = append(lines, trimTrailingCR(s[start:i]))
			start = i + 1
		}
	}
	// Keep a final line that is not newline-terminated.
	if start < len(s) {
		lines = append(lines, trimTrailingCR(s[start:]))
	}
	return lines
}

// trimTrailingCR returns line without a single trailing '\r', if present.
func trimTrailingCR(line string) string {
	if n := len(line); n > 0 && line[n-1] == '\r' {
		return line[:n-1]
	}
	return line
}

// containsAny reports whether s contains at least one of substrs
// (case-sensitive). It returns false when substrs is empty.
func containsAny(s string, substrs []string) bool {
	for _, substr := range substrs {
		if containsSubstring(s, substr) {
			return true
		}
	}
	return false
}

// containsSubstring reports whether substr occurs in s (case-sensitive). An
// empty substr is contained in every string.
func containsSubstring(s, substr string) bool {
	// The loop bound also covers len(s) < len(substr) (no iterations) and the
	// empty substr (matches at i == 0).
	for i := 0; i+len(substr) <= len(s); i++ {
		if s[i:i+len(substr)] == substr {
			return true
		}
	}
	return false
}