diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/FileResolver.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/FileResolver.scala index c8a407df993..dda1df984ce 100644 --- a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/FileResolver.scala +++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/FileResolver.scala @@ -75,9 +75,13 @@ object FileResolver { filePath.toUri } + private val RESOURCE_TYPE_PREFIXES = Set("datasets") + /** * Parses a dataset file path and extracts its components. - * Expected format: /ownerEmail/datasetName/versionName/fileRelativePath + * Expected format: /datasets/ownerEmail/datasetName/versionName/fileRelativePath + * + * The first segment is a resource type prefix (e.g. "datasets") and is stripped before parsing. * * @param fileName The file path to parse * @return Some((ownerEmail, datasetName, versionName, fileRelativePath)) if valid, None otherwise @@ -86,7 +90,12 @@ object FileResolver { fileName: String ): Option[(String, String, String, Array[String])] = { val filePath = Paths.get(fileName) - val pathSegments = (0 until filePath.getNameCount).map(filePath.getName(_).toString).toArray + var pathSegments = (0 until filePath.getNameCount).map(filePath.getName(_).toString).toArray + + // Strip known resource type prefix if present + if (pathSegments.nonEmpty && RESOURCE_TYPE_PREFIXES.contains(pathSegments(0))) { + pathSegments = pathSegments.drop(1) + } if (pathSegments.length < 4) { return None @@ -103,8 +112,8 @@ object FileResolver { /** * Attempts to resolve a given fileName to a URI. * - * The fileName format should be: /ownerEmail/datasetName/versionName/fileRelativePath - * e.g. /bob@texera.com/twitterDataset/v1/california/irvine/tw1.csv + * The fileName format should be: /datasets/ownerEmail/datasetName/versionName/fileRelativePath + * e.g. /datasets/bob@texera.com/twitterDataset/v1/california/irvine/tw1.csv * The output dataset URI format is: {DATASET_FILE_URI_SCHEME}:///{repositoryName}/{versionHash}/fileRelativePath * e.g. {DATASET_FILE_URI_SCHEME}:///dataset-15/adeq233td/some/dir/file.txt * @@ -195,7 +204,7 @@ object FileResolver { /** * Parses a dataset file path to extract owner email and dataset name. - * Expected format: /ownerEmail/datasetName/versionName/fileRelativePath + * Expected format: /datasets/ownerEmail/datasetName/versionName/fileRelativePath * * @param path The file path from operator properties * @return Some((ownerEmail, datasetName)) if path is valid, None otherwise diff --git a/common/workflow-core/src/test/scala/org/apache/texera/amber/storage/FileResolverSpec.scala b/common/workflow-core/src/test/scala/org/apache/texera/amber/storage/FileResolverSpec.scala index 593ac8d3471..2602dab4c79 100644 --- a/common/workflow-core/src/test/scala/org/apache/texera/amber/storage/FileResolverSpec.scala +++ b/common/workflow-core/src/test/scala/org/apache/texera/amber/storage/FileResolverSpec.scala @@ -79,9 +79,9 @@ class FileResolverSpec private val localCsvFilePath = "common/workflow-core/src/test/resources/country_sales_small.csv" - private val datasetACsvFilePath = "/test_user@test.com/test_dataset/v2/directory/a.csv" + private val datasetACsvFilePath = "/datasets/test_user@test.com/test_dataset/v2/directory/a.csv" - private val dataset1TxtFilePath = "/test_user@test.com/test_dataset/v1/1.txt" + private val dataset1TxtFilePath = "/datasets/test_user@test.com/test_dataset/v1/1.txt" override protected def beforeAll(): Unit = { initializeDBAndReplaceDSLContext() diff --git a/file-service/src/main/scala/org/apache/texera/service/resource/DatasetResource.scala b/file-service/src/main/scala/org/apache/texera/service/resource/DatasetResource.scala index efed479653a..7d5a22ad734 100644 --- a/file-service/src/main/scala/org/apache/texera/service/resource/DatasetResource.scala +++ b/file-service/src/main/scala/org/apache/texera/service/resource/DatasetResource.scala @@ -1188,7 +1188,7 @@ class DatasetResource extends LazyLogging { throw new NotFoundException(ERR_DATASET_VERSION_NOT_FOUND_MESSAGE) ) - val ownerNode = DatasetFileNode + val datasetsNode = DatasetFileNode .fromLakeFSRepositoryCommittedObjects( Map( (user.getEmail, dataset.getName, latestVersion.getName) -> LakeFSStorageClient @@ -1197,6 +1197,8 @@ class DatasetResource extends LazyLogging { ) .head + val ownerNode = datasetsNode.children.get.head + DashboardDatasetVersion( latestVersion, ownerNode.children.get @@ -1404,7 +1406,7 @@ class DatasetResource extends LazyLogging { val datasetName = dataset.dataset.getName val repositoryName = dataset.dataset.getRepositoryName - val ownerFileNode = DatasetFileNode + val datasetsNode = DatasetFileNode .fromLakeFSRepositoryCommittedObjects( Map( (dataset.ownerEmail, datasetName, datasetVersion.getName) -> LakeFSStorageClient @@ -1413,6 +1415,8 @@ class DatasetResource extends LazyLogging { ) .head + val ownerFileNode = datasetsNode.children.get.head + DatasetVersionRootFileNodesResponse( ownerFileNode.children.get .find(_.getName == datasetName) @@ -1423,7 +1427,7 @@ class DatasetResource extends LazyLogging { .head .children .get, - DatasetFileNode.calculateTotalSize(List(ownerFileNode)) + DatasetFileNode.calculateTotalSize(List(datasetsNode)) ) } diff --git a/file-service/src/main/scala/org/apache/texera/service/type/dataset/DatasetFileNode.scala b/file-service/src/main/scala/org/apache/texera/service/type/dataset/DatasetFileNode.scala index 7c91d30b94a..b26ffaa01b8 100644 --- a/file-service/src/main/scala/org/apache/texera/service/type/dataset/DatasetFileNode.scala +++ b/file-service/src/main/scala/org/apache/texera/service/type/dataset/DatasetFileNode.scala @@ -81,6 +81,10 @@ object DatasetFileNode { ): List[DatasetFileNode] = { val rootNode = new DatasetFileNode("/", "directory", null, "") + // Add "datasets" prefix node + val datasetsNode = new DatasetFileNode("datasets", "directory", rootNode, "") + rootNode.children = Some(List(datasetsNode)) + // Owner level nodes map val ownerNodes = mutable.Map[String, DatasetFileNode]() @@ -88,8 +92,8 @@ object DatasetFileNode { case ((ownerEmail, datasetName, versionName), objects) => val ownerNode = ownerNodes.getOrElseUpdate( ownerEmail, { - val newNode = new DatasetFileNode(ownerEmail, "directory", rootNode, ownerEmail) - rootNode.children = Some(rootNode.getChildren :+ newNode) + val newNode = new DatasetFileNode(ownerEmail, "directory", datasetsNode, ownerEmail) + datasetsNode.children = Some(datasetsNode.getChildren :+ newNode) newNode } ) diff --git a/frontend/src/app/common/type/dataset-file.ts b/frontend/src/app/common/type/dataset-file.ts index 5fe561d3720..64d8224555b 100644 --- a/frontend/src/app/common/type/dataset-file.ts +++ b/frontend/src/app/common/type/dataset-file.ts @@ -17,8 +17,8 @@ * under the License. */ -// user given filePath is /ownerEmail/datasetName/versionName/fileRelativePath -// e.g. /bob@texera.com/twitterDataset/v1/california/irvine/tw1.csv +// user given filePath is /datasets/ownerEmail/datasetName/versionName/fileRelativePath +// e.g. /datasets/bob@texera.com/twitterDataset/v1/california/irvine/tw1.csv export interface DatasetFile { ownerEmail: string; datasetName: string; @@ -28,11 +28,17 @@ export interface DatasetFile { /** * Parses a file path string to a DatasetFile interface. + * The first segment "datasets" is stripped before parsing. * @param filePath - The file path string to parse. * @returns The parsed DatasetFile object. */ export function parseFilePathToDatasetFile(filePath: string): DatasetFile { - const parts = filePath.split("/").filter(part => part.length > 0); + let parts = filePath.split("/").filter(part => part.length > 0); + + // Strip the "datasets" prefix if present + if (parts.length > 0 && parts[0] === "datasets") { + parts = parts.slice(1); + } if (parts.length < 4) { throw new Error("Invalid file path format"); @@ -56,5 +62,5 @@ export function parseFilePathToDatasetFile(filePath: string): DatasetFile { */ export function parseDatasetFileToFilePath(datasetFile: DatasetFile): string { const { ownerEmail, datasetName, versionName, fileRelativePath } = datasetFile; - return `/${ownerEmail}/${datasetName}/${versionName}/${fileRelativePath}`; + return `/datasets/${ownerEmail}/${datasetName}/${versionName}/${fileRelativePath}`; } diff --git a/frontend/src/app/common/type/datasetVersionFileTree.ts b/frontend/src/app/common/type/datasetVersionFileTree.ts index 8d1686998ca..98f898d4432 100644 --- a/frontend/src/app/common/type/datasetVersionFileTree.ts +++ b/frontend/src/app/common/type/datasetVersionFileTree.ts @@ -31,19 +31,20 @@ export function getFullPathFromDatasetFileNode(node: DatasetFileNode): string { } /** - * Returns the relative path of a DatasetFileNode by stripping the first three segments. + * Returns the relative path of a DatasetFileNode by stripping the first four segments + * (datasets/ownerEmail/datasetName/versionName). * @param node The DatasetFileNode whose relative path is needed. - * @returns The relative path (without the first three segments and without a leading slash). + * @returns The relative path (without the first four segments and without a leading slash). */ export function getRelativePathFromDatasetFileNode(node: DatasetFileNode): string { const fullPath = getFullPathFromDatasetFileNode(node); // Get the full path const pathSegments = fullPath.split("/").filter(segment => segment.length > 0); // Split and remove empty segments - if (pathSegments.length <= 3) { - return ""; // If there are 3 or fewer segments, return an empty string (no relative path exists) + if (pathSegments.length <= 4) { + return ""; // If there are 4 or fewer segments, return an empty string (no relative path exists) } - return pathSegments.slice(3).join("/"); // Join remaining segments as the relative path + return pathSegments.slice(4).join("/"); // Join remaining segments as the relative path } export function getPathsUnderOrEqualDatasetFileNode(node: DatasetFileNode): string[] {