Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
e0a359f
fix: Postgres image tag
aicam Feb 24, 2026
85a18f3
fix: Postgres image tag and registry
aicam Feb 25, 2026
e0c54ec
Merge branch 'main' into main
aicam Feb 25, 2026
850314d
Merge branch 'apache:main' into main
aicam Mar 3, 2026
e4a844f
fix: header name
aicam Mar 3, 2026
94e18c4
Merge branch 'main' into main
chenlica Mar 5, 2026
0eaffd0
Merge branch 'main' into main
aicam Mar 7, 2026
ce82ac9
Merge branch 'apache:main' into main
aicam Mar 9, 2026
f5b6dc4
Merge branch 'apache:main' into main
aicam Mar 11, 2026
b5e9024
fix: image tags
aicam Mar 11, 2026
7612505
fix: use latest in CI
aicam Mar 12, 2026
2c2b704
Merge branch 'main' into main
aicam Mar 12, 2026
b210bca
Merge branch 'apache:main' into main
aicam Mar 17, 2026
66b1afa
Merge branch 'apache:main' into main
aicam Mar 19, 2026
54de47b
feat: `datasets` prefix
aicam Mar 31, 2026
defe0e1
Merge branch 'main' into feat/repo-type-update-logical-path
aicam Mar 31, 2026
4297bc3
Merge branch 'apache:main' into main
aicam Apr 2, 2026
cfb64bf
Merge branch 'apache:main' into main
aicam Apr 9, 2026
983e0cb
Merge branch 'apache:main' into main
aicam Apr 15, 2026
c5c9f73
Merge branch 'apache:main' into main
aicam Apr 20, 2026
315f058
Merge branch 'apache:main' into main
aicam Apr 30, 2026
8106416
Merge branch 'apache:main' into main
aicam May 1, 2026
e08712e
feat(cloudbiomapper): add CloudBioMapper operator for sequence alignm…
May 2, 2026
0dc36ba
feat(cloudbiomapper): add cluster management UI, backend API, and fro…
May 2, 2026
f675409
add configuration variables
May 4, 2026
4c865ea
Fix compilation error
kunwp1 May 4, 2026
bf77ce5
Merge pull request #5 from aicam/feat/cloudbiomapper
aicam May 4, 2026
d8e6736
Revert "Feat/cloudbiomapper"
aicam May 7, 2026
fafe402
Merge pull request #6 from aicam/revert-5-feat/cloudbiomapper
aicam May 7, 2026
ca960d6
Merge branch 'apache:main' into main
aicam May 7, 2026
884d935
Merge branch 'apache:main' into main
aicam May 8, 2026
f69c162
Merge branch 'apache:main' into main
aicam May 14, 2026
0f5f38e
Merge remote-tracking branch 'apache/main'
aicam Jun 11, 2026
d1fe970
Merge branch 'main' into feat/repo-type-update-logical-path
aicam Jun 11, 2026
f46d3b7
Merge remote-tracking branch 'apache/main'
aicam Jun 22, 2026
607f581
Merge remote-tracking branch 'apache/main'
aicam Jun 23, 2026
e38a355
Merge branch 'main' into feat/repo-type-update-logical-path
aicam Jun 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,13 @@ object FileResolver {
filePath.toUri
}

private val RESOURCE_TYPE_PREFIXES = Set("datasets")

/**
* Parses a dataset file path and extracts its components.
* Expected format: /ownerEmail/datasetName/versionName/fileRelativePath
* Expected format: /datasets/ownerEmail/datasetName/versionName/fileRelativePath
*
* If the first segment is a known resource type prefix (e.g. "datasets"), it is stripped before parsing; paths without such a prefix are parsed as-is.
*
Comment on lines 81 to 85
* @param fileName The file path to parse
* @return Some((ownerEmail, datasetName, versionName, fileRelativePath)) if valid, None otherwise
Expand All @@ -86,7 +90,12 @@ object FileResolver {
fileName: String
): Option[(String, String, String, Array[String])] = {
val filePath = Paths.get(fileName)
val pathSegments = (0 until filePath.getNameCount).map(filePath.getName(_).toString).toArray
var pathSegments = (0 until filePath.getNameCount).map(filePath.getName(_).toString).toArray

// Strip known resource type prefix if present
if (pathSegments.nonEmpty && RESOURCE_TYPE_PREFIXES.contains(pathSegments(0))) {
pathSegments = pathSegments.drop(1)
}

if (pathSegments.length < 4) {
return None
Expand All @@ -103,8 +112,8 @@ object FileResolver {
/**
* Attempts to resolve a given fileName to a URI.
*
* The fileName format should be: /ownerEmail/datasetName/versionName/fileRelativePath
* e.g. /bob@texera.com/twitterDataset/v1/california/irvine/tw1.csv
* The fileName format should be: /datasets/ownerEmail/datasetName/versionName/fileRelativePath
* e.g. /datasets/bob@texera.com/twitterDataset/v1/california/irvine/tw1.csv
* The output dataset URI format is: {DATASET_FILE_URI_SCHEME}:///{repositoryName}/{versionHash}/fileRelativePath
* e.g. {DATASET_FILE_URI_SCHEME}:///dataset-15/adeq233td/some/dir/file.txt
*
Expand Down Expand Up @@ -195,7 +204,7 @@ object FileResolver {

/**
* Parses a dataset file path to extract owner email and dataset name.
* Expected format: /ownerEmail/datasetName/versionName/fileRelativePath
* Expected format: /datasets/ownerEmail/datasetName/versionName/fileRelativePath
*
* @param path The file path from operator properties
* @return Some((ownerEmail, datasetName)) if path is valid, None otherwise
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ class FileResolverSpec

private val localCsvFilePath = "common/workflow-core/src/test/resources/country_sales_small.csv"

private val datasetACsvFilePath = "/test_user@test.com/test_dataset/v2/directory/a.csv"
private val datasetACsvFilePath = "/datasets/test_user@test.com/test_dataset/v2/directory/a.csv"

private val dataset1TxtFilePath = "/test_user@test.com/test_dataset/v1/1.txt"
private val dataset1TxtFilePath = "/datasets/test_user@test.com/test_dataset/v1/1.txt"

override protected def beforeAll(): Unit = {
initializeDBAndReplaceDSLContext()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1188,7 +1188,7 @@ class DatasetResource extends LazyLogging {
throw new NotFoundException(ERR_DATASET_VERSION_NOT_FOUND_MESSAGE)
)

val ownerNode = DatasetFileNode
val datasetsNode = DatasetFileNode
.fromLakeFSRepositoryCommittedObjects(
Map(
(user.getEmail, dataset.getName, latestVersion.getName) -> LakeFSStorageClient
Expand All @@ -1197,6 +1197,8 @@ class DatasetResource extends LazyLogging {
)
.head

val ownerNode = datasetsNode.children.get.head

Comment on lines 1193 to +1201
DashboardDatasetVersion(
latestVersion,
ownerNode.children.get
Expand Down Expand Up @@ -1404,7 +1406,7 @@ class DatasetResource extends LazyLogging {
val datasetName = dataset.dataset.getName
val repositoryName = dataset.dataset.getRepositoryName

val ownerFileNode = DatasetFileNode
val datasetsNode = DatasetFileNode
.fromLakeFSRepositoryCommittedObjects(
Map(
(dataset.ownerEmail, datasetName, datasetVersion.getName) -> LakeFSStorageClient
Expand All @@ -1413,6 +1415,8 @@ class DatasetResource extends LazyLogging {
)
.head

val ownerFileNode = datasetsNode.children.get.head

DatasetVersionRootFileNodesResponse(
ownerFileNode.children.get
.find(_.getName == datasetName)
Expand All @@ -1423,7 +1427,7 @@ class DatasetResource extends LazyLogging {
.head
.children
.get,
DatasetFileNode.calculateTotalSize(List(ownerFileNode))
DatasetFileNode.calculateTotalSize(List(datasetsNode))
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,19 @@ object DatasetFileNode {
): List[DatasetFileNode] = {
val rootNode = new DatasetFileNode("/", "directory", null, "")

// Add "datasets" prefix node
val datasetsNode = new DatasetFileNode("datasets", "directory", rootNode, "")
rootNode.children = Some(List(datasetsNode))

// Owner level nodes map
val ownerNodes = mutable.Map[String, DatasetFileNode]()

map.foreach {
case ((ownerEmail, datasetName, versionName), objects) =>
val ownerNode = ownerNodes.getOrElseUpdate(
ownerEmail, {
val newNode = new DatasetFileNode(ownerEmail, "directory", rootNode, ownerEmail)
rootNode.children = Some(rootNode.getChildren :+ newNode)
val newNode = new DatasetFileNode(ownerEmail, "directory", datasetsNode, ownerEmail)
datasetsNode.children = Some(datasetsNode.getChildren :+ newNode)
newNode
}
)
Expand Down
14 changes: 10 additions & 4 deletions frontend/src/app/common/type/dataset-file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
* under the License.
*/

// user given filePath is /ownerEmail/datasetName/versionName/fileRelativePath
// e.g. /bob@texera.com/twitterDataset/v1/california/irvine/tw1.csv
// user given filePath is /datasets/ownerEmail/datasetName/versionName/fileRelativePath
// e.g. /datasets/bob@texera.com/twitterDataset/v1/california/irvine/tw1.csv
export interface DatasetFile {
ownerEmail: string;
datasetName: string;
Expand All @@ -28,11 +28,17 @@ export interface DatasetFile {

/**
* Parses a file path string to a DatasetFile interface.
* A leading "datasets" segment, if present, is stripped before parsing; paths without it are parsed as-is.
* @param filePath - The file path string to parse.
* @returns The parsed DatasetFile object.
*/
export function parseFilePathToDatasetFile(filePath: string): DatasetFile {
const parts = filePath.split("/").filter(part => part.length > 0);
let parts = filePath.split("/").filter(part => part.length > 0);

// Strip the "datasets" prefix if present
if (parts.length > 0 && parts[0] === "datasets") {
parts = parts.slice(1);
}

if (parts.length < 4) {
throw new Error("Invalid file path format");
Expand All @@ -56,5 +62,5 @@ export function parseFilePathToDatasetFile(filePath: string): DatasetFile {
*/
export function parseDatasetFileToFilePath(datasetFile: DatasetFile): string {
const { ownerEmail, datasetName, versionName, fileRelativePath } = datasetFile;
return `/${ownerEmail}/${datasetName}/${versionName}/${fileRelativePath}`;
return `/datasets/${ownerEmail}/${datasetName}/${versionName}/${fileRelativePath}`;
}
11 changes: 6 additions & 5 deletions frontend/src/app/common/type/datasetVersionFileTree.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,20 @@ export function getFullPathFromDatasetFileNode(node: DatasetFileNode): string {
}

/**
* Returns the relative path of a DatasetFileNode by stripping the first three segments.
* Returns the relative path of a DatasetFileNode by stripping the first four segments
* (datasets/ownerEmail/datasetName/versionName).
* @param node The DatasetFileNode whose relative path is needed.
* @returns The relative path (without the first three segments and without a leading slash).
* @returns The relative path (without the first four segments and without a leading slash).
*/
export function getRelativePathFromDatasetFileNode(node: DatasetFileNode): string {
const fullPath = getFullPathFromDatasetFileNode(node); // Get the full path
const pathSegments = fullPath.split("/").filter(segment => segment.length > 0); // Split and remove empty segments

if (pathSegments.length <= 3) {
return ""; // If there are 3 or fewer segments, return an empty string (no relative path exists)
if (pathSegments.length <= 4) {
return ""; // If there are 4 or fewer segments, return an empty string (no relative path exists)
}

return pathSegments.slice(3).join("/"); // Join remaining segments as the relative path
return pathSegments.slice(4).join("/"); // Join remaining segments as the relative path
}

export function getPathsUnderOrEqualDatasetFileNode(node: DatasetFileNode): string[] {
Expand Down
Loading