Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 103 additions & 26 deletions src/Migration/Sources/Appwrite.php
Original file line number Diff line number Diff line change
Expand Up @@ -199,18 +199,59 @@ public function report(array $resources = []): array
*/
private function reportAuth(array $resources, array &$report): void
{
// check if we need to fetch teams!
$needTeams = !empty(array_intersect(
[Resource::TYPE_TEAM, Resource::TYPE_MEMBERSHIP],
$resources
));

$pageLimit = 25;
$teams = ['total' => 0, 'teams' => []];

if (\in_array(Resource::TYPE_USER, $resources)) {
$report[Resource::TYPE_USER] = $this->users->list()['total'];
$report[Resource::TYPE_USER] = $this->users->list(
[Query::limit(1)]
)['total'];
}

if ($needTeams) {
if (\in_array(Resource::TYPE_MEMBERSHIP, $resources)) {
$allTeams = [];
$lastTeam = null;

while (true) {
$params = $lastTeam
// TODO: should we use offset here?
// this, realistically, shouldn't be too much ig
? [Query::cursorAfter($lastTeam)]
: [Query::limit($pageLimit)];

$teamList = $this->teams->list($params);

$totalTeams = $teamList['total'];
$currentTeams = $teamList['teams'];

$allTeams = array_merge($allTeams, $currentTeams);
$lastTeam = $currentTeams[count($currentTeams) - 1]['$id'] ?? null;

if (count($currentTeams) < $pageLimit) {
break;
}
}
$teams = ['total' => $totalTeams, 'teams' => $allTeams];
} else {
$teamList = $this->teams->list([Query::limit(1)]);
$teams = ['total' => $teamList['total'], 'teams' => []];
}
}

if (\in_array(Resource::TYPE_TEAM, $resources)) {
$report[Resource::TYPE_TEAM] = $this->teams->list()['total'];
$report[Resource::TYPE_TEAM] = $teams['total'];
}

if (\in_array(Resource::TYPE_MEMBERSHIP, $resources)) {
$report[Resource::TYPE_MEMBERSHIP] = 0;
$teams = $this->teams->list()['teams'];
foreach ($teams as $team) {
foreach ($teams['teams'] as $team) {
$report[Resource::TYPE_MEMBERSHIP] += $this->teams->listMemberships(
$team['$id'],
[Query::limit(1)]
Expand All @@ -236,9 +277,14 @@ private function reportDatabases(array $resources, array &$report): void
private function reportStorage(array $resources, array &$report): void
{
if (\in_array(Resource::TYPE_BUCKET, $resources)) {
$report[Resource::TYPE_BUCKET] = $this->storage->listBuckets()['total'];
// just fetch one bucket for the `total`
$report[Resource::TYPE_BUCKET] = $this->storage->listBuckets([
Query::limit(1)
])['total'];
}

$pageLimit = 25;

if (\in_array(Resource::TYPE_FILE, $resources)) {
$report[Resource::TYPE_FILE] = 0;
$report['size'] = 0;
Expand All @@ -249,58 +295,89 @@ private function reportStorage(array $resources, array &$report): void
$currentBuckets = $this->storage->listBuckets(
$lastBucket
? [Query::cursorAfter($lastBucket)]
: [Query::limit(20)]
: [Query::limit($pageLimit)]
)['buckets'];

$buckets = array_merge($buckets, $currentBuckets);
$lastBucket = $buckets[count($buckets) - 1]['$id'] ?? null;

if (count($currentBuckets) < 20) {
if (count($currentBuckets) < $pageLimit) {
break;
}
}

foreach ($buckets as $bucket) {
$files = [];
$lastFile = null;

while (true) {
$currentFiles = $this->storage->listFiles(
$files = $this->storage->listFiles(
$bucket['$id'],
$lastFile
? [Query::cursorAfter($lastFile)]
: [Query::limit(20)]
: [Query::limit($pageLimit)]
)['files'];

$files = array_merge($files, $currentFiles);
$report[Resource::TYPE_FILE] += count($files);
foreach ($files as $file) {
// already includes the `sizeOriginal`
$report['size'] += $file['sizeOriginal'] ?? 0;
}

$lastFile = $files[count($files) - 1]['$id'] ?? null;

if (count($currentFiles) < 20) {
if (count($files) < $pageLimit) {
break;
}
}

$report[Resource::TYPE_FILE] += count($files);
foreach ($files as $file) {
$report['size'] += $this->storage->getFile(
$bucket['$id'],
$file['$id']
)['sizeOriginal'];
}
}

$report['size'] = $report['size'] / 1000 / 1000; // MB
}
}

private function reportFunctions(array $resources, array &$report): void
{
$pageLimit = 25;
$needVarsOrDeployments = (
\in_array(Resource::TYPE_DEPLOYMENT, $resources) ||
\in_array(Resource::TYPE_ENVIRONMENT_VARIABLE, $resources)
);

$functions = [];
$totalFunctions = 0;

if (!$needVarsOrDeployments && \in_array(Resource::TYPE_FUNCTION, $resources)) {
// Only function count needed, short-circuit
$funcList = $this->functions->list([Query::limit(1)]);
$report[Resource::TYPE_FUNCTION] = $funcList['total'];
return;
}

if ($needVarsOrDeployments) {
$lastFunction = null;
while (true) {
$params = $lastFunction
? [Query::cursorAfter($lastFunction)]
: [Query::limit($pageLimit)];

$funcList = $this->functions->list($params);

$totalFunctions = $funcList['total'];
$currentFunctions = $funcList['functions'];
$functions = array_merge($functions, $currentFunctions);

$lastFunction = $currentFunctions[count($currentFunctions) - 1]['$id'] ?? null;
if (count($currentFunctions) < $pageLimit) {
break;
}
}
}

if (\in_array(Resource::TYPE_FUNCTION, $resources)) {
$report[Resource::TYPE_FUNCTION] = $this->functions->list()['total'];
$report[Resource::TYPE_FUNCTION] = $totalFunctions;
}

if (\in_array(Resource::TYPE_DEPLOYMENT, $resources)) {
$report[Resource::TYPE_DEPLOYMENT] = 0;
$functions = $this->functions->list()['functions'];
foreach ($functions as $function) {
if (!empty($function['deploymentId'])) {
$report[Resource::TYPE_DEPLOYMENT] += 1;
Expand All @@ -310,9 +387,9 @@ private function reportFunctions(array $resources, array &$report): void

if (\in_array(Resource::TYPE_ENVIRONMENT_VARIABLE, $resources)) {
$report[Resource::TYPE_ENVIRONMENT_VARIABLE] = 0;
$functions = $this->functions->list()['functions'];
foreach ($functions as $function) {
$report[Resource::TYPE_ENVIRONMENT_VARIABLE] += $this->functions->listVariables($function['$id'])['total'];
// function model contains `vars`, we don't need to fetch the list again.
$report[Resource::TYPE_ENVIRONMENT_VARIABLE] += count($function['vars'] ?? []);
}
}
}
Expand Down Expand Up @@ -1297,7 +1374,7 @@ private function exportFunctions(int $batchSize): void
$function['events'],
$function['schedule'],
$function['timeout'],
$function['deploymentId'],
$function['deploymentId'] ?? '',
$function['entrypoint']
);

Expand Down
49 changes: 33 additions & 16 deletions src/Migration/Sources/Appwrite/Reader/API.php
Original file line number Diff line number Diff line change
Expand Up @@ -61,38 +61,55 @@ public function report(array $resources, array &$report): mixed
foreach ($databases as $database) {
$databaseId = $database['$id'];

/* $tablesResponse = $this->tables->list(...); */
$tablesResponse = $this->database->listCollections($databaseId);
$tables = $tablesResponse['collections'];
$tables = [];
$pageLimit = 25;
$lastTable = null;

while (true) {
/* $currentTables = $this->tables->list(...); */
$currentTables = $this->database->listCollections(
$databaseId,
$lastTable
? [Query::cursorAfter($lastTable)]
: [Query::limit($pageLimit)]
)['collections']; /* ['tables'] */

$tables = array_merge($tables, $currentTables);
$lastTable = $tables[count($tables) - 1]['$id'] ?? null;

if (count($currentTables) < $pageLimit) {
break;
}
}

Comment on lines +64 to 84
Copy link
Contributor

@coderabbitai coderabbitai bot Jul 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Fix potential array access issue and query construction

There are two issues in the pagination implementation:

  1. Line 78 could fail if no tables are returned in the response. You should check if the array is not empty before accessing the last element.
  2. The query construction is incorrect. When using cursorAfter, you still need to include the limit parameter.

Apply this diff to fix both issues:

-                $lastTable = $tables[count($tables) - 1]['$id'] ?? null;
+                $lastTable = !empty($tables) ? $tables[count($tables) - 1]['$id'] : null;
                 $currentTables = $this->database->listCollections(
                     $databaseId,
                     $lastTable
-                        ? [Query::cursorAfter($lastTable)]
-                        : [Query::limit($pageLimit)]
+                        ? [Query::limit($pageLimit), Query::cursorAfter($lastTable)]
+                        : [Query::limit($pageLimit)]
                 )['collections']; /* ['tables'] */
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
$tables = [];
$pageLimit = 25;
$lastTable = null;
while (true) {
/* $currentTables = $this->tables->list(...); */
$currentTables = $this->database->listCollections(
$databaseId,
$lastTable
? [Query::cursorAfter($lastTable)]
: [Query::limit($pageLimit)]
)['collections']; /* ['tables'] */
$tables = array_merge($tables, $currentTables);
$lastTable = $tables[count($tables) - 1]['$id'] ?? null;
if (count($currentTables) < $pageLimit) {
break;
}
}
$tables = [];
$pageLimit = 25;
$lastTable = null;
while (true) {
/* $currentTables = $this->tables->list(...); */
$currentTables = $this->database->listCollections(
$databaseId,
$lastTable
? [Query::limit($pageLimit), Query::cursorAfter($lastTable)]
: [Query::limit($pageLimit)]
)['collections']; /* ['tables'] */
$tables = array_merge($tables, $currentTables);
- $lastTable = $tables[count($tables) - 1]['$id'] ?? null;
+ $lastTable = !empty($tables) ? $tables[count($tables) - 1]['$id'] : null;
if (count($currentTables) < $pageLimit) {
break;
}
}
🤖 Prompt for AI Agents
In src/Migration/Sources/Appwrite/Reader/API.php around lines 64 to 84, fix the
pagination logic by first checking that $tables is not empty before
accessing its last element to avoid potential errors. Also, modify the query
construction to always include the limit parameter along with cursorAfter when
$lastTable is set, ensuring correct pagination behavior.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's fine because $pageLimit is 25, which is also the default number of records returned by the API, so we don't need `Query::limit($pageLimit), Query::cursorAfter($lastTable)`.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ItzNotABug Thank you for the clarification about the API's default behavior with 25 records. You're absolutely right that the query construction is fine as-is since the API defaults to returning 25 records.

However, there's still a potential issue with the array access on line 78. If the first API call returns an empty $currentTables array (which could happen if there are no tables in the database), then $tables would remain empty, but we'd still try to access $tables[count($tables) - 1]['$id'] where count($tables) - 1 would be -1, leading to an undefined index error.

The fix for just the array access issue would be:

$lastTable = !empty($tables) ? $tables[count($tables) - 1]['$id'] : null;

But if you're confident this scenario won't occur in practice or is handled elsewhere, feel free to keep it as-is.


✏️ Learnings added
Learnt from: ItzNotABug
PR: utopia-php/migration#89
File: src/Migration/Sources/Appwrite/Reader/API.php:64-84
Timestamp: 2025-07-19T08:29:22.280Z
Learning: In the Appwrite API, the default page limit for listing collections is 25 records, so when using cursor-based pagination with Query::cursorAfter(), there's no need to explicitly specify Query::limit(25) as the API will default to this limit.

Note: Learnings are effective only in the context of similar code segments. To apply general coding guidelines, please configure review instructions. You can manage existing learnings in the app.


🧠 Learnings used
Learnt from: ItzNotABug
PR: utopia-php/migration#80
File: src/Migration/Sources/Supabase.php:300-308
Timestamp: 2025-06-28T09:47:58.757Z
Learning: In the utopia-php/migration codebase, during the terminology swap from Collection/Attribute/Document to Table/Column/Row, the user ItzNotABug prefers to keep the existing query logic unchanged even if it becomes semantically incorrect with the new naming. The focus is purely on resource type renaming, not on fixing logical issues that become apparent after the terminology change.

Learnt from: ItzNotABug
PR: utopia-php/migration#80
File: src/Migration/Sources/Appwrite.php:843-851
Timestamp: 2025-06-28T09:47:08.333Z
Learning: In the utopia-php/migration codebase, during the terminology swap from Collection/Attribute/Document to Table/Column/Row, the class constructors and method parameters use the new terminology (like "relatedTable"), but the underlying data structures and API responses still use the legacy keys (like "relatedCollection"). This is an intentional design pattern to allow gradual migration while maintaining compatibility with existing data sources.

Learnt from: ItzNotABug
PR: utopia-php/migration#80
File: src/Migration/Sources/Appwrite/Reader/API.php:8-8
Timestamp: 2025-06-28T09:47:11.436Z
Learning: In the Appwrite migration codebase, commented-out Tables service references (import statements and constructor parameters) are intentionally kept for future implementation when the Tables service becomes available in the Appwrite SDK, rather than being dead code that should be removed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Kind of makes sense.
But I wonder why this has never caused any issues in the other places in the codebase where similar pagination logic is used 🤔

Changing this here would mean changing the check everywhere we use pagination. I'll wait for @abnegate's insight on this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like the humans are having a chat. I'll hop back into my burrow for now. If you need me again, just tag @coderabbitai in a new comment, and I'll come hopping out!

if (Resource::isSupported(Resource::TYPE_TABLE, $resources)) {
$report[Resource::TYPE_TABLE] += $tablesResponse['total'];
$report[Resource::TYPE_TABLE] += count($tables);
}

if (Resource::isSupported([Resource::TYPE_ROW, Resource::TYPE_COLUMN, Resource::TYPE_INDEX], $resources)) {
foreach ($tables as $table) {
$tableId = $table['$id'];

if (Resource::isSupported(Resource::TYPE_COLUMN, $resources)) {
// a table already returns a list of attributes
$report[Resource::TYPE_COLUMN] += count($table['columns'] ?? $table['attributes'] ?? []);
}

if (in_array(Resource::TYPE_INDEX, $resources)) {
// a table already returns a list of indexes
$report[Resource::TYPE_INDEX] += count($table['indexes'] ?? []);
}

// this one's a bit heavy if the number of tables is high!
if (Resource::isSupported(Resource::TYPE_ROW, $resources)) {
/* $rowsResponse = $this->tables->listRows(...) */
$rowsResponse = $this->database->listDocuments(
$databaseId,
$tableId,
[Query::limit(1)]
);
$report[Resource::TYPE_ROW] += $rowsResponse['total'];
}

if (Resource::isSupported(Resource::TYPE_COLUMN, $resources)) {
/* $columnsResponse = $this->tables->listColumns(...); */
$columnsResponse = $this->database->listAttributes($databaseId, $tableId);
$report[Resource::TYPE_COLUMN] += $columnsResponse['total'];
}

if (in_array(Resource::TYPE_INDEX, $resources)) {
/* $indexesResponse = $this->tables->listIndexes(...); */
$indexesResponse = $this->database->listIndexes($databaseId, $tableId);
$report[Resource::TYPE_INDEX] += $indexesResponse['total'];
$report[Resource::TYPE_ROW] += $rowsResponse['total'];
}
}
}
Expand Down