diff --git a/.talismanrc b/.talismanrc index 71ea6de83..ae12936cd 100644 --- a/.talismanrc +++ b/.talismanrc @@ -1,4 +1,70 @@ fileignoreconfig: - - filename: pnpm-lock.yaml - checksum: 07642e8dd04d580185a459e5b088d8a1bb4e91be4e04f4842bf4fe4775205bf6 +- filename: packages/contentstack-external-migrate/src/services/contentful/contentful.service.ts + checksum: e710b7fbad0a413403de9b937bcc98a9cc84a2d6a920a836ddfa78706b959822 +- filename: packages/contentstack-external-migrate/src/services/contentful/extension.service.ts + checksum: 1c0c95059828ccecc2e2f6bc5a377eb37a6e879a2bc2605943857a0bbfd37aad +- filename: packages/contentstack-external-migrate/src/services/contentful/app/index.json + checksum: 508b3ef2dfcabcba03bb85b2716c1690a2a693254cf7605978d909006dd52028 +- filename: packages/contentstack-external-migrate/src/services/contentful/migration-contentful/utils/apps/appDetails.json + checksum: a80c6dfcb90b6a964e604b47699dee694b1dd93630e69859c04bdd41a0f4a7c0 +- filename: packages/contentstack-external-migrate/docs/manifest-schema.md + checksum: 96e188eb1cbb29d1ee3206634ac6a8cd4243c72cf42019cbb848e0e6e83c2b5f +- filename: packages/contentstack-external-migrate/docs/implementation-principles.md + checksum: 2a96dfbe9270fd50c42f781a40cbdf674d12d7f00784d4bd8640f3064b0d319f +- filename: packages/contentstack-external-migrate/test/commands/migrate/audit.test.ts + checksum: 0daa06c38f5b5879cf9e5854c96f5ee4976524b2de5ff3e62c64b0e28239e508 +- filename: packages/contentstack-external-migrate/test/commands/migrate/import.test.ts + checksum: e931a9cb89e9cb6ce384e0ae0218b5e9df3d8631caea5997cdab4e007b6d9d9a +- filename: packages/contentstack-external-migrate/src/services/contentful/users.ts + checksum: 86dce671e996019419256dbb5ebd8d927392715cdecda3a44e2d2315ac13adbe +- filename: packages/contentstack-external-migrate/src/services/contentful/utils/index.ts + checksum: cbc04052ad999e5a2ec6fbcbcd4e1df388d8c3575ffbd8370764934892477b0f +- filename: packages/contentstack-import/src/import/modules/webhooks.ts + checksum: 8c9527f966fe24debfb36d96a280ac3b2dd6494a58128380c69d7475e595436d +- filename: packages/contentstack-external-migrate/src/services/contentful/contentful/jsonRTE.ts + checksum: f19ae1132a29a93bb2e027366ab495ada1928afc9e7acd21fc7dc0cb774bc46e +- filename: packages/contentstack-external-migrate/docs/phases/phase-4-export.md + checksum: afb2edadd487e28d2ecc4ff837703dd654394c0469bc70a848e98b07bb934141 +- filename: packages/contentstack-external-migrate/docs/phases/phase-5-manifest-and-review.md + checksum: e7ea9d0095b38ed4722456d20560256eb9b7362e452929ba30925c5c66212415 +- filename: packages/contentstack-import/test/unit/import/modules/webhooks.test.ts + checksum: 3924a3e38211b8554f25365fe3100cb83361d03370fdaa77e5c7f9e74abafcee +- filename: packages/contentstack-external-migrate/docs/phases/phase-3-import.md + checksum: 188c1a2b310f15b53608e97f4273e42296a87a20e3e03c0030c2140b873cc809 +- filename: packages/contentstack-external-migrate/docs/phases/phase-2-audit.md + checksum: 8aa4d20af2def7b9afec9f3be7cfa269095dd92be015c6dd16774a27b13f4a65 +- filename: packages/contentstack-external-migrate/src/services/contentful/marketplace.service.ts + checksum: 0f6b8c3bd68093b0e42bf2bf4345321f99e9dda7eafc233a79c296257641950a +- filename: packages/contentstack-external-migrate/test/lib/manifest.test.ts + checksum: 1e80e263e06653dfd967779696b243582fe2aefc89084a2d05499807fec49e37 +- filename: packages/contentstack-external-migrate/src/services/contentful/contentful/roles.ts + checksum: 149d43d9348bf970339297b73bf66ead41efafc51ef1881b147845b4893976ad +- filename: packages/contentstack-external-migrate/src/lib/conversion-summary.ts + checksum: 05303adaed06435152ff69f022b2282faec63b53b36c404555c806cc808d5efa +- filename: packages/contentstack-external-migrate/src/services/contentful/contentful/taxonomy.service.ts + checksum: 6bc4638c31b5e4a87f26033b5bbea7404594e4fabc25447c8e76ec3b8b7602bb +- filename: packages/contentstack-external-migrate/src/adapters/contentful/validator.ts + checksum: c42584ad1a31a2cc085871eb62cd29c41c350b4f5ec51153e95f1279097f9b59 +- filename: packages/contentstack-external-migrate/docs/architecture.md + checksum: 0c157fef081918197a1213c8dbb1f07fd0d1313eaf90d18ddee335c0685db7b1 +- filename: packages/contentstack-external-migrate/src/adapters/contentful/convert.ts + checksum: 0c7cd556f5a7104bfac80f5933f3ce8f61349648462aa80e456db0390cd8491d +- filename: packages/contentstack-external-migrate/src/commands/migrate/status.ts + checksum: 316f723f0ec85b99f4b631f35e0acbbc1c80baec0cece8fc930243c4bb9ccbfd +- filename: packages/contentstack-external-migrate/src/services/contentful/migration-contentful/libs/createInitialMapper.js + checksum: 7431b4ea396ca3ad670c380108b4bf2061f45526fd36161096dd4dd46b4b6f8b +- filename: packages/contentstack-external-migrate/src/commands/migrate/import.ts + checksum: b1749716d8555d8a5af23c32a1b8ee8b39fa2b3dd0c51aff77f814e9766f8a92 +- filename: packages/contentstack-external-migrate/src/lib/manifest.ts + checksum: 6674d08800ae8b3a657f7506cf7e6906b8b952ea90b50240d0314b7cb82d7d47 +- filename: packages/contentstack-external-migrate/src/services/contentful/migration-contentful/libs/contentTypeMapper.js + checksum: 02e0f6cce67b4e070134b3908ce44c063e12133e88c373785bb414eb93e1a9ea +- filename: packages/contentstack-external-migrate/src/lib/create-stack.ts + checksum: e38b3286c9091a40d3f6ce24e2b0215c9c26457591d269f886a6a27cfbeca402 +- filename: packages/contentstack-external-migrate/src/commands/migrate/create.ts + checksum: 29dece984996d02fac42520335529071c619ee14d293a95a732c1811fe739f41 +- filename: packages/contentstack-external-migrate/src/services/contentful/content-type-creator.ts + checksum: 653a7e0443f6b9712ed91ad45aa4189864bee710ea4bd3ac40031225da4da5a5 +- filename: pnpm-lock.yaml + checksum: 07642e8dd04d580185a459e5b088d8a1bb4e91be4e04f4842bf4fe4775205bf6 version: '1.0' diff --git a/packages/contentstack-bulk-publish/.mocharc.json b/packages/contentstack-bulk-publish/.mocharc.json index 50e4a6804..3d76d7307 100644 --- a/packages/contentstack-bulk-publish/.mocharc.json +++ b/packages/contentstack-bulk-publish/.mocharc.json @@ -5,7 +5,8 @@ "test/unit/commands/assets/unpublish.test.js", "test/unit/commands/bulk-publish/cross-publish.test.js", "test/unit/commands/entries/publish.test.js", - "test/unit/commands/entries/unpublish.test.js" + "test/unit/commands/entries/unpublish.test.js", + "test/unit/util/asset-scan.test.js" ], "reporter": "dot", "timeout": 60000, diff --git a/packages/contentstack-bulk-publish/src/commands/cm/assets/publish.js b/packages/contentstack-bulk-publish/src/commands/cm/assets/publish.js index a032678c0..3aecb2c8e 100644 --- a/packages/contentstack-bulk-publish/src/commands/cm/assets/publish.js +++ b/packages/contentstack-bulk-publish/src/commands/cm/assets/publish.js @@ -15,10 +15,12 @@ class AssetsPublishCommand extends Command { assetsFlags.folderUid = assetsFlags['folder-uid'] || assetsFlags.folderUid; assetsFlags.bulkPublish = assetsFlags['bulk-publish'] || assetsFlags.bulkPublish; assetsFlags.apiVersion = assetsFlags['api-version'] || '3'; // setting default value for apiVersion + assetsFlags.dataDir = assetsFlags['data-dir'] || assetsFlags.dataDir; delete assetsFlags['api-version']; delete assetsFlags['retry-failed']; delete assetsFlags['folder-uid']; delete assetsFlags['bulk-publish']; + delete assetsFlags['data-dir']; let updatedFlags; try { @@ -111,12 +113,18 @@ class AssetsPublishCommand extends Command { } } - validate({ environments, retryFailed, locales, 'source-env': sourceEnv, 'delivery-token': deliveryToken }) { + validate({ environments, retryFailed, locales, dataDir, 'source-env': sourceEnv, 'delivery-token': deliveryToken }) { let missing = []; if (retryFailed) { return true; } + // In data-dir mode, environments and locales are derived per-asset from the + // backup publish_details, so they are not required on the command line. + if (dataDir) { + return true; + } + if (sourceEnv && !deliveryToken) { this.error('Specify the source environment delivery token. Run --help for more details.', { exit: 2 }); } @@ -181,6 +189,11 @@ AssetsPublishCommand.flags = { '(optional) The UID of the Assets’ folder from which the assets need to be published. The default value is cs_root.', exclusive: ['source-env'], }), + 'data-dir': flags.string({ + description: + '(optional) Path to the import backup directory. When set, each imported asset is published only to the environments and locales it was published to in the source stack (read from the backup’s publish details and asset UID mapping), with asset-scan gating applied. Intended for the post-import publish flow.', + exclusive: ['source-env', 'folder-uid'], + }), 'bulk-publish': flags.string({ description: 'Set this flag to use Contentstack’s Bulk Publish APIs. It is true, by default.', default: 'true', @@ -259,11 +272,14 @@ AssetsPublishCommand.examples = [ '', 'Using --stack-api-key flag', 'csdx cm:assets:publish --environments [ENVIRONMENT 1] [ENVIRONMENT 2] --locales [LOCALE] --stack-api-key [STACK API KEY]', + '', + 'Using --data-dir flag (publish imported assets to their original environments after asset scanning)', + 'csdx cm:assets:publish --data-dir [PATH TO IMPORT BACKUP DIR] --stack-api-key [STACK API KEY]', ]; AssetsPublishCommand.aliases = ['cm:bulk-publish:assets']; AssetsPublishCommand.usage = - 'cm:assets:publish [-a ] [--retry-failed ] [-e ] [--folder-uid ] [--bulk-publish ] [-c ] [-y] [--locales ] [--branch ] [--delivery-token ] [--source-env ]'; + 'cm:assets:publish [-a ] [--retry-failed ] [-e ] [--folder-uid ] [--data-dir ] [--bulk-publish ] [-c ] [-y] [--locales ] [--branch ] [--delivery-token ] [--source-env ]'; module.exports = AssetsPublishCommand; diff --git a/packages/contentstack-bulk-publish/src/producer/publish-assets.js b/packages/contentstack-bulk-publish/src/producer/publish-assets.js index a9afbdd7c..edee00ed1 100644 --- a/packages/contentstack-bulk-publish/src/producer/publish-assets.js +++ b/packages/contentstack-bulk-publish/src/producer/publish-assets.js @@ -1,7 +1,10 @@ /* eslint-disable no-console */ /* eslint-disable new-cap */ /* eslint-disable camelcase */ -const { cliux } = require('@contentstack/cli-utilities'); +const path = require('path'); +const { existsSync } = require('fs'); +const chalk = require('chalk'); +const { cliux, FsUtility } = require('@contentstack/cli-utilities'); const { getQueue } = require('../util/queue'); const { performBulkPublish, publishAsset, initializeLogger } = require('../consumer/publish'); const retryFailedLogs = require('../util/retryfailed'); @@ -9,14 +12,27 @@ const { validateFile } = require('../util/fs'); const { isEmpty } = require('../util'); const { fetchBulkPublishLimit } = require('../util/common-utility'); const { generateBulkPublishStatusUrl } = require('../util/generate-bulk-publish-url'); +const { resolveInQueueAssets, fetchScanStatusBatch, ASSET_SCAN_STATUS } = require('../util/asset-scan'); const queue = getQueue(); let logFileName; let bulkPublishSet = []; +let pendingAssetsForRetry = []; +let scanSummary = { clean: 0, quarantined: 0, inQueue: 0, noStatus: 0 }; let filePath; /* eslint-disable no-param-reassign */ +function printScanSummary({ clean, noStatus, inQueue, quarantined }) { + const total = clean + noStatus + inQueue + quarantined; + if (total === 0) return; + console.log(chalk.bold(`\nAsset scan summary (${total} total):`)); + console.log(chalk.green(` ✓ Clean (publishing): ${clean}`)); + if (noStatus > 0) console.log(chalk.green(` ✓ No scan status (publishing): ${noStatus}`)); + if (inQueue > 0) console.log(chalk.yellow(` ⧖ In queue (retrying): ${inQueue}`)); + if (quarantined > 0) console.log(chalk.red(` ✗ Quarantined (skipped): ${quarantined}`)); +} + async function getAssets(stack, folder, bulkPublish, environments, locale, apiVersion, bulkPublishLimit, skip = 0) { return new Promise((resolve, reject) => { let queryParams = { @@ -25,6 +41,7 @@ async function getAssets(stack, folder, bulkPublish, environments, locale, apiVe include_count: true, include_folders: true, include_publish_details: true, + include_asset_scan_status: true, }; stack .asset() @@ -34,7 +51,8 @@ async function getAssets(stack, folder, bulkPublish, environments, locale, apiVe if (assetResponse && assetResponse.items.length > 0) { skip += assetResponse.items.length; let assets = assetResponse.items; - for (let index = 0; index < assetResponse.items.length; index++) { + + for (let index = 0; index < assets.length; index++) { if (assets[index].is_dir === true) { await getAssets( stack, @@ -48,6 +66,35 @@ async function getAssets(stack, folder, bulkPublish, environments, locale, apiVe ); continue; } + + const scanStatus = assets[index]._asset_scan_status; + + // Quarantined assets are skipped permanently + if (scanStatus === ASSET_SCAN_STATUS.QUARANTINE) { + scanSummary.quarantined++; + console.log(chalk.yellow(`Skipped (quarantined): Asset UID '${assets[index].uid}'`)); + continue; + } + + // In-queue assets are deferred for retry after all pages are processed + if (scanStatus === ASSET_SCAN_STATUS.IN_QUEUE) { + scanSummary.inQueue++; + pendingAssetsForRetry.push({ + uid: assets[index].uid, + locale, + publish_details: assets[index].publish_details || [], + environments, + }); + continue; + } + + // Ready (clean) or no scan status — enqueue for publish + if (scanStatus === ASSET_SCAN_STATUS.READY) { + scanSummary.clean++; + } else { + scanSummary.noStatus++; + } + if (bulkPublish) { if (bulkPublishSet.length < bulkPublishLimit) { bulkPublishSet.push({ @@ -67,22 +114,6 @@ async function getAssets(stack, folder, bulkPublish, environments, locale, apiVe }); bulkPublishSet = []; } - - if ( - assetResponse.items.length - 1 === index && - bulkPublishSet.length > 0 && - bulkPublishSet.length < bulkPublishLimit - ) { - await queue.Enqueue({ - assets: bulkPublishSet, - Type: 'asset', - environments: environments, - locale, - stack: stack, - apiVersion, - }); - bulkPublishSet = []; - } } else { await queue.Enqueue({ assetUid: assets[index].uid, @@ -94,6 +125,23 @@ async function getAssets(stack, folder, bulkPublish, environments, locale, apiVe }); } } + + // Flush any partial bulk batch at the end of the page. + // Done outside the for-loop so it fires correctly even when some assets + // were skipped (quarantined/in-queue) and the last non-skipped asset is + // not at the final array index. + if (bulkPublish && bulkPublishSet.length > 0) { + await queue.Enqueue({ + assets: bulkPublishSet, + Type: 'asset', + environments: environments, + locale, + stack: stack, + apiVersion, + }); + bulkPublishSet = []; + } + if (skip === assetResponse.count) { return resolve(true); } @@ -109,6 +157,288 @@ async function getAssets(stack, folder, bulkPublish, environments, locale, apiVe }); } +/** + * After all pages/locales are scanned, retry any assets that were in-queue. + * Takes pendingItems explicitly — does not read from module-level state. + * Uses incremental backoff (see asset-scan.js SCAN_RETRY config). + */ +async function processPendingAssets(pendingItems, stack, bulkPublish, environments, apiVersion, bulkPublishLimit) { + if (pendingItems.length === 0) return; + + // Deduplicate UIDs across locales — scan status is per-asset, not per-locale. + // Resolving once avoids redundant retry loops for multi-locale runs. + const allUids = [...new Set(pendingItems.map((a) => a.uid))]; + const resolvedUids = await resolveInQueueAssets(stack, allUids); + + if (resolvedUids.length === 0) { + console.log(chalk.yellow('No in-queue assets resolved after retries.')); + return; + } + + const resolvedSet = new Set(resolvedUids); + + // Group resolved items by locale for correct enqueue context + const byLocale = {}; + for (const item of pendingItems) { + if (!resolvedSet.has(item.uid)) continue; + if (!byLocale[item.locale]) byLocale[item.locale] = []; + byLocale[item.locale].push(item); + } + + for (const locale of Object.keys(byLocale)) { + const resolvedItems = byLocale[locale]; + + if (bulkPublish) { + let batchSet = []; + for (const item of resolvedItems) { + batchSet.push({ uid: item.uid, locale, publish_details: item.publish_details }); + if (batchSet.length === bulkPublishLimit) { + await queue.Enqueue({ + assets: batchSet, + Type: 'asset', + environments, + locale, + stack, + apiVersion, + }); + batchSet = []; + } + } + if (batchSet.length > 0) { + await queue.Enqueue({ + assets: batchSet, + Type: 'asset', + environments, + locale, + stack, + apiVersion, + }); + } + } else { + for (const item of resolvedItems) { + await queue.Enqueue({ + assetUid: item.uid, + publish_details: item.publish_details, + environments, + Type: 'asset', + locale, + stack, + }); + } + } + } +} + +/** + * Publish assets from an import backup directory (post-import flow). + * + * Unlike getAssets (live folder scan), this drives publishing from the backup: + * each imported asset is published ONLY to the environments/locales it was + * published to in the source stack (from its publish_details), remapped to the + * target stack. Scan-status gating is applied to the target asset UIDs. + * + * Mirrors the publish_details/env-name resolution of contentstack-import's + * assets `publish()` (the bulk publish API resolves environment NAMES against + * the target stack, and import preserves env names, so source name == target + * name), and adds the clean/quarantined/in-queue scan gating that import skips. + * + * Source of truth split: + * - publish_details + environments come from the BACKUP (post-import flow): an + * asset's target environments are its source publish_details, gated by the + * environment uid-mapping (only environments actually imported into the target + * are publishable) — avoids doomed publish calls to envs never created there. + * - scan status comes from the LIVE target API (it is a runtime property of the + * freshly-imported assets and cannot exist in the backup). + * + * Streaming: asset chunks are processed and released one at a time and scan-gated + * per chunk, so memory does not scale with total asset count. The only structures + * retained across chunks are bounded (partial publish batches + the in-queue + * subset). The single in-memory floor is the asset uid-mapping file itself (same + * as import's publish()); for very large stacks raise Node's --max-old-space-size. + */ +async function getAssetsFromBackup(stack, dataDir, bulkPublish, apiVersion, bulkPublishLimit) { + const assetsPath = path.join(dataDir, 'assets'); + const assetsIndexPath = path.join(assetsPath, 'assets.json'); + const assetUidMapperPath = path.join(dataDir, 'mapper', 'assets', 'uid-mapping.json'); + const envUidMapperPath = path.join(dataDir, 'mapper', 'environments', 'uid-mapping.json'); + const environmentsPath = path.join(dataDir, 'environments', 'environments.json'); + + // Fail fast with actionable errors when the backup is incomplete. + if (!existsSync(assetsPath) || !existsSync(assetsIndexPath)) { + throw new Error(`No assets found in backup. Expected '${assetsIndexPath}'. Check the --data-dir path.`); + } + if (!existsSync(assetUidMapperPath)) { + throw new Error( + `Asset UID mapping not found at '${assetUidMapperPath}'. Run import against this data dir before publishing.`, + ); + } + if (!existsSync(environmentsPath)) { + throw new Error(`Environments not found at '${environmentsPath}'. Cannot resolve target environments.`); + } + + const fsUtil = new FsUtility({ basePath: assetsPath, indexFileName: 'assets.json' }); + const assetUidMap = fsUtil.readFile(assetUidMapperPath, true) || {}; + // environments.json: { [sourceEnvUid]: { name, ... } } — source env definitions. + const environments = fsUtil.readFile(environmentsPath, true) || {}; + // uid-mapping.json: { [sourceEnvUid]: targetEnvUid } — only environments actually + // imported into the target. Used as the "is publishable" gate. Optional: older + // backups (or runs with no imported environments) may not have it. + const envUidMapping = existsSync(envUidMapperPath) ? fsUtil.readFile(envUidMapperPath, true) || {} : null; + if (!envUidMapping) { + console.log( + chalk.yellow( + `Environment UID mapping not found at '${envUidMapperPath}'. Falling back to environment names from ` + + `environments.json — ensure the target stack has environments with matching names.`, + ), + ); + } + const isEnvImported = (sourceEnvUid) => + !envUidMapping || Object.prototype.hasOwnProperty.call(envUidMapping, sourceEnvUid); + + // Resolve an asset's deduped, env-gated target (envName, locale) pairs from its + // publish_details. Env name comes from the backup; only environments actually + // imported into the target (per the env uid-mapping) are publishable. + const resolvePairs = (asset) => { + const seen = new Set(); + const pairs = []; + for (const pd of asset.publish_details) { + const env = environments[pd.environment]; + if (!env || !env.name) continue; // env not in the data dir — cannot resolve a name + if (!isEnvImported(pd.environment)) continue; // env not imported into target — skip + const key = `${env.name}||${pd.locale}`; + if (seen.has(key)) continue; + seen.add(key); + pairs.push({ envName: env.name, locale: pd.locale }); + } + return pairs; + }; + + // Bounded cross-chunk state only — nothing scales with total asset count: + // - `buffers`: partial publish batches, capped at envCount x localeCount x bulkPublishLimit. + // - `pending`: the in-queue (scanning) subset awaiting retry. + // The full asset universe is never held in memory; chunks are processed and + // released one at a time (same streaming shape as contentstack-import's publish()). + const buffers = new Map(); // "envName||locale" -> { envName, locale, uids: [] } + const pending = []; // { targetUid, pairs } for assets whose scan is still in queue + let skippedNoUidMapping = 0; // source asset was not imported (no asset uid mapping) + let skippedNoMappableEnv = 0; // asset has publish details, but none of its envs were imported + let publishableAssets = 0; // assets enqueued for publish (across all env/locale pairs) + + const enqueueBatch = async (envName, locale, uids) => { + if (uids.length === 0) return; + if (bulkPublish) { + const assets = uids.map((uid) => ({ uid, locale })); + await queue.Enqueue({ assets, Type: 'asset', environments: [envName], locale, stack, apiVersion }); + } else { + for (const uid of uids) { + await queue.Enqueue({ assetUid: uid, environments: [envName], Type: 'asset', locale, stack }); + } + } + }; + + // Add a publishable asset to its (env, locale) buffers, flushing any that fill up. + // Grouping by the exact pair keeps the bulk API from publishing an asset to a + // combo it was not published to in source. + const bufferAsset = async (targetUid, pairs) => { + publishableAssets++; + for (const { envName, locale } of pairs) { + const key = `${envName}||${locale}`; + let buf = buffers.get(key); + if (!buf) { + buf = { envName, locale, uids: [] }; + buffers.set(key, buf); + } + buf.uids.push(targetUid); + if (buf.uids.length >= bulkPublishLimit) { + await enqueueBatch(envName, locale, buf.uids); + buf.uids = []; + } + } + }; + + const indexer = fsUtil.indexFileContent; + + // NOTE: one readChunkFiles.next() call per index entry — the iteration count must + // equal the number of chunk files (same contract as contentstack-import's publish()). + for (const _index in indexer) { + const chunk = await fsUtil.readChunkFiles.next(); + const assetsArr = Object.values(chunk || {}); + + // Resolve this chunk's assets to publish targets (bounded by chunk size). + const resolved = []; + for (const asset of assetsArr) { + if (!asset || !Array.isArray(asset.publish_details) || asset.publish_details.length === 0) { + continue; + } + const targetUid = assetUidMap[asset.uid]; + if (!targetUid) { + skippedNoUidMapping++; + continue; + } + const pairs = resolvePairs(asset); + if (pairs.length === 0) { + skippedNoMappableEnv++; + continue; + } + resolved.push({ targetUid, pairs }); + } + if (resolved.length === 0) continue; + + // Scan status is a target-stack property of the freshly-imported assets, so it + // is fetched live (one batched read per chunk) — it is not in the backup. + const statusMap = await fetchScanStatusBatch( + stack, + resolved.map((r) => r.targetUid), + ); + + for (const { targetUid, pairs } of resolved) { + const status = statusMap.get(targetUid); + if (status === ASSET_SCAN_STATUS.QUARANTINE) { + scanSummary.quarantined++; + console.log(chalk.yellow(`Skipped (quarantined): Asset UID '${targetUid}'`)); + } else if (status === ASSET_SCAN_STATUS.IN_QUEUE) { + scanSummary.inQueue++; + pending.push({ targetUid, pairs }); + } else { + if (status === ASSET_SCAN_STATUS.READY) scanSummary.clean++; + else scanSummary.noStatus++; + await bufferAsset(targetUid, pairs); + } + } + } + + // Resolve in-queue assets once (incremental backoff); publish those that turn clean. + if (pending.length > 0) { + const resolvedUids = await resolveInQueueAssets( + stack, + pending.map((p) => p.targetUid), + ); + const resolvedSet = new Set(resolvedUids); + for (const { targetUid, pairs } of pending) { + if (resolvedSet.has(targetUid)) await bufferAsset(targetUid, pairs); + } + } + + // Flush remaining partial (env, locale) batches. + for (const { envName, locale, uids } of buffers.values()) { + await enqueueBatch(envName, locale, uids); + } + + if (skippedNoUidMapping > 0) { + console.log(chalk.yellow(`Skipped ${skippedNoUidMapping} asset(s): no UID mapping (not imported into target).`)); + } + if (skippedNoMappableEnv > 0) { + console.log( + chalk.yellow( + `Skipped ${skippedNoMappableEnv} asset(s): none of their published environments were imported into the target.`, + ), + ); + } + if (publishableAssets === 0) { + console.log(chalk.yellow('No publishable assets found in backup (no mapped assets with publishable environments).')); + } +} + function setConfig(conf, bp) { if (bp) { queue.consumer = performBulkPublish; @@ -120,9 +450,11 @@ function setConfig(conf, bp) { config = conf; queue.config = conf; filePath = initializeLogger(logFileName); + pendingAssetsForRetry = []; + scanSummary = { clean: 0, quarantined: 0, inQueue: 0, noStatus: 0 }; } -async function start({ retryFailed, bulkPublish, environments, folderUid, locales, apiVersion }, stack, config) { +async function start({ retryFailed, bulkPublish, environments, folderUid, locales, apiVersion, dataDir }, stack, config) { process.on('beforeExit', async () => { const isErrorLogEmpty = await isEmpty(`${filePath}.error`); const isSuccessLogEmpty = await isEmpty(`${filePath}.success`); @@ -131,7 +463,7 @@ async function start({ retryFailed, bulkPublish, environments, folderUid, locale } else if (!isSuccessLogEmpty) { console.log(`The success log for this session is stored at ${filePath}.success`); } - + // Generate and display the bulk publish status link if (bulkPublish && stack && config) { const statusUrl = generateBulkPublishStatusUrl(stack, config); @@ -142,11 +474,12 @@ async function start({ retryFailed, bulkPublish, environments, folderUid, locale process.stdout.write('\n'); } } - + process.exit(0); }); if (retryFailed) { + console.log(chalk.yellow('Note: --retry-failed replays from log and skips asset scan status checks.')); if (!validateFile(retryFailed, ['publish-assets', 'bulk-publish-assets'])) { return false; } @@ -159,17 +492,34 @@ async function start({ retryFailed, bulkPublish, environments, folderUid, locale } else { await retryFailedLogs(retryFailed, { assetQueue: queue }, 'publish'); } + } else if (dataDir) { + // Post-import flow: publish each imported asset only to its original + // environments/locales (from backup publish_details), scan-gated. + setConfig(config, bulkPublish); + const bulkPublishLimit = fetchBulkPublishLimit(stack?.org_uid); + await getAssetsFromBackup(stack, dataDir, bulkPublish, apiVersion, bulkPublishLimit); + printScanSummary(scanSummary); } else if (folderUid) { setConfig(config, bulkPublish); const bulkPublishLimit = fetchBulkPublishLimit(stack?.org_uid); for (const locale of locales) { await getAssets(stack, folderUid, bulkPublish, environments, locale, apiVersion, bulkPublishLimit); } + + printScanSummary(scanSummary); + + // Resolve in-queue assets with incremental retry; pass pendingAssetsForRetry explicitly + if (pendingAssetsForRetry.length > 0) { + await processPendingAssets(pendingAssetsForRetry, stack, bulkPublish, environments, apiVersion, bulkPublishLimit); + pendingAssetsForRetry = []; + } } } module.exports = { getAssets, + getAssetsFromBackup, setConfig, start, + processPendingAssets, }; diff --git a/packages/contentstack-bulk-publish/src/util/asset-scan.js b/packages/contentstack-bulk-publish/src/util/asset-scan.js new file mode 100644 index 000000000..9c9f3f0d1 --- /dev/null +++ b/packages/contentstack-bulk-publish/src/util/asset-scan.js @@ -0,0 +1,121 @@ +/* eslint-disable no-console */ +const chalk = require('chalk'); + +const ASSET_SCAN_STATUS = { + READY: 'clean', + QUARANTINE: 'quarantined', + IN_QUEUE: 'pending', +}; + +const SCAN_RETRY = { + MAX_RETRIES: 5, + INITIAL_WAIT_MS: 5000, + BACKOFF_FACTOR: 2, +}; + +function getIncrementalWaitMs(attempt) { + return SCAN_RETRY.INITIAL_WAIT_MS * Math.pow(SCAN_RETRY.BACKOFF_FACTOR, attempt); +} + +/** + * Batch-fetch asset scan statuses for a list of UIDs. + * Returns a Map. UIDs with no scan data map to undefined. + * Throws on API error — callers must not silently treat failures as "ready". + */ +async function fetchScanStatusBatch(stack, uids) { + const statusMap = new Map(); + if (!uids || uids.length === 0) return statusMap; + + const BATCH_SIZE = 100; + for (let i = 0; i < uids.length; i += BATCH_SIZE) { + const batch = uids.slice(i, i + BATCH_SIZE); + const response = await stack + .asset() + .query({ uid: { $in: batch }, include_asset_scan_status: true, limit: BATCH_SIZE }) + .find(); + for (const asset of response.items || []) { + statusMap.set(asset.uid, asset._asset_scan_status); + } + } + + return statusMap; +} + +/** + * Retry pending (in-queue) assets with incremental backoff until they become + * clean or max retries is reached. + * + * Wait series: 5s, 10s, 20s, 40s, 80s (5 attempts total, max 155s). + * + * @param {object} stack - Management SDK stack instance + * @param {string[]} pendingUids - UIDs currently in scan queue + * @returns {string[]} UIDs that became clean and are safe to publish + */ +async function resolveInQueueAssets(stack, pendingUids) { + if (!pendingUids || pendingUids.length === 0) return []; + + const totalWaitSec = + Array.from({ length: SCAN_RETRY.MAX_RETRIES }, (_, i) => getIncrementalWaitMs(i)).reduce( + (a, b) => a + b, + 0, + ) / 1000; + console.log( + chalk.yellow( + `Resolving ${pendingUids.length} in-queue asset(s). Max wait: ${totalWaitSec}s over ${SCAN_RETRY.MAX_RETRIES} retries.`, + ), + ); + + let remaining = [...pendingUids]; + const resolvedUids = []; + + for (let attempt = 0; attempt < SCAN_RETRY.MAX_RETRIES && remaining.length > 0; attempt++) { + const waitMs = getIncrementalWaitMs(attempt); + console.log( + chalk.yellow( + `Asset scan: ${remaining.length} asset(s) in queue. Waiting ${waitMs / 1000}s before retry ${attempt + 1}/${ + SCAN_RETRY.MAX_RETRIES + }...`, + ), + ); + + await new Promise((resolve) => setTimeout(resolve, waitMs)); + + const statusMap = await fetchScanStatusBatch(stack, remaining); + const stillPending = []; + + for (const uid of remaining) { + const status = statusMap.get(uid); + if (status === ASSET_SCAN_STATUS.QUARANTINE) { + console.log(chalk.red(`Skipped (quarantined after retry): Asset UID '${uid}'`)); + } else if (status === ASSET_SCAN_STATUS.IN_QUEUE) { + stillPending.push(uid); + } else { + // clean or undefined (scanning disabled) — publishable + resolvedUids.push(uid); + } + } + + remaining = stillPending; + } + + if (remaining.length > 0) { + console.warn( + chalk.red( + `Asset scan: ${remaining.length} asset(s) remained in queue after ${SCAN_RETRY.MAX_RETRIES} retries and will be skipped.`, + ), + ); + for (const uid of remaining) { + console.warn(chalk.red(`Skipped (max retries exceeded): Asset UID '${uid}'`)); + } + } + + return resolvedUids; +} + +module.exports = { + ASSET_SCAN_STATUS, + SCAN_RETRY, + getIncrementalWaitMs, + fetchScanStatusBatch, + resolveInQueueAssets, +}; diff --git a/packages/contentstack-bulk-publish/test/unit/util/asset-scan.test.js b/packages/contentstack-bulk-publish/test/unit/util/asset-scan.test.js new file mode 100644 index 000000000..83b430bde --- /dev/null +++ b/packages/contentstack-bulk-publish/test/unit/util/asset-scan.test.js @@ -0,0 +1,162 @@ +'use strict'; + +const { describe, it, beforeEach, afterEach } = require('mocha'); +const { expect } = require('chai'); + +const { + ASSET_SCAN_STATUS, + SCAN_RETRY, + getIncrementalWaitMs, + fetchScanStatusBatch, + resolveInQueueAssets, +} = require('../../../src/util/asset-scan'); + +// Minimal mock stack factory +function makeStack(items) { + return { + asset() { + return { + query() { + return { + find: async () => ({ items: items || [] }), + }; + }, + }; + }, + }; +} + +// Stack that throws on query +function makeErrorStack(errorMsg) { + return { + asset() { + return { + query() { + return { + find: async () => { + throw new Error(errorMsg); + }, + }; + }, + }; + }, + }; +} + +describe('asset-scan utilities', () => { + // ─── getIncrementalWaitMs ──────────────────────────────────────────────── + + describe('getIncrementalWaitMs', () => { + it('returns INITIAL_WAIT_MS for attempt 0', () => { + expect(getIncrementalWaitMs(0)).to.equal(SCAN_RETRY.INITIAL_WAIT_MS); + }); + + it('doubles on each subsequent attempt', () => { + const seq = [0, 1, 2, 3, 4].map(getIncrementalWaitMs); + for (let i = 1; i < seq.length; i++) { + expect(seq[i]).to.equal(seq[i - 1] * SCAN_RETRY.BACKOFF_FACTOR); + } + }); + + it('produces the correct 5-attempt sequence', () => { + const expected = [5000, 10000, 20000, 40000, 80000]; + expected.forEach((ms, attempt) => { + expect(getIncrementalWaitMs(attempt)).to.equal(ms); + }); + }); + }); + + // ─── fetchScanStatusBatch ─────────────────────────────────────────────── + + describe('fetchScanStatusBatch', () => { + it('returns empty Map when called with empty uid array', async () => { + const map = await fetchScanStatusBatch(makeStack([]), []); + expect(map.size).to.equal(0); + }); + + it('maps UIDs to their scan statuses', async () => { + const items = [ + { uid: 'a1', _asset_scan_status: 'clean' }, + { uid: 'a2', _asset_scan_status: 'quarantined' }, + { uid: 'a3', _asset_scan_status: 'pending' }, + ]; + const map = await fetchScanStatusBatch(makeStack(items), ['a1', 'a2', 'a3']); + expect(map.get('a1')).to.equal(ASSET_SCAN_STATUS.READY); + expect(map.get('a2')).to.equal(ASSET_SCAN_STATUS.QUARANTINE); + expect(map.get('a3')).to.equal(ASSET_SCAN_STATUS.IN_QUEUE); + }); + + it('maps UIDs with no scan field to undefined', async () => { + const items = [{ uid: 'a1' }]; + const map = await fetchScanStatusBatch(makeStack(items), ['a1']); + expect(map.get('a1')).to.equal(undefined); + }); + + it('throws on API error (fail fast — do not silently treat as ready)', async () => { + try { + await fetchScanStatusBatch(makeErrorStack('Network error'), ['a1']); + expect.fail('Expected fetchScanStatusBatch to throw'); + } catch (error) { + expect(error.message).to.equal('Network error'); + } + }); + }); + + // ─── resolveInQueueAssets ─────────────────────────────────────────────── + + describe('resolveInQueueAssets', () => { + let originalSetTimeout; + + beforeEach(() => { + // Replace setTimeout with an immediate resolver to avoid real waits + originalSetTimeout = global.setTimeout; + global.setTimeout = (fn) => fn(); + }); + + afterEach(() => { + global.setTimeout = originalSetTimeout; + }); + + it('returns empty array for empty input without calling stack', async () => { + const result = await resolveInQueueAssets(makeStack([]), []); + expect(result).to.deep.equal([]); + }); + + it('resolves UIDs that become clean on the first retry', async () => { + const items = [{ uid: 'a1', _asset_scan_status: 'clean' }]; + const result = await resolveInQueueAssets(makeStack(items), ['a1']); + expect(result).to.include('a1'); + }); + + it('excludes UIDs that become quarantined during retry', async () => { + const items = [{ uid: 'a1', _asset_scan_status: 'quarantined' }]; + const result = await resolveInQueueAssets(makeStack(items), ['a1']); + expect(result).to.not.include('a1'); + }); + + it('resolves UIDs with no scan status (scanning disabled)', async () => { + const items = [{ uid: 'a1' }]; // no _asset_scan_status field + const result = await resolveInQueueAssets(makeStack(items), ['a1']); + expect(result).to.include('a1'); + }); + + it('drops UIDs still pending after MAX_RETRIES', async () => { + // Always returns pending status + const items = [{ uid: 'a1', _asset_scan_status: 'pending' }]; + const result = await resolveInQueueAssets(makeStack(items), ['a1']); + expect(result).to.deep.equal([]); + }); + + it('handles mixed outcomes: clean, quarantined, and pending exhausted', async () => { + const items = [ + { uid: 'clean1', _asset_scan_status: 'clean' }, + { uid: 'quar1', _asset_scan_status: 'quarantined' }, + { uid: 'pend1', _asset_scan_status: 'pending' }, + ]; + const result = await resolveInQueueAssets(makeStack(items), ['clean1', 'quar1', 'pend1']); + expect(result).to.include('clean1'); + expect(result).to.not.include('quar1'); + expect(result).to.not.include('pend1'); + }); + }); +}); diff --git a/packages/contentstack-import/src/import/module-importer.ts b/packages/contentstack-import/src/import/module-importer.ts index 2a6879c09..6e9243fa2 100755 --- a/packages/contentstack-import/src/import/module-importer.ts +++ b/packages/contentstack-import/src/import/module-importer.ts @@ -34,6 +34,12 @@ class ModuleImporter { const stackDetails: Record = await this.stackAPIClient.fetch(); this.importConfig.stackName = stackDetails.name as string; this.importConfig.org_uid = stackDetails.org_uid as string; + + const assetScanningEnabled = await this.detectAssetScanning(this.importConfig.org_uid); + if (assetScanningEnabled) { + this.importConfig.assetScanningEnabled = true; + this.importConfig.skipAssetsPublish = true; + } } await this.resolveImportPath(); @@ -209,6 +215,16 @@ class ModuleImporter { log.error(`Audit failed with following error. ${error}`, this.importConfig.context); } } + + private async detectAssetScanning(orgUid: string): Promise { + try { + const orgDetails = await this.managementAPIClient.organization(orgUid).fetch({ include_plan: true }); + const features: Array<{ uid: string; enabled?: boolean }> = orgDetails?.plan?.features || []; + return features.some((f) => (f.uid === 'assetsScan' || f.uid === 'amAssetsScan') && f.enabled === true); + } catch { + return false; + } + } } export default ModuleImporter; diff --git a/packages/contentstack-import/src/import/modules/assets.ts b/packages/contentstack-import/src/import/modules/assets.ts index e8b792f0b..ab21f419e 100644 --- a/packages/contentstack-import/src/import/modules/assets.ts +++ b/packages/contentstack-import/src/import/modules/assets.ts @@ -79,6 +79,16 @@ export default class ImportAssets extends BaseClass { } log.success('Assets imported successfully!', this.importConfig.context); + + if (this.importConfig.assetScanningEnabled) { + log.info('Asset Scanning is enabled for this stack.', this.importConfig.context); + log.info('Assets cannot be published immediately — scanning must complete first.', this.importConfig.context); + log.info('Once scanning is done, publish your assets using:', this.importConfig.context); + log.info( + `csdx cm:assets:publish --data-dir ${this.importConfig.backupDir} --stack-api-key [STACK API KEY]`, + this.importConfig.context, + ); + } } catch (error) { handleAndLogError(error, { ...this.importConfig.context }); } diff --git a/packages/contentstack-import/src/types/import-config.ts b/packages/contentstack-import/src/types/import-config.ts index 2c4c9a000..70ca10a27 100644 --- a/packages/contentstack-import/src/types/import-config.ts +++ b/packages/contentstack-import/src/types/import-config.ts @@ -15,6 +15,7 @@ export default interface ImportConfig extends DefaultConfig, ExternalConfig { skipAssetsPublish?: boolean; skipEntriesPublish?: boolean; cliLogsPath: string; + assetScanningEnabled?: boolean; canCreatePrivateApp: boolean; contentDir: string; data: string;