diff --git a/client/package-lock.json b/client/package-lock.json index da6a5c6b5..a4f47b6c3 100644 --- a/client/package-lock.json +++ b/client/package-lock.json @@ -1,12 +1,12 @@ { "name": "vue-media-annotator", - "version": "1.9.10", + "version": "1.9.11", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "vue-media-annotator", - "version": "1.9.10", + "version": "1.9.11", "license": "Apache-2.0", "dependencies": { "@flatten-js/interval-tree": "^1.0.11", diff --git a/client/package.json b/client/package.json index 10575b8bd..4ab60e08b 100644 --- a/client/package.json +++ b/client/package.json @@ -11,8 +11,10 @@ "scripts": { "dev": "npm run serve", "dev:electron": "npm run serve:electron", + "dev:electron:windows": "npm run serve:electron:windows", "serve": "vite", "serve:electron": "mkdir -p /tmp/dive-electron && cross-env ELECTRON_RUN_AS_NODE= ELECTRON_DISABLE_SECURITY_WARNINGS=true TMPDIR=/tmp/dive-electron XDG_RUNTIME_DIR=/tmp/dive-electron electron-vite dev --entry=.electron/main/background.js", + "serve:electron:windows": "cross-env ELECTRON_DISABLE_SECURITY_WARNINGS=true electron-vite dev --entry=.electron/main/background.js", "build:web": "vite build", "build:electron": "electron-vite build && electron-builder --config electron-builder.json", "divecli": "node ./bin/platform/desktop/backend/cli.js", diff --git a/client/platform/desktop/backend/native/common.ts b/client/platform/desktop/backend/native/common.ts index 3068bc910..1687746d5 100644 --- a/client/platform/desktop/backend/native/common.ts +++ b/client/platform/desktop/backend/native/common.ts @@ -368,10 +368,18 @@ async function loadMetadata( imageData = defaultDisplay.imageData; videoUrl = defaultDisplay.videoUrl; } else if (projectMetaData.type === 'video') { - /* If the video has been transcoded, use that video */ + /* Use transcoded output only after it exists on disk. */ if (projectMetaData.transcodedVideoFile) { - const video = npath.join(projectDirData.basePath, projectMetaData.transcodedVideoFile); - videoUrl = makeMediaUrl(video); + const transcodedVideo = npath.join(projectDirData.basePath, projectMetaData.transcodedVideoFile); + if (await fs.pathExists(transcodedVideo)) { + videoUrl = makeMediaUrl(transcodedVideo); + } else if (projectMetaData.originalBasePath && projectMetaData.originalVideoFile) { + const originalVideo = npath.join(projectMetaData.originalBasePath, projectMetaData.originalVideoFile); + videoUrl = makeMediaUrl(originalVideo); + } else { + // Some legacy/test metadata only has a transcoded filename. + videoUrl = makeMediaUrl(transcodedVideo); + } } else { const video = npath.join(projectMetaData.originalBasePath, projectMetaData.originalVideoFile); videoUrl = makeMediaUrl(video); diff --git a/client/platform/desktop/backend/native/mediaJobs.ts b/client/platform/desktop/backend/native/mediaJobs.ts index 046a2ee9b..427682ba3 100644 --- a/client/platform/desktop/backend/native/mediaJobs.ts +++ b/client/platform/desktop/backend/native/mediaJobs.ts @@ -58,6 +58,31 @@ interface CheckMediaResults { videoDimensions: { width: number; height: number }; } +function frameRateStringFromProbeStream(stream: { + avg_frame_rate?: string; + r_frame_rate?: string; +}): string { + const parseable = (s: string | undefined): s is string => { + if (!s || s === '0/0') return false; + const parts = s.split('/').map((v) => Number.parseInt(v, 10)); + return ( + parts.length === 2 + && !Number.isNaN(parts[0]) + && !Number.isNaN(parts[1]) + && parts[1] !== 0 + ); + }; + if (parseable(stream.avg_frame_rate)) { + return stream.avg_frame_rate; + } + if (parseable(stream.r_frame_rate)) { + return stream.r_frame_rate; + } + throw Error( + 'FFProbe found no usable frame rate (avg_frame_rate / r_frame_rate)', + ); +} + async function checkFrameMisalignment(file: string): Promise { const args = [ file, @@ -138,11 +163,11 @@ async function checkMedia(file: string): Promise { if (ffprobeJSON && ffprobeJSON.streams?.length) { const videoStream = ffprobeJSON.streams.filter((el) => el.codec_type === 'video'); - if (videoStream.length === 0 || !videoStream[0].avg_frame_rate) { - throw Error('FFProbe found that video stream has no avg_frame_rate'); + if (videoStream.length === 0) { + throw Error('FFProbe found no video stream'); } - const originalFpsString = videoStream[0].avg_frame_rate; + const originalFpsString = frameRateStringFromProbeStream(videoStream[0]); const [dividend, divisor] = originalFpsString.split('/').map((v) => Number.parseInt(v, 10)); const originalFps = dividend / divisor; const websafe = videoStream @@ -191,6 +216,9 @@ async function convertMedia( ffmpegArgs.push(args.mediaList[mediaIndex][1]); const job = observeChild(spawn(ffmpegPath, ffmpegArgs, { shell: false })); + if (job.pid === undefined) { + throw new Error('Failed to start conversion process'); + } let jobKey = `convert_${job.pid}_${jobWorkDir}`; if (key.length) { jobKey = key; @@ -212,6 +240,11 @@ async function convertMedia( args.meta.transcodingJobKey = jobBase.key; } fs.writeFile(npath.join(jobWorkDir, DiveJobManifestName), JSON.stringify(jobBase, null, 2)); + // Emit an initial update immediately so UI reflects "converting" before ffmpeg writes logs. + updater({ + ...jobBase, + body: ['Conversion job started'], + }); job.stdout.on('data', jobFileEchoMiddleware(jobBase, updater, joblog)); job.stderr.on('data', jobFileEchoMiddleware(jobBase, updater, joblog)); diff --git a/client/platform/desktop/backend/server.ts b/client/platform/desktop/backend/server.ts index 41cc812d3..11dd5fdd0 100644 --- a/client/platform/desktop/backend/server.ts +++ b/client/platform/desktop/backend/server.ts @@ -36,12 +36,19 @@ const supportedMediaTypes = [ 'image/webp', ]; +function formatHostForUrl(host: string): string { + if (host.includes(':') && !host.startsWith('[')) { + return `[${host}]`; + } + return host; +} + function makeMediaUrl(filepath: string): string { const addr = server.address() as AddressInfo | null; if (!addr) { throw new Error('server has not initialized yet'); } - return `http://${addr.address}:${addr.port}/api/media?path=${filepath}`; + return `http://${formatHostForUrl(addr.address)}:${addr.port}/api/media?path=${encodeURIComponent(filepath)}`; } /* LOAD metadata */ diff --git a/client/platform/desktop/frontend/api.ts b/client/platform/desktop/frontend/api.ts index 49a26a709..765b14c4d 100644 --- a/client/platform/desktop/frontend/api.ts +++ b/client/platform/desktop/frontend/api.ts @@ -253,10 +253,17 @@ async function cancelJob(job: DesktopJob): Promise { let _axiosClient: AxiosInstance; // do not use elsewhere let _baseURL: string | null = null; +function formatHostForUrl(host: string) { + if (host.includes(':') && !host.startsWith('[')) { + return `[${host}]`; + } + return host; +} + async function getClient(): Promise { if (_axiosClient === undefined) { const addr = await window.diveDesktop.invoke('server-info'); - _baseURL = `http://${addr.address}:${addr.port}/api`; + _baseURL = `http://${formatHostForUrl(addr.address)}:${addr.port}/api`; _axiosClient = axios.create({ baseURL: _baseURL }); } return _axiosClient; diff --git a/client/platform/desktop/frontend/components/Recent.vue b/client/platform/desktop/frontend/components/Recent.vue index 7b49780de..e7c09f42a 100644 --- a/client/platform/desktop/frontend/components/Recent.vue +++ b/client/platform/desktop/frontend/components/Recent.vue @@ -90,7 +90,7 @@ export default defineComponent({ imports.forEach(async (conversionArgs) => { // Queue conversion job if (conversionArgs.mediaList.length > 0) { - api.convert(conversionArgs); + await api.convert(conversionArgs); } const recentsMeta = await api.loadMetadata(conversionArgs.meta.id); setRecents(recentsMeta); @@ -112,7 +112,7 @@ export default defineComponent({ }); } else { // Queue conversion job - api.convert(conversionArgs); + await api.convert(conversionArgs); // Display new data and await transcoding to complete const recentsMeta = await api.loadMetadata(conversionArgs.meta.id); setRecents(recentsMeta); diff --git a/server/dive_tasks/frame_alignment.py b/server/dive_tasks/frame_alignment.py index 135d4f009..a0d89bc41 100644 --- a/server/dive_tasks/frame_alignment.py +++ b/server/dive_tasks/frame_alignment.py @@ -20,13 +20,13 @@ def check_and_fix_frame_alignment( There appears to be no ffprobe way to determine if the second pass fixed the issue or not """ - misaligned = _ffprobe_frame_alignment(task, file_path, context, manager) + misaligned = is_frame_misaligned(task, file_path, context, manager) if misaligned is True: return _realign_video_and_audio(task, file_path, context, manager) return file_path -def _ffprobe_frame_alignment( +def is_frame_misaligned( task: Task, file_path: Path, context: Dict, manager: JobManager ) -> bool: command = [ diff --git a/server/dive_tasks/tasks.py b/server/dive_tasks/tasks.py index b062ebfe4..5485f706e 100644 --- a/server/dive_tasks/tasks.py +++ b/server/dive_tasks/tasks.py @@ -17,7 +17,7 @@ from girder_worker.utils import JobManager, JobStatus from dive_tasks import utils -from dive_tasks.frame_alignment import check_and_fix_frame_alignment +from dive_tasks.frame_alignment import check_and_fix_frame_alignment, is_frame_misaligned from dive_tasks.manager import patch_manager from dive_tasks.pipeline_discovery import discover_configs from dive_utils import constants, fromMeta @@ -521,14 +521,11 @@ def convert_video( print('Expected 1 video stream, found {}'.format(len(videostream))) print('Using first Video Stream found') - # Extract average framerate - avgFpsString: str = videostream[0]["avg_frame_rate"] - originalFps = None - if avgFpsString: - dividend, divisor = [int(v) for v in avgFpsString.split('/')] - originalFps = dividend / divisor - else: - raise Exception('Expected key avg_frame_rate in ffprobe') + format_info = jsoninfo.get('format') or {} + format_name = format_info.get('format_name') or '' + + # Extract framerate (avg_frame_rate, else r_frame_rate for e.g. MPEG-TS) + originalFpsString, originalFps = utils.fps_from_ffprobe_stream(videostream[0]) if requestedFps == -1: newAnnotationFps = originalFps @@ -537,8 +534,21 @@ def convert_video( if newAnnotationFps < 1: raise Exception('FPS lower than 1 is not supported') + source_misaligned = False + if skip_transcoding: + source_misaligned = is_frame_misaligned(self, Path(file_name), context, manager) + + # Skip remux/transcode only for browser-safe sources, matching desktop checks. + can_skip_transcode = ( + skip_transcoding + and videostream[0]['codec_name'] == 'h264' + and videostream[0].get('sample_aspect_ratio') == '1:1' + and utils.container_allows_skip_transcoding(format_name) + and not source_misaligned + ) + # lets determine if we don't need to transcode this file - if skip_transcoding and videostream[0]['codec_name'] == 'h264': + if can_skip_transcode: # Now we can update the meta data and push the values manager.updateStatus(JobStatus.PUSHING_OUTPUT) gc.addMetadataToItem( @@ -547,7 +557,7 @@ def convert_video( "source_video": False, # even though it is, this for requesting "transcoder": "ffmpeg", constants.OriginalFPSMarker: originalFps, - constants.OriginalFPSStringMarker: avgFpsString, + constants.OriginalFPSStringMarker: originalFpsString, "codec": "h264", }, ) @@ -556,7 +566,7 @@ def convert_video( { constants.DatasetMarker: True, # mark the parent folder as able to annotate. constants.OriginalFPSMarker: originalFps, - constants.OriginalFPSStringMarker: avgFpsString, + constants.OriginalFPSStringMarker: originalFpsString, constants.FPSMarker: newAnnotationFps, "ffprobe_info": videostream[0], }, @@ -565,7 +575,18 @@ def convert_video( elif skip_transcoding: print('Transcoding cannot be skipped:') print(f'Codec Name: {videostream[0]["codec_name"]}') - print('Codec name is not h264 so file will be transcoded') + print(f'format_name: {format_name}') + if videostream[0]['codec_name'] != 'h264': + print('Codec is not h264; file will be transcoded') + elif videostream[0].get('sample_aspect_ratio') != '1:1': + print( + 'Sample aspect ratio is not 1:1; file will be transcoded ' + '(desktop-parity rule)' + ) + elif not utils.container_allows_skip_transcoding(format_name): + print('Container is not web-safe (e.g. mpegts); file will be transcoded') + elif source_misaligned: + print('Frame timestamps are misaligned; file will be transcoded') command = [ "ffmpeg", @@ -601,14 +622,14 @@ def convert_video( "source_video": False, "transcoder": "ffmpeg", constants.OriginalFPSMarker: originalFps, - constants.OriginalFPSStringMarker: avgFpsString, + constants.OriginalFPSStringMarker: originalFpsString, "codec": "h264", }, ) source_metadata = { "source_video": True, constants.OriginalFPSMarker: originalFps, - constants.OriginalFPSStringMarker: avgFpsString, + constants.OriginalFPSStringMarker: originalFpsString, "codec": videostream[0]["codec_name"], } if misaligned_flag: @@ -622,7 +643,7 @@ def convert_video( { constants.DatasetMarker: True, # mark the parent folder as able to annotate. constants.OriginalFPSMarker: originalFps, - constants.OriginalFPSStringMarker: avgFpsString, + constants.OriginalFPSStringMarker: originalFpsString, constants.FPSMarker: newAnnotationFps, "ffprobe_info": videostream[0], }, diff --git a/server/dive_tasks/utils.py b/server/dive_tasks/utils.py index d532d55aa..91ee83bdd 100644 --- a/server/dive_tasks/utils.py +++ b/server/dive_tasks/utils.py @@ -8,7 +8,7 @@ from subprocess import Popen import tempfile import threading -from typing import List, Tuple +from typing import List, Optional, Tuple from urllib import request from urllib.parse import urlencode, urljoin @@ -32,6 +32,62 @@ class CanceledError(RuntimeError): pass +def fps_from_ffprobe_stream(video_stream: dict) -> Tuple[str, float]: + """ + Return (fps_string, fps_float) from an ffprobe video stream dict. + Prefer avg_frame_rate; fall back to r_frame_rate when avg is missing or 0/0 + (common for MPEG-TS and similar). + """ + + def _parse_rational(s: str) -> Optional[Tuple[int, int]]: + if not s or s == "0/0": + return None + parts = s.split("/") + if len(parts) != 2: + return None + try: + num, den = int(parts[0]), int(parts[1]) + except ValueError: + return None + if den == 0: + return None + return num, den + + avg = str(video_stream.get("avg_frame_rate") or "") + r_rate = str(video_stream.get("r_frame_rate") or "") + + pair = _parse_rational(avg) + used = avg + if pair is None: + pair = _parse_rational(r_rate) + used = r_rate + if pair is None: + raise Exception( + "Could not determine frame rate from ffprobe " + "(avg_frame_rate and r_frame_rate missing or unusable)" + ) + num, den = pair + return used, num / den + + +# ffprobe "format_name" is a comma-separated list of demuxer names. Only these +# are treated as browser-friendly when skip_transcoding is requested (H.264 in +# MPEG-TS / MPEG-PS is still h264 but must be remuxed — see convert_video). +# Matches dive-common websafeVideoTypes (mp4 / webm); other containers transcode. +_WEBSAFE_SKIP_TRANSCODE_FORMAT_FRAGMENTS = frozenset({'mp4', 'webm'}) + + +def container_allows_skip_transcoding(format_name: str) -> bool: + """ + True if ffprobe format_name indicates a container we can skip remuxing for + (mp4 or webm demuxer tags, same as websafe video MIME types in the client). + """ + if not format_name or not str(format_name).strip(): + return False + parts = {p.strip() for p in str(format_name).split(',') if p.strip()} + return bool(parts & _WEBSAFE_SKIP_TRANSCODE_FORMAT_FRAGMENTS) + + def authenticate_urllib(gc: GirderClient): """Enable authenticated requests to girder backend using normal urllib""" opener = request.build_opener() @@ -286,14 +342,12 @@ def upload_exported_zipped_dataset( transcoded_video = list(gc.listItem(root_folderId, name=video["filename"])) if len(transcoded_video) == 1: ffprobe = meta["ffprobe_info"] - avgFpsString = ffprobe["avg_frame_rate"] - dividend, divisor = [int(v) for v in avgFpsString.split('/')] - originalFps = dividend / divisor + originalFpsString, originalFps = fps_from_ffprobe_stream(ffprobe) transcoded_metadata = { "codec": "h264", "originalFps": originalFps, - "originalFpsString": avgFpsString, + "originalFpsString": originalFpsString, "source_video": False, "transcoder": "ffmpeg", } @@ -307,12 +361,12 @@ def upload_exported_zipped_dataset( source_metadata = { "codec": ffprobe["codec_name"], "originalFps": originalFps, - "originalFpsString": avgFpsString, + "originalFpsString": originalFpsString, "source_video": False, } gc.addMetadataToItem(str(item['_id']), source_metadata) root_meta["originalFps"] = originalFps - root_meta["originalFpsString"] = avgFpsString + root_meta["originalFpsString"] = originalFpsString # Need to tag folder Level data (annotate, and others) root_meta[constants.DatasetMarker] = True