diff --git a/.cspell-wordlist.txt b/.cspell-wordlist.txt index 419872562..cb92b44bd 100644 --- a/.cspell-wordlist.txt +++ b/.cspell-wordlist.txt @@ -116,3 +116,6 @@ antonov rfdetr basemodule IMAGENET +worklet +worklets +BGRA \ No newline at end of file diff --git a/apps/computer-vision/app.json b/apps/computer-vision/app.json index 4d68c039b..4fcbca2ce 100644 --- a/apps/computer-vision/app.json +++ b/apps/computer-vision/app.json @@ -25,11 +25,23 @@ "foregroundImage": "./assets/icons/adaptive-icon.png", "backgroundColor": "#ffffff" }, - "package": "com.anonymous.computervision" + "package": "com.anonymous.computervision", + "permissions": ["android.permission.CAMERA"] }, "web": { "favicon": "./assets/icons/favicon.png" }, - "plugins": ["expo-font", "expo-router"] + "plugins": [ + "expo-font", + "expo-router", + [ + "expo-build-properties", + { + "android": { + "minSdkVersion": 26 + } + } + ] + ] } } diff --git a/apps/computer-vision/app/_layout.tsx b/apps/computer-vision/app/_layout.tsx index 35fba7fb1..eafbc70e6 100644 --- a/apps/computer-vision/app/_layout.tsx +++ b/apps/computer-vision/app/_layout.tsx @@ -59,6 +59,15 @@ export default function _layout() { headerTitleStyle: { color: ColorPalette.primary }, }} > + Select a demo model + router.navigate('vision_camera/')} + > + Vision Camera + router.navigate('classification/')} diff --git a/apps/computer-vision/app/style_transfer/index.tsx b/apps/computer-vision/app/style_transfer/index.tsx index a1b3a7834..80c3974d4 100644 --- a/apps/computer-vision/app/style_transfer/index.tsx +++ b/apps/computer-vision/app/style_transfer/index.tsx @@ -5,6 +5,14 @@ import { useStyleTransfer, STYLE_TRANSFER_CANDY, } from 'react-native-executorch'; +import { + Canvas, + Image as SkiaImage, + Skia, + AlphaType, + ColorType, + SkImage, +} from '@shopify/react-native-skia'; import { View, StyleSheet, Image } from 'react-native'; import React, { useContext, useEffect, useState } from 'react'; import { GeneratingContext } from 
'../../context'; @@ -16,12 +24,17 @@ export default function StyleTransferScreen() { useEffect(() => { setGlobalGenerating(model.isGenerating); }, [model.isGenerating, setGlobalGenerating]); + const [imageUri, setImageUri] = useState(''); + const [styledImage, setStyledImage] = useState(null); + const [canvasSize, setCanvasSize] = useState({ width: 1, height: 1 }); + const handleCameraPress = async (isCamera: boolean) => { const image = await getImage(isCamera); const uri = image?.uri; if (typeof uri === 'string') { - setImageUri(uri as string); + setImageUri(uri); + setStyledImage(null); } }; @@ -29,7 +42,20 @@ export default function StyleTransferScreen() { if (imageUri) { try { const output = await model.forward(imageUri); - setImageUri(output); + const height = output.sizes[0]; + const width = output.sizes[1]; + const skData = Skia.Data.fromBytes(output.dataPtr); + const img = Skia.Image.MakeImage( + { + width, + height, + alphaType: AlphaType.Opaque, + colorType: ColorType.RGBA_8888, + }, + skData, + width * 4 + ); + setStyledImage(img); } catch (e) { console.error(e); } @@ -48,15 +74,38 @@ export default function StyleTransferScreen() { return ( - + {styledImage ? 
( + + setCanvasSize({ + width: e.nativeEvent.layout.width, + height: e.nativeEvent.layout.height, + }) + } + > + + + + + ) : ( + + )} ('classification'); + const [activeModel, setActiveModel] = useState('classification'); + const [canvasSize, setCanvasSize] = useState({ width: 1, height: 1 }); + const [cameraPosition, setCameraPosition] = useState<'back' | 'front'>( + 'back' + ); + const { setGlobalGenerating } = useContext(GeneratingContext); + + const classification = useClassification({ + model: EFFICIENTNET_V2_S, + preventLoad: activeModel !== 'classification', + }); + const objectDetection = useObjectDetection({ + model: SSDLITE_320_MOBILENET_V3_LARGE, + preventLoad: activeModel !== 'objectDetection', + }); + const segmentation = useImageSegmentation({ + model: DEEPLAB_V3_RESNET50, + preventLoad: activeModel !== 'segmentation', + }); + + const activeIsGenerating = { + classification: classification.isGenerating, + objectDetection: objectDetection.isGenerating, + segmentation: segmentation.isGenerating, + }[activeModel]; + + useEffect(() => { + setGlobalGenerating(activeIsGenerating); + }, [activeIsGenerating, setGlobalGenerating]); + + const [fps, setFps] = useState(0); + const [frameMs, setFrameMs] = useState(0); + const lastFrameTimeRef = useRef(Date.now()); + const cameraPermission = useCameraPermission(); + const devices = useCameraDevices(); + const device = + devices.find((d) => d.position === cameraPosition) ?? 
devices[0]; + const format = useMemo(() => { + if (device == null) return undefined; + try { + return getCameraFormat(device, Templates.FrameProcessing); + } catch { + return undefined; + } + }, [device]); + + const [classResult, setClassResult] = useState({ label: '', score: 0 }); + const [detections, setDetections] = useState([]); + const [imageSize, setImageSize] = useState({ width: 1, height: 1 }); + const [maskImage, setMaskImage] = useState(null); + + const updateClass = useCallback((r: { label: string; score: number }) => { + setClassResult(r); + const now = Date.now(); + const diff = now - lastFrameTimeRef.current; + if (diff > 0) { + setFps(Math.round(1000 / diff)); + setFrameMs(diff); + } + lastFrameTimeRef.current = now; + }, []); + + const updateFps = useCallback(() => { + const now = Date.now(); + const diff = now - lastFrameTimeRef.current; + if (diff > 0) { + setFps(Math.round(1000 / diff)); + setFrameMs(diff); + } + lastFrameTimeRef.current = now; + }, []); + + const updateDetections = useCallback( + (p: { results: Detection[]; imageWidth: number; imageHeight: number }) => { + setDetections(p.results); + setImageSize({ width: p.imageWidth, height: p.imageHeight }); + updateFps(); + }, + [updateFps] + ); + + const updateMask = useCallback( + (img: SkImage) => { + setMaskImage((prev) => { + prev?.dispose(); + return img; + }); + updateFps(); + }, + [updateFps] + ); + + const classRof = classification.runOnFrame; + const detRof = objectDetection.runOnFrame; + const segRof = segmentation.runOnFrame; + + useEffect(() => { + frameKillSwitch.setBlocking(true); + setMaskImage((prev) => { + prev?.dispose(); + return null; + }); + const id = setTimeout(() => { + frameKillSwitch.setBlocking(false); + }, 300); + return () => clearTimeout(id); + }, [activeModel]); + + const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + dropFramesWhileBusy: true, + onFrame: useCallback( + (frame: Frame) => { + 'worklet'; + + if (frameKillSwitch.getDirty()) { + 
frame.dispose(); + return; + } + + try { + if (activeModel === 'classification') { + if (!classRof) return; + const result = classRof(frame); + if (result) { + let bestLabel = ''; + let bestScore = -1; + const entries = Object.entries(result); + for (let i = 0; i < entries.length; i++) { + const [label, score] = entries[i]!; + if ((score as number) > bestScore) { + bestScore = score as number; + bestLabel = label; + } + } + scheduleOnRN(updateClass, { label: bestLabel, score: bestScore }); + } + } else if (activeModel === 'objectDetection') { + if (!detRof) return; + const iw = frame.width > frame.height ? frame.height : frame.width; + const ih = frame.width > frame.height ? frame.width : frame.height; + const result = detRof(frame, 0.5); + if (result) { + scheduleOnRN(updateDetections, { + results: result, + imageWidth: iw, + imageHeight: ih, + }); + } + } else if (activeModel === 'segmentation') { + if (!segRof) return; + const result = segRof(frame, [], false); + if (result?.ARGMAX) { + const argmax: Int32Array = result.ARGMAX; + const side = Math.round(Math.sqrt(argmax.length)); + const pixels = new Uint8Array(side * side * 4); + for (let i = 0; i < argmax.length; i++) { + const color = CLASS_COLORS[argmax[i]!] ?? 
[0, 0, 0, 0]; + pixels[i * 4] = color[0]!; + pixels[i * 4 + 1] = color[1]!; + pixels[i * 4 + 2] = color[2]!; + pixels[i * 4 + 3] = color[3]!; + } + const skData = Skia.Data.fromBytes(pixels); + const img = Skia.Image.MakeImage( + { + width: side, + height: side, + alphaType: AlphaType.Unpremul, + colorType: ColorType.RGBA_8888, + }, + skData, + side * 4 + ); + if (img) scheduleOnRN(updateMask, img); + } + } + } catch { + // ignore + } finally { + frame.dispose(); + } + }, + [ + activeModel, + classRof, + detRof, + segRof, + updateClass, + updateDetections, + updateMask, + ] + ), + }); + + const activeIsReady = { + classification: classification.isReady, + objectDetection: objectDetection.isReady, + segmentation: segmentation.isReady, + }[activeModel]; + + const activeDownloadProgress = { + classification: classification.downloadProgress, + objectDetection: objectDetection.downloadProgress, + segmentation: segmentation.downloadProgress, + }[activeModel]; + + if (!cameraPermission.hasPermission) { + return ( + + Camera access needed + cameraPermission.requestPermission()} + style={styles.button} + > + Grant Permission + + + ); + } + + if (device == null) { + return ( + + No camera device found + + ); + } + + function coverFit(imgW: number, imgH: number) { + const scale = Math.max(canvasSize.width / imgW, canvasSize.height / imgH); + return { + scale, + offsetX: (canvasSize.width - imgW * scale) / 2, + offsetY: (canvasSize.height - imgH * scale) / 2, + }; + } + + const { + scale: detScale, + offsetX: detOX, + offsetY: detOY, + } = coverFit(imageSize.width, imageSize.height); + + const activeTaskInfo = TASKS.find((t) => t.id === activeTask)!; + const activeVariantLabel = + activeTaskInfo.variants.find((v) => v.id === activeModel)?.label ?? 
+ activeTaskInfo.variants[0]!.label; + + return ( + + + + + + + setCanvasSize({ + width: e.nativeEvent.layout.width, + height: e.nativeEvent.layout.height, + }) + } + > + {activeModel === 'segmentation' && maskImage && ( + + + + )} + + {activeModel === 'objectDetection' && ( + <> + {detections.map((det, i) => { + const left = det.bbox.x1 * detScale + detOX; + const top = det.bbox.y1 * detScale + detOY; + const w = (det.bbox.x2 - det.bbox.x1) * detScale; + const h = (det.bbox.y2 - det.bbox.y1) * detScale; + return ( + + + + {det.label} {(det.score * 100).toFixed(1)} + + + + ); + })} + + )} + + + {activeModel === 'classification' && classResult.label ? ( + + {classResult.label} + + {(classResult.score * 100).toFixed(1)}% + + + ) : null} + + {!activeIsReady && ( + + + + )} + + + + {activeVariantLabel} + + {fps} FPS – {frameMs.toFixed(0)} ms + + + + + {TASKS.map((t) => ( + { + setActiveTask(t.id); + setActiveModel(t.variants[0]!.id); + }} + > + + {t.label} + + + ))} + + + + {activeTaskInfo.variants.map((v) => ( + setActiveModel(v.id)} + > + + {v.label} + + + ))} + + + + + + setCameraPosition((p) => (p === 'back' ? 
'front' : 'back')) + } + > + + {/* Camera body */} + + {/* Rotate arrows — arc with arrowhead around the lens */} + + + + + + + ); +} + +const styles = StyleSheet.create({ + container: { flex: 1, backgroundColor: 'black' }, + centered: { + flex: 1, + backgroundColor: 'black', + justifyContent: 'center', + alignItems: 'center', + gap: 16, + }, + message: { color: 'white', fontSize: 18 }, + button: { + paddingHorizontal: 24, + paddingVertical: 12, + backgroundColor: ColorPalette.primary, + borderRadius: 24, + }, + buttonText: { color: 'white', fontSize: 15, fontWeight: '600' }, + loadingOverlay: { + ...StyleSheet.absoluteFillObject, + backgroundColor: 'rgba(0,0,0,0.6)', + justifyContent: 'center', + alignItems: 'center', + }, + + topOverlay: { + position: 'absolute', + top: 0, + left: 0, + right: 0, + alignItems: 'center', + gap: 8, + }, + titleRow: { + alignItems: 'center', + paddingHorizontal: 16, + }, + modelTitle: { + color: 'white', + fontSize: 22, + fontWeight: '700', + textShadowColor: 'rgba(0,0,0,0.7)', + textShadowOffset: { width: 0, height: 1 }, + textShadowRadius: 4, + }, + fpsText: { + color: 'rgba(255,255,255,0.85)', + fontSize: 14, + fontWeight: '500', + marginTop: 2, + textShadowColor: 'rgba(0,0,0,0.7)', + textShadowOffset: { width: 0, height: 1 }, + textShadowRadius: 4, + }, + + tabsContent: { + paddingHorizontal: 12, + gap: 6, + }, + tab: { + paddingHorizontal: 18, + paddingVertical: 7, + borderRadius: 20, + backgroundColor: 'rgba(0,0,0,0.45)', + borderWidth: 1, + borderColor: 'rgba(255,255,255,0.25)', + }, + tabActive: { + backgroundColor: 'rgba(255,255,255,0.2)', + borderColor: 'white', + }, + tabText: { + color: 'rgba(255,255,255,0.7)', + fontSize: 14, + fontWeight: '600', + }, + tabTextActive: { color: 'white' }, + + chipsContent: { + paddingHorizontal: 12, + gap: 6, + }, + variantChip: { + paddingHorizontal: 14, + paddingVertical: 5, + borderRadius: 16, + backgroundColor: 'rgba(0,0,0,0.35)', + borderWidth: 1, + borderColor: 
'rgba(255,255,255,0.15)', + }, + variantChipActive: { + backgroundColor: ColorPalette.primary, + borderColor: ColorPalette.primary, + }, + variantChipText: { + color: 'rgba(255,255,255,0.6)', + fontSize: 12, + fontWeight: '500', + }, + variantChipTextActive: { color: 'white' }, + + bbox: { + position: 'absolute', + borderWidth: 2, + borderColor: 'cyan', + borderRadius: 4, + }, + bboxLabel: { + position: 'absolute', + top: -22, + left: -2, + paddingHorizontal: 6, + paddingVertical: 2, + borderRadius: 4, + }, + bboxLabelText: { color: 'white', fontSize: 11, fontWeight: '600' }, + + classResultOverlay: { + ...StyleSheet.absoluteFillObject, + justifyContent: 'center', + alignItems: 'center', + }, + classResultLabel: { + color: 'white', + fontSize: 28, + fontWeight: '700', + textAlign: 'center', + textShadowColor: 'rgba(0,0,0,0.8)', + textShadowOffset: { width: 0, height: 1 }, + textShadowRadius: 6, + paddingHorizontal: 24, + }, + classResultScore: { + color: 'rgba(255,255,255,0.75)', + fontSize: 18, + fontWeight: '500', + marginTop: 4, + textShadowColor: 'rgba(0,0,0,0.8)', + textShadowOffset: { width: 0, height: 1 }, + textShadowRadius: 6, + }, + bottomOverlay: { + position: 'absolute', + bottom: 0, + left: 0, + right: 0, + alignItems: 'center', + }, + flipButton: { + width: 56, + height: 56, + borderRadius: 28, + backgroundColor: 'rgba(255,255,255,0.2)', + justifyContent: 'center', + alignItems: 'center', + borderWidth: 1.5, + borderColor: 'rgba(255,255,255,0.4)', + }, +}); diff --git a/apps/computer-vision/package.json b/apps/computer-vision/package.json index cce918197..328e9bc4f 100644 --- a/apps/computer-vision/package.json +++ b/apps/computer-vision/package.json @@ -17,6 +17,7 @@ "@react-navigation/native": "^7.1.6", "@shopify/react-native-skia": "2.2.12", "expo": "^54.0.27", + "expo-build-properties": "~1.0.10", "expo-constants": "~18.0.11", "expo-font": "~14.0.10", "expo-linking": "~8.0.10", @@ -30,17 +31,20 @@ "react-native-gesture-handler": "~2.28.0", 
"react-native-image-picker": "^7.2.2", "react-native-loading-spinner-overlay": "^3.0.1", - "react-native-reanimated": "~4.1.1", + "react-native-nitro-image": "^0.12.0", + "react-native-nitro-modules": "^0.33.9", + "react-native-reanimated": "~4.2.1", "react-native-safe-area-context": "~5.6.0", "react-native-screens": "~4.16.0", "react-native-svg": "15.12.1", "react-native-svg-transformer": "^1.5.0", - "react-native-worklets": "0.5.1" + "react-native-vision-camera": "5.0.0-beta.2", + "react-native-worklets": "^0.7.2" }, "devDependencies": { "@babel/core": "^7.25.2", "@types/pngjs": "^6.0.5", - "@types/react": "~19.1.10" + "@types/react": "~19.2.0" }, "private": true } diff --git a/docs/docs/06-api-reference/classes/ClassificationModule.md b/docs/docs/06-api-reference/classes/ClassificationModule.md index f39a1ae9e..066dd9a45 100644 --- a/docs/docs/06-api-reference/classes/ClassificationModule.md +++ b/docs/docs/06-api-reference/classes/ClassificationModule.md @@ -24,13 +24,87 @@ Module for image classification tasks. ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. + +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). 
+ +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -42,9 +116,11 @@ Native module instance > **delete**(): `void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. 
+ +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -84,7 +160,9 @@ The classification result. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -113,7 +191,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. diff --git a/docs/docs/06-api-reference/classes/ExecutorchModule.md b/docs/docs/06-api-reference/classes/ExecutorchModule.md index 992deeaee..7935e39cf 100644 --- a/docs/docs/06-api-reference/classes/ExecutorchModule.md +++ b/docs/docs/06-api-reference/classes/ExecutorchModule.md @@ -24,13 +24,87 @@ General module for executing custom Executorch models. ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. 
+ +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). + +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -42,9 +116,11 @@ Native module instance > **delete**(): 
`void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. + +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -85,7 +161,9 @@ An array of output tensor pointers. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -114,7 +192,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. 
diff --git a/docs/docs/06-api-reference/classes/ImageEmbeddingsModule.md b/docs/docs/06-api-reference/classes/ImageEmbeddingsModule.md index 68595c61c..ba6016f47 100644 --- a/docs/docs/06-api-reference/classes/ImageEmbeddingsModule.md +++ b/docs/docs/06-api-reference/classes/ImageEmbeddingsModule.md @@ -24,13 +24,87 @@ Module for generating image embeddings from input images. ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. + +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). 
+ +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -42,9 +116,11 @@ Native module instance > **delete**(): `void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. 
+ +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -84,7 +160,9 @@ A Float32Array containing the image embeddings. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -113,7 +191,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. diff --git a/docs/docs/06-api-reference/classes/ImageSegmentationModule.md b/docs/docs/06-api-reference/classes/ImageSegmentationModule.md index b395640ac..6b4128906 100644 --- a/docs/docs/06-api-reference/classes/ImageSegmentationModule.md +++ b/docs/docs/06-api-reference/classes/ImageSegmentationModule.md @@ -21,13 +21,87 @@ or a custom [LabelEnum](../type-aliases/LabelEnum.md) label map. 
## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. + +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). + +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) 
+Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -39,9 +113,11 @@ Native module instance > **delete**(): `void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. + +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -103,7 +179,9 @@ If the model is not loaded. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -132,7 +210,7 @@ Array of output tensors. 
> **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. @@ -168,6 +246,8 @@ The input shape as an array of numbers. Defined in: [modules/computer_vision/ImageSegmentationModule.ts:76](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L76) +Load the model and prepare it for inference. + #### Returns `Promise`\<`void`\> diff --git a/docs/docs/06-api-reference/classes/ObjectDetectionModule.md b/docs/docs/06-api-reference/classes/ObjectDetectionModule.md index 38fd14f56..f0c61d6a6 100644 --- a/docs/docs/06-api-reference/classes/ObjectDetectionModule.md +++ b/docs/docs/06-api-reference/classes/ObjectDetectionModule.md @@ -6,7 +6,7 @@ Module for object detection tasks. ## Extends -- `BaseModule` +- `VisionModule`\<[`Detection`](../interfaces/Detection.md)[]\> ## Constructors @@ -20,21 +20,141 @@ Module for object detection tasks. #### Inherited from -`BaseModule.constructor` +`VisionModule.constructor` ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. + +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. 
+ +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). + +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`VisionModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** + +Native module instance (JSI Host Object) -Native module instance +#### Inherited from + +`VisionModule.nativeModule` + +## Accessors + +### runOnFrame + +#### Get Signature + +> **get** **runOnFrame**(): (`frame`, ...`args`) => `TOutput` \| `null` + +Defined in: 
[modules/computer_vision/VisionModule.ts:61](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts#L61) + +Synchronous worklet function for real-time VisionCamera frame processing. + +Only available after the model is loaded. Returns null if not loaded. + +**Use this for VisionCamera frame processing in worklets.** +For async processing, use `forward()` instead. + +##### Example + +```typescript +const model = new ClassificationModule(); +await model.load({ modelSource: MODEL }); + +// Use the functional form of setState to store the worklet — passing it +// directly would cause React to invoke it immediately as an updater fn. +const [runOnFrame, setRunOnFrame] = useState(null); +setRunOnFrame(() => model.runOnFrame); + +const frameOutput = useFrameOutput({ + onFrame(frame) { + 'worklet'; + if (!runOnFrame) return; + const result = runOnFrame(frame); + frame.dispose(); + }, +}); +``` + +##### Returns + +(`frame`, ...`args`) => `TOutput` \| `null` #### Inherited from -`BaseModule.nativeModule` +`VisionModule.runOnFrame` ## Methods @@ -42,9 +162,11 @@ Native module instance > **delete**(): `void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) + +Unloads the model from memory and releases native resources. -Unloads the model from memory. +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -52,38 +174,70 @@ Unloads the model from memory. 
#### Inherited from -`BaseModule.delete` +`VisionModule.delete` --- ### forward() -> **forward**(`imageSource`, `detectionThreshold`): `Promise`\<[`Detection`](../interfaces/Detection.md)[]\> +> **forward**(`input`, `detectionThreshold`): `Promise`\<[`Detection`](../interfaces/Detection.md)[]\> + +Defined in: [modules/computer_vision/ObjectDetectionModule.ts:46](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts#L46) + +Executes the model's forward pass with automatic input type detection. -Defined in: [modules/computer_vision/ObjectDetectionModule.ts:54](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts#L54) +Supports two input types: -Executes the model's forward pass, where `imageSource` can be a fetchable resource or a Base64-encoded string. -`detectionThreshold` can be supplied to alter the sensitivity of the detection. +1. **String path/URI**: File path, URL, or Base64-encoded string +2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + +**Note**: For VisionCamera frame processing, use `runOnFrame` instead. +This method is async and cannot be called in worklet context. #### Parameters -##### imageSource +##### input -`string` +Image source (string path or PixelData object) -The image source to be processed. +`string` | [`PixelData`](../interfaces/PixelData.md) ##### detectionThreshold -`number` = `0.7` - -The threshold for detection sensitivity. Default is 0.7. +`number` = `0.5` #### Returns `Promise`\<[`Detection`](../interfaces/Detection.md)[]\> -An array of Detection objects representing detected items in the image. +A Promise that resolves to the model output. 
+ +#### Example + +```typescript +// String path (async) +const result1 = await model.forward('file:///path/to/image.jpg'); + +// Pixel data (async) +const result2 = await model.forward({ + dataPtr: new Uint8Array(pixelBuffer), + sizes: [480, 640, 3], + scalarType: ScalarType.BYTE, +}); + +// For VisionCamera frames, use runOnFrame in worklet: +const frameOutput = useFrameOutput({ + onFrame(frame) { + 'worklet'; + if (!model.runOnFrame) return; + const result = model.runOnFrame(frame); + }, +}); +``` + +#### Overrides + +`VisionModule.forward` --- @@ -91,7 +245,9 @@ An array of Detection objects representing detected items in the image. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -112,7 +268,7 @@ Array of output tensors. #### Inherited from -`BaseModule.forwardET` +`VisionModule.forwardET` --- @@ -120,7 +276,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. @@ -146,7 +302,7 @@ The input shape as an array of numbers. 
#### Inherited from -`BaseModule.getInputShape` +`VisionModule.getInputShape` --- @@ -181,4 +337,4 @@ Optional callback to monitor download progress. #### Overrides -`BaseModule.load` +`VisionModule.load` diff --git a/docs/docs/06-api-reference/classes/StyleTransferModule.md b/docs/docs/06-api-reference/classes/StyleTransferModule.md index 1efc27c02..c6923ddf6 100644 --- a/docs/docs/06-api-reference/classes/StyleTransferModule.md +++ b/docs/docs/06-api-reference/classes/StyleTransferModule.md @@ -24,13 +24,87 @@ Module for style transfer tasks. ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. + +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). 
+ +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -42,9 +116,11 @@ Native module instance > **delete**(): `void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. 
+ +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -84,7 +160,9 @@ The stylized image as a Base64-encoded string. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -113,7 +191,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. diff --git a/docs/docs/06-api-reference/classes/TextEmbeddingsModule.md b/docs/docs/06-api-reference/classes/TextEmbeddingsModule.md index 72053896b..9c7dece38 100644 --- a/docs/docs/06-api-reference/classes/TextEmbeddingsModule.md +++ b/docs/docs/06-api-reference/classes/TextEmbeddingsModule.md @@ -24,13 +24,87 @@ Module for generating text embeddings from input text. ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. 
+ +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). + +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -42,9 +116,11 @@ Native module instance > **delete**(): 
`void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. + +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -84,7 +160,9 @@ A Float32Array containing the vector embeddings. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -113,7 +191,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. 
diff --git a/docs/docs/06-api-reference/classes/TextToImageModule.md b/docs/docs/06-api-reference/classes/TextToImageModule.md index 2450c09c3..63bc34ae9 100644 --- a/docs/docs/06-api-reference/classes/TextToImageModule.md +++ b/docs/docs/06-api-reference/classes/TextToImageModule.md @@ -36,13 +36,87 @@ Optional callback function that receives the current step index during inference ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. + +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). 
+ +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -54,9 +128,11 @@ Native module instance > **delete**(): `void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. 
+ +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -115,7 +191,9 @@ A Base64-encoded string representing the generated PNG image. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -144,7 +222,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. diff --git a/docs/docs/06-api-reference/classes/VADModule.md b/docs/docs/06-api-reference/classes/VADModule.md index f37c5239e..996d69832 100644 --- a/docs/docs/06-api-reference/classes/VADModule.md +++ b/docs/docs/06-api-reference/classes/VADModule.md @@ -24,13 +24,87 @@ Module for Voice Activity Detection (VAD) functionalities. ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. 
+ +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). + +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -42,9 +116,11 @@ Native module instance > **delete**(): 
`void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. + +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -84,7 +160,9 @@ A promise resolving to an array of detected speech segments. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -113,7 +191,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. 
diff --git a/docs/docs/06-api-reference/enumerations/RnExecutorchErrorCode.md b/docs/docs/06-api-reference/enumerations/RnExecutorchErrorCode.md index c5cdde479..8af6a41a0 100644 --- a/docs/docs/06-api-reference/enumerations/RnExecutorchErrorCode.md +++ b/docs/docs/06-api-reference/enumerations/RnExecutorchErrorCode.md @@ -8,7 +8,7 @@ Defined in: [errors/ErrorCodes.ts:4](https://github.com/software-mansion/react-n > **AccessFailed**: `34` -Defined in: [errors/ErrorCodes.ts:156](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L156) +Defined in: [errors/ErrorCodes.ts:160](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L160) Could not access a resource. @@ -18,7 +18,7 @@ Could not access a resource. > **DelegateInvalidCompatibility**: `48` -Defined in: [errors/ErrorCodes.ts:172](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L172) +Defined in: [errors/ErrorCodes.ts:176](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L176) Init stage: Backend receives an incompatible delegate version. @@ -28,7 +28,7 @@ Init stage: Backend receives an incompatible delegate version. > **DelegateInvalidHandle**: `50` -Defined in: [errors/ErrorCodes.ts:180](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L180) +Defined in: [errors/ErrorCodes.ts:184](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L184) Execute stage: The handle is invalid. @@ -38,7 +38,7 @@ Execute stage: The handle is invalid. 
> **DelegateMemoryAllocationFailed**: `49` -Defined in: [errors/ErrorCodes.ts:176](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L176) +Defined in: [errors/ErrorCodes.ts:180](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L180) Init stage: Backend fails to allocate memory. @@ -58,7 +58,7 @@ Thrown when the number of downloaded files is unexpected, due to download interr > **EndOfMethod**: `3` -Defined in: [errors/ErrorCodes.ts:124](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L124) +Defined in: [errors/ErrorCodes.ts:128](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L128) Status indicating there are no more steps of execution to run @@ -88,7 +88,7 @@ An error ocurred when saving a file. This could be, for instance a result image > **Internal**: `1` -Defined in: [errors/ErrorCodes.ts:116](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L116) +Defined in: [errors/ErrorCodes.ts:120](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L120) An internal error occurred. @@ -98,7 +98,7 @@ An internal error occurred. > **InvalidArgument**: `18` -Defined in: [errors/ErrorCodes.ts:136](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L136) +Defined in: [errors/ErrorCodes.ts:140](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L140) User provided an invalid argument. 
@@ -118,7 +118,7 @@ Thrown when config parameters passed to a model are invalid. For example, when L > **InvalidExternalData**: `36` -Defined in: [errors/ErrorCodes.ts:164](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L164) +Defined in: [errors/ErrorCodes.ts:168](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L168) Error caused by the contents of external data. @@ -148,7 +148,7 @@ Thrown when the type of model source passed by the user is invalid. > **InvalidProgram**: `35` -Defined in: [errors/ErrorCodes.ts:160](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L160) +Defined in: [errors/ErrorCodes.ts:164](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L164) Error caused by the contents of a program. @@ -158,7 +158,7 @@ Error caused by the contents of a program. > **InvalidState**: `2` -Defined in: [errors/ErrorCodes.ts:120](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L120) +Defined in: [errors/ErrorCodes.ts:124](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L124) Status indicating the executor is in an invalid state for a targeted operation. @@ -168,7 +168,7 @@ Status indicating the executor is in an invalid state for a targeted operation. 
> **InvalidType**: `19` -Defined in: [errors/ErrorCodes.ts:140](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L140) +Defined in: [errors/ErrorCodes.ts:144](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L144) Object is an invalid type for the operation. @@ -198,7 +198,7 @@ Thrown when a language is passed to a multi-language model that is not supported > **MemoryAllocationFailed**: `33` -Defined in: [errors/ErrorCodes.ts:152](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L152) +Defined in: [errors/ErrorCodes.ts:156](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L156) Could not allocate the requested memory. @@ -208,7 +208,7 @@ Could not allocate the requested memory. > **MissingDataChunk**: `161` -Defined in: [errors/ErrorCodes.ts:72](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L72) +Defined in: [errors/ErrorCodes.ts:76](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L76) Thrown when streaming transcription is attempted but audio data chunk is missing. 
@@ -238,7 +238,7 @@ Thrown when a user tries to run a model that is not yet downloaded or loaded int > **MultilingualConfiguration**: `160` -Defined in: [errors/ErrorCodes.ts:68](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L68) +Defined in: [errors/ErrorCodes.ts:72](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L72) Thrown when there's a configuration mismatch between multilingual and language settings in Speech-to-Text models. @@ -248,7 +248,7 @@ Thrown when there's a configuration mismatch between multilingual and language s > **NotFound**: `32` -Defined in: [errors/ErrorCodes.ts:148](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L148) +Defined in: [errors/ErrorCodes.ts:152](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L152) Requested resource could not be found. @@ -258,7 +258,7 @@ Requested resource could not be found. > **NotImplemented**: `17` -Defined in: [errors/ErrorCodes.ts:132](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L132) +Defined in: [errors/ErrorCodes.ts:136](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L136) Operation is not yet implemented. @@ -268,7 +268,7 @@ Operation is not yet implemented. 
> **NotSupported**: `16` -Defined in: [errors/ErrorCodes.ts:128](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L128) +Defined in: [errors/ErrorCodes.ts:132](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L132) Operation is not supported in the current context. @@ -278,7 +278,7 @@ Operation is not supported in the current context. > **Ok**: `0` -Defined in: [errors/ErrorCodes.ts:112](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L112) +Defined in: [errors/ErrorCodes.ts:116](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L116) Status indicating a successful operation. @@ -288,7 +288,7 @@ Status indicating a successful operation. > **OperatorMissing**: `20` -Defined in: [errors/ErrorCodes.ts:144](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L144) +Defined in: [errors/ErrorCodes.ts:148](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L148) Operator(s) missing in the operator registry. @@ -298,17 +298,27 @@ Operator(s) missing in the operator registry. > **OutOfResources**: `37` -Defined in: [errors/ErrorCodes.ts:168](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L168) +Defined in: [errors/ErrorCodes.ts:172](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L172) Does not have enough resources to perform the requested operation. 
--- +### PlatformNotSupported + +> **PlatformNotSupported**: `119` + +Defined in: [errors/ErrorCodes.ts:64](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L64) + +Thrown when a feature or platform is not supported in the current environment. + +--- + ### ResourceFetcherAdapterNotInitialized > **ResourceFetcherAdapterNotInitialized**: `186` -Defined in: [errors/ErrorCodes.ts:108](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L108) +Defined in: [errors/ErrorCodes.ts:112](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L112) Thrown when trying to load resources without fetcher initialization. @@ -318,7 +328,7 @@ Thrown when trying to load resources without fetcher initialization. > **ResourceFetcherAlreadyOngoing**: `183` -Defined in: [errors/ErrorCodes.ts:96](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L96) +Defined in: [errors/ErrorCodes.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L100) Thrown when trying to resume a download that is already ongoing. @@ -328,7 +338,7 @@ Thrown when trying to resume a download that is already ongoing. > **ResourceFetcherAlreadyPaused**: `182` -Defined in: [errors/ErrorCodes.ts:92](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L92) +Defined in: [errors/ErrorCodes.ts:96](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L96) Thrown when trying to pause a download that is already paused. @@ -338,7 +348,7 @@ Thrown when trying to pause a download that is already paused. 
> **ResourceFetcherDownloadFailed**: `180` -Defined in: [errors/ErrorCodes.ts:84](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L84) +Defined in: [errors/ErrorCodes.ts:88](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L88) Thrown when a resource fails to download. This could be due to invalid URL, or for example a network problem. @@ -348,7 +358,7 @@ Thrown when a resource fails to download. This could be due to invalid URL, or f > **ResourceFetcherDownloadInProgress**: `181` -Defined in: [errors/ErrorCodes.ts:88](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L88) +Defined in: [errors/ErrorCodes.ts:92](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L92) Thrown when a user tries to trigger a download that's already in progress. @@ -358,7 +368,7 @@ Thrown when a user tries to trigger a download that's already in progress. > **ResourceFetcherMissingUri**: `185` -Defined in: [errors/ErrorCodes.ts:104](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L104) +Defined in: [errors/ErrorCodes.ts:108](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L108) Thrown when required URI information is missing for a download operation. @@ -368,7 +378,7 @@ Thrown when required URI information is missing for a download operation. 
> **ResourceFetcherNotActive**: `184` -Defined in: [errors/ErrorCodes.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L100) +Defined in: [errors/ErrorCodes.ts:104](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L104) Thrown when trying to pause, resume, or cancel a download that is not active. @@ -378,7 +388,7 @@ Thrown when trying to pause, resume, or cancel a download that is not active. > **StreamingInProgress**: `163` -Defined in: [errors/ErrorCodes.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L80) +Defined in: [errors/ErrorCodes.ts:84](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L84) Thrown when trying to start a new streaming session while another is already in progress. @@ -388,7 +398,7 @@ Thrown when trying to start a new streaming session while another is already in > **StreamingNotStarted**: `162` -Defined in: [errors/ErrorCodes.ts:76](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L76) +Defined in: [errors/ErrorCodes.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L80) Thrown when trying to stop or insert data into a stream that hasn't been started. @@ -408,7 +418,7 @@ Thrown when React Native ExecuTorch threadpool problem occurs. 
> **TokenizerError**: `167` -Defined in: [errors/ErrorCodes.ts:64](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L64) +Defined in: [errors/ErrorCodes.ts:68](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L68) Thrown when an error occurs with the tokenizer or tokenization process. diff --git a/docs/docs/06-api-reference/index.md b/docs/docs/06-api-reference/index.md index 125046b67..f49c25e9d 100644 --- a/docs/docs/06-api-reference/index.md +++ b/docs/docs/06-api-reference/index.md @@ -186,6 +186,7 @@ - [RnExecutorchErrorCode](enumerations/RnExecutorchErrorCode.md) - [Logger](classes/Logger.md) - [RnExecutorchError](classes/RnExecutorchError.md) +- [Frame](interfaces/Frame.md) ## TTS Supported Voices @@ -232,6 +233,7 @@ - [OCRDetection](interfaces/OCRDetection.md) - [OCRProps](interfaces/OCRProps.md) - [OCRType](interfaces/OCRType.md) +- [PixelData](interfaces/PixelData.md) - [Point](interfaces/Point.md) - [Segment](interfaces/Segment.md) - [SpeechToTextModelConfig](interfaces/SpeechToTextModelConfig.md) diff --git a/docs/docs/06-api-reference/interfaces/Frame.md b/docs/docs/06-api-reference/interfaces/Frame.md new file mode 100644 index 000000000..149a3837f --- /dev/null +++ b/docs/docs/06-api-reference/interfaces/Frame.md @@ -0,0 +1,36 @@ +# Interface: Frame + +Defined in: [types/common.ts:197](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/common.ts#L197) + +Frame data for vision model processing. + +## Methods + +### getNativeBuffer() + +> **getNativeBuffer**(): `object` + +Defined in: [types/common.ts:205](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/common.ts#L205) + +Pointer to native platform buffer (zero-copy, best performance). 
+ +- On iOS: CVPixelBufferRef pointer +- On Android: AHardwareBuffer\* pointer + +Obtain from Vision Camera v5: `frame.getNativeBuffer().pointer` + +#### Returns + +`object` + +##### pointer + +> **pointer**: `bigint` + +##### release() + +> **release**(): `void` + +###### Returns + +`void` diff --git a/docs/docs/06-api-reference/interfaces/ObjectDetectionType.md b/docs/docs/06-api-reference/interfaces/ObjectDetectionType.md index a9f28e5cf..4bd5dba98 100644 --- a/docs/docs/06-api-reference/interfaces/ObjectDetectionType.md +++ b/docs/docs/06-api-reference/interfaces/ObjectDetectionType.md @@ -29,36 +29,57 @@ Contains the error object if the model failed to load, download, or encountered ### forward() -> **forward**: (`imageSource`, `detectionThreshold?`) => `Promise`\<[`Detection`](Detection.md)[]\> +> **forward**: (`input`, `detectionThreshold?`) => `Promise`\<[`Detection`](Detection.md)[]\> -Defined in: [types/objectDetection.ts:179](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/objectDetection.ts#L179) +Defined in: [types/objectDetection.ts:199](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/objectDetection.ts#L199) -Executes the model's forward pass to detect objects within the provided image. +Executes the model's forward pass with automatic input type detection. + +Supports two input types: + +1. **String path/URI**: File path, URL, or Base64-encoded string +2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + +**Note**: For VisionCamera frame processing, use `runOnFrame` instead. #### Parameters -##### imageSource +##### input -`string` +Image source (string or PixelData object) -A string representing the image source (e.g., a file path, URI, or base64 string) to be processed. +`string` | [`PixelData`](PixelData.md) ##### detectionThreshold? 
`number` -An optional number between 0 and 1 representing the minimum confidence score required for an object to be included in the results. Default is 0.7. +An optional number between 0 and 1 representing the minimum confidence score. Default is 0.5. #### Returns `Promise`\<[`Detection`](Detection.md)[]\> -A Promise that resolves to an array of `Detection` objects, where each object typically contains bounding box coordinates, a class label, and a confidence score. +A Promise that resolves to an array of `Detection` objects. #### Throws If the model is not loaded or is currently processing another image. +#### Example + +```typescript +// String path +const detections1 = await model.forward('file:///path/to/image.jpg'); + +// Pixel data +const detections2 = await model.forward({ + dataPtr: new Uint8Array(rgbPixels), + sizes: [480, 640, 3], + scalarType: ScalarType.BYTE, +}); +``` + --- ### isGenerating @@ -78,3 +99,46 @@ Indicates whether the model is currently processing an image. Defined in: [types/objectDetection.ts:160](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/objectDetection.ts#L160) Indicates whether the object detection model is loaded and ready to process images. + +--- + +### runOnFrame + +> **runOnFrame**: (`frame`, `detectionThreshold`) => [`Detection`](Detection.md)[] \| `null` + +Defined in: [types/objectDetection.ts:231](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/objectDetection.ts#L231) + +Synchronous worklet function for real-time VisionCamera frame processing. +Automatically handles native buffer extraction and cleanup. + +**Use this for VisionCamera frame processing in worklets.** +For async processing, use `forward()` instead. + +Available after model is loaded (`isReady: true`). 
+ +#### Example + +```typescript +const { runOnFrame, isReady } = useObjectDetection({ model: MODEL }); + +const frameOutput = useFrameOutput({ + onFrame(frame) { + 'worklet'; + if (!runOnFrame) return; + const detections = runOnFrame(frame, 0.5); + frame.dispose(); + }, +}); +``` + +#### Param + +VisionCamera Frame object + +#### Param + +The threshold for detection sensitivity. + +#### Returns + +Array of Detection objects representing detected items in the frame. diff --git a/docs/docs/06-api-reference/interfaces/PixelData.md b/docs/docs/06-api-reference/interfaces/PixelData.md new file mode 100644 index 000000000..7ef9865aa --- /dev/null +++ b/docs/docs/06-api-reference/interfaces/PixelData.md @@ -0,0 +1,65 @@ +# Interface: PixelData + +Defined in: [types/common.ts:172](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/common.ts#L172) + +Represents raw pixel data in RGB format for vision models. + +This type extends TensorPtr with constraints specific to image data: + +- dataPtr must be Uint8Array (8-bit unsigned integers) +- scalarType is always BYTE (ScalarType.BYTE) +- sizes represents [height, width, channels] where channels must be 3 (RGB) + +## Example + +```typescript +const pixelData: PixelData = { + dataPtr: new Uint8Array(width * height * 3), // RGB pixel data + sizes: [height, width, 3], // [height, width, channels] + scalarType: ScalarType.BYTE, +}; +``` + +## Extends + +- `Omit`\<[`TensorPtr`](TensorPtr.md), `"dataPtr"` \| `"scalarType"`\> + +## Properties + +### dataPtr + +> **dataPtr**: `Uint8Array` + +Defined in: [types/common.ts:178](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/common.ts#L178) + +RGB pixel data as Uint8Array. +Expected format: RGB (3 channels), not RGBA or BGRA. 
+Size must equal: width \* height \* 3 + +--- + +### scalarType + +> **scalarType**: [`BYTE`](../enumerations/ScalarType.md#byte) + +Defined in: [types/common.ts:191](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/common.ts#L191) + +Scalar type is always BYTE for pixel data. + +--- + +### sizes + +> **sizes**: \[`number`, `number`, `3`\] + +Defined in: [types/common.ts:186](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/common.ts#L186) + +Dimensions of the pixel data: [height, width, channels]. + +- sizes[0]: height (number of rows) +- sizes[1]: width (number of columns) +- sizes[2]: channels (must be 3 for RGB) + +#### Overrides + +[`TensorPtr`](TensorPtr.md).[`sizes`](TensorPtr.md#sizes) diff --git a/docs/docs/06-api-reference/typedoc-sidebar.cjs b/docs/docs/06-api-reference/typedoc-sidebar.cjs index bbd478710..f5d9ec3d4 100644 --- a/docs/docs/06-api-reference/typedoc-sidebar.cjs +++ b/docs/docs/06-api-reference/typedoc-sidebar.cjs @@ -1,4 +1,4 @@ // @ts-check /** @type {import("@docusaurus/plugin-content-docs").SidebarsConfig} */ -const typedocSidebar = 
{items:[{type:"category",label:"Hooks",items:[{type:"doc",id:"06-api-reference/functions/useClassification",label:"useClassification"},{type:"doc",id:"06-api-reference/functions/useExecutorchModule",label:"useExecutorchModule"},{type:"doc",id:"06-api-reference/functions/useImageEmbeddings",label:"useImageEmbeddings"},{type:"doc",id:"06-api-reference/functions/useImageSegmentation",label:"useImageSegmentation"},{type:"doc",id:"06-api-reference/functions/useLLM",label:"useLLM"},{type:"doc",id:"06-api-reference/functions/useObjectDetection",label:"useObjectDetection"},{type:"doc",id:"06-api-reference/functions/useOCR",label:"useOCR"},{type:"doc",id:"06-api-reference/functions/useSpeechToText",label:"useSpeechToText"},{type:"doc",id:"06-api-reference/functions/useStyleTransfer",label:"useStyleTransfer"},{type:"doc",id:"06-api-reference/functions/useTextEmbeddings",label:"useTextEmbeddings"},{type:"doc",id:"06-api-reference/functions/useTextToImage",label:"useTextToImage"},{type:"doc",id:"06-api-reference/functions/useTextToSpeech",label:"useTextToSpeech"},{type:"doc",id:"06-api-reference/functions/useTokenizer",label:"useTokenizer"},{type:"doc",id:"06-api-reference/functions/useVAD",label:"useVAD"},{type:"doc",id:"06-api-reference/functions/useVerticalOCR",label:"useVerticalOCR"}]},{type:"category",label:"Interfaces",items:[{type:"doc",id:"06-api-reference/interfaces/ResourceSourceExtended",label:"ResourceSourceExtended"}]},{type:"category",label:"Models - Classification",items:[{type:"doc",id:"06-api-reference/variables/EFFICIENTNET_V2_S",label:"EFFICIENTNET_V2_S"}]},{type:"category",label:"Models - Image Embeddings",items:[{type:"doc",id:"06-api-reference/variables/CLIP_VIT_BASE_PATCH32_IMAGE",label:"CLIP_VIT_BASE_PATCH32_IMAGE"}]},{type:"category",label:"Models - Image 
Generation",items:[{type:"doc",id:"06-api-reference/variables/BK_SDM_TINY_VPRED_256",label:"BK_SDM_TINY_VPRED_256"},{type:"doc",id:"06-api-reference/variables/BK_SDM_TINY_VPRED_512",label:"BK_SDM_TINY_VPRED_512"}]},{type:"category",label:"Models - Image Segmentation",items:[{type:"doc",id:"06-api-reference/variables/DEEPLAB_V3_RESNET50",label:"DEEPLAB_V3_RESNET50"},{type:"doc",id:"06-api-reference/variables/SELFIE_SEGMENTATION",label:"SELFIE_SEGMENTATION"}]},{type:"category",label:"Models - LMM",items:[{type:"doc",id:"06-api-reference/variables/HAMMER2_1_0_5B",label:"HAMMER2_1_0_5B"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_0_5B_QUANTIZED",label:"HAMMER2_1_0_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_1_5B",label:"HAMMER2_1_1_5B"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_1_5B_QUANTIZED",label:"HAMMER2_1_1_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_3B",label:"HAMMER2_1_3B"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_3B_QUANTIZED",label:"HAMMER2_1_3B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/LFM2_5_1_2B_INSTRUCT",label:"LFM2_5_1_2B_INSTRUCT"},{type:"doc",id:"06-api-reference/variables/LFM2_5_1_2B_INSTRUCT_QUANTIZED",label:"LFM2_5_1_2B_INSTRUCT_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_1B",label:"LLAMA3_2_1B"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_1B_QLORA",label:"LLAMA3_2_1B_QLORA"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_1B_SPINQUANT",label:"LLAMA3_2_1B_SPINQUANT"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_3B",label:"LLAMA3_2_3B"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_3B_QLORA",label:"LLAMA3_2_3B_QLORA"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_3B_SPINQUANT",label:"LLAMA3_2_3B_SPINQUANT"},{type:"doc",id:"06-api-reference/variables/PHI_4_MINI_4B",label:"PHI_4_MINI_4B"},{type:"doc",id:"06-api-reference/variables/PHI_4_MINI_4B_QUANTIZED",label:"PHI_4_MINI_4B_QUANTIZED"},{type:"doc",id:"06-api-r
eference/variables/QWEN2_5_0_5B",label:"QWEN2_5_0_5B"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_0_5B_QUANTIZED",label:"QWEN2_5_0_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_1_5B",label:"QWEN2_5_1_5B"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_1_5B_QUANTIZED",label:"QWEN2_5_1_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_3B",label:"QWEN2_5_3B"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_3B_QUANTIZED",label:"QWEN2_5_3B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN3_0_6B",label:"QWEN3_0_6B"},{type:"doc",id:"06-api-reference/variables/QWEN3_0_6B_QUANTIZED",label:"QWEN3_0_6B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN3_1_7B",label:"QWEN3_1_7B"},{type:"doc",id:"06-api-reference/variables/QWEN3_1_7B_QUANTIZED",label:"QWEN3_1_7B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN3_4B",label:"QWEN3_4B"},{type:"doc",id:"06-api-reference/variables/QWEN3_4B_QUANTIZED",label:"QWEN3_4B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_1_7B",label:"SMOLLM2_1_1_7B"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_1_7B_QUANTIZED",label:"SMOLLM2_1_1_7B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_135M",label:"SMOLLM2_1_135M"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_135M_QUANTIZED",label:"SMOLLM2_1_135M_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_360M",label:"SMOLLM2_1_360M"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_360M_QUANTIZED",label:"SMOLLM2_1_360M_QUANTIZED"}]},{type:"category",label:"Models - Object Detection",items:[{type:"doc",id:"06-api-reference/variables/SSDLITE_320_MOBILENET_V3_LARGE",label:"SSDLITE_320_MOBILENET_V3_LARGE"}]},{type:"category",label:"Models - Speech To 
Text",items:[{type:"doc",id:"06-api-reference/variables/WHISPER_BASE",label:"WHISPER_BASE"},{type:"doc",id:"06-api-reference/variables/WHISPER_BASE_EN",label:"WHISPER_BASE_EN"},{type:"doc",id:"06-api-reference/variables/WHISPER_SMALL",label:"WHISPER_SMALL"},{type:"doc",id:"06-api-reference/variables/WHISPER_SMALL_EN",label:"WHISPER_SMALL_EN"},{type:"doc",id:"06-api-reference/variables/WHISPER_TINY",label:"WHISPER_TINY"},{type:"doc",id:"06-api-reference/variables/WHISPER_TINY_EN",label:"WHISPER_TINY_EN"},{type:"doc",id:"06-api-reference/variables/WHISPER_TINY_EN_QUANTIZED",label:"WHISPER_TINY_EN_QUANTIZED"}]},{type:"category",label:"Models - Style Transfer",items:[{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_CANDY",label:"STYLE_TRANSFER_CANDY"},{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_MOSAIC",label:"STYLE_TRANSFER_MOSAIC"},{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_RAIN_PRINCESS",label:"STYLE_TRANSFER_RAIN_PRINCESS"},{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_UDNIE",label:"STYLE_TRANSFER_UDNIE"}]},{type:"category",label:"Models - Text Embeddings",items:[{type:"doc",id:"06-api-reference/variables/ALL_MINILM_L6_V2",label:"ALL_MINILM_L6_V2"},{type:"doc",id:"06-api-reference/variables/ALL_MPNET_BASE_V2",label:"ALL_MPNET_BASE_V2"},{type:"doc",id:"06-api-reference/variables/CLIP_VIT_BASE_PATCH32_TEXT",label:"CLIP_VIT_BASE_PATCH32_TEXT"},{type:"doc",id:"06-api-reference/variables/MULTI_QA_MINILM_L6_COS_V1",label:"MULTI_QA_MINILM_L6_COS_V1"},{type:"doc",id:"06-api-reference/variables/MULTI_QA_MPNET_BASE_DOT_V1",label:"MULTI_QA_MPNET_BASE_DOT_V1"}]},{type:"category",label:"Models - Text to Speech",items:[{type:"doc",id:"06-api-reference/variables/KOKORO_MEDIUM",label:"KOKORO_MEDIUM"},{type:"doc",id:"06-api-reference/variables/KOKORO_SMALL",label:"KOKORO_SMALL"}]},{type:"category",label:"Models - Voice Activity 
Detection",items:[{type:"doc",id:"06-api-reference/variables/FSMN_VAD",label:"FSMN_VAD"}]},{type:"category",label:"OCR Supported Alphabets",items:[{type:"doc",id:"06-api-reference/variables/OCR_ABAZA",label:"OCR_ABAZA"},{type:"doc",id:"06-api-reference/variables/OCR_ADYGHE",label:"OCR_ADYGHE"},{type:"doc",id:"06-api-reference/variables/OCR_AFRIKAANS",label:"OCR_AFRIKAANS"},{type:"doc",id:"06-api-reference/variables/OCR_ALBANIAN",label:"OCR_ALBANIAN"},{type:"doc",id:"06-api-reference/variables/OCR_AVAR",label:"OCR_AVAR"},{type:"doc",id:"06-api-reference/variables/OCR_AZERBAIJANI",label:"OCR_AZERBAIJANI"},{type:"doc",id:"06-api-reference/variables/OCR_BELARUSIAN",label:"OCR_BELARUSIAN"},{type:"doc",id:"06-api-reference/variables/OCR_BOSNIAN",label:"OCR_BOSNIAN"},{type:"doc",id:"06-api-reference/variables/OCR_BULGARIAN",label:"OCR_BULGARIAN"},{type:"doc",id:"06-api-reference/variables/OCR_CHECHEN",label:"OCR_CHECHEN"},{type:"doc",id:"06-api-reference/variables/OCR_CROATIAN",label:"OCR_CROATIAN"},{type:"doc",id:"06-api-reference/variables/OCR_CZECH",label:"OCR_CZECH"},{type:"doc",id:"06-api-reference/variables/OCR_DANISH",label:"OCR_DANISH"},{type:"doc",id:"06-api-reference/variables/OCR_DARGWA",label:"OCR_DARGWA"},{type:"doc",id:"06-api-reference/variables/OCR_DUTCH",label:"OCR_DUTCH"},{type:"doc",id:"06-api-reference/variables/OCR_ENGLISH",label:"OCR_ENGLISH"},{type:"doc",id:"06-api-reference/variables/OCR_ESTONIAN",label:"OCR_ESTONIAN"},{type:"doc",id:"06-api-reference/variables/OCR_FRENCH",label:"OCR_FRENCH"},{type:"doc",id:"06-api-reference/variables/OCR_GERMAN",label:"OCR_GERMAN"},{type:"doc",id:"06-api-reference/variables/OCR_HUNGARIAN",label:"OCR_HUNGARIAN"},{type:"doc",id:"06-api-reference/variables/OCR_ICELANDIC",label:"OCR_ICELANDIC"},{type:"doc",id:"06-api-reference/variables/OCR_INDONESIAN",label:"OCR_INDONESIAN"},{type:"doc",id:"06-api-reference/variables/OCR_INGUSH",label:"OCR_INGUSH"},{type:"doc",id:"06-api-reference/variables/OCR_IRISH",label:"OCR_IRISH
"},{type:"doc",id:"06-api-reference/variables/OCR_ITALIAN",label:"OCR_ITALIAN"},{type:"doc",id:"06-api-reference/variables/OCR_JAPANESE",label:"OCR_JAPANESE"},{type:"doc",id:"06-api-reference/variables/OCR_KANNADA",label:"OCR_KANNADA"},{type:"doc",id:"06-api-reference/variables/OCR_KARBADIAN",label:"OCR_KARBADIAN"},{type:"doc",id:"06-api-reference/variables/OCR_KOREAN",label:"OCR_KOREAN"},{type:"doc",id:"06-api-reference/variables/OCR_KURDISH",label:"OCR_KURDISH"},{type:"doc",id:"06-api-reference/variables/OCR_LAK",label:"OCR_LAK"},{type:"doc",id:"06-api-reference/variables/OCR_LATIN",label:"OCR_LATIN"},{type:"doc",id:"06-api-reference/variables/OCR_LATVIAN",label:"OCR_LATVIAN"},{type:"doc",id:"06-api-reference/variables/OCR_LEZGHIAN",label:"OCR_LEZGHIAN"},{type:"doc",id:"06-api-reference/variables/OCR_LITHUANIAN",label:"OCR_LITHUANIAN"},{type:"doc",id:"06-api-reference/variables/OCR_MALAY",label:"OCR_MALAY"},{type:"doc",id:"06-api-reference/variables/OCR_MALTESE",label:"OCR_MALTESE"},{type:"doc",id:"06-api-reference/variables/OCR_MAORI",label:"OCR_MAORI"},{type:"doc",id:"06-api-reference/variables/OCR_MONGOLIAN",label:"OCR_MONGOLIAN"},{type:"doc",id:"06-api-reference/variables/OCR_NORWEGIAN",label:"OCR_NORWEGIAN"},{type:"doc",id:"06-api-reference/variables/OCR_OCCITAN",label:"OCR_OCCITAN"},{type:"doc",id:"06-api-reference/variables/OCR_PALI",label:"OCR_PALI"},{type:"doc",id:"06-api-reference/variables/OCR_POLISH",label:"OCR_POLISH"},{type:"doc",id:"06-api-reference/variables/OCR_PORTUGUESE",label:"OCR_PORTUGUESE"},{type:"doc",id:"06-api-reference/variables/OCR_ROMANIAN",label:"OCR_ROMANIAN"},{type:"doc",id:"06-api-reference/variables/OCR_RUSSIAN",label:"OCR_RUSSIAN"},{type:"doc",id:"06-api-reference/variables/OCR_SERBIAN_CYRILLIC",label:"OCR_SERBIAN_CYRILLIC"},{type:"doc",id:"06-api-reference/variables/OCR_SERBIAN_LATIN",label:"OCR_SERBIAN_LATIN"},{type:"doc",id:"06-api-reference/variables/OCR_SIMPLIFIED_CHINESE",label:"OCR_SIMPLIFIED_CHINESE"},{type:"doc",id:"06-a
pi-reference/variables/OCR_SLOVAK",label:"OCR_SLOVAK"},{type:"doc",id:"06-api-reference/variables/OCR_SLOVENIAN",label:"OCR_SLOVENIAN"},{type:"doc",id:"06-api-reference/variables/OCR_SPANISH",label:"OCR_SPANISH"},{type:"doc",id:"06-api-reference/variables/OCR_SWAHILI",label:"OCR_SWAHILI"},{type:"doc",id:"06-api-reference/variables/OCR_SWEDISH",label:"OCR_SWEDISH"},{type:"doc",id:"06-api-reference/variables/OCR_TABASSARAN",label:"OCR_TABASSARAN"},{type:"doc",id:"06-api-reference/variables/OCR_TAGALOG",label:"OCR_TAGALOG"},{type:"doc",id:"06-api-reference/variables/OCR_TAJIK",label:"OCR_TAJIK"},{type:"doc",id:"06-api-reference/variables/OCR_TELUGU",label:"OCR_TELUGU"},{type:"doc",id:"06-api-reference/variables/OCR_TURKISH",label:"OCR_TURKISH"},{type:"doc",id:"06-api-reference/variables/OCR_UKRAINIAN",label:"OCR_UKRAINIAN"},{type:"doc",id:"06-api-reference/variables/OCR_UZBEK",label:"OCR_UZBEK"},{type:"doc",id:"06-api-reference/variables/OCR_VIETNAMESE",label:"OCR_VIETNAMESE"},{type:"doc",id:"06-api-reference/variables/OCR_WELSH",label:"OCR_WELSH"}]},{type:"category",label:"Other",items:[{type:"doc",id:"06-api-reference/enumerations/RnExecutorchErrorCode",label:"RnExecutorchErrorCode"},{type:"doc",id:"06-api-reference/classes/Logger",label:"Logger"},{type:"doc",id:"06-api-reference/classes/RnExecutorchError",label:"RnExecutorchError"}]},{type:"category",label:"TTS Supported 
Voices",items:[{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AF_HEART",label:"KOKORO_VOICE_AF_HEART"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AF_RIVER",label:"KOKORO_VOICE_AF_RIVER"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AF_SARAH",label:"KOKORO_VOICE_AF_SARAH"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AM_ADAM",label:"KOKORO_VOICE_AM_ADAM"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AM_MICHAEL",label:"KOKORO_VOICE_AM_MICHAEL"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AM_SANTA",label:"KOKORO_VOICE_AM_SANTA"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_BF_EMMA",label:"KOKORO_VOICE_BF_EMMA"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_BM_DANIEL",label:"KOKORO_VOICE_BM_DANIEL"}]},{type:"category",label:"Types",items:[{type:"doc",id:"06-api-reference/enumerations/CocoLabel",label:"CocoLabel"},{type:"doc",id:"06-api-reference/enumerations/DeeplabLabel",label:"DeeplabLabel"},{type:"doc",id:"06-api-reference/enumerations/DownloadStatus",label:"DownloadStatus"},{type:"doc",id:"06-api-reference/enumerations/HTTP_CODE",label:"HTTP_CODE"},{type:"doc",id:"06-api-reference/enumerations/ScalarType",label:"ScalarType"},{type:"doc",id:"06-api-reference/enumerations/SelfieSegmentationLabel",label:"SelfieSegmentationLabel"},{type:"doc",id:"06-api-reference/enumerations/SourceType",label:"SourceType"},{type:"doc",id:"06-api-reference/interfaces/Bbox",label:"Bbox"},{type:"doc",id:"06-api-reference/interfaces/ChatConfig",label:"ChatConfig"},{type:"doc",id:"06-api-reference/interfaces/ClassificationProps",label:"ClassificationProps"},{type:"doc",id:"06-api-reference/interfaces/ClassificationType",label:"ClassificationType"},{type:"doc",id:"06-api-reference/interfaces/ContextStrategy",label:"ContextStrategy"},{type:"doc",id:"06-api-reference/interfaces/DecodingOptions",label:"DecodingOptions"},{type:"doc",id:"06-api-reference/interfaces/Detection",label:"Detection"},{type:"doc",id:
"06-api-reference/interfaces/ExecutorchModuleProps",label:"ExecutorchModuleProps"},{type:"doc",id:"06-api-reference/interfaces/ExecutorchModuleType",label:"ExecutorchModuleType"},{type:"doc",id:"06-api-reference/interfaces/GenerationConfig",label:"GenerationConfig"},{type:"doc",id:"06-api-reference/interfaces/ImageEmbeddingsProps",label:"ImageEmbeddingsProps"},{type:"doc",id:"06-api-reference/interfaces/ImageEmbeddingsType",label:"ImageEmbeddingsType"},{type:"doc",id:"06-api-reference/interfaces/ImageSegmentationProps",label:"ImageSegmentationProps"},{type:"doc",id:"06-api-reference/interfaces/ImageSegmentationType",label:"ImageSegmentationType"},{type:"doc",id:"06-api-reference/interfaces/KokoroConfig",label:"KokoroConfig"},{type:"doc",id:"06-api-reference/interfaces/KokoroVoiceExtras",label:"KokoroVoiceExtras"},{type:"doc",id:"06-api-reference/interfaces/LLMConfig",label:"LLMConfig"},{type:"doc",id:"06-api-reference/interfaces/LLMProps",label:"LLMProps"},{type:"doc",id:"06-api-reference/interfaces/LLMType",label:"LLMType"},{type:"doc",id:"06-api-reference/interfaces/Message",label:"Message"},{type:"doc",id:"06-api-reference/interfaces/ObjectDetectionProps",label:"ObjectDetectionProps"},{type:"doc",id:"06-api-reference/interfaces/ObjectDetectionType",label:"ObjectDetectionType"},{type:"doc",id:"06-api-reference/interfaces/OCRDetection",label:"OCRDetection"},{type:"doc",id:"06-api-reference/interfaces/OCRProps",label:"OCRProps"},{type:"doc",id:"06-api-reference/interfaces/OCRType",label:"OCRType"},{type:"doc",id:"06-api-reference/interfaces/Point",label:"Point"},{type:"doc",id:"06-api-reference/interfaces/Segment",label:"Segment"},{type:"doc",id:"06-api-reference/interfaces/SpeechToTextModelConfig",label:"SpeechToTextModelConfig"},{type:"doc",id:"06-api-reference/interfaces/SpeechToTextProps",label:"SpeechToTextProps"},{type:"doc",id:"06-api-reference/interfaces/SpeechToTextType",label:"SpeechToTextType"},{type:"doc",id:"06-api-reference/interfaces/StyleTransferProp
s",label:"StyleTransferProps"},{type:"doc",id:"06-api-reference/interfaces/StyleTransferType",label:"StyleTransferType"},{type:"doc",id:"06-api-reference/interfaces/TensorPtr",label:"TensorPtr"},{type:"doc",id:"06-api-reference/interfaces/TextEmbeddingsProps",label:"TextEmbeddingsProps"},{type:"doc",id:"06-api-reference/interfaces/TextEmbeddingsType",label:"TextEmbeddingsType"},{type:"doc",id:"06-api-reference/interfaces/TextToImageProps",label:"TextToImageProps"},{type:"doc",id:"06-api-reference/interfaces/TextToImageType",label:"TextToImageType"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechConfig",label:"TextToSpeechConfig"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechInput",label:"TextToSpeechInput"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechProps",label:"TextToSpeechProps"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechStreamingInput",label:"TextToSpeechStreamingInput"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechType",label:"TextToSpeechType"},{type:"doc",id:"06-api-reference/interfaces/TokenizerProps",label:"TokenizerProps"},{type:"doc",id:"06-api-reference/interfaces/TokenizerType",label:"TokenizerType"},{type:"doc",id:"06-api-reference/interfaces/ToolCall",label:"ToolCall"},{type:"doc",id:"06-api-reference/interfaces/ToolsConfig",label:"ToolsConfig"},{type:"doc",id:"06-api-reference/interfaces/TranscriptionResult",label:"TranscriptionResult"},{type:"doc",id:"06-api-reference/interfaces/TranscriptionSegment",label:"TranscriptionSegment"},{type:"doc",id:"06-api-reference/interfaces/VADProps",label:"VADProps"},{type:"doc",id:"06-api-reference/interfaces/VADType",label:"VADType"},{type:"doc",id:"06-api-reference/interfaces/VerticalOCRProps",label:"VerticalOCRProps"},{type:"doc",id:"06-api-reference/interfaces/VoiceConfig",label:"VoiceConfig"},{type:"doc",id:"06-api-reference/interfaces/Word",label:"Word"},{type:"doc",id:"06-api-reference/type-aliases/LabelEnum",label:"LabelEnum"},{type:"doc",id:"06-api-
reference/type-aliases/LLMTool",label:"LLMTool"},{type:"doc",id:"06-api-reference/type-aliases/MessageRole",label:"MessageRole"},{type:"doc",id:"06-api-reference/type-aliases/ModelNameOf",label:"ModelNameOf"},{type:"doc",id:"06-api-reference/type-aliases/ModelSources",label:"ModelSources"},{type:"doc",id:"06-api-reference/type-aliases/OCRLanguage",label:"OCRLanguage"},{type:"doc",id:"06-api-reference/type-aliases/ResourceSource",label:"ResourceSource"},{type:"doc",id:"06-api-reference/type-aliases/SegmentationConfig",label:"SegmentationConfig"},{type:"doc",id:"06-api-reference/type-aliases/SegmentationLabels",label:"SegmentationLabels"},{type:"doc",id:"06-api-reference/type-aliases/SegmentationModelName",label:"SegmentationModelName"},{type:"doc",id:"06-api-reference/type-aliases/SpeechToTextLanguage",label:"SpeechToTextLanguage"},{type:"doc",id:"06-api-reference/type-aliases/TensorBuffer",label:"TensorBuffer"},{type:"doc",id:"06-api-reference/type-aliases/TextToSpeechLanguage",label:"TextToSpeechLanguage"},{type:"doc",id:"06-api-reference/type-aliases/Triple",label:"Triple"},{type:"doc",id:"06-api-reference/variables/SPECIAL_TOKENS",label:"SPECIAL_TOKENS"}]},{type:"category",label:"Typescript 
API",items:[{type:"doc",id:"06-api-reference/classes/ClassificationModule",label:"ClassificationModule"},{type:"doc",id:"06-api-reference/classes/ExecutorchModule",label:"ExecutorchModule"},{type:"doc",id:"06-api-reference/classes/ImageEmbeddingsModule",label:"ImageEmbeddingsModule"},{type:"doc",id:"06-api-reference/classes/ImageSegmentationModule",label:"ImageSegmentationModule"},{type:"doc",id:"06-api-reference/classes/LLMModule",label:"LLMModule"},{type:"doc",id:"06-api-reference/classes/ObjectDetectionModule",label:"ObjectDetectionModule"},{type:"doc",id:"06-api-reference/classes/OCRModule",label:"OCRModule"},{type:"doc",id:"06-api-reference/classes/SpeechToTextModule",label:"SpeechToTextModule"},{type:"doc",id:"06-api-reference/classes/StyleTransferModule",label:"StyleTransferModule"},{type:"doc",id:"06-api-reference/classes/TextEmbeddingsModule",label:"TextEmbeddingsModule"},{type:"doc",id:"06-api-reference/classes/TextToImageModule",label:"TextToImageModule"},{type:"doc",id:"06-api-reference/classes/TextToSpeechModule",label:"TextToSpeechModule"},{type:"doc",id:"06-api-reference/classes/TokenizerModule",label:"TokenizerModule"},{type:"doc",id:"06-api-reference/classes/VADModule",label:"VADModule"},{type:"doc",id:"06-api-reference/classes/VerticalOCRModule",label:"VerticalOCRModule"}]},{type:"category",label:"Utilities - 
General",items:[{type:"category",label:"ResourceFetcherUtils",items:[{type:"category",label:"Functions",items:[{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/calculateDownloadProgress",label:"calculateDownloadProgress"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/getFilenameFromUri",label:"getFilenameFromUri"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/hashObject",label:"hashObject"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/removeFilePrefix",label:"removeFilePrefix"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/triggerHuggingFaceDownloadCounter",label:"triggerHuggingFaceDownloadCounter"}]}],link:{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/index"}},{type:"doc",id:"06-api-reference/classes/ResourceFetcher",label:"ResourceFetcher"},{type:"doc",id:"06-api-reference/interfaces/ExecutorchConfig",label:"ExecutorchConfig"},{type:"doc",id:"06-api-reference/interfaces/ResourceFetcherAdapter",label:"ResourceFetcherAdapter"},{type:"doc",id:"06-api-reference/functions/cleanupExecutorch",label:"cleanupExecutorch"},{type:"doc",id:"06-api-reference/functions/initExecutorch",label:"initExecutorch"}]},{type:"category",label:"Utilities - 
LLM",items:[{type:"doc",id:"06-api-reference/variables/DEFAULT_CHAT_CONFIG",label:"DEFAULT_CHAT_CONFIG"},{type:"doc",id:"06-api-reference/variables/DEFAULT_CONTEXT_BUFFER_TOKENS",label:"DEFAULT_CONTEXT_BUFFER_TOKENS"},{type:"doc",id:"06-api-reference/variables/DEFAULT_MESSAGE_HISTORY",label:"DEFAULT_MESSAGE_HISTORY"},{type:"doc",id:"06-api-reference/variables/DEFAULT_SYSTEM_PROMPT",label:"DEFAULT_SYSTEM_PROMPT"},{type:"doc",id:"06-api-reference/variables/parseToolCall",label:"parseToolCall"},{type:"doc",id:"06-api-reference/functions/DEFAULT_STRUCTURED_OUTPUT_PROMPT",label:"DEFAULT_STRUCTURED_OUTPUT_PROMPT"},{type:"doc",id:"06-api-reference/functions/fixAndValidateStructuredOutput",label:"fixAndValidateStructuredOutput"},{type:"doc",id:"06-api-reference/functions/getStructuredOutputPrompt",label:"getStructuredOutputPrompt"}]},{type:"category",label:"Utils",items:[{type:"doc",id:"06-api-reference/classes/MessageCountContextStrategy",label:"MessageCountContextStrategy"},{type:"doc",id:"06-api-reference/classes/NoopContextStrategy",label:"NoopContextStrategy"},{type:"doc",id:"06-api-reference/classes/SlidingWindowContextStrategy",label:"SlidingWindowContextStrategy"}]}]}; +const typedocSidebar = 
{items:[{type:"category",label:"Hooks",items:[{type:"doc",id:"06-api-reference/functions/useClassification",label:"useClassification"},{type:"doc",id:"06-api-reference/functions/useExecutorchModule",label:"useExecutorchModule"},{type:"doc",id:"06-api-reference/functions/useImageEmbeddings",label:"useImageEmbeddings"},{type:"doc",id:"06-api-reference/functions/useImageSegmentation",label:"useImageSegmentation"},{type:"doc",id:"06-api-reference/functions/useLLM",label:"useLLM"},{type:"doc",id:"06-api-reference/functions/useObjectDetection",label:"useObjectDetection"},{type:"doc",id:"06-api-reference/functions/useOCR",label:"useOCR"},{type:"doc",id:"06-api-reference/functions/useSpeechToText",label:"useSpeechToText"},{type:"doc",id:"06-api-reference/functions/useStyleTransfer",label:"useStyleTransfer"},{type:"doc",id:"06-api-reference/functions/useTextEmbeddings",label:"useTextEmbeddings"},{type:"doc",id:"06-api-reference/functions/useTextToImage",label:"useTextToImage"},{type:"doc",id:"06-api-reference/functions/useTextToSpeech",label:"useTextToSpeech"},{type:"doc",id:"06-api-reference/functions/useTokenizer",label:"useTokenizer"},{type:"doc",id:"06-api-reference/functions/useVAD",label:"useVAD"},{type:"doc",id:"06-api-reference/functions/useVerticalOCR",label:"useVerticalOCR"}]},{type:"category",label:"Interfaces",items:[{type:"doc",id:"06-api-reference/interfaces/ResourceSourceExtended",label:"ResourceSourceExtended"}]},{type:"category",label:"Models - Classification",items:[{type:"doc",id:"06-api-reference/variables/EFFICIENTNET_V2_S",label:"EFFICIENTNET_V2_S"}]},{type:"category",label:"Models - Image Embeddings",items:[{type:"doc",id:"06-api-reference/variables/CLIP_VIT_BASE_PATCH32_IMAGE",label:"CLIP_VIT_BASE_PATCH32_IMAGE"}]},{type:"category",label:"Models - Image 
Generation",items:[{type:"doc",id:"06-api-reference/variables/BK_SDM_TINY_VPRED_256",label:"BK_SDM_TINY_VPRED_256"},{type:"doc",id:"06-api-reference/variables/BK_SDM_TINY_VPRED_512",label:"BK_SDM_TINY_VPRED_512"}]},{type:"category",label:"Models - Image Segmentation",items:[{type:"doc",id:"06-api-reference/variables/DEEPLAB_V3_RESNET50",label:"DEEPLAB_V3_RESNET50"},{type:"doc",id:"06-api-reference/variables/SELFIE_SEGMENTATION",label:"SELFIE_SEGMENTATION"}]},{type:"category",label:"Models - LMM",items:[{type:"doc",id:"06-api-reference/variables/HAMMER2_1_0_5B",label:"HAMMER2_1_0_5B"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_0_5B_QUANTIZED",label:"HAMMER2_1_0_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_1_5B",label:"HAMMER2_1_1_5B"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_1_5B_QUANTIZED",label:"HAMMER2_1_1_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_3B",label:"HAMMER2_1_3B"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_3B_QUANTIZED",label:"HAMMER2_1_3B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/LFM2_5_1_2B_INSTRUCT",label:"LFM2_5_1_2B_INSTRUCT"},{type:"doc",id:"06-api-reference/variables/LFM2_5_1_2B_INSTRUCT_QUANTIZED",label:"LFM2_5_1_2B_INSTRUCT_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_1B",label:"LLAMA3_2_1B"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_1B_QLORA",label:"LLAMA3_2_1B_QLORA"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_1B_SPINQUANT",label:"LLAMA3_2_1B_SPINQUANT"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_3B",label:"LLAMA3_2_3B"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_3B_QLORA",label:"LLAMA3_2_3B_QLORA"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_3B_SPINQUANT",label:"LLAMA3_2_3B_SPINQUANT"},{type:"doc",id:"06-api-reference/variables/PHI_4_MINI_4B",label:"PHI_4_MINI_4B"},{type:"doc",id:"06-api-reference/variables/PHI_4_MINI_4B_QUANTIZED",label:"PHI_4_MINI_4B_QUANTIZED"},{type:"doc",id:"06-api-r
eference/variables/QWEN2_5_0_5B",label:"QWEN2_5_0_5B"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_0_5B_QUANTIZED",label:"QWEN2_5_0_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_1_5B",label:"QWEN2_5_1_5B"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_1_5B_QUANTIZED",label:"QWEN2_5_1_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_3B",label:"QWEN2_5_3B"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_3B_QUANTIZED",label:"QWEN2_5_3B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN3_0_6B",label:"QWEN3_0_6B"},{type:"doc",id:"06-api-reference/variables/QWEN3_0_6B_QUANTIZED",label:"QWEN3_0_6B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN3_1_7B",label:"QWEN3_1_7B"},{type:"doc",id:"06-api-reference/variables/QWEN3_1_7B_QUANTIZED",label:"QWEN3_1_7B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN3_4B",label:"QWEN3_4B"},{type:"doc",id:"06-api-reference/variables/QWEN3_4B_QUANTIZED",label:"QWEN3_4B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_1_7B",label:"SMOLLM2_1_1_7B"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_1_7B_QUANTIZED",label:"SMOLLM2_1_1_7B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_135M",label:"SMOLLM2_1_135M"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_135M_QUANTIZED",label:"SMOLLM2_1_135M_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_360M",label:"SMOLLM2_1_360M"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_360M_QUANTIZED",label:"SMOLLM2_1_360M_QUANTIZED"}]},{type:"category",label:"Models - Object Detection",items:[{type:"doc",id:"06-api-reference/variables/SSDLITE_320_MOBILENET_V3_LARGE",label:"SSDLITE_320_MOBILENET_V3_LARGE"}]},{type:"category",label:"Models - Speech To 
Text",items:[{type:"doc",id:"06-api-reference/variables/WHISPER_BASE",label:"WHISPER_BASE"},{type:"doc",id:"06-api-reference/variables/WHISPER_BASE_EN",label:"WHISPER_BASE_EN"},{type:"doc",id:"06-api-reference/variables/WHISPER_SMALL",label:"WHISPER_SMALL"},{type:"doc",id:"06-api-reference/variables/WHISPER_SMALL_EN",label:"WHISPER_SMALL_EN"},{type:"doc",id:"06-api-reference/variables/WHISPER_TINY",label:"WHISPER_TINY"},{type:"doc",id:"06-api-reference/variables/WHISPER_TINY_EN",label:"WHISPER_TINY_EN"},{type:"doc",id:"06-api-reference/variables/WHISPER_TINY_EN_QUANTIZED",label:"WHISPER_TINY_EN_QUANTIZED"}]},{type:"category",label:"Models - Style Transfer",items:[{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_CANDY",label:"STYLE_TRANSFER_CANDY"},{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_MOSAIC",label:"STYLE_TRANSFER_MOSAIC"},{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_RAIN_PRINCESS",label:"STYLE_TRANSFER_RAIN_PRINCESS"},{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_UDNIE",label:"STYLE_TRANSFER_UDNIE"}]},{type:"category",label:"Models - Text Embeddings",items:[{type:"doc",id:"06-api-reference/variables/ALL_MINILM_L6_V2",label:"ALL_MINILM_L6_V2"},{type:"doc",id:"06-api-reference/variables/ALL_MPNET_BASE_V2",label:"ALL_MPNET_BASE_V2"},{type:"doc",id:"06-api-reference/variables/CLIP_VIT_BASE_PATCH32_TEXT",label:"CLIP_VIT_BASE_PATCH32_TEXT"},{type:"doc",id:"06-api-reference/variables/MULTI_QA_MINILM_L6_COS_V1",label:"MULTI_QA_MINILM_L6_COS_V1"},{type:"doc",id:"06-api-reference/variables/MULTI_QA_MPNET_BASE_DOT_V1",label:"MULTI_QA_MPNET_BASE_DOT_V1"}]},{type:"category",label:"Models - Text to Speech",items:[{type:"doc",id:"06-api-reference/variables/KOKORO_MEDIUM",label:"KOKORO_MEDIUM"},{type:"doc",id:"06-api-reference/variables/KOKORO_SMALL",label:"KOKORO_SMALL"}]},{type:"category",label:"Models - Voice Activity 
Detection",items:[{type:"doc",id:"06-api-reference/variables/FSMN_VAD",label:"FSMN_VAD"}]},{type:"category",label:"OCR Supported Alphabets",items:[{type:"doc",id:"06-api-reference/variables/OCR_ABAZA",label:"OCR_ABAZA"},{type:"doc",id:"06-api-reference/variables/OCR_ADYGHE",label:"OCR_ADYGHE"},{type:"doc",id:"06-api-reference/variables/OCR_AFRIKAANS",label:"OCR_AFRIKAANS"},{type:"doc",id:"06-api-reference/variables/OCR_ALBANIAN",label:"OCR_ALBANIAN"},{type:"doc",id:"06-api-reference/variables/OCR_AVAR",label:"OCR_AVAR"},{type:"doc",id:"06-api-reference/variables/OCR_AZERBAIJANI",label:"OCR_AZERBAIJANI"},{type:"doc",id:"06-api-reference/variables/OCR_BELARUSIAN",label:"OCR_BELARUSIAN"},{type:"doc",id:"06-api-reference/variables/OCR_BOSNIAN",label:"OCR_BOSNIAN"},{type:"doc",id:"06-api-reference/variables/OCR_BULGARIAN",label:"OCR_BULGARIAN"},{type:"doc",id:"06-api-reference/variables/OCR_CHECHEN",label:"OCR_CHECHEN"},{type:"doc",id:"06-api-reference/variables/OCR_CROATIAN",label:"OCR_CROATIAN"},{type:"doc",id:"06-api-reference/variables/OCR_CZECH",label:"OCR_CZECH"},{type:"doc",id:"06-api-reference/variables/OCR_DANISH",label:"OCR_DANISH"},{type:"doc",id:"06-api-reference/variables/OCR_DARGWA",label:"OCR_DARGWA"},{type:"doc",id:"06-api-reference/variables/OCR_DUTCH",label:"OCR_DUTCH"},{type:"doc",id:"06-api-reference/variables/OCR_ENGLISH",label:"OCR_ENGLISH"},{type:"doc",id:"06-api-reference/variables/OCR_ESTONIAN",label:"OCR_ESTONIAN"},{type:"doc",id:"06-api-reference/variables/OCR_FRENCH",label:"OCR_FRENCH"},{type:"doc",id:"06-api-reference/variables/OCR_GERMAN",label:"OCR_GERMAN"},{type:"doc",id:"06-api-reference/variables/OCR_HUNGARIAN",label:"OCR_HUNGARIAN"},{type:"doc",id:"06-api-reference/variables/OCR_ICELANDIC",label:"OCR_ICELANDIC"},{type:"doc",id:"06-api-reference/variables/OCR_INDONESIAN",label:"OCR_INDONESIAN"},{type:"doc",id:"06-api-reference/variables/OCR_INGUSH",label:"OCR_INGUSH"},{type:"doc",id:"06-api-reference/variables/OCR_IRISH",label:"OCR_IRISH
"},{type:"doc",id:"06-api-reference/variables/OCR_ITALIAN",label:"OCR_ITALIAN"},{type:"doc",id:"06-api-reference/variables/OCR_JAPANESE",label:"OCR_JAPANESE"},{type:"doc",id:"06-api-reference/variables/OCR_KANNADA",label:"OCR_KANNADA"},{type:"doc",id:"06-api-reference/variables/OCR_KARBADIAN",label:"OCR_KARBADIAN"},{type:"doc",id:"06-api-reference/variables/OCR_KOREAN",label:"OCR_KOREAN"},{type:"doc",id:"06-api-reference/variables/OCR_KURDISH",label:"OCR_KURDISH"},{type:"doc",id:"06-api-reference/variables/OCR_LAK",label:"OCR_LAK"},{type:"doc",id:"06-api-reference/variables/OCR_LATIN",label:"OCR_LATIN"},{type:"doc",id:"06-api-reference/variables/OCR_LATVIAN",label:"OCR_LATVIAN"},{type:"doc",id:"06-api-reference/variables/OCR_LEZGHIAN",label:"OCR_LEZGHIAN"},{type:"doc",id:"06-api-reference/variables/OCR_LITHUANIAN",label:"OCR_LITHUANIAN"},{type:"doc",id:"06-api-reference/variables/OCR_MALAY",label:"OCR_MALAY"},{type:"doc",id:"06-api-reference/variables/OCR_MALTESE",label:"OCR_MALTESE"},{type:"doc",id:"06-api-reference/variables/OCR_MAORI",label:"OCR_MAORI"},{type:"doc",id:"06-api-reference/variables/OCR_MONGOLIAN",label:"OCR_MONGOLIAN"},{type:"doc",id:"06-api-reference/variables/OCR_NORWEGIAN",label:"OCR_NORWEGIAN"},{type:"doc",id:"06-api-reference/variables/OCR_OCCITAN",label:"OCR_OCCITAN"},{type:"doc",id:"06-api-reference/variables/OCR_PALI",label:"OCR_PALI"},{type:"doc",id:"06-api-reference/variables/OCR_POLISH",label:"OCR_POLISH"},{type:"doc",id:"06-api-reference/variables/OCR_PORTUGUESE",label:"OCR_PORTUGUESE"},{type:"doc",id:"06-api-reference/variables/OCR_ROMANIAN",label:"OCR_ROMANIAN"},{type:"doc",id:"06-api-reference/variables/OCR_RUSSIAN",label:"OCR_RUSSIAN"},{type:"doc",id:"06-api-reference/variables/OCR_SERBIAN_CYRILLIC",label:"OCR_SERBIAN_CYRILLIC"},{type:"doc",id:"06-api-reference/variables/OCR_SERBIAN_LATIN",label:"OCR_SERBIAN_LATIN"},{type:"doc",id:"06-api-reference/variables/OCR_SIMPLIFIED_CHINESE",label:"OCR_SIMPLIFIED_CHINESE"},{type:"doc",id:"06-a
pi-reference/variables/OCR_SLOVAK",label:"OCR_SLOVAK"},{type:"doc",id:"06-api-reference/variables/OCR_SLOVENIAN",label:"OCR_SLOVENIAN"},{type:"doc",id:"06-api-reference/variables/OCR_SPANISH",label:"OCR_SPANISH"},{type:"doc",id:"06-api-reference/variables/OCR_SWAHILI",label:"OCR_SWAHILI"},{type:"doc",id:"06-api-reference/variables/OCR_SWEDISH",label:"OCR_SWEDISH"},{type:"doc",id:"06-api-reference/variables/OCR_TABASSARAN",label:"OCR_TABASSARAN"},{type:"doc",id:"06-api-reference/variables/OCR_TAGALOG",label:"OCR_TAGALOG"},{type:"doc",id:"06-api-reference/variables/OCR_TAJIK",label:"OCR_TAJIK"},{type:"doc",id:"06-api-reference/variables/OCR_TELUGU",label:"OCR_TELUGU"},{type:"doc",id:"06-api-reference/variables/OCR_TURKISH",label:"OCR_TURKISH"},{type:"doc",id:"06-api-reference/variables/OCR_UKRAINIAN",label:"OCR_UKRAINIAN"},{type:"doc",id:"06-api-reference/variables/OCR_UZBEK",label:"OCR_UZBEK"},{type:"doc",id:"06-api-reference/variables/OCR_VIETNAMESE",label:"OCR_VIETNAMESE"},{type:"doc",id:"06-api-reference/variables/OCR_WELSH",label:"OCR_WELSH"}]},{type:"category",label:"Other",items:[{type:"doc",id:"06-api-reference/enumerations/RnExecutorchErrorCode",label:"RnExecutorchErrorCode"},{type:"doc",id:"06-api-reference/classes/Logger",label:"Logger"},{type:"doc",id:"06-api-reference/classes/RnExecutorchError",label:"RnExecutorchError"},{type:"doc",id:"06-api-reference/interfaces/Frame",label:"Frame"}]},{type:"category",label:"TTS Supported 
Voices",items:[{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AF_HEART",label:"KOKORO_VOICE_AF_HEART"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AF_RIVER",label:"KOKORO_VOICE_AF_RIVER"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AF_SARAH",label:"KOKORO_VOICE_AF_SARAH"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AM_ADAM",label:"KOKORO_VOICE_AM_ADAM"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AM_MICHAEL",label:"KOKORO_VOICE_AM_MICHAEL"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AM_SANTA",label:"KOKORO_VOICE_AM_SANTA"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_BF_EMMA",label:"KOKORO_VOICE_BF_EMMA"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_BM_DANIEL",label:"KOKORO_VOICE_BM_DANIEL"}]},{type:"category",label:"Types",items:[{type:"doc",id:"06-api-reference/enumerations/CocoLabel",label:"CocoLabel"},{type:"doc",id:"06-api-reference/enumerations/DeeplabLabel",label:"DeeplabLabel"},{type:"doc",id:"06-api-reference/enumerations/DownloadStatus",label:"DownloadStatus"},{type:"doc",id:"06-api-reference/enumerations/HTTP_CODE",label:"HTTP_CODE"},{type:"doc",id:"06-api-reference/enumerations/ScalarType",label:"ScalarType"},{type:"doc",id:"06-api-reference/enumerations/SelfieSegmentationLabel",label:"SelfieSegmentationLabel"},{type:"doc",id:"06-api-reference/enumerations/SourceType",label:"SourceType"},{type:"doc",id:"06-api-reference/interfaces/Bbox",label:"Bbox"},{type:"doc",id:"06-api-reference/interfaces/ChatConfig",label:"ChatConfig"},{type:"doc",id:"06-api-reference/interfaces/ClassificationProps",label:"ClassificationProps"},{type:"doc",id:"06-api-reference/interfaces/ClassificationType",label:"ClassificationType"},{type:"doc",id:"06-api-reference/interfaces/ContextStrategy",label:"ContextStrategy"},{type:"doc",id:"06-api-reference/interfaces/DecodingOptions",label:"DecodingOptions"},{type:"doc",id:"06-api-reference/interfaces/Detection",label:"Detection"},{type:"doc",id:
"06-api-reference/interfaces/ExecutorchModuleProps",label:"ExecutorchModuleProps"},{type:"doc",id:"06-api-reference/interfaces/ExecutorchModuleType",label:"ExecutorchModuleType"},{type:"doc",id:"06-api-reference/interfaces/GenerationConfig",label:"GenerationConfig"},{type:"doc",id:"06-api-reference/interfaces/ImageEmbeddingsProps",label:"ImageEmbeddingsProps"},{type:"doc",id:"06-api-reference/interfaces/ImageEmbeddingsType",label:"ImageEmbeddingsType"},{type:"doc",id:"06-api-reference/interfaces/ImageSegmentationProps",label:"ImageSegmentationProps"},{type:"doc",id:"06-api-reference/interfaces/ImageSegmentationType",label:"ImageSegmentationType"},{type:"doc",id:"06-api-reference/interfaces/KokoroConfig",label:"KokoroConfig"},{type:"doc",id:"06-api-reference/interfaces/KokoroVoiceExtras",label:"KokoroVoiceExtras"},{type:"doc",id:"06-api-reference/interfaces/LLMConfig",label:"LLMConfig"},{type:"doc",id:"06-api-reference/interfaces/LLMProps",label:"LLMProps"},{type:"doc",id:"06-api-reference/interfaces/LLMType",label:"LLMType"},{type:"doc",id:"06-api-reference/interfaces/Message",label:"Message"},{type:"doc",id:"06-api-reference/interfaces/ObjectDetectionProps",label:"ObjectDetectionProps"},{type:"doc",id:"06-api-reference/interfaces/ObjectDetectionType",label:"ObjectDetectionType"},{type:"doc",id:"06-api-reference/interfaces/OCRDetection",label:"OCRDetection"},{type:"doc",id:"06-api-reference/interfaces/OCRProps",label:"OCRProps"},{type:"doc",id:"06-api-reference/interfaces/OCRType",label:"OCRType"},{type:"doc",id:"06-api-reference/interfaces/PixelData",label:"PixelData"},{type:"doc",id:"06-api-reference/interfaces/Point",label:"Point"},{type:"doc",id:"06-api-reference/interfaces/Segment",label:"Segment"},{type:"doc",id:"06-api-reference/interfaces/SpeechToTextModelConfig",label:"SpeechToTextModelConfig"},{type:"doc",id:"06-api-reference/interfaces/SpeechToTextProps",label:"SpeechToTextProps"},{type:"doc",id:"06-api-reference/interfaces/SpeechToTextType",label:"Speech
ToTextType"},{type:"doc",id:"06-api-reference/interfaces/StyleTransferProps",label:"StyleTransferProps"},{type:"doc",id:"06-api-reference/interfaces/StyleTransferType",label:"StyleTransferType"},{type:"doc",id:"06-api-reference/interfaces/TensorPtr",label:"TensorPtr"},{type:"doc",id:"06-api-reference/interfaces/TextEmbeddingsProps",label:"TextEmbeddingsProps"},{type:"doc",id:"06-api-reference/interfaces/TextEmbeddingsType",label:"TextEmbeddingsType"},{type:"doc",id:"06-api-reference/interfaces/TextToImageProps",label:"TextToImageProps"},{type:"doc",id:"06-api-reference/interfaces/TextToImageType",label:"TextToImageType"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechConfig",label:"TextToSpeechConfig"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechInput",label:"TextToSpeechInput"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechProps",label:"TextToSpeechProps"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechStreamingInput",label:"TextToSpeechStreamingInput"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechType",label:"TextToSpeechType"},{type:"doc",id:"06-api-reference/interfaces/TokenizerProps",label:"TokenizerProps"},{type:"doc",id:"06-api-reference/interfaces/TokenizerType",label:"TokenizerType"},{type:"doc",id:"06-api-reference/interfaces/ToolCall",label:"ToolCall"},{type:"doc",id:"06-api-reference/interfaces/ToolsConfig",label:"ToolsConfig"},{type:"doc",id:"06-api-reference/interfaces/TranscriptionResult",label:"TranscriptionResult"},{type:"doc",id:"06-api-reference/interfaces/TranscriptionSegment",label:"TranscriptionSegment"},{type:"doc",id:"06-api-reference/interfaces/VADProps",label:"VADProps"},{type:"doc",id:"06-api-reference/interfaces/VADType",label:"VADType"},{type:"doc",id:"06-api-reference/interfaces/VerticalOCRProps",label:"VerticalOCRProps"},{type:"doc",id:"06-api-reference/interfaces/VoiceConfig",label:"VoiceConfig"},{type:"doc",id:"06-api-reference/interfaces/Word",label:"Word"},{type:"doc",id:"06-api-re
ference/type-aliases/LabelEnum",label:"LabelEnum"},{type:"doc",id:"06-api-reference/type-aliases/LLMTool",label:"LLMTool"},{type:"doc",id:"06-api-reference/type-aliases/MessageRole",label:"MessageRole"},{type:"doc",id:"06-api-reference/type-aliases/ModelNameOf",label:"ModelNameOf"},{type:"doc",id:"06-api-reference/type-aliases/ModelSources",label:"ModelSources"},{type:"doc",id:"06-api-reference/type-aliases/OCRLanguage",label:"OCRLanguage"},{type:"doc",id:"06-api-reference/type-aliases/ResourceSource",label:"ResourceSource"},{type:"doc",id:"06-api-reference/type-aliases/SegmentationConfig",label:"SegmentationConfig"},{type:"doc",id:"06-api-reference/type-aliases/SegmentationLabels",label:"SegmentationLabels"},{type:"doc",id:"06-api-reference/type-aliases/SegmentationModelName",label:"SegmentationModelName"},{type:"doc",id:"06-api-reference/type-aliases/SpeechToTextLanguage",label:"SpeechToTextLanguage"},{type:"doc",id:"06-api-reference/type-aliases/TensorBuffer",label:"TensorBuffer"},{type:"doc",id:"06-api-reference/type-aliases/TextToSpeechLanguage",label:"TextToSpeechLanguage"},{type:"doc",id:"06-api-reference/type-aliases/Triple",label:"Triple"},{type:"doc",id:"06-api-reference/variables/SPECIAL_TOKENS",label:"SPECIAL_TOKENS"}]},{type:"category",label:"Typescript 
API",items:[{type:"doc",id:"06-api-reference/classes/ClassificationModule",label:"ClassificationModule"},{type:"doc",id:"06-api-reference/classes/ExecutorchModule",label:"ExecutorchModule"},{type:"doc",id:"06-api-reference/classes/ImageEmbeddingsModule",label:"ImageEmbeddingsModule"},{type:"doc",id:"06-api-reference/classes/ImageSegmentationModule",label:"ImageSegmentationModule"},{type:"doc",id:"06-api-reference/classes/LLMModule",label:"LLMModule"},{type:"doc",id:"06-api-reference/classes/ObjectDetectionModule",label:"ObjectDetectionModule"},{type:"doc",id:"06-api-reference/classes/OCRModule",label:"OCRModule"},{type:"doc",id:"06-api-reference/classes/SpeechToTextModule",label:"SpeechToTextModule"},{type:"doc",id:"06-api-reference/classes/StyleTransferModule",label:"StyleTransferModule"},{type:"doc",id:"06-api-reference/classes/TextEmbeddingsModule",label:"TextEmbeddingsModule"},{type:"doc",id:"06-api-reference/classes/TextToImageModule",label:"TextToImageModule"},{type:"doc",id:"06-api-reference/classes/TextToSpeechModule",label:"TextToSpeechModule"},{type:"doc",id:"06-api-reference/classes/TokenizerModule",label:"TokenizerModule"},{type:"doc",id:"06-api-reference/classes/VADModule",label:"VADModule"},{type:"doc",id:"06-api-reference/classes/VerticalOCRModule",label:"VerticalOCRModule"}]},{type:"category",label:"Utilities - 
General",items:[{type:"category",label:"ResourceFetcherUtils",items:[{type:"category",label:"Functions",items:[{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/calculateDownloadProgress",label:"calculateDownloadProgress"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/getFilenameFromUri",label:"getFilenameFromUri"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/hashObject",label:"hashObject"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/removeFilePrefix",label:"removeFilePrefix"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/triggerHuggingFaceDownloadCounter",label:"triggerHuggingFaceDownloadCounter"}]}],link:{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/index"}},{type:"doc",id:"06-api-reference/classes/ResourceFetcher",label:"ResourceFetcher"},{type:"doc",id:"06-api-reference/interfaces/ExecutorchConfig",label:"ExecutorchConfig"},{type:"doc",id:"06-api-reference/interfaces/ResourceFetcherAdapter",label:"ResourceFetcherAdapter"},{type:"doc",id:"06-api-reference/functions/cleanupExecutorch",label:"cleanupExecutorch"},{type:"doc",id:"06-api-reference/functions/initExecutorch",label:"initExecutorch"}]},{type:"category",label:"Utilities - 
LLM",items:[{type:"doc",id:"06-api-reference/variables/DEFAULT_CHAT_CONFIG",label:"DEFAULT_CHAT_CONFIG"},{type:"doc",id:"06-api-reference/variables/DEFAULT_CONTEXT_BUFFER_TOKENS",label:"DEFAULT_CONTEXT_BUFFER_TOKENS"},{type:"doc",id:"06-api-reference/variables/DEFAULT_MESSAGE_HISTORY",label:"DEFAULT_MESSAGE_HISTORY"},{type:"doc",id:"06-api-reference/variables/DEFAULT_SYSTEM_PROMPT",label:"DEFAULT_SYSTEM_PROMPT"},{type:"doc",id:"06-api-reference/variables/parseToolCall",label:"parseToolCall"},{type:"doc",id:"06-api-reference/functions/DEFAULT_STRUCTURED_OUTPUT_PROMPT",label:"DEFAULT_STRUCTURED_OUTPUT_PROMPT"},{type:"doc",id:"06-api-reference/functions/fixAndValidateStructuredOutput",label:"fixAndValidateStructuredOutput"},{type:"doc",id:"06-api-reference/functions/getStructuredOutputPrompt",label:"getStructuredOutputPrompt"}]},{type:"category",label:"Utils",items:[{type:"doc",id:"06-api-reference/classes/MessageCountContextStrategy",label:"MessageCountContextStrategy"},{type:"doc",id:"06-api-reference/classes/NoopContextStrategy",label:"NoopContextStrategy"},{type:"doc",id:"06-api-reference/classes/SlidingWindowContextStrategy",label:"SlidingWindowContextStrategy"}]}]}; module.exports = typedocSidebar.items; \ No newline at end of file diff --git a/packages/react-native-executorch/android/gradle.properties b/packages/react-native-executorch/android/gradle.properties index b30a8b11d..97cdd1854 100644 --- a/packages/react-native-executorch/android/gradle.properties +++ b/packages/react-native-executorch/android/gradle.properties @@ -1,5 +1,5 @@ RnExecutorch_kotlinVersion=1.7.0 -RnExecutorch_minSdkVersion=21 +RnExecutorch_minSdkVersion=26 RnExecutorch_targetSdkVersion=31 RnExecutorch_compileSdkVersion=31 -RnExecutorch_ndkversion=21.4.7075529 +RnExecutorch_ndkversion=21.4.7075529 \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/ErrorCodes.h b/packages/react-native-executorch/common/rnexecutorch/ErrorCodes.h index 
f4fd2e7f0..d49f3a175 100644 --- a/packages/react-native-executorch/common/rnexecutorch/ErrorCodes.h +++ b/packages/react-native-executorch/common/rnexecutorch/ErrorCodes.h @@ -75,6 +75,11 @@ enum class RnExecutorchErrorCode : int32_t { * interruptions. */ DownloadInterrupted = 118, + /** + * Thrown when a feature or platform is not supported in the current + * environment. + */ + PlatformNotSupported = 119, /** * Thrown when an error occurs with the tokenizer or tokenization process. */ diff --git a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h index d5c98763d..54e8c1cbb 100644 --- a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h +++ b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h @@ -54,8 +54,13 @@ class RnExecutorchInstaller { meta::createConstructorArgsWithCallInvoker( args, runtime, jsCallInvoker); - auto modelImplementationPtr = std::make_shared( - std::make_from_tuple(constructorArgs)); + auto modelImplementationPtr = std::apply( + [](auto &&...unpackedArgs) { + return std::make_shared( + std::forward(unpackedArgs)...); + }, + std::move(constructorArgs)); + auto modelHostObject = std::make_shared>( modelImplementationPtr, jsCallInvoker); diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index df9abbdef..d0cba9916 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -15,11 +15,13 @@ #include #include +#include #include #include #include #include #include +#include #include using namespace rnexecutorch::models::speech_to_text::types; @@ -346,6 +348,15 @@ inline jsi::Value getJsiValue(const std::vector &vec, return {runtime, array}; 
} +inline jsi::Value getJsiValue(const std::vector &vec, + jsi::Runtime &runtime) { + jsi::Array array(runtime, vec.size()); + for (size_t i = 0; i < vec.size(); i++) { + array.setValueAtIndex(runtime, i, jsi::Value(static_cast(vec[i]))); + } + return {runtime, array}; +} + // Conditional as on android, size_t and uint64_t reduce to the same type, // introducing ambiguity template #include #include +#include #include #include #include @@ -168,9 +169,26 @@ template class ModelHostObject : public JsiHostObject { addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject, promiseHostFunction<&Model::stream>, "stream")); + } + + if constexpr (meta::HasGenerateFromString) { + addFunctions( + JSI_EXPORT_FUNCTION(ModelHostObject, + promiseHostFunction<&Model::generateFromString>, + "generateFromString")); + } + + if constexpr (meta::HasGenerateFromFrame) { addFunctions(JSI_EXPORT_FUNCTION( - ModelHostObject, synchronousHostFunction<&Model::streamStop>, - "streamStop")); + ModelHostObject, visionHostFunction<&Model::generateFromFrame>, + "generateFromFrame")); + } + + if constexpr (meta::HasGenerateFromPixels) { + addFunctions( + JSI_EXPORT_FUNCTION(ModelHostObject, + promiseHostFunction<&Model::generateFromPixels>, + "generateFromPixels")); } } @@ -221,6 +239,68 @@ template class ModelHostObject : public JsiHostObject { } } + /** + * Unlike promiseHostFunction, this runs synchronously on the JS thread, + * which is required for VisionCamera worklet frame processors. + * + * The key challenge is argument mapping: the C++ function takes + * (Runtime, frameData, Rest...) but from the JS side, Runtime is injected + * automatically and frameData is JS args[0]. The remaining args (Rest...) + * map to JS args[1..N]. + * + * This is achieved via TailSignature: it extracts the Rest... 
parameter pack + * from the function pointer type, creates a dummy free function with only + * those types, then uses createArgsTupleFromJsi on that dummy to convert + * args[1..N] — bypassing the manually-handled frameData at args[0]. + * + * Argument mapping: + * C++ params: (Runtime&, frameData, Rest[0], Rest[1], ...) + * JS args: ( args[0], args[1], args[2], ...) + * JS arg count = C++ arity - 1 (Runtime is injected, not counted) + * + */ + template JSI_HOST_FUNCTION(visionHostFunction) { + constexpr std::size_t cppArgCount = + meta::FunctionTraits::arity; + constexpr std::size_t expectedJsArgs = cppArgCount - 1; + + if (count != expectedJsArgs) { + throw jsi::JSError(runtime, "Argument count mismatch in vision function"); + } + + try { + auto dummyFuncPtr = &meta::TailSignature::dummy; + auto tailArgsTuple = + meta::createArgsTupleFromJsi(dummyFuncPtr, args + 1, runtime); + + using ReturnType = + typename meta::FunctionTraits::return_type; + + if constexpr (std::is_void_v) { + std::apply( + [&](auto &&...tailArgs) { + (model.get()->*FnPtr)( + runtime, args[0], + std::forward(tailArgs)...); + }, + std::move(tailArgsTuple)); + return jsi::Value::undefined(); + } else { + auto result = std::apply( + [&](auto &&...tailArgs) { + return (model.get()->*FnPtr)( + runtime, args[0], + std::forward(tailArgs)...); + }, + std::move(tailArgsTuple)); + + return jsi_conversion::getJsiValue(std::move(result), runtime); + } + } catch (const std::exception &e) { + throw jsi::JSError(runtime, e.what()); + } + } + // A generic host function that resolves a promise with a result of a // function. JSI arguments are converted to the types provided in the function // signature, and the return value is converted back to JSI before resolving. 
@@ -353,4 +433,4 @@ template class ModelHostObject : public JsiHostObject { std::shared_ptr callInvoker; }; -} // namespace rnexecutorch +} // namespace rnexecutorch \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h index 8290a810b..fde81e046 100644 --- a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h +++ b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -23,7 +24,8 @@ template std::tuple fillTupleFromArgs(std::index_sequence, const jsi::Value *args, jsi::Runtime &runtime) { - return std::make_tuple(jsi_conversion::getValue(args[I], runtime)...); + return std::tuple{ + jsi_conversion::getValue(args[I], runtime)...}; } /** @@ -47,4 +49,52 @@ std::tuple createArgsTupleFromJsi(R (Model::*f)(Types...) const, return fillTupleFromArgs(std::index_sequence_for{}, args, runtime); } -} // namespace rnexecutorch::meta \ No newline at end of file + +// Free function overload used by visionHostFunction: accepts a dummy free +// function pointer whose parameter types (Rest...) are extracted by +// TailSignature and converted from JSI args. +template +std::tuple createArgsTupleFromJsi(void (*f)(Types...), + const jsi::Value *args, + jsi::Runtime &runtime) { + return fillTupleFromArgs(std::index_sequence_for{}, args, + runtime); +} + +// Extracts arity, return type, and argument types from a member function +// pointer at compile time. Used by visionHostFunction to determine the expected +// JS argument count and invoke the correct return path. 
+template struct FunctionTraits; + +template +struct FunctionTraits { + static constexpr std::size_t arity = sizeof...(Args); + using return_type = R; + using args_tuple = std::tuple; +}; + +template +struct FunctionTraits { + static constexpr std::size_t arity = sizeof...(Args); + using return_type = R; + using args_tuple = std::tuple; +}; + +// Strips the first two parameters (Runtime& and jsi::Value&) from a member +// function pointer and exposes the remaining types as a dummy free function. +// Used by visionHostFunction to parse only the tail JS args via +// createArgsTupleFromJsi, while frameData at args[0] is passed manually. +template struct TailSignature; + +template +struct TailSignature { + static void dummy(Rest...) {} +}; + +template +struct TailSignature { + static void dummy(Rest...) {} +}; +} // namespace rnexecutorch::meta diff --git a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h index 85a3db449..2d7612f25 100644 --- a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h +++ b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h @@ -16,6 +16,21 @@ concept HasGenerate = requires(T t) { { &T::generate }; }; +template +concept HasGenerateFromString = requires(T t) { + { &T::generateFromString }; +}; + +template +concept HasGenerateFromPixels = requires(T t) { + { &T::generateFromPixels }; +}; + +template +concept HasGenerateFromFrame = requires(T t) { + { &T::generateFromFrame }; +}; + template concept HasEncode = requires(T t) { { &T::encode }; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp new file mode 100644 index 000000000..8f67175c4 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -0,0 +1,62 @@ 
+#include "VisionModel.h" +#include +#include +#include +#include + +namespace rnexecutorch::models { + +using namespace facebook; + +cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData) const { + auto frameObj = frameData.asObject(runtime); + cv::Mat frame = ::rnexecutorch::utils::extractFrame(runtime, frameObj); + + // Camera sensors natively deliver frames in landscape orientation. + // Rotate 90° CW so all models receive upright portrait frames. + if (frame.cols > frame.rows) { + cv::Mat upright; + cv::rotate(frame, upright, cv::ROTATE_90_CLOCKWISE); + return upright; + } + return frame; +} + +cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const { + if (tensorView.sizes.size() != 3) { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Invalid pixel data: sizes must have 3 elements " + "[height, width, channels], got %zu", + tensorView.sizes.size()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + int32_t height = tensorView.sizes[0]; + int32_t width = tensorView.sizes[1]; + int32_t channels = tensorView.sizes[2]; + + if (channels != 3) { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Invalid pixel data: expected 3 channels (RGB), got %d", + channels); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + if (tensorView.scalarType != ScalarType::Byte) { + throw RnExecutorchError( + RnExecutorchErrorCode::InvalidUserInput, + "Invalid pixel data: scalarType must be BYTE (Uint8Array)"); + } + + uint8_t *dataPtr = static_cast(tensorView.dataPtr); + cv::Mat image(height, width, CV_8UC3, dataPtr); + + return image; +} + +} // namespace rnexecutorch::models \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h new file mode 
100644 index 000000000..a2a461772 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h @@ -0,0 +1,168 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace rnexecutorch { +namespace models { + +/** + * @brief Base class for computer vision models that support real-time camera + * input + * + * VisionModel extends BaseModel with thread-safe inference and automatic frame + * extraction from VisionCamera. This class is designed for models that need to + * process camera frames in real-time (e.g., at 30fps). + * + * Thread Safety: + * - All inference operations are protected by a mutex via scoped_lock + * + * Usage: + * Subclasses should: + * 1. Inherit from VisionModel instead of BaseModel + * 2. Implement preprocessFrame() with model-specific preprocessing + * 3. Delegate to runInference() which handles locking internally + * + * Example: + * @code + * class Classification : public VisionModel { + * public: + * std::unordered_map + * generateFromFrame(jsi::Runtime& runtime, const jsi::Value& frameValue) { + * auto frameObject = frameValue.asObject(runtime); + * cv::Mat frame = utils::extractFrame(runtime, frameObject); + * return runInference(frame); + * } + * }; + * @endcode + */ +class VisionModel : public BaseModel { +public: + /** + * @brief Construct a VisionModel with the same parameters as BaseModel + * + * VisionModel uses the same construction pattern as BaseModel, just adding + * thread-safety on top. + */ + VisionModel(const std::string &modelSource, + std::shared_ptr callInvoker) + : BaseModel(modelSource, callInvoker) {} + + virtual ~VisionModel() = default; + + /** + * @brief Thread-safe unload that waits for any in-flight inference to + * complete + * + * Overrides BaseModel::unload() to acquire inference_mutex_ before + * resetting the module. 
This prevents a crash where BaseModel::unload() + * destroys module_ while generateFromFrame() is still executing on the + * VisionCamera worklet thread. + */ + void unload() noexcept { + std::scoped_lock lock(inference_mutex_); + BaseModel::unload(); + } + +protected: + /** + * @brief Mutex to ensure thread-safe inference + * + * This mutex protects against race conditions when: + * - generateFromFrame() is called from VisionCamera worklet thread (30fps) + * - generate() is called from JavaScript thread simultaneously + * + * Usage guidelines: + * - Use std::lock_guard for blocking operations (JS API can wait) + * - Use try_lock() for non-blocking operations (camera should skip frames) + * + * @note Marked mutable to allow locking in const methods if needed + */ + mutable std::mutex inference_mutex_; + + /** + * @brief Preprocess a camera frame for model input + * + * This method should implement model-specific preprocessing such as: + * - Resizing to the model's expected input size + * - Color space conversion (e.g., BGR to RGB) + * - Normalization + * - Any other model-specific transformations + * + * @param frame Input frame from camera (already extracted and rotated by + * FrameExtractor) + * @return Preprocessed cv::Mat ready for tensor conversion + * + * @note The input frame is already in RGB format and rotated 90° clockwise + * @note This method is called under mutex protection in generateFromFrame() + */ + virtual cv::Mat preprocessFrame(const cv::Mat &frame) const = 0; + + /** + * @brief Extract and preprocess frame from VisionCamera in one call + * + * This is a convenience method that combines frame extraction and + * preprocessing. It handles both nativeBuffer (zero-copy) and ArrayBuffer + * paths automatically. 
+ * + * @param runtime JSI runtime + * @param frameData JSI value containing frame data from VisionCamera + * + * @return Preprocessed cv::Mat ready for tensor conversion + * + * @throws std::runtime_error if frame extraction fails + * + * @note This method does NOT acquire the inference mutex - caller is + * responsible + * @note Typical usage: + * @code + * cv::Mat preprocessed = extractFromFrame(runtime, frameData); + * auto tensor = image_processing::getTensorFromMatrix(dims, preprocessed); + * @endcode + */ + cv::Mat extractFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData) const; + + /** + * @brief Extract cv::Mat from raw pixel data (TensorPtr) sent from + * JavaScript + * + * This method enables users to run inference on raw pixel data without file + * I/O. Useful for processing images already in memory (e.g., from canvas, + * image library). + * + * @param tensorView JSTensorViewIn containing: + * - dataPtr: Pointer to raw pixel values (RGB format) + * - sizes: [height, width, channels] - must be 3D + * - scalarType: Must be ScalarType::Byte (Uint8Array) + * + * @return cv::Mat containing the pixel data + * + * @throws RnExecutorchError if tensorView format is invalid + * + * @note The returned cv::Mat owns a copy of the data + * @note Expected pixel format: RGB (3 channels), row-major order + * @note Typical usage from JS: + * @code + * const pixels = new Uint8Array([...]); // Raw RGB pixel data + * const result = model.generateFromPixels({ + * dataPtr: pixels, + * sizes: [480, 640, 3], + * scalarType: ScalarType.BYTE + * }, 0.5); + * @endcode + */ + cv::Mat extractFromPixels(const JSTensorViewIn &tensorView) const; +}; + +} // namespace models +// Register VisionModel constructor traits +// Even though VisionModel is abstract, the metaprogramming system needs to know +// its constructor signature for derived classes +REGISTER_CONSTRUCTOR(models::VisionModel, std::string, + std::shared_ptr); + +} // namespace rnexecutorch \ No newline at end 
of file diff --git a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp index 0fba07108..2a00d5dce 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp @@ -12,7 +12,7 @@ namespace rnexecutorch::models::classification { Classification::Classification(const std::string &modelSource, std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) { + : VisionModel(modelSource, callInvoker) { auto inputShapes = getAllInputShapes(); if (inputShapes.size() == 0) { throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, @@ -32,20 +32,78 @@ Classification::Classification(const std::string &modelSource, modelInputShape[modelInputShape.size() - 2]); } +cv::Mat Classification::preprocessFrame(const cv::Mat &frame) const { + cv::Mat rgb; + + if (frame.channels() == 4) { +#ifdef __APPLE__ + cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB); +#else + cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB); +#endif + } else if (frame.channels() == 3) { + rgb = frame; + } else { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported frame format: %d channels", frame.channels()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + if (rgb.size() != modelImageSize) { + cv::Mat resized; + cv::resize(rgb, resized, modelImageSize); + return resized; + } + + return rgb; +} + std::unordered_map -Classification::generate(std::string imageSource) { +Classification::runInference(cv::Mat image) { + std::scoped_lock lock(inference_mutex_); + + cv::Mat preprocessed = preprocessFrame(image); + + const std::vector tensorDims = getAllInputShapes()[0]; auto inputTensor = - image_processing::readImageToTensor(imageSource, 
getAllInputShapes()[0]) - .first; + image_processing::getTensorFromMatrix(tensorDims, preprocessed); + auto forwardResult = BaseModel::forward(inputTensor); if (!forwardResult.ok()) { throw RnExecutorchError(forwardResult.error(), "The model's forward function did not succeed. " "Ensure the model input is correct."); } + return postprocess(forwardResult->at(0).toTensor()); } +std::unordered_map +Classification::generateFromString(std::string imageSource) { + cv::Mat imageBGR = image_processing::readImage(imageSource); + + cv::Mat imageRGB; + cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); + + return runInference(imageRGB); +} + +std::unordered_map +Classification::generateFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData) { + cv::Mat frame = extractFromFrame(runtime, frameData); + return runInference(frame); +} + +std::unordered_map +Classification::generateFromPixels(JSTensorViewIn pixelData) { + cv::Mat image = extractFromPixels(pixelData); + + return runInference(image); +} + std::unordered_map Classification::postprocess(const Tensor &tensor) { std::span resultData( diff --git a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.h b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.h index 1465fc5f9..473d9b4bb 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.h @@ -3,25 +3,40 @@ #include #include +#include #include #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" -#include +#include namespace rnexecutorch { namespace models::classification { using executorch::aten::Tensor; using executorch::extension::TensorPtr; -class Classification : public BaseModel { +class Classification : public VisionModel { public: Classification(const std::string &modelSource, std::shared_ptr callInvoker); + 
[[nodiscard("Registered non-void function")]] std::unordered_map< std::string_view, float> - generate(std::string imageSource); + generateFromString(std::string imageSource); + + [[nodiscard("Registered non-void function")]] std::unordered_map< + std::string_view, float> + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData); + + [[nodiscard("Registered non-void function")]] std::unordered_map< + std::string_view, float> + generateFromPixels(JSTensorViewIn pixelData); + +protected: + cv::Mat preprocessFrame(const cv::Mat &frame) const override; private: + std::unordered_map runInference(cv::Mat image); + std::unordered_map postprocess(const Tensor &tensor); cv::Size modelImageSize{0, 0}; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp index ec3129e76..a82fffbb2 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp @@ -1,17 +1,18 @@ #include "ImageEmbeddings.h" +#include + #include #include #include #include -#include namespace rnexecutorch::models::embeddings { ImageEmbeddings::ImageEmbeddings( const std::string &modelSource, std::shared_ptr callInvoker) - : BaseEmbeddings(modelSource, callInvoker) { + : VisionModel(modelSource, callInvoker) { auto inputTensors = getAllInputShapes(); if (inputTensors.size() == 0) { throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, @@ -31,10 +32,43 @@ ImageEmbeddings::ImageEmbeddings( modelInputShape[modelInputShape.size() - 2]); } +cv::Mat ImageEmbeddings::preprocessFrame(const cv::Mat &frame) const { + cv::Mat rgb; + + if (frame.channels() == 4) { +#ifdef __APPLE__ + cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB); +#else + cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB); +#endif + } else 
if (frame.channels() == 3) { + rgb = frame; + } else { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported frame format: %d channels", frame.channels()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + if (rgb.size() != modelImageSize) { + cv::Mat resized; + cv::resize(rgb, resized, modelImageSize); + return resized; + } + + return rgb; +} + std::shared_ptr -ImageEmbeddings::generate(std::string imageSource) { - auto [inputTensor, originalSize] = - image_processing::readImageToTensor(imageSource, getAllInputShapes()[0]); +ImageEmbeddings::runInference(cv::Mat image) { + std::scoped_lock lock(inference_mutex_); + + cv::Mat preprocessed = preprocessFrame(image); + + const std::vector tensorDims = getAllInputShapes()[0]; + auto inputTensor = + image_processing::getTensorFromMatrix(tensorDims, preprocessed); auto forwardResult = BaseModel::forward(inputTensor); @@ -45,7 +79,33 @@ ImageEmbeddings::generate(std::string imageSource) { "is correct."); } - return BaseEmbeddings::postprocess(forwardResult); + auto forwardResultTensor = forwardResult->at(0).toTensor(); + return std::make_shared( + forwardResultTensor.const_data_ptr(), forwardResultTensor.nbytes()); +} + +std::shared_ptr +ImageEmbeddings::generateFromString(std::string imageSource) { + cv::Mat imageBGR = image_processing::readImage(imageSource); + + cv::Mat imageRGB; + cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); + + return runInference(imageRGB); +} + +std::shared_ptr +ImageEmbeddings::generateFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData) { + cv::Mat frame = extractFromFrame(runtime, frameData); + return runInference(frame); +} + +std::shared_ptr +ImageEmbeddings::generateFromPixels(JSTensorViewIn pixelData) { + cv::Mat image = extractFromPixels(pixelData); + + return runInference(image); } } // namespace rnexecutorch::models::embeddings diff --git 
a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h index 7e114e939..ec11ee5c6 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h @@ -2,25 +2,41 @@ #include #include +#include #include #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" -#include +#include +#include namespace rnexecutorch { namespace models::embeddings { using executorch::extension::TensorPtr; using executorch::runtime::EValue; -class ImageEmbeddings final : public BaseEmbeddings { +class ImageEmbeddings final : public VisionModel { public: ImageEmbeddings(const std::string &modelSource, std::shared_ptr callInvoker); + [[nodiscard( "Registered non-void function")]] std::shared_ptr - generate(std::string imageSource); + generateFromString(std::string imageSource); + + [[nodiscard( + "Registered non-void function")]] std::shared_ptr + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData); + + [[nodiscard( + "Registered non-void function")]] std::shared_ptr + generateFromPixels(JSTensorViewIn pixelData); + +protected: + cv::Mat preprocessFrame(const cv::Mat &frame) const override; private: + std::shared_ptr runInference(cv::Mat image); + cv::Size modelImageSize{0, 0}; }; } // namespace models::embeddings diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.cpp b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.cpp index 141ec430e..3a2bfd0cf 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.cpp @@ 
-1,7 +1,4 @@ #include "BaseImageSegmentation.h" -#include "jsi/jsi.h" - -#include #include #include @@ -14,14 +11,14 @@ namespace rnexecutorch::models::image_segmentation { BaseImageSegmentation::BaseImageSegmentation( const std::string &modelSource, std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) { + : VisionModel(modelSource, callInvoker) { initModelImageSize(); } BaseImageSegmentation::BaseImageSegmentation( const std::string &modelSource, std::vector normMean, std::vector normStd, std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) { + : VisionModel(modelSource, callInvoker) { initModelImageSize(); if (normMean.size() == 3) { normMean_ = cv::Scalar(normMean[0], normMean[1], normMean[2]); @@ -55,7 +52,43 @@ void BaseImageSegmentation::initModelImageSize() { numModelPixels = modelImageSize.area(); } -TensorPtr BaseImageSegmentation::preprocess(const std::string &imageSource, +cv::Mat BaseImageSegmentation::preprocessFrame(const cv::Mat &frame) const { + cv::Mat rgb; + + if (frame.channels() == 4) { +#ifdef __APPLE__ + cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB); +#else + cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB); +#endif + } else if (frame.channels() == 3) { + rgb = frame; + } else { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported frame format: %d channels", frame.channels()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + cv::Mat processed; + if (rgb.size() != modelImageSize) { + cv::resize(rgb, processed, modelImageSize); + } else { + processed = rgb; + } + + if (normMean_.has_value() && normStd_.has_value()) { + processed.convertTo(processed, CV_32FC3, 1.0 / 255.0); + processed -= *normMean_; + processed /= *normStd_; + } + + return processed; +} + +TensorPtr +BaseImageSegmentation::preprocessFromString(const std::string &imageSource, cv::Size &originalSize) { auto [inputTensor, origSize] = image_processing::readImageToTensor( 
imageSource, getAllInputShapes()[0], false, normMean_, normStd_); @@ -63,12 +96,35 @@ TensorPtr BaseImageSegmentation::preprocess(const std::string &imageSource, return inputTensor; } -std::shared_ptr BaseImageSegmentation::generate( +SegmentationResult BaseImageSegmentation::runInference( + cv::Mat image, cv::Size originalSize, std::vector allClasses, + std::set> classesOfInterest, bool resize) { + std::scoped_lock lock(inference_mutex_); + + cv::Mat preprocessed = preprocessFrame(image); + + const std::vector tensorDims = getAllInputShapes()[0]; + auto inputTensor = + image_processing::getTensorFromMatrix(tensorDims, preprocessed); + + auto forwardResult = BaseModel::forward(inputTensor); + + if (!forwardResult.ok()) { + throw RnExecutorchError(forwardResult.error(), + "The model's forward function did not succeed. " + "Ensure the model input is correct."); + } + + return postprocess(forwardResult->at(0).toTensor(), originalSize, allClasses, + classesOfInterest, resize); +} + +SegmentationResult BaseImageSegmentation::generateFromString( std::string imageSource, std::vector allClasses, std::set> classesOfInterest, bool resize) { cv::Size originalSize; - auto inputTensor = preprocess(imageSource, originalSize); + auto inputTensor = preprocessFromString(imageSource, originalSize); auto forwardResult = BaseModel::forward(inputTensor); @@ -82,7 +138,29 @@ std::shared_ptr BaseImageSegmentation::generate( classesOfInterest, resize); } -std::shared_ptr BaseImageSegmentation::postprocess( +SegmentationResult BaseImageSegmentation::generateFromFrame( + jsi::Runtime &runtime, const jsi::Value &frameData, + std::vector allClasses, + std::set> classesOfInterest, bool resize) { + // extractFromFrame rotates landscape frames 90° CW automatically. 
+ cv::Mat frame = extractFromFrame(runtime, frameData); + cv::Size originalSize = frame.size(); + + return runInference(frame, originalSize, std::move(allClasses), + std::move(classesOfInterest), resize); +} + +SegmentationResult BaseImageSegmentation::generateFromPixels( + JSTensorViewIn pixelData, std::vector allClasses, + std::set> classesOfInterest, bool resize) { + cv::Mat image = extractFromPixels(pixelData); + cv::Size originalSize = image.size(); + + return runInference(image, originalSize, std::move(allClasses), + std::move(classesOfInterest), resize); +} + +SegmentationResult BaseImageSegmentation::postprocess( const Tensor &tensor, cv::Size originalSize, std::vector &allClasses, std::set> &classesOfInterest, bool resize) { @@ -167,8 +245,8 @@ std::shared_ptr BaseImageSegmentation::postprocess( } // Filter classes of interest - auto buffersToReturn = std::make_shared>>(); + auto buffersToReturn = std::make_shared< + std::unordered_map>>(); for (std::size_t cl = 0; cl < resultClasses.size(); ++cl) { if (cl < allClasses.size() && classesOfInterest.contains(allClasses[cl])) { (*buffersToReturn)[allClasses[cl]] = resultClasses[cl]; @@ -191,48 +269,7 @@ std::shared_ptr BaseImageSegmentation::postprocess( } } - return populateDictionary(argmax, buffersToReturn); -} - -std::shared_ptr BaseImageSegmentation::populateDictionary( - std::shared_ptr argmax, - std::shared_ptr>> - classesToOutput) { - auto promisePtr = std::make_shared>(); - std::future doneFuture = promisePtr->get_future(); - - std::shared_ptr dictPtr = nullptr; - callInvoker->invokeAsync( - [argmax, classesToOutput, &dictPtr, promisePtr](jsi::Runtime &runtime) { - dictPtr = std::make_shared(runtime); - auto argmaxArrayBuffer = jsi::ArrayBuffer(runtime, argmax); - - auto int32ArrayCtor = - runtime.global().getPropertyAsFunction(runtime, "Int32Array"); - auto int32Array = - int32ArrayCtor.callAsConstructor(runtime, argmaxArrayBuffer) - .getObject(runtime); - dictPtr->setProperty(runtime, "ARGMAX", 
int32Array); - - for (auto &[classLabel, owningBuffer] : *classesToOutput) { - auto classArrayBuffer = jsi::ArrayBuffer(runtime, owningBuffer); - - auto float32ArrayCtor = - runtime.global().getPropertyAsFunction(runtime, "Float32Array"); - auto float32Array = - float32ArrayCtor.callAsConstructor(runtime, classArrayBuffer) - .getObject(runtime); - - dictPtr->setProperty( - runtime, jsi::String::createFromAscii(runtime, classLabel.data()), - float32Array); - } - promisePtr->set_value(); - }); - - doneFuture.wait(); - return dictPtr; + return SegmentationResult{argmax, buffersToReturn}; } } // namespace rnexecutorch::models::image_segmentation diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h index f46f41d69..49daf5ee5 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h @@ -8,7 +8,8 @@ #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" #include -#include +#include +#include namespace rnexecutorch { namespace models::image_segmentation { @@ -17,7 +18,7 @@ using namespace facebook; using executorch::aten::Tensor; using executorch::extension::TensorPtr; -class BaseImageSegmentation : public BaseModel { +class BaseImageSegmentation : public VisionModel { public: BaseImageSegmentation(const std::string &modelSource, std::shared_ptr callInvoker); @@ -26,14 +27,28 @@ class BaseImageSegmentation : public BaseModel { std::vector normMean, std::vector normStd, std::shared_ptr callInvoker); - [[nodiscard("Registered non-void function")]] std::shared_ptr - generate(std::string imageSource, std::vector allClasses, - std::set> classesOfInterest, bool resize); + [[nodiscard("Registered non-void function")]] SegmentationResult + 
generateFromString(std::string imageSource, + std::vector allClasses, + std::set> classesOfInterest, + bool resize); + + [[nodiscard("Registered non-void function")]] SegmentationResult + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData, + std::vector allClasses, + std::set> classesOfInterest, + bool resize); + + [[nodiscard("Registered non-void function")]] SegmentationResult + generateFromPixels(JSTensorViewIn pixelData, + std::vector allClasses, + std::set> classesOfInterest, + bool resize); protected: - virtual TensorPtr preprocess(const std::string &imageSource, - cv::Size &originalSize); - virtual std::shared_ptr + cv::Mat preprocessFrame(const cv::Mat &frame) const override; + + virtual SegmentationResult postprocess(const Tensor &tensor, cv::Size originalSize, std::vector &allClasses, std::set> &classesOfInterest, @@ -44,14 +59,15 @@ class BaseImageSegmentation : public BaseModel { std::optional normMean_; std::optional normStd_; - std::shared_ptr populateDictionary( - std::shared_ptr argmax, - std::shared_ptr>> - classesToOutput); - private: void initModelImageSize(); + + SegmentationResult runInference( + cv::Mat image, cv::Size originalSize, std::vector allClasses, + std::set> classesOfInterest, bool resize); + + TensorPtr preprocessFromString(const std::string &imageSource, + cv::Size &originalSize); }; } // namespace models::image_segmentation diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/Types.h b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/Types.h new file mode 100644 index 000000000..b5d6f5067 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/Types.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include +#include +#include + +namespace rnexecutorch::models::image_segmentation { + +struct SegmentationResult { + std::shared_ptr argmax; + std::shared_ptr< + std::unordered_map>> + classBuffers; +}; + +} // 
namespace rnexecutorch::models::image_segmentation diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp index 8b5bc022f..7f7216b02 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp @@ -2,14 +2,16 @@ #include #include +#include #include +#include namespace rnexecutorch::models::object_detection { ObjectDetection::ObjectDetection( const std::string &modelSource, std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) { + : VisionModel(modelSource, callInvoker) { auto inputTensors = getAllInputShapes(); if (inputTensors.size() == 0) { throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, @@ -29,14 +31,42 @@ ObjectDetection::ObjectDetection( modelInputShape[modelInputShape.size() - 2]); } +cv::Mat ObjectDetection::preprocessFrame(const cv::Mat &frame) const { + const std::vector tensorDims = getAllInputShapes()[0]; + cv::Size tensorSize = cv::Size(tensorDims[tensorDims.size() - 1], + tensorDims[tensorDims.size() - 2]); + + cv::Mat rgb; + + if (frame.channels() == 4) { +#ifdef __APPLE__ + cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB); +#else + cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB); +#endif + } else if (frame.channels() == 3) { + rgb = frame; + } else { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported frame format: %d channels", frame.channels()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + // Only resize if dimensions don't match + if (rgb.size() != tensorSize) { + cv::Mat resized; + cv::resize(rgb, resized, tensorSize); + return resized; + } + + return rgb; +} + std::vector ObjectDetection::postprocess(const std::vector &tensors, 
cv::Size originalSize, double detectionThreshold) { - if (detectionThreshold <= 0 || detectionThreshold > 1) { - throw RnExecutorchError(RnExecutorchErrorCode::InvalidConfig, - "Detection threshold must be greater than 0 " - "and less than or equal to 1."); - } float widthRatio = static_cast(originalSize.width) / modelImageSize.width; float heightRatio = @@ -70,14 +100,23 @@ ObjectDetection::postprocess(const std::vector &tensors, scores[i]); } - std::vector output = utils::nonMaxSuppression(detections); - return output; + return utils::nonMaxSuppression(detections); } std::vector -ObjectDetection::generate(std::string imageSource, double detectionThreshold) { - auto [inputTensor, originalSize] = - image_processing::readImageToTensor(imageSource, getAllInputShapes()[0]); +ObjectDetection::runInference(cv::Mat image, double detectionThreshold) { + if (detectionThreshold < 0.0 || detectionThreshold > 1.0) { + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + "detectionThreshold must be in range [0, 1]"); + } + std::scoped_lock lock(inference_mutex_); + + cv::Size originalSize = image.size(); + cv::Mat preprocessed = preprocessFrame(image); + + const std::vector tensorDims = getAllInputShapes()[0]; + auto inputTensor = + image_processing::getTensorFromMatrix(tensorDims, preprocessed); auto forwardResult = BaseModel::forward(inputTensor); if (!forwardResult.ok()) { @@ -88,4 +127,31 @@ ObjectDetection::generate(std::string imageSource, double detectionThreshold) { return postprocess(forwardResult.get(), originalSize, detectionThreshold); } -} // namespace rnexecutorch::models::object_detection + +std::vector +ObjectDetection::generateFromString(std::string imageSource, + double detectionThreshold) { + cv::Mat imageBGR = image_processing::readImage(imageSource); + + cv::Mat imageRGB; + cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); + + return runInference(imageRGB, detectionThreshold); +} + +std::vector 
+ObjectDetection::generateFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData, + double detectionThreshold) { + cv::Mat frame = extractFromFrame(runtime, frameData); + return runInference(frame, detectionThreshold); +} + +std::vector +ObjectDetection::generateFromPixels(JSTensorViewIn pixelData, + double detectionThreshold) { + cv::Mat image = extractFromPixels(pixelData); + + return runInference(image, detectionThreshold); +} +} // namespace rnexecutorch::models::object_detection \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h index bba09a6d8..d32eea95e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h @@ -8,7 +8,7 @@ #include "Types.h" #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" -#include +#include #include namespace rnexecutorch { @@ -16,12 +16,22 @@ namespace models::object_detection { using executorch::extension::TensorPtr; using executorch::runtime::EValue; -class ObjectDetection : public BaseModel { +class ObjectDetection : public VisionModel { public: ObjectDetection(const std::string &modelSource, std::shared_ptr callInvoker); [[nodiscard("Registered non-void function")]] std::vector - generate(std::string imageSource, double detectionThreshold); + generateFromString(std::string imageSource, double detectionThreshold); + [[nodiscard("Registered non-void function")]] std::vector + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData, + double detectionThreshold); + [[nodiscard("Registered non-void function")]] std::vector + generateFromPixels(JSTensorViewIn pixelData, double detectionThreshold); + +protected: + std::vector runInference(cv::Mat image, + double 
detectionThreshold); + cv::Mat preprocessFrame(const cv::Mat &frame) const override; private: std::vector postprocess(const std::vector &tensors, diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp index a521b4e8b..50834a1b8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace rnexecutorch::models::ocr { OCR::OCR(const std::string &detectorSource, const std::string &recognizerSource, @@ -12,12 +13,8 @@ OCR::OCR(const std::string &detectorSource, const std::string &recognizerSource, : detector(detectorSource, callInvoker), recognitionHandler(recognizerSource, symbols, callInvoker) {} -std::vector OCR::generate(std::string input) { - cv::Mat image = image_processing::readImage(input); - if (image.empty()) { - throw RnExecutorchError(RnExecutorchErrorCode::FileReadFailed, - "Failed to load image from path: " + input); - } +std::vector OCR::runInference(cv::Mat image) { + std::scoped_lock lock(inference_mutex_); /* 1. 
Detection process returns the list of bounding boxes containing areas @@ -43,6 +40,63 @@ std::vector OCR::generate(std::string input) { return result; } +std::vector OCR::generateFromString(std::string input) { + cv::Mat image = image_processing::readImage(input); + if (image.empty()) { + throw RnExecutorchError(RnExecutorchErrorCode::FileReadFailed, + "Failed to load image from path: " + input); + } + return runInference(image); +} + +std::vector +OCR::generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData) { + auto frameObj = frameData.asObject(runtime); + cv::Mat frame = ::rnexecutorch::utils::extractFrame(runtime, frameObj); + // extractFrame returns RGB; convert to BGR for consistency with readImage + cv::cvtColor(frame, frame, cv::COLOR_RGB2BGR); + return runInference(frame); +} + +std::vector +OCR::generateFromPixels(JSTensorViewIn pixelData) { + if (pixelData.sizes.size() != 3) { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Invalid pixel data: sizes must have 3 elements " + "[height, width, channels], got %zu", + pixelData.sizes.size()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + int32_t height = pixelData.sizes[0]; + int32_t width = pixelData.sizes[1]; + int32_t channels = pixelData.sizes[2]; + + if (channels != 3) { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Invalid pixel data: expected 3 channels (RGB), got %d", + channels); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + if (pixelData.scalarType != executorch::aten::ScalarType::Byte) { + throw RnExecutorchError( + RnExecutorchErrorCode::InvalidUserInput, + "Invalid pixel data: scalarType must be BYTE (Uint8Array)"); + } + + uint8_t *dataPtr = static_cast(pixelData.dataPtr); + // Input is RGB from JS; convert to BGR for consistency with readImage + cv::Mat rgbImage(height, width, CV_8UC3, dataPtr); + cv::Mat image; + 
cv::cvtColor(rgbImage, image, cv::COLOR_RGB2BGR); + return runInference(image); +} + std::size_t OCR::getMemoryLowerBound() const noexcept { return detector.getMemoryLowerBound() + recognitionHandler.getMemoryLowerBound(); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h index d84ba903f..719cb957c 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h @@ -1,9 +1,11 @@ #pragma once +#include #include #include #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" +#include #include #include #include @@ -28,13 +30,20 @@ class OCR final { const std::string &recognizerSource, const std::string &symbols, std::shared_ptr callInvoker); [[nodiscard("Registered non-void function")]] std::vector - generate(std::string input); + generateFromString(std::string input); + [[nodiscard("Registered non-void function")]] std::vector + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData); + [[nodiscard("Registered non-void function")]] std::vector + generateFromPixels(JSTensorViewIn pixelData); std::size_t getMemoryLowerBound() const noexcept; void unload() noexcept; private: + std::vector runInference(cv::Mat image); + Detector detector; RecognitionHandler recognitionHandler; + mutable std::mutex inference_mutex_; }; } // namespace models::ocr diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp index 3b9c0187b..c334f5d84 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace 
rnexecutorch::models::style_transfer { using namespace facebook; @@ -13,7 +14,7 @@ using executorch::extension::TensorPtr; StyleTransfer::StyleTransfer(const std::string &modelSource, std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) { + : VisionModel(modelSource, callInvoker) { auto inputShapes = getAllInputShapes(); if (inputShapes.size() == 0) { throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, @@ -33,17 +34,67 @@ StyleTransfer::StyleTransfer(const std::string &modelSource, modelInputShape[modelInputShape.size() - 2]); } -std::string StyleTransfer::postprocess(const Tensor &tensor, - cv::Size originalSize) { +cv::Mat StyleTransfer::preprocessFrame(const cv::Mat &frame) const { + cv::Mat rgb; + + if (frame.channels() == 4) { +#ifdef __APPLE__ + cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB); +#else + cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB); +#endif + } else if (frame.channels() == 3) { + rgb = frame; + } else { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported frame format: %d channels", frame.channels()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + if (rgb.size() != modelImageSize) { + cv::Mat resized; + cv::resize(rgb, resized, modelImageSize); + return resized; + } + + return rgb; +} + +PixelDataResult StyleTransfer::postprocess(const Tensor &tensor, + cv::Size outputSize) { + // Convert tensor output (at modelImageSize) to CV_8UC3 BGR mat cv::Mat mat = image_processing::getMatrixFromTensor(modelImageSize, tensor); - cv::resize(mat, mat, originalSize); - return image_processing::saveToTempFile(mat); + // Resize only if requested output differs from model output size + if (mat.size() != outputSize) { + cv::resize(mat, mat, outputSize); + } + + // Convert BGR -> RGBA so JS can pass the buffer directly to Skia + cv::Mat rgba; + cv::cvtColor(mat, rgba, cv::COLOR_BGR2RGBA); + + std::size_t dataSize = + static_cast(outputSize.width) * 
outputSize.height * 4; + auto pixelBuffer = std::make_shared(rgba.data, dataSize); + log(LOG_LEVEL::Debug, + "[StyleTransfer] postprocess: RGBA buffer size:", dataSize, + "w:", outputSize.width, "h:", outputSize.height); + + return PixelDataResult{pixelBuffer, outputSize.width, outputSize.height}; } -std::string StyleTransfer::generate(std::string imageSource) { - auto [inputTensor, originalSize] = - image_processing::readImageToTensor(imageSource, getAllInputShapes()[0]); +PixelDataResult StyleTransfer::runInference(cv::Mat image, + cv::Size originalSize) { + std::scoped_lock lock(inference_mutex_); + + cv::Mat preprocessed = preprocessFrame(image); + + const std::vector tensorDims = getAllInputShapes()[0]; + auto inputTensor = + image_processing::getTensorFromMatrix(tensorDims, preprocessed); auto forwardResult = BaseModel::forward(inputTensor); if (!forwardResult.ok()) { @@ -55,4 +106,31 @@ std::string StyleTransfer::generate(std::string imageSource) { return postprocess(forwardResult->at(0).toTensor(), originalSize); } +PixelDataResult StyleTransfer::generateFromString(std::string imageSource) { + cv::Mat imageBGR = image_processing::readImage(imageSource); + cv::Size originalSize = imageBGR.size(); + + cv::Mat imageRGB; + cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); + + return runInference(imageRGB, originalSize); +} + +PixelDataResult StyleTransfer::generateFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData) { + // extractFromFrame rotates landscape frames 90° CW automatically. + cv::Mat frame = extractFromFrame(runtime, frameData); + + // For real-time frame processing, output at modelImageSize to avoid + // allocating large buffers (e.g. 1280x720x3 ~2.7MB) on every frame. 
+ return runInference(frame, modelImageSize); +} + +PixelDataResult StyleTransfer::generateFromPixels(JSTensorViewIn pixelData) { + cv::Mat image = extractFromPixels(pixelData); + cv::Size originalSize = image.size(); + + return runInference(image, originalSize); +} + } // namespace rnexecutorch::models::style_transfer diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h index 73744c4d8..99f9f4b3a 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h @@ -9,7 +9,9 @@ #include #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" -#include +#include +#include +#include namespace rnexecutorch { namespace models::style_transfer { @@ -17,15 +19,30 @@ using namespace facebook; using executorch::aten::Tensor; using executorch::extension::TensorPtr; -class StyleTransfer : public BaseModel { +class StyleTransfer : public VisionModel { public: StyleTransfer(const std::string &modelSource, std::shared_ptr callInvoker); - [[nodiscard("Registered non-void function")]] std::string - generate(std::string imageSource); + + [[nodiscard("Registered non-void function")]] PixelDataResult + generateFromString(std::string imageSource); + + [[nodiscard("Registered non-void function")]] PixelDataResult + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData); + + [[nodiscard("Registered non-void function")]] PixelDataResult + generateFromPixels(JSTensorViewIn pixelData); + +protected: + cv::Mat preprocessFrame(const cv::Mat &frame) const override; private: - std::string postprocess(const Tensor &tensor, cv::Size originalSize); + // outputSize: size to resize the styled output to before returning. + // Pass modelImageSize for real-time frame processing (avoids large allocs). 
+ // Pass the source image size for generateFromString/generateFromPixels. + PixelDataResult runInference(cv::Mat image, cv::Size outputSize); + + PixelDataResult postprocess(const Tensor &tensor, cv::Size outputSize); cv::Size modelImageSize{0, 0}; }; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/Types.h b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/Types.h new file mode 100644 index 000000000..f677183a6 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/Types.h @@ -0,0 +1,14 @@ +#pragma once + +#include +#include + +namespace rnexecutorch::models::style_transfer { + +struct PixelDataResult { + std::shared_ptr dataPtr; + int width; + int height; +}; + +} // namespace rnexecutorch::models::style_transfer diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp index 0f75d2015..71ea737f8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp @@ -1,10 +1,12 @@ #include "VerticalOCR.h" #include #include +#include #include #include #include #include +#include #include namespace rnexecutorch::models::ocr { @@ -16,12 +18,9 @@ VerticalOCR::VerticalOCR(const std::string &detectorSource, converter(symbols), independentCharacters(independentChars), callInvoker(invoker) {} -std::vector VerticalOCR::generate(std::string input) { - cv::Mat image = image_processing::readImage(input); - if (image.empty()) { - throw RnExecutorchError(RnExecutorchErrorCode::FileReadFailed, - "Failed to load image from path: " + input); - } +std::vector VerticalOCR::runInference(cv::Mat image) { + std::scoped_lock lock(inference_mutex_); + // 1. 
Large Detector std::vector largeBoxes = detector.generate(image, constants::kLargeDetectorWidth); @@ -44,6 +43,65 @@ std::vector VerticalOCR::generate(std::string input) { return predictions; } +std::vector +VerticalOCR::generateFromString(std::string input) { + cv::Mat image = image_processing::readImage(input); + if (image.empty()) { + throw RnExecutorchError(RnExecutorchErrorCode::FileReadFailed, + "Failed to load image from path: " + input); + } + return runInference(image); +} + +std::vector +VerticalOCR::generateFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData) { + auto frameObj = frameData.asObject(runtime); + cv::Mat frame = ::rnexecutorch::utils::extractFrame(runtime, frameObj); + // extractFrame returns RGB; convert to BGR for consistency with readImage + cv::cvtColor(frame, frame, cv::COLOR_RGB2BGR); + return runInference(frame); +} + +std::vector +VerticalOCR::generateFromPixels(JSTensorViewIn pixelData) { + if (pixelData.sizes.size() != 3) { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Invalid pixel data: sizes must have 3 elements " + "[height, width, channels], got %zu", + pixelData.sizes.size()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + int32_t height = pixelData.sizes[0]; + int32_t width = pixelData.sizes[1]; + int32_t channels = pixelData.sizes[2]; + + if (channels != 3) { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Invalid pixel data: expected 3 channels (RGB), got %d", + channels); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + if (pixelData.scalarType != executorch::aten::ScalarType::Byte) { + throw RnExecutorchError( + RnExecutorchErrorCode::InvalidUserInput, + "Invalid pixel data: scalarType must be BYTE (Uint8Array)"); + } + + uint8_t *dataPtr = static_cast(pixelData.dataPtr); + // Input is RGB from JS; convert to BGR for consistency with readImage + cv::Mat 
rgbImage(height, width, CV_8UC3, dataPtr); + cv::Mat image; + cv::cvtColor(rgbImage, image, cv::COLOR_RGB2BGR); + return runInference(image); +} + std::size_t VerticalOCR::getMemoryLowerBound() const noexcept { return detector.getMemoryLowerBound() + recognizer.getMemoryLowerBound(); } diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h index e97fb9034..4016e2813 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h @@ -1,12 +1,14 @@ #pragma once #include +#include #include #include #include #include #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" +#include #include #include #include @@ -48,11 +50,17 @@ class VerticalOCR final { bool indpendentCharacters, std::shared_ptr callInvoker); [[nodiscard("Registered non-void function")]] std::vector - generate(std::string input); + generateFromString(std::string input); + [[nodiscard("Registered non-void function")]] std::vector + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData); + [[nodiscard("Registered non-void function")]] std::vector + generateFromPixels(JSTensorViewIn pixelData); std::size_t getMemoryLowerBound() const noexcept; void unload() noexcept; private: + std::vector runInference(cv::Mat image); + std::pair _handleIndependentCharacters( const types::DetectorBBox &box, const cv::Mat &originalImage, const std::vector &characterBoxes, @@ -75,6 +83,7 @@ class VerticalOCR final { CTCLabelConverter converter; bool independentCharacters; std::shared_ptr callInvoker; + mutable std::mutex inference_mutex_; }; } // namespace models::ocr diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt index 
e2a8c16bf..79c0b3129 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt +++ b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt @@ -148,24 +148,33 @@ add_rn_test(BaseModelTests integration/BaseModelTest.cpp) add_rn_test(ClassificationTests integration/ClassificationTest.cpp SOURCES ${RNEXECUTORCH_DIR}/models/classification/Classification.cpp + ${RNEXECUTORCH_DIR}/models/VisionModel.cpp + ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp + ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${IMAGE_UTILS_SOURCES} - LIBS opencv_deps + LIBS opencv_deps android ) add_rn_test(ObjectDetectionTests integration/ObjectDetectionTest.cpp SOURCES ${RNEXECUTORCH_DIR}/models/object_detection/ObjectDetection.cpp ${RNEXECUTORCH_DIR}/models/object_detection/Utils.cpp + ${RNEXECUTORCH_DIR}/models/VisionModel.cpp + ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp + ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${IMAGE_UTILS_SOURCES} - LIBS opencv_deps + LIBS opencv_deps android ) add_rn_test(ImageEmbeddingsTests integration/ImageEmbeddingsTest.cpp SOURCES ${RNEXECUTORCH_DIR}/models/embeddings/image/ImageEmbeddings.cpp ${RNEXECUTORCH_DIR}/models/embeddings/BaseEmbeddings.cpp + ${RNEXECUTORCH_DIR}/models/VisionModel.cpp + ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp + ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${IMAGE_UTILS_SOURCES} - LIBS opencv_deps + LIBS opencv_deps android ) add_rn_test(TextEmbeddingsTests integration/TextEmbeddingsTest.cpp @@ -179,8 +188,11 @@ add_rn_test(TextEmbeddingsTests integration/TextEmbeddingsTest.cpp add_rn_test(StyleTransferTests integration/StyleTransferTest.cpp SOURCES ${RNEXECUTORCH_DIR}/models/style_transfer/StyleTransfer.cpp + ${RNEXECUTORCH_DIR}/models/VisionModel.cpp + ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp + ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${IMAGE_UTILS_SOURCES} - LIBS opencv_deps + LIBS opencv_deps android ) add_rn_test(VADTests integration/VoiceActivityDetectionTest.cpp @@ -241,8 
+253,10 @@ add_rn_test(OCRTests integration/OCRTest.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/DetectorUtils.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/RecognitionHandlerUtils.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/RecognizerUtils.cpp + ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp + ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${IMAGE_UTILS_SOURCES} - LIBS opencv_deps + LIBS opencv_deps android ) add_rn_test(VerticalOCRTests integration/VerticalOCRTest.cpp @@ -255,6 +269,8 @@ add_rn_test(VerticalOCRTests integration/VerticalOCRTest.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/DetectorUtils.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/RecognitionHandlerUtils.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/RecognizerUtils.cpp + ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp + ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${IMAGE_UTILS_SOURCES} - LIBS opencv_deps + LIBS opencv_deps android ) diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ClassificationTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ClassificationTest.cpp index 10aa663a4..b64f167c9 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ClassificationTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ClassificationTest.cpp @@ -28,7 +28,7 @@ template <> struct ModelTraits { } static void callGenerate(ModelType &model) { - (void)model.generate(kValidTestImagePath); + (void)model.generateFromString(kValidTestImagePath); } }; } // namespace model_tests @@ -42,37 +42,37 @@ INSTANTIATE_TYPED_TEST_SUITE_P(Classification, CommonModelTest, // ============================================================================ TEST(ClassificationGenerateTests, InvalidImagePathThrows) { Classification model(kValidClassificationModelPath, nullptr); - EXPECT_THROW((void)model.generate("nonexistent_image.jpg"), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg"), RnExecutorchError); } 
TEST(ClassificationGenerateTests, EmptyImagePathThrows) { Classification model(kValidClassificationModelPath, nullptr); - EXPECT_THROW((void)model.generate(""), RnExecutorchError); + EXPECT_THROW((void)model.generateFromString(""), RnExecutorchError); } TEST(ClassificationGenerateTests, MalformedURIThrows) { Classification model(kValidClassificationModelPath, nullptr); - EXPECT_THROW((void)model.generate("not_a_valid_uri://bad"), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad"), RnExecutorchError); } TEST(ClassificationGenerateTests, ValidImageReturnsResults) { Classification model(kValidClassificationModelPath, nullptr); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); EXPECT_FALSE(results.empty()); } TEST(ClassificationGenerateTests, ResultsHaveCorrectSize) { Classification model(kValidClassificationModelPath, nullptr); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); auto expectedNumClasses = constants::kImagenet1kV1Labels.size(); EXPECT_EQ(results.size(), expectedNumClasses); } TEST(ClassificationGenerateTests, ResultsContainValidProbabilities) { Classification model(kValidClassificationModelPath, nullptr); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); float sum = 0.0f; for (const auto &[label, prob] : results) { @@ -85,7 +85,7 @@ TEST(ClassificationGenerateTests, ResultsContainValidProbabilities) { TEST(ClassificationGenerateTests, TopPredictionHasReasonableConfidence) { Classification model(kValidClassificationModelPath, nullptr); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); float maxProb = 0.0f; for (const auto &[label, prob] : results) { diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ImageEmbeddingsTest.cpp 
b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ImageEmbeddingsTest.cpp index 3a2374695..ba76939a8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ImageEmbeddingsTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ImageEmbeddingsTest.cpp @@ -29,7 +29,7 @@ template <> struct ModelTraits { } static void callGenerate(ModelType &model) { - (void)model.generate(kValidTestImagePath); + (void)model.generateFromString(kValidTestImagePath); } }; } // namespace model_tests @@ -43,31 +43,31 @@ INSTANTIATE_TYPED_TEST_SUITE_P(ImageEmbeddings, CommonModelTest, // ============================================================================ TEST(ImageEmbeddingsGenerateTests, InvalidImagePathThrows) { ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - EXPECT_THROW((void)model.generate("nonexistent_image.jpg"), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg"), RnExecutorchError); } TEST(ImageEmbeddingsGenerateTests, EmptyImagePathThrows) { ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - EXPECT_THROW((void)model.generate(""), RnExecutorchError); + EXPECT_THROW((void)model.generateFromString(""), RnExecutorchError); } TEST(ImageEmbeddingsGenerateTests, MalformedURIThrows) { ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - EXPECT_THROW((void)model.generate("not_a_valid_uri://bad"), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad"), RnExecutorchError); } TEST(ImageEmbeddingsGenerateTests, ValidImageReturnsResults) { ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - auto result = model.generate(kValidTestImagePath); + auto result = model.generateFromString(kValidTestImagePath); EXPECT_NE(result, nullptr); EXPECT_GT(result->size(), 0u); } TEST(ImageEmbeddingsGenerateTests, ResultsHaveCorrectSize) { ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - auto result = 
model.generate(kValidTestImagePath); + auto result = model.generateFromString(kValidTestImagePath); size_t numFloats = result->size() / sizeof(float); constexpr size_t kClipEmbeddingDimensions = 512; EXPECT_EQ(numFloats, kClipEmbeddingDimensions); @@ -77,7 +77,7 @@ TEST(ImageEmbeddingsGenerateTests, ResultsAreNormalized) { // TODO: Investigate the source of the issue; GTEST_SKIP() << "Expected to fail in emulator environments"; ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - auto result = model.generate(kValidTestImagePath); + auto result = model.generateFromString(kValidTestImagePath); const float *data = reinterpret_cast(result->data()); size_t numFloats = result->size() / sizeof(float); @@ -92,7 +92,7 @@ TEST(ImageEmbeddingsGenerateTests, ResultsAreNormalized) { TEST(ImageEmbeddingsGenerateTests, ResultsContainValidValues) { ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - auto result = model.generate(kValidTestImagePath); + auto result = model.generateFromString(kValidTestImagePath); const float *data = reinterpret_cast(result->data()); size_t numFloats = result->size() / sizeof(float); diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/OCRTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/OCRTest.cpp index 428fb5afb..6f6f708be 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/OCRTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/OCRTest.cpp @@ -41,7 +41,7 @@ template <> struct ModelTraits { } static void callGenerate(ModelType &model) { - (void)model.generate(kValidTestImagePath); + (void)model.generateFromString(kValidTestImagePath); } }; } // namespace model_tests @@ -67,27 +67,27 @@ TEST(OCRCtorTests, EmptySymbolsThrows) { TEST(OCRGenerateTests, InvalidImagePathThrows) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - 
EXPECT_THROW((void)model.generate("nonexistent_image.jpg"), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg"), RnExecutorchError); } TEST(OCRGenerateTests, EmptyImagePathThrows) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - EXPECT_THROW((void)model.generate(""), RnExecutorchError); + EXPECT_THROW((void)model.generateFromString(""), RnExecutorchError); } TEST(OCRGenerateTests, MalformedURIThrows) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - EXPECT_THROW((void)model.generate("not_a_valid_uri://bad"), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad"), RnExecutorchError); } TEST(OCRGenerateTests, ValidImageReturnsResults) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); // May or may not have detections depending on image content EXPECT_GE(results.size(), 0u); } @@ -95,7 +95,7 @@ TEST(OCRGenerateTests, ValidImageReturnsResults) { TEST(OCRGenerateTests, DetectionsHaveValidBoundingBoxes) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); for (const auto &detection : results) { // Each bbox should have 4 points @@ -110,7 +110,7 @@ TEST(OCRGenerateTests, DetectionsHaveValidBoundingBoxes) { TEST(OCRGenerateTests, DetectionsHaveValidScores) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); for (const auto &detection : results) { EXPECT_GE(detection.score, 0.0f); @@ -121,7 +121,7 @@ TEST(OCRGenerateTests, DetectionsHaveValidScores) { 
TEST(OCRGenerateTests, DetectionsHaveNonEmptyText) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); for (const auto &detection : results) { EXPECT_FALSE(detection.text.empty()); } diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp index ae80208a6..76c838ca1 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp @@ -1,6 +1,8 @@ #include "BaseModelTests.h" +#include #include #include +#include #include #include @@ -29,7 +31,7 @@ template <> struct ModelTraits { } static void callGenerate(ModelType &model) { - (void)model.generate(kValidTestImagePath, 0.5); + (void)model.generateFromString(kValidTestImagePath, 0.5); } }; } // namespace model_tests @@ -43,49 +45,50 @@ INSTANTIATE_TYPED_TEST_SUITE_P(ObjectDetection, CommonModelTest, // ============================================================================ TEST(ObjectDetectionGenerateTests, InvalidImagePathThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - EXPECT_THROW((void)model.generate("nonexistent_image.jpg", 0.5), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg", 0.5), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, EmptyImagePathThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - EXPECT_THROW((void)model.generate("", 0.5), RnExecutorchError); + EXPECT_THROW((void)model.generateFromString("", 0.5), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, MalformedURIThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - 
EXPECT_THROW((void)model.generate("not_a_valid_uri://bad", 0.5), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad", 0.5), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, NegativeThresholdThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - EXPECT_THROW((void)model.generate(kValidTestImagePath, -0.1), + EXPECT_THROW((void)model.generateFromString(kValidTestImagePath, -0.1), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, ThresholdAboveOneThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - EXPECT_THROW((void)model.generate(kValidTestImagePath, 1.1), + EXPECT_THROW((void)model.generateFromString(kValidTestImagePath, 1.1), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, ValidImageReturnsResults) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - auto results = model.generate(kValidTestImagePath, 0.3); + auto results = model.generateFromString(kValidTestImagePath, 0.3); EXPECT_GE(results.size(), 0u); } TEST(ObjectDetectionGenerateTests, HighThresholdReturnsFewerResults) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - auto lowThresholdResults = model.generate(kValidTestImagePath, 0.1); - auto highThresholdResults = model.generate(kValidTestImagePath, 0.9); + auto lowThresholdResults = model.generateFromString(kValidTestImagePath, 0.1); + auto highThresholdResults = + model.generateFromString(kValidTestImagePath, 0.9); EXPECT_GE(lowThresholdResults.size(), highThresholdResults.size()); } TEST(ObjectDetectionGenerateTests, DetectionsHaveValidBoundingBoxes) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - auto results = model.generate(kValidTestImagePath, 0.3); + auto results = model.generateFromString(kValidTestImagePath, 0.3); for (const auto &detection : results) { EXPECT_LE(detection.x1, detection.x2); @@ -97,7 +100,7 @@ TEST(ObjectDetectionGenerateTests, DetectionsHaveValidBoundingBoxes) { 
TEST(ObjectDetectionGenerateTests, DetectionsHaveValidScores) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - auto results = model.generate(kValidTestImagePath, 0.3); + auto results = model.generateFromString(kValidTestImagePath, 0.3); for (const auto &detection : results) { EXPECT_GE(detection.score, 0.0f); @@ -107,13 +110,80 @@ TEST(ObjectDetectionGenerateTests, DetectionsHaveValidScores) { TEST(ObjectDetectionGenerateTests, DetectionsHaveValidLabels) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - auto results = model.generate(kValidTestImagePath, 0.3); + auto results = model.generateFromString(kValidTestImagePath, 0.3); for (const auto &detection : results) { EXPECT_GE(detection.label, 0); } } +// ============================================================================ +// generateFromPixels tests +// ============================================================================ +TEST(ObjectDetectionPixelTests, ValidPixelDataReturnsResults) { + ObjectDetection model(kValidObjectDetectionModelPath, nullptr); + constexpr int32_t width = 4, height = 4, channels = 3; + std::vector pixelData(width * height * channels, 128); + JSTensorViewIn tensorView{pixelData.data(), + {height, width, channels}, + executorch::aten::ScalarType::Byte}; + auto results = model.generateFromPixels(tensorView, 0.3); + EXPECT_GE(results.size(), 0u); +} + +TEST(ObjectDetectionPixelTests, WrongSizesLengthThrows) { + ObjectDetection model(kValidObjectDetectionModelPath, nullptr); + std::vector pixelData(16, 0); + JSTensorViewIn tensorView{ + pixelData.data(), {4, 4}, executorch::aten::ScalarType::Byte}; + EXPECT_THROW((void)model.generateFromPixels(tensorView, 0.5), + RnExecutorchError); +} + +TEST(ObjectDetectionPixelTests, WrongChannelCountThrows) { + ObjectDetection model(kValidObjectDetectionModelPath, nullptr); + constexpr int32_t width = 4, height = 4, channels = 4; + std::vector pixelData(width * height * channels, 0); + JSTensorViewIn 
tensorView{pixelData.data(), + {height, width, channels}, + executorch::aten::ScalarType::Byte}; + EXPECT_THROW((void)model.generateFromPixels(tensorView, 0.5), + RnExecutorchError); +} + +TEST(ObjectDetectionPixelTests, WrongScalarTypeThrows) { + ObjectDetection model(kValidObjectDetectionModelPath, nullptr); + constexpr int32_t width = 4, height = 4, channels = 3; + std::vector pixelData(width * height * channels, 0); + JSTensorViewIn tensorView{pixelData.data(), + {height, width, channels}, + executorch::aten::ScalarType::Float}; + EXPECT_THROW((void)model.generateFromPixels(tensorView, 0.5), + RnExecutorchError); +} + +TEST(ObjectDetectionPixelTests, NegativeThresholdThrows) { + ObjectDetection model(kValidObjectDetectionModelPath, nullptr); + constexpr int32_t width = 4, height = 4, channels = 3; + std::vector pixelData(width * height * channels, 128); + JSTensorViewIn tensorView{pixelData.data(), + {height, width, channels}, + executorch::aten::ScalarType::Byte}; + EXPECT_THROW((void)model.generateFromPixels(tensorView, -0.1), + RnExecutorchError); +} + +TEST(ObjectDetectionPixelTests, ThresholdAboveOneThrows) { + ObjectDetection model(kValidObjectDetectionModelPath, nullptr); + constexpr int32_t width = 4, height = 4, channels = 3; + std::vector pixelData(width * height * channels, 128); + JSTensorViewIn tensorView{pixelData.data(), + {height, width, channels}, + executorch::aten::ScalarType::Byte}; + EXPECT_THROW((void)model.generateFromPixels(tensorView, 1.1), + RnExecutorchError); +} + TEST(ObjectDetectionInheritedTests, GetInputShapeWorks) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); auto shape = model.getInputShape("forward", 0); diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/StyleTransferTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/StyleTransferTest.cpp index 3e6951617..5d300de83 100644 --- 
a/packages/react-native-executorch/common/rnexecutorch/tests/integration/StyleTransferTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/StyleTransferTest.cpp @@ -1,6 +1,4 @@ #include "BaseModelTests.h" -#include "utils/TestUtils.h" -#include #include #include #include @@ -30,7 +28,7 @@ template <> struct ModelTraits { } static void callGenerate(ModelType &model) { - (void)model.generate(kValidTestImagePath); + (void)model.generateFromString(kValidTestImagePath); } }; } // namespace model_tests @@ -44,51 +42,36 @@ INSTANTIATE_TYPED_TEST_SUITE_P(StyleTransfer, CommonModelTest, // ============================================================================ TEST(StyleTransferGenerateTests, InvalidImagePathThrows) { StyleTransfer model(kValidStyleTransferModelPath, nullptr); - EXPECT_THROW((void)model.generate("nonexistent_image.jpg"), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg"), RnExecutorchError); } TEST(StyleTransferGenerateTests, EmptyImagePathThrows) { StyleTransfer model(kValidStyleTransferModelPath, nullptr); - EXPECT_THROW((void)model.generate(""), RnExecutorchError); + EXPECT_THROW((void)model.generateFromString(""), RnExecutorchError); } TEST(StyleTransferGenerateTests, MalformedURIThrows) { StyleTransfer model(kValidStyleTransferModelPath, nullptr); - EXPECT_THROW((void)model.generate("not_a_valid_uri://bad"), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad"), RnExecutorchError); } -TEST(StyleTransferGenerateTests, ValidImageReturnsFilePath) { +TEST(StyleTransferGenerateTests, ValidImageReturnsNonNull) { StyleTransfer model(kValidStyleTransferModelPath, nullptr); - auto result = model.generate(kValidTestImagePath); - EXPECT_FALSE(result.empty()); -} - -TEST(StyleTransferGenerateTests, ResultIsValidFilePath) { - StyleTransfer model(kValidStyleTransferModelPath, nullptr); - auto result = model.generate(kValidTestImagePath); - test_utils::trimFilePrefix(result); - 
EXPECT_TRUE(std::filesystem::exists(result)); -} - -TEST(StyleTransferGenerateTests, ResultFileHasContent) { - StyleTransfer model(kValidStyleTransferModelPath, nullptr); - auto result = model.generate(kValidTestImagePath); - test_utils::trimFilePrefix(result); - auto fileSize = std::filesystem::file_size(result); - EXPECT_GT(fileSize, 0u); + auto result = model.generateFromString(kValidTestImagePath); + EXPECT_NE(result.dataPtr, nullptr); + EXPECT_GT(result.width, 0); + EXPECT_GT(result.height, 0); } TEST(StyleTransferGenerateTests, MultipleGeneratesWork) { StyleTransfer model(kValidStyleTransferModelPath, nullptr); - EXPECT_NO_THROW((void)model.generate(kValidTestImagePath)); - auto result1 = model.generate(kValidTestImagePath); - auto result2 = model.generate(kValidTestImagePath); - test_utils::trimFilePrefix(result1); - test_utils::trimFilePrefix(result2); - EXPECT_TRUE(std::filesystem::exists(result1)); - EXPECT_TRUE(std::filesystem::exists(result2)); + EXPECT_NO_THROW((void)model.generateFromString(kValidTestImagePath)); + auto result1 = model.generateFromString(kValidTestImagePath); + auto result2 = model.generateFromString(kValidTestImagePath); + EXPECT_NE(result1.dataPtr, nullptr); + EXPECT_NE(result2.dataPtr, nullptr); } TEST(StyleTransferInheritedTests, GetInputShapeWorks) { diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/VerticalOCRTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/VerticalOCRTest.cpp index 7b1010a81..56f18d862 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/VerticalOCRTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/VerticalOCRTest.cpp @@ -43,7 +43,7 @@ template <> struct ModelTraits { } static void callGenerate(ModelType &model) { - (void)model.generate(kValidVerticalTestImagePath); + (void)model.generateFromString(kValidVerticalTestImagePath); } }; } // namespace model_tests @@ -85,34 +85,34 @@ 
TEST(VerticalOCRCtorTests, IndependentCharsFalseDoesntThrow) { TEST(VerticalOCRGenerateTests, IndependentCharsInvalidImageThrows) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - EXPECT_THROW((void)model.generate("nonexistent_image.jpg"), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg"), RnExecutorchError); } TEST(VerticalOCRGenerateTests, IndependentCharsEmptyImagePathThrows) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - EXPECT_THROW((void)model.generate(""), RnExecutorchError); + EXPECT_THROW((void)model.generateFromString(""), RnExecutorchError); } TEST(VerticalOCRGenerateTests, IndependentCharsMalformedURIThrows) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - EXPECT_THROW((void)model.generate("not_a_valid_uri://bad"), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad"), RnExecutorchError); } TEST(VerticalOCRGenerateTests, IndependentCharsValidImageReturnsResults) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); EXPECT_GE(results.size(), 0u); } TEST(VerticalOCRGenerateTests, IndependentCharsDetectionsHaveValidBBoxes) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); for (const auto &detection : results) { EXPECT_EQ(detection.bbox.size(), 4u); @@ -126,7 +126,7 @@ TEST(VerticalOCRGenerateTests, IndependentCharsDetectionsHaveValidBBoxes) { TEST(VerticalOCRGenerateTests, 
IndependentCharsDetectionsHaveValidScores) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); for (const auto &detection : results) { EXPECT_GE(detection.score, 0.0f); @@ -137,7 +137,7 @@ TEST(VerticalOCRGenerateTests, IndependentCharsDetectionsHaveValidScores) { TEST(VerticalOCRGenerateTests, IndependentCharsDetectionsHaveNonEmptyText) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); for (const auto &detection : results) { EXPECT_FALSE(detection.text.empty()); @@ -148,34 +148,34 @@ TEST(VerticalOCRGenerateTests, IndependentCharsDetectionsHaveNonEmptyText) { TEST(VerticalOCRGenerateTests, JointCharsInvalidImageThrows) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - EXPECT_THROW((void)model.generate("nonexistent_image.jpg"), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg"), RnExecutorchError); } TEST(VerticalOCRGenerateTests, JointCharsEmptyImagePathThrows) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - EXPECT_THROW((void)model.generate(""), RnExecutorchError); + EXPECT_THROW((void)model.generateFromString(""), RnExecutorchError); } TEST(VerticalOCRGenerateTests, JointCharsMalformedURIThrows) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - EXPECT_THROW((void)model.generate("not_a_valid_uri://bad"), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad"), RnExecutorchError); 
} TEST(VerticalOCRGenerateTests, JointCharsValidImageReturnsResults) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); EXPECT_GE(results.size(), 0u); } TEST(VerticalOCRGenerateTests, JointCharsDetectionsHaveValidBBoxes) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); for (const auto &detection : results) { EXPECT_EQ(detection.bbox.size(), 4u); @@ -189,7 +189,7 @@ TEST(VerticalOCRGenerateTests, JointCharsDetectionsHaveValidBBoxes) { TEST(VerticalOCRGenerateTests, JointCharsDetectionsHaveValidScores) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); for (const auto &detection : results) { EXPECT_GE(detection.score, 0.0f); @@ -200,7 +200,7 @@ TEST(VerticalOCRGenerateTests, JointCharsDetectionsHaveValidScores) { TEST(VerticalOCRGenerateTests, JointCharsDetectionsHaveNonEmptyText) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); for (const auto &detection : results) { EXPECT_FALSE(detection.text.empty()); @@ -216,8 +216,10 @@ TEST(VerticalOCRStrategyTests, BothStrategiesRunSuccessfully) { kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - 
EXPECT_NO_THROW((void)independentModel.generate(kValidVerticalTestImagePath)); - EXPECT_NO_THROW((void)jointModel.generate(kValidVerticalTestImagePath)); + EXPECT_NO_THROW( + (void)independentModel.generateFromString(kValidVerticalTestImagePath)); + EXPECT_NO_THROW( + (void)jointModel.generateFromString(kValidVerticalTestImagePath)); } TEST(VerticalOCRStrategyTests, BothStrategiesReturnValidResults) { @@ -229,8 +231,9 @@ TEST(VerticalOCRStrategyTests, BothStrategiesReturnValidResults) { createMockCallInvoker()); auto independentResults = - independentModel.generate(kValidVerticalTestImagePath); - auto jointResults = jointModel.generate(kValidVerticalTestImagePath); + independentModel.generateFromString(kValidVerticalTestImagePath); + auto jointResults = + jointModel.generateFromString(kValidVerticalTestImagePath); // Both should return some results (or none if no text detected) EXPECT_GE(independentResults.size(), 0u); diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/stubs/jsi_stubs.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/stubs/jsi_stubs.cpp index 39b8ae09c..897a2778e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/stubs/jsi_stubs.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/stubs/jsi_stubs.cpp @@ -14,6 +14,14 @@ namespace facebook::jsi { MutableBuffer::~MutableBuffer() {} Value::~Value() {} Value::Value(Value &&other) noexcept {} + +// Needed to link ObjectDetectionTests: generateFromFrame and FrameProcessor +// pull in these JSI symbols, but they are never called in tests. 
+Object Value::asObject(Runtime &) const & { __builtin_unreachable(); } +BigInt Value::asBigInt(Runtime &) const & { __builtin_unreachable(); } + +uint64_t BigInt::asUint64(Runtime &) const { return 0; } + } // namespace facebook::jsi namespace facebook::react { diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp new file mode 100644 index 000000000..c62d1b21c --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp @@ -0,0 +1,114 @@ +#include "FrameExtractor.h" +#include +#include +#include + +#ifdef __APPLE__ +#import +#endif + +#if defined(__ANDROID__) && __ANDROID_API__ >= 26 +#include +#endif + +namespace rnexecutorch::utils { + +namespace { + +#ifdef __APPLE__ +cv::Mat extractFromCVPixelBuffer(void *pixelBuffer) { + CVPixelBufferRef buffer = static_cast(pixelBuffer); + + size_t width = CVPixelBufferGetWidth(buffer); + size_t height = CVPixelBufferGetHeight(buffer); + size_t bytesPerRow = CVPixelBufferGetBytesPerRow(buffer); + OSType pixelFormat = CVPixelBufferGetPixelFormatType(buffer); + + CVPixelBufferLockBaseAddress(buffer, kCVPixelBufferLock_ReadOnly); + void *baseAddress = CVPixelBufferGetBaseAddress(buffer); + + cv::Mat mat; + + if (pixelFormat == kCVPixelFormatType_32BGRA) { + mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC4, + baseAddress, bytesPerRow); + } else if (pixelFormat == kCVPixelFormatType_32RGBA) { + mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC4, + baseAddress, bytesPerRow); + } else if (pixelFormat == kCVPixelFormatType_24RGB) { + mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC3, + baseAddress, bytesPerRow); + } else { + CVPixelBufferUnlockBaseAddress(buffer, kCVPixelBufferLock_ReadOnly); + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported CVPixelBuffer format: %u", pixelFormat); + throw 
RnExecutorchError(RnExecutorchErrorCode::PlatformNotSupported, + errorMessage); + } + + // Note: We don't unlock here - Vision Camera manages the lifecycle + // When frame.dispose() is called, Vision Camera will unlock and release + + return mat; +} +#endif + +#ifdef __ANDROID__ +cv::Mat extractFromAHardwareBuffer(void *hardwareBuffer) { +#if __ANDROID_API__ >= 26 + AHardwareBuffer *buffer = static_cast(hardwareBuffer); + + AHardwareBuffer_Desc desc; + AHardwareBuffer_describe(buffer, &desc); + + void *data = nullptr; + int lockResult = AHardwareBuffer_lock( + buffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1, nullptr, &data); + + if (lockResult != 0) { + throw RnExecutorchError(RnExecutorchErrorCode::UnknownError, + "Failed to lock AHardwareBuffer"); + } + + cv::Mat mat; + + if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM) { + mat = cv::Mat(desc.height, desc.width, CV_8UC4, data, desc.stride * 4); + } else if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM) { + mat = cv::Mat(desc.height, desc.width, CV_8UC4, data, desc.stride * 4); + } else if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM) { + mat = cv::Mat(desc.height, desc.width, CV_8UC3, data, desc.stride * 3); + } else { + AHardwareBuffer_unlock(buffer, nullptr); + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported AHardwareBuffer format: %u", desc.format); + throw RnExecutorchError(RnExecutorchErrorCode::PlatformNotSupported, + errorMessage); + } + + // Note: We don't unlock here - Vision Camera manages the lifecycle + + return mat; +#else + throw RnExecutorchError(RnExecutorchErrorCode::PlatformNotSupported, + "AHardwareBuffer requires Android API 26+"); +#endif // __ANDROID_API__ >= 26 +} +#endif + +} // namespace + +cv::Mat extractFromNativeBuffer(uint64_t bufferPtr) { +#ifdef __APPLE__ + return extractFromCVPixelBuffer(reinterpret_cast(bufferPtr)); +#elif defined(__ANDROID__) + return extractFromAHardwareBuffer(reinterpret_cast(bufferPtr)); 
+#else + throw RnExecutorchError(RnExecutorchErrorCode::PlatformNotSupported, + "NativeBuffer not supported on this platform"); +#endif +} + +} // namespace rnexecutorch::utils \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h new file mode 100644 index 000000000..dda4ff956 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include + +namespace rnexecutorch::utils { + +/** + * @brief Extract cv::Mat from a native platform buffer pointer (zero-copy) + * + * Dispatches to the platform-specific implementation: + * - iOS: CVPixelBufferRef + * - Android: AHardwareBuffer + * + * @param bufferPtr Platform-specific buffer pointer (uint64_t) + * @return cv::Mat wrapping the buffer data (zero-copy) + * + * @throws RnExecutorchError if the platform is unsupported or extraction fails + * + * @note The returned cv::Mat does not own the data. + * Caller must ensure the buffer remains valid during use. 
+ */ +cv::Mat extractFromNativeBuffer(uint64_t bufferPtr); + +} // namespace rnexecutorch::utils \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp new file mode 100644 index 000000000..1d03b97ba --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp @@ -0,0 +1,28 @@ +#include "FrameProcessor.h" +#include "FrameExtractor.h" +#include +#include + +namespace rnexecutorch::utils { + +namespace { + +bool hasNativeBuffer(jsi::Runtime &runtime, const jsi::Object &frameData) { + return frameData.hasProperty(runtime, "nativeBuffer"); +} + +} // namespace + +cv::Mat extractFrame(jsi::Runtime &runtime, const jsi::Object &frameData) { + if (!hasNativeBuffer(runtime, frameData)) { + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + "FrameProcessor: No nativeBuffer found in frame"); + } + + auto nativeBufferValue = frameData.getProperty(runtime, "nativeBuffer"); + uint64_t bufferPtr = static_cast( + nativeBufferValue.asBigInt(runtime).asUint64(runtime)); + + return extractFromNativeBuffer(bufferPtr); +} +} // namespace rnexecutorch::utils \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h new file mode 100644 index 000000000..6bbb3390d --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include + +namespace rnexecutorch::utils { + +using namespace facebook; + +/** + * @brief Extract cv::Mat from VisionCamera frame data via nativeBuffer + * + * @param runtime JSI runtime + * @param frameData JSI object containing frame data from VisionCamera + * Expected properties: + * - nativeBuffer: BigInt pointer to native buffer + * + * @return cv::Mat 
wrapping the frame data (zero-copy) + * + * @throws RnExecutorchError if nativeBuffer is not present or extraction fails + * + * @note The returned cv::Mat does not own the data. + * Caller must ensure the source frame remains valid during use. + */ +cv::Mat extractFrame(jsi::Runtime &runtime, const jsi::Object &frameData); + +} // namespace rnexecutorch::utils \ No newline at end of file diff --git a/packages/react-native-executorch/src/controllers/BaseOCRController.ts b/packages/react-native-executorch/src/controllers/BaseOCRController.ts index c124dadce..b6e5c3a5b 100644 --- a/packages/react-native-executorch/src/controllers/BaseOCRController.ts +++ b/packages/react-native-executorch/src/controllers/BaseOCRController.ts @@ -2,10 +2,24 @@ import { Logger } from '../common/Logger'; import { symbols } from '../constants/ocr/symbols'; import { RnExecutorchErrorCode } from '../errors/ErrorCodes'; import { RnExecutorchError, parseUnknownError } from '../errors/errorUtils'; -import { ResourceSource } from '../types/common'; +import { Frame, PixelData, ResourceSource, ScalarType } from '../types/common'; import { OCRLanguage, OCRDetection } from '../types/ocr'; import { ResourceFetcher } from '../utils/ResourceFetcher'; +function isPixelData(input: unknown): input is PixelData { + return ( + typeof input === 'object' && + input !== null && + 'dataPtr' in input && + input.dataPtr instanceof Uint8Array && + 'sizes' in input && + Array.isArray(input.sizes) && + input.sizes.length === 3 && + 'scalarType' in input && + input.scalarType === ScalarType.BYTE + ); +} + export abstract class BaseOCRController { protected nativeModule: any; public isReady: boolean = false; @@ -87,7 +101,34 @@ export abstract class BaseOCRController { } }; - public forward = async (imageSource: string): Promise => { + get runOnFrame(): ((frame: Frame) => OCRDetection[]) | null { + if (!this.nativeModule?.generateFromFrame) { + return null; + } + + const nativeGenerateFromFrame = 
this.nativeModule.generateFromFrame; + + return (frame: any): OCRDetection[] => { + 'worklet'; + + let nativeBuffer: any = null; + try { + nativeBuffer = frame.getNativeBuffer(); + const frameData = { + nativeBuffer: nativeBuffer.pointer, + }; + return nativeGenerateFromFrame(frameData); + } finally { + if (nativeBuffer?.release) { + nativeBuffer.release(); + } + } + }; + } + + public forward = async ( + input: string | PixelData + ): Promise => { if (!this.isReady) { throw new RnExecutorchError( RnExecutorchErrorCode.ModuleNotLoaded, @@ -104,7 +145,17 @@ export abstract class BaseOCRController { try { this.isGenerating = true; this.isGeneratingCallback(this.isGenerating); - return await this.nativeModule.generate(imageSource); + + if (typeof input === 'string') { + return await this.nativeModule.generateFromString(input); + } else if (isPixelData(input)) { + return await this.nativeModule.generateFromPixels(input); + } else { + throw new RnExecutorchError( + RnExecutorchErrorCode.InvalidArgument, + 'Invalid input: expected string path or PixelData object. For VisionCamera frames, use runOnFrame instead.' + ); + } } catch (e) { throw parseUnknownError(e); } finally { diff --git a/packages/react-native-executorch/src/errors/ErrorCodes.ts b/packages/react-native-executorch/src/errors/ErrorCodes.ts index 3e4e557a1..4ccb1f07f 100644 --- a/packages/react-native-executorch/src/errors/ErrorCodes.ts +++ b/packages/react-native-executorch/src/errors/ErrorCodes.ts @@ -58,6 +58,10 @@ export enum RnExecutorchErrorCode { * Thrown when the number of downloaded files is unexpected, due to download interruptions. */ DownloadInterrupted = 118, + /** + * Thrown when a feature or platform is not supported in the current environment. + */ + PlatformNotSupported = 119, /** * Thrown when an error occurs with the tokenizer or tokenization process. 
*/ diff --git a/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts b/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts index 88831f9aa..26a804227 100644 --- a/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts +++ b/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts @@ -9,6 +9,7 @@ import { ModelNameOf, ModelSources, } from '../../types/imageSegmentation'; +import { Frame, PixelData } from '../../types/common'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { RnExecutorchError, parseUnknownError } from '../../errors/errorUtils'; @@ -41,6 +42,14 @@ export const useImageSegmentation = ({ const [instance, setInstance] = useState > | null>(null); + const [runOnFrame, setRunOnFrame] = useState< + | (( + frame: Frame, + classesOfInterest?: string[], + resizeToInput?: boolean + ) => any) + | null + >(null); useEffect(() => { if (preventLoad) return; @@ -62,6 +71,10 @@ export const useImageSegmentation = ({ if (isMounted) { setInstance(currentInstance); setIsReady(true); + const worklet = currentInstance.runOnFrame; + if (worklet) { + setRunOnFrame(() => worklet); + } } } catch (err) { if (isMounted) setError(parseUnknownError(err)); @@ -70,6 +83,8 @@ export const useImageSegmentation = ({ return () => { isMounted = false; + setIsReady(false); + setRunOnFrame(null); currentInstance?.delete(); }; @@ -77,7 +92,7 @@ export const useImageSegmentation = ({ }, [model.modelName, model.modelSource, preventLoad]); const forward = async >>( - imageSource: string, + imageSource: string | PixelData, classesOfInterest: K[] = [], resizeToInput: boolean = true ) => { @@ -111,5 +126,6 @@ export const useImageSegmentation = ({ isGenerating, downloadProgress, forward, + runOnFrame, }; }; diff --git a/packages/react-native-executorch/src/hooks/computer_vision/useOCR.ts b/packages/react-native-executorch/src/hooks/computer_vision/useOCR.ts 
index 6b2868834..967f750c6 100644 --- a/packages/react-native-executorch/src/hooks/computer_vision/useOCR.ts +++ b/packages/react-native-executorch/src/hooks/computer_vision/useOCR.ts @@ -1,5 +1,6 @@ import { useEffect, useState } from 'react'; -import { OCRProps, OCRType } from '../../types/ocr'; +import { OCRProps, OCRType, OCRDetection } from '../../types/ocr'; +import { Frame } from '../../types/common'; import { OCRController } from '../../controllers/OCRController'; import { RnExecutorchError } from '../../errors/errorUtils'; @@ -15,6 +16,9 @@ export const useOCR = ({ model, preventLoad = false }: OCRProps): OCRType => { const [isReady, setIsReady] = useState(false); const [isGenerating, setIsGenerating] = useState(false); const [downloadProgress, setDownloadProgress] = useState(0); + const [runOnFrame, setRunOnFrame] = useState< + ((frame: Frame) => OCRDetection[]) | null + >(null); const [controllerInstance] = useState( () => @@ -35,9 +39,16 @@ export const useOCR = ({ model, preventLoad = false }: OCRProps): OCRType => { model.language, setDownloadProgress ); + + const worklet = controllerInstance.runOnFrame; + if (worklet) { + setRunOnFrame(() => worklet); + } })(); return () => { + setRunOnFrame(null); + setIsReady(false); controllerInstance.delete(); }; }, [ @@ -54,5 +65,6 @@ export const useOCR = ({ model, preventLoad = false }: OCRProps): OCRType => { isGenerating, forward: controllerInstance.forward, downloadProgress, + runOnFrame, }; }; diff --git a/packages/react-native-executorch/src/hooks/computer_vision/useObjectDetection.ts b/packages/react-native-executorch/src/hooks/computer_vision/useObjectDetection.ts index 2d52eb706..845f1aa23 100644 --- a/packages/react-native-executorch/src/hooks/computer_vision/useObjectDetection.ts +++ b/packages/react-native-executorch/src/hooks/computer_vision/useObjectDetection.ts @@ -15,9 +15,10 @@ import { export const useObjectDetection = ({ model, preventLoad = false, -}: ObjectDetectionProps): 
ObjectDetectionType => - useModule({ +}: ObjectDetectionProps): ObjectDetectionType => { + return useModule({ module: ObjectDetectionModule, model, preventLoad: preventLoad, - }); + }) as ObjectDetectionType; +}; diff --git a/packages/react-native-executorch/src/hooks/computer_vision/useVerticalOCR.ts b/packages/react-native-executorch/src/hooks/computer_vision/useVerticalOCR.ts index eb9d289eb..bd479aea2 100644 --- a/packages/react-native-executorch/src/hooks/computer_vision/useVerticalOCR.ts +++ b/packages/react-native-executorch/src/hooks/computer_vision/useVerticalOCR.ts @@ -1,5 +1,6 @@ import { useEffect, useState } from 'react'; -import { OCRType, VerticalOCRProps } from '../../types/ocr'; +import { OCRType, VerticalOCRProps, OCRDetection } from '../../types/ocr'; +import { Frame } from '../../types/common'; import { VerticalOCRController } from '../../controllers/VerticalOCRController'; import { RnExecutorchError } from '../../errors/errorUtils'; @@ -19,6 +20,9 @@ export const useVerticalOCR = ({ const [isReady, setIsReady] = useState(false); const [isGenerating, setIsGenerating] = useState(false); const [downloadProgress, setDownloadProgress] = useState(0); + const [runOnFrame, setRunOnFrame] = useState< + ((frame: Frame) => OCRDetection[]) | null + >(null); const [controllerInstance] = useState( () => @@ -40,9 +44,16 @@ export const useVerticalOCR = ({ independentCharacters, setDownloadProgress ); + + const worklet = controllerInstance.runOnFrame; + if (worklet) { + setRunOnFrame(() => worklet); + } })(); return () => { + setRunOnFrame(null); + setIsReady(false); controllerInstance.delete(); }; }, [ @@ -60,5 +71,6 @@ export const useVerticalOCR = ({ isGenerating, forward: controllerInstance.forward, downloadProgress, + runOnFrame, }; }; diff --git a/packages/react-native-executorch/src/hooks/useModule.ts b/packages/react-native-executorch/src/hooks/useModule.ts index 1a35885d5..9838c4aa0 100644 --- a/packages/react-native-executorch/src/hooks/useModule.ts 
+++ b/packages/react-native-executorch/src/hooks/useModule.ts @@ -31,6 +31,7 @@ export const useModule = < const [isGenerating, setIsGenerating] = useState(false); const [downloadProgress, setDownloadProgress] = useState(0); const [moduleInstance] = useState(() => new module()); + const [runOnFrame, setRunOnFrame] = useState(null); useEffect(() => { if (preventLoad) return; @@ -46,6 +47,14 @@ export const useModule = < if (isMounted) setDownloadProgress(progress); }); if (isMounted) setIsReady(true); + + // Use "state trick" to make the worklet serializable for VisionCamera + if ('runOnFrame' in moduleInstance) { + const worklet = moduleInstance.runOnFrame; + if (worklet) { + setRunOnFrame(() => worklet); + } + } } catch (err) { if (isMounted) setError(parseUnknownError(err)); } @@ -53,6 +62,8 @@ export const useModule = < return () => { isMounted = false; + setIsReady(false); + setRunOnFrame(null); moduleInstance.delete(); }; @@ -99,5 +110,32 @@ export const useModule = < */ downloadProgress, forward, + + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. + * + * Only available for Computer Vision modules that support real-time frame processing + * (e.g., ObjectDetection, Classification, ImageSegmentation). + * Returns `null` if the module doesn't implement frame processing. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. 
+ * + * @example + * ```typescript + * const { runOnFrame } = useObjectDetection({ model: MODEL }); + * + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!runOnFrame) return; + * const detections = runOnFrame(frame, 0.5); + * frame.dispose(); + * } + * }); + * ``` + */ + runOnFrame, }; }; diff --git a/packages/react-native-executorch/src/modules/BaseModule.ts b/packages/react-native-executorch/src/modules/BaseModule.ts index 6aefc8b2a..41a2da6cf 100644 --- a/packages/react-native-executorch/src/modules/BaseModule.ts +++ b/packages/react-native-executorch/src/modules/BaseModule.ts @@ -1,12 +1,68 @@ -import { ResourceSource } from '../types/common'; +import { Frame, ResourceSource } from '../types/common'; import { TensorPtr } from '../types/common'; +/** + * Base class for all React Native Executorch modules. + * + * Provides core functionality for loading models, running inference, + * and managing native resources. + * + * @category Base Classes + */ export abstract class BaseModule { /** - * Native module instance + * Native module instance (JSI Host Object) + * @internal */ nativeModule: any = null; + /** + * Process a camera frame directly for real-time inference. + * + * This method is bound to a native JSI function after calling `load()`, + * making it worklet-compatible and safe to call from VisionCamera's + * frame processor thread. + * + * **Performance characteristics:** + * - **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + * frame data is accessed directly without copying (fastest, recommended). + * - **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + * from native to JS, then accessed from native code (slower, fallback). 
+ * + * **Usage with VisionCamera:** + * ```typescript + * const frameOutput = useFrameOutput({ + * pixelFormat: 'rgb', + * onFrame(frame) { + * 'worklet'; + * // Zero-copy approach (recommended) + * const nativeBuffer = frame.getNativeBuffer(); + * const result = model.generateFromFrame( + * { nativeBuffer: nativeBuffer.pointer, width: frame.width, height: frame.height }, + * ...args + * ); + * nativeBuffer.release(); + * frame.dispose(); + * } + * }); + * ``` + * + * @param frameData Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + * @param args Additional model-specific arguments (e.g., threshold, options) + * @returns Model-specific output (e.g., detections, classifications, embeddings) + * + * @see {@link Frame} for frame data format details + */ + public generateFromFrame!: (frameData: Frame, ...args: any[]) => any; + + /** + * Load the model and prepare it for inference. + * + * @param modelSource - Resource location of the model binary + * @param onDownloadProgressCallback - Optional callback to monitor download progress (0-1) + * @param args - Additional model-specific loading arguments + */ + abstract load( modelSource: ResourceSource, onDownloadProgressCallback: (_: number) => void, @@ -19,6 +75,7 @@ export abstract class BaseModule { * * @param inputTensor - Array of input tensors. * @returns Array of output tensors. + * @internal */ protected async forwardET(inputTensor: TensorPtr[]): Promise { return await this.nativeModule.forward(inputTensor); @@ -36,7 +93,9 @@ export abstract class BaseModule { } /** - * Unloads the model from memory. + * Unloads the model from memory and releases native resources. + * + * Always call this method when you're done with a model to prevent memory leaks. 
*/ delete() { if (this.nativeModule !== null) { diff --git a/packages/react-native-executorch/src/modules/computer_vision/ClassificationModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ClassificationModule.ts index 45b7e2b39..d0735ae26 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ClassificationModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ClassificationModule.ts @@ -1,16 +1,18 @@ import { ResourceFetcher } from '../../utils/ResourceFetcher'; -import { ResourceSource } from '../../types/common'; -import { BaseModule } from '../BaseModule'; +import { ResourceSource, PixelData } from '../../types/common'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; import { Logger } from '../../common/Logger'; +import { VisionModule } from './VisionModule'; /** * Module for image classification tasks. * * @category Typescript API */ -export class ClassificationModule extends BaseModule { +export class ClassificationModule extends VisionModule<{ + [category: string]: number; +}> { /** * Loads the model, where `modelSource` is a string that specifies the location of the model binary. * To track the download progress, supply a callback function `onDownloadProgressCallback`. @@ -42,18 +44,9 @@ export class ClassificationModule extends BaseModule { } } - /** - * Executes the model's forward pass, where `imageSource` can be a fetchable resource or a Base64-encoded string. - * - * @param imageSource - The image source to be classified. - * @returns The classification result. - */ - async forward(imageSource: string): Promise<{ [category: string]: number }> { - if (this.nativeModule == null) - throw new RnExecutorchError( - RnExecutorchErrorCode.ModuleNotLoaded, - 'The model is currently not loaded. Please load the model before calling forward().' 
- ); - return await this.nativeModule.generate(imageSource); + async forward( + input: string | PixelData + ): Promise<{ [category: string]: number }> { + return super.forward(input); } } diff --git a/packages/react-native-executorch/src/modules/computer_vision/ImageEmbeddingsModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ImageEmbeddingsModule.ts index 3e62f450d..6fb78c4cc 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ImageEmbeddingsModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ImageEmbeddingsModule.ts @@ -1,16 +1,16 @@ import { ResourceFetcher } from '../../utils/ResourceFetcher'; -import { ResourceSource } from '../../types/common'; +import { ResourceSource, PixelData } from '../../types/common'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; -import { BaseModule } from '../BaseModule'; import { Logger } from '../../common/Logger'; +import { VisionModule } from './VisionModule'; /** * Module for generating image embeddings from input images. * * @category Typescript API */ -export class ImageEmbeddingsModule extends BaseModule { +export class ImageEmbeddingsModule extends VisionModule { /** * Loads the model, where `modelSource` is a string that specifies the location of the model binary. * @@ -41,18 +41,8 @@ export class ImageEmbeddingsModule extends BaseModule { } } - /** - * Executes the model's forward pass. Returns an embedding array for a given sentence. - * - * @param imageSource - The image source (URI/URL) to image that will be embedded. - * @returns A Float32Array containing the image embeddings. - */ - async forward(imageSource: string): Promise { - if (this.nativeModule == null) - throw new RnExecutorchError( - RnExecutorchErrorCode.ModuleNotLoaded, - 'The model is currently not loaded. Please load the model before calling forward().' 
- ); - return new Float32Array(await this.nativeModule.generate(imageSource)); + async forward(input: string | PixelData): Promise { + const result = await super.forward(input); + return new Float32Array(result as unknown as ArrayBuffer); } } diff --git a/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts index f2de6edd7..b2f7c908f 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts @@ -1,5 +1,11 @@ import { ResourceFetcher } from '../../utils/ResourceFetcher'; -import { ResourceSource, LabelEnum } from '../../types/common'; +import { + ResourceSource, + LabelEnum, + Frame, + PixelData, + ScalarType, +} from '../../types/common'; import { DeeplabLabel, ModelNameOf, @@ -47,6 +53,20 @@ export type SegmentationLabels = type ResolveLabels = T extends SegmentationModelName ? SegmentationLabels : T; +function isPixelData(input: unknown): input is PixelData { + return ( + typeof input === 'object' && + input !== null && + 'dataPtr' in input && + (input as any).dataPtr instanceof Uint8Array && + 'sizes' in input && + Array.isArray((input as any).sizes) && + (input as any).sizes.length === 3 && + 'scalarType' in input && + (input as any).scalarType === ScalarType.BYTE + ); +} + /** * Generic image segmentation module with type-safe label maps. * Use a model name (e.g. `'deeplab-v3'`) as the generic parameter for built-in models, @@ -75,6 +95,75 @@ export class ImageSegmentationModule< // TODO: figure it out so we can delete this (we need this because of basemodule inheritance) override async load() {} + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. 
+ * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * Available after model is loaded. + * + * @example + * ```typescript + * const [runOnFrame, setRunOnFrame] = useState(null); + * setRunOnFrame(() => segmentation.runOnFrame); + * + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!runOnFrame) return; + * const result = runOnFrame(frame, [], true); + * frame.dispose(); + * } + * }); + * ``` + * + * @param frame - VisionCamera Frame object + * @param classesOfInterest - Labels for which to return per-class probability masks. + * @param resizeToInput - Whether to resize masks to original frame dimensions. Defaults to `true`. + */ + get runOnFrame(): + | (( + frame: Frame, + classesOfInterest?: string[], + resizeToInput?: boolean + ) => any) + | null { + if (!this.nativeModule?.generateFromFrame) { + return null; + } + + const nativeGenerateFromFrame = this.nativeModule.generateFromFrame; + const allClassNames = this.allClassNames; + + return ( + frame: any, + classesOfInterest: string[] = [], + resizeToInput: boolean = true + ): any => { + 'worklet'; + + let nativeBuffer: any = null; + try { + nativeBuffer = frame.getNativeBuffer(); + const frameData = { + nativeBuffer: nativeBuffer.pointer, + }; + return nativeGenerateFromFrame( + frameData, + allClassNames, + classesOfInterest, + resizeToInput + ); + } finally { + if (nativeBuffer?.release) { + nativeBuffer.release(); + } + } + }; + } + /** * Creates a segmentation instance for a built-in model. * The config object is discriminated by `modelName` — each model can require different fields. @@ -167,14 +256,20 @@ export class ImageSegmentationModule< /** * Executes the model's forward pass to perform semantic segmentation on the provided image. * - * @param imageSource - A string representing the image source (e.g., a file path, URI, or Base64-encoded string). + * Supports two input types: + * 1. 
**String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. + * + * @param input - Image source (string or PixelData object) * @param classesOfInterest - An optional list of label keys indicating which per-class probability masks to include in the output. `ARGMAX` is always returned regardless. * @param resizeToInput - Whether to resize the output masks to the original input image dimensions. If `false`, returns the raw model output dimensions. Defaults to `true`. * @returns A Promise resolving to an object with an `'ARGMAX'` key mapped to an `Int32Array` of per-pixel class indices, and each requested class label mapped to a `Float32Array` of per-pixel probabilities. * @throws {RnExecutorchError} If the model is not loaded. */ async forward>( - imageSource: string, + input: string | PixelData, classesOfInterest: K[] = [], resizeToInput: boolean = true ): Promise & Record> { @@ -189,14 +284,29 @@ export class ImageSegmentationModule< String(label) ); - const nativeResult = await this.nativeModule.generate( - imageSource, - this.allClassNames, - classesOfInterestNames, - resizeToInput - ); - - return nativeResult as Record<'ARGMAX', Int32Array> & - Record; + if (typeof input === 'string') { + const nativeResult = await this.nativeModule.generateFromString( + input, + this.allClassNames, + classesOfInterestNames, + resizeToInput + ); + return nativeResult as Record<'ARGMAX', Int32Array> & + Record; + } else if (isPixelData(input)) { + const nativeResult = await this.nativeModule.generateFromPixels( + input, + this.allClassNames, + classesOfInterestNames, + resizeToInput + ); + return nativeResult as Record<'ARGMAX', Int32Array> & + Record; + } else { + throw new RnExecutorchError( + RnExecutorchErrorCode.InvalidArgument, + 'Invalid input: expected string path or PixelData object. 
For VisionCamera frames, use runOnFrame instead.' + ); + } } } diff --git a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts index 95b9e436b..f056cff62 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts @@ -1,17 +1,17 @@ import { ResourceFetcher } from '../../utils/ResourceFetcher'; -import { ResourceSource } from '../../types/common'; +import { ResourceSource, PixelData } from '../../types/common'; import { Detection } from '../../types/objectDetection'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; -import { BaseModule } from '../BaseModule'; import { Logger } from '../../common/Logger'; +import { VisionModule } from './VisionModule'; /** * Module for object detection tasks. * * @category Typescript API */ -export class ObjectDetectionModule extends BaseModule { +export class ObjectDetectionModule extends VisionModule { /** * Loads the model, where `modelSource` is a string that specifies the location of the model binary. * To track the download progress, supply a callback function `onDownloadProgressCallback`. @@ -43,23 +43,10 @@ export class ObjectDetectionModule extends BaseModule { } } - /** - * Executes the model's forward pass, where `imageSource` can be a fetchable resource or a Base64-encoded string. - * `detectionThreshold` can be supplied to alter the sensitivity of the detection. - * - * @param imageSource - The image source to be processed. - * @param detectionThreshold - The threshold for detection sensitivity. Default is 0.7. - * @returns An array of Detection objects representing detected items in the image. 
- */ async forward( - imageSource: string, - detectionThreshold: number = 0.7 + input: string | PixelData, + detectionThreshold: number = 0.5 ): Promise { - if (this.nativeModule == null) - throw new RnExecutorchError( - RnExecutorchErrorCode.ModuleNotLoaded, - 'The model is currently not loaded. Please load the model before calling forward().' - ); - return await this.nativeModule.generate(imageSource, detectionThreshold); + return super.forward(input, detectionThreshold); } } diff --git a/packages/react-native-executorch/src/modules/computer_vision/StyleTransferModule.ts b/packages/react-native-executorch/src/modules/computer_vision/StyleTransferModule.ts index 90e5242de..fc83f7bc7 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/StyleTransferModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/StyleTransferModule.ts @@ -1,16 +1,16 @@ import { ResourceFetcher } from '../../utils/ResourceFetcher'; -import { ResourceSource } from '../../types/common'; +import { ResourceSource, PixelData } from '../../types/common'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; -import { BaseModule } from '../BaseModule'; import { Logger } from '../../common/Logger'; +import { VisionModule } from './VisionModule'; /** * Module for style transfer tasks. * * @category Typescript API */ -export class StyleTransferModule extends BaseModule { +export class StyleTransferModule extends VisionModule { /** * Loads the model, where `modelSource` is a string that specifies the location of the model binary. * To track the download progress, supply a callback function `onDownloadProgressCallback`. @@ -42,18 +42,7 @@ export class StyleTransferModule extends BaseModule { } } - /** - * Executes the model's forward pass, where `imageSource` can be a fetchable resource or a Base64-encoded string. 
- * - * @param imageSource - The image source to be processed. - * @returns The stylized image as a Base64-encoded string. - */ - async forward(imageSource: string): Promise { - if (this.nativeModule == null) - throw new RnExecutorchError( - RnExecutorchErrorCode.ModuleNotLoaded, - 'The model is currently not loaded. Please load the model before calling forward().' - ); - return await this.nativeModule.generate(imageSource); + async forward(input: string | PixelData): Promise { + return super.forward(input); } } diff --git a/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts new file mode 100644 index 000000000..762d09987 --- /dev/null +++ b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts @@ -0,0 +1,143 @@ +import { BaseModule } from '../BaseModule'; +import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; +import { RnExecutorchError } from '../../errors/errorUtils'; +import { Frame, PixelData, ScalarType } from '../../types/common'; + +/** + * Base class for computer vision models that support multiple input types. + * + * VisionModule extends BaseModule with: + * - Unified `forward()` API accepting string paths or raw pixel data + * - `runOnFrame` getter for real-time VisionCamera frame processing + * - Shared frame processor creation logic + * + * Subclasses should only implement model-specific loading logic. 
+ * + * @category Typescript API + */ +function isPixelData(input: unknown): input is PixelData { + return ( + typeof input === 'object' && + input !== null && + 'dataPtr' in input && + input.dataPtr instanceof Uint8Array && + 'sizes' in input && + Array.isArray(input.sizes) && + input.sizes.length === 3 && + 'scalarType' in input && + input.scalarType === ScalarType.BYTE + ); +} + +export abstract class VisionModule extends BaseModule { + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * + * Only available after the model is loaded. Returns null if not loaded. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * @example + * ```typescript + * const model = new ClassificationModule(); + * await model.load({ modelSource: MODEL }); + * + * // Use the functional form of setState to store the worklet — passing it + * // directly would cause React to invoke it immediately as an updater fn. 
+ * const [runOnFrame, setRunOnFrame] = useState(null); + * setRunOnFrame(() => model.runOnFrame); + * + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!runOnFrame) return; + * const result = runOnFrame(frame); + * frame.dispose(); + * } + * }); + * ``` + */ + get runOnFrame(): ((frame: Frame, ...args: any[]) => TOutput) | null { + if (!this.nativeModule?.generateFromFrame) { + return null; + } + + // Extract pure JSI function reference (runs on JS thread) + const nativeGenerateFromFrame = this.nativeModule.generateFromFrame; + + // Return worklet that captures ONLY the JSI function + return (frame: any, ...args: any[]): TOutput => { + 'worklet'; + + let nativeBuffer: any = null; + try { + nativeBuffer = frame.getNativeBuffer(); + const frameData = { + nativeBuffer: nativeBuffer.pointer, + }; + return nativeGenerateFromFrame(frameData, ...args); + } finally { + if (nativeBuffer?.release) { + nativeBuffer.release(); + } + } + }; + } + + /** + * Executes the model's forward pass with automatic input type detection. + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. + * This method is async and cannot be called in worklet context. + * + * @param input - Image source (string path or PixelData object) + * @param args - Additional model-specific arguments + * @returns A Promise that resolves to the model output. 
+ * + * @example + * ```typescript + * // String path (async) + * const result1 = await model.forward('file:///path/to/image.jpg'); + * + * // Pixel data (async) + * const result2 = await model.forward({ + * dataPtr: new Uint8Array(pixelBuffer), + * sizes: [480, 640, 3], + * scalarType: ScalarType.BYTE + * }); + * + * // For VisionCamera frames, use runOnFrame in worklet: + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!model.runOnFrame) return; + * const result = model.runOnFrame(frame); + * } + * }); + * ``` + */ + async forward(input: string | PixelData, ...args: any[]): Promise { + if (this.nativeModule == null) + throw new RnExecutorchError( + RnExecutorchErrorCode.ModuleNotLoaded, + 'The model is currently not loaded. Please load the model before calling forward().' + ); + + // Type detection and routing + if (typeof input === 'string') { + return await this.nativeModule.generateFromString(input, ...args); + } else if (isPixelData(input)) { + return await this.nativeModule.generateFromPixels(input, ...args); + } else { + throw new RnExecutorchError( + RnExecutorchErrorCode.InvalidArgument, + 'Invalid input: expected string path or PixelData object. For VisionCamera frames, use runOnFrame instead.' + ); + } + } +} diff --git a/packages/react-native-executorch/src/types/classification.ts b/packages/react-native-executorch/src/types/classification.ts index 51152ec08..64a20ecf3 100644 --- a/packages/react-native-executorch/src/types/classification.ts +++ b/packages/react-native-executorch/src/types/classification.ts @@ -1,5 +1,5 @@ import { RnExecutorchError } from '../errors/errorUtils'; -import { ResourceSource } from './common'; +import { ResourceSource, PixelData, Frame } from './common'; /** * Props for the `useClassification` hook. @@ -43,9 +43,46 @@ export interface ClassificationType { /** * Executes the model's forward pass to classify the provided image. 
- * @param imageSource - A string representing the image source (e.g., a file path, URI, or base64 string) to be classified. - * @returns A Promise that resolves to the classification result (typically containing labels and confidence scores). + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. + * + * @param input - Image source (string or PixelData object) + * @returns A Promise that resolves to the classification result (labels and confidence scores). * @throws {RnExecutorchError} If the model is not loaded or is currently processing another image. */ - forward: (imageSource: string) => Promise<{ [category: string]: number }>; + forward: ( + input: string | PixelData + ) => Promise<{ [category: string]: number }>; + + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * Available after model is loaded (`isReady: true`). + * + * @example + * ```typescript + * const { runOnFrame, isReady } = useClassification({ model: MODEL }); + * + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!runOnFrame) return; + * const result = runOnFrame(frame); + * frame.dispose(); + * } + * }); + * ``` + * + * @param frame - VisionCamera Frame object + * @returns Object mapping class labels to confidence scores. 
+ */ + runOnFrame: ((frame: Frame) => { [category: string]: number }) | null; } diff --git a/packages/react-native-executorch/src/types/common.ts b/packages/react-native-executorch/src/types/common.ts index 384caa861..d992214dd 100644 --- a/packages/react-native-executorch/src/types/common.ts +++ b/packages/react-native-executorch/src/types/common.ts @@ -151,3 +151,56 @@ export type LabelEnum = Readonly>; * @category Types */ export type Triple = readonly [T, T, T]; +/** + * Represents raw pixel data in RGB format for vision models. + * + * This type extends TensorPtr with constraints specific to image data: + * - dataPtr must be Uint8Array (8-bit unsigned integers) + * - scalarType is always BYTE (ScalarType.BYTE) + * - sizes represents [height, width, channels] where channels must be 3 (RGB) + * + * @category Types + * @example + * ```typescript + * const pixelData: PixelData = { + * dataPtr: new Uint8Array(width * height * 3), // RGB pixel data + * sizes: [height, width, 3], // [height, width, channels] + * scalarType: ScalarType.BYTE + * }; + * ``` + */ +export interface PixelData extends Omit { + /** + * RGB pixel data as Uint8Array. + * Expected format: RGB (3 channels), not RGBA or BGRA. + * Size must equal: width * height * 3 + */ + dataPtr: Uint8Array; + + /** + * Dimensions of the pixel data: [height, width, channels]. + * - sizes[0]: height (number of rows) + * - sizes[1]: width (number of columns) + * - sizes[2]: channels (must be 3 for RGB) + */ + sizes: [number, number, 3]; + + /** + * Scalar type is always BYTE for pixel data. + */ + scalarType: ScalarType.BYTE; +} + +/** + * Frame data for vision model processing. + */ +export interface Frame { + /** + * Pointer to native platform buffer (zero-copy, best performance). 
+ * - On iOS: CVPixelBufferRef pointer + * - On Android: AHardwareBuffer* pointer + * + * Obtain from Vision Camera v5: `frame.getNativeBuffer().pointer` + */ + getNativeBuffer(): { pointer: bigint; release(): void }; +} diff --git a/packages/react-native-executorch/src/types/imageEmbeddings.ts b/packages/react-native-executorch/src/types/imageEmbeddings.ts index 5dc23d66f..ccee4b4b1 100644 --- a/packages/react-native-executorch/src/types/imageEmbeddings.ts +++ b/packages/react-native-executorch/src/types/imageEmbeddings.ts @@ -1,5 +1,5 @@ import { RnExecutorchError } from '../errors/errorUtils'; -import { ResourceSource } from './common'; +import { ResourceSource, PixelData, Frame } from './common'; /** * Props for the `useImageEmbeddings` hook. @@ -43,9 +43,30 @@ export interface ImageEmbeddingsType { /** * Executes the model's forward pass to generate embeddings (a feature vector) for the provided image. - * @param imageSource - A string representing the image source (e.g., a file path, URI, or base64 string) to be processed. + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. + * + * @param input - Image source (string or PixelData object) * @returns A Promise that resolves to a `Float32Array` containing the generated embedding vector. * @throws {RnExecutorchError} If the model is not loaded or is currently processing another image. */ - forward: (imageSource: string) => Promise; + forward: (input: string | PixelData) => Promise; + + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * Available after model is loaded (`isReady: true`). 
+ * + * @param frame - VisionCamera Frame object + * @returns Float32Array containing the embedding vector for the frame. + */ + runOnFrame: ((frame: Frame) => Float32Array) | null; } diff --git a/packages/react-native-executorch/src/types/imageSegmentation.ts b/packages/react-native-executorch/src/types/imageSegmentation.ts index 6d79a801d..7e760487c 100644 --- a/packages/react-native-executorch/src/types/imageSegmentation.ts +++ b/packages/react-native-executorch/src/types/imageSegmentation.ts @@ -1,5 +1,5 @@ import { RnExecutorchError } from '../errors/errorUtils'; -import { LabelEnum, Triple, ResourceSource } from './common'; +import { LabelEnum, Triple, ResourceSource, PixelData, Frame } from './common'; /** * Configuration for a custom segmentation model. @@ -127,15 +127,44 @@ export interface ImageSegmentationType { /** * Executes the model's forward pass to perform semantic segmentation on the provided image. - * @param imageSource - A string representing the image source (e.g., a file path, URI, or base64 string) to be processed. + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. + * + * @param input - Image source (string or PixelData object) * @param classesOfInterest - An optional array of label keys indicating which per-class probability masks to include in the output. `ARGMAX` is always returned regardless. * @param resizeToInput - Whether to resize the output masks to the original input image dimensions. If `false`, returns the raw model output dimensions. Defaults to `true`. * @returns A Promise resolving to an object with an `'ARGMAX'` `Int32Array` of per-pixel class indices, and each requested class label mapped to a `Float32Array` of per-pixel probabilities. 
* @throws {RnExecutorchError} If the model is not loaded or is currently processing another image. */ forward: ( - imageSource: string, + input: string | PixelData, classesOfInterest?: K[], resizeToInput?: boolean ) => Promise & Record>; + + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * Available after model is loaded (`isReady: true`). + * + * @param frame - VisionCamera Frame object + * @param classesOfInterest - Labels for which to return per-class probability masks. + * @param resizeToInput - Whether to resize masks to original frame dimensions. Defaults to `true`. + * @returns Object with `ARGMAX` Int32Array and per-class Float32Array masks. + */ + runOnFrame: + | (( + frame: Frame, + classesOfInterest?: string[], + resizeToInput?: boolean + ) => Record<'ARGMAX', Int32Array> & Record) + | null; } diff --git a/packages/react-native-executorch/src/types/objectDetection.ts b/packages/react-native-executorch/src/types/objectDetection.ts index 94f7cf5c0..5aaf81833 100644 --- a/packages/react-native-executorch/src/types/objectDetection.ts +++ b/packages/react-native-executorch/src/types/objectDetection.ts @@ -1,5 +1,5 @@ import { RnExecutorchError } from '../errors/errorUtils'; -import { ResourceSource } from './common'; +import { ResourceSource, PixelData, Frame } from './common'; /** * Represents a bounding box for a detected object in an image. @@ -170,14 +170,65 @@ export interface ObjectDetectionType { downloadProgress: number; /** - * Executes the model's forward pass to detect objects within the provided image. - * @param imageSource - A string representing the image source (e.g., a file path, URI, or base64 string) to be processed. 
- * @param detectionThreshold - An optional number between 0 and 1 representing the minimum confidence score required for an object to be included in the results. Default is 0.7. - * @returns A Promise that resolves to an array of `Detection` objects, where each object typically contains bounding box coordinates, a class label, and a confidence score. + * Executes the model's forward pass with automatic input type detection. + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. + * + * @param input - Image source (string or PixelData object) + * @param detectionThreshold - An optional number between 0 and 1 representing the minimum confidence score. Default is 0.5. + * @returns A Promise that resolves to an array of `Detection` objects. * @throws {RnExecutorchError} If the model is not loaded or is currently processing another image. + * + * @example + * ```typescript + * // String path + * const detections1 = await model.forward('file:///path/to/image.jpg'); + * + * // Pixel data + * const detections2 = await model.forward({ + * dataPtr: new Uint8Array(rgbPixels), + * sizes: [480, 640, 3], + * scalarType: ScalarType.BYTE + * }); + * ``` */ forward: ( - imageSource: string, + input: string | PixelData, detectionThreshold?: number ) => Promise; + + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * Available after model is loaded (`isReady: true`). 
+ * + * @example + * ```typescript + * const { runOnFrame, isReady } = useObjectDetection({ model: MODEL }); + * + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!runOnFrame) return; + * const detections = runOnFrame(frame, 0.5); + * frame.dispose(); + * } + * }); + * ``` + * + * @param frame - VisionCamera Frame object + * @param detectionThreshold - The threshold for detection sensitivity. + * @returns Array of Detection objects representing detected items in the frame. + */ + runOnFrame: + | ((frame: Frame, detectionThreshold: number) => Detection[]) + | null; } diff --git a/packages/react-native-executorch/src/types/ocr.ts b/packages/react-native-executorch/src/types/ocr.ts index 6ca2f4324..1b0640172 100644 --- a/packages/react-native-executorch/src/types/ocr.ts +++ b/packages/react-native-executorch/src/types/ocr.ts @@ -1,6 +1,6 @@ import { symbols } from '../constants/ocr/symbols'; import { RnExecutorchError } from '../errors/errorUtils'; -import { ResourceSource } from './common'; +import { Frame, PixelData, ResourceSource } from './common'; /** * OCRDetection represents a single detected text instance in an image, @@ -104,11 +104,35 @@ export interface OCRType { /** * Executes the OCR pipeline (detection and recognition) on the provided image. - * @param imageSource - A string representing the image source (e.g., a file path, URI, or base64 string) to be processed. - * @returns A Promise that resolves to the OCR results (typically containing the recognized text strings and their bounding boxes). + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. + * + * @param input - Image source (string or PixelData object) + * @returns A Promise that resolves to the OCR results (recognized text and bounding boxes). 
* @throws {RnExecutorchError} If the models are not loaded or are currently processing another image. */ - forward: (imageSource: string) => Promise; + forward: (input: string | PixelData) => Promise; + + /** + * Synchronous worklet function for VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * **Note**: OCR is a two-stage pipeline (detection + recognition) and may not + * achieve real-time frame rates. Frames may be dropped if inference is still running. + * + * Available after model is loaded (`isReady: true`). + * + * @param frame - VisionCamera Frame object + * @returns Array of OCRDetection results for the frame. + */ + runOnFrame: ((frame: Frame) => OCRDetection[]) | null; } /** diff --git a/packages/react-native-executorch/src/types/styleTransfer.ts b/packages/react-native-executorch/src/types/styleTransfer.ts index 162086722..3cf3d17fa 100644 --- a/packages/react-native-executorch/src/types/styleTransfer.ts +++ b/packages/react-native-executorch/src/types/styleTransfer.ts @@ -1,5 +1,5 @@ import { RnExecutorchError } from '../errors/errorUtils'; -import { ResourceSource } from './common'; +import { ResourceSource, PixelData, Frame } from './common'; /** * Configuration properties for the `useStyleTransfer` hook. @@ -43,9 +43,30 @@ export interface StyleTransferType { /** * Executes the model's forward pass to apply the specific artistic style to the provided image. - * @param imageSource - A string representing the input image source (e.g., a file path, URI, or base64 string) to be stylized. - * @returns A Promise that resolves to a string containing the stylized image (typically as a base64 string or a file URI). + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. 
**PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. + * + * @param input - Image source (string or PixelData object) + * @returns A Promise that resolves to `PixelData` containing the stylized image as raw RGB pixel data. * @throws {RnExecutorchError} If the model is not loaded or is currently processing another image. */ - forward: (imageSource: string) => Promise; + forward: (input: string | PixelData) => Promise; + + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * Available after model is loaded (`isReady: true`). + * + * @param frame - VisionCamera Frame object + * @returns PixelData containing the stylized frame as raw RGB pixel data. + */ + runOnFrame: ((frame: Frame) => PixelData) | null; } diff --git a/scripts/errors.config.ts b/scripts/errors.config.ts index 3e6cf1090..6953eec2e 100644 --- a/scripts/errors.config.ts +++ b/scripts/errors.config.ts @@ -59,6 +59,10 @@ export const errorDefinitions = { * Thrown when the number of downloaded files is unexpected, due to download interruptions. */ DownloadInterrupted: 0x76, + /* + * Thrown when a feature or platform is not supported in the current environment. + */ + PlatformNotSupported: 0x77, /* * Thrown when an error occurs with the tokenizer or tokenization process. 
diff --git a/yarn.lock b/yarn.lock index 436005c8d..ac9b276b6 100644 --- a/yarn.lock +++ b/yarn.lock @@ -53,6 +53,17 @@ __metadata: languageName: node linkType: hard +"@babel/code-frame@npm:^7.28.6, @babel/code-frame@npm:^7.29.0": + version: 7.29.0 + resolution: "@babel/code-frame@npm:7.29.0" + dependencies: + "@babel/helper-validator-identifier": "npm:^7.28.5" + js-tokens: "npm:^4.0.0" + picocolors: "npm:^1.1.1" + checksum: 10/199e15ff89007dd30675655eec52481cb245c9fdf4f81e4dc1f866603b0217b57aff25f5ffa0a95bbc8e31eb861695330cd7869ad52cc211aa63016320ef72c5 + languageName: node + linkType: hard + "@babel/compat-data@npm:^7.20.5, @babel/compat-data@npm:^7.27.2, @babel/compat-data@npm:^7.27.7, @babel/compat-data@npm:^7.28.5": version: 7.28.5 resolution: "@babel/compat-data@npm:7.28.5" @@ -110,6 +121,19 @@ __metadata: languageName: node linkType: hard +"@babel/generator@npm:^7.29.0": + version: 7.29.1 + resolution: "@babel/generator@npm:7.29.1" + dependencies: + "@babel/parser": "npm:^7.29.0" + "@babel/types": "npm:^7.29.0" + "@jridgewell/gen-mapping": "npm:^0.3.12" + "@jridgewell/trace-mapping": "npm:^0.3.28" + jsesc: "npm:^3.0.2" + checksum: 10/61fe4ddd6e817aa312a14963ccdbb5c9a8c57e8b97b98d19a8a99ccab2215fda1a5f52bc8dd8d2e3c064497ddeb3ab8ceb55c76fa0f58f8169c34679d2256fe0 + languageName: node + linkType: hard + "@babel/helper-annotate-as-pure@npm:^7.27.1, @babel/helper-annotate-as-pure@npm:^7.27.3": version: 7.27.3 resolution: "@babel/helper-annotate-as-pure@npm:7.27.3" @@ -149,6 +173,23 @@ __metadata: languageName: node linkType: hard +"@babel/helper-create-class-features-plugin@npm:^7.28.6": + version: 7.28.6 + resolution: "@babel/helper-create-class-features-plugin@npm:7.28.6" + dependencies: + "@babel/helper-annotate-as-pure": "npm:^7.27.3" + "@babel/helper-member-expression-to-functions": "npm:^7.28.5" + "@babel/helper-optimise-call-expression": "npm:^7.27.1" + "@babel/helper-replace-supers": "npm:^7.28.6" + "@babel/helper-skip-transparent-expression-wrappers": 
"npm:^7.27.1" + "@babel/traverse": "npm:^7.28.6" + semver: "npm:^6.3.1" + peerDependencies: + "@babel/core": ^7.0.0 + checksum: 10/11f55607fcf66827ade745c0616aa3c6086aa655c0fab665dd3c4961829752e4c94c942262db30c4831ef9bce37ad444722e85ef1b7136587e28c6b1ef8ad43c + languageName: node + linkType: hard + "@babel/helper-create-regexp-features-plugin@npm:^7.18.6, @babel/helper-create-regexp-features-plugin@npm:^7.27.1": version: 7.28.5 resolution: "@babel/helper-create-regexp-features-plugin@npm:7.28.5" @@ -242,6 +283,13 @@ __metadata: languageName: node linkType: hard +"@babel/helper-plugin-utils@npm:^7.28.6": + version: 7.28.6 + resolution: "@babel/helper-plugin-utils@npm:7.28.6" + checksum: 10/21c853bbc13dbdddf03309c9a0477270124ad48989e1ad6524b83e83a77524b333f92edd2caae645c5a7ecf264ec6d04a9ebe15aeb54c7f33c037b71ec521e4a + languageName: node + linkType: hard + "@babel/helper-remap-async-to-generator@npm:^7.18.9, @babel/helper-remap-async-to-generator@npm:^7.27.1": version: 7.27.1 resolution: "@babel/helper-remap-async-to-generator@npm:7.27.1" @@ -268,6 +316,19 @@ __metadata: languageName: node linkType: hard +"@babel/helper-replace-supers@npm:^7.28.6": + version: 7.28.6 + resolution: "@babel/helper-replace-supers@npm:7.28.6" + dependencies: + "@babel/helper-member-expression-to-functions": "npm:^7.28.5" + "@babel/helper-optimise-call-expression": "npm:^7.27.1" + "@babel/traverse": "npm:^7.28.6" + peerDependencies: + "@babel/core": ^7.0.0 + checksum: 10/ad2724713a4d983208f509e9607e8f950855f11bd97518a700057eb8bec69d687a8f90dc2da0c3c47281d2e3b79cf1d14ecf1fe3e1ee0a8e90b61aee6759c9a7 + languageName: node + linkType: hard + "@babel/helper-skip-transparent-expression-wrappers@npm:^7.20.0, @babel/helper-skip-transparent-expression-wrappers@npm:^7.27.1": version: 7.27.1 resolution: "@babel/helper-skip-transparent-expression-wrappers@npm:7.27.1" @@ -343,6 +404,17 @@ __metadata: languageName: node linkType: hard +"@babel/parser@npm:^7.28.6, @babel/parser@npm:^7.29.0": + version: 
7.29.0 + resolution: "@babel/parser@npm:7.29.0" + dependencies: + "@babel/types": "npm:^7.29.0" + bin: + parser: ./bin/babel-parser.js + checksum: 10/b1576dca41074997a33ee740d87b330ae2e647f4b7da9e8d2abd3772b18385d303b0cee962b9b88425e0f30d58358dbb8d63792c1a2d005c823d335f6a029747 + languageName: node + linkType: hard + "@babel/plugin-bugfix-firefox-class-in-computed-class-key@npm:^7.28.5": version: 7.28.5 resolution: "@babel/plugin-bugfix-firefox-class-in-computed-class-key@npm:7.28.5" @@ -767,6 +839,17 @@ __metadata: languageName: node linkType: hard +"@babel/plugin-syntax-typescript@npm:^7.28.6": + version: 7.28.6 + resolution: "@babel/plugin-syntax-typescript@npm:7.28.6" + dependencies: + "@babel/helper-plugin-utils": "npm:^7.28.6" + peerDependencies: + "@babel/core": ^7.0.0-0 + checksum: 10/5c55f9c63bd36cf3d7e8db892294c8f85000f9c1526c3a1cc310d47d1e174f5c6f6605e5cc902c4636d885faba7a9f3d5e5edc6b35e4f3b1fd4c2d58d0304fa5 + languageName: node + linkType: hard + "@babel/plugin-syntax-unicode-sets-regex@npm:^7.18.6": version: 7.18.6 resolution: "@babel/plugin-syntax-unicode-sets-regex@npm:7.18.6" @@ -779,7 +862,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-arrow-functions@npm:^7.0.0, @babel/plugin-transform-arrow-functions@npm:^7.0.0-0, @babel/plugin-transform-arrow-functions@npm:^7.24.7, @babel/plugin-transform-arrow-functions@npm:^7.27.1": +"@babel/plugin-transform-arrow-functions@npm:7.27.1, @babel/plugin-transform-arrow-functions@npm:^7.0.0, @babel/plugin-transform-arrow-functions@npm:^7.0.0-0, @babel/plugin-transform-arrow-functions@npm:^7.24.7, @babel/plugin-transform-arrow-functions@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-arrow-functions@npm:7.27.1" dependencies: @@ -838,7 +921,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-class-properties@npm:^7.0.0-0, @babel/plugin-transform-class-properties@npm:^7.25.4, @babel/plugin-transform-class-properties@npm:^7.27.1": 
+"@babel/plugin-transform-class-properties@npm:7.27.1, @babel/plugin-transform-class-properties@npm:^7.0.0-0, @babel/plugin-transform-class-properties@npm:^7.25.4, @babel/plugin-transform-class-properties@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-class-properties@npm:7.27.1" dependencies: @@ -862,7 +945,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-classes@npm:^7.0.0, @babel/plugin-transform-classes@npm:^7.0.0-0, @babel/plugin-transform-classes@npm:^7.25.4, @babel/plugin-transform-classes@npm:^7.28.4": +"@babel/plugin-transform-classes@npm:7.28.4, @babel/plugin-transform-classes@npm:^7.0.0, @babel/plugin-transform-classes@npm:^7.0.0-0, @babel/plugin-transform-classes@npm:^7.25.4, @babel/plugin-transform-classes@npm:^7.28.4": version: 7.28.4 resolution: "@babel/plugin-transform-classes@npm:7.28.4" dependencies: @@ -1136,7 +1219,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-nullish-coalescing-operator@npm:^7.0.0-0, @babel/plugin-transform-nullish-coalescing-operator@npm:^7.24.7, @babel/plugin-transform-nullish-coalescing-operator@npm:^7.27.1": +"@babel/plugin-transform-nullish-coalescing-operator@npm:7.27.1, @babel/plugin-transform-nullish-coalescing-operator@npm:^7.0.0-0, @babel/plugin-transform-nullish-coalescing-operator@npm:^7.24.7, @babel/plugin-transform-nullish-coalescing-operator@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-nullish-coalescing-operator@npm:7.27.1" dependencies: @@ -1196,6 +1279,18 @@ __metadata: languageName: node linkType: hard +"@babel/plugin-transform-optional-chaining@npm:7.27.1": + version: 7.27.1 + resolution: "@babel/plugin-transform-optional-chaining@npm:7.27.1" + dependencies: + "@babel/helper-plugin-utils": "npm:^7.27.1" + "@babel/helper-skip-transparent-expression-wrappers": "npm:^7.27.1" + peerDependencies: + "@babel/core": ^7.0.0-0 + checksum: 
10/34b0f96400c259a2722740d17a001fe45f78d8ff052c40e29db2e79173be72c1cfe8d9681067e3f5da3989e4a557402df5c982c024c18257587a41e022f95640 + languageName: node + linkType: hard + "@babel/plugin-transform-optional-chaining@npm:^7.0.0-0, @babel/plugin-transform-optional-chaining@npm:^7.24.8, @babel/plugin-transform-optional-chaining@npm:^7.27.1, @babel/plugin-transform-optional-chaining@npm:^7.28.5": version: 7.28.5 resolution: "@babel/plugin-transform-optional-chaining@npm:7.28.5" @@ -1376,7 +1471,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-shorthand-properties@npm:^7.0.0, @babel/plugin-transform-shorthand-properties@npm:^7.0.0-0, @babel/plugin-transform-shorthand-properties@npm:^7.24.7, @babel/plugin-transform-shorthand-properties@npm:^7.27.1": +"@babel/plugin-transform-shorthand-properties@npm:7.27.1, @babel/plugin-transform-shorthand-properties@npm:^7.0.0, @babel/plugin-transform-shorthand-properties@npm:^7.0.0-0, @babel/plugin-transform-shorthand-properties@npm:^7.24.7, @babel/plugin-transform-shorthand-properties@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-shorthand-properties@npm:7.27.1" dependencies: @@ -1421,7 +1516,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-template-literals@npm:^7.0.0-0, @babel/plugin-transform-template-literals@npm:^7.27.1": +"@babel/plugin-transform-template-literals@npm:7.27.1, @babel/plugin-transform-template-literals@npm:^7.0.0-0, @babel/plugin-transform-template-literals@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-template-literals@npm:7.27.1" dependencies: @@ -1458,6 +1553,21 @@ __metadata: languageName: node linkType: hard +"@babel/plugin-transform-typescript@npm:^7.27.1": + version: 7.28.6 + resolution: "@babel/plugin-transform-typescript@npm:7.28.6" + dependencies: + "@babel/helper-annotate-as-pure": "npm:^7.27.3" + "@babel/helper-create-class-features-plugin": "npm:^7.28.6" + "@babel/helper-plugin-utils": "npm:^7.28.6" + 
"@babel/helper-skip-transparent-expression-wrappers": "npm:^7.27.1" + "@babel/plugin-syntax-typescript": "npm:^7.28.6" + peerDependencies: + "@babel/core": ^7.0.0-0 + checksum: 10/a0bccc531fa8710a45b0b593140273741e0e4a0721b1ef6ef9dfefae0bbe61528440d65aab7936929551fd76793272257d74f60cf66891352f793294930a4b67 + languageName: node + linkType: hard + "@babel/plugin-transform-unicode-escapes@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-unicode-escapes@npm:7.27.1" @@ -1481,7 +1591,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-unicode-regex@npm:^7.0.0, @babel/plugin-transform-unicode-regex@npm:^7.0.0-0, @babel/plugin-transform-unicode-regex@npm:^7.24.7, @babel/plugin-transform-unicode-regex@npm:^7.27.1": +"@babel/plugin-transform-unicode-regex@npm:7.27.1, @babel/plugin-transform-unicode-regex@npm:^7.0.0, @babel/plugin-transform-unicode-regex@npm:^7.0.0-0, @babel/plugin-transform-unicode-regex@npm:^7.24.7, @babel/plugin-transform-unicode-regex@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-unicode-regex@npm:7.27.1" dependencies: @@ -1614,6 +1724,21 @@ __metadata: languageName: node linkType: hard +"@babel/preset-typescript@npm:7.27.1": + version: 7.27.1 + resolution: "@babel/preset-typescript@npm:7.27.1" + dependencies: + "@babel/helper-plugin-utils": "npm:^7.27.1" + "@babel/helper-validator-option": "npm:^7.27.1" + "@babel/plugin-syntax-jsx": "npm:^7.27.1" + "@babel/plugin-transform-modules-commonjs": "npm:^7.27.1" + "@babel/plugin-transform-typescript": "npm:^7.27.1" + peerDependencies: + "@babel/core": ^7.0.0-0 + checksum: 10/9d8e75326b3c93fa016ba7aada652800fc77bc05fcc181888700a049935e8cf1284b549de18a5d62ef3591d02f097ea6de1111f7d71a991aaf36ba74657bd145 + languageName: node + linkType: hard + "@babel/preset-typescript@npm:^7.16.7, @babel/preset-typescript@npm:^7.23.0, @babel/preset-typescript@npm:^7.24.7": version: 7.28.5 resolution: "@babel/preset-typescript@npm:7.28.5" @@ -1647,6 +1772,17 @@ 
__metadata: languageName: node linkType: hard +"@babel/template@npm:^7.28.6": + version: 7.28.6 + resolution: "@babel/template@npm:7.28.6" + dependencies: + "@babel/code-frame": "npm:^7.28.6" + "@babel/parser": "npm:^7.28.6" + "@babel/types": "npm:^7.28.6" + checksum: 10/0ad6e32bf1e7e31bf6b52c20d15391f541ddd645cbd488a77fe537a15b280ee91acd3a777062c52e03eedbc2e1f41548791f6a3697c02476ec5daf49faa38533 + languageName: node + linkType: hard + "@babel/traverse--for-generate-function-map@npm:@babel/traverse@^7.25.3, @babel/traverse@npm:^7.25.3, @babel/traverse@npm:^7.27.1, @babel/traverse@npm:^7.28.0, @babel/traverse@npm:^7.28.3, @babel/traverse@npm:^7.28.4, @babel/traverse@npm:^7.28.5": version: 7.28.5 resolution: "@babel/traverse@npm:7.28.5" @@ -1662,6 +1798,21 @@ __metadata: languageName: node linkType: hard +"@babel/traverse@npm:^7.28.6": + version: 7.29.0 + resolution: "@babel/traverse@npm:7.29.0" + dependencies: + "@babel/code-frame": "npm:^7.29.0" + "@babel/generator": "npm:^7.29.0" + "@babel/helper-globals": "npm:^7.28.0" + "@babel/parser": "npm:^7.29.0" + "@babel/template": "npm:^7.28.6" + "@babel/types": "npm:^7.29.0" + debug: "npm:^4.3.1" + checksum: 10/3a0d0438f1ba9fed4fbe1706ea598a865f9af655a16ca9517ab57bda526e224569ca1b980b473fb68feea5e08deafbbf2cf9febb941f92f2d2533310c3fc4abc + languageName: node + linkType: hard + "@babel/types@npm:^7.0.0, @babel/types@npm:^7.20.7, @babel/types@npm:^7.21.3, @babel/types@npm:^7.24.7, @babel/types@npm:^7.25.2, @babel/types@npm:^7.26.0, @babel/types@npm:^7.27.1, @babel/types@npm:^7.27.3, @babel/types@npm:^7.28.2, @babel/types@npm:^7.28.4, @babel/types@npm:^7.28.5, @babel/types@npm:^7.3.3, @babel/types@npm:^7.4.4": version: 7.28.5 resolution: "@babel/types@npm:7.28.5" @@ -1672,6 +1823,16 @@ __metadata: languageName: node linkType: hard +"@babel/types@npm:^7.28.6, @babel/types@npm:^7.29.0": + version: 7.29.0 + resolution: "@babel/types@npm:7.29.0" + dependencies: + "@babel/helper-string-parser": "npm:^7.27.1" + 
"@babel/helper-validator-identifier": "npm:^7.28.5" + checksum: 10/bfc2b211210f3894dcd7e6a33b2d1c32c93495dc1e36b547376aa33441abe551ab4bc1640d4154ee2acd8e46d3bbc925c7224caae02fcaf0e6a771e97fccc661 + languageName: node + linkType: hard + "@bcoe/v8-coverage@npm:^0.2.3": version: 0.2.3 resolution: "@bcoe/v8-coverage@npm:0.2.3" @@ -4799,6 +4960,15 @@ __metadata: languageName: node linkType: hard +"@types/react@npm:~19.2.0": + version: 19.2.14 + resolution: "@types/react@npm:19.2.14" + dependencies: + csstype: "npm:^3.2.2" + checksum: 10/fbff239089ee64b6bd9b00543594db498278b06de527ef1b0f71bb0eb09cc4445a71b5dd3c0d3d0257255c4eed94406be40a74ad4a987ade8a8d5dd65c82bc5f + languageName: node + linkType: hard + "@types/semver@npm:^7.3.12": version: 7.7.1 resolution: "@types/semver@npm:7.7.1" @@ -5147,6 +5317,18 @@ __metadata: languageName: node linkType: hard +"ajv@npm:^8.11.0": + version: 8.18.0 + resolution: "ajv@npm:8.18.0" + dependencies: + fast-deep-equal: "npm:^3.1.3" + fast-uri: "npm:^3.0.1" + json-schema-traverse: "npm:^1.0.0" + require-from-string: "npm:^2.0.2" + checksum: 10/bfed9de827a2b27c6d4084324eda76a4e32bdde27410b3e9b81d06e6f8f5c78370fc6b93fe1d869f1939ff1d7c4ae8896960995acb8425e3e9288c8884247c48 + languageName: node + linkType: hard + "anser@npm:^1.4.9": version: 1.4.10 resolution: "anser@npm:1.4.10" @@ -6387,8 +6569,9 @@ __metadata: "@react-navigation/native": "npm:^7.1.6" "@shopify/react-native-skia": "npm:2.2.12" "@types/pngjs": "npm:^6.0.5" - "@types/react": "npm:~19.1.10" + "@types/react": "npm:~19.2.0" expo: "npm:^54.0.27" + expo-build-properties: "npm:~1.0.10" expo-constants: "npm:~18.0.11" expo-font: "npm:~14.0.10" expo-linking: "npm:~8.0.10" @@ -6402,12 +6585,15 @@ __metadata: react-native-gesture-handler: "npm:~2.28.0" react-native-image-picker: "npm:^7.2.2" react-native-loading-spinner-overlay: "npm:^3.0.1" - react-native-reanimated: "npm:~4.1.1" + react-native-nitro-image: "npm:^0.12.0" + react-native-nitro-modules: "npm:^0.33.9" + 
react-native-reanimated: "npm:~4.2.1" react-native-safe-area-context: "npm:~5.6.0" react-native-screens: "npm:~4.16.0" react-native-svg: "npm:15.12.1" react-native-svg-transformer: "npm:^1.5.0" - react-native-worklets: "npm:0.5.1" + react-native-vision-camera: "npm:5.0.0-beta.2" + react-native-worklets: "npm:^0.7.2" languageName: unknown linkType: soft @@ -6437,7 +6623,7 @@ __metadata: languageName: node linkType: hard -"convert-source-map@npm:^2.0.0": +"convert-source-map@npm:2.0.0, convert-source-map@npm:^2.0.0": version: 2.0.0 resolution: "convert-source-map@npm:2.0.0" checksum: 10/c987be3ec061348cdb3c2bfb924bec86dea1eacad10550a85ca23edb0fe3556c3a61c7399114f3331ccb3499d7fd0285ab24566e5745929412983494c3926e15 @@ -6757,7 +6943,7 @@ __metadata: languageName: node linkType: hard -"csstype@npm:^3.0.2": +"csstype@npm:^3.0.2, csstype@npm:^3.2.2": version: 3.2.3 resolution: "csstype@npm:3.2.3" checksum: 10/ad41baf7e2ffac65ab544d79107bf7cd1a4bb9bab9ac3302f59ab4ba655d5e30942a8ae46e10ba160c6f4ecea464cc95b975ca2fefbdeeacd6ac63f12f99fe1f @@ -7849,6 +8035,18 @@ __metadata: languageName: node linkType: hard +"expo-build-properties@npm:~1.0.10": + version: 1.0.10 + resolution: "expo-build-properties@npm:1.0.10" + dependencies: + ajv: "npm:^8.11.0" + semver: "npm:^7.6.0" + peerDependencies: + expo: "*" + checksum: 10/0dde41d659d243268ceae49bba3e4c07b72c245df8124f86fb720bc0556a2c4d03dd75e59e068a07438ef5ba3188b67a7a6516d2a37d3d91429070745b2506a2 + languageName: node + linkType: hard + "expo-calendar@npm:~15.0.8": version: 15.0.8 resolution: "expo-calendar@npm:15.0.8" @@ -8236,6 +8434,13 @@ __metadata: languageName: node linkType: hard +"fast-uri@npm:^3.0.1": + version: 3.1.0 + resolution: "fast-uri@npm:3.1.0" + checksum: 10/818b2c96dc913bcf8511d844c3d2420e2c70b325c0653633f51821e4e29013c2015387944435cd0ef5322c36c9beecc31e44f71b257aeb8e0b333c1d62bb17c2 + languageName: node + linkType: hard + "fast-xml-parser@npm:^4.4.1": version: 4.5.3 resolution: "fast-xml-parser@npm:4.5.3" @@ 
-10192,6 +10397,13 @@ __metadata: languageName: node linkType: hard +"json-schema-traverse@npm:^1.0.0": + version: 1.0.0 + resolution: "json-schema-traverse@npm:1.0.0" + checksum: 10/02f2f466cdb0362558b2f1fd5e15cce82ef55d60cd7f8fa828cf35ba74330f8d767fcae5c5c2adb7851fa811766c694b9405810879bc4e1ddd78a7c0e03658ad + languageName: node + linkType: hard + "json-stable-stringify-without-jsonify@npm:^1.0.1": version: 1.0.1 resolution: "json-stable-stringify-without-jsonify@npm:1.0.1" @@ -13138,7 +13350,7 @@ __metadata: languageName: node linkType: hard -"react-native-is-edge-to-edge@npm:^1.1.6, react-native-is-edge-to-edge@npm:^1.2.1": +"react-native-is-edge-to-edge@npm:1.2.1, react-native-is-edge-to-edge@npm:^1.1.6, react-native-is-edge-to-edge@npm:^1.2.1": version: 1.2.1 resolution: "react-native-is-edge-to-edge@npm:1.2.1" peerDependencies: @@ -13183,6 +13395,27 @@ __metadata: languageName: node linkType: hard +"react-native-nitro-image@npm:^0.12.0": + version: 0.12.0 + resolution: "react-native-nitro-image@npm:0.12.0" + peerDependencies: + react: "*" + react-native: "*" + react-native-nitro-modules: "*" + checksum: 10/03f165381c35e060d4d05eae3ce029b32a4009482f327e9526840f306181ca87a862b335e12667c55d4ee9f2069542ca93dd112feb7f1822bf7d2ddc38fe58f0 + languageName: node + linkType: hard + +"react-native-nitro-modules@npm:^0.33.9": + version: 0.33.9 + resolution: "react-native-nitro-modules@npm:0.33.9" + peerDependencies: + react: "*" + react-native: "*" + checksum: 10/4ebf4db46d1e4987a0e52054724081aa9712bcd1d505a6dbdd47aebc6afe72a7abaa0e947651d9f3cc594e4eb3dba47fc6f59db27c5a5ed383946e40d96543a0 + languageName: node + linkType: hard + "react-native-reanimated@npm:~4.1.1": version: 4.1.6 resolution: "react-native-reanimated@npm:4.1.6" @@ -13198,6 +13431,20 @@ __metadata: languageName: node linkType: hard +"react-native-reanimated@npm:~4.2.1": + version: 4.2.2 + resolution: "react-native-reanimated@npm:4.2.2" + dependencies: + react-native-is-edge-to-edge: "npm:1.2.1" + semver: 
"npm:7.7.3" + peerDependencies: + react: "*" + react-native: "*" + react-native-worklets: ">=0.7.0" + checksum: 10/2ad24cc827aaabb54c18d75a4ab98b92a25dd57c05bfabb886341c0e62d8efc5d5973f415cb1da2ecab9ebe077bec1179b91c681de90e124dbf1160a418ee29d + languageName: node + linkType: hard + "react-native-safe-area-context@npm:~5.6.0": version: 5.6.2 resolution: "react-native-safe-area-context@npm:5.6.2" @@ -13251,6 +13498,19 @@ __metadata: languageName: node linkType: hard +"react-native-vision-camera@npm:5.0.0-beta.2": + version: 5.0.0-beta.2 + resolution: "react-native-vision-camera@npm:5.0.0-beta.2" + peerDependencies: + react: "*" + react-native: "*" + react-native-nitro-image: "*" + react-native-nitro-modules: "*" + react-native-worklets: "*" + checksum: 10/1f38d097d001c10b8544d0b931a9387a91c5df1e0677ae53e639962a90589586af02ca658ca5e99a5ca179af8d86bc8365227cf70750f2df4bfb775f4a26fc6d + languageName: node + linkType: hard + "react-native-worklets@npm:0.5.1": version: 0.5.1 resolution: "react-native-worklets@npm:0.5.1" @@ -13274,6 +13534,29 @@ __metadata: languageName: node linkType: hard +"react-native-worklets@npm:^0.7.2": + version: 0.7.4 + resolution: "react-native-worklets@npm:0.7.4" + dependencies: + "@babel/plugin-transform-arrow-functions": "npm:7.27.1" + "@babel/plugin-transform-class-properties": "npm:7.27.1" + "@babel/plugin-transform-classes": "npm:7.28.4" + "@babel/plugin-transform-nullish-coalescing-operator": "npm:7.27.1" + "@babel/plugin-transform-optional-chaining": "npm:7.27.1" + "@babel/plugin-transform-shorthand-properties": "npm:7.27.1" + "@babel/plugin-transform-template-literals": "npm:7.27.1" + "@babel/plugin-transform-unicode-regex": "npm:7.27.1" + "@babel/preset-typescript": "npm:7.27.1" + convert-source-map: "npm:2.0.0" + semver: "npm:7.7.3" + peerDependencies: + "@babel/core": "*" + react: "*" + react-native: "*" + checksum: 
10/922b209940e298d21313d22f8a6eb87ad603442850c7ff8bc9cfef694cb211d7ec9903e24ee20b6bcf6164f8e7c165b65307dcca3d67465fdffda1c45fe05d1d + languageName: node + linkType: hard + "react-native@npm:0.81.5": version: 0.81.5 resolution: "react-native@npm:0.81.5" @@ -13790,21 +14073,21 @@ __metadata: languageName: node linkType: hard -"semver@npm:^6.3.0, semver@npm:^6.3.1": - version: 6.3.1 - resolution: "semver@npm:6.3.1" +"semver@npm:7.7.3, semver@npm:^7.1.3, semver@npm:^7.3.5, semver@npm:^7.3.7, semver@npm:^7.5.2, semver@npm:^7.5.3, semver@npm:^7.5.4, semver@npm:^7.6.0, semver@npm:^7.7.1": + version: 7.7.3 + resolution: "semver@npm:7.7.3" bin: semver: bin/semver.js - checksum: 10/1ef3a85bd02a760c6ef76a45b8c1ce18226de40831e02a00bad78485390b98b6ccaa31046245fc63bba4a47a6a592b6c7eedc65cc47126e60489f9cc1ce3ed7e + checksum: 10/8dbc3168e057a38fc322af909c7f5617483c50caddba135439ff09a754b20bdd6482a5123ff543dad4affa488ecf46ec5fb56d61312ad20bb140199b88dfaea9 languageName: node linkType: hard -"semver@npm:^7.1.3, semver@npm:^7.3.5, semver@npm:^7.3.7, semver@npm:^7.5.2, semver@npm:^7.5.3, semver@npm:^7.5.4, semver@npm:^7.6.0, semver@npm:^7.7.1": - version: 7.7.3 - resolution: "semver@npm:7.7.3" +"semver@npm:^6.3.0, semver@npm:^6.3.1": + version: 6.3.1 + resolution: "semver@npm:6.3.1" bin: semver: bin/semver.js - checksum: 10/8dbc3168e057a38fc322af909c7f5617483c50caddba135439ff09a754b20bdd6482a5123ff543dad4affa488ecf46ec5fb56d61312ad20bb140199b88dfaea9 + checksum: 10/1ef3a85bd02a760c6ef76a45b8c1ce18226de40831e02a00bad78485390b98b6ccaa31046245fc63bba4a47a6a592b6c7eedc65cc47126e60489f9cc1ce3ed7e languageName: node linkType: hard