diff --git a/.dockerignore b/.dockerignore index 891c644f7..c0bc46de7 100644 --- a/.dockerignore +++ b/.dockerignore @@ -7,7 +7,8 @@ databases .env.* .git .github -docs +docs/* +!docs/serviceTemplates src/test *.md *.log diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 9a81f4571..76e7c324f 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -9,6 +9,7 @@ on: pull_request: branches: - 'main' + - 'next-4' env: DOCKERHUB_IMAGE: ${{ 'oceanprotocol/ocean-node' }} diff --git a/Dockerfile b/Dockerfile index 1567fa7e6..b8e4b14a2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -43,6 +43,9 @@ COPY --chown=node:node --from=builder /usr/src/app/node_modules ./node_modules COPY --chown=node:node --from=builder /usr/src/app/schemas ./schemas COPY --chown=node:node --from=builder /usr/src/app/package.json ./ COPY --chown=node:node --from=builder /usr/src/app/config.json ./ +# Ship the operator service-on-demand templates so SERVICE_TEMPLATES_PATH=docs/serviceTemplates/ +# resolves inside the image (the rest of docs/ stays excluded via .dockerignore). +COPY --chown=node:node --from=builder /usr/src/app/docs/serviceTemplates ./docs/serviceTemplates RUN mkdir -p databases c2d_storage logs diff --git a/README.md b/README.md index 67492baac..7e211fe6b 100644 --- a/README.md +++ b/README.md @@ -156,3 +156,4 @@ Your node is now running. 
To start additional nodes, repeat these steps in a new - [Docker Deployment Guide](docs/dockerDeployment.md) - [C2D GPU Guide](docs/GPU.md) - [Compute pricing](docs/compute-pricing.md) +- [Services (Service-on-Demand)](docs/services.md) diff --git a/config.json b/config.json index 703783c67..35b084a61 100644 --- a/config.json +++ b/config.json @@ -94,6 +94,7 @@ "validateUnsignedDDO": true, "jwtSecret": "ocean-node-secret", "enableBenchmark": false, + "serviceTemplatesPath": "docs/serviceTemplates/", "dockerComputeEnvironments": [ { "socketPath": "/var/run/docker.sock", diff --git a/docs/API.md b/docs/API.md index dd5475852..bf50118a5 100644 --- a/docs/API.md +++ b/docs/API.md @@ -1819,3 +1819,276 @@ Delete a file from a bucket. ```json { "success": true } ``` + +--- + +## Service on Demand + +Service-on-Demand lets a consumer launch a long-running Docker container (e.g. JupyterLab, a +vLLM inference server, VS Code) on a compute environment, pay up front via on-chain escrow for +a requested `duration`, and reach it over forwarded network endpoints +(`http://<host>:<hostPort>`) while it runs. Unlike a compute job, a service stays up until +it expires or is stopped (extending it pushes the expiry out). See [`services.md`](./services.md) for the full design +and security model. + +All routes live under `/api/services`. Every command except `serviceTemplates` is +authenticated by a signature over `consumerAddress` + `nonce` + `command` (or an auth-token +`Authorization` header). Cost is computed only from the environment's server-side pricing and +charged to the authenticated `consumerAddress`. + +> **Note:** service containers run hardened (`no-new-privileges`, `CapDrop: ['ALL']`), so a +> process inside the container cannot bind to a port below 1024 — have your service listen on a +> **high port** (the published host port is allocated by the node regardless). + +### Service object definitions + +#### `ServiceTemplatePublic` (returned by `serviceTemplates`) + +Operator-published blueprint.
Secret `envVars` values are never returned — only their keys via +`envVarKeys`. + +| property | type | description | +| ------------------------- | ----------------------------- | ----------- | +| id | string | template id (`[a-z0-9][a-z0-9_-]{0,63}`) | +| name / description | string | human-readable labels | +| image | string | base image | +| tag / checksum / dockerfile | string | image spec — exactly one | +| exposedPorts | number[] | container ports to forward | +| envVarKeys | string[] | keys of operator-set env vars (values never returned) | +| userConfigurableEnvVars | object[] | `{ key, validation?, sensitive? }` passed via `userData` | +| command / entrypoint | string[] | Docker CMD / ENTRYPOINT overrides | +| requiredResources | object[] | resources the service MUST have to run | +| recommendedResources | object[] | resources for best performance | + +#### `ServiceJob` (returned by start / status / extend / restart / stop) + +The encrypted `userData` is never returned. Key fields: + +| property | type | description | +| ------------- | -------- | ----------- | +| serviceId | string | unique id of the running service | +| environment | string | envId the service runs on | +| owner | string | consumerAddress | +| status | number | `10` Starting, `20` Locking, `11` PullImage, `13` BuildImage, `30` Claiming, `40` Running, `12` PullImageFailed, `14` BuildImageFailed, `15` VulnerableImage, `50` Stopping, `70` Stopped, `75` Expired, `99` Error | +| statusText | string | human-readable status | +| dateCreated | string | ISO timestamp | +| expiresAt | number | Unix ms timestamp when the paid window ends | +| duration | number | requested seconds | +| endpoints | object[] | `{ containerPort, hostPort, url }` per exposed port | +| resources | object[] | `{ id, amount, price }` | +| payment | object | initial start payment record | +| extendPayments | object[] | one entry per successful extend | + +--- + +### `HTTP` GET /api/services/serviceTemplates + +### `P2P` 
command: serviceGetTemplates + +#### Description + +List the operator-published service templates (sanitized). Not authenticated. + +#### Query Parameters + +| name | type | required | description | +| ------- | ------ | -------- | ----------- | +| chainId | number | | filter to templates whose envs price on this chain | + +#### Response (200) + +```json +[ + { + "id": "jupyter-cpu", + "name": "JupyterLab (CPU)", + "image": "quay.io/jupyter/datascience-notebook", + "tag": "latest", + "exposedPorts": [8888], + "userConfigurableEnvVars": [{ "key": "JUPYTER_TOKEN", "sensitive": true }], + "requiredResources": [{ "id": "cpu", "min": 1 }, { "id": "ram", "min": 2 }] + } +] +``` + +--- + +### `HTTP` POST /api/services/serviceStart + +### `P2P` command: serviceStart + +#### Description + +Validate the request, persist the job, and **return immediately** with the `serviceId` — the +response does **not** wait for escrow or the image pull/build. The consumer supplies the +container spec directly (an `image` referenced by `tag`/`checksum`, or an inline `dockerfile` +when the operator allows building). + +The returned job has `status: 10` (`Starting`) and no `endpoints` yet. A background loop then +advances it: `Starting → Locking` (escrow lock) `→ PullImage`/`BuildImage` (image + scan) `→ +Claiming` (claim on success, or refund/cancel the lock on failure) `→ Running`. **Poll +`serviceStatus`** until `status` is `40` (`Running`, with `endpoints` populated) or a terminal +`*Failed` / `Error` status. + +#### Request Body + +```json +{ + "consumerAddress": "0x...", + "nonce": "123", + "signature": "0x...", + "environment": "env-1", + "image": "nginxinc/nginx-unprivileged", + "tag": "alpine", + "exposedPorts": [8080], + "resources": [{ "id": "cpu", "amount": 1 }, { "id": "ram", "amount": 1 }], + "duration": 3600, + "userData": "", + "payment": { "chainId": 8996, "token": "0x..." 
} +} +``` + +| field | type | required | description | +| --------------------- | -------- | -------- | ----------- | +| environment | string | v | envId to run on (services must be enabled on it) | +| image | string | v | base image | +| tag / checksum / dockerfile | string | | image spec — at most one; `dockerfile` requires `allowImageBuild` | +| additionalDockerFiles | object | | filename → content; only with `dockerfile` | +| dockerCmd / dockerEntrypoint | string[] | | container CMD / ENTRYPOINT overrides | +| exposedPorts | number[] | | container ports to publish | +| resources | object[] | | `{ id, amount }` requested resources | +| duration | number | v | seconds; capped by `serviceOnDemand.maxDurationSeconds` | +| userData | string | | ECIES-encrypted (to the node pubkey) JSON of env vars | +| payment | object | v | `{ chainId, token }` | + +#### Response (200) + +The immediate response — `Starting`, no endpoints yet. Poll `serviceStatus` for the rest. + +```json +[ + { + "serviceId": "0x...", + "environment": "env-1", + "owner": "0x...", + "status": 10, + "statusText": "Starting", + "expiresAt": 1735689600000, + "duration": 3600, + "endpoints": [], + "payment": { "chainId": 8996, "token": "0x...", "cost": 10 } + } +] +``` + +Errors: `403` services disabled on the env / access denied, `400` invalid params (bad address, +duration, image spec, unavailable resources, or no pricing for the token). Escrow lock/claim +happens in the background, so escrow failures surface as the job ending in an `Error` / `*Failed` +status (observed via `serviceStatus`), not as a synchronous `402`. + +--- + +### `HTTP` GET /api/services/serviceStatus + +### `P2P` command: serviceGetStatus + +#### Description + +Read service job status and endpoints. **Authenticated and owner-scoped** — only services owned +by the authenticated `consumerAddress` are returned.
+ +#### Query Parameters + +| name | type | required | description | +| --------------- | ------ | -------- | ----------- | +| consumerAddress | string | v | owner address | +| nonce | string | v | request nonce | +| signature | string | v | signed message (or use an `Authorization` auth-token header) | +| serviceId | string | | filter to a single service; omit to list all owned services | + +#### Response (200) + +Array of `ServiceJob` (with `userData` stripped). + +--- + +### `HTTP` POST /api/services/serviceExtend + +### `P2P` command: serviceExtend + +#### Description + +Pay to push the service expiry further out. The total remaining duration must not exceed +`maxDurationSeconds`. Re-checks the environment access list. + +#### Request Body + +```json +{ + "consumerAddress": "0x...", + "nonce": "123", + "signature": "0x...", + "serviceId": "0x...", + "additionalDuration": 1800, + "payment": { "chainId": 8996, "token": "0x..." } +} +``` + +`additionalDuration` must be a positive number of seconds. + +#### Response (200) + +The updated `ServiceJob` (advanced `expiresAt`, new entry in `extendPayments`). + +--- + +### `HTTP` POST /api/services/serviceRestart + +### `P2P` command: serviceRestart + +#### Description + +Recreate the service container (no extra charge), keeping the same `expiresAt` and host ports. +Re-checks the environment service gate and access list; rejected if the service has expired. +Optionally pass `userData` to replace the stored env vars. + +#### Request Body + +```json +{ + "consumerAddress": "0x...", + "nonce": "123", + "signature": "0x...", + "serviceId": "0x...", + "userData": "" +} +``` + +#### Response (200) + +The `ServiceJob` with a new `containerId` (same `hostPort` and `expiresAt`). + +--- + +### `HTTP` POST /api/services/serviceStop + +### `P2P` command: serviceStop + +#### Description + +Tear down the service container and network and release its resources. Owner-gated. 
+ +#### Request Body + +```json +{ + "consumerAddress": "0x...", + "nonce": "123", + "signature": "0x...", + "serviceId": "0x..." +} +``` + +#### Response (200) + +The `ServiceJob` with `status: 70` (Stopped). diff --git a/docs/Ocean Node.postman_collection.json b/docs/Ocean Node.postman_collection.json index 8f5f7a128..4f65f5104 100644 --- a/docs/Ocean Node.postman_collection.json +++ b/docs/Ocean Node.postman_collection.json @@ -1,193 +1,1181 @@ { "info": { - "_postman_id": "ff8f2614-8d77-40e4-9031-9ca2ed9f7973", - "name": "Ocean Node", + "name": "Ocean Node API", + "_postman_id": "ocean-node-http-api", + "description": "Complete collection of HTTP endpoints exposed by an Ocean Node's HTTP server.\n\nSet the `baseUrl` collection variable to your node (default `http://localhost:8000`). Many endpoints are authenticated with a signature over `consumerAddress` + `nonce` + `command`, or with an auth-token `Authorization` header. Convenience variables (`consumerAddress`, `signature`, `nonce`, `chainId`, `did`, `serviceId`, `jobId`, `token`, `bucketId`, `wallet`, `owner`) are provided as placeholders.\n\nGenerated from the route definitions in `src/components/httpRoutes/` and their handler implementations.", "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json" }, + "variable": [ + { "key": "baseUrl", "value": "http://localhost:8000", "type": "string" }, + { "key": "consumerAddress", "value": "0x0000000000000000000000000000000000000000", "type": "string" }, + { "key": "signature", "value": "0x", "type": "string" }, + { "key": "nonce", "value": "1", "type": "string" }, + { "key": "chainId", "value": "8996", "type": "string" }, + { "key": "did", "value": "did:op:0000000000000000000000000000000000000000000000000000000000000000", "type": "string" }, + { "key": "serviceId", "value": "", "type": "string" }, + { "key": "jobId", "value": "", "type": "string" }, + { "key": "token", "value": "", "type": "string" }, + { "key": "bucketId", "value": "", 
"type": "string" }, + { "key": "wallet", "value": "0x0000000000000000000000000000000000000000", "type": "string" }, + { "key": "owner", "value": "0x0000000000000000000000000000000000000000", "type": "string" } + ], "item": [ { - "name": "8000 - getP2pPeers", - "request": { - "method": "GET", - "header": [] - }, - "response": [] - }, - { - "name": "8001 - getP2pPeers", - "request": { - "method": "GET", - "header": [] - }, - "response": [] - }, - { - "name": "8000 - getOceanPeers", - "request": { - "method": "GET", - "header": [] - }, - "response": [] - }, - { - "name": "8001 - getOceanPeers", - "request": { - "method": "GET", - "header": [] - }, - "response": [] - }, - { - "name": "8000 - getPeer", - "request": { - "method": "GET", - "header": [], - "url": { - "raw": "http://127.0.0.1:8000/getP2pPeer?peerId=16Uiu2HAmQU8YmsACkFjkaFqEECLN3Csu6JgoU3hw9EsPmk7i9TFL", - "protocol": "http", - "host": ["127", "0", "0", "1"], - "port": "8000", - "path": ["getP2pPeer"], - "query": [ - { - "key": "peerId", - "value": "16Uiu2HAmQU8YmsACkFjkaFqEECLN3Csu6JgoU3hw9EsPmk7i9TFL" - } - ] - } - } - }, - { - "name": "8000 - advertiseDid", - "request": { - "method": "GET", - "header": [] - }, - "response": [] - }, - { - "name": "8001 - advertiseDid", - "request": { - "method": "GET", - "header": [] - }, - "response": [] - }, - { - "name": "8000 - getProvidersForDid", - "request": { - "method": "GET", - "header": [] - }, - "response": [] - }, - { - "name": "8001 - getProvidersForDid", - "request": { - "method": "GET", - "header": [] - }, - "response": [] - }, - { - "name": "8000 - directCommand (findDDO)", - "request": { - "method": "POST", - "header": [ - { - "key": "Content-Type", - "value": "application/json", - "type": "default" - } - ] - }, - "body": { - "mode": "raw", - "raw": "{\n \"command\": \"findDDO\",\n \"id\": \"did:op:0ebed8226ada17fde24b6bf2b95d27f8f05fcce09139ff5cec31f6d81a7cd2ea\"\n}" - }, - "url": { - "raw": "http://127.0.0.1:8000/directCommand", - "protocol": "http", - 
"host": ["127", "0", "0", "1"], - "port": "8000", - "path": ["directCommand"] - }, - "response": [] - }, - { - "name": "8001 - directCommand (findDDO)", - "request": { - "method": "POST", - "header": [ - { - "key": "Content-Type", - "value": "application/json", - "type": "default" - } - ] - }, - "body": { - "mode": "raw", - "raw": "{\n \"command\": \"findDDO\",\n \"id\": \"did:op:0ebed8226ada17fde24b6bf2b95d27f8f05fcce09139ff5cec31f6d81a7cd2ea\", \"node\": \"16Uiu2HAkvfXgYiFhsHRJvcdtmMs3aopgoRphb5xnXMh3dxCRuuX\"\n}" - }, - "url": { - "raw": "http://127.0.0.1:8001/directCommand", - "protocol": "http", - "host": ["127", "0", "0", "1"], - "port": "8001", - "path": ["directCommand"] - }, - "response": [] - }, - { - "name": "8000 - directCommand", - "request": { - "method": "POST", - "header": [ - { - "key": "Content-Type", - "value": "application/json", - "type": "default" - } - ], - "body": { - "mode": "raw", - "raw": "{\n \"command\":\"downloadURL\",\n \"node\": \"16Uiu2HAkxiemC25d2iZWTkVRQmZr9L9h3RNGnhiUWXEonmsPEC8y\",\n \"url\": \"http://example.com\",\n \"aes_encrypted_key\": \"0x1234567890abcdef\"\n}" - }, - "url": { - "raw": "http://127.0.0.1:8000/directCommand", - "protocol": "http", - "host": ["127", "0", "0", "1"], - "port": "8000", - "path": ["directCommand"] - } - }, - "response": [] - }, - { - "name": "8001 - directCommand", - "request": { - "method": "POST", - "header": [ - { - "key": "Content-Type", - "value": "application/json", - "type": "default" - } - ], - "body": { - "mode": "raw", - "raw": "{\n \"command\":\"downloadURL\",\n \"node\": \"16Uiu2HAkvfXgYiFhsHRJvcdtmMs3aopgoRphb5xnXMh3dxCRuuX\",\n \"url\": \"http://example.com\",\n \"aes_encrypted_key\": \"0x1234567890abcdef\"\n}" - }, - "url": { - "raw": "http://127.0.0.1:8001/directCommand", - "protocol": "http", - "host": ["127", "0", "0", "1"], - "port": "8001", - "path": ["directCommand"] - } - }, - "response": [] + "name": "Node Info", + "item": [ + { + "name": "Get Node Info", + "request": { + 
"method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/", + "host": ["{{baseUrl}}"], + "path": [""] + }, + "description": "Returns node identity and the list of available service endpoints (nodeId, chainIds, providerAddress, nodePublicKey, serviceEndpoints, software, version)." + } + } + ] + }, + { + "name": "Aquarius (DDO)", + "item": [ + { + "name": "Get DDO by DID", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/aquarius/assets/ddo/{{did}}", + "host": ["{{baseUrl}}"], + "path": ["api", "aquarius", "assets", "ddo", "{{did}}"] + }, + "description": "Retrieve the full DDO for a DID. Append an optional `/true` path segment to force a fresh lookup." + } + }, + { + "name": "Get DDO Metadata by DID", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/aquarius/assets/metadata/{{did}}", + "host": ["{{baseUrl}}"], + "path": ["api", "aquarius", "assets", "metadata", "{{did}}"] + }, + "description": "Retrieve DDO metadata for a DID. Optional trailing `/true` forces a fresh lookup." + } + }, + { + "name": "Query Assets", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { + "mode": "raw", + "raw": "{\n \"query\": {\n \"match_all\": {}\n },\n \"from\": 0,\n \"size\": 10\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/aquarius/assets/metadata/query", + "host": ["{{baseUrl}}"], + "path": ["api", "aquarius", "assets", "metadata", "query"] + }, + "description": "Query indexed assets. Body is a search query object (filter/query/sort/from/size)." 
+ } + }, + { + "name": "Get DDO State", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/aquarius/state/ddo?did={{did}}", + "host": ["{{baseUrl}}"], + "path": ["api", "aquarius", "state", "ddo"], + "query": [ + { "key": "did", "value": "{{did}}" }, + { "key": "nft", "value": "", "disabled": true }, + { "key": "txId", "value": "", "disabled": true } + ] + }, + "description": "Query DDO state by `did`, `nft`, or `txId` (at least one required)." + } + }, + { + "name": "Validate DDO", + "request": { + "method": "POST", + "header": [ + { "key": "Content-Type", "value": "application/json" }, + { "key": "Authorization", "value": "{{token}}", "disabled": true } + ], + "body": { + "mode": "raw", + "raw": "{\n \"ddo\": {},\n \"publisherAddress\": \"{{consumerAddress}}\",\n \"signature\": \"{{signature}}\",\n \"nonce\": \"{{nonce}}\"\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/aquarius/assets/ddo/validate", + "host": ["{{baseUrl}}"], + "path": ["api", "aquarius", "assets", "ddo", "validate"] + }, + "description": "Validate a DDO's schema and signature before publishing." + } + } + ] + }, + { + "name": "Provider", + "item": [ + { + "name": "Get Nonce", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/services/nonce?userAddress={{consumerAddress}}", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "nonce"], + "query": [{ "key": "userAddress", "value": "{{consumerAddress}}" }] + }, + "description": "Get the current nonce for a user address (used when signing requests)." 
+ } + }, + { + "name": "Initialize (get fees)", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/services/initialize?documentId={{did}}&serviceId=&consumerAddress={{consumerAddress}}", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "initialize"], + "query": [ + { "key": "documentId", "value": "{{did}}" }, + { "key": "serviceId", "value": "" }, + { "key": "consumerAddress", "value": "{{consumerAddress}}" }, + { "key": "validUntil", "value": "", "disabled": true }, + { "key": "policyServer", "value": "", "disabled": true } + ] + }, + "description": "Get the fees required to access an asset service." + } + }, + { + "name": "Encrypt", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/octet-stream" }], + "body": { "mode": "raw", "raw": "hello world" }, + "url": { + "raw": "{{baseUrl}}/api/services/encrypt?nonce={{nonce}}&consumerAddress={{consumerAddress}}&signature={{signature}}", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "encrypt"], + "query": [ + { "key": "nonce", "value": "{{nonce}}" }, + { "key": "consumerAddress", "value": "{{consumerAddress}}" }, + { "key": "signature", "value": "{{signature}}" } + ] + }, + "description": "ECIES-encrypt the raw request body. Returns application/octet-stream. 25MB limit." 
+ } + }, + { + "name": "Encrypt File", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { + "mode": "raw", + "raw": "{\n \"files\": {\n \"type\": \"url\",\n \"url\": \"https://example.com/file.txt\",\n \"method\": \"GET\"\n }\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/services/encryptFile?nonce={{nonce}}&consumerAddress={{consumerAddress}}&signature={{signature}}&encryptMethod=ECIES", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "encryptFile"], + "query": [ + { "key": "nonce", "value": "{{nonce}}" }, + { "key": "consumerAddress", "value": "{{consumerAddress}}" }, + { "key": "signature", "value": "{{signature}}" }, + { "key": "encryptMethod", "value": "ECIES" } + ] + }, + "description": "Encrypt a file (AES or ECIES). Accepts a StorageObject JSON, raw binary, or multipart. Returns encrypted bytes with X-Encrypted-By / X-Encrypted-Method headers." + } + }, + { + "name": "Decrypt", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { + "mode": "raw", + "raw": "{\n \"decrypterAddress\": \"{{consumerAddress}}\",\n \"chainId\": {{chainId}},\n \"nonce\": \"{{nonce}}\",\n \"signature\": \"{{signature}}\",\n \"transactionId\": \"\",\n \"dataNftAddress\": \"\",\n \"encryptedDocument\": \"\",\n \"flags\": 0,\n \"documentHash\": \"\"\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/services/decrypt", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "decrypt"] + }, + "description": "Decrypt a DDO document. Returns the decrypted payload as text/plain." 
+ } + }, + { + "name": "Download", + "request": { + "method": "GET", + "header": [{ "key": "Authorization", "value": "{{token}}", "disabled": true }], + "url": { + "raw": "{{baseUrl}}/api/services/download?fileIndex=0&documentId={{did}}&serviceId=&transferTxId=&nonce={{nonce}}&consumerAddress={{consumerAddress}}&signature={{signature}}", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "download"], + "query": [ + { "key": "fileIndex", "value": "0" }, + { "key": "documentId", "value": "{{did}}" }, + { "key": "serviceId", "value": "" }, + { "key": "transferTxId", "value": "" }, + { "key": "nonce", "value": "{{nonce}}" }, + { "key": "consumerAddress", "value": "{{consumerAddress}}" }, + { "key": "signature", "value": "{{signature}}" }, + { "key": "userdata", "value": "", "disabled": true }, + { "key": "policyServer", "value": "", "disabled": true } + ] + }, + "description": "Download an asset file after a valid transfer (order). Streams the file." + } + } + ] + }, + { + "name": "File Info", + "item": [ + { + "name": "File Info", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { + "mode": "raw", + "raw": "{\n \"did\": \"{{did}}\",\n \"serviceId\": \"\",\n \"consumerAddress\": \"{{consumerAddress}}\"\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/services/fileInfo", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "fileInfo"] + }, + "description": "Get file info for an asset (by `did`+`serviceId`) or by a raw file `type` descriptor. `consumerAddress` is required for NODE_PERSISTENT_STORAGE ACL gating." 
+ } + } + ] + }, + { + "name": "Compute", + "item": [ + { + "name": "Get Compute Environments", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/services/computeEnvironments?chainId={{chainId}}", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "computeEnvironments"], + "query": [ + { "key": "chainId", "value": "{{chainId}}" }, + { "key": "node", "value": "", "disabled": true } + ] + }, + "description": "List available compute environments (optionally filtered by chain)." + } + }, + { + "name": "Initialize Compute", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { + "mode": "raw", + "raw": "{\n \"algorithm\": { \"documentId\": \"{{did}}\", \"serviceId\": \"\" },\n \"datasets\": [\n { \"documentId\": \"{{did}}\", \"serviceId\": \"\" }\n ]\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/services/initializeCompute", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "initializeCompute"] + }, + "description": "Validate algorithm + datasets and return a price/initialization quote." 
+ } + }, + { + "name": "Start Compute (paid)", + "request": { + "method": "POST", + "header": [ + { "key": "Content-Type", "value": "application/json" }, + { "key": "Authorization", "value": "{{token}}", "disabled": true } + ], + "body": { + "mode": "raw", + "raw": "{\n \"consumerAddress\": \"{{consumerAddress}}\",\n \"signature\": \"{{signature}}\",\n \"nonce\": \"{{nonce}}\",\n \"environment\": \"\",\n \"algorithm\": { \"documentId\": \"{{did}}\", \"serviceId\": \"\" },\n \"datasets\": [ { \"documentId\": \"{{did}}\", \"serviceId\": \"\" } ],\n \"maxJobDuration\": 3600,\n \"resources\": [ { \"id\": \"cpu\", \"amount\": 1 }, { \"id\": \"ram\", \"amount\": 1 } ],\n \"payment\": { \"chainId\": {{chainId}}, \"token\": \"0x...\" }\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/services/compute", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "compute"] + }, + "description": "Start a paid compute job. Optional fields: policyServer, metadata, additionalViewers, queueMaxWaitTime, encryptedDockerRegistryAuth, output, outputBucketId." + } + }, + { + "name": "Start Free Compute", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { + "mode": "raw", + "raw": "{\n \"consumerAddress\": \"{{consumerAddress}}\",\n \"signature\": \"{{signature}}\",\n \"nonce\": \"{{nonce}}\",\n \"environment\": \"\",\n \"algorithm\": { \"documentId\": \"{{did}}\", \"serviceId\": \"\" },\n \"datasets\": [],\n \"resources\": [ { \"id\": \"cpu\", \"amount\": 1 }, { \"id\": \"ram\", \"amount\": 1 } ],\n \"maxJobDuration\": 600\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/services/freeCompute", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "freeCompute"] + }, + "description": "Start a free compute job (no payment) on a free-tier environment." 
+ } + }, + { + "name": "Get Compute Status", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/services/compute?consumerAddress={{consumerAddress}}&jobId={{jobId}}&agreementId=", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "compute"], + "query": [ + { "key": "consumerAddress", "value": "{{consumerAddress}}" }, + { "key": "jobId", "value": "{{jobId}}" }, + { "key": "agreementId", "value": "" }, + { "key": "node", "value": "", "disabled": true } + ] + }, + "description": "Get the status of a compute job." + } + }, + { + "name": "Stop Compute", + "request": { + "method": "PUT", + "header": [{ "key": "Authorization", "value": "{{token}}", "disabled": true }], + "url": { + "raw": "{{baseUrl}}/api/services/compute?consumerAddress={{consumerAddress}}&signature={{signature}}&nonce={{nonce}}&jobId={{jobId}}&agreementId=", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "compute"], + "query": [ + { "key": "consumerAddress", "value": "{{consumerAddress}}" }, + { "key": "signature", "value": "{{signature}}" }, + { "key": "nonce", "value": "{{nonce}}" }, + { "key": "jobId", "value": "{{jobId}}" }, + { "key": "agreementId", "value": "" }, + { "key": "node", "value": "", "disabled": true } + ] + }, + "description": "Stop a running compute job (parameters are query strings)." 
+ } + }, + { + "name": "Get Compute Result", + "request": { + "method": "GET", + "header": [{ "key": "Authorization", "value": "{{token}}", "disabled": true }], + "url": { + "raw": "{{baseUrl}}/api/services/computeResult?consumerAddress={{consumerAddress}}&jobId={{jobId}}&index=0&signature={{signature}}&nonce={{nonce}}", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "computeResult"], + "query": [ + { "key": "consumerAddress", "value": "{{consumerAddress}}" }, + { "key": "jobId", "value": "{{jobId}}" }, + { "key": "index", "value": "0" }, + { "key": "signature", "value": "{{signature}}" }, + { "key": "nonce", "value": "{{nonce}}" }, + { "key": "node", "value": "", "disabled": true } + ] + }, + "description": "Download a compute job result by index. Streams the result file." + } + }, + { + "name": "Get Compute Streamable Logs", + "request": { + "method": "GET", + "header": [{ "key": "Authorization", "value": "{{token}}", "disabled": true }], + "url": { + "raw": "{{baseUrl}}/api/services/computeStreamableLogs?consumerAddress={{consumerAddress}}&jobId={{jobId}}&signature={{signature}}&nonce={{nonce}}", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "computeStreamableLogs"], + "query": [ + { "key": "consumerAddress", "value": "{{consumerAddress}}" }, + { "key": "jobId", "value": "{{jobId}}" }, + { "key": "signature", "value": "{{signature}}" }, + { "key": "nonce", "value": "{{nonce}}" }, + { "key": "node", "value": "", "disabled": true } + ] + }, + "description": "Stream real-time logs from a running compute job (404 if not running)." 
+ } + } + ] + }, + { + "name": "Service on Demand", + "item": [ + { + "name": "Get Service Templates", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/services/serviceTemplates?chainId={{chainId}}", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "serviceTemplates"], + "query": [ + { "key": "chainId", "value": "{{chainId}}" }, + { "key": "node", "value": "", "disabled": true } + ] + }, + "description": "List operator-published service templates (sanitized). Not authenticated." + } + }, + { + "name": "Start Service", + "request": { + "method": "POST", + "header": [ + { "key": "Content-Type", "value": "application/json" }, + { "key": "Authorization", "value": "{{token}}", "disabled": true } + ], + "body": { + "mode": "raw", + "raw": "{\n \"consumerAddress\": \"{{consumerAddress}}\",\n \"nonce\": \"{{nonce}}\",\n \"signature\": \"{{signature}}\",\n \"environment\": \"\",\n \"image\": \"nginxinc/nginx-unprivileged\",\n \"tag\": \"alpine\",\n \"exposedPorts\": [8080],\n \"resources\": [ { \"id\": \"cpu\", \"amount\": 1 }, { \"id\": \"ram\", \"amount\": 1 } ],\n \"duration\": 3600,\n \"userData\": \"\",\n \"payment\": { \"chainId\": {{chainId}}, \"token\": \"0x...\" }\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/services/serviceStart", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "serviceStart"] + }, + "description": "Launch a long-running service container, paid via escrow. Optional: checksum, dockerfile, additionalDockerFiles, dockerCmd, dockerEntrypoint. Services must listen on a high port (>= 1024)."
+ } + }, + { + "name": "Get Service Status", + "request": { + "method": "GET", + "header": [{ "key": "Authorization", "value": "{{token}}", "disabled": true }], + "url": { + "raw": "{{baseUrl}}/api/services/serviceStatus?consumerAddress={{consumerAddress}}&nonce={{nonce}}&signature={{signature}}&serviceId={{serviceId}}", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "serviceStatus"], + "query": [ + { "key": "consumerAddress", "value": "{{consumerAddress}}" }, + { "key": "nonce", "value": "{{nonce}}" }, + { "key": "signature", "value": "{{signature}}" }, + { "key": "serviceId", "value": "{{serviceId}}" }, + { "key": "node", "value": "", "disabled": true } + ] + }, + "description": "Read service status/endpoints. Authenticated and owner-scoped. Omit serviceId to list all owned services." + } + }, + { + "name": "Extend Service", + "request": { + "method": "POST", + "header": [ + { "key": "Content-Type", "value": "application/json" }, + { "key": "Authorization", "value": "{{token}}", "disabled": true } + ], + "body": { + "mode": "raw", + "raw": "{\n \"consumerAddress\": \"{{consumerAddress}}\",\n \"nonce\": \"{{nonce}}\",\n \"signature\": \"{{signature}}\",\n \"serviceId\": \"{{serviceId}}\",\n \"additionalDuration\": 1800,\n \"payment\": { \"chainId\": {{chainId}}, \"token\": \"0x...\" }\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/services/serviceExtend", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "serviceExtend"] + }, + "description": "Pay to push the service expiry further out (additionalDuration in seconds, must be positive)." 
+ } + }, + { + "name": "Restart Service", + "request": { + "method": "POST", + "header": [ + { "key": "Content-Type", "value": "application/json" }, + { "key": "Authorization", "value": "{{token}}", "disabled": true } + ], + "body": { + "mode": "raw", + "raw": "{\n \"consumerAddress\": \"{{consumerAddress}}\",\n \"nonce\": \"{{nonce}}\",\n \"signature\": \"{{signature}}\",\n \"serviceId\": \"{{serviceId}}\",\n \"userData\": \"\"\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/services/serviceRestart", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "serviceRestart"] + }, + "description": "Recreate the service container (no extra charge), keeping the same hostPort and expiresAt. Optional userData replaces stored env vars." + } + }, + { + "name": "Stop Service", + "request": { + "method": "POST", + "header": [ + { "key": "Content-Type", "value": "application/json" }, + { "key": "Authorization", "value": "{{token}}", "disabled": true } + ], + "body": { + "mode": "raw", + "raw": "{\n \"consumerAddress\": \"{{consumerAddress}}\",\n \"nonce\": \"{{nonce}}\",\n \"signature\": \"{{signature}}\",\n \"serviceId\": \"{{serviceId}}\"\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/services/serviceStop", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "serviceStop"] + }, + "description": "Tear down the service container and network and release resources. Owner-gated." 
+ } + } + ] + }, + { + "name": "Persistent Storage", + "item": [ + { + "name": "Create Bucket", + "request": { + "method": "POST", + "header": [ + { "key": "Content-Type", "value": "application/json" }, + { "key": "Authorization", "value": "{{token}}", "disabled": true } + ], + "body": { + "mode": "raw", + "raw": "{\n \"consumerAddress\": \"{{consumerAddress}}\",\n \"signature\": \"{{signature}}\",\n \"nonce\": \"{{nonce}}\",\n \"label\": \"my-bucket\",\n \"accessLists\": []\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/services/persistentStorage/buckets", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "persistentStorage", "buckets"] + }, + "description": "Create a new persistent storage bucket owned by consumerAddress." + } + }, + { + "name": "Update Bucket", + "request": { + "method": "PATCH", + "header": [ + { "key": "Content-Type", "value": "application/json" }, + { "key": "Authorization", "value": "{{token}}", "disabled": true } + ], + "body": { "mode": "raw", "raw": "{\n \"label\": \"renamed-bucket\"\n}" }, + "url": { + "raw": "{{baseUrl}}/api/services/persistentStorage/buckets/{{bucketId}}?consumerAddress={{consumerAddress}}&signature={{signature}}&nonce={{nonce}}", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "persistentStorage", "buckets", "{{bucketId}}"], + "query": [ + { "key": "consumerAddress", "value": "{{consumerAddress}}" }, + { "key": "signature", "value": "{{signature}}" }, + { "key": "nonce", "value": "{{nonce}}" } + ] + }, + "description": "Update a bucket (e.g. set its label)." 
+ } + }, + { + "name": "List Buckets", + "request": { + "method": "GET", + "header": [{ "key": "Authorization", "value": "{{token}}", "disabled": true }], + "url": { + "raw": "{{baseUrl}}/api/services/persistentStorage/buckets?consumerAddress={{consumerAddress}}&signature={{signature}}&nonce={{nonce}}&owner={{owner}}", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "persistentStorage", "buckets"], + "query": [ + { "key": "consumerAddress", "value": "{{consumerAddress}}" }, + { "key": "signature", "value": "{{signature}}" }, + { "key": "nonce", "value": "{{nonce}}" }, + { "key": "owner", "value": "{{owner}}" } + ] + }, + "description": "List buckets for an owner (filtered by access lists for the calling consumer)." + } + }, + { + "name": "List Files in Bucket", + "request": { + "method": "GET", + "header": [{ "key": "Authorization", "value": "{{token}}", "disabled": true }], + "url": { + "raw": "{{baseUrl}}/api/services/persistentStorage/buckets/{{bucketId}}/files?consumerAddress={{consumerAddress}}&signature={{signature}}&nonce={{nonce}}", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "persistentStorage", "buckets", "{{bucketId}}", "files"], + "query": [ + { "key": "consumerAddress", "value": "{{consumerAddress}}" }, + { "key": "signature", "value": "{{signature}}" }, + { "key": "nonce", "value": "{{nonce}}" } + ] + }, + "description": "List all files in a bucket." 
+ } + }, + { + "name": "Get File Object", + "request": { + "method": "GET", + "header": [{ "key": "Authorization", "value": "{{token}}", "disabled": true }], + "url": { + "raw": "{{baseUrl}}/api/services/persistentStorage/buckets/{{bucketId}}/files/myfile.txt/object?consumerAddress={{consumerAddress}}&signature={{signature}}&nonce={{nonce}}", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "persistentStorage", "buckets", "{{bucketId}}", "files", "myfile.txt", "object"], + "query": [ + { "key": "consumerAddress", "value": "{{consumerAddress}}" }, + { "key": "signature", "value": "{{signature}}" }, + { "key": "nonce", "value": "{{nonce}}" } + ] + }, + "description": "Get the file object metadata for a file in a bucket." + } + }, + { + "name": "Upload File", + "request": { + "method": "POST", + "header": [ + { "key": "Content-Type", "value": "application/octet-stream" }, + { "key": "Authorization", "value": "{{token}}", "disabled": true } + ], + "body": { "mode": "raw", "raw": "" }, + "url": { + "raw": "{{baseUrl}}/api/services/persistentStorage/buckets/{{bucketId}}/files/myfile.txt?consumerAddress={{consumerAddress}}&signature={{signature}}&nonce={{nonce}}", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "persistentStorage", "buckets", "{{bucketId}}", "files", "myfile.txt"], + "query": [ + { "key": "consumerAddress", "value": "{{consumerAddress}}" }, + { "key": "signature", "value": "{{signature}}" }, + { "key": "nonce", "value": "{{nonce}}" } + ] + }, + "description": "Upload a file to a bucket. Body is the raw file bytes (supports chunked uploads)." 
+ } + }, + { + "name": "Delete File", + "request": { + "method": "DELETE", + "header": [{ "key": "Authorization", "value": "{{token}}", "disabled": true }], + "url": { + "raw": "{{baseUrl}}/api/services/persistentStorage/buckets/{{bucketId}}/files/myfile.txt?consumerAddress={{consumerAddress}}&signature={{signature}}&nonce={{nonce}}", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "persistentStorage", "buckets", "{{bucketId}}", "files", "myfile.txt"], + "query": [ + { "key": "consumerAddress", "value": "{{consumerAddress}}" }, + { "key": "signature", "value": "{{signature}}" }, + { "key": "nonce", "value": "{{nonce}}" } + ] + }, + "description": "Delete a file from a bucket. Returns { success: true }." + } + } + ] + }, + { + "name": "Escrow", + "item": [ + { + "name": "Get Escrow Events", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/services/escrow/events?chainId={{chainId}}", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "escrow", "events"], + "query": [ + { "key": "chainId", "value": "{{chainId}}" }, + { "key": "eventType", "value": "", "disabled": true }, + { "key": "payer", "value": "", "disabled": true }, + { "key": "payee", "value": "", "disabled": true }, + { "key": "token", "value": "", "disabled": true }, + { "key": "jobId", "value": "", "disabled": true }, + { "key": "txId", "value": "", "disabled": true }, + { "key": "offset", "value": "", "disabled": true }, + { "key": "size", "value": "", "disabled": true } + ] + }, + "description": "Query escrow contract events with optional filters (eventType e.g. Lock/Claimed/Canceled/Deposit/Withdraw/Auth)." 
+ } + } + ] + }, + { + "name": "Access Lists", + "item": [ + { + "name": "Search Access Lists by Wallet", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/services/accesslists?wallet={{wallet}}&chainId={{chainId}}", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "accesslists"], + "query": [ + { "key": "wallet", "value": "{{wallet}}" }, + { "key": "chainId", "value": "{{chainId}}", "disabled": true } + ] + }, + "description": "Search access lists that include a given wallet address." + } + }, + { + "name": "Get Access List", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/services/accesslists/{{chainId}}/0xContractAddress", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "accesslists", "{{chainId}}", "0xContractAddress"] + }, + "description": "Get a specific access list by chainId and contract address." + } + } + ] + }, + { + "name": "Auth", + "item": [ + { + "name": "Create Auth Token", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { + "mode": "raw", + "raw": "{\n \"address\": \"{{consumerAddress}}\",\n \"signature\": \"{{signature}}\",\n \"nonce\": \"{{nonce}}\",\n \"validUntil\": null,\n \"chainId\": {{chainId}}\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/services/auth/token", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "auth", "token"] + }, + "description": "Create an auth token (returned token can be used as the Authorization header on authenticated routes)." 
+ } + }, + { + "name": "Invalidate Auth Token", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { + "mode": "raw", + "raw": "{\n \"address\": \"{{consumerAddress}}\",\n \"signature\": \"{{signature}}\",\n \"token\": \"{{token}}\",\n \"nonce\": \"{{nonce}}\",\n \"chainId\": {{chainId}}\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/services/auth/token/invalidate", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "auth", "token", "invalidate"] + }, + "description": "Invalidate an existing auth token." + } + } + ] + }, + { + "name": "Policy Server", + "item": [ + { + "name": "Policy Server Passthrough", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { "mode": "raw", "raw": "{\n \"policyServerPassthrough\": {}\n}" }, + "url": { + "raw": "{{baseUrl}}/api/services/PolicyServerPassthrough", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "PolicyServerPassthrough"] + }, + "description": "Pass arbitrary data through to the configured policy server. Streams the response." + } + }, + { + "name": "Initialize PS Verification", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { + "mode": "raw", + "raw": "{\n \"documentId\": \"{{did}}\",\n \"serviceId\": \"\",\n \"consumerAddress\": \"{{consumerAddress}}\",\n \"policyServer\": {}\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/services/initializePSVerification", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "initializePSVerification"] + }, + "description": "Initialize a policy server verification flow. Streams the response." 
+ } + } + ] + }, + { + "name": "Admin", + "item": [ + { + "name": "Fetch Config", + "request": { + "method": "GET", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { + "mode": "raw", + "raw": "{\n \"address\": \"{{consumerAddress}}\",\n \"signature\": \"{{signature}}\",\n \"nonce\": \"{{nonce}}\"\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/admin/config", + "host": ["{{baseUrl}}"], + "path": ["api", "admin", "config"] + }, + "description": "Fetch the node configuration (admin-only). Note: this GET reads auth fields from the JSON body." + } + }, + { + "name": "Update Config", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { + "mode": "raw", + "raw": "{\n \"address\": \"{{consumerAddress}}\",\n \"signature\": \"{{signature}}\",\n \"nonce\": \"{{nonce}}\",\n \"config\": {}\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/admin/config/update", + "host": ["{{baseUrl}}"], + "path": ["api", "admin", "config", "update"] + }, + "description": "Update the node configuration (admin-only)." + } + } + ] + }, + { + "name": "Queue & Jobs", + "item": [ + { + "name": "Get Index Queue", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/services/indexQueue", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "indexQueue"] + }, + "description": "Get the current indexing queue." + } + }, + { + "name": "Get Jobs for Identifier", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/services/jobs/{{jobId}}", + "host": ["{{baseUrl}}"], + "path": ["api", "services", "jobs", "{{jobId}}"] + }, + "description": "Get the indexer job pool for a given job identifier." 
+ } + } + ] + }, + { + "name": "P2P Peers", + "item": [ + { + "name": "Get P2P Network Stats", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/getP2pNetworkStats", + "host": ["{{baseUrl}}"], + "path": ["getP2pNetworkStats"] + }, + "description": "Get P2P network statistics." + } + }, + { + "name": "Get P2P Peers", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/getP2PPeers", + "host": ["{{baseUrl}}"], + "path": ["getP2PPeers"] + }, + "description": "List all connected P2P peers." + } + }, + { + "name": "Get P2P Peer", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/getP2PPeer?peerId=", + "host": ["{{baseUrl}}"], + "path": ["getP2PPeer"], + "query": [{ "key": "peerId", "value": "" }] + }, + "description": "Get details for a specific peer by peerId." + } + }, + { + "name": "Find Peer", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/findPeer?peerId=&timeout=10000", + "host": ["{{baseUrl}}"], + "path": ["findPeer"], + "query": [ + { "key": "peerId", "value": "" }, + { "key": "timeout", "value": "10000", "disabled": true } + ] + }, + "description": "Find a peer's multiaddress by peerId." + } + } + ] + }, + { + "name": "DIDs / Providers", + "item": [ + { + "name": "Get Providers for String", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/getProvidersForString?input={{did}}", + "host": ["{{baseUrl}}"], + "path": ["getProvidersForString"], + "query": [{ "key": "input", "value": "{{did}}" }] + }, + "description": "Get the providers (peers) that serve a single string/CID." 
+ } + }, + { + "name": "Get Providers for Strings", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { "mode": "raw", "raw": "[\n \"did:op:...\",\n \"did:op:...\"\n]" }, + "url": { + "raw": "{{baseUrl}}/getProvidersForStrings?timeout=10000", + "host": ["{{baseUrl}}"], + "path": ["getProvidersForStrings"], + "query": [{ "key": "timeout", "value": "10000", "disabled": true }] + }, + "description": "Batch lookup: get providers for an array of strings/CIDs. Body is a JSON array of strings." + } + } + ] + }, + { + "name": "Logs", + "item": [ + { + "name": "Get Logs", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { + "mode": "raw", + "raw": "{\n \"address\": \"{{consumerAddress}}\",\n \"signature\": \"{{signature}}\",\n \"nonce\": \"{{nonce}}\"\n}" + }, + "url": { + "raw": "{{baseUrl}}/logs?maxLogs=100&page=1", + "host": ["{{baseUrl}}"], + "path": ["logs"], + "query": [ + { "key": "maxLogs", "value": "100" }, + { "key": "page", "value": "1" }, + { "key": "moduleName", "value": "", "disabled": true }, + { "key": "level", "value": "", "disabled": true }, + { "key": "startTime", "value": "", "disabled": true }, + { "key": "endTime", "value": "", "disabled": true } + ] + }, + "description": "Retrieve node logs (admin auth via signed body). Filter/paginate with query params." + } + }, + { + "name": "Get Log by ID", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { + "mode": "raw", + "raw": "{\n \"address\": \"{{consumerAddress}}\",\n \"signature\": \"{{signature}}\",\n \"nonce\": \"{{nonce}}\",\n \"logId\": \"123\"\n}" + }, + "url": { + "raw": "{{baseUrl}}/log/123", + "host": ["{{baseUrl}}"], + "path": ["log", "123"] + }, + "description": "Retrieve a single log entry by id (logId in body must match the :id path param)." 
+ } + } + ] + }, + { + "name": "Direct Command (P2P)", + "description": "POST /directCommand is a single endpoint that dispatches to a protocol command handler chosen by the `command` field. It can run locally or be forwarded to another peer via `node`/`multiAddrs`. Below are representative command bodies; any PROTOCOL_COMMANDS value is accepted (download, encrypt, encryptFile, decryptDDO, getDDO, query, nonce, status, detailedStatus, findDDO, getFees, fileInfo, validateDDO, getComputeEnvironments, startCompute, freeStartCompute, stopCompute, getComputeStatus, getComputeStreamableLogs, getComputeResult, initializeCompute, reindexTx, reindexChain, collectFees, PolicyServerPassthrough, getP2PPeer(s), getP2PNetworkStats, findPeer, createAuthToken, invalidateAuthToken, fetchConfig, pushConfig, getLogs, jobs, persistentStorage*, getAccessList, searchAccessList, getEscrowEvents, serviceGetTemplates, serviceStart, serviceStop, serviceRestart, serviceGetStatus, serviceExtend, stopNode, ...).", + "item": [ + { + "name": "getDDO", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { "mode": "raw", "raw": "{\n \"command\": \"getDDO\",\n \"id\": \"{{did}}\"\n}" }, + "url": { "raw": "{{baseUrl}}/directCommand", "host": ["{{baseUrl}}"], "path": ["directCommand"] }, + "description": "Fetch a DDO by id over the command interface." + } + }, + { + "name": "nonce", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { "mode": "raw", "raw": "{\n \"command\": \"nonce\",\n \"address\": \"{{consumerAddress}}\"\n}" }, + "url": { "raw": "{{baseUrl}}/directCommand", "host": ["{{baseUrl}}"], "path": ["directCommand"] }, + "description": "Get the nonce for an address." 
+ } + }, + { + "name": "status", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { "mode": "raw", "raw": "{\n \"command\": \"status\"\n}" }, + "url": { "raw": "{{baseUrl}}/directCommand", "host": ["{{baseUrl}}"], "path": ["directCommand"] }, + "description": "Get node status. Use \"command\": \"detailedStatus\" for the extended report." + } + }, + { + "name": "query", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { "mode": "raw", "raw": "{\n \"command\": \"query\",\n \"query\": { \"query\": { \"match_all\": {} }, \"from\": 0, \"size\": 10 }\n}" }, + "url": { "raw": "{{baseUrl}}/directCommand", "host": ["{{baseUrl}}"], "path": ["directCommand"] }, + "description": "Query indexed assets." + } + }, + { + "name": "getFees", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { "mode": "raw", "raw": "{\n \"command\": \"getFees\",\n \"ddoId\": \"{{did}}\",\n \"serviceId\": \"\",\n \"consumerAddress\": \"{{consumerAddress}}\",\n \"validUntil\": 0\n}" }, + "url": { "raw": "{{baseUrl}}/directCommand", "host": ["{{baseUrl}}"], "path": ["directCommand"] }, + "description": "Get the provider fees for an asset service." + } + }, + { + "name": "fileInfo", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { "mode": "raw", "raw": "{\n \"command\": \"fileInfo\",\n \"did\": \"{{did}}\",\n \"serviceId\": \"\"\n}" }, + "url": { "raw": "{{baseUrl}}/directCommand", "host": ["{{baseUrl}}"], "path": ["directCommand"] }, + "description": "Get file info for an asset or a raw file descriptor (type/url/...)." 
+ } + }, + { + "name": "download", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { "mode": "raw", "raw": "{\n \"command\": \"download\",\n \"fileIndex\": 0,\n \"documentId\": \"{{did}}\",\n \"serviceId\": \"\",\n \"transferTxId\": \"\",\n \"consumerAddress\": \"{{consumerAddress}}\",\n \"signature\": \"{{signature}}\",\n \"nonce\": \"{{nonce}}\"\n}" }, + "url": { "raw": "{{baseUrl}}/directCommand", "host": ["{{baseUrl}}"], "path": ["directCommand"] }, + "description": "Download an asset file (streams the response)." + } + }, + { + "name": "encrypt", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { "mode": "raw", "raw": "{\n \"command\": \"encrypt\",\n \"blob\": \"hello\",\n \"consumerAddress\": \"{{consumerAddress}}\",\n \"signature\": \"{{signature}}\",\n \"nonce\": \"{{nonce}}\",\n \"encoding\": \"string\",\n \"encryptionType\": \"ECIES\"\n}" }, + "url": { "raw": "{{baseUrl}}/directCommand", "host": ["{{baseUrl}}"], "path": ["directCommand"] }, + "description": "Encrypt a blob (AES or ECIES)." + } + }, + { + "name": "decryptDDO", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { "mode": "raw", "raw": "{\n \"command\": \"decryptDDO\",\n \"decrypterAddress\": \"{{consumerAddress}}\",\n \"chainId\": {{chainId}},\n \"nonce\": \"{{nonce}}\",\n \"signature\": \"{{signature}}\",\n \"transactionId\": \"\",\n \"dataNftAddress\": \"\"\n}" }, + "url": { "raw": "{{baseUrl}}/directCommand", "host": ["{{baseUrl}}"], "path": ["directCommand"] }, + "description": "Decrypt a DDO document." 
+ } + }, + { + "name": "findDDO", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { "mode": "raw", "raw": "{\n \"command\": \"findDDO\",\n \"id\": \"{{did}}\"\n}" }, + "url": { "raw": "{{baseUrl}}/directCommand", "host": ["{{baseUrl}}"], "path": ["directCommand"] }, + "description": "Find which peers can serve a DDO." + } + }, + { + "name": "validateDDO", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { "mode": "raw", "raw": "{\n \"command\": \"validateDDO\",\n \"ddo\": {}\n}" }, + "url": { "raw": "{{baseUrl}}/directCommand", "host": ["{{baseUrl}}"], "path": ["directCommand"] }, + "description": "Validate a DDO." + } + }, + { + "name": "getComputeEnvironments", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { "mode": "raw", "raw": "{\n \"command\": \"getComputeEnvironments\",\n \"chainId\": {{chainId}}\n}" }, + "url": { "raw": "{{baseUrl}}/directCommand", "host": ["{{baseUrl}}"], "path": ["directCommand"] }, + "description": "List compute environments over the command interface." + } + }, + { + "name": "getEscrowEvents", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { "mode": "raw", "raw": "{\n \"command\": \"getEscrowEvents\",\n \"chainId\": {{chainId}}\n}" }, + "url": { "raw": "{{baseUrl}}/directCommand", "host": ["{{baseUrl}}"], "path": ["directCommand"] }, + "description": "Query escrow events over the command interface." + } + } + ] } ] } diff --git a/docs/env.md b/docs/env.md index 3c1405295..b548e292f 100644 --- a/docs/env.md +++ b/docs/env.md @@ -129,6 +129,8 @@ Environmental variables are also tracked in `ENVIRONMENT_VARIABLES` within `src/ - `C2D_DOWNLOAD_TIMEOUT`: Timeout (in seconds) for pulling the algorithm docker image during a C2D job. 
If the pull exceeds this timeout, the job fails with `PullImageFailed` instead of getting stuck. Defaults to `900` (15 minutes). Example: `900` +- `SERVICE_TEMPLATES_PATH`: Path to a folder of operator-published Service-on-Demand template files (`*.json`, validated against the template schema). The folder is re-read on every `serviceTemplates` request, so templates can be added, edited, or removed without restarting the node. Maps to the `serviceTemplatesPath` config field. Defaults to `databases/serviceTemplates/`. See the [Services guide](services.md). Example: `docs/serviceTemplates/` + The `DOCKER_COMPUTE_ENVIRONMENTS` environment variable is used to configure Docker-based compute environments in Ocean Node. For GPU setup and examples see [GPU Guide](GPU.md). For pricing configuration see [Compute pricing](compute-pricing.md). `cpu`, `ram`, and `disk` resources are **auto-detected** from the host at startup. All resource values are expressed in natural units: CPU in cores, RAM and disk in GB. diff --git a/docs/serviceTemplates/llamacpp-phi4-cpu.json b/docs/serviceTemplates/llamacpp-phi4-cpu.json new file mode 100644 index 000000000..c5f171b7a --- /dev/null +++ b/docs/serviceTemplates/llamacpp-phi4-cpu.json @@ -0,0 +1,23 @@ +{ + "id": "llamacpp-phi4-cpu", + "name": "llama.cpp — Phi-4 (CPU)", + "description": "CPU-only OpenAI-compatible chat server (llama.cpp) running Microsoft Phi-4 as a Q4_K_M GGUF quantization. On startup llama.cpp downloads the GGUF from the Hugging Face Hub (-hf), then serves /v1/chat/completions on port 8080. No GPU required. 
Note: vLLM's published image is CUDA-only, so this CPU template uses llama.cpp instead; the model is downloaded then run locally just like the vLLM templates.", + "image": "ghcr.io/ggml-org/llama.cpp", + "tag": "server", + "exposedPorts": [8080], + "command": [ + "-hf", + "bartowski/phi-4-GGUF:Q4_K_M", + "--host", + "0.0.0.0", + "--port", + "8080", + "-c", + "8192" + ], + "requiredResources": [ + { "id": "cpu", "min": 4, "recommended": 8, "unit": "cores" }, + { "id": "ram", "min": 12, "recommended": 16, "unit": "GB" }, + { "id": "disk", "min": 15, "recommended": 20, "unit": "GB" } + ] +} diff --git a/docs/serviceTemplates/vllm-hf-model.json b/docs/serviceTemplates/vllm-hf-model.json new file mode 100644 index 000000000..373de1b74 --- /dev/null +++ b/docs/serviceTemplates/vllm-hf-model.json @@ -0,0 +1,44 @@ +{ + "id": "vllm-hf-model", + "name": "vLLM — any Hugging Face model (MODEL_ID)", + "description": "Local OpenAI-compatible LLM inference server (vLLM) that serves a Hugging Face model of the consumer's choice. The model id is supplied via MODEL_ID and substituted into the launch command (${MODEL_ID}); on startup vLLM downloads that model from the Hugging Face Hub, then serves it on port 8000 (e.g. POST /v1/chat/completions). Provide HF_TOKEN for gated/private models. 
Requires a CUDA GPU sized to the chosen model.", + "image": "vllm/vllm-openai", + "tag": "latest", + "exposedPorts": [8000], + "command": [ + "--model", + "${MODEL_ID}", + "--host", + "0.0.0.0", + "--port", + "8000", + "--max-model-len", + "8192", + "--gpu-memory-utilization", + "0.9" + ], + "userConfigurableEnvVars": [ + { + "key": "MODEL_ID", + "validation": "^[A-Za-z0-9][\\w.-]*(/[A-Za-z0-9][\\w.-]*)?$" + }, + { + "key": "HF_TOKEN", + "validation": "^hf_[A-Za-z0-9]{20,}$", + "sensitive": true + } + ], + "requiredResources": [ + { "id": "cpu", "min": 2, "recommended": 4, "unit": "cores" }, + { "id": "ram", "min": 8, "recommended": 16, "unit": "GB" }, + { "id": "disk", "min": 20, "recommended": 50, "unit": "GB" }, + { + "kind": "discrete", + "type": "gpu", + "min": 1, + "recommended": 1, + "unit": "count", + "description": "CUDA-capable GPU; size VRAM to the chosen MODEL_ID" + } + ] +} diff --git a/docs/serviceTemplates/vllm-nomic-embed.json b/docs/serviceTemplates/vllm-nomic-embed.json new file mode 100644 index 000000000..ad2baa744 --- /dev/null +++ b/docs/serviceTemplates/vllm-nomic-embed.json @@ -0,0 +1,32 @@ +{ + "id": "vllm-nomic-embed", + "name": "vLLM — Nomic Embed (embeddings API)", + "description": "Local embedding server (vLLM) serving nomic-ai/nomic-embed-text-v1.5. On startup vLLM downloads the model from the Hugging Face Hub, then exposes the OpenAI-compatible embeddings API on port 8000 (POST /v1/embeddings) — handy for testing the embeddings endpoint. Runs vLLM in embedding/pooling mode (--task embed) and requires --trust-remote-code because the model ships custom modeling code that is executed in the container. 
The model is small, so a few GB of VRAM is enough.", + "image": "vllm/vllm-openai", + "tag": "latest", + "exposedPorts": [8000], + "command": [ + "--model", + "nomic-ai/nomic-embed-text-v1.5", + "--task", + "embed", + "--trust-remote-code", + "--host", + "0.0.0.0", + "--port", + "8000" + ], + "requiredResources": [ + { "id": "cpu", "min": 2, "recommended": 4, "unit": "cores" }, + { "id": "ram", "min": 4, "recommended": 8, "unit": "GB" }, + { "id": "disk", "min": 5, "recommended": 10, "unit": "GB" }, + { + "kind": "discrete", + "type": "gpu", + "min": 1, + "recommended": 1, + "unit": "count", + "description": "CUDA-capable GPU; a few GB VRAM is enough for this 137M embedding model" + } + ] +} diff --git a/docs/serviceTemplates/vllm-qwen-0_5b.json b/docs/serviceTemplates/vllm-qwen-0_5b.json new file mode 100644 index 000000000..3aefff604 --- /dev/null +++ b/docs/serviceTemplates/vllm-qwen-0_5b.json @@ -0,0 +1,40 @@ +{ + "id": "vllm-qwen-0-5b", + "name": "vLLM — Qwen2.5 0.5B Instruct (local)", + "description": "Local OpenAI-compatible LLM inference server (vLLM) serving the small Qwen/Qwen2.5-0.5B-Instruct model. On startup vLLM downloads the model from the Hugging Face Hub, then serves it locally on port 8000 (e.g. POST /v1/chat/completions). The model is fixed by the operator; the consumer only needs a GPU-enabled environment. 
Set HF_TOKEN only if you later switch to a gated model.", + "image": "vllm/vllm-openai", + "tag": "latest", + "exposedPorts": [8000], + "command": [ + "--model", + "Qwen/Qwen2.5-0.5B-Instruct", + "--host", + "0.0.0.0", + "--port", + "8000", + "--max-model-len", + "8192", + "--gpu-memory-utilization", + "0.9" + ], + "userConfigurableEnvVars": [ + { + "key": "HF_TOKEN", + "validation": "^hf_[A-Za-z0-9]{20,}$", + "sensitive": true + } + ], + "requiredResources": [ + { "id": "cpu", "min": 2, "recommended": 4, "unit": "cores" }, + { "id": "ram", "min": 8, "recommended": 16, "unit": "GB" }, + { "id": "disk", "min": 10, "recommended": 20, "unit": "GB" }, + { + "kind": "discrete", + "type": "gpu", + "min": 1, + "recommended": 1, + "unit": "count", + "description": "CUDA-capable GPU (>= 6 GB VRAM is plenty for this 0.5B model)" + } + ] +} diff --git a/docs/services.md b/docs/services.md new file mode 100644 index 000000000..d9a68f79f --- /dev/null +++ b/docs/services.md @@ -0,0 +1,121 @@ +# Services (Service-on-Demand) + +A high-level overview of the service-on-demand feature: how it works, how it is +configured, and the security properties you should be aware of. + +## What is a service? + +A **service** is a long-running Docker container that a consumer launches on a compute +environment and pays for up front via on-chain escrow. Unlike a compute job — which runs +an algorithm to completion and exits — a service stays up for a requested **duration** and +exposes one or more network **endpoints** (`http://<nodeHost>:<hostPort>`) that the +consumer can connect to while it runs. + +The consumer supplies the container spec directly in the request: an `image` +(referenced by `tag` or `checksum`, or an inline `dockerfile` when the operator allows +building), optional `dockerCmd` / `dockerEntrypoint`, the container ports to expose, the +requested resources (cpu/ram/disk/gpu), the duration, and encrypted `userData` that is +injected as container environment variables.
+ +## Lifecycle + +All endpoints live under `/api/services`. Every request except `serviceTemplates` is +authenticated by a signature (or auth token) over the caller's `consumerAddress` + +`nonce` + command. `serviceStatus` is a `GET`, so it carries `consumerAddress`, `nonce`, +and `signature` as query parameters (or an auth-token `Authorization` header). + +| Command | Route | Method | Purpose | +| --- | --- | --- | --- | +| `SERVICE_START` | `/api/services/serviceStart` | POST | Validate, persist a `Starting` record, and return the `serviceId` immediately (escrow + image + container happen in the background) | +| `SERVICE_GET_STATUS` | `/api/services/serviceStatus` | GET | Read job status / endpoints — authenticated, owner-scoped (see notice below); poll this to follow a starting service | +| `SERVICE_EXTEND` | `/api/services/serviceExtend` | POST | Pay to push the expiry further out | +| `SERVICE_RESTART` | `/api/services/serviceRestart` | POST | Recreate the container (no extra charge) | +| `SERVICE_STOP` | `/api/services/serviceStop` | POST | Tear down the container and release resources | +| `SERVICE_GET_TEMPLATES` | `/api/services/serviceTemplates` | GET | List operator-published service templates | + +**Start is asynchronous.** `serviceStart` does only the fast, synchronous validation and then +returns the `serviceId` right away — it does **not** wait for escrow or the (potentially +multi-minute) image pull/build. A background loop on the node then advances the service through +a sequence of statuses; clients **poll `serviceStatus`** to follow it to `Running` (or a +terminal `*Failed` / `Error`). 
+ +**Handler (synchronous, before responding):** signature check → environment + access-list + +`features.services` check → `userData` decrypt (validity check) → duration cap → resource +resolution & availability → cost computed from **server-side** environment pricing → persist the +job as `Starting` (which also reserves its resources) → respond `200` with the `serviceId`. + +**Background pipeline (per the start statuses below):** +`Starting (10)` → **locking** `Locking (20)`: escrow `createLock` (+ wait for it to mine) → +**image** `PullImage (11)` / `BuildImage (13)`: pull or build the image and run the vulnerability +scan → **payment** `Claiming (30)`: `claimLock` on success, or `cancelLock` (refund) if the image +step failed → allocate host ports, create the network, create + start the container → +`Running (40)`. + +Escrow is **claimed only after the image succeeds**; if the image pull/build/scan fails, or +container creation fails before the claim, the lock is **cancelled (refunded)** and the job ends +in a `*Failed` / `Error` status. This is a change from the previous synchronous flow, which +locked-then-claimed up front. + +## Configuration + +Service-on-demand is configured per Docker connection under `serviceOnDemand`: + +| Field | Meaning | +| --- | --- | +| `enabled` | Master switch for the feature on this connection. | +| `nodeHost` | Externally reachable host used to build endpoint URLs. | +| `hostPortRange` | `[start, end]` range the node allocates published host ports from. | +| `maxDurationSeconds` | Upper bound on a service's lifetime (default 86400). | +| `allowImageBuild` | If true, consumers may submit an inline `dockerfile` to build. | + +Whether a given environment accepts services is gated by its `features.services` flag, +and access can be restricted with the environment's `access` allow-list +(`addresses` + on-chain `accessLists`). 
Operator-published **templates** are loaded from +`serviceTemplatesPath` (default `databases/serviceTemplates/`; the bundled `config.json` sets +it to `docs/serviceTemplates/`); template secret values +are never returned by the API (only the env-var keys are exposed). + +## Security model & important notices + +- **Container hardening.** Service containers are created with + `SecurityOpt: ['no-new-privileges']`, `CapDrop: ['ALL']`, and `PidsLimit: 512`. + Unlike the compute path, the service path does **not** force a non-root `User` — + arbitrary service images often expect to start as root, so the image's declared user + is kept. Dropping all capabilities + `no-new-privileges` keeps that root process + unprivileged. + +- **⚠️ Low ports won't bind inside the container.** Because `CapDrop: ['ALL']` removes + `NET_BIND_SERVICE`, a process **inside** the container cannot bind to a container port + below 1024. Have your service listen on a **high port** (the externally published + *host* port is allocated by the node from `hostPortRange` regardless). If a specific + image genuinely needs a low in-container port, that requires explicitly adding + `CapAdd: ['NET_BIND_SERVICE']` in the engine — it is intentionally not enabled by + default. + +- **Access lists apply to the whole lifecycle.** `start`, `extend`, and `restart` all + re-check the environment's `access` allow-list (access lists are mutable, so a + revoked consumer cannot keep a service alive). `stop` is owner-gated only, so a + revoked owner can still shut their own service down. + +- **No privileged/advanced Docker config.** The service path deliberately omits the + user-injectable advanced Docker config (host bind mounts, extra capabilities, + `seccomp:unconfined`, devices beyond the priced GPU pool) that the compute path + supports. Do not thread it in. 
+ +- **Payment is server-priced.** Cost is computed only from the environment's configured + pricing for the requested token/chain; the consumer cannot influence the charged + amount, and the escrow payer is always the signature-authenticated `consumerAddress` + (you cannot charge someone else). + +- **`serviceStatus` is authenticated and owner-scoped.** The caller must supply + `consumerAddress` plus a valid `nonce`/`signature` (or auth token) proving control of + that address; results are restricted to services owned by it, so one consumer cannot + read another's job records or endpoint URLs. That said, a published service endpoint is + still reachable by anyone who learns or guesses its URL — the node only port-forwards + and does not authenticate traffic to the container, so put your own authentication in + front of any sensitive service and do not rely on endpoint-URL secrecy as access + control. + +- **`allowImageBuild` runs arbitrary build instructions.** When enabled, a consumer's + inline `dockerfile` is built by the Docker daemon, so its `RUN` steps execute arbitrary + commands in the daemon's build sandbox. Leave it disabled unless you intend to offer + build-from-source and trust the consumer set. 
diff --git a/package.json b/package.json index 0a85c3f32..04fb4d6ea 100644 --- a/package.json +++ b/package.json @@ -32,6 +32,7 @@ "test:integration": "npm run build-tests && npm run mocha \"./dist/test/integration/**/*.test.js\"", "test:computeunit": "npm run build-tests && npm run mocha \"./dist/test/unit/compute.test.js\"", "test:computeintegration": "npm run build-tests && npm run mocha \"./dist/test/integration/compute.test.js\"", + "test:servicesintegration": "npm run build-tests && npm run mocha \"./dist/test/integration/services.test.js\"", "test:indexer": "npm run build-tests && npm run mocha \"./dist/test/integration/indexer.test.js\"", "test:integration:light": "npm run build-tests && npm run mocha-light \"./dist/test/integration/**/*.test.js\"", "test:unit:cover": "nyc --report-dir coverage/unit npm run test:unit", diff --git a/src/@types/C2D/C2D.ts b/src/@types/C2D/C2D.ts index eb2744089..b8ddbb5ad 100644 --- a/src/@types/C2D/C2D.ts +++ b/src/@types/C2D/C2D.ts @@ -1,6 +1,14 @@ import { MetadataAlgorithm, ConsumerParameter } from '@oceanprotocol/ddo-js' import type { BaseFileObject, StorageObject, EncryptMethod } from '../fileObject.js' import type { AccessList } from '../AccessList.js' +import type { ServiceOnDemandConfig } from './ServiceOnDemand.js' + +// Per-environment capability flags. Both default to true at config-parse and +// at runtime construction; only an explicit false disables a capability. 
+export interface ComputeEnvFeatures { + computeJobs: boolean // false → COMPUTE_START + FREE_COMPUTE_START rejected + services: boolean // false → SERVICE_START rejected; env hidden from service matching +} export enum C2DClusterType { // eslint-disable-next-line no-unused-vars OPF_K8 = 0, @@ -147,6 +155,7 @@ export interface ComputeEnvironmentBaseConfig { free?: ComputeEnvironmentFreeOptions platform: RunningPlatform enableNetwork?: boolean // whether network is enabled for algorithm containers + features?: ComputeEnvFeatures // always populated at runtime construction; gates compute/service starts } export interface ComputeRuntimes { @@ -180,6 +189,7 @@ export interface C2DEnvironmentConfig { free?: C2DEnvironmentFreeConfig // config-time only; resolved to ComputeEnvironmentFreeOptions at startup resources?: EnvironmentResourceRef[] // lightweight refs to connection pool enableNetwork?: boolean // whether network is enabled for algorithm containers + features?: ComputeEnvFeatures // config-time, optional } export interface C2DDockerConfig { @@ -197,6 +207,7 @@ export interface C2DDockerConfig { scanImageDBUpdateInterval?: number // Default: 12 hours resources?: ComputeResource[] // optional: cpu/ram/disk auto-detected; include for GPUs/NICs or to cap auto-detected totals environments: C2DEnvironmentConfig[] + serviceOnDemand?: ServiceOnDemandConfig // per-daemon Service-on-Demand operational config } export type ComputeResultType = diff --git a/src/@types/C2D/ServiceOnDemand.ts b/src/@types/C2D/ServiceOnDemand.ts new file mode 100644 index 000000000..8693022a4 --- /dev/null +++ b/src/@types/C2D/ServiceOnDemand.ts @@ -0,0 +1,130 @@ +import type { DBComputeJobPayment, ComputeResourceRequestWithPrice } from './C2D.js' + +// ── Resource requirements ───────────────────────────────────────────── + +export interface TemplateResourceRequirement { + // Exactly one of `id` or `kind` must be set. 
+ id?: string // exact resource id: 'cpu' | 'ram' | 'disk' | named GPU ('gpu-0') + kind?: 'discrete' | 'fungible' // match ANY resource of this kind across the env pool + type?: string // optional: further filter within kind ('gpu', 'fpga', 'tpu') + + min: number // MUST have at least this much — service is rejected otherwise + recommended?: number // ideal amount; below this the env gets a lower score + unit?: string // display hint: 'cores' | 'GB' | 'count' + description?: string // shown in UI: "CUDA GPU — 2 recommended for large models" +} + +// ── Template definition ─────────────────────────────────────────────── + +export interface UserConfigurableEnvVar { + key: string // env var name, passed in userData + validation?: string // optional regex; validated at SERVICE_START time + sensitive?: boolean // advisory hint for clients/UI (e.g. mask on input). The node receives ALL userData ECIES-encrypted, so this does not change node-side storage. +} + +export interface ServiceTemplate { + id: string // [a-z0-9][a-z0-9_-]{0,63} + name?: string + description?: string + // Image specification — exactly one of (tag | checksum | dockerfile) must be set: + image: string // base image name + tag?: string // e.g. 
"latest" — mutually exclusive with checksum/dockerfile + checksum?: string // digest: "sha256:<64 hex>" — mutually exclusive with tag/dockerfile + dockerfile?: string // inline Dockerfile content — triggers build; mutually exclusive with tag/checksum + additionalDockerFiles?: Record // filename → content; only valid with dockerfile + exposedPorts: number[] + envVars?: Record // fixed env vars — operator-set, never returned to callers + userConfigurableEnvVars?: UserConfigurableEnvVar[] + command?: string[] // Docker CMD override; ${KEY} expanded from userData + entrypoint?: string[] // Docker ENTRYPOINT override + requiredResources?: TemplateResourceRequirement[] // MUST satisfy — gates SERVICE_START + recommendedResources?: TemplateResourceRequirement[] // SHOULD satisfy — used for scoring + UI +} + +// ── Public / sanitized types ────────────────────────────────────────── + +// Safe to return in API responses: envVars values are stripped (keys only). Choosing a +// matching compute environment is the client's responsibility (see GET_COMPUTE_ENVIRONMENTS). +export interface ServiceTemplatePublic extends Omit { + envVarKeys?: string[] // keys of envVars only, never values +} + +// ── Operational config (per Docker daemon, not global) ──────────────── + +export interface ServiceOnDemandConfig { + enabled: boolean + nodeHost: string // host (or IP) clients use to reach forwarded service ports; e.g. 'localhost' + hostPortRange?: [number, number] // e.g. [30000, 32767]; specific to this daemon's host + maxDurationSeconds?: number // default: 86400 (24 h) + allowImageBuild?: boolean // default: false — gates Dockerfile-based services per daemon +} + +// ── Runtime service job ─────────────────────────────────────────────── + +export interface ServiceEndpoint { + containerPort: number + hostPort: number + url: string // e.g. 
"http://:31042" +} + +/* eslint-disable no-unused-vars */ +export enum ServiceStatusNumber { + Starting = 10, // DB record created by the start handler; awaits background processing + PullImage = 11, // pulling pre-built image from registry + PullImageFailed = 12, + BuildImage = 13, // building from Dockerfile + BuildImageFailed = 14, + VulnerableImage = 15, // Trivy scan found critical vulnerabilities + Locking = 20, // escrow createLock in progress (funds locked, not yet claimed) + Claiming = 30, // payment phase: claimLock on success, or cancelLock if the image step failed + Running = 40, + Stopping = 50, + Stopped = 70, + Expired = 75, + Error = 99 +} +/* eslint-enable no-unused-vars */ + +export const ServiceStatusText: Record = { + [ServiceStatusNumber.Starting]: 'Starting', + [ServiceStatusNumber.PullImage]: 'PullImage', + [ServiceStatusNumber.PullImageFailed]: 'PullImageFailed', + [ServiceStatusNumber.BuildImage]: 'BuildImage', + [ServiceStatusNumber.BuildImageFailed]: 'BuildImageFailed', + [ServiceStatusNumber.VulnerableImage]: 'VulnerableImage', + [ServiceStatusNumber.Locking]: 'Locking', + [ServiceStatusNumber.Claiming]: 'Claiming', + [ServiceStatusNumber.Running]: 'Running', + [ServiceStatusNumber.Stopping]: 'Stopping', + [ServiceStatusNumber.Stopped]: 'Stopped', + [ServiceStatusNumber.Expired]: 'Expired', + [ServiceStatusNumber.Error]: 'Error' +} + +export interface ServiceJob { + serviceId: string // unique id for a running service — distinct from a compute jobId + clusterHash: string + environment: string // envId the service runs on — used for shared resource accounting + pricing + owner: string // consumerAddress + image: string + tag?: string + checksum?: string + dockerfile?: string // inline Dockerfile (when built); kept so restart can rebuild + additionalDockerFiles?: Record // extra build-context files (only with dockerfile) + dockerCmd?: string[] // container CMD override + dockerEntrypoint?: string[] // container ENTRYPOINT override + 
containerImage: string // resolved final reference used by Docker (image:tag, image@digest, or built name) + containerId: string + networkId: string // per-service Docker network id + status: ServiceStatusNumber + statusText: string + dateCreated: string // ISO timestamp + expiresAt: number // Unix ms timestamp + duration: number // requested seconds + exposedPorts: number[] + endpoints: ServiceEndpoint[] + userData?: string // ECIES(node key) string sent by the client; stored as-is, decrypted only at start/restart; never returned + resources: ComputeResourceRequestWithPrice[] + payment: DBComputeJobPayment // initial start payment + extendPayments?: DBComputeJobPayment[] // one entry per successful SERVICE_EXTEND +} diff --git a/src/@types/OceanNode.ts b/src/@types/OceanNode.ts index 6717de5c6..1cb2a5f31 100644 --- a/src/@types/OceanNode.ts +++ b/src/@types/OceanNode.ts @@ -106,6 +106,7 @@ export interface dockerRegistrysAuth { export interface OceanNodeConfig { dockerComputeEnvironments: C2DDockerConfig[] + serviceTemplatesPath?: string // folder of *.json service templates; defaults to 'databases/serviceTemplates/' dockerRegistrysAuth: dockerRegistrysAuth authorizedDecrypters: string[] authorizedDecryptersList: AccessListContract | null diff --git a/src/@types/commands.ts b/src/@types/commands.ts index 335a0b827..880ca06f7 100644 --- a/src/@types/commands.ts +++ b/src/@types/commands.ts @@ -398,3 +398,60 @@ export interface PersistentStorageDeleteFileCommand extends Command { bucketId: string fileName: string } + +// ── Service On Demand ───────────────────────────────────────────────── + +export interface ServiceGetTemplatesCommand extends Command { + chainId?: number +} + +export interface ServiceStartCommand extends Command { + consumerAddress: string + nonce: string + signature: string + environment: string // required: the envId to run the service on (from GET_COMPUTE_ENVIRONMENTS) + // Image spec — exactly one of tag/checksum/dockerfile. 
`image` is always required. + image: string // base image name (or build label when dockerfile is set) + tag?: string // pull by name:tag + checksum?: string // pull by digest: "sha256:<64 hex>" + dockerfile?: string // build from inline Dockerfile; requires allowImageBuild on the env + additionalDockerFiles?: Record // extra files in the build context + dockerCmd?: string[] // exact container command (Docker exec-form CMD override; no shell) + dockerEntrypoint?: string[] // container ENTRYPOINT override + exposedPorts?: number[] // container ports to forward + resources?: ComputeResourceRequest[] + duration: number // seconds; capped by serviceOnDemand.maxDurationSeconds + userData?: string // ECIES-encrypted (to the node's public key) JSON object → the container's env-var map + payment: { chainId: number; token: string } +} + +export interface ServiceStopCommand extends Command { + consumerAddress: string + nonce: string + signature: string + serviceId: string +} + +export interface ServiceGetStatusCommand extends Command { + consumerAddress: string + nonce: string + signature: string + serviceId?: string +} + +export interface ServiceRestartCommand extends Command { + consumerAddress: string + nonce: string + signature: string + serviceId: string + userData?: string // optional ECIES-encrypted userData. If provided it REPLACES the stored userData (send the complete set). If omitted, the stored userData is reused — no re-supply needed. 
+} + +export interface ServiceExtendCommand extends Command { + consumerAddress: string + nonce: string + signature: string + serviceId: string + additionalDuration: number + payment: { chainId: number; token: string } +} diff --git a/src/components/c2d/compute_engine_base.ts b/src/components/c2d/compute_engine_base.ts index 5d724fb56..6c931dd74 100644 --- a/src/components/c2d/compute_engine_base.ts +++ b/src/components/c2d/compute_engine_base.ts @@ -16,6 +16,7 @@ import type { DBComputeJobMetadata, ComputeEnvFees } from '../../@types/C2D/C2D.js' +import type { ServiceJob } from '../../@types/C2D/ServiceOnDemand.js' import { C2DClusterType } from '../../@types/C2D/C2D.js' import { C2DDatabase } from '../database/C2DDatabase.js' import { Escrow } from '../core/utils/escrow.js' @@ -79,6 +80,58 @@ export abstract class C2DEngine { return null } + // ── Service on Demand (Docker-only for Stage 1; concrete no-ops here) ── + // Persists the initial Starting record and returns immediately. The heavy lifting + // (escrow lock/claim, image pull/build, container start) is done asynchronously by + // processServiceStart(), driven by the engine's background loop. + // eslint-disable-next-line @typescript-eslint/no-unused-vars, require-await + public async createServiceJob( + environment: string, + image: string, + tag: string | undefined, + checksum: string | undefined, + dockerfile: string | undefined, + additionalDockerFiles: Record | undefined, + dockerCmd: string[] | undefined, + dockerEntrypoint: string[] | undefined, + exposedPorts: number[], + resources: ComputeResourceRequest[], + duration: number, + owner: string, + payment: DBComputeJobPayment, + serviceId: string, + userData?: string // ECIES-encrypted; the engine decrypts it transiently into the container env + ): Promise { + return null + } + + // Background pipeline that advances a Starting service job through locking → image → + // payment → container → Running. 
Never throws (terminal failures are persisted as status). + // eslint-disable-next-line @typescript-eslint/no-unused-vars, require-await + public async processServiceStart(job: ServiceJob): Promise {} + + // eslint-disable-next-line @typescript-eslint/no-unused-vars, require-await + public async stopService(serviceId: string, owner: string): Promise { + return null + } + + // eslint-disable-next-line @typescript-eslint/no-unused-vars, require-await + public async restartService( + serviceId: string, + owner: string, + newUserData?: string + ): Promise { + return null + } + + // eslint-disable-next-line @typescript-eslint/no-unused-vars, require-await + public async getServiceStatus( + consumerAddress?: string, + serviceId?: string + ): Promise { + return [] + } + // eslint-disable-next-line require-await public abstract checkDockerImage( image: string, @@ -378,6 +431,32 @@ export abstract class C2DEngine { } } } + + // Fold in on-demand services: they share the same physical resource pool as + // compute jobs, so a running service must occupy resources too. Services are + // always paid (no free tier) and always "running" while in the DB's running set, + // so we only tally their resources — job-slot/queue metrics stay compute-only. + // Do NOT swallow this failure: getUsedResources feeds the strict resource-availability + // gate (checkIfResourcesAreAvailable). Under-counting running services would let the + // engine overcommit shared GPU/CPU/RAM. Let it propagate so the allocation path defers + // the job (the caller already wraps getComputeEnvironments in try/catch) rather than + // proceeding with missing service data. 
+ const serviceJobs: ServiceJob[] = await this.db.getRunningServiceJobs( + this.getC2DConfig().hash + ) + for (const svc of serviceJobs) { + const isThisEnv = svc.environment === env.id + for (const resource of svc.resources) { + const envRes = envResourceMap.get(resource.id) + if (!envRes) continue + // discrete resources (GPUs, FPGAs, NICs) tracked globally across all envs; + // fungible resources (cpu, ram, disk) are per-env exclusive. + const isGloballyTracked = envRes.kind === 'discrete' + if (!isGloballyTracked && !isThisEnv) continue + if (!(resource.id in usedResources)) usedResources[resource.id] = 0 + usedResources[resource.id] += resource.amount + } + } return { totalJobs, totalFreeJobs, diff --git a/src/components/c2d/compute_engine_docker.ts b/src/components/c2d/compute_engine_docker.ts index 9b404fda3..316310a7a 100755 --- a/src/components/c2d/compute_engine_docker.ts +++ b/src/components/c2d/compute_engine_docker.ts @@ -65,6 +65,19 @@ import { isPersistentStorageType } from '../../@types/fileObject.js' import { getAddress, ZeroAddress } from 'ethers' +import { + ServiceStatusNumber, + ServiceStatusText +} from '../../@types/C2D/ServiceOnDemand.js' +import type { ServiceJob } from '../../@types/C2D/ServiceOnDemand.js' +import { resolveServiceImage } from './serviceResourceMatching.js' +import { + allocateHostPort, + releaseHostPort, + seedAllocatedPorts, + userDataToEnv, + decryptUserData +} from '../core/service/utils.js' const C2D_CONTAINER_UID = 1000 const C2D_CONTAINER_GID = 1000 @@ -79,6 +92,20 @@ export class C2DEngineDocker extends C2DEngine { private cronTime: number = 2000 private jobImageSizes: Map = new Map() private isInternalLoopRunning: boolean = false + // Set true by stop() so a stopped engine cannot reschedule or run another InternalLoop pass. + // Without this, an in-flight loop's finally → setNewTimer() resurrects the timer on a stopped + // instance, leaving two engines (old + new, e.g. 
across a test restart) racing the same DB + // and double-processing a job (→ Docker 409 "container name already in use"). + private stopped: boolean = false + // The currently-running InternalLoop pass, so stop() can drain it before returning. + private internalLoopPromise: Promise | null = null + // serviceIds currently being advanced by processServiceStart, so the InternalLoop doesn't + // launch a second pipeline for the same service while one is already in flight. + private servicesBeingStarted: Set = new Set() + // The in-flight processServiceStart() promises (launched fire-and-forget by InternalLoop), + // so stop() can drain them before returning — otherwise a start could outlive stop() and + // race a restarted engine on the same shared DB. + private serviceStartPromises: Set> = new Set() private imageCleanupTimer: NodeJS.Timeout | null = null private paymentClaimTimer: NodeJS.Timeout | null = null private scanDBUpdateTimer: NodeJS.Timeout | null = null @@ -413,7 +440,11 @@ export class C2DEngineDocker extends C2DEngine { queMaxWaitTimeFree: 0, runMaxWaitTime: 0, runMaxWaitTimeFree: 0, - enableNetwork: envDef.enableNetwork + enableNetwork: envDef.enableNetwork, + features: { + computeJobs: envDef.features?.computeJobs ?? true, + services: envDef.features?.services ?? 
true + } } if (envDef.storageExpiry !== undefined) env.storageExpiry = envDef.storageExpiry @@ -474,6 +505,14 @@ export class C2DEngineDocker extends C2DEngine { // Rebuild CPU allocations from running containers (handles node restart) await this.rebuildCpuAllocations() + // Re-seed the in-memory allocated-host-port set from running service jobs (handles node restart) + await seedAllocatedPorts(this.db, this.getC2DConfig().hash).catch((e) => { + CORE_LOGGER.warn(`Could not seed allocated service ports: ${e.message}`) + }) + + // (re)starting: clear the stopped flag so the loop can schedule again + this.stopped = false + // only now set the timer if (!this.cronTimer) { this.setNewTimer() @@ -560,12 +599,29 @@ export class C2DEngineDocker extends C2DEngine { } } - public override stop(): Promise { + public override async stop(): Promise { + // Mark stopped FIRST so the in-flight loop's finally won't reschedule and a queued timer + // becomes a no-op. This keeps a stopped engine from racing a freshly-started one on the + // same shared DB (which caused the same job to be processed twice). + this.stopped = true // Clear the timer and reset the flag if (this.cronTimer) { clearTimeout(this.cronTimer) this.cronTimer = null } + // Drain a currently-running InternalLoop pass so it fully completes before we return, + // so the caller (e.g. addC2DEngines / tearDownAll) can start a new engine knowing the old + // one is quiescent. + if (this.internalLoopPromise) { + await this.internalLoopPromise.catch(() => {}) + this.internalLoopPromise = null + } + // Drain any in-flight service-start pipelines launched by the loop, so none continue + // (and touch escrow/Docker on the shared DB) after the engine is considered stopped. 
+ if (this.serviceStartPromises.size > 0) { + await Promise.allSettled([...this.serviceStartPromises]) + this.serviceStartPromises.clear() + } this.isInternalLoopRunning = false // Stop image cleanup timer if (this.imageCleanupTimer) { @@ -578,7 +634,6 @@ export class C2DEngineDocker extends C2DEngine { this.paymentClaimTimer = null CORE_LOGGER.debug('Payment claim timer stopped') } - return Promise.resolve() } public async updateImageUsage(image: string): Promise { @@ -1619,18 +1674,31 @@ export class C2DEngineDocker extends C2DEngine { } private setNewTimer() { + // never reschedule once stopped (prevents an in-flight loop's finally from resurrecting + // the timer on a stopped engine) + if (this.stopped) { + return + } if (this.cronTimer) { return } // don't set the cron if we don't have compute environments if (this.envs.length > 0) - this.cronTimer = setTimeout(this.InternalLoop.bind(this), this.cronTime) + this.cronTimer = setTimeout(() => { + // track the running pass so stop() can drain it + this.internalLoopPromise = this.InternalLoop() + }, this.cronTime) } private async InternalLoop() { // this is the internal loop of docker engine // gets list of all running jobs and process them one by one + // a queued timer may fire after stop(); a stopped engine must not process anything + if (this.stopped) { + return + } + // Prevent concurrent execution if (this.isInternalLoopRunning) { CORE_LOGGER.debug( @@ -1665,6 +1733,45 @@ export class C2DEngineDocker extends C2DEngine { // wait for all promises, there is no return await Promise.all(promises) } + + // Service-on-Demand starts: advance pending service jobs through the start pipeline. + // Fire-and-forget (NOT awaited): an image pull can take minutes and must not block the + // loop (compute jobs + expiry must keep advancing). The in-progress guard prevents a + // second pipeline for the same service across overlapping ticks. processServiceStart + // never throws, so the unawaited promise is safe. 
+ const pendingStarts = await this.db.getPendingServiceStarts( + this.getC2DConfig().hash + ) + for (const svc of pendingStarts) { + if (this.servicesBeingStarted.has(svc.serviceId)) continue + this.servicesBeingStarted.add(svc.serviceId) + // Track the promise so stop() can drain it; clean both trackers when it settles. + const startPromise = this.processServiceStart(svc).finally(() => { + this.servicesBeingStarted.delete(svc.serviceId) + this.serviceStartPromises.delete(startPromise) + }) + this.serviceStartPromises.add(startPromise) + } + + // Service-on-Demand expiry: stop services whose paid window has elapsed. + const expiredServices = await this.db.getExpiredServiceJobs( + this.getC2DConfig().hash + ) + for (const svc of expiredServices) { + CORE_LOGGER.info(`Service ${svc.serviceId} expired — stopping`) + await this.stopService(svc.serviceId, svc.owner).catch((e) => { + CORE_LOGGER.error( + `Failed to stop expired service ${svc.serviceId}: ${e.message}` + ) + }) + // mark the (now stopped) record as Expired so it is not picked up again + const [stoppedJob] = await this.db.getServiceJob(svc.serviceId, svc.owner) + if (stoppedJob) { + stoppedJob.status = ServiceStatusNumber.Expired + stoppedJob.statusText = ServiceStatusText[ServiceStatusNumber.Expired] + await this.db.updateServiceJob(stoppedJob) + } + } } catch (e) { CORE_LOGGER.error(`Error in C2D InternalLoop: ${e.message}`) } finally { @@ -2870,6 +2977,668 @@ export class C2DEngineDocker extends C2DEngine { } } + // ── Service on Demand ───────────────────────────────────────────────── + + // Pulls a plain image reference with the same registry-auth + Trivy scan + // guarantees as pullImage(). Service code MUST go through this — never docker.pull() raw. 
+  // `encryptedDockerRegistryAuth`, when given, is a hex-encoded ECIES ciphertext that decrypts
+  // to a JSON registry-auth object (either `{ auth }` or `{ username, password }`); otherwise
+  // whatever getDockerRegistryAuth() returns for the image's registry is used, if anything.
+  // Pull progress statuses (and a pull error message) are appended to `logFile` when it is set.
+  // The pull request is aborted once getImagePullTimeoutMs() has elapsed. Throws if the pull
+  // fails or if the security scan rejects the image (the pulled image is removed first).
+  private async pullImageRef(
+    imageRef: string,
+    encryptedDockerRegistryAuth?: string,
+    logFile?: string
+  ): Promise<void> {
+    const controller = new AbortController()
+    const timer = setTimeout(() => controller.abort(), this.getImagePullTimeoutMs())
+    try {
+      const { registry } = this.parseImage(imageRef)
+      let dockerRegistryAuthForPull: any
+      if (encryptedDockerRegistryAuth) {
+        const decryptedDockerRegistryAuth = await this.keyManager.decrypt(
+          Uint8Array.from(Buffer.from(encryptedDockerRegistryAuth, 'hex')),
+          EncryptMethod.ECIES
+        )
+        dockerRegistryAuthForPull = JSON.parse(decryptedDockerRegistryAuth.toString())
+      } else {
+        dockerRegistryAuthForPull = this.getDockerRegistryAuth(registry)
+      }
+
+      const pullOptions: any = { abortSignal: controller.signal }
+      if (dockerRegistryAuthForPull) {
+        const registryUrl = new URL(registry)
+        const serveraddress =
+          registryUrl.hostname + (registryUrl.port ? `:${registryUrl.port}` : '')
+        // Send either the pre-encoded `auth` token or the username/password pair, never both.
+        pullOptions.authconfig = dockerRegistryAuthForPull.auth
+          ? { serveraddress, auth: dockerRegistryAuthForPull.auth }
+          : {
+              serveraddress,
+              username: dockerRegistryAuthForPull.username,
+              password: dockerRegistryAuthForPull.password
+            }
+      }
+
+      const pullStream = await this.docker.pull(imageRef, pullOptions)
+      await new Promise<void>((resolve, reject) => {
+        this.docker.modem.followProgress(
+          pullStream,
+          (err: any) => {
+            if (err) {
+              if (logFile) appendFileSync(logFile, String(err.message))
+              return reject(err)
+            }
+            resolve()
+          },
+          (progress: any) => {
+            if (logFile) appendFileSync(logFile, (progress.status ?? '') + '\n')
+          }
+        )
+      })
+      // Usage tracking is best-effort: a failure here must not fail the pull.
+      this.updateImageUsage(imageRef).catch((e) => {
+        CORE_LOGGER.debug(`Failed to track image usage: ${e.message}`)
+      })
+
+      // Image scanning — same Trivy check used by the compute path.
+      await this.rejectVulnerableServiceImage(imageRef)
+    } finally {
+      clearTimeout(timer)
+    }
+  }
+
+  // Builds an image from a plain Dockerfile string with the same build mechanics as
+  // buildImage(). Service code MUST go through this — never docker.buildImage() raw.
+  // The build context is an in-memory tar holding `Dockerfile` plus every entry of
+  // `additionalDockerFiles` (filename → content; may be undefined when there are none).
+  // The build request is aborted after `maxDurationSeconds`. `memoryGB` / `cpuCount`, when
+  // set, cap the build's memory and CPU quota. Throws if the build fails, if the image cannot
+  // be inspected afterwards, or if the security scan rejects it (the image is removed first).
+  private async buildImageFromSpec(
+    imageRef: string,
+    dockerfile: string,
+    additionalDockerFiles: Record<string, string> | undefined,
+    maxDurationSeconds: number,
+    memoryGB?: number,
+    cpuCount?: number
+  ): Promise<void> {
+    const controller = new AbortController()
+    const timer = setTimeout(() => controller.abort(), maxDurationSeconds * 1000)
+    try {
+      const pack = tarStream.pack()
+      pack.entry({ name: 'Dockerfile' }, dockerfile)
+      // Tolerate a missing map: the stored job field this comes from is optional.
+      for (const [name, content] of Object.entries(additionalDockerFiles ?? {}))
+        pack.entry({ name }, content)
+      pack.finalize()
+
+      const buildOptions: Dockerode.ImageBuildOptions = {
+        t: imageRef,
+        nocache: true,
+        abortSignal: controller.signal
+      }
+      if (memoryGB) {
+        // Docker takes whole bytes; round so a fractional GB amount stays an integer.
+        // memswap equal to memory means the build gets no additional swap.
+        const memoryBytes = Math.round(memoryGB * 1024 ** 3)
+        buildOptions.memory = memoryBytes
+        buildOptions.memswap = memoryBytes
+      }
+      if (cpuCount) {
+        // quota/period = number of CPUs; rounded because the quota must be an integer.
+        buildOptions.cpuquota = Math.round(cpuCount * 100000)
+        buildOptions.cpuperiod = 100000
+      }
+
+      const buildStream = (await this.docker.buildImage(pack, buildOptions)) as Readable
+      await new Promise<void>((resolve, reject) => {
+        this.docker.modem.followProgress(buildStream, (err: any) =>
+          err ? reject(err) : resolve()
+        )
+      })
+      // Verify image exists after build
+      await this.docker.getImage(imageRef).inspect()
+
+      // Image scanning — same Trivy gate used by pullImageRef(). A built image must clear
+      // it too; on failure remove the built image so it cannot be used to start a service.
+      await this.rejectVulnerableServiceImage(imageRef)
+    } finally {
+      clearTimeout(timer)
+    }
+  }
+
+  // Shared scan gate for pulled and built service images, kept in one place so both paths
+  // enforce exactly the same rule. No-op when image scanning is disabled. When the scan
+  // reports the image as vulnerable, the image is removed (best-effort) and an Error
+  // carrying the scan summary is thrown.
+  private async rejectVulnerableServiceImage(imageRef: string): Promise<void> {
+    if (!this.scanImages) return
+    const scanResult = await this.checkImageVulnerability(imageRef)
+    if (!scanResult.vulnerable) return
+    await this.docker
+      .getImage(imageRef)
+      .remove({ force: true })
+      .catch(() => {})
+    throw new Error(
+      `Image "${imageRef}" failed security scan: ${JSON.stringify(scanResult.summary)}`
+    )
+  }
+
+  // Builds Docker HostConfig resource constraints (memory, cpu, GPU device requests)
+  // from a service resource request, resolved against the connection-level resource pool.
+  // The 'ram' amount is scaled by 1024^3 into bytes and the 'cpu' amount by 1e9 into
+  // NanoCpus; both are rounded because Docker expects integers. A field is left undefined
+  // when the matching resource is absent or zero, or when there are no device requests.
+  private buildServiceResourceConstraints(resources: ComputeResourceRequest[]): {
+    Memory?: number
+    NanoCpus?: number
+    DeviceRequests?: any[]
+  } {
+    const connResources: ComputeResource[] =
+      this.getC2DConfig().connection?.resources ?? []
+    const ram = resources.find((r) => r.id === 'ram')?.amount
+    const cpu = resources.find((r) => r.id === 'cpu')?.amount
+    const deviceRequests = this.getDockerDeviceRequest(resources, connResources) ?? []
+    return {
+      Memory: ram ? Math.round(ram * 1024 ** 3) : undefined,
+      NanoCpus: cpu ? Math.round(cpu * 1e9) : undefined,
+      DeviceRequests: deviceRequests.length ? deviceRequests : undefined
+    }
+  }
+
+  // Handler-facing: persist the initial Starting record and return immediately so the HTTP
+  // response carries the serviceId without waiting for escrow/image/container. The background
+  // loop then calls processServiceStart() to advance it. Persisting Starting also reserves the
+  // service's resources (getRunningServiceJobs counts Starting), preventing oversubscription.
+ public override async createServiceJob( + environment: string, + image: string, + tag: string | undefined, + checksum: string | undefined, + dockerfile: string | undefined, + additionalDockerFiles: Record | undefined, + dockerCmd: string[] | undefined, + dockerEntrypoint: string[] | undefined, + exposedPorts: number[], + resources: ComputeResourceRequest[], + duration: number, + owner: string, + payment: DBComputeJobPayment, + serviceId: string, + userData?: string + ): Promise { + const containerImage = resolveServiceImage( + image, + tag, + checksum, + dockerfile, + serviceId + ) + const job: ServiceJob = { + serviceId, + clusterHash: this.getC2DConfig().hash, + environment, + owner, + image, + tag, + checksum, + dockerfile, + additionalDockerFiles, + dockerCmd, + dockerEntrypoint, + containerImage, + containerId: '', + networkId: '', + status: ServiceStatusNumber.Starting, + statusText: ServiceStatusText[ServiceStatusNumber.Starting], + dateCreated: new Date().toISOString(), + expiresAt: Date.now() + duration * 1000, + duration, + exposedPorts, + endpoints: [], + userData, // stored as received (ECIES-encrypted); decrypted transiently at container start + resources: resources.map((r) => ({ id: r.id, amount: r.amount })), + payment + } + await this.db.newServiceJob(job) + return job + } + + // Background pipeline (driven by InternalLoop): Starting → Locking → (Pull|Build)Image → + // Claiming → Running. Escrow is reordered vs. the old sync flow: createLock first, claimLock + // only AFTER the image succeeds, cancelLock (refund) if the image step fails. Never throws — + // every terminal outcome is persisted as the job status so clients see it via serviceStatus. + public override async processServiceStart(job: ServiceJob): Promise { + const { serviceId } = job + const { chainId, token } = job.payment + + // Orphan recovery: a previous process died mid-start (after a restart the in-memory loop + // guard is empty, so an intermediate-state record reaches here). 
Refund any unclaimed lock, + // tear down partial docker, release ports, and mark Error — never resume on-chain ops. + if (job.status !== ServiceStatusNumber.Starting) { + CORE_LOGGER.error( + `processServiceStart: orphaned service ${serviceId} in state "${job.statusText}" — cleaning up` + ) + if (job.payment.lockTx && !job.payment.claimTx && !job.payment.cancelTx) { + job.payment.cancelTx = await this.safeCancelLock( + chainId, + serviceId, + token, + job.owner + ) + } + if (job.containerId) + await this.cleanupServiceDocker(this.docker.getContainer(job.containerId), null) + if (job.networkId) + await this.docker + .getNetwork(job.networkId) + .remove() + .catch(() => {}) + for (const ep of job.endpoints) releaseHostPort(ep.hostPort) + job.status = ServiceStatusNumber.Error + job.statusText = 'Service start aborted (node restarted mid-start)' + await this.db.updateServiceJob(job) + return + } + + const sod = this.getC2DConfig().connection?.serviceOnDemand + // Live docker handles, tracked so the catch can tear down a half-created service. + let network: Dockerode.Network | null = null + let container: Dockerode.Container | null = null + try { + // 1. LOCKING — lock the consumer's funds in escrow (refundable until claimed). + job.status = ServiceStatusNumber.Locking + job.statusText = ServiceStatusText[ServiceStatusNumber.Locking] + await this.db.updateServiceJob(job) + const lockTx = await this.escrow.createLock( + chainId, + serviceId, + token, + job.owner, + job.payment.cost, + this.escrow.getMinLockTime(job.duration) + ) + if (!lockTx) throw new Error('Escrow lock failed') + await this.escrow.waitForTransaction(chainId, lockTx) + job.payment.lockTx = lockTx + await this.db.updateServiceJob(job) + + // 2. IMAGE — pull or build the image (vulnerability scan runs inside these helpers). 
+ let imageError: Error | null = null + try { + if (job.dockerfile) { + if (!sod?.allowImageBuild) + throw new Error( + 'Dockerfile-based services are not allowed on this environment (allowImageBuild=false)' + ) + job.status = ServiceStatusNumber.BuildImage + job.statusText = ServiceStatusText[ServiceStatusNumber.BuildImage] + await this.db.updateServiceJob(job) + const ram = job.resources.find((r) => r.id === 'ram')?.amount + const cpu = job.resources.find((r) => r.id === 'cpu')?.amount + await this.buildImageFromSpec( + job.containerImage, + job.dockerfile, + job.additionalDockerFiles ?? {}, + sod?.maxDurationSeconds ?? job.duration, + ram, + cpu + ) + } else { + job.status = ServiceStatusNumber.PullImage + job.statusText = ServiceStatusText[ServiceStatusNumber.PullImage] + await this.db.updateServiceJob(job) + await this.pullImageRef(job.containerImage) + } + } catch (e: any) { + imageError = e + } + + // 3. PAYMENT — claim the lock on success, or cancel it (refund the consumer) if the + // image step failed. + job.status = ServiceStatusNumber.Claiming + job.statusText = ServiceStatusText[ServiceStatusNumber.Claiming] + await this.db.updateServiceJob(job) + + if (imageError) { + job.payment.cancelTx = await this.safeCancelLock( + chainId, + serviceId, + token, + job.owner + ) + job.status = job.dockerfile + ? 
ServiceStatusNumber.BuildImageFailed + : ServiceStatusNumber.PullImageFailed + job.statusText = String(imageError.message) + await this.db.updateServiceJob(job) + CORE_LOGGER.error( + `startService ${serviceId} image step failed (lock refunded): ${imageError.message}` + ) + return + } + + const claimTx = await this.escrow.claimLock( + chainId, + serviceId, + token, + job.owner, + job.payment.cost, + `service-start:${serviceId}` + ) + if (!claimTx) { + job.payment.cancelTx = await this.safeCancelLock( + chainId, + serviceId, + token, + job.owner + ) + throw new Error('Escrow claim failed — lock cancelled') + } + job.payment.claimTx = claimTx + await this.db.updateServiceJob(job) + + // 4. CONTINUE — allocate host ports → network → create + start container → Running. + // Sequential allocation with rollback (job.endpoints isn't populated yet, so a mid-way + // failure would otherwise strand reserved ports in the in-memory allocatedPorts set). + const [rangeStart, rangeEnd] = sod?.hostPortRange ?? [30000, 32767] + const hostPorts: number[] = [] + try { + for (let i = 0; i < job.exposedPorts.length; i++) { + // eslint-disable-next-line no-await-in-loop + hostPorts.push(await allocateHostPort(rangeStart, rangeEnd)) + } + } catch (e) { + for (const port of hostPorts) releaseHostPort(port) + throw e + } + + const nodeHost = sod?.nodeHost ?? 'localhost' + job.endpoints = job.exposedPorts.map((cp, i) => ({ + containerPort: cp, + hostPort: hostPorts[i], + url: `http://${nodeHost}:${hostPorts[i]}` + })) + + network = await this.docker.createNetwork({ Name: `ocean-svc-${serviceId}` }) + + // Container env from the decrypted (in-memory) userData; command/entrypoint from the request. + const decryptedUserData = await decryptUserData(job.userData, this.keyManager) + const env = userDataToEnv(decryptedUserData) + const cmd = job.dockerCmd?.length ? job.dockerCmd : undefined + const entrypoint = job.dockerEntrypoint?.length ? 
job.dockerEntrypoint : undefined + + const PortBindings: Record> = {} + const ExposedPorts: Record> = {} + job.exposedPorts.forEach((cp, i) => { + PortBindings[`${cp}/tcp`] = [{ HostPort: String(hostPorts[i]) }] + ExposedPorts[`${cp}/tcp`] = {} + }) + + const { Memory, NanoCpus, DeviceRequests } = this.buildServiceResourceConstraints( + job.resources.map((r) => ({ id: r.id, amount: r.amount })) + ) + + container = await this.docker.createContainer({ + Image: job.containerImage, + Cmd: cmd, + Entrypoint: entrypoint, + Env: Object.entries(env).map(([k, v]) => `${k}=${v}`), + ExposedPorts, + HostConfig: { + Memory, + NanoCpus, + DeviceRequests, + PortBindings, + NetworkMode: network.id, + SecurityOpt: ['no-new-privileges'], // security plan #5 + CapDrop: ['ALL'], + PidsLimit: 512 + } + }) + await container.start() + + job.containerId = container.id + job.networkId = network.id + job.status = ServiceStatusNumber.Running + job.statusText = ServiceStatusText[ServiceStatusNumber.Running] + await this.db.updateServiceJob(job) + } catch (err: any) { + await this.cleanupServiceDocker(container, network) + for (const ep of job.endpoints) releaseHostPort(ep.hostPort) + // Refund if funds were locked but never claimed (e.g. container creation failed). + if (job.payment.lockTx && !job.payment.claimTx && !job.payment.cancelTx) { + job.payment.cancelTx = await this.safeCancelLock( + chainId, + serviceId, + token, + job.owner + ) + } + job.status = ServiceStatusNumber.Error + job.statusText = String(err.message) + await this.db.updateServiceJob(job) + CORE_LOGGER.error(`startService ${serviceId} failed: ${err.message}`) + } + } + + // Best-effort escrow refund used by the start pipeline's failure paths. Returns the cancel + // tx hash, or '' if there was nothing to cancel / the cancel itself failed (never throws). 
+ private async safeCancelLock( + chainId: number, + serviceId: string, + token: string, + owner: string + ): Promise { + try { + return (await this.escrow.cancelExpiredLock(chainId, serviceId, token, owner)) ?? '' + } catch (e: any) { + CORE_LOGGER.error(`cancelExpiredLock failed for ${serviceId}: ${e.message}`) + return '' + } + } + + // Best-effort teardown of a half-created service container + network. Used by the + // startService / restartService failure paths to avoid leaking Docker networks + // (which would exhaust the daemon's IPAM CIDR pool over repeated failures). + private async cleanupServiceDocker( + container: Dockerode.Container | null, + network: Dockerode.Network | null + ): Promise { + if (container) { + await container.stop({ t: 5 }).catch(() => {}) + await container.remove({ force: true }).catch(() => {}) + } + if (network) { + await network.remove().catch(() => {}) + } + } + + public override async stopService( + serviceId: string, + owner: string + ): Promise { + const [job] = await this.db.getServiceJob(serviceId, owner) + if (!job) return null + if ( + job.status === ServiceStatusNumber.Stopped || + job.status === ServiceStatusNumber.Expired + ) + return job + + job.status = ServiceStatusNumber.Stopping + job.statusText = ServiceStatusText[ServiceStatusNumber.Stopping] + await this.db.updateServiceJob(job) + + // "Already gone" Docker errors (404 missing, 304 already stopped) are the desired end + // state, so treat them as success. Any other error means teardown genuinely failed — + // record it and keep the job OUT of Stopped so the persisted state stays accurate. 
+ const isBenignDockerError = (e: any) => e?.statusCode === 404 || e?.statusCode === 304 + let cleanupError: Error | null = null + try { + if (job.containerId) { + const c = this.docker.getContainer(job.containerId) + await c.stop({ t: 10 }).catch((e) => { + if (!isBenignDockerError(e)) throw e + }) + await c.remove({ force: true }).catch((e) => { + if (!isBenignDockerError(e)) throw e + }) + } + if (job.networkId) { + await this.docker + .getNetwork(job.networkId) + .remove() + .catch((e) => { + if (!isBenignDockerError(e)) throw e + }) + } + // Only release the reserved host ports once the container is confirmed gone — a + // port still bound by a not-removed container must not be handed to another service. + for (const ep of job.endpoints) releaseHostPort(ep.hostPort) + } catch (err: any) { + cleanupError = err + CORE_LOGGER.error(`stopService ${serviceId} cleanup error: ${err.message}`) + } + + if (cleanupError) { + job.status = ServiceStatusNumber.Error + job.statusText = `stop failed: ${cleanupError.message}` + } else { + job.status = ServiceStatusNumber.Stopped + job.statusText = ServiceStatusText[ServiceStatusNumber.Stopped] + } + await this.db.updateServiceJob(job) + return job + } + + public override async restartService( + serviceId: string, + owner: string, + newUserData?: string + ): Promise { + const [job] = await this.db.getServiceJob(serviceId, owner) + if (!job) return null + // Reject on the expiry timestamp too, not just the status: the expiry cron flips the + // status asynchronously, so a service can be past its paid window before it reads Expired. + // Restarting then would silently extend the service beyond what was paid for. + if (job.status === ServiceStatusNumber.Expired || Date.now() >= job.expiresAt) + throw new Error('Cannot restart an expired service') + + // 1. 
Tear down existing container + network (best-effort) + if (job.containerId) { + const c = this.docker.getContainer(job.containerId) + await c.stop({ t: 10 }).catch(() => {}) + await c.remove({ force: true }).catch(() => {}) + } + if (job.networkId) { + await this.docker + .getNetwork(job.networkId) + .remove() + .catch(() => {}) + } + + job.status = ServiceStatusNumber.Starting + job.statusText = ServiceStatusText[ServiceStatusNumber.Starting] + job.containerId = '' + job.networkId = '' + await this.db.updateServiceJob(job) + + // Live Docker handles for the newly-created container/network, tracked so the + // catch block can tear them down on failure (otherwise the network leaks). + let network: Dockerode.Network | null = null + let container: Dockerode.Container | null = null + try { + const sod = this.getC2DConfig().connection?.serviceOnDemand + + // 2. Pull or rebuild image based on how the service was originally started + // (the original Dockerfile + build files are stored on the job). + if (job.dockerfile) { + job.status = ServiceStatusNumber.BuildImage + job.statusText = ServiceStatusText[ServiceStatusNumber.BuildImage] + await this.db.updateServiceJob(job) + const ram = job.resources.find((r) => r.id === 'ram')?.amount + const cpu = job.resources.find((r) => r.id === 'cpu')?.amount + await this.buildImageFromSpec( + job.containerImage, + job.dockerfile, + job.additionalDockerFiles ?? {}, + sod?.maxDurationSeconds ?? job.duration, + ram, + cpu + ) + } else { + job.status = ServiceStatusNumber.PullImage + job.statusText = ServiceStatusText[ServiceStatusNumber.PullImage] + await this.db.updateServiceJob(job) + await this.pullImageRef(job.containerImage) + } + + // 3. Effective userData: newUserData REPLACES the stored one when supplied. + const effectiveUserData = newUserData ?? job.userData + const decryptedUserData = await decryptUserData(effectiveUserData, this.keyManager) + + // 4. 
Rebuild env (from userData) + command/entrypoint (stored on the job) + const env = userDataToEnv(decryptedUserData) + const cmd = job.dockerCmd?.length ? job.dockerCmd : undefined + const entrypoint = job.dockerEntrypoint?.length ? job.dockerEntrypoint : undefined + + // 5. Rebuild port bindings — reuse already-allocated host ports + const PortBindings: Record> = {} + const ExposedPorts: Record> = {} + job.endpoints.forEach((ep) => { + PortBindings[`${ep.containerPort}/tcp`] = [{ HostPort: String(ep.hostPort) }] + ExposedPorts[`${ep.containerPort}/tcp`] = {} + }) + + // 6. New per-service network + network = await this.docker.createNetwork({ Name: `ocean-svc-${serviceId}` }) + + // 7. Resource constraints + const { Memory, NanoCpus, DeviceRequests } = this.buildServiceResourceConstraints( + job.resources.map((r) => ({ id: r.id, amount: r.amount })) + ) + + // 8. Create and start new container + container = await this.docker.createContainer({ + Image: job.containerImage, + Cmd: cmd, + Entrypoint: entrypoint, + Env: Object.entries(env).map(([k, v]) => `${k}=${v}`), + ExposedPorts, + HostConfig: { + Memory, + NanoCpus, + DeviceRequests, + PortBindings, + NetworkMode: network.id, + SecurityOpt: ['no-new-privileges'], + CapDrop: ['ALL'], + PidsLimit: 512 + } + }) + await container.start() + + // 9. Update record — same expiresAt, same payment, new container/network. 
+ job.containerId = container.id + job.networkId = network.id + job.userData = effectiveUserData + job.status = ServiceStatusNumber.Running + job.statusText = ServiceStatusText[ServiceStatusNumber.Running] + await this.db.updateServiceJob(job) + return job + } catch (err: any) { + await this.cleanupServiceDocker(container, network) + job.status = ServiceStatusNumber.Error + job.statusText = String(err.message) + await this.db.updateServiceJob(job) + CORE_LOGGER.error(`restartService ${serviceId} failed: ${err.message}`) + throw err + } + } + + public override async getServiceStatus( + consumerAddress?: string, + serviceId?: string + ): Promise { + const jobs = await this.db.getServiceJob(serviceId, consumerAddress) + return jobs.filter((j) => j.clusterHash === this.getC2DConfig().hash) + } + private addUserDataToFilesObject( filesObject: any, userData: { [key: string]: any } diff --git a/src/components/c2d/compute_engines.ts b/src/components/c2d/compute_engines.ts index beab7002a..296ff22f3 100644 --- a/src/components/c2d/compute_engines.ts +++ b/src/components/c2d/compute_engines.ts @@ -10,15 +10,22 @@ import { C2DDatabase } from '../database/C2DDatabase.js' import { Escrow } from '../core/utils/escrow.js' import { KeyManager } from '../KeyManager/index.js' import { CORE_LOGGER } from '../../utils/logging/common.js' +import type { + ServiceTemplate, + ServiceTemplatePublic +} from '../../@types/C2D/ServiceOnDemand.js' +import { loadServiceTemplates } from '../core/service/templateLoader.js' export class C2DEngines { public engines: C2DEngine[] + private config: OceanNodeConfig public constructor( config: OceanNodeConfig, db: C2DDatabase, escrow: Escrow, keyManager: KeyManager ) { + this.config = config const crons = { imageCleanup: false, scanDBUpdate: false @@ -130,6 +137,23 @@ export class C2DEngines { throw new Error(`C2D Engine not found by id: ${envId}`) } + // Called by SERVICE_GET_TEMPLATES handler. 
Loads + validates templates from the + // serviceTemplatesPath folder and returns them sanitized (envVars values stripped to keys). + // Choosing a compatible compute environment is the client's responsibility — the node + // exposes environments + their resources via GET_COMPUTE_ENVIRONMENTS. + async fetchServiceTemplates(): Promise { + const templates: ServiceTemplate[] = await loadServiceTemplates( + this.config?.serviceTemplatesPath + ) + return templates.map((tmpl) => { + const { envVars, ...rest } = tmpl + return { + ...rest, + ...(envVars ? { envVarKeys: Object.keys(envVars) } : {}) + } + }) + } + async fetchEnvironments( chainId?: number, engine?: C2DEngine diff --git a/src/components/c2d/serviceResourceMatching.ts b/src/components/c2d/serviceResourceMatching.ts new file mode 100644 index 000000000..6303a6ad7 --- /dev/null +++ b/src/components/c2d/serviceResourceMatching.ts @@ -0,0 +1,13 @@ +// Resolves the final Docker image reference from a service start spec. +// Priority: dockerfile > checksum > tag > default "latest". +export function resolveServiceImage( + image: string, + tag?: string, + checksum?: string, + dockerfile?: string, + serviceId?: string +): string { + if (dockerfile) return `${serviceId!.toLowerCase()}-svc-image:latest` + if (checksum) return `${image}@${checksum}` + return `${image}:${tag ?? 
'latest'}` +} diff --git a/src/components/core/compute/startCompute.ts b/src/components/core/compute/startCompute.ts index 5d4029a5d..ba3e7863e 100644 --- a/src/components/core/compute/startCompute.ts +++ b/src/components/core/compute/startCompute.ts @@ -136,6 +136,15 @@ export class PaidComputeStartHandler extends CommonComputeHandler { } } } + if (env.features?.computeJobs === false) { + return { + stream: null, + status: { + httpStatus: 403, + error: 'Compute jobs are not enabled on this environment' + } + } + } if (!task.maxJobDuration || task.maxJobDuration > env.maxJobDuration) { task.maxJobDuration = env.maxJobDuration } @@ -950,6 +959,15 @@ export class FreeComputeStartHandler extends CommonComputeHandler { } } } + if (env.features?.computeJobs === false) { + return { + stream: null, + status: { + httpStatus: 403, + error: 'Compute jobs are not enabled on this environment' + } + } + } try { const accessGranted = await validateAccess( task.consumerAddress, @@ -1062,7 +1080,7 @@ export class FreeComputeStartHandler extends CommonComputeHandler { } } -async function validateAccess( +export async function validateAccess( consumerAddress: string, access: ComputeAccessList | undefined, oceanNode: OceanNode diff --git a/src/components/core/handler/coreHandlersRegistry.ts b/src/components/core/handler/coreHandlersRegistry.ts index 55663c96f..65d109bc7 100644 --- a/src/components/core/handler/coreHandlersRegistry.ts +++ b/src/components/core/handler/coreHandlersRegistry.ts @@ -59,6 +59,14 @@ import { } from './persistentStorage.js' import { GetAccessListHandler, SearchAccessListHandler } from './accessListHandler.js' import { EscrowEventsHandler } from './escrowHandler.js' +import { + ServiceGetTemplatesHandler, + ServiceStartHandler, + ServiceStopHandler, + ServiceExtendHandler, + ServiceRestartHandler, + ServiceGetStatusHandler +} from '../service/index.js' export type HandlerRegistry = { handlerName: string // name of the handler @@ -146,6 +154,27 @@ export class 
CoreHandlersRegistry { PROTOCOL_COMMANDS.COMPUTE_INITIALIZE, new ComputeInitializeHandler(node) ) + this.registerCoreHandler( + PROTOCOL_COMMANDS.SERVICE_GET_TEMPLATES, + new ServiceGetTemplatesHandler(node) + ) + this.registerCoreHandler( + PROTOCOL_COMMANDS.SERVICE_START, + new ServiceStartHandler(node) + ) + this.registerCoreHandler(PROTOCOL_COMMANDS.SERVICE_STOP, new ServiceStopHandler(node)) + this.registerCoreHandler( + PROTOCOL_COMMANDS.SERVICE_EXTEND, + new ServiceExtendHandler(node) + ) + this.registerCoreHandler( + PROTOCOL_COMMANDS.SERVICE_RESTART, + new ServiceRestartHandler(node) + ) + this.registerCoreHandler( + PROTOCOL_COMMANDS.SERVICE_GET_STATUS, + new ServiceGetStatusHandler(node) + ) this.registerCoreHandler(PROTOCOL_COMMANDS.STOP_NODE, new StopNodeHandler(node)) this.registerCoreHandler(PROTOCOL_COMMANDS.STOP_JOB, new StopJobHandler(node)) this.registerCoreHandler(PROTOCOL_COMMANDS.REINDEX_TX, new ReindexTxHandler(node)) diff --git a/src/components/core/service/extendService.ts b/src/components/core/service/extendService.ts new file mode 100644 index 000000000..22f0cd364 --- /dev/null +++ b/src/components/core/service/extendService.ts @@ -0,0 +1,229 @@ +import { Readable } from 'stream' +import { P2PCommandResponse } from '../../../@types/index.js' +import { ServiceExtendCommand } from '../../../@types/commands.js' +import { CommandHandler } from '../handler/handler.js' +import { + ValidateParams, + validateCommandParameters, + buildInvalidParametersResponse, + buildInvalidRequestMessage +} from '../../httpRoutes/validateCommands.js' +import { CORE_LOGGER } from '../../../utils/logging/common.js' +import type { C2DEngine } from '../../c2d/compute_engine_base.js' +import type { ComputeEnvironment } from '../../../@types/C2D/C2D.js' +import type { ServiceJob } from '../../../@types/C2D/ServiceOnDemand.js' +import { ServiceStatusNumber } from '../../../@types/C2D/ServiceOnDemand.js' +import { validateAccess } from '../compute/startCompute.js' 
+import { toPublicServiceJob } from './utils.js' + +export class ServiceExtendHandler extends CommandHandler { + validate(command: ServiceExtendCommand): ValidateParams { + const commandValidation = validateCommandParameters(command, [ + 'consumerAddress', + 'serviceId', + 'additionalDuration', + 'payment' + ]) + if (commandValidation.valid) { + if (parseInt(String(command.additionalDuration)) <= 0) + return buildInvalidRequestMessage('Invalid additionalDuration') + } + return commandValidation + } + + async handle(task: ServiceExtendCommand): Promise { + const validationResponse = await this.verifyParamsAndRateLimits(task) + if (this.shouldDenyTaskHandling(validationResponse)) return validationResponse + + const auth = await this.validateTokenOrSignature( + task.authorization, + task.consumerAddress, + task.nonce, + task.signature, + task.command + ) + if (auth.status.httpStatus !== 200) return auth + + const engines = this.getOceanNode().getC2DEngines() + if (!engines) + return { + stream: null, + status: { httpStatus: 503, error: 'Compute engines not configured' } + } + + // Find job + let job: ServiceJob | null = null + let engine: C2DEngine | null = null + for (const eng of engines.getAllEngines()) { + const [found] = await eng.db.getServiceJob(task.serviceId, task.consumerAddress) + if (found) { + job = found + engine = eng + break + } + } + if (!job || !engine) + return buildInvalidParametersResponse( + buildInvalidRequestMessage('Service job not found: ' + task.serviceId) + ) + + // Ownership check + if (job.owner.toLowerCase() !== task.consumerAddress.toLowerCase()) + return { stream: null, status: { httpStatus: 401, error: 'Not the service owner' } } + + // Resolve the environment the service actually runs on. This MUST exist: both the + // access gate and pricing key off it. A missing env would otherwise let validateAccess + // auto-allow (undefined access → true) and pricing fall back to an unrelated env. 
+ const runEnv: ComputeEnvironment | undefined = ( + await engine.getComputeEnvironments() + ).find((e) => e.id === job!.environment) + if (!runEnv) + return buildInvalidParametersResponse( + buildInvalidRequestMessage(`Service environment "${job.environment}" not found`) + ) + + // Access-list gate (mirrors paid compute → 403). Re-checked here because access + // lists are mutable and extending prolongs use of the restricted environment. + const accessGranted = await validateAccess( + task.consumerAddress, + runEnv.access, + this.getOceanNode() + ) + if (!accessGranted) + return { stream: null, status: { httpStatus: 403, error: 'Access denied' } } + + // State check — only Starting or Running can be extended + if ( + job.status !== ServiceStatusNumber.Starting && + job.status !== ServiceStatusNumber.Running + ) + return buildInvalidParametersResponse( + buildInvalidRequestMessage( + `Cannot extend a service in state "${job.statusText}". Only Starting or Running services can be extended.` + ) + ) + + // Extension must not push total beyond maxDurationSeconds + const sod = engine.getC2DConfig().connection?.serviceOnDemand + const maxDuration = sod?.maxDurationSeconds ?? 86400 + const remainingSeconds = Math.max(0, Math.floor((job.expiresAt - Date.now()) / 1000)) + const newTotalDuration = remainingSeconds + task.additionalDuration + if (newTotalDuration > maxDuration) + return buildInvalidParametersResponse( + buildInvalidRequestMessage( + `Extension would result in ${newTotalDuration}s remaining, exceeding maximum ${maxDuration}s` + ) + ) + + // Cost — same price formula as the start, priced off the env the service runs on. + // No fallback: pricing must use runEnv (resolved above); calculateResourcesCost returns + // null if that env has no pricing for the token, handled by the check below. 
+ const costExtend = engine.calculateResourcesCost( + job.resources.map((r) => ({ id: r.id, amount: r.amount })), + runEnv, + task.payment.chainId, + task.payment.token, + task.additionalDuration + ) + if (costExtend === null) + return buildInvalidParametersResponse( + buildInvalidRequestMessage( + `No pricing configured for token ${task.payment.token} on chain ${task.payment.chainId}` + ) + ) + + // Escrow lock + immediate claim + let lockTx: string | null + try { + lockTx = await engine.escrow.createLock( + task.payment.chainId, + task.serviceId, + task.payment.token, + task.consumerAddress, + costExtend, + engine.escrow.getMinLockTime(task.additionalDuration) + ) + } catch (e: any) { + CORE_LOGGER.error(`Service extend createLock failed: ${e.message}`) + return { stream: null, status: { httpStatus: 402, error: e.message } } + } + if (!lockTx) + return { + stream: null, + status: { httpStatus: 402, error: 'Escrow lock failed for extend' } + } + + // Wait for the lock tx to be mined before claiming (same-signer back-to-back txs). 
+ try { + await engine.escrow.waitForTransaction(task.payment.chainId, lockTx) + } catch (e: any) { + CORE_LOGGER.error(`Service extend lock not confirmed: ${e.message}`) + await engine.escrow + .cancelExpiredLock( + task.payment.chainId, + task.serviceId, + task.payment.token, + task.consumerAddress + ) + .catch((err) => CORE_LOGGER.error(`cancelExpiredLock failed: ${err.message}`)) + return { + stream: null, + status: { httpStatus: 402, error: 'Escrow lock not confirmed — lock cancelled' } + } + } + + let claimTx: string | null + try { + claimTx = await engine.escrow.claimLock( + task.payment.chainId, + task.serviceId, + task.payment.token, + task.consumerAddress, + costExtend, + `service-extend:${task.serviceId}` + ) + } catch (e: any) { + claimTx = null + CORE_LOGGER.error(`Service extend claimLock failed: ${e.message}`) + } + if (!claimTx) { + await engine.escrow + .cancelExpiredLock( + task.payment.chainId, + task.serviceId, + task.payment.token, + task.consumerAddress + ) + .catch((e) => CORE_LOGGER.error(`cancelExpiredLock failed: ${e.message}`)) + return { + stream: null, + status: { httpStatus: 402, error: 'Escrow claim failed — lock cancelled' } + } + } + + // Payment successful — push expiresAt forward and record extension payment + job.expiresAt += task.additionalDuration * 1000 + job.duration += task.additionalDuration + job.extendPayments = [ + ...(job.extendPayments ?? 
/**
 * Handles SERVICE_GET_STATUS.
 *
 * Returns the service jobs owned by the authenticated `consumerAddress`, collected
 * from every configured compute engine. `serviceId` is not a required parameter; it
 * is forwarded to each engine's `getServiceStatus` together with the owner.
 */
export class ServiceGetStatusHandler extends CommandHandler {
  validate(command: ServiceGetStatusCommand): ValidateParams {
    // consumerAddress is required: it is the owner scope AND the identity the
    // signature/token is verified against.
    return validateCommandParameters(command, ['consumerAddress'])
  }

  /**
   * @param task the status command (owner, optional serviceId, auth material)
   * @returns 200 with a JSON array of public service jobs; the auth/validation
   *          response when those checks fail; 503 when no engines are configured;
   *          500 when an engine lookup fails.
   */
  async handle(task: ServiceGetStatusCommand): Promise<P2PCommandResponse> {
    const validationResponse = await this.verifyParamsAndRateLimits(task)
    if (this.shouldDenyTaskHandling(validationResponse)) return validationResponse

    // Status exposes live endpoint URLs / payment data, so the caller must prove
    // control of consumerAddress; results are then scoped to that owner.
    const auth = await this.validateTokenOrSignature(
      task.authorization,
      task.consumerAddress,
      task.nonce,
      task.signature,
      task.command
    )
    if (auth.status.httpStatus !== 200) return auth

    const engines = this.getOceanNode().getC2DEngines()
    if (!engines)
      return {
        stream: null,
        status: { httpStatus: 503, error: 'Compute engines not configured' }
      }

    try {
      // Aggregate across engines; each engine returns only its own cluster's jobs,
      // and the query ANDs owner + serviceId so only the authenticated owner's jobs match.
      const jobs: ServiceJob[] = []
      for (const eng of engines.getAllEngines()) {
        jobs.push(...(await eng.getServiceStatus(task.consumerAddress, task.serviceId)))
      }

      return {
        stream: Readable.from(JSON.stringify(jobs.map(toPublicServiceJob))),
        status: { httpStatus: 200 }
      }
    } catch (error: unknown) {
      // An engine/DB failure must surface as a 500 response (like the sibling service
      // handlers do) instead of rejecting the handler promise.
      const message = error instanceof Error ? error.message : String(error)
      return { stream: null, status: { httpStatus: 500, error: message } }
    }
  }
}
error: 'Compute engines not configured' } + } + + const templates = await engines.fetchServiceTemplates() + CORE_LOGGER.logMessage( + `ServiceGetTemplates: returning ${templates.length} template(s)`, + true + ) + return { + stream: Readable.from(JSON.stringify(templates)), + status: { httpStatus: 200 } + } + } catch (error: any) { + CORE_LOGGER.error(error.message) + return { stream: null, status: { httpStatus: 500, error: error.message } } + } + } +} diff --git a/src/components/core/service/index.ts b/src/components/core/service/index.ts new file mode 100644 index 000000000..e1dc8d6e8 --- /dev/null +++ b/src/components/core/service/index.ts @@ -0,0 +1,8 @@ +export { ServiceGetTemplatesHandler } from './getTemplates.js' +export { ServiceStartHandler } from './startService.js' +export { ServiceStopHandler } from './stopService.js' +export { ServiceExtendHandler } from './extendService.js' +export { ServiceRestartHandler } from './restartService.js' +export { ServiceGetStatusHandler } from './getStatus.js' +export * from './utils.js' +export * from './templateLoader.js' diff --git a/src/components/core/service/restartService.ts b/src/components/core/service/restartService.ts new file mode 100644 index 000000000..9adc03a9d --- /dev/null +++ b/src/components/core/service/restartService.ts @@ -0,0 +1,124 @@ +import { Readable } from 'stream' +import { P2PCommandResponse } from '../../../@types/index.js' +import { ServiceRestartCommand } from '../../../@types/commands.js' +import { CommandHandler } from '../handler/handler.js' +import { + ValidateParams, + validateCommandParameters, + buildInvalidParametersResponse, + buildInvalidRequestMessage +} from '../../httpRoutes/validateCommands.js' +import { CORE_LOGGER } from '../../../utils/logging/common.js' +import type { C2DEngine } from '../../c2d/compute_engine_base.js' +import type { ComputeEnvironment } from '../../../@types/C2D/C2D.js' +import type { ServiceJob } from '../../../@types/C2D/ServiceOnDemand.js' +import { 
ServiceStatusNumber } from '../../../@types/C2D/ServiceOnDemand.js' +import { validateAccess } from '../compute/startCompute.js' +import { decryptUserData, toPublicServiceJob } from './utils.js' + +export class ServiceRestartHandler extends CommandHandler { + validate(command: ServiceRestartCommand): ValidateParams { + return validateCommandParameters(command, ['consumerAddress', 'serviceId']) + } + + async handle(task: ServiceRestartCommand): Promise { + const validationResponse = await this.verifyParamsAndRateLimits(task) + if (this.shouldDenyTaskHandling(validationResponse)) return validationResponse + + const auth = await this.validateTokenOrSignature( + task.authorization, + task.consumerAddress, + task.nonce, + task.signature, + task.command + ) + if (auth.status.httpStatus !== 200) return auth + + const node = this.getOceanNode() + const engines = node.getC2DEngines() + if (!engines) + return { + stream: null, + status: { httpStatus: 503, error: 'Compute engines not configured' } + } + + // Find job across all engines + let job: ServiceJob | null = null + let engine: C2DEngine | null = null + for (const eng of engines.getAllEngines()) { + const [found] = await eng.db.getServiceJob(task.serviceId, task.consumerAddress) + if (found) { + job = found + engine = eng + break + } + } + if (!job || !engine) + return buildInvalidParametersResponse( + buildInvalidRequestMessage('Service job not found: ' + task.serviceId) + ) + + // Ownership check + if (job.owner.toLowerCase() !== task.consumerAddress.toLowerCase()) + return { stream: null, status: { httpStatus: 401, error: 'Not the service owner' } } + + // Resolve the environment the service runs on. This MUST exist: the services gate and + // access gate both key off it, and restarting resumes the container on it. 
+ const runEnv: ComputeEnvironment | undefined = ( + await engine.getComputeEnvironments() + ).find((e) => e.id === job!.environment) + if (!runEnv) + return buildInvalidParametersResponse( + buildInvalidRequestMessage(`Service environment "${job.environment}" not found`) + ) + + // Services capability gate (mirrors the start path → 403). features.services is mutable, + // so an environment that no longer offers services must not have its services resumed. + if (runEnv.features?.services === false) + return { + stream: null, + status: { httpStatus: 403, error: 'Services are not enabled on this environment' } + } + + // Access-list gate (mirrors paid compute → 403). Re-checked here because access + // lists are mutable and restarting resumes use of the restricted environment. + const accessGranted = await validateAccess(task.consumerAddress, runEnv.access, node) + if (!accessGranted) + return { stream: null, status: { httpStatus: 403, error: 'Access denied' } } + + // State check — cannot restart an expired service + if (job.status === ServiceStatusNumber.Expired) + return buildInvalidParametersResponse( + buildInvalidRequestMessage('Cannot restart an expired service') + ) + + // If newUserData is provided it REPLACES the stored userData (must be the complete set). + // Decrypt it as a validity check before touching the container. 
+ if (task.userData) { + try { + await decryptUserData(task.userData, node.getKeyManager()) + } catch { + return buildInvalidParametersResponse( + buildInvalidRequestMessage( + 'userData could not be decrypted — it must be ECIES-encrypted to the node public key' + ) + ) + } + } + + try { + const restarted = await engine.restartService( + task.serviceId, + task.consumerAddress, + task.userData + ) + return { + stream: Readable.from(JSON.stringify([toPublicServiceJob(restarted)])), + status: { httpStatus: 200 } + } + } catch (error: any) { + CORE_LOGGER.error(`ServiceRestart ${task.serviceId} failed: ${error.message}`) + return { stream: null, status: { httpStatus: 500, error: error.message } } + } + } +} diff --git a/src/components/core/service/startService.ts b/src/components/core/service/startService.ts new file mode 100644 index 000000000..dd2a6737e --- /dev/null +++ b/src/components/core/service/startService.ts @@ -0,0 +1,219 @@ +import { Readable } from 'stream' +import { P2PCommandResponse } from '../../../@types/index.js' +import { ServiceStartCommand } from '../../../@types/commands.js' +import { CommandHandler } from '../handler/handler.js' +import { + ValidateParams, + validateCommandParameters, + buildInvalidParametersResponse, + buildInvalidRequestMessage +} from '../../httpRoutes/validateCommands.js' +import { CORE_LOGGER } from '../../../utils/logging/common.js' +import { isAddress } from 'ethers' +import type { C2DEngine } from '../../c2d/compute_engine_base.js' +import type { + ComputeEnvironment, + DBComputeJobPayment as Payment +} from '../../../@types/C2D/C2D.js' +import { generateUniqueID } from '../compute/utils.js' +import { validateAccess } from '../compute/startCompute.js' +import { decryptUserData, toPublicServiceJob } from './utils.js' + +export class ServiceStartHandler extends CommandHandler { + validate(command: ServiceStartCommand): ValidateParams { + const commandValidation = validateCommandParameters(command, [ + 'consumerAddress', + 
'environment', + 'image', + 'duration', + 'payment' + ]) + if (commandValidation.valid) { + if (!isAddress(command.consumerAddress)) + return buildInvalidRequestMessage( + 'Parameter : "consumerAddress" is not a valid web3 address' + ) + if (parseInt(String(command.duration)) <= 0) + return buildInvalidRequestMessage('Invalid duration') + const imageModes = [command.tag, command.checksum, command.dockerfile].filter( + Boolean + ).length + if (imageModes > 1) + return buildInvalidRequestMessage( + 'Provide at most one of "tag", "checksum", "dockerfile"' + ) + } + return commandValidation + } + + async handle(task: ServiceStartCommand): Promise { + const validationResponse = await this.verifyParamsAndRateLimits(task) + if (this.shouldDenyTaskHandling(validationResponse)) return validationResponse + + const auth = await this.validateTokenOrSignature( + task.authorization, + task.consumerAddress, + task.nonce, + task.signature, + task.command + ) + if (auth.status.httpStatus !== 200) return auth + + const node = this.getOceanNode() + const engines = node.getC2DEngines() + if (!engines) + return { + stream: null, + status: { httpStatus: 503, error: 'Compute engines not configured' } + } + + try { + // 1. Resolve engine + environment (environment is mandatory) + let engine: C2DEngine + try { + engine = await engines.getC2DByEnvId(task.environment) + } catch { + return buildInvalidParametersResponse( + buildInvalidRequestMessage(`Unknown environment "${task.environment}"`) + ) + } + const env: ComputeEnvironment | undefined = ( + await engine.getComputeEnvironments() + ).find((e) => e.id === task.environment) + if (!env) + return buildInvalidParametersResponse( + buildInvalidRequestMessage(`Unknown environment "${task.environment}"`) + ) + + // 1a. 
Services capability gate (mirrors compute F4/F5 gates → 403) + if (env.features?.services === false) + return { + stream: null, + status: { + httpStatus: 403, + error: 'Services are not enabled on this environment' + } + } + + // 1b. Access-list gate (mirrors paid compute → 403). The signature only proves + // control of consumerAddress, not allowlist membership, so this must be + // enforced here before any escrow/charge logic. + const accessGranted = await validateAccess(task.consumerAddress, env.access, node) + if (!accessGranted) + return { + stream: null, + status: { + httpStatus: 403, + error: 'Access denied' + } + } + + // 2. Decrypt userData (pre-escrow validity check, so undecryptable input isn't charged). + // The decrypted object becomes the container's env-var map inside the engine. + if (task.userData) { + try { + await decryptUserData(task.userData, node.getKeyManager()) + } catch { + return buildInvalidParametersResponse( + buildInvalidRequestMessage( + 'userData could not be decrypted — it must be ECIES-encrypted to the node public key' + ) + ) + } + } + + // 4. Duration limit + const sod = engine.getC2DConfig().connection?.serviceOnDemand + const maxDuration = sod?.maxDurationSeconds ?? 86400 + if (task.duration > maxDuration) + return buildInvalidParametersResponse( + buildInvalidRequestMessage( + `Duration ${task.duration}s exceeds maximum ${maxDuration}s` + ) + ) + + // 5. Resolve resources (fill cpu/ram/disk defaults the same way compute jobs do) + let resources + try { + resources = await engine.checkAndFillMissingResources( + task.resources ?? [], + env, + false + ) + await engine.checkIfResourcesAreAvailable( + resources, + env, + false, + await engine.getComputeEnvironments() + ) + } catch (e: any) { + return buildInvalidParametersResponse( + buildInvalidRequestMessage(e?.message || String(e)) + ) + } + + // 6. Server-side cost (used to size the escrow lock the background loop will create). 
+ const cost = engine.calculateResourcesCost( + resources, + env, + task.payment.chainId, + task.payment.token, + task.duration + ) + if (cost === null) + return buildInvalidParametersResponse( + buildInvalidRequestMessage( + `No pricing configured for token ${task.payment.token} on chain ${task.payment.chainId}` + ) + ) + + const serviceId = generateUniqueID({ + owner: task.consumerAddress, + environment: task.environment, + image: task.image, + duration: task.duration, + nonce: task.nonce + }) + + // Escrow tx hashes are filled in later by the background pipeline (locking → payment). + const payment: Payment = { + chainId: task.payment.chainId, + token: task.payment.token, + lockTx: '', + claimTx: '', + cancelTx: '', + cost + } + + // 7. Persist the Starting record and return immediately with the serviceId. The + // engine's background loop (processServiceStart) then performs escrow lock → image + // pull/build → claim/cancel → container start. Clients poll SERVICE_GET_STATUS to + // watch the service progress to Running (or a *Failed / Error terminal status). + const job = await engine.createServiceJob( + task.environment, + task.image, + task.tag, + task.checksum, + task.dockerfile, + task.additionalDockerFiles, + task.dockerCmd, + task.dockerEntrypoint, + task.exposedPorts ?? 
/**
 * Handles SERVICE_STOP: authenticates the caller, locates the service job on
 * whichever engine holds it, and asks that engine to stop it.
 */
export class ServiceStopHandler extends CommandHandler {
  validate(command: ServiceStopCommand): ValidateParams {
    const requiredFields = ['consumerAddress', 'serviceId']
    return validateCommandParameters(command, requiredFields)
  }

  /**
   * @returns 200 with a one-element JSON array holding the stopped job (public view);
   *          the validation/auth response when those fail; 503 without engines;
   *          an invalid-parameters response when the job is unknown; 401 for a
   *          non-owner; 500 when the engine fails to stop the service.
   */
  async handle(task: ServiceStopCommand): Promise<P2PCommandResponse> {
    const precheck = await this.verifyParamsAndRateLimits(task)
    if (this.shouldDenyTaskHandling(precheck)) return precheck

    const { consumerAddress, serviceId } = task
    const authResult = await this.validateTokenOrSignature(
      task.authorization,
      consumerAddress,
      task.nonce,
      task.signature,
      task.command
    )
    if (authResult.status.httpStatus !== 200) return authResult

    const engineRegistry = this.getOceanNode().getC2DEngines()
    if (!engineRegistry) {
      return {
        stream: null,
        status: { httpStatus: 503, error: 'Compute engines not configured' }
      }
    }

    // Walk the engines in order and stop at the first one that knows this
    // serviceId + owner pair.
    let located: { job: ServiceJob; engine: C2DEngine } | undefined
    for (const candidate of engineRegistry.getAllEngines()) {
      const rows = await candidate.db.getServiceJob(serviceId, consumerAddress)
      const firstRow = rows[0]
      if (firstRow) {
        located = { job: firstRow, engine: candidate }
        break
      }
    }
    if (!located) {
      return buildInvalidParametersResponse(
        buildInvalidRequestMessage('Service job not found: ' + serviceId)
      )
    }

    const callerIsOwner =
      located.job.owner.toLowerCase() === consumerAddress.toLowerCase()
    if (!callerIsOwner) {
      return { stream: null, status: { httpStatus: 401, error: 'Not the service owner' } }
    }

    try {
      const stoppedJob = await located.engine.stopService(serviceId, consumerAddress)
      const payload = JSON.stringify([toPublicServiceJob(stoppedJob)])
      return { stream: Readable.from(payload), status: { httpStatus: 200 } }
    } catch (error: any) {
      CORE_LOGGER.error(`ServiceStop ${task.serviceId} failed: ${error.message}`)
      return { stream: null, status: { httpStatus: 500, error: error.message } }
    }
  }
}
+export async function loadServiceTemplates(dir?: string): Promise { + if (!dir) return [] // safety net; in practice the config schema always supplies the default + + let files: string[] + try { + files = (await readdir(dir)).filter((f) => f.toLowerCase().endsWith('.json')).sort() // deterministic order → stable duplicate resolution + } catch (e) { + // A missing folder is the normal "no templates" state — the default path + // (databases/serviceTemplates/) need not exist — so stay quiet on ENOENT. + if (e.code === 'ENOENT') { + CORE_LOGGER.debug( + `serviceTemplatesPath "${dir}" does not exist — no service templates loaded` + ) + } else { + CORE_LOGGER.error(`serviceTemplatesPath "${dir}" is not readable: ${e.message}`) + } + return [] + } + + const byId = new Map() + for (const file of files) { + let raw: unknown + try { + raw = JSON.parse(await readFile(join(dir, file), 'utf8')) + } catch (e) { + CORE_LOGGER.warn( + `Skipping service template file "${file}": invalid JSON (${e.message})` + ) + continue + } + // A file may be a single template object or an array of templates. + for (const candidate of Array.isArray(raw) ? 
/**
 * Converts the decrypted userData object into a flat container env-var map.
 *
 * - `null` / `undefined` values are dropped.
 * - Primitives are stringified with `String(...)`.
 * - Nested objects and arrays are JSON-encoded, so they reach the container as
 *   usable text rather than the useless "[object Object]".
 *
 * @param userData decrypted key/value map; a nullish input yields an empty map
 * @returns a map whose values are all strings
 */
export function userDataToEnv(userData: Record<string, unknown>): Record<string, string> {
  const env: Record<string, string> = {}
  for (const [key, value] of Object.entries(userData ?? {})) {
    if (value === undefined || value === null) continue
    env[key] = typeof value === 'object' ? JSON.stringify(value) : String(value)
  }
  return env
}
/**
 * Decrypts the ECIES userData string (encrypted by the client to the node's public
 * key) and JSON-parses it. Called only transiently in memory — at SERVICE_START and
 * SERVICE_RESTART to build the container env.
 *
 * @param encryptedUserData hex-encoded ciphertext, or undefined when none was supplied
 * @param keyManager node key manager used for the ECIES decryption
 * @returns the decrypted key/value object; `{}` when no userData was supplied
 * @throws when decryption fails, the plaintext is not valid JSON, or the JSON value
 *         is not an object (string, number, array, null, ...). The start/restart
 *         handlers use this call as their pre-escrow validity check, so a payload
 *         that cannot become an env-var map must fail here.
 */
export async function decryptUserData(
  encryptedUserData: string | undefined,
  keyManager: KeyManager
): Promise<Record<string, unknown>> {
  if (!encryptedUserData) return {}
  const plain = await keyManager.decrypt(
    Uint8Array.from(Buffer.from(encryptedUserData, 'hex')),
    EncryptMethod.ECIES
  )
  const parsed: unknown = JSON.parse(plain.toString())
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new Error('userData must decrypt to a JSON object')
  }
  return parsed as Record<string, unknown>
}
+ allocatedPorts.add(candidate) + // eslint-disable-next-line no-await-in-loop + if (await isPortFree(candidate)) return candidate + allocatedPorts.delete(candidate) + } + throw new Error(`No free host port in range ${rangeStart}–${rangeEnd}`) +} + +export function releaseHostPort(port: number): void { + allocatedPorts.delete(port) +} + +function isPortFree(port: number): Promise { + return new Promise((resolve) => { + const s = net.createServer() + s.once('error', () => resolve(false)) + s.listen(port, '0.0.0.0', () => s.close(() => resolve(true))) + }) +} diff --git a/src/components/core/utils/escrow.ts b/src/components/core/utils/escrow.ts index 313134c2f..4984cb5c3 100644 --- a/src/components/core/utils/escrow.ts +++ b/src/components/core/utils/escrow.ts @@ -41,6 +41,23 @@ export class Escrow { return maxJobDuration + this.claimDurationTimeout } + /** + * Waits for a submitted transaction to be mined. Used when two transactions are sent + * back-to-back from the node signer (e.g. the immediate createLock → claimLock sequence + * in Service-on-Demand) so the second tx picks up the advanced account nonce and acts on + * confirmed on-chain state. + */ + async waitForTransaction( + chain: number, + txHash: string, + confirmations: number = 1, + timeoutMs: number = 60000 + ): Promise { + const blockchain = this.getBlockchain(chain) + const provider = await blockchain.getProvider() + await provider.waitForTransaction(txHash, confirmations, timeoutMs) + } + /** * Get a Blockchain instance for the given chainId from BlockchainRegistry. 
* diff --git a/src/components/database/C2DDatabase.ts b/src/components/database/C2DDatabase.ts index b1af4db02..68b3acea1 100755 --- a/src/components/database/C2DDatabase.ts +++ b/src/components/database/C2DDatabase.ts @@ -5,6 +5,7 @@ import { DBComputeJob, C2DStatusNumber } from '../../@types/C2D/C2D.js' +import { ServiceJob } from '../../@types/C2D/ServiceOnDemand.js' import { SQLiteCompute } from './sqliteCompute.js' import { DATABASE_LOGGER } from '../../utils/logging/common.js' import { OceanNodeDBConfig } from '../../@types/OceanNode.js' @@ -30,6 +31,7 @@ export class C2DDatabase extends AbstractDatabase { this.provider = new SQLiteCompute('databases/c2dDatabase.sqlite') await this.provider.createTable() await this.provider.createImageTable() + await this.provider.createServiceTable() return this })() as unknown as C2DDatabase @@ -71,6 +73,32 @@ export class C2DDatabase extends AbstractDatabase { return await this.provider.getRunningJobs(engine, environment) } + // ── Service-on-Demand jobs ────────────────────────────────────────── + + async newServiceJob(job: ServiceJob): Promise { + return await this.provider.newServiceJob(job) + } + + async getServiceJob(serviceId?: string, owner?: string): Promise { + return await this.provider.getServiceJob(serviceId, owner) + } + + async updateServiceJob(job: ServiceJob): Promise { + return await this.provider.updateServiceJob(job) + } + + async getRunningServiceJobs(clusterHash?: string): Promise { + return await this.provider.getRunningServiceJobs(clusterHash) + } + + async getExpiredServiceJobs(clusterHash?: string): Promise { + return await this.provider.getExpiredServiceJobs(clusterHash) + } + + async getPendingServiceStarts(clusterHash?: string): Promise { + return await this.provider.getPendingServiceStarts(clusterHash) + } + async deleteJob(jobId: string): Promise { return await this.provider.deleteJob(jobId) } diff --git a/src/components/database/sqliteCompute.ts b/src/components/database/sqliteCompute.ts 
index 5a1006636..54ed913c4 100644 --- a/src/components/database/sqliteCompute.ts +++ b/src/components/database/sqliteCompute.ts @@ -4,6 +4,7 @@ import { C2DStatusText, type DBComputeJob } from '../../@types/C2D/C2D.js' +import { ServiceStatusNumber, type ServiceJob } from '../../@types/C2D/ServiceOnDemand.js' import sqlite3, { RunResult } from 'sqlite3' import { DATABASE_LOGGER } from '../../utils/logging/common.js' import { create256Hash } from '../../utils/crypt.js' @@ -150,6 +151,200 @@ export class SQLiteCompute implements ComputeDatabaseProvider { }) } + // ── Service-on-Demand jobs ────────────────────────────────────────── + + createServiceTable(): Promise { + const createTableSQL = ` + CREATE TABLE IF NOT EXISTS service_jobs ( + serviceId TEXT PRIMARY KEY, + owner TEXT, + clusterHash TEXT, + status INTEGER, + expiresAt INTEGER, + dateCreated TEXT, + body BLOB + ); + ` + return new Promise((resolve, reject) => { + this.db.run(createTableSQL, (err) => { + if (err) { + DATABASE_LOGGER.error('Could not create service_jobs table: ' + err.message) + reject(err) + } else { + resolve() + } + }) + }) + } + + newServiceJob(job: ServiceJob): Promise { + const insertSQL = ` + INSERT INTO service_jobs + (serviceId, owner, clusterHash, status, expiresAt, dateCreated, body) + VALUES (?, ?, ?, ?, ?, ?, ?); + ` + return new Promise((resolve, reject) => { + this.db.run( + insertSQL, + [ + job.serviceId, + job.owner, + job.clusterHash, + job.status, + job.expiresAt, + job.dateCreated, + Buffer.from(JSON.stringify(job)) + ], + (err) => { + if (err) { + DATABASE_LOGGER.error('Could not insert service job on DB: ' + err.message) + reject(err) + } else { + resolve() + } + } + ) + }) + } + + updateServiceJob(job: ServiceJob): Promise { + const updateSQL = ` + UPDATE service_jobs + SET owner = ?, clusterHash = ?, status = ?, expiresAt = ?, body = ? 
/**
 * Deserializes raw `service_jobs` rows into `ServiceJob` objects.
 *
 * Each row's `body` column holds the JSON-serialized job (written by
 * `updateServiceJob` as `Buffer.from(JSON.stringify(job))`).
 *
 * @param rows rows returned by sqlite; may be `undefined` or empty
 * @returns the parsed jobs, or an empty array when there are no rows
 * @throws SyntaxError when a row's `body` is not valid JSON
 */
private mapServiceRows(rows: any[] | undefined): ServiceJob[] {
  if (!rows || rows.length === 0) return []
  return rows.map((row) => JSON.parse(row.body.toString()) as ServiceJob)
}

/**
 * Runs a SELECT against `service_jobs` and resolves with the parsed jobs.
 *
 * Shared by every service-job query below so that error handling lives in one place.
 *
 * @param selectSQL parameterized SELECT statement
 * @param params values bound to the `?` placeholders, in order
 * @returns the matching jobs; rejects on a sqlite error or an unparsable row
 */
private queryServiceJobs(
  selectSQL: string,
  params: Array<number | string>
): Promise<ServiceJob[]> {
  return new Promise<ServiceJob[]>((resolve, reject) => {
    this.db.all(selectSQL, params, (err, rows: any[] | undefined) => {
      if (err) {
        DATABASE_LOGGER.error(err.message)
        reject(err)
        return
      }
      // Parsing happens inside the sqlite callback: without this try/catch a corrupt
      // `body` would throw outside the promise chain and the caller would never settle.
      try {
        resolve(this.mapServiceRows(rows))
      } catch (parseError) {
        DATABASE_LOGGER.error(
          `Error while parsing service job rows: ${
            parseError instanceof Error ? parseError.message : String(parseError)
          }`
        )
        reject(parseError)
      }
    })
  })
}

/**
 * Selects the service jobs whose status is one of `statuses`, optionally restricted to
 * a single cluster.
 *
 * @param statuses non-empty list of statuses to match
 * @param clusterHash when set, only jobs of this cluster are returned
 */
private selectServiceJobsByStatus(
  statuses: ServiceStatusNumber[],
  clusterHash?: string
): Promise<ServiceJob[]> {
  const placeholders = statuses.map(() => '?').join(',')
  const params: Array<number | string> = [...statuses]
  let selectSQL = `SELECT * FROM service_jobs WHERE status IN (${placeholders})`
  if (clusterHash) {
    selectSQL += ` AND clusterHash = ?`
    params.push(clusterHash)
  }
  return this.queryServiceJobs(selectSQL, params)
}

/**
 * Looks up service jobs by id and/or owner.
 *
 * Both filters are optional and are ANDed together; with neither set, every row of
 * `service_jobs` is returned.
 *
 * @param serviceId when set, only the job with this id matches
 * @param owner when set, only jobs owned by this address match
 * @returns the matching jobs (possibly empty)
 */
getServiceJob(serviceId?: string, owner?: string): Promise<ServiceJob[]> {
  const params: string[] = []
  let selectSQL = `SELECT * FROM service_jobs WHERE 1=1`
  if (serviceId) {
    selectSQL += ` AND serviceId = ?`
    params.push(serviceId)
  }
  if (owner) {
    selectSQL += ` AND owner = ?`
    params.push(owner)
  }
  return this.queryServiceJobs(selectSQL, params)
}

/**
 * Returns every service job that is still "active".
 *
 * All pre-terminal statuses are "active": a service reserves its resources from the moment
 * its record is created (Starting) through the whole start pipeline (Locking, image,
 * Claiming) until it is Running, and while Stopping.
 *
 * @param clusterHash when set, only jobs of this cluster are returned
 */
getRunningServiceJobs(clusterHash?: string): Promise<ServiceJob[]> {
  return this.selectServiceJobsByStatus(
    [
      ServiceStatusNumber.Starting,
      ServiceStatusNumber.Locking,
      ServiceStatusNumber.PullImage,
      ServiceStatusNumber.BuildImage,
      ServiceStatusNumber.Claiming,
      ServiceStatusNumber.Running,
      ServiceStatusNumber.Stopping
    ],
    clusterHash
  )
}

/**
 * Returns the Running service jobs whose `expiresAt` is at or before `Date.now()`.
 *
 * @param clusterHash when set, only jobs of this cluster are returned
 */
getExpiredServiceJobs(clusterHash?: string): Promise<ServiceJob[]> {
  const params: Array<number | string> = [ServiceStatusNumber.Running, Date.now()]
  let selectSQL = `SELECT * FROM service_jobs WHERE status = ? AND expiresAt <= ?`
  if (clusterHash) {
    selectSQL += ` AND clusterHash = ?`
    params.push(clusterHash)
  }
  return this.queryServiceJobs(selectSQL, params)
}

/**
 * Service jobs that are mid-start and need the background loop to advance them.
 *
 * Starting = fresh (handler just created it); the intermediate states are picked up too so
 * the loop can resume / orphan-recover them after a node restart.
 *
 * @param clusterHash when set, only jobs of this cluster are returned
 */
getPendingServiceStarts(clusterHash?: string): Promise<ServiceJob[]> {
  return this.selectServiceJobsByStatus(
    [
      ServiceStatusNumber.Starting,
      ServiceStatusNumber.Locking,
      ServiceStatusNumber.PullImage,
      ServiceStatusNumber.BuildImage,
      ServiceStatusNumber.Claiming
    ],
    clusterHash
  )
}
// ── Service on Demand ─────────────────────────────────────────────────

/**
 * Runs one Service-on-Demand command handler and writes its outcome to the HTTP response.
 *
 * - handler status 200: the handler's stream is decoded and returned as JSON;
 * - any other status: the handler's `status.error` is returned as JSON with that status
 *   (500 when the handler supplied none);
 * - a thrown error: logged and answered with a plain-text 500.
 *
 * @param HandlerClass handler constructor; it is instantiated with `res.req.oceanNode`
 * @param task the command object passed to `handle()`
 * @param res Express response to write to
 */
async function runServiceCommand(
  // NOTE(review): left as `any` because the handler/command base types are not visible
  // from this part of the file; tighten to the shared handler type if one exists.
  HandlerClass: any,
  task: any,
  res: express.Response
): Promise<void> {
  try {
    const response = await new HandlerClass(res.req.oceanNode).handle(task)
    const httpStatus = response?.status?.httpStatus
    if (httpStatus === 200) {
      const result = await streamToObject(response.stream as Readable)
      res.status(200).json(result)
      return
    }
    HTTP_LOGGER.log(LOG_LEVELS_STR.LEVEL_INFO, `Error: ${response?.status?.error}`)
    res.status(httpStatus || 500).json(response?.status?.error)
  } catch (error) {
    HTTP_LOGGER.log(LOG_LEVELS_STR.LEVEL_ERROR, `Error: ${error}`)
    res.status(500).send('Internal Server Error')
  }
}

/**
 * Extracts the fields every signed POST service command shares: target node, the
 * consumer's address/nonce/signature from the JSON body, the `Authorization` header and
 * the caller. Missing body values are normalized to `null`.
 */
function signedBodyFields(req: express.Request) {
  return {
    node: (req.body.node as string) || null,
    consumerAddress: (req.body.consumerAddress as string) || null,
    nonce: (req.body.nonce as string) || null,
    signature: (req.body.signature as string) || null,
    authorization: req.headers?.authorization,
    caller: req.caller
  }
}

// GET /serviceTemplates — operator-published service templates.
computeRoutes.get(`${SERVICES_API_BASE_PATH}/serviceTemplates`, async (req, res) => {
  const task: ServiceGetTemplatesCommand = {
    command: PROTOCOL_COMMANDS.SERVICE_GET_TEMPLATES,
    // No radix on purpose: keeps accepting hex-prefixed ids ("0x..."); NaN/0 → undefined.
    chainId: parseInt(req.query.chainId as string) || undefined,
    node: (req.query.node as string) || null,
    caller: req.caller
  }
  await runServiceCommand(ServiceGetTemplatesHandler, task, res)
})

// POST /serviceStart — launch a service container on a compute environment.
computeRoutes.post(`${SERVICES_API_BASE_PATH}/serviceStart`, async (req, res) => {
  const task: ServiceStartCommand = {
    command: PROTOCOL_COMMANDS.SERVICE_START,
    ...signedBodyFields(req),
    environment: (req.body.environment as string) || null,
    image: (req.body.image as string) || null,
    tag: (req.body.tag as string) || undefined,
    checksum: (req.body.checksum as string) || undefined,
    dockerfile: (req.body.dockerfile as string) || undefined,
    additionalDockerFiles: req.body.additionalDockerFiles || undefined,
    dockerCmd: (req.body.dockerCmd as string[]) || undefined,
    dockerEntrypoint: (req.body.dockerEntrypoint as string[]) || undefined,
    exposedPorts: (req.body.exposedPorts as number[]) || undefined,
    resources: (req.body.resources as ComputeResourceRequest[]) || undefined,
    duration: req.body.duration as number,
    userData: (req.body.userData as string) || undefined,
    payment: req.body.payment
  }
  await runServiceCommand(ServiceStartHandler, task, res)
})

// POST /serviceStop — stop a service.
computeRoutes.post(`${SERVICES_API_BASE_PATH}/serviceStop`, async (req, res) => {
  const task: ServiceStopCommand = {
    command: PROTOCOL_COMMANDS.SERVICE_STOP,
    ...signedBodyFields(req),
    serviceId: (req.body.serviceId as string) || null
  }
  await runServiceCommand(ServiceStopHandler, task, res)
})

// POST /serviceExtend — pay for `additionalDuration` more time on a service.
computeRoutes.post(`${SERVICES_API_BASE_PATH}/serviceExtend`, async (req, res) => {
  const task: ServiceExtendCommand = {
    command: PROTOCOL_COMMANDS.SERVICE_EXTEND,
    ...signedBodyFields(req),
    serviceId: (req.body.serviceId as string) || null,
    additionalDuration: req.body.additionalDuration as number,
    payment: req.body.payment
  }
  await runServiceCommand(ServiceExtendHandler, task, res)
})

// POST /serviceRestart — restart a service, optionally with new encrypted userData.
computeRoutes.post(`${SERVICES_API_BASE_PATH}/serviceRestart`, async (req, res) => {
  const task: ServiceRestartCommand = {
    command: PROTOCOL_COMMANDS.SERVICE_RESTART,
    ...signedBodyFields(req),
    serviceId: (req.body.serviceId as string) || null,
    userData: (req.body.userData as string) || undefined
  }
  await runServiceCommand(ServiceRestartHandler, task, res)
})

// GET /serviceStatus — status of one service (`serviceId`) or of the consumer's services.
// Credentials travel in the query string here, not in a body.
computeRoutes.get(`${SERVICES_API_BASE_PATH}/serviceStatus`, async (req, res) => {
  const task: ServiceGetStatusCommand = {
    command: PROTOCOL_COMMANDS.SERVICE_GET_STATUS,
    consumerAddress: req.query.consumerAddress as string,
    nonce: req.query.nonce as string,
    signature: req.query.signature as string,
    serviceId: (req.query.serviceId as string) || undefined,
    node: (req.query.node as string) || null,
    authorization: req.headers?.authorization,
    caller: req.caller
  }
  await runServiceCommand(ServiceGetStatusHandler, task, res)
})
+ this.timeout(DEFAULT_TEST_TIMEOUT * 6) publishedComputeDataset = await publishAsset( computeAssetWithNoAccess, publisherAccount @@ -156,7 +160,7 @@ describe('********** Trusted algorithms Flow', () => { oceanNode, publishedComputeDataset.ddo.id, EVENTS.METADATA_CREATED, - DEFAULT_TEST_TIMEOUT + DEFAULT_TEST_TIMEOUT * 2 ) // Fail the test if compute dataset DDO was not indexed - subsequent tests depend on it assert( @@ -169,7 +173,7 @@ describe('********** Trusted algorithms Flow', () => { oceanNode, publishedAlgoDataset.ddo.id, EVENTS.METADATA_CREATED, - DEFAULT_TEST_TIMEOUT + DEFAULT_TEST_TIMEOUT * 2 ) // Fail the test if algorithm DDO was not indexed - subsequent tests depend on it assert( diff --git a/src/test/integration/services.test.ts b/src/test/integration/services.test.ts new file mode 100644 index 000000000..cc14f3dd1 --- /dev/null +++ b/src/test/integration/services.test.ts @@ -0,0 +1,744 @@ +import { expect, assert } from 'chai' +import { + ServiceGetTemplatesHandler, + ServiceStartHandler, + ServiceStopHandler, + ServiceExtendHandler, + ServiceRestartHandler, + ServiceGetStatusHandler +} from '../../components/core/service/index.js' +import { ComputeGetEnvironmentsHandler } from '../../components/core/compute/index.js' +import type { + ServiceGetTemplatesCommand, + ServiceStartCommand, + ServiceStopCommand, + ServiceExtendCommand, + ServiceRestartCommand, + ServiceGetStatusCommand +} from '../../@types/commands.js' +import { + ServiceStatusNumber, + type ServiceJob, + type ServiceTemplatePublic +} from '../../@types/C2D/ServiceOnDemand.js' +import type { ComputeEnvironment } from '../../@types/C2D/C2D.js' +import { + ENVIRONMENT_VARIABLES, + PROTOCOL_COMMANDS, + getConfiguration +} from '../../utils/index.js' +import { Database } from '../../components/database/index.js' +import { OceanNode } from '../../OceanNode.js' +import { OceanNodeConfig } from '../../@types/OceanNode.js' +import { Readable } from 'stream' +import { streamToObject } from 
'../../utils/util.js' +import { ethers, JsonRpcProvider, Signer } from 'ethers' +import { RPCS } from '../../@types/blockchain.js' +import { + DEFAULT_TEST_TIMEOUT, + OverrideEnvConfig, + TEST_ENV_CONFIG_FILE, + buildEnvOverrideConfig, + getMockSupportedNetworks, + setupEnvironment, + tearDownEnvironment, + sleep +} from '../utils/utils.js' +import { DEVELOPMENT_CHAIN_ID, getOceanArtifactsAdresses } from '../../utils/address.js' +import OceanToken from '@oceanprotocol/contracts/artifacts/contracts/utils/OceanToken.sol/OceanToken.json' with { type: 'json' } +import EscrowJson from '@oceanprotocol/contracts/artifacts/contracts/escrow/Escrow.sol/Escrow.json' with { type: 'json' } +import { EncryptMethod } from '../../@types/fileObject.js' +import { createHashForSignature, safeSign } from '../utils/signature.js' +import { C2DEngineDocker } from '../../components/c2d/compute_engine_docker.js' +import Dockerode from 'dockerode' +import fsp from 'fs/promises' +import path from 'path' +import { tmpdir } from 'os' + +const TEMPLATE_ID = 'nginx-demo' +const MAX_DURATION = 600 // serviceOnDemand.maxDurationSeconds +const SERVICE_DURATION = 300 // long-lived service used through tests (d)→(l) +const EXPIRY_DURATION = 60 // short service for the expiry-cron test +const PORT_RANGE_START = 39000 +const PORT_RANGE_END = 39500 + +const TEMPLATE_JSON = { + id: TEMPLATE_ID, + name: 'Nginx demo', + image: 'nginxinc/nginx-unprivileged', + tag: 'alpine', + exposedPorts: [8080], + requiredResources: [ + { id: 'cpu', min: 1 }, + { id: 'ram', min: 1 } + ], + userConfigurableEnvVars: [{ key: 'EXTRA', validation: '^[a-zA-Z0-9]{1,20}$' }] +} + +describe('********** Service on Demand', () => { + let previousConfiguration: OverrideEnvConfig[] + let config: OceanNodeConfig + let dbconn: Database + let oceanNode: OceanNode + let provider: JsonRpcProvider + let publisherAccount: Signer + let consumerAccount: Signer + let nonOwnerAccount: Signer + let consumerAddress: string + let paymentToken: any 
// ── helpers ──────────────────────────────────────────────────────────

/**
 * Signs `command` for `signer` and returns the authentication triple a service command
 * needs. The nonce is the current millisecond timestamp as a string.
 */
async function signFor(signer: Signer, command: string) {
  const addr = await signer.getAddress()
  const nonce = Date.now().toString()
  const hash = createHashForSignature(addr, nonce, command)
  const signature = await safeSign(signer, hash)
  return { consumerAddress: addr, nonce, signature }
}

/**
 * JSON-serializes `obj`, ECIES-encrypts it with the node's key manager and returns the
 * ciphertext hex-encoded — the shape `userData` is sent in.
 */
async function encryptUserData(obj: Record<string, unknown>): Promise<string> {
  const enc = await oceanNode
    .getKeyManager()
    .encrypt(new Uint8Array(Buffer.from(JSON.stringify(obj))), EncryptMethod.ECIES)
  return Buffer.from(enc).toString('hex')
}

/**
 * Mints payment tokens to the consumer, deposits the whole balance into escrow and
 * authorizes `beneficiaryNodeAddr` to lock it.
 *
 * @param beneficiaryNodeAddr node address allowed to lock the consumer's escrow funds
 * @param durationForLock service duration used to derive the minimum lock time
 * @returns the consumer's available escrow funds after the deposit
 */
async function fundEscrow(beneficiaryNodeAddr: string, durationForLock: number) {
  // Always mint a large top-up rather than only when the balance is 0. Integration suites
  // share one dev chain and run in sequence; by the time this suite runs, earlier suites
  // (e.g. compute) have left locked funds against the same (token, payer, node beneficiary)
  // and drained the wallet. Setting maxLockedAmount from a small leftover balance would push
  // it below the already-accumulated currentLockedAmount → "will go over limit". A large
  // deposit + authorization ceiling clears that leftover and covers all locks in (d)→(l).
  const mintTx = await paymentTokenContract.mint(
    consumerAddress,
    ethers.parseUnits('1000000', 18)
  )
  await mintTx.wait()
  const balance = await paymentTokenContract.balanceOf(consumerAddress)
  await (
    await paymentTokenContract
      .connect(consumerAccount)
      .approve(artifactsAddresses.development.Escrow, balance)
  ).wait()
  await (
    await escrowContract.connect(consumerAccount).deposit(paymentToken, balance)
  ).wait()
  const minLockSeconds = oceanNode.escrow.getMinLockTime(durationForLock)
  await (
    await escrowContract
      .connect(consumerAccount)
      .authorize(paymentToken, beneficiaryNodeAddr, balance, minLockSeconds, 100)
  ).wait()
  return await oceanNode.escrow.getUserAvailableFunds(
    DEVELOPMENT_CHAIN_ID,
    consumerAddress,
    paymentToken
  )
}

/**
 * Fetches one service job through the authenticated SERVICE_GET_STATUS handler, signed
 * as the consumer.
 *
 * @returns the job with the given id, or `undefined` when the handler did not return it
 */
async function getServiceJob(id: string): Promise<ServiceJob | undefined> {
  const { nonce, signature } = await signFor(
    consumerAccount,
    PROTOCOL_COMMANDS.SERVICE_GET_STATUS
  )
  const r = await new ServiceGetStatusHandler(oceanNode).handle({
    command: PROTOCOL_COMMANDS.SERVICE_GET_STATUS,
    serviceId: id,
    consumerAddress,
    nonce,
    signature
  } as ServiceGetStatusCommand)
  const jobs = (await streamToObject(r.stream as Readable)) as ServiceJob[]
  return jobs.find((j) => j.serviceId === id)
}

/**
 * Polls a service every 3 seconds until it reaches `target`.
 *
 * @throws when the service enters Error / PullImageFailed / BuildImageFailed, or when
 *   `timeoutMs` elapses first
 */
async function pollServiceStatus(
  id: string,
  target: ServiceStatusNumber,
  timeoutMs = DEFAULT_TEST_TIMEOUT * 3
): Promise<ServiceJob> {
  const failureStatuses = [
    ServiceStatusNumber.Error,
    ServiceStatusNumber.PullImageFailed,
    ServiceStatusNumber.BuildImageFailed
  ]
  const deadline = Date.now() + timeoutMs
  while (Date.now() < deadline) {
    const job = await getServiceJob(id)
    if (job && job.status === target) return job
    if (job && failureStatuses.includes(job.status)) {
      throw new Error(
        `service ${id} entered failure state ${job.status}: ${job.statusText}`
      )
    }
    await sleep(3000)
  }
  throw new Error(`pollServiceStatus(${id}) timed out waiting for status ${target}`)
}

/**
 * GETs `url`, retrying on network-level failures (a freshly started container may not be
 * listening yet). HTTP error statuses are returned, not retried.
 *
 * @param tries maximum number of attempts
 * @throws the last network error once all attempts failed
 */
async function httpGetWithRetry(
  url: string,
  tries = 4
): Promise<{ ok: boolean; status: number; body: string }> {
  // Pre-seeded so that `tries <= 0` throws a real Error instead of `undefined`.
  let lastErr: unknown = new Error(
    `httpGetWithRetry(${url}): no attempt was made (tries=${tries})`
  )
  for (let attempt = 1; attempt <= tries; attempt++) {
    try {
      const res = await fetch(url)
      const body = await res.text()
      return { ok: res.ok, status: res.status, body }
    } catch (e) {
      lastErr = e
      // Back off only when another attempt follows; sleeping after the last one just
      // delays the failure.
      if (attempt < tries) await sleep(1500)
    }
  }
  throw lastErr
}

/** Returns the node's Docker compute engine, or `undefined` when none is configured. */
function getDockerEngine(): C2DEngineDocker {
  const engines = (oceanNode.getC2DEngines() as any).engines as C2DEngineDocker[]
  return engines.find((e) => e instanceof C2DEngineDocker) as C2DEngineDocker
}

// ── setup / teardown ─────────────────────────────────────────────────

before(async function () {
  this.timeout(DEFAULT_TEST_TIMEOUT * 3)
  artifactsAddresses = getOceanArtifactsAdresses()
  paymentToken = artifactsAddresses.development.Ocean

  // Write the template file BEFORE building the configuration.
  serviceTemplatesPath = await fsp.mkdtemp(path.join(tmpdir(), 'ocean-svc-tmpl-'))
  await fsp.writeFile(
    path.join(serviceTemplatesPath, 'nginx-demo.json'),
    JSON.stringify(TEMPLATE_JSON),
    'utf8'
  )

  // Two environments on one docker engine: the first allows services, the second does
  // not (used by the 403 test). Built as an object and serialized so the JSON cannot be
  // broken by a misplaced quote or comma.
  const fees = {
    [DEVELOPMENT_CHAIN_ID]: [
      {
        feeToken: paymentToken,
        prices: [
          { id: 'cpu', price: 1 },
          { id: 'ram', price: 1 }
        ]
      }
    ]
  }
  const dockerEnvs = JSON.stringify([
    {
      socketPath: '/var/run/docker.sock',
      serviceOnDemand: {
        enabled: true,
        nodeHost: 'localhost',
        hostPortRange: [PORT_RANGE_START, PORT_RANGE_END],
        maxDurationSeconds: MAX_DURATION,
        allowImageBuild: true
      },
      environments: [
        {
          storageExpiry: 604800,
          maxJobDuration: 3600,
          minJobDuration: 60,
          features: { computeJobs: true, services: true },
          resources: [
            { id: 'cpu', total: 4, max: 4, min: 1, type: 'cpu' },
            { id: 'ram', total: 10, max: 10, min: 1, type: 'ram' },
            { id: 'disk', total: 10, max: 10, min: 0, type: 'disk' }
          ],
          fees
        },
        {
          storageExpiry: 604800,
          maxJobDuration: 3600,
          minJobDuration: 60,
          features: { computeJobs: true, services: false },
          resources: [
            { id: 'cpu', total: 2, max: 2, min: 1, type: 'cpu' },
            { id: 'ram', total: 4, max: 4, min: 1, type: 'ram' },
            { id: 'disk', total: 4, max: 4, min: 0, type: 'disk' }
          ],
          fees
        }
      ]
    }
  ])

  previousConfiguration = await setupEnvironment(
    TEST_ENV_CONFIG_FILE,
    buildEnvOverrideConfig(
      [
        ENVIRONMENT_VARIABLES.RPCS,
        ENVIRONMENT_VARIABLES.INDEXER_NETWORKS,
        ENVIRONMENT_VARIABLES.PRIVATE_KEY,
        ENVIRONMENT_VARIABLES.AUTHORIZED_DECRYPTERS,
        ENVIRONMENT_VARIABLES.ADDRESS_FILE,
        ENVIRONMENT_VARIABLES.DOCKER_COMPUTE_ENVIRONMENTS,
        ENVIRONMENT_VARIABLES.SERVICE_TEMPLATES_PATH
      ],
      [
        JSON.stringify(mockSupportedNetworks),
        JSON.stringify([DEVELOPMENT_CHAIN_ID]),
        '0xc594c6e5def4bab63ac29eed19a134c130388f74f019bc74b8f4389df2837a58',
        JSON.stringify(['0xe2DD09d719Da89e5a3D0F2549c7E24566e947260']),
        `${process.env.HOME}/.ocean/ocean-contracts/artifacts/address.json`,
        dockerEnvs,
        serviceTemplatesPath
      ]
    )
  )

  config = await getConfiguration(true)
  assert(
    config.serviceTemplatesPath === serviceTemplatesPath,
    'serviceTemplatesPath not applied to config'
  )
  dbconn = await Database.init(config.dbConfig)

  // Clean stale running service jobs so prior runs don't consume shared resources.
  const staleServices = await dbconn.c2d.getRunningServiceJobs()
  for (const svc of staleServices) {
    svc.status = ServiceStatusNumber.Stopped
    svc.statusText = 'Stopped'
    await dbconn.c2d.updateServiceJob(svc)
  }
  const staleJobs = await dbconn.c2d.getRunningJobs()
  for (const job of staleJobs) {
    await dbconn.c2d.deleteJob(job.jobId)
  }

  oceanNode = OceanNode.getInstance(config, dbconn, null, null, null, null, null, true)
  await oceanNode.addC2DEngines()

  provider = new JsonRpcProvider('http://127.0.0.1:8545')
  publisherAccount = (await provider.getSigner(0)) as Signer
  consumerAccount = (await provider.getSigner(1)) as Signer
  nonOwnerAccount = (await provider.getSigner(3)) as Signer
  consumerAddress = await consumerAccount.getAddress()

  paymentTokenContract = new ethers.Contract(
    paymentToken,
    OceanToken.abi,
    publisherAccount
  )
  escrowContract = new ethers.Contract(
    artifactsAddresses.development.Escrow,
    EscrowJson.abi,
    publisherAccount
  )
})
after(async function () {
  this.timeout(DEFAULT_TEST_TIMEOUT * 2)
  // Best-effort: stop every service this suite started so no container/network/port leaks.
  // Failures are ignored on purpose — most services were already stopped by their test.
  try {
    const engine = getDockerEngine()
    if (engine) {
      for (const id of startedServices) {
        await engine.stopService(id, consumerAddress).catch(() => {})
      }
    }
  } catch {
    /* ignore */
  }
  if (oceanNode) await oceanNode.tearDownAll()
  await tearDownEnvironment(previousConfiguration)
  if (serviceTemplatesPath) {
    await fsp.rm(serviceTemplatesPath, { recursive: true, force: true })
  }
})

/**
 * Builds a freshly signed SERVICE_START command for the rootless nginx image.
 *
 * Rootless nginx runs as UID 101 and listens on 8080. The standard nginx image cannot
 * start under the service hardening (CapDrop: ['ALL']) — it needs NET_BIND_SERVICE to
 * bind :80 and CAP_SETUID/SETGID to drop workers to the nginx user. Services must use a
 * high port and not rely on dropped capabilities.
 *
 * @param environment id of the compute environment to start on
 * @param overrides fields that replace or extend the defaults (duration, resources, …)
 */
async function nginxStartTask(
  environment: string,
  overrides: Partial<ServiceStartCommand> = {}
): Promise<ServiceStartCommand> {
  const {
    consumerAddress: addr,
    nonce,
    signature
  } = await signFor(consumerAccount, PROTOCOL_COMMANDS.SERVICE_START)
  return {
    command: PROTOCOL_COMMANDS.SERVICE_START,
    consumerAddress: addr,
    nonce,
    signature,
    environment,
    image: 'nginxinc/nginx-unprivileged',
    tag: 'alpine',
    exposedPorts: [8080],
    duration: SERVICE_DURATION,
    payment: { chainId: DEVELOPMENT_CHAIN_ID, token: paymentToken },
    ...overrides
  }
}

// ── tests ────────────────────────────────────────────────────────────
// The tests share state (serviceId, hostPort, expiresAt, endpointUrl) and must run in
// order: (d) starts the service that (e) and (i)→(l) act on.

it('(a) sets up the service environment', () => {
  assert(oceanNode, 'Failed to instantiate OceanNode')
  assert(config.c2dClusters, 'Failed to get c2dClusters')
  assert(config.serviceTemplatesPath === serviceTemplatesPath, 'wrong templates path')
  assert(getDockerEngine(), 'No docker engine configured')
})

it('(b) SERVICE_GET_TEMPLATES returns the sanitized template catalogue', async () => {
  const resp = await new ServiceGetTemplatesHandler(oceanNode).handle({
    command: PROTOCOL_COMMANDS.SERVICE_GET_TEMPLATES
  } as ServiceGetTemplatesCommand)
  assert(resp.status.httpStatus === 200, 'expected 200')
  const templates = (await streamToObject(
    resp.stream as Readable
  )) as ServiceTemplatePublic[]
  const tmpl = templates.find((t) => t.id === TEMPLATE_ID)
  assert(tmpl, 'nginx-demo template not returned')
  // compatibleEnvironments was removed — picking an env is the client's job.
  expect((tmpl as any).compatibleEnvironments).to.equal(undefined)

  // Classify the two environments by their own features.services flag.
  const envResp = await new ComputeGetEnvironmentsHandler(oceanNode).handle({
    command: PROTOCOL_COMMANDS.COMPUTE_GET_ENVIRONMENTS
  })
  const envs = (await streamToObject(
    envResp.stream as Readable
  )) as ComputeEnvironment[]
  assert(envs.length >= 2, 'expected at least 2 environments')
  servicesEnv = envs.find((e) => e.features?.services === true)
  noServicesEnv = envs.find((e) => e.features?.services === false)
  assert(servicesEnv, 'services-enabled env not found')
  assert(noServicesEnv, 'services-disabled env not found')
})

it('(c) funds the escrow for the consumer', async () => {
  const funds = await fundEscrow(servicesEnv.consumerAddress, MAX_DURATION)
  assert(BigInt(funds.toString()) > BigInt(0), 'Should have funds in escrow')
})

it('(d) SERVICE_START (nginx) → Running, endpoint reachable over HTTP', async function () {
  this.timeout(DEFAULT_TEST_TIMEOUT * 4)
  const task = await nginxStartTask(servicesEnv.id, {
    resources: [
      { id: 'cpu', amount: 1 },
      { id: 'ram', amount: 1 }
    ],
    userData: await encryptUserData({ EXTRA: 'hello123' })
  })
  const resp = await new ServiceStartHandler(oceanNode).handle(task)
  assert(
    resp.status.httpStatus === 200,
    `expected 200, got ${resp.status.httpStatus}: ${resp.status?.error ?? ''}`
  )
  const [job] = (await streamToObject(resp.stream as Readable)) as ServiceJob[]
  assert(job.serviceId, 'no serviceId returned')
  serviceId = job.serviceId
  startedServices.push(serviceId)

  // serviceStart is async: the immediate response is a Starting record with no endpoints.
  // The background loop then drives it (locking → image → claiming → running).
  expect(job.status).to.equal(ServiceStatusNumber.Starting)
  expect(job.endpoints).to.have.length(0)

  const running = await pollServiceStatus(serviceId, ServiceStatusNumber.Running)
  assert(running.endpoints.length === 1, 'expected one endpoint')
  hostPort = running.endpoints[0].hostPort
  endpointUrl = running.endpoints[0].url
  expiresAt = running.expiresAt
  expect(hostPort).to.be.within(PORT_RANGE_START, PORT_RANGE_END)
  expect(endpointUrl).to.equal(`http://localhost:${hostPort}`)

  const res = await httpGetWithRetry(endpointUrl)
  assert(res.status === 200, `expected nginx HTTP 200, got ${res.status}`)
  assert(res.body.toLowerCase().includes('nginx'), 'body should be the nginx page')
})

it('(e) SERVICE_GET_STATUS returns the job with userData stripped', async () => {
  const job = await getServiceJob(serviceId)
  assert(job, 'job not found')
  expect(job.serviceId).to.equal(serviceId)
  expect((job as any).userData).to.equal(undefined)
  assert(job.payment, 'payment should be present')

  // an unauthenticated status request (no nonce/signature) is rejected
  const unauth = await new ServiceGetStatusHandler(oceanNode).handle({
    command: PROTOCOL_COMMANDS.SERVICE_GET_STATUS,
    consumerAddress,
    serviceId
  } as ServiceGetStatusCommand)
  expect(unauth.status.httpStatus).to.not.equal(200)
})

it('(f) SERVICE_START on a services-disabled environment → 403', async () => {
  const task = await nginxStartTask(noServicesEnv.id)
  const resp = await new ServiceStartHandler(oceanNode).handle(task)
  expect(resp.status.httpStatus).to.equal(403)
})

it('(g) SERVICE_START with duration > maxDurationSeconds → 400', async () => {
  const task = await nginxStartTask(servicesEnv.id, { duration: MAX_DURATION + 1 })
  const resp = await new ServiceStartHandler(oceanNode).handle(task)
  expect(resp.status.httpStatus).to.equal(400)
})

it('(h) SERVICE_START with undecryptable userData → 400', async () => {
  const task = await nginxStartTask(servicesEnv.id, {
    // not ECIES-encrypted to the node key → decrypt must fail
    userData: Buffer.from('not-encrypted-userData').toString('hex')
  })
  const resp = await new ServiceStartHandler(oceanNode).handle(task)
  expect(resp.status.httpStatus).to.equal(400)
})

it('(i) SERVICE_EXTEND advances expiresAt and records an extendPayment', async () => {
  const {
    consumerAddress: addr,
    nonce,
    signature
  } = await signFor(consumerAccount, PROTOCOL_COMMANDS.SERVICE_EXTEND)
  const task: ServiceExtendCommand = {
    command: PROTOCOL_COMMANDS.SERVICE_EXTEND,
    consumerAddress: addr,
    nonce,
    signature,
    serviceId,
    additionalDuration: 30,
    payment: { chainId: DEVELOPMENT_CHAIN_ID, token: paymentToken }
  }
  const resp = await new ServiceExtendHandler(oceanNode).handle(task)
  assert(
    resp.status.httpStatus === 200,
    `expected 200, got ${resp.status.httpStatus}: ${resp.status?.error ?? ''}`
  )
  const [job] = (await streamToObject(resp.stream as Readable)) as ServiceJob[]
  // additionalDuration is in seconds, expiresAt in milliseconds
  expect(job.expiresAt).to.equal(expiresAt + 30 * 1000)
  expect(job.extendPayments?.length).to.equal(1)
  expiresAt = job.expiresAt
})

it('(j) SERVICE_EXTEND by a non-owner is rejected (non-200)', async () => {
  // In a real DB, getServiceJob filters by owner, so a non-owner lookup returns
  // "not found" (400) rather than reaching the 401 ownership branch.
  const {
    consumerAddress: addr,
    nonce,
    signature
  } = await signFor(nonOwnerAccount, PROTOCOL_COMMANDS.SERVICE_EXTEND)
  const task: ServiceExtendCommand = {
    command: PROTOCOL_COMMANDS.SERVICE_EXTEND,
    consumerAddress: addr,
    nonce,
    signature,
    serviceId,
    additionalDuration: 30,
    payment: { chainId: DEVELOPMENT_CHAIN_ID, token: paymentToken }
  }
  const resp = await new ServiceExtendHandler(oceanNode).handle(task)
  expect(resp.status.httpStatus).to.not.equal(200)
})

it('(k) SERVICE_RESTART → new container, same hostPort + expiresAt', async function () {
  this.timeout(DEFAULT_TEST_TIMEOUT * 4)
  // (named jobBefore so it does not shadow mocha's `before` hook)
  const jobBefore = await getServiceJob(serviceId)
  const oldContainerId = jobBefore.containerId

  const {
    consumerAddress: addr,
    nonce,
    signature
  } = await signFor(consumerAccount, PROTOCOL_COMMANDS.SERVICE_RESTART)
  const task: ServiceRestartCommand = {
    command: PROTOCOL_COMMANDS.SERVICE_RESTART,
    consumerAddress: addr,
    nonce,
    signature,
    serviceId
  }
  const resp = await new ServiceRestartHandler(oceanNode).handle(task)
  assert(
    resp.status.httpStatus === 200,
    `expected 200, got ${resp.status.httpStatus}: ${resp.status?.error ?? ''}`
  )
  const running = await pollServiceStatus(serviceId, ServiceStatusNumber.Running)
  expect(running.containerId).to.not.equal(oldContainerId)
  expect(running.endpoints[0].hostPort).to.equal(hostPort)
  expect(running.expiresAt).to.equal(expiresAt)

  const res = await httpGetWithRetry(endpointUrl)
  assert(res.status === 200, `expected nginx HTTP 200 after restart, got ${res.status}`)
})

it('(l) SERVICE_STOP → Stopped, container + network removed', async function () {
  this.timeout(DEFAULT_TEST_TIMEOUT * 2)
  const jobBefore = await getServiceJob(serviceId)
  const { containerId } = jobBefore

  const {
    consumerAddress: addr,
    nonce,
    signature
  } = await signFor(consumerAccount, PROTOCOL_COMMANDS.SERVICE_STOP)
  const task: ServiceStopCommand = {
    command: PROTOCOL_COMMANDS.SERVICE_STOP,
    consumerAddress: addr,
    nonce,
    signature,
    serviceId
  }
  const resp = await new ServiceStopHandler(oceanNode).handle(task)
  assert(
    resp.status.httpStatus === 200,
    `expected 200, got ${resp.status.httpStatus}: ${resp.status?.error ?? ''}`
  )
  const [job] = (await streamToObject(resp.stream as Readable)) as ServiceJob[]
  expect(job.status).to.equal(ServiceStatusNumber.Stopped)

  // The container should be gone. Only "404 no such container" proves that: any other
  // inspect failure (e.g. docker daemon unreachable) must fail the test, not pass it.
  // NOTE(review): network removal is named in the title but not asserted here.
  const docker = new Dockerode()
  let containerRemoved = false
  try {
    await docker.getContainer(containerId).inspect()
  } catch (e) {
    containerRemoved = (e as { statusCode?: number })?.statusCode === 404
    if (!containerRemoved) throw e
  }
  assert(containerRemoved, 'container should have been removed')
})

it('(m) [slow] expiry cron marks a short-lived service Expired', async function () {
  this.timeout(150000)
  const task = await nginxStartTask(servicesEnv.id, {
    duration: EXPIRY_DURATION,
    resources: [
      { id: 'cpu', amount: 1 },
      { id: 'ram', amount: 1 }
    ]
  })
  const resp = await new ServiceStartHandler(oceanNode).handle(task)
  assert(resp.status.httpStatus === 200, `start failed: ${resp.status?.error ?? ''}`)
  const [job] = (await streamToObject(resp.stream as Readable)) as ServiceJob[]
  startedServices.push(job.serviceId)
  await pollServiceStatus(job.serviceId, ServiceStatusNumber.Running)
  // wait out the duration; the InternalLoop cron (~2s) stops+expires it
  const expired = await pollServiceStatus(
    job.serviceId,
    ServiceStatusNumber.Expired,
    (EXPIRY_DURATION + 40) * 1000
  )
  expect(expired.status).to.equal(ServiceStatusNumber.Expired)
})

it('(n) [build] Dockerfile-based custom service builds and serves', async function () {
  this.timeout(DEFAULT_TEST_TIMEOUT * 8)
  const {
    consumerAddress: addr,
    nonce,
    signature
  } = await signFor(consumerAccount, PROTOCOL_COMMANDS.SERVICE_START)
  // Rootless nginx base (listens on 8080 as UID 101) so it runs under the service
  // hardening (CapDrop: ['ALL']). USER root only to write the file into the root-owned
  // docroot, then back to 101 so the runtime nginx matches the image's unprivileged user.
  const dockerfile =
    'FROM nginxinc/nginx-unprivileged:alpine\n' +
    'USER root\n' +
    'RUN echo built > /usr/share/nginx/html/built.txt\n' +
    'USER 101\n'
  // Spelled out rather than built with nginxStartTask: a Dockerfile build sets no `tag`.
  const task: ServiceStartCommand = {
    command: PROTOCOL_COMMANDS.SERVICE_START,
    consumerAddress: addr,
    nonce,
    signature,
    environment: servicesEnv.id,
    image: 'custom-svc',
    dockerfile,
    dockerCmd: ['nginx', '-g', 'daemon off;'],
    exposedPorts: [8080],
    duration: SERVICE_DURATION,
    resources: [
      { id: 'cpu', amount: 1 },
      { id: 'ram', amount: 1 }
    ],
    payment: { chainId: DEVELOPMENT_CHAIN_ID, token: paymentToken }
  }
  const resp = await new ServiceStartHandler(oceanNode).handle(task)
  assert(
    resp.status.httpStatus === 200,
    `expected 200, got ${resp.status.httpStatus}: ${resp.status?.error ?? ''}`
  )
  const [job] = (await streamToObject(resp.stream as Readable)) as ServiceJob[]
  startedServices.push(job.serviceId)
  const running = await pollServiceStatus(
    job.serviceId,
    ServiceStatusNumber.Running,
    DEFAULT_TEST_TIMEOUT * 8
  )
  const { url } = running.endpoints[0]
  const res = await httpGetWithRetry(`${url}/built.txt`)
  assert(res.status === 200, `expected built.txt HTTP 200, got ${res.status}`)
  assert(res.body.includes('built'), 'built.txt should contain "built"')

  // stop it
  await getDockerEngine().stopService(job.serviceId, consumerAddress)
})
'sha256:' + 'a'.repeat(64) + expect(resolveServiceImage('img', undefined, c)).to.equal(`img@${c}`) + }) + it('image only → image:latest', () => { + expect(resolveServiceImage('img')).to.equal('img:latest') + }) + it('dockerfile → {serviceId}-svc-image:latest', () => { + expect( + resolveServiceImage('img', undefined, undefined, 'FROM x', 'SvcID123') + ).to.equal('svcid123-svc-image:latest') + }) +}) diff --git a/src/test/unit/compute.test.ts b/src/test/unit/compute.test.ts index 22258a35a..74acc61ff 100644 --- a/src/test/unit/compute.test.ts +++ b/src/test/unit/compute.test.ts @@ -40,6 +40,7 @@ import { checkManifestPlatform, C2DEngineDocker } from '../../components/c2d/compute_engine_docker.js' +import { ServiceStatusNumber } from '../../@types/C2D/ServiceOnDemand.js' import { C2DDockerConfigSchema } from '../../utils/config/schemas.js' import { ValidateParams } from '../../components/httpRoutes/validateCommands.js' import { Readable } from 'stream' @@ -1230,3 +1231,224 @@ describe('getAlgoChecksums', () => { expect(loggerErrorSpy.called).to.equal(false) }) }) + +describe('service start/restart Docker cleanup on failure', function () { + let engine: any + let network: { id: string; remove: sinon.SinonStub } + + function makeContainer(startRejects: boolean) { + return { + id: 'container-1', + start: startRejects + ? sinon.stub().rejects(new Error('start failed')) + : sinon.stub().resolves(), + stop: sinon.stub().resolves(), + remove: sinon.stub().resolves() + } + } + + beforeEach(function () { + // Bypass the Docker-specific constructor but keep the prototype methods. 
+    engine = Object.create(C2DEngineDocker.prototype)
+    network = { id: 'net-1', remove: sinon.stub().resolves() }
+
+    engine.db = {
+      newServiceJob: sinon.stub().resolves(),
+      updateServiceJob: sinon.stub().resolves()
+    }
+    engine.getC2DConfig = sinon.stub().returns({
+      hash: 'cluster-hash',
+      connection: {
+        serviceOnDemand: { hostPortRange: [30000, 32767], nodeHost: 'localhost' }
+      }
+    })
+    // Image pull succeeds; the failure we exercise is later, at container create/start.
+    engine.pullImageRef = sinon.stub().resolves()
+    engine.buildServiceResourceConstraints = sinon
+      .stub()
+      .returns({ Memory: 0, NanoCpus: 0, DeviceRequests: [] })
+    // Escrow succeeds (lock + claim) so the pipeline reaches the container phase.
+    engine.escrow = {
+      createLock: sinon.stub().resolves('0xlock'),
+      waitForTransaction: sinon.stub().resolves(),
+      claimLock: sinon.stub().resolves('0xclaim'),
+      cancelExpiredLock: sinon.stub().resolves('0xcancel'),
+      getMinLockTime: sinon.stub().returns(3600)
+    }
+    engine.keyManager = { decrypt: sinon.stub().resolves(Buffer.from('{}')) }
+  })
+
+  afterEach(() => sinon.restore())
+
+  async function expectRejects(promise: Promise<unknown>, messagePart: string) {
+    let thrown: Error | null = null
+    try {
+      await promise
+    } catch (err: any) {
+      thrown = err
+    }
+    expect(thrown, 'expected the call to reject').to.not.equal(null)
+    expect(thrown!.message).to.contain(messagePart)
+  }
+
+  // A fresh Starting job, as createServiceJob would have persisted it.
+ function makeStartingJob(overrides: any = {}) { + return { + serviceId: 'svc-1', + clusterHash: 'cluster-hash', + environment: 'env-1', + owner: '0xowner', + image: 'nginx', + tag: 'latest', + containerImage: 'nginx:latest', + containerId: '', + networkId: '', + status: 10, // Starting + statusText: 'Starting', + dateCreated: new Date().toISOString(), + expiresAt: Date.now() + 60000, + duration: 60, + exposedPorts: [80], + endpoints: [], + resources: [{ id: 'cpu', amount: 1 }], + payment: { + chainId: 1, + token: '0xtoken', + cost: 10, + lockTx: '', + claimTx: '', + cancelTx: '' + }, + ...overrides + } + } + + it('processServiceStart removes the network and marks Error when createContainer fails', async function () { + engine.docker = { + createNetwork: sinon.stub().resolves(network), + createContainer: sinon.stub().rejects(new Error('createContainer failed')) + } + const job = makeStartingJob({ serviceId: 'svc-1' }) + + await engine.processServiceStart(job) // never throws — failures are persisted as status + + expect(network.remove.calledOnce, 'network.remove should be called').to.equal(true) + expect(job.status).to.equal(ServiceStatusNumber.Error) + // Funds were already claimed before the container step, so no refund here. 
+ expect(engine.escrow.claimLock.calledOnce).to.equal(true) + expect(engine.escrow.cancelExpiredLock.called).to.equal(false) + }) + + it('processServiceStart removes container and network when container.start fails', async function () { + const container = makeContainer(true) + engine.docker = { + createNetwork: sinon.stub().resolves(network), + createContainer: sinon.stub().resolves(container) + } + const job = makeStartingJob({ serviceId: 'svc-2' }) + + await engine.processServiceStart(job) + + expect(container.remove.calledOnce, 'container.remove should be called').to.equal( + true + ) + expect(network.remove.calledOnce, 'network.remove should be called').to.equal(true) + expect(job.status).to.equal(ServiceStatusNumber.Error) + }) + + it('processServiceStart refunds (cancelLock) and marks PullImageFailed when the image pull fails', async function () { + engine.pullImageRef = sinon.stub().rejects(new Error('pull failed')) + engine.docker = { + createNetwork: sinon.stub().resolves(network), + createContainer: sinon.stub().resolves(makeContainer(false)) + } + const job = makeStartingJob({ serviceId: 'svc-img' }) + + await engine.processServiceStart(job) + + expect(engine.escrow.claimLock.called, 'must not claim when image failed').to.equal( + false + ) + expect(engine.escrow.cancelExpiredLock.calledOnce, 'must refund the lock').to.equal( + true + ) + expect(job.status).to.equal(ServiceStatusNumber.PullImageFailed) + expect(engine.docker.createContainer.called, 'must not create a container').to.equal( + false + ) + }) + + it('processServiceStart marks Error and skips the image when createLock fails', async function () { + engine.escrow.createLock = sinon.stub().resolves(null) + engine.docker = { createNetwork: sinon.stub(), createContainer: sinon.stub() } + const job = makeStartingJob({ serviceId: 'svc-lock' }) + + await engine.processServiceStart(job) + + expect(engine.pullImageRef.called, 'must not pull when lock failed').to.equal(false) + 
expect(engine.escrow.claimLock.called).to.equal(false) + expect(job.status).to.equal(ServiceStatusNumber.Error) + }) + + it('processServiceStart orphan recovery: cancels an unclaimed lock and marks Error', async function () { + engine.docker = { createNetwork: sinon.stub(), createContainer: sinon.stub() } + // Resuming a job left in Locking from a previous process, with a lock but no claim. + const job = makeStartingJob({ + serviceId: 'svc-orphan', + status: ServiceStatusNumber.Locking, + statusText: 'Locking', + payment: { + chainId: 1, + token: '0xtoken', + cost: 10, + lockTx: '0xlock', + claimTx: '', + cancelTx: '' + } + }) + + await engine.processServiceStart(job) + + expect( + engine.escrow.cancelExpiredLock.calledOnce, + 'orphan lock must be refunded' + ).to.equal(true) + expect(engine.escrow.createLock.called, 'must not re-lock an orphan').to.equal(false) + expect(job.status).to.equal(ServiceStatusNumber.Error) + }) + + it('restartService removes the newly created network when createContainer fails', async function () { + const existingJob = { + serviceId: 'svc-3', + clusterHash: 'cluster-hash', + environment: 'env-1', + owner: '0xowner', + image: 'nginx', + tag: 'latest', + containerImage: 'nginx:latest', + containerId: '', // empty → skip pre-teardown + networkId: '', + status: 40, // Running + statusText: 'Running', + dateCreated: new Date().toISOString(), + expiresAt: Date.now() + 60000, + duration: 60, + exposedPorts: [80], + endpoints: [{ containerPort: 80, hostPort: 30001, url: 'http://localhost:30001' }], + resources: [{ id: 'cpu', amount: 1 }], + payment: { chainId: 1, token: '0xtoken' } + } + engine.db.getServiceJob = sinon.stub().resolves([existingJob]) + engine.docker = { + createNetwork: sinon.stub().resolves(network), + createContainer: sinon.stub().rejects(new Error('createContainer failed')) + } + + await expectRejects( + engine.restartService('svc-3', '0xowner', undefined), + 'createContainer failed' + ) + + expect(network.remove.calledOnce, 
'network.remove should be called').to.equal(true)
+  })
+})
diff --git a/src/test/unit/service/serviceHandlers.test.ts b/src/test/unit/service/serviceHandlers.test.ts
new file mode 100644
index 000000000..f8d168496
--- /dev/null
+++ b/src/test/unit/service/serviceHandlers.test.ts
@@ -0,0 +1,488 @@
+import { assert, expect } from 'chai'
+import { Readable } from 'stream'
+import sinon from 'sinon'
+import { streamToObject } from '../../../utils/util.js'
+import { PROTOCOL_COMMANDS } from '../../../utils/constants.js'
+import { ServiceStatusNumber, ServiceJob } from '../../../@types/C2D/ServiceOnDemand.js'
+import { ServiceGetTemplatesHandler } from '../../../components/core/service/getTemplates.js'
+import { ServiceGetStatusHandler } from '../../../components/core/service/getStatus.js'
+import { ServiceStartHandler } from '../../../components/core/service/startService.js'
+import { ServiceStopHandler } from '../../../components/core/service/stopService.js'
+import { ServiceExtendHandler } from '../../../components/core/service/extendService.js'
+import { ServiceRestartHandler } from '../../../components/core/service/restartService.js'
+
+const OWNER = '0x0000000000000000000000000000000000000abc'
+
+function makeJob(overrides: Partial<ServiceJob> = {}): ServiceJob {
+  return {
+    serviceId: 'svc-1',
+    clusterHash: 'hash-1',
+    environment: 'env-1',
+    owner: OWNER,
+    image: 'img',
+    tag: 'latest',
+    containerImage: 'img:latest',
+    containerId: 'c1',
+    networkId: 'n1',
+    status: ServiceStatusNumber.Running,
+    statusText: 'Running',
+    dateCreated: new Date(0).toISOString(),
+    expiresAt: Date.now() + 3600_000,
+    duration: 3600,
+    exposedPorts: [8888],
+    endpoints: [{ containerPort: 8888, hostPort: 31000, url: 'http://localhost:31000' }],
+    userData: 'ENCRYPTED',
+    resources: [{ id: 'cpu', amount: 2 }],
+    payment: {
+      chainId: 8996,
+      token: '0xtoken',
+      lockTx: '0xl',
+      claimTx: '0xc',
+      cancelTx: '',
+      cost: 5
+    },
+    ...overrides
+  }
+}
+
+const TEMPLATE = {
+  id: 'jupyter-cpu',
+  
image: 'quay.io/jupyter/datascience-notebook', + tag: 'latest', + exposedPorts: [8888] +} + +interface FakeOpts { + serviceEnabled?: boolean + serviceJobInDb?: ServiceJob | null + cost?: number | null + envId?: string +} + +function buildFakes(opts: FakeOpts = {}) { + const env: any = { + id: opts.envId ?? 'env-1', + features: { + computeJobs: true, + services: opts.serviceEnabled !== false + }, + resources: [{ id: 'cpu', kind: 'fungible', total: 8, min: 1, max: 8 }] + } + + const escrow = { + createLock: sinon.stub().resolves('0xlock'), + claimLock: sinon.stub().resolves('0xclaim'), + cancelExpiredLock: sinon.stub().resolves('0xcancel'), + waitForTransaction: sinon.stub().resolves(undefined), + getMinLockTime: sinon.stub().returns(3600) + } + + const engine: any = { + db: { + getServiceJob: sinon + .stub() + .resolves( + opts.serviceJobInDb === undefined + ? [] + : opts.serviceJobInDb + ? [opts.serviceJobInDb] + : [] + ), + updateServiceJob: sinon.stub().resolves(1) + }, + escrow, + getComputeEnvironments: sinon.stub().resolves([env]), + getC2DConfig: sinon + .stub() + .returns({ connection: { serviceOnDemand: { maxDurationSeconds: 86400 } } }), + calculateResourcesCost: sinon + .stub() + .returns(opts.cost === undefined ? 10 : opts.cost), + checkAndFillMissingResources: sinon + .stub() + .callsFake((r: any) => Promise.resolve(r ?? [])), + checkIfResourcesAreAvailable: sinon.stub().resolves(undefined), + getEnvPricesForToken: sinon.stub().returns([{ id: 'cpu', price: 1 }]), + // Async start: the handler only persists a Starting record and returns; the escrow + + // image + container work is done later by processServiceStart (driven by the cron). 
+    createServiceJob: sinon.stub().callsFake(() =>
+      Promise.resolve(
+        makeJob({
+          status: ServiceStatusNumber.Starting,
+          statusText: 'Starting',
+          containerId: '',
+          networkId: '',
+          endpoints: []
+        })
+      )
+    ),
+    stopService: sinon
+      .stub()
+      .callsFake(() => Promise.resolve(makeJob({ status: ServiceStatusNumber.Stopped }))),
+    restartService: sinon
+      .stub()
+      .callsFake(() =>
+        Promise.resolve(
+          makeJob({ containerId: 'c2', status: ServiceStatusNumber.Running })
+        )
+      ),
+    getServiceStatus: sinon
+      .stub()
+      .resolves(opts.serviceJobInDb ? [opts.serviceJobInDb] : [])
+  }
+
+  const engines: any = {
+    getAllEngines: () => [engine],
+    getC2DByEnvId: sinon.stub().resolves(engine),
+    getC2DByHash: sinon.stub().resolves(engine),
+    fetchServiceTemplates: sinon.stub().resolves([{ ...TEMPLATE }])
+  }
+
+  const node: any = {
+    getRequestMap: () => new Map(),
+    getConfig: (): any => ({
+      rateLimit: undefined as number | undefined,
+      serviceTemplatesPath: undefined as string | undefined
+    }),
+    getC2DEngines: () => engines,
+    getKeyManager: () => ({
+      decrypt: (d: Uint8Array) => Promise.resolve(Buffer.from(d))
+    }),
+    getAuth: () => ({
+      validateAuthenticationOrToken: () => Promise.resolve({ valid: true })
+    })
+  }
+
+  return { node, engine, engines, escrow, env }
+}
+
+function body(response: any): Promise<any> {
+  return streamToObject(response.stream as Readable)
+}
+
+describe('Service handlers', () => {
+  afterEach(() => sinon.restore())
+
+  describe('ServiceGetTemplatesHandler', () => {
+    it('returns templates from fetchServiceTemplates (200)', async () => {
+      const { node, engines } = buildFakes()
+      const res = await new ServiceGetTemplatesHandler(node).handle({
+        command: PROTOCOL_COMMANDS.SERVICE_GET_TEMPLATES
+      } as any)
+      expect(res.status.httpStatus).to.equal(200)
+      const templates = await body(res)
+      expect(templates).to.have.length(1)
+      expect(engines.fetchServiceTemplates.calledOnce).to.equal(true)
+    })
+  })
+
+  describe('ServiceGetStatusHandler', () => {
+    
it('400 when consumerAddress is missing', async () => { + const { node } = buildFakes() + const res = await new ServiceGetStatusHandler(node).handle({ + command: PROTOCOL_COMMANDS.SERVICE_GET_STATUS, + serviceId: 'svc-1' + } as any) + expect(res.status.httpStatus).to.equal(400) + }) + + it('401 when the signature/token is invalid', async () => { + const { node } = buildFakes({ serviceJobInDb: makeJob() }) + node.getAuth = () => ({ + validateAuthenticationOrToken: () => + Promise.resolve({ valid: false, error: 'bad signature' }) + }) + const res = await new ServiceGetStatusHandler(node).handle({ + command: PROTOCOL_COMMANDS.SERVICE_GET_STATUS, + consumerAddress: OWNER, + nonce: '1', + signature: '0xbad', + serviceId: 'svc-1' + } as any) + expect(res.status.httpStatus).to.equal(401) + }) + + it('returns jobs by serviceId with userData stripped (authenticated)', async () => { + const { node } = buildFakes({ serviceJobInDb: makeJob() }) + const res = await new ServiceGetStatusHandler(node).handle({ + command: PROTOCOL_COMMANDS.SERVICE_GET_STATUS, + consumerAddress: OWNER, + nonce: '1', + signature: '0xsig', + serviceId: 'svc-1' + } as any) + expect(res.status.httpStatus).to.equal(200) + const jobs = await body(res) + expect(jobs).to.have.length(1) + expect(jobs[0]).to.not.have.property('userData') + expect(jobs[0].serviceId).to.equal('svc-1') + }) + }) + + describe('ServiceStopHandler', () => { + const baseTask = { + command: PROTOCOL_COMMANDS.SERVICE_STOP, + consumerAddress: OWNER, + nonce: '1', + signature: '0xsig', + serviceId: 'svc-1' + } + + it('400 when service not found', async () => { + const { node } = buildFakes({ serviceJobInDb: null }) + const res = await new ServiceStopHandler(node).handle({ ...baseTask } as any) + expect(res.status.httpStatus).to.equal(400) + }) + + it('401 when caller is not the owner', async () => { + const { node } = buildFakes({ serviceJobInDb: makeJob({ owner: '0xsomeoneelse' }) }) + const res = await new 
ServiceStopHandler(node).handle({ ...baseTask } as any) + expect(res.status.httpStatus).to.equal(401) + }) + + it('200 and calls engine.stopService on success', async () => { + const { node, engine } = buildFakes({ serviceJobInDb: makeJob() }) + const res = await new ServiceStopHandler(node).handle({ ...baseTask } as any) + expect(res.status.httpStatus).to.equal(200) + expect(engine.stopService.calledOnce).to.equal(true) + const jobs = await body(res) + expect(jobs[0].status).to.equal(ServiceStatusNumber.Stopped) + expect(jobs[0]).to.not.have.property('userData') + }) + }) + + describe('ServiceRestartHandler', () => { + const baseTask = { + command: PROTOCOL_COMMANDS.SERVICE_RESTART, + consumerAddress: OWNER, + nonce: '1', + signature: '0xsig', + serviceId: 'svc-1' + } + + it('400 when not found', async () => { + const { node } = buildFakes({ serviceJobInDb: null }) + const res = await new ServiceRestartHandler(node).handle({ ...baseTask } as any) + expect(res.status.httpStatus).to.equal(400) + }) + + it('401 when not owner', async () => { + const { node } = buildFakes({ serviceJobInDb: makeJob({ owner: '0xother' }) }) + const res = await new ServiceRestartHandler(node).handle({ ...baseTask } as any) + expect(res.status.httpStatus).to.equal(401) + }) + + it('400 when restarting an expired service', async () => { + const { node } = buildFakes({ + serviceJobInDb: makeJob({ status: ServiceStatusNumber.Expired }) + }) + const res = await new ServiceRestartHandler(node).handle({ ...baseTask } as any) + expect(res.status.httpStatus).to.equal(400) + }) + + it('200 and calls engine.restartService on success', async () => { + const { node, engine } = buildFakes({ serviceJobInDb: makeJob() }) + const res = await new ServiceRestartHandler(node).handle({ ...baseTask } as any) + expect(res.status.httpStatus).to.equal(200) + expect(engine.restartService.calledOnce).to.equal(true) + const jobs = await body(res) + expect(jobs[0].containerId).to.equal('c2') + 
expect(jobs[0]).to.not.have.property('userData') + }) + }) + + describe('ServiceExtendHandler', () => { + const baseTask = { + command: PROTOCOL_COMMANDS.SERVICE_EXTEND, + consumerAddress: OWNER, + nonce: '1', + signature: '0xsig', + serviceId: 'svc-1', + additionalDuration: 3600, + payment: { chainId: 8996, token: '0xtoken' } + } + + it('400 when not found', async () => { + const { node } = buildFakes({ serviceJobInDb: null }) + const res = await new ServiceExtendHandler(node).handle({ ...baseTask } as any) + expect(res.status.httpStatus).to.equal(400) + }) + + it('400 when additionalDuration is not strictly positive', async () => { + const { node } = buildFakes({ serviceJobInDb: makeJob() }) + for (const additionalDuration of [0, -1, -3600]) { + const res = await new ServiceExtendHandler(node).handle({ + ...baseTask, + additionalDuration + } as any) + expect( + res.status.httpStatus, + `additionalDuration=${additionalDuration}` + ).to.equal(400) + } + }) + + it('401 when not owner', async () => { + const { node } = buildFakes({ serviceJobInDb: makeJob({ owner: '0xother' }) }) + const res = await new ServiceExtendHandler(node).handle({ ...baseTask } as any) + expect(res.status.httpStatus).to.equal(401) + }) + + it('400 when service is Stopped (bad state)', async () => { + const { node } = buildFakes({ + serviceJobInDb: makeJob({ status: ServiceStatusNumber.Stopped }) + }) + const res = await new ServiceExtendHandler(node).handle({ ...baseTask } as any) + expect(res.status.httpStatus).to.equal(400) + }) + + it('400 when extension exceeds maxDurationSeconds', async () => { + const { node } = buildFakes({ + serviceJobInDb: makeJob({ expiresAt: Date.now() + 86000 * 1000 }) + }) + const res = await new ServiceExtendHandler(node).handle({ + ...baseTask, + additionalDuration: 10000 + } as any) + expect(res.status.httpStatus).to.equal(400) + }) + + it('402 when escrow lock fails', async () => { + const { node, escrow } = buildFakes({ serviceJobInDb: makeJob() }) + 
escrow.createLock.resolves(null) + const res = await new ServiceExtendHandler(node).handle({ ...baseTask } as any) + expect(res.status.httpStatus).to.equal(402) + }) + + it('402 and cancels lock when claim fails', async () => { + const { node, escrow } = buildFakes({ serviceJobInDb: makeJob() }) + escrow.claimLock.resolves(null) + const res = await new ServiceExtendHandler(node).handle({ ...baseTask } as any) + expect(res.status.httpStatus).to.equal(402) + expect(escrow.cancelExpiredLock.calledOnce).to.equal(true) + }) + + it('200, advances expiresAt and records an extendPayment', async () => { + const job = makeJob() + const before = job.expiresAt + const { node, engine, escrow } = buildFakes({ serviceJobInDb: job }) + const res = await new ServiceExtendHandler(node).handle({ ...baseTask } as any) + expect(res.status.httpStatus).to.equal(200) + expect(escrow.createLock.calledOnce).to.equal(true) + expect(escrow.claimLock.calledOnce).to.equal(true) + expect(engine.db.updateServiceJob.calledOnce).to.equal(true) + const out = await body(res) + expect(out[0].expiresAt).to.equal(before + 3600 * 1000) + expect(out[0].extendPayments).to.have.length(1) + expect(out[0]).to.not.have.property('userData') + }) + }) + + describe('ServiceStartHandler', () => { + const baseTask = { + command: PROTOCOL_COMMANDS.SERVICE_START, + consumerAddress: OWNER, + nonce: '1', + signature: '0xsig', + environment: 'env-1', + image: 'nginx', + tag: 'alpine', + exposedPorts: [80], + dockerCmd: ['nginx', '-g', 'daemon off;'], + dockerEntrypoint: ['/docker-entrypoint.sh'], + duration: 3600, + payment: { chainId: 8996, token: '0xtoken' } + } + + it('400 when consumerAddress is not a valid address', async () => { + const { node } = buildFakes() + const res = await new ServiceStartHandler(node).handle({ + ...baseTask, + consumerAddress: 'not-an-address' + } as any) + expect(res.status.httpStatus).to.equal(400) + }) + + it('400 when environment is unknown (getC2DByEnvId throws)', async () => { + 
const { node, engines } = buildFakes() + engines.getC2DByEnvId.rejects(new Error('not found')) + const res = await new ServiceStartHandler(node).handle({ ...baseTask } as any) + expect(res.status.httpStatus).to.equal(400) + }) + + it('403 when services are disabled on the environment', async () => { + const { node } = buildFakes({ serviceEnabled: false }) + const res = await new ServiceStartHandler(node).handle({ ...baseTask } as any) + expect(res.status.httpStatus).to.equal(403) + }) + + it('400 when image is missing', async () => { + const { node } = buildFakes() + const { image, ...noImage } = baseTask + const res = await new ServiceStartHandler(node).handle({ ...noImage } as any) + expect(res.status.httpStatus).to.equal(400) + }) + + it('400 when more than one image mode is set (tag + dockerfile)', async () => { + const { node } = buildFakes() + const res = await new ServiceStartHandler(node).handle({ + ...baseTask, + dockerfile: 'FROM nginx:alpine' + } as any) + expect(res.status.httpStatus).to.equal(400) + }) + + it('400 when duration exceeds maxDurationSeconds', async () => { + const { node } = buildFakes() + const res = await new ServiceStartHandler(node).handle({ + ...baseTask, + duration: 999999 + } as any) + expect(res.status.httpStatus).to.equal(400) + }) + + it('400 when no pricing for the token (cost null)', async () => { + const { node } = buildFakes({ cost: null }) + const res = await new ServiceStartHandler(node).handle({ ...baseTask } as any) + expect(res.status.httpStatus).to.equal(400) + }) + + it('200 returns immediately with a Starting job and does NOT run escrow synchronously', async () => { + const { node, engine, escrow } = buildFakes() + const res = await new ServiceStartHandler(node).handle({ ...baseTask } as any) + expect(res.status.httpStatus).to.equal(200) + const out = await body(res) + // The response is the freshly-persisted Starting record — escrow + container come later. 
+ expect(out[0].status).to.equal(ServiceStatusNumber.Starting) + expect(out[0].endpoints).to.deep.equal([]) + expect(out[0]).to.not.have.property('userData') + // Escrow now runs in the background pipeline, not in the request path. + expect(escrow.createLock.called).to.equal(false) + expect(escrow.claimLock.called).to.equal(false) + // The handler must not invoke the background pipeline itself. + expect(engine.processServiceStart).to.equal(undefined) + }) + + it('200 happy path: calls createServiceJob with env/image/dockerCmd/dockerEntrypoint, strips userData', async () => { + const { node, engine } = buildFakes() + const res = await new ServiceStartHandler(node).handle({ ...baseTask } as any) + expect(res.status.httpStatus).to.equal(200) + assert(engine.createServiceJob.calledOnce, 'createServiceJob should be called') + const { args } = engine.createServiceJob.firstCall + // signature: (environment, image, tag, checksum, dockerfile, additionalDockerFiles, + // dockerCmd, dockerEntrypoint, exposedPorts, resources, duration, owner, + // payment, serviceId, userData) + expect(args[0]).to.equal('env-1') + expect(args[1]).to.equal('nginx') + expect(args[6]).to.deep.equal(['nginx', '-g', 'daemon off;']) + expect(args[7]).to.deep.equal(['/docker-entrypoint.sh']) + // payment carries the server-side cost but no tx hashes yet (filled in by the pipeline). 
+ const payment = args[12] + expect(payment.cost).to.equal(10) + expect(payment.lockTx).to.equal('') + expect(payment.claimTx).to.equal('') + const out = await body(res) + expect(out[0]).to.not.have.property('userData') + }) + }) +}) diff --git a/src/test/unit/service/serviceJobsDatabase.test.ts b/src/test/unit/service/serviceJobsDatabase.test.ts new file mode 100644 index 000000000..f1f69d1c3 --- /dev/null +++ b/src/test/unit/service/serviceJobsDatabase.test.ts @@ -0,0 +1,350 @@ +import { assert, expect } from 'chai' +import { Readable } from 'stream' +import { C2DDatabase } from '../../../components/database/C2DDatabase.js' +import { typesenseSchemas } from '../../../components/database/TypesenseSchemas.js' +import { getConfiguration } from '../../../utils/config.js' +import { + buildEnvOverrideConfig, + OverrideEnvConfig, + setupEnvironment, + tearDownEnvironment, + TEST_ENV_CONFIG_FILE +} from '../../utils/utils.js' +import { ENVIRONMENT_VARIABLES } from '../../../utils/constants.js' +import { OceanNodeConfig } from '../../../@types/OceanNode.js' +import { + C2DClusterType, + ComputeEnvironment, + ComputeJob, + ComputeResource +} from '../../../@types/C2D/C2D.js' +import { ServiceJob, ServiceStatusNumber } from '../../../@types/C2D/ServiceOnDemand.js' +import { C2DEngine } from '../../../components/c2d/compute_engine_base.js' +import { ValidateParams } from '../../../components/httpRoutes/validateCommands.js' + +const CLUSTER_HASH = 'svc-test-cluster' + +/* eslint-disable require-await */ +// Minimal concrete engine bound to a real DB + cluster hash, so we can exercise +// getUsedResources() (which reads running compute + service jobs from the DB). 
+class SharedAccountingEngine extends C2DEngine {
+  constructor(db: C2DDatabase) {
+    super({ type: C2DClusterType.DOCKER, hash: CLUSTER_HASH }, db, null, null, null)
+  }
+
+  setLimits(limits: Map<string, number>) { // NOTE(review): value type assumed; confirm against C2DEngine.physicalLimits
+    this.physicalLimits = limits
+  }
+
+  async getComputeEnvironments(): Promise<ComputeEnvironment[]> {
+    return []
+  }
+
+  async checkDockerImage(): Promise<ValidateParams> {
+    return { valid: true, reason: null as string, status: 200 }
+  }
+
+  async startComputeJob(): Promise<ComputeJob[]> {
+    return []
+  }
+
+  async stopComputeJob(): Promise<ComputeJob[]> {
+    return []
+  }
+
+  async getComputeJobStatus(): Promise<ComputeJob[]> {
+    return []
+  }
+
+  async getComputeJobResult(): Promise<{ stream: Readable; headers: any }> {
+    return null
+  }
+
+  async cleanupExpiredStorage(): Promise<boolean> {
+    return true
+  }
+}
+/* eslint-enable require-await */
+
+function makeEnv(id: string, resources: ComputeResource[]): ComputeEnvironment {
+  return {
+    id,
+    resources,
+    runningJobs: 0,
+    runningfreeJobs: 0,
+    queuedJobs: 0,
+    queuedFreeJobs: 0,
+    queMaxWaitTime: 0,
+    queMaxWaitTimeFree: 0,
+    runMaxWaitTime: 0,
+    runMaxWaitTimeFree: 0,
+    consumerAddress: '0x0',
+    fees: {},
+    access: { addresses: [], accessLists: null },
+    platform: { architecture: 'x86_64', os: 'linux' },
+    minJobDuration: 60,
+    maxJobDuration: 3600,
+    maxJobs: 10
+  } as ComputeEnvironment
+}
+
+function makeServiceJob(overrides: Partial<ServiceJob> = {}): ServiceJob {
+  return {
+    serviceId: 'svc-' + Math.random().toString(36).slice(2),
+    clusterHash: CLUSTER_HASH,
+    environment: 'env-a',
+    owner: '0xowner',
+    image: 'quay.io/jupyter/datascience-notebook',
+    tag: 'latest',
+    containerImage: 'quay.io/jupyter/datascience-notebook:latest',
+    containerId: 'container-1',
+    networkId: 'network-1',
+    status: ServiceStatusNumber.Running,
+    statusText: 'Running',
+    dateCreated: new Date(0).toISOString(),
+    expiresAt: Date.now() + 3600_000,
+    duration: 3600,
+    exposedPorts: [8888],
+    endpoints: [{ containerPort: 8888, hostPort: 31000, url: 'http://localhost:31000' }],
+    userData: 'ENCRYPTED_BLOB',
+    resources: [{ id: 
'cpu', amount: 2 }], + payment: { + chainId: 8996, + token: '0x123', + lockTx: '0xlock', + claimTx: '0xclaim', + cancelTx: '', + cost: 5 + }, + ...overrides + } +} + +describe('Service Jobs Database', () => { + let envOverrides: OverrideEnvConfig[] + let config: OceanNodeConfig + let db: C2DDatabase = null + + before(async () => { + envOverrides = buildEnvOverrideConfig( + [ENVIRONMENT_VARIABLES.DOCKER_COMPUTE_ENVIRONMENTS], + [ + '[{"socketPath":"/var/run/docker.sock","environments":[{"storageExpiry":604800,"maxJobDuration":3600,"minJobDuration":60,"resources":[{"id":"cpu","total":4,"max":4,"min":1,"type":"cpu"}],"fees":{"1":[{"feeToken":"0x123","prices":[{"id":"cpu","price":1}]}]}}]}]' + ] + ) + envOverrides = await setupEnvironment(TEST_ENV_CONFIG_FILE, envOverrides) + config = await getConfiguration(true) + db = await new C2DDatabase(config.dbConfig, typesenseSchemas.c2dSchemas) + }) + + after(async () => { + await tearDownEnvironment(envOverrides) + }) + + it('inserts and reads back a service job by serviceId', async () => { + const job = makeServiceJob() + await db.newServiceJob(job) + const [found] = await db.getServiceJob(job.serviceId) + assert(found, 'service job not found') + expect(found.serviceId).to.equal(job.serviceId) + expect(found.environment).to.equal('env-a') + expect(found.userData).to.equal('ENCRYPTED_BLOB') + expect(found.resources[0]).to.deep.equal({ id: 'cpu', amount: 2 }) + }) + + it('filters service jobs by owner', async () => { + const mine = makeServiceJob({ owner: '0xalice' }) + const theirs = makeServiceJob({ owner: '0xbob' }) + await db.newServiceJob(mine) + await db.newServiceJob(theirs) + const aliceJobs = await db.getServiceJob(undefined, '0xalice') + expect(aliceJobs.every((j) => j.owner === '0xalice')).to.equal(true) + expect(aliceJobs.find((j) => j.serviceId === mine.serviceId)).to.not.equal(undefined) + }) + + it('updates a service job (status + expiresAt + body)', async () => { + const job = makeServiceJob() + await 
db.newServiceJob(job) + job.status = ServiceStatusNumber.Stopped + job.statusText = 'Stopped' + job.expiresAt = 123456 + const updated = await db.updateServiceJob(job) + expect(updated).to.equal(1) + const [found] = await db.getServiceJob(job.serviceId) + expect(found.status).to.equal(ServiceStatusNumber.Stopped) + expect(found.expiresAt).to.equal(123456) + }) + + it('getRunningServiceJobs returns only active statuses for the cluster', async () => { + const running = makeServiceJob({ status: ServiceStatusNumber.Running }) + const starting = makeServiceJob({ status: ServiceStatusNumber.Starting }) + const locking = makeServiceJob({ status: ServiceStatusNumber.Locking }) + const claiming = makeServiceJob({ status: ServiceStatusNumber.Claiming }) + const stopped = makeServiceJob({ status: ServiceStatusNumber.Stopped }) + const otherCluster = makeServiceJob({ + status: ServiceStatusNumber.Running, + clusterHash: 'other-cluster' + }) + await db.newServiceJob(running) + await db.newServiceJob(starting) + await db.newServiceJob(locking) + await db.newServiceJob(claiming) + await db.newServiceJob(stopped) + await db.newServiceJob(otherCluster) + + const active = await db.getRunningServiceJobs(CLUSTER_HASH) + const ids = active.map((j) => j.serviceId) + expect(ids).to.include(running.serviceId) + expect(ids).to.include(starting.serviceId) + // the new start-pipeline states must reserve resources too + expect(ids).to.include(locking.serviceId) + expect(ids).to.include(claiming.serviceId) + expect(ids).to.not.include(stopped.serviceId) + expect(ids).to.not.include(otherCluster.serviceId) + }) + + it('getPendingServiceStarts returns mid-start jobs (not Running/terminal) for the cluster', async () => { + const starting = makeServiceJob({ status: ServiceStatusNumber.Starting }) + const locking = makeServiceJob({ status: ServiceStatusNumber.Locking }) + const claiming = makeServiceJob({ status: ServiceStatusNumber.Claiming }) + const running = makeServiceJob({ status: 
ServiceStatusNumber.Running }) + const stopped = makeServiceJob({ status: ServiceStatusNumber.Stopped }) + const otherCluster = makeServiceJob({ + status: ServiceStatusNumber.Starting, + clusterHash: 'other-cluster' + }) + await db.newServiceJob(starting) + await db.newServiceJob(locking) + await db.newServiceJob(claiming) + await db.newServiceJob(running) + await db.newServiceJob(stopped) + await db.newServiceJob(otherCluster) + + const pending = await db.getPendingServiceStarts(CLUSTER_HASH) + const ids = pending.map((j) => j.serviceId) + expect(ids).to.include(starting.serviceId) + expect(ids).to.include(locking.serviceId) + expect(ids).to.include(claiming.serviceId) + expect(ids).to.not.include(running.serviceId) // already started + expect(ids).to.not.include(stopped.serviceId) + expect(ids).to.not.include(otherCluster.serviceId) + }) + + it('getExpiredServiceJobs returns only Running jobs past expiry', async () => { + const expired = makeServiceJob({ + status: ServiceStatusNumber.Running, + expiresAt: Date.now() - 1000 + }) + const future = makeServiceJob({ + status: ServiceStatusNumber.Running, + expiresAt: Date.now() + 3600_000 + }) + const expiredButStopped = makeServiceJob({ + status: ServiceStatusNumber.Stopped, + expiresAt: Date.now() - 1000 + }) + await db.newServiceJob(expired) + await db.newServiceJob(future) + await db.newServiceJob(expiredButStopped) + + const expiredJobs = await db.getExpiredServiceJobs(CLUSTER_HASH) + const ids = expiredJobs.map((j) => j.serviceId) + expect(ids).to.include(expired.serviceId) + expect(ids).to.not.include(future.serviceId) + expect(ids).to.not.include(expiredButStopped.serviceId) + }) + + describe('shared resource accounting (compute + service)', () => { + let engine: SharedAccountingEngine + + before(async () => { + engine = new SharedAccountingEngine(db) + engine.setLimits( + new Map([ + ['cpu', 10], + ['gpu0', 2] + ]) + ) + // Clean slate: stop any leftover running jobs from earlier tests by marking them 
Stopped. + const leftovers = await db.getRunningServiceJobs(CLUSTER_HASH) + for (const j of leftovers) { + j.status = ServiceStatusNumber.Stopped + await db.updateServiceJob(j) + } + }) + + it('a running service occupies fungible resources in its own env', async () => { + await db.newServiceJob( + makeServiceJob({ + environment: 'env-shared', + status: ServiceStatusNumber.Running, + resources: [{ id: 'cpu', amount: 3 }] + }) + ) + const env = makeEnv('env-shared', [ + { id: 'cpu', kind: 'fungible', total: 10, min: 1, max: 10 } as ComputeResource + ]) + const used = await engine.getUsedResources(env) + expect(used.usedResources.cpu).to.equal(3) + }) + + it('fungible usage is NOT counted against a different env', async () => { + // The cpu service above is bound to 'env-shared'; a different env sees 0 cpu used. + const otherEnv = makeEnv('env-other', [ + { id: 'cpu', kind: 'fungible', total: 10, min: 1, max: 10 } as ComputeResource + ]) + const used = await engine.getUsedResources(otherEnv) + expect(used.usedResources.cpu ?? 0).to.equal(0) + }) + + it('a running service occupies discrete resources GLOBALLY (any env)', async () => { + await db.newServiceJob( + makeServiceJob({ + environment: 'env-gpu', + status: ServiceStatusNumber.Running, + resources: [{ id: 'gpu0', amount: 1 }] + }) + ) + // Query a DIFFERENT env: discrete usage is global, so it still shows up. + const env = makeEnv('env-elsewhere', [ + { + id: 'gpu0', + kind: 'discrete', + type: 'gpu', + total: 2, + min: 0, + max: 2 + } as ComputeResource + ]) + const used = await engine.getUsedResources(env) + expect(used.usedResources.gpu0).to.equal(1) + }) + + it('checkIfResourcesAreAvailable blocks a compute request when a service holds the GPU', async () => { + // gpu0 total:1 in the env, already 1 in use by the service above (global discrete). 
+ const env = makeEnv('env-elsewhere', [ + { + id: 'gpu0', + kind: 'discrete', + type: 'gpu', + total: 1, + min: 0, + max: 1, + inUse: 1 + } as ComputeResource + ]) + let threw = false + try { + await engine.checkIfResourcesAreAvailable( + [{ id: 'gpu0', amount: 1 }], + env, + false, + [env] + ) + } catch { + threw = true + } + expect(threw).to.equal(true) + }) + }) +}) diff --git a/src/test/unit/service/serviceSchemas.test.ts b/src/test/unit/service/serviceSchemas.test.ts new file mode 100644 index 000000000..1377e7f31 --- /dev/null +++ b/src/test/unit/service/serviceSchemas.test.ts @@ -0,0 +1,137 @@ +import { expect } from 'chai' +import { + ServiceTemplateSchema, + ServiceOnDemandConfigSchema, + C2DEnvironmentConfigSchema +} from '../../../utils/config/schemas.js' + +const baseTemplate = { + id: 'jupyter-cpu', + image: 'quay.io/jupyter/datascience-notebook', + exposedPorts: [8888] +} + +describe('ServiceTemplateSchema', () => { + it('image + tag → valid', () => { + expect( + ServiceTemplateSchema.safeParse({ ...baseTemplate, tag: 'latest' }).success + ).to.equal(true) + }) + it('image + checksum (sha256) → valid', () => { + const checksum = 'sha256:' + 'a'.repeat(64) + expect( + ServiceTemplateSchema.safeParse({ ...baseTemplate, checksum }).success + ).to.equal(true) + }) + it('image + dockerfile → valid', () => { + expect( + ServiceTemplateSchema.safeParse({ ...baseTemplate, dockerfile: 'FROM x' }).success + ).to.equal(true) + }) + it('tag + dockerfile together → invalid', () => { + expect( + ServiceTemplateSchema.safeParse({ ...baseTemplate, tag: 'l', dockerfile: 'FROM x' }) + .success + ).to.equal(false) + }) + it('additionalDockerFiles without dockerfile → invalid', () => { + expect( + ServiceTemplateSchema.safeParse({ + ...baseTemplate, + tag: 'l', + additionalDockerFiles: { 'a.txt': 'x' } + }).success + ).to.equal(false) + }) + it('no tag/checksum/dockerfile → valid (defaults to image:latest at runtime)', () => { + 
expect(ServiceTemplateSchema.safeParse(baseTemplate).success).to.equal(true) + }) + it('bad id → invalid', () => { + expect( + ServiceTemplateSchema.safeParse({ ...baseTemplate, id: 'Bad Id!' }).success + ).to.equal(false) + }) + it('requiredResources: neither id nor kind → invalid', () => { + expect( + ServiceTemplateSchema.safeParse({ + ...baseTemplate, + requiredResources: [{ min: 1 }] + }).success + ).to.equal(false) + }) + it('requiredResources: both id and kind → invalid', () => { + expect( + ServiceTemplateSchema.safeParse({ + ...baseTemplate, + requiredResources: [{ id: 'cpu', kind: 'fungible', min: 1 }] + }).success + ).to.equal(false) + }) + it('recommended < min → invalid', () => { + expect( + ServiceTemplateSchema.safeParse({ + ...baseTemplate, + requiredResources: [{ id: 'cpu', min: 4, recommended: 2 }] + }).success + ).to.equal(false) + }) + it('valid required + recommended resources → valid', () => { + expect( + ServiceTemplateSchema.safeParse({ + ...baseTemplate, + requiredResources: [{ id: 'cpu', min: 2, recommended: 4 }], + recommendedResources: [{ kind: 'discrete', type: 'gpu', min: 1, recommended: 2 }] + }).success + ).to.equal(true) + }) +}) + +describe('ServiceOnDemandConfigSchema', () => { + it('applies defaults', () => { + const parsed = ServiceOnDemandConfigSchema.parse({ + enabled: true, + nodeHost: 'localhost' + }) + expect(parsed.maxDurationSeconds).to.equal(86400) + expect(parsed.allowImageBuild).to.equal(false) + }) + it('requires nodeHost', () => { + expect(ServiceOnDemandConfigSchema.safeParse({ enabled: true }).success).to.equal( + false + ) + }) + it('rejects unknown keys (strict)', () => { + expect( + ServiceOnDemandConfigSchema.safeParse({ + enabled: true, + nodeHost: 'localhost', + bogus: 1 + }).success + ).to.equal(false) + }) +}) + +describe('C2DEnvironmentConfigSchema features', () => { + const base: any = { + fees: { '8996': [{ feeToken: '0x0', prices: [] as any[] }] }, + storageExpiry: 604800, + maxJobDuration: 3600 + } + 
it('no features block → both default true', () => { + const parsed: any = C2DEnvironmentConfigSchema.parse(base) + expect(parsed.features).to.deep.equal({ computeJobs: true, services: true }) + }) + it('partial features { computeJobs:false } → services defaults true', () => { + const parsed: any = C2DEnvironmentConfigSchema.parse({ + ...base, + features: { computeJobs: false } + }) + expect(parsed.features).to.deep.equal({ computeJobs: false, services: true }) + }) + it('unknown feature key → invalid (strict)', () => { + expect( + C2DEnvironmentConfigSchema.safeParse({ ...base, features: { compute: true } }) + .success + ).to.equal(false) + }) +}) diff --git a/src/test/unit/service/serviceUtils.test.ts b/src/test/unit/service/serviceUtils.test.ts new file mode 100644 index 000000000..be29488c8 --- /dev/null +++ b/src/test/unit/service/serviceUtils.test.ts @@ -0,0 +1,95 @@ +import { expect } from 'chai' +import type { ServiceJob } from '../../../@types/C2D/ServiceOnDemand.js' +import { + userDataToEnv, + toPublicServiceJob, + decryptUserData, + allocateHostPort, + releaseHostPort +} from '../../../components/core/service/utils.js' + +describe('service utils', () => { + describe('userDataToEnv', () => { + it('maps a decrypted userData object into a stringified env map', () => { + expect(userDataToEnv({ MODEL_ID: 'm', PORT: 8000, FLAG: true })).to.deep.equal({ + MODEL_ID: 'm', + PORT: '8000', + FLAG: 'true' + }) + }) + it('skips null/undefined values', () => { + expect(userDataToEnv({ A: 'x', B: null, C: undefined })).to.deep.equal({ A: 'x' }) + }) + it('empty object → {}', () => { + expect(userDataToEnv({})).to.deep.equal({}) + }) + }) + + describe('toPublicServiceJob', () => { + it('strips userData, keeps other fields', () => { + const job = { + serviceId: 's1', + userData: 'ENCRYPTED', + owner: '0xabc', + endpoints: [] + } as unknown as ServiceJob + const pub = toPublicServiceJob(job) + expect(pub).to.not.have.property('userData') + 
expect(pub).to.have.property('serviceId', 's1') + }) + it('is null-safe', () => { + expect(toPublicServiceJob(null)).to.equal(null) + }) + }) + + describe('decryptUserData', () => { + const fakeKeyManager = { + decrypt: (data: Uint8Array) => Promise.resolve(Buffer.from(data)) + } as any + it('returns {} when undefined', async () => { + expect(await decryptUserData(undefined, fakeKeyManager)).to.deep.equal({}) + }) + it('decrypts + JSON-parses a hex payload', async () => { + const payload = JSON.stringify({ MODEL_ID: 'm' }) + const hex = Buffer.from(payload).toString('hex') + const out = await decryptUserData(hex, fakeKeyManager) + expect(out).to.deep.equal({ MODEL_ID: 'm' }) + }) + it('propagates when decrypted payload is not valid JSON', async () => { + const hex = Buffer.from('not-json').toString('hex') + let threw = false + try { + await decryptUserData(hex, fakeKeyManager) + } catch { + threw = true + } + expect(threw).to.equal(true) + }) + }) + + describe('allocateHostPort', () => { + it('never hands out the same port to concurrent callers (TOCTOU)', async () => { + // Property under test: concurrent allocations are always unique. allocateHostPort + // reserves a candidate synchronously (allocatedPorts.add) before the async + // isPortFree() check, so no two concurrent callers can return the same port. + // Use a range far larger than the request count — the allocator probes randomly with + // a bounded retry budget, so an exact-fit range would flake (and CI may already hold + // some ports); ample headroom isolates the uniqueness guarantee from exhaustion. 
+ const rangeStart = 41000 + const rangeEnd = 41999 // 1000 ports + const count = 25 + + const ports = await Promise.all( + Array.from({ length: count }, () => allocateHostPort(rangeStart, rangeEnd)) + ) + try { + expect(new Set(ports).size).to.equal(count) // all unique + ports.forEach((p) => + expect(p).to.be.within(rangeStart, rangeEnd, `port ${p} out of range`) + ) + } finally { + ports.forEach((p) => releaseHostPort(p)) + } + }) + }) +}) diff --git a/src/test/unit/service/templateLoader.test.ts b/src/test/unit/service/templateLoader.test.ts new file mode 100644 index 000000000..da0b59dcd --- /dev/null +++ b/src/test/unit/service/templateLoader.test.ts @@ -0,0 +1,85 @@ +import { expect } from 'chai' +import { mkdtempSync, writeFileSync, rmSync } from 'fs' +import { join } from 'path' +import { tmpdir } from 'os' +import { loadServiceTemplates } from '../../../components/core/service/templateLoader.js' + +const valid = (id: string) => ({ + id, + image: 'quay.io/jupyter/datascience-notebook', + tag: 'latest', + exposedPorts: [8888] +}) + +describe('loadServiceTemplates', () => { + let dir: string + beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), 'svc-templates-')) + }) + afterEach(() => { + rmSync(dir, { recursive: true, force: true }) + }) + + it('1. undefined dir → []', async () => { + expect(await loadServiceTemplates(undefined)).to.deep.equal([]) + }) + + it('2. non-existent dir → [] (quiet)', async () => { + expect(await loadServiceTemplates(join(dir, 'does-not-exist'))).to.deep.equal([]) + }) + + it('3. two valid single-template files → both returned', async () => { + writeFileSync(join(dir, 'a.json'), JSON.stringify(valid('jupyter-cpu'))) + writeFileSync(join(dir, 'b.json'), JSON.stringify(valid('jupyter-gpu'))) + const templates = await loadServiceTemplates(dir) + expect(templates.map((t) => t.id).sort()).to.deep.equal([ + 'jupyter-cpu', + 'jupyter-gpu' + ]) + }) + + it('4. 
file containing an array of templates → all returned', async () => { + writeFileSync(join(dir, 'multi.json'), JSON.stringify([valid('one'), valid('two')])) + const templates = await loadServiceTemplates(dir) + expect(templates.map((t) => t.id).sort()).to.deep.equal(['one', 'two']) + }) + + it('5. malformed JSON skipped; others still load', async () => { + writeFileSync(join(dir, 'bad.json'), '{ not json') + writeFileSync(join(dir, 'good.json'), JSON.stringify(valid('good'))) + const templates = await loadServiceTemplates(dir) + expect(templates.map((t) => t.id)).to.deep.equal(['good']) + }) + + it('6. schema-invalid template skipped (tag + dockerfile together)', async () => { + writeFileSync( + join(dir, 'invalid.json'), + JSON.stringify({ ...valid('inv'), dockerfile: 'FROM x' }) + ) + writeFileSync(join(dir, 'ok.json'), JSON.stringify(valid('ok'))) + const templates = await loadServiceTemplates(dir) + expect(templates.map((t) => t.id)).to.deep.equal(['ok']) + }) + + it('7. duplicate id → first (filename-sorted) wins', async () => { + writeFileSync(join(dir, 'a.json'), JSON.stringify({ ...valid('dup'), tag: 'first' })) + writeFileSync(join(dir, 'b.json'), JSON.stringify({ ...valid('dup'), tag: 'second' })) + const templates = await loadServiceTemplates(dir) + expect(templates).to.have.length(1) + expect(templates[0].tag).to.equal('first') + }) + + it('8. non-.json files ignored', async () => { + writeFileSync(join(dir, 'readme.txt'), 'hello') + writeFileSync(join(dir, 'good.json'), JSON.stringify(valid('good'))) + const templates = await loadServiceTemplates(dir) + expect(templates.map((t) => t.id)).to.deep.equal(['good']) + }) + + it('9. 
re-read picks up newly added files (no caching)', async () => { + writeFileSync(join(dir, 'a.json'), JSON.stringify(valid('a'))) + expect(await loadServiceTemplates(dir)).to.have.length(1) + writeFileSync(join(dir, 'b.json'), JSON.stringify(valid('b'))) + expect(await loadServiceTemplates(dir)).to.have.length(2) + }) +}) diff --git a/src/utils/config/constants.ts b/src/utils/config/constants.ts index f43d878d6..7ab77eba4 100644 --- a/src/utils/config/constants.ts +++ b/src/utils/config/constants.ts @@ -30,6 +30,7 @@ export const ENV_TO_CONFIG_MAPPING = { ALLOWED_ADMINS: 'allowedAdmins', ALLOWED_ADMINS_LIST: 'allowedAdminsList', DOCKER_COMPUTE_ENVIRONMENTS: 'dockerComputeEnvironments', + SERVICE_TEMPLATES_PATH: 'serviceTemplatesPath', DOCKER_REGISTRY_AUTHS: 'dockerRegistrysAuth', P2P_BOOTSTRAP_NODES: 'p2pConfig.bootstrapNodes', P2P_BOOTSTRAP_TIMEOUT: 'p2pConfig.bootstrapTimeout', diff --git a/src/utils/config/schemas.ts b/src/utils/config/schemas.ts index 46295f804..cdee5fbc9 100644 --- a/src/utils/config/schemas.ts +++ b/src/utils/config/schemas.ts @@ -200,6 +200,136 @@ export const EnvironmentResourceRefSchema = z }) .passthrough() +// ── Per-environment capability flags ────────────────────────────────── + +const ComputeEnvFeaturesSchema = z + .object({ + computeJobs: z.boolean().optional().default(true), + services: z.boolean().optional().default(true) + }) + .strict() // reject unknown feature keys (catches typos like "computejobs") + +// ── Template resource requirements ──────────────────────────────────── + +const TemplateResourceRequirementSchema = z + .object({ + id: z.string().optional(), + kind: z.enum(['discrete', 'fungible']).optional(), + type: z.string().optional(), + min: z.number().min(0), + recommended: z.number().min(0).optional(), + unit: z.string().optional(), + description: z.string().optional() + }) + .strict() + .refine((r) => r.id !== undefined || r.kind !== undefined, { + message: 'Each resource requirement must specify either "id" or 
"kind"' + }) + .refine((r) => !(r.id !== undefined && r.kind !== undefined), { + message: '"id" and "kind" are mutually exclusive in a resource requirement' + }) + .refine((r) => r.recommended === undefined || r.recommended >= r.min, { + message: '"recommended" must be >= "min"' + }) + +// ── Template ────────────────────────────────────────────────────────── + +const UserConfigurableEnvVarSchema = z + .object({ + key: z.string().min(1), + validation: z.string().optional(), + sensitive: z.boolean().optional() + }) + .strict() + +export const ServiceTemplateSchema = z + .object({ + id: z.string().regex(/^[a-z0-9][a-z0-9_-]{0,63}$/, { + message: 'Template id must match [a-z0-9][a-z0-9_-]{0,63}' + }), + name: z.string().optional(), + description: z.string().optional(), + image: z.string().min(1), + tag: z.string().min(1).optional(), + checksum: z + .string() + .regex(/^sha256:[a-f0-9]{64}$/) + .optional(), + dockerfile: z.string().min(1).optional(), + additionalDockerFiles: z.record(z.string()).optional(), + exposedPorts: z.array(z.number().int().min(1).max(65535)).min(1), + envVars: z.record(z.string()).optional(), + userConfigurableEnvVars: z.array(UserConfigurableEnvVarSchema).optional(), + command: z.array(z.string()).optional(), + entrypoint: z.array(z.string()).optional(), + requiredResources: z.array(TemplateResourceRequirementSchema).optional(), + recommendedResources: z.array(TemplateResourceRequirementSchema).optional() + }) + .strict() + .superRefine((tmpl, ctx) => { + // Validate each regex in userConfigurableEnvVars.validation compiles + ;(tmpl.userConfigurableEnvVars ?? 
[]).forEach((uvar, i) => { + if (uvar.validation) { + try { + // eslint-disable-next-line no-new + new RegExp(uvar.validation) + } catch { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `userConfigurableEnvVars[${i}].validation is not a valid regex: "${uvar.validation}"`, + path: ['userConfigurableEnvVars', i, 'validation'] + }) + } + } + }) + // Warn on shell-injection-prone command patterns (security plan #3) + ;(tmpl.command ?? []).forEach((arg, i) => { + if (/sh\s+-c|`/.test(arg)) { + CONFIG_LOGGER.warn( + `Template "${tmpl.id}" command[${i}] contains shell invocation. ` + + 'This enables injection when userData values are substituted.' + ) + } + }) + + // Image spec mutual exclusion + const imageModesSet = [!!tmpl.tag, !!tmpl.checksum, !!tmpl.dockerfile].filter( + Boolean + ).length + if (imageModesSet > 1) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: + '"tag", "checksum", and "dockerfile" are mutually exclusive — set at most one', + path: ['image'] + }) + } + if (tmpl.additionalDockerFiles && !tmpl.dockerfile) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: '"additionalDockerFiles" requires "dockerfile"', + path: ['additionalDockerFiles'] + }) + } + }) + +// ── Per-daemon service config (no templates here) ───────────────────── + +export const ServiceOnDemandConfigSchema = z + .object({ + enabled: z.boolean(), + nodeHost: z.string().min(1), + hostPortRange: z + .tuple([z.number().int().min(1024), z.number().int().max(65535)]) + .refine((r) => !r || r[0] < r[1], { + message: 'hostPortRange[0] must be less than hostPortRange[1]' + }) + .optional(), + maxDurationSeconds: z.number().int().min(60).optional().default(86400), + allowImageBuild: z.boolean().optional().default(false) + }) + .strict() + export const ComputeResourcesPricingInfoSchema = z.object({ id: z.string(), price: z.number() @@ -265,7 +395,11 @@ export const C2DEnvironmentConfigSchema = z .optional(), free: C2DEnvironmentFreeConfigSchema.optional(), 
resources: z.array(EnvironmentResourceRefSchema).optional(), - enableNetwork: z.boolean().optional().default(false) + enableNetwork: z.boolean().optional().default(false), + features: ComputeEnvFeaturesSchema.optional().default({ + computeJobs: true, + services: true + }) }) .refine( (data) => @@ -296,7 +430,8 @@ export const C2DDockerConfigSchema = z.array( scanImages: z.boolean().optional().default(false), scanImageDBUpdateInterval: z.number().int().min(3600).optional().default(43200), resources: z.array(ComputeResourceSchema).optional(), - environments: z.array(C2DEnvironmentConfigSchema).min(1) + environments: z.array(C2DEnvironmentConfigSchema).min(1), + serviceOnDemand: ServiceOnDemandConfigSchema.optional() }) .superRefine((dockerConfig, ctx) => { // Reject old format: env-level resources with init/driverVersion/platform indicate full ComputeResource objects @@ -445,6 +580,8 @@ export const OceanNodeConfigSchema = z .optional() .default([]), + serviceTemplatesPath: z.string().optional().default('databases/serviceTemplates/'), + dockerRegistrysAuth: jsonFromString(DockerRegistrysSchema).optional().default({}), authorizedDecrypters: addressArrayFromString.optional().default([]), diff --git a/src/utils/constants.ts b/src/utils/constants.ts index 09959d26a..31dadfa68 100644 --- a/src/utils/constants.ts +++ b/src/utils/constants.ts @@ -49,7 +49,13 @@ export const PROTOCOL_COMMANDS = { PERSISTENT_STORAGE_DELETE_FILE: 'persistentStorageDeleteFile', GET_ACCESS_LIST: 'getAccessList', SEARCH_ACCESS_LIST: 'searchAccessList', - GET_ESCROW_EVENTS: 'getEscrowEvents' + GET_ESCROW_EVENTS: 'getEscrowEvents', + SERVICE_GET_TEMPLATES: 'serviceGetTemplates', + SERVICE_START: 'serviceStart', + SERVICE_STOP: 'serviceStop', + SERVICE_RESTART: 'serviceRestart', + SERVICE_GET_STATUS: 'serviceGetStatus', + SERVICE_EXTEND: 'serviceExtend' } // more visible, keep then close to make sure we always update both export const SUPPORTED_PROTOCOL_COMMANDS: string[] = [ @@ -100,7 +106,13 @@ 
export const SUPPORTED_PROTOCOL_COMMANDS: string[] = [ PROTOCOL_COMMANDS.PERSISTENT_STORAGE_DELETE_FILE, PROTOCOL_COMMANDS.GET_ACCESS_LIST, PROTOCOL_COMMANDS.SEARCH_ACCESS_LIST, - PROTOCOL_COMMANDS.GET_ESCROW_EVENTS + PROTOCOL_COMMANDS.GET_ESCROW_EVENTS, + PROTOCOL_COMMANDS.SERVICE_GET_TEMPLATES, + PROTOCOL_COMMANDS.SERVICE_START, + PROTOCOL_COMMANDS.SERVICE_STOP, + PROTOCOL_COMMANDS.SERVICE_RESTART, + PROTOCOL_COMMANDS.SERVICE_GET_STATUS, + PROTOCOL_COMMANDS.SERVICE_EXTEND ] export const MetadataStates = { @@ -472,6 +484,11 @@ export const ENVIRONMENT_VARIABLES: Record = { value: process.env.DOCKER_COMPUTE_ENVIRONMENTS, required: false }, + SERVICE_TEMPLATES_PATH: { + name: 'SERVICE_TEMPLATES_PATH', + value: process.env.SERVICE_TEMPLATES_PATH, + required: false + }, DOCKER_REGISTRY_AUTHS: { name: 'DOCKER_REGISTRY_AUTHS', value: process.env.DOCKER_REGISTRY_AUTHS,