From 8fd6cbf35547582bd16b0f87dd79e430c0d0ee24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Olender?= <92638966+TC-MO@users.noreply.github.com> Date: Tue, 24 Mar 2026 20:42:51 +0100 Subject: [PATCH 1/3] docs: add agent onboarding page for AI integrations Add "Apify for AI agents" page covering MCP connection, CLI usage, API quick-start with curl examples, and plain-text docs access for developers building AI agent integrations with the Apify platform. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../integrations/ai/agent-onboarding.md | 272 ++++++++++++++++++ 1 file changed, 272 insertions(+) create mode 100644 sources/platform/integrations/ai/agent-onboarding.md diff --git a/sources/platform/integrations/ai/agent-onboarding.md b/sources/platform/integrations/ai/agent-onboarding.md new file mode 100644 index 0000000000..402a090fe7 --- /dev/null +++ b/sources/platform/integrations/ai/agent-onboarding.md @@ -0,0 +1,272 @@ +--- +title: Build on Apify with AI agents +sidebar_label: Agent onboarding +description: Connect your AI agent to the Apify platform to scrape the web, extract data, and automate workflows using MCP, APIs, and plain-text docs. +sidebar_position: 0 +slug: /integrations/agent-onboarding +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +This page helps AI agents and LLM-powered applications integrate with the Apify platform. It covers how to access Apify's documentation, connect to the platform, and run Actors programmatically. + +## What Apify does + +Apify is a cloud platform for web scraping, data extraction, and browser automation. The core building blocks are: + +- **Actors** - Serverless cloud programs that perform scraping, crawling, or automation tasks. Over 4,000 ready-made Actors are available in [Apify Store](https://apify.com/store). +- **Datasets** - Append-only storage for structured results (JSON, CSV, Excel, XML). 
+- **Key-value stores** - Storage for arbitrary data by string key (JSON, HTML, images, files). +- **Request queues** - URL queues that coordinate crawling across Actor runs. +- **Apify Proxy** - Built-in proxy infrastructure with datacenter and residential IPs. +- **Schedules** - Cron-based automation to trigger Actor runs on a recurring basis. + +## Prerequisite: Get an API token + +A human must create the Apify account and API token. Sign up at [apify.com](https://apify.com), then get your token from the **Integrations** section in [Apify Console](https://console.apify.com/account#/integrations). + +:::caution Token security +Store the API token securely. It grants full access to the Apify account, including running Actors, accessing storage, and managing resources. +::: + +## Connect via MCP + +The fastest way to give an AI agent access to Apify is through the [Apify MCP server](https://mcp.apify.com). It lets agents discover and run Actors, access storage, and search documentation - all through the Model Context Protocol. + +### Streamable HTTP (recommended) + +Provide this server URL to your MCP client: + +```text +https://mcp.apify.com +``` + +You'll be redirected to sign in and approve the connection via OAuth. + +### Local stdio + +For development and testing, run the MCP server locally: + +```bash +npx @apify/mcp-server +``` + +Configure it in your MCP client: + +```json +{ + "mcpServers": { + "apify": { + "command": "npx", + "args": ["-y", "@apify/mcp-server"], + "env": { + "APIFY_TOKEN": "your-api-token" + } + } + } +} +``` + +Read the full [MCP server documentation](/integrations/mcp) for configuration options, available tools, and client-specific setup instructions. + +## Access documentation as plain text + +All Apify documentation is available in formats optimized for LLM consumption. 
+ +### Markdown URLs + +Append `.md` to any documentation page URL to get a clean markdown version: + +```text +https://docs.apify.com/platform/actors.md +https://docs.apify.com/platform/storage.md +https://docs.apify.com/api/v2.md +``` + +### Content negotiation + +Request markdown through the `Accept` header: + +```bash +curl -H "Accept: text/markdown" https://docs.apify.com/platform/actors +``` + +### llms.txt + +The documentation index is available at: + +```text +https://docs.apify.com/llms.txt +``` + +This file lists all documentation pages with descriptions and links. For the complete documentation in a single file: + +```text +https://docs.apify.com/llms-full.txt +``` + +:::note File size +`llms.txt` is large (190K+ characters). For targeted lookups, prefer `.md` URLs for specific pages or use the MCP server's documentation search tool. +::: + +## Common agent workflows + +### Find an Actor for a task + +Search [Apify Store](https://apify.com/store) for an Actor that matches your needs. Use the Apify API to search programmatically: + +```bash +curl "https://api.apify.com/v2/store?search=google+maps&limit=5" \ + -H "Authorization: Bearer YOUR_API_TOKEN" +``` + +Each Actor in the response includes its `id`, `name`, `description`, and `stats` (total runs, user ratings). + +### Run an Actor and get results + +The typical workflow is: start a run, wait for it to finish, then fetch results from the default dataset. 
+ + + + +```javascript +import { ApifyClient } from 'apify-client'; + +const client = new ApifyClient({ token: 'YOUR_API_TOKEN' }); + +// Run an Actor and wait for it to finish +const run = await client.actor('apify/web-scraper').call({ + startUrls: [{ url: 'https://example.com' }], + maxPagesPerCrawl: 10, +}); + +// Fetch results from the default dataset +const { items } = await client.dataset(run.defaultDatasetId).listItems(); +console.log(items); +``` + + + + +```python +from apify_client import ApifyClient + +client = ApifyClient("YOUR_API_TOKEN") + +# Run an Actor and wait for it to finish +run = client.actor("apify/web-scraper").call(run_input={ + "startUrls": [{"url": "https://example.com"}], + "maxPagesPerCrawl": 10, +}) + +# Fetch results from the default dataset +items = client.dataset(run["defaultDatasetId"]).list_items().items +print(items) +``` + + + + +### Run an Actor with synchronous response + +For quick tasks, use the synchronous endpoint to start a run and get results in a single request (waits up to 5 minutes): + +```bash +curl -X POST "https://api.apify.com/v2/acts/apify~web-scraper/run-sync-get-dataset-items" \ + -H "Authorization: Bearer YOUR_API_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "startUrls": [{"url": "https://example.com"}], + "maxPagesPerCrawl": 10 + }' +``` + +This returns dataset items directly in the response body. + +### Get results from an existing run + +If you have a run ID, fetch results from its default dataset: + + + + +```javascript +const { items } = await client.dataset('DATASET_ID').listItems({ + limit: 100, + offset: 0, + format: 'json', +}); +``` + + + + +```python +items = client.dataset("DATASET_ID").list_items( + limit=100, + offset=0, +).items +``` + + + + +Datasets also support CSV, Excel, XML, and RSS export formats through the `format` parameter. 
+ +## API quick reference + +| Action | Method | Endpoint | +|---|---|---| +| List Actors in store | `GET` | `/v2/store` | +| Get Actor details | `GET` | `/v2/acts/{actorId}` | +| Run an Actor | `POST` | `/v2/acts/{actorId}/runs` | +| Run Actor (sync) | `POST` | `/v2/acts/{actorId}/run-sync-get-dataset-items` | +| Get run status | `GET` | `/v2/actor-runs/{runId}` | +| Get dataset items | `GET` | `/v2/datasets/{datasetId}/items` | +| Get key-value record | `GET` | `/v2/key-value-stores/{storeId}/records/{key}` | +| Abort a run | `POST` | `/v2/actor-runs/{runId}/abort` | + +Base URL: `https://api.apify.com` + +All endpoints require authentication via `Authorization: Bearer YOUR_API_TOKEN` header. + +Read the full [API reference](/api/v2) for all available endpoints. + +## AI framework integrations + +Apify integrates with popular AI and agent frameworks: + +| Framework | Integration | +|---|---| +| LangChain | [Use Apify with LangChain](/integrations/langchain) | +| LangGraph | [Use Apify with LangGraph](/integrations/langgraph) | +| CrewAI | [Use Apify with CrewAI](/integrations/crewai) | +| OpenAI Agents SDK | [Use Apify with OpenAI Agents](/integrations/openai-agents) | +| Google ADK | [Use Apify with Google ADK](/integrations/google-adk) | +| Vercel AI SDK | [Use Apify with Vercel AI SDK](/integrations/vercel-ai-sdk) | +| Haystack | [Use Apify with Haystack](/integrations/haystack) | +| Mastra | [Use Apify with Mastra](/integrations/mastra) | + +## Popular Actors for AI agents + +These Actors are commonly used in agentic workflows: + +| Actor | What it does | +|---|---| +| [Website Content Crawler](https://apify.com/apify/website-content-crawler) | Crawl websites and extract text content in markdown, HTML, or plain text | +| [Google Search Scraper](https://apify.com/apify/google-search-scraper) | Scrape Google Search results for any query | +| [Google Maps Scraper](https://apify.com/compass/crawler-google-places) | Extract business data from Google Maps | +| 
[Instagram Scraper](https://apify.com/apify/instagram-scraper) | Scrape posts, profiles, and hashtags from Instagram | +| [Amazon Product Scraper](https://apify.com/junglee/amazon-crawler) | Extract product data, pricing, and reviews from Amazon | + +Browse all 4,000+ Actors in [Apify Store](https://apify.com/store). + +## Next steps + +- [MCP server documentation](/integrations/mcp) - Full MCP setup and configuration guide +- [Actor development](/platform/actors/development) - Build your own Actors +- [API reference](/api/v2) - Complete REST API documentation +- [API client for JavaScript](/api/client/js) - JavaScript client library +- [API client for Python](/api/client/python) - Python client library +- [Storage documentation](/platform/storage) - Datasets, key-value stores, and request queues From 4e1d6707d3bb944027ade876e0274146f85c240b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Olender?= <92638966+TC-MO@users.noreply.github.com> Date: Tue, 24 Mar 2026 20:56:56 +0100 Subject: [PATCH 2/3] docs: rewrite agent onboarding as "Apify for AI agents" Replace initial draft with restructured page at /integrations/ai-agents: - Tiered platform overview (core concepts vs additional) - New CLI section with run, inspect, and retrieve commands - curl-only API examples (sync and async workflows) - Honest plain-text docs section with llms.txt limitations - Thin links to MCP, CLI, and API docs instead of duplicating content Add prominent cross-links: - AI agents card on platform homepage - AI agents card in integrations index (first in AI/LLM section) - Tip box on Build with AI page linking to this page Co-Authored-By: Claude Opus 4.6 (1M context) --- .../development/quick-start/build_with_ai.md | 4 + sources/platform/index.mdx | 5 + .../integrations/ai/agent-onboarding.md | 272 ------------------ sources/platform/integrations/ai/ai-agents.md | 147 ++++++++++ sources/platform/integrations/index.mdx | 6 + 5 files changed, 162 insertions(+), 272 deletions(-) delete 
mode 100644 sources/platform/integrations/ai/agent-onboarding.md create mode 100644 sources/platform/integrations/ai/ai-agents.md diff --git a/sources/platform/actors/development/quick-start/build_with_ai.md b/sources/platform/actors/development/quick-start/build_with_ai.md index 936f90b70d..8559752368 100644 --- a/sources/platform/actors/development/quick-start/build_with_ai.md +++ b/sources/platform/actors/development/quick-start/build_with_ai.md @@ -15,6 +15,10 @@ import TabItem from '@theme/TabItem'; This guide provides best practices for building new Actors or improving existing ones using AI code generation tools by providing the AI agents with the right instructions and context. +:::tip Use Apify from your agent +If you want to **use** Apify from your agent rather than **build** Actors with AI, see [Apify for AI agents](/platform/integrations/ai-agents). +::: + The methods on this page are complementary. Start with the [AI coding assistant instructions](#ai-coding-assistant-instructions) or [Actor templates with AGENTS.md](#use-actor-templates-with-agentsmd) to get going, then add [Agent Skills](#use-agent-skills) and the [Apify MCP server](#use-apify-mcp-server) to give your assistant more context and better results. ## Quick start diff --git a/sources/platform/index.mdx b/sources/platform/index.mdx index 5a6963d92b..aa7df2def4 100644 --- a/sources/platform/index.mdx +++ b/sources/platform/index.mdx @@ -33,6 +33,11 @@ Learn how to run any Actor in Apify Store or create your own. A step-by-step gui desc="Learn everything about web scraping and automation with free courses that will turn you into an expert scraper developer." 
to="/academy" /> + ## Contents diff --git a/sources/platform/integrations/ai/agent-onboarding.md b/sources/platform/integrations/ai/agent-onboarding.md deleted file mode 100644 index 402a090fe7..0000000000 --- a/sources/platform/integrations/ai/agent-onboarding.md +++ /dev/null @@ -1,272 +0,0 @@ ---- -title: Build on Apify with AI agents -sidebar_label: Agent onboarding -description: Connect your AI agent to the Apify platform to scrape the web, extract data, and automate workflows using MCP, APIs, and plain-text docs. -sidebar_position: 0 -slug: /integrations/agent-onboarding ---- - -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; - -This page helps AI agents and LLM-powered applications integrate with the Apify platform. It covers how to access Apify's documentation, connect to the platform, and run Actors programmatically. - -## What Apify does - -Apify is a cloud platform for web scraping, data extraction, and browser automation. The core building blocks are: - -- **Actors** - Serverless cloud programs that perform scraping, crawling, or automation tasks. Over 4,000 ready-made Actors are available in [Apify Store](https://apify.com/store). -- **Datasets** - Append-only storage for structured results (JSON, CSV, Excel, XML). -- **Key-value stores** - Storage for arbitrary data by string key (JSON, HTML, images, files). -- **Request queues** - URL queues that coordinate crawling across Actor runs. -- **Apify Proxy** - Built-in proxy infrastructure with datacenter and residential IPs. -- **Schedules** - Cron-based automation to trigger Actor runs on a recurring basis. - -## Prerequisite: Get an API token - -A human must create the Apify account and API token. Sign up at [apify.com](https://apify.com), then get your token from the **Integrations** section in [Apify Console](https://console.apify.com/account#/integrations). - -:::caution Token security -Store the API token securely. 
It grants full access to the Apify account, including running Actors, accessing storage, and managing resources. -::: - -## Connect via MCP - -The fastest way to give an AI agent access to Apify is through the [Apify MCP server](https://mcp.apify.com). It lets agents discover and run Actors, access storage, and search documentation - all through the Model Context Protocol. - -### Streamable HTTP (recommended) - -Provide this server URL to your MCP client: - -```text -https://mcp.apify.com -``` - -You'll be redirected to sign in and approve the connection via OAuth. - -### Local stdio - -For development and testing, run the MCP server locally: - -```bash -npx @apify/mcp-server -``` - -Configure it in your MCP client: - -```json -{ - "mcpServers": { - "apify": { - "command": "npx", - "args": ["-y", "@apify/mcp-server"], - "env": { - "APIFY_TOKEN": "your-api-token" - } - } - } -} -``` - -Read the full [MCP server documentation](/integrations/mcp) for configuration options, available tools, and client-specific setup instructions. - -## Access documentation as plain text - -All Apify documentation is available in formats optimized for LLM consumption. - -### Markdown URLs - -Append `.md` to any documentation page URL to get a clean markdown version: - -```text -https://docs.apify.com/platform/actors.md -https://docs.apify.com/platform/storage.md -https://docs.apify.com/api/v2.md -``` - -### Content negotiation - -Request markdown through the `Accept` header: - -```bash -curl -H "Accept: text/markdown" https://docs.apify.com/platform/actors -``` - -### llms.txt - -The documentation index is available at: - -```text -https://docs.apify.com/llms.txt -``` - -This file lists all documentation pages with descriptions and links. For the complete documentation in a single file: - -```text -https://docs.apify.com/llms-full.txt -``` - -:::note File size -`llms.txt` is large (190K+ characters). 
For targeted lookups, prefer `.md` URLs for specific pages or use the MCP server's documentation search tool. -::: - -## Common agent workflows - -### Find an Actor for a task - -Search [Apify Store](https://apify.com/store) for an Actor that matches your needs. Use the Apify API to search programmatically: - -```bash -curl "https://api.apify.com/v2/store?search=google+maps&limit=5" \ - -H "Authorization: Bearer YOUR_API_TOKEN" -``` - -Each Actor in the response includes its `id`, `name`, `description`, and `stats` (total runs, user ratings). - -### Run an Actor and get results - -The typical workflow is: start a run, wait for it to finish, then fetch results from the default dataset. - - - - -```javascript -import { ApifyClient } from 'apify-client'; - -const client = new ApifyClient({ token: 'YOUR_API_TOKEN' }); - -// Run an Actor and wait for it to finish -const run = await client.actor('apify/web-scraper').call({ - startUrls: [{ url: 'https://example.com' }], - maxPagesPerCrawl: 10, -}); - -// Fetch results from the default dataset -const { items } = await client.dataset(run.defaultDatasetId).listItems(); -console.log(items); -``` - - - - -```python -from apify_client import ApifyClient - -client = ApifyClient("YOUR_API_TOKEN") - -# Run an Actor and wait for it to finish -run = client.actor("apify/web-scraper").call(run_input={ - "startUrls": [{"url": "https://example.com"}], - "maxPagesPerCrawl": 10, -}) - -# Fetch results from the default dataset -items = client.dataset(run["defaultDatasetId"]).list_items().items -print(items) -``` - - - - -### Run an Actor with synchronous response - -For quick tasks, use the synchronous endpoint to start a run and get results in a single request (waits up to 5 minutes): - -```bash -curl -X POST "https://api.apify.com/v2/acts/apify~web-scraper/run-sync-get-dataset-items" \ - -H "Authorization: Bearer YOUR_API_TOKEN" \ - -H "Content-Type: application/json" \ - -d '{ - "startUrls": [{"url": "https://example.com"}], - 
"maxPagesPerCrawl": 10 - }' -``` - -This returns dataset items directly in the response body. - -### Get results from an existing run - -If you have a run ID, fetch results from its default dataset: - - - - -```javascript -const { items } = await client.dataset('DATASET_ID').listItems({ - limit: 100, - offset: 0, - format: 'json', -}); -``` - - - - -```python -items = client.dataset("DATASET_ID").list_items( - limit=100, - offset=0, -).items -``` - - - - -Datasets also support CSV, Excel, XML, and RSS export formats through the `format` parameter. - -## API quick reference - -| Action | Method | Endpoint | -|---|---|---| -| List Actors in store | `GET` | `/v2/store` | -| Get Actor details | `GET` | `/v2/acts/{actorId}` | -| Run an Actor | `POST` | `/v2/acts/{actorId}/runs` | -| Run Actor (sync) | `POST` | `/v2/acts/{actorId}/run-sync-get-dataset-items` | -| Get run status | `GET` | `/v2/actor-runs/{runId}` | -| Get dataset items | `GET` | `/v2/datasets/{datasetId}/items` | -| Get key-value record | `GET` | `/v2/key-value-stores/{storeId}/records/{key}` | -| Abort a run | `POST` | `/v2/actor-runs/{runId}/abort` | - -Base URL: `https://api.apify.com` - -All endpoints require authentication via `Authorization: Bearer YOUR_API_TOKEN` header. - -Read the full [API reference](/api/v2) for all available endpoints. 
- -## AI framework integrations - -Apify integrates with popular AI and agent frameworks: - -| Framework | Integration | -|---|---| -| LangChain | [Use Apify with LangChain](/integrations/langchain) | -| LangGraph | [Use Apify with LangGraph](/integrations/langgraph) | -| CrewAI | [Use Apify with CrewAI](/integrations/crewai) | -| OpenAI Agents SDK | [Use Apify with OpenAI Agents](/integrations/openai-agents) | -| Google ADK | [Use Apify with Google ADK](/integrations/google-adk) | -| Vercel AI SDK | [Use Apify with Vercel AI SDK](/integrations/vercel-ai-sdk) | -| Haystack | [Use Apify with Haystack](/integrations/haystack) | -| Mastra | [Use Apify with Mastra](/integrations/mastra) | - -## Popular Actors for AI agents - -These Actors are commonly used in agentic workflows: - -| Actor | What it does | -|---|---| -| [Website Content Crawler](https://apify.com/apify/website-content-crawler) | Crawl websites and extract text content in markdown, HTML, or plain text | -| [Google Search Scraper](https://apify.com/apify/google-search-scraper) | Scrape Google Search results for any query | -| [Google Maps Scraper](https://apify.com/compass/crawler-google-places) | Extract business data from Google Maps | -| [Instagram Scraper](https://apify.com/apify/instagram-scraper) | Scrape posts, profiles, and hashtags from Instagram | -| [Amazon Product Scraper](https://apify.com/junglee/amazon-crawler) | Extract product data, pricing, and reviews from Amazon | - -Browse all 4,000+ Actors in [Apify Store](https://apify.com/store). 
- -## Next steps - -- [MCP server documentation](/integrations/mcp) - Full MCP setup and configuration guide -- [Actor development](/platform/actors/development) - Build your own Actors -- [API reference](/api/v2) - Complete REST API documentation -- [API client for JavaScript](/api/client/js) - JavaScript client library -- [API client for Python](/api/client/python) - Python client library -- [Storage documentation](/platform/storage) - Datasets, key-value stores, and request queues diff --git a/sources/platform/integrations/ai/ai-agents.md b/sources/platform/integrations/ai/ai-agents.md new file mode 100644 index 0000000000..35bf669c24 --- /dev/null +++ b/sources/platform/integrations/ai/ai-agents.md @@ -0,0 +1,147 @@ +--- +title: Apify for AI agents +sidebar_label: AI agents +description: Connect your AI agent to the Apify platform to scrape the web, extract data, and automate workflows using MCP, the CLI, or the REST API. +sidebar_position: 0.0 +slug: /integrations/ai-agents +--- + +This page is for developers integrating AI agents with the Apify platform. It covers how to connect, run Actors, retrieve data, and access documentation programmatically. + +## What Apify does + +Apify is a cloud platform for web scraping, data extraction, and browser automation. The typical agent workflow is: find an Actor, run it, get structured data back. + +### Core concepts + +- _Actors_ - serverless cloud programs that perform scraping, crawling, or automation tasks. Over 4,000 ready-made Actors are available in [Apify Store](https://apify.com/store). +- _Datasets_ - Append-only storage for structured results. Every Actor run creates a default dataset containing its output. Export as JSON, CSV, Excel, XML, or RSS. +- The _Apify API_ - RESTful API at `https://api.apify.com/v2` for all platform operations. 
+ +### Additional concepts + +- [Key-value stores](/platform/storage/key-value-store) - Store arbitrary data by string key (JSON, HTML, images, files) +- [Request queues](/platform/storage/request-queue) - URL queues that coordinate crawling across Actor runs +- [Apify Proxy](/platform/proxy) - Built-in proxy infrastructure with datacenter and residential IPs +- [Schedules](/platform/schedules) - Cron-based automation to trigger Actor runs on a recurring basis + +## Get an API token + +Sign up at [apify.com](https://apify.com), then get your API token from the **Integrations** section in [Apify Console](https://console.apify.com/account#/integrations). The token authenticates all API and CLI requests. + +## Connect via MCP + +The [Apify MCP server](https://mcp.apify.com) lets AI agents discover and run Actors, access storage, and search documentation through the [Model Context Protocol](https://modelcontextprotocol.io). Point your MCP client to `https://mcp.apify.com` to connect via OAuth, or run the server locally with `npx @apify/actors-mcp-server`. + +Read the full [MCP server documentation](/platform/integrations/mcp) for configuration options, available tools, and client-specific setup. + +## Use the Apify CLI + +The [Apify CLI](/cli) provides direct command-line access to the platform. For AI agents, the CLI is a lightweight, token-efficient alternative to MCP. 
+ +Install and authenticate: + +```bash +npm install -g apify-cli +apify login --token YOUR_API_TOKEN +``` + +Get an Actor's README and input schema: + +```bash +apify actors info apify/web-scraper --readme +apify actors info apify/web-scraper --input +``` + +Run an Actor and print its dataset output: + +```bash +apify actors call apify/web-scraper \ + -i '{"startUrls": [{"url": "https://example.com"}], "maxPagesPerCrawl": 10}' \ + --output-dataset +``` + +Retrieve items from an existing dataset: + +```bash +apify datasets get-items DATASET_ID --format json +``` + +Read the full [CLI documentation](/cli) for all available commands. + +## Use the API directly + +All platform operations are available through the REST API. Authenticate with the `Authorization: Bearer YOUR_API_TOKEN` header. + +Run an Actor and get results in a single request (waits up to 5 minutes): + +```bash +curl -X POST "https://api.apify.com/v2/acts/apify~web-scraper/run-sync-get-dataset-items" \ + -H "Authorization: Bearer YOUR_API_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"startUrls": [{"url": "https://example.com"}], "maxPagesPerCrawl": 10}' +``` + +For longer runs, start the Actor asynchronously and poll for completion: + +```bash +# Start a run +curl -X POST "https://api.apify.com/v2/acts/apify~web-scraper/runs" \ + -H "Authorization: Bearer YOUR_API_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"startUrls": [{"url": "https://example.com"}], "maxPagesPerCrawl": 10}' + +# Check run status (RUN_ID is data.id from the response above) +curl "https://api.apify.com/v2/actor-runs/RUN_ID" \ + -H "Authorization: Bearer YOUR_API_TOKEN" + +# Get results from the default dataset (DATASET_ID is data.defaultDatasetId from the run) +curl "https://api.apify.com/v2/datasets/DATASET_ID/items" \ + -H "Authorization: Bearer YOUR_API_TOKEN" +``` + +### API quick reference + +| Action | Method | Endpoint | +| --- | --- | --- | +| Search Actors in store | `GET` | `/v2/store` | +| Get Actor details | `GET` | `/v2/acts/{actorId}` | +| Run an 
Actor | `POST` | `/v2/acts/{actorId}/runs` | +| Run Actor (sync) | `POST` | `/v2/acts/{actorId}/run-sync-get-dataset-items` | +| Get run status | `GET` | `/v2/actor-runs/{runId}` | +| Get dataset items | `GET` | `/v2/datasets/{datasetId}/items` | +| Get key-value record | `GET` | `/v2/key-value-stores/{storeId}/records/{key}` | +| Abort a run | `POST` | `/v2/actor-runs/{runId}/abort` | + +Base URL: `https://api.apify.com` + +Read the full [API reference](/api/v2) for all available endpoints. + +## Access documentation as plain text + +Apify documentation is available in formats optimized for programmatic consumption. + +Append `.md` to any documentation page URL to get a clean markdown version: + +```text +https://docs.apify.com/platform/actors.md +https://docs.apify.com/platform/storage.md +https://docs.apify.com/api/v2.md +``` + +Request markdown through the `Accept` header: + +```bash +curl -H "Accept: text/markdown" https://docs.apify.com/platform/actors +``` + +A documentation index is available at `https://docs.apify.com/llms.txt` and the complete documentation in a single file at `https://docs.apify.com/llms-full.txt`. These files follow the [llms.txt specification](https://llmstxt.org/), but can be very large and may be truncated by agents with limited context windows. For targeted lookups, prefer `.md` URLs for specific pages, the MCP server's documentation search tools, or the Apify CLI. 
+ +## Next steps + +- [MCP server documentation](/platform/integrations/mcp) - Full setup and configuration guide +- [CLI documentation](/cli) - Complete command reference +- [API reference](/api/v2) - All REST API endpoints +- [API client for JavaScript](/api/client/js) - JavaScript client library +- [API client for Python](/api/client/python) - Python client library +- [Storage documentation](/platform/storage) - Datasets, key-value stores, and request queues diff --git a/sources/platform/integrations/index.mdx b/sources/platform/integrations/index.mdx index 6e8d8246b9..3d7431e1ad 100644 --- a/sources/platform/integrations/index.mdx +++ b/sources/platform/integrations/index.mdx @@ -168,6 +168,12 @@ If you are working on AI/LLM-related applications, we recommend looking into the These integrations allow you to use Apify Actors as tools and data sources. + Date: Tue, 24 Mar 2026 21:05:13 +0100 Subject: [PATCH 3/3] change hallucinated # of Actors --- sources/platform/integrations/ai/ai-agents.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources/platform/integrations/ai/ai-agents.md b/sources/platform/integrations/ai/ai-agents.md index 35bf669c24..63e89c92b3 100644 --- a/sources/platform/integrations/ai/ai-agents.md +++ b/sources/platform/integrations/ai/ai-agents.md @@ -14,7 +14,7 @@ Apify is a cloud platform for web scraping, data extraction, and browser automat ### Core concepts -- _Actors_ - serverless cloud programs that perform scraping, crawling, or automation tasks. Over 4,000 ready-made Actors are available in [Apify Store](https://apify.com/store). +- _Actors_ - serverless cloud programs that perform scraping, crawling, or automation tasks. Thousands of ready-made Actors are available in [Apify Store](https://apify.com/store). - _Datasets_ - Append-only storage for structured results. Every Actor run creates a default dataset containing its output. Export as JSON, CSV, Excel, XML, or RSS. 
- The _Apify API_ - RESTful API at `https://api.apify.com/v2` for all platform operations.