Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ jobs:
steps:
- uses: actions/checkout@v4
- uses: oven-sh/setup-bun@v2
- name: Use local scrapegraph-js package
run: sed -i 's/"scrapegraph-js": "\^2.2.0"/"scrapegraph-js": "file:..\/.."/' packages/ai-sdk/package.json
- run: bun install
- run: bun run test

Expand All @@ -22,5 +24,10 @@ jobs:
steps:
- uses: actions/checkout@v4
- uses: oven-sh/setup-bun@v2
- name: Use local scrapegraph-js package
run: sed -i 's/"scrapegraph-js": "\^2.2.0"/"scrapegraph-js": "file:..\/.."/' packages/ai-sdk/package.json
- run: bun install
- run: bun run build
- run: bun run check
- run: cd packages/ai-sdk && bun run check
- run: cd packages/ai-sdk && bun run build
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,16 @@ bun add scrapegraph-js

## Quick Start

### API key

Log in to the [ScrapeGraphAI dashboard](https://scrapegraphai.com/) to create an API key. The dashboard also shows your request history, usage, credits, and crawl/monitor activity.

Set it in your environment:

```bash
export SGAI_API_KEY=...
```

```ts
import { ScrapeGraphAI } from "scrapegraph-js";

Expand Down Expand Up @@ -140,6 +150,12 @@ const start = await sgai.crawl.start({
// Check status
const status = await sgai.crawl.get(start.data?.id!);

// Fetch paginated pages with resolved scrape results
const pages = await sgai.crawl.pages(start.data?.id!, {
cursor: 0,
limit: 50,
});

// Control
await sgai.crawl.stop(id);
await sgai.crawl.resume(id);
Expand Down
7 changes: 4 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
{
"name": "scrapegraph-js",
"version": "2.1.0",
"version": "2.2.0",
"description": "Official JavaScript/TypeScript SDK for the ScrapeGraph AI API — smart web scraping powered by AI",
"type": "module",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"exports": {
".": {
"import": "./dist/index.js",
"types": "./dist/index.d.ts"
"types": "./dist/index.d.ts",
"import": "./dist/index.js"
}
},
"workspaces": ["packages/*"],
"scripts": {
"dev": "tsup --watch",
"build": "tsup",
Expand Down
200 changes: 200 additions & 0 deletions packages/ai-sdk/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
# ScrapeGraphAI AI SDK Tools

[![npm version](https://badge.fury.io/js/%40scrapegraph-ai%2Fai-sdk.svg)](https://www.npmjs.com/package/@scrapegraph-ai/ai-sdk)
[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)

<p align="center">
<a href="https://scrapegraphai.com">
<img src="../../media/banner.png" alt="ScrapeGraphAI AI SDK Tools" style="width: 100%;">
</a>
</p>

Vercel [AI SDK](https://ai-sdk.dev/docs/introduction) tools for the ScrapeGraphAI API.

## Install

```bash
npm i @scrapegraph-ai/ai-sdk ai
# or
bun add @scrapegraph-ai/ai-sdk ai
```

`ai` is a peer dependency. Install the model provider package you use, for example:

```bash
npm i @ai-sdk/openai
# or
bun add @ai-sdk/openai
```

## Quick Start

### API key

Log in to the [ScrapeGraphAI dashboard](https://scrapegraphai.com/) to create an API key. The dashboard also shows your request history, usage, credits, and crawl/monitor activity.

Set it in your environment:

```bash
export SGAI_API_KEY=...
```

Minimal scrape-only setup:

```ts
import { openai } from "@ai-sdk/openai";
import { generateText, stepCountIs } from "ai";
import { scrapeTool } from "@scrapegraph-ai/ai-sdk";

const result = await generateText({
model: openai("gpt-5-nano"),
prompt: "Find the main headline on https://example.com",
tools: {
scrape: scrapeTool(),
},
stopWhen: stepCountIs(5),
});

console.log(result.text);
```

Use every ScrapeGraphAI tool group:

```ts
import { openai } from "@ai-sdk/openai";
import { generateText, stepCountIs } from "ai";
import {
crawlTools,
extractTool,
monitorTools,
scrapeTool,
searchTool,
} from "@scrapegraph-ai/ai-sdk";

const result = await generateText({
model: openai("gpt-5-nano"),
prompt: "Search for ScrapeGraphAI docs, scrape the best page, and summarize it.",
tools: {
scrape: scrapeTool(),
extract: extractTool(),
search: searchTool(),
...crawlTools(),
...monitorTools(),
},
stopWhen: stepCountIs(10),
});

console.log(result.text);
```

Tools read `SGAI_API_KEY` from the environment by default. You can also pass it explicitly:

```ts
const tools = {
scrape: scrapeTool({ apiKey: process.env.SGAI_API_KEY }),
};
```

## Tools

### scrapeTool

Scrape a webpage with ScrapeGraphAI. Supports markdown, html, json extraction, links, images, summary, branding, and screenshots.

```ts
import { scrapeTool } from "@scrapegraph-ai/ai-sdk";

const tools = {
scrape: scrapeTool(),
};
```

### extractTool

Extract structured JSON from a URL, HTML, or markdown with a natural-language prompt.

```ts
import { extractTool } from "@scrapegraph-ai/ai-sdk";

const tools = {
extract: extractTool(),
};
```

### searchTool

Search the web and optionally extract structured data from search results.

```ts
import { searchTool } from "@scrapegraph-ai/ai-sdk";

const tools = {
search: searchTool(),
};
```

### crawlTools

Start, poll, page through, stop, resume, and delete ScrapeGraphAI crawl jobs.

```ts
import { crawlTools } from "@scrapegraph-ai/ai-sdk";

const tools = {
...crawlTools(),
};
```

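Crawl page retrieval is paginated. Use `getCrawl` for status, then `getCrawlPages` for pages and resolved scrape results. To register only the crawl tools you need, use the individual tool factories instead of `crawlTools()`: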

```ts
const tools = {
startCrawl: startCrawlTool(),
getCrawl: getCrawlTool(),
getCrawlPages: getCrawlPagesTool(),
};
```

### monitorTools

Create, list, update, pause, resume, delete, and fetch activity for ScrapeGraphAI monitors.

```ts
import { monitorTools } from "@scrapegraph-ai/ai-sdk";

const tools = {
...monitorTools(),
};
```

## Examples

| Example | Description |
|---------|-------------|
| [`hacker-news.ts`](examples/hacker-news.ts) | Scrape Hacker News with AI SDK tools |
| [`crawl-blog.ts`](examples/crawl-blog.ts) | Crawl ScrapeGraphAI blog pages, fetch paginated crawl results, and summarize them |

Run an example:

```bash
OPENAI_API_KEY=... SGAI_API_KEY=... bun examples/crawl-blog.ts
```

## Environment Variables

| Variable | Description |
|----------|-------------|
| `SGAI_API_KEY` | ScrapeGraphAI API key |
| `OPENAI_API_KEY` | Required by the OpenAI provider examples |

## Development

```bash
bun install
bun run build
bun run check
```

## License

MIT - [ScrapeGraphAI](https://scrapegraphai.com)
77 changes: 77 additions & 0 deletions packages/ai-sdk/examples/crawl-blog.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import { openai } from "@ai-sdk/openai";
import { generateText, stepCountIs, type ModelMessage } from "ai";
import { stdin as input, stdout as output } from "node:process";
import { createInterface } from "node:readline/promises";
import { crawlTools } from "@scrapegraph-ai/ai-sdk";

// Opening request sent to the model before the interactive prompt loop starts.
const initialPrompt =
"Find 10 https://scrapegraphai.com/ blog posts. Start a crawl, poll its status, fetch crawled pages with getCrawlPages, then summarize what you found.";
// Conversation history shared across turns: run() appends each user prompt and
// the response messages returned by the model.
const messages: ModelMessage[] = [];
// Controller of the generation currently in flight, or undefined when idle.
// The SIGINT handler reads it to decide between aborting and exiting.
let activeController: AbortController | undefined;

/**
 * Runs one conversation turn.
 *
 * Appends `prompt` to the shared `messages` history, calls `generateText`
 * with the crawl tools, logs every finished step (assistant text, tool calls,
 * tool results), then appends the response messages to the history and prints
 * the final text.
 *
 * Failures are reported on stderr and never rethrown: an aborted turn prints
 * "[aborted]", anything else prints the error message (or the raw value when
 * it is not an Error).
 *
 * @param prompt - User message to add to the conversation.
 */
async function run(prompt: string) {
  const controller = new AbortController();
  messages.push({ role: "user", content: prompt });
  // Publish the controller so the SIGINT handler can cancel this turn.
  activeController = controller;

  try {
    const result = await generateText({
      model: openai("gpt-5-nano"),
      messages,
      tools: { ...crawlTools() },
      stopWhen: stepCountIs(20),
      abortSignal: controller.signal,
      onStepFinish: (step) => {
        if (step.text) {
          console.log(`\n[assistant]\n${step.text}`);
        }

        for (const { toolName, input: args } of step.toolCalls) {
          console.log(`\n[tool] ${toolName}`);
          console.log(JSON.stringify(args, null, 2));
        }

        for (const { toolName, output: payload } of step.toolResults) {
          console.log(`\n[result] ${toolName}`);
          console.log(JSON.stringify(payload, null, 2));
        }
      },
    });

    // Keep the model's messages so the next turn sees the full conversation.
    messages.push(...result.response.messages);
    console.log(`\n${result.text}\n`);
  } catch (error) {
    const report = controller.signal.aborted
      ? "[aborted]"
      : error instanceof Error
        ? error.message
        : error;
    console.error(report);
  } finally {
    // Only clear the slot if it still belongs to this turn.
    if (activeController === controller) {
      activeController = undefined;
    }
  }
}

const rl = createInterface({ input, output });

/**
 * Ctrl+C handler.
 *
 * While a generation is in flight it aborts that generation and keeps the
 * session alive; when idle it closes the readline interface and exits.
 */
function handleInterrupt(): void {
  output.write("\n");
  if (activeController) {
    activeController.abort();
    return;
  }

  rl.close();
  process.exit(0);
}

// On a TTY, readline switches the terminal to raw mode and reports Ctrl+C as
// its own 'SIGINT' event instead of a process signal, so listen on both: the
// interface covers interactive terminals, the process covers real signals
// (e.g. `kill -INT`, or stdin that is not a TTY).
rl.on("SIGINT", handleInterrupt);
process.on("SIGINT", handleInterrupt);

// Aborted once the interface closes (Ctrl+D / end of piped input) so a pending
// question is rejected instead of being left unsettled.
const inputClosed = new AbortController();
rl.once("close", () => inputClosed.abort());

await run(initialPrompt);

while (!inputClosed.signal.aborted) {
  let answer: string;
  try {
    answer = await rl.question("> ", { signal: inputClosed.signal });
  } catch {
    // question() rejects only when the input was closed or the wait was aborted.
    break;
  }

  const prompt = answer.trim();

  if (prompt) {
    await run(prompt);
  }
}

rl.close();
15 changes: 15 additions & 0 deletions packages/ai-sdk/examples/hacker-news.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import { openai } from "@ai-sdk/openai";
import { generateText, stepCountIs } from "ai";
import { scrapeTool } from "@scrapegraph-ai/ai-sdk";

// Let the model call the scrape tool, then print its written summary.
const summary = await generateText({
  model: openai("gpt-5-nano"),
  prompt:
    "Scrape Hacker News and write a short, concise summary of what people are talking about today.",
  tools: { scrape: scrapeTool() },
  stopWhen: stepCountIs(3),
});

console.log(summary.text);
Loading