diff --git a/docs/content/docs/framework/tools/builtin.en.mdx b/docs/content/docs/framework/tools/builtin.en.mdx
index 28bdd630..4eeace1a 100644
--- a/docs/content/docs/framework/tools/builtin.en.mdx
+++ b/docs/content/docs/framework/tools/builtin.en.mdx
@@ -46,6 +46,7 @@ Built-in tools depend on Volcengine services. Enable the corresponding service a
| `web_scraper` | Aggregated search (invite-only), code [here](https://github.com/volcengine/mcp-server/tree/main/server) | `from veadk.tools.builtin_tools.web_scraper import web_scraper` |
| `vesearch` | Search via the [web-aware Q&A Agent](https://www.volcengine.com/docs/85508/1512748) | `from veadk.tools.builtin_tools.vesearch import vesearch` |
| `link_reader` | Read and parse the content of web links | `from veadk.tools.builtin_tools.link_reader import link_reader` |
+| `web_fetch` | Fetch a web page / PDF over HTTP and extract readable content (plain HTTP, no credentials) | `from veadk.tools.builtin_tools.web_fetch import web_fetch` |
| `image_generate` | [Generate images](https://www.volcengine.com/docs/82379/1541523) from text | `from veadk.tools.builtin_tools.image_generate import image_generate` |
| `image_edit` | [Edit images](https://www.volcengine.com/docs/82379/1541523) (image-to-image) | `from veadk.tools.builtin_tools.image_edit import image_edit` |
| `video_generate` | [Generate videos](https://www.volcengine.com/docs/82379/1520757) from text | `from veadk.tools.builtin_tools.video_generate import video_generate` |
@@ -201,6 +202,57 @@ Environment variables:
- `MODEL_AGENT_API_KEY`: API key for the agent's reasoning model
+## Web fetch (web_fetch)
+
+`web_fetch` does a plain HTTP GET on a given URL and extracts its readable content: HTML is converted to markdown or plain text, and PDFs are extracted to text via `pypdf`. It does **not** execute JavaScript, so pages that render entirely client-side or require login may come back incomplete. Unlike `link_reader`, this tool needs **no credentials of its own** (it is a plain HTTP fetch) — use it to let the agent read articles, docs, or any public URL the user references.
+
+Parameters:
+
+- `url`: the `http(s)` URL to fetch;
+- `extract_mode`: `markdown` (default, keeps headings / links / lists) or `text` (plain text);
+- `max_chars`: maximum number of characters of extracted content to return (default `50000`, hard-capped at `200000`).
+
+Returns `{"url", "title", "content", "truncated"}`, or `{"error": ...}` on failure.
+
+
+- **SSRF protection**: after DNS resolution it blocks private / loopback / link-local / reserved addresses, and re-validates every redirect hop (including `<meta refresh>`), following at most 3 hops.
+- **Limits**: 2 MB download cap for HTML (10 MB for PDFs); 30 s request timeout; results cached in-process for 15 minutes.
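+- **Known gaps**: no JavaScript rendering, and no socket-level DNS pinning — the host is resolved and validated before connecting, but the connection is not pinned to the validated IP, so DNS rebinding is not fully prevented.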
+
+
+```python title="examples/tools/web_fetch/agent.py"
+import asyncio
+
+from veadk import Agent, Runner
+from veadk.memory.short_term_memory import ShortTermMemory
+from veadk.tools.builtin_tools.web_fetch import web_fetch
+
+# web_fetch is the agent's only tool; per the section above it needs no
+# credentials of its own (only the reasoning model needs MODEL_AGENT_API_KEY).
+agent = Agent(
+ name="web_fetch_agent",
+ model_name="doubao-seed-1-8-251228",
+ description="An agent that reads web pages and PDFs.",
+ instruction="Use the web_fetch tool to fetch the given URL, then answer based on its content.",
+ tools=[web_fetch],
+)
+
+runner = Runner(agent=agent, short_term_memory=ShortTermMemory())
+
+
+async def main():
+ """Send a single prompt through the runner and print the response."""
+ # The prompt references an arXiv /pdf/ URL, so it presumably exercises the
+ # PDF extraction path rather than the HTML one.
+ response = await runner.run(
+ "Fetch https://arxiv.org/pdf/1706.03762 and summarize the paper's core idea"
+ )
+ print(response)
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
+```
+
+Environment variables:
+
+- `MODEL_AGENT_API_KEY`: API key for the agent's reasoning model (the `web_fetch` tool itself needs no extra credentials)
+
## Image generation (image_generate)
`image_generate` generates images from text. For image-to-image editing, see `image_edit` below.
diff --git a/docs/content/docs/framework/tools/builtin.mdx b/docs/content/docs/framework/tools/builtin.mdx
index 634bdf5b..e034a448 100644
--- a/docs/content/docs/framework/tools/builtin.mdx
+++ b/docs/content/docs/framework/tools/builtin.mdx
@@ -46,6 +46,7 @@ agent = Agent(tools=[web_search])
| `web_scraper` | 聚合搜索(邀测),代码见[此处](https://github.com/volcengine/mcp-server/tree/main/server) | `from veadk.tools.builtin_tools.web_scraper import web_scraper` |
| `vesearch` | 调用[联网问答 Agent](https://www.volcengine.com/docs/85508/1512748) 进行搜索 | `from veadk.tools.builtin_tools.vesearch import vesearch` |
| `link_reader` | 读取并解析网页链接内容 | `from veadk.tools.builtin_tools.link_reader import link_reader` |
+| `web_fetch` | 直接抓取网页 / PDF 并抽取正文(纯 HTTP,工具自身无需凭证) | `from veadk.tools.builtin_tools.web_fetch import web_fetch` |
| `image_generate` | 根据文本描述[生成图片](https://www.volcengine.com/docs/82379/1541523) | `from veadk.tools.builtin_tools.image_generate import image_generate` |
| `image_edit` | [编辑图片](https://www.volcengine.com/docs/82379/1541523)(图生图) | `from veadk.tools.builtin_tools.image_edit import image_edit` |
| `video_generate` | 根据文本描述[生成视频](https://www.volcengine.com/docs/82379/1520757) | `from veadk.tools.builtin_tools.video_generate import video_generate` |
@@ -201,6 +202,57 @@ if __name__ == "__main__":
- `MODEL_AGENT_API_KEY`:Agent 推理模型的 API Key
+## 网页抓取(web_fetch)
+
+`web_fetch` 对给定 URL 发起一次普通 HTTP GET 并抽取正文:HTML 转 Markdown 或纯文本,PDF 用 `pypdf` 抽取文字。它**不执行 JavaScript**——纯前端渲染或需要登录的页面可能抽取不全。与 `link_reader` 不同,该工具**自身无需任何凭证**(纯 HTTP 抓取),适合让 Agent 阅读用户给出的文章、文档或任意公开链接。
+
+参数:
+
+- `url`:要抓取的 `http(s)` 链接;
+- `extract_mode`:`markdown`(默认,保留标题 / 链接 / 列表)或 `text`(纯文本);
+- `max_chars`:抽取内容的最大字符数(默认 `50000`,上限 `200000`)。
+
+返回 `{"url", "title", "content", "truncated"}`,失败时返回 `{"error": ...}`。
+
+
+- **SSRF 防护**:解析域名后拦截私网 / 环回 / 链路本地 / 保留地址,并对每一跳重定向(含 `<meta refresh>`)重新校验,最多跟随 3 跳。
+- **上限**:HTML 下载上限 2MB、PDF 10MB;请求超时 30 秒;结果在进程内缓存 15 分钟。
+- **已知限制**:不渲染 JavaScript;未做 socket 级 DNS pinning——仅在连接前解析并校验地址,连接本身并未绑定到已校验的 IP,因此无法完全防御 DNS 重绑定。
+
+
+```python title="examples/tools/web_fetch/agent.py"
+import asyncio
+
+from veadk import Agent, Runner
+from veadk.memory.short_term_memory import ShortTermMemory
+from veadk.tools.builtin_tools.web_fetch import web_fetch
+
+# web_fetch is the agent's only tool; per the section above it needs no
+# credentials of its own (only the reasoning model needs MODEL_AGENT_API_KEY).
+agent = Agent(
+ name="web_fetch_agent",
+ model_name="doubao-seed-1-8-251228",
+ description="An agent that reads web pages and PDFs.",
+ instruction="Use the web_fetch tool to fetch the given URL, then answer based on its content.",
+ tools=[web_fetch],
+)
+
+runner = Runner(agent=agent, short_term_memory=ShortTermMemory())
+
+
+async def main():
+ """Send a single prompt through the runner and print the response."""
+ # The prompt references an arXiv /pdf/ URL, so it presumably exercises the
+ # PDF extraction path rather than the HTML one.
+ response = await runner.run(
+ "抓取 https://arxiv.org/pdf/1706.03762 并总结这篇论文的核心思想"
+ )
+ print(response)
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
+```
+
+环境变量:
+
+- `MODEL_AGENT_API_KEY`:Agent 推理模型的 API Key(`web_fetch` 工具本身无需额外凭证)
+
## 图像生成(image_generate)
`image_generate` 根据文本描述生成图片。图生图编辑见下文的 `image_edit`。
diff --git a/veadk/tools/builtin_tools/web_fetch.py b/veadk/tools/builtin_tools/web_fetch.py
new file mode 100644
index 00000000..65c9493c
--- /dev/null
+++ b/veadk/tools/builtin_tools/web_fetch.py
@@ -0,0 +1,382 @@
+# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""A plain-HTTP web fetch tool that extracts readable content.
+
+Does a plain HTTP GET and converts the HTML into bounded markdown/text. It does
+NOT execute JavaScript — for JS-heavy / login-protected pages a headless browser
+is needed instead. The design mirrors OpenClaw's `web_fetch`: Chrome-like
+headers, SSRF protection (block private/internal addresses, re-validate every
+redirect), a download-size cap, a coarse HTML→markdown extractor, and a short
+in-memory cache.
+"""
+
+import html as _html
+import ipaddress
+import re
+import socket
+import time
+from urllib.parse import urljoin, urlparse
+
+import requests
+
+from google.adk.tools import ToolContext
+
+from veadk.utils.logger import get_logger
+
+logger = get_logger(__name__)
+
+# ---- limits / defaults (mirror OpenClaw's tools.web.fetch.* config) ----------
+# Chrome-like User-Agent string (the module docstring lists "Chrome-like
+# headers" as part of the design).
+_USER_AGENT = (
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+ "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
+)
+# NOTE(review): presumably sent as the Accept-Language request header; the
+# request code is outside this view.
+_ACCEPT_LANGUAGE = "en-US,en;q=0.9"
+# Request timeout, in seconds.
+_TIMEOUT_SECONDS = 30
+# Maximum number of redirect hops to follow.
+_MAX_REDIRECTS = 3
+# Download caps, in bytes.
+_MAX_RESPONSE_BYTES = 2_000_000 # cap the download before truncation (HTML)
+_MAX_PDF_BYTES = 10_000_000 # higher cap for PDFs (truncation corrupts parsing)
+# Extracted-content limits, in characters.
+_MAX_CHARS_CAP = 200_000 # hard ceiling for the max_chars parameter
+_DEFAULT_MAX_CHARS = 50_000
+# Cache entry lifetime, in seconds (15 minutes).
+_CACHE_TTL_SECONDS = 15 * 60
+# NOTE(review): presumably the maximum number of cached results; the eviction
+# logic is outside this view.
+_CACHE_MAX_ENTRIES = 128
+
+# Tiny in-process TTL cache: {(url, mode, max_chars): (expires_at, result)}.
+_CACHE: dict[tuple[str, str, int], tuple[float, dict]] = {}
+
+
+class _WebFetchError(Exception):
+ """Internal: surfaced to the model as {"error": ...}."""
+
+
+# ---------------------------------------------------------------- SSRF guard --
+def _assert_public_host(host: str) -> None:
+ """Resolve `host` and reject private/internal/loopback/link-local targets.
+
+ Note: this validates the resolved addresses but does not pin the connection
+ to them, so a determined attacker controlling DNS could still race the
+ re-resolution (TOCTOU). It blocks the common SSRF cases; pinning the socket
+ to the validated IP would be the hardening follow-up.
+ """
+ if not host:
+ raise _WebFetchError("missing host")
+ try:
+ infos = socket.getaddrinfo(host, None)
+ except socket.gaierror as e:
+ raise _WebFetchError(f"DNS resolution failed for {host!r}: {e}")
+ for info in infos:
+ addr = info[4][0]
+ try:
+ ip = ipaddress.ip_address(addr)
+ except ValueError:
+ continue
+ if (
+ ip.is_private
+ or ip.is_loopback
+ or ip.is_link_local
+ or ip.is_reserved
+ or ip.is_multicast
+ or ip.is_unspecified
+ ):
+ raise _WebFetchError(f"Blocked non-public address for host {host!r}: {ip}")
+
+
+def _check_url(url: str) -> str:
+ parsed = urlparse(url)
+ if parsed.scheme not in ("http", "https"):
+ raise _WebFetchError("only http(s) URLs are supported")
+ if not parsed.hostname:
+ raise _WebFetchError("URL has no host")
+ _assert_public_host(parsed.hostname)
+ return parsed.hostname
+
+
+# ---------------------------------------------- coarse HTML -> markdown/text --
+def _normalize_whitespace(value: str) -> str:
+ value = value.replace("\r", "")
+ value = re.sub(r"[ \t]+\n", "\n", value)
+ value = re.sub(r"\n{3,}", "\n\n", value)
+ value = re.sub(r"[ \t]{2,}", " ", value)
+ return value.strip()
+
+
+def _strip_tags(value: str) -> str:
+ return _html.unescape(re.sub(r"<[^>]+>", "", value))
+
+
+def _html_to_markdown(html_text: str) -> tuple[str, str | None]:
+ """Coarse HTML→markdown (ported from OpenClaw's `htmlToMarkdown`)."""
+ title_match = re.search(
+ r"
]*>([\s\S]*?)", html_text, flags=re.IGNORECASE
+ )
+ title = (
+ _normalize_whitespace(_strip_tags(title_match.group(1)))
+ if title_match
+ else None
+ )
+
+ text = re.sub(
+ r"