Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,56 @@ client = APIDeploymentsClient(
The retry logic uses exponential backoff with full jitter and respects the `Retry-After` header on 429 responses.


## Unstract CLI

Installing `unstract-client` also provides the `unstract` command:

```bash
pip install unstract-client
unstract --help
```

### `unstract clone`

Clones an organization's resources to another org, on the same or a different
deployment (e.g. promote **dev** → **QA** → **prod**). Covers adapters,
connectors, workflows, pipelines, API deployments, Prompt Studio projects and
their files, user groups, and sharing state (users matched by email, groups by
name).

Authenticates with each org admin's **Platform API key**; prefer the env vars
so keys never land in shell history:

```bash
export UNSTRACT_SRC_PLATFORM_KEY="<source platform key>"
export UNSTRACT_TGT_PLATFORM_KEY="<target platform key>"

unstract clone \
--source-url https://dev.example.com --source-org org_dev123 \
--target-url https://qa.example.com --target-org org_qa456 \
--dry-run
```

Drop `--dry-run` to perform the clone.

| Option | Description |
|--------|-------------|
| `--dry-run` | Plan only; nothing is written. |
| `--include` / `--exclude` | Comma-separated phases to run / skip. |
| `--on-name-conflict` | `adopt` (default) reuses like-named target resources; `abort` stops. |
| `--clone-group-members` | Also add group members on target, matched by email. |
| `--source-key` / `--target-key` | Platform API keys, if not set via env vars. |
| `--api-prefix` | Backend URL prefix (default `api/v1`). |

Re-runs are idempotent: existing target resources are adopted by name, so a
failed run can be resumed by re-running the same command.

| Exit code | Meaning |
|------|---------|
| `0` | Success. |
| `1` | Completed with failures — see the printed report. |
| `2` | Could not run (setup error or `--on-name-conflict=abort` collision). |

## Questions and Feedback

On Slack, [join great conversations](https://join-slack.unstract.com/) around LLMs, their ecosystem and leveraging them to automate the previously unautomatable!
Expand Down
10 changes: 3 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ authors = [
dependencies = [
"requests>=2.32.3",
"tenacity>=8.2.0",
"click>=8.1",
"rich>=13.7",
]
requires-python = ">=3.11"
readme = "README.md"
Expand All @@ -25,14 +27,8 @@ classifiers = [
"Topic :: Software Development :: Libraries :: Python Modules",
]

[project.optional-dependencies]
clone = [
"click>=8.1",
"rich>=13.7",
]

[project.scripts]
unstract-clone = "unstract.clone.cli:main"
unstract = "unstract.cli:main"

[build-system]
requires = ["hatchling"]
Expand Down
30 changes: 30 additions & 0 deletions src/unstract/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Top-level ``unstract`` command group.

Subcommands live in their own subpackages and are registered here so a
single console script (``unstract``) fronts all of them. ``unstract.clone``
keeps its own group + ``main`` so ``python -m unstract.clone`` still works.
"""

from __future__ import annotations

from typing import Any

import click

from unstract.clone.cli import clone_cmd


@click.group(name="unstract")
def cli() -> None:
"""Unstract command-line tools."""


cli.add_command(clone_cmd, name="clone")


def main(argv: list[str] | None = None) -> Any:
return cli(args=argv, standalone_mode=True)


if __name__ == "__main__":
main()
19 changes: 15 additions & 4 deletions src/unstract/clone/cli.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
"""Click-based CLI for ``unstract.clone``.

Single ``clone`` command. Platform keys can be passed via flags
(``--source-key`` / ``--target-key``) or env vars
(``UNSTRACT_SRC_PLATFORM_KEY`` / ``UNSTRACT_TGT_PLATFORM_KEY``) — env vars
are preferred so the key never lands in shell history.
Single ``clone`` command, registered on the top-level ``unstract`` group
(``unstract.cli``) — the canonical invocation is ``unstract clone``. The
local group here only backs ``python -m unstract.clone``.

Platform keys can be passed via flags (``--source-key`` / ``--target-key``)
or env vars (``UNSTRACT_SRC_PLATFORM_KEY`` / ``UNSTRACT_TGT_PLATFORM_KEY``)
— env vars are preferred so the key never lands in shell history.
"""

from __future__ import annotations
Expand Down Expand Up @@ -141,6 +144,12 @@ def cli() -> None:
show_default=True,
help="Per-phase worker count. 1 = strictly sequential.",
)
@click.option(
"--clone-group-members",
is_flag=True,
help="Also add group members on target, matched by email. "
"Members missing on target are skipped and reported.",
)
@click.option("-v", "--verbose", is_flag=True, help="Debug logging")
def clone_cmd(
source_url: str,
Expand All @@ -158,6 +167,7 @@ def clone_cmd(
max_file_size: str,
skip_files: bool,
concurrency: int,
clone_group_members: bool,
verbose: bool,
) -> None:
"""Clone configured resources from one org to another."""
Expand All @@ -178,6 +188,7 @@ def clone_cmd(
file_strategy=effective_strategy,
max_file_size=cap_bytes if cap_bytes is not None else DEFAULT_MAX_FILE_SIZE,
concurrency=concurrency,
clone_group_members=clone_group_members,
)

source = OrgEndpoint(
Expand Down
33 changes: 33 additions & 0 deletions src/unstract/clone/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,39 @@ def get_post_schema(self, entity_path: str) -> frozenset[str]:
self._post_schema_cache[entity_path] = writable
return writable

# ----- org users & groups -----

def list_users(self) -> list[dict[str, Any]]:
"""List org member rows (each carries ``id`` and ``email``)."""
result = self._request("GET", "users/")
return (result or {}).get("members", [])

def list_groups(self) -> list[dict[str, Any]]:
"""List org groups; no server-side name filter — callers match in memory."""
result = self._request("GET", "groups/")
return result if isinstance(result, list) else result.get("results", [])

def create_group(self, payload: dict[str, Any]) -> dict[str, Any]:
"""Create a group; response has no ``id`` — re-list to learn the pk."""
return self._request("POST", "groups/", json=payload)

def list_group_members(self, group_id: Any) -> list[dict[str, Any]]:
"""List a group's member rows (each carries ``email``)."""
result = self._request("GET", f"groups/{group_id}/members/")
return result if isinstance(result, list) else result.get("results", [])

def add_group_members(self, group_id: Any, user_ids: list[int]) -> Any:
"""Bulk-add members by user pk; idempotent server-side."""
return self._request(
"POST", f"groups/{group_id}/members/", json={"user_ids": user_ids}
)

# ----- sharing -----

def share_resource(self, share_path: str, payload: dict[str, Any]) -> Any:
"""Replace-style share update; axes omitted from ``payload`` are untouched."""
return self._request("POST", share_path, json=payload)

# ----- adapters -----

def list_adapters(
Expand Down
9 changes: 8 additions & 1 deletion src/unstract/clone/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@

from __future__ import annotations

import threading
from dataclasses import dataclass, field
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
from unstract.clone.client import PlatformClient
Expand Down Expand Up @@ -53,6 +54,8 @@ class CloneOptions:
max_file_size: int = DEFAULT_MAX_FILE_SIZE
# Per-phase worker fan-out. 1 = sequential (no executor).
concurrency: int = DEFAULT_CONCURRENCY
# Group phase: also add members (matched by email) to cloned groups.
clone_group_members: bool = False

def includes(self, phase_name: str) -> bool:
if self.include is not None and phase_name not in self.include:
Expand Down Expand Up @@ -107,3 +110,7 @@ class CloneContext:
# Source prompt_registry_ids whose CustomTool was skipped; used to
# cascade-skip dependent workflows downstream.
skipped_custom_tool_registry_ids: set[str] = field(default_factory=set)
# Per-run memo for users/groups directory listings (sharing replication
# touches them once per endpoint, never per resource).
share_cache: dict[str, Any] = field(default_factory=dict)
share_cache_lock: threading.Lock = field(default_factory=threading.Lock)
14 changes: 9 additions & 5 deletions src/unstract/clone/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
ConnectorPhase,
CustomToolPhase,
FilesPhase,
GroupPhase,
PipelinePhase,
TagPhase,
ToolInstancePhase,
Expand All @@ -35,12 +36,15 @@
logger = logging.getLogger(__name__)

# Strict dependency order. Each entry: (phase_name, phase_class).
# Adapter, connector, tag are independent leaf phases. Downstream phases
# (custom_tool, workflow, tool_instance, workflow_endpoint) land later
# and consume the remap entries these produce. Pipeline + api_deployment
# come last: both FK the workflow and api_deployment additionally
# requires endpoints to be configured before the serializer accepts it.
# Group runs first: every shareable phase consumes its remap entries when
# replicating shared_groups. Adapter, connector, tag are independent leaf
# phases. Downstream phases (custom_tool, workflow, tool_instance,
# workflow_endpoint) land later and consume the remap entries these
# produce. Pipeline + api_deployment come last: both FK the workflow and
# api_deployment additionally requires endpoints to be configured before
# the serializer accepts it.
PHASES: list[tuple[str, type[Phase]]] = [
("group", GroupPhase),
("adapter", AdapterPhase),
("connector", ConnectorPhase),
("tag", TagPhase),
Expand Down
2 changes: 2 additions & 0 deletions src/unstract/clone/phases/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from unstract.clone.phases.connector import ConnectorPhase
from unstract.clone.phases.custom_tool import CustomToolPhase
from unstract.clone.phases.files import FilesPhase
from unstract.clone.phases.group import GroupPhase
from unstract.clone.phases.pipeline import PipelinePhase
from unstract.clone.phases.tag import TagPhase
from unstract.clone.phases.tool_instance import ToolInstancePhase
Expand All @@ -25,6 +26,7 @@
"ConnectorPhase",
"CustomToolPhase",
"FilesPhase",
"GroupPhase",
"Phase",
"PipelinePhase",
"TagPhase",
Expand Down
5 changes: 5 additions & 0 deletions src/unstract/clone/phases/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

class AdapterPhase(Phase):
name = "adapter"
share_path_template = "adapter/{id}/share/"

def run(self, report: CloneReport) -> PhaseResult:
result = report.get_phase(self.name)
Expand Down Expand Up @@ -123,3 +124,7 @@ def _clone_one(

with lock:
self.ctx.remap.record("adapter", src_id, tgt["id"])
# Source detail (fetched above) carries the share axes.
self.apply_share(
src=src, tgt_id=tgt["id"], label=name, result=result, lock=lock
)
10 changes: 10 additions & 0 deletions src/unstract/clone/phases/api_deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

class APIDeploymentPhase(Phase):
name = "api_deployment"
share_path_template = "api/deployment/{id}/share/"

def run(self, report: CloneReport) -> PhaseResult:
result = report.get_phase(self.name)
Expand Down Expand Up @@ -150,6 +151,15 @@ def _clone_one(

with lock:
self.ctx.remap.record("api_deployment", src_id, tgt["id"])
# List rows omit the share axes — fetch source detail when needed.
self.apply_share(
src=src,
tgt_id=tgt["id"],
label=api_name,
result=result,
lock=lock,
src_detail_fn=lambda: self.ctx.source.get_api_deployment(src_id),
)

def _warn_if_extra_source_keys(self, src_deployment_id: str, name: str) -> None:
try:
Expand Down
34 changes: 34 additions & 0 deletions src/unstract/clone/phases/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from unstract.clone.context import CloneContext
from unstract.clone.exceptions import CloneError
from unstract.clone.report import CloneReport, PhaseResult
from unstract.clone.sharing import apply_share_state

T = TypeVar("T")

Expand All @@ -22,6 +23,8 @@
# either noise (silently overwritten) or a 400 (when a source-org value
# doesn't validate against the target org). Strip them universally —
# the phase OPTIONS schema covers the entity-specific writable subset.
# ``shared_users`` stays stripped on create — share state is replicated
# post-create instead (see sharing.py).
SERVER_MANAGED: frozenset[str] = frozenset(
{
"id",
Expand Down Expand Up @@ -55,6 +58,9 @@ class Phase(ABC):
"""Abstract phase. One subclass per entity type."""

name: str = ""
# Share endpoint template for shareable resource types, e.g.
# "adapter/{id}/share/" ({id} = target pk). None = not shareable.
share_path_template: str | None = None

def __init__(self, ctx: CloneContext):
self.ctx = ctx
Expand All @@ -64,6 +70,34 @@ def run(self, report: CloneReport) -> PhaseResult:
"""Migrate all entities of this phase's type. Idempotent across runs."""
raise NotImplementedError

def apply_share(
self,
*,
src: dict[str, Any],
tgt_id: str,
label: str,
result: PhaseResult,
lock: threading.Lock,
src_detail_fn: Callable[[], dict[str, Any]] | None = None,
) -> None:
"""Replicate ``src``'s share state onto the target entity.

Pass ``src_detail_fn`` when ``src`` may be a stripped list-row —
the helper fetches the detail only if a share axis is missing.
No-op for phases without ``share_path_template``; never raises.
"""
if self.share_path_template is None:
return
apply_share_state(
self.ctx,
share_path=self.share_path_template.format(id=tgt_id),
entity_label=f"{self.name} '{label}'",
src=src,
result=result,
lock=lock,
src_detail_fn=src_detail_fn,
)

def parallel_map(
self,
items: Iterable[T],
Expand Down
5 changes: 5 additions & 0 deletions src/unstract/clone/phases/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def _has_oauth_tokens(metadata: dict[str, Any]) -> bool:

class ConnectorPhase(Phase):
name = "connector"
share_path_template = "connector/{id}/share/"

def run(self, report: CloneReport) -> PhaseResult:
result = report.get_phase(self.name)
Expand Down Expand Up @@ -157,3 +158,7 @@ def _clone_one(

with lock:
self.ctx.remap.record("connector", src_id, tgt["id"])
# Source detail (fetched above) carries the share axes.
self.apply_share(
src=src, tgt_id=tgt["id"], label=name, result=result, lock=lock
)
Loading
Loading