|
6 | 6 | import base64 |
7 | 7 | import re |
8 | 8 | import sys |
9 | | -from typing import Final |
| 9 | +from pathlib import Path |
| 10 | +from typing import TYPE_CHECKING, Final, Iterable, Literal |
10 | 11 | from urllib.parse import urlparse |
11 | 12 |
|
12 | 13 | import httpx |
|
16 | 17 | from gitingest.utils.exceptions import InvalidGitHubTokenError |
17 | 18 | from server.server_utils import Colors |
18 | 19 |
|
| 20 | +if TYPE_CHECKING: |
| 21 | + from gitingest.schemas import CloneConfig |
| 22 | + |
19 | 23 | # GitHub Personal-Access tokens (classic + fine-grained). |
20 | 24 | # - ghp_ / gho_ / ghu_ / ghs_ / ghr_ → 36 alphanumerics |
21 | 25 | # - github_pat_ → 22 alphanumerics + "_" + 59 alphanumerics |
@@ -321,3 +325,146 @@ def validate_github_token(token: str) -> None: |
321 | 325 | """ |
322 | 326 | if not re.fullmatch(_GITHUB_PAT_PATTERN, token): |
323 | 327 | raise InvalidGitHubTokenError |
| 328 | + |
| 329 | + |
async def checkout_partial_clone(config: CloneConfig, token: str | None) -> None:
    """Restrict a partially cloned repository to the requested subpath via sparse-checkout.

    Parameters
    ----------
    config : CloneConfig
        The configuration for cloning the repository; ``subpath``, ``blob``, ``local_path`` and ``url`` are read.
    token : str | None
        GitHub personal access token (PAT) for accessing private repositories.

    """
    sparse_target = config.subpath.lstrip("/")
    if config.blob:
        # A file url (e.g. blob/branch/path/file.txt) names a file, so the file name is dropped
        # and its parent directory is what gets checked out.
        sparse_target = Path(sparse_target).parent.as_posix()

    git_prefix = create_git_command(["git"], config.local_path, config.url, token)
    sparse_args = ("sparse-checkout", "set", sparse_target)
    await run_command(*git_prefix, *sparse_args)
| 347 | + |
| 348 | + |
async def resolve_commit(config: CloneConfig, url: str, token: str | None) -> str:
    """Pick the commit to clone: an explicit commit first, then tag, then branch, then ``HEAD``.

    Parameters
    ----------
    config : CloneConfig
        The configuration for cloning the repository; ``commit``, ``tag`` and ``branch`` are read.
    url : str
        The URL of the remote repository.
    token : str | None
        GitHub personal access token (PAT) for accessing private repositories.

    Returns
    -------
    str
        ``config.commit`` when it is set; otherwise the SHA resolved from the remote
        for the configured tag, the configured branch, or ``HEAD`` (in that order).

    """
    # An explicit commit wins and needs no remote lookup.
    if config.commit:
        return config.commit

    if config.tag:
        return await _resolve_ref_to_sha(url, ref=config.tag, kind="tag", token=token)

    if config.branch:
        return await _resolve_ref_to_sha(url, ref=config.branch, kind="branch", token=token)

    # Nothing was requested: fall back to whatever the remote's HEAD points to.
    return await _resolve_ref_to_sha(url, ref="HEAD", kind="branch", token=token)
| 375 | + |
| 376 | + |
async def _resolve_ref_to_sha(
    url: str,
    ref: str,
    kind: Literal["branch", "tag"],
    *,
    token: str | None = None,
) -> str:
    """Return the commit SHA that <kind>/<ref> points to in <url>.

    * Branch → first line from ``git ls-remote``.
    * Tag    → if annotated, prefer the peeled ``^{}`` line (commit).

    For tags, only ``refs/tags/<ref>`` and its peeled form ``refs/tags/<ref>^{}`` are
    considered, so a tag that merely shares the prefix (``v1.0.1`` when asking for
    ``v1.0``) can never supply the SHA.

    Parameters
    ----------
    url : str
        The URL of the remote repository.
    ref : str
        The reference to resolve to a commit SHA.
    kind : Literal["branch", "tag"]
        The kind of reference to resolve to a commit SHA.
    token : str | None
        GitHub personal access token (PAT) for accessing private repositories.

    Returns
    -------
    str
        The commit SHA.

    Raises
    ------
    ValueError
        If the ref does not exist in the remote repository.

    """
    await ensure_git_installed()

    # Build: git [-c http.<host>/.extraheader=Auth...] ls-remote <url> <pattern>
    cmd: list[str] = ["git"]
    if token and is_github_host(url):
        cmd += ["-c", create_git_auth_header(token, url=url)]

    exact_refs: set[str] | None = None
    if ref == "HEAD":
        pattern = "HEAD"
    elif kind == "branch":
        pattern = f"refs/heads/{ref}"
    else:  # tag
        # The trailing "*" is needed to also fetch the peeled "<ref>^{}" line, but it
        # matches every tag that starts with <ref> as well; those are filtered out below.
        pattern = f"refs/tags/{ref}*"
        exact_refs = {f"refs/tags/{ref}", f"refs/tags/{ref}^{{}}"}

    cmd += ["ls-remote", url, pattern]
    stdout, _ = await run_command(*cmd)

    lines = stdout.decode().splitlines()
    if exact_refs is not None:
        # Keep only the requested tag (plain and peeled); the ref name is the last field of each line.
        lines = [ln for ln in lines if (fields := ln.split()) and fields[-1] in exact_refs]

    sha = _pick_commit_sha(lines)
    if not sha:
        msg = f"{kind} {ref!r} not found in {url}"
        raise ValueError(msg)

    return sha
| 436 | + |
| 437 | + |
def _pick_commit_sha(lines: Iterable[str]) -> str | None:
    """Return a commit SHA from ``git ls-remote`` output.

    • Annotated tag            → prefer the peeled line (<sha> refs/tags/x^{})
    • Branch / lightweight tag → first non-peeled line

    Lines that do not contain both a SHA and a ref name (blank or malformed lines)
    are ignored instead of raising an unpacking error.

    Parameters
    ----------
    lines : Iterable[str]
        The lines of a ``git ls-remote`` output.

    Returns
    -------
    str | None
        The commit SHA, or ``None`` if no commit SHA is found.

    """
    first_non_peeled: str | None = None

    for ln in lines:
        fields = ln.split(maxsplit=1)
        if len(fields) != 2:  # blank or malformed: no "<sha> <ref>" pair to read
            continue

        sha, ref = fields

        if ref.endswith("^{}"):  # peeled commit of annotated tag
            return sha  # best match, done

        if first_non_peeled is None:  # remember the first ordinary line
            first_non_peeled = sha

    return first_non_peeled  # branch or lightweight tag (or None)
0 commit comments