From ca957e56b0dbf110c19f6791d1f0e6448f4a69ae Mon Sep 17 00:00:00 2001
From: Rajkumar PP <79452802+rajkumarpp@users.noreply.github.com>
Date: Tue, 22 Aug 2023 15:54:03 +0530
Subject: [PATCH 1/9] Update Jenkinsfile

---
 Jenkinsfile | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 445611cb7..e837b0b12 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,21 +1,23 @@
-pipeline{
+pipeline {
     agent any
 
-    tools {
-        maven 'maven'
-        jdk 'java'
-    }
-
-    stages{
-        stage('checkout'){
-            steps{
-                checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 'github access', url: 'https://github.com/sreenivas449/java-hello-world-with-maven.git']]])
+    stages {
+        stage('build') {
+            steps {
+                bat "mvn clean"
+            }
+        }
+        stage('test') {
+            steps {
+                bat "mvn test"
+                echo "Testing Completed"
             }
         }
-        stage('build'){
-            steps{
-                bat 'mvn package'
+        stage('deploy') {
+            steps {
+                bat "mvn package"
+                echo "Deployment Completed"
             }
         }
     }
-}
\ No newline at end of file
+}

From 74e3778e24e5c2e2d503d33623c53419a301a27f Mon Sep 17 00:00:00 2001
From: Rajkumar PP <79452802+rajkumarpp@users.noreply.github.com>
Date: Wed, 23 Aug 2023 11:07:11 +0530
Subject: [PATCH 2/9] Delete Jenkinsfile

---
 Jenkinsfile | 23 -----------------------
 1 file changed, 23 deletions(-)
 delete mode 100644 Jenkinsfile

diff --git a/Jenkinsfile b/Jenkinsfile
deleted file mode 100644
index e837b0b12..000000000
--- a/Jenkinsfile
+++ /dev/null
@@ -1,23 +0,0 @@
-pipeline {
-    agent any
-
-    stages {
-        stage('build') {
-            steps {
-                bat "mvn clean"
-            }
-        }
-        stage('test') {
-            steps {
-                bat "mvn test"
-                echo "Testing Completed"
-            }
-        }
-        stage('deploy') {
-            steps {
-                bat "mvn package"
-                echo "Deployment Completed"
-            }
-        }
-    }
-}

From d7daa2e7fae498ec75f63a4d0a379781ee653c8f Mon Sep 17 00:00:00 2001
From: Rajkumar PP <79452802+rajkumarpp@users.noreply.github.com>
Date: Wed, 30 Aug 2023 11:26:32 +0530
Subject: [PATCH 3/9] Add files via upload

---
 Jenkinsfile.txt | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 Jenkinsfile.txt

diff --git a/Jenkinsfile.txt b/Jenkinsfile.txt
new file mode 100644
index 000000000..dfa449349
--- /dev/null
+++ b/Jenkinsfile.txt
@@ -0,0 +1,21 @@
+pipeline{
+    agent any
+
+    tools {
+        maven 'maven'
+        jdk 'java'
+    }
+
+    stages{
+        stage('checkout'){
+            steps{
+                checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 'github access', url: 'https://github.com/sreenivas449/java-hello-world-with-maven.git']]])
+            }
+        }
+        stage('build'){
+            steps{
+                bat 'mvn package'
+            }
+        }
+    }
+}

From f0142777ba7f9172545fac427e620c2af7944f4a Mon Sep 17 00:00:00 2001
From: Rajkumar PP <79452802+rajkumarpp@users.noreply.github.com>
Date: Wed, 30 Aug 2023 11:26:53 +0530
Subject: [PATCH 4/9] Rename Jenkinsfile.txt to Jenkinsfile

---
 Jenkinsfile.txt => Jenkinsfile | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename Jenkinsfile.txt => Jenkinsfile (100%)

diff --git a/Jenkinsfile.txt b/Jenkinsfile
similarity index 100%
rename from Jenkinsfile.txt
rename to Jenkinsfile

From 3d8ccb35899ef7afaea82854f50b385ed19fc607 Mon Sep 17 00:00:00 2001
From: Rajkumar PP <79452802+rajkumarpp@users.noreply.github.com>
Date: Mon, 1 Sep 2025 11:25:17 +0530
Subject: [PATCH 5/9] Create ai-pr-review.yml

---
 .github/workflows/ai-pr-review.yml | 35 ++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 .github/workflows/ai-pr-review.yml

diff --git a/.github/workflows/ai-pr-review.yml b/.github/workflows/ai-pr-review.yml
new file mode 100644
index 000000000..090a4a2ef
--- /dev/null
+++ b/.github/workflows/ai-pr-review.yml
@@ -0,0 +1,35 @@
+name: AI PR Review (Groq)
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+
+jobs:
+  review:
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install requests
+
+      - name: Run AI Review
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          LLM_PROVIDER: groq
+          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
+        run: |
+          # Download the PR reviewer script
+          curl -o pr_reviewer.py https://raw.githubusercontent.com/YOUR-REPO/scripts/pr_reviewer.py
+          curl -o llm_client.py https://raw.githubusercontent.com/YOUR-REPO/scripts/llm_client.py
+          python pr_reviewer.py
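The workflow above reads GROQ_API_KEY from repository secrets, so that secret must exist before the first run. One way to provision it programmatically is GitHub's documented Actions-secrets REST flow; the sketch below assumes PyNaCl and requests are installed (`pip install pynacl requests`), and the repo slug, token env name, and key value are placeholders:

import base64
import os
import requests
from nacl import encoding, public

OWNER_REPO = "owner/repo"                 # placeholder
TOKEN = os.environ["GH_PAT"]              # a token with repo admin access (placeholder env name)
headers = {"Authorization": f"Bearer {TOKEN}", "Accept": "application/vnd.github+json"}

# 1. Fetch the repository public key GitHub uses for secret encryption
key_resp = requests.get(
    f"https://api.github.com/repos/{OWNER_REPO}/actions/secrets/public-key",
    headers=headers, timeout=30,
)
key_resp.raise_for_status()
key = key_resp.json()

# 2. Encrypt the secret value with a libsodium sealed box
pk = public.PublicKey(key["key"].encode(), encoding.Base64Encoder())
encrypted = base64.b64encode(
    public.SealedBox(pk).encrypt(os.environ["GROQ_API_KEY"].encode())
).decode()

# 3. Create or update the secret
put_resp = requests.put(
    f"https://api.github.com/repos/{OWNER_REPO}/actions/secrets/GROQ_API_KEY",
    headers=headers,
    json={"encrypted_value": encrypted, "key_id": key["key_id"]},
    timeout=30,
)
put_resp.raise_for_status()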
From ebede0153915cbff176eefeb21398786975d60d6 Mon Sep 17 00:00:00 2001
From: Rajkumar PP <79452802+rajkumarpp@users.noreply.github.com>
Date: Mon, 1 Sep 2025 11:26:55 +0530
Subject: [PATCH 6/9] Create llm_client.py

---
 llm_client.py | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)
 create mode 100644 llm_client.py

diff --git a/llm_client.py b/llm_client.py
new file mode 100644
index 000000000..1d9f8768c
--- /dev/null
+++ b/llm_client.py
@@ -0,0 +1,109 @@
+# file: llm_client.py
+import os
+import json
+import requests
+from typing import Dict
+
+SYSTEM_PROMPT = (
+    "You are a senior software engineer reviewing a pull request. "
+    "Review for code quality, security issues, test coverage, and best practices. "
+    "Return JSON with fields: summary, suggestions (list), and optionally inline_comments "
+    "(list of {file, line, comment}). Be concise, specific, and actionable."
+)
+
+def _prompt_for_diff(pr_title: str, pr_body: str, diff_text: str) -> str:
+    return f"""PR Title: {pr_title}
+
+PR Description:
+{pr_body}
+
+Diff (unified format):
+{diff_text}
+
+Return strictly valid JSON with keys: summary, suggestions, inline_comments.
+"""
+
+def _post_openai_compatible(url: str, api_key: str, model: str, prompt: str) -> str:
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json",
+    }
+    payload = {
+        "model": model,
+        "messages": [
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": prompt},
+        ],
+        "temperature": 0.2,
+        "max_tokens": 1200,
+    }
+    resp = requests.post(url, headers=headers, json=payload, timeout=60)
+    resp.raise_for_status()
+    data = resp.json()
+    # Supports OpenAI-compatible chat APIs
+    return data["choices"][0]["message"]["content"]
+
+def _post_huggingface_inference(model_id: str, api_key: str, prompt: str) -> str:
+    # Basic text-generation style for some HF hosted models.
+    # Many chat models also accept prompts in chat-template form.
+    url = f"https://api-inference.huggingface.co/models/{model_id}"
+    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+    payload = {
+        "inputs": f"{SYSTEM_PROMPT}\n\n{prompt}\n\nReturn strictly valid JSON.",
+        "parameters": {"max_new_tokens": 1200, "temperature": 0.2},
+    }
+    resp = requests.post(url, headers=headers, json=payload, timeout=60)
+    resp.raise_for_status()
+    data = resp.json()
+    # HF returns a list[ { "generated_text": ... } ] or a dict for some models
+    if isinstance(data, list) and data and "generated_text" in data[0]:
+        return data[0]["generated_text"]
+    # Some chat models return text under different fields; adapt as needed:
+    return json.dumps(data)
+
+def generate_review(pr_title: str, pr_body: str, diff_text: str) -> Dict:
+    provider = os.getenv("LLM_PROVIDER", "groq").lower()
+    prompt = _prompt_for_diff(pr_title, pr_body, diff_text)
+
+    if provider == "groq":
+        api_key = os.environ["GROQ_API_KEY"]
+        model = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
+        url = "https://api.groq.com/openai/v1/chat/completions"
+        content = _post_openai_compatible(url, api_key, model, prompt)
+
+    elif provider == "openrouter":
+        api_key = os.environ["OPENROUTER_API_KEY"]
+        model = os.getenv("OPENROUTER_MODEL", "meta-llama/llama-3.1-8b-instruct")
+        url = "https://openrouter.ai/api/v1/chat/completions"
+        content = _post_openai_compatible(url, api_key, model, prompt)
+
+    elif provider == "huggingface":
+        api_key = os.environ["HF_API_KEY"]
+        model_id = os.getenv("HF_MODEL_ID", "meta-llama/Llama-3.1-8B-Instruct")
+        content = _post_huggingface_inference(model_id, api_key, prompt)
+
+    elif provider == "local":
+        # Example: Ollama exposes an OpenAI-compatible endpoint at /v1/chat/completions
+        api_key = os.getenv("LOCAL_API_KEY", "not-needed")
+        model = os.getenv("LOCAL_MODEL", "llama3.1")
+        url = os.getenv("LOCAL_OPENAI_URL", "http://localhost:11434/v1/chat/completions")
+        content = _post_openai_compatible(url, api_key, model, prompt)
+
+    else:
+        raise ValueError(f"Unknown LLM_PROVIDER: {provider}")
+
+    # Try to parse JSON; if the model returns extra text, attempt to extract JSON block.
+    try:
+        # Direct JSON
+        return json.loads(content)
+    except json.JSONDecodeError:
+        # Fallback: greedily extract the outermost {...} block
+        import re
+        m = re.search(r"\{.*\}", content, flags=re.DOTALL)
+        if m:
+            try:
+                return json.loads(m.group(0))
+            except Exception:
+                pass
+        # Ultimate fallback
+        return {"summary": content[:8000], "suggestions": [], "inline_comments": []}
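A quick way to sanity-check llm_client.py before wiring it into CI is to call generate_review directly against a toy diff. A minimal local sketch, assuming GROQ_API_KEY is exported and llm_client.py is importable (the PR text and diff string here are made up for illustration):

import json
from llm_client import generate_review

toy_diff = (
    "--- a/app.py\n"
    "+++ b/app.py\n"
    "@@ -1 +1,2 @@\n"
    " print('hello')\n"
    "+print('goodbye')\n"
)
review = generate_review("Add goodbye message", "Prints a farewell line.", toy_diff)
print(json.dumps(review, indent=2))  # expect keys: summary, suggestions, inline_comments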
From fb414487a2d693c4af119b0037d2c7497ad87aad Mon Sep 17 00:00:00 2001
From: Rajkumar PP <79452802+rajkumarpp@users.noreply.github.com>
Date: Mon, 1 Sep 2025 11:28:32 +0530
Subject: [PATCH 7/9] Update llm_client.py

---
 llm_client.py | 176 +++++++++++++++++++++++++------------------------
 1 file changed, 89 insertions(+), 87 deletions(-)

diff --git a/llm_client.py b/llm_client.py
index 1d9f8768c..931b254c5 100644
--- a/llm_client.py
+++ b/llm_client.py
@@ -1,109 +1,111 @@
-# file: llm_client.py
+# llm_client.py
 import os
+import re
 import json
+import time
 import requests
-from typing import Dict
+from typing import Dict, Any, List, Optional, Tuple
 
-SYSTEM_PROMPT = (
-    "You are a senior software engineer reviewing a pull request. "
-    "Review for code quality, security issues, test coverage, and best practices. "
-    "Return JSON with fields: summary, suggestions (list), and optionally inline_comments "
-    "(list of {file, line, comment}). Be concise, specific, and actionable."
+DEFAULT_SYSTEM_PROMPT = (
+    "You are a seasoned senior software engineer and code reviewer. "
+    "Review the pull request changes for code quality, correctness, security, performance, "
+    "maintainability, readability, and test coverage. "
+    "Be specific and actionable. Prefer concise, structured feedback.\n\n"
+    "Return STRICT JSON with keys:\n"
+    " - summary: string\n"
+    " - suggestions: array of strings (actionable, prioritized)\n"
+    " - inline_comments: array of objects with keys {file: string, line: number, comment: string}\n"
 )
 
-def _prompt_for_diff(pr_title: str, pr_body: str, diff_text: str) -> str:
-    return f"""PR Title: {pr_title}
+def build_prompt(
+    pr_title: str,
+    pr_body: str,
+    diff_text: str,
+    language_hint: Optional[str] = None,
+    guidelines: Optional[str] = None,
+) -> str:
+    extras = []
+    if language_hint:
+        extras.append(f"Primary language/framework context: {language_hint}")
+    if guidelines:
+        extras.append(f"Team guidelines:\n{guidelines}")
+    extras_block = "\n\n".join(extras).strip()
+    if extras_block:
+        extras_block = "\n\n" + extras_block
 
-PR Description:
-{pr_body}
+    return (
+        f"PR Title: {pr_title}\n\n"
+        f"PR Description:\n{(pr_body or '').strip()}\n"
+        f"{extras_block}\n\n"
+        "Unified Diff:\n"
+        f"{diff_text}\n\n"
+        "Return STRICT JSON only, no prose outside JSON."
+    )
 
-Diff (unified format):
-{diff_text}
-
-Return strictly valid JSON with keys: summary, suggestions, inline_comments.
-"""
-
-def _post_openai_compatible(url: str, api_key: str, model: str, prompt: str) -> str:
+def _openai_compatible_chat(
+    url: str,
+    api_key: str,
+    model: str,
+    system_prompt: str,
+    user_prompt: str,
+    temperature: float = 0.2,
+    max_tokens: int = 1200,
+    retries: int = 3,
+    timeout: int = 60,
+    extra_headers: Optional[Dict[str, str]] = None,
+) -> str:
     headers = {
         "Authorization": f"Bearer {api_key}",
         "Content-Type": "application/json",
     }
+    if extra_headers:
+        headers.update(extra_headers)
+
     payload = {
         "model": model,
         "messages": [
-            {"role": "system", "content": SYSTEM_PROMPT},
-            {"role": "user", "content": prompt},
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
         ],
-        "temperature": 0.2,
-        "max_tokens": 1200,
+        "temperature": temperature,
+        "max_tokens": max_tokens,
     }
-    resp = requests.post(url, headers=headers, json=payload, timeout=60)
-    resp.raise_for_status()
-    data = resp.json()
-    # Supports OpenAI-compatible chat APIs
-    return data["choices"][0]["message"]["content"]
-
-def _post_huggingface_inference(model_id: str, api_key: str, prompt: str) -> str:
-    # Basic text-generation style for some HF hosted models.
-    # Many chat models also accept prompts in chat-template form.
+    last_err = None
+    for attempt in range(1, retries + 1):
+        try:
+            resp = requests.post(url, headers=headers, json=payload, timeout=timeout)
+            if resp.status_code == 429:
+                # Rate limited; exponential backoff
+                wait = min(2 ** attempt, 10)
+                time.sleep(wait)
+                continue
+            resp.raise_for_status()
+            data = resp.json()
+            return data["choices"][0]["message"]["content"]
+        except Exception as e:
+            last_err = e
+            time.sleep(min(2 ** attempt, 5))
+    raise RuntimeError(f"LLM call failed after {retries} attempts: {last_err}")
+
+def _huggingface_inference(
+    model_id: str,
+    api_key: str,
+    system_prompt: str,
+    user_prompt: str,
+    temperature: float = 0.2,
+    max_new_tokens: int = 1200,
+    retries: int = 3,
+    timeout: int = 60,
+) -> str:
     url = f"https://api-inference.huggingface.co/models/{model_id}"
     headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+    inputs = f"{system_prompt}\n\n{user_prompt}\n\nReturn STRICT JSON only."
     payload = {
-        "inputs": f"{SYSTEM_PROMPT}\n\n{prompt}\n\nReturn strictly valid JSON.",
-        "parameters": {"max_new_tokens": 1200, "temperature": 0.2},
+        "inputs": inputs,
+        "parameters": {"temperature": temperature, "max_new_tokens": max_new_tokens},
     }
-    resp = requests.post(url, headers=headers, json=payload, timeout=60)
-    resp.raise_for_status()
-    data = resp.json()
-    # HF returns a list[ { "generated_text": ... } ] or a dict for some models
-    if isinstance(data, list) and data and "generated_text" in data[0]:
-        return data[0]["generated_text"]
-    # Some chat models return text under different fields; adapt as needed:
-    return json.dumps(data)
-
-def generate_review(pr_title: str, pr_body: str, diff_text: str) -> Dict:
-    provider = os.getenv("LLM_PROVIDER", "groq").lower()
-    prompt = _prompt_for_diff(pr_title, pr_body, diff_text)
-
-    if provider == "groq":
-        api_key = os.environ["GROQ_API_KEY"]
-        model = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
-        url = "https://api.groq.com/openai/v1/chat/completions"
-        content = _post_openai_compatible(url, api_key, model, prompt)
-
-    elif provider == "openrouter":
-        api_key = os.environ["OPENROUTER_API_KEY"]
-        model = os.getenv("OPENROUTER_MODEL", "meta-llama/llama-3.1-8b-instruct")
-        url = "https://openrouter.ai/api/v1/chat/completions"
-        content = _post_openai_compatible(url, api_key, model, prompt)
-
-    elif provider == "huggingface":
-        api_key = os.environ["HF_API_KEY"]
-        model_id = os.getenv("HF_MODEL_ID", "meta-llama/Llama-3.1-8B-Instruct")
-        content = _post_huggingface_inference(model_id, api_key, prompt)
-
-    elif provider == "local":
-        # Example: Ollama exposes an OpenAI-compatible endpoint at /v1/chat/completions
-        api_key = os.getenv("LOCAL_API_KEY", "not-needed")
-        model = os.getenv("LOCAL_MODEL", "llama3.1")
-        url = os.getenv("LOCAL_OPENAI_URL", "http://localhost:11434/v1/chat/completions")
-        content = _post_openai_compatible(url, api_key, model, prompt)
-
-    else:
-        raise ValueError(f"Unknown LLM_PROVIDER: {provider}")
-
-    # Try to parse JSON; if the model returns extra text, attempt to extract JSON block.
-    try:
-        # Direct JSON
-        return json.loads(content)
-    except json.JSONDecodeError:
-        # Fallback: greedily extract the outermost {...} block
-        import re
-        m = re.search(r"\{.*\}", content, flags=re.DOTALL)
-        if m:
-            try:
-                return json.loads(m.group(0))
-            except Exception:
-                pass
-        # Ultimate fallback
-        return {"summary": content[:8000], "suggestions": [], "inline_comments": []}
+    last_err = None
+    for attempt in range(1, retries + 1):
+        try:
+            resp
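Note that this patch, as committed, cuts llm_client.py off mid-function: per the hunk header the new file is 111 lines and ends inside _huggingface_inference at the dangling "resp", while the old generate_review is deleted without a replacement, so the `from llm_client import generate_review` in the next patch cannot resolve. A hedged sketch of the missing tail follows, inferred from the retry loop in _openai_compatible_chat, the old Hugging Face response handling, and the five-argument call in pr_reviewer.py; the openrouter/local branches are omitted for brevity, and the `_provider`/`_model` metadata keys are assumptions based on how pr_reviewer.py consumes results:

# Hypothetical completion of llm_client.py (NOT part of the committed patch).
# Continuing inside _huggingface_inference, right after the truncated "resp":
            resp = requests.post(url, headers=headers, json=payload, timeout=timeout)
            if resp.status_code == 429:
                time.sleep(min(2 ** attempt, 10))
                continue
            resp.raise_for_status()
            data = resp.json()
            # HF text-generation models return a list [{ "generated_text": ... }]
            if isinstance(data, list) and data and "generated_text" in data[0]:
                return data[0]["generated_text"]
            return json.dumps(data)
        except Exception as e:
            last_err = e
            time.sleep(min(2 ** attempt, 5))
    raise RuntimeError(f"LLM call failed after {retries} attempts: {last_err}")

def generate_review(
    pr_title: str,
    pr_body: str,
    diff_text: str,
    language_hint: Optional[str] = None,
    guidelines: Optional[str] = None,
) -> Dict[str, Any]:
    provider = os.getenv("LLM_PROVIDER", "groq").lower()
    user_prompt = build_prompt(pr_title, pr_body, diff_text, language_hint, guidelines)

    if provider == "groq":
        api_key = os.environ["GROQ_API_KEY"]
        model = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
        url = "https://api.groq.com/openai/v1/chat/completions"
        content = _openai_compatible_chat(url, api_key, model, DEFAULT_SYSTEM_PROMPT, user_prompt)
    elif provider == "huggingface":
        api_key = os.environ["HF_API_KEY"]
        model = os.getenv("HF_MODEL_ID", "meta-llama/Llama-3.1-8B-Instruct")
        content = _huggingface_inference(model, api_key, DEFAULT_SYSTEM_PROMPT, user_prompt)
    else:
        raise ValueError(f"Unknown LLM_PROVIDER: {provider}")

    # Parse strict JSON, falling back to the outermost {...} block
    try:
        result = json.loads(content)
    except json.JSONDecodeError:
        m = re.search(r"\{.*\}", content, flags=re.DOTALL)
        result = None
        if m:
            try:
                result = json.loads(m.group(0))
            except Exception:
                result = None
    if not isinstance(result, dict):
        result = {"summary": content[:8000], "suggestions": [], "inline_comments": []}

    # Metadata that pr_reviewer.py's aggregate_results reads back out
    result.setdefault("_provider", provider)
    result.setdefault("_model", model)
    return result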
+ """ + diffs: List[Tuple[str, str]] = [] + for f in files: + filename = f.get("filename") + patch = f.get("patch") + if not filename or not patch: + # Skip binary or too-large diffs + continue + diff_text = f"--- a/{filename}\n+++ b/{filename}\n{patch}\n" + diffs.append((filename, diff_text)) + return diffs + +def chunk_diffs_by_size(diffs: List[Tuple[str, str]], max_chars: int, min_chunk: int) -> List[List[Tuple[str, str]]]: + """ + Group per-file diffs into chunks, each not exceeding max_chars. + Tries to pack small files together for better context. + """ + # Sort by size ascending to pack small files first + diffs_sorted = sorted(diffs, key=lambda x: len(x[1])) + chunks: List[List[Tuple[str, str]]] = [] + current: List[Tuple[str, str]] = [] + current_size = 0 + + for item in diffs_sorted: + size = len(item[1]) + # If single file is huge, put it alone (hard cap) + if size >= max_chars: + if current: + chunks.append(current) + current = [] + current_size = 0 + chunks.append([item]) + continue + + if current_size + size <= max_chars or (not current and size < max_chars): + current.append(item) + current_size += size + else: + # finalize current and start new + chunks.append(current) + current = [item] + current_size = size + + if current: + chunks.append(current) + + # Merge tiny chunks if possible + merged: List[List[Tuple[str, str]]] = [] + carry: List[Tuple[str, str]] = [] + carry_size = 0 + for ch in chunks: + ch_size = sum(len(d) for _, d in ch) + if ch_size < min_chunk: + carry.extend(ch) + carry_size += ch_size + if carry_size >= min_chunk: + merged.append(carry) + carry = [] + carry_size = 0 + else: + if carry: + merged.append(carry) + carry = [] + carry_size = 0 + merged.append(ch) + if carry: + merged.append(carry) + return merged + +def assemble_diff_text(chunk: List[Tuple[str, str]]) -> str: + return "\n".join(diff for _, diff in chunk) + +def format_review_comment(aggregated: Dict[str, Any]) -> str: + provider = aggregated.get("_provider", "unknown") + model = aggregated.get("_model", "unknown") + + comment = "### 🤖 AI PR Review (Automated)\n" + comment += f"_Provider: **{provider}**, Model: **{model}**_\n\n" + + summary = aggregated.get("summary") or "" + if summary.strip(): + comment += "**Summary**\n\n" + comment += f"{summary.strip()}\n\n" + + suggestions: List[str] = aggregated.get("suggestions") or [] + if suggestions: + comment += "**Suggestions**\n\n" + for i, s in enumerate(suggestions, 1): + comment += f"{i}. {s.strip()}\n" + comment += "\n" + + inline = aggregated.get("inline_comments") or [] + if inline: + comment += "
+        for c in inline:
+            file = c.get("file", "")
+            line = c.get("line", "")
+            text = c.get("comment", "").strip()
+            if text:
+                comment += f"- `{file}`:{line} — {text}\n"
+        comment += "\n</details>\n"
\n" + + comment += "\n> _Note_: Inline positions are suggestions only. The bot posts a single review comment to avoid noisy threads." + return comment + +def post_pr_review(owner: str, repo: str, pr_number: int, gh_token: str, body: str, event: str = "COMMENT") -> Dict[str, Any]: + headers = _gh_headers(gh_token) + payload = {"body": body, "event": event} + resp = requests.post( + f"{GITHUB_API}/repos/{owner}/{repo}/pulls/{pr_number}/reviews", + headers=headers, json=payload, timeout=30 + ) + resp.raise_for_status() + return resp.json() + +def aggregate_results(results: List[Dict[str, Any]]) -> Dict[str, Any]: + """ + Combine multiple chunk reviews into a single review: + - Concatenate summaries with headings + - Merge suggestions (deduplicate similar lines) + - Merge inline comments + """ + summaries: List[str] = [] + suggestions: List[str] = [] + inline_comments: List[Dict[str, Any]] = [] + provider, model = None, None + + def _norm(s: str) -> str: + return " ".join((s or "").strip().split()) + + seen_suggestions = set() + + for idx, r in enumerate(results, start=1): + if not provider and r.get("_provider"): + provider = r.get("_provider") + if not model and r.get("_model"): + model = r.get("_model") + + s = (r.get("summary") or "").strip() + if s: + summaries.append(f"**Chunk {idx}**:\n{s}") + + for sug in r.get("suggestions") or []: + key = _norm(sug) + if key and key not in seen_suggestions: + suggestions.append(sug) + seen_suggestions.add(key) + + for ic in r.get("inline_comments") or []: + if isinstance(ic, dict) and ic.get("comment"): + inline_comments.append(ic) + + final_summary = "\n\n".join(summaries) if summaries else "No significant issues detected in analyzed diffs." + return { + "summary": final_summary, + "suggestions": suggestions, + "inline_comments": inline_comments, + "_provider": provider or "unknown", + "_model": model or "unknown", + } + +def run(): + gh_token = os.getenv("GITHUB_TOKEN") + if not gh_token: + raise RuntimeError("GITHUB_TOKEN is required.") + + owner, repo, pr_number = get_pr_context_from_env() + + title, body, files = fetch_pr_details(owner, repo, pr_number, gh_token) + per_file_diffs = build_unified_diffs(files) + + if not per_file_diffs: + comment = "### 🤖 AI PR Review (Automated)\nNo textual diffs available (binary or very large files)." + post_pr_review(owner, repo, pr_number, gh_token, comment, event="COMMENT") + return + + # Chunk diffs and review each chunk + chunks = chunk_diffs_by_size(per_file_diffs, MAX_DIFF_CHARS, MIN_FILE_CHUNK) + + # Optional hints to improve review quality (set via env): + language_hint = os.getenv("LANGUAGE_HINT") # e.g., "Python (FastAPI), TypeScript (React)" + guidelines = os.getenv("REVIEW_GUIDELINES") # short team rules or expectations + + all_results: List[Dict[str, Any]] = [] + for ch_index, ch in enumerate(chunks, start=1): + diff_text = assemble_diff_text(ch) + # Truncate just in case (safety) + if len(diff_text) > MAX_DIFF_CHARS: + diff_text = diff_text[:MAX_DIFF_CHARS] + "\n...TRUNCATED BY BOT..." 
From 778c8adb690a75bdf10947739f6e04aa2fbf79d9 Mon Sep 17 00:00:00 2001
From: Rajkumar PP <79452802+rajkumarpp@users.noreply.github.com>
Date: Mon, 1 Sep 2025 11:31:39 +0530
Subject: [PATCH 9/9] Update ai-pr-review.yml

---
 .github/workflows/ai-pr-review.yml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ai-pr-review.yml b/.github/workflows/ai-pr-review.yml
index 090a4a2ef..fa4d9a1d0 100644
--- a/.github/workflows/ai-pr-review.yml
+++ b/.github/workflows/ai-pr-review.yml
@@ -28,8 +28,11 @@
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           LLM_PROVIDER: groq
           GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
+          # Optional hints:
+          # LANGUAGE_HINT: "Python (FastAPI), TypeScript (React)"
+          # REVIEW_GUIDELINES: "Require tests for public APIs; avoid broad try/except; use f-strings; follow PEP8."
+          # Tuning:
+          # DIFF_MAX_CHARS: "120000"
+          # MIN_FILE_CHUNK: "20000"
         run: |
-          # Download the PR reviewer script
-          curl -o pr_reviewer.py https://raw.githubusercontent.com/YOUR-REPO/scripts/pr_reviewer.py
-          curl -o llm_client.py https://raw.githubusercontent.com/YOUR-REPO/scripts/llm_client.py
           python pr_reviewer.py
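The curl download step is dropped here because patches 6 through 8 check pr_reviewer.py and llm_client.py into the repository itself, where actions/checkout makes them available. The commented DIFF_MAX_CHARS and MIN_FILE_CHUNK knobs map directly onto chunk_diffs_by_size from patch 8; a quick illustration of how the packer behaves with those defaults (the file names and sizes are synthetic):

from pr_reviewer import chunk_diffs_by_size

diffs = [
    ("small.py",  "d" * 50),       # tiny: packed with a neighbor
    ("medium.py", "d" * 70_000),   # fits under the 120k cap
    ("huge.sql",  "d" * 200_000),  # over the cap: reviewed alone (and truncated later)
]
chunks = chunk_diffs_by_size(diffs, max_chars=120_000, min_chunk=20_000)
print([[name for name, _ in ch] for ch in chunks])
# -> [['small.py', 'medium.py'], ['huge.sql']]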