From ca957e56b0dbf110c19f6791d1f0e6448f4a69ae Mon Sep 17 00:00:00 2001
From: Rajkumar PP <79452802+rajkumarpp@users.noreply.github.com>
Date: Tue, 22 Aug 2023 15:54:03 +0530
Subject: [PATCH 1/9] Update Jenkinsfile

---
 Jenkinsfile | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 445611cb7..e837b0b12 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,21 +1,23 @@
-pipeline{
+pipeline {
     agent any
 
-    tools {
-        maven 'maven'
-        jdk 'java'
-    }
-
-    stages{
-        stage('checkout'){
-            steps{
-                checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 'github access', url: 'https://github.com/sreenivas449/java-hello-world-with-maven.git']]])
+    stages {
+        stage('build') {
+            steps {
+                bat "mvn clean"
+            }
+        }
+        stage('test') {
+            steps {
+                bat "mvn test"
+                echo "Testing Completed"
             }
         }
-        stage('build'){
-            steps{
-                bat 'mvn package'
+        stage('deploy') {
+            steps {
+                bat "mvn package"
+                echo "Deployment Completed"
             }
         }
     }
-}
\ No newline at end of file
+}

From 74e3778e24e5c2e2d503d33623c53419a301a27f Mon Sep 17 00:00:00 2001
From: Rajkumar PP <79452802+rajkumarpp@users.noreply.github.com>
Date: Wed, 23 Aug 2023 11:07:11 +0530
Subject: [PATCH 2/9] Delete Jenkinsfile

---
 Jenkinsfile | 23 -----------------------
 1 file changed, 23 deletions(-)
 delete mode 100644 Jenkinsfile

diff --git a/Jenkinsfile b/Jenkinsfile
deleted file mode 100644
index e837b0b12..000000000
--- a/Jenkinsfile
+++ /dev/null
@@ -1,23 +0,0 @@
-pipeline {
-    agent any
-
-    stages {
-        stage('build') {
-            steps {
-                bat "mvn clean"
-            }
-        }
-        stage('test') {
-            steps {
-                bat "mvn test"
-                echo "Testing Completed"
-            }
-        }
-        stage('deploy') {
-            steps {
-                bat "mvn package"
-                echo "Deployment Completed"
-            }
-        }
-    }
-}

From d7daa2e7fae498ec75f63a4d0a379781ee653c8f Mon Sep 17 00:00:00 2001
From: Rajkumar PP <79452802+rajkumarpp@users.noreply.github.com>
Date: Wed, 30 Aug 2023 11:26:32 +0530
Subject: [PATCH 3/9] Add files via upload

---
 Jenkinsfile.txt | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 Jenkinsfile.txt

diff --git a/Jenkinsfile.txt b/Jenkinsfile.txt
new file mode 100644
index 000000000..dfa449349
--- /dev/null
+++ b/Jenkinsfile.txt
@@ -0,0 +1,21 @@
+pipeline{
+    agent any
+
+    tools {
+        maven 'maven'
+        jdk 'java'
+    }
+
+    stages{
+        stage('checkout'){
+            steps{
+                checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 'github access', url: 'https://github.com/sreenivas449/java-hello-world-with-maven.git']]])
+            }
+        }
+        stage('build'){
+            steps{
+                bat 'mvn package'
+            }
+        }
+    }
+}

From f0142777ba7f9172545fac427e620c2af7944f4a Mon Sep 17 00:00:00 2001
From: Rajkumar PP <79452802+rajkumarpp@users.noreply.github.com>
Date: Wed, 30 Aug 2023 11:26:53 +0530
Subject: [PATCH 4/9] Rename Jenkinsfile.txt to Jenkinsfile

---
 Jenkinsfile.txt => Jenkinsfile | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename Jenkinsfile.txt => Jenkinsfile (100%)

diff --git a/Jenkinsfile.txt b/Jenkinsfile
similarity index 100%
rename from Jenkinsfile.txt
rename to Jenkinsfile

From 3d8ccb35899ef7afaea82854f50b385ed19fc607 Mon Sep 17 00:00:00 2001
From: Rajkumar PP <79452802+rajkumarpp@users.noreply.github.com>
Date: Mon, 1 Sep 2025 11:25:17 +0530
Subject: [PATCH 5/9] Create ai-pr-review.yml

---
 .github/workflows/ai-pr-review.yml | 35 ++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 .github/workflows/ai-pr-review.yml

diff --git a/.github/workflows/ai-pr-review.yml b/.github/workflows/ai-pr-review.yml
new file mode 100644
index 000000000..090a4a2ef
--- /dev/null
+++ b/.github/workflows/ai-pr-review.yml
@@ -0,0 +1,35 @@
+name: AI PR Review (Groq)
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+
+jobs:
+  review:
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install requests
+
+      - name: Run AI Review
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          LLM_PROVIDER: groq
+          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
+        run: |
+          # Download the PR reviewer script
+          curl -o pr_reviewer.py https://raw.githubusercontent.com/YOUR-REPO/scripts/pr_reviewer.py
+          curl -o llm_client.py https://raw.githubusercontent.com/YOUR-REPO/scripts/llm_client.py
+          python pr_reviewer.py
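The workflow above reads GROQ_API_KEY from repository secrets, so that secret must exist before the first run. One way to provision it programmatically is GitHub's documented Actions-secrets REST flow; the sketch below assumes PyNaCl and requests are installed (`pip install pynacl requests`), and the repo slug, token env name, and key value are placeholders:

import base64
import os
import requests
from nacl import encoding, public

OWNER_REPO = "owner/repo"                 # placeholder
TOKEN = os.environ["GH_PAT"]              # a token with repo admin access (placeholder env name)
headers = {"Authorization": f"Bearer {TOKEN}", "Accept": "application/vnd.github+json"}

# 1. Fetch the repository public key GitHub uses for secret encryption
key_resp = requests.get(
    f"https://api.github.com/repos/{OWNER_REPO}/actions/secrets/public-key",
    headers=headers, timeout=30,
)
key_resp.raise_for_status()
key = key_resp.json()

# 2. Encrypt the secret value with a libsodium sealed box
pk = public.PublicKey(key["key"].encode(), encoding.Base64Encoder())
encrypted = base64.b64encode(
    public.SealedBox(pk).encrypt(os.environ["GROQ_API_KEY"].encode())
).decode()

# 3. Create or update the secret
put_resp = requests.put(
    f"https://api.github.com/repos/{OWNER_REPO}/actions/secrets/GROQ_API_KEY",
    headers=headers,
    json={"encrypted_value": encrypted, "key_id": key["key_id"]},
    timeout=30,
)
put_resp.raise_for_status()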
From ebede0153915cbff176eefeb21398786975d60d6 Mon Sep 17 00:00:00 2001
From: Rajkumar PP <79452802+rajkumarpp@users.noreply.github.com>
Date: Mon, 1 Sep 2025 11:26:55 +0530
Subject: [PATCH 6/9] Create llm_client.py

---
 llm_client.py | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)
 create mode 100644 llm_client.py

diff --git a/llm_client.py b/llm_client.py
new file mode 100644
index 000000000..1d9f8768c
--- /dev/null
+++ b/llm_client.py
@@ -0,0 +1,109 @@
+# file: llm_client.py
+import os
+import json
+import requests
+from typing import Dict
+
+SYSTEM_PROMPT = (
+    "You are a senior software engineer reviewing a pull request. "
+    "Review for code quality, security issues, test coverage, and best practices. "
+    "Return JSON with fields: summary, suggestions (list), and optionally inline_comments "
+    "(list of {file, line, comment}). Be concise, specific, and actionable."
+)
+
+def _prompt_for_diff(pr_title: str, pr_body: str, diff_text: str) -> str:
+    return f"""PR Title: {pr_title}
+
+PR Description:
+{pr_body}
+
+Diff (unified format):
+{diff_text}
+
+Return strictly valid JSON with keys: summary, suggestions, inline_comments.
+"""
+
+def _post_openai_compatible(url: str, api_key: str, model: str, prompt: str) -> str:
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json",
+    }
+    payload = {
+        "model": model,
+        "messages": [
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": prompt},
+        ],
+        "temperature": 0.2,
+        "max_tokens": 1200,
+    }
+    resp = requests.post(url, headers=headers, json=payload, timeout=60)
+    resp.raise_for_status()
+    data = resp.json()
+    # Supports OpenAI-compatible chat APIs
+    return data["choices"][0]["message"]["content"]
+
+def _post_huggingface_inference(model_id: str, api_key: str, prompt: str) -> str:
+    # Basic text-generation style for some HF hosted models.
+    # Many chat models also accept prompts in chat-template form.
+    url = f"https://api-inference.huggingface.co/models/{model_id}"
+    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+    payload = {
+        "inputs": f"{SYSTEM_PROMPT}\n\n{prompt}\n\nReturn strictly valid JSON.",
+        "parameters": {"max_new_tokens": 1200, "temperature": 0.2},
+    }
+    resp = requests.post(url, headers=headers, json=payload, timeout=60)
+    resp.raise_for_status()
+    data = resp.json()
+    # HF returns a list[ { "generated_text": ... } ] or a dict for some models
+    if isinstance(data, list) and data and "generated_text" in data[0]:
+        return data[0]["generated_text"]
+    # Some chat models return text under different fields; adapt as needed:
+    return json.dumps(data)
+
+def generate_review(pr_title: str, pr_body: str, diff_text: str) -> Dict:
+    provider = os.getenv("LLM_PROVIDER", "groq").lower()
+    prompt = _prompt_for_diff(pr_title, pr_body, diff_text)
+
+    if provider == "groq":
+        api_key = os.environ["GROQ_API_KEY"]
+        model = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
+        url = "https://api.groq.com/openai/v1/chat/completions"
+        content = _post_openai_compatible(url, api_key, model, prompt)
+
+    elif provider == "openrouter":
+        api_key = os.environ["OPENROUTER_API_KEY"]
+        model = os.getenv("OPENROUTER_MODEL", "meta-llama/llama-3.1-8b-instruct")
+        url = "https://openrouter.ai/api/v1/chat/completions"
+        content = _post_openai_compatible(url, api_key, model, prompt)
+
+    elif provider == "huggingface":
+        api_key = os.environ["HF_API_KEY"]
+        model_id = os.getenv("HF_MODEL_ID", "meta-llama/Llama-3.1-8B-Instruct")
+        content = _post_huggingface_inference(model_id, api_key, prompt)
+
+    elif provider == "local":
+        # Example: Ollama exposes an OpenAI-compatible endpoint at /v1/chat/completions
+        api_key = os.getenv("LOCAL_API_KEY", "not-needed")
+        model = os.getenv("LOCAL_MODEL", "llama3.1")
+        url = os.getenv("LOCAL_OPENAI_URL", "http://localhost:11434/v1/chat/completions")
+        content = _post_openai_compatible(url, api_key, model, prompt)
+
+    else:
+        raise ValueError(f"Unknown LLM_PROVIDER: {provider}")
+
+    # Try to parse JSON; if the model returns extra text, attempt to extract JSON block.
+    try:
+        # Direct JSON
+        return json.loads(content)
+    except json.JSONDecodeError:
+        # Fallback: greedily extract the outermost {...} block
+        import re
+        m = re.search(r"\{.*\}", content, flags=re.DOTALL)
+        if m:
+            try:
+                return json.loads(m.group(0))
+            except Exception:
+                pass
+        # Ultimate fallback
+        return {"summary": content[:8000], "suggestions": [], "inline_comments": []}
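A quick way to sanity-check llm_client.py before wiring it into CI is to call generate_review directly against a toy diff. A minimal local sketch, assuming GROQ_API_KEY is exported and llm_client.py is importable (the PR text and diff string here are made up for illustration):

import json
from llm_client import generate_review

toy_diff = (
    "--- a/app.py\n"
    "+++ b/app.py\n"
    "@@ -1 +1,2 @@\n"
    " print('hello')\n"
    "+print('goodbye')\n"
)
review = generate_review("Add goodbye message", "Prints a farewell line.", toy_diff)
print(json.dumps(review, indent=2))  # expect keys: summary, suggestions, inline_comments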
From fb414487a2d693c4af119b0037d2c7497ad87aad Mon Sep 17 00:00:00 2001
From: Rajkumar PP <79452802+rajkumarpp@users.noreply.github.com>
Date: Mon, 1 Sep 2025 11:28:32 +0530
Subject: [PATCH 7/9] Update llm_client.py

---
 llm_client.py | 176 +++++++++++++++++++++++++------------------------
 1 file changed, 89 insertions(+), 87 deletions(-)

diff --git a/llm_client.py b/llm_client.py
index 1d9f8768c..931b254c5 100644
--- a/llm_client.py
+++ b/llm_client.py
@@ -1,109 +1,111 @@
-# file: llm_client.py
+# llm_client.py
 import os
+import re
 import json
+import time
 import requests
-from typing import Dict
+from typing import Dict, Any, List, Optional, Tuple
 
-SYSTEM_PROMPT = (
-    "You are a senior software engineer reviewing a pull request. "
-    "Review for code quality, security issues, test coverage, and best practices. "
-    "Return JSON with fields: summary, suggestions (list), and optionally inline_comments "
-    "(list of {file, line, comment}). Be concise, specific, and actionable."
+DEFAULT_SYSTEM_PROMPT = (
+    "You are a seasoned senior software engineer and code reviewer. "
+    "Review the pull request changes for code quality, correctness, security, performance, "
+    "maintainability, readability, and test coverage. "
+    "Be specific and actionable. Prefer concise, structured feedback.\n\n"
+    "Return STRICT JSON with keys:\n"
+    " - summary: string\n"
+    " - suggestions: array of strings (actionable, prioritized)\n"
+    " - inline_comments: array of objects with keys {file: string, line: number, comment: string}\n"
 )
 
-def _prompt_for_diff(pr_title: str, pr_body: str, diff_text: str) -> str:
-    return f"""PR Title: {pr_title}
+def build_prompt(
+    pr_title: str,
+    pr_body: str,
+    diff_text: str,
+    language_hint: Optional[str] = None,
+    guidelines: Optional[str] = None,
+) -> str:
+    extras = []
+    if language_hint:
+        extras.append(f"Primary language/framework context: {language_hint}")
+    if guidelines:
+        extras.append(f"Team guidelines:\n{guidelines}")
+    extras_block = "\n\n".join(extras).strip()
+    if extras_block:
+        extras_block = "\n\n" + extras_block
 
-PR Description:
-{pr_body}
+    return (
+        f"PR Title: {pr_title}\n\n"
+        f"PR Description:\n{(pr_body or '').strip()}\n"
+        f"{extras_block}\n\n"
+        "Unified Diff:\n"
+        f"{diff_text}\n\n"
+        "Return STRICT JSON only, no prose outside JSON."
+    )
 
-Diff (unified format):
-{diff_text}
-
-Return strictly valid JSON with keys: summary, suggestions, inline_comments.
-"""
-
-def _post_openai_compatible(url: str, api_key: str, model: str, prompt: str) -> str:
+def _openai_compatible_chat(
+    url: str,
+    api_key: str,
+    model: str,
+    system_prompt: str,
+    user_prompt: str,
+    temperature: float = 0.2,
+    max_tokens: int = 1200,
+    retries: int = 3,
+    timeout: int = 60,
+    extra_headers: Optional[Dict[str, str]] = None,
+) -> str:
     headers = {
         "Authorization": f"Bearer {api_key}",
         "Content-Type": "application/json",
     }
+    if extra_headers:
+        headers.update(extra_headers)
+
     payload = {
         "model": model,
         "messages": [
-            {"role": "system", "content": SYSTEM_PROMPT},
-            {"role": "user", "content": prompt},
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
         ],
-        "temperature": 0.2,
-        "max_tokens": 1200,
+        "temperature": temperature,
+        "max_tokens": max_tokens,
     }
-    resp = requests.post(url, headers=headers, json=payload, timeout=60)
-    resp.raise_for_status()
-    data = resp.json()
-    # Supports OpenAI-compatible chat APIs
-    return data["choices"][0]["message"]["content"]
-
-def _post_huggingface_inference(model_id: str, api_key: str, prompt: str) -> str:
-    # Basic text-generation style for some HF hosted models.
-    # Many chat models also accept prompts in chat-template form.
+    last_err = None
+    for attempt in range(1, retries + 1):
+        try:
+            resp = requests.post(url, headers=headers, json=payload, timeout=timeout)
+            if resp.status_code == 429:
+                # Rate limited; exponential backoff
+                wait = min(2 ** attempt, 10)
+                time.sleep(wait)
+                continue
+            resp.raise_for_status()
+            data = resp.json()
+            return data["choices"][0]["message"]["content"]
+        except Exception as e:
+            last_err = e
+            time.sleep(min(2 ** attempt, 5))
+    raise RuntimeError(f"LLM call failed after {retries} attempts: {last_err}")
+
+def _huggingface_inference(
+    model_id: str,
+    api_key: str,
+    system_prompt: str,
+    user_prompt: str,
+    temperature: float = 0.2,
+    max_new_tokens: int = 1200,
+    retries: int = 3,
+    timeout: int = 60,
+) -> str:
     url = f"https://api-inference.huggingface.co/models/{model_id}"
     headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+    inputs = f"{system_prompt}\n\n{user_prompt}\n\nReturn STRICT JSON only."
     payload = {
-        "inputs": f"{SYSTEM_PROMPT}\n\n{prompt}\n\nReturn strictly valid JSON.",
-        "parameters": {"max_new_tokens": 1200, "temperature": 0.2},
+        "inputs": inputs,
+        "parameters": {"temperature": temperature, "max_new_tokens": max_new_tokens},
     }
-    resp = requests.post(url, headers=headers, json=payload, timeout=60)
-    resp.raise_for_status()
-    data = resp.json()
-    # HF returns a list[ { "generated_text": ... } ] or a dict for some models
-    if isinstance(data, list) and data and "generated_text" in data[0]:
-        return data[0]["generated_text"]
-    # Some chat models return text under different fields; adapt as needed:
-    return json.dumps(data)
-
-def generate_review(pr_title: str, pr_body: str, diff_text: str) -> Dict:
-    provider = os.getenv("LLM_PROVIDER", "groq").lower()
-    prompt = _prompt_for_diff(pr_title, pr_body, diff_text)
-
-    if provider == "groq":
-        api_key = os.environ["GROQ_API_KEY"]
-        model = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
-        url = "https://api.groq.com/openai/v1/chat/completions"
-        content = _post_openai_compatible(url, api_key, model, prompt)
-
-    elif provider == "openrouter":
-        api_key = os.environ["OPENROUTER_API_KEY"]
-        model = os.getenv("OPENROUTER_MODEL", "meta-llama/llama-3.1-8b-instruct")
-        url = "https://openrouter.ai/api/v1/chat/completions"
-        content = _post_openai_compatible(url, api_key, model, prompt)
-
-    elif provider == "huggingface":
-        api_key = os.environ["HF_API_KEY"]
-        model_id = os.getenv("HF_MODEL_ID", "meta-llama/Llama-3.1-8B-Instruct")
-        content = _post_huggingface_inference(model_id, api_key, prompt)
-
-    elif provider == "local":
-        # Example: Ollama exposes an OpenAI-compatible endpoint at /v1/chat/completions
-        api_key = os.getenv("LOCAL_API_KEY", "not-needed")
-        model = os.getenv("LOCAL_MODEL", "llama3.1")
-        url = os.getenv("LOCAL_OPENAI_URL", "http://localhost:11434/v1/chat/completions")
-        content = _post_openai_compatible(url, api_key, model, prompt)
-
-    else:
-        raise ValueError(f"Unknown LLM_PROVIDER: {provider}")
-
-    # Try to parse JSON; if the model returns extra text, attempt to extract JSON block.
-    try:
-        # Direct JSON
-        return json.loads(content)
-    except json.JSONDecodeError:
-        # Fallback: greedily extract the outermost {...} block
-        import re
-        m = re.search(r"\{.*\}", content, flags=re.DOTALL)
-        if m:
-            try:
-                return json.loads(m.group(0))
-            except Exception:
-                pass
-        # Ultimate fallback
-        return {"summary": content[:8000], "suggestions": [], "inline_comments": []}
+    last_err = None
+    for attempt in range(1, retries + 1):
+        try:
+            resp
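Note that this patch, as committed, cuts llm_client.py off mid-function: per the hunk header the new file is 111 lines and ends inside _huggingface_inference at the dangling "resp", while the old generate_review is deleted without a replacement, so the `from llm_client import generate_review` in the next patch cannot resolve. A hedged sketch of the missing tail follows, inferred from the retry loop in _openai_compatible_chat, the old Hugging Face response handling, and the five-argument call in pr_reviewer.py; the openrouter/local branches are omitted for brevity, and the `_provider`/`_model` metadata keys are assumptions based on how pr_reviewer.py consumes results:

# Hypothetical completion of llm_client.py (NOT part of the committed patch).
# Continuing inside _huggingface_inference, right after the truncated "resp":
            resp = requests.post(url, headers=headers, json=payload, timeout=timeout)
            if resp.status_code == 429:
                time.sleep(min(2 ** attempt, 10))
                continue
            resp.raise_for_status()
            data = resp.json()
            # HF text-generation models return a list [{ "generated_text": ... }]
            if isinstance(data, list) and data and "generated_text" in data[0]:
                return data[0]["generated_text"]
            return json.dumps(data)
        except Exception as e:
            last_err = e
            time.sleep(min(2 ** attempt, 5))
    raise RuntimeError(f"LLM call failed after {retries} attempts: {last_err}")

def generate_review(
    pr_title: str,
    pr_body: str,
    diff_text: str,
    language_hint: Optional[str] = None,
    guidelines: Optional[str] = None,
) -> Dict[str, Any]:
    provider = os.getenv("LLM_PROVIDER", "groq").lower()
    user_prompt = build_prompt(pr_title, pr_body, diff_text, language_hint, guidelines)

    if provider == "groq":
        api_key = os.environ["GROQ_API_KEY"]
        model = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
        url = "https://api.groq.com/openai/v1/chat/completions"
        content = _openai_compatible_chat(url, api_key, model, DEFAULT_SYSTEM_PROMPT, user_prompt)
    elif provider == "huggingface":
        api_key = os.environ["HF_API_KEY"]
        model = os.getenv("HF_MODEL_ID", "meta-llama/Llama-3.1-8B-Instruct")
        content = _huggingface_inference(model, api_key, DEFAULT_SYSTEM_PROMPT, user_prompt)
    else:
        raise ValueError(f"Unknown LLM_PROVIDER: {provider}")

    # Parse strict JSON, falling back to the outermost {...} block
    try:
        result = json.loads(content)
    except json.JSONDecodeError:
        m = re.search(r"\{.*\}", content, flags=re.DOTALL)
        result = None
        if m:
            try:
                result = json.loads(m.group(0))
            except Exception:
                result = None
    if not isinstance(result, dict):
        result = {"summary": content[:8000], "suggestions": [], "inline_comments": []}

    # Metadata that pr_reviewer.py's aggregate_results reads back out
    result.setdefault("_provider", provider)
    result.setdefault("_model", model)
    return result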
+ """ + diffs: List[Tuple[str, str]] = [] + for f in files: + filename = f.get("filename") + patch = f.get("patch") + if not filename or not patch: + # Skip binary or too-large diffs + continue + diff_text = f"--- a/{filename}\n+++ b/{filename}\n{patch}\n" + diffs.append((filename, diff_text)) + return diffs + +def chunk_diffs_by_size(diffs: List[Tuple[str, str]], max_chars: int, min_chunk: int) -> List[List[Tuple[str, str]]]: + """ + Group per-file diffs into chunks, each not exceeding max_chars. + Tries to pack small files together for better context. + """ + # Sort by size ascending to pack small files first + diffs_sorted = sorted(diffs, key=lambda x: len(x[1])) + chunks: List[List[Tuple[str, str]]] = [] + current: List[Tuple[str, str]] = [] + current_size = 0 + + for item in diffs_sorted: + size = len(item[1]) + # If single file is huge, put it alone (hard cap) + if size >= max_chars: + if current: + chunks.append(current) + current = [] + current_size = 0 + chunks.append([item]) + continue + + if current_size + size <= max_chars or (not current and size < max_chars): + current.append(item) + current_size += size + else: + # finalize current and start new + chunks.append(current) + current = [item] + current_size = size + + if current: + chunks.append(current) + + # Merge tiny chunks if possible + merged: List[List[Tuple[str, str]]] = [] + carry: List[Tuple[str, str]] = [] + carry_size = 0 + for ch in chunks: + ch_size = sum(len(d) for _, d in ch) + if ch_size < min_chunk: + carry.extend(ch) + carry_size += ch_size + if carry_size >= min_chunk: + merged.append(carry) + carry = [] + carry_size = 0 + else: + if carry: + merged.append(carry) + carry = [] + carry_size = 0 + merged.append(ch) + if carry: + merged.append(carry) + return merged + +def assemble_diff_text(chunk: List[Tuple[str, str]]) -> str: + return "\n".join(diff for _, diff in chunk) + +def format_review_comment(aggregated: Dict[str, Any]) -> str: + provider = aggregated.get("_provider", "unknown") + model = aggregated.get("_model", "unknown") + + comment = "### 🤖 AI PR Review (Automated)\n" + comment += f"_Provider: **{provider}**, Model: **{model}**_\n\n" + + summary = aggregated.get("summary") or "" + if summary.strip(): + comment += "**Summary**\n\n" + comment += f"{summary.strip()}\n\n" + + suggestions: List[str] = aggregated.get("suggestions") or [] + if suggestions: + comment += "**Suggestions**\n\n" + for i, s in enumerate(suggestions, 1): + comment += f"{i}. {s.strip()}\n" + comment += "\n" + + inline = aggregated.get("inline_comments") or [] + if inline: + comment += "
+        for c in inline:
+            file = c.get("file", "")
+            line = c.get("line", "")
+            text = c.get("comment", "").strip()
+            if text:
+                comment += f"- `{file}`:{line} — {text}\n"
+        comment += "\n</details>\n"
\n" + + comment += "\n> _Note_: Inline positions are suggestions only. The bot posts a single review comment to avoid noisy threads." + return comment + +def post_pr_review(owner: str, repo: str, pr_number: int, gh_token: str, body: str, event: str = "COMMENT") -> Dict[str, Any]: + headers = _gh_headers(gh_token) + payload = {"body": body, "event": event} + resp = requests.post( + f"{GITHUB_API}/repos/{owner}/{repo}/pulls/{pr_number}/reviews", + headers=headers, json=payload, timeout=30 + ) + resp.raise_for_status() + return resp.json() + +def aggregate_results(results: List[Dict[str, Any]]) -> Dict[str, Any]: + """ + Combine multiple chunk reviews into a single review: + - Concatenate summaries with headings + - Merge suggestions (deduplicate similar lines) + - Merge inline comments + """ + summaries: List[str] = [] + suggestions: List[str] = [] + inline_comments: List[Dict[str, Any]] = [] + provider, model = None, None + + def _norm(s: str) -> str: + return " ".join((s or "").strip().split()) + + seen_suggestions = set() + + for idx, r in enumerate(results, start=1): + if not provider and r.get("_provider"): + provider = r.get("_provider") + if not model and r.get("_model"): + model = r.get("_model") + + s = (r.get("summary") or "").strip() + if s: + summaries.append(f"**Chunk {idx}**:\n{s}") + + for sug in r.get("suggestions") or []: + key = _norm(sug) + if key and key not in seen_suggestions: + suggestions.append(sug) + seen_suggestions.add(key) + + for ic in r.get("inline_comments") or []: + if isinstance(ic, dict) and ic.get("comment"): + inline_comments.append(ic) + + final_summary = "\n\n".join(summaries) if summaries else "No significant issues detected in analyzed diffs." + return { + "summary": final_summary, + "suggestions": suggestions, + "inline_comments": inline_comments, + "_provider": provider or "unknown", + "_model": model or "unknown", + } + +def run(): + gh_token = os.getenv("GITHUB_TOKEN") + if not gh_token: + raise RuntimeError("GITHUB_TOKEN is required.") + + owner, repo, pr_number = get_pr_context_from_env() + + title, body, files = fetch_pr_details(owner, repo, pr_number, gh_token) + per_file_diffs = build_unified_diffs(files) + + if not per_file_diffs: + comment = "### 🤖 AI PR Review (Automated)\nNo textual diffs available (binary or very large files)." + post_pr_review(owner, repo, pr_number, gh_token, comment, event="COMMENT") + return + + # Chunk diffs and review each chunk + chunks = chunk_diffs_by_size(per_file_diffs, MAX_DIFF_CHARS, MIN_FILE_CHUNK) + + # Optional hints to improve review quality (set via env): + language_hint = os.getenv("LANGUAGE_HINT") # e.g., "Python (FastAPI), TypeScript (React)" + guidelines = os.getenv("REVIEW_GUIDELINES") # short team rules or expectations + + all_results: List[Dict[str, Any]] = [] + for ch_index, ch in enumerate(chunks, start=1): + diff_text = assemble_diff_text(ch) + # Truncate just in case (safety) + if len(diff_text) > MAX_DIFF_CHARS: + diff_text = diff_text[:MAX_DIFF_CHARS] + "\n...TRUNCATED BY BOT..." 
From 778c8adb690a75bdf10947739f6e04aa2fbf79d9 Mon Sep 17 00:00:00 2001
From: Rajkumar PP <79452802+rajkumarpp@users.noreply.github.com>
Date: Mon, 1 Sep 2025 11:31:39 +0530
Subject: [PATCH 9/9] Update ai-pr-review.yml

---
 .github/workflows/ai-pr-review.yml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ai-pr-review.yml b/.github/workflows/ai-pr-review.yml
index 090a4a2ef..fa4d9a1d0 100644
--- a/.github/workflows/ai-pr-review.yml
+++ b/.github/workflows/ai-pr-review.yml
@@ -28,8 +28,11 @@
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           LLM_PROVIDER: groq
           GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
+          # Optional hints:
+          # LANGUAGE_HINT: "Python (FastAPI), TypeScript (React)"
+          # REVIEW_GUIDELINES: "Require tests for public APIs; avoid broad try/except; use f-strings; follow PEP8."
+          # Tuning:
+          # DIFF_MAX_CHARS: "120000"
+          # MIN_FILE_CHUNK: "20000"
         run: |
-          # Download the PR reviewer script
-          curl -o pr_reviewer.py https://raw.githubusercontent.com/YOUR-REPO/scripts/pr_reviewer.py
-          curl -o llm_client.py https://raw.githubusercontent.com/YOUR-REPO/scripts/llm_client.py
           python pr_reviewer.py
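The curl download step is dropped here because patches 6 through 8 check pr_reviewer.py and llm_client.py into the repository itself, where actions/checkout makes them available. The commented DIFF_MAX_CHARS and MIN_FILE_CHUNK knobs map directly onto chunk_diffs_by_size from patch 8; a quick illustration of how the packer behaves with those defaults (the file names and sizes are synthetic):

from pr_reviewer import chunk_diffs_by_size

diffs = [
    ("small.py",  "d" * 50),       # tiny: packed with a neighbor
    ("medium.py", "d" * 70_000),   # fits under the 120k cap
    ("huge.sql",  "d" * 200_000),  # over the cap: reviewed alone (and truncated later)
]
chunks = chunk_diffs_by_size(diffs, max_chars=120_000, min_chunk=20_000)
print([[name for name, _ in ch] for ch in chunks])
# -> [['small.py', 'medium.py'], ['huge.sql']]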