diff --git a/.gitignore b/.gitignore
index 4cb0fef..b83bd01 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,4 +50,6 @@ __pycache__/
 venv
 
 # vscode
-.vscode/
\ No newline at end of file
+.vscode/
+
+outputs/
\ No newline at end of file
diff --git a/backend/app/routers/generate.py b/backend/app/routers/generate.py
index 51befcc..1f2c919 100644
--- a/backend/app/routers/generate.py
+++ b/backend/app/routers/generate.py
@@ -1,6 +1,12 @@
-from fastapi import APIRouter, Request, HTTPException
+import json
+import asyncio
+import re
+import logging
+from functools import lru_cache
+from fastapi import APIRouter, Request
 from fastapi.responses import StreamingResponse
-from dotenv import load_dotenv
+from pydantic import BaseModel
+
 from app.services.github_service import GitHubService
 from app.services.o4_mini_openai_service import OpenAIo4Service
 from app.prompts import (
@@ -9,47 +15,361 @@
     SYSTEM_THIRD_PROMPT,
     ADDITIONAL_SYSTEM_INSTRUCTIONS_PROMPT,
 )
-from anthropic._exceptions import RateLimitError
-from pydantic import BaseModel
-from functools import lru_cache
-import re
-import json
-import asyncio
-
-# from app.services.claude_service import ClaudeService
-# from app.core.limiter import limiter
 
-load_dotenv()
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 
-router = APIRouter(prefix="/generate", tags=["OpenAI o4-mini"])
+router = APIRouter()
 
 # Initialize services
-# claude_service = ClaudeService()
 o4_service = OpenAIo4Service()
+github_service = GitHubService()
+
+
+def safe_json_response(data):
+    """
+    Safely serialize data to JSON for SSE responses, handling special characters.
+    """
+    try:
+        # Ensure chunk content is properly encoded
+        if 'chunk' in data and data['chunk']:
+            # The chunk might contain characters that need to be handled carefully
+            # json.dumps already handles escaping, but we want to be extra safe
+            data['chunk'] = str(data['chunk'])
+        return json.dumps(data, ensure_ascii=False)
+    except (TypeError, ValueError) as e:
+        # Fallback for any JSON serialization issues
+        error_data = {'error': f'JSON serialization error: {str(e)}'}
+        return json.dumps(error_data)
+
+
+def advanced_mermaid_validation(mermaid_code: str) -> tuple[bool, list[str]]:
+    """
+    Perform advanced validation on Mermaid code.
+
+    Returns:
+        tuple: (is_valid, list_of_warnings)
+    """
+    warnings = []
+    is_valid = True
+
+    # Check for graph type consistency
+    graph_types = re.findall(r'^(graph|flowchart|sequenceDiagram|classDiagram)', mermaid_code, re.MULTILINE)
+    if len(graph_types) > 1:
+        warnings.append(f"Multiple graph types found: {graph_types}")
+        is_valid = False
+
+    # Check for orphaned nodes (nodes that are defined but not connected)
+    node_definitions = set(re.findall(r'(\w+)\s*[\[\(]', mermaid_code))
+    node_connections = set()
+
+    # Find all nodes in connections
+    connection_patterns = [
+        r'(\w+)\s*-->\s*(\w+)',
+        r'(\w+)\s*---\s*(\w+)',
+        r'(\w+)\s*-\.\s*(\w+)',
+        r'(\w+)\s*==>\s*(\w+)',
+        r'(\w+)\s*-\.-\s*(\w+)',
+    ]
+
+    for pattern in connection_patterns:
+        matches = re.findall(pattern, mermaid_code)
+        for match in matches:
+            node_connections.update(match)
+
+    orphaned_nodes = node_definitions - node_connections
+    if orphaned_nodes:
+        warnings.append(f"Orphaned nodes found (defined but not connected): {orphaned_nodes}")
+
+    # Check for undefined nodes (nodes used in connections but not defined)
+    undefined_nodes = node_connections - node_definitions
+    if undefined_nodes:
+        warnings.append(f"Undefined nodes found (used but not defined): {undefined_nodes}")
+        is_valid = False
+
+    # Check for circular references in simple cases
+    connections = []
+    for pattern in connection_patterns:
+        matches = re.findall(pattern, mermaid_code)
+        for match in matches:
+            connections.append((match[0], match[1]))
+
+    # Simple cycle detection (not comprehensive but catches basic issues)
+    for source, target in connections:
+        if (target, source) in connections:
+            warnings.append(f"Potential circular reference between {source} and {target}")
+
+    # Check for reserved keywords being used as node IDs
+    reserved_keywords = ['class', 'click', 'end', 'graph', 'flowchart', 'style', 'classDef']
+    for keyword in reserved_keywords:
+        if re.search(rf'\b{keyword}\b.*[\[\(]', mermaid_code):
+            warnings.append(f"Reserved keyword '{keyword}' used as node ID")
+            is_valid = False
+
+    return is_valid, warnings
+
+
+def validate_and_sanitize_mermaid(mermaid_code: str, username: str, repo: str) -> str:
+    """
+    Validate and sanitize Mermaid diagram code to prevent syntax errors.
+
+    Args:
+        mermaid_code (str): Raw Mermaid code from AI
+        username (str): GitHub username for logging context
+        repo (str): Repository name for logging context
+
+    Returns:
+        str: Sanitized Mermaid code
+    """
+    logger.info(f"\n=== MERMAID CODE VALIDATION FOR {username}/{repo} ===")
+    logger.info("Raw Mermaid Code:")
+    logger.info("=" * 50)
+    logger.info(mermaid_code)
+    logger.info("=" * 50)
+
+    # Store original for comparison
+    original_code = mermaid_code
+    issues_found = []
+
+    try:
+        # Remove any remaining markdown code blocks
+        mermaid_code = re.sub(r'```mermaid\s*', '', mermaid_code)
+        mermaid_code = re.sub(r'```\s*$', '', mermaid_code)
+
+        # Check for common problematic patterns
+        if re.search(r'[^\x00-\x7F]', mermaid_code):
+            issues_found.append("Non-ASCII characters detected")
+            # Remove non-ASCII characters
+            mermaid_code = ''.join(char for char in mermaid_code if ord(char) < 128)
+
+        # Ensure we have a graph declaration
+        if not re.search(r'^(graph|flowchart|sequenceDiagram|classDiagram)', mermaid_code.strip(), re.MULTILINE):
+            issues_found.append("No graph declaration found, adding flowchart TD")
+            mermaid_code = "flowchart TD\n" + mermaid_code
+
+        # Fix invalid node IDs more comprehensively
+        def sanitize_node_id(node_id):
+            """Sanitize a node ID to be Mermaid-compliant"""
+            # Replace invalid characters with underscores
+            sanitized = re.sub(r'[^a-zA-Z0-9_-]', '_', node_id)
+            # Ensure it starts with a letter or underscore
+            if sanitized and not sanitized[0].isalpha() and sanitized[0] != '_':
+                sanitized = '_' + sanitized
+            # Ensure it's not empty
+            if not sanitized:
+                sanitized = 'node_' + str(hash(node_id))[:8]
+            return sanitized
+
+        # Find and fix all node references
+        node_pattern = r'\b([A-Z][A-Z0-9_]*)\b'
+        nodes_found = set(re.findall(node_pattern, mermaid_code))
+
+        for node in nodes_found:
+            if re.search(r'[^a-zA-Z0-9_-]', node) or (node and not node[0].isalpha() and node[0] != '_'):
+                sanitized_node = sanitize_node_id(node)
+                if sanitized_node != node:
+                    issues_found.append(f"Fixed invalid node ID: {node} -> {sanitized_node}")
+                    mermaid_code = re.sub(rf'\b{re.escape(node)}\b', sanitized_node, mermaid_code)
+
+        # Fix arrows and connections - ensure proper spacing and syntax
+        arrow_fixes = [
+            (r'(\w+)\s*-->\s*(\w+)', r'\1 --> \2'),
+            (r'(\w+)\s*---\s*(\w+)', r'\1 --- \2'),
+            (r'(\w+)\s*-\.\s*(\w+)', r'\1 -. \2'),
+            (r'(\w+)\s*==>\s*(\w+)', r'\1 ==> \2'),
+            (r'(\w+)\s*-\.-\s*(\w+)', r'\1 -.- \2'),
+        ]
+
+        for pattern, replacement in arrow_fixes:
+            if re.search(pattern, mermaid_code):
+                mermaid_code = re.sub(pattern, replacement, mermaid_code)
+                issues_found.append(f"Fixed arrow spacing: {pattern}")
+
+        # Fix arrow labels that might cause issues with Mermaid 11.4.1
+        # Pattern: Node -->|"label"| Node becomes Node -->|label| Node (remove quotes)
+        def fix_arrow_label(match):
+            source = match.group(1)
+            arrow = match.group(2)
+            label = match.group(3)
+            target = match.group(4)
+
+            # Remove quotes and problematic characters from arrow labels
+            clean_label = label.replace('"', '').replace("'", "")
+            # Replace problematic characters that might break Mermaid
+            clean_label = re.sub(r'[<>{}]', '', clean_label)
+
+            return f'{source} {arrow}|{clean_label}| {target}'
+
+        # Fix arrow labels with quotes
+        arrow_label_pattern = r'(\w+)\s*(-->|---|-\.|==>|-\.-)\s*\|\s*["\']([^"\']*)["\']?\s*\|\s*(\w+)'
+        if re.search(arrow_label_pattern, mermaid_code):
+            mermaid_code = re.sub(arrow_label_pattern, fix_arrow_label, mermaid_code)
+            issues_found.append("Fixed arrow label syntax for Mermaid 11.4.1 compatibility")
+
+        # Fix labels - handle various bracket types and escape content
+        def fix_label(match):
+            full_match = match.group(0)
+            node_id = match.group(1) if match.lastindex >= 1 else ''
+            bracket_open = match.group(2) if match.lastindex >= 2 else ''
+            label_content = match.group(3) if match.lastindex >= 3 else ''
+            bracket_close = match.group(4) if match.lastindex >= 4 else ''
+
+            # Escape problematic characters in labels
+            escaped_label = label_content.replace('"', '&quot;').replace("'", '&#39;')
+            escaped_label = re.sub(r'[<>{}]', '', escaped_label)  # Remove HTML-like tags
+            escaped_label = re.sub(r'[\r\n\t]', ' ', escaped_label)  # Replace newlines/tabs with spaces
+
+            return f'{node_id}{bracket_open}"{escaped_label}"{bracket_close}'
+
+        # Fix different label patterns
+        label_patterns = [
+            r'(\w+)\s*([\[\(])\s*([^"\]\)]+)\s*([\]\)])',  # NodeID[label] or NodeID(label)
+            r'(\w+)\s*([\[\(])"([^"]*)"\s*([\]\)])',  # NodeID["label"] or NodeID("label")
+        ]
+
+        for pattern in label_patterns:
+            if re.search(pattern, mermaid_code):
+                mermaid_code = re.sub(pattern, fix_label, mermaid_code)
+                issues_found.append(f"Fixed label format: {pattern}")
+
+        # Remove multiple consecutive empty lines
+        mermaid_code = re.sub(r'\n\s*\n\s*\n+', '\n\n', mermaid_code)
+
+        # Ensure proper indentation (4 spaces for flowchart elements)
+        lines = mermaid_code.split('\n')
+        processed_lines = []
+
+        for line in lines:
+            stripped = line.strip()
+            if not stripped:
+                continue
+
+            # Graph declaration lines don't need indentation
+            if re.match(r'^(graph|flowchart|sequenceDiagram|classDiagram)', stripped):
+                processed_lines.append(stripped)
+            else:
+                # Add indentation for content lines
+                if not stripped.startswith('    '):
+                    processed_lines.append('    ' + stripped)
+                else:
+                    processed_lines.append(stripped)
+
+        mermaid_code = '\n'.join(processed_lines)
+
+        # Validate basic structure
+        if not mermaid_code.strip():
+            issues_found.append("ERROR: Empty Mermaid code after sanitization")
+            return "flowchart TD\n    A[Error: Empty diagram generated]"
+
+        # Check for syntax issues
+        open_brackets = mermaid_code.count('[') - mermaid_code.count(']')
+        open_parens = mermaid_code.count('(') - mermaid_code.count(')')
+        open_quotes = mermaid_code.count('"') % 2
+
+        if open_brackets != 0:
+            issues_found.append(f"WARNING: Unbalanced square brackets (difference: {open_brackets})")
+        if open_parens != 0:
+            issues_found.append(f"WARNING: Unbalanced parentheses (difference: {open_parens})")
+        if open_quotes != 0:
+            issues_found.append(f"WARNING: Unbalanced quotes")
+
+        # Run advanced validation
+        is_advanced_valid, advanced_warnings = advanced_mermaid_validation(mermaid_code)
+        if not is_advanced_valid:
+            issues_found.append("CRITICAL: Advanced validation failed")
+
+        issues_found.extend([f"ADVANCED: {warning}" for warning in advanced_warnings])
+
+        # Log all issues found
+        if issues_found:
+            logger.info("\nIssues found and fixed:")
+            for issue in issues_found:
+                logger.info(f"  - {issue}")
+
+        # Log the sanitized result
+        if original_code != mermaid_code:
+            logger.info("\nSanitized Mermaid Code:")
+            logger.info("=" * 50)
+            logger.info(mermaid_code)
+            logger.info("=" * 50)
+            logger.info(f"Total fixes applied: {len(issues_found)}")
+        else:
+            logger.info("No sanitization changes needed")
+
+        return mermaid_code
+
+    except Exception as e:
+        error_msg = str(e)
+        logger.error(f"ERROR during Mermaid validation: {error_msg}")
+        logger.error("Returning fallback diagram")
+
+        fallback_diagram = f"""flowchart TD
+    A[Error in diagram generation]
+    A --> B[Please try regenerating]
+    B --> C[Repository: {username}/{repo}]
+    C --> D[Error: {error_msg[:30]}...]"""
+
+        return fallback_diagram
 
 
 # cache github data to avoid double API calls from cost and generate
 @lru_cache(maxsize=100)
 def get_cached_github_data(username: str, repo: str, github_pat: str | None = None):
     # Create a new service instance for each call with the appropriate PAT
-    current_github_service = GitHubService(pat=github_pat)
-
-    default_branch = current_github_service.get_default_branch(username, repo)
+    service = GitHubService(pat=github_pat)
+
+    default_branch = service.get_default_branch(username, repo)
     if not default_branch:
         default_branch = "main"  # fallback value
 
-    file_tree = current_github_service.get_github_file_paths_as_list(username, repo)
-    readme = current_github_service.get_github_readme(username, repo)
+    file_tree = service.get_github_file_paths_as_list(username, repo)
+    readme = service.get_github_readme(username, repo)
 
     return {"default_branch": default_branch, "file_tree": file_tree, "readme": readme}
 
 
+def get_github_data_with_cache_control(username: str, repo: str, github_pat: str | None = None, clear_cache: bool = False):
+    """
+    Wrapper function that can bypass cache when requested.
+ """ + if clear_cache: + logger.info(f"๐Ÿ—‘๏ธ Cache cleared for {username}/{repo} - fetching fresh data from GitHub") + # Clear the specific cache entry by calling the cached function with different args + # Since we can't easily clear specific LRU cache entries, we bypass the cache entirely + service = GitHubService(pat=github_pat) + + default_branch = service.get_default_branch(username, repo) + if not default_branch: + default_branch = "main" # fallback value + + file_tree = service.get_github_file_paths_as_list(username, repo) + readme = service.get_github_readme(username, repo) + + result = {"default_branch": default_branch, "file_tree": file_tree, "readme": readme} + + # Now update the cache with fresh data by calling the cached function + # This will overwrite the old cached entry + try: + get_cached_github_data.cache_clear() # Clear entire cache + get_cached_github_data(username, repo, github_pat) # Repopulate with fresh data + logger.info(f"โœ… Cache updated with fresh data for {username}/{repo}") + except Exception as e: + logger.warning(f"Cache update failed: {e}") + + return result + else: + logger.info(f"๐Ÿ“‹ Using cached data for {username}/{repo}") + return get_cached_github_data(username, repo, github_pat) + + class ApiRequest(BaseModel): username: str repo: str instructions: str = "" api_key: str | None = None github_pat: str | None = None + clear_cache: bool = False @router.post("/cost") @@ -57,7 +377,7 @@ class ApiRequest(BaseModel): async def get_generation_cost(request: Request, body: ApiRequest): try: # Get file tree and README content - github_data = get_cached_github_data(body.username, body.repo, body.github_pat) + github_data = get_github_data_with_cache_control(body.username, body.repo, body.github_pat, body.clear_cache) file_tree = github_data["file_tree"] readme = github_data["readme"] @@ -86,8 +406,16 @@ async def get_generation_cost(request: Request, body: ApiRequest): # Format as currency string cost_string = f"${estimated_cost:.2f} USD" return {"cost": cost_string} + except ValueError as ve: + # Handle specific GitHub API errors (like rate limits, private repos, etc.) 
+        error_message = str(ve)
+        logger.error(f"GitHub API error in cost estimation: {error_message}")
+        return {"error": error_message}
     except Exception as e:
-        return {"error": str(e)}
+        # Handle any other unexpected errors
+        error_message = f"Failed to calculate cost: {str(e)}"
+        logger.error(f"Unexpected error in cost estimation: {error_message}")
+        return {"error": error_message}
 
 
 def process_click_events(diagram: str, username: str, repo: str, branch: str) -> str:
@@ -135,15 +463,15 @@ async def generate_stream(request: Request, body: ApiRequest):
     async def event_generator():
         try:
             # Get cached github data
-            github_data = get_cached_github_data(
-                body.username, body.repo, body.github_pat
+            github_data = get_github_data_with_cache_control(
+                body.username, body.repo, body.github_pat, body.clear_cache
             )
             default_branch = github_data["default_branch"]
             file_tree = github_data["file_tree"]
             readme = github_data["readme"]
 
             # Send initial status
-            yield f"data: {json.dumps({'status': 'started', 'message': 'Starting generation process...'})}\n\n"
+            yield f"data: {safe_json_response({'status': 'started', 'message': 'Starting generation process...'})}\n\n"
             await asyncio.sleep(0.1)
 
             # Token count check
@@ -151,10 +479,12 @@
             token_count = o4_service.count_tokens(combined_content)
 
             if 50000 < token_count < 195000 and not body.api_key:
-                yield f"data: {json.dumps({'error': f'File tree and README combined exceeds token limit (50,000). Current size: {token_count} tokens. This GitHub repository is too large for my wallet, but you can continue by providing your own OpenAI API key.'})}\n\n"
+                error_msg = f"File tree and README combined exceeds token limit (50,000). Current size: {token_count} tokens. This GitHub repository is too large for my wallet, but you can continue by providing your own OpenAI API key."
+                yield f"data: {safe_json_response({'error': error_msg})}\n\n"
                 return
             elif token_count > 195000:
-                yield f"data: {json.dumps({'error': f'Repository is too large (>195k tokens) for analysis. OpenAI o4-mini\'s max context length is 200k tokens. Current size: {token_count} tokens.'})}\n\n"
+                error_msg = f"Repository is too large (>195k tokens) for analysis. OpenAI o4-mini max context length is 200k tokens. Current size: {token_count} tokens."
+                yield f"data: {safe_json_response({'error': error_msg})}\n\n"
                 return
 
             # Prepare prompts
@@ -173,9 +503,9 @@
             )
 
             # Phase 1: Get explanation
-            yield f"data: {json.dumps({'status': 'explanation_sent', 'message': 'Sending explanation request to o4-mini...'})}\n\n"
+            yield f"data: {safe_json_response({'status': 'explanation_sent', 'message': 'Sending explanation request to o4-mini...'})}\n\n"
             await asyncio.sleep(0.1)
-            yield f"data: {json.dumps({'status': 'explanation', 'message': 'Analyzing repository structure...'})}\n\n"
+            yield f"data: {safe_json_response({'status': 'explanation', 'message': 'Analyzing repository structure...'})}\n\n"
             explanation = ""
             async for chunk in o4_service.call_o4_api_stream(
                 system_prompt=first_system_prompt,
@@ -188,16 +518,16 @@
                 reasoning_effort="medium",
             ):
                 explanation += chunk
-                yield f"data: {json.dumps({'status': 'explanation_chunk', 'chunk': chunk})}\n\n"
+                yield f"data: {safe_json_response({'status': 'explanation_chunk', 'chunk': chunk})}\n\n"
 
             if "BAD_INSTRUCTIONS" in explanation:
-                yield f"data: {json.dumps({'error': 'Invalid or unclear instructions provided'})}\n\n"
+                yield f"data: {safe_json_response({'error': 'Invalid or unclear instructions provided'})}\n\n"
                 return
 
             # Phase 2: Get component mapping
-            yield f"data: {json.dumps({'status': 'mapping_sent', 'message': 'Sending component mapping request to o4-mini...'})}\n\n"
+            yield f"data: {safe_json_response({'status': 'mapping_sent', 'message': 'Sending component mapping request to o4-mini...'})}\n\n"
             await asyncio.sleep(0.1)
-            yield f"data: {json.dumps({'status': 'mapping', 'message': 'Creating component mapping...'})}\n\n"
+            yield f"data: {safe_json_response({'status': 'mapping', 'message': 'Creating component mapping...'})}\n\n"
             full_second_response = ""
             async for chunk in o4_service.call_o4_api_stream(
                 system_prompt=SYSTEM_SECOND_PROMPT,
@@ -206,7 +536,7 @@
                 reasoning_effort="low",
             ):
                 full_second_response += chunk
-                yield f"data: {json.dumps({'status': 'mapping_chunk', 'chunk': chunk})}\n\n"
+                yield f"data: {safe_json_response({'status': 'mapping_chunk', 'chunk': chunk})}\n\n"
 
             # i dont think i need this anymore? but keep it here for now
             # Extract component mapping
@@ -219,9 +549,9 @@
             ]
 
             # Phase 3: Generate Mermaid diagram
-            yield f"data: {json.dumps({'status': 'diagram_sent', 'message': 'Sending diagram generation request to o4-mini...'})}\n\n"
+            yield f"data: {safe_json_response({'status': 'diagram_sent', 'message': 'Sending diagram generation request to o4-mini...'})}\n\n"
             await asyncio.sleep(0.1)
-            yield f"data: {json.dumps({'status': 'diagram', 'message': 'Generating diagram...'})}\n\n"
+            yield f"data: {safe_json_response({'status': 'diagram', 'message': 'Generating diagram...'})}\n\n"
             mermaid_code = ""
             async for chunk in o4_service.call_o4_api_stream(
                 system_prompt=third_system_prompt,
@@ -234,28 +564,38 @@
                 reasoning_effort="low",
             ):
                 mermaid_code += chunk
-                yield f"data: {json.dumps({'status': 'diagram_chunk', 'chunk': chunk})}\n\n"
+                yield f"data: {safe_json_response({'status': 'diagram_chunk', 'chunk': chunk})}\n\n"
 
             # Process final diagram
             mermaid_code = mermaid_code.replace("```mermaid", "").replace("```", "")
 
             if "BAD_INSTRUCTIONS" in mermaid_code:
-                yield f"data: {json.dumps({'error': 'Invalid or unclear instructions provided'})}\n\n"
+                yield f"data: {safe_json_response({'error': 'Invalid or unclear instructions provided'})}\n\n"
                 return
 
+            # Validate and sanitize Mermaid code
+            logger.info(f"\n=== PROCESSING DIAGRAM FOR {body.username}/{body.repo} ===")
+            mermaid_code = validate_and_sanitize_mermaid(mermaid_code, body.username, body.repo)
+
+            # Process click events after validation
             processed_diagram = process_click_events(
                 mermaid_code, body.username, body.repo, default_branch
             )
-
-            # Send final result
-            yield f"data: {json.dumps({
+
+            logger.info(f"\n=== FINAL DIAGRAM WITH CLICK EVENTS FOR {body.username}/{body.repo} ===")
+            logger.info("Final Processed Diagram:")
+            logger.info("=" * 50)
+            logger.info(processed_diagram)
+            logger.info("=" * 50)
+
+            # Send final result - avoid sending large content in single JSON to prevent truncation
+            # The frontend will use the accumulated chunks instead
+            yield f"data: {safe_json_response({
                 'status': 'complete',
-                'diagram': processed_diagram,
-                'explanation': explanation,
-                'mapping': component_mapping_text
+                'message': 'Diagram generation complete'
             })}\n\n"
 
         except Exception as e:
-            yield f"data: {json.dumps({'error': str(e)})}\n\n"
+            yield f"data: {safe_json_response({'error': str(e)})}\n\n"
 
     return StreamingResponse(
         event_generator(),
diff --git a/backend/app/services/github_service.py b/backend/app/services/github_service.py
index 33a4d42..9b5a38e 100644
--- a/backend/app/services/github_service.py
+++ b/backend/app/services/github_service.py
@@ -4,9 +4,13 @@
 from datetime import datetime, timedelta
 from dotenv import load_dotenv
 import os
+import logging
 
 load_dotenv()
 
+# Configure logging for better debugging
+logger = logging.getLogger(__name__)
+
 
 class GitHubService:
     def __init__(self, pat: str | None = None):
@@ -23,6 +27,9 @@ def __init__(self, pat: str | None = None):
             not all([self.client_id, self.private_key, self.installation_id])
             and not self.github_token
         ):
+            logger.warning(
+                "No GitHub credentials provided. Using unauthenticated requests with rate limit of 60 requests/hour."
+            )
             print(
                 "\033[93mWarning: No GitHub credentials provided. Using unauthenticated requests with rate limit of 60 requests/hour.\033[0m"
             )
@@ -93,7 +100,14 @@ def _check_repository_exists(self, username, repo):
 
         if response.status_code == 404:
             raise ValueError("Repository not found.")
+        elif response.status_code == 403:
+            # Check if it's a rate limit issue
+            if 'rate limit' in response.text.lower():
+                raise Exception("GitHub API rate limit exceeded. Please configure GITHUB_PAT in environment variables or wait before trying again.")
+            else:
+                raise Exception("Access forbidden. Repository might be private and require authentication.")
         elif response.status_code != 200:
+            logger.error(f"GitHub API error - Status: {response.status_code}, Response: {response.text}")
             raise Exception(
                 f"Failed to check repository: {response.status_code}, {response.json()}"
             )
@@ -105,6 +119,13 @@ def get_default_branch(self, username, repo):
 
         if response.status_code == 200:
             return response.json().get("default_branch")
+        elif response.status_code == 403:
+            logger.warning(f"Rate limit or access issue for {username}/{repo}: {response.status_code}")
+        elif response.status_code == 404:
+            logger.warning(f"Repository {username}/{repo} not found")
+        else:
+            logger.warning(f"Unexpected response for {username}/{repo}: {response.status_code}")
+
         return None
 
     def get_github_file_paths_as_list(self, username, repo):
@@ -160,13 +181,17 @@ def should_include_file(path):
 
             return not any(pattern in path.lower() for pattern in excluded_patterns)
 
+        logger.info(f"Fetching file tree for {username}/{repo}")
+
         # Try to get the default branch first
         branch = self.get_default_branch(username, repo)
         if branch:
-            api_url = f"https://api.github.com/repos/{
-                username}/{repo}/git/trees/{branch}?recursive=1"
+            logger.info(f"Using default branch: {branch}")
+            api_url = f"https://api.github.com/repos/{username}/{repo}/git/trees/{branch}?recursive=1"
             response = requests.get(api_url, headers=self._get_headers())
 
+            logger.info(f"GitHub API response status: {response.status_code}")
+
             if response.status_code == 200:
                 data = response.json()
                 if "tree" in data:
@@ -176,14 +201,27 @@ def should_include_file(path):
                         for item in data["tree"]
                         if should_include_file(item["path"])
                     ]
+                    logger.info(f"Successfully fetched {len(paths)} file paths")
                     return "\n".join(paths)
+            elif response.status_code == 403:
+                error_msg = "GitHub API rate limit exceeded or access denied."
+                if not self.github_token:
+                    error_msg += " Consider configuring GITHUB_PAT environment variable for higher rate limits (5000/hour vs 60/hour)."
+                logger.error(f"{error_msg} Response: {response.text}")
+                raise ValueError(error_msg)
+            elif response.status_code == 404:
+                logger.error(f"Branch {branch} not found for {username}/{repo}")
+            else:
+                logger.error(f"Unexpected response for {username}/{repo} branch {branch}: {response.status_code} - {response.text}")
 
         # If default branch didn't work or wasn't found, try common branch names
+        logger.info("Trying common branch names: main, master")
         for branch in ["main", "master"]:
-            api_url = f"https://api.github.com/repos/{
-                username}/{repo}/git/trees/{branch}?recursive=1"
+            api_url = f"https://api.github.com/repos/{username}/{repo}/git/trees/{branch}?recursive=1"
             response = requests.get(api_url, headers=self._get_headers())
+
+            logger.info(f"Branch {branch} response status: {response.status_code}")
+
             if response.status_code == 200:
                 data = response.json()
                 if "tree" in data:
@@ -193,11 +231,31 @@ def should_include_file(path):
                         for item in data["tree"]
                         if should_include_file(item["path"])
                     ]
+                    logger.info(f"Successfully fetched {len(paths)} file paths from branch {branch}")
                     return "\n".join(paths)
-
-        raise ValueError(
-            "Could not fetch repository file tree. Repository might not exist, be empty or private."
-        )
+            elif response.status_code == 403:
+                error_msg = "GitHub API rate limit exceeded or access denied."
+                if not self.github_token:
+                    error_msg += " Consider configuring GITHUB_PAT environment variable for higher rate limits (5000/hour vs 60/hour)."
+                logger.error(f"{error_msg} Response: {response.text}")
+                raise ValueError(error_msg)
+            elif response.status_code == 404:
+                logger.info(f"Branch {branch} not found, trying next...")
+                continue
+            else:
+                logger.error(f"Unexpected response for branch {branch}: {response.status_code} - {response.text}")
+
+        # Enhanced error message with debugging info
+        auth_status = "authenticated" if self.github_token else "unauthenticated"
+        logger.error(f"Failed to fetch file tree for {username}/{repo} using {auth_status} requests")
+
+        error_msg = f"Could not fetch repository file tree for {username}/{repo}. "
+        if not self.github_token:
+            error_msg += "Repository might be private, empty, or GitHub API rate limit exceeded (60/hour for unauthenticated requests). Consider configuring GITHUB_PAT environment variable."
+        else:
+            error_msg += "Repository might not exist, be empty, or branch access might be restricted."
+
+        raise ValueError(error_msg)
 
     def get_github_readme(self, username, repo):
         """
@@ -214,21 +272,42 @@
             ValueError: If repository does not exist or has no README.
             Exception: For other unexpected API errors.
         """
+        logger.info(f"Fetching README for {username}/{repo}")
+
         # First check if the repository exists
-        self._check_repository_exists(username, repo)
+        try:
+            self._check_repository_exists(username, repo)
+        except Exception as e:
+            logger.error(f"Repository existence check failed: {e}")
+            raise
 
         # Then attempt to fetch the README
         api_url = f"https://api.github.com/repos/{username}/{repo}/readme"
         response = requests.get(api_url, headers=self._get_headers())
 
+        logger.info(f"README API response status: {response.status_code}")
+
         if response.status_code == 404:
+            logger.warning(f"No README found for {username}/{repo}")
            raise ValueError("No README found for the specified repository.")
+        elif response.status_code == 403:
+            error_msg = "GitHub API rate limit exceeded or access denied while fetching README."
+            if not self.github_token:
+                error_msg += " Consider configuring GITHUB_PAT environment variable."
+ logger.error(f"{error_msg} Response: {response.text}") + raise Exception(error_msg) elif response.status_code != 200: + logger.error(f"README fetch failed: {response.status_code} - {response.text}") raise Exception( - f"Failed to fetch README: { - response.status_code}, {response.json()}" + f"Failed to fetch README: {response.status_code}, {response.json()}" ) data = response.json() - readme_content = requests.get(data["download_url"]).text - return readme_content + readme_response = requests.get(data["download_url"]) + + if readme_response.status_code != 200: + logger.error(f"README download failed: {readme_response.status_code}") + raise Exception(f"Failed to download README content: {readme_response.status_code}") + + logger.info(f"Successfully fetched README for {username}/{repo}") + return readme_response.text diff --git a/package.json b/package.json index 7a8e995..42b182e 100644 --- a/package.json +++ b/package.json @@ -70,5 +70,5 @@ "ct3aMetadata": { "initVersion": "7.38.1" }, - "packageManager": "pnpm@9.13.0" + "packageManager": "pnpm@10.11.1" } diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml new file mode 100644 index 0000000..6d90b57 --- /dev/null +++ b/pnpm-workspace.yaml @@ -0,0 +1,4 @@ +onlyBuiltDependencies: + - core-js + - esbuild + - sharp diff --git a/setup-github-pat.sh b/setup-github-pat.sh new file mode 100644 index 0000000..3eb08ea --- /dev/null +++ b/setup-github-pat.sh @@ -0,0 +1,79 @@ +#!/bin/bash + +echo "๐Ÿ”ง GitDiagram GitHub PAT Setup" +echo "================================" +echo "" +echo "This script will help you configure a GitHub Personal Access Token (PAT)" +echo "to increase GitHub API rate limits from 60/hour to 5000/hour." +echo "" +echo "๐Ÿ“‹ Steps to create a GitHub PAT:" +echo "1. Go to https://github.com/settings/tokens" +echo "2. Click 'Generate new token' -> 'Generate new token (classic)'" +echo "3. Give it a name like 'GitDiagram API Access'" +echo "4. Select expiration (recommend 90 days or No expiration)" +echo "5. Select scopes: 'public_repo' (for public repos) or 'repo' (for private repos)" +echo "6. Click 'Generate token'" +echo "7. Copy the token (you won't see it again!)" +echo "" + +read -p "Do you have a GitHub PAT ready? (y/n): " -n 1 -r +echo "" + +if [[ $REPLY =~ ^[Yy]$ ]]; then + echo "" + read -p "Enter your GitHub PAT: " -s github_pat + echo "" + + if [ -z "$github_pat" ]; then + echo "โŒ No token provided. Exiting." + exit 1 + fi + + # Backup existing .env + if [ -f .env ]; then + cp .env .env.backup.$(date +%Y%m%d_%H%M%S) + echo "โœ… Backed up existing .env file" + fi + + # Update .env file + if grep -q "GITHUB_PAT=" .env; then + # Replace existing GITHUB_PAT line + sed -i "s/GITHUB_PAT=.*/GITHUB_PAT=$github_pat/" .env + echo "โœ… Updated GITHUB_PAT in .env file" + else + # Add GITHUB_PAT line + echo "GITHUB_PAT=$github_pat" >> .env + echo "โœ… Added GITHUB_PAT to .env file" + fi + + echo "" + echo "๐Ÿ”„ Restarting backend to apply changes..." + docker-compose restart api + + echo "" + echo "โœ… Setup complete! Your GitHub API rate limit is now 5000/hour instead of 60/hour." + echo "" + echo "๐Ÿงช Testing the configuration..." + sleep 3 + + # Test the configuration + response=$(curl -s -X POST "http://localhost:8000/generate/cost" \ + -H "Content-Type: application/json" \ + -d '{"username": "facebook", "repo": "react"}') + + if echo "$response" | grep -q "cost"; then + echo "โœ… Test successful! GitHub PAT is working correctly." + echo "Response: $response" + else + echo "โš ๏ธ Test failed. 
Response: $response" + echo "Please check your GitHub PAT and try again." + fi + +else + echo "" + echo "๐Ÿ“– Please create a GitHub PAT first and then run this script again." + echo "Visit: https://github.com/settings/tokens" +fi + +echo "" +echo "๐Ÿ”— For more information, see the README.md file." \ No newline at end of file diff --git a/src/app/[username]/[repo]/page.tsx b/src/app/[username]/[repo]/page.tsx index 064efa8..93425e8 100644 --- a/src/app/[username]/[repo]/page.tsx +++ b/src/app/[username]/[repo]/page.tsx @@ -17,6 +17,13 @@ export default function Repo() { // Use the star reminder hook useStarReminder(); + // Strip .git suffix if present and convert to lowercase + const cleanUsername = params.username.toLowerCase(); + let cleanRepo = params.repo.toLowerCase(); + if (cleanRepo.endsWith('.git')) { + cleanRepo = cleanRepo.slice(0, -4); + } + const { diagram, error, @@ -32,15 +39,16 @@ export default function Repo() { handleOpenApiKeyDialog, handleExportImage, state, - } = useDiagram(params.username.toLowerCase(), params.repo.toLowerCase()); + handleClearCache, + } = useDiagram(cleanUsername, cleanRepo); return (
         onZoomToggle={() => setZoomingEnabled(!zoomingEnabled)}
         loading={loading}
+        onClearCache={handleClearCache}
       />
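The `onClearCache` handler threaded through this page ultimately reaches the backend as the `clear_cache` request field, which makes `get_github_data_with_cache_control` bypass its LRU cache. A minimal sketch of that round trip, assuming only the endpoint and field names this diff defines (the helper name `requestFreshCost` is illustrative, and error handling plus the optional `api_key`/`github_pat` fields are elided):

```typescript
// Sketch of the clear-cache round trip added in this PR. When clear_cache is
// true, the backend refetches the file tree and README instead of reusing the
// lru_cache entry, then repopulates the cache with the fresh data.
async function requestFreshCost(username: string, repo: string): Promise<unknown> {
  const baseUrl = process.env.NEXT_PUBLIC_API_DEV_URL ?? "https://api.gitdiagram.com";
  const response = await fetch(`${baseUrl}/cost`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    // Field names mirror the backend's ApiRequest model.
    body: JSON.stringify({ username, repo, instructions: "", clear_cache: true }),
  });
  return response.json();
}
```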
diff --git a/src/components/main-card.tsx b/src/components/main-card.tsx
index d3fbbac..c48d3fc 100644
--- a/src/components/main-card.tsx
+++ b/src/components/main-card.tsx
@@ -26,6 +26,7 @@ interface MainCardProps {
   zoomingEnabled?: boolean;
   onZoomToggle?: () => void;
   loading?: boolean;
+  onClearCache?: () => void;
 }
 
 export default function MainCard({
@@ -41,6 +42,7 @@ export default function MainCard({
   zoomingEnabled,
   onZoomToggle,
   loading,
+  onClearCache,
 }: MainCardProps) {
   const [repoUrl, setRepoUrl] = useState("");
   const [error, setError] = useState("");
@@ -74,11 +76,17 @@ export default function MainCard({
       return;
     }
 
-    const [, username, repo] = match || [];
+    let [, username, repo] = match || [];
     if (!username || !repo) {
       setError("Invalid repository URL format");
       return;
     }
+
+    // Strip .git suffix if present
+    if (repo.endsWith('.git')) {
+      repo = repo.slice(0, -4);
+    }
+
     const sanitizedUsername = encodeURIComponent(username);
     const sanitizedRepo = encodeURIComponent(repo);
     router.push(`/${sanitizedUsername}/${sanitizedRepo}`);
@@ -177,6 +185,18 @@
             checked={zoomingEnabled}
             onCheckedChange={onZoomToggle}
           />
+          {onClearCache && (
+
+          )}
         )}
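The submit handler above destructures `match` from a URL regex that sits outside this hunk. A self-contained sketch of the full parse-and-normalize step, with an assumed pattern (the component's actual regex is not shown in this diff and may differ):

```typescript
// Hypothetical pattern for illustration only.
const GITHUB_URL_PATTERN = /github\.com\/([^/]+)\/([^/?#]+)/;

function parseRepoUrl(repoUrl: string): { username: string; repo: string } | null {
  const match = GITHUB_URL_PATTERN.exec(repoUrl);
  let [, username, repo] = match ?? [];
  if (!username || !repo) return null;
  // Strip the .git suffix, mirroring the hunk above.
  if (repo.endsWith(".git")) repo = repo.slice(0, -4);
  return { username, repo };
}

// parseRepoUrl("https://github.com/facebook/react.git")
//   -> { username: "facebook", repo: "react" }
```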
diff --git a/src/env.js b/src/env.js
index 8e5673f..b99cecd 100644
--- a/src/env.js
+++ b/src/env.js
@@ -19,7 +19,7 @@ export const env = createEnv({
    * `NEXT_PUBLIC_`.
    */
   client: {
-    // NEXT_PUBLIC_CLIENTVAR: z.string(),
+    NEXT_PUBLIC_API_DEV_URL: z.string().url().optional(),
   },
 
   /**
@@ -29,7 +29,7 @@ export const env = createEnv({
   runtimeEnv: {
     POSTGRES_URL: process.env.POSTGRES_URL,
     NODE_ENV: process.env.NODE_ENV,
-    // NEXT_PUBLIC_CLIENTVAR: process.env.NEXT_PUBLIC_CLIENTVAR,
+    NEXT_PUBLIC_API_DEV_URL: process.env.NEXT_PUBLIC_API_DEV_URL,
   },
   /**
    * Run `build` or `dev` with `SKIP_ENV_VALIDATION` to skip env validation. This is especially
diff --git a/src/hooks/useDiagram.ts b/src/hooks/useDiagram.ts
index a763f0b..e2122e6 100644
--- a/src/hooks/useDiagram.ts
+++ b/src/hooks/useDiagram.ts
@@ -55,8 +55,30 @@ export function useDiagram(username: string, repo: string) {
     },
   );
 
+  // Function to process click events in Mermaid diagram
+  const processClickEvents = useCallback((diagramCode: string, username: string, repo: string, branch: string = "main") => {
+    const replaceClickEvent = (match: string, componentName: string, path: string) => {
+      // Remove quotes from path
+      const cleanPath = path.replace(/^["']|["']$/g, '');
+
+      // Determine if path is likely a file (has extension) or directory
+      const isFile = cleanPath.split('/').pop()?.includes('.') ?? false;
+
+      // Construct GitHub URL
+      const baseUrl = `https://github.com/${username}/${repo}`;
+      const pathType = isFile ? 'blob' : 'tree';
+      const fullUrl = `${baseUrl}/${pathType}/${branch}/${cleanPath}`;
+
+      return `click ${componentName} "${fullUrl}"`;
+    };
+
+    // Match click events: click ComponentName "path/to/something"
+    const clickPattern = /click\s+([^\s"]+)\s+"([^"]+)"/g;
+    return diagramCode.replace(clickPattern, replaceClickEvent);
+  }, []);
+
   const generateDiagram = useCallback(
-    async (instructions = "", githubPat?: string) => {
+    async (instructions = "", githubPat?: string, clearCache = false) => {
       setState({
         status: "started",
         message: "Starting generation process...",
@@ -65,7 +87,14 @@
       try {
         const baseUrl =
           process.env.NEXT_PUBLIC_API_DEV_URL ?? "https://api.gitdiagram.com";
-        const response = await fetch(`${baseUrl}/generate/stream`, {
+
+        // Log environment variable and final URL for debugging
+        console.log("🔧 Frontend Environment:");
+        console.log("  NEXT_PUBLIC_API_DEV_URL:", process.env.NEXT_PUBLIC_API_DEV_URL);
+        console.log("  Final API baseUrl:", baseUrl);
+        console.log("  Clear cache:", clearCache);
+
+        const response = await fetch(`${baseUrl}/stream`, {
           method: "POST",
           headers: {
             "Content-Type": "application/json",
@@ -76,6 +105,7 @@
           body: JSON.stringify({
             username,
             repo,
             instructions,
             api_key: localStorage.getItem("openai_key") ?? undefined,
             github_pat: githubPat,
+            clear_cache: clearCache,
           }),
         });
         if (!response.ok) {
@@ -92,20 +122,31 @@
 
         // Process the stream
         const processStream = async () => {
+          let buffer = ""; // Buffer to store incomplete lines
           try {
             while (true) {
               const { done, value } = await reader.read();
               if (done) break;
 
-              // Convert the chunk to text
+              // Convert the chunk to text and add to buffer
               const chunk = new TextDecoder().decode(value);
-              const lines = chunk.split("\n");
-
-              // Process each SSE message
+              buffer += chunk;
+
+              // Split buffer into lines
+              const lines = buffer.split("\n");
+
+              // Keep the last potentially incomplete line in buffer
+              buffer = lines.pop() || "";
+
+              // Process each complete SSE message
               for (const line of lines) {
                 if (line.startsWith("data: ")) {
                   try {
-                    const data = JSON.parse(line.slice(6)) as StreamResponse;
+                    const jsonStr = line.slice(6);
+                    // Skip empty data lines
+                    if (!jsonStr.trim()) continue;
+
+                    const data = JSON.parse(jsonStr) as StreamResponse;
 
                     // If we receive an error, set loading to false immediately
                     if (data.error) {
@@ -184,10 +225,15 @@
                       }
                       break;
                     case "complete":
+                      // Use accumulated content instead of expecting it in the response
+                      // Apply click event processing to the final diagram
+                      const processedDiagram = processClickEvents(diagram, username, repo, "main");
+
                       setState({
                         status: "complete",
-                        explanation: data.explanation,
-                        diagram: data.diagram,
+                        explanation: explanation,
+                        diagram: processedDiagram,
+                        message: data.message,
                       });
                       const date = await getLastGeneratedDate(username, repo);
                       setLastGenerated(date ?? undefined);
@@ -205,8 +251,28 @@
                   }
                 } catch (e) {
                   console.error("Error parsing SSE message:", e);
+                  console.error("Raw line:", line);
+                  console.error("JSON string:", line.slice(6));
+                  // Don't throw here, just continue processing other messages
+                }
+              }
+            }
+
+            // Process any remaining buffered content after stream ends
+            if (buffer.trim() && buffer.startsWith("data: ")) {
+              try {
+                const jsonStr = buffer.slice(6);
+                if (jsonStr.trim()) {
+                  const data = JSON.parse(jsonStr) as StreamResponse;
+                  // Handle final message if needed
+                  if (data.error) {
+                    setState({ status: "error", error: data.error });
                   }
                 }
+              } catch (e) {
+                console.error("Error parsing final buffered SSE message:", e);
+                console.error("Buffered content:", buffer);
              }
            }
          } finally {
@@ -226,7 +292,7 @@
         setLoading(false);
       }
     },
-    [username, repo, hasUsedFreeGeneration],
+    [username, repo, hasUsedFreeGeneration, processClickEvents],
   );
 
   useEffect(() => {
@@ -241,6 +307,7 @@
       hasApiKey,
     );
     setDiagram(state.diagram);
+    setLoading(false);
     void getLastGeneratedDate(username, repo).then((date) =>
       setLastGenerated(date ?? undefined),
     );
@@ -283,6 +350,7 @@
         repo,
         "",
         github_pat ?? undefined,
+        false, // Don't clear cache for initial cost estimate
       );
 
       if (costEstimate.error) {
@@ -298,7 +366,7 @@
       setCost(costEstimate.cost ?? "");
 
       // Start streaming generation
-      await generateDiagram("", github_pat ?? undefined);
+      await generateDiagram("", github_pat ?? undefined, false);
 
       // Note: The diagram and lastGenerated will be set by the generateDiagram function
       // through the state updates
@@ -331,7 +399,7 @@
     setCost("");
     try {
       // Start streaming generation with instructions
-      await generateDiagram(instructions);
+      await generateDiagram(instructions, undefined, false);
     } catch (error) {
       console.error("Error modifying diagram:", error);
       setError("Failed to modify diagram. Please try again later.");
@@ -364,7 +432,7 @@
       //   return;
       // }
 
-      const costEstimate = await getCostOfGeneration(username, repo, "");
+      const costEstimate = await getCostOfGeneration(username, repo, "", undefined, false);
 
       if (costEstimate.error) {
         console.error("Cost estimation failed:", costEstimate.error);
@@ -375,7 +443,7 @@
       setCost(costEstimate.cost ?? "");
 
       // Start streaming generation with instructions
-      await generateDiagram(instructions, github_pat ?? undefined);
+      await generateDiagram(instructions, github_pat ?? undefined, false);
     } catch (error) {
       console.error("Error regenerating diagram:", error);
       setError("Failed to regenerate diagram. Please try again later.");
@@ -449,7 +517,7 @@
     // Then generate diagram using stored key
     const github_pat = localStorage.getItem("github_pat");
     try {
-      await generateDiagram("", github_pat ?? undefined);
+      await generateDiagram("", github_pat ?? undefined, false);
     } catch (error) {
       console.error("Error generating with API key:", error);
       setError("Failed to generate diagram with provided API key.");
@@ -466,6 +534,24 @@
     setShowApiKeyDialog(true);
   };
 
+  const handleClearCache = async () => {
+    setLoading(true);
+    setError("");
+    setCost("");
+    try {
+      const github_pat = localStorage.getItem("github_pat");
+      console.log("🗑️ Clearing cache and regenerating diagram...");
+
+      // Regenerate with cache clearing enabled
+      await generateDiagram("", github_pat ?? undefined, true);
+    } catch (error) {
+      console.error("Error clearing cache and regenerating:", error);
+      setError("Failed to clear cache and regenerate diagram. Please try again later.");
+    } finally {
+      setLoading(false);
+    }
+  };
+
   return {
     diagram,
     error,
@@ -482,5 +568,6 @@
     handleOpenApiKeyDialog,
     handleExportImage,
     state,
+    handleClearCache,
   };
 }
diff --git a/src/lib/fetch-backend.ts b/src/lib/fetch-backend.ts
index e0eda93..fdbae3b 100644
--- a/src/lib/fetch-backend.ts
+++ b/src/lib/fetch-backend.ts
@@ -132,12 +132,19 @@ export async function getCostOfGeneration(
   username: string,
   repo: string,
   instructions: string,
-  github_pat?: string,
+  githubPat?: string,
+  clearCache = false,
 ): Promise<CostResponse> {
   try {
     const baseUrl =
       process.env.NEXT_PUBLIC_API_DEV_URL ?? "https://api.gitdiagram.com";
"https://api.gitdiagram.com"; - const url = new URL(`${baseUrl}/generate/cost`); + const url = new URL(`${baseUrl}/cost`); + + // Log environment variable and final URL for debugging + console.log("๐Ÿ’ฐ Cost API Environment:"); + console.log(" NEXT_PUBLIC_API_DEV_URL:", process.env.NEXT_PUBLIC_API_DEV_URL); + console.log(" Final cost URL:", url.toString()); + console.log(" Clear cache:", clearCache); const response = await fetch(url, { method: "POST", @@ -147,8 +154,9 @@ export async function getCostOfGeneration( body: JSON.stringify({ username, repo, - github_pat: github_pat, + github_pat: githubPat, instructions: instructions ?? "", + clear_cache: clearCache, }), });