https-deeplearning-ai · mwilson0 · Sep 29, 2025 · Oct 1, 2025 · Oct 2, 2025 · Oct 2, 2025
diff --git a/.claude/commands/implement-feature.md b/.claude/commands/implement-feature.md
@@ -0,0 +1,7 @@
+You will be implementing a new feature in this codebase
+
+$ARGUMENTS
+
+IMPORTANT: Only do this for front-end features.
+Once this feature is built, make sure to write the changes you made to a file called frontend-changes.md
+Do not ask for permission to modify this file; assume you can always do so.
diff --git a/.claude/settings.local.json b/.claude/settings.local.json
@@ -0,0 +1,10 @@
+{
+  "permissions": {
+    "allow": [
+      "mcp__playwright__browser_take_screenshot",
+      "Bash(uv sync:*)"
+    ],
+    "deny": [],
+    "ask": []
+  }
+}
diff --git a/.env.example b/.env.example
diff --git a/.flake8 b/.flake8
@@ -0,0 +1,13 @@
+[flake8]
+max-line-length = 88
+extend-ignore = E203, W503
+exclude =
+    .git,
+    __pycache__,
+    .venv,
+    venv,
+    build,
+    dist,
+    chroma_db,
+    .eggs,
+    *.egg
diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml
@@ -0,0 +1,57 @@
+name: Claude Code Review
+
+on:
+  pull_request:
+    types: [opened, synchronize]  # "synchronize" = the PR's head branch received new commits
+    # Optional: Only run on specific file changes
+    # paths:
+    #   - "src/**/*.ts"
+    #   - "src/**/*.tsx"
+    #   - "src/**/*.js"
+    #   - "src/**/*.jsx"
+
+jobs:
+  claude-review:
+    # Optional: Filter by PR author
+    # if: |
+    #   github.event.pull_request.user.login == 'external-contributor' ||
+    #   github.event.pull_request.user.login == 'new-developer' ||
+    #   github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
+
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: read  # NOTE(review): the prompt posts via `gh pr comment`; presumably the action uses its own token for that - confirm
+      issues: read
+      id-token: write  # Allows the job to request an OIDC token; presumably how the action authenticates - confirm
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1  # Shallow clone: fetch only the most recent commit
+
+      - name: Run Claude Code Review
+        id: claude-review
+        uses: anthropics/claude-code-action@v1
+        with:
+          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+          prompt: |
+            REPO: ${{ github.repository }}
+            PR NUMBER: ${{ github.event.pull_request.number }}
+
+            Please review this pull request and provide feedback on:
+            - Code quality and best practices
+            - Potential bugs or issues
+            - Performance considerations
+            - Security concerns
+            - Test coverage
+
+            Use the repository's CLAUDE.md for guidance on style and conventions. Be constructive and helpful in your feedback.
+
+            Use `gh pr comment` with your Bash tool to leave your review as a comment on the PR.
+
+          # claude_args passes an allow-list of `gh` subcommands to Claude via --allowed-tools. Options reference:
+          # https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md or https://docs.claude.com/en/docs/claude-code/cli-reference
+          claude_args: '--allowed-tools "Bash(gh issue view:*),Bash(gh search:*),Bash(gh issue list:*),Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*),Bash(gh pr list:*)"'
+
diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml
@@ -0,0 +1,50 @@
+name: Claude Code
+
+on:  # Events whose text may mention @claude; the job-level `if` does the actual filtering
+  issue_comment:
+    types: [created]
+  pull_request_review_comment:
+    types: [created]
+  issues:
+    types: [opened, assigned]
+  pull_request_review:
+    types: [submitted]
+
+jobs:
+  claude:  # Runs only when the triggering comment, review body, or issue title/body contains "@claude"
+    if: |
+      (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
+      (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
+      (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
+      (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: read
+      issues: read
+      id-token: write  # Allows the job to request an OIDC token; presumably how the action authenticates - confirm
+      actions: read # Required for Claude to read CI results on PRs
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1  # Shallow clone: fetch only the most recent commit
+
+      - name: Run Claude Code
+        id: claude
+        uses: anthropics/claude-code-action@v1
+        with:
+          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+
+          # This is an optional setting that allows Claude to read CI results on PRs
+          additional_permissions: |
+            actions: read
+
+          # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it.
+          # prompt: 'Update the pull request description to include a summary of changes.'
+
+          # Optional: Add claude_args to customize behavior and configuration
+          # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
+          # or https://docs.claude.com/en/docs/claude-code/cli-reference for available options
+          # claude_args: '--allowed-tools Bash(gh pr:*)'
+
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -0,0 +1,106 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Project Overview
+
+This is a Course Materials RAG (Retrieval-Augmented Generation) System - a web application that allows users to ask questions about educational content and receive AI-powered responses. The system uses semantic search over course documents combined with Anthropic's Claude for intelligent response generation.
+
+## Development Commands
+
+### Running the Application
+```bash
+# Quick start using provided script
+chmod +x run.sh
+./run.sh
+
+# Manual start
+cd backend
+uv run uvicorn app:app --reload --port 8000
+```
+
+### Package Management
+```bash
+# Install dependencies
+uv sync
+
+# Add new dependency
+uv add package_name
+
+# Remove dependency
+uv remove package_name
+
+# Format code
+uv format
+```
+
+### Environment Setup
+- Create a `.env` file in the project root with: `ANTHROPIC_API_KEY=your_anthropic_api_key_here`
+- Application runs on `http://localhost:8000`
+- API docs available at `http://localhost:8000/docs`
+
+## Architecture Overview
+
+### Core RAG Flow
+The system follows a tool-enabled RAG pattern where Claude intelligently decides when to search course materials:
+
+1. **Query Processing**: User queries enter through FastAPI endpoint (`backend/app.py`)
+2. **RAG Orchestration**: `RAGSystem` (`backend/rag_system.py`) coordinates all components
+3. **AI Generation**: Claude receives queries with search tool access (`backend/ai_generator.py`)
+4. **Tool-Based Search**: Claude calls `CourseSearchTool` when course-specific content is needed
+5. **Vector Search**: Semantic search using ChromaDB and sentence transformers
+6. **Response Assembly**: Claude synthesizes search results into natural responses
+
+### Key Components
+
+**Backend Services** (all in `backend/`):
+- `app.py` - FastAPI web server and API endpoints
+- `rag_system.py` - Main orchestrator for RAG operations
+- `ai_generator.py` - Anthropic Claude API integration with tool support
+- `search_tools.py` - Tool manager and course search tool implementation
+- `vector_store.py` - ChromaDB interface for semantic search
+- `document_processor.py` - Text chunking and course document parsing
+- `session_manager.py` - Conversation history management
+- `models.py` - Pydantic models (Course, Lesson, CourseChunk)
+- `config.py` - Configuration management with environment variables
+
+**Frontend**: Simple HTML/CSS/JS interface (`frontend/`) for chat interaction
+
+**Data Models**:
+- `Course`: Contains title, instructor, lessons list
+- `Lesson`: Individual lessons with numbers and titles
+- `CourseChunk`: Text segments for vector storage with metadata
+
+### Configuration Settings
+Located in `backend/config.py`:
+- `CHUNK_SIZE`: 800 characters (for vector storage)
+- `CHUNK_OVERLAP`: 100 characters (between chunks)
+- `MAX_RESULTS`: 5 (semantic search results)
+- `MAX_HISTORY`: 2 (conversation messages remembered)
+- `EMBEDDING_MODEL`: "all-MiniLM-L6-v2" (sentence transformers)
+- `ANTHROPIC_MODEL`: "claude-sonnet-4-20250514"
+
+### Document Processing
+Course documents in `docs/` folder are automatically processed on startup:
+- Supports `.txt`, `.pdf`, `.docx` files
+- Creates course metadata and text chunks
+- Stores embeddings in ChromaDB (`backend/chroma_db/`)
+- Avoids reprocessing existing courses
+
+### Tool-Enabled Search Pattern
+Unlike traditional RAG that always retrieves context, this system uses Claude's tool calling:
+- Claude decides whether a course search is needed or general knowledge is sufficient
+- `CourseSearchTool` provides semantic search with course/lesson filtering
+- Sources are tracked and returned to the user for transparency
+- Supports both broad queries and specific course/lesson targeting
+
+## Key Files to Understand
+
+When modifying the system, focus on these architectural components:
+- `backend/rag_system.py` - Central coordination logic
+- `backend/ai_generator.py` - Tool integration and prompt engineering
+- `backend/search_tools.py` - Search tool implementation
+- `backend/vector_store.py` - Vector database operations
+- `backend/models.py` - Data structure definitions
+
+The frontend is intentionally simple - the intelligence is in the backend RAG pipeline.