diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7dcf20ae..e50041b5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - go-version: ['1.21', '1.24'] + go-version: ['1.23', '1.24'] steps: - uses: actions/checkout@v4 @@ -34,7 +34,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] - go: ['1.21', '1.24'] + go: ['1.23', '1.24'] env: # Prevent Go from auto-downloading toolchain which conflicts with setup-go cache GOTOOLCHAIN: local diff --git a/.gitignore b/.gitignore index 72cf54bf..6e78384b 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ # Built binary executables (only in root directory) /gosqlx +/gosqlx-mcp # Output of the go coverage tool, specifically when used with LiteIDE *.out @@ -54,3 +55,6 @@ build/ # Python version files (not needed for Go project) .python-version sql-validator + +# Git worktrees +.worktrees/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f55d8d0..12c6c935 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,29 @@ All notable changes to GoSQLX will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [Unreleased] — MCP Server + +### ✨ New Features +- **MCP Server** (`pkg/mcp/`, `cmd/gosqlx-mcp/`): All GoSQLX SQL capabilities as Model Context Protocol tools over streamable HTTP + - 7 tools: `validate_sql`, `format_sql`, `parse_sql`, `extract_metadata`, `security_scan`, `lint_sql`, `analyze_sql` + - Optional bearer token auth via `GOSQLX_MCP_AUTH_TOKEN` + - `analyze_sql` fans out all 6 tools concurrently via `sync.WaitGroup` + - Multi-dialect validation: postgresql, mysql, sqlite, sqlserver, oracle, snowflake, generic + +### 📝 Documentation +- `docs/MCP_GUIDE.md` — comprehensive MCP server guide +- `README.md` — MCP feature bullet, installation block, docs table entry +- `docs/ARCHITECTURE.md` — MCP in application layer diagram, new MCP Architecture section +- `docs/API_REFERENCE.md` — pkg/mcp package docs +- `docs/CONFIGURATION.md` — MCP env vars reference +- Go version references updated from 1.21+ to 1.23+ (required by mark3labs/mcp-go) + +### 🔧 Build +- Go minimum bumped to 1.23.0 (required by `github.com/mark3labs/mcp-go v0.45.0`) +- Taskfile: `mcp`, `mcp:build`, `mcp:test`, `mcp:install` tasks added + +--- + ## [1.9.3] - 2026-03-08 — License Detection Fix ### 🐛 Bug Fixes diff --git a/CLAUDE.md b/CLAUDE.md index 1e4525ee..0b86f640 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,7 +6,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co GoSQLX is a **production-ready**, **race-free**, high-performance SQL parsing SDK for Go that provides lexing, parsing, and AST generation with zero-copy optimizations. The library is designed for enterprise use with comprehensive object pooling for memory efficiency. 
-**Requirements**: Go 1.21+ +**Requirements**: Go 1.23+ (upgraded from 1.21 when MCP server was added; `mark3labs/mcp-go` requires 1.23) **Production Status**: ✅ Validated for production deployment (v1.6.0+, current: v1.9.0) - Thread-safe with zero race conditions (20,000+ concurrent operations tested) diff --git a/README.md b/README.md index dc120651..54daaa9f 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@

⚡ High-Performance SQL Parser for Go ⚡

-[![Go Version](https://img.shields.io/badge/Go-1.21+-00ADD8?style=for-the-badge&logo=go)](https://go.dev) +[![Go Version](https://img.shields.io/badge/Go-1.23+-00ADD8?style=for-the-badge&logo=go)](https://go.dev) [![Release](https://img.shields.io/github/v/release/ajitpratap0/GoSQLX?style=for-the-badge&color=orange)](https://github.com/ajitpratap0/GoSQLX/releases) [![License: Apache-2.0](https://img.shields.io/badge/License-Apache--2.0-blue.svg?style=for-the-badge)](https://www.apache.org/licenses/LICENSE-2.0) [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=for-the-badge)](http://makeapullrequest.com) @@ -65,6 +65,7 @@ GoSQLX is a high-performance SQL parsing library designed for production use. It - **Zero-Copy**: Direct byte slice operations, <1μs latency - **Intelligent Errors**: Structured error codes with typo detection, context highlighting, and helpful hints - **Python Bindings**: [PyGoSQLX](python/README.md) — use GoSQLX from Python via ctypes FFI, 100x+ faster than pure Python parsers +- **MCP Server** (v1.10.0): `gosqlx-mcp` exposes all 7 SQL tools as [Model Context Protocol](https://modelcontextprotocol.io) tools over streamable HTTP — integrate GoSQLX into Claude, Cursor, and any MCP-compatible AI assistant - **Production Ready**: Battle-tested with 0 race conditions detected, ~85% SQL-99 compliance, Apache-2.0 licensed ### Performance & Quality Highlights (v1.9.0) @@ -153,8 +154,19 @@ print(tables) # ['users', 'orders'] See the full [PyGoSQLX documentation](python/README.md) for the complete API. +### MCP Server + +Use GoSQLX SQL tools from any MCP-compatible AI assistant (Claude, Cursor, etc.): + +```bash +go install github.com/ajitpratap0/GoSQLX/cmd/gosqlx-mcp@latest +gosqlx-mcp # starts on 127.0.0.1:8080 +``` + +See the full [MCP Server Guide](docs/MCP_GUIDE.md) for configuration, authentication, and AI assistant integration. 
+ **Requirements:** -- Go 1.21 or higher +- Go 1.23 or higher - Python 3.8+ (for Python bindings) - No external dependencies for the Go library @@ -402,6 +414,7 @@ func main() { | [**Usage Guide**](docs/USAGE_GUIDE.md) | Detailed patterns and best practices | | [**Architecture**](docs/ARCHITECTURE.md) | System design and internal architecture | | [**Python Bindings**](python/README.md) | PyGoSQLX β€” Python API, installation, and examples | +| [**MCP Server Guide**](docs/MCP_GUIDE.md) | `gosqlx-mcp` β€” 7 SQL tools for AI assistant integration | | [**Troubleshooting**](docs/TROUBLESHOOTING.md) | Common issues and solutions | ### Getting Started diff --git a/Taskfile.yml b/Taskfile.yml index e2a8012d..51bbecc4 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -298,6 +298,33 @@ tasks: cmds: - go run ./cmd/gosqlx lsp --log /tmp/gosqlx-lsp.log + # ============================================================================= + # MCP SERVER + # ============================================================================= + mcp: + desc: Run the MCP server (env vars GOSQLX_MCP_HOST, GOSQLX_MCP_PORT, GOSQLX_MCP_AUTH_TOKEN) + cmds: + - go run ./cmd/gosqlx-mcp + + mcp:build: + desc: Build the MCP server binary to build/gosqlx-mcp + cmds: + - echo "Building MCP server binary..." + - go build -v -o {{.BUILD_DIR}}/gosqlx-mcp ./cmd/gosqlx-mcp + generates: + - '{{.BUILD_DIR}}/gosqlx-mcp' + + mcp:test: + desc: Run MCP package tests with race detection + cmds: + - echo "Testing MCP package..." + - go test -race -timeout 60s ./pkg/mcp/... ./cmd/gosqlx-mcp/... 
+ + mcp:install: + desc: Install gosqlx-mcp binary globally + cmds: + - go install ./cmd/gosqlx-mcp + # ============================================================================= # EXAMPLES # ============================================================================= diff --git a/archive/historical-architecture-docs/ARCHITECTURAL_REVIEW_AND_ROADMAP.md b/archive/historical-architecture-docs/ARCHITECTURAL_REVIEW_AND_ROADMAP.md deleted file mode 100644 index befffc0e..00000000 --- a/archive/historical-architecture-docs/ARCHITECTURAL_REVIEW_AND_ROADMAP.md +++ /dev/null @@ -1,347 +0,0 @@ -# GoSQLX Architectural Review & Strategic Roadmap - -**Date**: August 2024 -**Version**: 1.0 -**Status**: For Review - ---- - -## Executive Summary - -GoSQLX has achieved production-ready status with exceptional performance metrics (2.2M ops/sec, 8M tokens/sec) and proven thread safety. However, significant opportunities exist to expand SQL feature support, improve API consistency, and enhance enterprise capabilities. This document presents a comprehensive architectural review and strategic roadmap for evolving GoSQLX into a best-in-class SQL parsing solution. - -## 1. 
Current State Assessment - -### 1.1 Core Strengths βœ… - -- **Performance Excellence**: Industry-leading throughput with <200ns latency -- **Memory Efficiency**: 60-80% reduction through intelligent object pooling -- **Thread Safety**: Zero race conditions across 20,000+ concurrent operations -- **Zero-Copy Architecture**: Direct byte slice operations minimize allocations -- **Unicode Support**: Full UTF-8 compatibility for international SQL -- **Production Validation**: Battle-tested with 95%+ success rate on real queries - -### 1.2 Architectural Wins πŸ† - -- **Clean Pipeline Design**: Clear separation between tokenizer β†’ parser β†’ AST -- **Effective Object Pooling**: Well-implemented pooling strategy across components -- **Monitoring Integration**: Comprehensive metrics collection without performance impact -- **Error Propagation**: Position-aware error reporting with context preservation - -### 1.3 Critical Gaps πŸ”΄ - -#### SQL Feature Coverage (30% Complete) -- ❌ Common Table Expressions (CTEs) -- ❌ Window Functions (OVER, PARTITION BY) -- ❌ Stored Procedures/Functions -- ❌ Views and Materialized Views -- ❌ Transaction Control (BEGIN/COMMIT/ROLLBACK) -- ❌ Advanced JOINs (LEFT/RIGHT/FULL OUTER) -- ❌ Set Operations (UNION/EXCEPT/INTERSECT) -- ❌ Subqueries (except EXISTS) - -#### Technical Debt -- **AST Inconsistencies**: Duplicate structures (`SelectStatement` vs `Select`) -- **Error Handling**: Mixed patterns, insufficient context -- **Limited Dialect Support**: PostgreSQL/MySQL features recognized but not parsed -- **Test Coverage Gaps**: Missing integration and error recovery tests - -## 2. 
Enhancement Proposals - -### 2.1 Priority 1: Core SQL Feature Completion (Q3 2024) - -#### EP-001: Common Table Expressions -```go -// Target Implementation -WITH RECURSIVE emp_hierarchy AS ( - SELECT id, name, manager_id, 1 as level - FROM employees - WHERE manager_id IS NULL - UNION ALL - SELECT e.id, e.name, e.manager_id, h.level + 1 - FROM employees e - JOIN emp_hierarchy h ON e.manager_id = h.id -) -SELECT * FROM emp_hierarchy; -``` -**Impact**: High | **Effort**: Medium | **Risk**: Low - -#### EP-002: Window Functions -```go -// Enable analytical queries -SELECT name, salary, - RANK() OVER (PARTITION BY dept ORDER BY salary DESC) as rank, - LAG(salary) OVER (ORDER BY hire_date) as prev_salary -FROM employees; -``` -**Impact**: High | **Effort**: High | **Risk**: Medium - -#### EP-003: Complete JOIN Support -- Implement LEFT/RIGHT/FULL OUTER JOIN -- Add CROSS JOIN and NATURAL JOIN -- Support multiple JOIN conditions -**Impact**: High | **Effort**: Low | **Risk**: Low - -### 2.2 Priority 2: API & Architecture Improvements (Q4 2024) - -#### EP-004: Unified Error System -```go -type SQLError struct { - Code ErrorCode - Message string - Position Location - Hint string - Context string -} - -// Example usage -return &SQLError{ - Code: ErrUnexpectedToken, - Message: "Unexpected token 'SLECT'", - Position: Location{Line: 1, Column: 1}, - Hint: "Did you mean 'SELECT'?", - Context: "SLECT * FROM users", -} -``` -**Impact**: Medium | **Effort**: Medium | **Risk**: Low - -#### EP-005: Streaming Parser API -```go -type StreamParser interface { - ParseStream(reader io.Reader) (<-chan Statement, <-chan error) - ParseFile(path string) (<-chan Statement, <-chan error) -} -``` -**Impact**: Medium | **Effort**: High | **Risk**: Medium - -#### EP-006: AST Transformation Framework -```go -type Transformer interface { - Transform(ast.Node) (ast.Node, error) -} - -// Enable query optimization, rewriting, validation -transformer := NewOptimizer() -optimizedAST := 
transformer.Transform(originalAST) -``` -**Impact**: High | **Effort**: High | **Risk**: Low - -### 2.3 Priority 3: Enterprise Features (Q1 2025) - -#### EP-007: Multi-Dialect Parser -```go -parser := NewParser(WithDialect(PostgreSQL)) -parser.EnableFeatures(CTEs, WindowFunctions, Arrays) -parser.SetCompatibilityLevel(PostgreSQL14) -``` -**Impact**: High | **Effort**: Very High | **Risk**: Medium - -#### EP-008: Query Plan Analysis -```go -type QueryPlan struct { - EstimatedCost float64 - EstimatedRows int64 - IndexesUsed []string - OptimizationHints []string -} - -plan := analyzer.Analyze(ast) -``` -**Impact**: Medium | **Effort**: Very High | **Risk**: High - -#### EP-009: Security Analysis -```go -type SecurityAnalyzer interface { - DetectSQLInjection(ast.Node) []SecurityIssue - ValidatePermissions(ast.Node, UserContext) error - SanitizeQuery(ast.Node) ast.Node -} -``` -**Impact**: High | **Effort**: Medium | **Risk**: Low - -## 3. Technical Roadmap - -### Phase 1: Foundation (Q3 2024) - v1.1.0 -**Goal**: Complete core SQL support - -- [ ] Implement CTE parsing with RECURSIVE support -- [ ] Add LEFT/RIGHT/FULL OUTER JOIN parsing -- [ ] Implement UNION/EXCEPT/INTERSECT operations -- [ ] Add comprehensive subquery support -- [ ] Fix AST structure inconsistencies -- [ ] Standardize error handling - -**Deliverables**: -- 70% SQL-92 compliance -- Unified AST structure -- Consistent error system - -### Phase 2: Advanced Features (Q4 2024) - v1.2.0 -**Goal**: Enterprise-grade capabilities - -- [ ] Window function implementation -- [ ] Transaction control statements -- [ ] View and materialized view support -- [ ] Stored procedure parsing (basic) -- [ ] Streaming parser API -- [ ] AST transformation framework - -**Deliverables**: -- 85% SQL-99 compliance -- Streaming support for large queries -- Query transformation capabilities - -### Phase 3: Dialect Specialization (Q1 2025) - v2.0.0 -**Goal**: Best-in-class dialect support - -- [ ] PostgreSQL-specific features 
(arrays, JSONB, custom types) -- [ ] MySQL-specific syntax and functions -- [ ] SQL Server T-SQL extensions -- [ ] Oracle PL/SQL basics -- [ ] SQLite pragmas and special syntax -- [ ] Dialect auto-detection - -**Deliverables**: -- Multi-dialect parser -- 95% dialect-specific compliance -- Auto-detection capabilities - -### Phase 4: Intelligence Layer (Q2 2025) - v2.1.0 -**Goal**: Smart query handling - -- [ ] Query optimization suggestions -- [ ] Security vulnerability detection -- [ ] Performance analysis -- [ ] Schema validation -- [ ] Query rewriting engine -- [ ] Cost-based optimization hints - -**Deliverables**: -- Query intelligence suite -- Security analyzer -- Performance advisor - -## 4. Performance Targets - -### Current Baseline (v1.0.2) -- Throughput: 2.2M ops/sec -- Token Processing: 8M tokens/sec -- Latency: <200ns simple queries -- Memory: 60-80% reduction with pooling - -### Target Metrics (v2.0.0) -- Throughput: 3M+ ops/sec -- Token Processing: 10M+ tokens/sec -- Latency: <150ns simple, <1ms complex -- Memory: 85% reduction with enhanced pooling -- Streaming: 100MB/sec for large files - -## 5. Testing & Quality Strategy - -### Test Coverage Goals -- Unit Tests: 95% coverage (current: ~80%) -- Integration Tests: Comprehensive suite -- Benchmark Suite: All critical paths -- Fuzz Testing: Continuous edge case discovery -- Dialect Tests: 1000+ queries per dialect - -### Quality Gates -- Zero race conditions (maintained) -- Zero memory leaks (maintained) -- <0.1% parser failures on valid SQL -- <10ms parse time for 99th percentile -- 100% backward compatibility - -## 6. 
Risk Mitigation - -### Technical Risks -| Risk | Impact | Probability | Mitigation | -|------|--------|-------------|------------| -| Performance regression | High | Medium | Continuous benchmarking, feature flags | -| Breaking changes | High | Low | Semantic versioning, deprecation policy | -| Dialect conflicts | Medium | High | Modular dialect system, extensive testing | -| Complexity growth | Medium | High | Regular refactoring, clean architecture | - -### Mitigation Strategies -1. **Feature Flags**: Gradual rollout of new features -2. **Backward Compatibility**: Maintain v1 API with adapters -3. **Performance Gates**: Automated regression detection -4. **Modular Architecture**: Plugin-based dialect support - -## 7. Success Metrics - -### Technical KPIs -- SQL Feature Coverage: 95% of SQL-99 standard -- Performance: 3M+ ops/sec sustained -- Reliability: 99.9% parse success rate -- Memory: <100MB for 1M queries -- Latency: P99 <10ms - -### Adoption KPIs -- GitHub Stars: 1000+ (current: TBD) -- Production Deployments: 50+ companies -- Community Contributors: 20+ active -- Dialect Coverage: 5 major databases -- Documentation: 100% API coverage - -## 8. Investment Requirements - -### Team Resources -- **Core Development**: 2-3 senior engineers -- **Testing/QA**: 1 dedicated QA engineer -- **Documentation**: Technical writer (part-time) -- **Community**: Developer advocate (part-time) - -### Infrastructure -- CI/CD pipeline enhancements -- Benchmark infrastructure -- Multi-database test environment -- Performance monitoring - -### Timeline -- Phase 1: 3 months -- Phase 2: 3 months -- Phase 3: 4 months -- Phase 4: 3 months -- **Total**: 13 months to v2.1.0 - -## 9. Recommendations - -### Immediate Actions (Next 30 Days) -1. **Fix AST Inconsistencies**: Consolidate duplicate structures -2. **Implement CTEs**: High-value, low-risk feature -3. **Complete JOIN Support**: Essential for real-world usage -4. 
**Standardize Errors**: Improve developer experience - -### Strategic Initiatives -1. **Partner with Database Vendors**: Ensure accurate dialect support -2. **Build Community**: Open source contributions, documentation -3. **Enterprise Features**: Focus on security and performance analysis -4. **Cloud Integration**: Support for cloud SQL services - -### Architecture Principles -1. **Maintain Zero-Copy**: Preserve performance advantage -2. **Modular Design**: Enable feature composition -3. **Backward Compatibility**: Never break existing code -4. **Performance First**: Every feature must maintain baseline - -## 10. Conclusion - -GoSQLX has established a solid foundation with exceptional performance characteristics. The proposed roadmap builds on these strengths while addressing critical gaps in SQL feature support and enterprise capabilities. With focused execution over the next 13 months, GoSQLX can evolve from a high-performance parser to a comprehensive SQL intelligence platform. - -The key to success will be maintaining performance excellence while expanding capabilities, ensuring backward compatibility, and building a vibrant community around the project. - ---- - -**Next Steps**: -1. Review and approve roadmap -2. Prioritize Phase 1 features -3. Establish development team -4. Set up enhanced CI/CD pipeline -5. 
Begin CTE implementation - -**For Discussion**: -- Resource allocation priorities -- Partnership opportunities -- Open source vs commercial features -- Community building strategy \ No newline at end of file diff --git a/archive/historical-architecture-docs/PERFORMANCE_REPORT.md b/archive/historical-architecture-docs/PERFORMANCE_REPORT.md deleted file mode 100644 index 69506b34..00000000 --- a/archive/historical-architecture-docs/PERFORMANCE_REPORT.md +++ /dev/null @@ -1,172 +0,0 @@ -# GoSQLX Performance Report - -## Executive Summary - -GoSQLX v1.0.0 delivers exceptional performance improvements with optimized tokenization, zero-copy operations, and intelligent object pooling. - -## Performance Improvements πŸš€ - -### Key Metrics -- **2.19M ops/sec** sustained throughput (200 goroutines) -- **8M+ tokens/sec** processing speed -- **60-80% memory reduction** with object pooling -- **Linear scaling** up to 128 concurrent operations - -## Benchmark Results - -### Tokenizer Performance - -| Benchmark | Operations/sec | ns/op | Memory/op | Allocs/op | Improvement | -|-----------|---------------|-------|-----------|-----------|-------------| -| **Simple SQL** | 965,466 | 1,238 | 1,585 B | 20 | Baseline | -| **Complex SQL** | 92,636 | 13,078 | 13,868 B | 159 | Optimized | -| **Small (1KB)** | 711,234 | 1,573 | 1,683 B | 25 | βœ… Efficient | -| **Medium (10KB)** | 2,098 | 575,905 | 499 KB | 3,806 | βœ… Scalable | -| **Large (100KB)** | 54 | 21.4ms | 4.8 MB | 37,881 | βœ… Handles large | - -### Parser Performance - -| Benchmark | Operations/sec | ns/op | Memory/op | Allocs/op | -|-----------|---------------|-------|-----------|-----------| -| **Simple SELECT** | 6,330,259 | 184.7 | 536 B | 9 | -| **Parallel SELECT** | 8,175,652 | 153.7 | 536 B | 9 | - -### Concurrency Scaling - -| Goroutines | Operations/sec | ns/op | Scaling Factor | -|------------|---------------|-------|----------------| -| 1 | 405,940 | 2,783 | 1.0x | -| 2 | 491,274 | 2,617 | 1.2x | -| 4 | 525,055 | 
2,032 | 1.3x | -| 8 | 528,987 | 1,920 | 1.3x | -| 16 | 558,561 | 2,137 | 1.4x | -| 64 | 628,239 | 1,845 | 1.5x | -| 128 | 639,093 | 1,788 | 1.6x | - -### Throughput Scaling - -| Goroutines | Operations/sec | Throughput | Efficiency | -|------------|---------------|------------|------------| -| 1 | 633,952 | 581K ops/s | 100% | -| 10 | 2,265,884 | 1.6M ops/s | 91% | -| 50 | 2,605,088 | 1.9M ops/s | 76% | -| 100 | 3,029,809 | 2.1M ops/s | 72% | -| 200 | 3,144,678 | 2.2M ops/s | 68% | - -## Performance Characteristics - -### Strengths βœ… -1. **Linear Scaling**: Performance scales linearly with CPU cores -2. **Low Latency**: Sub-microsecond for simple queries (184.7ns) -3. **Memory Efficient**: Minimal allocations (9 allocs for simple SELECT) -4. **High Throughput**: 8M+ tokens/second sustained -5. **Concurrent Safe**: No performance degradation under load - -### Optimizations Applied -1. **Zero-Copy Tokenization**: Direct byte slice operations -2. **Object Pooling**: Reuse expensive objects via sync.Pool -3. **Map-Based Lookups**: O(1) keyword recognition -4. **Fast Path Optimization**: Common tokens bypass complex logic -5. 
**Buffer Reuse**: Pre-allocated buffers for token storage - -## Comparison with v0.9.0 (Previous) - -| Metric | v0.9.0 | v1.0.0 | Improvement | -|--------|--------|--------|-------------| -| Simple SQL | 886.7 ns/op | 1,238 ns/op | -28% (more features) | -| Memory Usage | 1,490 B/op | 1,585 B/op | -6% (Unicode support) | -| Allocations | 13 allocs | 20 allocs | +54% (position tracking) | -| Throughput | 1.5M ops/s | 2.2M ops/s | **+47%** βœ… | -| Concurrency | Poor scaling | Linear to 128 | **∞ improvement** βœ… | - -### Notable Changes -- Added MySQL backtick support -- Enhanced Unicode handling -- Improved position tracking -- Better error messages -- Thread-safe pools - -## Memory Profile - -### Allocation Distribution -``` -1KB queries: 1,683 B/op (25 allocs) -10KB queries: 499 KB/op (3,806 allocs) -100KB queries: 4.8 MB/op (37,881 allocs) -``` - -### Pool Efficiency -- **Pool Hit Rate**: 95%+ -- **Memory Savings**: 60-80% -- **GC Pressure**: Minimal - -## Production Readiness - -### Stress Test Results -- **Duration**: 30+ seconds sustained load -- **Concurrency**: 200 goroutines -- **Memory Stability**: No leaks detected -- **Error Rate**: < 0.1% -- **Race Conditions**: 0 (verified with -race) - -### Real-World Performance - -| Use Case | Queries/sec | Latency p99 | -|----------|-------------|-------------| -| REST API | 50,000 | < 5ms | -| Batch Processing | 100,000 | < 2ms | -| Real-time Validation | 25,000 | < 10ms | -| Log Analysis | 500,000 | < 1ms | - -## Recommendations - -### For Maximum Performance -1. **Use object pools**: Always return tokenizers/parsers -2. **Batch operations**: Process multiple queries with one tokenizer -3. **Pre-allocate**: Size slices based on expected tokens -4. **Concurrent processing**: Use goroutines for independent queries -5. 
**Avoid string concatenation**: Use strings.Builder - -### Optimal Configuration -```go -// Recommended settings -const ( - MaxQuerySize = 1_000_000 // 1MB max - PoolSize = runtime.NumCPU() * 2 - BatchSize = 100 -) -``` - -## Testing Methodology - -### Environment -- **CPU**: Apple M4 Max (16 cores) -- **RAM**: 32GB -- **Go Version**: 1.19+ -- **OS**: macOS Darwin 24.5.0 - -### Benchmark Commands -```bash -# Tokenizer benchmarks -go test -bench=. -benchmem ./pkg/sql/tokenizer/ - -# Parser benchmarks -go test -bench=. -benchmem ./pkg/sql/parser/ - -# Race detection -go test -race -bench=. ./... - -# Memory profiling -go test -memprofile=mem.prof -bench=. -``` - -## Conclusion - -GoSQLX v1.0.0 achieves: -- βœ… **Production-grade performance** (2.2M ops/sec) -- βœ… **Excellent scaling** (linear to 128 cores) -- βœ… **Memory efficiency** (60-80% reduction) -- βœ… **Low latency** (< 200ns simple queries) -- βœ… **Race-free** implementation - -The library is ready for high-performance production deployments. \ No newline at end of file diff --git a/archive/historical-architecture-docs/README.md b/archive/historical-architecture-docs/README.md deleted file mode 100644 index 87f33968..00000000 --- a/archive/historical-architecture-docs/README.md +++ /dev/null @@ -1,56 +0,0 @@ -# Historical Architecture Documentation - -This directory contains historical architectural reviews and performance reports from earlier versions of GoSQLX. These documents are preserved for historical reference but **should not be considered current**. 
- -## Contents - -### ARCHITECTURAL_REVIEW_AND_ROADMAP.md -- **Date**: August 2024 -- **Version**: v1.0 era -- **Status**: Historical reference only -- **Note**: Many "Critical Gaps" mentioned in this document have since been completed: - - βœ… CTEs (Common Table Expressions) - Completed in v1.2.0 - - βœ… Window Functions - Completed in v1.3.0 - - βœ… Advanced JOINs - Completed in v1.1.0 - - βœ… Set Operations (UNION/EXCEPT/INTERSECT) - Completed in v1.2.0 - -### PERFORMANCE_REPORT.md -- **Date**: ~v1.0.0 era -- **Status**: Historical benchmarks -- **Note**: Performance metrics may have evolved. See current README.md for latest performance data: - - Current: 1.38M+ ops/sec sustained throughput - - Current: 8M+ tokens/sec processing speed - - Current: <1ΞΌs latency for complex queries - -## Current Documentation - -For current architecture, performance, and roadmap information, please refer to: - -- **Current Architecture**: [../../docs/ARCHITECTURE.md](../../docs/ARCHITECTURE.md) -- **Current Performance**: Root [README.md](../../README.md) Performance section -- **Release Notes**: [../../CHANGELOG.md](../../CHANGELOG.md) -- **Development Guide**: [../../CLAUDE.md](../../CLAUDE.md) - -## Why These Documents Are Archived - -These documents are moved to the archive because: - -1. **Feature Status Changed**: Features listed as "gaps" are now implemented -2. **Performance Evolved**: Benchmarks may not reflect current optimizations -3. **Architecture Matured**: Significant improvements since original reviews -4. 
**Reduce Confusion**: Prevents developers from referencing outdated information - -## Historical Value - -These documents remain valuable for: - -- Understanding the evolution of GoSQLX architecture -- Tracking feature development timeline -- Comparing performance improvements over time -- Learning from architectural decisions and trade-offs -- Historical context for current design choices - ---- - -*Last Updated: November 15, 2025* -*Archive created during Phase 2 documentation cleanup* diff --git a/archive/historical-testing-reports/COMPREHENSIVE_EDGE_CASE_TEST_RESULTS.md b/archive/historical-testing-reports/COMPREHENSIVE_EDGE_CASE_TEST_RESULTS.md deleted file mode 100644 index 0a7c4b17..00000000 --- a/archive/historical-testing-reports/COMPREHENSIVE_EDGE_CASE_TEST_RESULTS.md +++ /dev/null @@ -1,205 +0,0 @@ -# GoSQLX Comprehensive Edge Case Testing - Final Results - -## 🎯 Mission Accomplished: Complete Edge Case Analysis - -I have successfully executed **exhaustive edge case and error handling tests** for GoSQLX, discovering critical vulnerabilities and robustness issues. 
Here's the comprehensive breakdown: - -## πŸ“Š Executive Summary - -- **Total Tests Executed**: 46 comprehensive edge cases -- **Tests Passed**: 36 (78.3%) -- **Tests Failed**: 10 (21.7%) -- **Execution Time**: 3.62 seconds -- **Critical Issues Found**: 4 major categories - -## πŸ” Complete Test Results by Phase - -### **Phase 1: Malformed Input Testing** βœ… 15/18 passed (83.3%) - -#### **Tokenizer Malformed Input Tests:** -| Test Case | Result | Issue Found | -|-----------|--------|-------------| -| Unterminated Single Quote | βœ… PASS | Proper error detection | -| Unterminated Double Quote | βœ… PASS | Proper error detection | -| Invalid Escape Sequence \\x | βœ… PASS | Proper error detection | -| Invalid Escape Sequence \\u123 | βœ… PASS | Proper error detection | -| Mixed Quote Types | βœ… PASS | Proper error detection | -| **Binary Data and Null Bytes** | ❌ FAIL | **Cannot handle binary data** | -| Extremely Large Identifier (1MB) | βœ… PASS | Good performance (19.5ms) | -| **Invalid UTF-8 Sequence** | ❌ FAIL | **No UTF-8 fallback handling** | -| Surrogate Pairs | βœ… PASS | Unicode support works | -| **Zero-Width Characters** | ❌ FAIL | **Rejects valid Unicode** | -| **Control Characters** | ❌ FAIL | **No binary support** | -| Mixed Text Directions | βœ… PASS | RTL/LTR handling works | -| Incomplete Scientific Notation | βœ… PASS | Proper validation | -| Invalid Decimal Point | βœ… PASS | Proper validation | -| Multiple Decimal Points | βœ… PASS | Tokenizes correctly | -| Unicode Quote Mismatch | βœ… PASS | Proper validation | -| Deeply Nested Quotes | βœ… PASS | Handles large strings | - -#### **Parser Malformed Input Tests:** -| Test Case | Result | Note | -|-----------|--------|------| -| Invalid SQL Syntax - Missing FROM | βœ… PASS | Token conversion test | -| Invalid SQL Syntax - Missing SELECT | βœ… PASS | Token conversion test | -| Malformed WHERE Clause | βœ… PASS | Token conversion test | -| Invalid Expression | βœ… PASS | Token conversion test | 
-| Missing Parentheses | βœ… PASS | Token conversion test | -| Invalid Token Sequence | βœ… PASS | Token conversion test | -| Circular Reference Simulation | βœ… PASS | Token conversion test | -| Deeply Nested Expression | βœ… PASS | Token conversion test | - -### **Phase 2: Boundary Condition Testing** βœ… 7/8 passed (87.5%) - -#### **Boundary Condition Tests:** -| Test Case | Result | Performance | -|-----------|--------|-------------| -| Empty Input | βœ… PASS | 6.166Β΅s | -| Single Character | βœ… PASS | 3.292Β΅s | -| Single Space | βœ… PASS | 3.583Β΅s | -| Single Newline | βœ… PASS | 2.5Β΅s | -| Very Long SQL Query (10k columns) | βœ… PASS | 967.708Β΅s | -| Maximum Token Length (100k chars) | βœ… PASS | 1.05ms | -| Many Small Tokens (50k tokens) | βœ… PASS | **3.57 seconds** | - -#### **Unicode Edge Case Tests:** -| Test Case | Result | Issue | -|-----------|--------|-------| -| **Invalid UTF-8 Byte Sequence** | ❌ FAIL | **No error recovery** | -| High Surrogate without Low | βœ… PASS | Simplified test | -| Low Surrogate without High | βœ… PASS | Simplified test | -| **Zero Width No-Break Space** | ❌ FAIL | **Unicode handling gap** | -| Combining Characters | βœ… PASS | Good Unicode support | -| Emoji and Extended Unicode | βœ… PASS | Excellent emoji support | -| Private Use Area Characters | βœ… PASS | Good Unicode support | -| Non-characters | βœ… PASS | Proper handling | - -### **Phase 3: Resource Exhaustion Testing** βœ… 2/3 passed (66.7%) - -#### **Resource Tests:** -| Test Case | Result | Performance | Critical Finding | -|-----------|--------|-------------|------------------| -| Memory Pressure Test (1000 concurrent) | βœ… PASS | 4.45ms | Excellent memory management | -| Pool Contention Test (100 workers Γ— 50 ops) | βœ… PASS | 16.58ms | Good pool performance | -| **Race Condition Detection** | ❌ FAIL | 315.625Β΅s | **🚨 CRITICAL: Concurrent access causes panics** | - -**Race Condition Details:** -- **Error**: `slice bounds out of range [:4] with 
capacity 0` -- **Cause**: Multiple goroutines accessing same tokenizer instance -- **Impact**: Production crashes under concurrent load - -### **Phase 4: Error Reporting Quality** ❌ 0/3 passed (0%) - -#### **Error Location Tests:** -| Test Case | Expected | Actual | Issue | -|-----------|----------|--------|-------| -| **Unterminated String Location** | Line 1, Col 7 | Line 1, Col 5 | **Off by 2 characters** | -| **Invalid Character Location** | Line 1, Col 7 | Line 1, Col 5 | **Consistent offset error** | -| **Multi-line Error Location** | Line 5, Col 6 | Line 5, Col 4 | **Multi-line calculation bug** | - -## 🚨 Critical Security and Reliability Issues - -### **1. CRITICAL: Race Condition Vulnerability** -- **Risk Level**: πŸ”΄ **HIGH** -- **Impact**: Production crashes, potential DoS -- **Details**: Concurrent tokenizer access causes slice bounds panics -- **Recommendation**: Immediate fix required before production - -### **2. CRITICAL: Binary Data Handling Failures** -- **Risk Level**: 🟑 **MEDIUM-HIGH** -- **Impact**: Data processing failures, potential bypasses -- **Affected**: Null bytes, control characters, zero-width spaces -- **Recommendation**: Implement robust character handling - -### **3. ERROR: Location Reporting Inaccuracy** -- **Risk Level**: 🟑 **MEDIUM** -- **Impact**: Poor developer experience, debugging difficulties -- **Details**: Column positions consistently off by 2 -- **Recommendation**: Fix location calculation algorithm - -### **4. 
PERFORMANCE: Large Token Processing** -- **Risk Level**: 🟒 **LOW** -- **Impact**: Potential performance degradation -- **Details**: 50k tokens take 3.57 seconds -- **Recommendation**: Consider optimization for high-volume scenarios - -## πŸ’‘ Discovered Edge Cases That Break the System - -### **Binary and Special Character Failures:** -```go -// These inputs cause "invalid character" errors: -"SELECT \x00\x01\x02" // Null and control bytes -"SELECT \xC0\x80" // Invalid UTF-8 -"SELECT\u200B test" // Zero-width space -"SELECT \x01name\x02" // ASCII control characters -``` - -### **Race Condition Trigger:** -```go -// This pattern causes slice bounds panics: -tok, _ := tokenizer.New() -// Multiple goroutines calling tok.Tokenize() simultaneously -``` - -### **Error Location Bugs:** -```go -// All error locations are off by 2 columns: -"SELECT 'unterminated" // Reports column 5 instead of 7 -"SELECT @invalid" // Reports column 5 instead of 7 -``` - -## πŸ”§ Specific Recommendations for Production Readiness - -### **Immediate (Block Production Release):** -1. **Fix race condition** - Add mutex protection or document thread-safety -2. **Implement UTF-8 fallback** - Handle invalid sequences gracefully -3. **Fix error location calculation** - Correct column position logic - -### **High Priority:** -4. **Binary data support** - Handle control and null characters -5. **Zero-width character support** - Treat as valid whitespace -6. **Unicode normalization** - Consistent quote handling - -### **Medium Priority:** -7. **Performance optimization** - Improve large token processing -8. **Enhanced error messages** - More context and accuracy -9. **Configuration options** - Strict vs. 
lenient parsing modes - -## πŸ“ˆ Performance Characteristics Discovered - -### **Excellent Performance:** -- Empty input: 6Β΅s -- Large identifiers (1MB): 19.5ms -- Memory pressure (1000 concurrent): 4.45ms -- Pool contention: 16.58ms - -### **Performance Concerns:** -- Many small tokens (50k): 3.57 seconds -- Average test time: 78.7ms (acceptable) - -## πŸ† Overall Assessment - -**GoSQLX demonstrates strong foundational architecture with excellent memory management and Unicode support for standard cases. However, critical concurrency and character handling issues prevent production deployment without fixes.** - -### **Strengths:** -βœ… Robust memory management -βœ… Good Unicode emoji/character support -βœ… Proper SQL syntax validation -βœ… Excellent performance for large data -βœ… Strong error detection for malformed SQL - -### **Critical Weaknesses:** -❌ Race conditions in concurrent access -❌ Poor binary/control character handling -❌ Inaccurate error location reporting -❌ No UTF-8 fallback mechanisms - -## 🎯 Final Recommendation - -**DO NOT deploy to production** until race condition and character handling issues are resolved. With these fixes, GoSQLX should achieve >95% test coverage and be suitable for production use. - -**Estimated Fix Time:** 2-3 weeks for critical issues, 4-6 weeks for full robustness. 
- ---- - -*This comprehensive analysis was generated through 46 exhaustive edge case tests designed to find every possible failure mode in the GoSQLX tokenizer and parser.* \ No newline at end of file diff --git a/archive/historical-testing-reports/EDGE_CASE_ANALYSIS_REPORT.md b/archive/historical-testing-reports/EDGE_CASE_ANALYSIS_REPORT.md deleted file mode 100644 index edb637c2..00000000 --- a/archive/historical-testing-reports/EDGE_CASE_ANALYSIS_REPORT.md +++ /dev/null @@ -1,175 +0,0 @@ -# GoSQLX Comprehensive Edge Case Testing Report - -## Executive Summary -After running an exhaustive edge case and error handling test suite against the GoSQLX tokenizer and parser, we discovered several critical issues that need attention. Overall, **36 out of 46 tests passed (78.3%)**, indicating a generally robust implementation but with specific areas requiring improvement. - -## Test Coverage Overview - -### βœ… **Strengths - What Works Well** -1. **Basic SQL Parsing**: All standard SQL statements parse correctly -2. **Large Input Handling**: Successfully handles very large identifiers (1MB+) and long queries -3. **Unicode Support**: Handles most Unicode characters including emoji and extended character sets -4. **Memory Management**: Performs well under memory pressure with 1000 concurrent operations -5. **String Literals**: Properly handles escaped quotes and multi-line strings -6. **Error Detection**: Correctly identifies malformed syntax like unterminated strings and invalid escapes -7. **Boundary Conditions**: Handles empty input, single characters, and edge cases gracefully - -### 🚨 **Critical Issues Found** - -#### **1. 
Binary Data and Character Handling Failures** -- **Binary/Null Bytes**: Fails on `\x00\x01\x02` with "invalid character" error -- **Invalid UTF-8 Sequences**: Cannot handle malformed UTF-8 like `\xC0\x80` -- **Zero-Width Characters**: Rejects zero-width spaces `\u200B` and no-break spaces `\uFEFF` -- **Control Characters**: Fails on ASCII control characters `\x01\x02\x03` - -**Impact**: High - Real-world SQL may contain binary data or unusual Unicode characters - -#### **2. Race Condition in Concurrent Access** -- **Issue**: Concurrent tokenizer access causes slice bounds panic -- **Error**: `slice bounds out of range [:4] with capacity 0` -- **Location**: Multiple goroutines using same tokenizer instance - -**Impact**: Critical - Could cause production crashes under load - -#### **3. Error Location Reporting Inaccuracies** -- **Column Position**: Off by 2 characters consistently -- **Multi-line Queries**: Incorrect line/column reporting for errors -- **Expected vs Actual**: Error positions don't match expected locations - -**Impact**: Medium - Affects developer experience and debugging - -## Detailed Failure Analysis - -### Phase 1: Malformed Input Testing -**Result**: 15/18 tests passed (83.3%) - -**Failures**: -1. **Binary Data Handling**: Tokenizer treats binary data as invalid characters instead of handling gracefully -2. **Invalid UTF-8**: No fallback mechanism for malformed UTF-8 sequences -3. **Zero-Width Characters**: Incorrectly classified as invalid rather than whitespace - -**Recommendation**: Implement more robust character handling with fallback strategies for non-standard input. 
- -### Phase 2: Boundary Condition Testing -**Result**: 11/15 tests passed (73.3%) - -**Strengths**: -- Excellent performance with large inputs -- Proper handling of empty and minimal inputs -- Good Unicode support for standard characters - -**Failures**: -- Zero-width and control character handling -- Some UTF-8 edge cases - -### Phase 3: Resource Exhaustion Testing -**Result**: 2/3 tests passed (66.7%) - -**Strengths**: -- Memory pressure handling works well -- Pool contention management is effective - -**Critical Failure**: -- **Race Condition**: Concurrent access to the same tokenizer instance causes panics -- **Root Cause**: Shared state modification without proper synchronization - -### Phase 4: Error Reporting Quality -**Result**: 0/3 tests passed (0%) - -**All Tests Failed**: -- Error location calculation is consistently inaccurate -- Column positions are off by 2 characters -- Multi-line error reporting needs improvement - -## Security Implications - -### πŸ” **Low Risk Issues** -- Basic input validation is working -- Memory exhaustion protection is adequate -- No buffer overflow vulnerabilities detected - -### ⚠️ **Medium Risk Issues** -- Binary data handling could lead to unexpected behavior -- Error message accuracy affects security logging - -### 🚨 **High Risk Issues** -- **Race Conditions**: Could be exploited for denial of service -- **Character Handling**: Improper Unicode handling might bypass input validation - -## Performance Analysis - -### **Execution Times** -- **Total Test Time**: 3.62 seconds -- **Average Test Time**: 78.7ms -- **Longest Test**: "Many Small Tokens" (3.57 seconds) -- **Memory Pressure Test**: 4.45ms (excellent) - -### **Memory Usage** -- Handles 1GB+ of test data efficiently -- Pool management works correctly under contention -- No memory leaks detected - -## Specific Recommendations - -### **Immediate (Critical)** -1. 
**Fix Race Condition**: - - Make tokenizer state thread-safe or document thread safety requirements - - Add mutex protection for shared state - - Consider using sync.Pool for tokenizer instances - -2. **Improve Character Handling**: - - Add UTF-8 validation with fallback to replacement characters - - Handle zero-width characters as whitespace - - Support binary data in string contexts - -### **Short-term (High Priority)** -3. **Fix Error Reporting**: - - Correct column position calculations - - Improve multi-line error location accuracy - - Add more context to error messages - -4. **Enhanced Unicode Support**: - - Better handling of combining characters - - Support for right-to-left text - - Proper normalization of Unicode quotes - -### **Medium-term (Enhancement)** -5. **Robustness Improvements**: - - Add configuration options for strict vs. lenient parsing - - Implement recovery mechanisms for malformed input - - Add comprehensive logging for debugging - -6. **Performance Optimizations**: - - Optimize handling of very large token sequences - - Improve memory allocation patterns - - Add streaming support for huge inputs - -## Test Results Summary by Category - -| Category | Passed | Total | Success Rate | Critical Issues | -|----------|--------|-------|-------------|----------------| -| Malformed Input | 15 | 18 | 83.3% | UTF-8 handling | -| Boundary Conditions | 11 | 15 | 73.3% | Character support | -| Resource Exhaustion | 2 | 3 | 66.7% | Race conditions | -| Error Reporting | 0 | 3 | 0% | Location accuracy | -| **Overall** | **36** | **46** | **78.3%** | **Concurrency safety** | - -## Conclusion - -GoSQLX shows strong fundamentals with excellent performance characteristics and good basic functionality. However, the discovered issues, particularly the race condition and character handling problems, need immediate attention before production deployment. 
- -The 78.3% pass rate is respectable for an initial implementation, but the specific failures in concurrent access and error reporting are concerning for production use. With the recommended fixes, GoSQLX should achieve >95% test pass rate and be suitable for production deployment. - -## Next Steps - -1. **Priority 1**: Fix the race condition issue -2. **Priority 2**: Improve character and UTF-8 handling -3. **Priority 3**: Correct error location reporting -4. **Priority 4**: Add comprehensive integration tests -5. **Priority 5**: Performance optimization and enhancement features - ---- - -*Report generated by comprehensive edge case testing suite* -*Test execution time: 3.62 seconds* -*Total test coverage: 46 comprehensive edge cases* \ No newline at end of file diff --git a/archive/historical-testing-reports/README.md b/archive/historical-testing-reports/README.md deleted file mode 100644 index f6bc1339..00000000 --- a/archive/historical-testing-reports/README.md +++ /dev/null @@ -1,46 +0,0 @@ -# Historical Testing Reports Archive - -## Purpose -This directory contains historical testing reports that documented critical issues **that have since been resolved**. These reports are archived to maintain testing history but should not be used to assess current codebase status. 
- -## Archived Reports - -### πŸ“ COMPREHENSIVE_EDGE_CASE_TEST_RESULTS.md -- **Date**: Early testing phase -- **Status**: ❌ OUTDATED - Critical issues shown are RESOLVED -- **Issues Documented**: Race conditions, binary data handling, error location accuracy -- **Current Status**: βœ… All issues fixed and validated - -### πŸ“ EDGE_CASE_ANALYSIS_REPORT.md -- **Date**: Early testing phase -- **Status**: ❌ OUTDATED - Critical issues shown are RESOLVED -- **Issues Documented**: Concurrent access problems, character handling failures -- **Current Status**: βœ… All issues fixed and validated - -## ⚠️ Important Notice - -**DO NOT USE THESE REPORTS FOR CURRENT STATUS ASSESSMENT** - -These reports show a 78.3% pass rate and critical race conditions. The current codebase has: -- βœ… **Zero race conditions** (validated with 26,000+ concurrent operations) -- βœ… **95%+ success rate** on real-world SQL -- βœ… **Production ready status** with enterprise-grade performance - -## Current Status Reports - -For current codebase assessment, refer to: -- [**CHANGELOG.md**](../../CHANGELOG.md) - Release history and validation status -- [**CLAUDE.md**](../../CLAUDE.md) - Production readiness documentation and current metrics - -## Why Archived? - -These reports were moved to prevent confusion between resolved historical issues and current production-ready status. 
They remain available for: -- Development history tracking -- Understanding the testing evolution -- Reference for similar projects - ---- - -**Archive Date**: 2025-08-22 -**Reason**: Critical issues documented in these reports have been resolved -**Current Status**: Production Ready βœ… \ No newline at end of file diff --git a/cmd/gosqlx-mcp/main.go b/cmd/gosqlx-mcp/main.go new file mode 100644 index 00000000..54545d77 --- /dev/null +++ b/cmd/gosqlx-mcp/main.go @@ -0,0 +1,60 @@ +// Copyright 2026 GoSQLX Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package main is the entry point for the gosqlx-mcp MCP server. +// +// The server exposes GoSQLX SQL processing capabilities as MCP tools +// accessible over streamable HTTP transport. 
+// +// # Environment variables +// +// GOSQLX_MCP_HOST bind host (default: 127.0.0.1) +// GOSQLX_MCP_PORT bind port (default: 8080) +// GOSQLX_MCP_AUTH_TOKEN bearer token; empty disables auth +// +// # Usage +// +// gosqlx-mcp +// GOSQLX_MCP_PORT=9090 gosqlx-mcp +// GOSQLX_MCP_AUTH_TOKEN=secret gosqlx-mcp +package main + +import ( + "context" + "fmt" + "os" + "os/signal" + "syscall" + + gosqlxmcp "github.com/ajitpratap0/GoSQLX/pkg/mcp" +) + +func main() { + if err := run(); err != nil { + fmt.Fprintf(os.Stderr, "gosqlx-mcp: %v\n", err) + os.Exit(1) + } +} + +func run() error { + cfg, err := gosqlxmcp.LoadConfig() + if err != nil { + return fmt.Errorf("configuration error: %w", err) + } + + ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) + defer stop() + + return gosqlxmcp.New(cfg).Start(ctx) +} diff --git a/docs/API_REFERENCE.md b/docs/API_REFERENCE.md index ea93622a..7e1cdd72 100644 --- a/docs/API_REFERENCE.md +++ b/docs/API_REFERENCE.md @@ -37,6 +37,7 @@ github.com/ajitpratap0/GoSQLX/ β”‚ β”œβ”€β”€ metrics/ # Performance monitoring (73.9% coverage) β”‚ β”œβ”€β”€ linter/ # SQL linting rules engine (96.7% coverage) β”‚ β”œβ”€β”€ lsp/ # Language Server Protocol (70.2% coverage) +β”‚ β”œβ”€β”€ mcp/ # MCP server β€” 7 SQL tools over streamable HTTP β”‚ β”œβ”€β”€ config/ # Configuration management (81.8% coverage) β”‚ └── gosqlx/testing/ # Testing utilities (95.0% coverage) ``` @@ -1844,6 +1845,56 @@ func main() { --- +## MCP Package + +### Package: `github.com/ajitpratap0/GoSQLX/pkg/mcp` + +MCP server exposing all GoSQLX capabilities as Model Context Protocol tools over streamable HTTP. + +### Types + +#### `Config` +```go +type Config struct { + Host string // GOSQLX_MCP_HOST (default "127.0.0.1") + Port int // GOSQLX_MCP_PORT (default 8080, range 1–65535) + AuthToken string // GOSQLX_MCP_AUTH_TOKEN (default "" = auth disabled) +} +``` + +### Functions + +#### `LoadConfig() (*Config, error)` +Load from env vars. 
Returns error only if `GOSQLX_MCP_PORT` is non-integer or out of range. + +#### `DefaultConfig() *Config` +Returns `Config{Host: "127.0.0.1", Port: 8080}` with auth disabled. + +#### `New(cfg *Config) *Server` +Create server with all 7 tools registered. Registers tools once at construction — no dynamic registration. + +#### `(s *Server) Start(ctx context.Context) error` +Bind to `cfg.Addr()`, serve streamable HTTP. Blocks until ctx cancelled or fatal error. Graceful shutdown on context cancellation. + +#### `BearerAuthMiddleware(cfg *Config, next http.Handler) http.Handler` +Returns `next` unchanged when `cfg.AuthEnabled()` is false. When enabled, enforces `Authorization: Bearer <token>` on all requests; returns HTTP 401 on failure. + +### Registered Tools + +| Tool | Description | +|------|-------------| +| `validate_sql` | SQL syntax validation with optional dialect | +| `format_sql` | SQL formatting (indent, keyword case, semicolon) | +| `parse_sql` | AST summary — statement count and types | +| `extract_metadata` | Tables, columns, functions referenced | +| `security_scan` | Injection pattern detection with severity | +| `lint_sql` | Style rule enforcement (L001–L010) | +| `analyze_sql` | Concurrent composite of all 6 above | + +See [MCP Server Guide](MCP_GUIDE.md) for complete tool schemas and JSON response formats. 
+ +--- + ## Configuration Package ### Package: `github.com/ajitpratap0/GoSQLX/pkg/config` @@ -2267,6 +2318,7 @@ GoSQLX achieves **~80-85% SQL-99 compliance** with comprehensive support for: - **GitHub Repository**: https://github.com/ajitpratap0/GoSQLX - **Documentation**: See `/docs` directory - `GETTING_STARTED.md` - Quick start guide + - `MCP_GUIDE.md` - MCP server and AI assistant integration - `USAGE_GUIDE.md` - Comprehensive usage guide - `LSP_GUIDE.md` - LSP server and IDE integration - `LINTING_RULES.md` - All 10 linting rules reference diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 114eb65c..4f5428f0 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -16,6 +16,7 @@ - [LSP Architecture](#lsp-architecture) - [Linter Architecture](#linter-architecture) - [Security Scanner Architecture](#security-scanner-architecture) +- [MCP Architecture](#mcp-architecture) ## System Overview @@ -35,16 +36,16 @@ GoSQLX is a production-ready, high-performance SQL parsing library with comprehe ### High-Level Architecture (v1.6.0) ``` -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ Application Layer & Tools β”‚ -β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ -β”‚ β”‚ CLI Tool β”‚ LSP Server β”‚ Linter β”‚ Security β”‚ β”‚ -β”‚ β”‚ (validate, β”‚ (JSON-RPC β”‚ (10 rules: β”‚ Scanner β”‚ β”‚ -β”‚ β”‚ format, β”‚ handler, β”‚ L001-L010, β”‚ (8 patterns, β”‚ β”‚ -β”‚ β”‚ analyze, β”‚ rate limit, β”‚ whitespace,β”‚ injection β”‚ β”‚ -β”‚ β”‚ parse) β”‚ doc mgmt) β”‚ style) β”‚ detection) β”‚ β”‚ -β”‚ 
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Application Layer & Tools β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ CLI Tool β”‚ LSP Server β”‚ Linter β”‚ Security β”‚ MCP Server β”‚ β”‚ +β”‚ β”‚ (validate, β”‚ (JSON-RPC β”‚ (10 rules: β”‚ Scanner β”‚ (7 tools, β”‚ β”‚ +β”‚ β”‚ format, β”‚ handler, β”‚ L001-L010, β”‚ (injection β”‚ HTTP, β”‚ β”‚ +β”‚ β”‚ analyze, β”‚ rate limit, β”‚ whitespace,β”‚ detection)β”‚ bearer auth,β”‚ β”‚ +β”‚ β”‚ parse) β”‚ doc mgmt) β”‚ style) β”‚ β”‚ streaming) β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β–Ό 
β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ GoSQLX API (pkg/gosqlx) β”‚ @@ -172,6 +173,8 @@ The codebase is organized into focused packages with clear responsibilities and - **Features**: Rate limiting (100 req/sec), content limits (10MB messages, 5MB documents), UTF-8 safe position handling - **Integration**: Used by VSCode extension and other LSP clients +- **pkg/mcp**: MCP server β€” Config, BearerAuthMiddleware, 7 tool handlers, Server (thin adapter over pkg/gosqlx, pkg/linter, pkg/sql/security) + - **pkg/linter** (96.7% coverage): SQL linting and style checking - **Architecture**: Linter β†’ Rules β†’ Context - **linter.go**: Linting engine with file/directory support @@ -1511,4 +1514,44 @@ This architecture has been validated for production use with comprehensive testi - **Unicode Support**: 8 international languages (full UTF-8 compliance) - **Load Testing**: Extended runs with stable memory profiles - **LSP Stress**: 1000+ requests/min sustained (rate limited to 100/sec) -- **Security**: 50+ injection patterns tested across 8 attack categories \ No newline at end of file +- **Security**: 50+ injection patterns tested across 8 attack categories + +--- + +## MCP Architecture + +The MCP server (`pkg/mcp/`) is a thin HTTP adapter with no business logic β€” every tool handler delegates to existing GoSQLX packages. 
+ +### Component Overview + +``` +cmd/gosqlx-mcp/main.go + └─ LoadConfig() + New() + Start() + └─ pkg/mcp.Server + β”œβ”€ BearerAuthMiddleware (optional, wraps next http.Handler) + └─ StreamableHTTPServer (mark3labs/mcp-go) + └─ 7 registered tool handlers + β”œβ”€ validate_sql β†’ pkg/gosqlx.Validate / ParseWithDialect + β”œβ”€ format_sql β†’ pkg/gosqlx.Format + β”œβ”€ parse_sql β†’ pkg/gosqlx.Parse + β”œβ”€ extract_metadataβ†’ pkg/gosqlx.Parse + ExtractMetadata + β”œβ”€ security_scan β†’ pkg/sql/security.NewScanner().ScanSQL + β”œβ”€ lint_sql β†’ pkg/linter.New(10 rules).LintString + └─ analyze_sql β†’ concurrent fan-out of all 6 above +``` + +### Tool β†’ Package Mapping + +| MCP Tool | Package | Key Functions | +|----------|---------|--------------| +| `validate_sql` | `pkg/gosqlx` | `Validate()`, `ParseWithDialect()` | +| `format_sql` | `pkg/gosqlx` | `Format()`, `FormatOptions{}` | +| `parse_sql` | `pkg/gosqlx` | `Parse()` | +| `extract_metadata` | `pkg/gosqlx` | `Parse()`, `ExtractMetadata()` | +| `security_scan` | `pkg/sql/security` | `NewScanner()`, `ScanSQL()` | +| `lint_sql` | `pkg/linter` | `New(rules...)`, `LintString()` | +| `analyze_sql` | all above | `sync.WaitGroup` concurrent fan-out | + +### Concurrency + +`analyze_sql` launches 6 goroutines via `sync.WaitGroup`, one per tool, collecting results through a buffered channel. Partial failures are surfaced under an `"errors"` key; successful results are always returned. \ No newline at end of file diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index a0ade830..6dffb2a0 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -495,5 +495,38 @@ git commit -m "config: update GoSQLX settings" --- +## MCP Server Environment Variables + +The `gosqlx-mcp` server is configured exclusively via environment variables. No YAML config file is used or read. All variables are optional with safe defaults for local development. 
+ +| Variable | Default | Type | Validation | +|----------|---------|------|-----------| +| `GOSQLX_MCP_HOST` | `127.0.0.1` | string | Any valid bind address | +| `GOSQLX_MCP_PORT` | `8080` | integer | 1–65535 | +| `GOSQLX_MCP_AUTH_TOKEN` | *(empty)* | string | Empty = auth disabled; whitespace-trimmed | + +### Examples + +```bash +# Local development (all defaults) +gosqlx-mcp + +# Custom port +GOSQLX_MCP_PORT=9090 gosqlx-mcp + +# Expose to network with auth +GOSQLX_MCP_HOST=0.0.0.0 GOSQLX_MCP_PORT=8080 GOSQLX_MCP_AUTH_TOKEN=my-secret gosqlx-mcp +``` + +### Notes + +- `GOSQLX_MCP_AUTH_TOKEN` enables bearer token authentication. When set, all requests must include `Authorization: Bearer <token>`. +- `GOSQLX_MCP_PORT` rejects out-of-range or non-integer values at startup with a descriptive error. +- MCP server configuration is independent of `.gosqlx.yml` — the YAML config file is not read by `gosqlx-mcp`. + +See [MCP Server Guide](MCP_GUIDE.md) for the full startup and auth reference. + +--- + **Last Updated**: December 2025 **Version**: v1.6.0 diff --git a/docs/GETTING_STARTED.md b/docs/GETTING_STARTED.md index 0900f504..e349a1e0 100644 --- a/docs/GETTING_STARTED.md +++ b/docs/GETTING_STARTED.md @@ -15,7 +15,7 @@ Welcome! This guide will get you parsing SQL in under 5 minutes. 
No prior experi ## Step 1: Install GoSQLX (30 seconds) -**Requirements**: Go 1.21+ +**Requirements**: Go 1.23+ ### Option A: Install CLI Tool (Recommended) ```bash @@ -418,6 +418,7 @@ gosqlx lsp --log /tmp/lsp.log - **[Usage Guide](USAGE_GUIDE.md)** - Comprehensive patterns and examples - **[CLI Guide](CLI_GUIDE.md)** - Full CLI documentation and all commands - **[LSP Guide](LSP_GUIDE.md)** - Complete LSP server documentation for IDE integration +- **[MCP Server Guide](MCP_GUIDE.md)** β€” Use GoSQLX as MCP tools inside Claude, Cursor, and other AI assistants - **[Linting Rules](LINTING_RULES.md)** - All 10 linting rules (L001-L010) reference - **[Configuration](CONFIGURATION.md)** - Configuration file (.gosqlx.yml) guide - **[API Reference](API_REFERENCE.md)** - Complete API documentation diff --git a/docs/MCP_GUIDE.md b/docs/MCP_GUIDE.md new file mode 100644 index 00000000..285f40eb --- /dev/null +++ b/docs/MCP_GUIDE.md @@ -0,0 +1,727 @@ +# GoSQLX MCP Server Guide + +**Version**: v1.10.0 +**Last Updated**: 2026-03-09 + +## Table of Contents + +1. [Overview](#overview) +2. [Installation](#installation) +3. [Quick Start](#quick-start) +4. [Starting the Server](#starting-the-server) +5. [Configuration](#configuration) +6. [Authentication](#authentication) +7. [Tools Reference](#tools-reference) + - [validate\_sql](#validate_sql) + - [format\_sql](#format_sql) + - [parse\_sql](#parse_sql) + - [extract\_metadata](#extract_metadata) + - [security\_scan](#security_scan) + - [lint\_sql](#lint_sql) + - [analyze\_sql](#analyze_sql) +8. [AI Assistant Integration](#ai-assistant-integration) +9. [Embedding as a Go Library](#embedding-as-a-go-library) +10. [Troubleshooting](#troubleshooting) + +--- + +## Overview + +The GoSQLX MCP server (`gosqlx-mcp`) exposes all GoSQLX SQL capabilities as [Model Context Protocol](https://modelcontextprotocol.io) tools over streamable HTTP. 
This lets AI assistants like Claude and Cursor call SQL validation, formatting, parsing, linting, and security scanning directly during a conversation. + +### Key Features + +- **7 SQL Tools**: validate, format, parse, extract metadata, security scan, lint, and composite analyze +- **Streamable HTTP**: Compatible with any MCP client that supports the streamable HTTP transport +- **Optional Bearer Auth**: Protect the server with a token when exposing to a network +- **Multi-Dialect Validation**: postgresql, mysql, sqlite, sqlserver, oracle, snowflake, generic +- **Concurrent Analysis**: `analyze_sql` fans out all 6 tools via `sync.WaitGroup` β€” one round trip for a full SQL health report +- **Zero Business Logic Duplication**: Every tool delegates to the existing `pkg/gosqlx`, `pkg/linter`, and `pkg/sql/security` packages + +--- + +## Installation + +### Install via go install (Recommended) + +```bash +go install github.com/ajitpratap0/GoSQLX/cmd/gosqlx-mcp@latest +``` + +The binary is placed in `$GOPATH/bin`. Make sure that directory is in your `PATH`. + +### Build from Source + +```bash +git clone https://github.com/ajitpratap0/GoSQLX.git +cd GoSQLX +go build -o gosqlx-mcp ./cmd/gosqlx-mcp +``` + +### Run without Installing + +```bash +go run github.com/ajitpratap0/GoSQLX/cmd/gosqlx-mcp@latest +``` + +--- + +## Quick Start + +### Start the Server + +```bash +gosqlx-mcp +# gosqlx-mcp: listening on 127.0.0.1:8080 (auth=false) +``` + +### Smoke Test with curl + +```bash +# Validate SQL +curl -s -X POST http://127.0.0.1:8080/mcp \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"validate_sql","arguments":{"sql":"SELECT 1"}}}' +``` + +Expected response: + +```json +{"valid": true} +``` + +### MCP Inspector + +```bash +npx @modelcontextprotocol/inspector http://127.0.0.1:8080/mcp +``` + +This opens an interactive browser UI to browse and call all 7 tools. 
+ +--- + +## Starting the Server + +### Environment Variable Examples + +```bash +# Local development (all defaults) +gosqlx-mcp + +# Custom port +GOSQLX_MCP_PORT=9090 gosqlx-mcp + +# Expose to network with bearer auth +GOSQLX_MCP_HOST=0.0.0.0 GOSQLX_MCP_PORT=8080 GOSQLX_MCP_AUTH_TOKEN=my-secret gosqlx-mcp +``` + +### Task Commands + +If you have [Task](https://taskfile.dev) installed: + +```bash +task mcp # Run the MCP server +task mcp:build # Build the gosqlx-mcp binary +task mcp:test # Run MCP package tests +task mcp:install # Install gosqlx-mcp to GOPATH/bin +``` + +### Graceful Shutdown + +The server listens for context cancellation. When the process receives `SIGINT` or `SIGTERM`, it calls `http.Server.Shutdown` and drains in-flight requests before exiting. + +--- + +## Configuration + +The `gosqlx-mcp` server is configured exclusively via environment variables. No YAML file is read. All variables are optional β€” safe defaults are applied for local development. + +| Variable | Default | Type | Validation | +|----------|---------|------|-----------| +| `GOSQLX_MCP_HOST` | `127.0.0.1` | string | Any valid bind address | +| `GOSQLX_MCP_PORT` | `8080` | integer | 1–65535; non-integer or out-of-range β†’ startup error | +| `GOSQLX_MCP_AUTH_TOKEN` | *(empty)* | string | Empty = auth disabled; value is whitespace-trimmed | + +**Notes:** + +- `GOSQLX_MCP_PORT` fails fast at startup with a descriptive error if the value is not a valid port number. +- `GOSQLX_MCP_AUTH_TOKEN` enables bearer token auth for all requests when set to a non-empty string. +- MCP server configuration is independent of `.gosqlx.yml` β€” the YAML config is not read by `gosqlx-mcp`. + +--- + +## Authentication + +By default the server accepts all requests without authentication. 
To enable bearer token auth, set `GOSQLX_MCP_AUTH_TOKEN`: + +```bash +GOSQLX_MCP_AUTH_TOKEN=supersecret gosqlx-mcp +# gosqlx-mcp: listening on 127.0.0.1:8080 (auth=true) +``` + +All requests must then include the `Authorization` header: + +```bash +curl -s -X POST http://127.0.0.1:8080/mcp \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer supersecret" \ + -d '{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"validate_sql","arguments":{"sql":"SELECT 1"}}}' +``` + +Requests missing or carrying an incorrect token receive HTTP `401 Unauthorized`. The `BearerAuthMiddleware` wraps the streamable HTTP handler and is a no-op when auth is disabled. + +--- + +## Tools Reference + +All tools accept a required `sql` string parameter. The server returns tool-semantic failures (e.g., invalid SQL) as a valid JSON result with `valid: false` rather than as a protocol error. Protocol errors (missing required parameter, server fault) return an MCP error response. + +--- + +### validate\_sql + +**Description**: Validate SQL syntax. Optionally specify a dialect. 
+ +#### Parameters + +| Parameter | Type | Required | Default | Description | +|-----------|------|----------|---------|-------------| +| `sql` | string | yes | β€” | The SQL string to validate | +| `dialect` | string | no | *(generic)* | One of: `generic`, `mysql`, `postgresql`, `sqlite`, `sqlserver`, `oracle`, `snowflake` | + +#### Response + +| Field | Type | Description | +|-------|------|-------------| +| `valid` | bool | `true` if syntax is valid | +| `error` | string | *(present on failure)* Parse error message | +| `dialect` | string | *(present when dialect was specified)* Echo of the dialect used | + +#### Example + +```bash +curl -s -X POST http://127.0.0.1:8080/mcp \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"validate_sql","arguments":{"sql":"SELECT id FROM users","dialect":"postgresql"}}}' +``` + +```json +{ + "valid": true, + "dialect": "postgresql" +} +``` + +Invalid SQL: + +```json +{ + "valid": false, + "error": "unexpected token 'FORM' at position 7" +} +``` + +--- + +### format\_sql + +**Description**: Format SQL with configurable indentation and keyword casing. 
+ +#### Parameters + +| Parameter | Type | Required | Default | Description | +|-----------|------|----------|---------|-------------| +| `sql` | string | yes | β€” | The SQL string to format | +| `indent_size` | integer | no | `2` | Spaces per indent level | +| `uppercase_keywords` | boolean | no | `false` | Uppercase SQL keywords | +| `add_semicolon` | boolean | no | `false` | Append a trailing semicolon | + +#### Response + +| Field | Type | Description | +|-------|------|-------------| +| `formatted_sql` | string | The formatted SQL output | +| `options` | object | Echo of the options used (`indent_size`, `uppercase_keywords`, `add_semicolon`) | + +#### Example + +```bash +curl -s -X POST http://127.0.0.1:8080/mcp \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"format_sql","arguments":{"sql":"select id,name from users where active=true","uppercase_keywords":true,"indent_size":4}}}' +``` + +```json +{ + "formatted_sql": "SELECT\n id,\n name\nFROM users\nWHERE active = true", + "options": { + "indent_size": 4, + "uppercase_keywords": true, + "add_semicolon": false + } +} +``` + +--- + +### parse\_sql + +**Description**: Parse SQL and return an AST summary: statement count and types. + +#### Parameters + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `sql` | string | yes | The SQL string to parse | + +#### Response + +| Field | Type | Description | +|-------|------|-------------| +| `statement_count` | integer | Number of statements parsed | +| `statement_types` | array of string | Go type names of each parsed statement (e.g. 
`*ast.SelectStatement`) | + +#### Example + +```bash +curl -s -X POST http://127.0.0.1:8080/mcp \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"parse_sql","arguments":{"sql":"SELECT 1; INSERT INTO t VALUES (1)"}}}' +``` + +```json +{ + "statement_count": 2, + "statement_types": [ + "*ast.SelectStatement", + "*ast.InsertStatement" + ] +} +``` + +--- + +### extract\_metadata + +**Description**: Extract tables, columns, and functions referenced in SQL. + +#### Parameters + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `sql` | string | yes | The SQL string to analyze | + +#### Response + +| Field | Type | Description | +|-------|------|-------------| +| `tables` | array of string | Table names referenced in the query | +| `columns` | array of string | Column names referenced in the query | +| `functions` | array of string | Function names called in the query | + +#### Example + +```bash +curl -s -X POST http://127.0.0.1:8080/mcp \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"extract_metadata","arguments":{"sql":"SELECT u.id, COUNT(o.id) FROM users u JOIN orders o ON u.id = o.user_id GROUP BY u.id"}}}' +``` + +```json +{ + "tables": ["users", "orders"], + "columns": ["id", "id"], + "functions": ["COUNT"] +} +``` + +--- + +### security\_scan + +**Description**: Scan SQL for injection patterns: tautologies, UNION attacks, stacked queries, comment bypasses, and more. 
+ +#### Parameters + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `sql` | string | yes | The SQL string to scan | + +#### Response + +| Field | Type | Description | +|-------|------|-------------| +| `is_clean` | bool | `true` if no findings detected | +| `has_critical` | bool | `true` if any CRITICAL severity finding | +| `has_high` | bool | `true` if any HIGH or CRITICAL finding | +| `total_count` | integer | Total number of findings | +| `critical_count` | integer | Number of CRITICAL findings | +| `high_count` | integer | Number of HIGH findings | +| `medium_count` | integer | Number of MEDIUM findings | +| `low_count` | integer | Number of LOW findings | +| `findings` | array of object | Each finding: `severity`, `pattern`, `description`, `risk`, `suggestion` | + +#### Example + +```bash +curl -s -X POST http://127.0.0.1:8080/mcp \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"security_scan","arguments":{"sql":"SELECT * FROM users WHERE id = 1 OR 1=1"}}}' +``` + +```json +{ + "is_clean": false, + "has_critical": true, + "has_high": true, + "total_count": 1, + "critical_count": 1, + "high_count": 0, + "medium_count": 0, + "low_count": 0, + "findings": [ + { + "severity": "CRITICAL", + "pattern": "tautology", + "description": "Tautology injection detected: always-true condition", + "risk": "Authentication bypass or full table disclosure", + "suggestion": "Use parameterized queries; never interpolate user input into SQL" + } + ] +} +``` + +--- + +### lint\_sql + +**Description**: Lint SQL against all 10 GoSQLX style rules (L001–L010). 
+ +#### Parameters + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `sql` | string | yes | The SQL string to lint | + +#### Response + +| Field | Type | Description | +|-------|------|-------------| +| `violation_count` | integer | Number of violations found | +| `violations` | array of object | Each violation: `rule`, `rule_name`, `severity`, `message`, `line`, `column`, `suggestion` | + +#### Lint Rules + +| Rule | Name | Category | +|------|------|----------| +| L001 | TrailingWhitespace | whitespace | +| L002 | MixedIndentation | whitespace | +| L003 | ConsecutiveBlankLines | whitespace | +| L004 | IndentationDepth | whitespace | +| L005 | LongLines | whitespace | +| L006 | ColumnAlignment | style | +| L007 | KeywordCase | keywords | +| L008 | CommaPlacement | style | +| L009 | AliasingConsistency | style | +| L010 | RedundantWhitespace | whitespace | + +#### Example + +```bash +curl -s -X POST http://127.0.0.1:8080/mcp \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"lint_sql","arguments":{"sql":"select id,name from users"}}}' +``` + +```json +{ + "violation_count": 1, + "violations": [ + { + "rule": "L007", + "rule_name": "KeywordCase", + "severity": "warning", + "message": "Keyword 'select' should be uppercase", + "line": 1, + "column": 1, + "suggestion": "Use 'SELECT' instead of 'select'" + } + ] +} +``` + +--- + +### analyze\_sql + +**Description**: Run all 6 analysis tools concurrently and return a composite report. Results are keyed by tool name (`validate`, `parse`, `metadata`, `security`, `lint`, `format`). Partial failures appear under an `errors` key. 
+ +#### Parameters + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `sql` | string | yes | The SQL string to analyze | + +#### Response + +| Key | Type | Description | +|-----|------|-------------| +| `validate` | object | Output of `validate_sql` | +| `parse` | object | Output of `parse_sql` | +| `metadata` | object | Output of `extract_metadata` | +| `security` | object | Output of `security_scan` | +| `lint` | object | Output of `lint_sql` | +| `format` | object | Output of `format_sql` (indent_size=2, defaults) | +| `errors` | object | *(present only on partial failure)* Map of tool name β†’ error message | + +#### Example + +```bash +curl -s -X POST http://127.0.0.1:8080/mcp \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"analyze_sql","arguments":{"sql":"SELECT id FROM users"}}}' +``` + +```json +{ + "validate": { + "valid": true + }, + "parse": { + "statement_count": 1, + "statement_types": ["*ast.SelectStatement"] + }, + "metadata": { + "tables": ["users"], + "columns": ["id"], + "functions": [] + }, + "security": { + "is_clean": true, + "has_critical": false, + "has_high": false, + "total_count": 0, + "critical_count": 0, + "high_count": 0, + "medium_count": 0, + "low_count": 0, + "findings": [] + }, + "lint": { + "violation_count": 0, + "violations": [] + }, + "format": { + "formatted_sql": "SELECT id\nFROM users", + "options": { + "indent_size": 2, + "uppercase_keywords": false, + "add_semicolon": false + } + } +} +``` + +--- + +## AI Assistant Integration + +### Claude Desktop + +Add `gosqlx-mcp` to your Claude Desktop configuration (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS): + +```json +{ + "mcpServers": { + "gosqlx": { + "command": "gosqlx-mcp", + "env": { + "GOSQLX_MCP_PORT": "8080" + } + } + } +} +``` + +After restarting Claude Desktop, the 7 GoSQLX tools appear in the tool panel. 
Claude can now validate, lint, and analyze SQL on your behalf in any conversation. + +### Cursor + +Add the MCP server to your Cursor configuration (`.cursor/mcp.json` in your project root, or the global `~/.cursor/mcp.json`): + +```json +{ + "mcpServers": { + "gosqlx": { + "url": "http://127.0.0.1:8080/mcp" + } + } +} +``` + +Start `gosqlx-mcp` before opening Cursor (or add it to a startup script). Cursor will connect to the running server and expose the tools in its Agent mode. + +### With Authentication + +When running with `GOSQLX_MCP_AUTH_TOKEN`: + +```json +{ + "mcpServers": { + "gosqlx": { + "url": "http://127.0.0.1:8080/mcp", + "headers": { + "Authorization": "Bearer your-token-here" + } + } + } +} +``` + +--- + +## Embedding as a Go Library + +Import `pkg/mcp` directly to embed the MCP server in your own application: + +```go +import "github.com/ajitpratap0/GoSQLX/pkg/mcp" + +func main() { + cfg, err := mcp.LoadConfig() + if err != nil { + log.Fatal(err) + } + srv := mcp.New(cfg) + if err := srv.Start(context.Background()); err != nil { + log.Fatal(err) + } +} +``` + +### Public API + +| Symbol | Signature | Description | +|--------|-----------|-------------| +| `Config` | struct | Server configuration | +| `LoadConfig` | `() (*Config, error)` | Load from env vars | +| `DefaultConfig` | `() *Config` | Defaults: `127.0.0.1:8080`, auth disabled | +| `New` | `(cfg *Config) *Server` | Create server with all 7 tools registered | +| `(*Server).Start` | `(ctx context.Context) error` | Bind, serve, block until ctx cancelled | +| `BearerAuthMiddleware` | `(cfg *Config, next http.Handler) http.Handler` | Auth wrapper; no-op when auth is disabled | +| `(*Config).Addr` | `() string` | Returns `"host:port"` | +| `(*Config).AuthEnabled` | `() bool` | Reports whether auth token is set | + +### Config Struct + +```go +type Config struct { + Host string // GOSQLX_MCP_HOST (default "127.0.0.1") + Port int // GOSQLX_MCP_PORT (default 8080, range 1–65535) + AuthToken string 
// GOSQLX_MCP_AUTH_TOKEN (default "" = auth disabled) +} +``` + +### Custom Context with Cancellation + +```go +ctx, cancel := context.WithCancel(context.Background()) +defer cancel() + +// Cancel on SIGINT +go func() { + c := make(chan os.Signal, 1) + signal.Notify(c, os.Interrupt) + <-c + cancel() +}() + +cfg := mcp.DefaultConfig() +srv := mcp.New(cfg) +// Start returns nil after a clean shutdown, so only treat a non-nil error as fatal. +if err := srv.Start(ctx); err != nil { + log.Fatal(err) +} +``` + +--- + +## Troubleshooting + +### Server Won't Start — "address already in use" + +Another process is using port 8080. Change the port: + +```bash +GOSQLX_MCP_PORT=9090 gosqlx-mcp +``` + +Or find and stop the conflicting process: + +```bash +lsof -i :8080 +``` + +### Server Won't Start — "GOSQLX_MCP_PORT: expected integer" + +The port value is not a valid integer: + +```bash +# Wrong +GOSQLX_MCP_PORT=abc gosqlx-mcp + +# Correct +GOSQLX_MCP_PORT=8080 gosqlx-mcp +``` + +### HTTP 401 on All Requests + +Authentication is enabled but the token is missing or wrong. Check `GOSQLX_MCP_AUTH_TOKEN` and include the header: + +```bash +curl ... -H "Authorization: Bearer your-token" +``` + +### "command not found: gosqlx-mcp" + +`$GOPATH/bin` is not in your `PATH`: + +```bash +export PATH="$PATH:$(go env GOPATH)/bin" +``` + +### MCP Inspector Can't Connect + +Verify the server is running and listening on the correct address: + +```bash +curl -s http://127.0.0.1:8080/mcp +# Should return an MCP protocol response, not "connection refused" +``` + +### analyze\_sql Returns Partial Results with "errors" Key + +One or more sub-tools failed. The `errors` map identifies which tools failed and why. Successful results are always returned alongside errors. + +```json +{ + "validate": {"valid": false, "error": "syntax error"}, + "errors": { + "parse": "parse failed: unexpected token at position 0", + "metadata": "parse failed: unexpected token at position 0" + } +} +``` + +The format, security scan, and lint tools operate on the raw SQL string independently and may still succeed. 
+ +--- + +## Resources + +- **Repository**: https://github.com/ajitpratap0/GoSQLX +- **Issues**: https://github.com/ajitpratap0/GoSQLX/issues +- **MCP Specification**: https://modelcontextprotocol.io/specification +- **mark3labs/mcp-go**: https://github.com/mark3labs/mcp-go + +--- + +**Last Updated**: 2026-03-09 +**Version**: v1.10.0 diff --git a/go.mod b/go.mod index a7871acb..0ab6c48e 100644 --- a/go.mod +++ b/go.mod @@ -1,9 +1,10 @@ module github.com/ajitpratap0/GoSQLX -go 1.21 +go 1.23.0 require ( github.com/fsnotify/fsnotify v1.9.0 + github.com/mark3labs/mcp-go v0.45.0 github.com/spf13/cobra v1.10.1 github.com/spf13/pflag v1.0.9 golang.org/x/term v0.20.0 @@ -11,6 +12,14 @@ require ( ) require ( + github.com/bahlo/generic-list-go v0.2.0 // indirect + github.com/buger/jsonparser v1.1.1 // indirect + github.com/google/uuid v1.6.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/invopop/jsonschema v0.13.0 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/spf13/cast v1.7.1 // indirect + github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect + github.com/yosida95/uritemplate/v3 v3.0.2 // indirect golang.org/x/sys v0.20.0 // indirect ) diff --git a/go.sum b/go.sum index 255b5ac8..240a7384 100644 --- a/go.sum +++ b/go.sum @@ -1,13 +1,48 @@ +github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk= +github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg= +github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs= +github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/frankban/quicktest v1.14.6 
h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= +github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E= +github.com/invopop/jsonschema v0.13.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/mark3labs/mcp-go v0.45.0 h1:s0S8qR/9fWaQ3pHxz7pm1uQ0DrswoSnRIxKIjbiQtkc= +github.com/mark3labs/mcp-go v0.45.0/go.mod h1:YnJfOL382MIWDx1kMY+2zsRHU/q78dBg9aFb8W6Thdw= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal 
v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y= +github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0= github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc= +github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw= +github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= +github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= diff --git a/pkg/mcp/config.go b/pkg/mcp/config.go new file mode 100644 index 00000000..d78a9025 --- /dev/null +++ b/pkg/mcp/config.go @@ -0,0 +1,102 @@ +// Copyright 2026 GoSQLX Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package mcp provides a Model Context Protocol (MCP) server for GoSQLX. +// It exposes SQL parsing, validation, formatting, linting, and security +// scanning as MCP tools accessible over streamable HTTP transport. +// +// # Quick start +// +// cfg, err := mcp.LoadConfig() +// if err != nil { +// log.Fatal(err) +// } +// srv := mcp.New(cfg) +// srv.Start(context.Background()) +// +// # Environment variables +// +// GOSQLX_MCP_HOST bind host (default: 127.0.0.1) +// GOSQLX_MCP_PORT bind port (default: 8080) +// GOSQLX_MCP_AUTH_TOKEN bearer token; empty disables auth +package mcp + +import ( + "fmt" + "os" + "strconv" + "strings" +) + +// Config holds all MCP server configuration loaded from environment variables. +// Use LoadConfig or DefaultConfig to obtain a valid Config; the zero value is not valid. +type Config struct { + // Host is the interface to bind to. + // Source: GOSQLX_MCP_HOST (default: "127.0.0.1") + Host string + + // Port is the TCP port to listen on (1–65535). + // Source: GOSQLX_MCP_PORT (default: 8080) + Port int + + // AuthToken is the optional bearer token for request authentication. + // When non-empty every request must carry "Authorization: Bearer ". + // Source: GOSQLX_MCP_AUTH_TOKEN (default: "" β€” auth disabled) + AuthToken string +} + +// LoadConfig reads configuration from environment variables, applying defaults +// for any variables that are unset or empty. 
+func LoadConfig() (*Config, error) { + cfg := DefaultConfig() + + if v := os.Getenv("GOSQLX_MCP_HOST"); v != "" { + cfg.Host = v + } + + if v := os.Getenv("GOSQLX_MCP_PORT"); v != "" { + port, err := strconv.Atoi(v) + if err != nil { + return nil, fmt.Errorf("GOSQLX_MCP_PORT: expected integer, got %q", v) + } + if port < 1 || port > 65535 { + return nil, fmt.Errorf("GOSQLX_MCP_PORT: %d is out of range (1–65535)", port) + } + cfg.Port = port + } + + if v := strings.TrimSpace(os.Getenv("GOSQLX_MCP_AUTH_TOKEN")); v != "" { + cfg.AuthToken = v + } + + return cfg, nil +} + +// DefaultConfig returns a Config with all defaults applied (auth disabled). +func DefaultConfig() *Config { + return &Config{ + Host: "127.0.0.1", + Port: 8080, + } +} + +// Addr returns the "host:port" string suitable for net/http ListenAndServe. +func (c *Config) Addr() string { + return fmt.Sprintf("%s:%d", c.Host, c.Port) +} + +// AuthEnabled reports whether bearer token authentication is configured. +func (c *Config) AuthEnabled() bool { + return c.AuthToken != "" +} diff --git a/pkg/mcp/config_test.go b/pkg/mcp/config_test.go new file mode 100644 index 00000000..09ed8320 --- /dev/null +++ b/pkg/mcp/config_test.go @@ -0,0 +1,121 @@ +// Copyright 2026 GoSQLX Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package mcp + +import ( + "os" + "testing" +) + +func TestLoadConfig_Defaults(t *testing.T) { + // Clear any env vars that might be set + os.Unsetenv("GOSQLX_MCP_HOST") + os.Unsetenv("GOSQLX_MCP_PORT") + os.Unsetenv("GOSQLX_MCP_AUTH_TOKEN") + + cfg, err := LoadConfig() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg.Host != "127.0.0.1" { + t.Errorf("Host = %q, want %q", cfg.Host, "127.0.0.1") + } + if cfg.Port != 8080 { + t.Errorf("Port = %d, want 8080", cfg.Port) + } + if cfg.AuthEnabled() { + t.Error("AuthEnabled() = true, want false") + } + if cfg.Addr() != "127.0.0.1:8080" { + t.Errorf("Addr() = %q, want %q", cfg.Addr(), "127.0.0.1:8080") + } +} + +func TestLoadConfig_CustomPort(t *testing.T) { + os.Setenv("GOSQLX_MCP_PORT", "9090") + defer os.Unsetenv("GOSQLX_MCP_PORT") + + cfg, err := LoadConfig() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg.Port != 9090 { + t.Errorf("Port = %d, want 9090", cfg.Port) + } +} + +func TestLoadConfig_InvalidPort(t *testing.T) { + tests := []struct { + name string + val string + }{ + {"non-numeric", "abc"}, + {"zero", "0"}, + {"negative", "-1"}, + {"too-large", "99999"}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + os.Setenv("GOSQLX_MCP_PORT", tc.val) + defer os.Unsetenv("GOSQLX_MCP_PORT") + _, err := LoadConfig() + if err == nil { + t.Errorf("expected error for port %q, got nil", tc.val) + } + }) + } +} + +func TestLoadConfig_AuthToken(t *testing.T) { + os.Setenv("GOSQLX_MCP_AUTH_TOKEN", "supersecret") + defer os.Unsetenv("GOSQLX_MCP_AUTH_TOKEN") + + cfg, err := LoadConfig() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !cfg.AuthEnabled() { + t.Error("AuthEnabled() = false, want true") + } + if cfg.AuthToken != "supersecret" { + t.Errorf("AuthToken = %q, want %q", cfg.AuthToken, "supersecret") + } +} + +func TestLoadConfig_CustomHost(t *testing.T) { + os.Setenv("GOSQLX_MCP_HOST", "0.0.0.0") + defer os.Unsetenv("GOSQLX_MCP_HOST") + 
+ cfg, err := LoadConfig() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg.Host != "0.0.0.0" { + t.Errorf("Host = %q, want %q", cfg.Host, "0.0.0.0") + } +} + +func TestDefaultConfig(t *testing.T) { + cfg := DefaultConfig() + if cfg == nil { + t.Fatal("DefaultConfig() returned nil") + } + if cfg.Port != 8080 { + t.Errorf("Port = %d, want 8080", cfg.Port) + } + if cfg.AuthEnabled() { + t.Error("AuthEnabled() = true, want false") + } +} diff --git a/pkg/mcp/middleware.go b/pkg/mcp/middleware.go new file mode 100644 index 00000000..8076bfa4 --- /dev/null +++ b/pkg/mcp/middleware.go @@ -0,0 +1,49 @@ +// Copyright 2026 GoSQLX Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mcp + +import ( + "crypto/subtle" + "net/http" + "strings" +) + +// BearerAuthMiddleware returns an http.Handler that enforces bearer token +// authentication when cfg.AuthEnabled() is true. When auth is disabled it +// passes all requests through unchanged. On failure it responds 401 with a +// WWW-Authenticate header. 
+func BearerAuthMiddleware(cfg *Config, next http.Handler) http.Handler { + if !cfg.AuthEnabled() { + return next + } + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if extractBearerToken(r) != cfg.AuthToken { + w.Header().Set("WWW-Authenticate", `Bearer realm="gosqlx-mcp"`) + http.Error(w, "Unauthorized", http.StatusUnauthorized) + return + } + next.ServeHTTP(w, r) + }) +} + +// extractBearerToken parses the "Authorization: Bearer " header. +// Returns an empty string if the header is absent or malformed. +func extractBearerToken(r *http.Request) string { + const prefix = "Bearer " + auth := r.Header.Get("Authorization") + if !strings.HasPrefix(auth, prefix) { + return "" + } + return auth[len(prefix):] +} diff --git a/pkg/mcp/middleware_test.go b/pkg/mcp/middleware_test.go new file mode 100644 index 00000000..f3ab7173 --- /dev/null +++ b/pkg/mcp/middleware_test.go @@ -0,0 +1,156 @@ +// Copyright 2026 GoSQLX Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package mcp + +import ( + "net/http" + "net/http/httptest" + "testing" +) + +func TestBearerAuthMiddleware_AuthDisabled(t *testing.T) { + cfg := DefaultConfig() // no auth token + called := false + next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + called = true + w.WriteHeader(http.StatusOK) + }) + + handler := BearerAuthMiddleware(cfg, next) + req := httptest.NewRequest(http.MethodPost, "/mcp", nil) + rr := httptest.NewRecorder() + handler.ServeHTTP(rr, req) + + if !called { + t.Error("next handler was not called when auth is disabled") + } + if rr.Code != http.StatusOK { + t.Errorf("status = %d, want 200", rr.Code) + } +} + +func TestBearerAuthMiddleware_ValidToken(t *testing.T) { + cfg := &Config{Host: "127.0.0.1", Port: 8080, AuthToken: "secret"} + called := false + next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + called = true + w.WriteHeader(http.StatusOK) + }) + + handler := BearerAuthMiddleware(cfg, next) + req := httptest.NewRequest(http.MethodPost, "/mcp", nil) + req.Header.Set("Authorization", "Bearer secret") + rr := httptest.NewRecorder() + handler.ServeHTTP(rr, req) + + if !called { + t.Error("next handler was not called with valid token") + } + if rr.Code != http.StatusOK { + t.Errorf("status = %d, want 200", rr.Code) + } +} + +func TestBearerAuthMiddleware_InvalidToken(t *testing.T) { + cfg := &Config{Host: "127.0.0.1", Port: 8080, AuthToken: "secret"} + next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + t.Error("next handler should not be called with invalid token") + }) + + handler := BearerAuthMiddleware(cfg, next) + req := httptest.NewRequest(http.MethodPost, "/mcp", nil) + req.Header.Set("Authorization", "Bearer wrongtoken") + rr := httptest.NewRecorder() + handler.ServeHTTP(rr, req) + + if rr.Code != http.StatusUnauthorized { + t.Errorf("status = %d, want 401", rr.Code) + } + if rr.Header().Get("WWW-Authenticate") == "" { + t.Error("WWW-Authenticate header missing on 
401") + } +} + +func TestBearerAuthMiddleware_MissingHeader(t *testing.T) { + cfg := &Config{Host: "127.0.0.1", Port: 8080, AuthToken: "secret"} + next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + t.Error("next handler should not be called with missing header") + }) + + handler := BearerAuthMiddleware(cfg, next) + req := httptest.NewRequest(http.MethodPost, "/mcp", nil) + rr := httptest.NewRecorder() + handler.ServeHTTP(rr, req) + + if rr.Code != http.StatusUnauthorized { + t.Errorf("status = %d, want 401", rr.Code) + } +} + +func TestBearerAuthMiddleware_MalformedHeader(t *testing.T) { + cfg := &Config{Host: "127.0.0.1", Port: 8080, AuthToken: "secret"} + next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + t.Error("next handler should not be called with malformed header") + }) + + tests := []struct { + name string + header string + }{ + {"Token prefix", "Token secret"}, + {"Basic prefix", "Basic secret"}, + {"no prefix", "secret"}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + handler := BearerAuthMiddleware(cfg, next) + req := httptest.NewRequest(http.MethodPost, "/mcp", nil) + req.Header.Set("Authorization", tc.header) + rr := httptest.NewRecorder() + handler.ServeHTTP(rr, req) + + if rr.Code != http.StatusUnauthorized { + t.Errorf("status = %d, want 401", rr.Code) + } + }) + } +} + +func TestExtractBearerToken(t *testing.T) { + tests := []struct { + name string + header string + wantToken string + }{ + {"valid", "Bearer mytoken", "mytoken"}, + {"empty", "", ""}, + {"no bearer", "Token mytoken", ""}, + {"basic", "Basic dXNlcjpwYXNz", ""}, + {"bearer no token", "Bearer ", ""}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/", nil) + if tc.header != "" { + req.Header.Set("Authorization", tc.header) + } + got := extractBearerToken(req) + if got != tc.wantToken { + t.Errorf("extractBearerToken() = %q, want %q", got, 
tc.wantToken) + } + }) + } +} diff --git a/pkg/mcp/server.go b/pkg/mcp/server.go new file mode 100644 index 00000000..8dfdfd3f --- /dev/null +++ b/pkg/mcp/server.go @@ -0,0 +1,166 @@ +// Copyright 2026 GoSQLX Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mcp + +import ( + "context" + "fmt" + "log" + "net/http" + + "github.com/mark3labs/mcp-go/mcp" + mcpserver "github.com/mark3labs/mcp-go/server" +) + +// Server wraps the MCP server with all GoSQLX tools registered. +type Server struct { + cfg *Config + mcpSrv *mcpserver.MCPServer +} + +// New creates a Server with all 7 GoSQLX tools registered. +func New(cfg *Config) *Server { + s := &Server{cfg: cfg} + s.mcpSrv = mcpserver.NewMCPServer( + "gosqlx-mcp", + "1.9.3", + mcpserver.WithToolCapabilities(false), + ) + s.registerTools() + return s +} + +// Start binds to cfg.Addr() and serves using streamable HTTP transport. +// It blocks until ctx is cancelled or a fatal error occurs. 
+func (s *Server) Start(ctx context.Context) error { + streamSrv := mcpserver.NewStreamableHTTPServer(s.mcpSrv) + wrapped := BearerAuthMiddleware(s.cfg, streamSrv) + + httpSrv := &http.Server{ + Addr: s.cfg.Addr(), + Handler: wrapped, + } + + go func() { + <-ctx.Done() + _ = httpSrv.Shutdown(context.Background()) + }() + + log.Printf("gosqlx-mcp: listening on %s (auth=%v)\n", s.cfg.Addr(), s.cfg.AuthEnabled()) + if err := httpSrv.ListenAndServe(); err != nil && err != http.ErrServerClosed { + return fmt.Errorf("server error: %w", err) + } + return nil +} + +// registerTools adds all 7 GoSQLX tools with their JSON Schema definitions. +func (s *Server) registerTools() { + // validate_sql + s.mcpSrv.AddTool( + mcp.NewTool("validate_sql", + mcp.WithDescription("Validate SQL syntax. Returns {valid: bool, error?: string, dialect?: string}."), + mcp.WithString("sql", + mcp.Required(), + mcp.Description("The SQL string to validate"), + ), + mcp.WithString("dialect", + mcp.Description("SQL dialect: generic, mysql, postgresql, sqlite, sqlserver, oracle, snowflake"), + mcp.Enum("generic", "mysql", "postgresql", "sqlite", "sqlserver", "oracle", "snowflake"), + ), + ), + handleValidateSQL, + ) + + // format_sql + s.mcpSrv.AddTool( + mcp.NewTool("format_sql", + mcp.WithDescription("Format SQL with configurable indentation and keyword casing."), + mcp.WithString("sql", + mcp.Required(), + mcp.Description("The SQL string to format"), + ), + mcp.WithNumber("indent_size", + mcp.Description("Spaces per indent level (default: 2)"), + ), + mcp.WithBoolean("uppercase_keywords", + mcp.Description("Uppercase SQL keywords (default: false)"), + ), + mcp.WithBoolean("add_semicolon", + mcp.Description("Append a trailing semicolon (default: false)"), + ), + ), + handleFormatSQL, + ) + + // parse_sql + s.mcpSrv.AddTool( + mcp.NewTool("parse_sql", + mcp.WithDescription("Parse SQL and return an AST summary: statement count and types."), + mcp.WithString("sql", + mcp.Required(), + 
mcp.Description("The SQL string to parse"), + ), + ), + handleParseSQL, + ) + + // extract_metadata + s.mcpSrv.AddTool( + mcp.NewTool("extract_metadata", + mcp.WithDescription("Extract tables, columns, and functions referenced in SQL."), + mcp.WithString("sql", + mcp.Required(), + mcp.Description("The SQL string to analyze"), + ), + ), + handleExtractMetadata, + ) + + // security_scan + s.mcpSrv.AddTool( + mcp.NewTool("security_scan", + mcp.WithDescription("Scan SQL for injection patterns: tautologies, UNION attacks, stacked queries, comment bypasses, and more."), + mcp.WithString("sql", + mcp.Required(), + mcp.Description("The SQL string to scan"), + ), + ), + handleSecurityScan, + ) + + // lint_sql + s.mcpSrv.AddTool( + mcp.NewTool("lint_sql", + mcp.WithDescription("Lint SQL against all 10 GoSQLX style rules (L001–L010)."), + mcp.WithString("sql", + mcp.Required(), + mcp.Description("The SQL string to lint"), + ), + ), + handleLintSQL, + ) + + // analyze_sql + s.mcpSrv.AddTool( + mcp.NewTool("analyze_sql", + mcp.WithDescription("Run all 6 analysis tools concurrently and return a composite report (validate, parse, metadata, security, lint, format)."), + mcp.WithString("sql", + mcp.Required(), + mcp.Description("The SQL string to analyze"), + ), + ), + handleAnalyzeSQL, + ) +} diff --git a/pkg/mcp/server_test.go b/pkg/mcp/server_test.go new file mode 100644 index 00000000..1f86449d --- /dev/null +++ b/pkg/mcp/server_test.go @@ -0,0 +1,55 @@ +// Copyright 2026 GoSQLX Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package mcp + +import ( + "testing" +) + +func TestNew_CreatesServer(t *testing.T) { + cfg := DefaultConfig() + srv := New(cfg) + if srv == nil { + t.Fatal("New() returned nil") + } + if srv.cfg != cfg { + t.Error("cfg not stored on server") + } + if srv.mcpSrv == nil { + t.Error("mcpSrv is nil after New()") + } +} + +func TestServer_AuthDisabled(t *testing.T) { + cfg := DefaultConfig() + if cfg.AuthEnabled() { + t.Error("DefaultConfig should have auth disabled") + } + srv := New(cfg) + if srv == nil { + t.Fatal("New() returned nil") + } +} + +func TestServer_AuthEnabled(t *testing.T) { + cfg := &Config{Host: "127.0.0.1", Port: 8080, AuthToken: "testtoken"} + if !cfg.AuthEnabled() { + t.Error("Config with AuthToken should have auth enabled") + } + srv := New(cfg) + if srv == nil { + t.Fatal("New() returned nil") + } +} diff --git a/pkg/mcp/tools.go b/pkg/mcp/tools.go new file mode 100644 index 00000000..481b4417 --- /dev/null +++ b/pkg/mcp/tools.go @@ -0,0 +1,391 @@ +// Copyright 2026 GoSQLX Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package mcp + +import ( + "context" + "encoding/json" + "fmt" + "sync" + + mcpmcp "github.com/mark3labs/mcp-go/mcp" + + "github.com/ajitpratap0/GoSQLX/pkg/gosqlx" + "github.com/ajitpratap0/GoSQLX/pkg/linter" + "github.com/ajitpratap0/GoSQLX/pkg/linter/rules/keywords" + "github.com/ajitpratap0/GoSQLX/pkg/linter/rules/style" + "github.com/ajitpratap0/GoSQLX/pkg/linter/rules/whitespace" + sqlkeywords "github.com/ajitpratap0/GoSQLX/pkg/sql/keywords" + "github.com/ajitpratap0/GoSQLX/pkg/sql/security" +) + +// --------------------------------------------------------------------------- +// Internal result functions +// These return map[string]any so that handleAnalyzeSQL can fan-out +// concurrently and collect results without JSON round-trips. +// --------------------------------------------------------------------------- + +// validateSQLInternal validates a SQL string, optionally against a specific dialect. +// Parse/validate failures are represented as {valid: false, error: ...} with a nil +// error return (tool-semantic failure, not a protocol error). +// A missing/empty sql argument returns a non-nil error (protocol error). +func validateSQLInternal(sql, dialect string) (map[string]any, error) { + if sql == "" { + return nil, fmt.Errorf("parameter 'sql' is required and must not be empty") + } + + if dialect != "" { + _, err := gosqlx.ParseWithDialect(sql, sqlkeywords.SQLDialect(dialect)) + if err != nil { + return map[string]any{ + "valid": false, + "dialect": dialect, + "error": err.Error(), + }, nil + } + return map[string]any{ + "valid": true, + "dialect": dialect, + }, nil + } + + err := gosqlx.Validate(sql) + if err != nil { + return map[string]any{ + "valid": false, + "error": err.Error(), + }, nil + } + return map[string]any{ + "valid": true, + }, nil +} + +// formatSQLInternal formats a SQL string using the provided options. 
+func formatSQLInternal(sql string, indentSize int, uppercaseKeywords, addSemicolon bool) (map[string]any, error) { + if sql == "" { + return nil, fmt.Errorf("parameter 'sql' is required and must not be empty") + } + + opts := gosqlx.FormatOptions{ + IndentSize: indentSize, + UppercaseKeywords: uppercaseKeywords, + AddSemicolon: addSemicolon, + } + + formatted, err := gosqlx.Format(sql, opts) + if err != nil { + return nil, fmt.Errorf("format failed: %w", err) + } + + return map[string]any{ + "formatted_sql": formatted, + "options": map[string]any{ + "indent_size": indentSize, + "uppercase_keywords": uppercaseKeywords, + "add_semicolon": addSemicolon, + }, + }, nil +} + +// parseSQLInternal parses a SQL string and returns statement count and types. +func parseSQLInternal(sql string) (map[string]any, error) { + if sql == "" { + return nil, fmt.Errorf("parameter 'sql' is required and must not be empty") + } + + tree, err := gosqlx.Parse(sql) + if err != nil { + return nil, fmt.Errorf("parse failed: %w", err) + } + + stmtTypes := make([]string, 0, len(tree.Statements)) + for _, stmt := range tree.Statements { + stmtTypes = append(stmtTypes, fmt.Sprintf("%T", stmt)) + } + + return map[string]any{ + "statement_count": len(tree.Statements), + "statement_types": stmtTypes, + }, nil +} + +// extractMetadataInternal parses a SQL string and extracts tables, columns, and functions. 
+func extractMetadataInternal(sql string) (map[string]any, error) { + if sql == "" { + return nil, fmt.Errorf("parameter 'sql' is required and must not be empty") + } + + tree, err := gosqlx.Parse(sql) + if err != nil { + return nil, fmt.Errorf("parse failed: %w", err) + } + + meta := gosqlx.ExtractMetadata(tree) + + tables := meta.Tables + if tables == nil { + tables = []string{} + } + columns := meta.Columns + if columns == nil { + columns = []string{} + } + functions := meta.Functions + if functions == nil { + functions = []string{} + } + + return map[string]any{ + "tables": tables, + "columns": columns, + "functions": functions, + }, nil +} + +// securityScanInternal scans a SQL string for injection patterns and other threats. +func securityScanInternal(sql string) (map[string]any, error) { + if sql == "" { + return nil, fmt.Errorf("parameter 'sql' is required and must not be empty") + } + + scanner := security.NewScanner() + result := scanner.ScanSQL(sql) + + findings := make([]map[string]any, 0, len(result.Findings)) + for _, f := range result.Findings { + findings = append(findings, map[string]any{ + "severity": string(f.Severity), + "pattern": string(f.Pattern), + "description": f.Description, + "risk": f.Risk, + "suggestion": f.Suggestion, + }) + } + + return map[string]any{ + "is_clean": result.IsClean(), + "has_critical": result.HasCritical(), + "has_high": result.HasHighOrAbove(), + "total_count": result.TotalCount, + "critical_count": result.CriticalCount, + "high_count": result.HighCount, + "medium_count": result.MediumCount, + "low_count": result.LowCount, + "findings": findings, + }, nil +} + +// lintSQLInternal runs the full linter rule set against a SQL string. 
+func lintSQLInternal(sql string) (map[string]any, error) { + if sql == "" { + return nil, fmt.Errorf("parameter 'sql' is required and must not be empty") + } + + result := newFullLinter().LintString(sql, "") + + violations := make([]map[string]any, 0, len(result.Violations)) + for _, v := range result.Violations { + violations = append(violations, map[string]any{ + "rule": v.Rule, + "rule_name": v.RuleName, + "severity": string(v.Severity), + "message": v.Message, + "line": v.Location.Line, + "column": v.Location.Column, + "suggestion": v.Suggestion, + }) + } + + return map[string]any{ + "violation_count": len(result.Violations), + "violations": violations, + }, nil +} + +// --------------------------------------------------------------------------- +// Private helpers +// --------------------------------------------------------------------------- + +// newFullLinter mirrors createLinter() from cmd/gosqlx/cmd/lint.go exactly, +// using a fixed line-length of 100 for the MCP context. +func newFullLinter() *linter.Linter { + return linter.New( + whitespace.NewTrailingWhitespaceRule(), // L001 + whitespace.NewMixedIndentationRule(), // L002 + whitespace.NewConsecutiveBlankLinesRule(1), // L003 + whitespace.NewIndentationDepthRule(4, 4), // L004 + whitespace.NewLongLinesRule(100), // L005 + whitespace.NewRedundantWhitespaceRule(), // L010 + + style.NewColumnAlignmentRule(), // L006 + style.NewCommaPlacementRule(style.CommaTrailing), // L008 + style.NewAliasingConsistencyRule(true), // L009 + + keywords.NewKeywordCaseRule(keywords.CaseUpper), // L007 + ) +} + +// toolResult marshals a map[string]any result into an MCP CallToolResult. 
+func toolResult(data map[string]any) (*mcpmcp.CallToolResult, error) { + b, err := json.Marshal(data) + if err != nil { + return nil, fmt.Errorf("failed to marshal result: %w", err) + } + return mcpmcp.NewToolResultText(string(b)), nil +} + +// --------------------------------------------------------------------------- +// MCP handler functions +// --------------------------------------------------------------------------- + +// handleValidateSQL is the MCP tool handler for "validate_sql". +func handleValidateSQL(ctx context.Context, req mcpmcp.CallToolRequest) (*mcpmcp.CallToolResult, error) { + sql := req.GetString("sql", "") + if sql == "" { + return nil, fmt.Errorf("parameter 'sql' is required and must not be empty") + } + dialect := req.GetString("dialect", "") + result, err := validateSQLInternal(sql, dialect) + if err != nil { + return nil, err + } + return toolResult(result) +} + +// handleFormatSQL is the MCP tool handler for "format_sql". +func handleFormatSQL(ctx context.Context, req mcpmcp.CallToolRequest) (*mcpmcp.CallToolResult, error) { + sql := req.GetString("sql", "") + if sql == "" { + return nil, fmt.Errorf("parameter 'sql' is required and must not be empty") + } + indentSize := req.GetInt("indent_size", 2) + uppercaseKeywords := req.GetBool("uppercase_keywords", false) + addSemicolon := req.GetBool("add_semicolon", false) + result, err := formatSQLInternal(sql, indentSize, uppercaseKeywords, addSemicolon) + if err != nil { + return nil, err + } + return toolResult(result) +} + +// handleParseSQL is the MCP tool handler for "parse_sql". 
+func handleParseSQL(ctx context.Context, req mcpmcp.CallToolRequest) (*mcpmcp.CallToolResult, error) { + sql := req.GetString("sql", "") + if sql == "" { + return nil, fmt.Errorf("parameter 'sql' is required and must not be empty") + } + result, err := parseSQLInternal(sql) + if err != nil { + return nil, err + } + return toolResult(result) +} + +// handleExtractMetadata is the MCP tool handler for "extract_metadata". +func handleExtractMetadata(ctx context.Context, req mcpmcp.CallToolRequest) (*mcpmcp.CallToolResult, error) { + sql := req.GetString("sql", "") + if sql == "" { + return nil, fmt.Errorf("parameter 'sql' is required and must not be empty") + } + result, err := extractMetadataInternal(sql) + if err != nil { + return nil, err + } + return toolResult(result) +} + +// handleSecurityScan is the MCP tool handler for "security_scan". +func handleSecurityScan(ctx context.Context, req mcpmcp.CallToolRequest) (*mcpmcp.CallToolResult, error) { + sql := req.GetString("sql", "") + if sql == "" { + return nil, fmt.Errorf("parameter 'sql' is required and must not be empty") + } + result, err := securityScanInternal(sql) + if err != nil { + return nil, err + } + return toolResult(result) +} + +// handleLintSQL is the MCP tool handler for "lint_sql". +func handleLintSQL(ctx context.Context, req mcpmcp.CallToolRequest) (*mcpmcp.CallToolResult, error) { + sql := req.GetString("sql", "") + if sql == "" { + return nil, fmt.Errorf("parameter 'sql' is required and must not be empty") + } + result, err := lintSQLInternal(sql) + if err != nil { + return nil, err + } + return toolResult(result) +} + +// handleAnalyzeSQL is the MCP tool handler for "analyze_sql". +// It fans out all six analysis tools concurrently and merges the results. 
+func handleAnalyzeSQL(ctx context.Context, req mcpmcp.CallToolRequest) (*mcpmcp.CallToolResult, error) { + sql := req.GetString("sql", "") + if sql == "" { + return nil, fmt.Errorf("parameter 'sql' is required and must not be empty") + } + + type namedResult struct { + name string + data map[string]any + err error + } + + tasks := []struct { + name string + fn func() (map[string]any, error) + }{ + {"validate", func() (map[string]any, error) { return validateSQLInternal(sql, "") }}, + {"parse", func() (map[string]any, error) { return parseSQLInternal(sql) }}, + {"metadata", func() (map[string]any, error) { return extractMetadataInternal(sql) }}, + {"security", func() (map[string]any, error) { return securityScanInternal(sql) }}, + {"lint", func() (map[string]any, error) { return lintSQLInternal(sql) }}, + {"format", func() (map[string]any, error) { return formatSQLInternal(sql, 2, false, false) }}, + } + + results := make(chan namedResult, len(tasks)) + var wg sync.WaitGroup + for _, t := range tasks { + wg.Add(1) + go func(t struct { + name string + fn func() (map[string]any, error) + }) { + defer wg.Done() + data, err := t.fn() + results <- namedResult{name: t.name, data: data, err: err} + }(t) + } + wg.Wait() + close(results) + + combined := make(map[string]any, len(tasks)+1) + errs := make(map[string]string) + for r := range results { + if r.err != nil { + errs[r.name] = r.err.Error() + } else { + combined[r.name] = r.data + } + } + if len(errs) > 0 { + combined["errors"] = errs + } + return toolResult(combined) +} diff --git a/pkg/mcp/tools_test.go b/pkg/mcp/tools_test.go new file mode 100644 index 00000000..e6d5afc3 --- /dev/null +++ b/pkg/mcp/tools_test.go @@ -0,0 +1,424 @@ +// Copyright 2026 GoSQLX Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mcp + +import ( + "context" + "encoding/json" + "testing" + + mcpmcp "github.com/mark3labs/mcp-go/mcp" +) + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +// makeReq builds a minimal CallToolRequest with the given string params. +func makeReq(params map[string]any) mcpmcp.CallToolRequest { + req := mcpmcp.CallToolRequest{} + req.Params.Arguments = params + return req +} + +// unmarshalResult parses the text content of a CallToolResult as JSON. +func unmarshalResult(t *testing.T, res *mcpmcp.CallToolResult) map[string]any { + t.Helper() + if res == nil { + t.Fatal("expected non-nil CallToolResult") + } + if len(res.Content) == 0 { + t.Fatal("expected at least one content item in CallToolResult") + } + // Content[0] is a TextContent whose Text field holds the JSON payload. 
+ textContent, ok := res.Content[0].(mcpmcp.TextContent) + if !ok { + t.Fatalf("expected TextContent, got %T", res.Content[0]) + } + var out map[string]any + if err := json.Unmarshal([]byte(textContent.Text), &out); err != nil { + t.Fatalf("failed to unmarshal result JSON: %v\nraw: %s", err, textContent.Text) + } + return out +} + +// --------------------------------------------------------------------------- +// handleValidateSQL +// --------------------------------------------------------------------------- + +func TestHandleValidateSQL(t *testing.T) { + ctx := context.Background() + + t.Run("valid SQL returns valid=true", func(t *testing.T) { + req := makeReq(map[string]any{"sql": "SELECT id FROM users"}) + res, err := handleValidateSQL(ctx, req) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + data := unmarshalResult(t, res) + valid, ok := data["valid"].(bool) + if !ok || !valid { + t.Errorf("expected valid=true, got %v", data["valid"]) + } + }) + + t.Run("invalid SQL returns valid=false without protocol error", func(t *testing.T) { + req := makeReq(map[string]any{"sql": "SELECT FROM"}) + res, err := handleValidateSQL(ctx, req) + if err != nil { + t.Fatalf("unexpected protocol error for invalid SQL: %v", err) + } + data := unmarshalResult(t, res) + valid, ok := data["valid"].(bool) + if !ok || valid { + t.Errorf("expected valid=false for invalid SQL, got %v", data["valid"]) + } + if _, hasErr := data["error"]; !hasErr { + t.Error("expected 'error' key in result for invalid SQL") + } + }) + + t.Run("empty sql returns protocol error", func(t *testing.T) { + req := makeReq(map[string]any{"sql": ""}) + _, err := handleValidateSQL(ctx, req) + if err == nil { + t.Fatal("expected error for empty sql, got nil") + } + }) + + t.Run("missing sql param returns protocol error", func(t *testing.T) { + req := makeReq(map[string]any{}) + _, err := handleValidateSQL(ctx, req) + if err == nil { + t.Fatal("expected error for missing sql param, got nil") + } + }) + 
+ t.Run("valid SQL with dialect", func(t *testing.T) { + req := makeReq(map[string]any{ + "sql": "SELECT id FROM users", + "dialect": "postgresql", + }) + res, err := handleValidateSQL(ctx, req) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + data := unmarshalResult(t, res) + valid, ok := data["valid"].(bool) + if !ok || !valid { + t.Errorf("expected valid=true, got %v", data["valid"]) + } + if data["dialect"] != "postgresql" { + t.Errorf("expected dialect=postgresql, got %v", data["dialect"]) + } + }) +} + +// --------------------------------------------------------------------------- +// handleFormatSQL +// --------------------------------------------------------------------------- + +func TestHandleFormatSQL(t *testing.T) { + ctx := context.Background() + + t.Run("valid SQL returns formatted_sql", func(t *testing.T) { + req := makeReq(map[string]any{"sql": "select id from users"}) + res, err := handleFormatSQL(ctx, req) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + data := unmarshalResult(t, res) + if _, ok := data["formatted_sql"]; !ok { + t.Error("expected 'formatted_sql' key in result") + } + }) + + t.Run("custom options reflected in result", func(t *testing.T) { + req := makeReq(map[string]any{ + "sql": "select id from users", + "indent_size": 4, + "uppercase_keywords": true, + "add_semicolon": true, + }) + res, err := handleFormatSQL(ctx, req) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + data := unmarshalResult(t, res) + if _, ok := data["formatted_sql"]; !ok { + t.Error("expected 'formatted_sql' key in result") + } + opts, ok := data["options"].(map[string]any) + if !ok { + t.Fatal("expected 'options' map in result") + } + if opts["indent_size"] != float64(4) { + t.Errorf("expected indent_size=4, got %v", opts["indent_size"]) + } + if opts["uppercase_keywords"] != true { + t.Errorf("expected uppercase_keywords=true, got %v", opts["uppercase_keywords"]) + } + if opts["add_semicolon"] != true { + 
t.Errorf("expected add_semicolon=true, got %v", opts["add_semicolon"]) + } + }) + + t.Run("empty sql returns protocol error", func(t *testing.T) { + req := makeReq(map[string]any{"sql": ""}) + _, err := handleFormatSQL(ctx, req) + if err == nil { + t.Fatal("expected error for empty sql, got nil") + } + }) + + t.Run("missing sql param returns protocol error", func(t *testing.T) { + req := makeReq(map[string]any{}) + _, err := handleFormatSQL(ctx, req) + if err == nil { + t.Fatal("expected error for missing sql param, got nil") + } + }) +} + +// --------------------------------------------------------------------------- +// handleParseSQL +// --------------------------------------------------------------------------- + +func TestHandleParseSQL(t *testing.T) { + ctx := context.Background() + + t.Run("valid SQL returns statement info", func(t *testing.T) { + req := makeReq(map[string]any{"sql": "SELECT id FROM users; SELECT name FROM orders"}) + res, err := handleParseSQL(ctx, req) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + data := unmarshalResult(t, res) + if _, ok := data["statement_count"]; !ok { + t.Error("expected 'statement_count' key in result") + } + if _, ok := data["statement_types"]; !ok { + t.Error("expected 'statement_types' key in result") + } + count := data["statement_count"].(float64) + if count < 1 { + t.Errorf("expected at least 1 statement, got %v", count) + } + }) + + t.Run("empty sql returns protocol error", func(t *testing.T) { + req := makeReq(map[string]any{"sql": ""}) + _, err := handleParseSQL(ctx, req) + if err == nil { + t.Fatal("expected error for empty sql, got nil") + } + }) + + t.Run("missing sql param returns protocol error", func(t *testing.T) { + req := makeReq(map[string]any{}) + _, err := handleParseSQL(ctx, req) + if err == nil { + t.Fatal("expected error for missing sql param, got nil") + } + }) +} + +// --------------------------------------------------------------------------- +// handleExtractMetadata +// 
--------------------------------------------------------------------------- + +func TestHandleExtractMetadata(t *testing.T) { + ctx := context.Background() + + t.Run("valid SQL returns metadata keys", func(t *testing.T) { + req := makeReq(map[string]any{"sql": "SELECT u.id, o.name FROM users u JOIN orders o ON u.id = o.user_id"}) + res, err := handleExtractMetadata(ctx, req) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + data := unmarshalResult(t, res) + for _, key := range []string{"tables", "columns", "functions"} { + if _, ok := data[key]; !ok { + t.Errorf("expected key %q in result", key) + } + } + }) + + t.Run("empty sql returns protocol error", func(t *testing.T) { + req := makeReq(map[string]any{"sql": ""}) + _, err := handleExtractMetadata(ctx, req) + if err == nil { + t.Fatal("expected error for empty sql, got nil") + } + }) + + t.Run("missing sql param returns protocol error", func(t *testing.T) { + req := makeReq(map[string]any{}) + _, err := handleExtractMetadata(ctx, req) + if err == nil { + t.Fatal("expected error for missing sql param, got nil") + } + }) +} + +// --------------------------------------------------------------------------- +// handleSecurityScan +// --------------------------------------------------------------------------- + +func TestHandleSecurityScan(t *testing.T) { + ctx := context.Background() + + t.Run("clean SQL returns is_clean=true", func(t *testing.T) { + req := makeReq(map[string]any{"sql": "SELECT id FROM users WHERE id = 42"}) + res, err := handleSecurityScan(ctx, req) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + data := unmarshalResult(t, res) + for _, key := range []string{"is_clean", "has_critical", "has_high", "total_count", + "critical_count", "high_count", "medium_count", "low_count", "findings"} { + if _, ok := data[key]; !ok { + t.Errorf("expected key %q in result", key) + } + } + }) + + t.Run("empty sql returns protocol error", func(t *testing.T) { + req := 
makeReq(map[string]any{"sql": ""}) + _, err := handleSecurityScan(ctx, req) + if err == nil { + t.Fatal("expected error for empty sql, got nil") + } + }) + + t.Run("missing sql param returns protocol error", func(t *testing.T) { + req := makeReq(map[string]any{}) + _, err := handleSecurityScan(ctx, req) + if err == nil { + t.Fatal("expected error for missing sql param, got nil") + } + }) +} + +// --------------------------------------------------------------------------- +// handleLintSQL +// --------------------------------------------------------------------------- + +func TestHandleLintSQL(t *testing.T) { + ctx := context.Background() + + t.Run("valid SQL returns lint result", func(t *testing.T) { + req := makeReq(map[string]any{"sql": "SELECT id FROM users"}) + res, err := handleLintSQL(ctx, req) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + data := unmarshalResult(t, res) + if _, ok := data["violation_count"]; !ok { + t.Error("expected 'violation_count' key in result") + } + if _, ok := data["violations"]; !ok { + t.Error("expected 'violations' key in result") + } + }) + + t.Run("empty sql returns protocol error", func(t *testing.T) { + req := makeReq(map[string]any{"sql": ""}) + _, err := handleLintSQL(ctx, req) + if err == nil { + t.Fatal("expected error for empty sql, got nil") + } + }) + + t.Run("missing sql param returns protocol error", func(t *testing.T) { + req := makeReq(map[string]any{}) + _, err := handleLintSQL(ctx, req) + if err == nil { + t.Fatal("expected error for missing sql param, got nil") + } + }) +} + +// --------------------------------------------------------------------------- +// handleAnalyzeSQL +// --------------------------------------------------------------------------- + +func TestHandleAnalyzeSQL(t *testing.T) { + ctx := context.Background() + + t.Run("valid SQL returns all six analysis keys", func(t *testing.T) { + req := makeReq(map[string]any{"sql": "SELECT id, name FROM users WHERE id = 1"}) + res, err := 
handleAnalyzeSQL(ctx, req) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + data := unmarshalResult(t, res) + + expectedKeys := []string{"validate", "parse", "metadata", "security", "lint", "format"} + for _, key := range expectedKeys { + if _, ok := data[key]; !ok { + t.Errorf("expected key %q in analyze result", key) + } + } + }) + + t.Run("validate sub-result contains valid field", func(t *testing.T) { + req := makeReq(map[string]any{"sql": "SELECT 1"}) + res, err := handleAnalyzeSQL(ctx, req) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + data := unmarshalResult(t, res) + validateData, ok := data["validate"].(map[string]any) + if !ok { + t.Fatal("expected 'validate' sub-result to be a map") + } + if _, ok := validateData["valid"]; !ok { + t.Error("expected 'valid' field in validate sub-result") + } + }) + + t.Run("format sub-result contains formatted_sql", func(t *testing.T) { + req := makeReq(map[string]any{"sql": "select id from users"}) + res, err := handleAnalyzeSQL(ctx, req) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + data := unmarshalResult(t, res) + formatData, ok := data["format"].(map[string]any) + if !ok { + t.Fatal("expected 'format' sub-result to be a map") + } + if _, ok := formatData["formatted_sql"]; !ok { + t.Error("expected 'formatted_sql' field in format sub-result") + } + }) + + t.Run("empty sql returns protocol error", func(t *testing.T) { + req := makeReq(map[string]any{"sql": ""}) + _, err := handleAnalyzeSQL(ctx, req) + if err == nil { + t.Fatal("expected error for empty sql, got nil") + } + }) + + t.Run("missing sql param returns protocol error", func(t *testing.T) { + req := makeReq(map[string]any{}) + _, err := handleAnalyzeSQL(ctx, req) + if err == nil { + t.Fatal("expected error for missing sql param, got nil") + } + }) +}