diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_QA-team_wednesday.txt b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_QA-team_wednesday.txt new file mode 100644 index 00000000..78204cb6 --- /dev/null +++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_QA-team_wednesday.txt @@ -0,0 +1,38 @@ +MEETING TRANSCRIPT - QA TEAM +Date: Wednesday, September 18, 2025 +Time: 10:00 AM - 11:30 AM +Participants: Maria (QA Lead), Tom (Senior QA Engineer), Lisa (QA Automation Engineer), Roberto (Manual Testing Specialist) + +[10:02] Maria: Let's review CRM migration testing progress. Tom, report on data import tests? + +[10:03] Tom: Found critical issues. Import failures with special characters in addresses and names. + +[10:06] Tom: UTF-8 parsing problems with accents, currency symbols, and Asian characters. + +[10:08] Tom: 12% of records affected - about 15,000 out of 125,000 total records. + +[10:09] Roberto: Confirmed. Also, failed imports corrupt entire batches. + +[10:12] Lisa: No atomic transactions for batches? + +[10:13] Tom: Correct. Each record processed independently without rollback. + +[10:15] Roberto: Found referential integrity issues - orphaned references between contacts and companies. + +[10:19] Maria: Need three validation types: pre-import, during import, and post-import. + +[10:25] Tom: Recommend smaller migration batches to reduce risk? + +[10:26] Maria: Excellent. Batches of 5,000 records with validation between each. + +[10:30] Maria: Four recommendations: UTF-8 parser fix, atomic transactions, handle orphaned references, small batch migration. + +[10:33] Roberto: Also need concurrency testing during migration. + +[10:40] Maria: Complete additional testing in one week. Feasible? + +[10:42] Tom: Will share test cases today. + +[10:44] Maria: Friday 2 PM meeting before management review. + +[10:45] Lisa: Will prepare testing metrics dashboard. 
\ No newline at end of file diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_development-team_monday.txt b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_development-team_monday.txt new file mode 100644 index 00000000..aa6deb24 --- /dev/null +++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_development-team_monday.txt @@ -0,0 +1,32 @@ +MEETING TRANSCRIPT - DEVELOPMENT TEAM +Date: Monday, September 16, 2025 +Time: 09:00 AM - 10:15 AM +Participants: Alice (Tech Lead), John (Senior Developer), Sarah (Backend Developer), Mike (DevOps Engineer) + +[09:02] Alice: Let's review the search API deployed last week. Any issues? + +[09:03] Sarah: API works but performance degrades with 1,000+ queries per minute. Response times jump from 200ms to 3 seconds. + +[09:05] John: Elasticsearch queries and no caching layer? + +[09:06] Sarah: Exactly. Complex queries are slow, and we need Redis caching. + +[09:07] Mike: Also hitting CPU limits during spikes. Need auto-scaling. + +[09:08] Alice: Three priorities: query optimization, Redis cache, and infrastructure scaling. + +[09:11] Sarah: Propose 15-minute TTL cache with event-based invalidation. + +[09:13] John: I'll optimize bool queries and add calculated index fields. + +[09:17] Mike: Can set up auto-scaling by tomorrow - scale to 6 instances at 70% CPU. + +[09:18] Sarah: Starting Redis today, basic version by Wednesday. + +[09:19] John: New indexes and query optimization ready for testing Wednesday. + +[09:24] Alice: Clear plan. Mike handles scaling, Sarah implements cache, John optimizes queries. + +[09:26] Alice: I'll coordinate with product team on deployment impacts and QA for load testing. + +[09:30] Alice: Meeting Wednesday 3 PM to review progress. Thanks team! 
\ No newline at end of file diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_management-sync_friday.txt b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_management-sync_friday.txt new file mode 100644 index 00000000..7d516d08 --- /dev/null +++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_management-sync_friday.txt @@ -0,0 +1,36 @@ +MEETING TRANSCRIPT - MANAGEMENT SYNC +Date: Friday, September 20, 2025 +Time: 02:00 PM - 03:00 PM +Participants: David (Project Manager), Alice (Tech Lead), Maria (QA Lead), Emma (Product Manager), Carlos (DevOps Manager) + +[14:03] Emma: Good progress. Users report 40% search speed improvement, but support tickets show peak hour performance issues. + +[14:05] Alice: We've identified bottlenecks. Working on Redis caching and Elasticsearch query optimization. + +[14:06] David: Can we resolve issues without impacting October migration date? + +[14:09] Alice: Recommend two-week extension for complete migration due to performance issues. + +[14:10] Maria: QA agrees. Found data import blockers with special characters and integrity issues. + +[14:12] Maria: Need one week to fix issues, another for complete re-testing. + +[14:14] Carlos: Infrastructure supports extension for proper rollback and disaster recovery testing. + +[14:15] Emma: Could we do partial migration on original date? + +[14:17] Alice: Yes. Contact management module first, reports and analytics in phase two. + +[14:21] Maria: Phased migration ideal for QA - validate each module independently. + +[14:22] David: Proposal: Phase 1 - Contact management October 15th. Phase 2 - Complete migration October 30th. + +[14:23] Alice: Reasonable timeline for performance fixes. + +[14:24] Emma: Works from product perspective. Will update stakeholder communications. + +[14:25] Maria: QA commits to these timelines. 
+ +[14:26] Carlos: Will prepare deployment strategies for both phases. + +[14:32] David: Carlos, send deployment calendar by Monday. Thanks team! \ No newline at end of file diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/report_QA-team.txt b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/report_QA-team.txt new file mode 100644 index 00000000..c5730a84 --- /dev/null +++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/report_QA-team.txt @@ -0,0 +1,31 @@ +WEEKLY REPORT - QA TEAM +Week of September 16-20, 2025 +Prepared by: Maria Gonzalez, QA Lead + +=== EXECUTIVE SUMMARY === +QA team identified critical issues in CRM migration testing. Significant problems in legacy data import and referential integrity require immediate attention. + +=== TESTING COMPLETED === +- Functional: Contact management (100%), Authentication (100%), Search (75%), Analytics (60%) +- Data import: 125,000 legacy records tested, 12 critical issues found +- Performance: Core modules complete, identified issues with 500+ concurrent users + +=== CRITICAL ISSUES === +**QA-2025-001 - Data Import Failures** +- UTF-8 parsing problems with special characters +- 15,000 records affected (12% of total) +- Escalated to development + +**QA-2025-002 - Transaction Integrity** +- Failed imports leave batches in inconsistent state +- No atomic transactions for batches +- Requires architecture redesign + +**QA-2025-003 - Orphaned References** +- 2,300 records with invalid company/contact references +- Pending business logic decision + +=== METRICS === +- Test cases executed: 847 of 1,200 (70.6%) +- Pass rate: 79.3%, Automation coverage: 36% +- Bugs: 28 total (4 critical, 8 high, 12 medium, 4 low) diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/report_development-team.txt b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/report_development-team.txt new file mode 
100644 index 00000000..932c920b --- /dev/null +++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/report_development-team.txt @@ -0,0 +1,30 @@ +WEEKLY REPORT - DEVELOPMENT TEAM +Week of September 16-20, 2025 +Prepared by: Alice Thompson, Tech Lead + +=== EXECUTIVE SUMMARY === +Development team completed critical infrastructure components but identified performance bottlenecks requiring attention before production deployment. + +=== KEY ACCOMPLISHMENTS === +- Database schema and indexes completed for CRM +- 12 of 18 API endpoints integrated with authentication +- Contact management: 95% complete, Search: 80%, Analytics: 70% + +=== TECHNICAL CHALLENGES === +- Critical: Search API degrades at 1,000+ queries/minute (200ms to 3+ seconds) +- Root cause: Complex Elasticsearch queries without caching layer +- Multi-filter searches average 1.2 seconds execution time + +=== ACTION PLAN NEXT WEEK === +1. Redis cache implementation (Sarah) - Basic by Wednesday, complete by Friday +2. Elasticsearch query optimization (John) - Testing ready Wednesday +3. Auto-scaling setup (Mike) - Scale to 6 instances at 70% CPU + +=== METRICS === +- Story points: 43 of 50 completed (86%) +- Bugs: 7 reported, 12 resolved +- Code coverage: 78% (target: 80%) + +=== TIMELINE === +- October 15 Contact Management: 85% confidence, 2 sprints remaining +- October 30 Complete Migration: 90% confidence, 4 sprints remaining \ No newline at end of file diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/llama-smoltalk-3.2-1b-instruct_results.md b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/llama-smoltalk-3.2-1b-instruct_results.md new file mode 100644 index 00000000..333025cc --- /dev/null +++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/llama-smoltalk-3.2-1b-instruct_results.md @@ -0,0 +1,19 @@ +📄 Indexing documents... 
+ +🔍 Search: 'Can you summarize the performance issues in the API?' + +🤖 Asking to model: llama-smoltalk-3.2-1b-instruct + +## 💡 Question: +Can you summarize the performance issues in the API? + +## 📝 Answer: +The primary performance issue in the API is the slow response times of 3 seconds or more from the 1,000+ queries per minute. The search API, in particular, is experiencing performance degradations, with complex Elasticsearch queries causing the issues. A proposed solution is to implement a 15-minute TTL cache with event-based invalidation to improve response times. Additionally, a three-tiered approach involving optimization of bool queries and added calculated index fields is being implemented to improve query performance. Finally, auto-scaling for the infrastructure is set up to scale to 6 instances at 70% CPU. + + +## Stats +✅ Indexed 5 documents in 250ms + +🔍 Search Latency: 57ms + +🤖 AI Latency: 21019ms | 5.8 tokens/s \ No newline at end of file diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/results.md b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/results.md new file mode 100644 index 00000000..5463f5ce --- /dev/null +++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/results.md @@ -0,0 +1,25 @@ +📄 Indexing documents... + +🔍 Search: 'Can you summarize the performance issues in the API?' + +🤖 Asking to model: dolphin3.0-qwen2.5-0.5b + +## 💡 Question: +Can you summarize the performance issues in the API? +## 📝 Answer: + +The performance issues in the Search API deployed on September 16, 2025, include: + +- Degradation in performance at 1,000+ queries per minute, resulting in a 200ms to 3-second response time for complex queries. +- High response times for queries that do not utilize caching, causing them to take significantly longer than 2 seconds. 
+- Inability to scale to handle spikes in query traffic, leading to increased CPU limits. + +These issues are primarily attributed to the complexity and inefficiency of the Elasticsearch queries, as well as the lack of caching layer. This indicates a need for optimization and addressing these specific performance bottlenecks to ensure the API's scalability and effectiveness for the development team. + +## Stats + +✅ Indexed 5 documents in 627ms + +🔍 Search Latency: 81ms + +🤖 AI Latency: 16044ms | 9.5 tokens/s \ No newline at end of file diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/smollm2-1.7b-instruct_results.md b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/smollm2-1.7b-instruct_results.md new file mode 100644 index 00000000..c3beb29e --- /dev/null +++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/smollm2-1.7b-instruct_results.md @@ -0,0 +1,28 @@ +📄 Indexing documents... + +🔍 Search: 'Can you summarize the performance issues in the API?' + +🤖 Asking to model: smollm2-1.7b-instruct + +## 💡 Question: + +Can you summarize the performance issues in the API? +## 📝 Answer: + +The summary of the performance issues in the API can be summarized as follows: + +- The API works but performance degrades at 1,000+ queries per minute, resulting in response times jumping from 200ms to 3 seconds. +- The root cause of these issues is the lack of a caching layer in the Elasticsearch queries. +- The team proposed a few solutions, including a 15-minute TTL cache with event-based invalidation, which would be implemented by Sarah. +- They also proposed optimizing boolean queries and adding calculated index fields, which would be taken care of by John. +- To handle the performance spikes, they suggested auto-scaling the infrastructure, with Mike working on this and aiming to scale to 6 instances at 70% CPU by Wednesday. 
+- They also proposed implementing Redis cache, which would be done by Sarah. +- The team discussed the timeline and timeline of the changes and proposed a phased migration approach: complete migration on October 30th, followed by a partial migration on October 15th. + +## Stats + +✅ Indexed 5 documents in 141ms + +🔍 Search Latency: 26ms + +🤖 AI Latency: 47561ms | 4.8 tokens/s diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/why-elasticsearch-is-so-cool.md b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/why-elasticsearch-is-so-cool.md new file mode 100644 index 00000000..96a312e8 --- /dev/null +++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/why-elasticsearch-is-so-cool.md @@ -0,0 +1,11 @@ +>>> Why Elastic is so cool? + +## Raw Response + +```json +{"created":1762881411,"object":"chat.completion","id":"0178b570-4e13-4c1b-9ff4-e2ca5bff1c67","model":"dolphin3.0-qwen2.5-0.5b","choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","content":"Elastic is a versatile technology that supports a wide range of applications. Its coolness stems from its ability to manage complex environments and provide a seamless integration with other technologies."}}],"usage":{"prompt_tokens":14,"completion_tokens":35,"total_tokens":49}} +``` + +## Answer + +Elastic is a versatile technology that supports a wide range of applications. Its coolness stems from its ability to manage complex environments and provide a seamless integration with other technologies. 
diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/requirements.txt b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/requirements.txt new file mode 100644 index 00000000..6ad807a9 --- /dev/null +++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/requirements.txt @@ -0,0 +1,230 @@ +absl-py==2.3.1 +aiohappyeyeballs==2.4.6 +aiohttp==3.11.13 +aiosignal==1.3.2 +alembic==1.14.1 +annotated-types==0.7.0 +anyio==4.10.0 +appdirs==1.4.4 +appnope==0.1.4 +asgiref==3.8.1 +asttokens==3.0.0 +async-timeout==5.0.1 +attrs==25.1.0 +auth0-python==4.8.1 +backoff==2.2.1 +bcrypt==4.3.0 +beautifulsoup4==4.13.3 +blinker==1.9.0 +build==1.2.2.post1 +cachetools==5.5.2 +certifi==2025.1.31 +cffi==1.17.1 +charset-normalizer==3.4.1 +chroma-hnswlib==0.7.6 +chromadb==0.5.23 +click==8.1.8 +cohere==5.14.0 +coloredlogs==15.0.1 +comm==0.2.2 +crewai==0.102.0 +crewai-tools==0.36.0 +cryptography==44.0.2 +dataclasses-json==0.6.7 +debugpy==1.8.12 +decorator==5.2.1 +Deprecated==1.2.18 +deprecation==2.1.0 +distro==1.9.0 +docker==7.1.0 +docstring_parser==0.16 +durationpy==0.9 +elastic-transport==8.17.0 +elasticsearch==8.17.0 +embedchain==0.1.127 +et_xmlfile==2.0.0 +exceptiongroup==1.3.0 +executing==2.2.0 +fastapi==0.104.1 +fastavro==1.10.0 +filelock==3.17.0 +flatbuffers==25.2.10 +frozenlist==1.5.0 +fsspec==2025.2.0 +google-api-core==2.24.1 +google-auth==2.38.0 +google-cloud-aiplatform==1.82.0 +google-cloud-bigquery==3.30.0 +google-cloud-core==2.4.2 +google-cloud-resource-manager==1.14.1 +google-cloud-storage==2.19.0 +google-crc32c==1.6.0 +google-genai==1.30.0 +google-resumable-media==2.7.2 +googleapis-common-protos==1.68.0 +gptcache==0.1.44 +grpc-google-iam-v1==0.14.0 +grpcio==1.70.0 +grpcio-status==1.70.0 +grpcio-tools==1.70.0 +h11==0.14.0 +h2==4.2.0 +hpack==4.1.0 +httpcore==1.0.7 +httptools==0.6.4 +httpx==0.28.1 +httpx-sse==0.4.0 +huggingface-hub==0.29.1 +humanfriendly==10.0 +hyperframe==6.1.0 +idna==3.10 +importlib_metadata==8.5.0 
+importlib_resources==6.5.2 +instructor==1.7.2 +ipykernel==6.29.5 +ipython==9.0.1 +ipython_pygments_lexers==1.1.1 +jedi==0.19.2 +Jinja2==3.1.5 +jiter==0.8.2 +json5==0.10.0 +json_repair==0.39.1 +jsonpatch==1.33 +jsonpickle==4.0.2 +jsonpointer==3.0.0 +jsonref==1.1.0 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +jupyter_client==8.6.3 +jupyter_core==5.7.2 +kubernetes==32.0.1 +lancedb==0.20.0 +langchain==0.3.19 +langchain-cohere==0.3.5 +langchain-community==0.3.18 +langchain-core==0.3.40 +langchain-experimental==0.3.4 +langchain-openai==0.2.14 +langchain-text-splitters==0.3.6 +langextract==1.0.8 +langsmith==0.1.147 +litellm==1.60.2 +Mako==1.3.9 +markdown-it-py==3.0.0 +MarkupSafe==3.0.2 +marshmallow==3.26.1 +matplotlib-inline==0.1.7 +mdurl==0.1.2 +mem0ai==0.1.60 +ml_collections==1.1.0 +mmh3==5.1.0 +monotonic==1.6 +more-itertools==10.7.0 +mpmath==1.3.0 +multidict==6.1.0 +mypy-extensions==1.0.0 +nest-asyncio==1.6.0 +networkx==3.4.2 +nodeenv==1.9.1 +numpy==1.26.4 +oauthlib==3.2.2 +onnxruntime==1.20.1 +openai==1.65.2 +openpyxl==3.1.5 +opentelemetry-api==1.30.0 +opentelemetry-exporter-otlp-proto-common==1.30.0 +opentelemetry-exporter-otlp-proto-grpc==1.30.0 +opentelemetry-exporter-otlp-proto-http==1.30.0 +opentelemetry-instrumentation==0.51b0 +opentelemetry-instrumentation-asgi==0.51b0 +opentelemetry-instrumentation-fastapi==0.51b0 +opentelemetry-proto==1.30.0 +opentelemetry-sdk==1.30.0 +opentelemetry-semantic-conventions==0.51b0 +opentelemetry-util-http==0.51b0 +orjson==3.10.15 +overrides==7.7.0 +packaging==24.2 +pandas==2.2.3 +parso==0.8.4 +pdfminer.six==20231228 +pdfplumber==0.11.5 +pexpect==4.9.0 +pillow==11.1.0 +platformdirs==4.3.6 +portalocker==2.10.1 +posthog==3.18.0 +prompt_toolkit==3.0.50 +propcache==0.3.0 +proto-plus==1.26.0 +protobuf==5.29.3 +psutil==7.0.0 +ptyprocess==0.7.0 +pure_eval==0.2.3 +pyarrow==19.0.1 +pyasn1==0.6.1 +pyasn1_modules==0.4.1 +pycparser==2.22 +pydantic==2.5.0 +pydantic-settings==2.8.1 +pydantic_core==2.14.1 +Pygments==2.19.1 
+PyJWT==2.10.1 +pylance==0.23.2 +pypdf==5.3.1 +pypdfium2==4.30.1 +PyPika==0.48.9 +pyproject_hooks==1.2.0 +pyright==1.1.396 +pysbd==0.3.4 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +pytube==15.0.0 +pytz==2024.2 +pyvis==0.3.2 +PyYAML==6.0.2 +pyzmq==26.2.1 +qdrant-client==1.13.2 +referencing==0.36.2 +regex==2024.11.6 +requests==2.32.3 +requests-oauthlib==2.0.0 +requests-toolbelt==1.0.0 +rich==13.9.4 +rpds-py==0.23.1 +rsa==4.9 +schema==0.7.7 +shapely==2.0.7 +shellingham==1.5.4 +six==1.17.0 +sniffio==1.3.1 +soupsieve==2.6 +SQLAlchemy==2.0.38 +stack-data==0.6.3 +starlette==0.27.0 +sympy==1.13.3 +tabulate==0.9.0 +tenacity==9.0.0 +tiktoken==0.7.0 +tokenizers==0.20.3 +tomli==2.2.1 +tomli_w==1.2.0 +tornado==6.4.2 +tqdm==4.67.1 +traitlets==5.14.3 +typer==0.15.2 +types-requests==2.32.0.20250301 +typing-inspect==0.9.0 +typing_extensions==4.12.2 +tzdata==2025.1 +urllib3==2.3.0 +uv==0.6.3 +uvicorn==0.24.0 +uvloop==0.21.0 +watchfiles==1.0.4 +wcwidth==0.2.13 +websocket-client==1.8.0 +websockets==15.0.1 +wrapt==1.17.2 +yarl==1.18.3 +zipp==3.21.0 diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/script.py b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/script.py new file mode 100644 index 00000000..66362c63 --- /dev/null +++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/script.py @@ -0,0 +1,118 @@ +import os +import time + +from elasticsearch import Elasticsearch, helpers +from openai import OpenAI + +ES_URL = "http://localhost:9200" +ES_API_KEY = "your-api-key-here" +INDEX_NAME = "team-data" +LOCAL_AI_URL = "http://localhost:8080/v1" # Local AI server URL +DATASET_FOLDER = "./Dataset" + + +es_client = Elasticsearch(ES_URL, api_key=ES_API_KEY) +ai_client = OpenAI(base_url=LOCAL_AI_URL, api_key="sk-x") + + +def build_documents(dataset_folder, index_name): + for filename in os.listdir(dataset_folder): + if filename.endswith(".txt"): + filepath = os.path.join(dataset_folder, filename) + + with open(filepath, 
"r", encoding="utf-8") as file: + content = file.read() + + yield { + "_index": index_name, + "_source": {"file_title": filename, "file_content": content}, + } + + +def index_documents(): + try: + start_time = time.time() + + success, _ = helpers.bulk( + es_client, build_documents(DATASET_FOLDER, INDEX_NAME) + ) + + end_time = time.time() + bulk_latency = (end_time - start_time) * 1000 # ms + + return success, bulk_latency + except Exception as e: + print(f"āŒ Error: {str(e)}") + return 0, 0 + + +def semantic_search(query, size=3): + start_time = time.time() + search_body = { + "query": {"semantic": {"field": "semantic_field", "query": query}}, + "size": size, + } + + response = es_client.search(index=INDEX_NAME, body=search_body) + search_latency = (time.time() - start_time) * 1000 # ms + + return response["hits"]["hits"], search_latency + + +def query_local_ai(prompt, model): + start_time = time.time() + + try: + response = ai_client.chat.completions.create( + model=model, + messages=[{"role": "user", "content": prompt}], + ) + + ai_latency = (time.time() - start_time) * 1000 # ms + + # Extract response text + response_text = response.choices[0].message.content + + # Calculate tokens per second if usage info is available + tokens_per_second = 0 + if hasattr(response, "usage") and response.usage: + total_tokens = response.usage.completion_tokens + if ai_latency > 0: + tokens_per_second = (total_tokens / ai_latency) * 1000 # tokens/second + + return response_text, ai_latency, tokens_per_second + except Exception as e: + ai_latency = (time.time() - start_time) * 1000 + + return f"Error: {str(e)}", ai_latency, 0 + + +if __name__ == "__main__": + print("šŸ“„ Indexing documents...") + success, bulk_latency = index_documents() + + time.sleep(2) # Wait for indexing to complete + + query = "Can you summarize the performance issues in the API?" 
+ + print(f"šŸ” Search: '{query}'") + search_results, search_latency = semantic_search(query) + + context = "Information found:\n" + for hit in search_results: + source = hit["_source"] + context += f"File: {source['file_title']}\n" + context += f"Content: {source['file_content']}\n\n" + + prompt = f"{context}\nQuestion: {query}\nAnswer:" + + ai_model = "dolphin3.0-qwen2.5-0.5b" + + print(f"šŸ¤– Asking to model: {ai_model}") + response, ai_latency, tokens_per_second = query_local_ai(prompt, ai_model) + + print(f"\nšŸ’” Question: {query}\nšŸ“ Answer: {response}") + + print(f"āœ… Indexed {success} documents in {bulk_latency:.0f}ms") + print(f"šŸ” Search Latency: {search_latency:.0f}ms") + print(f"šŸ¤– AI Latency: {ai_latency:.0f}ms | {tokens_per_second:.1f} tokens/s")