diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_QA-team_wednesday.txt b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_QA-team_wednesday.txt
new file mode 100644
index 00000000..78204cb6
--- /dev/null
+++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_QA-team_wednesday.txt
@@ -0,0 +1,38 @@
+MEETING TRANSCRIPT - QA TEAM
+Date: Wednesday, September 18, 2025
+Time: 10:00 AM - 11:30 AM
+Participants: Maria (QA Lead), Tom (Senior QA Engineer), Lisa (QA Automation Engineer), Roberto (Manual Testing Specialist)
+
+[10:02] Maria: Let's review CRM migration testing progress. Tom, report on data import tests?
+
+[10:03] Tom: Found critical issues. Import failures with special characters in addresses and names.
+
+[10:06] Tom: UTF-8 parsing problems with accents, currency symbols, and Asian characters.
+
+[10:08] Tom: 12% of records affected - about 15,000 out of 125,000 total records.
+
+[10:09] Roberto: Confirmed. Also, failed imports corrupt entire batches.
+
+[10:12] Lisa: No atomic transactions for batches?
+
+[10:13] Tom: Correct. Each record processed independently without rollback.
+
+[10:15] Roberto: Found referential integrity issues - orphaned references between contacts and companies.
+
+[10:19] Maria: Need three validation types: pre-import, during import, and post-import.
+
+[10:25] Tom: Recommend smaller migration batches to reduce risk?
+
+[10:26] Maria: Excellent. Batches of 5,000 records with validation between each.
+
+[10:30] Maria: Four recommendations: UTF-8 parser fix, atomic transactions, handle orphaned references, small batch migration.
+
+[10:33] Roberto: Also need concurrency testing during migration.
+
+[10:40] Maria: Complete additional testing in one week. Feasible?
+
+[10:42] Tom: Will share test cases today.
+
+[10:44] Maria: Friday 2 PM meeting before management review.
+
+[10:45] Lisa: Will prepare testing metrics dashboard.
\ No newline at end of file
diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_development-team_monday.txt b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_development-team_monday.txt
new file mode 100644
index 00000000..aa6deb24
--- /dev/null
+++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_development-team_monday.txt
@@ -0,0 +1,32 @@
+MEETING TRANSCRIPT - DEVELOPMENT TEAM
+Date: Monday, September 16, 2025
+Time: 09:00 AM - 10:15 AM
+Participants: Alice (Tech Lead), John (Senior Developer), Sarah (Backend Developer), Mike (DevOps Engineer)
+
+[09:02] Alice: Let's review the search API deployed last week. Any issues?
+
+[09:03] Sarah: API works but performance degrades with 1,000+ queries per minute. Response times jump from 200ms to 3 seconds.
+
+[09:05] John: Elasticsearch queries and no caching layer?
+
+[09:06] Sarah: Exactly. Complex queries are slow, and we need Redis caching.
+
+[09:07] Mike: Also hitting CPU limits during spikes. Need auto-scaling.
+
+[09:08] Alice: Three priorities: query optimization, Redis cache, and infrastructure scaling.
+
+[09:11] Sarah: Propose 15-minute TTL cache with event-based invalidation.
+
+[09:13] John: I'll optimize bool queries and add calculated index fields.
+
+[09:17] Mike: Can set up auto-scaling by tomorrow - scale to 6 instances at 70% CPU.
+
+[09:18] Sarah: Starting Redis today, basic version by Wednesday.
+
+[09:19] John: New indexes and query optimization ready for testing Wednesday.
+
+[09:24] Alice: Clear plan. Mike handles scaling, Sarah implements cache, John optimizes queries.
+
+[09:26] Alice: I'll coordinate with product team on deployment impacts and QA for load testing.
+
+[09:30] Alice: Meeting Wednesday 3 PM to review progress. Thanks team!
\ No newline at end of file
diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_management-sync_friday.txt b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_management-sync_friday.txt
new file mode 100644
index 00000000..7d516d08
--- /dev/null
+++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_management-sync_friday.txt
@@ -0,0 +1,36 @@
+MEETING TRANSCRIPT - MANAGEMENT SYNC
+Date: Friday, September 20, 2025
+Time: 02:00 PM - 03:00 PM
+Participants: David (Project Manager), Alice (Tech Lead), Maria (QA Lead), Emma (Product Manager), Carlos (DevOps Manager)
+
+[14:03] Emma: Good progress. Users report 40% search speed improvement, but support tickets show peak hour performance issues.
+
+[14:05] Alice: We've identified bottlenecks. Working on Redis caching and Elasticsearch query optimization.
+
+[14:06] David: Can we resolve issues without impacting October migration date?
+
+[14:09] Alice: Recommend two-week extension for complete migration due to performance issues.
+
+[14:10] Maria: QA agrees. Found data import blockers with special characters and integrity issues.
+
+[14:12] Maria: Need one week to fix issues, another for complete re-testing.
+
+[14:14] Carlos: Infrastructure supports extension for proper rollback and disaster recovery testing.
+
+[14:15] Emma: Could we do partial migration on original date?
+
+[14:17] Alice: Yes. Contact management module first, reports and analytics in phase two.
+
+[14:21] Maria: Phased migration ideal for QA - validate each module independently.
+
+[14:22] David: Proposal: Phase 1 - Contact management October 15th. Phase 2 - Complete migration October 30th.
+
+[14:23] Alice: Reasonable timeline for performance fixes.
+
+[14:24] Emma: Works from product perspective. Will update stakeholder communications.
+
+[14:25] Maria: QA commits to these timelines.
+
+[14:26] Carlos: Will prepare deployment strategies for both phases.
+
+[14:32] David: Carlos, send deployment calendar by Monday. Thanks team!
\ No newline at end of file
diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/report_QA-team.txt b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/report_QA-team.txt
new file mode 100644
index 00000000..c5730a84
--- /dev/null
+++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/report_QA-team.txt
@@ -0,0 +1,31 @@
+WEEKLY REPORT - QA TEAM
+Week of September 16-20, 2025
+Prepared by: Maria Gonzalez, QA Lead
+
+=== EXECUTIVE SUMMARY ===
+QA team identified critical issues in CRM migration testing. Significant problems in legacy data import and referential integrity require immediate attention.
+
+=== TESTING COMPLETED ===
+- Functional: Contact management (100%), Authentication (100%), Search (75%), Analytics (60%)
+- Data import: 125,000 legacy records tested, 12 critical issues found
+- Performance: Core modules complete, identified issues with 500+ concurrent users
+
+=== CRITICAL ISSUES ===
+**QA-2025-001 - Data Import Failures**
+- UTF-8 parsing problems with special characters
+- 15,000 records affected (12% of total)
+- Escalated to development
+
+**QA-2025-002 - Transaction Integrity**  
+- Failed imports leave batches in inconsistent state
+- No atomic transactions for batches
+- Requires architecture redesign
+
+**QA-2025-003 - Orphaned References**
+- 2,300 records with invalid company/contact references
+- Pending business logic decision
+
+=== METRICS ===
+- Test cases executed: 847 of 1,200 (70.6%)
+- Pass rate: 79.3%, Automation coverage: 36%
+- Bugs: 28 total (4 critical, 8 high, 12 medium, 4 low)
diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/report_development-team.txt b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/report_development-team.txt
new file mode 100644
index 00000000..932c920b
--- /dev/null
+++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/report_development-team.txt
@@ -0,0 +1,30 @@
+WEEKLY REPORT - DEVELOPMENT TEAM
+Week of September 16-20, 2025
+Prepared by: Alice Thompson, Tech Lead
+
+=== EXECUTIVE SUMMARY ===
+Development team completed critical infrastructure components but identified performance bottlenecks requiring attention before production deployment.
+
+=== KEY ACCOMPLISHMENTS ===
+- Database schema and indexes completed for CRM
+- 12 of 18 API endpoints integrated with authentication
+- Contact management: 95% complete, Search: 80%, Analytics: 70%
+
+=== TECHNICAL CHALLENGES ===
+- Critical: Search API degrades at 1,000+ queries/minute (200ms to 3+ seconds)
+- Root cause: Complex Elasticsearch queries without caching layer
+- Multi-filter searches average 1.2 seconds execution time
+
+=== ACTION PLAN NEXT WEEK ===
+1. Redis cache implementation (Sarah) - Basic by Wednesday, complete by Friday
+2. Elasticsearch query optimization (John) - Testing ready Wednesday  
+3. Auto-scaling setup (Mike) - Scale to 6 instances at 70% CPU
+
+=== METRICS ===
+- Story points: 43 of 50 completed (86%)
+- Bugs: 7 reported, 12 resolved
+- Code coverage: 78% (target: 80%)
+
+=== TIMELINE ===
+- October 15 Contact Management: 85% confidence, 2 sprints remaining
+- October 30 Complete Migration: 90% confidence, 4 sprints remaining
\ No newline at end of file
diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/llama-smoltalk-3.2-1b-instruct_results.md b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/llama-smoltalk-3.2-1b-instruct_results.md
new file mode 100644
index 00000000..333025cc
--- /dev/null
+++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/llama-smoltalk-3.2-1b-instruct_results.md
@@ -0,0 +1,19 @@
+📥 Indexing documents...
+
+🔍 Search: 'Can you summarize the performance issues in the API?'
+
+🤖 Asking to model: llama-smoltalk-3.2-1b-instruct
+
+## 💡 Question: 
+Can you summarize the performance issues in the API?
+
+## 📝 Answer: 
+The primary performance issue in the API is the slow response times of 3 seconds or more from the 1,000+ queries per minute. The search API, in particular, is experiencing performance degradations, with complex Elasticsearch queries causing the issues. A proposed solution is to implement a 15-minute TTL cache with event-based invalidation to improve response times. Additionally, a three-tiered approach involving optimization of bool queries and added calculated index fields is being implemented to improve query performance. Finally, auto-scaling for the infrastructure is set up to scale to 6 instances at 70% CPU.
+
+
+## Stats
+✅ Indexed 5 documents in 250ms
+
+🔍 Search Latency: 57ms
+
+🤖 AI Latency: 21019ms | 5.8 tokens/s
\ No newline at end of file
diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/results.md b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/results.md
new file mode 100644
index 00000000..5463f5ce
--- /dev/null
+++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/results.md
@@ -0,0 +1,25 @@
+📥 Indexing documents...
+
+🔍 Search: 'Can you summarize the performance issues in the API?'
+
+🤖 Asking to model: dolphin3.0-qwen2.5-0.5b
+
+## 💡 Question: 
+Can you summarize the performance issues in the API?
+## 📝 Answer: 
+
+The performance issues in the Search API deployed on September 16, 2025, include:
+
+- Degradation in performance at 1,000+ queries per minute, resulting in a 200ms to 3-second response time for complex queries.
+- High response times for queries that do not utilize caching, causing them to take significantly longer than 2 seconds.
+- Inability to scale to handle spikes in query traffic, leading to increased CPU limits.
+
+These issues are primarily attributed to the complexity and inefficiency of the Elasticsearch queries, as well as the lack of caching layer. This indicates a need for optimization and addressing these specific performance bottlenecks to ensure the API's scalability and effectiveness for the development team.
+
+## Stats
+
+✅ Indexed 5 documents in 627ms
+
+🔍 Search Latency: 81ms
+
+🤖 AI Latency: 16044ms | 9.5 tokens/s
\ No newline at end of file
diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/smollm2-1.7b-instruct_results.md b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/smollm2-1.7b-instruct_results.md
new file mode 100644
index 00000000..c3beb29e
--- /dev/null
+++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/smollm2-1.7b-instruct_results.md
@@ -0,0 +1,28 @@
+📥 Indexing documents...
+
+🔍 Search: 'Can you summarize the performance issues in the API?'
+
+🤖 Asking to model: smollm2-1.7b-instruct
+
+## 💡 Question: 
+
+Can you summarize the performance issues in the API?
+## 📝 Answer: 
+
+The summary of the performance issues in the API can be summarized as follows:
+
+- The API works but performance degrades at 1,000+ queries per minute, resulting in response times jumping from 200ms to 3 seconds.
+- The root cause of these issues is the lack of a caching layer in the Elasticsearch queries.
+- The team proposed a few solutions, including a 15-minute TTL cache with event-based invalidation, which would be implemented by Sarah.
+- They also proposed optimizing boolean queries and adding calculated index fields, which would be taken care of by John.
+- To handle the performance spikes, they suggested auto-scaling the infrastructure, with Mike working on this and aiming to scale to 6 instances at 70% CPU by Wednesday.
+- They also proposed implementing Redis cache, which would be done by Sarah.
+- The team discussed the timeline and timeline of the changes and proposed a phased migration approach: complete migration on October 30th, followed by a partial migration on October 15th.
+
+## Stats
+
+✅ Indexed 5 documents in 141ms
+
+🔍 Search Latency: 26ms
+
+🤖 AI Latency: 47561ms | 4.8 tokens/s
diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/why-elasticsearch-is-so-cool.md b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/why-elasticsearch-is-so-cool.md
new file mode 100644
index 00000000..96a312e8
--- /dev/null
+++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/why-elasticsearch-is-so-cool.md
@@ -0,0 +1,11 @@
+>>> Why Elastic is so cool?
+
+## Raw Response
+
+```json
+{"created":1762881411,"object":"chat.completion","id":"0178b570-4e13-4c1b-9ff4-e2ca5bff1c67","model":"dolphin3.0-qwen2.5-0.5b","choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","content":"Elastic is a versatile technology that supports a wide range of applications. Its coolness stems from its ability to manage complex environments and provide a seamless integration with other technologies."}}],"usage":{"prompt_tokens":14,"completion_tokens":35,"total_tokens":49}}
+```
+
+## Answer
+
+Elastic is a versatile technology that supports a wide range of applications. Its coolness stems from its ability to manage complex environments and provide a seamless integration with other technologies.
diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/requirements.txt b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/requirements.txt
new file mode 100644
index 00000000..6ad807a9
--- /dev/null
+++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/requirements.txt
@@ -0,0 +1,230 @@
+absl-py==2.3.1
+aiohappyeyeballs==2.4.6
+aiohttp==3.11.13
+aiosignal==1.3.2
+alembic==1.14.1
+annotated-types==0.7.0
+anyio==4.10.0
+appdirs==1.4.4
+appnope==0.1.4
+asgiref==3.8.1
+asttokens==3.0.0
+async-timeout==5.0.1
+attrs==25.1.0
+auth0-python==4.8.1
+backoff==2.2.1
+bcrypt==4.3.0
+beautifulsoup4==4.13.3
+blinker==1.9.0
+build==1.2.2.post1
+cachetools==5.5.2
+certifi==2025.1.31
+cffi==1.17.1
+charset-normalizer==3.4.1
+chroma-hnswlib==0.7.6
+chromadb==0.5.23
+click==8.1.8
+cohere==5.14.0
+coloredlogs==15.0.1
+comm==0.2.2
+crewai==0.102.0
+crewai-tools==0.36.0
+cryptography==44.0.2
+dataclasses-json==0.6.7
+debugpy==1.8.12
+decorator==5.2.1
+Deprecated==1.2.18
+deprecation==2.1.0
+distro==1.9.0
+docker==7.1.0
+docstring_parser==0.16
+durationpy==0.9
+elastic-transport==8.17.0
+elasticsearch==8.17.0
+embedchain==0.1.127
+et_xmlfile==2.0.0
+exceptiongroup==1.3.0
+executing==2.2.0
+fastapi==0.104.1
+fastavro==1.10.0
+filelock==3.17.0
+flatbuffers==25.2.10
+frozenlist==1.5.0
+fsspec==2025.2.0
+google-api-core==2.24.1
+google-auth==2.38.0
+google-cloud-aiplatform==1.82.0
+google-cloud-bigquery==3.30.0
+google-cloud-core==2.4.2
+google-cloud-resource-manager==1.14.1
+google-cloud-storage==2.19.0
+google-crc32c==1.6.0
+google-genai==1.30.0
+google-resumable-media==2.7.2
+googleapis-common-protos==1.68.0
+gptcache==0.1.44
+grpc-google-iam-v1==0.14.0
+grpcio==1.70.0
+grpcio-status==1.70.0
+grpcio-tools==1.70.0
+h11==0.14.0
+h2==4.2.0
+hpack==4.1.0
+httpcore==1.0.7
+httptools==0.6.4
+httpx==0.28.1
+httpx-sse==0.4.0
+huggingface-hub==0.29.1
+humanfriendly==10.0
+hyperframe==6.1.0
+idna==3.10
+importlib_metadata==8.5.0
+importlib_resources==6.5.2
+instructor==1.7.2
+ipykernel==6.29.5
+ipython==9.0.1
+ipython_pygments_lexers==1.1.1
+jedi==0.19.2
+Jinja2==3.1.5
+jiter==0.8.2
+json5==0.10.0
+json_repair==0.39.1
+jsonpatch==1.33
+jsonpickle==4.0.2
+jsonpointer==3.0.0
+jsonref==1.1.0
+jsonschema==4.23.0
+jsonschema-specifications==2024.10.1
+jupyter_client==8.6.3
+jupyter_core==5.7.2
+kubernetes==32.0.1
+lancedb==0.20.0
+langchain==0.3.19
+langchain-cohere==0.3.5
+langchain-community==0.3.18
+langchain-core==0.3.40
+langchain-experimental==0.3.4
+langchain-openai==0.2.14
+langchain-text-splitters==0.3.6
+langextract==1.0.8
+langsmith==0.1.147
+litellm==1.60.2
+Mako==1.3.9
+markdown-it-py==3.0.0
+MarkupSafe==3.0.2
+marshmallow==3.26.1
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+mem0ai==0.1.60
+ml_collections==1.1.0
+mmh3==5.1.0
+monotonic==1.6
+more-itertools==10.7.0
+mpmath==1.3.0
+multidict==6.1.0
+mypy-extensions==1.0.0
+nest-asyncio==1.6.0
+networkx==3.4.2
+nodeenv==1.9.1
+numpy==1.26.4
+oauthlib==3.2.2
+onnxruntime==1.20.1
+openai==1.65.2
+openpyxl==3.1.5
+opentelemetry-api==1.30.0
+opentelemetry-exporter-otlp-proto-common==1.30.0
+opentelemetry-exporter-otlp-proto-grpc==1.30.0
+opentelemetry-exporter-otlp-proto-http==1.30.0
+opentelemetry-instrumentation==0.51b0
+opentelemetry-instrumentation-asgi==0.51b0
+opentelemetry-instrumentation-fastapi==0.51b0
+opentelemetry-proto==1.30.0
+opentelemetry-sdk==1.30.0
+opentelemetry-semantic-conventions==0.51b0
+opentelemetry-util-http==0.51b0
+orjson==3.10.15
+overrides==7.7.0
+packaging==24.2
+pandas==2.2.3
+parso==0.8.4
+pdfminer.six==20231228
+pdfplumber==0.11.5
+pexpect==4.9.0
+pillow==11.1.0
+platformdirs==4.3.6
+portalocker==2.10.1
+posthog==3.18.0
+prompt_toolkit==3.0.50
+propcache==0.3.0
+proto-plus==1.26.0
+protobuf==5.29.3
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyarrow==19.0.1
+pyasn1==0.6.1
+pyasn1_modules==0.4.1
+pycparser==2.22
+pydantic==2.5.0
+pydantic-settings==2.8.1
+pydantic_core==2.14.1
+Pygments==2.19.1
+PyJWT==2.10.1
+pylance==0.23.2
+pypdf==5.3.1
+pypdfium2==4.30.1
+PyPika==0.48.9
+pyproject_hooks==1.2.0
+pyright==1.1.396
+pysbd==0.3.4
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+pytube==15.0.0
+pytz==2024.2
+pyvis==0.3.2
+PyYAML==6.0.2
+pyzmq==26.2.1
+qdrant-client==1.13.2
+referencing==0.36.2
+regex==2024.11.6
+requests==2.32.3
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+rich==13.9.4
+rpds-py==0.23.1
+rsa==4.9
+schema==0.7.7
+shapely==2.0.7
+shellingham==1.5.4
+six==1.17.0
+sniffio==1.3.1
+soupsieve==2.6
+SQLAlchemy==2.0.38
+stack-data==0.6.3
+starlette==0.27.0
+sympy==1.13.3
+tabulate==0.9.0
+tenacity==9.0.0
+tiktoken==0.7.0
+tokenizers==0.20.3
+tomli==2.2.1
+tomli_w==1.2.0
+tornado==6.4.2
+tqdm==4.67.1
+traitlets==5.14.3
+typer==0.15.2
+types-requests==2.32.0.20250301
+typing-inspect==0.9.0
+typing_extensions==4.12.2
+tzdata==2025.1
+urllib3==2.3.0
+uv==0.6.3
+uvicorn==0.24.0
+uvloop==0.21.0
+watchfiles==1.0.4
+wcwidth==0.2.13
+websocket-client==1.8.0
+websockets==15.0.1
+wrapt==1.17.2
+yarl==1.18.3
+zipp==3.21.0
diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/script.py b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/script.py
new file mode 100644
index 00000000..66362c63
--- /dev/null
+++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/script.py
@@ -0,0 +1,118 @@
+import os
+import time
+
+from elasticsearch import Elasticsearch, helpers
+from openai import OpenAI
+
+ES_URL = "http://localhost:9200"
+ES_API_KEY = "your-api-key-here"
+INDEX_NAME = "team-data"
+LOCAL_AI_URL = "http://localhost:8080/v1"  # Local AI server URL
+DATASET_FOLDER = "./Dataset"
+
+
+es_client = Elasticsearch(ES_URL, api_key=ES_API_KEY)
+ai_client = OpenAI(base_url=LOCAL_AI_URL, api_key="sk-x")
+
+
+def build_documents(dataset_folder, index_name):
+    for filename in os.listdir(dataset_folder):
+        if filename.endswith(".txt"):
+            filepath = os.path.join(dataset_folder, filename)
+
+            with open(filepath, "r", encoding="utf-8") as file:
+                content = file.read()
+
+            yield {
+                "_index": index_name,
+                "_source": {"file_title": filename, "file_content": content},
+            }
+
+
+def index_documents():
+    try:
+        start_time = time.time()
+
+        success, _ = helpers.bulk(
+            es_client, build_documents(DATASET_FOLDER, INDEX_NAME)
+        )
+
+        end_time = time.time()
+        bulk_latency = (end_time - start_time) * 1000  # ms
+
+        return success, bulk_latency
+    except Exception as e:
+        print(f"❌ Error: {str(e)}")
+        return 0, 0
+
+
+def semantic_search(query, size=3):
+    start_time = time.time()
+    search_body = {
+        "query": {"semantic": {"field": "semantic_field", "query": query}},
+        "size": size,
+    }
+
+    response = es_client.search(index=INDEX_NAME, body=search_body)
+    search_latency = (time.time() - start_time) * 1000  # ms
+
+    return response["hits"]["hits"], search_latency
+
+
+def query_local_ai(prompt, model):
+    start_time = time.time()
+
+    try:
+        response = ai_client.chat.completions.create(
+            model=model,
+            messages=[{"role": "user", "content": prompt}],
+        )
+
+        ai_latency = (time.time() - start_time) * 1000  # ms
+
+        # Extract response text
+        response_text = response.choices[0].message.content
+
+        # Calculate tokens per second if usage info is available
+        tokens_per_second = 0
+        if hasattr(response, "usage") and response.usage:
+            total_tokens = response.usage.completion_tokens
+            if ai_latency > 0:
+                tokens_per_second = (total_tokens / ai_latency) * 1000  # tokens/second
+
+        return response_text, ai_latency, tokens_per_second
+    except Exception as e:
+        ai_latency = (time.time() - start_time) * 1000
+
+        return f"Error: {str(e)}", ai_latency, 0
+
+
+if __name__ == "__main__":
+    print("📥 Indexing documents...")
+    success, bulk_latency = index_documents()
+
+    time.sleep(2)  # Wait for indexing to complete
+
+    query = "Can you summarize the performance issues in the API?"
+
+    print(f"🔍 Search: '{query}'")
+    search_results, search_latency = semantic_search(query)
+
+    context = "Information found:\n"
+    for hit in search_results:
+        source = hit["_source"]
+        context += f"File: {source['file_title']}\n"
+        context += f"Content: {source['file_content']}\n\n"
+
+    prompt = f"{context}\nQuestion: {query}\nAnswer:"
+
+    ai_model = "dolphin3.0-qwen2.5-0.5b"
+
+    print(f"🤖 Asking to model: {ai_model}")
+    response, ai_latency, tokens_per_second = query_local_ai(prompt, ai_model)
+
+    print(f"\n💡 Question: {query}\n📝 Answer: {response}")
+
+    print(f"✅ Indexed {success} documents in {bulk_latency:.0f}ms")
+    print(f"🔍 Search Latency: {search_latency:.0f}ms")
+    print(f"🤖 AI Latency: {ai_latency:.0f}ms | {tokens_per_second:.1f} tokens/s")