Skip to content

Commit 0adccaa

Browse files
Implement ChatMK (#3)
1 parent 00cb96e commit 0adccaa

File tree

11 files changed

+1400
-28
lines changed

11 files changed

+1400
-28
lines changed

.github/workflows/jekyll.yml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,31 @@ jobs:
3838
ruby-version: '3.1' # Not needed with a .ruby-version file
3939
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
4040
cache-version: 0 # Increment this number if you need to re-download cached gems
41+
# Setup Python for ChatMK embeddings
42+
- name: Setup Python
43+
uses: actions/setup-python@v4
44+
with:
45+
python-version: '3.11'
46+
cache: 'pip'
47+
# Install ChatMK dependencies
48+
- name: Install Python dependencies
  run: |
    python -m pip install --upgrade pip
    # Quote each requirement: an unquoted ">=" is parsed by the shell as an
    # output redirection, which drops the version constraint and creates
    # stray files named "=2.2.0", "=1.11.0" and "=1.17.0".
    pip install "sentence-transformers>=2.2.0" "torch>=1.11.0" "numpy>=1.17.0"
52+
# Cache model downloads
53+
- name: Cache sentence-transformers model
54+
uses: actions/cache@v3
55+
with:
56+
path: ~/.cache/huggingface
57+
key: ${{ runner.os }}-huggingface-v1
58+
restore-keys: |
59+
${{ runner.os }}-huggingface-
4160
- name: Setup Pages
4261
id: pages
4362
uses: actions/configure-pages@v5
4463
- name: Build with Jekyll
4564
# Outputs to the './_site' directory by default
65+
# ChatMK plugin generates embeddings during build
4666
run: bundle exec jekyll build --baseurl "${{ steps.pages.outputs.base_path }}"
4767
env:
4868
JEKYLL_ENV: production

_includes/header.html

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,13 @@ <h4>{{ site.heading }}</h4>
3636
</a>
3737
<!-- Mobile Buttons Group -->
3838
<div class="mobile-buttons-group is-hidden-tablet">
39+
<div class="navbar-item" onclick="openChatMKModal()">
40+
<div class="buttons">
41+
<a class="button is-text">
42+
<span class="chatmk-btn">@></span>
43+
</a>
44+
</div>
45+
</div>
3946
<div class="navbar-item" onclick="openSearchModal()">
4047
<div class="buttons">
4148
<a class="button is-text">
@@ -96,6 +103,10 @@ <h4>{{ site.heading }}</h4>
96103
<div class="navbar-end is-hidden-mobile">
97104
<div class="navbar-item">
98105
<div class="buttons">
106+
<!-- ChatMK Button -->
107+
<a class="button is-text" onclick="openChatMKModal()">
108+
<span class="chatmk-btn">@></span>
109+
</a>
99110
<!-- Command Palette Button -->
100111
<a class="button is-text" onclick="openSearchModal()">
101112
<span class="cmd-palette-btn">⌘K</span>

_layouts/post.html

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,5 +272,20 @@ <h5 class="block-title">Metadata</h5>
272272
<script src="/assets/js/headerLinks.js"></script>
273273
<script src="/assets/js/footnoteTooltip.js"></script>
274274
<script src="/assets/js/hashArt.js"></script>
275+
276+
<!-- ChatMK Scripts -->
277+
<script type="module">
  import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';

  // Fetch model files from the Hugging Face Hub only.
  // transformers.js 2.x reads `remoteHost`, `allowRemoteModels` and
  // `allowLocalModels`; the previously assigned `remoteURL` / `localURL`
  // names are not settings this version looks at, so they had no effect.
  env.remoteHost = 'https://huggingface.co/';
  env.allowRemoteModels = true;
  // Skip the site-relative model lookup; no model files are served by this site.
  env.allowLocalModels = false;

  // Expose the library on `window` so the ChatMK scripts can reach it.
  window.transformers = { pipeline, env };
</script>
287+
<script src="/assets/js/chatmkSearch.js"></script>
288+
<script type="module" src="/assets/js/chatmkAI.js"></script>
289+
<script src="/assets/js/chatmkModal.js"></script>
275290
</body>
276291
</html>

_plugins/chatmk_generate_data.rb

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
require 'json'
2+
3+
module Jekyll
4+
class ChatMKDataGenerator < Jekyll::Generator
5+
safe true
6+
priority :low
7+
8+
# Generator entry point.
#
# Remembers the site and caches the extracted ChatMK data on the instance so
# the :post_write hook can read it back once the site has been written.
#
# @param site [Jekyll::Site] the site being built
# @return [Hash] the extracted data (also stored in @brain_data)
def generate(site)
  @site = site
  @brain_data = extract_brain_data(@site)
end
13+
14+
private
15+
16+
# Builds a short plain-text excerpt for a page or note.
#
# Resolution order:
#   1. the front matter 'excerpt' value, returned as-is;
#   2. the item's own excerpt with HTML tags stripped;
#   3. the first paragraph of the tag-stripped content, cut to max_length
#      characters with "..." appended when it had to be shortened.
#
# @param item [#data, #excerpt, #content] page or document to summarise
# @param max_length [Integer] character limit applied to the content fallback
# @return [String] the excerpt (empty when nothing usable is found)
def generate_excerpt(item, max_length = 200)
  excerpt = item.data['excerpt'] ||
            item.excerpt.to_s.gsub(/<\/?[^>]*>/, '').strip
  return excerpt unless excerpt.to_s.strip.empty?

  # content may be nil for items without a body; treat that as empty text.
  content_text = item.content.to_s.gsub(/<\/?[^>]*>/, '').strip
  first_paragraph = content_text.split("\n\n").first || ''
  return first_paragraph if first_paragraph.length <= max_length

  # Exclusive range: keep exactly max_length characters (an inclusive range
  # would keep max_length + 1).
  "#{first_paragraph[0...max_length]}..."
end
31+
32+
# Collects the content ChatMK indexes.
#
# Pages are taken only from the 'pages/' directory; a page without a title
# falls back to its capitalised file name. Notes come from the 'notes'
# collection (when it exists) and are kept only if their 'season' is
# 'spring' or 'summer'.
#
# @param site [Jekyll::Site] the site being built
# @return [Hash] { pages: [...], notes: [...] } of plain hashes
def extract_brain_data(site)
  listed_pages = site.pages.select { |page| page.path.start_with?('pages/') }
  page_entries = listed_pages.map do |page|
    {
      title: page.data['title'] || File.basename(page.path, '.*').capitalize,
      content: page.content,
      url: page.url,
      excerpt: generate_excerpt(page, 200)
    }
  end

  note_entries =
    if site.collections.key?('notes')
      in_season = site.collections['notes'].docs.select do |note|
        season = note.data['season']
        season == 'spring' || season == 'summer'
      end
      in_season.map do |note|
        {
          title: note.data['title'],
          content: note.content,
          url: note.url,
          tags: note.data['tags'] || [],
          excerpt: generate_excerpt(note, 150)
        }
      end
    else
      []
    end

  { pages: page_entries, notes: note_entries }
end
75+
end
76+
77+
# After the site has been written, dump the generator's data to
# <dest>/assets/json/chatmk-data.json and, when the Python helper script is
# present in _plugins, replace that file with a copy enriched with embeddings.
# Embedding generation is best-effort: any failure is logged as a warning and
# the plain JSON file is left in place.
Jekyll::Hooks.register :site, :post_write do |site|
  generator = site.generators.find { |g| g.is_a?(ChatMKDataGenerator) }
  brain_data = generator&.instance_variable_get(:@brain_data)
  next unless brain_data

  # Write to destination assets/json directory
  dest_dir = File.join(site.dest, 'assets', 'json')
  dest_path = File.join(dest_dir, 'chatmk-data.json')
  FileUtils.mkdir_p(dest_dir)
  File.write(dest_path, JSON.generate(brain_data))

  # Generate embeddings using Python script
  embedding_script = File.join(site.source, '_plugins', 'chatmk_generate_embeddings.py')
  unless File.exist?(embedding_script)
    Jekyll.logger.warn "ChatMKDataGenerator:", "Embedding script not found, skipping embeddings"
    next
  end

  temp_path = "#{dest_path}.tmp"
  # Argument-list form: no shell is involved, so paths containing spaces or
  # shell metacharacters are passed through verbatim.
  succeeded = system('python3', embedding_script, dest_path, temp_path)

  # Require a zero exit status as well as the output file, so a partially
  # written temp file from a failed run never replaces the good JSON.
  if succeeded && File.exist?(temp_path)
    FileUtils.mv(temp_path, dest_path)
    Jekyll.logger.info "ChatMKDataGenerator:", "Added embeddings to chatmk-data.json"
  else
    FileUtils.rm_f(temp_path)
    Jekyll.logger.warn "ChatMKDataGenerator:", "Failed to generate embeddings, continuing without them"
  end
end
108+
end
_plugins/chatmk_generate_embeddings.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Generate embeddings for ChatMK content using sentence-transformers
4+
"""
5+
import json
6+
import sys
7+
import os
8+
9+
def _embed_items(model, items):
    """Attach an 'embedding' list to every dict in ``items``, encoded as one batch."""
    if not items:
        return

    # Combine title and excerpt for better embedding; skip missing/empty
    # parts so a null title is not embedded as the literal string "None".
    texts = [
        ". ".join(str(part) for part in (item.get('title'), item.get('excerpt')) if part)
        for item in items
    ]
    vectors = model.encode(texts).tolist()
    for item, vector in zip(items, vectors):
        item['embedding'] = vector


def generate_embeddings(input_file, output_file):
    """Add sentence embeddings to the ChatMK content JSON.

    Reads ``input_file`` (a JSON object with 'pages' and 'notes' lists of
    dicts), adds an 'embedding' list to every entry, and writes the result
    to ``output_file`` as compact JSON.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write.

    Exits the process with status 1 (after printing the reason to stderr)
    when sentence-transformers is not installed or any step fails.
    """
    try:
        from sentence_transformers import SentenceTransformer
    except ImportError:
        print("Error: sentence-transformers not installed. Install with: pip install sentence-transformers",
              file=sys.stderr)
        sys.exit(1)

    try:
        # Load the content data; JSON is read as UTF-8 regardless of locale.
        with open(input_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Initialize the embedding model (lightweight and good for semantic search)
        print("Loading embedding model...")
        model = SentenceTransformer('all-MiniLM-L6-v2')

        print(f"Generating embeddings for {len(data['pages'])} pages...")
        _embed_items(model, data['pages'])

        print(f"Generating embeddings for {len(data['notes'])} notes...")
        _embed_items(model, data['notes'])

        # Save the enhanced data
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(data, f, separators=(',', ':'))

        print(f"Generated embeddings for {len(data['pages'])} pages and {len(data['notes'])} notes")

    except Exception as e:
        print(f"Error generating embeddings: {e}", file=sys.stderr)
        sys.exit(1)
50+
51+
# Command-line entry point: <script> <input_file> <output_file>.
# Argument problems are reported on stderr and exit with status 1.
if __name__ == "__main__":
    if len(sys.argv) != 3:
        # Use the real script name instead of a hard-coded one that can drift.
        script_name = os.path.basename(sys.argv[0])
        print(f"Usage: python {script_name} <input_file> <output_file>", file=sys.stderr)
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2]

    if not os.path.exists(input_file):
        print(f"Error: Input file '{input_file}' not found", file=sys.stderr)
        sys.exit(1)

    generate_embeddings(input_file, output_file)

0 commit comments

Comments
 (0)