one-data-cookie
diff --git a/‎.github/workflows/jekyll.yml‎
Lines changed: 20 additions & 0 deletions b/‎.github/workflows/jekyll.yml‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎_includes/header.html‎
Lines changed: 11 additions & 0 deletions b/‎_includes/header.html‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎_layouts/post.html‎
Lines changed: 15 additions & 0 deletions b/‎_layouts/post.html‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎_plugins/chatmk_generate_data.rb‎
Lines changed: 108 additions & 0 deletions b/‎_plugins/chatmk_generate_data.rb‎
Lines changed: 108 additions & 0 deletions
diff --git a/‎_plugins/chatmk_generate_embeddings.py‎
Lines changed: 63 additions & 0 deletions b/‎_plugins/chatmk_generate_embeddings.py‎
Lines changed: 63 additions & 0 deletions
@@ -38,11 +38,31 @@ jobs:
           ruby-version: '3.1' # Not needed with a .ruby-version file
           bundler-cache: true # runs 'bundle install' and caches installed gems automatically
           cache-version: 0 # Increment this number if you need to re-download cached gems
+      # Setup Python for ChatMK embeddings
+      # NOTE(review): `cache: 'pip'` looks for a dependency file (e.g. requirements.txt)
+      # to build its cache key - confirm one exists in the repository.
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+          cache: 'pip'
+      # Install ChatMK dependencies
+      # The version specifiers are quoted so the shell does not interpret `>` as an
+      # output redirection (which would drop the constraint and create stray files).
+      - name: Install Python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install "sentence-transformers>=2.2.0" "torch>=1.11.0" "numpy>=1.17.0"
+      # Cache model downloads
+      # The key is static, so bump the `v1` suffix to force a fresh download.
+      - name: Cache sentence-transformers model
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/huggingface
+          key: ${{ runner.os }}-huggingface-v1
+          restore-keys: |
+            ${{ runner.os }}-huggingface-
       - name: Setup Pages
         id: pages
         uses: actions/configure-pages@v5
       - name: Build with Jekyll
         # Outputs to the './_site' directory by default
+        # ChatMK plugin generates embeddings during build
         run: bundle exec jekyll build --baseurl "${{ steps.pages.outputs.base_path }}"
         env:
           JEKYLL_ENV: production
 
@@ -36,6 +36,13 @@ <h4>{{ site.heading }}</h4>
       </a>
       <!-- Mobile Buttons Group -->
       <div class="mobile-buttons-group is-hidden-tablet">
+        <!-- ChatMK Button (mobile): clicking the navbar item calls openChatMKModal() -->
+        <div class="navbar-item" onclick="openChatMKModal()">
+          <div class="buttons">
+            <a class="button is-text">
+              <span class="chatmk-btn">@></span>
+            </a>
+          </div>
+        </div>
         <div class="navbar-item" onclick="openSearchModal()">
           <div class="buttons">
             <a class="button is-text">
@@ -96,6 +103,10 @@ <h4>{{ site.heading }}</h4>
       <div class="navbar-end is-hidden-mobile">
         <div class="navbar-item">
           <div class="buttons">
+            <!-- ChatMK Button: clicking calls openChatMKModal() -->
+            <a class="button is-text" onclick="openChatMKModal()">
+              <span class="chatmk-btn">@></span>
+            </a>
             <!-- Command Palette Button -->
             <a class="button is-text" onclick="openSearchModal()">
               <span class="cmd-palette-btn">⌘K</span>
 
@@ -272,5 +272,20 @@ <h5 class="block-title">Metadata</h5>
     <script src="/assets/js/headerLinks.js"></script>
     <script src="/assets/js/footnoteTooltip.js"></script>
     <script src="/assets/js/hashArt.js"></script>
+    
+    <!-- ChatMK Scripts -->
+    <script type="module">
+      import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';
+      
+      // Configure transformers.js to fetch model files from the Hugging Face Hub.
+      // transformers.js 2.x reads `remoteHost` and `allowLocalModels`; assigning
+      // `remoteURL` / `localURL` has no effect on this version.
+      env.remoteHost = 'https://huggingface.co/';
+      env.allowRemoteModels = true;
+      // Disable the same-origin model lookup so model files are only requested
+      // from the remote host.
+      env.allowLocalModels = false;
+      
+      // Expose the API on window so non-module scripts can reach it.
+      // NOTE(review): presumably consumed by the ChatMK scripts that follow - confirm.
+      window.transformers = { pipeline, env };
+    </script>
+    <script src="/assets/js/chatmkSearch.js"></script>
+    <script type="module" src="/assets/js/chatmkAI.js"></script>
+    <script src="/assets/js/chatmkModal.js"></script>
 </body>
 </html>
@@ -0,0 +1,108 @@
+require 'json'
+
+module Jekyll
+  # Jekyll generator that collects page and note content for ChatMK.
+  #
+  # The collected hash is stored in @brain_data so that the :post_write hook
+  # registered in this file can read it and write it out as JSON.
+  class ChatMKDataGenerator < Jekyll::Generator
+    safe true
+    priority :low
+
+    # Pattern used to drop HTML tags from excerpt text.
+    HTML_TAG = /<\/?[^>]*>/.freeze
+
+    # Only notes whose front matter `season` is one of these are exported.
+    INCLUDED_SEASONS = %w[spring summer].freeze
+
+    # Collects the ChatMK data for the site and stores it on the generator.
+    #
+    # @param site [Jekyll::Site] the site being built
+    # @return [Hash] the collected data (also kept in @brain_data)
+    def generate(site)
+      # Store the site reference for the hook
+      @site = site
+      @brain_data = extract_brain_data(site)
+    end
+
+    private
+
+    # Builds a plain-text excerpt for a page or note.
+    #
+    # The front matter excerpt (or, failing that, item.excerpt) is used when it
+    # is non-empty after tag stripping. Otherwise the first paragraph of the
+    # content is used, cut to max_length characters with "..." appended when
+    # it had to be shortened.
+    #
+    # @param item [#data, #excerpt, #content] page or collection document
+    # @param max_length [Integer] character limit for the content fallback
+    # @return [String] excerpt with HTML tags removed
+    def generate_excerpt(item, max_length = 200)
+      # The excerpt may be a non-String object, so it is converted with to_s
+      # before tags are stripped; both sources get the same treatment.
+      excerpt = strip_html(item.data['excerpt'] || item.excerpt)
+      return excerpt unless excerpt.empty?
+
+      first_paragraph = strip_html(item.content).split("\n\n").first.to_s
+      return first_paragraph if first_paragraph.length <= max_length
+
+      # [0, max_length] keeps exactly max_length characters.
+      "#{first_paragraph[0, max_length]}..."
+    end
+
+    # Converts value to a String, removes HTML tags and surrounding whitespace.
+    #
+    # @param value [Object, nil] text-like value; nil becomes ""
+    # @return [String]
+    def strip_html(value)
+      value.to_s.gsub(HTML_TAG, '').strip
+    end
+
+    # Builds the data structure that is later written to chatmk-data.json.
+    #
+    # @param site [Jekyll::Site]
+    # @return [Hash] with :pages and :notes arrays of entry hashes
+    def extract_brain_data(site)
+      { pages: page_entries(site), notes: note_entries(site) }
+    end
+
+    # Entries for every page whose path starts with "pages/".
+    #
+    # @param site [Jekyll::Site]
+    # @return [Array<Hash>]
+    def page_entries(site)
+      chat_pages = site.pages.select { |page| page.path.start_with?('pages/') }
+      chat_pages.map do |page|
+        {
+          # Use filename as title if no title is set
+          title: page.data['title'] || File.basename(page.path, '.*').capitalize,
+          content: page.content,
+          url: page.url,
+          excerpt: generate_excerpt(page, 200)
+        }
+      end
+    end
+
+    # Entries for notes in the "notes" collection with an included season.
+    # Returns an empty array when the site has no "notes" collection.
+    #
+    # @param site [Jekyll::Site]
+    # @return [Array<Hash>]
+    def note_entries(site)
+      return [] unless site.collections.key?('notes')
+
+      seasonal = site.collections['notes'].docs.select do |note|
+        INCLUDED_SEASONS.include?(note.data['season'])
+      end
+      seasonal.map do |note|
+        {
+          title: note.data['title'],
+          content: note.content,
+          url: note.url,
+          tags: note.data['tags'] || [],
+          excerpt: generate_excerpt(note, 150)
+        }
+      end
+    end
+  end
+  
+  # Hook to write the file after all processing is complete.
+  #
+  # Writes the generator's collected data to <dest>/assets/json/chatmk-data.json.
+  # If _plugins/chatmk_generate_embeddings.py exists in the site source, it is
+  # then run to produce a copy with embeddings that replaces the plain file.
+  # A failed embedding run only logs a warning; the plain JSON file is kept.
+  Jekyll::Hooks.register :site, :post_write do |site|
+    generator = site.generators.find { |g| g.is_a?(ChatMKDataGenerator) }
+    brain_data = generator&.instance_variable_get(:@brain_data)
+    next unless brain_data
+
+    # Write to destination assets/json directory
+    dest_dir = File.join(site.dest, 'assets', 'json')
+    dest_path = File.join(dest_dir, 'chatmk-data.json')
+    FileUtils.mkdir_p(dest_dir)
+    File.write(dest_path, JSON.generate(brain_data))
+
+    # Generate embeddings using Python script
+    embedding_script = File.join(site.source, '_plugins', 'chatmk_generate_embeddings.py')
+    unless File.exist?(embedding_script)
+      Jekyll.logger.warn "ChatMKDataGenerator:", "Embedding script not found, skipping embeddings"
+      next
+    end
+
+    temp_path = "#{dest_path}.tmp"
+    # Arguments are passed as a list so no shell is involved; paths containing
+    # spaces or shell metacharacters are handed to the script unchanged.
+    succeeded = system('python3', embedding_script, dest_path, temp_path)
+
+    # system returns false on a non-zero exit and nil when python3 cannot be
+    # started; only a successful run may replace the plain JSON file.
+    if succeeded && File.exist?(temp_path)
+      FileUtils.mv(temp_path, dest_path)
+      Jekyll.logger.info "ChatMKDataGenerator:", "Added embeddings to chatmk-data.json"
+    else
+      # Do not leave a partial temp file in the published site.
+      FileUtils.rm_f(temp_path)
+      Jekyll.logger.warn "ChatMKDataGenerator:", "Failed to generate embeddings, continuing without them"
+    end
+  end
+end
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+"""
+Generate embeddings for ChatMK content using sentence-transformers
+"""
+import json
+import sys
+import os
+
+def generate_embeddings(input_file, output_file):
+    """Add an 'embedding' list to every page and note and write the result.
+
+    Reads JSON from input_file (expects top-level 'pages' and 'notes' lists),
+    encodes "<title>. <excerpt>" of each entry with the 'all-MiniLM-L6-v2'
+    sentence-transformers model and writes the enriched data to output_file
+    as compact JSON.
+
+    Exits the process with status 1 if sentence-transformers is missing or
+    any step fails; a partially written output_file is removed in that case.
+    """
+    try:
+        from sentence_transformers import SentenceTransformer
+    except ImportError:
+        print("Error: sentence-transformers not installed. Install with: pip install sentence-transformers",
+              file=sys.stderr)
+        sys.exit(1)
+
+    output_started = False
+    try:
+        # Load the content data (explicit UTF-8 so the platform default is irrelevant)
+        with open(input_file, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+
+        # Initialize the embedding model (lightweight and good for semantic search)
+        print("Loading embedding model...")
+        model = SentenceTransformer('all-MiniLM-L6-v2')
+
+        # Pages and notes are embedded the same way
+        for section in ('pages', 'notes'):
+            entries = data[section]
+            print(f"Generating embeddings for {len(entries)} {section}...")
+            for entry in entries:
+                # Combine title and excerpt for better embedding; a missing or
+                # null value contributes an empty string rather than "None".
+                title = entry.get('title') or ''
+                excerpt = entry.get('excerpt') or ''
+                entry['embedding'] = model.encode(f"{title}. {excerpt}").tolist()
+
+        # Save the enhanced data
+        output_started = True
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(data, f, separators=(',', ':'))
+
+        print(f"Generated embeddings for {len(data['pages'])} pages and {len(data['notes'])} notes")
+
+    except Exception as e:
+        print(f"Error generating embeddings: {e}", file=sys.stderr)
+        # The caller treats the existence of output_file as success, so a
+        # half-written file must not be left behind.
+        if output_started and os.path.exists(output_file):
+            os.remove(output_file)
+        sys.exit(1)
+
+# Command-line entry point: <script> <input_file> <output_file>
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        # Report the name the script was actually invoked with.
+        script_name = os.path.basename(sys.argv[0])
+        print(f"Usage: python {script_name} <input_file> <output_file>", file=sys.stderr)
+        sys.exit(1)
+
+    input_file = sys.argv[1]
+    output_file = sys.argv[2]
+
+    # Fail early with a clear message instead of an exception from open()
+    if not os.path.exists(input_file):
+        print(f"Error: Input file '{input_file}' not found", file=sys.stderr)
+        sys.exit(1)
+
+    generate_embeddings(input_file, output_file)