> The script will:
>- Pull the SearxNG Docker image
>- Install [infinity](https://github.com/michaelfeil/infinity) in a dedicated Python environment `infinity_env`
>- Create a Python virtual environment for CoexistAI in `coexist_env`
>- Install Python dependencies

4. **Run!**

   Call `zsh quick_start.sh` or `bash quick_start.sh` to start the FastAPI server and MCP server.
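   Once the script finishes, you can sanity-check that the API is live. A minimal sketch, assuming the defaults `HOST_APP="localhost"` and `PORT_NUM_APP=8000` from `model_config.py` (FastAPI serves its interactive docs at `/docs` by default):

   ```python
   # Confirm the CoexistAI FastAPI server is responding.
   import requests

   resp = requests.get("http://localhost:8000/docs", timeout=5)
   print(resp.status_code)  # expect 200 once the server is ready
   ```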
**Note:**
- Make sure Docker, Python 3, and pip are installed on your system.
- Edit `quick_start.sh` to set your real `GOOGLE_API_KEY` before running (needed if you use Google models).

### Windows users
On Windows:
1. Run a Valkey container following the instructions [here](https://github.com/valkey-io/valkey-py):
   ```
   docker run -p 6379:6379 -it valkey/valkey:latest
   ```
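   To confirm the container is reachable, here is a minimal sketch using the [valkey-py](https://github.com/valkey-io/valkey-py) client linked above (assumes the default port mapping and `pip install valkey`):

   ```python
   # Ping the local Valkey container.
   import valkey

   client = valkey.Valkey(host="localhost", port=6379)
   print(client.ping())  # True means the container is up and accepting connections
   ```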
2. Run a SearXNG instance following the instructions [here](https://www.tanyongsheng.com/note/setting-up-searxng-on-windows-localhost-your-private-customizable-search-engine/).
   Make sure to enable JSON output in SearXNG (otherwise you will get a 403 Forbidden error on every request).
   Make sure the instance works by visiting [this link](http://localhost:8080/search?q=When%20was%20Napoleon%20born?&engines=google,brave&format=json) (you may need to change the port); a scripted version of this check is sketched below.
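   A small sketch of that check using `requests` (adjust the port if your instance differs):

   ```python
   # Query the local SearXNG instance and confirm it serves JSON.
   import requests

   resp = requests.get(
       "http://localhost:8080/search",
       params={"q": "When was Napoleon born?", "engines": "google,brave", "format": "json"},
   )
   resp.raise_for_status()  # a 403 here usually means JSON output is not enabled
   print(len(resp.json().get("results", [])))  # number of results returned
   ```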
3. In `model_config.py`, set `START_SEARXNG = 0` (since we are running our own instance) and the correct port in `PORT_NUM_SEARXNG`. For reference, the updated excerpt of `model_config.py`:
This module defines the configuration for language model (LLM) and embedding models.

Attributes:
    for ollama and other local models use "others" with base_url updated in openai_compatible.
    If you are using the "others" llm type, check the openai_compatible url dict for the "others" key;
    you can generally find it by googling "YOUR PROVIDER name openai api base compatible url".
    - "llm_tools" (list): List of tools or plugins to use with the LLM.
    - "llm_kwargs" (dict): Additional keyword arguments for LLM initialization.
    - "temperature" (float): Sampling temperature for generation.
    ...
    - "cross_encoder_name" (str): Name of the cross-encoder model for reranking.
"""

############## PORT and HOST SETTINGS
PORT_NUM_SEARXNG = 8080
PORT_NUM_APP = 8000
HOST_APP = "localhost"
HOST_SEARXNG = "localhost"
START_SEARXNG = 0
###############

## USER INPUTS NEEDED
# for open source models you can use 'DUMMY' (for both llm and embed); otherwise use the respective provider's key
llm_api_key = os.environ.get(
    "GOOGLE_API_KEY", "DUMMY"
)  # either paste the llm key for your provider (for instance, Google) here directly or export it in the env; 'DUMMY' for local models
embed_api_key = os.environ.get(
    "GOOGLE_API_KEY", "DUMMY"
)  # either paste the embedder key for your provider here directly or export it in the env; 'DUMMY' for local models
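
# Note (illustrative): instead of pasting a key above, you can export it before
# starting the app (e.g. `export GOOGLE_API_KEY=...` in your shell); the
# os.environ.get() lookups will pick it up, and 'DUMMY' stays the fallback for local models.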

model_config = {
    # Name of the LLM model to use. For local models, use the model name served by your local server.
    "llm_model_name": "yasserrmd/jan-nano-4b",
    # LLM provider type: choose from 'google', 'local', 'groq', 'openai' or 'others'.
    # In case of 'others', the base url needs to be updated in the `openai_compatible` dictionary below.
    # Make sure to update the api_key variable above to match the provider.
    # "local" is for lmstudio; for ollama and other local models use "others" with base_url updated in openai_compatible.
    # You can generally find it by googling "YOUR PROVIDER (example: ollama) openai api base compatible url".
    "llm_type": "others",
    # List of tools or plugins to use with the LLM, if any. Set to None if not used.
    "llm_tools": None,
    # Additional keyword arguments for LLM initialization.
    "llm_kwargs": {
        "temperature": 0.1,  # Sampling temperature for generation.
        "max_tokens": None,  # Maximum number of tokens to generate (None for default).
        "timeout": None,  # Timeout for API requests (None for default).
        "max_retries": 2,  # Maximum number of retries for failed requests.
        "api_key": llm_api_key,  # API key for authentication.
    },
    # Name of the embedding model to use.
    # For Google, use their embedding model names. For local/HuggingFace, use the model path or name.
    # Tested models can be found at https://github.com/michaelfeil/infinity?tab=readme-ov-file#supported-tasks-and-models-by-infinity
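
# Illustrative sketch (an assumption, not the project's actual schema): with
# "llm_type": "others", the base URL comes from the `openai_compatible` dict
# referenced above. For Ollama, whose OpenAI-compatible endpoint is
# conventionally served at http://localhost:11434/v1, an entry might look like:
#
#     openai_compatible = {"others": "http://localhost:11434/v1"}
#
# Check the dict's actual structure in this file before relying on this.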
0 commit comments