From 0b4ae8d4f611b891be1c530f84757ed468636be5 Mon Sep 17 00:00:00 2001
From: Joevenner <mosaab@192.168.1.6>
Date: Sat, 14 Feb 2026 15:00:21 +0100
Subject: [PATCH 1/3] feat: Add multi-provider LLM support via LiteLLM
 integration

Replace OpenAI-only implementation with LiteLLM to support 100+ LLM providers
including Anthropic Claude, Google Gemini, Azure OpenAI, AWS Bedrock, Groq,
and local Ollama models.

Changes:
- Add litellm>=1.0.0 dependency
- Refactor ChatGPT_API functions to use litellm.completion()
- Enhance count_tokens() for multi-provider token counting
- Update config.yaml with provider-specific model examples
- Update README.md with multi-provider setup instructions

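Intended to be backward compatible: existing OPENAI_API_KEY setups still work.
NOTE(review): this patch removes the CHATGPT_API_KEY -> OPENAI_API_KEY alias
from pageindex/utils.py, and the new api_key parameters default to None, so
CHATGPT_API_KEY on its own no longer appears to reach LiteLLM; confirm before
relying on it.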
Default model remains gpt-4o-2024-11-20.
---
 README.md             |  49 ++++++++++++-
 pageindex/config.yaml |  35 +++++++++
 pageindex/utils.py    | 165 +++++++++++++++++++++++++++++++++++-------
 requirements.txt      |   4 +-
 4 files changed, 222 insertions(+), 31 deletions(-)

diff --git a/README.md b/README.md
index 7180efd5a..f4ffab311 100644
--- a/README.md
+++ b/README.md
@@ -147,14 +147,49 @@ You can follow these steps to generate a PageIndex tree from a PDF document.
 pip3 install --upgrade -r requirements.txt
 ```
 
-### 2. Set your OpenAI API key
+### 2. Set your API key
 
-Create a `.env` file in the root directory and add your API key:
+PageIndex now supports multiple LLM providers via [LiteLLM](https://docs.litellm.ai/). Create a `.env` file in the root directory and add your API key:
 
+**OpenAI (default):**
 ```bash
+OPENAI_API_KEY=your_openai_key_here
+# or
 CHATGPT_API_KEY=your_openai_key_here
 ```
 
+**Anthropic Claude:**
+```bash
+ANTHROPIC_API_KEY=your_anthropic_key_here
+```
+
+**Google Gemini:**
+```bash
+GEMINI_API_KEY=your_google_key_here
+```
+
+**Azure OpenAI:**
+```bash
+AZURE_API_KEY=your_azure_key_here
+AZURE_API_BASE=your_azure_endpoint
+AZURE_API_VERSION=2024-02-01
+```
+
+**AWS Bedrock:**
+```bash
+AWS_ACCESS_KEY_ID=your_access_key
+AWS_SECRET_ACCESS_KEY=your_secret_key
+AWS_REGION_NAME=us-east-1
+```
+
+**Groq:**
+```bash
+GROQ_API_KEY=your_groq_key_here
+```
+
+**Ollama (local):**
+No API key needed. Just ensure Ollama is running locally.
+
 ### 3. Run PageIndex on your PDF
 
 ```bash
@@ -167,7 +202,15 @@ python3 run_pageindex.py --pdf_path /path/to/your/document.pdf
 You can customize the processing with additional optional arguments:
 
 ```
---model                 OpenAI model to use (default: gpt-4o-2024-11-20)
+--model                 LLM model to use (default: gpt-4o-2024-11-20)
+                        Examples:
+                          - OpenAI: gpt-4o, gpt-4-turbo
+                          - Claude: claude-3-opus-20240229, claude-3-sonnet-20240229
+                          - Gemini: gemini/gemini-pro, gemini/gemini-1.5-pro
+                          - Azure: azure/your-deployment-name
+                          - Bedrock: bedrock/anthropic.claude-3-opus-20240229-v1:0
+                          - Groq: groq/llama-3.1-70b-versatile
+                          - Ollama: ollama/llama3
 --toc-check-pages       Pages to check for table of contents (default: 20)
 --max-pages-per-node    Max pages per node (default: 10)
 --max-tokens-per-node   Max tokens per node (default: 20000)
diff --git a/pageindex/config.yaml b/pageindex/config.yaml
index aa60a1f91..0b44a1c18 100644
--- a/pageindex/config.yaml
+++ b/pageindex/config.yaml
@@ -1,3 +1,38 @@
+# PageIndex Configuration
+# 
+# Model Configuration:
+# PageIndex now supports multiple LLM providers via LiteLLM.
+# Set the model string according to your preferred provider:
+#
+# OpenAI (default):
+#   model: "gpt-4o-2024-11-20" or "gpt-4o" or "gpt-4-turbo"
+#   Env var: OPENAI_API_KEY or CHATGPT_API_KEY
+#
+# Anthropic Claude:
+#   model: "claude-3-opus-20240229" or "claude-3-sonnet-20240229"
+#   Env var: ANTHROPIC_API_KEY
+#
+# Google Gemini:
+#   model: "gemini/gemini-pro" or "gemini/gemini-1.5-pro"
+#   Env var: GEMINI_API_KEY
+#
+# Azure OpenAI:
+#   model: "azure/your-deployment-name"
+#   Env vars: AZURE_API_KEY, AZURE_API_BASE, AZURE_API_VERSION
+#
+# AWS Bedrock:
+#   model: "bedrock/anthropic.claude-3-opus-20240229-v1:0"
+#   Env vars: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION_NAME
+#
+# Groq:
+#   model: "groq/llama-3.1-70b-versatile"
+#   Env var: GROQ_API_KEY
+#
+# Ollama (local):
+#   model: "ollama/llama3"
+#
+# For more providers, see: https://docs.litellm.ai/docs/providers
+
 model: "gpt-4o-2024-11-20"
 # model: "anthropic/claude-sonnet-4-6"
 toc_check_page_num: 20
diff --git a/pageindex/utils.py b/pageindex/utils.py
index 57b69c5b5..e9f59ffc9 100644
--- a/pageindex/utils.py
+++ b/pageindex/utils.py
@@ -1,3 +1,4 @@
+import tiktoken
 import litellm
 import logging
 import os
@@ -16,65 +17,174 @@
 from pathlib import Path
 from types import SimpleNamespace as config
 
-# Backward compatibility: support CHATGPT_API_KEY as alias for OPENAI_API_KEY
-if not os.getenv("OPENAI_API_KEY") and os.getenv("CHATGPT_API_KEY"):
-    os.environ["OPENAI_API_KEY"] = os.getenv("CHATGPT_API_KEY")
+# Support multiple API key environment variables for different providers
+CHATGPT_API_KEY = os.getenv("CHATGPT_API_KEY") or os.getenv("OPENAI_API_KEY")
 
-litellm.drop_params = True
+# Configure LiteLLM to use environment variables for different providers
+# Users can set: OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY, etc.
+# See: https://docs.litellm.ai/docs/providers
 
 def count_tokens(text, model=None):
+    """
+    Count tokens in text using the appropriate tokenizer for the model.
+    Uses tiktoken for OpenAI models and LiteLLM's token counter for other providers.
+    """
     if not text:
         return 0
-    return litellm.token_counter(model=model, text=text)
-
+    
+    # Check if it's an OpenAI model (no prefix or openai/ prefix)
+    model_lower = model.lower() if model else ""
+    is_openai_model = (
+        not "/" in model or 
+        model_lower.startswith("openai/") or
+        model_lower.startswith("gpt-") or
+        model_lower.startswith("o1-") or
+        model_lower.startswith("o3-")
+    )
+    
+    if is_openai_model:
+        # Use tiktoken for OpenAI models
+        try:
+            # Strip openai/ prefix if present
+            clean_model = model.replace("openai/", "") if model else "gpt-4o"
+            enc = tiktoken.encoding_for_model(clean_model)
+            tokens = enc.encode(text)
+            return len(tokens)
+        except KeyError:
+            # Fallback to cl100k_base encoding for unknown OpenAI models
+            enc = tiktoken.get_encoding("cl100k_base")
+            tokens = enc.encode(text)
+            return len(tokens)
+    else:
+        # Use LiteLLM's token counter for other providers
+        try:
+            return litellm.token_counter(model=model, text=text)
+        except Exception:
+            # Fallback to approximate counting (4 chars per token)
+            return len(text) // 4
 
-def llm_completion(model, prompt, chat_history=None, return_finish_reason=False):
+def ChatGPT_API_with_finish_reason(model, prompt, api_key=None, chat_history=None):
+    """
+    Synchronous chat completion API with finish reason tracking.
+    Uses LiteLLM to support multiple LLM providers.
+    
+    Args:
+        model: Model string (e.g., "gpt-4o", "claude-3-opus-20240229", "gemini/gemini-pro")
+        prompt: User prompt
+        api_key: API key (optional, uses environment variables if not provided)
+        chat_history: Previous conversation history
+    
+    Returns:
+        Tuple of (response_content, finish_reason)
+    """
     max_retries = 10
-    messages = list(chat_history) + [{"role": "user", "content": prompt}] if chat_history else [{"role": "user", "content": prompt}]
+    
+    # Build messages list
+    if chat_history:
+        messages = chat_history.copy()
+        messages.append({"role": "user", "content": prompt})
+    else:
+        messages = [{"role": "user", "content": prompt}]
+    
     for i in range(max_retries):
         try:
             response = litellm.completion(
                 model=model,
                 messages=messages,
                 temperature=0,
+                api_key=api_key,
             )
-            content = response.choices[0].message.content
-            if return_finish_reason:
-                finish_reason = "max_output_reached" if response.choices[0].finish_reason == "length" else "finished"
-                return content, finish_reason
-            return content
+            if response.choices[0].finish_reason == "length":
+                return response.choices[0].message.content, "max_output_reached"
+            else:
+                return response.choices[0].message.content, "finished"
+
         except Exception as e:
             print('************* Retrying *************')
             logging.error(f"Error: {e}")
             if i < max_retries - 1:
-                time.sleep(1)
+                time.sleep(1)  # Wait for 1s before retrying
             else:
                 logging.error('Max retries reached for prompt: ' + prompt)
-                if return_finish_reason:
-                    return "", "error"
-                return ""
+                return "Error", "error"
+
 
 
+def ChatGPT_API(model, prompt, api_key=None, chat_history=None):
+    """
+    Synchronous chat completion API.
+    Uses LiteLLM to support multiple LLM providers.
+    
+    Args:
+        model: Model string (e.g., "gpt-4o", "claude-3-opus-20240229", "gemini/gemini-pro")
+        prompt: User prompt
+        api_key: API key (optional, uses environment variables if not provided)
+        chat_history: Previous conversation history
+    
+    Returns:
+        Response content string
+    """
+    max_retries = 10
+    
+    # Build messages list
+    if chat_history:
+        messages = chat_history.copy()
+        messages.append({"role": "user", "content": prompt})
+    else:
+        messages = [{"role": "user", "content": prompt}]
+    
+    for i in range(max_retries):
+        try:
+            response = litellm.completion(
+                model=model,
+                messages=messages,
+                temperature=0,
+                api_key=api_key,
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            print('************* Retrying *************')
+            logging.error(f"Error: {e}")
+            if i < max_retries - 1:
+                time.sleep(1)  # Wait for 1s before retrying
+            else:
+                logging.error('Max retries reached for prompt: ' + prompt)
+                return "Error"
+            
 
-async def llm_acompletion(model, prompt):
+async def ChatGPT_API_async(model, prompt, api_key=None):
+    """
+    Asynchronous chat completion API.
+    Uses LiteLLM to support multiple LLM providers.
+    
+    Args:
+        model: Model string (e.g., "gpt-4o", "claude-3-opus-20240229", "gemini/gemini-pro")
+        prompt: User prompt
+        api_key: API key (optional, uses environment variables if not provided)
+    
+    Returns:
+        Response content string
+    """
     max_retries = 10
     messages = [{"role": "user", "content": prompt}]
+    
     for i in range(max_retries):
         try:
             response = await litellm.acompletion(
                 model=model,
                 messages=messages,
                 temperature=0,
+                api_key=api_key,
             )
             return response.choices[0].message.content
         except Exception as e:
             print('************* Retrying *************')
             logging.error(f"Error: {e}")
             if i < max_retries - 1:
-                await asyncio.sleep(1)
+                await asyncio.sleep(1)  # Wait for 1s before retrying
             else:
                 logging.error('Max retries reached for prompt: ' + prompt)
-                return ""
+                return "Error"  
             
             
 def get_json_content(response):
@@ -379,14 +489,15 @@ def add_preface_if_needed(data):
 
 
 
-def get_page_tokens(pdf_path, model=None, pdf_parser="PyPDF2"):
+def get_page_tokens(pdf_path, model="gpt-4o-2024-11-20", pdf_parser="PyPDF2"):
+    enc = tiktoken.encoding_for_model(model)
     if pdf_parser == "PyPDF2":
         pdf_reader = PyPDF2.PdfReader(pdf_path)
         page_list = []
         for page_num in range(len(pdf_reader.pages)):
             page = pdf_reader.pages[page_num]
             page_text = page.extract_text()
-            token_length = litellm.token_counter(model=model, text=page_text)
+            token_length = len(enc.encode(page_text))
             page_list.append((page_text, token_length))
         return page_list
     elif pdf_parser == "PyMuPDF":
@@ -398,7 +509,7 @@ def get_page_tokens(pdf_path, model=None, pdf_parser="PyPDF2"):
         page_list = []
         for page in doc:
             page_text = page.get_text()
-            token_length = litellm.token_counter(model=model, text=page_text)
+            token_length = len(enc.encode(page_text))
             page_list.append((page_text, token_length))
         return page_list
     else:
@@ -501,7 +612,7 @@ def remove_structure_text(data):
 def check_token_limit(structure, limit=110000):
     list = structure_to_list(structure)
     for node in list:
-        num_tokens = count_tokens(node['text'], model=None)
+        num_tokens = count_tokens(node['text'], model='gpt-4o')
         if num_tokens > limit:
             print(f"Node ID: {node['node_id']} has {num_tokens} tokens")
             print("Start Index:", node['start_index'])
@@ -577,7 +688,7 @@ async def generate_node_summary(node, model=None):
     
     Directly return the description, do not include any other text.
     """
-    response = await llm_acompletion(model, prompt)
+    response = await ChatGPT_API_async(model, prompt)
     return response
 
 
@@ -622,7 +733,7 @@ def generate_doc_description(structure, model=None):
     
     Directly return the description, do not include any other text.
     """
-    response = llm_completion(model, prompt)
+    response = ChatGPT_API(model, prompt)
     return response
 
 
@@ -677,4 +788,4 @@ def load(self, user_opt=None) -> config:
 
         self._validate_keys(user_dict)
         merged = {**self._default_dict, **user_dict}
-        return config(**merged)
+        return config(**merged)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 3b82eda0b..d595e0fc9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,7 @@
-litellm==1.82.0
+litellm>=1.0.0
+openai>=1.0.0
 pymupdf==1.26.4
 PyPDF2==3.0.1
 python-dotenv==1.1.0
+tiktoken==0.11.0
 pyyaml==6.0.2

From 4c34e60c3bad944583ee5c9fcf3e84bdb30da0cb Mon Sep 17 00:00:00 2001
From: JoeVenner <ylafrimi@gmail.com>
Date: Sun, 22 Mar 2026 14:16:49 +0100
Subject: [PATCH 2/3] Refactor token counting to use LiteLLM for
 multi-provider support, removing the reliance on direct tiktoken calls and
 hardcoded models, and clean up the tokenization logic to prevent crashes

---
 pageindex/page_index.py | 70 ++++++++++++++++++-----------------------
 pageindex/utils.py      | 50 +++++++----------------------
 2 files changed, 42 insertions(+), 78 deletions(-)

diff --git a/pageindex/page_index.py b/pageindex/page_index.py
index 719255463..5f9aaa8f2 100644
--- a/pageindex/page_index.py
+++ b/pageindex/page_index.py
@@ -36,7 +36,7 @@ async def check_title_appearance(item, page_list, start_index=1, model=None):
     }}
     Directly return the final JSON structure. Do not output anything else."""
 
-    response = await llm_acompletion(model=model, prompt=prompt)
+    response = await ChatGPT_API_async(model=model, prompt=prompt)
     response = extract_json(response)
     if 'answer' in response:
         answer = response['answer']
@@ -64,7 +64,7 @@ async def check_title_appearance_in_start(title, page_text, model=None, logger=N
     }}
     Directly return the final JSON structure. Do not output anything else."""
 
-    response = await llm_acompletion(model=model, prompt=prompt)
+    response = await ChatGPT_API_async(model=model, prompt=prompt)
     response = extract_json(response)
     if logger:
         logger.info(f"Response: {response}")
@@ -116,7 +116,7 @@ def toc_detector_single_page(content, model=None):
     Directly return the final JSON structure. Do not output anything else.
     Please note: abstract,summary, notation list, figure list, table list, etc. are not table of contents."""
 
-    response = llm_completion(model=model, prompt=prompt)
+    response = ChatGPT_API(model=model, prompt=prompt)
     # print('response', response)
     json_content = extract_json(response)    
     return json_content['toc_detected']
@@ -135,7 +135,7 @@ def check_if_toc_extraction_is_complete(content, toc, model=None):
     Directly return the final JSON structure. Do not output anything else."""
 
     prompt = prompt + '\n Document:\n' + content + '\n Table of contents:\n' + toc
-    response = llm_completion(model=model, prompt=prompt)
+    response = ChatGPT_API(model=model, prompt=prompt)
     json_content = extract_json(response)
     return json_content['completed']
 
@@ -153,7 +153,7 @@ def check_if_toc_transformation_is_complete(content, toc, model=None):
     Directly return the final JSON structure. Do not output anything else."""
 
     prompt = prompt + '\n Raw Table of contents:\n' + content + '\n Cleaned Table of contents:\n' + toc
-    response = llm_completion(model=model, prompt=prompt)
+    response = ChatGPT_API(model=model, prompt=prompt)
     json_content = extract_json(response)
     return json_content['completed']
 
@@ -165,7 +165,7 @@ def extract_toc_content(content, model=None):
 
     Directly return the full table of contents content. Do not output anything else."""
 
-    response, finish_reason = llm_completion(model=model, prompt=prompt, return_finish_reason=True)
+    response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
     
     if_complete = check_if_toc_transformation_is_complete(content, response, model)
     if if_complete == "yes" and finish_reason == "finished":
@@ -176,26 +176,23 @@ def extract_toc_content(content, model=None):
         {"role": "assistant", "content": response},    
     ]
     prompt = f"""please continue the generation of table of contents , directly output the remaining part of the structure"""
-    new_response, finish_reason = llm_completion(model=model, prompt=prompt, chat_history=chat_history, return_finish_reason=True)
+    new_response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, chat_history=chat_history)
     response = response + new_response
     if_complete = check_if_toc_transformation_is_complete(content, response, model)
     
-    attempt = 0
-    max_attempts = 5
-
     while not (if_complete == "yes" and finish_reason == "finished"):
-        attempt += 1
-        if attempt > max_attempts:
-            raise Exception('Failed to complete table of contents after maximum retries')
-
         chat_history = [
-            {"role": "user", "content": prompt},
-            {"role": "assistant", "content": response},
+            {"role": "user", "content": prompt}, 
+            {"role": "assistant", "content": response},    
         ]
         prompt = f"""please continue the generation of table of contents , directly output the remaining part of the structure"""
-        new_response, finish_reason = llm_completion(model=model, prompt=prompt, chat_history=chat_history, return_finish_reason=True)
+        new_response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, chat_history=chat_history)
         response = response + new_response
         if_complete = check_if_toc_transformation_is_complete(content, response, model)
+        
+        # Optional: Add a maximum retry limit to prevent infinite loops
+        if len(chat_history) > 5:  # Arbitrary limit of 10 attempts
+            raise Exception('Failed to complete table of contents after maximum retries')
     
     return response
 
@@ -215,7 +212,7 @@ def detect_page_index(toc_content, model=None):
     }}
     Directly return the final JSON structure. Do not output anything else."""
 
-    response = llm_completion(model=model, prompt=prompt)
+    response = ChatGPT_API(model=model, prompt=prompt)
     json_content = extract_json(response)
     return json_content['page_index_given_in_toc']
 
@@ -242,7 +239,7 @@ def transform_dots_to_colon(text):
 
 def toc_index_extractor(toc, content, model=None):
     print('start toc_index_extractor')
-    toc_extractor_prompt = """
+    tob_extractor_prompt = """
     You are given a table of contents in a json format and several pages of a document, your job is to add the physical_index to the table of contents in the json format.
 
     The provided pages contains tags like <physical_index_X> and <physical_index_X> to indicate the physical location of the page X.
@@ -263,8 +260,8 @@ def toc_index_extractor(toc, content, model=None):
     If the section is not in the provided pages, do not add the physical_index to it.
     Directly return the final JSON structure. Do not output anything else."""
 
-    prompt = toc_extractor_prompt + '\nTable of contents:\n' + str(toc) + '\nDocument pages:\n' + content
-    response = llm_completion(model=model, prompt=prompt)
+    prompt = tob_extractor_prompt + '\nTable of contents:\n' + str(toc) + '\nDocument pages:\n' + content
+    response = ChatGPT_API(model=model, prompt=prompt)
     json_content = extract_json(response)    
     return json_content
 
@@ -292,7 +289,7 @@ def toc_transformer(toc_content, model=None):
     Directly return the final JSON structure, do not output anything else. """
 
     prompt = init_prompt + '\n Given table of contents\n:' + toc_content
-    last_complete, finish_reason = llm_completion(model=model, prompt=prompt, return_finish_reason=True)
+    last_complete, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
     if_complete = check_if_toc_transformation_is_complete(toc_content, last_complete, model)
     if if_complete == "yes" and finish_reason == "finished":
         last_complete = extract_json(last_complete)
@@ -300,12 +297,7 @@ def toc_transformer(toc_content, model=None):
         return cleaned_response
     
     last_complete = get_json_content(last_complete)
-    attempt = 0
-    max_attempts = 5
     while not (if_complete == "yes" and finish_reason == "finished"):
-        attempt += 1
-        if attempt > max_attempts:
-            raise Exception('Failed to complete toc transformation after maximum retries')
         position = last_complete.rfind('}')
         if position != -1:
             last_complete = last_complete[:position+2]
@@ -321,7 +313,7 @@ def toc_transformer(toc_content, model=None):
 
         Please continue the json structure, directly output the remaining part of the json structure."""
 
-        new_complete, finish_reason = llm_completion(model=model, prompt=prompt, return_finish_reason=True)
+        new_complete, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
 
         if new_complete.startswith('```json'):
             new_complete =  get_json_content(new_complete)
@@ -482,7 +474,7 @@ def add_page_number_to_toc(part, structure, model=None):
     Directly return the final JSON structure. Do not output anything else."""
 
     prompt = fill_prompt_seq + f"\n\nCurrent Partial Document:\n{part}\n\nGiven Structure\n{json.dumps(structure, indent=2)}\n"
-    current_json_raw = llm_completion(model=model, prompt=prompt)
+    current_json_raw = ChatGPT_API(model=model, prompt=prompt)
     json_result = extract_json(current_json_raw)
     
     for item in json_result:
@@ -532,7 +524,7 @@ def generate_toc_continue(toc_content, part, model=None):
     Directly return the additional part of the final JSON structure. Do not output anything else."""
 
     prompt = prompt + '\nGiven text\n:' + part + '\nPrevious tree structure\n:' + json.dumps(toc_content, indent=2)
-    response, finish_reason = llm_completion(model=model, prompt=prompt, return_finish_reason=True)
+    response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
     if finish_reason == 'finished':
         return extract_json(response)
     else:
@@ -566,7 +558,7 @@ def generate_toc_init(part, model=None):
     Directly return the final JSON structure. Do not output anything else."""
 
     prompt = prompt + '\nGiven text\n:' + part
-    response, finish_reason = llm_completion(model=model, prompt=prompt, return_finish_reason=True)
+    response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
 
     if finish_reason == 'finished':
          return extract_json(response)
@@ -737,8 +729,8 @@ def check_toc(page_list, opt=None):
 
 
 ################### fix incorrect toc #########################################################
-async def single_toc_item_index_fixer(section_title, content, model=None):
-    toc_extractor_prompt = """
+def single_toc_item_index_fixer(section_title, content, model=None):
+    tob_extractor_prompt = """
     You are given a section title and several pages of a document, your job is to find the physical index of the start page of the section in the partial document.
 
     The provided pages contains tags like <physical_index_X> and <physical_index_X> to indicate the physical location of the page X.
@@ -750,8 +742,8 @@ async def single_toc_item_index_fixer(section_title, content, model=None):
     }
     Directly return the final JSON structure. Do not output anything else."""
 
-    prompt = toc_extractor_prompt + '\nSection Title:\n' + str(section_title) + '\nDocument pages:\n' + content
-    response = await llm_acompletion(model=model, prompt=prompt)
+    prompt = tob_extractor_prompt + '\nSection Title:\n' + str(section_title) + '\nDocument pages:\n' + content
+    response = ChatGPT_API(model=model, prompt=prompt)
     json_content = extract_json(response)    
     return convert_physical_index_to_int(json_content['physical_index'])
 
@@ -812,15 +804,15 @@ async def process_and_check_item(incorrect_item):
         page_contents=[]
         for page_index in range(prev_correct, next_correct+1):
             # Add bounds checking to prevent IndexError
-            page_list_idx = page_index - start_index
-            if page_list_idx >= 0 and page_list_idx < len(page_list):
-                page_text = f"<physical_index_{page_index}>\n{page_list[page_list_idx][0]}\n<physical_index_{page_index}>\n\n"
+            list_index = page_index - start_index
+            if list_index >= 0 and list_index < len(page_list):
+                page_text = f"<physical_index_{page_index}>\n{page_list[list_index][0]}\n<physical_index_{page_index}>\n\n"
                 page_contents.append(page_text)
             else:
                 continue
         content_range = ''.join(page_contents)
         
-        physical_index_int = await single_toc_item_index_fixer(incorrect_item['title'], content_range, model)
+        physical_index_int = single_toc_item_index_fixer(incorrect_item['title'], content_range, model)
         
         # Check if the result is correct
         check_item = incorrect_item.copy()
diff --git a/pageindex/utils.py b/pageindex/utils.py
index e9f59ffc9..e73e469a6 100644
--- a/pageindex/utils.py
+++ b/pageindex/utils.py
@@ -1,4 +1,3 @@
-import tiktoken
 import litellm
 import logging
 import os
@@ -26,42 +25,16 @@
 
 def count_tokens(text, model=None):
     """
-    Count tokens in text using the appropriate tokenizer for the model.
-    Uses tiktoken for OpenAI models and LiteLLM's token counter for other providers.
+    Count tokens in text using LiteLLM's token counter, which automatically
+    selects the appropriate tokenizer for each provider.
     """
     if not text:
         return 0
-    
-    # Check if it's an OpenAI model (no prefix or openai/ prefix)
-    model_lower = model.lower() if model else ""
-    is_openai_model = (
-        not "/" in model or 
-        model_lower.startswith("openai/") or
-        model_lower.startswith("gpt-") or
-        model_lower.startswith("o1-") or
-        model_lower.startswith("o3-")
-    )
-    
-    if is_openai_model:
-        # Use tiktoken for OpenAI models
-        try:
-            # Strip openai/ prefix if present
-            clean_model = model.replace("openai/", "") if model else "gpt-4o"
-            enc = tiktoken.encoding_for_model(clean_model)
-            tokens = enc.encode(text)
-            return len(tokens)
-        except KeyError:
-            # Fallback to cl100k_base encoding for unknown OpenAI models
-            enc = tiktoken.get_encoding("cl100k_base")
-            tokens = enc.encode(text)
-            return len(tokens)
-    else:
-        # Use LiteLLM's token counter for other providers
-        try:
-            return litellm.token_counter(model=model, text=text)
-        except Exception:
-            # Fallback to approximate counting (4 chars per token)
-            return len(text) // 4
+    try:
+        return litellm.token_counter(model=model or "gpt-4o", text=text)
+    except Exception:
+        # Fallback to approximate counting (4 chars per token)
+        return len(text) // 4
 
 def ChatGPT_API_with_finish_reason(model, prompt, api_key=None, chat_history=None):
     """
@@ -490,14 +463,13 @@ def add_preface_if_needed(data):
 
 
 def get_page_tokens(pdf_path, model="gpt-4o-2024-11-20", pdf_parser="PyPDF2"):
-    enc = tiktoken.encoding_for_model(model)
     if pdf_parser == "PyPDF2":
         pdf_reader = PyPDF2.PdfReader(pdf_path)
         page_list = []
         for page_num in range(len(pdf_reader.pages)):
             page = pdf_reader.pages[page_num]
             page_text = page.extract_text()
-            token_length = len(enc.encode(page_text))
+            token_length = count_tokens(page_text, model=model)
             page_list.append((page_text, token_length))
         return page_list
     elif pdf_parser == "PyMuPDF":
@@ -509,7 +481,7 @@ def get_page_tokens(pdf_path, model="gpt-4o-2024-11-20", pdf_parser="PyPDF2"):
         page_list = []
         for page in doc:
             page_text = page.get_text()
-            token_length = len(enc.encode(page_text))
+            token_length = count_tokens(page_text, model=model)
             page_list.append((page_text, token_length))
         return page_list
     else:
@@ -609,10 +581,10 @@ def remove_structure_text(data):
     return data
 
 
-def check_token_limit(structure, limit=110000):
+def check_token_limit(structure, limit=110000, model=None):
     list = structure_to_list(structure)
     for node in list:
-        num_tokens = count_tokens(node['text'], model='gpt-4o')
+        num_tokens = count_tokens(node['text'], model=model)
         if num_tokens > limit:
             print(f"Node ID: {node['node_id']} has {num_tokens} tokens")
             print("Start Index:", node['start_index'])

From 7d632096afaf0eb690d75e3fa974855341cde442 Mon Sep 17 00:00:00 2001
From: JoeVenner <ylafrimi@gmail.com>
Date: Sun, 22 Mar 2026 14:23:54 +0100
Subject: [PATCH 3/3] Resolve merge conflicts: accept upstream LiteLLM
 integration

Accept upstream's LiteLLM implementation (llm_completion/llm_acompletion,
pinned litellm==1.82.0, ConfigLoader, drop_params) which supersedes
our parallel implementation.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 pageindex/page_index.py |  70 +++++++++++---------
 pageindex/utils.py      | 139 ++++++++--------------------------------
 requirements.txt        |   4 +-
 3 files changed, 68 insertions(+), 145 deletions(-)

diff --git a/pageindex/page_index.py b/pageindex/page_index.py
index 5f9aaa8f2..719255463 100644
--- a/pageindex/page_index.py
+++ b/pageindex/page_index.py
@@ -36,7 +36,7 @@ async def check_title_appearance(item, page_list, start_index=1, model=None):
     }}
     Directly return the final JSON structure. Do not output anything else."""
 
-    response = await ChatGPT_API_async(model=model, prompt=prompt)
+    response = await llm_acompletion(model=model, prompt=prompt)
     response = extract_json(response)
     if 'answer' in response:
         answer = response['answer']
@@ -64,7 +64,7 @@ async def check_title_appearance_in_start(title, page_text, model=None, logger=N
     }}
     Directly return the final JSON structure. Do not output anything else."""
 
-    response = await ChatGPT_API_async(model=model, prompt=prompt)
+    response = await llm_acompletion(model=model, prompt=prompt)
     response = extract_json(response)
     if logger:
         logger.info(f"Response: {response}")
@@ -116,7 +116,7 @@ def toc_detector_single_page(content, model=None):
     Directly return the final JSON structure. Do not output anything else.
     Please note: abstract,summary, notation list, figure list, table list, etc. are not table of contents."""
 
-    response = ChatGPT_API(model=model, prompt=prompt)
+    response = llm_completion(model=model, prompt=prompt)
     # print('response', response)
     json_content = extract_json(response)    
     return json_content['toc_detected']
@@ -135,7 +135,7 @@ def check_if_toc_extraction_is_complete(content, toc, model=None):
     Directly return the final JSON structure. Do not output anything else."""
 
     prompt = prompt + '\n Document:\n' + content + '\n Table of contents:\n' + toc
-    response = ChatGPT_API(model=model, prompt=prompt)
+    response = llm_completion(model=model, prompt=prompt)
     json_content = extract_json(response)
     return json_content['completed']
 
@@ -153,7 +153,7 @@ def check_if_toc_transformation_is_complete(content, toc, model=None):
     Directly return the final JSON structure. Do not output anything else."""
 
     prompt = prompt + '\n Raw Table of contents:\n' + content + '\n Cleaned Table of contents:\n' + toc
-    response = ChatGPT_API(model=model, prompt=prompt)
+    response = llm_completion(model=model, prompt=prompt)
     json_content = extract_json(response)
     return json_content['completed']
 
@@ -165,7 +165,7 @@ def extract_toc_content(content, model=None):
 
     Directly return the full table of contents content. Do not output anything else."""
 
-    response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
+    response, finish_reason = llm_completion(model=model, prompt=prompt, return_finish_reason=True)
     
     if_complete = check_if_toc_transformation_is_complete(content, response, model)
     if if_complete == "yes" and finish_reason == "finished":
@@ -176,23 +176,26 @@ def extract_toc_content(content, model=None):
         {"role": "assistant", "content": response},    
     ]
     prompt = f"""please continue the generation of table of contents , directly output the remaining part of the structure"""
-    new_response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, chat_history=chat_history)
+    new_response, finish_reason = llm_completion(model=model, prompt=prompt, chat_history=chat_history, return_finish_reason=True)
     response = response + new_response
     if_complete = check_if_toc_transformation_is_complete(content, response, model)
     
+    attempt = 0
+    max_attempts = 5
+
     while not (if_complete == "yes" and finish_reason == "finished"):
+        attempt += 1
+        if attempt > max_attempts:
+            raise Exception('Failed to complete table of contents after maximum retries')
+
         chat_history = [
-            {"role": "user", "content": prompt}, 
-            {"role": "assistant", "content": response},    
+            {"role": "user", "content": prompt},
+            {"role": "assistant", "content": response},
         ]
         prompt = f"""please continue the generation of table of contents , directly output the remaining part of the structure"""
-        new_response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, chat_history=chat_history)
+        new_response, finish_reason = llm_completion(model=model, prompt=prompt, chat_history=chat_history, return_finish_reason=True)
         response = response + new_response
         if_complete = check_if_toc_transformation_is_complete(content, response, model)
-        
-        # Optional: Add a maximum retry limit to prevent infinite loops
-        if len(chat_history) > 5:  # Arbitrary limit of 10 attempts
-            raise Exception('Failed to complete table of contents after maximum retries')
     
     return response
 
@@ -212,7 +215,7 @@ def detect_page_index(toc_content, model=None):
     }}
     Directly return the final JSON structure. Do not output anything else."""
 
-    response = ChatGPT_API(model=model, prompt=prompt)
+    response = llm_completion(model=model, prompt=prompt)
     json_content = extract_json(response)
     return json_content['page_index_given_in_toc']
 
@@ -239,7 +242,7 @@ def transform_dots_to_colon(text):
 
 def toc_index_extractor(toc, content, model=None):
     print('start toc_index_extractor')
-    tob_extractor_prompt = """
+    toc_extractor_prompt = """
     You are given a table of contents in a json format and several pages of a document, your job is to add the physical_index to the table of contents in the json format.
 
     The provided pages contains tags like <physical_index_X> and <physical_index_X> to indicate the physical location of the page X.
@@ -260,8 +263,8 @@ def toc_index_extractor(toc, content, model=None):
     If the section is not in the provided pages, do not add the physical_index to it.
     Directly return the final JSON structure. Do not output anything else."""
 
-    prompt = tob_extractor_prompt + '\nTable of contents:\n' + str(toc) + '\nDocument pages:\n' + content
-    response = ChatGPT_API(model=model, prompt=prompt)
+    prompt = toc_extractor_prompt + '\nTable of contents:\n' + str(toc) + '\nDocument pages:\n' + content
+    response = llm_completion(model=model, prompt=prompt)
     json_content = extract_json(response)    
     return json_content
 
@@ -289,7 +292,7 @@ def toc_transformer(toc_content, model=None):
     Directly return the final JSON structure, do not output anything else. """
 
     prompt = init_prompt + '\n Given table of contents\n:' + toc_content
-    last_complete, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
+    last_complete, finish_reason = llm_completion(model=model, prompt=prompt, return_finish_reason=True)
     if_complete = check_if_toc_transformation_is_complete(toc_content, last_complete, model)
     if if_complete == "yes" and finish_reason == "finished":
         last_complete = extract_json(last_complete)
@@ -297,7 +300,12 @@ def toc_transformer(toc_content, model=None):
         return cleaned_response
     
     last_complete = get_json_content(last_complete)
+    attempt = 0
+    max_attempts = 5
     while not (if_complete == "yes" and finish_reason == "finished"):
+        attempt += 1
+        if attempt > max_attempts:
+            raise Exception('Failed to complete toc transformation after maximum retries')
         position = last_complete.rfind('}')
         if position != -1:
             last_complete = last_complete[:position+2]
@@ -313,7 +321,7 @@ def toc_transformer(toc_content, model=None):
 
         Please continue the json structure, directly output the remaining part of the json structure."""
 
-        new_complete, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
+        new_complete, finish_reason = llm_completion(model=model, prompt=prompt, return_finish_reason=True)
 
         if new_complete.startswith('```json'):
             new_complete =  get_json_content(new_complete)
@@ -474,7 +482,7 @@ def add_page_number_to_toc(part, structure, model=None):
     Directly return the final JSON structure. Do not output anything else."""
 
     prompt = fill_prompt_seq + f"\n\nCurrent Partial Document:\n{part}\n\nGiven Structure\n{json.dumps(structure, indent=2)}\n"
-    current_json_raw = ChatGPT_API(model=model, prompt=prompt)
+    current_json_raw = llm_completion(model=model, prompt=prompt)
     json_result = extract_json(current_json_raw)
     
     for item in json_result:
@@ -524,7 +532,7 @@ def generate_toc_continue(toc_content, part, model=None):
     Directly return the additional part of the final JSON structure. Do not output anything else."""
 
     prompt = prompt + '\nGiven text\n:' + part + '\nPrevious tree structure\n:' + json.dumps(toc_content, indent=2)
-    response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
+    response, finish_reason = llm_completion(model=model, prompt=prompt, return_finish_reason=True)
     if finish_reason == 'finished':
         return extract_json(response)
     else:
@@ -558,7 +566,7 @@ def generate_toc_init(part, model=None):
     Directly return the final JSON structure. Do not output anything else."""
 
     prompt = prompt + '\nGiven text\n:' + part
-    response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
+    response, finish_reason = llm_completion(model=model, prompt=prompt, return_finish_reason=True)
 
     if finish_reason == 'finished':
          return extract_json(response)
@@ -729,8 +737,8 @@ def check_toc(page_list, opt=None):
 
 
 ################### fix incorrect toc #########################################################
-def single_toc_item_index_fixer(section_title, content, model=None):
-    tob_extractor_prompt = """
+async def single_toc_item_index_fixer(section_title, content, model=None):
+    toc_extractor_prompt = """
     You are given a section title and several pages of a document, your job is to find the physical index of the start page of the section in the partial document.
 
     The provided pages contains tags like <physical_index_X> and <physical_index_X> to indicate the physical location of the page X.
@@ -742,8 +750,8 @@ def single_toc_item_index_fixer(section_title, content, model=None):
     }
     Directly return the final JSON structure. Do not output anything else."""
 
-    prompt = tob_extractor_prompt + '\nSection Title:\n' + str(section_title) + '\nDocument pages:\n' + content
-    response = ChatGPT_API(model=model, prompt=prompt)
+    prompt = toc_extractor_prompt + '\nSection Title:\n' + str(section_title) + '\nDocument pages:\n' + content
+    response = await llm_acompletion(model=model, prompt=prompt)
     json_content = extract_json(response)    
     return convert_physical_index_to_int(json_content['physical_index'])
 
@@ -804,15 +812,15 @@ async def process_and_check_item(incorrect_item):
         page_contents=[]
         for page_index in range(prev_correct, next_correct+1):
             # Add bounds checking to prevent IndexError
-            list_index = page_index - start_index
-            if list_index >= 0 and list_index < len(page_list):
-                page_text = f"<physical_index_{page_index}>\n{page_list[list_index][0]}\n<physical_index_{page_index}>\n\n"
+            page_list_idx = page_index - start_index
+            if page_list_idx >= 0 and page_list_idx < len(page_list):
+                page_text = f"<physical_index_{page_index}>\n{page_list[page_list_idx][0]}\n<physical_index_{page_index}>\n\n"
                 page_contents.append(page_text)
             else:
                 continue
         content_range = ''.join(page_contents)
         
-        physical_index_int = single_toc_item_index_fixer(incorrect_item['title'], content_range, model)
+        physical_index_int = await single_toc_item_index_fixer(incorrect_item['title'], content_range, model)
         
         # Check if the result is correct
         check_item = incorrect_item.copy()
diff --git a/pageindex/utils.py b/pageindex/utils.py
index e73e469a6..57b69c5b5 100644
--- a/pageindex/utils.py
+++ b/pageindex/utils.py
@@ -16,148 +16,65 @@
 from pathlib import Path
 from types import SimpleNamespace as config
 
-# Support multiple API key environment variables for different providers
-CHATGPT_API_KEY = os.getenv("CHATGPT_API_KEY") or os.getenv("OPENAI_API_KEY")
+# Backward compatibility: support CHATGPT_API_KEY as alias for OPENAI_API_KEY
+if not os.getenv("OPENAI_API_KEY") and os.getenv("CHATGPT_API_KEY"):
+    os.environ["OPENAI_API_KEY"] = os.getenv("CHATGPT_API_KEY")
 
-# Configure LiteLLM to use environment variables for different providers
-# Users can set: OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY, etc.
-# See: https://docs.litellm.ai/docs/providers
+litellm.drop_params = True
 
 def count_tokens(text, model=None):
-    """
-    Count tokens in text using LiteLLM's token counter, which automatically
-    selects the appropriate tokenizer for each provider.
-    """
     if not text:
         return 0
-    try:
-        return litellm.token_counter(model=model or "gpt-4o", text=text)
-    except Exception:
-        # Fallback to approximate counting (4 chars per token)
-        return len(text) // 4
+    return litellm.token_counter(model=model, text=text)
 
-def ChatGPT_API_with_finish_reason(model, prompt, api_key=None, chat_history=None):
-    """
-    Synchronous chat completion API with finish reason tracking.
-    Uses LiteLLM to support multiple LLM providers.
-    
-    Args:
-        model: Model string (e.g., "gpt-4o", "claude-3-opus-20240229", "gemini/gemini-pro")
-        prompt: User prompt
-        api_key: API key (optional, uses environment variables if not provided)
-        chat_history: Previous conversation history
-    
-    Returns:
-        Tuple of (response_content, finish_reason)
-    """
+
+def llm_completion(model, prompt, chat_history=None, return_finish_reason=False):
     max_retries = 10
-    
-    # Build messages list
-    if chat_history:
-        messages = chat_history.copy()
-        messages.append({"role": "user", "content": prompt})
-    else:
-        messages = [{"role": "user", "content": prompt}]
-    
+    messages = list(chat_history) + [{"role": "user", "content": prompt}] if chat_history else [{"role": "user", "content": prompt}]
     for i in range(max_retries):
         try:
             response = litellm.completion(
                 model=model,
                 messages=messages,
                 temperature=0,
-                api_key=api_key,
             )
-            if response.choices[0].finish_reason == "length":
-                return response.choices[0].message.content, "max_output_reached"
-            else:
-                return response.choices[0].message.content, "finished"
-
+            content = response.choices[0].message.content
+            if return_finish_reason:
+                finish_reason = "max_output_reached" if response.choices[0].finish_reason == "length" else "finished"
+                return content, finish_reason
+            return content
         except Exception as e:
             print('************* Retrying *************')
             logging.error(f"Error: {e}")
             if i < max_retries - 1:
-                time.sleep(1)  # Wait for 1s before retrying
+                time.sleep(1)
             else:
                 logging.error('Max retries reached for prompt: ' + prompt)
-                return "Error", "error"
-
+                if return_finish_reason:
+                    return "", "error"
+                return ""
 
 
-def ChatGPT_API(model, prompt, api_key=None, chat_history=None):
-    """
-    Synchronous chat completion API.
-    Uses LiteLLM to support multiple LLM providers.
-    
-    Args:
-        model: Model string (e.g., "gpt-4o", "claude-3-opus-20240229", "gemini/gemini-pro")
-        prompt: User prompt
-        api_key: API key (optional, uses environment variables if not provided)
-        chat_history: Previous conversation history
-    
-    Returns:
-        Response content string
-    """
-    max_retries = 10
-    
-    # Build messages list
-    if chat_history:
-        messages = chat_history.copy()
-        messages.append({"role": "user", "content": prompt})
-    else:
-        messages = [{"role": "user", "content": prompt}]
-    
-    for i in range(max_retries):
-        try:
-            response = litellm.completion(
-                model=model,
-                messages=messages,
-                temperature=0,
-                api_key=api_key,
-            )
-            return response.choices[0].message.content
-        except Exception as e:
-            print('************* Retrying *************')
-            logging.error(f"Error: {e}")
-            if i < max_retries - 1:
-                time.sleep(1)  # Wait for 1s before retrying
-            else:
-                logging.error('Max retries reached for prompt: ' + prompt)
-                return "Error"
-            
 
-async def ChatGPT_API_async(model, prompt, api_key=None):
-    """
-    Asynchronous chat completion API.
-    Uses LiteLLM to support multiple LLM providers.
-    
-    Args:
-        model: Model string (e.g., "gpt-4o", "claude-3-opus-20240229", "gemini/gemini-pro")
-        prompt: User prompt
-        api_key: API key (optional, uses environment variables if not provided)
-    
-    Returns:
-        Response content string
-    """
+async def llm_acompletion(model, prompt):
     max_retries = 10
     messages = [{"role": "user", "content": prompt}]
-    
     for i in range(max_retries):
         try:
             response = await litellm.acompletion(
                 model=model,
                 messages=messages,
                 temperature=0,
-                api_key=api_key,
             )
             return response.choices[0].message.content
         except Exception as e:
             print('************* Retrying *************')
             logging.error(f"Error: {e}")
             if i < max_retries - 1:
-                await asyncio.sleep(1)  # Wait for 1s before retrying
+                await asyncio.sleep(1)
             else:
                 logging.error('Max retries reached for prompt: ' + prompt)
-                return "Error"  
+                return ""
             
             
 def get_json_content(response):
@@ -462,14 +379,14 @@ def add_preface_if_needed(data):
 
 
 
-def get_page_tokens(pdf_path, model="gpt-4o-2024-11-20", pdf_parser="PyPDF2"):
+def get_page_tokens(pdf_path, model=None, pdf_parser="PyPDF2"):
     if pdf_parser == "PyPDF2":
         pdf_reader = PyPDF2.PdfReader(pdf_path)
         page_list = []
         for page_num in range(len(pdf_reader.pages)):
             page = pdf_reader.pages[page_num]
             page_text = page.extract_text()
-            token_length = count_tokens(page_text, model=model)
+            token_length = litellm.token_counter(model=model, text=page_text)
             page_list.append((page_text, token_length))
         return page_list
     elif pdf_parser == "PyMuPDF":
@@ -481,7 +398,7 @@ def get_page_tokens(pdf_path, model="gpt-4o-2024-11-20", pdf_parser="PyPDF2"):
         page_list = []
         for page in doc:
             page_text = page.get_text()
-            token_length = count_tokens(page_text, model=model)
+            token_length = litellm.token_counter(model=model, text=page_text)
             page_list.append((page_text, token_length))
         return page_list
     else:
@@ -581,10 +498,10 @@ def remove_structure_text(data):
     return data
 
 
-def check_token_limit(structure, limit=110000, model=None):
+def check_token_limit(structure, limit=110000):
     list = structure_to_list(structure)
     for node in list:
-        num_tokens = count_tokens(node['text'], model=model)
+        num_tokens = count_tokens(node['text'], model=None)
         if num_tokens > limit:
             print(f"Node ID: {node['node_id']} has {num_tokens} tokens")
             print("Start Index:", node['start_index'])
@@ -660,7 +577,7 @@ async def generate_node_summary(node, model=None):
     
     Directly return the description, do not include any other text.
     """
-    response = await ChatGPT_API_async(model, prompt)
+    response = await llm_acompletion(model, prompt)
     return response
 
 
@@ -705,7 +622,7 @@ def generate_doc_description(structure, model=None):
     
     Directly return the description, do not include any other text.
     """
-    response = ChatGPT_API(model, prompt)
+    response = llm_completion(model, prompt)
     return response
 
 
@@ -760,4 +677,4 @@ def load(self, user_opt=None) -> config:
 
         self._validate_keys(user_dict)
         merged = {**self._default_dict, **user_dict}
-        return config(**merged)
\ No newline at end of file
+        return config(**merged)
diff --git a/requirements.txt b/requirements.txt
index d595e0fc9..3b82eda0b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,5 @@
-litellm>=1.0.0
-openai>=1.0.0
+litellm==1.82.0
 pymupdf==1.26.4
 PyPDF2==3.0.1
 python-dotenv==1.1.0
-tiktoken==0.11.0
 pyyaml==6.0.2