Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
689 changes: 456 additions & 233 deletions app.py

Large diffs are not rendered by default.

Binary file modified chroma_db/0868db41-6ebe-49c6-a051-6415cda05a70/data_level0.bin
Binary file not shown.
Binary file modified chroma_db/0868db41-6ebe-49c6-a051-6415cda05a70/header.bin
Binary file not shown.
Binary file not shown.
Binary file modified chroma_db/0868db41-6ebe-49c6-a051-6415cda05a70/length.bin
Binary file not shown.
Binary file modified chroma_db/0868db41-6ebe-49c6-a051-6415cda05a70/link_lists.bin
Binary file not shown.
Binary file modified chroma_db/chroma.sqlite3
Binary file not shown.
38 changes: 37 additions & 1 deletion config.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,40 @@
CHUNK_OVERLAP = int(CHUNK_SIZE * 0.2)

# Embedding API 调用时的批处理大小 (embedding-v1 API限制每次最多16个输入)
EMBEDDING_BATCH_SIZE = 16
EMBEDDING_BATCH_SIZE = 16

# Additional configuration (appended to the end of config.py).

# --- RAG 和 LLM 结果评估相关配置 ---
# Phrases that indicate an LLM answer is uncertain or is a refusal.
# NOTE(review): presumably the caller tests each phrase as a substring of the
# answer text; the consuming code is not visible here, so confirm.
#
# The bare entry "To" that used to sit in front of "I cannot" was removed: it
# is not an uncertainty phrase, and as a substring it matches ordinary text
# such as "To train a model ..." or "PyTorch".
UNCERTAINTY_PHRASES = [
    "无法找到", "没有相关信息", "未能找到", "无法提供", "不确定",
    "我不知道", "无法确定", "没有足够信息", "目前无法回答",
    "I cannot", "I don't", "Unable to", "not find", "no information",
    "对不起,我无法", "抱歉,我无法",  # more common uncertainty phrases can be added here
]
RAG_SCORE_THRESHOLD = 0.45 # Relevance-score threshold for RAG documents (tune based on observed results)
RAG_ANSWER_MIN_LENGTH = 25 # Minimum-length threshold for a RAG answer (in characters; adjustable)

# --- 机器学习关键词列表 ---
# Machine-learning keyword list (lower-case Latin terms plus Chinese terms).
ML_KEYWORDS = [
    # General concepts
    "机器学习",
    "模型",
    "训练",
    "预测",
    "分类",
    "回归",
    "聚类",
    "算法",
    "特征",
    "数据",
    # Algorithms and model families
    "随机森林",
    "决策树",
    "支持向量机",
    "svm",
    "knn",
    "k近邻",
    "逻辑回归",
    "线性回归",
    "神经网络",
    "深度学习",
    "朴素贝叶斯",
    "k-means",
    "xgboost",
    "lightgbm",
    "catboost",
    # Evaluation metrics
    "准确率",
    "精确率",
    "召回率",
    "f1分数",
    "auc",
    "roc",
    "mse",
    "rmse",
    "mae",
    "r方",
    "r2",
    # Training workflow terms
    "超参数",
    "验证集",
    "测试集",
    "过拟合",
    "欠拟合",
    "特征工程",
    "降维",
    "pca",
    # Frameworks and libraries
    "tensorflow",
    "keras",
    "pytorch",
    "scikit-learn",
    "sklearn",
    "paddlepaddle",
    "paddle",
]

# Keywords naming machine-learning operations and workflow actions.
ML_OPS_KEYWORDS = [
    # Single-verb actions
    "训练",
    "预测",
    "比较",
    "评估",
    "构建",
    "解释",
    "优化",
    "部署",
    "监控",
    "保存",
    "加载",
    # Compound actions
    "选择模型",
    "调整参数",
    "分析特征",
    "生成报告",
    "自动化",
    "工作流",
    # Tooling and process terms
    "版本控制",
    "流水线",
    "pipeline",
    "finetune",
    "微调",
    "自动机器学习",
    "automl",
]

# --- 应用行为相关配置 (示例,您可以按需添加更多) ---
# 例如,上传文件存储位置,虽然您在 app.py 中定义了 UPLOADS_DIR,但也可以考虑放在这里
# UPLOADS_DIR = os.path.join(os.getcwd(), "uploads")
# MODELS_STORAGE_DIR = os.path.join(os.getcwd(), "ml_models")

# Default number of preview rows.
DEFAULT_PREVIEW_ROWS = 10
3 changes: 2 additions & 1 deletion ml_agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@

from langchain.tools import StructuredTool
from langchain_core.tools import Tool
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.pydantic_v1 import Field
from pydantic import BaseModel
from langchain.agents import AgentExecutor, create_structured_chat_agent
from langchain_core.prompts import PromptTemplate

Expand Down
208 changes: 208 additions & 0 deletions ml_api.log

Large diffs are not rendered by default.

410 changes: 410 additions & 0 deletions project_documentation.md

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions project_overview.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# 项目名称

AI机器学习助手 Pro

## 项目目的

这是一个集成了RAG检索增强生成和机器学习模型的智能助手系统,可以回答机器学习相关问题,并提供模型训练、预测、分析和可视化功能。
2 changes: 1 addition & 1 deletion rag_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
UnstructuredWordDocumentLoader # 添加DOCX加载器
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_chroma import Chroma
from langchain.chains import RetrievalQA
from langchain_core.documents import Document
from langchain_community.vectorstores.utils import filter_complex_metadata
Expand Down
1,447 changes: 1,058 additions & 389 deletions static/js/app.js

Large diffs are not rendered by default.