2026-01-08 14:11:42 +08:00
|
|
|
#!/Users/bzbb/Documents/work/1/test/.venv/bin/python3
|
|
|
|
|
import os
|
|
|
|
|
import tempfile
|
|
|
|
|
import json
|
|
|
|
|
from flask import Flask, render_template, request, Response, jsonify, redirect, url_for
|
|
|
|
|
from openai import OpenAI
|
|
|
|
|
from dotenv import load_dotenv
|
|
|
|
|
from knowledge_base import get_knowledge_base
|
|
|
|
|
|
|
|
|
|
# Load environment variables (python-dotenv) before anything below reads them.
load_dotenv()

# Create the Flask application. Uploads are staged under the system temp
# directory and only the listed file extensions are accepted.
app = Flask(__name__)
app.config.update(
    UPLOAD_FOLDER=tempfile.gettempdir(),
    ALLOWED_EXTENSIONS={'txt', 'pdf', 'doc', 'docx'},
)

# OpenAI-compatible client pointed at the DeepSeek endpoint; the API key is
# read from the DEEPSEEK_API_KEY environment variable.
client = OpenAI(
    base_url="https://api.deepseek.com",
    api_key=os.getenv("DEEPSEEK_API_KEY"),
)

# Knowledge base instance used by the routes in this module.
base = get_knowledge_base()
|
|
|
|
|
|
|
|
|
|
def allowed_file(filename):
    """Return True when *filename* may be uploaded.

    The extension is the text after the last dot, compared
    case-insensitively against ``app.config['ALLOWED_EXTENSIONS']``.
    Names that contain no dot are rejected.
    """
    _, dot, extension = filename.rpartition('.')
    if not dot:
        return False
    return extension.lower() in app.config['ALLOWED_EXTENSIONS']
|
|
|
|
|
|
|
|
|
|
@app.route('/')
def home():
    """Serve the home page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
|
|
|
|
|
|
|
|
|
|
@app.route('/upload', methods=['POST'])
def upload_file():
    """Upload a document and add it to the knowledge base.

    Expects a multipart form containing a ``file`` part. The upload is
    written to a private, per-request staging directory, handed to
    ``base.add_document`` and then removed again, whether or not indexing
    succeeded.

    Returns:
        A JSON response with ``success``, ``message``, ``document_ids`` and
        ``count`` on success; ``{"error": ...}`` with status 400 for a
        missing/empty/unsupported file, or status 500 when saving or
        indexing fails.
    """
    if 'file' not in request.files:
        return jsonify({"error": "没有文件上传"}), 400

    file = request.files['file']
    if not file.filename:
        return jsonify({"error": "没有选择文件"}), 400

    # The filename is chosen by the client. Keep only its final path
    # component (treating backslashes as separators too) so values such as
    # "../../x.txt" or an absolute path cannot escape the staging directory.
    safe_name = os.path.basename(file.filename.replace('\\', '/'))
    if safe_name in ('', '.', '..') or not allowed_file(safe_name):
        return jsonify({"error": "不支持的文件类型"}), 400

    # NOTE(review): an optional 'title' form field used to be collected into
    # a metadata dict that was never passed on; it is still ignored here.
    # Wire it into add_document if the knowledge base accepts metadata.
    try:
        # A fresh directory per request prevents concurrent uploads with the
        # same name from overwriting each other, and the context manager
        # removes the staged file even when add_document raises.
        with tempfile.TemporaryDirectory(dir=app.config['UPLOAD_FOLDER']) as staging_dir:
            staged_path = os.path.join(staging_dir, safe_name)
            file.save(staged_path)

            # Add the staged document to the knowledge base.
            document_ids = base.add_document(file_path=staged_path)

        return jsonify({
            "success": True,
            "message": "文档上传成功",
            "document_ids": document_ids,
            "count": len(document_ids)
        })
    except Exception as e:
        return jsonify({"error": f"上传失败: {str(e)}"}), 500
|
|
|
|
|
|
|
|
|
|
@app.route('/documents', methods=['GET'])
def list_documents():
    """Return the list of documents held by the knowledge base.

    Returns:
        A JSON response with ``success``, ``documents`` and ``count``; or
        ``{"error": ...}`` with status 500 when the knowledge base call
        fails, matching the error format of the other routes.
    """
    try:
        documents = base.list_documents()
        return jsonify({
            "success": True,
            "documents": documents,
            "count": len(documents)
        })
    except Exception as e:
        return jsonify({"error": f"获取文档列表失败: {str(e)}"}), 500
|
|
|
|
|
|
|
|
|
|
@app.route('/documents/<document_id>', methods=['DELETE'])
def delete_document(document_id):
    """Delete the document identified by *document_id*.

    Returns:
        A JSON success message when ``base.delete_document`` returns a
        truthy value, a 404 JSON error when it returns a falsy value, or a
        500 JSON error when it raises.
    """
    try:
        deleted = base.delete_document(document_id)
        if not deleted:
            return jsonify({"error": "文档不存在"}), 404
        return jsonify({"success": True, "message": "文档删除成功"})
    except Exception as exc:
        return jsonify({"error": f"删除失败: {str(exc)}"}), 500
|
|
|
|
|
|
|
|
|
|
@app.route('/search', methods=['POST'])
def search_documents():
    """Search the knowledge base.

    Expects a JSON object body with:
        query: non-empty search string (required).
        n_results: positive integer, defaults to 5.
        hybrid_weight: number forwarded to ``base.search``, defaults to 0.5.

    Returns:
        A JSON response with ``success``, ``results`` and ``count``;
        ``{"error": ...}`` with status 400 for a malformed request, or
        status 500 when the search itself fails.
    """
    # silent=True yields None for a missing, non-JSON or unparsable body, so
    # a client mistake is answered with 400 instead of surfacing as a 500.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "请求体必须是JSON对象"}), 400

    query = payload.get('query', '')
    n_results = payload.get('n_results', 5)
    hybrid_weight = payload.get('hybrid_weight', 0.5)

    if not isinstance(query, str) or not query.strip():
        return jsonify({"error": "查询不能为空"}), 400
    # bool is a subclass of int, so it is excluded explicitly.
    if isinstance(n_results, bool) or not isinstance(n_results, int) or n_results < 1:
        return jsonify({"error": "n_results 必须是正整数"}), 400
    if isinstance(hybrid_weight, bool) or not isinstance(hybrid_weight, (int, float)):
        return jsonify({"error": "hybrid_weight 必须是数字"}), 400

    try:
        results = base.search(query, n_results, hybrid_weight)
        return jsonify({
            "success": True,
            "results": results,
            "count": len(results)
        })
    except Exception as e:
        return jsonify({"error": f"搜索失败: {str(e)}"}), 500
|
|
|
|
|
|
|
|
|
|
@app.route('/ask', methods=['POST'])
def ask_question():
    """Answer a question using knowledge-base search results as context.

    Expects a JSON object body with a non-empty string ``query``. The top
    three search results are joined into a context block, embedded in a
    prompt and sent to the chat model; the answer is streamed back as
    plain text.

    Returns:
        A streaming ``text/plain`` response with the model's answer;
        ``{"error": ...}`` with status 400 for a malformed request, or
        status 500 when the search or the model request fails.
    """
    # silent=True yields None for a missing, non-JSON or unparsable body, so
    # a client mistake is answered with 400 instead of surfacing as a 500.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "请求体必须是JSON对象"}), 400

    query = payload.get('query', '')
    if not isinstance(query, str) or not query.strip():
        return jsonify({"error": "问题不能为空"}), 400

    try:
        # 1. Retrieve the most relevant documents.
        search_results = base.search(query, n_results=3)

        # 2. Build the context, numbering documents from 1 so the model can
        #    cite them.
        context = "\n\n".join(
            f"[文档{idx}] {result['content']}"
            for idx, result in enumerate(search_results, start=1)
        )

        # 3. Build the prompt.
        prompt = f"你是一个智能知识库助手,根据以下上下文回答用户问题。\n\n上下文:\n{context}\n\n用户问题:{query}\n\n要求:\n1. 基于上下文回答,不要编造信息\n2. 如果上下文没有相关信息,回答'我没有找到相关信息'\n3. 请引用来源文档,格式为[文档1]、[文档2]等\n4. 保持回答简洁明了\n\n回答:"

        # 4. Start the completion here rather than inside the generator: the
        #    generator body only runs after this function has returned, so
        #    a request made there could never reach the except clause below.
        stream = client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": "你是一个智能知识库助手,根据提供的上下文回答用户问题。"},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3,
            stream=True
        )
    except Exception as e:
        return jsonify({"error": f"问答失败: {str(e)}"}), 500

    def generate():
        """Yield the answer text chunk by chunk."""
        try:
            for chunk in stream:
                # Skip chunks that carry no choices instead of raising
                # IndexError on choices[0].
                if not chunk.choices:
                    continue
                text = chunk.choices[0].delta.content
                if text:
                    yield text
        finally:
            # Release the upstream connection when the stream is exhausted
            # or the client disconnects mid-response.
            close = getattr(stream, "close", None)
            if callable(close):
                close()

    return Response(generate(), mimetype='text/plain')
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the port execute arbitrary code. The server binds to all
    # interfaces, so debug is opt-in via FLASK_DEBUG instead of always on.
    debug_enabled = os.getenv('FLASK_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'}
    app.run(debug=debug_enabled, host='0.0.0.0', port=5001)
|
2026-01-09 10:34:34 +08:00
|
|
|
|
|
|
|
|
def main():
    """Entry point used by ``uv`` to start the development server.

    Listens on all interfaces, port 5001. Debug mode is off unless the
    ``FLASK_DEBUG`` environment variable is set to 1/true/yes/on, because
    the Werkzeug interactive debugger allows arbitrary code execution and
    must not be reachable from the network by default.
    """
    debug_enabled = os.getenv('FLASK_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'}
    app.run(debug=debug_enabled, host='0.0.0.0', port=5001)
|