- 添加MDF文件导出功能 - 集成阿里云OCR大模型识别 - 添加百度智能云AI照片评分 - 集成DeepSeek大模型创意文案生成 - 完善文档和配置管理 - 使用uv进行现代化依赖管理 - 添加完整的.gitignore配置
192 lines
5.8 KiB
Python
192 lines
5.8 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
数据提取与转换器 - 功能测试脚本
|
||
用于验证应用的各项功能是否正常工作
|
||
"""
|
||
|
||
import os
|
||
import sys
|
||
import tempfile
|
||
from pathlib import Path
|
||
|
||
# 添加项目路径到Python路径
|
||
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
||
|
||
# 导入工具模块
|
||
try:
|
||
from utils.pdf_extractor import extract_text_from_pdf
|
||
from utils.ocr_processor import extract_text_from_image
|
||
from utils.format_converter import excel_to_csv, csv_to_excel, json_to_excel
|
||
from utils.web_scraper import scrape_webpage
|
||
from utils.database_exporter import export_sqlite_to_excel
|
||
print("✅ 所有工具模块导入成功")
|
||
except ImportError as e:
|
||
print(f"❌ 模块导入失败: {e}")
|
||
sys.exit(1)
|
||
|
||
def test_format_conversion():
    """Smoke-test the CSV -> Excel conversion helper.

    Writes a three-row CSV file, passes it to ``csv_to_excel`` and reports
    whether the expected ``.xlsx`` file appeared on disk.

    All intermediate files live in a private temporary directory that is
    removed when the check finishes, whether it succeeds or fails, so a
    failing converter no longer leaves stray files in the system temp folder.

    Returns:
        None. The outcome is reported on stdout; any exception raised while
        running the check is caught and printed rather than propagated.
    """
    print("\n📊 测试格式转换功能...")

    # Sample records used to build the CSV input.
    test_data = [
        {"姓名": "张三", "年龄": 20, "城市": "北京"},
        {"姓名": "李四", "年龄": 21, "城市": "上海"},
        {"姓名": "王五", "年龄": 19, "城市": "广州"},
    ]

    try:
        # A dedicated directory guarantees unique paths and automatic cleanup.
        with tempfile.TemporaryDirectory() as workdir:
            csv_path = os.path.join(workdir, "test_data.csv")
            excel_path = os.path.join(workdir, "test_data.xlsx")

            with open(csv_path, 'w', encoding='utf-8') as f:
                f.write("姓名,年龄,城市\n")
                f.writelines(
                    f"{item['姓名']},{item['年龄']},{item['城市']}\n"
                    for item in test_data
                )

            # CSV -> Excel
            csv_to_excel(csv_path, excel_path)

            if os.path.exists(excel_path):
                print("✅ CSV转Excel功能正常")
            else:
                print("❌ CSV转Excel功能失败")
    except Exception as e:
        # Top-level boundary of this check: report the failure and carry on.
        print(f"❌ 格式转换测试失败: {e}")
|
||
|
||
def test_web_scraping():
    """Smoke-test the web scraper against a live page.

    Calls ``scrape_webpage`` on the Baidu home page and reports success when
    the returned content is non-empty, printing its length. Any exception
    raised along the way is caught and printed instead of propagated.

    Returns:
        None. The outcome is reported on stdout.
    """
    print("\n🌐 测试网页抓取功能...")

    try:
        # Fetch the Baidu home page.
        page_content = scrape_webpage("https://www.baidu.com")

        # A falsy result counts as zero characters.
        char_count = len(page_content) if page_content else 0
        if char_count <= 0:
            print("❌ 网页抓取功能失败")
            return

        print("✅ 网页抓取功能正常")
        print(f" 抓取内容长度: {char_count} 字符")
    except Exception as err:
        print(f"❌ 网页抓取测试失败: {err}")
|
||
|
||
def test_ocr_functionality():
    """Smoke-test OCR by rendering a text image and running it through OCR.

    Draws a line of mixed Chinese/English text onto a white 400x200 image,
    saves it as a PNG and passes the file to ``extract_text_from_image``.
    A non-empty result is reported as success; an empty one as a warning.

    The image is written into a private temporary directory that is removed
    when the check finishes, so nothing is left behind if OCR raises and
    concurrent runs cannot clobber each other's file.

    Returns:
        None. The outcome is reported on stdout; any exception raised while
        running the check (including a missing Pillow install) is caught and
        printed rather than propagated.
    """
    print("\n🖼️ 测试OCR功能...")

    try:
        # Pillow is imported lazily so its absence is reported as a failed
        # check instead of breaking the whole script at import time.
        from PIL import Image, ImageDraw, ImageFont

        # Blank white canvas.
        img = Image.new('RGB', (400, 200), color='white')
        d = ImageDraw.Draw(img)

        # Try the system Arial font under both common file-name spellings.
        # Only font-loading failures are swallowed here; KeyboardInterrupt and
        # SystemExit are no longer trapped as they were by a bare ``except``.
        font = None
        for font_file in ("arial.ttf", "Arial.ttf"):
            try:
                font = ImageFont.truetype(font_file, 24)
                break
            except (OSError, ImportError):
                continue
        if font is None:
            # NOTE(review): the built-in fallback font may not be able to
            # render the CJK characters below - confirm on target systems.
            font = ImageFont.load_default()

        # Draw the sample text.
        d.text((50, 80), "测试文字: Hello World 你好世界", fill="black", font=font)

        with tempfile.TemporaryDirectory() as workdir:
            img_path = os.path.join(workdir, "test_ocr.png")
            img.save(img_path)

            # Run OCR on the saved image.
            text = extract_text_from_image(img_path)

            if text:
                print("✅ OCR功能正常")
                print(f" 识别结果: {text}")
            else:
                print("⚠️ OCR识别无结果(可能是字体问题)")
    except Exception as e:
        # Top-level boundary of this check: report the failure and carry on.
        print(f"❌ OCR测试失败: {e}")
|
||
|
||
def test_database_functionality():
    """Smoke-test exporting a SQLite database to Excel.

    Creates a small SQLite database with a ``students`` table holding three
    rows, passes it to ``export_sqlite_to_excel`` and reports whether the
    expected ``.xlsx`` file appeared on disk.

    The database and the exported workbook live in a private temporary
    directory that is removed when the check finishes. A failed export
    therefore cannot leave a stale database behind, which previously made
    every later run fail on the fixed primary keys being inserted again.

    Returns:
        None. The outcome is reported on stdout; any exception raised while
        running the check is caught and printed rather than propagated.
    """
    print("\n🗄️ 测试数据库功能...")

    try:
        import sqlite3

        with tempfile.TemporaryDirectory() as workdir:
            db_path = os.path.join(workdir, "test.db")
            excel_path = os.path.join(workdir, "test.xlsx")

            # Build the fixture database; always close the connection so the
            # file can be removed afterwards even if a statement fails.
            conn = sqlite3.connect(db_path)
            try:
                cursor = conn.cursor()

                cursor.execute("""
                    CREATE TABLE IF NOT EXISTS students (
                        id INTEGER PRIMARY KEY,
                        name TEXT NOT NULL,
                        age INTEGER,
                        major TEXT
                    )
                """)

                # Sample rows.
                test_data = [
                    (1, "张三", 20, "计算机科学"),
                    (2, "李四", 21, "数据科学"),
                    (3, "王五", 19, "人工智能"),
                ]

                cursor.executemany("INSERT INTO students VALUES (?, ?, ?, ?)", test_data)
                conn.commit()
            finally:
                conn.close()

            # Export the database and check that the workbook was produced.
            export_sqlite_to_excel(db_path, excel_path)

            if os.path.exists(excel_path):
                print("✅ 数据库导出功能正常")
            else:
                print("❌ 数据库导出功能失败")
    except Exception as e:
        # Top-level boundary of this check: report the failure and carry on.
        print(f"❌ 数据库功能测试失败: {e}")
|
||
|
||
def main():
    """Run every functional check in order and print usage hints.

    Prints a title banner, runs the format-conversion, web-scraping, OCR and
    database checks one after another, then prints a completion banner
    followed by the application's access URLs and manual-testing suggestions.
    """
    divider = "=" * 50

    print(divider)
    print("数据提取与转换器 - 功能测试")
    print(divider)

    # Run each functional check in the documented order.
    checks = (
        test_format_conversion,
        test_web_scraping,
        test_ocr_functionality,
        test_database_functionality,
    )
    for run_check in checks:
        run_check()

    print("\n" + divider)
    print("测试完成!")
    print(divider)

    # Access information and manual-testing suggestions.
    # NOTE(review): the LAN address below is hard-coded - confirm it matches
    # the deployment this script is run against.
    closing_lines = (
        "\n🌐 应用访问信息:",
        "本地访问: http://localhost:8502",
        "网络访问: http://192.168.10.21:8502",
        "\n💡 测试建议:",
        "1. 访问应用界面测试文件上传功能",
        "2. 使用test_cases目录下的测试文件",
        "3. 测试网页抓取功能(输入百度等网站URL)",
    )
    for line in closing_lines:
        print(line)


# Run the checks only when executed as a script, not on import.
if __name__ == "__main__":
    main()