GH/test_functionality.py
AI Developer 2ec2c0a1ab feat: 完整的数据提取与转换器项目
- 添加MDF文件导出功能
- 集成阿里云OCR大模型识别
- 添加百度智能云AI照片评分
- 集成DeepSeek大模型创意文案生成
- 完善文档和配置管理
- 使用uv进行现代化依赖管理
- 添加完整的.gitignore配置
2026-01-08 20:25:49 +08:00

192 lines
5.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
数据提取与转换器 - 功能测试脚本
用于验证应用的各项功能是否正常工作
"""
import os
import sys
import tempfile
from pathlib import Path
# Add the directory containing this script to the import path; the
# `utils.*` imports below are presumably resolved from there.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
# Import the project's utility modules. A failed import is reported and the
# script exits with status 1, since none of the checks below could run.
# NOTE(review): extract_text_from_pdf, excel_to_csv and json_to_excel are
# imported but not exercised by any test visible in this file.
try:
    from utils.pdf_extractor import extract_text_from_pdf
    from utils.ocr_processor import extract_text_from_image
    from utils.format_converter import excel_to_csv, csv_to_excel, json_to_excel
    from utils.web_scraper import scrape_webpage
    from utils.database_exporter import export_sqlite_to_excel
    print("✅ 所有工具模块导入成功")
except ImportError as e:
    print(f"❌ 模块导入失败: {e}")
    sys.exit(1)
def test_format_conversion():
    """Smoke-test the CSV -> Excel conversion.

    Writes a three-row CSV into a private temporary directory, calls
    ``csv_to_excel`` on it and reports success when the target ``.xlsx``
    file exists afterwards. The outcome is printed, never raised: any
    exception (including one from cleanup) is caught and reported as a
    failure line.
    """
    import csv

    print("\n📊 测试格式转换功能...")

    # Sample rows; the keys double as the CSV header.
    test_data = [
        {"姓名": "张三", "年龄": 20, "城市": "北京"},
        {"姓名": "李四", "年龄": 21, "城市": "上海"},
        {"姓名": "王五", "年龄": 19, "城市": "广州"},
    ]

    try:
        # A dedicated directory guarantees both files are removed even when
        # the converter raises, and avoids deriving the output name with a
        # substring replace on the full path.
        with tempfile.TemporaryDirectory() as work_dir:
            csv_path = os.path.join(work_dir, "test_data.csv")
            excel_path = os.path.join(work_dir, "test_data.xlsx")

            # newline="" lets the csv module control line endings itself.
            with open(csv_path, "w", encoding="utf-8", newline="") as f:
                writer = csv.DictWriter(
                    f, fieldnames=list(test_data[0]), lineterminator="\n"
                )
                writer.writeheader()
                writer.writerows(test_data)

            # CSV -> Excel
            csv_to_excel(csv_path, excel_path)

            if os.path.exists(excel_path):
                print("✅ CSV转Excel功能正常")
            else:
                print("❌ CSV转Excel功能失败")
    except Exception as e:
        # Top-level boundary of this check: report and carry on with the
        # remaining checks.
        print(f"❌ 格式转换测试失败: {e}")
def test_web_scraping():
    """Smoke-test the web scraper against the Baidu home page.

    Calls ``scrape_webpage`` and prints a success line with the length of
    the returned content, or a failure line when the result is empty.
    Exceptions are caught and printed rather than propagated.
    """
    print("\n🌐 测试网页抓取功能...")
    try:
        # Fetch the Baidu landing page.
        page = scrape_webpage("https://www.baidu.com")

        # Guard clause: an empty or missing result counts as a failure.
        if not page or len(page) <= 0:
            print("❌ 网页抓取功能失败")
            return

        print("✅ 网页抓取功能正常")
        print(f"   抓取内容长度: {len(page)} 字符")
    except Exception as e:
        print(f"❌ 网页抓取测试失败: {e}")
def test_ocr_functionality():
    """Smoke-test OCR by rendering text into an image and reading it back.

    Draws a mixed Chinese/English string onto a white 400x200 image, saves
    it as a PNG inside a private temporary directory and passes the path to
    ``extract_text_from_image``. A non-empty result is printed as success;
    an empty result is printed as a warning. Exceptions are caught and
    printed rather than propagated.
    """
    print("\n🖼️ 测试OCR功能...")
    try:
        # Pillow is imported lazily so a missing install is reported as a
        # failure of this check only.
        from PIL import Image, ImageDraw, ImageFont

        img = Image.new('RGB', (400, 200), color='white')
        d = ImageDraw.Draw(img)

        # Try the system Arial font under both common spellings, then fall
        # back to Pillow's built-in font. Only font-loading errors are
        # caught, so Ctrl-C / SystemExit are no longer swallowed.
        font = None
        for font_name in ("arial.ttf", "Arial.ttf"):
            try:
                font = ImageFont.truetype(font_name, 24)
                break
            except (OSError, ImportError):
                continue
        if font is None:
            font = ImageFont.load_default()

        # Render the sample text.
        d.text((50, 80), "测试文字: Hello World 你好世界", fill="black", font=font)

        # A private directory avoids clashing with other runs and ensures
        # the image is deleted even when OCR raises.
        with tempfile.TemporaryDirectory() as work_dir:
            img_path = os.path.join(work_dir, "test_ocr.png")
            img.save(img_path)
            text = extract_text_from_image(img_path)

        if text:
            print("✅ OCR功能正常")
            print(f"   识别结果: {text}")
        else:
            print("⚠️  OCR识别无结果可能是字体问题")
    except Exception as e:
        # Top-level boundary of this check: report and carry on.
        print(f"❌ OCR测试失败: {e}")
def test_database_functionality():
    """Smoke-test the SQLite -> Excel export.

    Builds a small ``students`` table in a fresh SQLite file inside a
    private temporary directory, calls ``export_sqlite_to_excel`` and
    reports success when the target ``.xlsx`` file exists afterwards.
    Exceptions are caught and printed rather than propagated.
    """
    print("\n🗄️ 测试数据库功能...")
    try:
        import sqlite3

        # Rows to insert: (id, name, age, major).
        test_data = [
            (1, "张三", 20, "计算机科学"),
            (2, "李四", 21, "数据科学"),
            (3, "王五", 19, "人工智能"),
        ]

        # A fresh directory per run: a database left behind by an earlier
        # failed run can no longer make the INSERT below fail on duplicate
        # primary keys, and both files are removed even if the export raises.
        with tempfile.TemporaryDirectory() as work_dir:
            db_path = os.path.join(work_dir, "test.db")
            excel_path = os.path.join(work_dir, "test.xlsx")

            conn = sqlite3.connect(db_path)
            try:
                cursor = conn.cursor()
                # Create the test table.
                cursor.execute("""
                    CREATE TABLE IF NOT EXISTS students (
                        id INTEGER PRIMARY KEY,
                        name TEXT NOT NULL,
                        age INTEGER,
                        major TEXT
                    )
                """)
                # Insert the sample rows.
                cursor.executemany(
                    "INSERT INTO students VALUES (?, ?, ?, ?)", test_data
                )
                conn.commit()
            finally:
                # Always close the connection, even when setup fails.
                conn.close()

            # Export the database to Excel.
            export_sqlite_to_excel(db_path, excel_path)

            if os.path.exists(excel_path):
                print("✅ 数据库导出功能正常")
            else:
                print("❌ 数据库导出功能失败")
    except Exception as e:
        # Top-level boundary of this check: report and carry on.
        print(f"❌ 数据库功能测试失败: {e}")
def main():
    """Run every smoke test in order, then print access info and tips."""
    divider = "=" * 50

    # Opening banner.
    print(divider)
    print("数据提取与转换器 - 功能测试")
    print(divider)

    # Run each functional check in sequence.
    checks = (
        test_format_conversion,
        test_web_scraping,
        test_ocr_functionality,
        test_database_functionality,
    )
    for run_check in checks:
        run_check()

    # Closing banner.
    print("\n" + divider)
    print("测试完成!")
    print(divider)

    # Where the application can be reached, followed by manual test tips.
    closing_lines = (
        "\n🌐 应用访问信息:",
        "本地访问: http://localhost:8502",
        "网络访问: http://192.168.10.21:8502",
        "\n💡 测试建议:",
        "1. 访问应用界面测试文件上传功能",
        "2. 使用test_cases目录下的测试文件",
        "3. 测试网页抓取功能输入百度等网站URL",
    )
    for line in closing_lines:
        print(line)


if __name__ == "__main__":
    main()