GH/test_functionality.py

192 lines
5.8 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
数据提取与转换器 - 功能测试脚本
用于验证应用的各项功能是否正常工作
"""
import os
import sys
import tempfile
from pathlib import Path
# 添加项目路径到Python路径
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
# Import the project's tool modules. Any ImportError is reported and aborts the
# script with exit status 1; the success line is printed only when every
# import went through.
try:
    from utils.pdf_extractor import extract_text_from_pdf
    from utils.ocr_processor import extract_text_from_image
    from utils.format_converter import excel_to_csv, csv_to_excel, json_to_excel
    from utils.web_scraper import scrape_webpage
    from utils.database_exporter import export_sqlite_to_excel
except ImportError as import_error:
    print(f"❌ 模块导入失败: {import_error}")
    sys.exit(1)
else:
    print("✅ 所有工具模块导入成功")
def test_format_conversion():
    """Smoke-test the CSV -> Excel conversion done by ``csv_to_excel``.

    Writes a small three-row CSV file, converts it, and prints whether the
    expected ``.xlsx`` file exists afterwards. Exceptions raised while
    preparing or converting the data are caught and reported as a failure
    line instead of propagating.

    Returns:
        None. The outcome is only printed.
    """
    print("\n📊 测试格式转换功能...")
    # Sample records written to the CSV file below.
    test_data = [
        {"姓名": "张三", "年龄": 20, "城市": "北京"},
        {"姓名": "李四", "年龄": 21, "城市": "上海"},
        {"姓名": "王五", "年龄": 19, "城市": "广州"},
    ]
    try:
        # Work inside a private temporary directory so that both the CSV and
        # the generated workbook are removed even when the conversion raises.
        with tempfile.TemporaryDirectory() as tmp_dir:
            csv_path = os.path.join(tmp_dir, "test_data.csv")
            with open(csv_path, "w", encoding="utf-8") as f:
                f.write("姓名,年龄,城市\n")
                for item in test_data:
                    f.write(f"{item['姓名']},{item['年龄']},{item['城市']}\n")

            # Swap only the extension; str.replace would also rewrite any
            # other ".csv" occurrence elsewhere in the path.
            excel_path = os.path.splitext(csv_path)[0] + ".xlsx"
            csv_to_excel(csv_path, excel_path)

            if os.path.exists(excel_path):
                print("✅ CSV转Excel功能正常")
            else:
                print("❌ CSV转Excel功能失败")
    except Exception as e:
        print(f"❌ 格式转换测试失败: {e}")
def test_web_scraping():
    """Smoke-test ``scrape_webpage`` by fetching the Baidu home page.

    Prints a success line plus the length of the returned content when the
    result is non-empty, a failure line when it is empty, and an error line
    when the call raises. Always returns None.
    """
    print("\n🌐 测试网页抓取功能...")
    target_url = "https://www.baidu.com"
    try:
        page = scrape_webpage(target_url)
        # Guard clause: an empty or falsy result counts as a failed scrape.
        if not page or len(page) <= 0:
            print("❌ 网页抓取功能失败")
            return
        print("✅ 网页抓取功能正常")
        print(f" 抓取内容长度: {len(page)} 字符")
    except Exception as err:
        print(f"❌ 网页抓取测试失败: {err}")
def test_ocr_functionality():
    """Smoke-test OCR by rendering text into a PNG and reading it back.

    Draws a mixed English/Chinese string onto a white 400x200 image, saves it
    as ``test_ocr.png`` inside a private temporary directory, and passes the
    path to ``extract_text_from_image``. Prints the recognised text, a warning
    when nothing was recognised, or an error line when any step raises.

    Returns:
        None. The outcome is only printed.
    """
    print("\n🖼️ 测试OCR功能...")
    try:
        # Pillow is imported lazily so that a missing install is reported as
        # an OCR test failure rather than breaking the whole script.
        from PIL import Image, ImageDraw, ImageFont

        img = Image.new('RGB', (400, 200), color='white')
        d = ImageDraw.Draw(img)

        # Try the Arial file names in order, then fall back to Pillow's
        # built-in font. Only "font cannot be loaded" errors are swallowed;
        # a bare except would also hide KeyboardInterrupt and SystemExit.
        font = None
        for font_name in ("arial.ttf", "Arial.ttf"):
            try:
                font = ImageFont.truetype(font_name, 24)
                break
            except (OSError, ImportError):
                continue
        if font is None:
            # NOTE(review): the default font may not cover the Chinese
            # characters drawn below - confirm on the target platform.
            font = ImageFont.load_default()

        d.text((50, 80), "测试文字: Hello World 你好世界", fill="black", font=font)

        # A private temporary directory avoids clashing with another run's
        # test_ocr.png and removes the image even when OCR raises.
        with tempfile.TemporaryDirectory() as tmp_dir:
            img_path = os.path.join(tmp_dir, "test_ocr.png")
            img.save(img_path)

            text = extract_text_from_image(img_path)
            if text:
                print("✅ OCR功能正常")
                print(f" 识别结果: {text}")
            else:
                print("⚠️ OCR识别无结果可能是字体问题")
    except Exception as e:
        print(f"❌ OCR测试失败: {e}")
def test_database_functionality():
    """Smoke-test ``export_sqlite_to_excel`` against a throwaway database.

    Creates a SQLite database containing a three-row ``students`` table in a
    private temporary directory, exports it, and prints whether the expected
    ``.xlsx`` file exists afterwards. Any exception is caught and reported as
    an error line.

    Returns:
        None. The outcome is only printed.
    """
    print("\n🗄️ 测试数据库功能...")
    try:
        import sqlite3
        from contextlib import closing

        # Rows inserted into the students table: (id, name, age, major).
        test_data = [
            (1, "张三", 20, "计算机科学"),
            (2, "李四", 21, "数据科学"),
            (3, "王五", 19, "人工智能"),
        ]

        # A fresh directory per run guarantees an empty database. With a
        # fixed path, a file left over from an earlier failed run makes the
        # INSERT below fail on the primary key.
        with tempfile.TemporaryDirectory() as tmp_dir:
            db_path = os.path.join(tmp_dir, "test.db")
            excel_path = os.path.join(tmp_dir, "test.xlsx")

            # closing() releases the connection even when a statement fails,
            # so the directory can always be removed afterwards.
            with closing(sqlite3.connect(db_path)) as conn:
                cursor = conn.cursor()
                cursor.execute("""
                    CREATE TABLE IF NOT EXISTS students (
                        id INTEGER PRIMARY KEY,
                        name TEXT NOT NULL,
                        age INTEGER,
                        major TEXT
                    )
                """)
                cursor.executemany("INSERT INTO students VALUES (?, ?, ?, ?)", test_data)
                conn.commit()

            export_sqlite_to_excel(db_path, excel_path)

            if os.path.exists(excel_path):
                print("✅ 数据库导出功能正常")
            else:
                print("❌ 数据库导出功能失败")
    except Exception as e:
        print(f"❌ 数据库功能测试失败: {e}")
def main():
    """Run every smoke test in order, then print access info and testing tips."""
    rule = "=" * 50

    # Opening banner.
    print(rule, "数据提取与转换器 - 功能测试", rule, sep="\n")

    # Each test prints its own outcome and handles its own errors.
    test_format_conversion()
    test_web_scraping()
    test_ocr_functionality()
    test_database_functionality()

    # Closing banner.
    print("\n" + rule, "测试完成!", rule, sep="\n")

    # Where the application can be reached.
    print(
        "\n🌐 应用访问信息:",
        "本地访问: http://localhost:8502",
        "网络访问: http://192.168.10.21:8502",
        sep="\n",
    )

    # Suggestions for manual follow-up testing.
    print(
        "\n💡 测试建议:",
        "1. 访问应用界面测试文件上传功能",
        "2. 使用test_cases目录下的测试文件",
        "3. 测试网页抓取功能输入百度等网站URL",
        sep="\n",
    )


if __name__ == "__main__":
    main()