#!/usr/bin/env python3
"""
Data Extraction and Conversion Tool - functional test script.

Exercises each utility module once and prints a pass/fail line per
feature so the overall health of the application can be checked quickly.
"""

import os
import sys
import tempfile
from pathlib import Path

# Add the project directory to the Python path so ``utils`` resolves.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# Import the utility modules. Names that are not called below are still
# imported on purpose: a failing import is itself a reported test failure.
try:
    from utils.pdf_extractor import extract_text_from_pdf
    from utils.ocr_processor import extract_text_from_image
    from utils.format_converter import excel_to_csv, csv_to_excel, json_to_excel
    from utils.web_scraper import scrape_webpage
    from utils.database_exporter import export_sqlite_to_excel
    print("✅ 所有工具模块导入成功")
except ImportError as e:
    print(f"❌ 模块导入失败: {e}")
    sys.exit(1)

# Addresses printed at the end of the run. The network address is specific
# to the machine this script was written on.
LOCAL_APP_URL = "http://localhost:8502"
NETWORK_APP_URL = "http://192.168.10.21:8502"


def test_format_conversion():
    """Check that ``csv_to_excel`` produces an output file.

    Writes a small CSV file into a private temporary directory, converts
    it and reports whether the Excel file exists afterwards. Any exception
    is reported as a failure instead of being raised.
    """
    print("\n📊 测试格式转换功能...")

    # Sample rows
    test_data = [
        {"姓名": "张三", "年龄": 20, "城市": "北京"},
        {"姓名": "李四", "年龄": 21, "城市": "上海"},
        {"姓名": "王五", "年龄": 19, "城市": "广州"},
    ]

    try:
        # A private directory guarantees both files are removed even when
        # the conversion raises part-way through.
        with tempfile.TemporaryDirectory() as tmp_dir:
            csv_path = os.path.join(tmp_dir, "test_data.csv")
            with open(csv_path, mode='w', encoding='utf-8') as f:
                f.write("姓名,年龄,城市\n")
                for item in test_data:
                    f.write(f"{item['姓名']},{item['年龄']},{item['城市']}\n")

            # CSV -> Excel. Only the extension is swapped; the rest of the
            # path is left untouched.
            excel_path = os.path.splitext(csv_path)[0] + '.xlsx'
            csv_to_excel(csv_path, excel_path)

            if os.path.exists(excel_path):
                print("✅ CSV转Excel功能正常")
            else:
                print("❌ CSV转Excel功能失败")

    except Exception as e:
        print(f"❌ 格式转换测试失败: {e}")


def test_web_scraping():
    """Check that ``scrape_webpage`` returns non-empty content.

    Requires network access. Any exception is reported as a failure
    instead of being raised.
    """
    print("\n🌐 测试网页抓取功能...")

    try:
        # Fetch the Baidu home page
        content = scrape_webpage("https://www.baidu.com")

        if content:
            print("✅ 网页抓取功能正常")
            print(f" 抓取内容长度: {len(content)} 字符")
        else:
            print("❌ 网页抓取功能失败")

    except Exception as e:
        print(f"❌ 网页抓取测试失败: {e}")


def _load_test_font(size=24):
    """Return an Arial font of *size*, or Pillow's default font as fallback."""
    from PIL import ImageFont

    # The file name is tried in both spellings.
    for font_name in ("arial.ttf", "Arial.ttf"):
        try:
            return ImageFont.truetype(font_name, size)
        except OSError:
            # Raised by Pillow when the font file cannot be read.
            continue
    return ImageFont.load_default()


def test_ocr_functionality():
    """Check that ``extract_text_from_image`` recognises rendered text.

    Draws a short text onto a white image, saves it into a private
    temporary directory and runs OCR on it. An empty OCR result is
    reported as a warning; any exception is reported as a failure.
    """
    print("\n🖼️ 测试OCR功能...")

    try:
        # Imported here so that a missing Pillow installation is reported
        # as an OCR test failure rather than aborting the whole script.
        from PIL import Image, ImageDraw

        # Create the image
        img = Image.new('RGB', (400, 200), color='white')
        d = ImageDraw.Draw(img)

        # NOTE(review): the fallback fonts may lack CJK glyphs, which would
        # explain an empty OCR result - confirm on the target machine.
        font = _load_test_font(24)

        # Add the text.
        # NOTE(review): the literal contained a line break in the source
        # this was recovered from; it is written as an explicit escape here.
        d.text((50, 80), "测试文字: \nHello World 你好世界", fill="black", font=font)

        # A private directory avoids collisions on a fixed file name and
        # removes the image even when OCR raises.
        with tempfile.TemporaryDirectory() as tmp_dir:
            img_path = os.path.join(tmp_dir, "test_ocr.png")
            img.save(img_path)

            # Run OCR
            text = extract_text_from_image(img_path)

            if text:
                print("✅ OCR功能正常")
                print(f" 识别结果: {text}")
            else:
                print("⚠️ OCR识别无结果(可能是字体问题)")

    except Exception as e:
        print(f"❌ OCR测试失败: {e}")


def test_database_functionality():
    """Check that ``export_sqlite_to_excel`` produces an output file.

    Builds a three-row SQLite database in a private temporary directory,
    exports it and reports whether the Excel file exists afterwards. Any
    exception is reported as a failure instead of being raised.
    """
    print("\n🗄️ 测试数据库功能...")

    try:
        import sqlite3

        # Test rows
        test_data = [
            (1, "张三", 20, "计算机科学"),
            (2, "李四", 21, "数据科学"),
            (3, "王五", 19, "人工智能"),
        ]

        # A fresh directory per run means no stale database from an earlier
        # failed run can trigger a primary-key conflict on insert.
        with tempfile.TemporaryDirectory() as tmp_dir:
            db_path = os.path.join(tmp_dir, "test.db")

            conn = sqlite3.connect(db_path)
            try:
                cursor = conn.cursor()

                # Create the test table
                cursor.execute("""
                    CREATE TABLE IF NOT EXISTS students (
                        id INTEGER PRIMARY KEY,
                        name TEXT NOT NULL,
                        age INTEGER,
                        major TEXT
                    )
                """)

                # Insert the test rows
                cursor.executemany(
                    "INSERT INTO students VALUES (?, ?, ?, ?)", test_data
                )
                conn.commit()
            finally:
                # Always release the file handle, also on error.
                conn.close()

            # Export the database
            excel_path = os.path.splitext(db_path)[0] + '.xlsx'
            export_sqlite_to_excel(db_path, excel_path)

            if os.path.exists(excel_path):
                print("✅ 数据库导出功能正常")
            else:
                print("❌ 数据库导出功能失败")

    except Exception as e:
        print(f"❌ 数据库功能测试失败: {e}")


def main():
    """Run every functional test and print the application access hints."""
    print("=" * 50)
    print("数据提取与转换器 - 功能测试")
    print("=" * 50)

    # Run the individual feature tests
    test_format_conversion()
    test_web_scraping()
    test_ocr_functionality()
    test_database_functionality()

    print("\n" + "=" * 50)
    print("测试完成!")
    print("=" * 50)

    # Show where the application can be reached
    print("\n🌐 应用访问信息:")
    print(f"本地访问: {LOCAL_APP_URL}")
    print(f"网络访问: {NETWORK_APP_URL}")
    print("\n💡 测试建议:")
    print("1. 访问应用界面测试文件上传功能")
    print("2. 使用test_cases目录下的测试文件")
    print("3. 测试网页抓取功能(输入百度等网站URL)")


if __name__ == "__main__":
    main()