import os
import base64
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
from openpyxl import Workbook
import csv
import time
class OCRApp:
    """Tkinter front-end that OCRs table images with Qwen-VL and saves Excel files.

    The window offers two modes: a single image converted to one ``.xlsx``
    file, or every image in a folder converted to one ``.xlsx`` per image.
    The model is asked to answer in CSV, which is then parsed and written
    with openpyxl.

    NOTE(review): several labels start with ``??`` / ``?``. These look like
    emoji that were lost in an encoding round-trip; the original glyphs are
    unknown, so the markers are left untouched.

    NOTE(review): recognition runs synchronously inside the button callback,
    so the window does not process events while a request is in flight.
    """

    FONT_FAMILY = "微軟雅黑"
    BG_COLOR = "#f0f0f0"
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    API_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
    MODEL_NAME = "qwen-vl-ocr-2025-04-13"

    # Extension -> MIME type used in the ``data:`` URL sent to the model.
    _MIME_BY_EXTENSION = {
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.gif': 'image/gif',
        '.bmp': 'image/bmp',
    }

    # Packing options shared by every full-width row of the form.
    _ROW_PACK = {"pady": 5, "fill": "x", "padx": 20}

    def __init__(self, root):
        """Build the whole window inside *root* (the ``tk.Tk`` main window)."""
        self.root = root
        self.root.title("Qwen-VL OCR 表格識別工具 v3.0")
        self.root.geometry("700x500")
        self.root.resizable(False, False)
        self.root.configure(bg=self.BG_COLOR)
        self._configure_styles()

        bg = self.BG_COLOR
        label_font = (self.FONT_FAMILY, 10)

        tk.Label(
            root, text="?? Qwen-VL OCR 表格識別工具",
            font=(self.FONT_FAMILY, 14, "bold"), bg=bg, fg="#2c3e50",
        ).pack(pady=10)

        # --- processing-mode selector -------------------------------------
        frame_mode = tk.Frame(root, bg=bg)
        frame_mode.pack(**self._ROW_PACK)
        tk.Label(frame_mode, text="?? 處理模式:", font=label_font,
                 bg=bg).pack(side="left")
        self.process_mode = tk.StringVar(value="single")
        ttk.Radiobutton(
            frame_mode, text="單文件處理", variable=self.process_mode,
            value="single", style="Radio.TRadiobutton",
            command=self.toggle_process_mode,
        ).pack(side="left", padx=10)
        ttk.Radiobutton(
            frame_mode, text="批量文件夾處理", variable=self.process_mode,
            value="batch", style="Radio.TRadiobutton",
            command=self.toggle_process_mode,
        ).pack(side="left")

        # --- API key ------------------------------------------------------
        frame_api = tk.Frame(root, bg=bg)
        frame_api.pack(**self._ROW_PACK)
        tk.Label(frame_api, text="?? API Key:", font=label_font,
                 bg=bg).pack(side="left")
        self.api_key_entry = tk.Entry(
            frame_api, width=40, show="*", font=("Consolas", 10))
        self.api_key_entry.pack(side="left", padx=10, expand=True, fill="x")

        # --- single-file rows (visible at start-up) -----------------------
        (self.frame_single_image,
         self.image_path_var,
         self.image_entry) = self._build_path_row(
            "??? 圖片路徑:", "?? 選擇圖片", self.select_image)
        self.frame_single_image.pack(**self._ROW_PACK)

        (self.frame_single_output,
         self.output_path_var,
         self.output_entry) = self._build_path_row(
            "?? 輸出路徑:", "?? 選擇輸出", self.select_output)
        self.frame_single_output.pack(**self._ROW_PACK)

        # --- batch rows (created but not packed until batch mode) ---------
        (self.frame_batch_folder,
         self.folder_path_var,
         self.folder_entry) = self._build_path_row(
            "?? 圖片文件夾:", "?? 選擇文件夾", self.select_folder)

        (self.frame_batch_output,
         self.batch_output_var,
         self.batch_output_entry) = self._build_path_row(
            "?? 輸出文件夾:", "?? 選擇輸出", self.select_batch_output)

        # --- progress -----------------------------------------------------
        # Kept as an attribute: toggle_process_mode() uses it as the anchor
        # that mode-specific rows are re-inserted in front of.
        self.frame_progress = tk.Frame(root, bg=bg)
        self.frame_progress.pack(pady=10, fill="x", padx=20)
        self.progress_label = tk.Label(
            self.frame_progress, text="準備就緒...",
            font=(self.FONT_FAMILY, 9), bg=bg, fg="#7f8c8d")
        self.progress_label.pack(side="top", anchor="w")
        self.progress_bar = ttk.Progressbar(
            self.frame_progress, length=500, mode='determinate')
        self.progress_bar.pack(side="top", pady=5)

        # --- start button and status line ---------------------------------
        start_frame = tk.Frame(root, bg=bg)
        start_frame.pack(pady=15)
        self.start_button = ttk.Button(
            start_frame, text="?? 開始識別",
            command=self.start_processing, width=20)
        self.start_button.pack()
        self.start_button.configure(style="Green.TButton")

        self.status_label = tk.Label(
            root, text="", font=label_font, bg=bg, fg="#2c3e50")
        self.status_label.pack(pady=10)

    def _configure_styles(self):
        """Register the ttk styles used by the widgets of this window."""
        family = self.FONT_FAMILY
        style = ttk.Style()
        style.theme_use('clam')
        style.configure("TButton", font=(family, 10), padding=5)
        style.configure("TLabel", font=(family, 10),
                        background=self.BG_COLOR)
        style.configure("TEntry", font=(family, 10), padding=3)
        style.configure("TProgressbar", thickness=8)
        style.configure("Green.TButton", background="#2ecc71",
                        foreground="white", font=(family, 12, "bold"))
        style.configure("Radio.TRadiobutton",
                        background=self.BG_COLOR, font=(family, 10))

    def _build_path_row(self, label_text, button_text, command):
        """Create one "label + read-only entry + browse button" row.

        The row frame is created but NOT packed; the caller decides whether
        it is visible. Returns ``(frame, string_var, entry)``.
        """
        frame = tk.Frame(self.root, bg=self.BG_COLOR)
        tk.Label(frame, text=label_text, font=(self.FONT_FAMILY, 10),
                 bg=self.BG_COLOR).pack(side="left")
        path_var = tk.StringVar()
        entry = tk.Entry(frame, textvariable=path_var, width=30,
                         font=("Consolas", 10), state='readonly')
        entry.pack(side="left", padx=5, expand=True, fill="x")
        ttk.Button(frame, text=button_text,
                   command=command).pack(side="left", padx=5)
        return frame, path_var, entry

    def toggle_process_mode(self):
        """Show the input rows of the selected mode and hide the others."""
        single_rows = (self.frame_single_image, self.frame_single_output)
        batch_rows = (self.frame_batch_folder, self.frame_batch_output)
        if self.process_mode.get() == "single":
            to_show, to_hide = single_rows, batch_rows
        else:
            to_show, to_hide = batch_rows, single_rows

        for frame in to_hide:
            frame.pack_forget()
        for frame in to_show:
            # A plain pack() would append the row after the status label;
            # `before=` puts it back between the API-key row and the
            # progress section.
            frame.pack(before=self.frame_progress, **self._ROW_PACK)

    def select_image(self):
        """Ask for an image file and pre-fill the output path next to it."""
        path = filedialog.askopenfilename(
            title="選擇表格圖片",
            filetypes=[("Image Files", "*.png *.jpg *.jpeg *.gif *.bmp")],
        )
        if path:
            self.image_path_var.set(path)
            # Default output: same location and base name, ".xlsx" suffix.
            self.output_path_var.set(os.path.splitext(path)[0] + ".xlsx")

    def select_output(self):
        """Ask where the single-file Excel result should be written."""
        path = filedialog.asksaveasfilename(
            title="選擇輸出 Excel 文件",
            defaultextension=".xlsx",
            filetypes=[("Excel 文件", "*.xlsx"), ("所有文件", "*.*")],
        )
        if path:
            self.output_path_var.set(path)

    def select_folder(self):
        """Ask for the batch input folder; it also becomes the output folder."""
        path = filedialog.askdirectory(title="選擇圖片文件夾")
        if path:
            self.folder_path_var.set(path)
            self.batch_output_var.set(path)

    def select_batch_output(self):
        """Ask for the folder that receives the batch Excel files."""
        path = filedialog.askdirectory(title="選擇批量輸出文件夾")
        if path:
            self.batch_output_var.set(path)

    def image_to_base64(self, image_path: str) -> str:
        """Return the content of the file at *image_path* as base64 text."""
        with open(image_path, "rb") as f:
            return base64.b64encode(f.read()).decode("utf-8")

    @classmethod
    def _guess_mime_type(cls, image_path: str) -> str:
        """Return the MIME type matching the extension of *image_path*.

        Unknown extensions fall back to ``image/png``.
        """
        extension = os.path.splitext(image_path)[1].lower()
        return cls._MIME_BY_EXTENSION.get(extension, 'image/png')

    @staticmethod
    def _build_messages(image_b64: str, mime_type: str) -> list:
        """Build the chat payload: the image as a data URL plus the CSV prompt."""
        prompt = (
            "請識別這張圖片中的表格內容,并以標準 CSV 格式輸出,包含表頭。\n"
            "不要添加任何解釋、前綴或后綴,只輸出純 CSV 內容。\n"
            "例如:\n姓名,年齡,城市\n張三,25,北京\n李四,30,上海"
        )
        return [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_b64}"
                        },
                    },
                    {"type": "text", "text": prompt},
                ],
            }
        ]

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Remove a Markdown code fence wrapped around *text*, if present."""
        lines = text.strip().splitlines()
        if lines and lines[0].lstrip().startswith("```"):
            lines = lines[1:]
            if lines and lines[-1].strip() == "```":
                lines = lines[:-1]
        return "\n".join(lines)

    @classmethod
    def _parse_csv_rows(cls, csv_text: str) -> list:
        """Parse the model reply into a list of rows (lists of cell strings).

        Uses ``csv.reader`` so quoted cells containing commas stay in one
        cell. Blank lines are dropped.
        """
        cleaned = cls._strip_code_fence(csv_text)
        # keepends=True keeps line breaks inside quoted cells intact.
        reader = csv.reader(cleaned.splitlines(keepends=True))
        return [row for row in reader if row]

    def update_progress(self, value, message):
        """Set the progress bar to *value* and show *message* above it."""
        self.progress_bar['value'] = value
        self.progress_label.config(text=message)
        # Redraw now; the caller usually keeps the main thread busy.
        self.root.update_idletasks()

    def process_single_file(self, api_key, image_path, output_path,
                            show_progress=True):
        """Recognise one image and write the table to *output_path*.

        Args:
            api_key: Key passed to the OpenAI-compatible client.
            image_path: Image file to recognise.
            output_path: Destination ``.xlsx`` file.
            show_progress: When true (default) the intermediate steps are
                reported through ``update_progress``. Batch processing
                passes ``False`` so the per-file steps do not overwrite the
                overall batch progress.

        Returns:
            ``(success, message)``. This method never raises: any exception
            is converted into ``(False, message)``.
        """
        def report(value, message):
            if show_progress:
                self.update_progress(value, message)

        try:
            # NOTE(review): purpose of this pause is not visible here
            # (presumably request pacing) - kept as in the original.
            time.sleep(0.2)
            from openai import OpenAI
            client = OpenAI(api_key=api_key, base_url=self.API_BASE_URL)

            report(30, "正在讀取圖片...")
            image_b64 = self.image_to_base64(image_path)
            mime_type = self._guess_mime_type(image_path)

            report(50, "正在調用模型識別...")
            completion = client.chat.completions.create(
                model=self.MODEL_NAME,
                messages=self._build_messages(image_b64, mime_type),
                max_tokens=4096,
                temperature=0.0,
            )
            # content may be None; treat that like an empty reply so the
            # "no valid data" error below is raised instead of AttributeError.
            csv_text = (completion.choices[0].message.content or "").strip()

            report(80, "正在保存為 Excel...")
            self.save_csv_to_excel(csv_text, output_path)
            return True, f"成功處理: {os.path.basename(image_path)}"
        except Exception as e:
            return False, f"處理 {os.path.basename(image_path)} 失敗: {str(e)}"

    def save_csv_to_excel(self, csv_text, output_path):
        """Write *csv_text* as the rows of a new workbook at *output_path*.

        Raises:
            ValueError: If *csv_text* contains no rows.
        """
        rows = self._parse_csv_rows(csv_text)
        if not rows:
            raise ValueError("模型未返回有效數據")
        wb = Workbook()
        ws = wb.active
        for row in rows:
            ws.append(row)
        wb.save(output_path)

    def start_processing(self):
        """Button callback: validate the API key, then run the selected mode."""
        api_key = self.api_key_entry.get().strip()
        if not api_key:
            messagebox.showerror("錯誤", "請先輸入 API Key!")
            return
        try:
            if self.process_mode.get() == "single":
                self._run_single(api_key)
            else:
                self._run_batch(api_key)
        except Exception as e:
            # Top-level boundary of the callback: report instead of letting
            # the exception disappear into Tk's callback handler.
            self.update_progress(0, f"? 錯誤: {str(e)}")
            messagebox.showerror("錯誤", f"處理失敗:\n{str(e)}")

    def _run_single(self, api_key):
        """Validate the single-file inputs, process the image, report the result."""
        image_path = self.image_path_var.get()
        output_path = self.output_path_var.get()
        if not image_path:
            messagebox.showerror("錯誤", "請選擇圖片文件!")
            return
        if not output_path:
            messagebox.showerror("錯誤", "請選擇輸出文件路徑!")
            return

        # Progress is touched only after validation so a rejected input does
        # not leave the bar stuck on "initialising".
        self.update_progress(10, "正在初始化...")
        success, message = self.process_single_file(
            api_key, image_path, output_path)
        self.update_progress(100 if success else 0, message)
        self.status_label.config(text=message)
        if success:
            messagebox.showinfo("成功", f"表格已保存至:\n{output_path}")
        else:
            messagebox.showerror("錯誤", message)

    def _run_batch(self, api_key):
        """Validate the batch inputs, process every image, show a summary."""
        folder_path = self.folder_path_var.get()
        output_folder = self.batch_output_var.get()
        if not folder_path:
            messagebox.showerror("錯誤", "請選擇圖片文件夾!")
            return
        if not output_folder:
            messagebox.showerror("錯誤", "請選擇輸出文件夾!")
            return

        # Sorted so the processing order is stable across runs.
        image_files = sorted(
            name for name in os.listdir(folder_path)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
            and os.path.isfile(os.path.join(folder_path, name))
        )
        if not image_files:
            messagebox.showinfo("提示", "所選文件夾中沒有圖片文件!")
            self.update_progress(0, "準備就緒...")
            return

        self.update_progress(10, "正在初始化...")
        os.makedirs(output_folder, exist_ok=True)

        total_files = len(image_files)
        success_count = 0
        error_messages = []
        for index, filename in enumerate(image_files, 1):
            try:
                # 10..90 is reserved for the files; the fraction counts the
                # files already finished, not the one about to start.
                overall_progress = 10 + ((index - 1) / total_files) * 80
                self.update_progress(
                    overall_progress,
                    f"正在處理 {index}/{total_files}: {filename}")
                image_path = os.path.join(folder_path, filename)
                base_name = os.path.splitext(filename)[0]
                output_path = os.path.join(
                    output_folder, f"{base_name}.xlsx")
                success, msg = self.process_single_file(
                    api_key, image_path, output_path, show_progress=False)
                if success:
                    success_count += 1
                else:
                    error_messages.append(msg)
            except Exception as e:
                # Best effort: one bad file must not stop the batch.
                error_messages.append(f"處理 {filename} 時出錯: {str(e)}")

        summary = f"批量處理完成: {success_count}/{total_files} 成功"
        self.update_progress(100, summary)
        self.status_label.config(text=summary)

        result_msg = (
            f"批量處理完成!\n成功: {success_count} 個文件\n"
            f"失敗: {total_files - success_count} 個文件"
        )
        if error_messages:
            # Only the first five errors are listed to keep the dialog small.
            result_msg += "\n\n錯誤詳情:\n" + "\n".join(error_messages[:5])
            if len(error_messages) > 5:
                result_msg += f"\n... 還有 {len(error_messages) - 5} 個錯誤"
        messagebox.showinfo("批量處理完成", result_msg)
if __name__ == "__main__":
    # Script entry point: create the Tk main window, attach the OCR UI to
    # it, then hand control to the Tk event loop until the window closes.
    main_window = tk.Tk()
    app = OCRApp(main_window)
    main_window.mainloop()