加入批量ocr 功能

2024-10-04 20:12:29 +08:00 · 2024-10-04 20:12:29 +08:00 · 8a7e8de066
commit 8a7e8de066
parent 7c8b8f47e0
4 changed files with 75 additions and 0 deletions
--- a/.env
+++ b/.env
@ -0,0 +1 @@
 GLM_API_KEY="d58beac412cc13d5a4ea96613f59d55a.NCYKWCm3vyeqepgL"
--- a/.env.example
+++ b/.env.example
@ -0,0 +1 @@
 GLM_API_KEY=""
--- a/chyoso_toolkit_ui.py
+++ b/chyoso_toolkit_ui.py
@ -2,6 +2,13 @@ import gradio as gr
 import subprocess
 import os
 import re
 import io
 import base64
 from dotenv import load_dotenv
 from openai import OpenAI
 from PIL import Image
 load_dotenv()
 def convert_to_wav(audio_file):
@ -192,6 +199,56 @@ def run_ocr(image):
    return content
 def ocr_image(image):
    # 将图片转换为 base64 编码
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "识别图片中的文字并以纯文本(txt)格式输出"
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{image_base64}"
                    }
                }
            ]
        }
    ]
    client = OpenAI(
        # This is the default and can be omitted
        api_key=os.getenv("GLM_API_KEY"),
        base_url="https://open.bigmodel.cn/api/paas/v4/"
    )
    chat_completion = client.chat.completions.create(
        messages=messages,
        model="glm-4v-plus",
    )
    return chat_completion.choices[0].message.content
 def batch_ocr(files):
    results = []
    for file in files:
        image = Image.open(file)
        ocr_result = ocr_image(image)
        results.append(f"\n\n{ocr_result}\n\n")
    # 将结果写入 Markdown 文件
    output_file = "ocr_results.txt"
    with open(output_file, "w") as f:
        f.write("\n".join(results))
    return output_file
 with gr.Blocks() as iface:
    gr.Markdown("# 大模型工具集")
    with gr.Tabs():
@ -265,6 +322,19 @@ with gr.Blocks() as iface:
            btn_recognize.click(fn=run_ocr, inputs=image_input, outputs=text_output)
        with gr.Tab("批量图片识别"):
            gr.Markdown("## OCR 图片批量识别")
            with gr.Row():
                with gr.Column():
                    input_files = gr.File(file_count="multiple", label="Upload Images")
                with gr.Column():
                    output_file = gr.File(label="Download OCR Results")
            with gr.Row():
                process_button = gr.Button("Process Images")
            # 绑定按钮点击事件
            process_button.click(batch_ocr, inputs=input_files, outputs=output_file)
    iface.load(fn=update_log_output, outputs=[log_output], every=1)
 iface.launch(server_name="0.0.0.0")
--- a/requirements.txt
+++ b/requirements.txt
@ -1 +1,4 @@
 gradio==4.44.0
 openai==1.44.1
 python-dotenv~=1.0.1
 pillow~=10.4.0
		`@ -0,0 +1 @@`
							`GLM_API_KEY="d58beac412cc13d5a4ea96613f59d55a.NCYKWCm3vyeqepgL"`