From 4e42b668d493e0c3b9a3fdceb4078ec87b38a27b Mon Sep 17 00:00:00 2001
From: William Jin <jinwei@gmail.com>
Date: Thu, 26 Sep 2024 15:27:49 +0800
Subject: [PATCH] =?UTF-8?q?=E5=8A=A0=E5=85=A5ocr=E5=9B=BE=E5=83=8F?=
 =?UTF-8?q?=E8=AF=86=E5=88=AB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 chyoso_toolkit_ui.py | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/chyoso_toolkit_ui.py b/chyoso_toolkit_ui.py
index 2327a27..31ac6c0 100644
--- a/chyoso_toolkit_ui.py
+++ b/chyoso_toolkit_ui.py
@@ -167,6 +167,33 @@ def download_pdf(curl_command, pdf_filename):
     return file_path
 
 
+def run_ocr(image):
+    """Run GOT-OCR on an uploaded PIL image and return the recognized text.
+
+    Returns an error message instead if no image was given or the OCR run fails.
+    """
+    import tempfile
+    if image is None:  # nothing was uploaded before the button was clicked
+        return "请先上传图片"
+    # A per-call temp dir keeps concurrent requests from overwriting one file.
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        image_path = os.path.join(tmp_dir, "img.png")
+        image.save(image_path)
+        # `conda activate` needs an initialised shell; `conda run` works without one.
+        ocr_command = [
+            "conda", "run", "-n", "got",
+            "python3", "/home/tmfc/got-ocr/GOT/demo/run_ocr_2.0_crop.py",
+            "--model-name", "/home/tmfc/got-ocr/models/", "--image-file", image_path,
+        ]
+        try:
+            result = subprocess.run(ocr_command, capture_output=True, text=True)
+        except OSError as exc:  # e.g. the conda executable is not on PATH
+            return f"OCR 失败: {exc}"
+    if result.returncode != 0:
+        return f"OCR 失败: {result.stderr.strip()}"
+    return result.stdout
+
+
 with gr.Blocks() as iface:
     gr.Markdown("# 大模型工具集")
     with gr.Tabs():
@@ -229,6 +256,17 @@ with gr.Blocks() as iface:
 
             pdf_download_button.click(download_pdf, inputs=[curl_text_input, pdf_filename], outputs=pdf_download_result)
 
+        with gr.Tab("图片识别"):  # OCR tab: one column for upload + button, one for the result
+            gr.Markdown("## OCR 图片识别")
+            with gr.Row():
+                with gr.Column():
+                    image_input = gr.Image(type="pil", label="上传图片")  # type="pil": handler gets a PIL image
+                    btn_recognize = gr.Button("识别")
+                with gr.Column():
+                    text_output = gr.Textbox(label="OCR 识别结果")
+
+            btn_recognize.click(fn=run_ocr, inputs=image_input, outputs=text_output)  # run_ocr's return value fills the textbox
+
     iface.load(fn=update_log_output, outputs=[log_output], every=1)
 
 iface.launch(server_name="0.0.0.0")