From 3f72706e241266b09f50530d3ad888a517d4a55d Mon Sep 17 00:00:00 2001 From: William Jin Date: Tue, 15 Oct 2024 10:07:46 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=89=B9=E9=87=8F=E5=9B=BE?= =?UTF-8?q?=E7=89=87=E8=AF=86=E5=88=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + chyoso_toolkit_ui.py | 26 +++++++++++++++++++------- 2 files changed, 20 insertions(+), 7 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2eea525 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.env \ No newline at end of file diff --git a/chyoso_toolkit_ui.py b/chyoso_toolkit_ui.py index bd40b0d..2d8c755 100644 --- a/chyoso_toolkit_ui.py +++ b/chyoso_toolkit_ui.py @@ -1,3 +1,4 @@ +import PIL import gradio as gr import subprocess import os @@ -199,18 +200,27 @@ def run_ocr(image): return content -def ocr_image(image): - # 将图片转换为 base64 编码 +def ocr_image(image, user_query=""): + # 压缩图片并降低分辨率 + max_size = 1600 + aspect_ratio = image.width / image.height + if aspect_ratio > 1: # 宽度大于高度 + new_size = (max_size, int(max_size / aspect_ratio)) + else: # 高度大于或等于宽度 + new_size = (int(max_size * aspect_ratio), max_size) + compressed_image = image.resize(new_size, PIL.Image.Resampling.LANCZOS) buffered = io.BytesIO() - image.save(buffered, format="JPEG") + + compressed_image.save(buffered, format="JPEG", quality=75) # 设置JPEG质量为85 image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8") + print(f"Base64 字符串长度: {len(image_base64) / 1024:.2f} k") messages = [ { "role": "user", "content": [ { "type": "text", - "text": "识别图片中的文字并以纯文本(txt)格式输出" + "text": "识别图片中的文字并以纯文本(txt)格式输出,如果图片分为左右两栏,则先输出左边栏再输出右边栏的内容。" + user_query }, { "type": "image_url", @@ -234,12 +244,12 @@ def ocr_image(image): return chat_completion.choices[0].message.content -def batch_ocr(files): +def batch_ocr(files, user_query): results = [] for file in files: try: image = Image.open(file) - ocr_result = ocr_image(image) + ocr_result = ocr_image(image, user_query) results.append(f"\n\n{ocr_result}\n\n") except Exception as e: print(e) @@ -334,9 +344,11 @@ with gr.Blocks() as iface: output_file = gr.File(label="Download OCR Results") with gr.Row(): process_button = gr.Button("Process Images") + with gr.Row(): + user_query_text = gr.Textbox(placeholder="输入额外的要求") # 绑定按钮点击事件 - process_button.click(batch_ocr, inputs=input_files, outputs=output_file) + process_button.click(batch_ocr, inputs=[input_files, user_query_text], outputs=output_file) iface.load(fn=update_log_output, outputs=[log_output], every=1)