"""Gradio UI for audio transcription and Markdown-to-DOCX conversion.

Reconstructed from a flattened patch that added ``requirements.txt`` and
``ui.py``. Runtime dependency declared by that patch: ``gradio==4.44.0``.

External programs invoked through ``subprocess``: ``ffmpeg``, the whisper
command-line binary at ``WHISPER_BIN``, and ``pandoc``.
"""
import gradio as gr
import subprocess
import os


# Locations of the whisper installation used for transcription.
WHISPER_DIR = "/home/tmfc/apps/whisper"
WHISPER_BIN = os.path.join(WHISPER_DIR, "main")
WHISPER_MODEL = os.path.join(WHISPER_DIR, "models", "ggml-large-v3-q5_0.bin")


def _seconds_to_millis_arg(seconds):
    """Return *seconds* as an integer-millisecond string for the whisper CLI.

    ``None`` (assumed to be what an empty number field delivers) is treated
    as 0.
    """
    return str(int((seconds or 0) * 1000))


def convert_to_wav(audio_file):
    """Convert *audio_file* to a mono 16 kHz WAV file using ffmpeg.

    The result is always written to ``output.wav`` in the current working
    directory, overwriting any previous conversion.

    Returns:
        The output file name (``"output.wav"``).

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero
            status. Without this check a failed conversion would leave a
            stale ``output.wav`` behind and the wrong audio would be
            transcribed.
    """
    output_wav_file = "output.wav"
    print(f"开始转换音频文件 {audio_file} 为 WAV 格式...")
    subprocess.run(
        [
            "ffmpeg", "-y", "-i", audio_file,
            "-ac", "1",
            "-ar", "16000",
            output_wav_file,
        ],
        check=True,
    )
    print(f"音频文件 {audio_file} 已转换为 WAV 格式,输出文件为 {output_wav_file}")
    return output_wav_file


def transcribe_audio(wav_file, original_filename, offset_time, duration_time):
    """Transcribe *wav_file* with the whisper command-line program.

    Args:
        wav_file: Path of the WAV file to transcribe.
        original_filename: Base name used for the transcript; the output is
            requested at ``WHISPER_DIR/<original_filename>`` via ``-of``.
        offset_time: Start offset in seconds (``None`` is treated as 0).
        duration_time: Duration in seconds (``None`` is treated as 0).

    Returns:
        Path of the transcript text file.

    Raises:
        subprocess.CalledProcessError: if the whisper process fails.
    """
    output_base = os.path.join(WHISPER_DIR, original_filename)
    whisper_cmd = [
        WHISPER_BIN,
        "--language", "Chinese",
        "-otxt",
        "-ot", _seconds_to_millis_arg(offset_time),
        "-d", _seconds_to_millis_arg(duration_time),
        "-of", output_base,
        "-m", WHISPER_MODEL,
        wav_file,
    ]
    print(whisper_cmd)
    print(f"开始转写音频文件 {wav_file}...")
    result = subprocess.run(whisper_cmd, capture_output=True, text=True)
    print(result)
    # Surface a failed run instead of returning a path that was never written.
    result.check_returncode()

    # The transcript lands next to the "-of" base path, so return that path
    # rather than a name relative to the current working directory.
    # NOTE(review): assumes the CLI appends ".txt" to the -of value - confirm.
    txt_file = output_base + ".txt"
    print(f"音频文件 {wav_file} 转写完成,结果已保存为 {txt_file}")

    return txt_file


def process_audio(audio_file, offset_time, duration_time):
    """Convert an uploaded audio file to WAV and transcribe it.

    Returns:
        Path of the transcript text file.
    """
    print("开始处理音频文件...")
    # The transcript is named after the uploaded file.
    original_filename = os.path.basename(audio_file)

    wav_file = convert_to_wav(audio_file)
    txt_file = transcribe_audio(wav_file, original_filename, offset_time, duration_time)

    print("音频文件处理完成")
    return txt_file


def direct_transcribe(audio_file, offset_time, duration_time):
    """Transcribe the existing ``output.wav`` without converting again.

    *audio_file* is accepted only so the click handler can share the same
    inputs as ``process_audio``; it is not used.

    Returns:
        Path of the transcript text file.
    """
    print("开始直接转写音频文件...")

    txt_file = transcribe_audio("output.wav", "output.wav", offset_time, duration_time)

    print("音频文件直接转写完成")
    return txt_file


batch_directory = '/mnt/d/share/audio/'


def list_files():
    """Return the names of the .mp3/.m4a files directly inside the batch directory."""
    return [
        name
        for name in os.listdir(batch_directory)
        if os.path.isfile(os.path.join(batch_directory, name))
        and name.lower().endswith(('.mp3', '.m4a'))
    ]


# Accumulated batch log; shown in the UI through update_log_output().
log_content = ""


def batch_transcribe():
    """Convert and transcribe every audio file in the batch directory.

    Progress is appended to the module-level ``log_content``. A file whose
    conversion or transcription fails is logged and skipped so the remaining
    files are still processed.

    Returns:
        List of transcript paths for the files that succeeded.
    """
    global log_content
    result_file = []
    for file in list_files():
        log_content += "转换" + file + "为 wav\n"
        try:
            wav_file = convert_to_wav(os.path.join(batch_directory, file))
            log_content += "转换wav成功,开始转写\n"
            # NOTE(review): the 10 s offset and 0 duration are hard-coded
            # in the original; confirm they are intended for batch runs.
            txt_file = transcribe_audio(wav_file, file, 10, 0)
        except subprocess.CalledProcessError as e:
            log_content += "处理 " + file + " 失败: " + str(e) + "\n"
            continue
        log_content += "转写 " + file + "完成\n"
        result_file.append(txt_file)
    return result_file


def display_files():
    """Return the batch file names joined by newlines for the file-list textbox."""
    return "\n".join(list_files())


def get_log():
    """Return the accumulated batch log text."""
    return log_content


def update_log_output():
    """Return a Gradio update carrying the current log text."""
    return gr.update(value=get_log())


def convert_to_docx(text):
    """Convert Markdown *text* to ``file.docx`` using pandoc.

    The text is written to ``file.md`` in the current working directory and
    converted from there.

    Returns:
        The output file name (``"file.docx"``).

    Raises:
        gr.Error: if the input is empty or pandoc fails. The output
            component is a file, so an error message returned as a plain
            string would be treated as a file path.
    """
    if not text or not text.strip():
        raise gr.Error("输入框不能为空!")

    # Explicit UTF-8 so non-ASCII Markdown does not depend on the locale.
    with open("file.md", "w", encoding="utf-8") as file:
        file.write(text)

    try:
        subprocess.run(["pandoc", "file.md", "-o", "file.docx"], check=True)
    except subprocess.CalledProcessError as e:
        raise gr.Error(f"转换失败: {e}")

    return "file.docx"


with gr.Blocks() as iface:
    gr.Markdown("# 音频文件转换为 WAV 格式并转录为文字")
    with gr.Tabs():
        with gr.TabItem("处理并转写"):
            with gr.Row():
                audio_input = gr.Audio(type="filepath", label="上传音频文件")
                with gr.Column():
                    offset_input = gr.Number(label="偏移时间 (秒)")
                    duration_input = gr.Number(label="转写时长 (秒)")

            with gr.Row():
                process_button = gr.Button("处理并转写")
                direct_transcribe_button = gr.Button("直接转写")

            output_file = gr.File(label="转写结果")

            process_button.click(
                process_audio,
                inputs=[audio_input, offset_input, duration_input],
                outputs=output_file,
            )
            direct_transcribe_button.click(
                direct_transcribe,
                inputs=[audio_input, offset_input, duration_input],
                outputs=output_file,
            )
        with gr.TabItem("批量处理"):
            with gr.Row():
                link_upload = gr.HTML(value='点击上传文件')
            with gr.Row():
                file_list = gr.Textbox(label="文件列表")
                with gr.Column():
                    list_file_button = gr.Button("刷新文件")
                    batch_process_button = gr.Button("批量处理")
                with gr.Column():
                    batch_output_file = gr.File(label="批量转写结果")

            with gr.Row():
                log_output = gr.Textbox(label="日志信息", lines=10)

            list_file_button.click(fn=display_files, outputs=file_list)

            batch_process_button.click(batch_transcribe, outputs=batch_output_file)

        with gr.Tab("Markdown 转 DOCX"):
            gr.Markdown("## Markdown 转 DOCX 转换器")
            with gr.Row():
                text_input = gr.Textbox(lines=10, placeholder="请在此输入 Markdown 内容...")
            with gr.Row():
                convert_button = gr.Button("转换")
            with gr.Row():
                # Own name so the transcription tab's output_file is not shadowed.
                docx_output_file = gr.File(label="下载转换后的文件")

            convert_button.click(convert_to_docx, inputs=text_input, outputs=docx_output_file)

    # Refresh the log textbox once per second.
    iface.load(fn=update_log_output, outputs=[log_output], every=1)

iface.launch(server_name="0.0.0.0")