From 22f47d90de60896e9844bd0f982a63977ec90afc Mon Sep 17 00:00:00 2001 From: yyhhyyyyyy Date: Fri, 9 May 2025 22:14:43 +0800 Subject: [PATCH 1/2] feat: add TTS services provider selection list --- app/services/voice.py | 6 +- webui/Main.py | 151 +++++++++++++++++++++++++++++------------- webui/i18n/de.json | 2 + webui/i18n/en.json | 2 + webui/i18n/pt.json | 2 + webui/i18n/vi.json | 2 + webui/i18n/zh.json | 2 + 7 files changed, 118 insertions(+), 49 deletions(-) diff --git a/app/services/voice.py b/app/services/voice.py index 4cc7327..e2d9fe9 100644 --- a/app/services/voice.py +++ b/app/services/voice.py @@ -16,7 +16,7 @@ from app.utils import utils def get_all_azure_voices(filter_locals=None) -> list[str]: - voices_str = """ + azure_voices_str = """ Name: af-ZA-AdriNeural Gender: Female @@ -1015,7 +1015,7 @@ Gender: Female # 定义正则表达式模式,用于匹配 Name 和 Gender 行 pattern = re.compile(r"Name:\s*(.+)\s*Gender:\s*(.+)\s*", re.MULTILINE) # 使用正则表达式查找所有匹配项 - matches = pattern.findall(voices_str) + matches = pattern.findall(azure_voices_str) for name, gender in matches: # 应用过滤条件 @@ -1219,7 +1219,7 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str) """ start_t = mktimestamp(start_time).replace(".", ",") end_t = mktimestamp(end_time).replace(".", ",") - return f"{idx}\n" f"{start_t} --> {end_t}\n" f"{sub_text}\n" + return f"{idx}\n{start_t} --> {end_t}\n{sub_text}\n" start_time = -1.0 sub_items = [] diff --git a/webui/Main.py b/webui/Main.py index 0ff0b02..4e4cee6 100644 --- a/webui/Main.py +++ b/webui/Main.py @@ -44,7 +44,7 @@ st.set_page_config( streamlit_style = """ """ @@ -107,6 +107,7 @@ support_locales = [ "th-TH", ] + def get_all_fonts(): fonts = [] for root, dirs, files in os.walk(font_dir): @@ -197,7 +198,8 @@ def tr(key): loc = locales.get(st.session_state["ui_language"], {}) return loc.get("Translation", {}).get(key, key) - # 创建基础设置折叠框 + +# 创建基础设置折叠框 if not config.app.get("hide_config", False): with st.expander(tr("Basic Settings"), expanded=False): config_panels = st.columns(3) @@ -220,7 +222,7 @@ if not config.app.get("hide_config", False): config.ui["hide_log"] = hide_log # 中间面板 - LLM 设置 - + with middle_config_panel: st.write(tr("LLM Settings")) llm_providers = [ @@ -423,31 +425,31 @@ if not config.app.get("hide_config", False): # 右侧面板 - API 密钥设置 with right_config_panel: - def get_keys_from_config(cfg_key): - api_keys = config.app.get(cfg_key, []) - if isinstance(api_keys, str): - api_keys = [api_keys] - api_key = ", ".join(api_keys) - return api_key + def get_keys_from_config(cfg_key): + api_keys = config.app.get(cfg_key, []) + if isinstance(api_keys, str): + api_keys = [api_keys] + api_key = ", ".join(api_keys) + return api_key - def save_keys_to_config(cfg_key, value): - value = value.replace(" ", "") - if value: - config.app[cfg_key] = value.split(",") + def save_keys_to_config(cfg_key, value): + value = value.replace(" ", "") + if value: + config.app[cfg_key] = value.split(",") - st.write(tr("Video Source Settings")) + st.write(tr("Video Source Settings")) - pexels_api_key = get_keys_from_config("pexels_api_keys") - pexels_api_key = st.text_input( - tr("Pexels API Key"), value=pexels_api_key, type="password" - ) - save_keys_to_config("pexels_api_keys", pexels_api_key) + pexels_api_key = get_keys_from_config("pexels_api_keys") + pexels_api_key = st.text_input( + tr("Pexels API Key"), value=pexels_api_key, type="password" + ) + save_keys_to_config("pexels_api_keys", pexels_api_key) - pixabay_api_key = get_keys_from_config("pixabay_api_keys") - pixabay_api_key = st.text_input( - tr("Pixabay API Key"), value=pixabay_api_key, type="password" - ) - save_keys_to_config("pixabay_api_keys", pixabay_api_key) + pixabay_api_key = get_keys_from_config("pixabay_api_keys") + pixabay_api_key = st.text_input( + tr("Pixabay API Key"), value=pixabay_api_key, type="password" + ) + save_keys_to_config("pixabay_api_keys", pixabay_api_key) llm_provider = config.app.get("llm_provider", "").lower() panel = st.columns(3) @@ -615,42 +617,96 @@ with middle_panel: with st.container(border=True): st.write(tr("Audio Settings")) - # tts_providers = ['edge', 'azure'] - # tts_provider = st.selectbox(tr("TTS Provider"), tts_providers) + # 添加TTS服务器选择下拉框 + tts_servers = [ + ("azure-tts-v1", "Azure TTS V1"), + ("azure-tts-v2", "Azure TTS V2"), + ] + + # 获取保存的TTS服务器,默认为v1 + saved_tts_server = config.ui.get("tts_server", "azure-tts-v1") + saved_tts_server_index = 0 + for i, (server_value, _) in enumerate(tts_servers): + if server_value == saved_tts_server: + saved_tts_server_index = i + break + + selected_tts_server_index = st.selectbox( + tr("TTS Servers"), + options=range(len(tts_servers)), + format_func=lambda x: tts_servers[x][1], + index=saved_tts_server_index, + ) + + selected_tts_server = tts_servers[selected_tts_server_index][0] + config.ui["tts_server"] = selected_tts_server + + # 获取所有声音 + all_voices = voice.get_all_azure_voices(filter_locals=None) + + # 根据选择的TTS服务器筛选声音 + filtered_voices = [] + for v in all_voices: + if selected_tts_server == "azure-tts-v2": + # V2版本的声音名称中包含"v2" + if "V2" in v: + filtered_voices.append(v) + else: + # V1版本的声音名称中不包含"v2" + if "V2" not in v: + filtered_voices.append(v) - voices = voice.get_all_azure_voices(filter_locals=None) friendly_names = { v: v.replace("Female", tr("Female")) .replace("Male", tr("Male")) .replace("Neural", "") - for v in voices + for v in filtered_voices } + saved_voice_name = config.ui.get("voice_name", "") saved_voice_name_index = 0 + + # 检查保存的声音是否在当前筛选的声音列表中 if saved_voice_name in friendly_names: saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name) else: - for i, v in enumerate(voices): - if ( - v.lower().startswith(st.session_state["ui_language"].lower()) - and "V2" not in v - ): + # 如果不在,则根据当前UI语言选择一个默认声音 + for i, v in enumerate(filtered_voices): + if v.lower().startswith(st.session_state["ui_language"].lower()): saved_voice_name_index = i break - selected_friendly_name = st.selectbox( - tr("Speech Synthesis"), - options=list(friendly_names.values()), - index=saved_voice_name_index, - ) + # 如果没有找到匹配的声音,使用第一个声音 + if saved_voice_name_index >= len(friendly_names) and friendly_names: + saved_voice_name_index = 0 - voice_name = list(friendly_names.keys())[ - list(friendly_names.values()).index(selected_friendly_name) - ] - params.voice_name = voice_name - config.ui["voice_name"] = voice_name + # 确保有声音可选 + if friendly_names: + selected_friendly_name = st.selectbox( + tr("Speech Synthesis"), + options=list(friendly_names.values()), + index=min(saved_voice_name_index, len(friendly_names) - 1) + if friendly_names + else 0, + ) - if st.button(tr("Play Voice")): + voice_name = list(friendly_names.keys())[ + list(friendly_names.values()).index(selected_friendly_name) + ] + params.voice_name = voice_name + config.ui["voice_name"] = voice_name + else: + # 如果没有声音可选,显示提示信息 + st.warning( + tr( + "No voices available for the selected TTS server. Please select another server." + ) + ) + params.voice_name = "" + config.ui["voice_name"] = "" + + # 只有在有声音可选时才显示试听按钮 + if friendly_names and st.button(tr("Play Voice")): play_content = params.video_subject if not play_content: play_content = params.video_script @@ -680,7 +736,10 @@ with middle_panel: if os.path.exists(audio_file): os.remove(audio_file) - if voice.is_azure_v2_voice(voice_name): + # 当选择V2版本或者声音是V2声音时,显示服务区域和API key输入框 + if selected_tts_server == "azure-tts-v2" or ( + voice_name and voice.is_azure_v2_voice(voice_name) + ): saved_azure_speech_region = config.azure.get("speech_region", "") saved_azure_speech_key = config.azure.get("speech_key", "") azure_speech_region = st.text_input( @@ -876,4 +935,4 @@ if start_button: logger.info(tr("Video Generation Completed")) scroll_to_bottom() -config.save_config() \ No newline at end of file +config.save_config() diff --git a/webui/i18n/de.json b/webui/i18n/de.json index f2fc0da..159154c 100644 --- a/webui/i18n/de.json +++ b/webui/i18n/de.json @@ -91,6 +91,8 @@ "Voice Example": "Dies ist ein Beispieltext zum Testen der Sprachsynthese", "Synthesizing Voice": "Sprachsynthese läuft, bitte warten...", "TTS Provider": "Sprachsynthese-Anbieter auswählen", + "TTS Servers": "TTS-Server", + "No voices available for the selected TTS server. Please select another server.": "Keine Stimmen für den ausgewählten TTS-Server verfügbar. Bitte wählen Sie einen anderen Server.", "Hide Log": "Protokoll ausblenden", "Hide Basic Settings": "Basis-Einstellungen ausblenden\n\nWenn diese Option deaktiviert ist, wird die Basis-Einstellungen-Leiste nicht auf der Seite angezeigt.\n\nWenn Sie sie erneut anzeigen möchten, setzen Sie `hide_config = false` in `config.toml`", "LLM Settings": "**LLM-Einstellungen**", diff --git a/webui/i18n/en.json b/webui/i18n/en.json index b1738e7..2116844 100644 --- a/webui/i18n/en.json +++ b/webui/i18n/en.json @@ -91,6 +91,8 @@ "Voice Example": "This is an example text for testing speech synthesis", "Synthesizing Voice": "Synthesizing voice, please wait...", "TTS Provider": "Select the voice synthesis provider", + "TTS Servers": "TTS Servers", + "No voices available for the selected TTS server. Please select another server.": "No voices available for the selected TTS server. Please select another server.", "Hide Log": "Hide Log", "Hide Basic Settings": "Hide Basic Settings\n\nHidden, the basic settings panel will not be displayed on the page.\n\nIf you need to display it again, please set `hide_config = false` in `config.toml`", "LLM Settings": "**LLM Settings**", diff --git a/webui/i18n/pt.json b/webui/i18n/pt.json index 0c1202e..55339c9 100644 --- a/webui/i18n/pt.json +++ b/webui/i18n/pt.json @@ -91,6 +91,8 @@ "Voice Example": "Este é um exemplo de texto para testar a síntese de fala", "Synthesizing Voice": "Sintetizando voz, por favor aguarde...", "TTS Provider": "Selecione o provedor de síntese de voz", + "TTS Servers": "Servidores TTS", + "No voices available for the selected TTS server. Please select another server.": "Não há vozes disponíveis para o servidor TTS selecionado. Por favor, selecione outro servidor.", "Hide Log": "Ocultar Log", "Hide Basic Settings": "Ocultar Configurações Básicas\n\nOculto, o painel de configurações básicas não será exibido na página.\n\nSe precisar exibi-lo novamente, defina `hide_config = false` em `config.toml`", "LLM Settings": "**Configurações do LLM**", diff --git a/webui/i18n/vi.json b/webui/i18n/vi.json index 7e1ebef..e71fa6d 100644 --- a/webui/i18n/vi.json +++ b/webui/i18n/vi.json @@ -91,6 +91,8 @@ "Voice Example": "Đây là văn bản mẫu để kiểm tra tổng hợp giọng nói", "Synthesizing Voice": "Đang tổng hợp giọng nói, vui lòng đợi...", "TTS Provider": "Chọn nhà cung cấp tổng hợp giọng nói", + "TTS Servers": "Máy chủ TTS", + "No voices available for the selected TTS server. Please select another server.": "Không có giọng nói nào cho máy chủ TTS đã chọn. Vui lòng chọn máy chủ khác.", "Hide Log": "Ẩn Nhật Ký", "Hide Basic Settings": "Ẩn Cài Đặt Cơ Bản\n\nẨn, thanh cài đặt cơ bản sẽ không hiển thị trên trang web.\n\nNếu bạn muốn hiển thị lại, vui lòng đặt `hide_config = false` trong `config.toml`", "LLM Settings": "**Cài Đặt LLM**", diff --git a/webui/i18n/zh.json b/webui/i18n/zh.json index 85289c0..e48d560 100644 --- a/webui/i18n/zh.json +++ b/webui/i18n/zh.json @@ -91,6 +91,8 @@ "Voice Example": "这是一段测试语音合成的示例文本", "Synthesizing Voice": "语音合成中,请稍候...", "TTS Provider": "语音合成提供商", + "TTS Servers": "TTS服务器", + "No voices available for the selected TTS server. Please select another server.": "当前选择的TTS服务器没有可用的声音,请选择其他服务器。", "Hide Log": "隐藏日志", "Hide Basic Settings": "隐藏基础设置\n\n隐藏后,基础设置面板将不会显示在页面中。\n\n如需要再次显示,请在 `config.toml` 中设置 `hide_config = false`", "LLM Settings": "**大模型设置**", From 45f32756a3c43f6d5d92bde4c5f5b50375a11a7e Mon Sep 17 00:00:00 2001 From: yyhhyyyyyy Date: Fri, 9 May 2025 23:31:04 +0800 Subject: [PATCH 2/2] feat: increase siliconflow TTS services --- app/config/config.py | 11 ++- app/services/voice.py | 193 +++++++++++++++++++++++++++++++++++++++++- config.example.toml | 5 ++ webui/Main.py | 61 ++++++++++--- webui/i18n/de.json | 4 + webui/i18n/en.json | 4 + webui/i18n/pt.json | 4 + webui/i18n/vi.json | 4 + webui/i18n/zh.json | 4 + 9 files changed, 273 insertions(+), 17 deletions(-) diff --git a/app/config/config.py b/app/config/config.py index e1534a4..aabf3bd 100644 --- a/app/config/config.py +++ b/app/config/config.py @@ -36,6 +36,7 @@ def save_config(): with open(config_file, "w", encoding="utf-8") as f: _cfg["app"] = app _cfg["azure"] = azure + _cfg["siliconflow"] = siliconflow _cfg["ui"] = ui f.write(toml.dumps(_cfg)) @@ -45,9 +46,13 @@ app = _cfg.get("app", {}) whisper = _cfg.get("whisper", {}) proxy = _cfg.get("proxy", {}) azure = _cfg.get("azure", {}) -ui = _cfg.get("ui", { - "hide_log": False, -}) +siliconflow = _cfg.get("siliconflow", {}) +ui = _cfg.get( + "ui", + { + "hide_log": False, + }, +) hostname = socket.gethostname() diff --git a/app/services/voice.py b/app/services/voice.py index e2d9fe9..8e9d824 100644 --- a/app/services/voice.py +++ b/app/services/voice.py @@ -6,6 +6,7 @@ from typing import Union from xml.sax.saxutils import unescape import edge_tts +import requests from edge_tts import SubMaker, submaker from edge_tts.submaker import mktimestamp from loguru import logger @@ -15,6 +16,32 @@ from app.config import config from app.utils import utils +def get_siliconflow_voices() -> list[str]: + """ + 获取硅基流动的声音列表 + + Returns: + 声音列表,格式为 ["siliconflow:FunAudioLLM/CosyVoice2-0.5B:alex", ...] + """ + # 硅基流动的声音列表和对应的性别(用于显示) + voices_with_gender = [ + ("FunAudioLLM/CosyVoice2-0.5B", "alex", "Male"), + ("FunAudioLLM/CosyVoice2-0.5B", "anna", "Female"), + ("FunAudioLLM/CosyVoice2-0.5B", "bella", "Female"), + ("FunAudioLLM/CosyVoice2-0.5B", "benjamin", "Male"), + ("FunAudioLLM/CosyVoice2-0.5B", "charles", "Male"), + ("FunAudioLLM/CosyVoice2-0.5B", "claire", "Female"), + ("FunAudioLLM/CosyVoice2-0.5B", "david", "Male"), + ("FunAudioLLM/CosyVoice2-0.5B", "diana", "Female"), + ] + + # 添加siliconflow:前缀,并格式化为显示名称 + return [ + f"siliconflow:{model}:{voice}-{gender}" + for model, voice, gender in voices_with_gender + ] + + def get_all_azure_voices(filter_locals=None) -> list[str]: azure_voices_str = """ Name: af-ZA-AdriNeural @@ -1045,11 +1072,37 @@ def is_azure_v2_voice(voice_name: str): return "" +def is_siliconflow_voice(voice_name: str): + """检查是否是硅基流动的声音""" + return voice_name.startswith("siliconflow:") + + def tts( - text: str, voice_name: str, voice_rate: float, voice_file: str + text: str, + voice_name: str, + voice_rate: float, + voice_file: str, + voice_volume: float = 1.0, ) -> Union[SubMaker, None]: if is_azure_v2_voice(voice_name): return azure_tts_v2(text, voice_name, voice_file) + elif is_siliconflow_voice(voice_name): + # 从voice_name中提取模型和声音 + # 格式: siliconflow:model:voice-Gender + parts = voice_name.split(":") + if len(parts) >= 3: + model = parts[1] + # 移除性别后缀,例如 "alex-Male" -> "alex" + voice_with_gender = parts[2] + voice = voice_with_gender.split("-")[0] + # 构建完整的voice参数,格式为 "model:voice" + full_voice = f"{model}:{voice}" + return siliconflow_tts( + text, model, full_voice, voice_rate, voice_file, voice_volume + ) + else: + logger.error(f"Invalid siliconflow voice name format: {voice_name}") + return None return azure_tts_v1(text, voice_name, voice_rate, voice_file) @@ -1098,6 +1151,144 @@ def azure_tts_v1( return None +def siliconflow_tts( + text: str, + model: str, + voice: str, + voice_rate: float, + voice_file: str, + voice_volume: float = 1.0, +) -> Union[SubMaker, None]: + """ + 使用硅基流动的API生成语音 + + Args: + text: 要转换为语音的文本 + model: 模型名称,如 "FunAudioLLM/CosyVoice2-0.5B" + voice: 声音名称,如 "FunAudioLLM/CosyVoice2-0.5B:alex" + voice_rate: 语音速度,范围[0.25, 4.0] + voice_file: 输出的音频文件路径 + voice_volume: 语音音量,范围[0.6, 5.0],需要转换为硅基流动的增益范围[-10, 10] + + Returns: + SubMaker对象或None + """ + text = text.strip() + api_key = config.siliconflow.get("api_key", "") + + if not api_key: + logger.error("SiliconFlow API key is not set") + return None + + # 将voice_volume转换为硅基流动的增益范围 + # 默认voice_volume为1.0,对应gain为0 + gain = voice_volume - 1.0 + # 确保gain在[-10, 10]范围内 + gain = max(-10, min(10, gain)) + + url = "https://api.siliconflow.cn/v1/audio/speech" + + payload = { + "model": model, + "input": text, + "voice": voice, + "response_format": "mp3", + "sample_rate": 32000, + "stream": False, + "speed": voice_rate, + "gain": gain, + } + + headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} + + for i in range(3): # 尝试3次 + try: + logger.info( + f"start siliconflow tts, model: {model}, voice: {voice}, try: {i + 1}" + ) + + response = requests.post(url, json=payload, headers=headers) + + if response.status_code == 200: + # 保存音频文件 + with open(voice_file, "wb") as f: + f.write(response.content) + + # 创建一个空的SubMaker对象 + sub_maker = SubMaker() + + # 获取音频文件的实际长度 + try: + # 尝试使用moviepy获取音频长度 + from moviepy import AudioFileClip + + audio_clip = AudioFileClip(voice_file) + audio_duration = audio_clip.duration + audio_clip.close() + + # 将音频长度转换为100纳秒单位(与edge_tts兼容) + audio_duration_100ns = int(audio_duration * 10000000) + + # 使用文本分割来创建更准确的字幕 + # 将文本按标点符号分割成句子 + sentences = utils.split_string_by_punctuations(text) + + if sentences: + # 计算每个句子的大致时长(按字符数比例分配) + total_chars = sum(len(s) for s in sentences) + char_duration = ( + audio_duration_100ns / total_chars if total_chars > 0 else 0 + ) + + current_offset = 0 + for sentence in sentences: + if not sentence.strip(): + continue + + # 计算当前句子的时长 + sentence_chars = len(sentence) + sentence_duration = int(sentence_chars * char_duration) + + # 添加到SubMaker + sub_maker.subs.append(sentence) + sub_maker.offset.append( + (current_offset, current_offset + sentence_duration) + ) + + # 更新偏移量 + current_offset += sentence_duration + else: + # 如果无法分割,则使用整个文本作为一个字幕 + sub_maker.subs = [text] + sub_maker.offset = [(0, audio_duration_100ns)] + + except Exception as e: + logger.warning(f"Failed to create accurate subtitles: {str(e)}") + # 回退到简单的字幕 + sub_maker.subs = [text] + # 使用音频文件的实际长度,如果无法获取,则假设为10秒 + sub_maker.offset = [ + ( + 0, + audio_duration_100ns + if "audio_duration_100ns" in locals() + else 10000000, + ) + ] + + logger.success(f"siliconflow tts succeeded: {voice_file}") + print("s", sub_maker.subs, sub_maker.offset) + return sub_maker + else: + logger.error( + f"siliconflow tts failed with status code {response.status_code}: {response.text}" + ) + except Exception as e: + logger.error(f"siliconflow tts failed: {str(e)}") + + return None + + def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker, None]: voice_name = is_azure_v2_voice(voice_name) if not voice_name: diff --git a/config.example.toml b/config.example.toml index 9fa0049..ecea468 100644 --- a/config.example.toml +++ b/config.example.toml @@ -193,6 +193,11 @@ compute_type = "int8" speech_key = "" speech_region = "" +[siliconflow] +# SiliconFlow API Key +# Get your API key at https://siliconflow.cn +api_key = "" + [ui] # UI related settings # 是否隐藏日志信息 diff --git a/webui/Main.py b/webui/Main.py index 4e4cee6..aafed1b 100644 --- a/webui/Main.py +++ b/webui/Main.py @@ -621,6 +621,7 @@ with middle_panel: tts_servers = [ ("azure-tts-v1", "Azure TTS V1"), ("azure-tts-v2", "Azure TTS V2"), + ("siliconflow", "SiliconFlow TTS"), ] # 获取保存的TTS服务器,默认为v1 @@ -641,20 +642,26 @@ with middle_panel: selected_tts_server = tts_servers[selected_tts_server_index][0] config.ui["tts_server"] = selected_tts_server - # 获取所有声音 - all_voices = voice.get_all_azure_voices(filter_locals=None) - - # 根据选择的TTS服务器筛选声音 + # 根据选择的TTS服务器获取声音列表 filtered_voices = [] - for v in all_voices: - if selected_tts_server == "azure-tts-v2": - # V2版本的声音名称中包含"v2" - if "V2" in v: - filtered_voices.append(v) - else: - # V1版本的声音名称中不包含"v2" - if "V2" not in v: - filtered_voices.append(v) + + if selected_tts_server == "siliconflow": + # 获取硅基流动的声音列表 + filtered_voices = voice.get_siliconflow_voices() + else: + # 获取Azure的声音列表 + all_voices = voice.get_all_azure_voices(filter_locals=None) + + # 根据选择的TTS服务器筛选声音 + for v in all_voices: + if selected_tts_server == "azure-tts-v2": + # V2版本的声音名称中包含"v2" + if "V2" in v: + filtered_voices.append(v) + else: + # V1版本的声音名称中不包含"v2" + if "V2" not in v: + filtered_voices.append(v) friendly_names = { v: v.replace("Female", tr("Female")) @@ -720,6 +727,7 @@ with middle_panel: voice_name=voice_name, voice_rate=params.voice_rate, voice_file=audio_file, + voice_volume=params.voice_volume, ) # if the voice file generation failed, try again with a default content. if not sub_maker: @@ -729,6 +737,7 @@ with middle_panel: voice_name=voice_name, voice_rate=params.voice_rate, voice_file=audio_file, + voice_volume=params.voice_volume, ) if sub_maker and os.path.exists(audio_file): @@ -756,6 +765,32 @@ with middle_panel: config.azure["speech_region"] = azure_speech_region config.azure["speech_key"] = azure_speech_key + # 当选择硅基流动时,显示API key输入框和说明信息 + if selected_tts_server == "siliconflow" or ( + voice_name and voice.is_siliconflow_voice(voice_name) + ): + saved_siliconflow_api_key = config.siliconflow.get("api_key", "") + + siliconflow_api_key = st.text_input( + tr("SiliconFlow API Key"), + value=saved_siliconflow_api_key, + type="password", + key="siliconflow_api_key_input", + ) + + # 显示硅基流动的说明信息 + st.info( + tr("SiliconFlow TTS Settings") + + ":\n" + + "- " + + tr("Speed: Range [0.25, 4.0], default is 1.0") + + "\n" + + "- " + + tr("Volume: Uses Speech Volume setting, default 1.0 maps to gain 0") + ) + + config.siliconflow["api_key"] = siliconflow_api_key + params.voice_volume = st.selectbox( tr("Speech Volume"), options=[0.6, 0.8, 1.0, 1.2, 1.5, 2.0, 3.0, 4.0, 5.0], diff --git a/webui/i18n/de.json b/webui/i18n/de.json index 159154c..cedc3b7 100644 --- a/webui/i18n/de.json +++ b/webui/i18n/de.json @@ -93,6 +93,10 @@ "TTS Provider": "Sprachsynthese-Anbieter auswählen", "TTS Servers": "TTS-Server", "No voices available for the selected TTS server. Please select another server.": "Keine Stimmen für den ausgewählten TTS-Server verfügbar. Bitte wählen Sie einen anderen Server.", + "SiliconFlow API Key": "SiliconFlow API-Schlüssel", + "SiliconFlow TTS Settings": "SiliconFlow TTS-Einstellungen", + "Speed: Range [0.25, 4.0], default is 1.0": "Geschwindigkeit: Bereich [0.25, 4.0], Standardwert ist 1.0", + "Volume: Uses Speech Volume setting, default 1.0 maps to gain 0": "Lautstärke: Verwendet die Sprachlautstärke-Einstellung, Standardwert 1.0 entspricht Verstärkung 0", "Hide Log": "Protokoll ausblenden", "Hide Basic Settings": "Basis-Einstellungen ausblenden\n\nWenn diese Option deaktiviert ist, wird die Basis-Einstellungen-Leiste nicht auf der Seite angezeigt.\n\nWenn Sie sie erneut anzeigen möchten, setzen Sie `hide_config = false` in `config.toml`", "LLM Settings": "**LLM-Einstellungen**", diff --git a/webui/i18n/en.json b/webui/i18n/en.json index 2116844..0f3c2c1 100644 --- a/webui/i18n/en.json +++ b/webui/i18n/en.json @@ -93,6 +93,10 @@ "TTS Provider": "Select the voice synthesis provider", "TTS Servers": "TTS Servers", "No voices available for the selected TTS server. Please select another server.": "No voices available for the selected TTS server. Please select another server.", + "SiliconFlow API Key": "SiliconFlow API Key", + "SiliconFlow TTS Settings": "SiliconFlow TTS Settings", + "Speed: Range [0.25, 4.0], default is 1.0": "Speed: Range [0.25, 4.0], default is 1.0", + "Volume: Uses Speech Volume setting, default 1.0 maps to gain 0": "Volume: Uses Speech Volume setting, default 1.0 maps to gain 0", "Hide Log": "Hide Log", "Hide Basic Settings": "Hide Basic Settings\n\nHidden, the basic settings panel will not be displayed on the page.\n\nIf you need to display it again, please set `hide_config = false` in `config.toml`", "LLM Settings": "**LLM Settings**", diff --git a/webui/i18n/pt.json b/webui/i18n/pt.json index 55339c9..6a9d47b 100644 --- a/webui/i18n/pt.json +++ b/webui/i18n/pt.json @@ -93,6 +93,10 @@ "TTS Provider": "Selecione o provedor de síntese de voz", "TTS Servers": "Servidores TTS", "No voices available for the selected TTS server. Please select another server.": "Não há vozes disponíveis para o servidor TTS selecionado. Por favor, selecione outro servidor.", + "SiliconFlow API Key": "Chave API do SiliconFlow", + "SiliconFlow TTS Settings": "Configurações do SiliconFlow TTS", + "Speed: Range [0.25, 4.0], default is 1.0": "Velocidade: Intervalo [0.25, 4.0], o padrão é 1.0", + "Volume: Uses Speech Volume setting, default 1.0 maps to gain 0": "Volume: Usa a configuração de Volume de Fala, o padrão 1.0 corresponde ao ganho 0", "Hide Log": "Ocultar Log", "Hide Basic Settings": "Ocultar Configurações Básicas\n\nOculto, o painel de configurações básicas não será exibido na página.\n\nSe precisar exibi-lo novamente, defina `hide_config = false` em `config.toml`", "LLM Settings": "**Configurações do LLM**", diff --git a/webui/i18n/vi.json b/webui/i18n/vi.json index e71fa6d..c1a604b 100644 --- a/webui/i18n/vi.json +++ b/webui/i18n/vi.json @@ -93,6 +93,10 @@ "TTS Provider": "Chọn nhà cung cấp tổng hợp giọng nói", "TTS Servers": "Máy chủ TTS", "No voices available for the selected TTS server. Please select another server.": "Không có giọng nói nào cho máy chủ TTS đã chọn. Vui lòng chọn máy chủ khác.", + "SiliconFlow API Key": "Khóa API SiliconFlow", + "SiliconFlow TTS Settings": "Cài đặt SiliconFlow TTS", + "Speed: Range [0.25, 4.0], default is 1.0": "Tốc độ: Phạm vi [0.25, 4.0], mặc định là 1.0", + "Volume: Uses Speech Volume setting, default 1.0 maps to gain 0": "Âm lượng: Sử dụng cài đặt Âm lượng Giọng nói, mặc định 1.0 tương ứng với tăng ích 0", "Hide Log": "Ẩn Nhật Ký", "Hide Basic Settings": "Ẩn Cài Đặt Cơ Bản\n\nẨn, thanh cài đặt cơ bản sẽ không hiển thị trên trang web.\n\nNếu bạn muốn hiển thị lại, vui lòng đặt `hide_config = false` trong `config.toml`", "LLM Settings": "**Cài Đặt LLM**", diff --git a/webui/i18n/zh.json b/webui/i18n/zh.json index e48d560..3811ae8 100644 --- a/webui/i18n/zh.json +++ b/webui/i18n/zh.json @@ -93,6 +93,10 @@ "TTS Provider": "语音合成提供商", "TTS Servers": "TTS服务器", "No voices available for the selected TTS server. Please select another server.": "当前选择的TTS服务器没有可用的声音,请选择其他服务器。", + "SiliconFlow API Key": "硅基流动API密钥", + "SiliconFlow TTS Settings": "硅基流动TTS设置", + "Speed: Range [0.25, 4.0], default is 1.0": "语速范围 [0.25, 4.0],默认值为1.0", + "Volume: Uses Speech Volume setting, default 1.0 maps to gain 0": "音量:使用朗读音量设置,默认值1.0对应增益0", "Hide Log": "隐藏日志", "Hide Basic Settings": "隐藏基础设置\n\n隐藏后,基础设置面板将不会显示在页面中。\n\n如需要再次显示,请在 `config.toml` 中设置 `hide_config = false`", "LLM Settings": "**大模型设置**",