diff --git a/app/config/config.py b/app/config/config.py
index e1534a4..c20a3dc 100644
--- a/app/config/config.py
+++ b/app/config/config.py
@@ -36,6 +36,7 @@ def save_config():
with open(config_file, "w", encoding="utf-8") as f:
_cfg["app"] = app
_cfg["azure"] = azure
+ _cfg["siliconflow"] = siliconflow
_cfg["ui"] = ui
f.write(toml.dumps(_cfg))
@@ -45,9 +46,13 @@ app = _cfg.get("app", {})
whisper = _cfg.get("whisper", {})
proxy = _cfg.get("proxy", {})
azure = _cfg.get("azure", {})
-ui = _cfg.get("ui", {
- "hide_log": False,
-})
+siliconflow = _cfg.get("siliconflow", {})
+ui = _cfg.get(
+ "ui",
+ {
+ "hide_log": False,
+ },
+)
hostname = socket.gethostname()
@@ -59,7 +64,7 @@ project_description = _cfg.get(
"project_description",
"https://github.com/harry0703/MoneyPrinterTurbo",
)
-project_version = _cfg.get("project_version", "1.2.5")
+project_version = _cfg.get("project_version", "1.2.6")
reload_debug = False
imagemagick_path = app.get("imagemagick_path", "")
diff --git a/app/services/voice.py b/app/services/voice.py
index 4cc7327..8e9d824 100644
--- a/app/services/voice.py
+++ b/app/services/voice.py
@@ -6,6 +6,7 @@ from typing import Union
from xml.sax.saxutils import unescape
import edge_tts
+import requests
from edge_tts import SubMaker, submaker
from edge_tts.submaker import mktimestamp
from loguru import logger
@@ -15,8 +16,34 @@ from app.config import config
from app.utils import utils
+def get_siliconflow_voices() -> list[str]:
+    """
+    List the built-in SiliconFlow voices as selectable voice identifiers.
+
+    Returns:
+        Identifiers of the form "siliconflow:<model>:<voice>-<Gender>", e.g.
+        "siliconflow:FunAudioLLM/CosyVoice2-0.5B:alex-Male". The gender suffix
+        is only there for display purposes.
+    """
+    # Every built-in voice currently belongs to the same model.
+    model = "FunAudioLLM/CosyVoice2-0.5B"
+
+    # Voice id -> gender label; insertion order is the order shown to the user.
+    gender_by_voice = {
+        "alex": "Male",
+        "anna": "Female",
+        "bella": "Female",
+        "benjamin": "Male",
+        "charles": "Male",
+        "claire": "Female",
+        "david": "Male",
+        "diana": "Female",
+    }
+
+    identifiers = []
+    for voice, gender in gender_by_voice.items():
+        identifiers.append(f"siliconflow:{model}:{voice}-{gender}")
+    return identifiers
+
+
def get_all_azure_voices(filter_locals=None) -> list[str]:
- voices_str = """
+ azure_voices_str = """
Name: af-ZA-AdriNeural
Gender: Female
@@ -1015,7 +1042,7 @@ Gender: Female
# 定义正则表达式模式,用于匹配 Name 和 Gender 行
pattern = re.compile(r"Name:\s*(.+)\s*Gender:\s*(.+)\s*", re.MULTILINE)
# 使用正则表达式查找所有匹配项
- matches = pattern.findall(voices_str)
+ matches = pattern.findall(azure_voices_str)
for name, gender in matches:
# 应用过滤条件
@@ -1045,11 +1072,37 @@ def is_azure_v2_voice(voice_name: str):
return ""
+def is_siliconflow_voice(voice_name: str):
+    """Return True when ``voice_name`` carries the ``siliconflow:`` prefix."""
+    prefix = "siliconflow:"
+    return voice_name[: len(prefix)] == prefix
+
+
def tts(
- text: str, voice_name: str, voice_rate: float, voice_file: str
+ text: str,
+ voice_name: str,
+ voice_rate: float,
+ voice_file: str,
+ voice_volume: float = 1.0,
) -> Union[SubMaker, None]:
+ """
+ Synthesize ``text`` into ``voice_file`` with the backend implied by ``voice_name``.
+
+ Azure V2 voices go to ``azure_tts_v2``, names starting with "siliconflow:"
+ go to ``siliconflow_tts``, and everything else goes to ``azure_tts_v1``.
+ ``voice_volume`` is only forwarded to the SiliconFlow backend.
+
+ Returns:
+ The SubMaker produced by the selected backend, or None on failure
+ (including a malformed SiliconFlow voice name).
+ """
if is_azure_v2_voice(voice_name):
return azure_tts_v2(text, voice_name, voice_file)
+ elif is_siliconflow_voice(voice_name):
+ # Extract the model and the voice from voice_name.
+ # Format: siliconflow:model:voice-Gender
+ parts = voice_name.split(":")
+ if len(parts) >= 3:
+ model = parts[1]
+ # Drop the gender suffix, e.g. "alex-Male" -> "alex". The suffix is
+ # appended last, so split from the right to keep hyphens inside the voice id.
+ voice_with_gender = parts[2]
+ voice = voice_with_gender.rsplit("-", 1)[0]
+ # Build the full voice parameter in the form "model:voice".
+ full_voice = f"{model}:{voice}"
+ return siliconflow_tts(
+ text, model, full_voice, voice_rate, voice_file, voice_volume
+ )
+ else:
+ logger.error(f"Invalid siliconflow voice name format: {voice_name}")
+ return None
return azure_tts_v1(text, voice_name, voice_rate, voice_file)
@@ -1098,6 +1151,144 @@ def azure_tts_v1(
return None
+def _siliconflow_sub_maker(text: str, voice_file: str) -> SubMaker:
+    """
+    Build approximate subtitle timings for an audio file without word timings.
+
+    The duration of ``voice_file`` is read back with moviepy and shared out over
+    the sentences of ``text`` in proportion to their character counts. Offsets
+    are expressed in 100-nanosecond units. If anything fails, a single entry
+    covering the whole text is returned instead.
+    """
+    ticks_per_second = 10000000  # number of 100 ns units in one second
+    sub_maker = SubMaker()
+    audio_duration_100ns = None
+
+    try:
+        # Imported inside the try block so an unavailable moviepy only
+        # downgrades the subtitles instead of failing the whole synthesis.
+        from moviepy import AudioFileClip
+
+        audio_clip = AudioFileClip(voice_file)
+        try:
+            audio_duration_100ns = int(audio_clip.duration * ticks_per_second)
+        finally:
+            # Close the clip even when reading the duration raises.
+            audio_clip.close()
+
+        # Blank fragments produce no cue, so they must not receive a share of
+        # the duration either; otherwise the cues drift ahead of the audio.
+        sentences = [
+            s for s in utils.split_string_by_punctuations(text) if s.strip()
+        ]
+        total_chars = sum(len(s) for s in sentences)
+
+        if total_chars > 0:
+            char_duration = audio_duration_100ns / total_chars
+            current_offset = 0
+            for sentence in sentences:
+                sentence_duration = int(len(sentence) * char_duration)
+                sub_maker.subs.append(sentence)
+                sub_maker.offset.append(
+                    (current_offset, current_offset + sentence_duration)
+                )
+                current_offset += sentence_duration
+        else:
+            # Nothing usable came out of the split: one cue for the whole text.
+            sub_maker.subs = [text]
+            sub_maker.offset = [(0, audio_duration_100ns)]
+    except Exception as e:
+        logger.warning(f"Failed to create accurate subtitles: {str(e)}")
+        # Fall back to a single cue. Use the real duration when it was read
+        # successfully, otherwise assume 10 seconds.
+        if audio_duration_100ns is None:
+            audio_duration_100ns = 10 * ticks_per_second
+        sub_maker.subs = [text]
+        sub_maker.offset = [(0, audio_duration_100ns)]
+
+    return sub_maker
+
+
+def siliconflow_tts(
+    text: str,
+    model: str,
+    voice: str,
+    voice_rate: float,
+    voice_file: str,
+    voice_volume: float = 1.0,
+) -> Union[SubMaker, None]:
+    """
+    Generate speech with the SiliconFlow API and save it as an MP3 file.
+
+    Args:
+        text: Text to convert to speech; surrounding whitespace is stripped.
+        model: Model name, e.g. "FunAudioLLM/CosyVoice2-0.5B".
+        voice: Voice name, e.g. "FunAudioLLM/CosyVoice2-0.5B:alex".
+        voice_rate: Speech speed; clamped to [0.25, 4.0] before sending.
+        voice_file: Path of the audio file to write.
+        voice_volume: Speech volume in the range [0.6, 5.0]; it is converted to
+            a gain of ``voice_volume - 1.0`` and clamped to [-10, 10].
+
+    Returns:
+        A SubMaker with approximate sentence timings, or None when the text is
+        empty, the API key is missing, or all three attempts fail.
+    """
+    text = text.strip()
+    if not text:
+        logger.error("siliconflow tts failed: text is empty")
+        return None
+
+    api_key = config.siliconflow.get("api_key", "")
+    if not api_key:
+        logger.error("SiliconFlow API key is not set")
+        return None
+
+    # A voice_volume of 1.0 (the default) corresponds to a gain of 0.
+    gain = max(-10, min(10, voice_volume - 1.0))
+    speed = max(0.25, min(4.0, voice_rate))
+
+    url = "https://api.siliconflow.cn/v1/audio/speech"
+    payload = {
+        "model": model,
+        "input": text,
+        "voice": voice,
+        "response_format": "mp3",
+        "sample_rate": 32000,
+        "stream": False,
+        "speed": speed,
+        "gain": gain,
+    }
+    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+
+    # (connect, read) timeouts in seconds. Without them requests waits forever
+    # on a stalled connection and the retry loop would never advance.
+    request_timeout = (10, 120)
+
+    for i in range(3):  # up to 3 attempts
+        try:
+            logger.info(
+                f"start siliconflow tts, model: {model}, voice: {voice}, try: {i + 1}"
+            )
+
+            response = requests.post(
+                url, json=payload, headers=headers, timeout=request_timeout
+            )
+
+            if response.status_code != 200:
+                logger.error(
+                    f"siliconflow tts failed with status code {response.status_code}: {response.text}"
+                )
+                continue
+
+            # Save the audio, then derive subtitle timings from the saved file.
+            with open(voice_file, "wb") as f:
+                f.write(response.content)
+
+            sub_maker = _siliconflow_sub_maker(text, voice_file)
+            logger.success(f"siliconflow tts succeeded: {voice_file}")
+            return sub_maker
+        except Exception as e:
+            # Best effort: log and retry rather than propagate to the caller.
+            logger.error(f"siliconflow tts failed: {str(e)}")
+
+    return None
+
+
def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker, None]:
voice_name = is_azure_v2_voice(voice_name)
if not voice_name:
@@ -1219,7 +1410,7 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
"""
start_t = mktimestamp(start_time).replace(".", ",")
end_t = mktimestamp(end_time).replace(".", ",")
- return f"{idx}\n" f"{start_t} --> {end_t}\n" f"{sub_text}\n"
+ return f"{idx}\n{start_t} --> {end_t}\n{sub_text}\n"
start_time = -1.0
sub_items = []
diff --git a/config.example.toml b/config.example.toml
index 9fa0049..ecea468 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -193,6 +193,11 @@ compute_type = "int8"
speech_key = ""
speech_region = ""
+[siliconflow]
+# SiliconFlow API Key
+# Get your API key at https://siliconflow.cn
+api_key = ""
+
[ui]
# UI related settings
# 是否隐藏日志信息
diff --git a/webui/Main.py b/webui/Main.py
index 0ff0b02..aafed1b 100644
--- a/webui/Main.py
+++ b/webui/Main.py
@@ -44,7 +44,7 @@ st.set_page_config(
streamlit_style = """
"""
@@ -107,6 +107,7 @@ support_locales = [
"th-TH",
]
+
def get_all_fonts():
fonts = []
for root, dirs, files in os.walk(font_dir):
@@ -197,7 +198,8 @@ def tr(key):
loc = locales.get(st.session_state["ui_language"], {})
return loc.get("Translation", {}).get(key, key)
- # 创建基础设置折叠框
+
+# 创建基础设置折叠框
if not config.app.get("hide_config", False):
with st.expander(tr("Basic Settings"), expanded=False):
config_panels = st.columns(3)
@@ -220,7 +222,7 @@ if not config.app.get("hide_config", False):
config.ui["hide_log"] = hide_log
# 中间面板 - LLM 设置
-
+
with middle_config_panel:
st.write(tr("LLM Settings"))
llm_providers = [
@@ -423,31 +425,31 @@ if not config.app.get("hide_config", False):
# 右侧面板 - API 密钥设置
with right_config_panel:
- def get_keys_from_config(cfg_key):
- api_keys = config.app.get(cfg_key, [])
- if isinstance(api_keys, str):
- api_keys = [api_keys]
- api_key = ", ".join(api_keys)
- return api_key
+ def get_keys_from_config(cfg_key):
+ api_keys = config.app.get(cfg_key, [])
+ if isinstance(api_keys, str):
+ api_keys = [api_keys]
+ api_key = ", ".join(api_keys)
+ return api_key
- def save_keys_to_config(cfg_key, value):
- value = value.replace(" ", "")
- if value:
- config.app[cfg_key] = value.split(",")
+ def save_keys_to_config(cfg_key, value):
+ value = value.replace(" ", "")
+ if value:
+ config.app[cfg_key] = value.split(",")
- st.write(tr("Video Source Settings"))
+ st.write(tr("Video Source Settings"))
- pexels_api_key = get_keys_from_config("pexels_api_keys")
- pexels_api_key = st.text_input(
- tr("Pexels API Key"), value=pexels_api_key, type="password"
- )
- save_keys_to_config("pexels_api_keys", pexels_api_key)
+ pexels_api_key = get_keys_from_config("pexels_api_keys")
+ pexels_api_key = st.text_input(
+ tr("Pexels API Key"), value=pexels_api_key, type="password"
+ )
+ save_keys_to_config("pexels_api_keys", pexels_api_key)
- pixabay_api_key = get_keys_from_config("pixabay_api_keys")
- pixabay_api_key = st.text_input(
- tr("Pixabay API Key"), value=pixabay_api_key, type="password"
- )
- save_keys_to_config("pixabay_api_keys", pixabay_api_key)
+ pixabay_api_key = get_keys_from_config("pixabay_api_keys")
+ pixabay_api_key = st.text_input(
+ tr("Pixabay API Key"), value=pixabay_api_key, type="password"
+ )
+ save_keys_to_config("pixabay_api_keys", pixabay_api_key)
llm_provider = config.app.get("llm_provider", "").lower()
panel = st.columns(3)
@@ -615,42 +617,103 @@ with middle_panel:
with st.container(border=True):
st.write(tr("Audio Settings"))
- # tts_providers = ['edge', 'azure']
- # tts_provider = st.selectbox(tr("TTS Provider"), tts_providers)
+ # 添加TTS服务器选择下拉框
+ tts_servers = [
+ ("azure-tts-v1", "Azure TTS V1"),
+ ("azure-tts-v2", "Azure TTS V2"),
+ ("siliconflow", "SiliconFlow TTS"),
+ ]
+
+ # 获取保存的TTS服务器,默认为v1
+ saved_tts_server = config.ui.get("tts_server", "azure-tts-v1")
+ saved_tts_server_index = 0
+ for i, (server_value, _) in enumerate(tts_servers):
+ if server_value == saved_tts_server:
+ saved_tts_server_index = i
+ break
+
+ selected_tts_server_index = st.selectbox(
+ tr("TTS Servers"),
+ options=range(len(tts_servers)),
+ format_func=lambda x: tts_servers[x][1],
+ index=saved_tts_server_index,
+ )
+
+ selected_tts_server = tts_servers[selected_tts_server_index][0]
+ config.ui["tts_server"] = selected_tts_server
+
+ # 根据选择的TTS服务器获取声音列表
+ filtered_voices = []
+
+ if selected_tts_server == "siliconflow":
+ # 获取硅基流动的声音列表
+ filtered_voices = voice.get_siliconflow_voices()
+ else:
+ # 获取Azure的声音列表
+ all_voices = voice.get_all_azure_voices(filter_locals=None)
+
+ # 根据选择的TTS服务器筛选声音
+ for v in all_voices:
+ if selected_tts_server == "azure-tts-v2":
+ # V2版本的声音名称中包含"v2"
+ if "V2" in v:
+ filtered_voices.append(v)
+ else:
+ # V1版本的声音名称中不包含"v2"
+ if "V2" not in v:
+ filtered_voices.append(v)
- voices = voice.get_all_azure_voices(filter_locals=None)
friendly_names = {
v: v.replace("Female", tr("Female"))
.replace("Male", tr("Male"))
.replace("Neural", "")
- for v in voices
+ for v in filtered_voices
}
+
saved_voice_name = config.ui.get("voice_name", "")
saved_voice_name_index = 0
+
+ # 检查保存的声音是否在当前筛选的声音列表中
if saved_voice_name in friendly_names:
saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name)
else:
- for i, v in enumerate(voices):
- if (
- v.lower().startswith(st.session_state["ui_language"].lower())
- and "V2" not in v
- ):
+ # 如果不在,则根据当前UI语言选择一个默认声音
+ for i, v in enumerate(filtered_voices):
+ if v.lower().startswith(st.session_state["ui_language"].lower()):
saved_voice_name_index = i
break
- selected_friendly_name = st.selectbox(
- tr("Speech Synthesis"),
- options=list(friendly_names.values()),
- index=saved_voice_name_index,
- )
+ # 如果没有找到匹配的声音,使用第一个声音
+ if saved_voice_name_index >= len(friendly_names) and friendly_names:
+ saved_voice_name_index = 0
- voice_name = list(friendly_names.keys())[
- list(friendly_names.values()).index(selected_friendly_name)
- ]
- params.voice_name = voice_name
- config.ui["voice_name"] = voice_name
+ # 确保有声音可选
+ if friendly_names:
+ selected_friendly_name = st.selectbox(
+ tr("Speech Synthesis"),
+ options=list(friendly_names.values()),
+ index=min(saved_voice_name_index, len(friendly_names) - 1)
+ if friendly_names
+ else 0,
+ )
- if st.button(tr("Play Voice")):
+ voice_name = list(friendly_names.keys())[
+ list(friendly_names.values()).index(selected_friendly_name)
+ ]
+ params.voice_name = voice_name
+ config.ui["voice_name"] = voice_name
+ else:
+ # No voice can be selected: show a hint and clear the stored choice.
+ st.warning(
+ tr(
+ "No voices available for the selected TTS server. Please select another server."
+ )
+ )
+ # Bind voice_name as well: the Azure V2 and SiliconFlow checks further down
+ # read it unconditionally and would raise NameError if it stayed undefined.
+ voice_name = ""
+ params.voice_name = ""
+ config.ui["voice_name"] = ""
+
+ # 只有在有声音可选时才显示试听按钮
+ if friendly_names and st.button(tr("Play Voice")):
play_content = params.video_subject
if not play_content:
play_content = params.video_script
@@ -664,6 +727,7 @@ with middle_panel:
voice_name=voice_name,
voice_rate=params.voice_rate,
voice_file=audio_file,
+ voice_volume=params.voice_volume,
)
# if the voice file generation failed, try again with a default content.
if not sub_maker:
@@ -673,6 +737,7 @@ with middle_panel:
voice_name=voice_name,
voice_rate=params.voice_rate,
voice_file=audio_file,
+ voice_volume=params.voice_volume,
)
if sub_maker and os.path.exists(audio_file):
@@ -680,7 +745,10 @@ with middle_panel:
if os.path.exists(audio_file):
os.remove(audio_file)
- if voice.is_azure_v2_voice(voice_name):
+ # 当选择V2版本或者声音是V2声音时,显示服务区域和API key输入框
+ if selected_tts_server == "azure-tts-v2" or (
+ voice_name and voice.is_azure_v2_voice(voice_name)
+ ):
saved_azure_speech_region = config.azure.get("speech_region", "")
saved_azure_speech_key = config.azure.get("speech_key", "")
azure_speech_region = st.text_input(
@@ -697,6 +765,32 @@ with middle_panel:
config.azure["speech_region"] = azure_speech_region
config.azure["speech_key"] = azure_speech_key
+ # 当选择硅基流动时,显示API key输入框和说明信息
+ if selected_tts_server == "siliconflow" or (
+ voice_name and voice.is_siliconflow_voice(voice_name)
+ ):
+ saved_siliconflow_api_key = config.siliconflow.get("api_key", "")
+
+ siliconflow_api_key = st.text_input(
+ tr("SiliconFlow API Key"),
+ value=saved_siliconflow_api_key,
+ type="password",
+ key="siliconflow_api_key_input",
+ )
+
+ # 显示硅基流动的说明信息
+ st.info(
+ tr("SiliconFlow TTS Settings")
+ + ":\n"
+ + "- "
+ + tr("Speed: Range [0.25, 4.0], default is 1.0")
+ + "\n"
+ + "- "
+ + tr("Volume: Uses Speech Volume setting, default 1.0 maps to gain 0")
+ )
+
+ config.siliconflow["api_key"] = siliconflow_api_key
+
params.voice_volume = st.selectbox(
tr("Speech Volume"),
options=[0.6, 0.8, 1.0, 1.2, 1.5, 2.0, 3.0, 4.0, 5.0],
@@ -876,4 +970,4 @@ if start_button:
logger.info(tr("Video Generation Completed"))
scroll_to_bottom()
-config.save_config()
\ No newline at end of file
+config.save_config()
diff --git a/webui/i18n/de.json b/webui/i18n/de.json
index f2fc0da..cedc3b7 100644
--- a/webui/i18n/de.json
+++ b/webui/i18n/de.json
@@ -91,6 +91,12 @@
"Voice Example": "Dies ist ein Beispieltext zum Testen der Sprachsynthese",
"Synthesizing Voice": "Sprachsynthese läuft, bitte warten...",
"TTS Provider": "Sprachsynthese-Anbieter auswählen",
+ "TTS Servers": "TTS-Server",
+ "No voices available for the selected TTS server. Please select another server.": "Keine Stimmen für den ausgewählten TTS-Server verfügbar. Bitte wählen Sie einen anderen Server.",
+ "SiliconFlow API Key": "SiliconFlow API-Schlüssel",
+ "SiliconFlow TTS Settings": "SiliconFlow TTS-Einstellungen",
+ "Speed: Range [0.25, 4.0], default is 1.0": "Geschwindigkeit: Bereich [0.25, 4.0], Standardwert ist 1.0",
+ "Volume: Uses Speech Volume setting, default 1.0 maps to gain 0": "Lautstärke: Verwendet die Sprachlautstärke-Einstellung, Standardwert 1.0 entspricht Verstärkung 0",
"Hide Log": "Protokoll ausblenden",
"Hide Basic Settings": "Basis-Einstellungen ausblenden\n\nWenn diese Option deaktiviert ist, wird die Basis-Einstellungen-Leiste nicht auf der Seite angezeigt.\n\nWenn Sie sie erneut anzeigen möchten, setzen Sie `hide_config = false` in `config.toml`",
"LLM Settings": "**LLM-Einstellungen**",
diff --git a/webui/i18n/en.json b/webui/i18n/en.json
index b1738e7..c3c9ac2 100644
--- a/webui/i18n/en.json
+++ b/webui/i18n/en.json
@@ -91,6 +91,12 @@
"Voice Example": "This is an example text for testing speech synthesis",
"Synthesizing Voice": "Synthesizing voice, please wait...",
"TTS Provider": "Select the voice synthesis provider",
+ "TTS Servers": "TTS Servers",
+ "No voices available for the selected TTS server. Please select another server.": "No voices available for the selected TTS server. Please select another server.",
+ "SiliconFlow API Key": "SiliconFlow API Key [Click to get](https://cloud.siliconflow.cn/account/ak)",
+ "SiliconFlow TTS Settings": "SiliconFlow TTS Settings",
+ "Speed: Range [0.25, 4.0], default is 1.0": "Speed: Range [0.25, 4.0], default is 1.0",
+ "Volume: Uses Speech Volume setting, default 1.0 maps to gain 0": "Volume: Uses Speech Volume setting, default 1.0 maps to gain 0",
"Hide Log": "Hide Log",
"Hide Basic Settings": "Hide Basic Settings\n\nHidden, the basic settings panel will not be displayed on the page.\n\nIf you need to display it again, please set `hide_config = false` in `config.toml`",
"LLM Settings": "**LLM Settings**",
diff --git a/webui/i18n/pt.json b/webui/i18n/pt.json
index 0c1202e..6a9d47b 100644
--- a/webui/i18n/pt.json
+++ b/webui/i18n/pt.json
@@ -91,6 +91,12 @@
"Voice Example": "Este é um exemplo de texto para testar a síntese de fala",
"Synthesizing Voice": "Sintetizando voz, por favor aguarde...",
"TTS Provider": "Selecione o provedor de síntese de voz",
+ "TTS Servers": "Servidores TTS",
+ "No voices available for the selected TTS server. Please select another server.": "Não há vozes disponíveis para o servidor TTS selecionado. Por favor, selecione outro servidor.",
+ "SiliconFlow API Key": "Chave API do SiliconFlow",
+ "SiliconFlow TTS Settings": "Configurações do SiliconFlow TTS",
+ "Speed: Range [0.25, 4.0], default is 1.0": "Velocidade: Intervalo [0.25, 4.0], o padrão é 1.0",
+ "Volume: Uses Speech Volume setting, default 1.0 maps to gain 0": "Volume: Usa a configuração de Volume de Fala, o padrão 1.0 corresponde ao ganho 0",
"Hide Log": "Ocultar Log",
"Hide Basic Settings": "Ocultar Configurações Básicas\n\nOculto, o painel de configurações básicas não será exibido na página.\n\nSe precisar exibi-lo novamente, defina `hide_config = false` em `config.toml`",
"LLM Settings": "**Configurações do LLM**",
diff --git a/webui/i18n/vi.json b/webui/i18n/vi.json
index 7e1ebef..c1a604b 100644
--- a/webui/i18n/vi.json
+++ b/webui/i18n/vi.json
@@ -91,6 +91,12 @@
"Voice Example": "Đây là văn bản mẫu để kiểm tra tổng hợp giọng nói",
"Synthesizing Voice": "Đang tổng hợp giọng nói, vui lòng đợi...",
"TTS Provider": "Chọn nhà cung cấp tổng hợp giọng nói",
+ "TTS Servers": "Máy chủ TTS",
+ "No voices available for the selected TTS server. Please select another server.": "Không có giọng nói nào cho máy chủ TTS đã chọn. Vui lòng chọn máy chủ khác.",
+ "SiliconFlow API Key": "Khóa API SiliconFlow",
+ "SiliconFlow TTS Settings": "Cài đặt SiliconFlow TTS",
+ "Speed: Range [0.25, 4.0], default is 1.0": "Tốc độ: Phạm vi [0.25, 4.0], mặc định là 1.0",
+ "Volume: Uses Speech Volume setting, default 1.0 maps to gain 0": "Âm lượng: Sử dụng cài đặt Âm lượng Giọng nói, mặc định 1.0 tương ứng với tăng ích 0",
"Hide Log": "Ẩn Nhật Ký",
"Hide Basic Settings": "Ẩn Cài Đặt Cơ Bản\n\nẨn, thanh cài đặt cơ bản sẽ không hiển thị trên trang web.\n\nNếu bạn muốn hiển thị lại, vui lòng đặt `hide_config = false` trong `config.toml`",
"LLM Settings": "**Cài Đặt LLM**",
diff --git a/webui/i18n/zh.json b/webui/i18n/zh.json
index 85289c0..cb6057e 100644
--- a/webui/i18n/zh.json
+++ b/webui/i18n/zh.json
@@ -91,6 +91,12 @@
"Voice Example": "这是一段测试语音合成的示例文本",
"Synthesizing Voice": "语音合成中,请稍候...",
"TTS Provider": "语音合成提供商",
+ "TTS Servers": "TTS服务器",
+ "No voices available for the selected TTS server. Please select another server.": "当前选择的TTS服务器没有可用的声音,请选择其他服务器。",
+ "SiliconFlow API Key": "硅基流动API密钥 [点击获取](https://cloud.siliconflow.cn/account/ak)",
+ "SiliconFlow TTS Settings": "硅基流动TTS设置",
+ "Speed: Range [0.25, 4.0], default is 1.0": "语速范围 [0.25, 4.0],默认值为1.0",
+ "Volume: Uses Speech Volume setting, default 1.0 maps to gain 0": "音量:使用朗读音量设置,默认值1.0对应增益0",
"Hide Log": "隐藏日志",
"Hide Basic Settings": "隐藏基础设置\n\n隐藏后,基础设置面板将不会显示在页面中。\n\n如需要再次显示,请在 `config.toml` 中设置 `hide_config = false`",
"LLM Settings": "**大模型设置**",