1. Added multi-language support to the UI

2. Optimized the voice name
3. Other UI optimizations
This commit is contained in:
harry 2024-03-29 17:13:25 +08:00
parent a7ba661053
commit bc8e005f59
6 changed files with 1293 additions and 137 deletions

View File

@ -34,43 +34,43 @@ class MaterialInfo:
duration: int = 0
VoiceNames = [
# zh-CN
"female-zh-CN-XiaoxiaoNeural",
"female-zh-CN-XiaoyiNeural",
"female-zh-CN-liaoning-XiaobeiNeural",
"female-zh-CN-shaanxi-XiaoniNeural",
"male-zh-CN-YunjianNeural",
"male-zh-CN-YunxiNeural",
"male-zh-CN-YunxiaNeural",
"male-zh-CN-YunyangNeural",
# "female-zh-HK-HiuGaaiNeural",
# "female-zh-HK-HiuMaanNeural",
# "male-zh-HK-WanLungNeural",
#
# "female-zh-TW-HsiaoChenNeural",
# "female-zh-TW-HsiaoYuNeural",
# "male-zh-TW-YunJheNeural",
# en-US
"female-en-US-AnaNeural",
"female-en-US-AriaNeural",
"female-en-US-AvaNeural",
"female-en-US-EmmaNeural",
"female-en-US-JennyNeural",
"female-en-US-MichelleNeural",
"male-en-US-AndrewNeural",
"male-en-US-BrianNeural",
"male-en-US-ChristopherNeural",
"male-en-US-EricNeural",
"male-en-US-GuyNeural",
"male-en-US-RogerNeural",
"male-en-US-SteffanNeural",
]
# VoiceNames = [
# # zh-CN
# "female-zh-CN-XiaoxiaoNeural",
# "female-zh-CN-XiaoyiNeural",
# "female-zh-CN-liaoning-XiaobeiNeural",
# "female-zh-CN-shaanxi-XiaoniNeural",
#
# "male-zh-CN-YunjianNeural",
# "male-zh-CN-YunxiNeural",
# "male-zh-CN-YunxiaNeural",
# "male-zh-CN-YunyangNeural",
#
# # "female-zh-HK-HiuGaaiNeural",
# # "female-zh-HK-HiuMaanNeural",
# # "male-zh-HK-WanLungNeural",
# #
# # "female-zh-TW-HsiaoChenNeural",
# # "female-zh-TW-HsiaoYuNeural",
# # "male-zh-TW-YunJheNeural",
#
# # en-US
#
# "female-en-US-AnaNeural",
# "female-en-US-AriaNeural",
# "female-en-US-AvaNeural",
# "female-en-US-EmmaNeural",
# "female-en-US-JennyNeural",
# "female-en-US-MichelleNeural",
#
# "male-en-US-AndrewNeural",
# "male-en-US-BrianNeural",
# "male-en-US-ChristopherNeural",
# "male-en-US-EricNeural",
# "male-en-US-GuyNeural",
# "male-en-US-RogerNeural",
# "male-en-US-SteffanNeural",
# ]
class VideoParams:
@ -97,7 +97,7 @@ class VideoParams:
video_language: Optional[str] = "" # auto detect
voice_name: Optional[str] = VoiceNames[0]
voice_name: Optional[str] = ""
bgm_type: Optional[str] = "random"
bgm_file: Optional[str] = ""
bgm_volume: Optional[float] = 0.2

View File

@ -6,24 +6,11 @@ from os import path
from loguru import logger
from app.config import config
from app.models.schema import VideoParams, VoiceNames, VideoConcatMode
from app.models.schema import VideoParams, VideoConcatMode
from app.services import llm, material, voice, video, subtitle
from app.utils import utils
def _parse_voice(name: str):
    """Split a prefixed voice identifier into its voice name and language tag.

    Identifiers look like ``"female-zh-CN-XiaoxiaoNeural"``: a gender prefix,
    a two-part language tag, then the voice name. The voice name may itself
    contain hyphens (``"female-zh-CN-liaoning-XiaobeiNeural"``), so everything
    after the gender prefix is kept rather than a fixed number of segments.

    Args:
        name: Voice identifier. Values that are not in ``VoiceNames`` fall
            back to ``VoiceNames[0]``.

    Returns:
        A ``(voice, language)`` tuple, e.g.
        ``("zh-CN-XiaoxiaoNeural", "zh-CN")``.
    """
    if name not in VoiceNames:
        name = VoiceNames[0]

    parts = name.split("-")
    _lang = "-".join(parts[1:3])
    # Keep every segment after the gender prefix; taking only parts[3] would
    # truncate regional voices such as "zh-CN-liaoning-XiaobeiNeural".
    _voice = "-".join(parts[1:])
    return _voice, _lang
def start(task_id, params: VideoParams):
"""
{
@ -40,7 +27,7 @@ def start(task_id, params: VideoParams):
"""
logger.info(f"start task: {task_id}")
video_subject = params.video_subject
voice_name, language = _parse_voice(params.voice_name)
voice_name = voice.parse_voice_name(params.voice_name)
paragraph_number = params.paragraph_number
n_threads = params.n_threads
max_clip_duration = params.video_clip_duration

File diff suppressed because it is too large Load Diff

View File

@ -1,15 +1,28 @@
import json
import locale
import streamlit as st
st.set_page_config(page_title="MoneyPrinterTurbo", page_icon="🤖", layout="wide",
initial_sidebar_state="auto")
import sys
import os
from uuid import uuid4
import platform
import streamlit.components.v1 as components
import toml
from loguru import logger
from app.models.schema import VideoParams, VideoAspect, VoiceNames, VideoConcatMode
from app.services import task as tm, llm
st.set_page_config(page_title="MoneyPrinterTurbo",
page_icon="🤖",
layout="wide",
initial_sidebar_state="auto",
menu_items={
'Report a bug': "https://github.com/harry0703/MoneyPrinterTurbo/issues",
'About': "# MoneyPrinterTurbo\nSimply provide a topic or keyword for a video, and it will "
"automatically generate the video copy, video materials, video subtitles, "
"and video background music before synthesizing a high-definition short "
"video.\n\nhttps://github.com/harry0703/MoneyPrinterTurbo"
})
from app.models.schema import VideoParams, VideoAspect, VideoConcatMode
from app.services import task as tm, llm, voice
from app.utils import utils
hide_streamlit_style = """
@ -21,6 +34,35 @@ st.title("MoneyPrinterTurbo")
root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
font_dir = os.path.join(root_dir, "resource", "fonts")
song_dir = os.path.join(root_dir, "resource", "songs")
i18n_dir = os.path.join(root_dir, "webui", "i18n")
config_file = os.path.join(root_dir, "webui", ".streamlit", "webui.toml")
def load_config() -> dict:
    """Load the persisted web UI settings from ``config_file``.

    Returns:
        The parsed TOML document, or an empty dict when the file does not
        exist or cannot be read/parsed, so callers fall back to defaults.
    """
    try:
        return toml.load(config_file)
    except FileNotFoundError:
        # Nothing has been saved yet; this is the normal first-run case.
        return {}
    except Exception as e:
        # Still best-effort, but leave a trace so a broken settings file
        # is not mistaken for a missing one.
        logger.warning(f"failed to load config file {config_file}: {e}")
        return {}
cfg = load_config()
def save_config():
    """Write the module-level ``cfg`` mapping to ``config_file`` as TOML.

    The parent directory is created first so that the first save succeeds
    even when the settings folder does not exist yet.
    """
    os.makedirs(os.path.dirname(config_file), exist_ok=True)
    with open(config_file, "w", encoding="utf-8") as f:
        f.write(toml.dumps(cfg))
def get_system_locale():
    """Return the language part of the system default locale.

    ``zh_CN`` and ``zh_TW`` both yield ``"zh"``; ``en_US`` and ``en_GB``
    yield ``"en"``.

    Returns:
        The language code, or ``"en"`` when the locale cannot be determined
        (lookup fails, no locale is reported, or the code is empty).
    """
    try:
        locale_name = locale.getdefaultlocale()[0]
    except Exception:
        # Best-effort lookup: any failure falls back to English.
        return "en"

    # getdefaultlocale() reports None when no locale is configured; handle
    # that explicitly instead of relying on a swallowed AttributeError.
    if not locale_name:
        return "en"
    return locale_name.split("_")[0] or "en"
if 'video_subject' not in st.session_state:
st.session_state['video_subject'] = ''
@ -28,6 +70,8 @@ if 'video_script' not in st.session_state:
st.session_state['video_script'] = ''
if 'video_terms' not in st.session_state:
st.session_state['video_terms'] = ''
if 'ui_language' not in st.session_state:
st.session_state['ui_language'] = cfg.get("ui_language", get_system_locale())
def get_all_fonts():
@ -109,113 +153,154 @@ def init_log():
init_log()
def load_locales():
    """Collect every ``*.json`` file found under ``i18n_dir``.

    Returns:
        A dict mapping a language code to the parsed JSON document. The code
        is the file name up to its first dot (``zh.json`` -> ``"zh"``).
    """
    translations = {}
    for folder, _subdirs, filenames in os.walk(i18n_dir):
        for filename in filenames:
            if not filename.endswith(".json"):
                continue
            language = filename.split(".")[0]
            json_path = os.path.join(folder, filename)
            with open(json_path, "r", encoding="utf-8") as fp:
                translations[language] = json.load(fp)
    return translations
locales = load_locales()
def tr(key):
    """Translate a UI label key for the currently selected UI language.

    Looks the key up in the ``"Translation"`` table of the locale stored in
    ``st.session_state['ui_language']``. The key itself is returned when the
    locale, the table, or the entry is missing.
    """
    language = st.session_state['ui_language']
    translations = locales.get(language, {}).get("Translation", {})
    return translations.get(key, key)
display_languages = []
selected_index = 0
for i, code in enumerate(locales.keys()):
display_languages.append(f"{code} - {locales[code].get('Language')}")
if code == st.session_state['ui_language']:
selected_index = i
selected_language = st.selectbox("Language", options=display_languages, label_visibility='collapsed',
index=selected_index)
if selected_language:
code = selected_language.split(" - ")[0].strip()
st.session_state['ui_language'] = code
cfg['ui_language'] = code
save_config()
panel = st.columns(3)
left_panel = panel[0]
middle_panel = panel[1]
right_panel = panel[2]
cfg = VideoParams()
params = VideoParams()
with left_panel:
with st.container(border=True):
st.write("**文案设置**")
cfg.video_subject = st.text_input("视频主题(给定一个关键词,:red[AI自动生成]视频文案)",
value=st.session_state['video_subject']).strip()
st.write(tr("Video Script Settings"))
params.video_subject = st.text_input(tr("Video Subject"),
value=st.session_state['video_subject']).strip()
video_languages = [
("自动判断Auto detect", ""),
(tr("Auto Detect"), ""),
]
for lang in ["zh-CN", "zh-TW", "en-US"]:
video_languages.append((lang, lang))
for code in ["zh-CN", "zh-TW", "en-US"]:
video_languages.append((code, code))
selected_index = st.selectbox("生成视频脚本的语言(:blue[一般情况AI会自动根据你输入的主题语言输出]",
selected_index = st.selectbox(tr("Script Language"),
index=0,
options=range(len(video_languages)), # 使用索引作为内部选项值
format_func=lambda x: video_languages[x][0] # 显示给用户的是标签
)
cfg.video_language = video_languages[selected_index][1]
params.video_language = video_languages[selected_index][1]
if cfg.video_language:
st.write(f"设置AI输出文案语言为: **:red[{cfg.video_language}]**")
if st.button("点击使用AI根据**主题**生成 【视频文案】 和 【视频关键词】", key="auto_generate_script"):
with st.spinner("AI正在生成视频文案和关键词..."):
script = llm.generate_script(video_subject=cfg.video_subject, language=cfg.video_language)
terms = llm.generate_terms(cfg.video_subject, script)
st.toast('AI生成成功')
if st.button(tr("Generate Video Script and Keywords"), key="auto_generate_script"):
with st.spinner(tr("Generating Video Script and Keywords")):
script = llm.generate_script(video_subject=params.video_subject, language=params.video_language)
terms = llm.generate_terms(params.video_subject, script)
st.session_state['video_script'] = script
st.session_state['video_terms'] = ", ".join(terms)
cfg.video_script = st.text_area(
"视频文案(:blue[①可不填使用AI生成 ②合理使用标点断句,有助于生成字幕]",
params.video_script = st.text_area(
tr("Video Script"),
value=st.session_state['video_script'],
height=180
)
if st.button("点击使用AI根据**文案**生成【视频关键词】", key="auto_generate_terms"):
if not cfg.video_script:
st.error("请先填写视频文案")
if st.button(tr("Generate Video Keywords"), key="auto_generate_terms"):
if not params.video_script:
st.error(tr("Please Enter the Video Subject"))
st.stop()
with st.spinner("AI正在生成视频关键词..."):
terms = llm.generate_terms(cfg.video_subject, cfg.video_script)
st.toast('AI生成成功')
with st.spinner(tr("Generating Video Keywords")):
terms = llm.generate_terms(params.video_subject, params.video_script)
st.session_state['video_terms'] = ", ".join(terms)
cfg.video_terms = st.text_area(
"视频关键词(:blue[①可不填使用AI生成 ②用**英文逗号**分隔,只支持英文]",
params.video_terms = st.text_area(
tr("Video Keywords"),
value=st.session_state['video_terms'],
height=50)
with middle_panel:
with st.container(border=True):
st.write("**视频设置**")
st.write(tr("Video Settings"))
video_concat_modes = [
("顺序拼接", "sequential"),
("随机拼接(推荐)", "random"),
(tr("Sequential"), "sequential"),
(tr("Random"), "random"),
]
selected_index = st.selectbox("视频拼接模式",
selected_index = st.selectbox(tr("Video Concat Mode"),
index=1,
options=range(len(video_concat_modes)), # 使用索引作为内部选项值
format_func=lambda x: video_concat_modes[x][0] # 显示给用户的是标签
)
cfg.video_concat_mode = VideoConcatMode(video_concat_modes[selected_index][1])
params.video_concat_mode = VideoConcatMode(video_concat_modes[selected_index][1])
video_aspect_ratios = [
("竖屏 9:16抖音视频", VideoAspect.portrait.value),
("横屏 16:9西瓜视频", VideoAspect.landscape.value),
# ("方形 1:1", VideoAspect.square.value)
(tr("Portrait"), VideoAspect.portrait.value),
(tr("Landscape"), VideoAspect.landscape.value),
]
selected_index = st.selectbox("视频比例",
selected_index = st.selectbox(tr("Video Ratio"),
options=range(len(video_aspect_ratios)), # 使用索引作为内部选项值
format_func=lambda x: video_aspect_ratios[x][0] # 显示给用户的是标签
)
cfg.video_aspect = VideoAspect(video_aspect_ratios[selected_index][1])
params.video_aspect = VideoAspect(video_aspect_ratios[selected_index][1])
cfg.video_clip_duration = st.selectbox("视频片段最大时长(秒)", options=[2, 3, 4, 5, 6], index=1)
cfg.video_count = st.selectbox("同时生成视频数量", options=[1, 2, 3, 4, 5], index=0)
params.video_clip_duration = st.selectbox(tr("Clip Duration"), options=[2, 3, 4, 5, 6], index=1)
params.video_count = st.selectbox(tr("Number of Videos Generated Simultaneously"), options=[1, 2, 3, 4, 5],
index=0)
with st.container(border=True):
st.write("**音频设置**")
# 创建一个映射字典,将原始值映射到友好名称
st.write(tr("Audio Settings"))
voices = voice.get_all_voices(filter_locals=["zh-CN", "zh-HK", "zh-TW", "en-US"])
friendly_names = {
voice: voice.
replace("female", "女性").
replace("male", "男性").
replace("zh-CN", "中文").
replace("zh-HK", "香港").
replace("zh-TW", "台湾").
replace("en-US", "英文").
replace("Female", tr("Female")).
replace("Male", tr("Male")).
replace("Neural", "") for
voice in VoiceNames}
selected_friendly_name = st.selectbox("朗读声音", options=list(friendly_names.values()))
voice in voices}
# Work out which entry to preselect: the voice saved in the config if it is
# still offered, otherwise the first voice whose name starts with the UI
# language code, otherwise the first entry.
saved_voice_name = cfg.get("voice_name", "")
saved_voice_name_index = 0
if saved_voice_name in friendly_names:
    saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name)
else:
    ui_language = st.session_state['ui_language'].lower()
    # Iterate the keys of friendly_names (same order as the select box
    # options) and use a loop name other than `voice`, so the imported
    # `voice` module is not rebound at module scope.
    for i, candidate in enumerate(friendly_names):
        if candidate.lower().startswith(ui_language):
            saved_voice_name_index = i
            break
selected_friendly_name = st.selectbox(tr("Speech Synthesis"),
options=list(friendly_names.values()),
index=saved_voice_name_index)
voice_name = list(friendly_names.keys())[list(friendly_names.values()).index(selected_friendly_name)]
cfg.voice_name = voice_name
params.voice_name = voice_name
cfg['voice_name'] = voice_name
save_config()
bgm_options = [
("无背景音乐 No BGM", ""),
("随机背景音乐 Random BGM", "random"),
("自定义背景音乐 Custom BGM", "custom"),
(tr("No Background Music"), ""),
(tr("Random Background Music"), "random"),
(tr("Custom Background Music"), "custom"),
]
selected_index = st.selectbox("背景音乐",
selected_index = st.selectbox(tr("Background Music"),
index=1,
options=range(len(bgm_options)), # 使用索引作为内部选项值
format_func=lambda x: bgm_options[x][0] # 显示给用户的是标签
@ -225,49 +310,49 @@ with middle_panel:
# 根据选择显示或隐藏组件
if bgm_type == "custom":
custom_bgm_file = st.text_input("请输入自定义背景音乐的文件路径:")
custom_bgm_file = st.text_input(tr("Custom Background Music File"))
if custom_bgm_file and os.path.exists(custom_bgm_file):
cfg.bgm_file = custom_bgm_file
params.bgm_file = custom_bgm_file
# st.write(f":red[已选择自定义背景音乐]**{custom_bgm_file}**")
cfg.bgm_volume = st.selectbox("背景音乐音量0.2表示20%,背景声音不宜过高)",
options=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], index=2)
params.bgm_volume = st.selectbox(tr("Background Music Volume"),
options=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], index=2)
with right_panel:
with st.container(border=True):
st.write("**字幕设置**")
cfg.subtitle_enabled = st.checkbox("生成字幕(若取消勾选,下面的设置都将不生效)", value=True)
st.write(tr("Subtitle Settings"))
params.subtitle_enabled = st.checkbox(tr("Enable Subtitles"), value=True)
font_names = get_all_fonts()
cfg.font_name = st.selectbox("字体", font_names)
params.font_name = st.selectbox(tr("Font"), font_names)
subtitle_positions = [
("顶部top", "top"),
("居中center", "center"),
("底部bottom推荐", "bottom"),
# Label keys must match the "Translation" tables in webui/i18n/*.json; the
# middle position is stored there under "Center".
(tr("Top"), "top"),
(tr("Center"), "center"),
(tr("Bottom"), "bottom"),
]
selected_index = st.selectbox("字幕位置",
selected_index = st.selectbox(tr("Position"),
index=2,
options=range(len(subtitle_positions)), # 使用索引作为内部选项值
format_func=lambda x: subtitle_positions[x][0] # 显示给用户的是标签
)
cfg.subtitle_position = subtitle_positions[selected_index][1]
params.subtitle_position = subtitle_positions[selected_index][1]
font_cols = st.columns([0.3, 0.7])
with font_cols[0]:
cfg.text_fore_color = st.color_picker("字幕颜色", "#FFFFFF")
params.text_fore_color = st.color_picker(tr("Font Color"), "#FFFFFF")
with font_cols[1]:
cfg.font_size = st.slider("字幕大小", 30, 100, 60)
params.font_size = st.slider(tr("Font Size"), 30, 100, 60)
stroke_cols = st.columns([0.3, 0.7])
with stroke_cols[0]:
cfg.stroke_color = st.color_picker("描边颜色", "#000000")
params.stroke_color = st.color_picker(tr("Stroke Color"), "#000000")
with stroke_cols[1]:
cfg.stroke_width = st.slider("描边粗细", 0.0, 10.0, 1.5)
params.stroke_width = st.slider(tr("Stroke Width"), 0.0, 10.0, 1.5)
start_button = st.button("开始生成视频", use_container_width=True, type="primary")
start_button = st.button(tr("Generate Video"), use_container_width=True, type="primary")
if start_button:
task_id = str(uuid4())
if not cfg.video_subject and not cfg.video_script:
st.error("视频主题 或 视频文案,不能同时为空")
if not params.video_subject and not params.video_script:
st.error(tr("Video Script and Subject Cannot Both Be Empty"))
scroll_to_bottom()
st.stop()
@ -283,11 +368,11 @@ if start_button:
logger.add(log_received)
st.toast("正在生成视频,请稍候...")
logger.info("开始生成视频")
logger.info(utils.to_json(cfg))
st.toast(tr("Generating Video"))
logger.info(tr("Start Generating Video"))
logger.info(utils.to_json(params))
scroll_to_bottom()
tm.start(task_id=task_id, params=cfg)
tm.start(task_id=task_id, params=params)
open_task_folder(task_id)
logger.info(f"完成")
logger.info(tr("Video Generation Completed"))

51
webui/i18n/en.json Normal file
View File

@ -0,0 +1,51 @@
{
"Language": "English",
"Translation": {
"Video Script Settings": "**Video Script Settings**",
"Video Subject": "Video Subject (Provide a keyword, :red[AI will automatically generate] video script)",
"Script Language": "Language for Generating Video Script (AI will automatically output based on the language of your subject)",
"Generate Video Script and Keywords": "Click to use AI to generate 【Video Script】 and 【Video Keywords】 based on **subject**",
"Auto Detect": "Auto Detect",
"Video Script": "Video Script (:blue[① Optional, AI generated ② Proper punctuation helps with subtitle generation])",
"Generate Video Keywords": "Click to use AI to generate 【Video Keywords】 based on **script**",
"Please Enter the Video Subject": "Please Enter the Video Script First",
"Generating Video Script and Keywords": "AI is generating video script and keywords...",
"Generating Video Keywords": "AI is generating video keywords...",
"Video Keywords": "Video Keywords (:blue[① Optional, AI generated ② Use **English commas** for separation, English only])",
"Video Settings": "**Video Settings**",
"Video Concat Mode": "Video Concatenation Mode",
"Random": "Random Concatenation (Recommended)",
"Sequential": "Sequential Concatenation",
"Video Ratio": "Video Aspect Ratio",
"Portrait": "Portrait 9:16",
"Landscape": "Landscape 16:9",
"Clip Duration": "Maximum Duration of Video Clips (seconds)",
"Number of Videos Generated Simultaneously": "Number of Videos Generated Simultaneously",
"Audio Settings": "**Audio Settings**",
"Speech Synthesis": "Speech Synthesis Voice",
"Male": "Male",
"Female": "Female",
"Background Music": "Background Music",
"No Background Music": "No Background Music",
"Random Background Music": "Random Background Music",
"Custom Background Music": "Custom Background Music",
"Custom Background Music File": "Please enter the file path for custom background music:",
"Background Music Volume": "Background Music Volume (0.2 represents 20%, background music should not be too loud)",
"Subtitle Settings": "**Subtitle Settings**",
"Enable Subtitles": "Enable Subtitles (If unchecked, the settings below will not take effect)",
"Font": "Subtitle Font",
"Position": "Subtitle Position",
"Top": "Top",
"Center": "Middle",
"Bottom": "Bottom (Recommended)",
"Font Size": "Subtitle Font Size",
"Font Color": "Subtitle Font Color",
"Stroke Color": "Subtitle Outline Color",
"Stroke Width": "Subtitle Outline Width",
"Generate Video": "Generate Video",
"Video Script and Subject Cannot Both Be Empty": "Video Subject and Video Script cannot both be empty",
"Generating Video": "Generating video, please wait...",
"Start Generating Video": "Start Generating Video",
"Video Generation Completed": "Video Generation Completed"
}
}

51
webui/i18n/zh.json Normal file
View File

@ -0,0 +1,51 @@
{
"Language": "简体中文",
"Translation": {
"Video Script Settings": "**文案设置**",
"Video Subject": "视频主题(给定一个关键词,:red[AI自动生成]视频文案)",
"Script Language": "生成视频脚本的语言(一般情况AI会自动根据你输入的主题语言输出)",
"Generate Video Script and Keywords": "点击使用AI根据**主题**生成 【视频文案】 和 【视频关键词】",
"Auto Detect": "自动检测",
"Video Script": "视频文案(:blue[①可不填使用AI生成 ②合理使用标点断句,有助于生成字幕])",
"Generate Video Keywords": "点击使用AI根据**文案**生成【视频关键词】",
"Please Enter the Video Subject": "请先填写视频文案",
"Generating Video Script and Keywords": "AI正在生成视频文案和关键词...",
"Generating Video Keywords": "AI正在生成视频关键词...",
"Video Keywords": "视频关键词(:blue[①可不填使用AI生成 ②用**英文逗号**分隔,只支持英文])",
"Video Settings": "**视频设置**",
"Video Concat Mode": "视频拼接模式",
"Random": "随机拼接(推荐)",
"Sequential": "顺序拼接",
"Video Ratio": "视频比例",
"Portrait": "竖屏 9:16(抖音视频)",
"Landscape": "横屏 16:9(西瓜视频)",
"Clip Duration": "视频片段最大时长(秒)",
"Number of Videos Generated Simultaneously": "同时生成视频数量",
"Audio Settings": "**音频设置**",
"Speech Synthesis": "朗读声音",
"Male": "男性",
"Female": "女性",
"Background Music": "背景音乐",
"No Background Music": "无背景音乐",
"Random Background Music": "随机背景音乐",
"Custom Background Music": "自定义背景音乐",
"Custom Background Music File": "请输入自定义背景音乐的文件路径",
"Background Music Volume": "背景音乐音量(0.2表示20%,背景声音不宜过高)",
"Subtitle Settings": "**字幕设置**",
"Enable Subtitles": "启用字幕(若取消勾选,下面的设置都将不生效)",
"Font": "字幕字体",
"Position": "字幕位置",
"Top": "顶部",
"Center": "中间",
"Bottom": "底部(推荐)",
"Font Size": "字幕大小",
"Font Color": "字幕颜色",
"Stroke Color": "描边颜色",
"Stroke Width": "描边粗细",
"Generate Video": "生成视频",
"Video Script and Subject Cannot Both Be Empty": "视频主题 和 视频文案,不能同时为空",
"Generating Video": "正在生成视频,请稍候...",
"Start Generating Video": "开始生成视频",
"Video Generation Completed": "视频生成完成"
}
}