1. Support previewing AI-generated video scripts
2. Support custom video scripts and keywords
3. Subtitles can now be enabled or disabled
4. UI improvements
5. Miscellaneous other bug fixes and optimizations
This commit is contained in:
harry 2024-03-22 17:46:56 +08:00
parent 4a800eab4b
commit ce4b3771b6
10 changed files with 301 additions and 186 deletions

View File

@ -63,6 +63,8 @@ pip install -r requirements.txt
- How to deploy on Windows: https://v.douyin.com/iFyjoW3M
### Prerequisites
> Note: try not to use **paths containing Chinese characters**, to avoid unpredictable problems
1. Install ImageMagick
- Windows:
- Download https://imagemagick.org/archive/binaries/ImageMagick-7.1.1-29-Q16-x64-static.exe and install it (do not change the installation path); a configuration sketch follows below
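
If moviepy cannot locate ImageMagick on its own, it can be pointed at the binary explicitly. This is a minimal sketch rather than this project's actual wiring, and the install path is an assumption based on the installer's default location:

```python
# Minimal sketch: tell moviepy where the ImageMagick binary lives on Windows.
# The path below is an assumption based on the default install location of the
# installer linked above; adjust it to match your machine.
from moviepy.config import change_settings

change_settings({"IMAGEMAGICK_BINARY": r"C:\Program Files\ImageMagick-7.1.1-Q16\magick.exe"})
```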

View File

@ -1,4 +1,4 @@
punctuations = [
"?", ",", ".", "", ";",
"", "", "", "", "",
"?", ",", ".", "", ";", ":",
"", "", "", "", "", "",
]
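
This list appears to drive how the script text is broken into short, subtitle-sized segments. A simplified sketch of that kind of split, not the project's exact implementation:

```python
# Simplified sketch: split text into segments on a set of punctuation marks.
# Illustration only; the project's own splitting logic may differ in detail.
def split_by_punctuations(text, punctuations):
    segments, current = [], ""
    for ch in text:
        if ch in punctuations:
            if current.strip():
                segments.append(current.strip())
            current = ""
        else:
            current += ch
    if current.strip():
        segments.append(current.strip())
    return segments

print(split_by_punctuations("今天天气不错，适合出门。你觉得呢？", ["，", "。", "？"]))
# -> ['今天天气不错', '适合出门', '你觉得呢']
```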

View File

@ -8,6 +8,11 @@ import warnings
warnings.filterwarnings("ignore", category=UserWarning, message="Field name.*shadows an attribute in parent.*")
class VideoConcatMode(str, Enum):
random = "random"
sequential = "sequential"
class VideoAspect(str, Enum):
landscape = "16:9"
portrait = "9:16"
@ -23,6 +28,12 @@ class VideoAspect(str, Enum):
return 1080, 1920
class MaterialInfo:
provider: str = "pexels"
url: str = ""
duration: int = 0
VoiceNames = [
# zh-CN
"female-zh-CN-XiaoxiaoNeural",
@ -77,11 +88,21 @@ class VideoParams:
}
"""
video_subject: str
video_script: str = ""  # script used to generate the video
video_terms: str = ""  # keywords used to generate the video
video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
video_clip_duration: Optional[int] = 5
voice_name: Optional[str] = VoiceNames[0]
bgm_name: Optional[str] = "random"
bgm_type: Optional[str] = "random"
bgm_file: Optional[str] = ""
subtitle_enabled: Optional[bool] = True
font_name: Optional[str] = "STHeitiMedium.ttc"
text_fore_color: Optional[str] = "#FFFFFF"
text_background_color: Optional[str] = "transparent"
font_size: int = 60
stroke_color: Optional[str] = "#000000"
stroke_width: float = 1.5
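
A quick sketch of the pieces added to the schema: the aspect enum resolves to a pixel size, and the new VideoParams fields can be filled in the same attribute style the web UI uses. All values below are placeholders:

```python
# Sketch based on the fields shown above; all values are placeholders.
from app.models.schema import VideoAspect, VideoConcatMode, VideoParams

print(VideoAspect("9:16").to_resolution())   # -> (1080, 1920), per the hunk above

params = VideoParams()
params.video_subject = "morning routines"              # hypothetical subject
params.video_script = ""                               # empty -> generated by the LLM
params.video_terms = ""                                # empty -> generated by the LLM
params.video_concat_mode = VideoConcatMode.random.value
params.video_clip_duration = 5
params.subtitle_enabled = True                         # new switch in this commit
params.bgm_type = "random"                             # new BGM fields in this commit
```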

View File

@ -1,12 +1,13 @@
import random
import time
from urllib.parse import urlencode
import requests
from typing import List
from loguru import logger
from app.config import config
from app.models.schema import VideoAspect
from app.models.schema import VideoAspect, VideoConcatMode, MaterialInfo
from app.utils import utils
requested_count = 0
@ -22,11 +23,9 @@ def round_robin_api_key():
def search_videos(search_term: str,
wanted_count: int,
minimum_duration: int,
video_aspect: VideoAspect = VideoAspect.portrait,
locale: str = "zh-CN"
) -> List[str]:
) -> List[MaterialInfo]:
aspect = VideoAspect(video_aspect)
video_orientation = aspect.name
video_width, video_height = aspect.to_resolution()
@ -36,37 +35,45 @@ def search_videos(search_term: str,
}
proxies = config.pexels.get("proxies", None)
# Build URL
query_url = f"https://api.pexels.com/videos/search?query={search_term}&per_page=15&orientation={video_orientation}&locale={locale}"
params = {
"query": search_term,
"per_page": 20,
"orientation": video_orientation
}
query_url = f"https://api.pexels.com/videos/search?{urlencode(params)}"
logger.info(f"searching videos: {query_url}, with proxies: {proxies}")
# Send the request
r = requests.get(query_url, headers=headers, proxies=proxies, verify=False)
# Parse the response
response = r.json()
video_urls = []
try:
videos_count = min(len(response["videos"]), wanted_count)
r = requests.get(query_url, headers=headers, proxies=proxies, verify=False)
response = r.json()
video_items = []
if "videos" not in response:
logger.error(f"search videos failed: {response}")
return video_items
videos = response["videos"]
# loop through each video in the result
for i in range(videos_count):
for v in videos:
duration = v["duration"]
# check if video has desired minimum duration
if response["videos"][i]["duration"] < minimum_duration:
if duration < minimum_duration:
continue
video_files = response["videos"][i]["video_files"]
video_files = v["video_files"]
# loop through each url to determine the best quality
for video in video_files:
# Check if video has a valid download link
# if ".com/external" in video["link"]:
w = int(video["width"])
h = int(video["height"])
if w == video_width and h == video_height:
video_urls.append(video["link"])
item = MaterialInfo()
item.provider = "pexels"
item.url = video["link"]
item.duration = duration
video_items.append(item)
break
return video_items
except Exception as e:
logger.error(f"search videos failed: {e}")
return video_urls
return []
def save_video(video_url: str, save_dir: str) -> str:
@ -82,41 +89,46 @@ def save_video(video_url: str, save_dir: str) -> str:
def download_videos(task_id: str,
search_terms: List[str],
video_aspect: VideoAspect = VideoAspect.portrait,
wanted_count: int = 15,
minimum_duration: int = 5
video_contact_mode: VideoConcatMode = VideoConcatMode.random,
audio_duration: float = 0.0,
max_clip_duration: int = 5,
) -> List[str]:
valid_video_items = []
valid_video_urls = []
video_concat_mode = config.pexels.get("video_concat_mode", "")
found_duration = 0.0
for search_term in search_terms:
# logger.info(f"searching videos for '{search_term}'")
video_urls = search_videos(search_term=search_term,
wanted_count=wanted_count,
minimum_duration=minimum_duration,
video_aspect=video_aspect)
logger.info(f"found {len(video_urls)} videos for '{search_term}'")
video_items = search_videos(search_term=search_term,
minimum_duration=max_clip_duration,
video_aspect=video_aspect)
logger.info(f"found {len(video_items)} videos for '{search_term}'")
i = 0
for url in video_urls:
if video_concat_mode == "random":
url = random.choice(video_urls)
for item in video_items:
if item.url not in valid_video_urls:
valid_video_items.append(item)
valid_video_urls.append(item.url)
found_duration += item.duration
if url not in valid_video_urls:
valid_video_urls.append(url)
i += 1
if i >= 3:
break
logger.info(f"downloading videos: {len(valid_video_urls)}")
logger.info(
f"found total videos: {len(valid_video_items)}, required duration: {audio_duration} seconds, found duration: {found_duration} seconds")
video_paths = []
save_dir = utils.task_dir(task_id)
for video_url in valid_video_urls:
if video_contact_mode.value == VideoConcatMode.random.value:
random.shuffle(valid_video_items)
total_duration = 0.0
for item in valid_video_items:
try:
saved_video_path = save_video(video_url, save_dir)
logger.info(f"downloading video: {item.url}")
saved_video_path = save_video(item.url, save_dir)
video_paths.append(saved_video_path)
seconds = min(max_clip_duration, item.duration)
total_duration += seconds
if total_duration > audio_duration:
logger.info(f"total duration of downloaded videos: {total_duration} seconds, skip downloading more")
break
except Exception as e:
logger.error(f"failed to download video: {video_url}, {e}")
logger.error(f"failed to download video: {item}, {e}")
logger.success(f"downloaded {len(video_paths)} videos")
return video_paths
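
A rough usage sketch of the reworked download flow; every value below is a placeholder, and the import path for the material service is assumed from how the task service calls it:

```python
# Sketch with placeholder values; import path assumed from the task service usage.
from app.models.schema import VideoAspect, VideoConcatMode
from app.services import material

clip_paths = material.download_videos(
    task_id="demo-task-id",                     # placeholder
    search_terms=["city", "night"],             # normally generated by the LLM
    video_aspect=VideoAspect.portrait,
    video_contact_mode=VideoConcatMode.random,  # parameter name as spelled in the diff
    audio_duration=35.0,                        # length of the voice-over in seconds
    max_clip_duration=5,                        # seconds used from each clip
)
```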

View File

@ -106,7 +106,7 @@ def create(audio_file, subtitle_file: str = ""):
idx += 1
sub = "\n".join(lines)
with open(subtitle_file, "w") as f:
with open(subtitle_file, "w", encoding="utf-8") as f:
f.write(sub)
logger.info(f"subtitle file created: {subtitle_file}")
@ -116,7 +116,7 @@ def file_to_subtitles(filename):
current_times = None
current_text = ""
index = 0
with open(filename, 'r') as f:
with open(filename, 'r', encoding="utf-8") as f:
for line in f:
times = re.findall("([0-9]*:[0-9]*:[0-9]*,[0-9]*)", line)
if times:
@ -145,7 +145,7 @@ def correct(subtitle_file, video_script):
corrected = True
if corrected:
with open(subtitle_file, "w") as fd:
with open(subtitle_file, "w", encoding="utf-8") as fd:
for item in subtitle_items:
fd.write(f"{item[0]}\n{item[1]}\n{item[2]}\n\n")
logger.info(f"subtitle corrected")

View File

@ -1,4 +1,5 @@
import os.path
import re
from os import path
from loguru import logger
@ -41,77 +42,101 @@ def start(task_id, params: VideoParams):
voice_name, language = _parse_voice(params.voice_name)
paragraph_number = params.paragraph_number
n_threads = params.n_threads
max_clip_duration = params.video_clip_duration
logger.info("\n\n## generating video script")
script = llm.generate_script(video_subject=video_subject, language=language, paragraph_number=paragraph_number)
video_script = params.video_script.strip()
if not video_script:
video_script = llm.generate_script(video_subject=video_subject, language=language,
paragraph_number=paragraph_number)
else:
logger.debug(f"video script: \n{video_script}")
logger.info("\n\n## generating video terms")
search_terms = llm.generate_terms(video_subject=video_subject, video_script=script, amount=5)
video_terms = params.video_terms
if not video_terms:
video_terms = llm.generate_terms(video_subject=video_subject, video_script=video_script, amount=5)
else:
video_terms = [term.strip() for term in re.split(r'[,，]', video_terms)]
logger.debug(f"video terms: {utils.to_json(video_terms)}")
script_file = path.join(utils.task_dir(task_id), f"script.json")
script_data = {
"script": script,
"search_terms": search_terms
"script": video_script,
"search_terms": video_terms
}
with open(script_file, "w") as f:
with open(script_file, "w", encoding="utf-8") as f:
f.write(utils.to_json(script_data))
audio_file = path.join(utils.task_dir(task_id), f"audio.mp3")
subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
logger.info("\n\n## generating audio")
sub_maker = voice.tts(text=script, voice_name=voice_name, voice_file=audio_file)
audio_file = path.join(utils.task_dir(task_id), f"audio.mp3")
sub_maker = voice.tts(text=video_script, voice_name=voice_name, voice_file=audio_file)
if sub_maker is None:
logger.error(
"failed to generate audio, maybe the network is not available. if you are in China, please use a VPN.")
return
subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}")
subtitle_fallback = False
if subtitle_provider == "edge":
voice.create_subtitle(text=script, sub_maker=sub_maker, subtitle_file=subtitle_path)
if not os.path.exists(subtitle_path):
subtitle_fallback = True
logger.warning("subtitle file not found, fallback to whisper")
audio_duration = voice.get_audio_duration(sub_maker)
subtitle_path = ""
if params.subtitle_enabled:
subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}")
subtitle_fallback = False
if subtitle_provider == "edge":
voice.create_subtitle(text=video_script, sub_maker=sub_maker, subtitle_file=subtitle_path)
if not os.path.exists(subtitle_path):
subtitle_fallback = True
logger.warning("subtitle file not found, fallback to whisper")
else:
subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
if not subtitle_lines:
logger.warning(f"subtitle file is invalid: {subtitle_path}")
subtitle_fallback = True
if subtitle_provider == "whisper" or subtitle_fallback:
subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
logger.info("\n\n## correcting subtitle")
subtitle.correct(subtitle_file=subtitle_path, video_script=script)
if subtitle_provider == "whisper" or subtitle_fallback:
subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
logger.info("\n\n## correcting subtitle")
subtitle.correct(subtitle_file=subtitle_path, video_script=video_script)
subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
if not subtitle_lines:
logger.warning(f"subtitle file is invalid: {subtitle_path}")
subtitle_path = ""
logger.info("\n\n## downloading videos")
video_paths = material.download_videos(task_id=task_id, search_terms=search_terms, video_aspect=params.video_aspect,
wanted_count=20,
minimum_duration=5)
downloaded_videos = material.download_videos(task_id=task_id,
search_terms=video_terms,
video_aspect=params.video_aspect,
video_contact_mode=params.video_concat_mode,
audio_duration=audio_duration,
max_clip_duration=max_clip_duration,
)
if not downloaded_videos:
logger.error(
"failed to download videos, maybe the network is not available. if you are in China, please use a VPN.")
return
logger.info("\n\n## combining videos")
combined_video_path = path.join(utils.task_dir(task_id), f"combined.mp4")
video.combine_videos(combined_video_path=combined_video_path,
video_paths=video_paths,
video_paths=downloaded_videos,
audio_file=audio_file,
video_aspect=params.video_aspect,
max_clip_duration=5,
video_concat_mode=params.video_concat_mode,
max_clip_duration=max_clip_duration,
threads=n_threads)
final_video_path = path.join(utils.task_dir(task_id), f"final.mp4")
bgm_file = video.get_bgm_file(bgm_name=params.bgm_name)
logger.info("\n\n## generating video")
# Put everything together
video.generate_video(video_path=combined_video_path,
audio_path=audio_file,
subtitle_path=subtitle_path,
output_file=final_video_path,
video_aspect=params.video_aspect,
threads=n_threads,
font_name=params.font_name,
fontsize=params.font_size,
text_fore_color=params.text_fore_color,
stroke_color=params.stroke_color,
stroke_width=params.stroke_width,
bgm_file=bgm_file
params=params,
)
logger.start(f"task {task_id} finished")
return {
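
The task entry point now takes the whole VideoParams object, including the new script, keyword and subtitle switches. A minimal sketch of kicking off a task with a pre-written script and subtitles disabled; all values are placeholders:

```python
# Minimal sketch with placeholder values; the web UI normally fills these in.
from uuid import uuid4
from app.models.schema import VideoParams
from app.services import task

params = VideoParams()
params.video_subject = "healthy eating"                    # hypothetical subject
params.video_script = "Eat more vegetables. Drink water."  # skips LLM script generation
params.video_terms = "vegetables, water"                   # skips LLM keyword generation
params.subtitle_enabled = False                            # new switch added in this commit

task.start(task_id=str(uuid4()), params=params)
```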

View File

@ -7,22 +7,22 @@ from moviepy.editor import *
from moviepy.video.fx.crop import crop
from moviepy.video.tools.subtitles import SubtitlesClip
from app.models.schema import VideoAspect
from app.models.schema import VideoAspect, VideoParams, VideoConcatMode
from app.utils import utils
def get_bgm_file(bgm_name: str = "random"):
if not bgm_name:
def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
if not bgm_type:
return ""
if bgm_name == "random":
if bgm_type == "random":
suffix = "*.mp3"
song_dir = utils.song_dir()
files = glob.glob(os.path.join(song_dir, suffix))
return random.choice(files)
file = os.path.join(utils.song_dir(), bgm_name)
if os.path.exists(file):
return file
if os.path.exists(bgm_file):
return bgm_file
return ""
@ -30,6 +30,7 @@ def combine_videos(combined_video_path: str,
video_paths: List[str],
audio_file: str,
video_aspect: VideoAspect = VideoAspect.portrait,
video_concat_mode: VideoConcatMode = VideoConcatMode.random,
max_clip_duration: int = 5,
threads: int = 2,
) -> str:
@ -48,6 +49,10 @@ def combine_videos(combined_video_path: str,
tot_dur = 0
# Add downloaded clips over and over until the duration of the audio (max_duration) has been reached
while tot_dur < max_duration:
# shuffle video_paths so the clips are combined in random order
if video_concat_mode.value == VideoConcatMode.random.value:
random.shuffle(video_paths)
for video_path in video_paths:
clip = VideoFileClip(video_path)
clip = clip.without_audio()
@ -127,20 +132,9 @@ def generate_video(video_path: str,
audio_path: str,
subtitle_path: str,
output_file: str,
video_aspect: VideoAspect = VideoAspect.portrait,
threads: int = 2,
font_name: str = "",
fontsize: int = 60,
stroke_color: str = "#000000",
stroke_width: float = 1.5,
text_fore_color: str = "white",
text_background_color: str = "transparent",
bgm_file: str = "",
params: VideoParams,
):
aspect = VideoAspect(video_aspect)
aspect = VideoAspect(params.video_aspect)
video_width, video_height = aspect.to_resolution()
logger.info(f"start, video size: {video_width} x {video_height}")
@ -149,31 +143,33 @@ def generate_video(video_path: str,
logger.info(f" ③ subtitle: {subtitle_path}")
logger.info(f" ④ output: {output_file}")
if not font_name:
font_name = "STHeitiMedium.ttc"
font_path = os.path.join(utils.font_dir(), font_name)
if os.name == 'nt':
font_path = font_path.replace("\\", "/")
font_path = ""
if params.subtitle_enabled:
if not params.font_name:
params.font_name = "STHeitiMedium.ttc"
font_path = os.path.join(utils.font_dir(), params.font_name)
if os.name == 'nt':
font_path = font_path.replace("\\", "/")
logger.info(f"using font: {font_path}")
logger.info(f"using font: {font_path}")
def generator(txt):
wrapped_txt = wrap_text(txt, max_width=video_width - 100,
font=font_path,
fontsize=fontsize)  # adjust max_width to fit your video
fontsize=params.font_size)  # adjust max_width to fit your video
return TextClip(
wrapped_txt,
font=font_path,
fontsize=fontsize,
color=text_fore_color,
bg_color=text_background_color,
stroke_color=stroke_color,
stroke_width=stroke_width,
fontsize=params.font_size,
color=params.text_fore_color,
bg_color=params.text_background_color,
stroke_color=params.stroke_color,
stroke_width=params.stroke_width,
print_cmd=False,
)
position_height = video_height - 200
if video_aspect == VideoAspect.landscape:
if params.video_aspect == VideoAspect.landscape:
position_height = video_height - 100
clips = [
@ -191,9 +187,11 @@ def generate_video(video_path: str,
temp_output_file = f"{output_file}.temp.mp4"
logger.info(f"writing to temp file: {temp_output_file}")
result.write_videofile(temp_output_file, threads=threads or 2)
result.write_videofile(temp_output_file, threads=params.n_threads or 2)
video_clip = VideoFileClip(temp_output_file)
bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
if bgm_file:
logger.info(f"adding background music: {bgm_file}")
# Add song to video at 30% volume using moviepy
@ -209,35 +207,7 @@ def generate_video(video_path: str,
video_clip = video_clip.set_duration(original_duration)
logger.info(f"encoding audio codec to aac")
video_clip.write_videofile(output_file, audio_codec="aac", threads=threads)
video_clip.write_videofile(output_file, audio_codec="aac", threads=params.n_threads or 2)
os.remove(temp_output_file)
logger.success(f"completed")
if __name__ == "__main__":
txt = "hello 幸福经常被描述为最终人生目标和人类追求的核心 但它通常涉及对个人生活中意义和目的的深刻感悟"
font = utils.resource_dir() + "/fonts/STHeitiMedium.ttc"
t = wrap_text(text=txt, max_width=1000, font=font, fontsize=60)
print(t)
task_id = "c12fd1e6-4b0a-4d65-a075-c87abe35a072"
task_dir = utils.task_dir(task_id)
video_file = f"{task_dir}/combined.mp4"
audio_file = f"{task_dir}/audio.mp3"
subtitle_file = f"{task_dir}/subtitle.srt"
output_file = f"{task_dir}/final.mp4"
generate_video(video_path=video_file,
audio_path=audio_file,
subtitle_path=subtitle_file,
output_file=output_file,
video_aspect=VideoAspect.portrait,
threads=2,
font_name="STHeitiMedium.ttc",
fontsize=60,
stroke_color="#000000",
stroke_width=1.5,
text_fore_color="white",
text_background_color="transparent",
bgm_file=""
)
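
After this refactor, generate_video takes the whole VideoParams object instead of a long list of keyword arguments. A minimal sketch of the new call shape; paths are placeholders, and params.n_threads is assumed to exist because the function body reads it:

```python
# Sketch of the new call shape; paths are placeholders.
from app.models.schema import VideoParams
from app.services import video

params = VideoParams()
params.video_subject = "placeholder"
params.n_threads = 2   # read by generate_video via params.n_threads

video.generate_video(video_path="combined.mp4",
                     audio_path="audio.mp3",
                     subtitle_path="subtitle.srt",
                     output_file="final.mp4",
                     params=params)
```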

View File

@ -8,23 +8,26 @@ import edge_tts
from app.utils import utils
def tts(text: str, voice_name: str, voice_file: str) -> SubMaker:
def tts(text: str, voice_name: str, voice_file: str) -> [SubMaker, None]:
logger.info(f"start, voice name: {voice_name}")
try:
async def _do() -> SubMaker:
communicate = edge_tts.Communicate(text, voice_name)
sub_maker = edge_tts.SubMaker()
with open(voice_file, "wb") as file:
async for chunk in communicate.stream():
if chunk["type"] == "audio":
file.write(chunk["data"])
elif chunk["type"] == "WordBoundary":
sub_maker.create_sub((chunk["offset"], chunk["duration"]), chunk["text"])
return sub_maker
async def _do() -> SubMaker:
communicate = edge_tts.Communicate(text, voice_name)
sub_maker = edge_tts.SubMaker()
with open(voice_file, "wb") as file:
async for chunk in communicate.stream():
if chunk["type"] == "audio":
file.write(chunk["data"])
elif chunk["type"] == "WordBoundary":
sub_maker.create_sub((chunk["offset"], chunk["duration"]), chunk["text"])
sub_maker = asyncio.run(_do())
logger.info(f"completed, output file: {voice_file}")
return sub_maker
sub_maker = asyncio.run(_do())
logger.info(f"completed, output file: {voice_file}")
return sub_maker
except Exception as e:
logger.error(f"failed, error: {e}")
return None
def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str):
@ -78,6 +81,15 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
file.write("\n".join(sub_items))
def get_audio_duration(sub_maker: submaker.SubMaker):
"""
Get the audio duration (in seconds)
"""
if not sub_maker.offset:
return 0.0
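# edge-tts offsets are in 100-nanosecond units, so dividing by 10,000,000 yields seconds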
return sub_maker.offset[-1][1] / 10000000
if __name__ == "__main__":
async def _do():
@ -102,6 +114,8 @@ if __name__ == "__main__":
subtitle_file = f"{temp_dir}/tts.mp3.srt"
sub_maker = tts(text=text, voice_name=voice_name, voice_file=voice_file)
create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)
audio_duration = get_audio_duration(sub_maker)
print(f"voice: {voice_name}, audio duration: {audio_duration}s")
loop = asyncio.get_event_loop_policy().get_event_loop()

Binary file not shown (image updated: 241 KiB → 342 KiB)

View File

@ -1,11 +1,12 @@
import asyncio
import sys
import os
import time
from uuid import uuid4
import streamlit as st
from loguru import logger
from app.models.schema import VideoParams, VideoAspect, VoiceNames
from app.services import task as tm
from app.models.schema import VideoParams, VideoAspect, VoiceNames, VideoConcatMode
from app.services import task as tm, llm
st.set_page_config(page_title="MoneyPrinterTurbo", page_icon="🤖", layout="wide",
initial_sidebar_state="auto")
@ -15,6 +16,15 @@ root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
font_dir = os.path.join(root_dir, "resource", "fonts")
song_dir = os.path.join(root_dir, "resource", "songs")
# st.session_state
if 'video_subject' not in st.session_state:
st.session_state['video_subject'] = ''
if 'video_script' not in st.session_state:
st.session_state['video_script'] = ''
if 'video_terms' not in st.session_state:
st.session_state['video_terms'] = ''
def get_all_fonts():
fonts = []
@ -65,17 +75,61 @@ def init_log():
init_log()
panel = st.columns(2)
panel = st.columns(3)
left_panel = panel[0]
right_panel = panel[1]
middle_panel = panel[1]
right_panel = panel[2]
# define cfg as a VideoParams instance
cfg = VideoParams()
with left_panel:
with st.container(border=True):
st.write("**文案设置**")
cfg.video_subject = st.text_input("视频主题(给定一个关键词,:red[AI自动生成]视频文案)",
value=st.session_state['video_subject']).strip()
if st.button("点击使用AI根据**主题**生成 【视频文案】 和 【视频关键词】", key="auto_generate_script"):
with st.spinner("AI正在生成视频文案和关键词..."):
script = llm.generate_script(cfg.video_subject)
terms = llm.generate_terms(cfg.video_subject, script)
st.toast('AI生成成功')
st.session_state['video_script'] = script
st.session_state['video_terms'] = ", ".join(terms)
cfg.video_script = st.text_area(
"视频文案(:blue[①可不填使用AI生成 ②合理使用标点断句,有助于生成字幕]",
value=st.session_state['video_script'],
height=190
)
if st.button("点击使用AI根据**文案**生成【视频关键词】", key="auto_generate_terms"):
if not cfg.video_script:
st.error("请先填写视频文案")
st.stop()
with st.spinner("AI正在生成视频关键词..."):
terms = llm.generate_terms(cfg.video_subject, cfg.video_script)
st.toast('AI生成成功')
st.session_state['video_terms'] = ", ".join(terms)
cfg.video_terms = st.text_area(
"视频关键词(:blue[①可不填使用AI生成 ②用**英文逗号**分隔,只支持英文]",
value=st.session_state['video_terms'],
height=40)
with middle_panel:
with st.container(border=True):
st.write("**视频设置**")
cfg.video_subject = st.text_area("视频主题", help="请输入视频主题")
video_concat_modes = [
("顺序拼接", "sequential"),
("随机拼接", "random"),
]
selected_index = st.selectbox("视频拼接模式",
index=1,
options=range(len(video_concat_modes)),  # use the index as the internal option value
format_func=lambda x: video_concat_modes[x][0]  # show the label to the user
)
cfg.video_concat_mode = VideoConcatMode(video_concat_modes[selected_index][1])
video_aspect_ratios = [
("竖屏 9:16抖音视频", VideoAspect.portrait.value),
("横屏 16:9西瓜视频", VideoAspect.landscape.value),
@ -87,8 +141,10 @@ with left_panel:
)
cfg.video_aspect = VideoAspect(video_aspect_ratios[selected_index][1])
cfg.video_clip_duration = st.slider("视频片段最大时长(秒)", 2, 5, 3)
with st.container(border=True):
st.write("**声音设置**")
st.write("**设置**")
# build a mapping from raw voice names to friendly display names
friendly_names = {
voice: voice.
@ -100,38 +156,53 @@ with left_panel:
replace("en-US", "英文").
replace("Neural", "") for
voice in VoiceNames}
selected_friendly_name = st.selectbox("声音", options=list(friendly_names.values()))
selected_friendly_name = st.selectbox("朗读声音", options=list(friendly_names.values()))
voice_name = list(friendly_names.keys())[list(friendly_names.values()).index(selected_friendly_name)]
cfg.voice_name = voice_name
song_names = [
bgm_options = [
("无背景音乐 No BGM", ""),
("随机背景音乐 Random BGM", "random"),
*[(song, song) for song in get_all_songs()]
("自定义背景音乐 Custom BGM", "custom"),
]
selected_index = st.selectbox("背景音乐",
index=1,
options=range(len(song_names)),  # use the index as the internal option value
format_func=lambda x: song_names[x][0]  # show the label to the user
options=range(len(bgm_options)),  # use the index as the internal option value
format_func=lambda x: bgm_options[x][0]  # show the label to the user
)
cfg.bgm_name = song_names[selected_index][1]
# get the selected background-music type
bgm_type = bgm_options[selected_index][1]
# show or hide the custom-file input depending on the selection
if bgm_type == "custom":
custom_bgm_file = st.text_input("请输入自定义背景音乐的文件路径:")
if custom_bgm_file and os.path.exists(custom_bgm_file):
cfg.bgm_file = custom_bgm_file
# st.write(f":red[已选择自定义背景音乐]**{custom_bgm_file}**")
with right_panel:
with st.container(border=True):
st.write("**字幕设置**")
cfg.subtitle_enabled = st.checkbox("生成字幕(若取消勾选,下面的设置都将不生效)", value=True)
font_names = get_all_fonts()
cfg.font_name = st.selectbox("字体", font_names)
cfg.text_fore_color = st.color_picker("字幕颜色", "#FFFFFF")
cfg.font_size = st.slider("字幕大小", 30, 100, 60)
cfg.stroke_color = st.color_picker("描边颜色", "#000000")
cfg.stroke_width = st.slider("描边粗细", 0.0, 10.0, 1.5)
font_cols = st.columns([0.3, 0.7])
with font_cols[0]:
cfg.text_fore_color = st.color_picker("字幕颜色", "#FFFFFF")
with font_cols[1]:
cfg.font_size = st.slider("字幕大小", 30, 100, 60)
stroke_cols = st.columns([0.3, 0.7])
with stroke_cols[0]:
cfg.stroke_color = st.color_picker("描边颜色", "#000000")
with stroke_cols[1]:
cfg.stroke_width = st.slider("描边粗细", 0.0, 10.0, 1.5)
start_button = st.button("开始生成视频", use_container_width=True, type="primary")
if start_button:
task_id = str(uuid4())
st.session_state['started'] = True
if not cfg.video_subject:
st.error("视频主题不能为空")
if not cfg.video_subject and not cfg.video_script:
st.error("视频主题 或 视频文案,不能同时为空")
st.stop()
st.write(cfg)