diff --git a/README.md b/README.md
index 9ca6349..025b957 100644
--- a/README.md
+++ b/README.md
@@ -63,6 +63,8 @@ pip install -r requirements.txt
 - 如何在Windows上部署:https://v.douyin.com/iFyjoW3M
 
 ### 前提
+> 注意,尽量不要使用 **中文路径**,避免出现一些无法预料的问题
+
 1. 安装好 ImageMagick
    - Windows:
      - 下载 https://imagemagick.org/archive/binaries/ImageMagick-7.1.1-29-Q16-x64-static.exe 并安装(不要修改安装路径)
diff --git a/app/models/const.py b/app/models/const.py
index 079a103..0ea3b76 100644
--- a/app/models/const.py
+++ b/app/models/const.py
@@ -1,4 +1,4 @@
 punctuations = [
-    "?", ",", ".", "、", ";",
-    "?", ",", "。", "、", ";",
+    "?", ",", ".", "、", ";", ":",
+    "?", ",", "。", "、", ";", ":",
 ]
diff --git a/app/models/schema.py b/app/models/schema.py
index 4a1f671..e9fb084 100644
--- a/app/models/schema.py
+++ b/app/models/schema.py
@@ -8,6 +8,11 @@ import warnings
 warnings.filterwarnings("ignore", category=UserWarning, message="Field name.*shadows an attribute in parent.*")
 
 
+class VideoConcatMode(str, Enum):
+    random = "random"
+    sequential = "sequential"
+
+
 class VideoAspect(str, Enum):
     landscape = "16:9"
     portrait = "9:16"
@@ -23,6 +28,12 @@ class VideoAspect(str, Enum):
         return 1080, 1920
 
 
+class MaterialInfo:
+    provider: str = "pexels"
+    url: str = ""
+    duration: int = 0
+
+
 VoiceNames = [
     # zh-CN
     "female-zh-CN-XiaoxiaoNeural",
@@ -77,11 +88,21 @@ class VideoParams:
     }
     """
     video_subject: str
+    video_script: str = ""  # 用于生成视频的脚本
+    video_terms: str = ""  # 用于生成视频的关键词
     video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
+    video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
+    video_clip_duration: Optional[int] = 5
+
     voice_name: Optional[str] = VoiceNames[0]
-    bgm_name: Optional[str] = "random"
+    bgm_type: Optional[str] = "random"
+    bgm_file: Optional[str] = ""
+
+    subtitle_enabled: Optional[bool] = True
     font_name: Optional[str] = "STHeitiMedium.ttc"
     text_fore_color: Optional[str] = "#FFFFFF"
+    text_background_color: Optional[str] = "transparent"
+
     font_size: int = 60
     stroke_color: Optional[str] = "#000000"
     stroke_width: float = 1.5
diff --git a/app/services/material.py b/app/services/material.py
index e2d9298..ca7f92b 100644
--- a/app/services/material.py
+++ b/app/services/material.py
@@ -1,12 +1,13 @@
 import random
 import time
+from urllib.parse import urlencode
 
 import requests
 from typing import List
 from loguru import logger
 
 from app.config import config
-from app.models.schema import VideoAspect
+from app.models.schema import VideoAspect, VideoConcatMode, MaterialInfo
 from app.utils import utils
 
 requested_count = 0
@@ -22,11 +23,9 @@ def round_robin_api_key():
 
 
 def search_videos(search_term: str,
-                  wanted_count: int,
                   minimum_duration: int,
                   video_aspect: VideoAspect = VideoAspect.portrait,
-                  locale: str = "zh-CN"
-                  ) -> List[str]:
+                  ) -> List[MaterialInfo]:
     aspect = VideoAspect(video_aspect)
     video_orientation = aspect.name
     video_width, video_height = aspect.to_resolution()
@@ -36,37 +35,45 @@ def search_videos(search_term: str,
     }
     proxies = config.pexels.get("proxies", None)
     # Build URL
-    query_url = f"https://api.pexels.com/videos/search?query={search_term}&per_page=15&orientation={video_orientation}&locale={locale}"
+    params = {
+        "query": search_term,
+        "per_page": 20,
+        "orientation": video_orientation
+    }
+    query_url = f"https://api.pexels.com/videos/search?{urlencode(params)}"
     logger.info(f"searching videos: {query_url}, with proxies: {proxies}")
-    # Send the request
-    r = requests.get(query_url, headers=headers, proxies=proxies, verify=False)
-
-    # Parse the response
-    response = r.json()
-
-    video_urls = []
     try:
-        videos_count = min(len(response["videos"]), wanted_count)
+        r = requests.get(query_url, headers=headers, proxies=proxies, verify=False)
+        response = r.json()
+        video_items = []
+        if "videos" not in response:
+            logger.error(f"search videos failed: {response}")
+            return video_items
+        videos = response["videos"]
         # loop through each video in the result
-        for i in range(videos_count):
+        for v in videos:
+            duration = v["duration"]
             # check if video has desired minimum duration
-            if response["videos"][i]["duration"] < minimum_duration:
+            if duration < minimum_duration:
                 continue
-            video_files = response["videos"][i]["video_files"]
+            video_files = v["video_files"]
             # loop through each url to determine the best quality
             for video in video_files:
-                # Check if video has a valid download link
-                # if ".com/external" in video["link"]:
                 w = int(video["width"])
                 h = int(video["height"])
                 if w == video_width and h == video_height:
-                    video_urls.append(video["link"])
+                    item = MaterialInfo()
+                    item.provider = "pexels"
+                    item.url = video["link"]
+                    item.duration = duration
+                    video_items.append(item)
                     break
-
+        return video_items
     except Exception as e:
         logger.error(f"search videos failed: {e}")
 
-    return video_urls
+    return []
 
 
 def save_video(video_url: str, save_dir: str) -> str:
@@ -82,41 +89,46 @@
 def download_videos(task_id: str,
                     search_terms: List[str],
                     video_aspect: VideoAspect = VideoAspect.portrait,
-                    wanted_count: int = 15,
-                    minimum_duration: int = 5
+                    video_contact_mode: VideoConcatMode = VideoConcatMode.random,
+                    audio_duration: float = 0.0,
+                    max_clip_duration: int = 5,
                     ) -> List[str]:
+    valid_video_items = []
     valid_video_urls = []
-
-    video_concat_mode = config.pexels.get("video_concat_mode", "")
-
+    found_duration = 0.0
     for search_term in search_terms:
         # logger.info(f"searching videos for '{search_term}'")
-        video_urls = search_videos(search_term=search_term,
-                                   wanted_count=wanted_count,
-                                   minimum_duration=minimum_duration,
-                                   video_aspect=video_aspect)
-        logger.info(f"found {len(video_urls)} videos for '{search_term}'")
+        video_items = search_videos(search_term=search_term,
+                                    minimum_duration=max_clip_duration,
+                                    video_aspect=video_aspect)
+        logger.info(f"found {len(video_items)} videos for '{search_term}'")
 
-        i = 0
-        for url in video_urls:
-            if video_concat_mode == "random":
-                url = random.choice(video_urls)
+        for item in video_items:
+            if item.url not in valid_video_urls:
+                valid_video_items.append(item)
+                valid_video_urls.append(item.url)
+                found_duration += item.duration
 
-            if url not in valid_video_urls:
-                valid_video_urls.append(url)
-                i += 1
-
-            if i >= 3:
-                break
-
-    logger.info(f"downloading videos: {len(valid_video_urls)}")
+    logger.info(
+        f"found total videos: {len(valid_video_items)}, required duration: {audio_duration} seconds, found duration: {found_duration} seconds")
     video_paths = []
     save_dir = utils.task_dir(task_id)
-    for video_url in valid_video_urls:
+
+    if video_contact_mode.value == VideoConcatMode.random.value:
+        random.shuffle(valid_video_items)
+
+    total_duration = 0.0
+    for item in valid_video_items:
         try:
-            saved_video_path = save_video(video_url, save_dir)
+            logger.info(f"downloading video: {item.url}")
+            saved_video_path = save_video(item.url, save_dir)
             video_paths.append(saved_video_path)
+            seconds = min(max_clip_duration, item.duration)
+            total_duration += seconds
+            if total_duration > audio_duration:
+                logger.info(f"total duration of downloaded videos: {total_duration} seconds, skip downloading more")
+                break
         except Exception as e:
-            logger.error(f"failed to download video: {video_url}, {e}")
+            logger.error(f"failed to download video: {item}, {e}")
 
     logger.success(f"downloaded {len(video_paths)} videos")
 
     return video_paths
diff --git a/app/services/subtitle.py b/app/services/subtitle.py
index e62ce57..3dcbe88 100644
--- a/app/services/subtitle.py
+++ b/app/services/subtitle.py
@@ -106,7 +106,7 @@ def create(audio_file, subtitle_file: str = ""):
             idx += 1
 
         sub = "\n".join(lines)
-        with open(subtitle_file, "w") as f:
+        with open(subtitle_file, "w", encoding="utf-8") as f:
             f.write(sub)
         logger.info(f"subtitle file created: {subtitle_file}")
 
@@ -116,7 +116,7 @@ def file_to_subtitles(filename):
     current_times = None
     current_text = ""
     index = 0
-    with open(filename, 'r') as f:
+    with open(filename, 'r', encoding="utf-8") as f:
         for line in f:
             times = re.findall("([0-9]*:[0-9]*:[0-9]*,[0-9]*)", line)
             if times:
@@ -145,7 +145,7 @@ def correct(subtitle_file, video_script):
             corrected = True
 
     if corrected:
-        with open(subtitle_file, "w") as fd:
+        with open(subtitle_file, "w", encoding="utf-8") as fd:
             for item in subtitle_items:
                 fd.write(f"{item[0]}\n{item[1]}\n{item[2]}\n\n")
         logger.info(f"subtitle corrected")
diff --git a/app/services/task.py b/app/services/task.py
index 1961608..96a66b4 100644
--- a/app/services/task.py
+++ b/app/services/task.py
@@ -1,4 +1,5 @@
 import os.path
+import re
 from os import path
 
 from loguru import logger
@@ -41,77 +42,101 @@ def start(task_id, params: VideoParams):
     voice_name, language = _parse_voice(params.voice_name)
     paragraph_number = params.paragraph_number
     n_threads = params.n_threads
+    max_clip_duration = params.video_clip_duration
 
     logger.info("\n\n## generating video script")
-    script = llm.generate_script(video_subject=video_subject, language=language, paragraph_number=paragraph_number)
+    video_script = params.video_script.strip()
+    if not video_script:
+        video_script = llm.generate_script(video_subject=video_subject, language=language,
+                                           paragraph_number=paragraph_number)
+    else:
+        logger.debug(f"video script: \n{video_script}")
 
     logger.info("\n\n## generating video terms")
-    search_terms = llm.generate_terms(video_subject=video_subject, video_script=script, amount=5)
+    video_terms = params.video_terms
+    if not video_terms:
+        video_terms = llm.generate_terms(video_subject=video_subject, video_script=video_script, amount=5)
+    else:
+        video_terms = [term.strip() for term in re.split(r'[,,]', video_terms)]
+        logger.debug(f"video terms: {utils.to_json(video_terms)}")
 
     script_file = path.join(utils.task_dir(task_id), f"script.json")
     script_data = {
-        "script": script,
-        "search_terms": search_terms
+        "script": video_script,
+        "search_terms": video_terms
     }
-    with open(script_file, "w") as f:
+    with open(script_file, "w", encoding="utf-8") as f:
         f.write(utils.to_json(script_data))
 
-    audio_file = path.join(utils.task_dir(task_id), f"audio.mp3")
-    subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
-
     logger.info("\n\n## generating audio")
-    sub_maker = voice.tts(text=script, voice_name=voice_name, voice_file=audio_file)
+    audio_file = path.join(utils.task_dir(task_id), f"audio.mp3")
+    sub_maker = voice.tts(text=video_script, voice_name=voice_name, voice_file=audio_file)
+    if sub_maker is None:
+        logger.error(
+            "failed to generate audio, maybe the network is not available. if you are in China, please use a VPN.")
+        return
 
-    subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
-    logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}")
-    subtitle_fallback = False
-    if subtitle_provider == "edge":
-        voice.create_subtitle(text=script, sub_maker=sub_maker, subtitle_file=subtitle_path)
-        if not os.path.exists(subtitle_path):
-            subtitle_fallback = True
-            logger.warning("subtitle file not found, fallback to whisper")
+    audio_duration = voice.get_audio_duration(sub_maker)
+    subtitle_path = ""
+    if params.subtitle_enabled:
+        subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
+        subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
+        logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}")
+        subtitle_fallback = False
+        if subtitle_provider == "edge":
+            voice.create_subtitle(text=video_script, sub_maker=sub_maker, subtitle_file=subtitle_path)
+            if not os.path.exists(subtitle_path):
+                subtitle_fallback = True
+                logger.warning("subtitle file not found, fallback to whisper")
+            else:
+                subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
+                if not subtitle_lines:
+                    logger.warning(f"subtitle file is invalid: {subtitle_path}")
+                    subtitle_fallback = True
 
-    if subtitle_provider == "whisper" or subtitle_fallback:
-        subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
-        logger.info("\n\n## correcting subtitle")
-        subtitle.correct(subtitle_file=subtitle_path, video_script=script)
+        if subtitle_provider == "whisper" or subtitle_fallback:
+            subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
+            logger.info("\n\n## correcting subtitle")
+            subtitle.correct(subtitle_file=subtitle_path, video_script=video_script)
+
+        subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
+        if not subtitle_lines:
+            logger.warning(f"subtitle file is invalid: {subtitle_path}")
+            subtitle_path = ""
 
     logger.info("\n\n## downloading videos")
-    video_paths = material.download_videos(task_id=task_id, search_terms=search_terms, video_aspect=params.video_aspect,
-                                           wanted_count=20,
-                                           minimum_duration=5)
+    downloaded_videos = material.download_videos(task_id=task_id,
+                                                 search_terms=video_terms,
+                                                 video_aspect=params.video_aspect,
+                                                 video_contact_mode=params.video_concat_mode,
+                                                 audio_duration=audio_duration,
+                                                 max_clip_duration=max_clip_duration,
+                                                 )
+    if not downloaded_videos:
+        logger.error(
+            "failed to download videos, maybe the network is not available. if you are in China, please use a VPN.")
+        return
 
     logger.info("\n\n## combining videos")
     combined_video_path = path.join(utils.task_dir(task_id), f"combined.mp4")
     video.combine_videos(combined_video_path=combined_video_path,
-                         video_paths=video_paths,
+                         video_paths=downloaded_videos,
                          audio_file=audio_file,
                          video_aspect=params.video_aspect,
-                         max_clip_duration=5,
+                         video_concat_mode=params.video_concat_mode,
+                         max_clip_duration=max_clip_duration,
                          threads=n_threads)
 
     final_video_path = path.join(utils.task_dir(task_id), f"final.mp4")
 
-    bgm_file = video.get_bgm_file(bgm_name=params.bgm_name)
 
     logger.info("\n\n## generating video")
     # Put everything together
     video.generate_video(video_path=combined_video_path,
                          audio_path=audio_file,
                          subtitle_path=subtitle_path,
                          output_file=final_video_path,
-
-                         video_aspect=params.video_aspect,
-
-                         threads=n_threads,
-
-                         font_name=params.font_name,
-                         fontsize=params.font_size,
-                         text_fore_color=params.text_fore_color,
-                         stroke_color=params.stroke_color,
-                         stroke_width=params.stroke_width,
-
-                         bgm_file=bgm_file
+                         params=params,
                          )
 
     logger.start(f"task {task_id} finished")
     return {
diff --git a/app/services/video.py b/app/services/video.py
index 249279d..f00ee36 100644
--- a/app/services/video.py
+++ b/app/services/video.py
@@ -7,22 +7,22 @@ from moviepy.editor import *
 from moviepy.video.fx.crop import crop
 from moviepy.video.tools.subtitles import SubtitlesClip
 
-from app.models.schema import VideoAspect
+from app.models.schema import VideoAspect, VideoParams, VideoConcatMode
 from app.utils import utils
 
 
-def get_bgm_file(bgm_name: str = "random"):
-    if not bgm_name:
+def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
+    if not bgm_type:
         return ""
 
-    if bgm_name == "random":
+    if bgm_type == "random":
         suffix = "*.mp3"
         song_dir = utils.song_dir()
         files = glob.glob(os.path.join(song_dir, suffix))
         return random.choice(files)
 
-    file = os.path.join(utils.song_dir(), bgm_name)
-    if os.path.exists(file):
-        return file
+    if os.path.exists(bgm_file):
+        return bgm_file
+
     return ""
 
@@ -30,6 +30,7 @@ def combine_videos(combined_video_path: str,
                    video_paths: List[str],
                    audio_file: str,
                    video_aspect: VideoAspect = VideoAspect.portrait,
+                   video_concat_mode: VideoConcatMode = VideoConcatMode.random,
                    max_clip_duration: int = 5,
                    threads: int = 2,
                    ) -> str:
@@ -48,6 +49,10 @@ def combine_videos(combined_video_path: str,
     tot_dur = 0
     # Add downloaded clips over and over until the duration of the audio (max_duration) has been reached
     while tot_dur < max_duration:
+        # random video_paths order
+        if video_concat_mode.value == VideoConcatMode.random.value:
+            random.shuffle(video_paths)
+
         for video_path in video_paths:
             clip = VideoFileClip(video_path)
             clip = clip.without_audio()
@@ -127,20 +132,9 @@ def generate_video(video_path: str,
                    audio_path: str,
                    subtitle_path: str,
                    output_file: str,
-                   video_aspect: VideoAspect = VideoAspect.portrait,
-
-                   threads: int = 2,
-
-                   font_name: str = "",
-                   fontsize: int = 60,
-                   stroke_color: str = "#000000",
-                   stroke_width: float = 1.5,
-                   text_fore_color: str = "white",
-                   text_background_color: str = "transparent",
-
-                   bgm_file: str = "",
+                   params: VideoParams,
                    ):
-    aspect = VideoAspect(video_aspect)
+    aspect = VideoAspect(params.video_aspect)
     video_width, video_height = aspect.to_resolution()
 
     logger.info(f"start, video size: {video_width} x {video_height}")
@@ -149,31 +143,33 @@
     logger.info(f"  ③ subtitle: {subtitle_path}")
     logger.info(f"  ④ output: {output_file}")
 
-    if not font_name:
-        font_name = "STHeitiMedium.ttc"
-    font_path = os.path.join(utils.font_dir(), font_name)
-    if os.name == 'nt':
-        font_path = font_path.replace("\\", "/")
+    font_path = ""
+    if params.subtitle_enabled:
+        if not params.font_name:
+            params.font_name = "STHeitiMedium.ttc"
+        font_path = os.path.join(utils.font_dir(), params.font_name)
+        if os.name == 'nt':
+            font_path = font_path.replace("\\", "/")
 
-    logger.info(f"using font: {font_path}")
+        logger.info(f"using font: {font_path}")
 
     def generator(txt):
         wrapped_txt = wrap_text(txt,
                                 max_width=video_width - 100,
                                 font=font_path,
-                                fontsize=fontsize)  # 调整max_width以适应你的视频
+                                fontsize=params.font_size)  # 调整max_width以适应你的视频
         return TextClip(
             wrapped_txt,
             font=font_path,
-            fontsize=fontsize,
-            color=text_fore_color,
-            bg_color=text_background_color,
-            stroke_color=stroke_color,
-            stroke_width=stroke_width,
+            fontsize=params.font_size,
+            color=params.text_fore_color,
+            bg_color=params.text_background_color,
+            stroke_color=params.stroke_color,
+            stroke_width=params.stroke_width,
             print_cmd=False,
         )
 
     position_height = video_height - 200
-    if video_aspect == VideoAspect.landscape:
+    if params.video_aspect == VideoAspect.landscape:
         position_height = video_height - 100
 
     clips = [
@@ -191,9 +187,11 @@
 
     temp_output_file = f"{output_file}.temp.mp4"
     logger.info(f"writing to temp file: {temp_output_file}")
-    result.write_videofile(temp_output_file, threads=threads or 2)
+    result.write_videofile(temp_output_file, threads=params.n_threads or 2)
 
     video_clip = VideoFileClip(temp_output_file)
+
+    bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
     if bgm_file:
         logger.info(f"adding background music: {bgm_file}")
         # Add song to video at 30% volume using moviepy
@@ -209,35 +207,7 @@
         video_clip = video_clip.set_duration(original_duration)
 
     logger.info(f"encoding audio codec to aac")
-    video_clip.write_videofile(output_file, audio_codec="aac", threads=threads)
+    video_clip.write_videofile(output_file, audio_codec="aac", threads=params.n_threads or 2)
     os.remove(temp_output_file)
 
     logger.success(f"completed")
-
-
-if __name__ == "__main__":
-    txt = "hello 幸福经常被描述为最终人生目标和人类追求的核心 但它通常涉及对个人生活中意义和目的的深刻感悟"
-    font = utils.resource_dir() + "/fonts/STHeitiMedium.ttc"
-    t = wrap_text(text=txt, max_width=1000, font=font, fontsize=60)
-    print(t)
-
-    task_id = "c12fd1e6-4b0a-4d65-a075-c87abe35a072"
-    task_dir = utils.task_dir(task_id)
-    video_file = f"{task_dir}/combined.mp4"
-    audio_file = f"{task_dir}/audio.mp3"
-    subtitle_file = f"{task_dir}/subtitle.srt"
-    output_file = f"{task_dir}/final.mp4"
-    generate_video(video_path=video_file,
-                   audio_path=audio_file,
-                   subtitle_path=subtitle_file,
-                   output_file=output_file,
-                   video_aspect=VideoAspect.portrait,
-                   threads=2,
-                   font_name="STHeitiMedium.ttc",
-                   fontsize=60,
-                   stroke_color="#000000",
-                   stroke_width=1.5,
-                   text_fore_color="white",
-                   text_background_color="transparent",
-                   bgm_file=""
-                   )
diff --git a/app/services/voice.py b/app/services/voice.py
index 736aab4..76fb921 100644
--- a/app/services/voice.py
+++ b/app/services/voice.py
@@ -8,23 +8,26 @@ import edge_tts
 from app.utils import utils
 
 
-def tts(text: str, voice_name: str, voice_file: str) -> SubMaker:
+def tts(text: str, voice_name: str, voice_file: str) -> [SubMaker, None]:
     logger.info(f"start, voice name: {voice_name}")
 
+    try:
+        async def _do() -> SubMaker:
+            communicate = edge_tts.Communicate(text, voice_name)
+            sub_maker = edge_tts.SubMaker()
+            with open(voice_file, "wb") as file:
+                async for chunk in communicate.stream():
+                    if chunk["type"] == "audio":
+                        file.write(chunk["data"])
+                    elif chunk["type"] == "WordBoundary":
+                        sub_maker.create_sub((chunk["offset"], chunk["duration"]), chunk["text"])
+            return sub_maker
 
-    async def _do() -> SubMaker:
-        communicate = edge_tts.Communicate(text, voice_name)
-        sub_maker = edge_tts.SubMaker()
-        with open(voice_file, "wb") as file:
-            async for chunk in communicate.stream():
-                if chunk["type"] == "audio":
-                    file.write(chunk["data"])
-                elif chunk["type"] == "WordBoundary":
-                    sub_maker.create_sub((chunk["offset"], chunk["duration"]), chunk["text"])
+        sub_maker = asyncio.run(_do())
+        logger.info(f"completed, output file: {voice_file}")
         return sub_maker
-
-    sub_maker = asyncio.run(_do())
-    logger.info(f"completed, output file: {voice_file}")
-    return sub_maker
+    except Exception as e:
+        logger.error(f"failed, error: {e}")
+        return None
 
 
 def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str):
@@ -78,6 +81,15 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str):
         file.write("\n".join(sub_items))
 
 
+def get_audio_duration(sub_maker: submaker.SubMaker):
+    """
+    获取音频时长
+    """
+    if not sub_maker.offset:
+        return 0.0
+    return sub_maker.offset[-1][1] / 10000000
+
+
 if __name__ == "__main__":
 
     async def _do():
@@ -102,6 +114,8 @@ if __name__ == "__main__":
         subtitle_file = f"{temp_dir}/tts.mp3.srt"
         sub_maker = tts(text=text, voice_name=voice_name, voice_file=voice_file)
         create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)
+        audio_duration = get_audio_duration(sub_maker)
+        print(f"voice: {voice_name}, audio duration: {audio_duration}s")
 
     loop = asyncio.get_event_loop_policy().get_event_loop()
diff --git a/docs/webui.jpg b/docs/webui.jpg
index 37be26e..5810dd9 100644
Binary files a/docs/webui.jpg and b/docs/webui.jpg differ
diff --git a/webui/Main.py b/webui/Main.py
index 1eb913f..d6b09c9 100644
--- a/webui/Main.py
+++ b/webui/Main.py
@@ -1,11 +1,12 @@
 import asyncio
 import sys
 import os
+import time
 from uuid import uuid4
 
 import streamlit as st
 from loguru import logger
 
-from app.models.schema import VideoParams, VideoAspect, VoiceNames
-from app.services import task as tm
+from app.models.schema import VideoParams, VideoAspect, VoiceNames, VideoConcatMode
+from app.services import task as tm, llm
 
 st.set_page_config(page_title="MoneyPrinterTurbo", page_icon="🤖", layout="wide",
                    initial_sidebar_state="auto")
@@ -15,6 +16,15 @@ root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 font_dir = os.path.join(root_dir, "resource", "fonts")
 song_dir = os.path.join(root_dir, "resource", "songs")
 
+# st.session_state
+
+if 'video_subject' not in st.session_state:
+    st.session_state['video_subject'] = ''
+if 'video_script' not in st.session_state:
+    st.session_state['video_script'] = ''
+if 'video_terms' not in st.session_state:
+    st.session_state['video_terms'] = ''
+
 
 def get_all_fonts():
     fonts = []
@@ -65,17 +75,61 @@ def init_log():
 
 init_log()
 
-panel = st.columns(2)
+panel = st.columns(3)
 left_panel = panel[0]
-right_panel = panel[1]
+middle_panel = panel[1]
+right_panel = panel[2]
 
 # define cfg as VideoParams class
 cfg = VideoParams()
 
 with left_panel:
+    with st.container(border=True):
+        st.write("**文案设置**")
+        cfg.video_subject = st.text_input("视频主题(给定一个关键词,:red[AI自动生成]视频文案)",
+                                          value=st.session_state['video_subject']).strip()
+        if st.button("点击使用AI根据**主题**生成 【视频文案】 和 【视频关键词】", key="auto_generate_script"):
+            with st.spinner("AI正在生成视频文案和关键词..."):
+                script = llm.generate_script(cfg.video_subject)
+                terms = llm.generate_terms(cfg.video_subject, script)
+                st.toast('AI生成成功')
+                st.session_state['video_script'] = script
+                st.session_state['video_terms'] = ", ".join(terms)
+
+        cfg.video_script = st.text_area(
+            "视频文案(:blue[①可不填,使用AI生成 ②合理使用标点断句,有助于生成字幕])",
+            value=st.session_state['video_script'],
+            height=190
+        )
+        if st.button("点击使用AI根据**文案**生成【视频关键词】", key="auto_generate_terms"):
+            if not cfg.video_script:
+                st.error("请先填写视频文案")
+                st.stop()
+
+            with st.spinner("AI正在生成视频关键词..."):
+                terms = llm.generate_terms(cfg.video_subject, cfg.video_script)
+                st.toast('AI生成成功')
+                st.session_state['video_terms'] = ", ".join(terms)
+
+        cfg.video_terms = st.text_area(
+            "视频关键词(:blue[①可不填,使用AI生成 ②用**英文逗号**分隔,只支持英文])",
+            value=st.session_state['video_terms'],
+            height=40)
+
+with middle_panel:
     with st.container(border=True):
         st.write("**视频设置**")
-        cfg.video_subject = st.text_area("视频主题", help="请输入视频主题")
+        video_concat_modes = [
+            ("顺序拼接", "sequential"),
+            ("随机拼接", "random"),
+        ]
+        selected_index = st.selectbox("视频拼接模式",
+                                      index=1,
+                                      options=range(len(video_concat_modes)),  # 使用索引作为内部选项值
+                                      format_func=lambda x: video_concat_modes[x][0]  # 显示给用户的是标签
+                                      )
+        cfg.video_concat_mode = VideoConcatMode(video_concat_modes[selected_index][1])
+
         video_aspect_ratios = [
             ("竖屏 9:16(抖音视频)", VideoAspect.portrait.value),
             ("横屏 16:9(西瓜视频)", VideoAspect.landscape.value),
@@ -87,8 +141,10 @@ with left_panel:
                                       )
         cfg.video_aspect = VideoAspect(video_aspect_ratios[selected_index][1])
 
+        cfg.video_clip_duration = st.slider("视频片段最大时长(秒)", 2, 5, 3)
+
     with st.container(border=True):
-        st.write("**声音设置**")
+        st.write("**音频设置**")
         # 创建一个映射字典,将原始值映射到友好名称
         friendly_names = {
             voice: voice.
@@ -100,38 +156,53 @@ with left_panel:
                 replace("en-US", "英文").
                 replace("Neural", "") for voice in VoiceNames}
 
-        selected_friendly_name = st.selectbox("声音", options=list(friendly_names.values()))
+        selected_friendly_name = st.selectbox("朗读声音", options=list(friendly_names.values()))
 
         voice_name = list(friendly_names.keys())[list(friendly_names.values()).index(selected_friendly_name)]
         cfg.voice_name = voice_name
 
-        song_names = [
+        bgm_options = [
             ("无背景音乐 No BGM", ""),
             ("随机背景音乐 Random BGM", "random"),
-            *[(song, song) for song in get_all_songs()]
+            ("自定义背景音乐 Custom BGM", "custom"),
         ]
         selected_index = st.selectbox("背景音乐",
                                       index=1,
-                                      options=range(len(song_names)),  # 使用索引作为内部选项值
-                                      format_func=lambda x: song_names[x][0]  # 显示给用户的是标签
+                                      options=range(len(bgm_options)),  # 使用索引作为内部选项值
+                                      format_func=lambda x: bgm_options[x][0]  # 显示给用户的是标签
                                       )
-        cfg.bgm_name = song_names[selected_index][1]
+        # 获取选择的背景音乐类型
+        bgm_type = bgm_options[selected_index][1]
+
+        # 根据选择显示或隐藏组件
+        if bgm_type == "custom":
+            custom_bgm_file = st.text_input("请输入自定义背景音乐的文件路径:")
+            if custom_bgm_file and os.path.exists(custom_bgm_file):
+                cfg.bgm_file = custom_bgm_file
+                # st.write(f":red[已选择自定义背景音乐]:**{custom_bgm_file}**")
 
 with right_panel:
     with st.container(border=True):
         st.write("**字幕设置**")
+        cfg.subtitle_enabled = st.checkbox("生成字幕(若取消勾选,下面的设置都将不生效)", value=True)
         font_names = get_all_fonts()
        cfg.font_name = st.selectbox("字体", font_names)
-        cfg.text_fore_color = st.color_picker("字幕颜色", "#FFFFFF")
-        cfg.font_size = st.slider("字幕大小", 30, 100, 60)
-        cfg.stroke_color = st.color_picker("描边颜色", "#000000")
-        cfg.stroke_width = st.slider("描边粗细", 0.0, 10.0, 1.5)
+        font_cols = st.columns([0.3, 0.7])
+        with font_cols[0]:
+            cfg.text_fore_color = st.color_picker("字幕颜色", "#FFFFFF")
+        with font_cols[1]:
+            cfg.font_size = st.slider("字幕大小", 30, 100, 60)
+
+        stroke_cols = st.columns([0.3, 0.7])
+        with stroke_cols[0]:
+            cfg.stroke_color = st.color_picker("描边颜色", "#000000")
+        with stroke_cols[1]:
+            cfg.stroke_width = st.slider("描边粗细", 0.0, 10.0, 1.5)
 
 start_button = st.button("开始生成视频", use_container_width=True, type="primary")
 if start_button:
     task_id = str(uuid4())
-    st.session_state['started'] = True
-    if not cfg.video_subject:
-        st.error("视频主题不能为空")
+    if not cfg.video_subject and not cfg.video_script:
+        st.error("视频主题 或 视频文案,不能同时为空")
         st.stop()
 
     st.write(cfg)
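
For reviewers who want to exercise the reworked parameter surface without the Streamlit UI, here is a minimal sketch of how the new `VideoParams` fields introduced above might be wired up and handed to `task.start`. It only uses names that appear in this diff (`VideoParams`, `VideoAspect`, `VideoConcatMode`, `task.start` and the new fields); treat the idea of driving the pipeline headlessly, and the concrete values below, as assumptions rather than a documented entry point.

```python
# Hypothetical headless driver, mirroring what webui/Main.py does with st.* widgets.
from uuid import uuid4

from app.models.schema import VideoParams, VideoAspect, VideoConcatMode
from app.services import task as tm

params = VideoParams()
params.video_subject = "coffee"                     # placeholder subject
params.video_script = ""                            # empty -> llm.generate_script() fills it in
params.video_terms = ""                             # empty -> llm.generate_terms() fills it in
params.video_aspect = VideoAspect.portrait          # 9:16
params.video_concat_mode = VideoConcatMode.random   # or VideoConcatMode.sequential
params.video_clip_duration = 5                      # max seconds taken from each downloaded clip
params.subtitle_enabled = True                      # False skips subtitle generation entirely
params.bgm_type = "random"                          # "" for no BGM; set bgm_file for a custom track

tm.start(task_id=str(uuid4()), params=params)
```

The shape follows the diff itself: `task.start` now reads everything it needs (script, terms, concat mode, clip duration, BGM, subtitle options) from the single `params` object, which is why `video.generate_video` could drop its long keyword list in favour of `params: VideoParams`.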