From 0771b3268cba6c8b31762fa45b9b8978787d2ebd Mon Sep 17 00:00:00 2001 From: harry Date: Sat, 23 Mar 2024 15:31:34 +0800 Subject: [PATCH] =?UTF-8?q?1,=20=E5=A2=9E=E5=8A=A0=E4=B8=80=E6=AC=A1?= =?UTF-8?q?=E6=80=A7=E8=BE=93=E5=87=BA=E5=A4=9A=E4=B8=AA=E8=A7=86=E9=A2=91?= =?UTF-8?q?=202,=20=E5=A2=9E=E5=8A=A0=E8=83=8C=E6=99=AF=E9=9F=B3=E4=B9=90?= =?UTF-8?q?=E9=9F=B3=E9=87=8F=E8=AE=BE=E7=BD=AE=203,=20=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=E5=AD=97=E5=B9=95=E4=BD=8D=E7=BD=AE=204,=20UI=E4=BC=98?= =?UTF-8?q?=E5=8C=96=205,=20=E4=B8=80=E4=BA=9B=E5=85=B6=E4=BB=96Bug?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=92=8C=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/config/config.py | 12 +++--- app/models/schema.py | 6 ++- app/services/subtitle.py | 2 +- app/services/task.py | 57 +++++++++++++++---------- app/services/video.py | 90 ++++++++++++++++++++++++++++++---------- app/services/voice.py | 2 +- webui/Main.py | 42 ++++++++++++++----- 7 files changed, 146 insertions(+), 65 deletions(-) diff --git a/app/config/config.py b/app/config/config.py index d05eb17..c843fd5 100644 --- a/app/config/config.py +++ b/app/config/config.py @@ -28,9 +28,9 @@ imagemagick_path = app.get("imagemagick_path", "") if imagemagick_path and os.path.isfile(imagemagick_path): os.environ["IMAGEMAGICK_BINARY"] = imagemagick_path -__cfg = { - "hostname": hostname, - "listen_host": listen_host, - "listen_port": listen_port, -} -logger.info(__cfg) +# __cfg = { +# "hostname": hostname, +# "listen_host": listen_host, +# "listen_port": listen_port, +# } +# logger.info(__cfg) diff --git a/app/models/schema.py b/app/models/schema.py index e9fb084..5dae826 100644 --- a/app/models/schema.py +++ b/app/models/schema.py @@ -93,12 +93,14 @@ class VideoParams: video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value video_clip_duration: Optional[int] = 5 - + video_count: Optional[int] = 1 voice_name: Optional[str] = VoiceNames[0] bgm_type: Optional[str] = "random" bgm_file: Optional[str] = "" - + bgm_volume: Optional[float] = 0.2 + subtitle_enabled: Optional[bool] = True + subtitle_position: Optional[str] = "bottom" # top, bottom, center font_name: Optional[str] = "STHeitiMedium.ttc" text_fore_color: Optional[str] = "#FFFFFF" text_background_color: Optional[str] = "transparent" diff --git a/app/services/subtitle.py b/app/services/subtitle.py index 3dcbe88..0573cd0 100644 --- a/app/services/subtitle.py +++ b/app/services/subtitle.py @@ -105,7 +105,7 @@ def create(audio_file, subtitle_file: str = ""): lines.append(utils.text_to_srt(idx, text, subtitle.get("start_time"), subtitle.get("end_time"))) idx += 1 - sub = "\n".join(lines) + sub = "\n".join(lines) + "\n" with open(subtitle_file, "w", encoding="utf-8") as f: f.write(sub) logger.info(f"subtitle file created: {subtitle_file}") diff --git a/app/services/task.py b/app/services/task.py index 96a66b4..159e841 100644 --- a/app/services/task.py +++ b/app/services/task.py @@ -1,3 +1,4 @@ +import math import os.path import re from os import path @@ -5,7 +6,7 @@ from os import path from loguru import logger from app.config import config -from app.models.schema import VideoParams, VoiceNames +from app.models.schema import VideoParams, VoiceNames, VideoConcatMode from app.services import llm, material, voice, video, subtitle from app.utils import utils @@ -78,6 +79,8 @@ def start(task_id, params: VideoParams): return audio_duration = voice.get_audio_duration(sub_maker) + audio_duration = math.ceil(audio_duration) + subtitle_path = "" if params.subtitle_enabled: subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt") @@ -110,7 +113,7 @@ def start(task_id, params: VideoParams): search_terms=video_terms, video_aspect=params.video_aspect, video_contact_mode=params.video_concat_mode, - audio_duration=audio_duration, + audio_duration=audio_duration * params.video_count, max_clip_duration=max_clip_duration, ) if not downloaded_videos: @@ -118,27 +121,37 @@ def start(task_id, params: VideoParams): "failed to download videos, maybe the network is not available. if you are in China, please use a VPN.") return - logger.info("\n\n## combining videos") - combined_video_path = path.join(utils.task_dir(task_id), f"combined.mp4") - video.combine_videos(combined_video_path=combined_video_path, - video_paths=downloaded_videos, - audio_file=audio_file, - video_aspect=params.video_aspect, - video_concat_mode=params.video_concat_mode, - max_clip_duration=max_clip_duration, - threads=n_threads) + final_video_paths = [] + video_concat_mode = params.video_concat_mode + if params.video_count > 1: + video_concat_mode = VideoConcatMode.random - final_video_path = path.join(utils.task_dir(task_id), f"final.mp4") + for i in range(params.video_count): + index = i + 1 + combined_video_path = path.join(utils.task_dir(task_id), f"combined-{index}.mp4") + logger.info(f"\n\n## combining video: {index} => {combined_video_path}") + video.combine_videos(combined_video_path=combined_video_path, + video_paths=downloaded_videos, + audio_file=audio_file, + video_aspect=params.video_aspect, + video_concat_mode=video_concat_mode, + max_clip_duration=max_clip_duration, + threads=n_threads) + + final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4") + + logger.info(f"\n\n## generating video: {index} => {final_video_path}") + # Put everything together + video.generate_video(video_path=combined_video_path, + audio_path=audio_file, + subtitle_path=subtitle_path, + output_file=final_video_path, + params=params, + ) + final_video_paths.append(final_video_path) + + logger.success(f"task {task_id} finished, generated {len(final_video_paths)} videos.") - logger.info("\n\n## generating video") - # Put everything together - video.generate_video(video_path=combined_video_path, - audio_path=audio_file, - subtitle_path=subtitle_path, - output_file=final_video_path, - params=params, - ) - logger.start(f"task {task_id} finished") return { - "video_file": final_video_path, + "videos": final_video_paths, } diff --git a/app/services/video.py b/app/services/video.py index f00ee36..1630fad 100644 --- a/app/services/video.py +++ b/app/services/video.py @@ -34,31 +34,30 @@ def combine_videos(combined_video_path: str, max_clip_duration: int = 5, threads: int = 2, ) -> str: - logger.info(f"combining {len(video_paths)} videos into one file: {combined_video_path}") audio_clip = AudioFileClip(audio_file) - max_duration = audio_clip.duration - logger.info(f"max duration of audio: {max_duration} seconds") + audio_duration = audio_clip.duration + logger.info(f"max duration of audio: {audio_duration} seconds") # Required duration of each clip - req_dur = max_duration / len(video_paths) + req_dur = audio_duration / len(video_paths) + req_dur = max_clip_duration logger.info(f"each clip will be maximum {req_dur} seconds long") aspect = VideoAspect(video_aspect) video_width, video_height = aspect.to_resolution() clips = [] - tot_dur = 0 + video_duration = 0 # Add downloaded clips over and over until the duration of the audio (max_duration) has been reached - while tot_dur < max_duration: + while video_duration < audio_duration: # random video_paths order if video_concat_mode.value == VideoConcatMode.random.value: random.shuffle(video_paths) for video_path in video_paths: - clip = VideoFileClip(video_path) - clip = clip.without_audio() + clip = VideoFileClip(video_path).without_audio() # Check if clip is longer than the remaining audio - if (max_duration - tot_dur) < clip.duration: - clip = clip.subclip(0, (max_duration - tot_dur)) + if (audio_duration - video_duration) < clip.duration: + clip = clip.subclip(0, (audio_duration - video_duration)) # Only shorten clips if the calculated clip length (req_dur) is shorter than the actual clip to prevent still image elif req_dur < clip.duration: clip = clip.subclip(0, req_dur) @@ -88,7 +87,7 @@ def combine_videos(combined_video_path: str, clip = clip.subclip(0, max_clip_duration) clips.append(clip) - tot_dur += clip.duration + video_duration += clip.duration final_clip = concatenate_videoclips(clips) final_clip = final_clip.set_fps(30) @@ -125,7 +124,7 @@ def wrap_text(text, max_width, font='Arial', fontsize=60): _wrapped_lines_.append(_txt_) _txt_ = '' _wrapped_lines_.append(_txt_) - return '\n'.join(_wrapped_lines_) + return '\n'.join(_wrapped_lines_).strip() def generate_video(video_path: str, @@ -153,11 +152,23 @@ def generate_video(video_path: str, logger.info(f"using font: {font_path}") - def generator(txt): - wrapped_txt = wrap_text(txt, max_width=video_width - 100, + if params.subtitle_position == "top": + position_height = video_height * 0.1 + elif params.subtitle_position == "bottom": + position_height = video_height * 0.9 + else: + position_height = "center" + + def generator(txt, **kwargs): + max_width = video_width * 0.9 + # logger.debug(f"rendering text: {txt}") + wrapped_txt = wrap_text(txt, + max_width=max_width, font=font_path, - fontsize=params.font_size) # 调整max_width以适应你的视频 - return TextClip( + fontsize=params.font_size + ) # 调整max_width以适应你的视频 + + clip = TextClip( wrapped_txt, font=font_path, fontsize=params.font_size, @@ -167,18 +178,16 @@ def generate_video(video_path: str, stroke_width=params.stroke_width, print_cmd=False, ) - - position_height = video_height - 200 - if params.video_aspect == VideoAspect.landscape: - position_height = video_height - 100 + return clip clips = [ VideoFileClip(video_path), ] if subtitle_path and os.path.exists(subtitle_path): - subtitles = SubtitlesClip(subtitles=subtitle_path, make_textclip=generator, encoding='utf-8') - clips.append(subtitles.set_position(lambda _t: ('center', position_height))) + sub = SubtitlesClip(subtitles=subtitle_path, make_textclip=generator, encoding='utf-8') + sub_clip = sub.set_position(lambda _t: ('center', position_height)) + clips.append(sub_clip) result = CompositeVideoClip(clips) @@ -199,7 +208,7 @@ def generate_video(video_path: str, original_audio = video_clip.audio song_clip = AudioFileClip(bgm_file).set_fps(44100) # Set the volume of the song to 10% of the original volume - song_clip = song_clip.volumex(0.2).set_fps(44100) + song_clip = song_clip.volumex(params.bgm_volume) # Add the song to the video comp_audio = CompositeAudioClip([original_audio, song_clip]) video_clip = video_clip.set_audio(comp_audio) @@ -211,3 +220,38 @@ def generate_video(video_path: str, os.remove(temp_output_file) logger.success(f"completed") + + +if __name__ == "__main__": + txt = "hello 幸福经常被描述为最终人生目标和人类追求的核心 但它通常涉及对个人生活中意义和目的的深刻感悟" + font = utils.resource_dir() + "/fonts/STHeitiMedium.ttc" + t = wrap_text(text=txt, max_width=1000, font=font, fontsize=60) + print(t) + + task_id = "69232dfa-f6c5-4b5e-80ba-be3098d3f930" + task_dir = utils.task_dir(task_id) + video_file = f"{task_dir}/combined-1.mp4" + audio_file = f"{task_dir}/audio.mp3" + subtitle_file = f"{task_dir}/subtitle.srt" + output_file = f"{task_dir}/final.mp4" + cfg = VideoParams() + cfg.video_aspect = VideoAspect.portrait + cfg.font_name = "STHeitiMedium.ttc" + cfg.font_size = 60 + cfg.stroke_color = "#000000" + cfg.stroke_width = 1.5 + cfg.text_fore_color = "#FFFFFF" + cfg.text_background_color = "transparent" + cfg.bgm_file = "" + cfg.bgm_volume = 0.2 + cfg.subtitle_enabled = True + cfg.subtitle_position = "bottom" + cfg.n_threads = 2 + cfg.paragraph_number = 1 + + generate_video(video_path=video_file, + audio_path=audio_file, + subtitle_path=subtitle_file, + output_file=output_file, + params=cfg + ) diff --git a/app/services/voice.py b/app/services/voice.py index 76fb921..6769961 100644 --- a/app/services/voice.py +++ b/app/services/voice.py @@ -78,7 +78,7 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str) sub_line = "" with open(subtitle_file, "w", encoding="utf-8") as file: - file.write("\n".join(sub_items)) + file.write("\n".join(sub_items) + "\n") def get_audio_duration(sub_maker: submaker.SubMaker): diff --git a/webui/Main.py b/webui/Main.py index d6b09c9..db1ba98 100644 --- a/webui/Main.py +++ b/webui/Main.py @@ -1,16 +1,23 @@ -import asyncio +import streamlit as st + +st.set_page_config(page_title="MoneyPrinterTurbo", page_icon="🤖", layout="wide", + initial_sidebar_state="auto") import sys import os -import time from uuid import uuid4 -import streamlit as st + from loguru import logger from app.models.schema import VideoParams, VideoAspect, VoiceNames, VideoConcatMode from app.services import task as tm, llm -st.set_page_config(page_title="MoneyPrinterTurbo", page_icon="🤖", layout="wide", - initial_sidebar_state="auto") +hide_streamlit_style = """ + +""" +st.markdown(hide_streamlit_style, unsafe_allow_html=True) st.title("MoneyPrinterTurbo") +st.write( + "⚠️ 先在 **config.toml** 中设置 `pexels_api_keys` 和 `llm_provider` 参数,根据不同的 llm_provider,配置对应的 **API KEY**" +) root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) font_dir = os.path.join(root_dir, "resource", "fonts") @@ -99,7 +106,7 @@ with left_panel: cfg.video_script = st.text_area( "视频文案(:blue[①可不填,使用AI生成 ②合理使用标点断句,有助于生成字幕])", value=st.session_state['video_script'], - height=190 + height=280 ) if st.button("点击使用AI根据**文案**生成【视频关键词】", key="auto_generate_terms"): if not cfg.video_script: @@ -114,14 +121,14 @@ with left_panel: cfg.video_terms = st.text_area( "视频关键词(:blue[①可不填,使用AI生成 ②用**英文逗号**分隔,只支持英文])", value=st.session_state['video_terms'], - height=40) + height=50) with middle_panel: with st.container(border=True): st.write("**视频设置**") video_concat_modes = [ ("顺序拼接", "sequential"), - ("随机拼接", "random"), + ("随机拼接(推荐)", "random"), ] selected_index = st.selectbox("视频拼接模式", index=1, @@ -141,8 +148,8 @@ with middle_panel: ) cfg.video_aspect = VideoAspect(video_aspect_ratios[selected_index][1]) - cfg.video_clip_duration = st.slider("视频片段最大时长(秒)", 2, 5, 3) - + cfg.video_clip_duration = st.selectbox("视频片段最大时长(秒)", options=[2, 3, 4, 5, 6], index=1) + cfg.video_count = st.selectbox("同时生成视频数量", options=[1, 2, 3, 4, 5], index=0) with st.container(border=True): st.write("**音频设置**") # 创建一个映射字典,将原始值映射到友好名称 @@ -179,6 +186,8 @@ with middle_panel: if custom_bgm_file and os.path.exists(custom_bgm_file): cfg.bgm_file = custom_bgm_file # st.write(f":red[已选择自定义背景音乐]:**{custom_bgm_file}**") + cfg.bgm_volume = st.selectbox("背景音乐音量(0.2表示20%,背景声音不宜过高)", + options=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], index=2) with right_panel: with st.container(border=True): @@ -186,6 +195,19 @@ with right_panel: cfg.subtitle_enabled = st.checkbox("生成字幕(若取消勾选,下面的设置都将不生效)", value=True) font_names = get_all_fonts() cfg.font_name = st.selectbox("字体", font_names) + + subtitle_positions = [ + ("顶部(top)", "top"), + ("居中(center)", "center"), + ("底部(bottom,推荐)", "bottom"), + ] + selected_index = st.selectbox("字幕位置", + index=2, + options=range(len(subtitle_positions)), # 使用索引作为内部选项值 + format_func=lambda x: subtitle_positions[x][0] # 显示给用户的是标签 + ) + cfg.subtitle_position = subtitle_positions[selected_index][1] + font_cols = st.columns([0.3, 0.7]) with font_cols[0]: cfg.text_fore_color = st.color_picker("字幕颜色", "#FFFFFF")