1, 增加一次性输出多个视频

2, 增加背景音乐音量设置
3, 增加字幕位置
4, UI优化
5, 一些其他Bug修复和优化
This commit is contained in:
harry 2024-03-23 15:31:34 +08:00
parent ce4b3771b6
commit 0771b3268c
7 changed files with 146 additions and 65 deletions

View File

@ -28,9 +28,9 @@ imagemagick_path = app.get("imagemagick_path", "")
if imagemagick_path and os.path.isfile(imagemagick_path):
os.environ["IMAGEMAGICK_BINARY"] = imagemagick_path
__cfg = {
"hostname": hostname,
"listen_host": listen_host,
"listen_port": listen_port,
}
logger.info(__cfg)
# __cfg = {
# "hostname": hostname,
# "listen_host": listen_host,
# "listen_port": listen_port,
# }
# logger.info(__cfg)

View File

@ -93,12 +93,14 @@ class VideoParams:
video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
video_clip_duration: Optional[int] = 5
video_count: Optional[int] = 1
voice_name: Optional[str] = VoiceNames[0]
bgm_type: Optional[str] = "random"
bgm_file: Optional[str] = ""
bgm_volume: Optional[float] = 0.2
subtitle_enabled: Optional[bool] = True
subtitle_position: Optional[str] = "bottom" # top, bottom, center
font_name: Optional[str] = "STHeitiMedium.ttc"
text_fore_color: Optional[str] = "#FFFFFF"
text_background_color: Optional[str] = "transparent"

View File

@ -105,7 +105,7 @@ def create(audio_file, subtitle_file: str = ""):
lines.append(utils.text_to_srt(idx, text, subtitle.get("start_time"), subtitle.get("end_time")))
idx += 1
sub = "\n".join(lines)
sub = "\n".join(lines) + "\n"
with open(subtitle_file, "w", encoding="utf-8") as f:
f.write(sub)
logger.info(f"subtitle file created: {subtitle_file}")

View File

@ -1,3 +1,4 @@
import math
import os.path
import re
from os import path
@ -5,7 +6,7 @@ from os import path
from loguru import logger
from app.config import config
from app.models.schema import VideoParams, VoiceNames
from app.models.schema import VideoParams, VoiceNames, VideoConcatMode
from app.services import llm, material, voice, video, subtitle
from app.utils import utils
@ -78,6 +79,8 @@ def start(task_id, params: VideoParams):
return
audio_duration = voice.get_audio_duration(sub_maker)
audio_duration = math.ceil(audio_duration)
subtitle_path = ""
if params.subtitle_enabled:
subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
@ -110,7 +113,7 @@ def start(task_id, params: VideoParams):
search_terms=video_terms,
video_aspect=params.video_aspect,
video_contact_mode=params.video_concat_mode,
audio_duration=audio_duration,
audio_duration=audio_duration * params.video_count,
max_clip_duration=max_clip_duration,
)
if not downloaded_videos:
@ -118,27 +121,37 @@ def start(task_id, params: VideoParams):
"failed to download videos, maybe the network is not available. if you are in China, please use a VPN.")
return
logger.info("\n\n## combining videos")
combined_video_path = path.join(utils.task_dir(task_id), f"combined.mp4")
video.combine_videos(combined_video_path=combined_video_path,
video_paths=downloaded_videos,
audio_file=audio_file,
video_aspect=params.video_aspect,
video_concat_mode=params.video_concat_mode,
max_clip_duration=max_clip_duration,
threads=n_threads)
final_video_paths = []
video_concat_mode = params.video_concat_mode
if params.video_count > 1:
video_concat_mode = VideoConcatMode.random
final_video_path = path.join(utils.task_dir(task_id), f"final.mp4")
for i in range(params.video_count):
index = i + 1
combined_video_path = path.join(utils.task_dir(task_id), f"combined-{index}.mp4")
logger.info(f"\n\n## combining video: {index} => {combined_video_path}")
video.combine_videos(combined_video_path=combined_video_path,
video_paths=downloaded_videos,
audio_file=audio_file,
video_aspect=params.video_aspect,
video_concat_mode=video_concat_mode,
max_clip_duration=max_clip_duration,
threads=n_threads)
final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4")
logger.info(f"\n\n## generating video: {index} => {final_video_path}")
# Put everything together
video.generate_video(video_path=combined_video_path,
audio_path=audio_file,
subtitle_path=subtitle_path,
output_file=final_video_path,
params=params,
)
final_video_paths.append(final_video_path)
logger.success(f"task {task_id} finished, generated {len(final_video_paths)} videos.")
logger.info("\n\n## generating video")
# Put everything together
video.generate_video(video_path=combined_video_path,
audio_path=audio_file,
subtitle_path=subtitle_path,
output_file=final_video_path,
params=params,
)
logger.start(f"task {task_id} finished")
return {
"video_file": final_video_path,
"videos": final_video_paths,
}

View File

@ -34,31 +34,30 @@ def combine_videos(combined_video_path: str,
max_clip_duration: int = 5,
threads: int = 2,
) -> str:
logger.info(f"combining {len(video_paths)} videos into one file: {combined_video_path}")
audio_clip = AudioFileClip(audio_file)
max_duration = audio_clip.duration
logger.info(f"max duration of audio: {max_duration} seconds")
audio_duration = audio_clip.duration
logger.info(f"max duration of audio: {audio_duration} seconds")
# Required duration of each clip
req_dur = max_duration / len(video_paths)
req_dur = audio_duration / len(video_paths)
req_dur = max_clip_duration
logger.info(f"each clip will be maximum {req_dur} seconds long")
aspect = VideoAspect(video_aspect)
video_width, video_height = aspect.to_resolution()
clips = []
tot_dur = 0
video_duration = 0
# Add downloaded clips over and over until the duration of the audio (audio_duration) has been reached
while tot_dur < max_duration:
while video_duration < audio_duration:
# random video_paths order
if video_concat_mode.value == VideoConcatMode.random.value:
random.shuffle(video_paths)
for video_path in video_paths:
clip = VideoFileClip(video_path)
clip = clip.without_audio()
clip = VideoFileClip(video_path).without_audio()
# Check if clip is longer than the remaining audio
if (max_duration - tot_dur) < clip.duration:
clip = clip.subclip(0, (max_duration - tot_dur))
if (audio_duration - video_duration) < clip.duration:
clip = clip.subclip(0, (audio_duration - video_duration))
# Only shorten clips if the calculated clip length (req_dur) is shorter than the actual clip to prevent still image
elif req_dur < clip.duration:
clip = clip.subclip(0, req_dur)
@ -88,7 +87,7 @@ def combine_videos(combined_video_path: str,
clip = clip.subclip(0, max_clip_duration)
clips.append(clip)
tot_dur += clip.duration
video_duration += clip.duration
final_clip = concatenate_videoclips(clips)
final_clip = final_clip.set_fps(30)
@ -125,7 +124,7 @@ def wrap_text(text, max_width, font='Arial', fontsize=60):
_wrapped_lines_.append(_txt_)
_txt_ = ''
_wrapped_lines_.append(_txt_)
return '\n'.join(_wrapped_lines_)
return '\n'.join(_wrapped_lines_).strip()
def generate_video(video_path: str,
@ -153,11 +152,23 @@ def generate_video(video_path: str,
logger.info(f"using font: {font_path}")
def generator(txt):
wrapped_txt = wrap_text(txt, max_width=video_width - 100,
if params.subtitle_position == "top":
position_height = video_height * 0.1
elif params.subtitle_position == "bottom":
position_height = video_height * 0.9
else:
position_height = "center"
def generator(txt, **kwargs):
max_width = video_width * 0.9
# logger.debug(f"rendering text: {txt}")
wrapped_txt = wrap_text(txt,
max_width=max_width,
font=font_path,
fontsize=params.font_size) # 调整max_width以适应你的视频
return TextClip(
fontsize=params.font_size
) # 调整max_width以适应你的视频
clip = TextClip(
wrapped_txt,
font=font_path,
fontsize=params.font_size,
@ -167,18 +178,16 @@ def generate_video(video_path: str,
stroke_width=params.stroke_width,
print_cmd=False,
)
position_height = video_height - 200
if params.video_aspect == VideoAspect.landscape:
position_height = video_height - 100
return clip
clips = [
VideoFileClip(video_path),
]
if subtitle_path and os.path.exists(subtitle_path):
subtitles = SubtitlesClip(subtitles=subtitle_path, make_textclip=generator, encoding='utf-8')
clips.append(subtitles.set_position(lambda _t: ('center', position_height)))
sub = SubtitlesClip(subtitles=subtitle_path, make_textclip=generator, encoding='utf-8')
sub_clip = sub.set_position(lambda _t: ('center', position_height))
clips.append(sub_clip)
result = CompositeVideoClip(clips)
@ -199,7 +208,7 @@ def generate_video(video_path: str,
original_audio = video_clip.audio
song_clip = AudioFileClip(bgm_file).set_fps(44100)
# Scale the background music by the configured volume factor (params.bgm_volume; 0.2 means 20%)
song_clip = song_clip.volumex(0.2).set_fps(44100)
song_clip = song_clip.volumex(params.bgm_volume)
# Add the song to the video
comp_audio = CompositeAudioClip([original_audio, song_clip])
video_clip = video_clip.set_audio(comp_audio)
@ -211,3 +220,38 @@ def generate_video(video_path: str,
os.remove(temp_output_file)
logger.success(f"completed")
if __name__ == "__main__":
txt = "hello 幸福经常被描述为最终人生目标和人类追求的核心 但它通常涉及对个人生活中意义和目的的深刻感悟"
font = utils.resource_dir() + "/fonts/STHeitiMedium.ttc"
t = wrap_text(text=txt, max_width=1000, font=font, fontsize=60)
print(t)
task_id = "69232dfa-f6c5-4b5e-80ba-be3098d3f930"
task_dir = utils.task_dir(task_id)
video_file = f"{task_dir}/combined-1.mp4"
audio_file = f"{task_dir}/audio.mp3"
subtitle_file = f"{task_dir}/subtitle.srt"
output_file = f"{task_dir}/final.mp4"
cfg = VideoParams()
cfg.video_aspect = VideoAspect.portrait
cfg.font_name = "STHeitiMedium.ttc"
cfg.font_size = 60
cfg.stroke_color = "#000000"
cfg.stroke_width = 1.5
cfg.text_fore_color = "#FFFFFF"
cfg.text_background_color = "transparent"
cfg.bgm_file = ""
cfg.bgm_volume = 0.2
cfg.subtitle_enabled = True
cfg.subtitle_position = "bottom"
cfg.n_threads = 2
cfg.paragraph_number = 1
generate_video(video_path=video_file,
audio_path=audio_file,
subtitle_path=subtitle_file,
output_file=output_file,
params=cfg
)

View File

@ -78,7 +78,7 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
sub_line = ""
with open(subtitle_file, "w", encoding="utf-8") as file:
file.write("\n".join(sub_items))
file.write("\n".join(sub_items) + "\n")
def get_audio_duration(sub_maker: submaker.SubMaker):

View File

@ -1,16 +1,23 @@
import asyncio
import streamlit as st
st.set_page_config(page_title="MoneyPrinterTurbo", page_icon="🤖", layout="wide",
initial_sidebar_state="auto")
import sys
import os
import time
from uuid import uuid4
import streamlit as st
from loguru import logger
from app.models.schema import VideoParams, VideoAspect, VoiceNames, VideoConcatMode
from app.services import task as tm, llm
st.set_page_config(page_title="MoneyPrinterTurbo", page_icon="🤖", layout="wide",
initial_sidebar_state="auto")
hide_streamlit_style = """
<style>#root > div:nth-child(1) > div > div > div > div > section > div {padding-top: 0rem;}</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
st.title("MoneyPrinterTurbo")
st.write(
"⚠️ 先在 **config.toml** 中设置 `pexels_api_keys` 和 `llm_provider` 参数,根据不同的 llm_provider配置对应的 **API KEY**"
)
root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
font_dir = os.path.join(root_dir, "resource", "fonts")
@ -99,7 +106,7 @@ with left_panel:
cfg.video_script = st.text_area(
"视频文案(:blue[①可不填使用AI生成 ②合理使用标点断句,有助于生成字幕]",
value=st.session_state['video_script'],
height=190
height=280
)
if st.button("点击使用AI根据**文案**生成【视频关键词】", key="auto_generate_terms"):
if not cfg.video_script:
@ -114,14 +121,14 @@ with left_panel:
cfg.video_terms = st.text_area(
"视频关键词(:blue[①可不填使用AI生成 ②用**英文逗号**分隔,只支持英文]",
value=st.session_state['video_terms'],
height=40)
height=50)
with middle_panel:
with st.container(border=True):
st.write("**视频设置**")
video_concat_modes = [
("顺序拼接", "sequential"),
("随机拼接", "random"),
("随机拼接(推荐)", "random"),
]
selected_index = st.selectbox("视频拼接模式",
index=1,
@ -141,8 +148,8 @@ with middle_panel:
)
cfg.video_aspect = VideoAspect(video_aspect_ratios[selected_index][1])
cfg.video_clip_duration = st.slider("视频片段最大时长(秒)", 2, 5, 3)
cfg.video_clip_duration = st.selectbox("视频片段最大时长(秒)", options=[2, 3, 4, 5, 6], index=1)
cfg.video_count = st.selectbox("同时生成视频数量", options=[1, 2, 3, 4, 5], index=0)
with st.container(border=True):
st.write("**音频设置**")
# 创建一个映射字典,将原始值映射到友好名称
@ -179,6 +186,8 @@ with middle_panel:
if custom_bgm_file and os.path.exists(custom_bgm_file):
cfg.bgm_file = custom_bgm_file
# st.write(f":red[已选择自定义背景音乐]**{custom_bgm_file}**")
cfg.bgm_volume = st.selectbox("背景音乐音量0.2表示20%,背景声音不宜过高)",
options=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], index=2)
with right_panel:
with st.container(border=True):
@ -186,6 +195,19 @@ with right_panel:
cfg.subtitle_enabled = st.checkbox("生成字幕(若取消勾选,下面的设置都将不生效)", value=True)
font_names = get_all_fonts()
cfg.font_name = st.selectbox("字体", font_names)
subtitle_positions = [
("顶部top", "top"),
("居中center", "center"),
("底部bottom推荐", "bottom"),
]
selected_index = st.selectbox("字幕位置",
index=2,
options=range(len(subtitle_positions)), # 使用索引作为内部选项值
format_func=lambda x: subtitle_positions[x][0] # 显示给用户的是标签
)
cfg.subtitle_position = subtitle_positions[selected_index][1]
font_cols = st.columns([0.3, 0.7])
with font_cols[0]:
cfg.text_fore_color = st.color_picker("字幕颜色", "#FFFFFF")