From 0771b3268cba6c8b31762fa45b9b8978787d2ebd Mon Sep 17 00:00:00 2001
From: harry <harry@wangxutech.com>
Date: Sat, 23 Mar 2024 15:31:34 +0800
Subject: [PATCH] =?UTF-8?q?1,=20=E5=A2=9E=E5=8A=A0=E4=B8=80=E6=AC=A1?=
 =?UTF-8?q?=E6=80=A7=E8=BE=93=E5=87=BA=E5=A4=9A=E4=B8=AA=E8=A7=86=E9=A2=91?=
 =?UTF-8?q?=202,=20=E5=A2=9E=E5=8A=A0=E8=83=8C=E6=99=AF=E9=9F=B3=E4=B9=90?=
 =?UTF-8?q?=E9=9F=B3=E9=87=8F=E8=AE=BE=E7=BD=AE=203,=20=E5=A2=9E=E5=8A=A0?=
 =?UTF-8?q?=E5=AD=97=E5=B9=95=E4=BD=8D=E7=BD=AE=204,=20UI=E4=BC=98?=
 =?UTF-8?q?=E5=8C=96=205,=20=E4=B8=80=E4=BA=9B=E5=85=B6=E4=BB=96Bug?=
 =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=92=8C=E4=BC=98=E5=8C=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/config/config.py     | 12 +++---
 app/models/schema.py     |  6 ++-
 app/services/subtitle.py |  2 +-
 app/services/task.py     | 57 +++++++++++++++----------
 app/services/video.py    | 90 ++++++++++++++++++++++++++++++----------
 app/services/voice.py    |  2 +-
 webui/Main.py            | 42 ++++++++++++++-----
 7 files changed, 146 insertions(+), 65 deletions(-)

diff --git a/app/config/config.py b/app/config/config.py
index d05eb17..c843fd5 100644
--- a/app/config/config.py
+++ b/app/config/config.py
@@ -28,9 +28,9 @@ imagemagick_path = app.get("imagemagick_path", "")
 if imagemagick_path and os.path.isfile(imagemagick_path):
     os.environ["IMAGEMAGICK_BINARY"] = imagemagick_path
 
-__cfg = {
-    "hostname": hostname,
-    "listen_host": listen_host,
-    "listen_port": listen_port,
-}
-logger.info(__cfg)
+# __cfg = {
+#     "hostname": hostname,
+#     "listen_host": listen_host,
+#     "listen_port": listen_port,
+# }
+# logger.info(__cfg)
diff --git a/app/models/schema.py b/app/models/schema.py
index e9fb084..5dae826 100644
--- a/app/models/schema.py
+++ b/app/models/schema.py
@@ -93,12 +93,14 @@ class VideoParams:
     video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
     video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
     video_clip_duration: Optional[int] = 5
-
+    video_count: Optional[int] = 1
     voice_name: Optional[str] = VoiceNames[0]
     bgm_type: Optional[str] = "random"
     bgm_file: Optional[str] = ""
-
+    bgm_volume: Optional[float] = 0.2
+    
     subtitle_enabled: Optional[bool] = True
+    subtitle_position: Optional[str] = "bottom"  # top, bottom, center
     font_name: Optional[str] = "STHeitiMedium.ttc"
     text_fore_color: Optional[str] = "#FFFFFF"
     text_background_color: Optional[str] = "transparent"
diff --git a/app/services/subtitle.py b/app/services/subtitle.py
index 3dcbe88..0573cd0 100644
--- a/app/services/subtitle.py
+++ b/app/services/subtitle.py
@@ -105,7 +105,7 @@ def create(audio_file, subtitle_file: str = ""):
             lines.append(utils.text_to_srt(idx, text, subtitle.get("start_time"), subtitle.get("end_time")))
             idx += 1
 
-    sub = "\n".join(lines)
+    sub = "\n".join(lines) + "\n"
     with open(subtitle_file, "w", encoding="utf-8") as f:
         f.write(sub)
     logger.info(f"subtitle file created: {subtitle_file}")
diff --git a/app/services/task.py b/app/services/task.py
index 96a66b4..159e841 100644
--- a/app/services/task.py
+++ b/app/services/task.py
@@ -1,3 +1,4 @@
+import math
 import os.path
 import re
 from os import path
@@ -5,7 +6,7 @@ from os import path
 from loguru import logger
 
 from app.config import config
-from app.models.schema import VideoParams, VoiceNames
+from app.models.schema import VideoParams, VoiceNames, VideoConcatMode
 from app.services import llm, material, voice, video, subtitle
 from app.utils import utils
 
@@ -78,6 +79,8 @@ def start(task_id, params: VideoParams):
         return
 
     audio_duration = voice.get_audio_duration(sub_maker)
+    audio_duration = math.ceil(audio_duration)
+
     subtitle_path = ""
     if params.subtitle_enabled:
         subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
@@ -110,7 +113,7 @@ def start(task_id, params: VideoParams):
                                                  search_terms=video_terms,
                                                  video_aspect=params.video_aspect,
                                                  video_contact_mode=params.video_concat_mode,
-                                                 audio_duration=audio_duration,
+                                                 audio_duration=audio_duration * params.video_count,
                                                  max_clip_duration=max_clip_duration,
                                                  )
     if not downloaded_videos:
@@ -118,27 +121,37 @@ def start(task_id, params: VideoParams):
             "failed to download videos, maybe the network is not available. if you are in China, please use a VPN.")
         return
 
-    logger.info("\n\n## combining videos")
-    combined_video_path = path.join(utils.task_dir(task_id), f"combined.mp4")
-    video.combine_videos(combined_video_path=combined_video_path,
-                         video_paths=downloaded_videos,
-                         audio_file=audio_file,
-                         video_aspect=params.video_aspect,
-                         video_concat_mode=params.video_concat_mode,
-                         max_clip_duration=max_clip_duration,
-                         threads=n_threads)
+    final_video_paths = []
+    video_concat_mode = params.video_concat_mode  # NOTE(review): VideoParams defaults this to VideoConcatMode.random.value, but combine_videos reads `.value` on it - confirm callers always pass the enum member
+    if params.video_count > 1:
+        video_concat_mode = VideoConcatMode.random  # random mode shuffles the clip list per output; presumably so the generated videos differ - confirm
 
-    final_video_path = path.join(utils.task_dir(task_id), f"final.mp4")
+    for i in range(params.video_count):
+        index = i + 1
+        combined_video_path = path.join(utils.task_dir(task_id), f"combined-{index}.mp4")
+        logger.info(f"\n\n## combining video: {index} => {combined_video_path}")
+        video.combine_videos(combined_video_path=combined_video_path,
+                             video_paths=downloaded_videos,
+                             audio_file=audio_file,
+                             video_aspect=params.video_aspect,
+                             video_concat_mode=video_concat_mode,
+                             max_clip_duration=max_clip_duration,
+                             threads=n_threads)
+
+        final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4")
+
+        logger.info(f"\n\n## generating video: {index} => {final_video_path}")
+        # Put everything together
+        video.generate_video(video_path=combined_video_path,
+                             audio_path=audio_file,
+                             subtitle_path=subtitle_path,
+                             output_file=final_video_path,
+                             params=params,
+                             )
+        final_video_paths.append(final_video_path)
+
+    logger.success(f"task {task_id} finished, generated {len(final_video_paths)} videos.")
 
-    logger.info("\n\n## generating video")
-    # Put everything together
-    video.generate_video(video_path=combined_video_path,
-                         audio_path=audio_file,
-                         subtitle_path=subtitle_path,
-                         output_file=final_video_path,
-                         params=params,
-                         )
-    logger.start(f"task {task_id} finished")
     return {
-        "video_file": final_video_path,
+        "videos": final_video_paths,
     }
diff --git a/app/services/video.py b/app/services/video.py
index f00ee36..1630fad 100644
--- a/app/services/video.py
+++ b/app/services/video.py
@@ -34,31 +34,30 @@ def combine_videos(combined_video_path: str,
                    max_clip_duration: int = 5,
                    threads: int = 2,
                    ) -> str:
-    logger.info(f"combining {len(video_paths)} videos into one file: {combined_video_path}")
     audio_clip = AudioFileClip(audio_file)
-    max_duration = audio_clip.duration
-    logger.info(f"max duration of audio: {max_duration} seconds")
+    audio_duration = audio_clip.duration
+    logger.info(f"max duration of audio: {audio_duration} seconds")
     # Required duration of each clip
-    req_dur = max_duration / len(video_paths)
+    req_dur = audio_duration / len(video_paths)  # NOTE(review): dead store, overwritten on the next line; its only remaining effect is raising ZeroDivisionError when video_paths is empty
+    req_dur = max_clip_duration  # effective value: clips longer than this are cut with subclip(0, req_dur)
     logger.info(f"each clip will be maximum {req_dur} seconds long")
 
     aspect = VideoAspect(video_aspect)
     video_width, video_height = aspect.to_resolution()
 
     clips = []
-    tot_dur = 0
+    video_duration = 0
     # Add downloaded clips over and over until the duration of the audio (max_duration) has been reached
-    while tot_dur < max_duration:
+    while video_duration < audio_duration:
         # random video_paths order
         if video_concat_mode.value == VideoConcatMode.random.value:
             random.shuffle(video_paths)
 
         for video_path in video_paths:
-            clip = VideoFileClip(video_path)
-            clip = clip.without_audio()
+            clip = VideoFileClip(video_path).without_audio()
             # Check if clip is longer than the remaining audio
-            if (max_duration - tot_dur) < clip.duration:
-                clip = clip.subclip(0, (max_duration - tot_dur))
+            if (audio_duration - video_duration) < clip.duration:
+                clip = clip.subclip(0, (audio_duration - video_duration))
             # Only shorten clips if the calculated clip length (req_dur) is shorter than the actual clip to prevent still image
             elif req_dur < clip.duration:
                 clip = clip.subclip(0, req_dur)
@@ -88,7 +87,7 @@ def combine_videos(combined_video_path: str,
                 clip = clip.subclip(0, max_clip_duration)
 
             clips.append(clip)
-            tot_dur += clip.duration
+            video_duration += clip.duration
 
     final_clip = concatenate_videoclips(clips)
     final_clip = final_clip.set_fps(30)
@@ -125,7 +124,7 @@ def wrap_text(text, max_width, font='Arial', fontsize=60):
             _wrapped_lines_.append(_txt_)
             _txt_ = ''
     _wrapped_lines_.append(_txt_)
-    return '\n'.join(_wrapped_lines_)
+    return '\n'.join(_wrapped_lines_).strip()
 
 
 def generate_video(video_path: str,
@@ -153,11 +152,23 @@ def generate_video(video_path: str,
 
         logger.info(f"using font: {font_path}")
 
-    def generator(txt):
-        wrapped_txt = wrap_text(txt, max_width=video_width - 100,
+    if params.subtitle_position == "top":
+        position_height = video_height * 0.1
+    elif params.subtitle_position == "bottom":
+        position_height = video_height * 0.9
+    else:
+        position_height = "center"
+
+    def generator(txt, **kwargs):
+        max_width = video_width * 0.9
+        # logger.debug(f"rendering text: {txt}")
+        wrapped_txt = wrap_text(txt,
+                                max_width=max_width,
                                 font=font_path,
-                                fontsize=params.font_size)  # 调整max_width以适应你的视频
-        return TextClip(
+                                fontsize=params.font_size
+                                )  # adjust max_width to fit your video
+
+        clip = TextClip(
             wrapped_txt,
             font=font_path,
             fontsize=params.font_size,
@@ -167,18 +178,16 @@ def generate_video(video_path: str,
             stroke_width=params.stroke_width,
             print_cmd=False,
         )
-
-    position_height = video_height - 200
-    if params.video_aspect == VideoAspect.landscape:
-        position_height = video_height - 100
+        return clip
 
     clips = [
         VideoFileClip(video_path),
     ]
 
     if subtitle_path and os.path.exists(subtitle_path):
-        subtitles = SubtitlesClip(subtitles=subtitle_path, make_textclip=generator, encoding='utf-8')
-        clips.append(subtitles.set_position(lambda _t: ('center', position_height)))
+        sub = SubtitlesClip(subtitles=subtitle_path, make_textclip=generator, encoding='utf-8')
+        sub_clip = sub.set_position(lambda _t: ('center', position_height))
+        clips.append(sub_clip)
 
     result = CompositeVideoClip(clips)
 
@@ -199,7 +208,7 @@ def generate_video(video_path: str,
         original_audio = video_clip.audio
         song_clip = AudioFileClip(bgm_file).set_fps(44100)
         # Set the volume of the song to 10% of the original volume
-        song_clip = song_clip.volumex(0.2).set_fps(44100)
+        song_clip = song_clip.volumex(params.bgm_volume)
         # Add the song to the video
         comp_audio = CompositeAudioClip([original_audio, song_clip])
         video_clip = video_clip.set_audio(comp_audio)
@@ -211,3 +220,38 @@ def generate_video(video_path: str,
 
     os.remove(temp_output_file)
     logger.success(f"completed")
+
+
+if __name__ == "__main__":  # manual check of wrap_text() and generate_video(); runs only when this module is executed directly
+    txt = "hello 幸福经常被描述为最终人生目标和人类追求的核心 但它通常涉及对个人生活中意义和目的的深刻感悟"
+    font = utils.resource_dir() + "/fonts/STHeitiMedium.ttc"
+    t = wrap_text(text=txt, max_width=1000, font=font, fontsize=60)
+    print(t)
+
+    task_id = "69232dfa-f6c5-4b5e-80ba-be3098d3f930"  # hard-coded task id; the files below are read from its task directory
+    task_dir = utils.task_dir(task_id)
+    video_file = f"{task_dir}/combined-1.mp4"  # presumably produced by an earlier combine_videos run - confirm
+    audio_file = f"{task_dir}/audio.mp3"
+    subtitle_file = f"{task_dir}/subtitle.srt"
+    output_file = f"{task_dir}/final.mp4"
+    cfg = VideoParams()
+    cfg.video_aspect = VideoAspect.portrait
+    cfg.font_name = "STHeitiMedium.ttc"
+    cfg.font_size = 60
+    cfg.stroke_color = "#000000"
+    cfg.stroke_width = 1.5
+    cfg.text_fore_color = "#FFFFFF"
+    cfg.text_background_color = "transparent"
+    cfg.bgm_file = ""
+    cfg.bgm_volume = 0.2
+    cfg.subtitle_enabled = True
+    cfg.subtitle_position = "bottom"
+    cfg.n_threads = 2
+    cfg.paragraph_number = 1
+
+    generate_video(video_path=video_file,
+                   audio_path=audio_file,
+                   subtitle_path=subtitle_file,
+                   output_file=output_file,
+                   params=cfg
+                   )
diff --git a/app/services/voice.py b/app/services/voice.py
index 76fb921..6769961 100644
--- a/app/services/voice.py
+++ b/app/services/voice.py
@@ -78,7 +78,7 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
             sub_line = ""
 
     with open(subtitle_file, "w", encoding="utf-8") as file:
-        file.write("\n".join(sub_items))
+        file.write("\n".join(sub_items) + "\n")
 
 
 def get_audio_duration(sub_maker: submaker.SubMaker):
diff --git a/webui/Main.py b/webui/Main.py
index d6b09c9..db1ba98 100644
--- a/webui/Main.py
+++ b/webui/Main.py
@@ -1,16 +1,23 @@
-import asyncio
+import streamlit as st
+
+st.set_page_config(page_title="MoneyPrinterTurbo", page_icon="🤖", layout="wide",
+                   initial_sidebar_state="auto")
 import sys
 import os
-import time
 from uuid import uuid4
-import streamlit as st
+
 from loguru import logger
 from app.models.schema import VideoParams, VideoAspect, VoiceNames, VideoConcatMode
 from app.services import task as tm, llm
 
-st.set_page_config(page_title="MoneyPrinterTurbo", page_icon="🤖", layout="wide",
-                   initial_sidebar_state="auto")
+hide_streamlit_style = """
+<style>#root > div:nth-child(1) > div > div > div > div > section > div {padding-top: 0rem;}</style>
+"""
+st.markdown(hide_streamlit_style, unsafe_allow_html=True)
 st.title("MoneyPrinterTurbo")
+st.write(
+    "⚠️ 先在 **config.toml** 中设置 `pexels_api_keys` 和 `llm_provider` 参数，根据不同的 llm_provider，配置对应的 **API KEY**"
+)
 
 root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 font_dir = os.path.join(root_dir, "resource", "fonts")
@@ -99,7 +106,7 @@ with left_panel:
         cfg.video_script = st.text_area(
             "视频文案（:blue[①可不填，使用AI生成  ②合理使用标点断句，有助于生成字幕]）",
             value=st.session_state['video_script'],
-            height=190
+            height=280
         )
         if st.button("点击使用AI根据**文案**生成【视频关键词】", key="auto_generate_terms"):
             if not cfg.video_script:
@@ -114,14 +121,14 @@ with left_panel:
         cfg.video_terms = st.text_area(
             "视频关键词（:blue[①可不填，使用AI生成 ②用**英文逗号**分隔，只支持英文]）",
             value=st.session_state['video_terms'],
-            height=40)
+            height=50)
 
 with middle_panel:
     with st.container(border=True):
         st.write("**视频设置**")
         video_concat_modes = [
             ("顺序拼接", "sequential"),
-            ("随机拼接", "random"),
+            ("随机拼接（推荐）", "random"),
         ]
         selected_index = st.selectbox("视频拼接模式",
                                       index=1,
@@ -141,8 +148,8 @@ with middle_panel:
                                       )
         cfg.video_aspect = VideoAspect(video_aspect_ratios[selected_index][1])
 
-        cfg.video_clip_duration = st.slider("视频片段最大时长(秒)", 2, 5, 3)
-
+        cfg.video_clip_duration = st.selectbox("视频片段最大时长(秒)", options=[2, 3, 4, 5, 6], index=1)
+        cfg.video_count = st.selectbox("同时生成视频数量", options=[1, 2, 3, 4, 5], index=0)
     with st.container(border=True):
         st.write("**音频设置**")
         # 创建一个映射字典，将原始值映射到友好名称
@@ -179,6 +186,8 @@ with middle_panel:
             if custom_bgm_file and os.path.exists(custom_bgm_file):
                 cfg.bgm_file = custom_bgm_file
                 # st.write(f":red[已选择自定义背景音乐]：**{custom_bgm_file}**")
+        cfg.bgm_volume = st.selectbox("背景音乐音量（0.2表示20%，背景声音不宜过高）",
+                                      options=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], index=2)
 
 with right_panel:
     with st.container(border=True):
@@ -186,6 +195,19 @@ with right_panel:
         cfg.subtitle_enabled = st.checkbox("生成字幕（若取消勾选，下面的设置都将不生效）", value=True)
         font_names = get_all_fonts()
         cfg.font_name = st.selectbox("字体", font_names)
+
+        subtitle_positions = [
+            ("顶部（top）", "top"),
+            ("居中（center）", "center"),
+            ("底部（bottom，推荐）", "bottom"),
+        ]
+        selected_index = st.selectbox("字幕位置",
+                                      index=2,
+                                      options=range(len(subtitle_positions)),  # use list indices as the internal option values
+                                      format_func=lambda x: subtitle_positions[x][0]  # show the human-readable label to the user
+                                      )
+        cfg.subtitle_position = subtitle_positions[selected_index][1]
+
         font_cols = st.columns([0.3, 0.7])
         with font_cols[0]:
             cfg.text_fore_color = st.color_picker("字幕颜色", "#FFFFFF")