Merge pull request #325 from harry0703/dev

support local videos
2026-02-21 16:37:21 +08:00 · 2024-04-27 08:52:10 +08:00 · 2024-04-27 08:52:10 +08:00 · 661d8cb5ab
commit 661d8cb5ab
parent 4596804bcf 4de02f4429
12 changed files with 203 additions and 69 deletions
--- a/README.md
+++ b/README.md
@ -57,7 +57,7 @@
 - [x] 支持 **多种语音** 合成
 - [x] 支持 **字幕生成**，可以调整 `字体`、`位置`、`颜色`、`大小`，同时支持`字幕描边`设置
 - [x] 支持 **背景音乐**，随机或者指定音乐文件，可设置`背景音乐音量`
- [x] 视频素材来源 **高清**，而且 **无版权**
+- [x] 视频素材来源 **高清**，而且 **无版权**，也可以使用自己的本地素材
 - [x] 支持 **OpenAI**、**moonshot**、**Azure**、**gpt4free**、**one-api**、**通义千问**、**Google Gemini**、**Ollama** 等多种模型接入

  ❓[如何使用免费的 **OpenAI GPT-3.5
@ -71,7 +71,6 @@
 - [ ] 增加更多视频素材来源，优化视频素材和文案的匹配度
 - [ ] 增加视频长度选项：短、中、长
 - [ ] 增加免费网络代理，让访问OpenAI和素材下载不再受限
- [ ] 可以使用自己的素材
 - [ ] 朗读声音和背景音乐，提供实时试听
 - [ ] 支持更多的语音合成服务商，比如 OpenAI TTS
 - [ ] 自动上传到YouTube平台
--- a/app/config/config.py
+++ b/app/config/config.py
@ -56,7 +56,7 @@ listen_port = _cfg.get("listen_port", 8080)
 project_name = _cfg.get("project_name", "MoneyPrinterTurbo")
 project_description = _cfg.get("project_description",
                               "<a href='https://github.com/harry0703/MoneyPrinterTurbo'>https://github.com/harry0703/MoneyPrinterTurbo</a>")
-project_version = _cfg.get("project_version", "1.1.4")
+project_version = _cfg.get("project_version", "1.1.5")
 reload_debug = False

 imagemagick_path = app.get("imagemagick_path", "")
--- a/app/models/const.py
+++ b/app/models/const.py
@ -6,3 +6,6 @@ PUNCTUATIONS = [
 TASK_STATE_FAILED = -1
 TASK_STATE_COMPLETE = 1
 TASK_STATE_PROCESSING = 4
+
+FILE_TYPE_VIDEOS = ['mp4', 'mov', 'mkv', 'webm']
+FILE_TYPE_IMAGES = ['jpg', 'jpeg', 'png', 'bmp']
--- a/app/models/schema.py
+++ b/app/models/schema.py
@ -1,6 +1,7 @@
 from enum import Enum
-from typing import Any, Optional
+from typing import Any, Optional, List

+import pydantic
 from pydantic import BaseModel
 import warnings

@ -28,6 +29,11 @@ class VideoAspect(str, Enum):
        return 1080, 1920


+class _Config:
+    arbitrary_types_allowed = True
+
+
+@pydantic.dataclasses.dataclass(config=_Config)
 class MaterialInfo:
    provider: str = "pexels"
    url: str = ""
@ -95,6 +101,9 @@ class VideoParams(BaseModel):
    video_clip_duration: Optional[int] = 5
    video_count: Optional[int] = 1

+    video_source: Optional[str] = "pexels"
+    video_materials: Optional[List[MaterialInfo]] = None  # 用于生成视频的素材
+
    video_language: Optional[str] = ""  # auto detect

    voice_name: Optional[str] = ""
--- a/app/services/task.py
+++ b/app/services/task.py
@ -114,14 +114,28 @@ def start(task_id, params: VideoParams):

    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40)

-    logger.info("\n\n## downloading videos")
-    downloaded_videos = material.download_videos(task_id=task_id,
-                                                 search_terms=video_terms,
-                                                 video_aspect=params.video_aspect,
-                                                 video_contact_mode=params.video_concat_mode,
-                                                 audio_duration=audio_duration * params.video_count,
-                                                 max_clip_duration=max_clip_duration,
-                                                 )
+    downloaded_videos = []
+    if params.video_source == "local":
+        logger.info("\n\n## preprocess local materials")
+        materials = video.preprocess_video(materials=params.video_materials, clip_duration=max_clip_duration)
+        print(materials)
+
+        if not materials:
+            sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+            logger.error("no valid materials found, please check the materials and try again.")
+            return
+        for material_info in materials:
+            print(material_info)
+            downloaded_videos.append(material_info.url)
+    else:
+        logger.info("\n\n## downloading videos")
+        downloaded_videos = material.download_videos(task_id=task_id,
+                                                     search_terms=video_terms,
+                                                     video_aspect=params.video_aspect,
+                                                     video_contact_mode=params.video_concat_mode,
+                                                     audio_duration=audio_duration * params.video_count,
+                                                     max_clip_duration=max_clip_duration,
+                                                     )
    if not downloaded_videos:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        logger.error(
--- a/app/services/video.py
+++ b/app/services/video.py
@ -1,12 +1,13 @@
 import glob
 import random
 from typing import List
-from PIL import ImageFont
+from PIL import ImageFont, Image
 from loguru import logger
 from moviepy.editor import *
 from moviepy.video.tools.subtitles import SubtitlesClip

-from app.models.schema import VideoAspect, VideoParams, VideoConcatMode
+from app.models import const
+from app.models.schema import VideoAspect, VideoParams, VideoConcatMode, MaterialInfo
 from app.utils import utils


@ -268,55 +269,101 @@ def generate_video(video_path: str,
    logger.success(f"completed")


+def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
+    """Validate local materials and turn still images into short video clips.
+
+    Every material with a non-empty ``url`` is opened once to read its frame
+    size. Materials narrower or shorter than 480 pixels are skipped with a
+    warning (they stay in the list untouched). Materials whose extension is in
+    ``const.FILE_TYPE_IMAGES`` are rendered to ``<url>.mp4`` with a slow
+    zoom-in, and their ``url`` is updated in place to the rendered file.
+
+    Args:
+        materials: Materials to inspect; entries are mutated in place.
+            ``None`` is treated as "no materials".
+        clip_duration: Length in seconds of the clip rendered for each image.
+
+    Returns:
+        The ``materials`` list that was passed in, or an empty list when
+        ``materials`` is ``None``.
+    """
+    if materials is None:
+        return []
+
+    for material in materials:
+        if not material.url:
+            continue
+
+        ext = utils.parse_extension(material.url)
+        # Open the file only to read its dimensions. Anything VideoFileClip
+        # cannot open is retried as a still image.
+        try:
+            probe = VideoFileClip(material.url)
+        except Exception:
+            probe = ImageClip(material.url)
+
+        try:
+            width = probe.size[0]
+            height = probe.size[1]
+        finally:
+            # The probe is not reused below, so release its reader right away.
+            probe.close()
+
+        if width < 480 or height < 480:
+            logger.warning(f"video is too small, width: {width}, height: {height}")
+            continue
+
+        if ext in const.FILE_TYPE_IMAGES:
+            logger.info(f"processing image: {material.url}")
+            # Still clip lasting clip_duration seconds, centered in the frame.
+            clip = ImageClip(material.url).set_duration(clip_duration).set_position("center")
+            # Zoom in linearly over time: the scale factor is 1.0 at t=0 and
+            # reaches 1 + clip_duration * 0.03 at the end of the clip
+            # (1.12, i.e. 112%, for the default 4 seconds).
+            zoom_clip = clip.resize(lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration))
+
+            # Wrap the zooming clip in a composite clip before rendering.
+            final_clip = CompositeVideoClip([zoom_clip])
+
+            # Render next to the source image and point the material at it.
+            video_file = f"{material.url}.mp4"
+            try:
+                final_clip.write_videofile(video_file, fps=30, logger=None)
+            finally:
+                # Close even when rendering fails so resources are not leaked.
+                final_clip.close()
+            material.url = video_file
+            logger.success(f"completed: {video_file}")
+    return materials
+
+
 if __name__ == "__main__":
-    txt_en = "Here's your guide to travel hacks for budget-friendly adventures"
-    txt_zh = "测试长字段这是您的旅行技巧指南帮助您进行预算友好的冒险"
-    font = utils.resource_dir() + "/fonts/STHeitiMedium.ttc"
-    for txt in [txt_en, txt_zh]:
-        t, h = wrap_text(text=txt, max_width=1000, font=font, fontsize=60)
-        print(t)
+    m = MaterialInfo()
+    m.url = "/Users/harry/Downloads/IMG_2915.JPG"
+    m.provider = "local"
+    materials = preprocess_video([m], clip_duration=4)
+    print(materials)

-    task_id = "aa563149-a7ea-49c2-b39f-8c32cc225baf"
-    task_dir = utils.task_dir(task_id)
-    video_file = f"{task_dir}/combined-1.mp4"
-    audio_file = f"{task_dir}/audio.mp3"
-    subtitle_file = f"{task_dir}/subtitle.srt"
-    output_file = f"{task_dir}/final.mp4"
-
-    # video_paths = []
-    # for file in os.listdir(utils.storage_dir("test")):
-    #     if file.endswith(".mp4"):
-    #         video_paths.append(os.path.join(utils.storage_dir("test"), file))
+    # txt_en = "Here's your guide to travel hacks for budget-friendly adventures"
+    # txt_zh = "测试长字段这是您的旅行技巧指南帮助您进行预算友好的冒险"
+    # font = utils.resource_dir() + "/fonts/STHeitiMedium.ttc"
+    # for txt in [txt_en, txt_zh]:
+    #     t, h = wrap_text(text=txt, max_width=1000, font=font, fontsize=60)
+    #     print(t)
    #
-    # combine_videos(combined_video_path=video_file,
-    #                audio_file=audio_file,
-    #                video_paths=video_paths,
-    #                video_aspect=VideoAspect.portrait,
-    #                video_concat_mode=VideoConcatMode.random,
-    #                max_clip_duration=5,
-    #                threads=2)
-
-    cfg = VideoParams()
-    cfg.video_aspect = VideoAspect.portrait
-    cfg.font_name = "STHeitiMedium.ttc"
-    cfg.font_size = 60
-    cfg.stroke_color = "#000000"
-    cfg.stroke_width = 1.5
-    cfg.text_fore_color = "#FFFFFF"
-    cfg.text_background_color = "transparent"
-    cfg.bgm_type = "random"
-    cfg.bgm_file = ""
-    cfg.bgm_volume = 1.0
-    cfg.subtitle_enabled = True
-    cfg.subtitle_position = "bottom"
-    cfg.n_threads = 2
-    cfg.paragraph_number = 1
-
-    cfg.voice_volume = 1.0
-
-    generate_video(video_path=video_file,
-                   audio_path=audio_file,
-                   subtitle_path=subtitle_file,
-                   output_file=output_file,
-                   params=cfg
-                   )
+    # task_id = "aa563149-a7ea-49c2-b39f-8c32cc225baf"
+    # task_dir = utils.task_dir(task_id)
+    # video_file = f"{task_dir}/combined-1.mp4"
+    # audio_file = f"{task_dir}/audio.mp3"
+    # subtitle_file = f"{task_dir}/subtitle.srt"
+    # output_file = f"{task_dir}/final.mp4"
+    #
+    # # video_paths = []
+    # # for file in os.listdir(utils.storage_dir("test")):
+    # #     if file.endswith(".mp4"):
+    # #         video_paths.append(os.path.join(utils.storage_dir("test"), file))
+    # #
+    # # combine_videos(combined_video_path=video_file,
+    # #                audio_file=audio_file,
+    # #                video_paths=video_paths,
+    # #                video_aspect=VideoAspect.portrait,
+    # #                video_concat_mode=VideoConcatMode.random,
+    # #                max_clip_duration=5,
+    # #                threads=2)
+    #
+    # cfg = VideoParams()
+    # cfg.video_aspect = VideoAspect.portrait
+    # cfg.font_name = "STHeitiMedium.ttc"
+    # cfg.font_size = 60
+    # cfg.stroke_color = "#000000"
+    # cfg.stroke_width = 1.5
+    # cfg.text_fore_color = "#FFFFFF"
+    # cfg.text_background_color = "transparent"
+    # cfg.bgm_type = "random"
+    # cfg.bgm_file = ""
+    # cfg.bgm_volume = 1.0
+    # cfg.subtitle_enabled = True
+    # cfg.subtitle_position = "bottom"
+    # cfg.n_threads = 2
+    # cfg.paragraph_number = 1
+    #
+    # cfg.voice_volume = 1.0
+    #
+    # generate_video(video_path=video_file,
+    #                audio_path=audio_file,
+    #                subtitle_path=subtitle_file,
+    #                output_file=output_file,
+    #                params=cfg
+    #                )
--- a/app/utils/utils.py
+++ b/app/utils/utils.py
@ -67,10 +67,13 @@ def root_dir():
    return os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))


-def storage_dir(sub_dir: str = ""):
+def storage_dir(sub_dir: str = "", create: bool = False):
+    """Return the path of the project's ``storage`` directory.
+
+    Args:
+        sub_dir: Optional sub-directory name appended to the storage path.
+        create: When true, create the directory (and parents) if missing.
+
+    Returns:
+        The storage path, joined with ``sub_dir`` when one is given.
+    """
    d = os.path.join(root_dir(), "storage")
    if sub_dir:
        d = os.path.join(d, sub_dir)
+    if create:
+        # exist_ok avoids the race between an existence check and creation.
+        os.makedirs(d, exist_ok=True)
+
    return d


@ -219,3 +222,7 @@ def load_locales(i18n_dir):
                with open(os.path.join(root, file), "r", encoding="utf-8") as f:
                    _locales[lang] = json.loads(f.read())
    return _locales
+
+
+def parse_extension(filename):
+    """Return the lower-cased extension of *filename* with dots removed."""
+    _, suffix = os.path.splitext(filename)
+    return suffix.strip().lower().replace(".", "")
--- a/webui/Main.py
+++ b/webui/Main.py
@ -30,10 +30,11 @@ st.set_page_config(page_title="MoneyPrinterTurbo",
                                "video.\n\nhttps://github.com/harry0703/MoneyPrinterTurbo"
                   })

-from app.models.schema import VideoParams, VideoAspect, VideoConcatMode
+from app.models.schema import VideoParams, VideoAspect, VideoConcatMode, MaterialInfo
 from app.services import task as tm, llm, voice
 from app.utils import utils
 from app.config import config
+from app.models.const import FILE_TYPE_VIDEOS, FILE_TYPE_IMAGES

 hide_streamlit_style = """
 <style>#root > div:nth-child(1) > div > div > div > div > section > div {padding-top: 0rem;}</style>
@ -150,6 +151,8 @@ def tr(key):

 st.write(tr("Get Help"))

+llm_provider = config.app.get("llm_provider", "").lower()
+
 if not config.app.get("hide_config", False):
    with st.expander(tr("Basic Settings"), expanded=False):
        config_panels = st.columns(3)
@ -319,6 +322,7 @@ middle_panel = panel[1]
 right_panel = panel[2]

 params = VideoParams(video_subject="")
+uploaded_files = []

 with left_panel:
    with st.container(border=True):
@ -372,6 +376,24 @@ with middle_panel:
            (tr("Sequential"), "sequential"),
            (tr("Random"), "random"),
        ]
+        video_sources = [
+            (tr("Pexels"), "pexels"),
+            (tr("Local file"), "local"),
+            (tr("TikTok"), "douyin"),
+            (tr("Bilibili"), "bilibili"),
+            (tr("Xiaohongshu"), "xiaohongshu"),
+        ]
+        selected_index = st.selectbox(tr("Video Source"),
+                                      options=range(len(video_sources)),  # 使用索引作为内部选项值
+                                      format_func=lambda x: video_sources[x][0]  # 显示给用户的是标签
+                                      )
+        params.video_source = video_sources[selected_index][1]
+        if params.video_source == 'local':
+            _supported_types = FILE_TYPE_VIDEOS + FILE_TYPE_IMAGES
+            uploaded_files = st.file_uploader("Upload Local Files",
+                                              type=["mp4", "mov", "avi", "flv", "mkv", "jpg", "jpeg", "png"],
+                                              accept_multiple_files=True)
+
        selected_index = st.selectbox(tr("Video Concat Mode"),
                                      index=1,
                                      options=range(len(video_concat_modes)),  # 使用索引作为内部选项值
@ -512,6 +534,19 @@ if start_button:
        scroll_to_bottom()
        st.stop()

+    if uploaded_files:
+        local_videos_dir = utils.storage_dir("local_videos", create=True)
+        for file in uploaded_files:
+            file_path = os.path.join(local_videos_dir, f"{file.file_id}_{file.name}")
+            with open(file_path, "wb") as f:
+                f.write(file.getbuffer())
+                m = MaterialInfo()
+                m.provider = "local"
+                m.url = file_path
+                if not params.video_materials:
+                    params.video_materials = []
+                params.video_materials.append(m)
+
    log_container = st.empty()
    log_records = []

--- a/webui/i18n/de.json
+++ b/webui/i18n/de.json
@ -61,6 +61,11 @@
    "Model Name": "Model Name",
    "Please Enter the LLM API Key": "Please Enter the **LLM API Key**",
    "Please Enter the Pexels API Key": "Please Enter the **Pexels API Key**",
-    "Get Help": "If you need help, or have any questions, you can join discord for help: https://harryai.cc"
+    "Get Help": "If you need help, or have any questions, you can join discord for help: https://harryai.cc",
+    "Video Source": "Video Source",
+    "TikTok": "TikTok (TikTok support is coming soon)",
+    "Bilibili": "Bilibili (Bilibili support is coming soon)",
+    "Xiaohongshu": "Xiaohongshu (Xiaohongshu support is coming soon)",
+    "Local file": "Local file"
  }
 }
--- a/webui/i18n/en.json
+++ b/webui/i18n/en.json
@ -62,6 +62,11 @@
    "Model Name": "Model Name",
    "Please Enter the LLM API Key": "Please Enter the **LLM API Key**",
    "Please Enter the Pexels API Key": "Please Enter the **Pexels API Key**",
-    "Get Help": "If you need help, or have any questions, you can join discord for help: https://harryai.cc"
+    "Get Help": "If you need help, or have any questions, you can join discord for help: https://harryai.cc",
+    "Video Source": "Video Source",
+    "TikTok": "TikTok (TikTok support is coming soon)",
+    "Bilibili": "Bilibili (Bilibili support is coming soon)",
+    "Xiaohongshu": "Xiaohongshu (Xiaohongshu support is coming soon)",
+    "Local file": "Local file"
  }
 }
--- a/webui/i18n/vi.json
+++ b/webui/i18n/vi.json
@ -62,6 +62,11 @@
    "Model Name": "Tên Mô Hình",
    "Please Enter the LLM API Key": "Vui lòng Nhập **Khóa API LLM**",
    "Please Enter the Pexels API Key": "Vui lòng Nhập **Khóa API Pexels**",
-    "Get Help": "Nếu bạn cần giúp đỡ hoặc có bất kỳ câu hỏi nào, bạn có thể tham gia discord để được giúp đỡ: https://harryai.cc"
+    "Get Help": "Nếu bạn cần giúp đỡ hoặc có bất kỳ câu hỏi nào, bạn có thể tham gia discord để được giúp đỡ: https://harryai.cc",
+    "Video Source": "Video Source",
+    "TikTok": "TikTok (TikTok support is coming soon)",
+    "Bilibili": "Bilibili (Bilibili support is coming soon)",
+    "Xiaohongshu": "Xiaohongshu (Xiaohongshu support is coming soon)",
+    "Local file": "Local file"
  }
 }
--- a/webui/i18n/zh.json
+++ b/webui/i18n/zh.json
@ -62,6 +62,11 @@
    "Model Name": "模型名称 (:blue[需要到大模型提供商的后台确认被授权的模型名称])",
    "Please Enter the LLM API Key": "请先填写大模型 **API Key**",
    "Please Enter the Pexels API Key": "请先填写 **Pexels API Key**",
-    "Get Help": "有任何问题或建议，可以加入 **微信群** 求助或讨论：https://harryai.cc"
+    "Get Help": "有任何问题或建议，可以加入 **微信群** 求助或讨论：https://harryai.cc",
+    "Video Source": "视频来源",
+    "TikTok": "抖音 (TikTok 支持中，敬请期待)",
+    "Bilibili": "哔哩哔哩 (Bilibili 支持中，敬请期待)",
+    "Xiaohongshu": "小红书 (Xiaohongshu 支持中，敬请期待)",
+    "Local file": "本地文件"
  }
 }