From 4c6d8d615eeb5eb2ab0ff65819c4aceaeb8f9886 Mon Sep 17 00:00:00 2001
From: overcrash <3681221+overcrash66@users.noreply.github.com>
Date: Sat, 17 May 2025 19:02:32 -0300
Subject: [PATCH 1/9] add git ignore
---
.gitignore | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/.gitignore b/.gitignore
index 6aa0ca7..d8bbe40 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,4 +25,5 @@ node_modules
./models/*
venv/
-.venv
\ No newline at end of file
+.venv
+/.vs
From 5f6e933fbba611983552a4e739c7987f86d68ff8 Mon Sep 17 00:00:00 2001
From: overcrash <3681221+overcrash66@users.noreply.github.com>
Date: Sat, 17 May 2025 22:38:30 -0300
Subject: [PATCH 2/9] dev
---
CHANGELOG.md | 0
README-en.md | 389 ------------------------------------------
README.md | 345 ++++++++++++++++++-------------------
app/services/video.py | 58 ++++++-
webui.bat | 9 +-
5 files changed, 229 insertions(+), 572 deletions(-)
create mode 100644 CHANGELOG.md
delete mode 100644 README-en.md
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..e69de29
diff --git a/README-en.md b/README-en.md
deleted file mode 100644
index 7c16f2d..0000000
--- a/README-en.md
+++ /dev/null
@@ -1,389 +0,0 @@
-
-
MoneyPrinterTurbo 💸
-
-
-
-
-
-
-
-
-
English | 简体中文
-
-
-

-
-
-Simply provide a
topic or
keyword for a video, and it will automatically generate the video copy, video
-materials, video subtitles, and video background music before synthesizing a high-definition short video.
-
-### WebUI
-
-
-
-### API Interface
-
-
-
-
-
-## Special Thanks 🙏
-
-Due to the **deployment** and **usage** of this project, there is a certain threshold for some beginner users. We would
-like to express our special thanks to
-
-**RecCloud (AI-Powered Multimedia Service Platform)** for providing a free `AI Video Generator` service based on this
-project. It allows for online use without deployment, which is very convenient.
-
-- Chinese version: https://reccloud.cn
-- English version: https://reccloud.com
-
-
-
-## Thanks for Sponsorship 🙏
-
-Thanks to Picwish https://picwish.com for supporting and sponsoring this project, enabling continuous updates and maintenance.
-
-Picwish focuses on the **image processing field**, providing a rich set of **image processing tools** that extremely simplify complex operations, truly making image processing easier.
-
-
-
-## Features 🎯
-
-- [x] Complete **MVC architecture**, **clearly structured** code, easy to maintain, supports both `API`
- and `Web interface`
-- [x] Supports **AI-generated** video copy, as well as **customized copy**
-- [x] Supports various **high-definition video** sizes
- - [x] Portrait 9:16, `1080x1920`
- - [x] Landscape 16:9, `1920x1080`
-- [x] Supports **batch video generation**, allowing the creation of multiple videos at once, then selecting the most
- satisfactory one
-- [x] Supports setting the **duration of video clips**, facilitating adjustments to material switching frequency
-- [x] Supports video copy in both **Chinese** and **English**
-- [x] Supports **multiple voice** synthesis, with **real-time preview** of effects
-- [x] Supports **subtitle generation**, with adjustable `font`, `position`, `color`, `size`, and also
- supports `subtitle outlining`
-- [x] Supports **background music**, either random or specified music files, with adjustable `background music volume`
-- [x] Video material sources are **high-definition** and **royalty-free**, and you can also use your own **local materials**
-- [x] Supports integration with various models such as **OpenAI**, **Moonshot**, **Azure**, **gpt4free**, **one-api**, **Qwen**, **Google Gemini**, **Ollama**, **DeepSeek**, **ERNIE**, **Pollinations** and more
-
-### Future Plans 📅
-
-- [ ] GPT-SoVITS dubbing support
-- [ ] Optimize voice synthesis using large models for more natural and emotionally rich voice output
-- [ ] Add video transition effects for a smoother viewing experience
-- [ ] Add more video material sources, improve the matching between video materials and script
-- [ ] Add video length options: short, medium, long
-- [ ] Support more voice synthesis providers, such as OpenAI TTS
-- [ ] Automate upload to YouTube platform
-
-## Video Demos 📺
-
-### Portrait 9:16
-
-
-
-
-| ▶️ How to Add Fun to Your Life |
-▶️ What is the Meaning of Life |
-
-
-
-
- |
- |
-
-
-
-
-### Landscape 16:9
-
-
-
-
-| ▶️ What is the Meaning of Life |
-▶️ Why Exercise |
-
-
-
-
- |
- |
-
-
-
-
-## System Requirements 📦
-
-- Recommended minimum 4 CPU cores or more, 4G of memory or more, GPU is not required
-- Windows 10 or MacOS 11.0, and their later versions
-
-## Quick Start 🚀
-
-### Run in Google Colab
-Want to try MoneyPrinterTurbo without setting up a local environment? Run it directly in Google Colab!
-
-[](https://colab.research.google.com/github/harry0703/MoneyPrinterTurbo/blob/main/docs/MoneyPrinterTurbo.ipynb)
-
-
-### Windows
-
-Google Drive (v1.2.6): https://drive.google.com/file/d/1HsbzfT7XunkrCrHw5ncUjFX8XX4zAuUh/view?usp=sharing
-
-After downloading, it is recommended to **double-click** `update.bat` first to update to the **latest code**, then double-click `start.bat` to launch
-
-After launching, the browser will open automatically (if it opens blank, it is recommended to use **Chrome** or **Edge**)
-
-### Other Systems
-
-One-click startup packages have not been created yet. See the **Installation & Deployment** section below. It is recommended to use **docker** for deployment, which is more convenient.
-
-## Installation & Deployment 📥
-
-### Prerequisites
-
-#### ① Clone the Project
-
-```shell
-git clone https://github.com/harry0703/MoneyPrinterTurbo.git
-```
-
-#### ② Modify the Configuration File
-
-- Copy the `config.example.toml` file and rename it to `config.toml`
-- Follow the instructions in the `config.toml` file to configure `pexels_api_keys` and `llm_provider`, and according to
- the llm_provider's service provider, set up the corresponding API Key
-
-### Docker Deployment 🐳
-
-#### ① Launch the Docker Container
-
-If you haven't installed Docker, please install it first https://www.docker.com/products/docker-desktop/
-If you are using a Windows system, please refer to Microsoft's documentation:
-
-1. https://learn.microsoft.com/en-us/windows/wsl/install
-2. https://learn.microsoft.com/en-us/windows/wsl/tutorials/wsl-containers
-
-```shell
-cd MoneyPrinterTurbo
-docker-compose up
-```
-
-> Note:The latest version of docker will automatically install docker compose in the form of a plug-in, and the start command is adjusted to `docker compose up `
-
-#### ② Access the Web Interface
-
-Open your browser and visit http://0.0.0.0:8501
-
-#### ③ Access the API Interface
-
-Open your browser and visit http://0.0.0.0:8080/docs Or http://0.0.0.0:8080/redoc
-
-### Manual Deployment 📦
-
-#### ① Create a Python Virtual Environment
-
-It is recommended to create a Python virtual environment using [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html)
-
-```shell
-git clone https://github.com/harry0703/MoneyPrinterTurbo.git
-cd MoneyPrinterTurbo
-conda create -n MoneyPrinterTurbo python=3.11
-conda activate MoneyPrinterTurbo
-pip install -r requirements.txt
-```
-
-#### ② Install ImageMagick
-
-###### Windows:
-
-- Download https://imagemagick.org/script/download.php Choose the Windows version, make sure to select the **static library** version, such as ImageMagick-7.1.1-32-Q16-x64-**static**.exe
-- Install the downloaded ImageMagick, **do not change the installation path**
-- Modify the `config.toml` configuration file, set `imagemagick_path` to your actual installation path
-
-###### MacOS:
-
-```shell
-brew install imagemagick
-````
-
-###### Ubuntu
-
-```shell
-sudo apt-get install imagemagick
-```
-
-###### CentOS
-
-```shell
-sudo yum install ImageMagick
-```
-
-#### ③ Launch the Web Interface 🌐
-
-Note that you need to execute the following commands in the `root directory` of the MoneyPrinterTurbo project
-
-###### Windows
-
-```bat
-webui.bat
-```
-
-###### MacOS or Linux
-
-```shell
-sh webui.sh
-```
-
-After launching, the browser will open automatically
-
-#### ④ Launch the API Service 🚀
-
-```shell
-python main.py
-```
-
-After launching, you can view the `API documentation` at http://127.0.0.1:8080/docs and directly test the interface
-online for a quick experience.
-
-## Voice Synthesis 🗣
-
-A list of all supported voices can be viewed here: [Voice List](./docs/voice-list.txt)
-
-2024-04-16 v1.1.2 Added 9 new Azure voice synthesis voices that require API KEY configuration. These voices sound more realistic.
-
-## Subtitle Generation 📜
-
-Currently, there are 2 ways to generate subtitles:
-
-- **edge**: Faster generation speed, better performance, no specific requirements for computer configuration, but the
- quality may be unstable
-- **whisper**: Slower generation speed, poorer performance, specific requirements for computer configuration, but more
- reliable quality
-
-You can switch between them by modifying the `subtitle_provider` in the `config.toml` configuration file
-
-It is recommended to use `edge` mode, and switch to `whisper` mode if the quality of the subtitles generated is not
-satisfactory.
-
-> Note:
->
-> 1. In whisper mode, you need to download a model file from HuggingFace, about 3GB in size, please ensure good internet connectivity
-> 2. If left blank, it means no subtitles will be generated.
-
-> Since HuggingFace is not accessible in China, you can use the following methods to download the `whisper-large-v3` model file
-
-Download links:
-
-- Baidu Netdisk: https://pan.baidu.com/s/11h3Q6tsDtjQKTjUu3sc5cA?pwd=xjs9
-- Quark Netdisk: https://pan.quark.cn/s/3ee3d991d64b
-
-After downloading the model, extract it and place the entire directory in `.\MoneyPrinterTurbo\models`,
-The final file path should look like this: `.\MoneyPrinterTurbo\models\whisper-large-v3`
-
-```
-MoneyPrinterTurbo
- ├─models
- │ └─whisper-large-v3
- │ config.json
- │ model.bin
- │ preprocessor_config.json
- │ tokenizer.json
- │ vocabulary.json
-```
-
-## Background Music 🎵
-
-Background music for videos is located in the project's `resource/songs` directory.
-> The current project includes some default music from YouTube videos. If there are copyright issues, please delete
-> them.
-
-## Subtitle Fonts 🅰
-
-Fonts for rendering video subtitles are located in the project's `resource/fonts` directory, and you can also add your
-own fonts.
-
-## Common Questions 🤔
-
-### ❓RuntimeError: No ffmpeg exe could be found
-
-Normally, ffmpeg will be automatically downloaded and detected.
-However, if your environment has issues preventing automatic downloads, you may encounter the following error:
-
-```
-RuntimeError: No ffmpeg exe could be found.
-Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.
-```
-
-In this case, you can download ffmpeg from https://www.gyan.dev/ffmpeg/builds/, unzip it, and set `ffmpeg_path` to your
-actual installation path.
-
-```toml
-[app]
-# Please set according to your actual path, note that Windows path separators are \\
-ffmpeg_path = "C:\\Users\\harry\\Downloads\\ffmpeg.exe"
-```
-
-### ❓ImageMagick is not installed on your computer
-
-[issue 33](https://github.com/harry0703/MoneyPrinterTurbo/issues/33)
-
-1. Follow the `example configuration` provided `download address` to
- install https://imagemagick.org/archive/binaries/ImageMagick-7.1.1-30-Q16-x64-static.exe, using the static library
-2. Do not install in a path with Chinese characters to avoid unpredictable issues
-
-[issue 54](https://github.com/harry0703/MoneyPrinterTurbo/issues/54#issuecomment-2017842022)
-
-For Linux systems, you can manually install it, refer to https://cn.linux-console.net/?p=16978
-
-Thanks to [@wangwenqiao666](https://github.com/wangwenqiao666) for their research and exploration
-
-### ❓ImageMagick's security policy prevents operations related to temporary file @/tmp/tmpur5hyyto.txt
-
-You can find these policies in ImageMagick's configuration file policy.xml.
-This file is usually located in /etc/ImageMagick-`X`/ or a similar location in the ImageMagick installation directory.
-Modify the entry containing `pattern="@"`, change `rights="none"` to `rights="read|write"` to allow read and write operations on files.
-
-### ❓OSError: [Errno 24] Too many open files
-
-This issue is caused by the system's limit on the number of open files. You can solve it by modifying the system's file open limit.
-
-Check the current limit:
-
-```shell
-ulimit -n
-```
-
-If it's too low, you can increase it, for example:
-
-```shell
-ulimit -n 10240
-```
-
-### ❓Whisper model download failed, with the following error
-
-LocalEntryNotfoundEror: Cannot find an appropriate cached snapshotfolderfor the specified revision on the local disk and
-outgoing trafic has been disabled.
-To enablerepo look-ups and downloads online, pass 'local files only=False' as input.
-
-or
-
-An error occured while synchronizing the model Systran/faster-whisper-large-v3 from the Hugging Face Hub:
-An error happened while trying to locate the files on the Hub and we cannot find the appropriate snapshot folder for the
-specified revision on the local disk. Please check your internet connection and try again.
-Trying to load the model directly from the local cache, if it exists.
-
-Solution: [Click to see how to manually download the model from netdisk](#subtitle-generation-)
-
-## Feedback & Suggestions 📢
-
-- You can submit an [issue](https://github.com/harry0703/MoneyPrinterTurbo/issues) or
- a [pull request](https://github.com/harry0703/MoneyPrinterTurbo/pulls).
-
-## License 📝
-
-Click to view the [`LICENSE`](LICENSE) file
-
-## Star History
-
-[](https://star-history.com/#harry0703/MoneyPrinterTurbo&Date)
diff --git a/README.md b/README.md
index 7812761..5ff6be2 100644
--- a/README.md
+++ b/README.md
@@ -1,105 +1,75 @@
MoneyPrinterTurbo 💸
-
-
-
-
-
-
-
-
-
-

-
-
-只需提供一个视频
主题 或
关键词 ,就可以全自动生成视频文案、视频素材、视频字幕、视频背景音乐,然后合成一个高清的短视频。
-
-
Web界面
+Simply provide a
topic or
keyword for a video, and it will automatically generate the video copy, video
+materials, video subtitles, and video background music before synthesizing a high-definition short video.
-
+### WebUI
-
API界面
+
+
+### API Interface

-## 特别感谢 🙏
+## Features 🎯
-由于该项目的 **部署** 和 **使用**,对于一些小白用户来说,还是 **有一定的门槛**,在此特别感谢
-**录咖(AI智能 多媒体服务平台)** 网站基于该项目,提供的免费`AI视频生成器`服务,可以不用部署,直接在线使用,非常方便。
+- [x] Complete **MVC architecture**, **clearly structured** code, easy to maintain, supports both `API`
+ and `Web interface`
+- [x] Supports **AI-generated** video copy, as well as **customized copy**
+- [x] Supports various **high-definition video** sizes
+ - [x] Portrait 9:16, `1080x1920`
+ - [x] Landscape 16:9, `1920x1080`
+- [x] Supports **batch video generation**, allowing the creation of multiple videos at once, then selecting the most
+ satisfactory one
+- [x] Supports setting the **duration of video clips**, facilitating adjustments to material switching frequency
+- [x] Supports video copy in both **Chinese** and **English**
+- [x] Supports **multiple voice** synthesis, with **real-time preview** of effects
+- [x] Supports **subtitle generation**, with adjustable `font`, `position`, `color`, `size`, and also
+ supports `subtitle outlining`
+- [x] Supports **background music**, either random or specified music files, with adjustable `background music volume`
+- [x] Video material sources are **high-definition** and **royalty-free**, and you can also use your own **local materials**
+- [x] Supports integration with various models such as **OpenAI**, **Moonshot**, **Azure**, **gpt4free**, **one-api**, **Qwen**, **Google Gemini**, **Ollama**, **DeepSeek**, **ERNIE**, **Pollinations** and more
-- 中文版:https://reccloud.cn
-- 英文版:https://reccloud.com
+### Future Plans 📅
-
+- [ ] GPT-SoVITS dubbing support
+- [ ] Optimize voice synthesis using large models for more natural and emotionally rich voice output
+- [ ] Add video transition effects for a smoother viewing experience
+- [ ] Add more video material sources, improve the matching between video materials and script
+- [ ] Add video length options: short, medium, long
+- [ ] Support more voice synthesis providers, such as OpenAI TTS
+- [ ] Automate upload to YouTube platform
-## 感谢赞助 🙏
+## Video Demos 📺
-感谢佐糖 https://picwish.cn 对该项目的支持和赞助,使得该项目能够持续的更新和维护。
-
-佐糖专注于**图像处理领域**,提供丰富的**图像处理工具**,将复杂操作极致简化,真正实现让图像处理更简单。
-
-
-
-## 功能特性 🎯
-
-- [x] 完整的 **MVC架构**,代码 **结构清晰**,易于维护,支持 `API` 和 `Web界面`
-- [x] 支持视频文案 **AI自动生成**,也可以**自定义文案**
-- [x] 支持多种 **高清视频** 尺寸
- - [x] 竖屏 9:16,`1080x1920`
- - [x] 横屏 16:9,`1920x1080`
-- [x] 支持 **批量视频生成**,可以一次生成多个视频,然后选择一个最满意的
-- [x] 支持 **视频片段时长** 设置,方便调节素材切换频率
-- [x] 支持 **中文** 和 **英文** 视频文案
-- [x] 支持 **多种语音** 合成,可 **实时试听** 效果
-- [x] 支持 **字幕生成**,可以调整 `字体`、`位置`、`颜色`、`大小`,同时支持`字幕描边`设置
-- [x] 支持 **背景音乐**,随机或者指定音乐文件,可设置`背景音乐音量`
-- [x] 视频素材来源 **高清**,而且 **无版权**,也可以使用自己的 **本地素材**
-- [x] 支持 **OpenAI**、**Moonshot**、**Azure**、**gpt4free**、**one-api**、**通义千问**、**Google Gemini**、**Ollama**、**DeepSeek**、 **文心一言**, **Pollinations** 等多种模型接入
- - 中国用户建议使用 **DeepSeek** 或 **Moonshot** 作为大模型提供商(国内可直接访问,不需要VPN。注册就送额度,基本够用)
-
-
-### 后期计划 📅
-
-- [ ] GPT-SoVITS 配音支持
-- [ ] 优化语音合成,利用大模型,使其合成的声音,更加自然,情绪更加丰富
-- [ ] 增加视频转场效果,使其看起来更加的流畅
-- [ ] 增加更多视频素材来源,优化视频素材和文案的匹配度
-- [ ] 增加视频长度选项:短、中、长
-- [ ] 支持更多的语音合成服务商,比如 OpenAI TTS
-- [ ] 自动上传到YouTube平台
-
-## 视频演示 📺
-
-### 竖屏 9:16
+### Portrait 9:16
-| ▶️ 《如何增加生活的乐趣》 |
-▶️ 《金钱的作用》 更真实的合成声音 |
-▶️ 《生命的意义是什么》 |
+▶️ How to Add Fun to Your Life |
+▶️ What is the Meaning of Life |
|
- |
|
-### 横屏 16:9
+### Landscape 16:9
-| ▶️《生命的意义是什么》 |
-▶️《为什么要运动》 |
+▶️ What is the Meaning of Life |
+▶️ Why Exercise |
@@ -110,86 +80,79 @@
-## 配置要求 📦
+## System Requirements 📦
-- 建议最低 CPU **4核** 或以上,内存 **4G** 或以上,显卡非必须
-- Windows 10 或 MacOS 11.0 以上系统
+- Recommended minimum 4 CPU cores or more, 4G of memory or more, GPU is not required
+- Windows 10 or MacOS 11.0, and their later versions
+## New updates and features will be released in the [changelog](CHANGELOG.md) file
-## 快速开始 🚀
+## Quick Start 🚀
-### 在 Google Colab 中运行
-免去本地环境配置,点击直接在 Google Colab 中快速体验 MoneyPrinterTurbo
+### Run in Google Colab
+Want to try MoneyPrinterTurbo without setting up a local environment? Run it directly in Google Colab!
[](https://colab.research.google.com/github/harry0703/MoneyPrinterTurbo/blob/main/docs/MoneyPrinterTurbo.ipynb)
-### Windows一键启动包
+### Windows
-下载一键启动包,解压直接使用(路径不要有 **中文**、**特殊字符**、**空格**)
+Google Drive (v1.2.6): https://drive.google.com/file/d/1HsbzfT7XunkrCrHw5ncUjFX8XX4zAuUh/view?usp=sharing
-- 百度网盘(v1.2.6): https://pan.baidu.com/s/1wg0UaIyXpO3SqIpaq790SQ?pwd=sbqx 提取码: sbqx
-- Google Drive (v1.2.6): https://drive.google.com/file/d/1HsbzfT7XunkrCrHw5ncUjFX8XX4zAuUh/view?usp=sharing
+After downloading, it is recommended to **double-click** `update.bat` first to update to the **latest code**, then double-click `start.bat` to launch
-下载后,建议先**双击执行** `update.bat` 更新到**最新代码**,然后双击 `start.bat` 启动
+After launching, the browser will open automatically (if it opens blank, it is recommended to use **Chrome** or **Edge**)
-启动后,会自动打开浏览器(如果打开是空白,建议换成 **Chrome** 或者 **Edge** 打开)
+### Other Systems
-## 安装部署 📥
+One-click startup packages have not been created yet. See the **Installation & Deployment** section below. It is recommended to use **docker** for deployment, which is more convenient.
-### 前提条件
+## Installation & Deployment 📥
-- 尽量不要使用 **中文路径**,避免出现一些无法预料的问题
-- 请确保你的 **网络** 是正常的,VPN需要打开`全局流量`模式
+### Prerequisites
-#### ① 克隆代码
+#### ① Clone the Project
```shell
git clone https://github.com/harry0703/MoneyPrinterTurbo.git
```
-#### ② 修改配置文件(可选,建议启动后也可以在 WebUI 里面配置)
+#### ② Modify the Configuration File
-- 将 `config.example.toml` 文件复制一份,命名为 `config.toml`
-- 按照 `config.toml` 文件中的说明,配置好 `pexels_api_keys` 和 `llm_provider`,并根据 llm_provider 对应的服务商,配置相关的
- API Key
+- Copy the `config.example.toml` file and rename it to `config.toml`
+- Follow the instructions in the `config.toml` file to configure `pexels_api_keys` and `llm_provider`, then set the
+ API key for the service provider selected in `llm_provider`
-### Docker部署 🐳
+### Docker Deployment 🐳
-#### ① 启动Docker
+#### ① Launch the Docker Container
-如果未安装 Docker,请先安装 https://www.docker.com/products/docker-desktop/
+If you haven't installed Docker, please install it first: https://www.docker.com/products/docker-desktop/
+If you are using a Windows system, please refer to Microsoft's documentation:
-如果是Windows系统,请参考微软的文档:
-
-1. https://learn.microsoft.com/zh-cn/windows/wsl/install
-2. https://learn.microsoft.com/zh-cn/windows/wsl/tutorials/wsl-containers
+1. https://learn.microsoft.com/en-us/windows/wsl/install
+2. https://learn.microsoft.com/en-us/windows/wsl/tutorials/wsl-containers
```shell
cd MoneyPrinterTurbo
docker-compose up
```
-> 注意:最新版的docker安装时会自动以插件的形式安装docker compose,启动命令调整为docker compose up
+> Note: The latest version of Docker automatically installs Docker Compose as a plug-in, so the start command becomes `docker compose up`
-#### ② 访问Web界面
+#### ② Access the Web Interface
-打开浏览器,访问 http://0.0.0.0:8501
+Open your browser and visit http://0.0.0.0:8501
-#### ③ 访问API文档
+#### ③ Access the API Interface
-打开浏览器,访问 http://0.0.0.0:8080/docs 或者 http://0.0.0.0:8080/redoc
+Open your browser and visit http://0.0.0.0:8080/docs or http://0.0.0.0:8080/redoc
-### 手动部署 📦
+### Manual Deployment 📦
-> 视频教程
+#### ① Create a Python Virtual Environment
-- 完整的使用演示:https://v.douyin.com/iFhnwsKY/
-- 如何在Windows上部署:https://v.douyin.com/iFyjoW3M
-
-#### ① 创建虚拟环境
-
-建议使用 [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html) 创建 python 虚拟环境
+It is recommended to create a Python virtual environment using [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html)
```shell
git clone https://github.com/harry0703/MoneyPrinterTurbo.git
@@ -199,30 +162,35 @@ conda activate MoneyPrinterTurbo
pip install -r requirements.txt
```
-#### ② 安装好 ImageMagick
+#### ② Install ImageMagick
-- Windows:
- - 下载 https://imagemagick.org/script/download.php 选择Windows版本,切记一定要选择 **静态库** 版本,比如
- ImageMagick-7.1.1-32-Q16-x64-**static**.exe
- - 安装下载好的 ImageMagick,**注意不要修改安装路径**
- - 修改 `配置文件 config.toml` 中的 `imagemagick_path` 为你的 **实际安装路径**
+###### Windows:
-- MacOS:
- ```shell
- brew install imagemagick
- ````
-- Ubuntu
- ```shell
- sudo apt-get install imagemagick
- ```
-- CentOS
- ```shell
- sudo yum install ImageMagick
- ```
+- Download ImageMagick from https://imagemagick.org/script/download.php. Choose the Windows version and make sure to select the **static library** version, such as ImageMagick-7.1.1-32-Q16-x64-**static**.exe
+- Install the downloaded ImageMagick, **do not change the installation path**
+- Modify the `config.toml` configuration file, set `imagemagick_path` to your actual installation path
-#### ③ 启动Web界面 🌐
+###### MacOS:
-注意需要到 MoneyPrinterTurbo 项目 `根目录` 下执行以下命令
+```shell
+brew install imagemagick
+```
+
+###### Ubuntu
+
+```shell
+sudo apt-get install imagemagick
+```
+
+###### CentOS
+
+```shell
+sudo yum install ImageMagick
+```
+
+#### ③ Launch the Web Interface 🌐
+
+Note that you need to execute the following commands in the `root directory` of the MoneyPrinterTurbo project
###### Windows
@@ -236,50 +204,54 @@ webui.bat
sh webui.sh
```
-启动后,会自动打开浏览器(如果打开是空白,建议换成 **Chrome** 或者 **Edge** 打开)
+After launching, the browser will open automatically
-#### ④ 启动API服务 🚀
+#### ④ Launch the API Service 🚀
```shell
python main.py
```
-启动后,可以查看 `API文档` http://127.0.0.1:8080/docs 或者 http://127.0.0.1:8080/redoc 直接在线调试接口,快速体验。
+After launching, you can view the `API documentation` at http://127.0.0.1:8080/docs and directly test the interface
+online for a quick experience.
-## 语音合成 🗣
+## Voice Synthesis 🗣
-所有支持的声音列表,可以查看:[声音列表](./docs/voice-list.txt)
+A list of all supported voices can be viewed here: [Voice List](./docs/voice-list.txt)
-2024-04-16 v1.1.2 新增了9种Azure的语音合成声音,需要配置API KEY,该声音合成的更加真实。
+2024-04-16 v1.1.2 Added 9 new Azure voice synthesis voices that require API KEY configuration. These voices sound more realistic.
-## 字幕生成 📜
+## Subtitle Generation 📜
-当前支持2种字幕生成方式:
+Currently, there are 2 ways to generate subtitles:
-- **edge**: 生成`速度快`,性能更好,对电脑配置没有要求,但是质量可能不稳定
-- **whisper**: 生成`速度慢`,性能较差,对电脑配置有一定要求,但是`质量更可靠`。
+- **edge**: Faster generation speed, better performance, no specific requirements for computer configuration, but the
+ quality may be unstable
+- **whisper**: Slower generation speed, poorer performance, specific requirements for computer configuration, but more
+ reliable quality
-可以修改 `config.toml` 配置文件中的 `subtitle_provider` 进行切换
+You can switch between them by modifying the `subtitle_provider` in the `config.toml` configuration file
-建议使用 `edge` 模式,如果生成的字幕质量不好,再切换到 `whisper` 模式
+It is recommended to use `edge` mode, and switch to `whisper` mode if the quality of the subtitles generated is not
+satisfactory.
-> 注意:
+> Note:
+>
+> 1. In whisper mode, you need to download a model file of about 3 GB from HuggingFace; make sure you have a stable internet connection.
+> 2. If `subtitle_provider` is left blank, no subtitles will be generated.
-1. whisper 模式下需要到 HuggingFace 下载一个模型文件,大约 3GB 左右,请确保网络通畅
-2. 如果留空,表示不生成字幕。
+> Since HuggingFace is not accessible in China, you can use the following methods to download the `whisper-large-v3` model file
-> 由于国内无法访问 HuggingFace,可以使用以下方法下载 `whisper-large-v3` 的模型文件
+Download links:
-下载地址:
+- Baidu Netdisk: https://pan.baidu.com/s/11h3Q6tsDtjQKTjUu3sc5cA?pwd=xjs9
+- Quark Netdisk: https://pan.quark.cn/s/3ee3d991d64b
-- 百度网盘: https://pan.baidu.com/s/11h3Q6tsDtjQKTjUu3sc5cA?pwd=xjs9
-- 夸克网盘:https://pan.quark.cn/s/3ee3d991d64b
-
-模型下载后解压,整个目录放到 `.\MoneyPrinterTurbo\models` 里面,
-最终的文件路径应该是这样: `.\MoneyPrinterTurbo\models\whisper-large-v3`
+After downloading the model, extract it and place the entire directory in `.\MoneyPrinterTurbo\models`.
+The final file path should look like this: `.\MoneyPrinterTurbo\models\whisper-large-v3`
```
-MoneyPrinterTurbo
+MoneyPrinterTurbo
├─models
│ └─whisper-large-v3
│ config.json
@@ -289,81 +261,98 @@ MoneyPrinterTurbo
│ vocabulary.json
```
-## 背景音乐 🎵
+## Background Music 🎵
-用于视频的背景音乐,位于项目的 `resource/songs` 目录下。
-> 当前项目里面放了一些默认的音乐,来自于 YouTube 视频,如有侵权,请删除。
+Background music for videos is located in the project's `resource/songs` directory.
+> The current project includes some default music from YouTube videos. If there are copyright issues, please delete
+> them.
-## 字幕字体 🅰
+## Subtitle Fonts 🅰
-用于视频字幕的渲染,位于项目的 `resource/fonts` 目录下,你也可以放进去自己的字体。
+Fonts for rendering video subtitles are located in the project's `resource/fonts` directory, and you can also add your
+own fonts.
-## 常见问题 🤔
+## Common Questions 🤔
### ❓RuntimeError: No ffmpeg exe could be found
-通常情况下,ffmpeg 会被自动下载,并且会被自动检测到。
-但是如果你的环境有问题,无法自动下载,可能会遇到如下错误:
+Normally, ffmpeg will be automatically downloaded and detected.
+However, if your environment has issues preventing automatic downloads, you may encounter the following error:
```
RuntimeError: No ffmpeg exe could be found.
Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.
```
-此时你可以从 https://www.gyan.dev/ffmpeg/builds/ 下载ffmpeg,解压后,设置 `ffmpeg_path` 为你的实际安装路径即可。
+In this case, you can download ffmpeg from https://www.gyan.dev/ffmpeg/builds/, unzip it, and set `ffmpeg_path` to your
+actual installation path.
```toml
[app]
-# 请根据你的实际路径设置,注意 Windows 路径分隔符为 \\
+# Please set according to your actual path, note that Windows path separators are \\
ffmpeg_path = "C:\\Users\\harry\\Downloads\\ffmpeg.exe"
```
-### ❓ImageMagick的安全策略阻止了与临时文件@/tmp/tmpur5hyyto.txt相关的操作
+### ❓ImageMagick is not installed on your computer
-可以在ImageMagick的配置文件policy.xml中找到这些策略。
-这个文件通常位于 /etc/ImageMagick-`X`/ 或 ImageMagick 安装目录的类似位置。
-修改包含`pattern="@"`的条目,将`rights="none"`更改为`rights="read|write"`以允许对文件的读写操作。
+[issue 33](https://github.com/harry0703/MoneyPrinterTurbo/issues/33)
+
+1. Use the `download address` given in the `example configuration` to
+ install https://imagemagick.org/archive/binaries/ImageMagick-7.1.1-30-Q16-x64-static.exe (the static library build)
+2. Do not install in a path with Chinese characters to avoid unpredictable issues
+
+[issue 54](https://github.com/harry0703/MoneyPrinterTurbo/issues/54#issuecomment-2017842022)
+
+For Linux systems, you can manually install it, refer to https://cn.linux-console.net/?p=16978
+
+Thanks to [@wangwenqiao666](https://github.com/wangwenqiao666) for their research and exploration
+
+### ❓ImageMagick's security policy prevents operations related to temporary file @/tmp/tmpur5hyyto.txt
+
+You can find these policies in ImageMagick's configuration file policy.xml.
+This file is usually located in /etc/ImageMagick-`X`/ or a similar location in the ImageMagick installation directory.
+Modify the entry containing `pattern="@"`: change `rights="none"` to `rights="read|write"` to allow read and write operations on files.
### ❓OSError: [Errno 24] Too many open files
-这个问题是由于系统打开文件数限制导致的,可以通过修改系统的文件打开数限制来解决。
+This issue is caused by the system's limit on the number of open files. You can solve it by modifying the system's file open limit.
-查看当前限制
+Check the current limit:
```shell
ulimit -n
```
-如果过低,可以调高一些,比如
+If it's too low, you can increase it, for example:
```shell
ulimit -n 10240
```
-### ❓Whisper 模型下载失败,出现如下错误
+### ❓Whisper model download failed, with the following error
LocalEntryNotfoundEror: Cannot find an appropriate cached snapshotfolderfor the specified revision on the local disk and
outgoing trafic has been disabled.
To enablerepo look-ups and downloads online, pass 'local files only=False' as input.
-或者
+or
An error occured while synchronizing the model Systran/faster-whisper-large-v3 from the Hugging Face Hub:
An error happened while trying to locate the files on the Hub and we cannot find the appropriate snapshot folder for the
specified revision on the local disk. Please check your internet connection and try again.
Trying to load the model directly from the local cache, if it exists.
-解决方法:[点击查看如何从网盘手动下载模型](#%E5%AD%97%E5%B9%95%E7%94%9F%E6%88%90-)
+Solution: [Click to see how to manually download the model from netdisk](#subtitle-generation-)
-## 反馈建议 📢
+## Feedback & Suggestions 📢
-- 可以提交 [issue](https://github.com/harry0703/MoneyPrinterTurbo/issues)
- 或者 [pull request](https://github.com/harry0703/MoneyPrinterTurbo/pulls)。
+- You can submit an [issue](https://github.com/harry0703/MoneyPrinterTurbo/issues) or
+ a [pull request](https://github.com/harry0703/MoneyPrinterTurbo/pulls).
-## 许可证 📝
+## License 📝
-点击查看 [`LICENSE`](LICENSE) 文件
+Click to view the [`LICENSE`](LICENSE) file
## Star History
-[](https://star-history.com/#harry0703/MoneyPrinterTurbo&Date)
\ No newline at end of file
+[](https://star-history.com/#harry0703/MoneyPrinterTurbo&Date)
diff --git a/app/services/video.py b/app/services/video.py
index 1a79e30..8bdf5e3 100644
--- a/app/services/video.py
+++ b/app/services/video.py
@@ -50,6 +50,9 @@ class SubClippedVideoClip:
audio_codec = "aac"
video_codec = "libx264"
fps = 30
+video_preset = "medium"
+video_crf = "23"
+pixel_format = "yuv420p"
def close_clip(clip):
if clip is None:
@@ -170,6 +173,9 @@ def combine_videos(
try:
clip = VideoFileClip(subclipped_item.file_path).subclipped(subclipped_item.start_time, subclipped_item.end_time)
+ clip = VideoFileClip(subclipped_item.file_path,
+ target_resolution=(video_height, video_width),
+ audio_fps=44100)
clip_duration = clip.duration
# Not all videos are same size, so we need to resize them
clip_w, clip_h = clip.size
@@ -179,7 +185,7 @@ def combine_videos(
logger.debug(f"resizing clip, source: {clip_w}x{clip_h}, ratio: {clip_ratio:.2f}, target: {video_width}x{video_height}, ratio: {video_ratio:.2f}")
if clip_ratio == video_ratio:
- clip = clip.resized(new_size=(video_width, video_height))
+ clip = clip.resized(new_size=(video_width, video_height), method='lanczos')
else:
if clip_ratio > video_ratio:
scale_factor = video_width / clip_w
@@ -219,8 +225,19 @@ def combine_videos(
# wirte clip to temp file
clip_file = f"{output_dir}/temp-clip-{i+1}.mp4"
- clip.write_videofile(clip_file, logger=None, fps=fps, codec=video_codec)
-
+ #clip.write_videofile(clip_file, logger=None, fps=fps, codec=video_codec)
+ clip.write_videofile(
+ clip_file,
+ logger=None,
+ fps=fps,
+ codec=video_codec,
+ preset=video_preset,
+ ffmpeg_params=[
+ '-crf', video_crf,
+ '-pix_fmt', pixel_format,
+ '-movflags', '+faststart'
+ ]
+ )
close_clip(clip)
processed_clips.append(SubClippedVideoClip(file_path=clip_file, duration=clip.duration, width=clip_w, height=clip_h))
@@ -275,6 +292,14 @@ def combine_videos(
merged_clip = concatenate_videoclips([base_clip, next_clip])
# save merged result to temp file
+ #merged_clip.write_videofile(
+ # filename=temp_merged_next,
+ # threads=threads,
+ # logger=None,
+ # temp_audiofile_path=output_dir,
+ # audio_codec=audio_codec,
+ # fps=fps,
+ #)
merged_clip.write_videofile(
filename=temp_merged_next,
threads=threads,
@@ -282,6 +307,12 @@ def combine_videos(
temp_audiofile_path=output_dir,
audio_codec=audio_codec,
fps=fps,
+ preset=video_preset,
+ ffmpeg_params=[
+ '-crf', video_crf,
+ '-pix_fmt', pixel_format,
+ '-movflags', '+faststart'
+ ]
)
close_clip(base_clip)
close_clip(next_clip)
@@ -472,6 +503,14 @@ def generate_video(
logger.error(f"failed to add bgm: {str(e)}")
video_clip = video_clip.with_audio(audio_clip)
+ #video_clip.write_videofile(
+ # output_file,
+ # audio_codec=audio_codec,
+ # temp_audiofile_path=output_dir,
+ # threads=params.n_threads or 2,
+ # logger=None,
+ # fps=fps,
+ #)
video_clip.write_videofile(
output_file,
audio_codec=audio_codec,
@@ -479,6 +518,12 @@ def generate_video(
threads=params.n_threads or 2,
logger=None,
fps=fps,
+ preset=video_preset,
+ ffmpeg_params=[
+ '-crf', video_crf,
+ '-pix_fmt', pixel_format,
+ '-movflags', '+faststart'
+ ]
)
video_clip.close()
del video_clip
@@ -514,8 +559,13 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
# The zoom effect starts from the original size and gradually scales up to 120%.
# t represents the current time, and clip.duration is the total duration of the clip (3 seconds).
# Note: 1 represents 100% size, so 1.2 represents 120% size.
+ #zoom_clip = clip.resized(
+ # lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration)
+ #)
+
zoom_clip = clip.resized(
- lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration)
+ lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration),
+ method='lanczos'
)
# Optionally, create a composite video clip containing the zoomed clip.
diff --git a/webui.bat b/webui.bat
index fd97514..8c8e08a 100644
--- a/webui.bat
+++ b/webui.bat
@@ -3,5 +3,12 @@ set CURRENT_DIR=%CD%
echo ***** Current directory: %CURRENT_DIR% *****
set PYTHONPATH=%CURRENT_DIR%
-rem set HF_ENDPOINT=https://hf-mirror.com
+rem Activate Python virtual environment if exists
+if exist "venv\Scripts\activate.bat" (
+ call venv\Scripts\activate.bat
+)
+
+rem Optional Hugging Face mirror setting
+rem set HF_ENDPOINT=https://hf-mirror.com
+
streamlit run .\webui\Main.py --browser.gatherUsageStats=False --server.enableCORS=True
\ No newline at end of file
From 5055d940cb04eed897c2ec3537b9072fa12741dc Mon Sep 17 00:00:00 2001
From: Wael <3681221+overcrash66@users.noreply.github.com>
Date: Sat, 17 May 2025 22:44:29 -0300
Subject: [PATCH 3/9] Update CHANGELOG.md
---
CHANGELOG.md | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e69de29..d9c0b4d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -0,0 +1,12 @@
+README Updates: The Chinese README (README.md) was heavily revised to include more English content and better structure, while the English README (README-en.md)
+was removed entirely. Much of the documentation, quick start, and FAQs are now unified in a single README and available in English.
+CHANGELOG.md Added: A new changelog file was created to track updates and features.
+Video Processing Improvements (app/services/video.py):
+Enhanced the video combining and processing logic by introducing more ffmpeg parameters for improved video encoding (e.g., preset, CRF, pixel format settings).
+Improved resizing methods (using 'lanczos') for better video quality.
+Updated functions to better handle merging, audio, and encoding parameters for output files.
+Added more robust logic for video preprocessing, merging, and clip writing, with commented-out code for optional features.
+Windows Web UI Startup Script (webui.bat):
+Improved the script to automatically activate a Python virtual environment if present.
+Clarified optional Hugging Face mirror settings.
+Overall, this commit modernizes the documentation, improves video encoding quality and flexibility, and makes the Windows startup script more robust and user-friendly.
From c487b874c1dd4cc3e87ca43cd83b8439b7c8778a Mon Sep 17 00:00:00 2001
From: overcrash <3681221+overcrash66@users.noreply.github.com>
Date: Sun, 18 May 2025 09:36:04 -0300
Subject: [PATCH 4/9] Update video.py
---
app/services/video.py | 58 +++----------------------------------------
1 file changed, 4 insertions(+), 54 deletions(-)
diff --git a/app/services/video.py b/app/services/video.py
index 8bdf5e3..1a79e30 100644
--- a/app/services/video.py
+++ b/app/services/video.py
@@ -50,9 +50,6 @@ class SubClippedVideoClip:
audio_codec = "aac"
video_codec = "libx264"
fps = 30
-video_preset = "medium"
-video_crf = "23"
-pixel_format = "yuv420p"
def close_clip(clip):
if clip is None:
@@ -173,9 +170,6 @@ def combine_videos(
try:
clip = VideoFileClip(subclipped_item.file_path).subclipped(subclipped_item.start_time, subclipped_item.end_time)
- clip = VideoFileClip(subclipped_item.file_path,
- target_resolution=(video_height, video_width),
- audio_fps=44100)
clip_duration = clip.duration
# Not all videos are same size, so we need to resize them
clip_w, clip_h = clip.size
@@ -185,7 +179,7 @@ def combine_videos(
logger.debug(f"resizing clip, source: {clip_w}x{clip_h}, ratio: {clip_ratio:.2f}, target: {video_width}x{video_height}, ratio: {video_ratio:.2f}")
if clip_ratio == video_ratio:
- clip = clip.resized(new_size=(video_width, video_height), method='lanczos')
+ clip = clip.resized(new_size=(video_width, video_height))
else:
if clip_ratio > video_ratio:
scale_factor = video_width / clip_w
@@ -225,19 +219,8 @@ def combine_videos(
# wirte clip to temp file
clip_file = f"{output_dir}/temp-clip-{i+1}.mp4"
- #clip.write_videofile(clip_file, logger=None, fps=fps, codec=video_codec)
- clip.write_videofile(
- clip_file,
- logger=None,
- fps=fps,
- codec=video_codec,
- preset=video_preset,
- ffmpeg_params=[
- '-crf', video_crf,
- '-pix_fmt', pixel_format,
- '-movflags', '+faststart'
- ]
- )
+ clip.write_videofile(clip_file, logger=None, fps=fps, codec=video_codec)
+
close_clip(clip)
processed_clips.append(SubClippedVideoClip(file_path=clip_file, duration=clip.duration, width=clip_w, height=clip_h))
@@ -292,14 +275,6 @@ def combine_videos(
merged_clip = concatenate_videoclips([base_clip, next_clip])
# save merged result to temp file
- #merged_clip.write_videofile(
- # filename=temp_merged_next,
- # threads=threads,
- # logger=None,
- # temp_audiofile_path=output_dir,
- # audio_codec=audio_codec,
- # fps=fps,
- #)
merged_clip.write_videofile(
filename=temp_merged_next,
threads=threads,
@@ -307,12 +282,6 @@ def combine_videos(
temp_audiofile_path=output_dir,
audio_codec=audio_codec,
fps=fps,
- preset=video_preset,
- ffmpeg_params=[
- '-crf', video_crf,
- '-pix_fmt', pixel_format,
- '-movflags', '+faststart'
- ]
)
close_clip(base_clip)
close_clip(next_clip)
@@ -503,14 +472,6 @@ def generate_video(
logger.error(f"failed to add bgm: {str(e)}")
video_clip = video_clip.with_audio(audio_clip)
- #video_clip.write_videofile(
- # output_file,
- # audio_codec=audio_codec,
- # temp_audiofile_path=output_dir,
- # threads=params.n_threads or 2,
- # logger=None,
- # fps=fps,
- #)
video_clip.write_videofile(
output_file,
audio_codec=audio_codec,
@@ -518,12 +479,6 @@ def generate_video(
threads=params.n_threads or 2,
logger=None,
fps=fps,
- preset=video_preset,
- ffmpeg_params=[
- '-crf', video_crf,
- '-pix_fmt', pixel_format,
- '-movflags', '+faststart'
- ]
)
video_clip.close()
del video_clip
@@ -559,13 +514,8 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
# The zoom effect starts from the original size and gradually scales up to 120%.
# t represents the current time, and clip.duration is the total duration of the clip (3 seconds).
# Note: 1 represents 100% size, so 1.2 represents 120% size.
- #zoom_clip = clip.resized(
- # lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration)
- #)
-
zoom_clip = clip.resized(
- lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration),
- method='lanczos'
+ lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration)
)
# Optionally, create a composite video clip containing the zoomed clip.
From 686fd3f307e2e9e3a7ed53a2f756fc0200587045 Mon Sep 17 00:00:00 2001
From: overcrash <3681221+overcrash66@users.noreply.github.com>
Date: Wed, 18 Jun 2025 21:40:45 -0300
Subject: [PATCH 5/9] improve and fix stuff
---
CHANGELOG.md | 12 -
README-en.md | 389 +++++++++++++++++++++
README.md | 345 ++++++++++---------
app/controllers/v1/video.py | 113 +++++--
app/services/llm.py | 95 ++----
app/services/utils/video_effects.py | 4 +-
app/services/video.py | 506 +++++++++++++++++++++-------
app/services/voice.py | 2 +-
requirements.txt | 5 +-
9 files changed, 1078 insertions(+), 393 deletions(-)
delete mode 100644 CHANGELOG.md
create mode 100644 README-en.md
diff --git a/CHANGELOG.md b/CHANGELOG.md
deleted file mode 100644
index d9c0b4d..0000000
--- a/CHANGELOG.md
+++ /dev/null
@@ -1,12 +0,0 @@
-README Updates: The Chinese README (README.md) was heavily revised to include more English content and better structure, while the English README (README-en.md)
-was removed entirely. Much of the documentation, quick start, and FAQs are now unified in a single README and available in English.
-CHANGELOG.md Added: A new changelog file was created to track updates and features.
-Video Processing Improvements (app/services/video.py):
-Enhanced the video combining and processing logic by introducing more ffmpeg parameters for improved video encoding (e.g., preset, CRF, pixel format settings).
-Improved resizing methods (using 'lanczos') for better video quality.
-Updated functions to better handle merging, audio, and encoding parameters for output files.
-Added more robust logic for video preprocessing, merging, and clip writing, with commented-out code for optional features.
-Windows Web UI Startup Script (webui.bat):
-Improved the script to automatically activate a Python virtual environment if present.
-Clarified optional Hugging Face mirror settings.
-Overall, this commit modernizes the documentation, improves video encoding quality and flexibility, and makes the Windows startup script more robust and user-friendly.
diff --git a/README-en.md b/README-en.md
new file mode 100644
index 0000000..7c07fa2
--- /dev/null
+++ b/README-en.md
@@ -0,0 +1,389 @@
+
+
MoneyPrinterTurbo 💸
+
+
+
+
+
+
+
+
+
English | 简体中文
+
+
+

+
+
+Simply provide a
topic or
keyword for a video, and it will automatically generate the video copy, video
+materials, video subtitles, and video background music before synthesizing a high-definition short video.
+
+### WebUI
+
+
+
+### API Interface
+
+
+
+
+
+## Special Thanks 🙏
+
+Because **deploying** and **using** this project still poses a certain barrier for some beginner users, we would
+like to express our special thanks to
+
+**RecCloud (AI-Powered Multimedia Service Platform)** for providing a free `AI Video Generator` service based on this
+project. It allows for online use without deployment, which is very convenient.
+
+- Chinese version: https://reccloud.cn
+- English version: https://reccloud.com
+
+
+
+## Thanks for Sponsorship 🙏
+
+Thanks to Picwish https://picwish.com for supporting and sponsoring this project, enabling continuous updates and maintenance.
+
+Picwish focuses on the **image processing field**, providing a rich set of **image processing tools** that extremely simplify complex operations, truly making image processing easier.
+
+
+
+## Features 🎯
+
+- [x] Complete **MVC architecture**, **clearly structured** code, easy to maintain, supports both `API`
+ and `Web interface`
+- [x] Supports **AI-generated** video copy, as well as **customized copy**
+- [x] Supports various **high-definition video** sizes
+ - [x] Portrait 9:16, `1080x1920`
+ - [x] Landscape 16:9, `1920x1080`
+- [x] Supports **batch video generation**, allowing the creation of multiple videos at once, then selecting the most
+ satisfactory one
+- [x] Supports setting the **duration of video clips**, facilitating adjustments to material switching frequency
+- [x] Supports video copy in both **Chinese** and **English**
+- [x] Supports **multiple voice** synthesis, with **real-time preview** of effects
+- [x] Supports **subtitle generation**, with adjustable `font`, `position`, `color`, `size`, and also
+ supports `subtitle outlining`
+- [x] Supports **background music**, either random or specified music files, with adjustable `background music volume`
+- [x] Video material sources are **high-definition** and **royalty-free**, and you can also use your own **local materials**
+- [x] Supports integration with various models such as **OpenAI**, **Moonshot**, **Azure**, **gpt4free**, **one-api**, **Qwen**, **Google Gemini**, **Ollama**, **DeepSeek**, **ERNIE**, **Pollinations** and more
+
+### Future Plans 📅
+
+- [ ] GPT-SoVITS dubbing support
+- [ ] Optimize voice synthesis using large models for more natural and emotionally rich voice output
+- [ ] Add video transition effects for a smoother viewing experience
+- [ ] Add more video material sources, improve the matching between video materials and script
+- [ ] Add video length options: short, medium, long
+- [ ] Support more voice synthesis providers, such as OpenAI TTS
+- [ ] Automate upload to YouTube platform
+
+## Video Demos 📺
+
+### Portrait 9:16
+
+
+
+
+| ▶️ How to Add Fun to Your Life |
+▶️ What is the Meaning of Life |
+
+
+
+
+ |
+ |
+
+
+
+
+### Landscape 16:9
+
+
+
+
+| ▶️ What is the Meaning of Life |
+▶️ Why Exercise |
+
+
+
+
+ |
+ |
+
+
+
+
+## System Requirements 📦
+
+- Recommended minimum 4 CPU cores or more, 4G of memory or more, GPU is not required
+- Windows 10 or MacOS 11.0, and their later versions
+
+## Quick Start 🚀
+
+### Run in Google Colab
+Want to try MoneyPrinterTurbo without setting up a local environment? Run it directly in Google Colab!
+
+[](https://colab.research.google.com/github/harry0703/MoneyPrinterTurbo/blob/main/docs/MoneyPrinterTurbo.ipynb)
+
+
+### Windows
+
+Google Drive (v1.2.6): https://drive.google.com/file/d/1HsbzfT7XunkrCrHw5ncUjFX8XX4zAuUh/view?usp=sharing
+
+After downloading, it is recommended to **double-click** `update.bat` first to update to the **latest code**, then double-click `start.bat` to launch
+
+After launching, the browser will open automatically (if it opens blank, it is recommended to use **Chrome** or **Edge**)
+
+### Other Systems
+
+One-click startup packages have not been created yet. See the **Installation & Deployment** section below. It is recommended to use **docker** for deployment, which is more convenient.
+
+## Installation & Deployment 📥
+
+### Prerequisites
+
+#### ① Clone the Project
+
+```shell
+git clone https://github.com/harry0703/MoneyPrinterTurbo.git
+```
+
+#### ② Modify the Configuration File
+
+- Copy the `config.example.toml` file and rename it to `config.toml`
+- Follow the instructions in the `config.toml` file to configure `pexels_api_keys` and `llm_provider`, and according to
+ the llm_provider's service provider, set up the corresponding API Key
+
+### Docker Deployment 🐳
+
+#### ① Launch the Docker Container
+
+If you haven't installed Docker, please install it first https://www.docker.com/products/docker-desktop/
+If you are using a Windows system, please refer to Microsoft's documentation:
+
+1. https://learn.microsoft.com/en-us/windows/wsl/install
+2. https://learn.microsoft.com/en-us/windows/wsl/tutorials/wsl-containers
+
+```shell
+cd MoneyPrinterTurbo
+docker-compose up
+```
+
+> Note: The latest version of Docker automatically installs Docker Compose as a plug-in, so the start command becomes `docker compose up`
+
+#### ② Access the Web Interface
+
+Open your browser and visit http://0.0.0.0:8501
+
+#### ③ Access the API Interface
+
+Open your browser and visit http://0.0.0.0:8080/docs or http://0.0.0.0:8080/redoc
+
+### Manual Deployment 📦
+
+#### ① Create a Python Virtual Environment
+
+It is recommended to create a Python virtual environment using [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html)
+
+```shell
+git clone https://github.com/harry0703/MoneyPrinterTurbo.git
+cd MoneyPrinterTurbo
+conda create -n MoneyPrinterTurbo python=3.11
+conda activate MoneyPrinterTurbo
+pip install -r requirements.txt
+```
+
+#### ② Install ImageMagick
+
+###### Windows:
+
+- Download https://imagemagick.org/script/download.php Choose the Windows version, make sure to select the **static library** version, such as ImageMagick-7.1.1-32-Q16-x64-**static**.exe
+- Install the downloaded ImageMagick, **do not change the installation path**
+- Modify the `config.toml` configuration file, set `imagemagick_path` to your actual installation path
+
+###### MacOS:
+
+```shell
+brew install imagemagick
+````
+
+###### Ubuntu
+
+```shell
+sudo apt-get install imagemagick
+```
+
+###### CentOS
+
+```shell
+sudo yum install ImageMagick
+```
+
+#### ③ Launch the Web Interface 🌐
+
+Note that you need to execute the following commands in the `root directory` of the MoneyPrinterTurbo project
+
+###### Windows
+
+```bat
+webui.bat
+```
+
+###### MacOS or Linux
+
+```shell
+sh webui.sh
+```
+
+After launching, the browser will open automatically
+
+#### ④ Launch the API Service 🚀
+
+```shell
+python main.py
+```
+
+After launching, you can view the `API documentation` at http://127.0.0.1:8080/docs and directly test the interface
+online for a quick experience.
+
+## Voice Synthesis 🗣
+
+A list of all supported voices can be viewed here: [Voice List](./docs/voice-list.txt)
+
+2024-04-16 v1.1.2 Added 9 new Azure voice synthesis voices that require API KEY configuration. These voices sound more realistic.
+
+## Subtitle Generation 📜
+
+Currently, there are 2 ways to generate subtitles:
+
+- **edge**: Faster generation speed, better performance, no specific requirements for computer configuration, but the
+ quality may be unstable
+- **whisper**: Slower generation speed, poorer performance, specific requirements for computer configuration, but more
+ reliable quality
+
+You can switch between them by modifying the `subtitle_provider` in the `config.toml` configuration file
+
+It is recommended to use `edge` mode, and switch to `whisper` mode if the quality of the subtitles generated is not
+satisfactory.
+
+> Note:
+>
+> 1. In whisper mode, you need to download a model file from HuggingFace, about 3GB in size, please ensure good internet connectivity
+> 2. If left blank, it means no subtitles will be generated.
+
+> Since HuggingFace is not accessible in China, you can use the following methods to download the `whisper-large-v3` model file
+
+Download links:
+
+- Baidu Netdisk: https://pan.baidu.com/s/11h3Q6tsDtjQKTjUu3sc5cA?pwd=xjs9
+- Quark Netdisk: https://pan.quark.cn/s/3ee3d991d64b
+
+After downloading the model, extract it and place the entire directory in `.\MoneyPrinterTurbo\models`,
+The final file path should look like this: `.\MoneyPrinterTurbo\models\whisper-large-v3`
+
+```
+MoneyPrinterTurbo
+ ├─models
+ │ └─whisper-large-v3
+ │ config.json
+ │ model.bin
+ │ preprocessor_config.json
+ │ tokenizer.json
+ │ vocabulary.json
+```
+
+## Background Music 🎵
+
+Background music for videos is located in the project's `resource/songs` directory.
+> The current project includes some default music from YouTube videos. If there are copyright issues, please delete
+> them.
+
+## Subtitle Fonts 🅰
+
+Fonts for rendering video subtitles are located in the project's `resource/fonts` directory, and you can also add your
+own fonts.
+
+## Common Questions 🤔
+
+### ❓RuntimeError: No ffmpeg exe could be found
+
+Normally, ffmpeg will be automatically downloaded and detected.
+However, if your environment has issues preventing automatic downloads, you may encounter the following error:
+
+```
+RuntimeError: No ffmpeg exe could be found.
+Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.
+```
+
+In this case, you can download ffmpeg from https://www.gyan.dev/ffmpeg/builds/, unzip it, and set `ffmpeg_path` to your
+actual installation path.
+
+```toml
+[app]
+# Please set according to your actual path, note that Windows path separators are \\
+ffmpeg_path = "C:\\Users\\harry\\Downloads\\ffmpeg.exe"
+```
+
+### ❓ImageMagick is not installed on your computer
+
+[issue 33](https://github.com/harry0703/MoneyPrinterTurbo/issues/33)
+
+1. Use the `download address` given in the `example configuration` to
+   install https://imagemagick.org/archive/binaries/ImageMagick-7.1.1-30-Q16-x64-static.exe, using the static library
+2. Do not install in a path with Chinese characters to avoid unpredictable issues
+
+[issue 54](https://github.com/harry0703/MoneyPrinterTurbo/issues/54#issuecomment-2017842022)
+
+For Linux systems, you can manually install it, refer to https://cn.linux-console.net/?p=16978
+
+Thanks to [@wangwenqiao666](https://github.com/wangwenqiao666) for their research and exploration
+
+### ❓ImageMagick's security policy prevents operations related to temporary file @/tmp/tmpur5hyyto.txt
+
+You can find these policies in ImageMagick's configuration file policy.xml.
+This file is usually located in /etc/ImageMagick-`X`/ or a similar location in the ImageMagick installation directory.
+Modify the entry containing `pattern="@"`, change `rights="none"` to `rights="read|write"` to allow read and write operations on files.
+
+### ❓OSError: [Errno 24] Too many open files
+
+This issue is caused by the system's limit on the number of open files. You can solve it by modifying the system's file open limit.
+
+Check the current limit:
+
+```shell
+ulimit -n
+```
+
+If it's too low, you can increase it, for example:
+
+```shell
+ulimit -n 10240
+```
+
+### ❓Whisper model download failed, with the following error
+
+LocalEntryNotFoundError: Cannot find an appropriate cached snapshot folder for the specified revision on the local disk and
+outgoing traffic has been disabled.
+To enable repo look-ups and downloads online, pass 'local_files_only=False' as input.
+
+or
+
+An error occured while synchronizing the model Systran/faster-whisper-large-v3 from the Hugging Face Hub:
+An error happened while trying to locate the files on the Hub and we cannot find the appropriate snapshot folder for the
+specified revision on the local disk. Please check your internet connection and try again.
+Trying to load the model directly from the local cache, if it exists.
+
+Solution: [Click to see how to manually download the model from netdisk](#subtitle-generation-)
+
+## Feedback & Suggestions 📢
+
+- You can submit an [issue](https://github.com/harry0703/MoneyPrinterTurbo/issues) or
+ a [pull request](https://github.com/harry0703/MoneyPrinterTurbo/pulls).
+
+## License 📝
+
+Click to view the [`LICENSE`](LICENSE) file
+
+## Star History
+
+[](https://star-history.com/#harry0703/MoneyPrinterTurbo&Date)
\ No newline at end of file
diff --git a/README.md b/README.md
index 5ff6be2..7812761 100644
--- a/README.md
+++ b/README.md
@@ -1,75 +1,105 @@
MoneyPrinterTurbo 💸
+
+
+
+
+
+
+
+
+
+

+
+
+只需提供一个视频
主题 或
关键词 ,就可以全自动生成视频文案、视频素材、视频字幕、视频背景音乐,然后合成一个高清的短视频。
+
-Simply provide a
topic or
keyword for a video, and it will automatically generate the video copy, video
-materials, video subtitles, and video background music before synthesizing a high-definition short video.
+
Web界面
-### WebUI
+
-
-
-### API Interface
+
API界面

-## Features 🎯
+## 特别感谢 🙏
-- [x] Complete **MVC architecture**, **clearly structured** code, easy to maintain, supports both `API`
- and `Web interface`
-- [x] Supports **AI-generated** video copy, as well as **customized copy**
-- [x] Supports various **high-definition video** sizes
- - [x] Portrait 9:16, `1080x1920`
- - [x] Landscape 16:9, `1920x1080`
-- [x] Supports **batch video generation**, allowing the creation of multiple videos at once, then selecting the most
- satisfactory one
-- [x] Supports setting the **duration of video clips**, facilitating adjustments to material switching frequency
-- [x] Supports video copy in both **Chinese** and **English**
-- [x] Supports **multiple voice** synthesis, with **real-time preview** of effects
-- [x] Supports **subtitle generation**, with adjustable `font`, `position`, `color`, `size`, and also
- supports `subtitle outlining`
-- [x] Supports **background music**, either random or specified music files, with adjustable `background music volume`
-- [x] Video material sources are **high-definition** and **royalty-free**, and you can also use your own **local materials**
-- [x] Supports integration with various models such as **OpenAI**, **Moonshot**, **Azure**, **gpt4free**, **one-api**, **Qwen**, **Google Gemini**, **Ollama**, **DeepSeek**, **ERNIE**, **Pollinations** and more
+由于该项目的 **部署** 和 **使用**,对于一些小白用户来说,还是 **有一定的门槛**,在此特别感谢
+**录咖(AI智能 多媒体服务平台)** 网站基于该项目,提供的免费`AI视频生成器`服务,可以不用部署,直接在线使用,非常方便。
-### Future Plans 📅
+- 中文版:https://reccloud.cn
+- 英文版:https://reccloud.com
-- [ ] GPT-SoVITS dubbing support
-- [ ] Optimize voice synthesis using large models for more natural and emotionally rich voice output
-- [ ] Add video transition effects for a smoother viewing experience
-- [ ] Add more video material sources, improve the matching between video materials and script
-- [ ] Add video length options: short, medium, long
-- [ ] Support more voice synthesis providers, such as OpenAI TTS
-- [ ] Automate upload to YouTube platform
+
-## Video Demos 📺
+## 感谢赞助 🙏
-### Portrait 9:16
+感谢佐糖 https://picwish.cn 对该项目的支持和赞助,使得该项目能够持续的更新和维护。
+
+佐糖专注于**图像处理领域**,提供丰富的**图像处理工具**,将复杂操作极致简化,真正实现让图像处理更简单。
+
+
+
+## 功能特性 🎯
+
+- [x] 完整的 **MVC架构**,代码 **结构清晰**,易于维护,支持 `API` 和 `Web界面`
+- [x] 支持视频文案 **AI自动生成**,也可以**自定义文案**
+- [x] 支持多种 **高清视频** 尺寸
+ - [x] 竖屏 9:16,`1080x1920`
+ - [x] 横屏 16:9,`1920x1080`
+- [x] 支持 **批量视频生成**,可以一次生成多个视频,然后选择一个最满意的
+- [x] 支持 **视频片段时长** 设置,方便调节素材切换频率
+- [x] 支持 **中文** 和 **英文** 视频文案
+- [x] 支持 **多种语音** 合成,可 **实时试听** 效果
+- [x] 支持 **字幕生成**,可以调整 `字体`、`位置`、`颜色`、`大小`,同时支持`字幕描边`设置
+- [x] 支持 **背景音乐**,随机或者指定音乐文件,可设置`背景音乐音量`
+- [x] 视频素材来源 **高清**,而且 **无版权**,也可以使用自己的 **本地素材**
+- [x] 支持 **OpenAI**、**Moonshot**、**Azure**、**gpt4free**、**one-api**、**通义千问**、**Google Gemini**、**Ollama**、**DeepSeek**、 **文心一言**, **Pollinations** 等多种模型接入
+ - 中国用户建议使用 **DeepSeek** 或 **Moonshot** 作为大模型提供商(国内可直接访问,不需要VPN。注册就送额度,基本够用)
+
+
+### 后期计划 📅
+
+- [ ] GPT-SoVITS 配音支持
+- [ ] 优化语音合成,利用大模型,使其合成的声音,更加自然,情绪更加丰富
+- [ ] 增加视频转场效果,使其看起来更加的流畅
+- [ ] 增加更多视频素材来源,优化视频素材和文案的匹配度
+- [ ] 增加视频长度选项:短、中、长
+- [ ] 支持更多的语音合成服务商,比如 OpenAI TTS
+- [ ] 自动上传到YouTube平台
+
+## 视频演示 📺
+
+### 竖屏 9:16
-| ▶️ How to Add Fun to Your Life |
-▶️ What is the Meaning of Life |
+▶️ 《如何增加生活的乐趣》 |
+▶️ 《金钱的作用》 更真实的合成声音 |
+▶️ 《生命的意义是什么》 |
|
+ |
|
-### Landscape 16:9
+### 横屏 16:9
-| ▶️ What is the Meaning of Life |
-▶️ Why Exercise |
+▶️《生命的意义是什么》 |
+▶️《为什么要运动》 |
@@ -80,79 +110,86 @@ materials, video subtitles, and video background music before synthesizing a hig
-## System Requirements 📦
+## 配置要求 📦
-- Recommended minimum 4 CPU cores or more, 4G of memory or more, GPU is not required
-- Windows 10 or MacOS 11.0, and their later versions
+- 建议最低 CPU **4核** 或以上,内存 **4G** 或以上,显卡非必须
+- Windows 10 或 MacOS 11.0 以上系统
-## New updates and features will be released in the [changelog](CHANGELOG.md) file
-## Quick Start 🚀
+## 快速开始 🚀
-### Run in Google Colab
-Want to try MoneyPrinterTurbo without setting up a local environment? Run it directly in Google Colab!
+### 在 Google Colab 中运行
+免去本地环境配置,点击直接在 Google Colab 中快速体验 MoneyPrinterTurbo
[](https://colab.research.google.com/github/harry0703/MoneyPrinterTurbo/blob/main/docs/MoneyPrinterTurbo.ipynb)
-### Windows
+### Windows一键启动包
-Google Drive (v1.2.6): https://drive.google.com/file/d/1HsbzfT7XunkrCrHw5ncUjFX8XX4zAuUh/view?usp=sharing
+下载一键启动包,解压直接使用(路径不要有 **中文**、**特殊字符**、**空格**)
-After downloading, it is recommended to **double-click** `update.bat` first to update to the **latest code**, then double-click `start.bat` to launch
+- 百度网盘(v1.2.6): https://pan.baidu.com/s/1wg0UaIyXpO3SqIpaq790SQ?pwd=sbqx 提取码: sbqx
+- Google Drive (v1.2.6): https://drive.google.com/file/d/1HsbzfT7XunkrCrHw5ncUjFX8XX4zAuUh/view?usp=sharing
-After launching, the browser will open automatically (if it opens blank, it is recommended to use **Chrome** or **Edge**)
+下载后,建议先**双击执行** `update.bat` 更新到**最新代码**,然后双击 `start.bat` 启动
-### Other Systems
+启动后,会自动打开浏览器(如果打开是空白,建议换成 **Chrome** 或者 **Edge** 打开)
-One-click startup packages have not been created yet. See the **Installation & Deployment** section below. It is recommended to use **docker** for deployment, which is more convenient.
+## 安装部署 📥
-## Installation & Deployment 📥
+### 前提条件
-### Prerequisites
+- 尽量不要使用 **中文路径**,避免出现一些无法预料的问题
+- 请确保你的 **网络** 是正常的,VPN需要打开`全局流量`模式
-#### ① Clone the Project
+#### ① 克隆代码
```shell
git clone https://github.com/harry0703/MoneyPrinterTurbo.git
```
-#### ② Modify the Configuration File
+#### ② 修改配置文件(可选,建议启动后也可以在 WebUI 里面配置)
-- Copy the `config.example.toml` file and rename it to `config.toml`
-- Follow the instructions in the `config.toml` file to configure `pexels_api_keys` and `llm_provider`, and according to
- the llm_provider's service provider, set up the corresponding API Key
+- 将 `config.example.toml` 文件复制一份,命名为 `config.toml`
+- 按照 `config.toml` 文件中的说明,配置好 `pexels_api_keys` 和 `llm_provider`,并根据 llm_provider 对应的服务商,配置相关的
+ API Key
-### Docker Deployment 🐳
+### Docker部署 🐳
-#### ① Launch the Docker Container
+#### ① 启动Docker
-If you haven't installed Docker, please install it first https://www.docker.com/products/docker-desktop/
-If you are using a Windows system, please refer to Microsoft's documentation:
+如果未安装 Docker,请先安装 https://www.docker.com/products/docker-desktop/
-1. https://learn.microsoft.com/en-us/windows/wsl/install
-2. https://learn.microsoft.com/en-us/windows/wsl/tutorials/wsl-containers
+如果是Windows系统,请参考微软的文档:
+
+1. https://learn.microsoft.com/zh-cn/windows/wsl/install
+2. https://learn.microsoft.com/zh-cn/windows/wsl/tutorials/wsl-containers
```shell
cd MoneyPrinterTurbo
docker-compose up
```
-> Note:The latest version of docker will automatically install docker compose in the form of a plug-in, and the start command is adjusted to `docker compose up `
+> 注意:最新版的docker安装时会自动以插件的形式安装docker compose,启动命令调整为docker compose up
-#### ② Access the Web Interface
+#### ② 访问Web界面
-Open your browser and visit http://0.0.0.0:8501
+打开浏览器,访问 http://0.0.0.0:8501
-#### ③ Access the API Interface
+#### ③ 访问API文档
-Open your browser and visit http://0.0.0.0:8080/docs Or http://0.0.0.0:8080/redoc
+打开浏览器,访问 http://0.0.0.0:8080/docs 或者 http://0.0.0.0:8080/redoc
-### Manual Deployment 📦
+### 手动部署 📦
-#### ① Create a Python Virtual Environment
+> 视频教程
-It is recommended to create a Python virtual environment using [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html)
+- 完整的使用演示:https://v.douyin.com/iFhnwsKY/
+- 如何在Windows上部署:https://v.douyin.com/iFyjoW3M
+
+#### ① 创建虚拟环境
+
+建议使用 [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html) 创建 python 虚拟环境
```shell
git clone https://github.com/harry0703/MoneyPrinterTurbo.git
@@ -162,35 +199,30 @@ conda activate MoneyPrinterTurbo
pip install -r requirements.txt
```
-#### ② Install ImageMagick
+#### ② 安装好 ImageMagick
-###### Windows:
+- Windows:
+ - 下载 https://imagemagick.org/script/download.php 选择Windows版本,切记一定要选择 **静态库** 版本,比如
+ ImageMagick-7.1.1-32-Q16-x64-**static**.exe
+ - 安装下载好的 ImageMagick,**注意不要修改安装路径**
+ - 修改 `配置文件 config.toml` 中的 `imagemagick_path` 为你的 **实际安装路径**
-- Download https://imagemagick.org/script/download.php Choose the Windows version, make sure to select the **static library** version, such as ImageMagick-7.1.1-32-Q16-x64-**static**.exe
-- Install the downloaded ImageMagick, **do not change the installation path**
-- Modify the `config.toml` configuration file, set `imagemagick_path` to your actual installation path
+- MacOS:
+ ```shell
+ brew install imagemagick
+ ````
+- Ubuntu
+ ```shell
+ sudo apt-get install imagemagick
+ ```
+- CentOS
+ ```shell
+ sudo yum install ImageMagick
+ ```
-###### MacOS:
+#### ③ 启动Web界面 🌐
-```shell
-brew install imagemagick
-````
-
-###### Ubuntu
-
-```shell
-sudo apt-get install imagemagick
-```
-
-###### CentOS
-
-```shell
-sudo yum install ImageMagick
-```
-
-#### ③ Launch the Web Interface 🌐
-
-Note that you need to execute the following commands in the `root directory` of the MoneyPrinterTurbo project
+注意需要到 MoneyPrinterTurbo 项目 `根目录` 下执行以下命令
###### Windows
@@ -204,54 +236,50 @@ webui.bat
sh webui.sh
```
-After launching, the browser will open automatically
+启动后,会自动打开浏览器(如果打开是空白,建议换成 **Chrome** 或者 **Edge** 打开)
-#### ④ Launch the API Service 🚀
+#### ④ 启动API服务 🚀
```shell
python main.py
```
-After launching, you can view the `API documentation` at http://127.0.0.1:8080/docs and directly test the interface
-online for a quick experience.
+启动后,可以查看 `API文档` http://127.0.0.1:8080/docs 或者 http://127.0.0.1:8080/redoc 直接在线调试接口,快速体验。
-## Voice Synthesis 🗣
+## 语音合成 🗣
-A list of all supported voices can be viewed here: [Voice List](./docs/voice-list.txt)
+所有支持的声音列表,可以查看:[声音列表](./docs/voice-list.txt)
-2024-04-16 v1.1.2 Added 9 new Azure voice synthesis voices that require API KEY configuration. These voices sound more realistic.
+2024-04-16 v1.1.2 新增了9种Azure的语音合成声音,需要配置API KEY,该声音合成得更加真实。
-## Subtitle Generation 📜
+## 字幕生成 📜
-Currently, there are 2 ways to generate subtitles:
+当前支持2种字幕生成方式:
-- **edge**: Faster generation speed, better performance, no specific requirements for computer configuration, but the
- quality may be unstable
-- **whisper**: Slower generation speed, poorer performance, specific requirements for computer configuration, but more
- reliable quality
+- **edge**: 生成`速度快`,性能更好,对电脑配置没有要求,但是质量可能不稳定
+- **whisper**: 生成`速度慢`,性能较差,对电脑配置有一定要求,但是`质量更可靠`。
-You can switch between them by modifying the `subtitle_provider` in the `config.toml` configuration file
+可以修改 `config.toml` 配置文件中的 `subtitle_provider` 进行切换
-It is recommended to use `edge` mode, and switch to `whisper` mode if the quality of the subtitles generated is not
-satisfactory.
+建议使用 `edge` 模式,如果生成的字幕质量不好,再切换到 `whisper` 模式
-> Note:
->
-> 1. In whisper mode, you need to download a model file from HuggingFace, about 3GB in size, please ensure good internet connectivity
-> 2. If left blank, it means no subtitles will be generated.
+> 注意:
-> Since HuggingFace is not accessible in China, you can use the following methods to download the `whisper-large-v3` model file
+1. whisper 模式下需要到 HuggingFace 下载一个模型文件,大约 3GB 左右,请确保网络通畅
+2. 如果留空,表示不生成字幕。
-Download links:
+> 由于国内无法访问 HuggingFace,可以使用以下方法下载 `whisper-large-v3` 的模型文件
-- Baidu Netdisk: https://pan.baidu.com/s/11h3Q6tsDtjQKTjUu3sc5cA?pwd=xjs9
-- Quark Netdisk: https://pan.quark.cn/s/3ee3d991d64b
+下载地址:
-After downloading the model, extract it and place the entire directory in `.\MoneyPrinterTurbo\models`,
-The final file path should look like this: `.\MoneyPrinterTurbo\models\whisper-large-v3`
+- 百度网盘: https://pan.baidu.com/s/11h3Q6tsDtjQKTjUu3sc5cA?pwd=xjs9
+- 夸克网盘:https://pan.quark.cn/s/3ee3d991d64b
+
+模型下载后解压,整个目录放到 `.\MoneyPrinterTurbo\models` 里面,
+最终的文件路径应该是这样: `.\MoneyPrinterTurbo\models\whisper-large-v3`
```
-MoneyPrinterTurbo
+MoneyPrinterTurbo
├─models
│ └─whisper-large-v3
│ config.json
@@ -261,98 +289,81 @@ MoneyPrinterTurbo
│ vocabulary.json
```
-## Background Music 🎵
+## 背景音乐 🎵
-Background music for videos is located in the project's `resource/songs` directory.
-> The current project includes some default music from YouTube videos. If there are copyright issues, please delete
-> them.
+用于视频的背景音乐,位于项目的 `resource/songs` 目录下。
+> 当前项目里面放了一些默认的音乐,来自于 YouTube 视频,如有侵权,请删除。
-## Subtitle Fonts 🅰
+## 字幕字体 🅰
-Fonts for rendering video subtitles are located in the project's `resource/fonts` directory, and you can also add your
-own fonts.
+用于视频字幕的渲染,位于项目的 `resource/fonts` 目录下,你也可以放进去自己的字体。
-## Common Questions 🤔
+## 常见问题 🤔
### ❓RuntimeError: No ffmpeg exe could be found
-Normally, ffmpeg will be automatically downloaded and detected.
-However, if your environment has issues preventing automatic downloads, you may encounter the following error:
+通常情况下,ffmpeg 会被自动下载,并且会被自动检测到。
+但是如果你的环境有问题,无法自动下载,可能会遇到如下错误:
```
RuntimeError: No ffmpeg exe could be found.
Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.
```
-In this case, you can download ffmpeg from https://www.gyan.dev/ffmpeg/builds/, unzip it, and set `ffmpeg_path` to your
-actual installation path.
+此时你可以从 https://www.gyan.dev/ffmpeg/builds/ 下载ffmpeg,解压后,设置 `ffmpeg_path` 为你的实际安装路径即可。
```toml
[app]
-# Please set according to your actual path, note that Windows path separators are \\
+# 请根据你的实际路径设置,注意 Windows 路径分隔符为 \\
ffmpeg_path = "C:\\Users\\harry\\Downloads\\ffmpeg.exe"
```
-### ❓ImageMagick is not installed on your computer
+### ❓ImageMagick的安全策略阻止了与临时文件@/tmp/tmpur5hyyto.txt相关的操作
-[issue 33](https://github.com/harry0703/MoneyPrinterTurbo/issues/33)
-
-1. Follow the `example configuration` provided `download address` to
- install https://imagemagick.org/archive/binaries/ImageMagick-7.1.1-30-Q16-x64-static.exe, using the static library
-2. Do not install in a path with Chinese characters to avoid unpredictable issues
-
-[issue 54](https://github.com/harry0703/MoneyPrinterTurbo/issues/54#issuecomment-2017842022)
-
-For Linux systems, you can manually install it, refer to https://cn.linux-console.net/?p=16978
-
-Thanks to [@wangwenqiao666](https://github.com/wangwenqiao666) for their research and exploration
-
-### ❓ImageMagick's security policy prevents operations related to temporary file @/tmp/tmpur5hyyto.txt
-
-You can find these policies in ImageMagick's configuration file policy.xml.
-This file is usually located in /etc/ImageMagick-`X`/ or a similar location in the ImageMagick installation directory.
-Modify the entry containing `pattern="@"`, change `rights="none"` to `rights="read|write"` to allow read and write operations on files.
+可以在ImageMagick的配置文件policy.xml中找到这些策略。
+这个文件通常位于 /etc/ImageMagick-`X`/ 或 ImageMagick 安装目录的类似位置。
+修改包含`pattern="@"`的条目,将`rights="none"`更改为`rights="read|write"`以允许对文件的读写操作。
### ❓OSError: [Errno 24] Too many open files
-This issue is caused by the system's limit on the number of open files. You can solve it by modifying the system's file open limit.
+这个问题是由于系统打开文件数限制导致的,可以通过修改系统的文件打开数限制来解决。
-Check the current limit:
+查看当前限制
```shell
ulimit -n
```
-If it's too low, you can increase it, for example:
+如果过低,可以调高一些,比如
```shell
ulimit -n 10240
```
-### ❓Whisper model download failed, with the following error
+### ❓Whisper 模型下载失败,出现如下错误
LocalEntryNotfoundEror: Cannot find an appropriate cached snapshotfolderfor the specified revision on the local disk and
outgoing trafic has been disabled.
To enablerepo look-ups and downloads online, pass 'local files only=False' as input.
-or
+或者
An error occured while synchronizing the model Systran/faster-whisper-large-v3 from the Hugging Face Hub:
An error happened while trying to locate the files on the Hub and we cannot find the appropriate snapshot folder for the
specified revision on the local disk. Please check your internet connection and try again.
Trying to load the model directly from the local cache, if it exists.
-Solution: [Click to see how to manually download the model from netdisk](#subtitle-generation-)
+解决方法:[点击查看如何从网盘手动下载模型](#%E5%AD%97%E5%B9%95%E7%94%9F%E6%88%90-)
-## Feedback & Suggestions 📢
+## 反馈建议 📢
-- You can submit an [issue](https://github.com/harry0703/MoneyPrinterTurbo/issues) or
- a [pull request](https://github.com/harry0703/MoneyPrinterTurbo/pulls).
+- 可以提交 [issue](https://github.com/harry0703/MoneyPrinterTurbo/issues)
+ 或者 [pull request](https://github.com/harry0703/MoneyPrinterTurbo/pulls)。
-## License 📝
+## 许可证 📝
-Click to view the [`LICENSE`](LICENSE) file
+点击查看 [`LICENSE`](LICENSE) 文件
## Star History
-[](https://star-history.com/#harry0703/MoneyPrinterTurbo&Date)
+[](https://star-history.com/#harry0703/MoneyPrinterTurbo&Date)
\ No newline at end of file
diff --git a/app/controllers/v1/video.py b/app/controllers/v1/video.py
index e80d762..310ad01 100644
--- a/app/controllers/v1/video.py
+++ b/app/controllers/v1/video.py
@@ -4,7 +4,7 @@ import pathlib
import shutil
from typing import Union
-from fastapi import BackgroundTasks, Depends, Path, Request, UploadFile
+from fastapi import BackgroundTasks, Depends, Path, Query, Request, UploadFile
from fastapi.params import File
from fastapi.responses import FileResponse, StreamingResponse
from loguru import logger
@@ -41,7 +41,10 @@ _redis_db = config.app.get("redis_db", 0)
_redis_password = config.app.get("redis_password", None)
_max_concurrent_tasks = config.app.get("max_concurrent_tasks", 5)
-redis_url = f"redis://:{_redis_password}@{_redis_host}:{_redis_port}/{_redis_db}"
+if _redis_password:
+ redis_url = f"redis://:{_redis_password}@{_redis_host}:{_redis_port}/{_redis_db}"
+else:
+ redis_url = f"redis://{_redis_host}:{_redis_port}/{_redis_db}"
# 根据配置选择合适的任务管理器
if _enable_redis:
task_manager = RedisTaskManager(
@@ -94,8 +97,6 @@ def create_task(
task_id=task_id, status_code=400, message=f"{request_id}: {str(e)}"
)
-from fastapi import Query
-
@router.get("/tasks", response_model=TaskQueryResponse, summary="Get all tasks")
def get_all_tasks(request: Request, page: int = Query(1, ge=1), page_size: int = Query(10, ge=1)):
request_id = base.get_task_id(request)
@@ -131,7 +132,7 @@ def get_task(
def file_to_uri(file):
if not file.startswith(endpoint):
- _uri_path = v.replace(task_dir, "tasks").replace("\\", "/")
+ _uri_path = file.replace(task_dir, "tasks").replace("\\", "/")
_uri_path = f"{endpoint}/{_uri_path}"
else:
_uri_path = file
@@ -227,20 +228,44 @@ def upload_bgm_file(request: Request, file: UploadFile = File(...)):
async def stream_video(request: Request, file_path: str):
tasks_dir = utils.task_dir()
video_path = os.path.join(tasks_dir, file_path)
+
+ # Check if the file exists
+ if not os.path.exists(video_path):
+ raise HttpException(
+ "", status_code=404, message=f"File not found: {file_path}"
+ )
+
range_header = request.headers.get("Range")
video_size = os.path.getsize(video_path)
start, end = 0, video_size - 1
length = video_size
if range_header:
- range_ = range_header.split("bytes=")[1]
- start, end = [int(part) if part else None for part in range_.split("-")]
- if start is None:
- start = video_size - end
- end = video_size - 1
- if end is None:
- end = video_size - 1
- length = end - start + 1
+ try:
+ range_ = range_header.split("bytes=")[1]
+ start, end = [int(part) if part else None for part in range_.split("-")]
+
+ if start is None and end is not None:
+ # Format: bytes=-N (last N bytes)
+ start = max(0, video_size - end)
+ end = video_size - 1
+ elif end is None:
+ # Format: bytes=N- (from byte N to the end)
+ end = video_size - 1
+
+ # Ensure values are within valid range
+ start = max(0, min(start, video_size - 1))
+ end = min(end, video_size - 1)
+
+ if start > end:
+ # Invalid range, serve entire file
+ start, end = 0, video_size - 1
+
+ length = end - start + 1
+ except (ValueError, IndexError):
+ # On parsing error, serve entire content
+ start, end = 0, video_size - 1
+ length = video_size
def file_iterator(file_path, offset=0, bytes_to_read=None):
with open(file_path, "rb") as f:
@@ -258,30 +283,54 @@ async def stream_video(request: Request, file_path: str):
file_iterator(video_path, start, length), media_type="video/mp4"
)
response.headers["Content-Range"] = f"bytes {start}-{end}/{video_size}"
- response.headers["Accept-Ranges"] = "bytes"
- response.headers["Content-Length"] = str(length)
- response.status_code = 206 # Partial Content
-
return response
-
@router.get("/download/{file_path:path}")
-async def download_video(_: Request, file_path: str):
+async def download_video(request: Request, file_path: str):
"""
download video
- :param _: Request request
+    :param request: incoming request (not used by the handler)
:param file_path: video file path, eg: /cd1727ed-3473-42a2-a7da-4faafafec72b/final-1.mp4
:return: video file
+    :raises HttpException: 404 when the file is missing or resolves outside
+        the tasks directory, 403 when it is not readable, 500 on any other
+        failure
"""
- tasks_dir = utils.task_dir()
- video_path = os.path.join(tasks_dir, file_path)
- file_path = pathlib.Path(video_path)
- filename = file_path.stem
- extension = file_path.suffix
- headers = {"Content-Disposition": f"attachment; filename={filename}{extension}"}
- return FileResponse(
- path=video_path,
- headers=headers,
- filename=f"{filename}{extension}",
- media_type=f"video/{extension[1:]}",
- )
+    try:
+        tasks_dir = utils.task_dir()
+        video_path = os.path.join(tasks_dir, file_path)
+
+        # file_path comes straight from the URL: refuse anything that
+        # resolves outside the tasks directory (e.g. "../" segments or an
+        # absolute path, which os.path.join would accept as-is).
+        tasks_root = os.path.join(os.path.realpath(tasks_dir), "")
+        if not os.path.realpath(video_path).startswith(tasks_root):
+            raise HttpException(
+                "", status_code=404, message=f"File not found: {file_path}"
+            )
+
+        # Check that the target exists and is a regular file; a directory
+        # cannot be sent by FileResponse.
+        if not os.path.isfile(video_path):
+            raise HttpException(
+                "", status_code=404, message=f"File not found: {file_path}"
+            )
+
+        # Check if the file is readable
+        if not os.access(video_path, os.R_OK):
+            logger.error(f"File not readable: {video_path}")
+            raise HttpException(
+                "", status_code=403, message=f"File not accessible: {file_path}"
+            )
+
+        # Get the filename and extension
+        path_obj = pathlib.Path(video_path)
+        filename = path_obj.stem
+        extension = path_obj.suffix
+
+        # Determine appropriate media type; the subtype is lower-cased so
+        # that "clip.MP4" is served as video/mp4 rather than video/MP4.
+        media_type = "application/octet-stream"
+        if extension.lower() in ['.mp4', '.webm']:
+            media_type = f"video/{extension.lower()[1:]}"
+
+        headers = {"Content-Disposition": f"attachment; filename={filename}{extension}"}
+
+        logger.info(f"Sending file: {video_path}, size: {os.path.getsize(video_path)}")
+        return FileResponse(
+            path=video_path,
+            headers=headers,
+            filename=f"{filename}{extension}",
+            media_type=media_type,
+        )
+    except HttpException:
+        # Keep the 404/403 raised above instead of rewrapping them as 500.
+        raise
+    except Exception as e:
+        logger.exception(f"Error downloading file: {str(e)}")
+        raise HttpException(
+            "", status_code=500, message=f"Failed to download file: {str(e)}"
+        )
diff --git a/app/services/llm.py b/app/services/llm.py
index 6c954a8..2c45ef9 100644
--- a/app/services/llm.py
+++ b/app/services/llm.py
@@ -1,7 +1,6 @@
import json
import logging
import re
-import requests
from typing import List
import g4f
@@ -83,61 +82,23 @@ def _generate_response(prompt: str) -> str:
raise ValueError(
f"{llm_provider}: secret_key is not set, please set it in the config.toml file."
)
- elif llm_provider == "pollinations":
- try:
- base_url = config.app.get("pollinations_base_url", "")
- if not base_url:
- base_url = "https://text.pollinations.ai/openai"
- model_name = config.app.get("pollinations_model_name", "openai-fast")
-
- # Prepare the payload
- payload = {
- "model": model_name,
- "messages": [
- {"role": "user", "content": prompt}
- ],
- "seed": 101 # Optional but helps with reproducibility
- }
-
- # Optional parameters if configured
- if config.app.get("pollinations_private"):
- payload["private"] = True
- if config.app.get("pollinations_referrer"):
- payload["referrer"] = config.app.get("pollinations_referrer")
-
- headers = {
- "Content-Type": "application/json"
- }
-
- # Make the API request
- response = requests.post(base_url, headers=headers, json=payload)
- response.raise_for_status()
- result = response.json()
-
- if result and "choices" in result and len(result["choices"]) > 0:
- content = result["choices"][0]["message"]["content"]
- return content.replace("\n", "")
- else:
- raise Exception(f"[{llm_provider}] returned an invalid response format")
-
- except requests.exceptions.RequestException as e:
- raise Exception(f"[{llm_provider}] request failed: {str(e)}")
- except Exception as e:
- raise Exception(f"[{llm_provider}] error: {str(e)}")
+ else:
+ raise ValueError(
+ "llm_provider is not set, please set it in the config.toml file."
+ )
- if llm_provider not in ["pollinations", "ollama"]: # Skip validation for providers that don't require API key
- if not api_key:
- raise ValueError(
- f"{llm_provider}: api_key is not set, please set it in the config.toml file."
- )
- if not model_name:
- raise ValueError(
- f"{llm_provider}: model_name is not set, please set it in the config.toml file."
- )
- if not base_url:
- raise ValueError(
- f"{llm_provider}: base_url is not set, please set it in the config.toml file."
- )
+ if not api_key:
+ raise ValueError(
+ f"{llm_provider}: api_key is not set, please set it in the config.toml file."
+ )
+ if not model_name:
+ raise ValueError(
+ f"{llm_provider}: model_name is not set, please set it in the config.toml file."
+ )
+ if not base_url:
+ raise ValueError(
+ f"{llm_provider}: base_url is not set, please set it in the config.toml file."
+ )
if llm_provider == "qwen":
import dashscope
@@ -211,6 +172,8 @@ def _generate_response(prompt: str) -> str:
return generated_text
if llm_provider == "cloudflare":
+ import requests
+
response = requests.post(
f"https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run/{model_name}",
headers={"Authorization": f"Bearer {api_key}"},
@@ -229,15 +192,20 @@ def _generate_response(prompt: str) -> str:
return result["result"]["response"]
if llm_provider == "ernie":
- response = requests.post(
- "https://aip.baidubce.com/oauth/2.0/token",
- params={
- "grant_type": "client_credentials",
- "client_id": api_key,
- "client_secret": secret_key,
- }
+ import requests
+
+ params = {
+ "grant_type": "client_credentials",
+ "client_id": api_key,
+ "client_secret": secret_key,
+ }
+ access_token = (
+ requests.post(
+ "https://aip.baidubce.com/oauth/2.0/token", params=params
+ )
+ .json()
+ .get("access_token")
)
- access_token = response.json().get("access_token")
url = f"{base_url}?access_token={access_token}"
payload = json.dumps(
@@ -441,4 +409,3 @@ if __name__ == "__main__":
)
print("######################")
print(search_terms)
-
\ No newline at end of file
diff --git a/app/services/utils/video_effects.py b/app/services/utils/video_effects.py
index 6cba8eb..e6213d2 100644
--- a/app/services/utils/video_effects.py
+++ b/app/services/utils/video_effects.py
@@ -1,6 +1,6 @@
from moviepy import Clip, vfx
-
-
+#from moviepy import Clip
+#import moviepy.video.fx.all as vfx
# FadeIn
def fadein_transition(clip: Clip, t: float) -> Clip:
return clip.with_effects([vfx.FadeIn(t)])
diff --git a/app/services/video.py b/app/services/video.py
index 1a79e30..6ce1108 100644
--- a/app/services/video.py
+++ b/app/services/video.py
@@ -4,7 +4,9 @@ import os
import random
import gc
import shutil
+import uuid
from typing import List
+import multiprocessing
from loguru import logger
from moviepy import (
AudioFileClip,
@@ -18,7 +20,8 @@ from moviepy import (
concatenate_videoclips,
)
from moviepy.video.tools.subtitles import SubtitlesClip
-from PIL import ImageFont
+from moviepy.video.io.ffmpeg_writer import FFMPEG_VideoWriter
+from PIL import Image, ImageEnhance, ImageFont
from app.models import const
from app.models.schema import (
@@ -47,45 +50,135 @@ class SubClippedVideoClip:
return f"SubClippedVideoClip(file_path={self.file_path}, start_time={self.start_time}, end_time={self.end_time}, duration={self.duration}, width={self.width}, height={self.height})"
+# Improved video quality settings
audio_codec = "aac"
video_codec = "libx264"
fps = 30
+video_bitrate = "25M" # Increased from 15M for better quality
+audio_bitrate = "320k" # Increased from 192k for better audio quality
+crf = "15" # Adjusted from 16 - better balance between quality and file size
+preset = "slower" # Encoder preset - balance between speed and compression
+
+def get_optimal_encoding_params(width, height, content_type="video"):
+    """Pick CRF, bitrate and preset for a frame size and content type.
+
+    :param width: frame width in pixels
+    :param height: frame height in pixels
+    :param content_type: "image" selects the higher-quality table; any other
+        value is treated as regular video
+    :return: a new dict with string values under "crf", "bitrate", "preset"
+    """
+    full_hd = 1920 * 1080
+    hd = 1280 * 720
+
+    # Each table lists (minimum pixel count, settings) from the largest tier
+    # down, followed by the settings used below every listed tier.
+    if content_type == "image":
+        # Images need higher quality settings
+        tiers = (
+            (full_hd, ("12", "35M", "slower")),
+            (hd, ("16", "30M", "slower")),
+        )
+        fallback = ("18", "25M", "slow")
+    else:
+        # Regular video content
+        tiers = (
+            (full_hd, ("18", "30M", "slower")),
+            (hd, ("20", "25M", "slower")),
+        )
+        fallback = ("22", "20M", "slow")
+
+    area = width * height
+    chosen = fallback
+    for min_area, settings in tiers:
+        if area >= min_area:
+            chosen = settings
+            break
+
+    crf_value, bitrate_value, preset_value = chosen
+    return {"crf": crf_value, "bitrate": bitrate_value, "preset": preset_value}
+
+def get_standard_ffmpeg_params(width, height, content_type="video"):
+    """Build the extra FFmpeg output arguments shared by every encode.
+
+    :param width: output frame width in pixels
+    :param height: output frame height in pixels
+    :param content_type: "image" or "video"; forwarded to
+        get_optimal_encoding_params and used to choose the pixel format
+    :return: flat list of FFmpeg command-line arguments (option, value, ...)
+    """
+    params = get_optimal_encoding_params(width, height, content_type)
+    if content_type == "image" or (width * height >= 1920 * 1080):
+        # Use higher quality for images and high-res content
+        pix_fmt = "yuv444p"
+        # libx264 refuses 4:4:4 input under the plain "high" profile, so the
+        # profile has to be raised together with the pixel format.
+        profile = "high444"
+    else:
+        # Use more compatible format for standard video
+        pix_fmt = "yuv420p"
+        profile = "high"
+
+    # NOTE(review): "-bf", "-g" and "-sc_threshold" below overlap with
+    # bframes/keyint/scenecut inside -x264-params and carry different values;
+    # confirm which setting libx264 ends up using.
+    return [
+        "-crf", params["crf"],
+        "-preset", params["preset"],
+        "-profile:v", profile,
+        "-level", "4.1",
+        "-x264-params", "keyint=60:min-keyint=60:scenecut=0:ref=3:bframes=3:b-adapt=2:direct=auto:me=umh:subme=8:trellis=2:aq-mode=2",
+        "-pix_fmt", pix_fmt,
+        "-movflags", "+faststart",
+        "-tune", "film",
+        "-colorspace", "bt709",
+        "-color_primaries", "bt709",
+        "-color_trc", "bt709",
+        "-color_range", "tv",
+        "-bf", "5",  # More B-frames for better compression
+        "-g", "60",  # GOP size
+        "-qmin", "10",  # Minimum quantizer
+        "-qmax", "51",  # Maximum quantizer
+        "-qdiff", "4",  # Max difference between quantizers
+        "-sc_threshold", "40",  # Scene change threshold
+        "-flags", "+cgop+mv4"  # Additional encoding flags
+    ]
+
+def ensure_even_dimensions(width, height):
+    """Round each dimension down to an even number (required for h264).
+
+    :param width: frame width in pixels
+    :param height: frame height in pixels
+    :return: (width, height) tuple; values that are already even are
+        returned unchanged, odd values are reduced by one
+    """
+    return tuple(
+        side - 1 if side % 2 else side
+        for side in (width, height)
+    )
def close_clip(clip):
+ """Release the readers held by a clip and by its child clips.
+
+ Child clips are closed first (recursively), then the audio reader, the
+ mask reader and the clip's own reader; afterwards the child list is
+ emptied and the clip's own close() is called. Errors are logged, not
+ raised. gc.collect() runs once the clip has been processed.
+
+ :param clip: clip object to close; None is accepted and ignored
+ """
if clip is None:
return
try:
- # close main resources
- if hasattr(clip, 'reader') and clip.reader is not None:
- clip.reader.close()
-
- # close audio resources
- if hasattr(clip, 'audio') and clip.audio is not None:
- if hasattr(clip.audio, 'reader') and clip.audio.reader is not None:
- clip.audio.reader.close()
- del clip.audio
-
- # close mask resources
- if hasattr(clip, 'mask') and clip.mask is not None:
- if hasattr(clip.mask, 'reader') and clip.mask.reader is not None:
- clip.mask.reader.close()
- del clip.mask
-
- # handle child clips in composite clips
+ # handle child clips in composite clips first
if hasattr(clip, 'clips') and clip.clips:
for child_clip in clip.clips:
if child_clip is not clip: # avoid possible circular references
close_clip(child_clip)
+
+ # close audio resources with better error handling
+ if hasattr(clip, 'audio') and clip.audio is not None:
+ if hasattr(clip.audio, 'reader') and clip.audio.reader is not None:
+ try:
+ # Close the reader only while its process is still running
+ # (poll() is None), or when it exposes no process at all;
+ # a reader whose process already exited is left untouched.
+ if hasattr(clip.audio.reader, 'proc') and clip.audio.reader.proc is not None:
+ if clip.audio.reader.proc.poll() is None:
+ clip.audio.reader.close()
+ else:
+ clip.audio.reader.close()
+ except (OSError, AttributeError):
+ # Handle invalid handles and missing attributes
+ pass
+ clip.audio = None
+
+ # close mask resources
+ if hasattr(clip, 'mask') and clip.mask is not None:
+ if hasattr(clip.mask, 'reader') and clip.mask.reader is not None:
+ try:
+ clip.mask.reader.close()
+ except (OSError, AttributeError):
+ pass
+ clip.mask = None
+
+ # close main resources
+ if hasattr(clip, 'reader') and clip.reader is not None:
+ try:
+ clip.reader.close()
+ except (OSError, AttributeError):
+ pass
# clear clip list
if hasattr(clip, 'clips'):
clip.clips = []
+ # call clip's own close method if it exists
+ if hasattr(clip, 'close'):
+ try:
+ clip.close()
+ except (OSError, AttributeError):
+ pass
+
except Exception as e:
logger.error(f"failed to close clip: {str(e)}")
- del clip
+ # NOTE(review): the bare except below also swallows KeyboardInterrupt and
+ # SystemExit; deleting a bound local name is not expected to raise.
+ try:
+ del clip
+ except:
+ pass
gc.collect()
def delete_files(files: List[str] | str):
@@ -94,9 +187,10 @@ def delete_files(files: List[str] | str):
for file in files:
try:
- os.remove(file)
- except:
- pass
+ if os.path.exists(file):
+ os.remove(file)
+ except Exception as e:
+ logger.debug(f"failed to delete file {file}: {str(e)}")
def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
if not bgm_type:
@@ -109,11 +203,11 @@ def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
suffix = "*.mp3"
song_dir = utils.song_dir()
files = glob.glob(os.path.join(song_dir, suffix))
- return random.choice(files)
+ if files:
+ return random.choice(files)
return ""
-
def combine_videos(
combined_video_path: str,
video_paths: List[str],
@@ -122,23 +216,25 @@ def combine_videos(
video_concat_mode: VideoConcatMode = VideoConcatMode.random,
video_transition_mode: VideoTransitionMode = None,
max_clip_duration: int = 5,
- threads: int = 2,
+ #threads: int = 2,
+ threads = min(multiprocessing.cpu_count(), 6),
) -> str:
audio_clip = AudioFileClip(audio_file)
audio_duration = audio_clip.duration
logger.info(f"audio duration: {audio_duration} seconds")
# Required duration of each clip
- req_dur = audio_duration / len(video_paths)
- req_dur = max_clip_duration
- logger.info(f"maximum clip duration: {req_dur} seconds")
+ req_dur = min(audio_duration / len(video_paths), max_clip_duration)
+ logger.info(f"calculated clip duration: {req_dur} seconds")
output_dir = os.path.dirname(combined_video_path)
aspect = VideoAspect(video_aspect)
video_width, video_height = aspect.to_resolution()
+ video_width, video_height = ensure_even_dimensions(video_width, video_height)
processed_clips = []
subclipped_items = []
video_duration = 0
+
for video_path in video_paths:
clip = VideoFileClip(video_path)
clip_duration = clip.duration
@@ -150,7 +246,7 @@ def combine_videos(
while start_time < clip_duration:
end_time = min(start_time + max_clip_duration, clip_duration)
if clip_duration - start_time >= max_clip_duration:
- subclipped_items.append(SubClippedVideoClip(file_path= video_path, start_time=start_time, end_time=end_time, width=clip_w, height=clip_h))
+ subclipped_items.append(SubClippedVideoClip(file_path=video_path, start_time=start_time, end_time=end_time, width=clip_w, height=clip_h))
start_time = end_time
if video_concat_mode.value == VideoConcatMode.sequential.value:
break
@@ -173,14 +269,16 @@ def combine_videos(
clip_duration = clip.duration
# Not all videos are same size, so we need to resize them
clip_w, clip_h = clip.size
+
if clip_w != video_width or clip_h != video_height:
clip_ratio = clip.w / clip.h
video_ratio = video_width / video_height
logger.debug(f"resizing clip, source: {clip_w}x{clip_h}, ratio: {clip_ratio:.2f}, target: {video_width}x{video_height}, ratio: {video_ratio:.2f}")
- if clip_ratio == video_ratio:
+ if abs(clip_ratio - video_ratio) < 0.01: # Almost same ratio
clip = clip.resized(new_size=(video_width, video_height))
else:
+ # Use better scaling algorithm for quality
if clip_ratio > video_ratio:
scale_factor = video_width / clip_w
else:
@@ -188,13 +286,16 @@ def combine_videos(
new_width = int(clip_w * scale_factor)
new_height = int(clip_h * scale_factor)
+
+ # Ensure dimensions are even numbers
+ new_width, new_height = ensure_even_dimensions(new_width, new_height)
background = ColorClip(size=(video_width, video_height), color=(0, 0, 0)).with_duration(clip_duration)
clip_resized = clip.resized(new_size=(new_width, new_height)).with_position("center")
clip = CompositeVideoClip([background, clip_resized])
shuffle_side = random.choice(["left", "right", "top", "bottom"])
- if video_transition_mode.value == VideoTransitionMode.none.value:
+ if video_transition_mode is None or video_transition_mode.value == VideoTransitionMode.none.value:
clip = clip
elif video_transition_mode.value == VideoTransitionMode.fade_in.value:
clip = video_effects.fadein_transition(clip, 1)
@@ -217,14 +318,24 @@ def combine_videos(
if clip.duration > max_clip_duration:
clip = clip.subclipped(0, max_clip_duration)
- # wirte clip to temp file
+ # Write clip to temp file with improved quality settings
clip_file = f"{output_dir}/temp-clip-{i+1}.mp4"
- clip.write_videofile(clip_file, logger=None, fps=fps, codec=video_codec)
+ encoding_params = get_optimal_encoding_params(video_width, video_height, "video")
+ clip.write_videofile(clip_file,
+ logger=None,
+ fps=fps,
+ codec=video_codec,
+ # Remove bitrate parameter as it conflicts with CRF in ffmpeg_params
+ ffmpeg_params=get_standard_ffmpeg_params(video_width, video_height, "video")
+ )
+
+ # Store clip duration before closing
+ clip_duration_value = clip.duration
close_clip(clip)
- processed_clips.append(SubClippedVideoClip(file_path=clip_file, duration=clip.duration, width=clip_w, height=clip_h))
- video_duration += clip.duration
+ processed_clips.append(SubClippedVideoClip(file_path=clip_file, duration=clip_duration_value, width=clip_w, height=clip_h))
+ video_duration += clip_duration_value
except Exception as e:
logger.error(f"failed to process clip: {str(e)}")
@@ -250,62 +361,62 @@ def combine_videos(
if len(processed_clips) == 1:
logger.info("using single clip directly")
shutil.copy(processed_clips[0].file_path, combined_video_path)
- delete_files(processed_clips)
+ delete_files([clip.file_path for clip in processed_clips])
logger.info("video combining completed")
return combined_video_path
- # create initial video file as base
- base_clip_path = processed_clips[0].file_path
- temp_merged_video = f"{output_dir}/temp-merged-video.mp4"
- temp_merged_next = f"{output_dir}/temp-merged-next.mp4"
-
- # copy first clip as initial merged video
- shutil.copy(base_clip_path, temp_merged_video)
-
- # merge remaining video clips one by one
- for i, clip in enumerate(processed_clips[1:], 1):
- logger.info(f"merging clip {i}/{len(processed_clips)-1}, duration: {clip.duration:.2f}s")
+ try:
+ # Load all processed clips
+ video_clips = []
+ for clip_info in processed_clips:
+ try:
+ clip = VideoFileClip(clip_info.file_path)
+ if clip.duration > 0 and hasattr(clip, 'size') and None not in clip.size:
+ video_clips.append(clip)
+ else:
+ logger.warning(f"Skipping invalid clip: {clip_info.file_path}")
+ close_clip(clip)
+ except Exception as e:
+ logger.error(f"Failed to load clip {clip_info.file_path}: {str(e)}")
+
+ if not video_clips:
+ logger.error("No valid clips could be loaded for final concatenation")
+ return ""
+
+ # Concatenate all clips at once with compose method for better quality
+ logger.info(f"Concatenating {len(video_clips)} clips in a single operation")
+ final_clip = concatenate_videoclips(video_clips, method="compose")
- try:
- # load current base video and next clip to merge
- base_clip = VideoFileClip(temp_merged_video)
- next_clip = VideoFileClip(clip.file_path)
+ # Write the final result directly
+ encoding_params = get_optimal_encoding_params(video_width, video_height, "video")
+ logger.info(f"Writing final video with quality settings: CRF {encoding_params['crf']}, preset {encoding_params['preset']}")
+
+ final_clip.write_videofile(
+ combined_video_path,
+ threads=threads,
+ logger=None,
+ temp_audiofile_path=os.path.dirname(combined_video_path),
+ audio_codec=audio_codec,
+ fps=fps,
+ ffmpeg_params=get_standard_ffmpeg_params(video_width, video_height, "video")
+ )
+
+ # Close all clips
+ close_clip(final_clip)
+ for clip in video_clips:
+ close_clip(clip)
- # merge these two clips
- merged_clip = concatenate_videoclips([base_clip, next_clip])
-
- # save merged result to temp file
- merged_clip.write_videofile(
- filename=temp_merged_next,
- threads=threads,
- logger=None,
- temp_audiofile_path=output_dir,
- audio_codec=audio_codec,
- fps=fps,
- )
- close_clip(base_clip)
- close_clip(next_clip)
- close_clip(merged_clip)
-
- # replace base file with new merged file
- delete_files(temp_merged_video)
- os.rename(temp_merged_next, temp_merged_video)
-
- except Exception as e:
- logger.error(f"failed to merge clip: {str(e)}")
- continue
-
- # after merging, rename final result to target file name
- os.rename(temp_merged_video, combined_video_path)
-
- # clean temp files
- clip_files = [clip.file_path for clip in processed_clips]
- delete_files(clip_files)
-
- logger.info("video combining completed")
+ logger.info("Video combining completed successfully")
+
+ except Exception as e:
+ logger.error(f"Error during final video concatenation: {str(e)}")
+ finally:
+ # Clean up temp files
+ clip_files = [clip.file_path for clip in processed_clips]
+ delete_files(clip_files)
+
return combined_video_path
-
def wrap_text(text, max_width, font="Arial", fontsize=60):
# Create ImageFont
font = ImageFont.truetype(font, fontsize)
@@ -359,7 +470,6 @@ def wrap_text(text, max_width, font="Arial", fontsize=60):
height = len(_wrapped_lines_) * height
return result, height
-
def generate_video(
video_path: str,
audio_path: str,
@@ -369,6 +479,7 @@ def generate_video(
):
aspect = VideoAspect(params.video_aspect)
video_width, video_height = aspect.to_resolution()
+ video_width, video_height = ensure_even_dimensions(video_width, video_height)
logger.info(f"generating video: {video_width} x {video_height}")
logger.info(f" ① video: {video_path}")
@@ -410,8 +521,8 @@ def generate_video(
bg_color=params.text_background_color,
stroke_color=params.stroke_color,
stroke_width=params.stroke_width,
- # interline=interline,
- # size=size,
+ interline=interline,
+ size=size,
)
duration = subtitle_item[0][1] - subtitle_item[0][0]
_clip = _clip.with_start(subtitle_item[0][0])
@@ -472,60 +583,227 @@ def generate_video(
logger.error(f"failed to add bgm: {str(e)}")
video_clip = video_clip.with_audio(audio_clip)
- video_clip.write_videofile(
- output_file,
- audio_codec=audio_codec,
- temp_audiofile_path=output_dir,
- threads=params.n_threads or 2,
- logger=None,
- fps=fps,
- )
- video_clip.close()
- del video_clip
+
+ # Use improved encoding settings
+ try:
+ # Get optimized encoding parameters
+ encoding_params = get_optimal_encoding_params(video_width, video_height, "video")
+ ffmpeg_params = get_standard_ffmpeg_params(video_width, video_height, "video")
+
+ # For Windows, use a simpler approach to avoid path issues with two-pass encoding
+ if os.name == 'nt':
+ # Single pass with high quality settings
+ video_clip.write_videofile(
+ output_file,
+ codec=video_codec,
+ audio_codec=audio_codec,
+ temp_audiofile_path=output_dir,
+ threads=params.n_threads or 2,
+ logger=None,
+ fps=fps,
+ ffmpeg_params=ffmpeg_params
+ )
+ else:
+ # On Unix systems, we can use two-pass encoding more reliably
+ # Prepare a unique passlogfile name to avoid conflicts
+ passlog_id = str(uuid.uuid4())[:8]
+ passlogfile = os.path.join(output_dir, f"ffmpeg2pass_{passlog_id}")
+
+ # Create a temporary file for first pass output
+ temp_first_pass = os.path.join(output_dir, f"temp_first_pass_{passlog_id}.mp4")
+
+ # Flag to track if we should do second pass
+ do_second_pass = True
+
+ # First pass parameters with explicit passlogfile
+ first_pass_params = ffmpeg_params + [
+ "-pass", "1",
+ "-passlogfile", passlogfile,
+ "-an" # No audio in first pass
+ ]
+
+ logger.info("Starting first pass encoding...")
+ try:
+ video_clip.write_videofile(
+ temp_first_pass, # Write to temporary file instead of null
+ codec=video_codec,
+ audio=False, # Skip audio processing in first pass
+ threads=params.n_threads or 2,
+ logger=None,
+ fps=fps,
+ ffmpeg_params=first_pass_params
+ )
+ except Exception as e:
+ # If first pass fails, fallback to single-pass encoding
+ logger.warning(f"First pass encoding failed: {e}. Falling back to single-pass encoding.")
+ video_clip.write_videofile(
+ output_file,
+ codec=video_codec,
+ audio_codec=audio_codec,
+ temp_audiofile_path=output_dir,
+ threads=params.n_threads or 2,
+ logger=None,
+ fps=fps,
+ ffmpeg_params=ffmpeg_params
+ )
+ do_second_pass = False
+ finally:
+ # Clean up first pass temporary file
+ if os.path.exists(temp_first_pass):
+ try:
+ os.remove(temp_first_pass)
+ except Exception as e:
+ logger.warning(f"Failed to delete temporary first pass file: {e}")
+
+ # Second pass only if first pass succeeded
+ if do_second_pass:
+ logger.info("Starting second pass encoding...")
+ second_pass_params = ffmpeg_params + [
+ "-pass", "2",
+ "-passlogfile", passlogfile
+ ]
+ video_clip.write_videofile(
+ output_file,
+ codec=video_codec,
+ audio_codec=audio_codec,
+ temp_audiofile_path=output_dir,
+ threads=params.n_threads or 2,
+ logger=None,
+ fps=fps,
+ ffmpeg_params=second_pass_params
+ )
+
+ # Clean up pass log files
+ for f in glob.glob(f"{passlogfile}*"):
+ try:
+ os.remove(f)
+ except Exception as e:
+ logger.warning(f"Failed to delete pass log file {f}: {e}")
+ finally:
+ # Ensure all resources are properly closed
+ close_clip(video_clip)
+ close_clip(audio_clip)
+ if 'bgm_clip' in locals():
+ close_clip(bgm_clip)
+ # Force garbage collection
+ gc.collect()
-
-def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
+def preprocess_video(materials: List[MaterialInfo], clip_duration=4, apply_denoising=False):
for material in materials:
if not material.url:
continue
ext = utils.parse_extension(material.url)
+
+ # First load the clip
try:
clip = VideoFileClip(material.url)
except Exception:
clip = ImageClip(material.url)
+
+ # Then apply denoising if needed and it's a video
+        if ext not in const.FILE_TYPE_IMAGES and apply_denoising:
+            try:
+                # Import inside the try block: a missing moviepy.video.fx.all module or
+                # denoise helper must downgrade to a warning, not abort preprocessing.
+                import numpy as np
+                from moviepy.video.fx.all import denoise
+
+                # Use the spread of the first frame's pixel values as a rough noise estimate
+                noise_estimate = np.std(clip.get_frame(0))
+
+                # Apply denoising only if noise level seems high
+                if noise_estimate > 15:  # Threshold determined empirically
+                    logger.info(f"Applying denoising to video with estimated noise: {noise_estimate:.2f}")
+                    clip = denoise(clip, sigma=1.5, mode="fast")
+            except Exception as e:
+                logger.warning(f"Denoising attempt failed: {e}")
width = clip.size[0]
height = clip.size[1]
- if width < 480 or height < 480:
- logger.warning(f"low resolution material: {width}x{height}, minimum 480x480 required")
- continue
+
+ # Improved resolution check
+ min_resolution = 480
+ # Calculate aspect ratio outside of conditional blocks so it's always defined
+ aspect_ratio = width / height
+
+ if width < min_resolution or height < min_resolution:
+ logger.warning(f"Low resolution material: {width}x{height}, minimum {min_resolution}x{min_resolution} recommended")
+ # Instead of skipping, apply upscaling for very low-res content
+ if width < min_resolution/2 or height < min_resolution/2:
+ logger.warning("Resolution too low, skipping")
+ close_clip(clip)
+ continue
+ else:
+ # Apply high-quality upscaling for borderline content
+ logger.info(f"Applying high-quality upscaling to low-resolution content: {width}x{height}")
+
+ # Calculate target dimensions while maintaining aspect ratio
+ if width < height:
+ new_width = min_resolution
+ new_height = int(new_width / aspect_ratio)
+ else:
+ new_height = min_resolution
+ new_width = int(new_height * aspect_ratio)
+
+ # Ensure dimensions are even
+ new_width, new_height = ensure_even_dimensions(new_width, new_height)
+
+                # resized() takes no `resizer` argument in moviepy 2.x; pass only the target size
+                clip = clip.resized(new_size=(new_width, new_height))
if ext in const.FILE_TYPE_IMAGES:
logger.info(f"processing image: {material.url}")
- # Create an image clip and set its duration to 3 seconds
+
+ # Ensure dimensions are even numbers and enhance for better quality
+ width, height = ensure_even_dimensions(width, height)
+
+ # Use higher resolution multiplier for sharper output
+ quality_multiplier = 1.2 if width < 1080 else 1.0
+ enhanced_width = int(width * quality_multiplier)
+ enhanced_height = int(height * quality_multiplier)
+ enhanced_width, enhanced_height = ensure_even_dimensions(enhanced_width, enhanced_height)
+
+ # Close the original clip before creating a new one to avoid file handle conflicts
+ close_clip(clip)
+
+ # Create a new ImageClip with the image
clip = (
ImageClip(material.url)
+                .resized(new_size=(enhanced_width, enhanced_height))  # moviepy 2.x resized() accepts no resampling-filter argument
.with_duration(clip_duration)
.with_position("center")
)
- # Apply a zoom effect using the resize method.
- # A lambda function is used to make the zoom effect dynamic over time.
- # The zoom effect starts from the original size and gradually scales up to 120%.
- # t represents the current time, and clip.duration is the total duration of the clip (3 seconds).
- # Note: 1 represents 100% size, so 1.2 represents 120% size.
+ # More subtle and smoother zoom effect
zoom_clip = clip.resized(
- lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration)
+                # Subtle zoom: scale linearly from 100% to 105% over the clip's duration
+                lambda t: 1 + (0.05 * (t / clip.duration))
)
- # Optionally, create a composite video clip containing the zoomed clip.
- # This is useful when you want to add other elements to the video.
+ # Create composite with enhanced quality
final_clip = CompositeVideoClip([zoom_clip])
- # Output the video to a file.
+ # Output with maximum quality settings
video_file = f"{material.url}.mp4"
- final_clip.write_videofile(video_file, fps=30, logger=None)
+ encoding_params = get_optimal_encoding_params(enhanced_width, enhanced_height, "image")
+
+            final_clip.write_videofile(
+                video_file,
+                fps=fps,
+                logger='bar',
+                codec=video_codec,
+                # No bitrate (conflicts with CRF in ffmpeg_params) and no `verbose` (not a moviepy 2.x parameter)
+                ffmpeg_params=get_standard_ffmpeg_params(enhanced_width, enhanced_height, "image"),
+                write_logfile=False,
+            )
+
+ # Close all clips to properly release resources
+ close_clip(final_clip)
+ close_clip(zoom_clip)
close_clip(clip)
material.url = video_file
- logger.success(f"image processed: {video_file}")
+ logger.success(f"high-quality image processed: {video_file}")
+ else:
+ close_clip(clip)
+
return materials
\ No newline at end of file
diff --git a/app/services/voice.py b/app/services/voice.py
index e6b4d59..522c237 100644
--- a/app/services/voice.py
+++ b/app/services/voice.py
@@ -1469,7 +1469,7 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
with open(subtitle_file, "w", encoding="utf-8") as file:
file.write("\n".join(sub_items) + "\n")
try:
- sbs = subtitles.file_to_subtitles(subtitle_file, encoding="utf-8")
+ sbs = subtitles.file_to_subtitles(subtitle_file)
duration = max([tb for ((ta, tb), txt) in sbs])
logger.info(
f"completed, subtitle file created: {subtitle_file}, duration: {duration}"
diff --git a/requirements.txt b/requirements.txt
index a1731f6..0083173 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,11 @@
moviepy==2.1.2
+Pillow
streamlit==1.45.0
edge_tts==6.1.19
fastapi==0.115.6
uvicorn==0.32.1
openai==1.56.1
-faster-whisper==1.1.0
+faster-whisper
loguru==0.7.3
google.generativeai==0.8.3
dashscope==1.20.14
@@ -14,3 +15,4 @@ redis==5.2.0
python-multipart==0.0.19
pyyaml
requests>=2.31.0
+numpy
From a787e7941d8fd76d5f4270eeb3f83d05a90484b2 Mon Sep 17 00:00:00 2001
From: overcrash <3681221+overcrash66@users.noreply.github.com>
Date: Wed, 18 Jun 2025 21:43:16 -0300
Subject: [PATCH 6/9] Update video.py
---
app/services/video.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/app/services/video.py b/app/services/video.py
index 6ce1108..305acda 100644
--- a/app/services/video.py
+++ b/app/services/video.py
@@ -54,10 +54,10 @@ class SubClippedVideoClip:
audio_codec = "aac"
video_codec = "libx264"
fps = 30
-video_bitrate = "25M" # Increased from 15M for better quality
-audio_bitrate = "320k" # Increased from 192k for better audio quality
-crf = "15" # Adjusted from 16 - better balance between quality and file size
-preset = "slower" # Changed from slower - better balance between speed and compression
+video_bitrate = "25M"
+audio_bitrate = "320k"
+crf = "15"
+preset = "slower"
def get_optimal_encoding_params(width, height, content_type="video"):
"""Get optimal encoding parameters based on resolution and content type."""
From b501f5b997788d99b981c8276640d3b5e5b72ed7 Mon Sep 17 00:00:00 2001
From: overcrash <3681221+overcrash66@users.noreply.github.com>
Date: Wed, 18 Jun 2025 21:45:36 -0300
Subject: [PATCH 7/9] Update voice.py
---
app/services/voice.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/app/services/voice.py b/app/services/voice.py
index 522c237..e6b4d59 100644
--- a/app/services/voice.py
+++ b/app/services/voice.py
@@ -1469,7 +1469,7 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
with open(subtitle_file, "w", encoding="utf-8") as file:
file.write("\n".join(sub_items) + "\n")
try:
- sbs = subtitles.file_to_subtitles(subtitle_file)
+ sbs = subtitles.file_to_subtitles(subtitle_file, encoding="utf-8")
duration = max([tb for ((ta, tb), txt) in sbs])
logger.info(
f"completed, subtitle file created: {subtitle_file}, duration: {duration}"
From 62918099d39f36663ff45b9be57fc3b2984104a2 Mon Sep 17 00:00:00 2001
From: overcrash <3681221+overcrash66@users.noreply.github.com>
Date: Wed, 18 Jun 2025 21:47:13 -0300
Subject: [PATCH 8/9] Update llm.py
---
app/services/llm.py | 95 ++++++++++++++++++++++++++++++---------------
1 file changed, 64 insertions(+), 31 deletions(-)
diff --git a/app/services/llm.py b/app/services/llm.py
index 2c45ef9..6c954a8 100644
--- a/app/services/llm.py
+++ b/app/services/llm.py
@@ -1,6 +1,7 @@
import json
import logging
import re
+import requests
from typing import List
import g4f
@@ -82,23 +83,61 @@ def _generate_response(prompt: str) -> str:
raise ValueError(
f"{llm_provider}: secret_key is not set, please set it in the config.toml file."
)
- else:
- raise ValueError(
- "llm_provider is not set, please set it in the config.toml file."
- )
+            elif llm_provider == "pollinations":
+                # Fall back to the default endpoint when no URL is configured
+                base_url = config.app.get("pollinations_base_url", "")
+                if not base_url:
+                    base_url = "https://text.pollinations.ai/openai"
+                model_name = config.app.get("pollinations_model_name", "openai-fast")
+
+                # Prepare the payload
+                payload = {
+                    "model": model_name,
+                    "messages": [{"role": "user", "content": prompt}],
+                    "seed": 101,  # Optional but helps with reproducibility
+                }
+
+                # Optional parameters if configured
+                if config.app.get("pollinations_private"):
+                    payload["private"] = True
+                if config.app.get("pollinations_referrer"):
+                    payload["referrer"] = config.app.get("pollinations_referrer")
+
+                try:
+                    # Without a timeout, requests waits indefinitely on a stalled connection
+                    response = requests.post(
+                        base_url,
+                        headers={"Content-Type": "application/json"},
+                        json=payload,
+                        timeout=120,
+                    )
+                    response.raise_for_status()
+                    choices = response.json().get("choices") or []
+                    content = choices[0]["message"]["content"]
+                    return content.replace("\n", "")
+                except requests.exceptions.RequestException as e:
+                    raise Exception(f"[{llm_provider}] request failed: {str(e)}") from e
+                except (AttributeError, IndexError, KeyError, TypeError) as e:
+                    raise Exception(f"[{llm_provider}] returned an invalid response format") from e
+            else:
+                # Restore the guard for a provider name that matches none of the branches above
+                raise ValueError(
+                    "llm_provider is not set, please set it in the config.toml file."
+                )
- if not api_key:
- raise ValueError(
- f"{llm_provider}: api_key is not set, please set it in the config.toml file."
- )
- if not model_name:
- raise ValueError(
- f"{llm_provider}: model_name is not set, please set it in the config.toml file."
- )
- if not base_url:
- raise ValueError(
- f"{llm_provider}: base_url is not set, please set it in the config.toml file."
- )
+            # Only the API key is optional (pollinations/ollama); model_name and base_url stay mandatory
+            if llm_provider not in ["pollinations", "ollama"] and not api_key:
+                raise ValueError(
+                    f"{llm_provider}: api_key is not set, please set it in the config.toml file."
+                )
+            if not model_name:
+                raise ValueError(
+                    f"{llm_provider}: model_name is not set, please set it in the config.toml file."
+                )
+            if not base_url:
+                raise ValueError(
+                    f"{llm_provider}: base_url is not set, please set it in the config.toml file."
+                )
if llm_provider == "qwen":
import dashscope
@@ -172,8 +211,6 @@ def _generate_response(prompt: str) -> str:
return generated_text
if llm_provider == "cloudflare":
- import requests
-
response = requests.post(
f"https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run/{model_name}",
headers={"Authorization": f"Bearer {api_key}"},
@@ -192,20 +229,15 @@ def _generate_response(prompt: str) -> str:
return result["result"]["response"]
if llm_provider == "ernie":
- import requests
-
- params = {
- "grant_type": "client_credentials",
- "client_id": api_key,
- "client_secret": secret_key,
- }
- access_token = (
- requests.post(
- "https://aip.baidubce.com/oauth/2.0/token", params=params
- )
- .json()
- .get("access_token")
+ response = requests.post(
+ "https://aip.baidubce.com/oauth/2.0/token",
+ params={
+ "grant_type": "client_credentials",
+ "client_id": api_key,
+ "client_secret": secret_key,
+ }
)
+ access_token = response.json().get("access_token")
url = f"{base_url}?access_token={access_token}"
payload = json.dumps(
@@ -409,3 +441,4 @@ if __name__ == "__main__":
)
print("######################")
print(search_terms)
+
\ No newline at end of file
From bb9ee76b841399a452a38dc321b28a641e86fe87 Mon Sep 17 00:00:00 2001
From: overcrash <3681221+overcrash66@users.noreply.github.com>
Date: Wed, 18 Jun 2025 21:48:38 -0300
Subject: [PATCH 9/9] Update video_effects.py
---
app/services/utils/video_effects.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/app/services/utils/video_effects.py b/app/services/utils/video_effects.py
index e6213d2..62e8cdb 100644
--- a/app/services/utils/video_effects.py
+++ b/app/services/utils/video_effects.py
@@ -1,6 +1,5 @@
from moviepy import Clip, vfx
-#from moviepy import Clip
-#import moviepy.video.fx.all as vfx
+
# FadeIn
def fadein_transition(clip: Clip, t: float) -> Clip:
return clip.with_effects([vfx.FadeIn(t)])