diff --git a/.gitignore b/.gitignore index dd7e788..00b64c8 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,6 @@ /app/utils/__pycache__/ /*/__pycache__/* .vscode -/**/.streamlit \ No newline at end of file +/**/.streamlit +__pycache__ +logs/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index bd871c6..16a444a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ FROM python:3.10-slim # Set the working directory in the container WORKDIR /MoneyPrinterTurbo -ENV PYTHONPATH="/MoneyPrinterTurbo:$PYTHONPATH" +ENV PYTHONPATH="/MoneyPrinterTurbo" # Install system dependencies RUN apt-get update && apt-get install -y \ @@ -17,11 +17,7 @@ RUN apt-get update && apt-get install -y \ RUN sed -i '/ + ## 视频演示 📺 ### 竖屏 9:16 @@ -102,8 +105,17 @@ +## 配置要求 📦 +- 建议最低 CPU 4核或以上,内存 8G 或以上,显卡非必须 +- Windows 10 或 MacOS 11.0 以上系统 + ## 安装部署 📥 +> 不想部署的可以直接下载安装包,解压直接使用 +- **Windows** 版本下载地址 + - 百度网盘: https://pan.baidu.com/s/1BB3SGtAFTytzFLS5t2d8Gg?pwd=5bry + +### 前提条件 - 尽量不要使用 **中文路径**,避免出现一些无法预料的问题 - 请确保你的 **网络** 是正常的,VPN需要打开`全局流量`模式 @@ -230,8 +242,8 @@ python main.py 当前支持2种字幕生成方式: -- edge: 生成速度更快,性能更好,对电脑配置没有要求,但是质量可能不稳定 -- whisper: 生成速度较慢,性能较差,对电脑配置有一定要求,但是质量更可靠。 +- **edge**: 生成`速度快`,性能更好,对电脑配置没有要求,但是质量可能不稳定 +- **whisper**: 生成`速度慢`,性能较差,对电脑配置有一定要求,但是`质量更可靠`。 可以修改 `config.toml` 配置文件中的 `subtitle_provider` 进行切换 @@ -241,6 +253,25 @@ python main.py 1. whisper 模式下需要到 HuggingFace 下载一个模型文件,大约 3GB 左右,请确保网络通畅 2. 如果留空,表示不生成字幕。 +> 由于国内无法访问 HuggingFace,可以使用以下方法下载 `whisper-large-v3` 的模型文件 + +下载地址: +- 百度网盘: https://pan.baidu.com/s/11h3Q6tsDtjQKTjUu3sc5cA?pwd=xjs9 +- 夸克网盘:https://pan.quark.cn/s/3ee3d991d64b + +模型下载后解压,整个目录放到 `.\MoneyPrinterTurbo\models` 里面, +最终的文件路径应该是这样: `.\MoneyPrinterTurbo\models\whisper-large-v3` +``` +MoneyPrinterTurbo + ├─models + │ └─whisper-large-v3 + │ config.json + │ model.bin + │ preprocessor_config.json + │ tokenizer.json + │ vocabulary.json +``` + ## 背景音乐 🎵 用于视频的背景音乐,位于项目的 `resource/songs` 目录下。 @@ -375,14 +406,6 @@ pip install Pillow==8.4.0 - 可以提交 [issue](https://github.com/harry0703/MoneyPrinterTurbo/issues) 或者 [pull request](https://github.com/harry0703/MoneyPrinterTurbo/pulls)。 -- 也可以关注我的 **抖音** 或 **视频号**:`网旭哈瑞.AI` - - 我会在上面发布一些 **使用教程** 和 **纯技术** 分享。 - - 如果有更新和优化,我也会在上面 **及时通知**。 - - 有问题也可以在上面 **留言**,我会 **尽快回复**。 - -| 抖音 | | 视频号 | -|:---------------------------------------:|:------------:|:-------------------------------------------:| -| | | | ## 参考项目 📚 diff --git a/app/config/config.py b/app/config/config.py index cd3ba6a..19d4a77 100644 --- a/app/config/config.py +++ b/app/config/config.py @@ -1,28 +1,45 @@ import os import socket import toml +import shutil from loguru import logger root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) config_file = f"{root_dir}/config.toml" -if not os.path.isfile(config_file): - example_file = f"{root_dir}/config.example.toml" - if os.path.isfile(example_file): - import shutil - shutil.copyfile(example_file, config_file) - logger.info(f"copy config.example.toml to config.toml") -logger.info(f"load config from file: {config_file}") +def load_config(): + # fix: IsADirectoryError: [Errno 21] Is a directory: '/MoneyPrinterTurbo/config.toml' + if os.path.isdir(config_file): + shutil.rmtree(config_file) -try: - _cfg = toml.load(config_file) -except Exception as e: - logger.warning(f"load config failed: {str(e)}, try to load as utf-8-sig") - with open(config_file, mode="r", encoding='utf-8-sig') as fp: - _cfg_content = fp.read() - _cfg = toml.loads(_cfg_content) + if not os.path.isfile(config_file): + example_file = f"{root_dir}/config.example.toml" + if os.path.isfile(example_file): + shutil.copyfile(example_file, config_file) + logger.info(f"copy config.example.toml to config.toml") + logger.info(f"load config from file: {config_file}") + + try: + _config_ = toml.load(config_file) + except Exception as e: + logger.warning(f"load config failed: {str(e)}, try to load as utf-8-sig") + with open(config_file, mode="r", encoding='utf-8-sig') as fp: + _cfg_content = fp.read() + _config_ = toml.loads(_cfg_content) + return _config_ + + +def save_config(): + with open(config_file, "w", encoding="utf-8") as f: + _cfg["app"] = app + _cfg["whisper"] = whisper + _cfg["pexels"] = pexels + f.write(toml.dumps(_cfg)) + + +_cfg = load_config() app = _cfg.get("app", {}) whisper = _cfg.get("whisper", {}) pexels = _cfg.get("pexels", {}) @@ -36,7 +53,7 @@ listen_port = _cfg.get("listen_port", 8080) project_name = _cfg.get("project_name", "MoneyPrinterTurbo") project_description = _cfg.get("project_description", "https://github.com/harry0703/MoneyPrinterTurbo") -project_version = _cfg.get("project_version", "1.0.1") +project_version = _cfg.get("project_version", "1.1.0") reload_debug = False imagemagick_path = app.get("imagemagick_path", "") @@ -46,19 +63,3 @@ if imagemagick_path and os.path.isfile(imagemagick_path): ffmpeg_path = app.get("ffmpeg_path", "") if ffmpeg_path and os.path.isfile(ffmpeg_path): os.environ["IMAGEIO_FFMPEG_EXE"] = ffmpeg_path - - -# __cfg = { -# "hostname": hostname, -# "listen_host": listen_host, -# "listen_port": listen_port, -# } -# logger.info(__cfg) - - -def save_config(): - with open(config_file, "w", encoding="utf-8") as f: - _cfg["app"] = app - _cfg["whisper"] = whisper - _cfg["pexels"] = pexels - f.write(toml.dumps(_cfg)) diff --git a/app/services/llm.py b/app/services/llm.py index fca6bca..b13a670 100644 --- a/app/services/llm.py +++ b/app/services/llm.py @@ -5,6 +5,8 @@ from typing import List from loguru import logger from openai import OpenAI from openai import AzureOpenAI +from openai.types.chat import ChatCompletion + from app.config import config @@ -57,6 +59,11 @@ def _generate_response(prompt: str) -> str: api_key = config.app.get("qwen_api_key") model_name = config.app.get("qwen_model_name") base_url = "***" + elif llm_provider == "cloudflare": + api_key = config.app.get("cloudflare_api_key") + model_name = config.app.get("cloudflare_model_name") + account_id = config.app.get("cloudflare_account_id") + base_url = "***" else: raise ValueError("llm_provider is not set, please set it in the config.toml file.") @@ -69,17 +76,31 @@ def _generate_response(prompt: str) -> str: if llm_provider == "qwen": import dashscope + from dashscope.api_entities.dashscope_response import GenerationResponse dashscope.api_key = api_key response = dashscope.Generation.call( model=model_name, messages=[{"role": "user", "content": prompt}] ) - content = response["output"]["text"] - return content.replace("\n", "") + if response: + if isinstance(response, GenerationResponse): + status_code = response.status_code + if status_code != 200: + raise Exception( + f"[{llm_provider}] returned an error response: \"{response}\"") + + content = response["output"]["text"] + return content.replace("\n", "") + else: + raise Exception( + f"[{llm_provider}] returned an invalid response: \"{response}\"") + else: + raise Exception( + f"[{llm_provider}] returned an empty response") if llm_provider == "gemini": import google.generativeai as genai - genai.configure(api_key=api_key) + genai.configure(api_key=api_key, transport='rest') generation_config = { "temperature": 0.5, @@ -111,10 +132,30 @@ def _generate_response(prompt: str) -> str: generation_config=generation_config, safety_settings=safety_settings) - convo = model.start_chat(history=[]) + try: + response = model.generate_content(prompt) + candidates = response.candidates + generated_text = candidates[0].content.parts[0].text + except (AttributeError, IndexError) as e: + print("Gemini Error:", e) - convo.send_message(prompt) - return convo.last.text + return generated_text + + if llm_provider == "cloudflare": + import requests + response = requests.post( + f"https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run/{model_name}", + headers={"Authorization": f"Bearer {api_key}"}, + json={ + "messages": [ + {"role": "system", "content": "You are a friendly assistant"}, + {"role": "user", "content": prompt} + ] + } + ) + result = response.json() + logger.info(result) + return result["result"]["response"] if llm_provider == "azure": client = AzureOpenAI( @@ -133,7 +174,15 @@ def _generate_response(prompt: str) -> str: messages=[{"role": "user", "content": prompt}] ) if response: - content = response.choices[0].message.content + if isinstance(response, ChatCompletion): + content = response.choices[0].message.content + else: + raise Exception( + f"[{llm_provider}] returned an invalid response: \"{response}\", please check your network " + f"connection and try again.") + else: + raise Exception( + f"[{llm_provider}] returned an empty response, please check your network connection and try again.") return content.replace("\n", "") @@ -149,9 +198,9 @@ Generate a script for a video, depending on the subject of the video. 1. the script is to be returned as a string with the specified number of paragraphs. 2. do not under any circumstance reference this prompt in your response. 3. get straight to the point, don't start with unnecessary things like, "welcome to this video". -4. you must not include any type of markdown or formatting in the script, never use a title. -5. only return the raw content of the script. -6. do not include "voiceover", "narrator" or similar indicators of what should be spoken at the beginning of each paragraph or line. +4. you must not include any type of markdown or formatting in the script, never use a title. +5. only return the raw content of the script. +6. do not include "voiceover", "narrator" or similar indicators of what should be spoken at the beginning of each paragraph or line. 7. you must not mention the prompt, or anything about the script itself. also, never talk about the amount of paragraphs or lines. just write the script. 8. respond in the same language as the video subject. diff --git a/app/services/subtitle.py b/app/services/subtitle.py index 5ea8028..d19ac59 100644 --- a/app/services/subtitle.py +++ b/app/services/subtitle.py @@ -1,4 +1,5 @@ import json +import os.path import re from faster_whisper import WhisperModel @@ -17,8 +18,13 @@ model = None def create(audio_file, subtitle_file: str = ""): global model if not model: - logger.info(f"loading model: {model_size}, device: {device}, compute_type: {compute_type}") - model = WhisperModel(model_size_or_path=model_size, + model_path = f"{utils.root_dir()}/models/whisper-{model_size}" + model_bin_file = f"{model_path}/model.bin" + if not os.path.isdir(model_path) or not os.path.isfile(model_bin_file): + model_path = model_size + + logger.info(f"loading model: {model_path}, device: {device}, compute_type: {compute_type}") + model = WhisperModel(model_size_or_path=model_path, device=device, compute_type=compute_type) diff --git a/app/services/video.py b/app/services/video.py index 6ada901..e05dc12 100644 --- a/app/services/video.py +++ b/app/services/video.py @@ -124,7 +124,7 @@ def wrap_text(text, max_width, font='Arial', fontsize=60): width, height = get_text_size(text) if width <= max_width: - return text + return text, height logger.warning(f"wrapping text, max_width: {max_width}, text_width: {width}, text: {text}") @@ -149,8 +149,9 @@ def wrap_text(text, max_width, font='Arial', fontsize=60): if processed: _wrapped_lines_ = [line.strip() for line in _wrapped_lines_] result = '\n'.join(_wrapped_lines_).strip() + height = len(_wrapped_lines_) * height logger.warning(f"wrapped text: {result}") - return result + return result, height _wrapped_lines_ = [] chars = list(text) @@ -165,8 +166,9 @@ def wrap_text(text, max_width, font='Arial', fontsize=60): _txt_ = '' _wrapped_lines_.append(_txt_) result = '\n'.join(_wrapped_lines_).strip() + height = len(_wrapped_lines_) * height logger.warning(f"wrapped text: {result}") - return result + return result, height def generate_video(video_path: str, @@ -199,23 +201,15 @@ def generate_video(video_path: str, logger.info(f"using font: {font_path}") - if params.subtitle_position == "top": - position_height = video_height * 0.1 - elif params.subtitle_position == "bottom": - position_height = video_height * 0.9 - else: - position_height = "center" - - def generator(txt, **kwargs): + def create_text_clip(subtitle_item): + phrase = subtitle_item[1] max_width = video_width * 0.9 - # logger.debug(f"rendering text: {txt}") - wrapped_txt = wrap_text(txt, - max_width=max_width, - font=font_path, - fontsize=params.font_size - ) # 调整max_width以适应你的视频 - - clip = TextClip( + wrapped_txt, txt_height = wrap_text(phrase, + max_width=max_width, + font=font_path, + fontsize=params.font_size + ) + _clip = TextClip( wrapped_txt, font=font_path, fontsize=params.font_size, @@ -225,15 +219,28 @@ def generate_video(video_path: str, stroke_width=params.stroke_width, print_cmd=False, ) - return clip + duration = subtitle_item[0][1] - subtitle_item[0][0] + _clip = _clip.set_start(subtitle_item[0][0]) + _clip = _clip.set_end(subtitle_item[0][1]) + _clip = _clip.set_duration(duration) + if params.subtitle_position == "bottom": + _clip = _clip.set_position(('center', video_height * 0.95 - _clip.h)) + elif params.subtitle_position == "top": + _clip = _clip.set_position(('center', video_height * 0.1)) + else: + _clip = _clip.set_position(('center', 'center')) + return _clip video_clip = VideoFileClip(video_path) audio_clip = AudioFileClip(audio_path).volumex(params.voice_volume) if subtitle_path and os.path.exists(subtitle_path): - sub = SubtitlesClip(subtitles=subtitle_path, make_textclip=generator, encoding='utf-8') - sub_clip = sub.set_position(lambda _t: ('center', position_height)) - video_clip = CompositeVideoClip([video_clip, sub_clip]) + sub = SubtitlesClip(subtitles=subtitle_path, encoding='utf-8') + text_clips = [] + for item in sub.subtitles: + clip = create_text_clip(subtitle_item=item) + text_clips.append(clip) + video_clip = CompositeVideoClip([video_clip, *text_clips]) bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file) if bgm_file: @@ -258,7 +265,7 @@ if __name__ == "__main__": txt_zh = "测试长字段这是您的旅行技巧指南帮助您进行预算友好的冒险" font = utils.resource_dir() + "/fonts/STHeitiMedium.ttc" for txt in [txt_en, txt_zh]: - t = wrap_text(text=txt, max_width=1000, font=font, fontsize=60) + t, h = wrap_text(text=txt, max_width=1000, font=font, fontsize=60) print(t) task_id = "aa563149-a7ea-49c2-b39f-8c32cc225baf" diff --git a/docker-compose.yml b/docker-compose.yml index 8c932bf..2fc8a2b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,8 +1,5 @@ -version: "3" - x-common-volumes: &common-volumes - - ./config.toml:/MoneyPrinterTurbo/config.toml - - ./storage:/MoneyPrinterTurbo/storage + - ./:/MoneyPrinterTurbo services: webui: @@ -12,7 +9,7 @@ services: container_name: "webui" ports: - "8501:8501" - command: ["streamlit", "run", "./webui/Main.py","--browser.serverAddress=0.0.0.0","--server.enableCORS=True","--browser.gatherUsageStats=False"] + command: [ "streamlit", "run", "./webui/Main.py","--browser.serverAddress=127.0.0.1","--server.enableCORS=True","--browser.gatherUsageStats=False" ] volumes: *common-volumes restart: always api: diff --git a/docs/wechat-01.jpg b/docs/wechat-01.jpg new file mode 100644 index 0000000..96add13 Binary files /dev/null and b/docs/wechat-01.jpg differ diff --git a/webui.bat b/webui.bat index 03e54ed..fd97514 100644 --- a/webui.bat +++ b/webui.bat @@ -1,2 +1,7 @@ +@echo off +set CURRENT_DIR=%CD% +echo ***** Current directory: %CURRENT_DIR% ***** +set PYTHONPATH=%CURRENT_DIR% + rem set HF_ENDPOINT=https://hf-mirror.com streamlit run .\webui\Main.py --browser.gatherUsageStats=False --server.enableCORS=True \ No newline at end of file diff --git a/webui/Main.py b/webui/Main.py index 2712b28..6ff61cf 100644 --- a/webui/Main.py +++ b/webui/Main.py @@ -38,7 +38,7 @@ hide_streamlit_style = """ """ st.markdown(hide_streamlit_style, unsafe_allow_html=True) -st.title("MoneyPrinterTurbo") +st.title(f"MoneyPrinterTurbo v{config.project_version}") font_dir = os.path.join(root_dir, "resource", "fonts") song_dir = os.path.join(root_dir, "resource", "songs") @@ -175,7 +175,7 @@ with st.expander(tr("Basic Settings"), expanded=False): # qwen (通义千问) # gemini # ollama - llm_providers = ['OpenAI', 'Moonshot', 'Azure', 'Qwen', 'Gemini', 'Ollama', 'G4f', 'OneAPI'] + llm_providers = ['OpenAI', 'Moonshot', 'Azure', 'Qwen', 'Gemini', 'Ollama', 'G4f', 'OneAPI', "Cloudflare"] saved_llm_provider = config.app.get("llm_provider", "OpenAI").lower() saved_llm_provider_index = 0 for i, provider in enumerate(llm_providers): @@ -190,6 +190,7 @@ with st.expander(tr("Basic Settings"), expanded=False): llm_api_key = config.app.get(f"{llm_provider}_api_key", "") llm_base_url = config.app.get(f"{llm_provider}_base_url", "") llm_model_name = config.app.get(f"{llm_provider}_model_name", "") + llm_account_id = config.app.get(f"{llm_provider}_account_id", "") st_llm_api_key = st.text_input(tr("API Key"), value=llm_api_key, type="password") st_llm_base_url = st.text_input(tr("Base Url"), value=llm_base_url) st_llm_model_name = st.text_input(tr("Model Name"), value=llm_model_name) @@ -200,6 +201,11 @@ with st.expander(tr("Basic Settings"), expanded=False): if st_llm_model_name: config.app[f"{llm_provider}_model_name"] = st_llm_model_name + if llm_provider == 'cloudflare': + st_llm_account_id = st.text_input(tr("Account ID"), value=llm_account_id) + if st_llm_account_id: + config.app[f"{llm_provider}_account_id"] = st_llm_account_id + config.save_config() with right_config_panel: diff --git a/webui/i18n/de.json b/webui/i18n/de.json index 63bd397..d68899b 100644 --- a/webui/i18n/de.json +++ b/webui/i18n/de.json @@ -58,6 +58,6 @@ "Model Name": "Model Name", "Please Enter the LLM API Key": "Please Enter the **LLM API Key**", "Please Enter the Pexels API Key": "Please Enter the **Pexels API Key**", - "Get Help": "If you need help, or have any questions, you can join discord for help: https://harryai.cc/moneyprinterturbo" + "Get Help": "If you need help, or have any questions, you can join discord for help: https://harryai.cc" } } \ No newline at end of file diff --git a/webui/i18n/en.json b/webui/i18n/en.json index 47bd073..5fca842 100644 --- a/webui/i18n/en.json +++ b/webui/i18n/en.json @@ -55,9 +55,10 @@ "LLM Provider": "LLM Provider", "API Key": "API Key (:red[Required])", "Base Url": "Base Url", + "Account ID": "Account ID (Get from Cloudflare dashboard)", "Model Name": "Model Name", "Please Enter the LLM API Key": "Please Enter the **LLM API Key**", "Please Enter the Pexels API Key": "Please Enter the **Pexels API Key**", - "Get Help": "If you need help, or have any questions, you can join discord for help: https://harryai.cc/moneyprinterturbo" + "Get Help": "If you need help, or have any questions, you can join discord for help: https://harryai.cc" } } \ No newline at end of file diff --git a/webui/i18n/zh.json b/webui/i18n/zh.json index 7d71d05..28f719d 100644 --- a/webui/i18n/zh.json +++ b/webui/i18n/zh.json @@ -55,9 +55,10 @@ "LLM Provider": "大模型提供商", "API Key": "API Key (:red[必填,需要到大模型提供商的后台申请])", "Base Url": "Base Url (可选)", + "Account ID": "账户ID (Cloudflare的dash面板url中获取)", "Model Name": "模型名称 (:blue[需要到大模型提供商的后台确认被授权的模型名称])", "Please Enter the LLM API Key": "请先填写大模型 **API Key**", "Please Enter the Pexels API Key": "请先填写 **Pexels API Key**", - "Get Help": "有任何问题或建议,可以加入 **微信群** 求助或讨论:https://harryai.cc/moneyprinterturbo" + "Get Help": "有任何问题或建议,可以加入 **微信群** 求助或讨论:https://harryai.cc" } } \ No newline at end of file