From b033e074e1bcf5322e8e39157c1fedd3577cd66c Mon Sep 17 00:00:00 2001
From: cystal-dot
Date: Tue, 13 May 2025 23:49:08 +0900
Subject: [PATCH] feat: use images generated by stable-diffusion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore           | 61 +++++++++++++++++++------------
 app/services/llm.py  | 87 +++++++++++++++++---------------------------
 app/services/task.py | 18 ++++++++-
 webui/Main.py        |  5 ++-
 4 files changed, 91 insertions(+), 80 deletions(-)

diff --git a/.gitignore b/.gitignore
index 6aa0ca7..4244085 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,28 +1,41 @@
+# OS-specific
 .DS_Store
-/config.toml
-/storage/
-/.idea/
-/app/services/__pycache__
-/app/__pycache__/
-/app/config/__pycache__/
-/app/models/__pycache__/
-/app/utils/__pycache__/
-/*/__pycache__/*
-.vscode
-/**/.streamlit
-__pycache__
-logs/
-node_modules
-# VuePress 默认临时文件目录
-/sites/docs/.vuepress/.temp
-# VuePress 默认缓存目录
-/sites/docs/.vuepress/.cache
-# VuePress 默认构建生成的静态文件目录
-/sites/docs/.vuepress/dist
-# 模型目录
-/models/
-./models/*
+
+# Python caches
+__pycache__/
+**/__pycache__/
+
+# Python virtual environments
 venv/
-.venv
\ No newline at end of file
+.venv/
+
+# Config, logs, and temporary files
+config.toml
+logs/
+.idea/
+.vscode/
+storage/
+*.log
+
+# Streamlit
+**/.streamlit/
+
+# Node.js
+node_modules/
+
+# VuePress
+/sites/docs/.vuepress/.temp/
+/sites/docs/.vuepress/.cache/
+/sites/docs/.vuepress/dist/
+
+# Models
+models/
+models/*
+
+# Misc
+app/services/__pycache__/
+app/config/__pycache__/
+app/models/__pycache__/
+app/utils/__pycache__/
+.pdm-python
+forme
diff --git a/app/services/llm.py b/app/services/llm.py
index 2c45ef9..5d87f3d 100644
--- a/app/services/llm.py
+++ b/app/services/llm.py
@@ -332,70 +332,51 @@ Generate a script for a video, depending on the subject of the video.
     logger.success(f"completed: \n{final_script}")
     return final_script.strip()


-def generate_terms(video_subject: str, video_script: str, amount: int = 5) -> List[str]:
+def generate_terms(
+    video_subject: str,
+    video_script: str,
+    amount: int = 5
+) -> List[str]:
+    """
+    Generate AI image-generation prompts from the video subject and script.
+    The prompt explicitly instructs the LLM to return only a raw JSON array.
+    """
     prompt = f"""
-# Role: Video Search Terms Generator
+Generate exactly {amount} image prompts as a raw JSON array of strings. No markdown, code fences, or extra characters.

-## Goals:
-Generate {amount} search terms for stock videos, depending on the subject of a video.
-
-## Constrains:
-1. the search terms are to be returned as a json-array of strings.
-2. each search term should consist of 1-3 words, always add the main subject of the video.
-3. you must only return the json-array of strings. you must not return anything else. you must not return the script.
-4. the search terms must be related to the subject of the video.
-5. reply with english search terms only.
-
-## Output Example:
-["search term 1", "search term 2", "search term 3","search term 4","search term 5"]
-
-## Context:
-### Video Subject
+Video Subject:
 {video_subject}

-### Video Script
+Video Script:
 {video_script}

-Please note that you must use English for generating video search terms; Chinese is not accepted.
+Rules:
+1. Each prompt must start with either:
+   - "1girl, solo, anatomically correct" for character prompts, or
+   - a concise scene noun phrase for scene prompts (e.g., "moonlit forest clearing").
+2. Include at least one abstract symbol (hourglass, gear, DNA helix, etc.) and one environmental element (lantern, river, ancient ruins, etc.).
+3. Specify mood & lighting (e.g., golden hour, moody fog).
+4. Append: "portrait, 9:16, masterpiece:1.1, high detail, beautiful lighting, cinematic".
+
+Return only the JSON array, for example:
+["prompt1", "prompt2", ..., "promptN"]
 """.strip()

-    logger.info(f"subject: {video_subject}")
+    # Call the LLM; the prompt above requires a raw JSON array.
+    response = _generate_response(prompt)

-    search_terms = []
-    response = ""
-    for i in range(_max_retries):
-        try:
-            response = _generate_response(prompt)
-            if "Error: " in response:
-                logger.error(f"failed to generate video script: {response}")
-                return response
-            search_terms = json.loads(response)
-            if not isinstance(search_terms, list) or not all(
-                isinstance(term, str) for term in search_terms
-            ):
-                logger.error("response is not a list of strings.")
-                continue
-
-        except Exception as e:
-            logger.warning(f"failed to generate video terms: {str(e)}")
-            if response:
-                match = re.search(r"\[.*]", response)
-                if match:
-                    try:
-                        search_terms = json.loads(match.group())
-                    except Exception as e:
-                        logger.warning(f"failed to generate video terms: {str(e)}")
-                        pass
-
-        if search_terms and len(search_terms) > 0:
-            break
-        if i < _max_retries:
-            logger.warning(f"failed to generate video terms, trying again... {i + 1}")
-
-    logger.success(f"completed: \n{search_terms}")
-    return search_terms
+    # Parse and validate the response as JSON.
+    try:
+        prompts = json.loads(response)
+    except json.JSONDecodeError:
+        raise ValueError(f"LLM response is not valid JSON: {response}")
+    if not isinstance(prompts, list) or not all(isinstance(p, str) for p in prompts):
+        raise ValueError(f"LLM response is not a list of strings: {prompts}")
+    return prompts


 if __name__ == "__main__":
     video_subject = "生命的意义是什么"
diff --git a/app/services/task.py b/app/services/task.py
index 77ca908..ffabe4b 100644
--- a/app/services/task.py
+++ b/app/services/task.py
@@ -8,7 +8,7 @@ from loguru import logger
 from app.config import config
 from app.models import const
 from app.models.schema import VideoConcatMode, VideoParams
-from app.services import llm, material, subtitle, video, voice
+from app.services import llm, material, subtitle, video, voice, imagegen
 from app.services import state as sm
 from app.utils import utils

@@ -124,7 +124,21 @@ def generate_subtitle(task_id, params, video_script, sub_maker, audio_file):


 def get_video_materials(task_id, params, video_terms, audio_duration):
-    if params.video_source == "local":
+    if params.video_source == "local-ai":
+        logger.info("\n\n## generating AI images as materials")
+        prompts = video_terms if isinstance(video_terms, list) else [video_terms]
+        materials = []
+        for i, prompt in enumerate(prompts):
+            output_path = path.join(utils.task_dir(task_id), f"aiimg_{i}.png")
+            imagegen.generate_image(prompt, output_path)
+            item = material.MaterialInfo(provider="local-ai", url=output_path, duration=0)
+            materials.append(item)
+        # Run the generated images through the same preprocessing as local materials.
+        materials = video.preprocess_video(
+            materials=materials, clip_duration=params.video_clip_duration
+        )
+        return [material_info.url for material_info in materials]
+    elif params.video_source == "local":
         logger.info("\n\n## preprocess local materials")
         materials = video.preprocess_video(
             materials=params.video_materials, clip_duration=params.video_clip_duration
diff --git a/webui/Main.py b/webui/Main.py
index aafed1b..9adb772 100644
--- a/webui/Main.py
+++ b/webui/Main.py
@@ -532,6 +532,7 @@ with middle_panel:
(tr("Pexels"), "pexels"), (tr("Pixabay"), "pixabay"), (tr("Local file"), "local"), + (tr("AI Image (local)"), "local-ai"), (tr("TikTok"), "douyin"), (tr("Bilibili"), "bilibili"), (tr("Xiaohongshu"), "xiaohongshu"), @@ -904,7 +905,7 @@ if start_button: scroll_to_bottom() st.stop() - if params.video_source not in ["pexels", "pixabay", "local"]: + if params.video_source not in ["pexels", "pixabay", "local", "local-ai", "douyin", "bilibili", "xiaohongshu"]: st.error(tr("Please Select a Valid Video Source")) scroll_to_bottom() st.stop() @@ -966,6 +967,8 @@ if start_button: except Exception: pass + # logger.info(tr("!!! Uploading Video Files To Tiktok!!!")) + open_task_folder(task_id) logger.info(tr("Video Generation Completed")) scroll_to_bottom()