feat:stable-diffusionに生成させた画像を使う

This commit is contained in:
cystal-dot 2025-05-13 23:49:08 +09:00
parent 8449303a90
commit b033e074e1
4 changed files with 91 additions and 80 deletions

61
.gitignore vendored
View File

@ -1,28 +1,41 @@
# OS-specific
.DS_Store
/config.toml
/storage/
/.idea/
/app/services/__pycache__
/app/__pycache__/
/app/config/__pycache__/
/app/models/__pycache__/
/app/utils/__pycache__/
/*/__pycache__/*
.vscode
/**/.streamlit
__pycache__
logs/
node_modules
# VuePress default temporary files directory
/sites/docs/.vuepress/.temp
# VuePress default cache directory
/sites/docs/.vuepress/.cache
# VuePress default build output (generated static files) directory
/sites/docs/.vuepress/dist
# Model directory
/models/
# NOTE(review): gitignore does not appear to treat a leading "./" specially, so this
# pattern likely matches nothing - confirm against the gitignore pattern rules
./models/*
# Python cache
__pycache__/
**/__pycache__/
# Python virtual environments
venv/
.venv
.venv/
# Config, logs, and temporary files
config.toml
logs/
.idea/
.vscode/
storage/
*.log
# Streamlit
**/.streamlit/
# Node.js
node_modules/
# VuePress
/sites/docs/.vuepress/.temp/
/sites/docs/.vuepress/.cache/
/sites/docs/.vuepress/dist/
# Models
models/
models/*
# Misc
app/services/__pycache__/
app/config/__pycache__/
app/models/__pycache__/
app/utils/__pycache__/
.pdm-python
forme

View File

@ -332,70 +332,51 @@ Generate a script for a video, depending on the subject of the video.
logger.success(f"completed: \n{final_script}")
return final_script.strip()
from typing import List
import json
def generate_terms(video_subject: str, video_script: str, amount: int = 5) -> List[str]:
def generate_terms(
video_subject: str,
video_script: str,
amount: int = 5
) -> List[str]:
"""
Generate AI image-generation prompts from the video subject and script.
The instructions are strengthened to explicitly make the LLM return only a raw JSON array.
"""
prompt = f"""
# Role: Video Search Terms Generator
Generate exactly {amount} image prompts as a raw JSON array of strings. No markdown, code fences, or extra characters.
## Goals:
Generate {amount} search terms for stock videos, depending on the subject of a video.
## Constrains:
1. the search terms are to be returned as a json-array of strings.
2. each search term should consist of 1-3 words, always add the main subject of the video.
3. you must only return the json-array of strings. you must not return anything else. you must not return the script.
4. the search terms must be related to the subject of the video.
5. reply with english search terms only.
## Output Example:
["search term 1", "search term 2", "search term 3","search term 4","search term 5"]
## Context:
### Video Subject
Video Subject:
{video_subject}
### Video Script
Video Script:
{video_script}
Please note that you must use English for generating video search terms; Chinese is not accepted.
Rules:
1. Each prompt must start with either:
- "1girl, solo, anatomically correct" for character prompts, or
- a concise scene noun phrase for scene prompts (e.g., "moonlit forest clearing").
2. Include at least one abstract symbol (hourglass, gear, DNA helix, etc.) and one environmental element (lantern, river, ancient ruins, etc.).
3. Specify mood & lighting (e.g., golden hour, moody fog).
4. Append: "portrait, 9:16, masterpiece:1.1, high detail, beautiful lighting, cinematic".
Return only the JSON array, for example:
["prompt1", "prompt2", ..., "promptN"]
""".strip()
logger.info(f"subject: {video_subject}")
# Call the LLM
response = _generate_response(prompt)
search_terms = []
response = ""
# Retry up to _max_retries times, stopping as soon as a non-empty result is parsed
for i in range(_max_retries):
try:
response = _generate_response(prompt)
# A response containing "Error: " is logged and handed back to the caller unchanged
if "Error: " in response:
logger.error(f"failed to generate video script: {response}")
return response
search_terms = json.loads(response)
if not isinstance(search_terms, list) or not all(
isinstance(term, str) for term in search_terms
):
logger.error("response is not a list of strings.")
continue
except Exception as e:
logger.warning(f"failed to generate video terms: {str(e)}")
# Fallback: pull a bracketed "[...]" span out of the raw response and parse that instead
if response:
match = re.search(r"\[.*]", response)
if match:
try:
search_terms = json.loads(match.group())
except Exception as e:
logger.warning(f"failed to generate video terms: {str(e)}")
pass
if search_terms and len(search_terms) > 0:
break
# NOTE(review): i comes from range(_max_retries), so this condition always holds
if i < _max_retries:
logger.warning(f"failed to generate video terms, trying again... {i + 1}")
logger.success(f"completed: \n{search_terms}")
return search_terms
# Parse the response as JSON
try:
prompts = json.loads(response)
except json.JSONDecodeError:
raise ValueError(f"LLM response is not valid JSON: {response}")
# Reject anything that is not a list made up entirely of strings
if not isinstance(prompts, list) or not all(isinstance(p, str) for p in prompts):
raise ValueError(f"LLM response is not a list of strings: {prompts}")
return prompts
if __name__ == "__main__":
video_subject = "生命的意义是什么"

View File

@ -8,7 +8,7 @@ from loguru import logger
from app.config import config
from app.models import const
from app.models.schema import VideoConcatMode, VideoParams
from app.services import llm, material, subtitle, video, voice
from app.services import llm, material, subtitle, video, voice, imagegen
from app.services import state as sm
from app.utils import utils
@ -124,7 +124,21 @@ def generate_subtitle(task_id, params, video_script, sub_maker, audio_file):
def get_video_materials(task_id, params, video_terms, audio_duration):
if params.video_source == "local":
if params.video_source == "local-ai":
logger.info("\n\n## generating AI images as materials")
prompts = video_terms if isinstance(video_terms, list) else [video_terms]
image_paths = []
for i, prompt in enumerate(prompts):
output_path = path.join(utils.task_dir(task_id), f"aiimg_{i}.png")
imagegen.generate_image(prompt, output_path)
image_paths.append(output_path)
materials = []
for img_path in image_paths:
m = material.MaterialInfo(provider="local-ai", url=img_path, duration=0)
materials.append(m)
processed = video.preprocess_video(materials, clip_duration=params.video_clip_duration)
return [material.url for material in materials]
elif params.video_source == "local":
logger.info("\n\n## preprocess local materials")
materials = video.preprocess_video(
materials=params.video_materials, clip_duration=params.video_clip_duration

View File

@ -532,6 +532,7 @@ with middle_panel:
(tr("Pexels"), "pexels"),
(tr("Pixabay"), "pixabay"),
(tr("Local file"), "local"),
(tr("AI Image (local)"), "local-ai"),
(tr("TikTok"), "douyin"),
(tr("Bilibili"), "bilibili"),
(tr("Xiaohongshu"), "xiaohongshu"),
@ -904,7 +905,7 @@ if start_button:
scroll_to_bottom()
st.stop()
if params.video_source not in ["pexels", "pixabay", "local"]:
if params.video_source not in ["pexels", "pixabay", "local", "local-ai", "douyin", "bilibili", "xiaohongshu"]:
st.error(tr("Please Select a Valid Video Source"))
scroll_to_bottom()
st.stop()
@ -966,6 +967,8 @@ if start_button:
except Exception:
pass
# logger.info(tr("!!! Uploading Video Files To Tiktok!!!"))
open_task_folder(task_id)
logger.info(tr("Video Generation Completed"))
scroll_to_bottom()