feat:stable-diffusionに生成させた画像を使う

2026-02-21 08:27:22 +08:00 · 2025-05-13 23:49:08 +09:00 · 2025-05-13 23:49:08 +09:00 · b033e074e1
commit b033e074e1
parent 8449303a90
4 changed files with 91 additions and 80 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,28 +1,41 @@
+# OS固有
 .DS_Store
-/config.toml
-/storage/
-/.idea/
-/app/services/__pycache__
-/app/__pycache__/
-/app/config/__pycache__/
-/app/models/__pycache__/
-/app/utils/__pycache__/
-/*/__pycache__/*
-.vscode
-/**/.streamlit
-__pycache__
-logs/

-node_modules
-# VuePress 默认临时文件目录
-/sites/docs/.vuepress/.temp
-# VuePress 默认缓存目录
-/sites/docs/.vuepress/.cache
-# VuePress 默认构建生成的静态文件目录
-/sites/docs/.vuepress/dist
-# 模型目录
-/models/
-./models/*
+# Pythonキャッシュ
+__pycache__/
+**/__pycache__/

+# Python仮想環境
 venv/
-.venv
+.venv/
+
+# 設定・ログ・一時ファイル
+config.toml
+logs/
+.idea/
+.vscode/
+storage/
+*.log
+
+# Streamlit
+**/.streamlit/
+
+# Node.js
+node_modules/
+
+# VuePress
+/sites/docs/.vuepress/.temp/
+/sites/docs/.vuepress/.cache/
+/sites/docs/.vuepress/dist/
+
+# Model weights (root-anchored: a bare "models/" would also ignore the app/models/ source package)
+/models/
+models/*
+
+# その他
+app/services/__pycache__/
+app/config/__pycache__/
+app/models/__pycache__/
+app/utils/__pycache__/
+.pdm-python
+forme
--- a/app/services/llm.py
+++ b/app/services/llm.py
@ -332,70 +332,51 @@ Generate a script for a video, depending on the subject of the video.
        logger.success(f"completed: \n{final_script}")
    return final_script.strip()

+from typing import List
+import json

+def _parse_prompt_array(response: str) -> List[str]:
+    """
+    Parse an LLM reply into a list of prompt strings.
+
+    Raises:
+        ValueError: If the reply holds no parseable JSON array, or the array
+            contains anything other than strings.
+    """
+    try:
+        prompts = json.loads(response)
+    except json.JSONDecodeError as err:
+        # Despite the instructions, models often wrap the array in code fences
+        # or add prose; fall back to the outermost [...] span of the reply.
+        match = re.search(r"\[.*\]", response, re.DOTALL)
+        if match is None:
+            raise ValueError(f"LLM response is not valid JSON: {response}") from err
+        try:
+            prompts = json.loads(match.group())
+        except json.JSONDecodeError as inner:
+            raise ValueError(f"LLM response is not valid JSON: {response}") from inner
+    if not isinstance(prompts, list) or not all(isinstance(p, str) for p in prompts):
+        raise ValueError(f"LLM response is not a list of strings: {prompts}")
+    return prompts


-def generate_terms(video_subject: str, video_script: str, amount: int = 5) -> List[str]:
+def generate_terms(
+    video_subject: str,
+    video_script: str,
+    amount: int = 5
+) -> List[str]:
+    """
+    Generate AI image-generation prompts from the video subject and script.
+
+    The LLM is instructed to reply with a raw JSON array of strings only.
+    Because models do not always comply, the reply is parsed leniently and
+    the request is retried up to ``_max_retries`` times.
+
+    Args:
+        video_subject: Subject of the video, embedded verbatim in the prompt.
+        video_script: Script of the video, embedded verbatim in the prompt.
+        amount: Number of image prompts requested from the LLM (the count of
+            the returned list is not enforced).
+
+    Returns:
+        The list of prompt strings parsed from the LLM reply.
+
+    Raises:
+        ValueError: If no attempt yields a JSON list of strings.
+    """
    prompt = f"""
-# Role: Video Search Terms Generator
+Generate exactly {amount} image prompts as a raw JSON array of strings. No markdown, code fences, or extra characters.

-## Goals:
-Generate {amount} search terms for stock videos, depending on the subject of a video.
-
-## Constrains:
-1. the search terms are to be returned as a json-array of strings.
-2. each search term should consist of 1-3 words, always add the main subject of the video.
-3. you must only return the json-array of strings. you must not return anything else. you must not return the script.
-4. the search terms must be related to the subject of the video.
-5. reply with english search terms only.
-
-## Output Example:
-["search term 1", "search term 2", "search term 3","search term 4","search term 5"]
-
-## Context:
-### Video Subject
+Video Subject:
 {video_subject}

-### Video Script
+Video Script:
 {video_script}

-Please note that you must use English for generating video search terms; Chinese is not accepted.
+Rules:
+1. Each prompt must start with either:
+   - "1girl, solo, anatomically correct" for character prompts, or
+   - a concise scene noun phrase for scene prompts (e.g., "moonlit forest clearing").
+2. Include at least one abstract symbol (hourglass, gear, DNA helix, etc.) and one environmental element (lantern, river, ancient ruins, etc.).
+3. Specify mood & lighting (e.g., golden hour, moody fog).
+4. Append: "portrait, 9:16, masterpiece:1.1, high detail, beautiful lighting, cinematic".

+Return only the JSON array, for example:
+["prompt1", "prompt2", ..., "promptN"]
""".strip()

-    logger.info(f"subject: {video_subject}")
+    logger.info(f"subject: {video_subject}")

-    search_terms = []
-    response = ""
-    for i in range(_max_retries):
-        try:
-            response = _generate_response(prompt)
-            if "Error: " in response:
-                logger.error(f"failed to generate video script: {response}")
-                return response
-            search_terms = json.loads(response)
-            if not isinstance(search_terms, list) or not all(
-                isinstance(term, str) for term in search_terms
-            ):
-                logger.error("response is not a list of strings.")
-                continue
-
-        except Exception as e:
-            logger.warning(f"failed to generate video terms: {str(e)}")
-            if response:
-                match = re.search(r"\[.*]", response)
-                if match:
-                    try:
-                        search_terms = json.loads(match.group())
-                    except Exception as e:
-                        logger.warning(f"failed to generate video terms: {str(e)}")
-                        pass
-
-        if search_terms and len(search_terms) > 0:
-            break
-        if i < _max_retries:
-            logger.warning(f"failed to generate video terms, trying again... {i + 1}")
-
-    logger.success(f"completed: \n{search_terms}")
-    return search_terms
+    # A single malformed reply should not fail the whole task, so query the
+    # LLM up to _max_retries times and keep the last parsing error to report.
+    last_error = ValueError("no LLM attempt was made to generate image prompts")
+    for attempt in range(_max_retries):
+        try:
+            prompts = _parse_prompt_array(_generate_response(prompt))
+        except ValueError as err:
+            last_error = err
+            logger.warning(f"failed to generate image prompts, trying again... {attempt + 1}")
+            continue
+        logger.success(f"completed: \n{prompts}")
+        return prompts

+    raise last_error

 if __name__ == "__main__":
    video_subject = "生命的意义是什么"
--- a/app/services/task.py
+++ b/app/services/task.py
@ -8,7 +8,7 @@ from loguru import logger
 from app.config import config
 from app.models import const
 from app.models.schema import VideoConcatMode, VideoParams
-from app.services import llm, material, subtitle, video, voice
+from app.services import llm, material, subtitle, video, voice, imagegen
 from app.services import state as sm
 from app.utils import utils

@ -124,7 +124,21 @@ def generate_subtitle(task_id, params, video_script, sub_maker, audio_file):


 def get_video_materials(task_id, params, video_terms, audio_duration):
-    if params.video_source == "local":
+    if params.video_source == "local-ai":
+        logger.info("\n\n## generating AI images as materials")
+        prompts = video_terms if isinstance(video_terms, list) else [video_terms]
+        image_paths = []
+        for i, prompt in enumerate(prompts):
+            output_path = path.join(utils.task_dir(task_id), f"aiimg_{i}.png")
+            imagegen.generate_image(prompt, output_path)
+            image_paths.append(output_path)
+        materials = []
+        for img_path in image_paths:
+            m = material.MaterialInfo(provider="local-ai", url=img_path, duration=0)
+            materials.append(m)
+        processed = video.preprocess_video(materials, clip_duration=params.video_clip_duration)
+        return [material.url for material in materials]
+    elif params.video_source == "local":
        logger.info("\n\n## preprocess local materials")
        materials = video.preprocess_video(
            materials=params.video_materials, clip_duration=params.video_clip_duration
--- a/webui/Main.py
+++ b/webui/Main.py
@ -532,6 +532,7 @@ with middle_panel:
            (tr("Pexels"), "pexels"),
            (tr("Pixabay"), "pixabay"),
            (tr("Local file"), "local"),
+            (tr("AI Image (local)"), "local-ai"),
            (tr("TikTok"), "douyin"),
            (tr("Bilibili"), "bilibili"),
            (tr("Xiaohongshu"), "xiaohongshu"),
@ -904,7 +905,7 @@ if start_button:
        scroll_to_bottom()
        st.stop()

-    if params.video_source not in ["pexels", "pixabay", "local"]:
+    if params.video_source not in ["pexels", "pixabay", "local", "local-ai", "douyin", "bilibili", "xiaohongshu"]:
        st.error(tr("Please Select a Valid Video Source"))
        scroll_to_bottom()
        st.stop()
@ -966,6 +967,8 @@ if start_button:
    except Exception:
        pass

+    # logger.info(tr("!!! Uploading Video Files To Tiktok!!!"))
+
    open_task_folder(task_id)
    logger.info(tr("Video Generation Completed"))
    scroll_to_bottom()