From fd5c92423854efa5d095547c172657ddc208ca9f Mon Sep 17 00:00:00 2001
From: yanjianzao <yanjianzao@gmail.com>
Date: Tue, 8 Jul 2025 15:26:06 +0800
Subject: [PATCH] Download one material per clip and concatenate
 fixed-duration segments (generated videos look decent)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/models/schema.py     |  10 ++-
 app/services/material.py | 162 ++++++++++++++++++++-------------------
 app/services/task.py     |  63 ++++++++-------
 app/services/video.py    | 102 +++++++++++-------------
 4 files changed, 172 insertions(+), 165 deletions(-)

diff --git a/app/models/schema.py b/app/models/schema.py
index 8f0bd32..045911a 100644
--- a/app/models/schema.py
+++ b/app/models/schema.py
@@ -50,7 +50,15 @@ class _Config:
 class MaterialInfo:
     provider: str = "pexels"
     url: str = ""
-    duration: int = 0
+    path: str = ""
+    duration: float = 0.0
+    start_time: float = 0.0
+
+
+@pydantic.dataclasses.dataclass(config=_Config)
+class VideoSegment:  # one trimmed piece of a source video, concatenated with others into a clip
+    path: str  # local file path of the source video (passed to ffmpeg as an input)
+    duration: float  # length to take from the start of the source video, in seconds
 
 
 class VideoParams(BaseModel):
diff --git a/app/services/material.py b/app/services/material.py
index fe01971..c7152d6 100644
--- a/app/services/material.py
+++ b/app/services/material.py
@@ -3,6 +3,7 @@ import os
 import random
 from typing import List
 from urllib.parse import urlencode
+import math
 
 import requests
 from loguru import logger
@@ -317,6 +318,85 @@ def save_video(video_url: str, save_dir: str = "") -> str:
     return ""
 
 
+def download_videos_for_clips(video_search_terms: List[str], num_clips: int, source: str) -> List[MaterialInfo]:
+    """
+    Download one stock video per required clip, skipping duplicate URLs.
+
+    Terms are shuffled and tried in turn, keeping the first successful download
+    per term; if too few unique videos are found, downloads are reused cyclically.
+
+    Args:
+        video_search_terms: Terms to search; cycled when fewer than ``num_clips``.
+        num_clips: Number of materials to return.
+        source: "pexels" or "pixabay"; any other value yields no results.
+
+    Returns:
+        ``num_clips`` MaterialInfo objects, or [] if nothing could be downloaded.
+    """
+    import itertools
+
+    logger.info(f"Attempting to download {num_clips} unique video clips for {len(video_search_terms)} terms.")
+    if not video_search_terms:
+        logger.error("No video search terms provided. Cannot download videos.")
+        return []
+
+    # Expand search terms if not enough for the number of clips
+    if len(video_search_terms) < num_clips:
+        logger.warning(f"Number of search terms ({len(video_search_terms)}) is less than the required number of clips ({num_clips}). Reusing terms.")
+        video_search_terms = list(itertools.islice(itertools.cycle(video_search_terms), num_clips))
+
+    search_videos = {"pexels": search_videos_pexels, "pixabay": search_videos_pixabay}.get(source)
+    downloaded_videos = []
+    used_video_urls = set()
+    search_term_queue = list(video_search_terms)
+    random.shuffle(search_term_queue)
+
+    for term in search_term_queue:
+        if len(downloaded_videos) >= num_clips:
+            break
+        try:
+            video_items = []
+            if search_videos is not None:
+                video_items = search_videos(search_term=term, minimum_duration=5, video_aspect=VideoAspect.portrait)
+            if not video_items:
+                logger.warning(f"No video results for term: '{term}'")
+                continue
+
+            random.shuffle(video_items)
+            for item in video_items:
+                if item.url in used_video_urls:
+                    continue
+
+                logger.info(f"Downloading video for term '{term}': {item.url}")
+                file_path = save_video(item.url)
+                if not file_path:
+                    logger.warning(f"Video download failed: {item.url}")
+                    continue
+
+                # ffprobe may yield no result or no duration; fall back to the search API's value.
+                probe_info = _get_video_info_ffprobe(file_path) or {}
+                duration = float(probe_info.get("duration") or item.duration or 0.0)
+                downloaded_videos.append(MaterialInfo(provider=item.provider, path=file_path, url=item.url, duration=duration, start_time=0.0))
+                used_video_urls.add(item.url)
+                logger.info(f"Video saved: {file_path}")
+                break  # Move to the next search term
+        except Exception as e:
+            logger.error(f"Error processing search term '{term}': {e}")
+
+    # Fallback: If not enough unique videos were found, reuse the ones we have
+    if downloaded_videos and len(downloaded_videos) < num_clips:
+        logger.warning(f"Could not find enough unique videos. Required: {num_clips}, Found: {len(downloaded_videos)}. Reusing downloaded videos.")
+        needed = num_clips - len(downloaded_videos)
+        reused_videos = list(itertools.islice(itertools.cycle(downloaded_videos), needed))
+        downloaded_videos.extend(reused_videos)
+
+    if len(downloaded_videos) < num_clips:
+        logger.error(f"Failed to download enough videos. Required: {num_clips}, Found: {len(downloaded_videos)}. Aborting.")
+        return []
+
+    logger.success(f"Successfully downloaded {len(downloaded_videos)} video clips.")
+    return downloaded_videos
+
 def download_videos(
     task_id: str,
     video_subject: str,
@@ -327,86 +407,14 @@ def download_videos(
     audio_duration: float = 0.0,
     max_clip_duration: int = 5,
 ) -> List[MaterialInfo]:
-    """
-    Download videos from Pexels or Pixabay based on search terms.
-    """
-    all_video_items: List[MaterialInfo] = []
-    for term in search_terms:
-        if source == "pexels":
-            video_items = search_videos_pexels(
-                search_term=term,
-                minimum_duration=max_clip_duration,
-                video_aspect=video_aspect,
-            )
-        elif source == "pixabay":
-            video_items = search_videos_pixabay(
-                search_term=term,
-                minimum_duration=max_clip_duration,
-                video_aspect=video_aspect,
-            )
-        else:
-            video_items = []
-        
-        logger.info(f"found {len(video_items)} videos for '{term}'")
-        all_video_items.extend(video_items)
-
-    # Remove duplicates and calculate total duration
-    unique_video_items = []
-    seen_urls = set()
-    for item in all_video_items:
-        if item.url not in seen_urls:
-            unique_video_items.append(item)
-            seen_urls.add(item.url)
-
-    if video_concat_mode == VideoConcatMode.random:
-        random.shuffle(unique_video_items)
-
-    found_duration = sum(item.duration for item in unique_video_items)
-    logger.info(f"found total unique videos: {len(unique_video_items)}, required duration: {audio_duration:.4f} seconds, found duration: {found_duration:.2f} seconds")
-    logger.info(f"Video download list (first 5): {[item.url for item in unique_video_items[:5]]}")
-
-    if not unique_video_items:
-        logger.warning("No videos found for the given search terms.")
-        return []
-
-    if found_duration < audio_duration:
-        logger.warning(f"total duration of found videos ({found_duration:.2f}s) is less than audio duration ({audio_duration:.2f}s).")
-
-    downloaded_materials: List[MaterialInfo] = []
-    downloaded_duration = 0.0
-    
-    for item in unique_video_items:
-        if downloaded_duration >= audio_duration:
-            logger.info(f"total duration of downloaded videos: {downloaded_duration:.2f} seconds, skip downloading more")
-            break
-        
-        try:
-            logger.info(f"downloading video: {item.url}")
-            file_path = save_video(video_url=item.url)
-            if file_path:
-                logger.info(f"video saved: {file_path}")
-                material_info = MaterialInfo()
-                material_info.path = file_path
-                material_info.start_time = 0.0
-                ffprobe_info = _get_video_info_ffprobe(file_path)
-                if ffprobe_info and ffprobe_info.get("duration"):
-                    material_info.duration = float(ffprobe_info.get("duration"))
-                    downloaded_duration += material_info.duration
-                else:
-                    material_info.duration = item.duration # fallback
-                    downloaded_duration += item.duration
-                
-                downloaded_materials.append(material_info)
-
-        except Exception as e:
-            logger.error(f"failed to download video: {item.url} => {e}")
-
-    logger.success(f"downloaded {len(downloaded_materials)} videos")
-    return downloaded_materials
+    sm.state.update_task(task_id, status_message=f"Downloading videos for terms: {search_terms}")
+    num_clips = math.ceil(audio_duration / max_clip_duration) if max_clip_duration > 0 else 1
+    logger.info(f"Required audio duration: {audio_duration:.2f}s, max_clip_duration: {max_clip_duration}s. Calculated number of clips: {num_clips}")
+    return download_videos_for_clips(video_search_terms=search_terms, num_clips=num_clips, source=source)
 
 
 # 以下为调试入口，仅供开发测试
 if __name__ == "__main__":
     download_videos(
-        "test123", ["Money Exchange Medium"], audio_duration=100, source="pixabay"
+        "test123", ["Money Exchange Medium"], ["Money Exchange Medium"], audio_duration=100, source="pixabay"
     )
diff --git a/app/services/task.py b/app/services/task.py
index f0928cd..caef46c 100644
--- a/app/services/task.py
+++ b/app/services/task.py
@@ -12,6 +12,7 @@ from app.models.schema import (
     VideoParams,
     VideoAspect,
     MaterialInfo,
+    VideoSegment,
 )
 from app.services import llm, material, subtitle, voice, video
 from app.services import video as video_utils
@@ -91,39 +92,43 @@ def start_storyboard_task(task_id, params: VideoParams):
             audio_duration = voice.get_audio_duration(sub_maker)
             total_duration += audio_duration
 
-            # b. Search and download video materials for each term
-            video_materials = []
-            downloaded_duration = 0
-            for term in search_terms:
-                if downloaded_duration >= audio_duration:
-                    break
-                term_materials = material.download_videos(
-                    task_id=task_id,
-                    video_subject=params.video_subject,
-                    search_terms=[term],  # Pass one term at a time
-                    source=params.video_source,
-                    video_aspect=params.video_aspect,
-                    video_concat_mode=params.video_concat_mode,
-                    audio_duration=audio_duration - downloaded_duration,
-                    max_clip_duration=params.max_clip_duration,
-                )
-                if term_materials:
-                    video_materials.extend(term_materials)
-                    downloaded_duration = sum(m.duration for m in video_materials)
-            if not video_materials:
-                raise Exception(f"Failed to find materials for segment {i + 1}")
+            # b. Calculate the number of clips needed and download them
+            num_clips = math.ceil(audio_duration / params.max_clip_duration) if params.max_clip_duration > 0 else 1
+            logger.info(f"Segment {i+1} audio duration: {audio_duration:.2f}s, max_clip_duration: {params.max_clip_duration}s. Calculated number of clips: {num_clips}")
 
-            # c. Create a video clip matching the audio duration
-            segment_video_path = path.join(workdir, f"segment_video_{i + 1}.mp4")
-            clip_created = video.create_video_clip_from_materials(
-                video_materials=video_materials,
-                audio_duration=audio_duration,
-                max_clip_duration=params.max_clip_duration,
+            video_materials = material.download_videos_for_clips(
+                video_search_terms=search_terms,
+                num_clips=num_clips,
+                source=params.video_source
+            )
+            if not video_materials or len(video_materials) < num_clips:
+                sm.state.update_task(task_id, state=const.TASK_STATE_FAILED, status_message=f"Failed to download enough video materials for segment {i + 1}")
+                return
+
+            # c. Create video clip by combining materials with precise durations
+            video_segments = []
+            remaining_audio_duration = audio_duration
+            for video_material in video_materials:
+                if remaining_audio_duration <= 0:
+                    break
+                clip_duration = min(remaining_audio_duration, params.max_clip_duration)
+                video_segments.append(VideoSegment(path=video_material.path, duration=clip_duration))
+                remaining_audio_duration -= clip_duration
+
+            # If the total duration of the clips is still less than the audio duration, adjust the last clip
+            if remaining_audio_duration > 0.01 and video_segments:
+                video_segments[-1].duration += remaining_audio_duration
+
+            segment_video_path = os.path.join(workdir, f"segment_video_{i + 1}.mp4")
+            video_created = video.create_video_clip_from_segments(
+                segments=video_segments,
                 video_aspect=params.video_aspect,
                 output_path=segment_video_path
             )
-            if not clip_created:
-                raise Exception(f"Failed to create video clip for segment {i + 1}")
+
+            if not video_created:
+                sm.state.update_task(task_id, state=const.TASK_STATE_FAILED, status_message=f"Video clip creation failed for segment {i + 1}")
+                return
 
             segment_video_paths.append(segment_video_path)
             segment_audio_paths.append(segment_audio_file)
diff --git a/app/services/video.py b/app/services/video.py
index 5d18d0f..7284ac8 100644
--- a/app/services/video.py
+++ b/app/services/video.py
@@ -87,91 +87,75 @@ def delete_files(files: List[str] | str):
                 logger.warning(f"Failed to delete file {file}: {e}")
 
 
-def create_video_clip_from_materials(video_materials: list, audio_duration: float, max_clip_duration: int, video_aspect: VideoAspect, output_path: str):
-    logger.info(f"Optimized: Creating video clip for {output_path} with duration {audio_duration:.2f}s using ffmpeg")
+def create_video_clip_from_segments(segments: list, video_aspect: VideoAspect, output_path: str):
+    """
+    Creates a video clip by concatenating pre-defined video segments.
 
-    if audio_duration <= 0:
-        logger.warning("Audio duration is zero or negative, cannot create video clip.")
+    Args:
+        segments (list): A list of VideoSegment objects, where each object represents a video segment
+                         and contains 'path' and 'duration' attributes.
+        video_aspect (VideoAspect): The aspect ratio of the output video.
+        output_path (str): The path to save the output video clip.
+
+    Returns:
+        bool: True if the command was successful, False otherwise.
+    """
+    if not segments:
+        logger.warning("No video segments provided, cannot create video clip.")
         return False
 
-    total_duration_of_materials = sum(m.duration for m in video_materials)
-    if total_duration_of_materials < audio_duration:
-        logger.warning(f"Total material duration ({total_duration_of_materials}s) is less than audio duration ({audio_duration}s). Video will be shorter.")
-        audio_duration = total_duration_of_materials
-
     w, h = video_aspect.to_resolution()
-    # Use the most robust method: scale to fill, then crop to center.
-    # This avoids black bars by ensuring the video fills the frame, cropping excess.
     scale_filter = f"scale={w}:{h}:force_original_aspect_ratio=increase"
     crop_filter = f"crop={w}:{h}"
-    fade_in_filter = "fade=in:st=0:d=0.5"
+    sar_filter = "setsar=1"
+    fps_filter = "fps=30"
 
     filter_complex_parts = []
     concat_inputs = ""
-    time_so_far = 0.0
+    input_files = []
+    input_mappings = {}
 
-    # If only one material, just trim and process it
-    if len(video_materials) == 1:
-        material = video_materials[0]
-        duration_needed = audio_duration
-        start_time = material.start_time if material.start_time >= 0 else 0
-        trim_filter = f"[0:v]trim=start={start_time}:duration={duration_needed},setpts=PTS-STARTPTS"
-        sar_filter = "setsar=1"
+    total_duration = sum(seg.duration for seg in segments)
 
-        command = [
-            "ffmpeg",
-            "-y",
-            "-i", material.path,
-            "-vf", f"{trim_filter},{sar_filter},{scale_filter},{crop_filter},{fade_in_filter}",
-            "-an",  # remove audio
-            "-c:v", "libx264",
-            "-preset", "ultrafast",
-            "-crf", "23",
-            "-maxrate", "10M",
-            "-bufsize", "20M",
-            "-r", "30",
-            output_path
-        ]
-        return _run_ffmpeg_command(command)
+    for i, segment in enumerate(segments):
+        input_path = segment.path
+        duration = segment.duration
 
-    # If multiple materials, create clips and concatenate
-    for i, material in enumerate(video_materials):
-        if time_so_far >= audio_duration:
-            break
+        if input_path not in input_mappings:
+            input_mappings[input_path] = len(input_files)
+            input_files.append(input_path)
 
-        duration_from_this_clip = min(material.duration, audio_duration - time_so_far, max_clip_duration)
-        if duration_from_this_clip <= 0:
-            continue
+        input_idx = input_mappings[input_path]
+        input_specifier = f"[{input_idx}:v]"
 
-        start_time = material.start_time if material.start_time >= 0 else 0
-        trim_filter = f"[{i}:v]trim=start={start_time}:duration={duration_from_this_clip},setpts=PTS-STARTPTS"
-        sar_filter = "setsar=1"
-        filter_complex_parts.append(f"{trim_filter},{sar_filter},{scale_filter},{crop_filter}[v{i}]" )
-        concat_inputs += f"[v{i}]"
-        time_so_far += duration_from_this_clip
+        # Each segment is trimmed from the start of the source video.
+        trim_filter = f"{input_specifier}trim=start=0:duration={duration},setpts=PTS-STARTPTS"
 
-    if not filter_complex_parts:
-        logger.error("No video clips could be prepared for concatenation.")
-        return False
+        processed_clip_name = f"[v{i}]"
+        filter_complex_parts.append(f"{trim_filter},{sar_filter},{scale_filter},{crop_filter},{fps_filter}{processed_clip_name}")
+        concat_inputs += processed_clip_name
 
-    concat_filter = f"{concat_inputs}concat=n={len(concat_inputs)//3}:v=1:a=0[outv]"
+    concat_filter = f"{concat_inputs}concat=n={len(segments)}:v=1:a=0[outv]"
     filter_complex_parts.append(concat_filter)
 
     command = [
         "ffmpeg", "-y",
     ]
-    for material in video_materials[:len(concat_inputs)//3]:
-        command.extend(["-i", material.path])
+    for file_path in input_files:
+        command.extend(["-i", file_path])
 
     command.extend([
-        "-filter_complex", ';'.join(filter_complex_parts),
+        "-filter_complex",
+        ";".join(filter_complex_parts),
         "-map", "[outv]",
         "-c:v", "libx264",
         "-an",
         "-r", "30",
+        "-t", str(total_duration),
         output_path
     ])
 
+    logger.info(f"Creating video clip for {output_path} with {len(segments)} segments (total duration: {total_duration:.2f}s) using ffmpeg.")
     return _run_ffmpeg_command(command)
 
 
@@ -340,7 +324,7 @@ def add_bgm_to_video(video_path: str, bgm_path: str, bgm_volume: float, output_p
         "-c:v", "copy",
         "-c:a", "aac",
         "-t", str(video_duration),
-        "-shortest",
+        "-shortest",  # finish encoding when the shortest stream ends
         output_path,
     ]
 
@@ -397,13 +381,15 @@ def add_subtitles_to_video(video_path: str, srt_path: str, font_name: str, font_
         "-i", video_path,
         "-vf", subtitles_filter,
         "-c:v", "libx264",
-        "-c:a", "copy",
-        "-preset", "ultrafast",
+        "-c:a", "aac",
+        "-b:a", "192k",
+        "-shortest",
         output_path
     ]
 
     return _run_ffmpeg_command(command)
 
+
 
 def process_scene_video(material_url: str, output_dir: str, target_duration: float, aspect_ratio: str = "16:9") -> str:
     """