MoneyPrinterTurbo/app/services/material.py

import logging
import os
import random
from typing import List
from urllib.parse import urlencode
import math

import requests
from loguru import logger
import subprocess
import json

from app.config import config
from app.models.schema import MaterialInfo, VideoAspect, VideoConcatMode
from app.services import llm
from app.utils import utils

requested_count = 0


def get_api_key(cfg_key: str):
    api_keys = config.app.get(cfg_key)
    if not api_keys:
        raise ValueError(
            f"\n\n##### {cfg_key} is not set #####\n\nPlease set it in the config.toml file: {config.config_file}\n\n"
            f"{utils.to_json(config.app)}"
        )

    # if only one key is provided, return it
    if isinstance(api_keys, str):
        return api_keys

    global requested_count
    requested_count += 1
    return api_keys[requested_count % len(api_keys)]


def search_videos_pexels(
    search_term: str,
    minimum_duration: int,
    video_aspect: VideoAspect = VideoAspect.portrait,
) -> List[MaterialInfo]:
    aspect = VideoAspect(video_aspect)
    video_orientation = aspect.name
    video_width, video_height = aspect.to_resolution()
    api_key = get_api_key("pexels_api_keys")
    headers = {
        "Authorization": api_key,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
    }
    # Build URL
    params = {"query": search_term, "page": 1, "per_page": 5, "orientation": video_orientation, "size": "large","locale":"en-US"}
    query_url = f"https://api.pexels.com/videos/search?{urlencode(params)}"
    logger.info(f"searching videos: {query_url}, with proxies: {config.proxy}")

    try:
        r = requests.get(
            query_url,
            headers=headers,
            proxies=config.proxy,
            verify=False,
            timeout=(30, 60),
        )
        response = r.json()
        video_items = []
        if "videos" not in response:
            logger.error(f"search videos failed: {response}")
            return video_items

        videos = response["videos"]

        for v in videos:
            duration = v.get("duration")
            if not duration or duration < minimum_duration:
                continue

            video_files = v.get("video_files", [])

            # ---- Dynamic aspect ratio filtering logic ----
            best_matching_file = None
            max_resolution = 0

            # Find the highest resolution video that matches the requested aspect ratio
            for video_file in video_files:
                width = video_file.get("width")
                height = video_file.get("height")

                # Ensure width and height are available
                if not width or not height:
                    continue

                # Check if the video orientation matches the requirement
                is_landscape = width > height
                is_portrait = height > width

                if video_aspect == VideoAspect.landscape and not is_landscape:
                    continue
                if video_aspect == VideoAspect.portrait and not is_portrait:
                    continue

                # Calculate the total pixels for the current resolution
                current_resolution = width * height

                # If the current version has a higher resolution, update the best match
                if current_resolution > max_resolution:
                    max_resolution = current_resolution
                    best_matching_file = video_file

            # If a matching video was found
            if best_matching_file:
                item = MaterialInfo()
                item.provider = "pexels"
                item.url = best_matching_file["link"] # Use the link of the best matching version
                item.duration = duration
                item.path = ""
                item.start_time = 0.0
                video_items.append(item)
        logging.info("选取的Mp4链接地址为{}".format(item.url))
        return video_items

    except Exception as e:
        logger.error(f"search videos failed: {str(e)}")

    return []


def search_videos_pixabay(
    search_term: str,
    minimum_duration: int,
    video_aspect: VideoAspect = VideoAspect.portrait,
    category: str = "",
) -> List[MaterialInfo]:
    aspect = VideoAspect(video_aspect)
    video_width, video_height = aspect.to_resolution()
    api_key = get_api_key("pixabay_api_keys")

    def perform_search(params):
        params["key"] = api_key
        query_url = f"https://pixabay.com/api/videos/?{urlencode(params)}"
        logger.info(f"Searching videos: {query_url}, with proxies: {config.proxy}")
        try:
            r = requests.get(
                query_url,
                proxies=config.proxy,
                verify=False,
                timeout=(30, 60),
            )
            r.raise_for_status()
            response = r.json()
            if "hits" not in response or not response["hits"]:
                return []

            video_items = []
            for v in response["hits"]:
                duration = v.get("duration")
                if not duration or duration < minimum_duration:
                    continue

                video_files = v.get("videos", {})
                best_video = None
                # Simplified logic to find a suitable video rendition
                for size in ["large", "medium", "small", "tiny"]:
                    rendition = video_files.get(size)
                    if not rendition or not rendition.get("url"):
                        continue

                    width = rendition.get("width", 0)
                    height = rendition.get("height", 0)

                    is_portrait = height > width
                    is_landscape = width > height

                    if aspect == VideoAspect.portrait and is_portrait:
                        best_video = rendition
                        break
                    elif aspect != VideoAspect.portrait and is_landscape:
                        best_video = rendition
                        break

                # Fallback to any available video if exact aspect not found
                if not best_video:
                    for size in ["large", "medium", "small", "tiny"]:
                        if video_files.get(size) and video_files.get(size).get("url"):
                            best_video = video_files.get(size)
                            break

                if best_video:
                    item = MaterialInfo()
                    item.provider = "pixabay"
                    item.url = best_video.get("url")
                    item.duration = duration
                    item.path = ""
                    item.start_time = 0.0
                    video_items.append(item)

            return video_items

        except requests.exceptions.RequestException as e:
            logger.error(f"Search videos failed: {str(e)}")
            return []
        except Exception as e:
            logger.error(f"An unexpected error occurred during video search: {str(e)}")
            return []

    # Attempt 1: Strict search with category and editors_choice
    logger.info("Attempt 1: Strict search with category and editors_choice")
    params = {
        "q": search_term,
        "video_type": "film",
        "safesearch": "true",
        "editors_choice": "true",
        "order": "popular",
        "page": 1,
        "per_page": 10,
    }
    if category:
        params["category"] = category
    if video_width > 0:
        params["min_width"] = video_width
    if video_height > 0:
        params["min_height"] = video_height

    results = perform_search(params)
    if results:
        logger.success(f"Found {len(results)} videos on first attempt.")
        return results

    # Attempt 2: Search with editors_choice but without category
    logger.warning("First attempt failed. Attempt 2: Retrying without category.")
    params.pop("category", None)
    results = perform_search(params)
    if results:
        logger.success(f"Found {len(results)} videos on second attempt.")
        return results

    # Attempt 3: Broadest search, without editors_choice
    logger.warning("Second attempt failed. Attempt 3: Retrying with broadest settings.")
    params.pop("editors_choice", None)
    results = perform_search(params)
    if results:
        logger.success(f"Found {len(results)} videos on third attempt.")
    else:
        logger.error("All search attempts failed to find any videos.")

    return results


def _get_video_info_ffprobe(video_path: str) -> dict:
    """
    Get video information using ffprobe.
    """
    command = [
        "ffprobe",
        "-v", "quiet",
        "-print_format", "json",
        "-show_format",
        "-show_streams",
        video_path
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
        info = json.loads(result.stdout)
        video_stream = next((s for s in info['streams'] if s['codec_type'] == 'video'), None)
        if not video_stream:
            return None

        fps_str = video_stream.get('avg_frame_rate', video_stream.get('r_frame_rate', '0/1'))
        num, den = map(int, fps_str.split('/'))
        fps = num / den if den != 0 else 0

        return {
            "duration": float(video_stream.get('duration', info['format'].get('duration', 0))),
            "fps": fps
        }
    except (subprocess.CalledProcessError, json.JSONDecodeError, StopIteration, KeyError, ZeroDivisionError) as e:
        logger.error(f"Failed to get video info for {video_path} using ffprobe: {e}")
        return None


def save_video(video_url: str, save_dir: str = "") -> str:
    if not save_dir:
        save_dir = utils.storage_dir("cache_videos")

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    url_without_query = video_url.split("?")[0]
    url_hash = utils.md5(url_without_query)
    video_id = f"vid-{url_hash}"
    video_path = f"{save_dir}/{video_id}.mp4"

    # if video already exists, return the path
    if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
        logger.info(f"video already exists: {video_path}")
        return video_path

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
    }

    # if video does not exist, download it
    with open(video_path, "wb") as f:
        f.write(
            requests.get(
                video_url,
                headers=headers,
                proxies=config.proxy,
                verify=False,
                timeout=(60, 240),
            ).content
        )

    if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
        try:
            info = _get_video_info_ffprobe(video_path)
            if info and info.get("duration", 0) > 0 and info.get("fps", 0) > 0:
                logger.info(f"video validated: {video_path}")
                return video_path
            else:
                raise ValueError("Invalid video file, duration or fps is 0.")
        except Exception as e:
            try:
                os.remove(video_path)
            except Exception:
                pass
            logger.warning(f"invalid video file: {video_path} => {str(e)}")
    return ""


def download_videos_for_clips(video_search_terms: List[str], num_clips: int, source: str, video_aspect: VideoAspect) -> List[MaterialInfo]:
    logger.info(f"Attempting to download {num_clips} unique video clips for {len(video_search_terms)} terms.")
    downloaded_videos = []
    used_video_urls = set()

    if not video_search_terms:
        logger.error("No video search terms provided. Cannot download videos.")
        return []

    import itertools
    # Expand search terms if not enough for the number of clips
    if len(video_search_terms) < num_clips:
        logger.warning(f"Number of search terms ({len(video_search_terms)}) is less than the required number of clips ({num_clips}). Reusing terms.")
        video_search_terms = list(itertools.islice(itertools.cycle(video_search_terms), num_clips))

    search_term_queue = list(video_search_terms)
    random.shuffle(search_term_queue)

    while len(downloaded_videos) < num_clips and search_term_queue:
        term = search_term_queue.pop(0)
        try:
            if source == "pexels":
                video_items = search_videos_pexels(
                    search_term=term,
                    minimum_duration=5,
                    video_aspect=video_aspect,
                )
            elif source == "pixabay":
                video_items = search_videos_pixabay(
                    search_term=term,
                    minimum_duration=5,
                    video_aspect=video_aspect,
                )
            else:
                video_items = []

            if not video_items:
                logger.warning(f"No video results for term: '{term}'")
                continue

            random.shuffle(video_items)

            for item in video_items:
                if item.url in used_video_urls:
                    continue

                logger.info(f"Downloading video for term '{term}': {item.url}")
                file_path = save_video(item.url)
                if file_path:
                    video_material = MaterialInfo(
                        path=file_path,
                        url=item.url,
                        duration=_get_video_info_ffprobe(file_path).get("duration", 0.0),
                        start_time=0.0
                    )
                    downloaded_videos.append(video_material)
                    used_video_urls.add(item.url)
                    logger.info(f"Video saved: {file_path}")
                    break  # Move to the next search term
                else:
                    logger.warning(f"Video download failed: {item.url}")

        except Exception as e:
            logger.error(f"Error processing search term '{term}': {e}")

    # Fallback: If not enough unique videos were found, reuse the ones we have
    if downloaded_videos and len(downloaded_videos) < num_clips:
        logger.warning(f"Could not find enough unique videos. Required: {num_clips}, Found: {len(downloaded_videos)}. Reusing downloaded videos.")
        needed = num_clips - len(downloaded_videos)
        reused_videos = list(itertools.islice(itertools.cycle(downloaded_videos), needed))
        downloaded_videos.extend(reused_videos)

    if len(downloaded_videos) < num_clips:
        logger.error(f"Failed to download enough videos. Required: {num_clips}, Found: {len(downloaded_videos)}. Aborting.")
        return []

    logger.success(f"Successfully downloaded {len(downloaded_videos)} video clips.")
    return downloaded_videos

def download_videos(
    task_id: str,
    video_subject: str,
    search_terms: List[str],
    source: str = "pexels",
    video_aspect: VideoAspect = VideoAspect.portrait,
    video_concat_mode: VideoConcatMode = VideoConcatMode.random,
    audio_duration: float = 0.0,
    max_clip_duration: int = 5,
) -> List[MaterialInfo]:
    sm.state.update_task(task_id, status_message=f"Downloading videos for terms: {search_terms}")
    num_clips = math.ceil(audio_duration / max_clip_duration) if max_clip_duration > 0 else 1
    logger.info(f"Required audio duration: {audio_duration:.2f}s, max_clip_duration: {max_clip_duration}s. Calculated number of clips: {num_clips}")
    return download_videos_for_clips(video_search_terms=search_terms, num_clips=num_clips, source=source)


# 以下为调试入口，仅供开发测试
if __name__ == "__main__":
    download_videos(
        "test123", ["Money Exchange Medium"], ["Money Exchange Medium"], audio_duration=100, source="pixabay"
    )