mirror of
https://github.com/harry0703/MoneyPrinterTurbo.git
synced 2026-02-21 16:37:21 +08:00
430 lines · 16 KiB · Python
import json
import logging
import math
import os
import random
import subprocess
from typing import List
from urllib.parse import urlencode

import requests
from loguru import logger

from app.config import config
from app.models.schema import MaterialInfo, VideoAspect, VideoConcatMode
from app.services import llm
from app.services import state as sm
from app.utils import utils
|
|
|
# Round-robin cursor shared by all get_api_key() calls in this process.
requested_count = 0


def get_api_key(cfg_key: str):
    """Return an API key for *cfg_key* from the app config.

    A single key configured as a plain string is returned as-is; a list of
    keys is rotated round-robin, advancing one slot per call.
    Raises ValueError when the config entry is missing or empty.
    """
    global requested_count

    keys = config.app.get(cfg_key)
    if not keys:
        message = (
            f"\n\n##### {cfg_key} is not set #####\n\nPlease set it in the config.toml file: {config.config_file}\n\n"
            f"{utils.to_json(config.app)}"
        )
        raise ValueError(message)

    # if only one key is provided, return it
    if isinstance(keys, str):
        return keys

    requested_count += 1
    return keys[requested_count % len(keys)]
|
|
|
|
|
|
def search_videos_pexels(
    search_term: str,
    minimum_duration: int,
    video_aspect: VideoAspect = VideoAspect.portrait,
) -> List[MaterialInfo]:
    """Search the Pexels video API for clips matching *search_term*.

    Only videos at least *minimum_duration* seconds long are kept, and for
    each hit the highest-resolution rendition whose orientation matches
    *video_aspect* is selected.

    Returns a list of MaterialInfo items (possibly empty); all failures are
    logged and swallowed, yielding an empty list.
    """
    aspect = VideoAspect(video_aspect)
    # Pexels accepts the orientation filter by enum name ("landscape"/"portrait").
    video_orientation = aspect.name
    api_key = get_api_key("pexels_api_keys")
    headers = {
        "Authorization": api_key,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
    }
    # Build URL
    params = {
        "query": search_term,
        "page": 1,
        "per_page": 5,
        "orientation": video_orientation,
        "size": "large",
        "locale": "en-US",
    }
    query_url = f"https://api.pexels.com/videos/search?{urlencode(params)}"
    logger.info(f"searching videos: {query_url}, with proxies: {config.proxy}")

    try:
        r = requests.get(
            query_url,
            headers=headers,
            proxies=config.proxy,
            verify=False,  # NOTE(review): TLS verification disabled — confirm intentional
            timeout=(30, 60),
        )
        response = r.json()
        video_items = []
        if "videos" not in response:
            logger.error(f"search videos failed: {response}")
            return video_items

        for v in response["videos"]:
            duration = v.get("duration")
            if not duration or duration < minimum_duration:
                continue

            # ---- Dynamic aspect ratio filtering logic ----
            # Find the highest-resolution rendition that matches the
            # requested orientation.
            best_matching_file = None
            max_resolution = 0
            for video_file in v.get("video_files", []):
                width = video_file.get("width")
                height = video_file.get("height")
                # Ensure width and height are available
                if not width or not height:
                    continue

                # Check if the video orientation matches the requirement
                is_landscape = width > height
                is_portrait = height > width
                if video_aspect == VideoAspect.landscape and not is_landscape:
                    continue
                if video_aspect == VideoAspect.portrait and not is_portrait:
                    continue

                # Higher total pixel count wins.
                current_resolution = width * height
                if current_resolution > max_resolution:
                    max_resolution = current_resolution
                    best_matching_file = video_file

            # If a matching rendition was found, record it.
            if best_matching_file:
                item = MaterialInfo()
                item.provider = "pexels"
                item.url = best_matching_file["link"]  # link of the best matching version
                item.duration = duration
                item.path = ""
                item.start_time = 0.0
                video_items.append(item)
                # Fix: use the module's loguru logger instead of the stdlib
                # root logger, which bypasses the app's logging configuration.
                logger.info("选取的Mp4链接地址为{}".format(item.url))
        return video_items
    except Exception as e:
        logger.error(f"search videos failed: {str(e)}")

    return []
|
|
|
|
|
|
def search_videos_pixabay(
    search_term: str,
    minimum_duration: int,
    video_aspect: VideoAspect = VideoAspect.portrait,
    category: str = "",
) -> List[MaterialInfo]:
    """Search the Pixabay video API for clips matching *search_term*.

    Performs up to three progressively broader searches:
      1. with *category* (if given) and editors_choice=true,
      2. without category, still editors_choice=true,
      3. without editors_choice.
    The first attempt that yields results wins. Only videos at least
    *minimum_duration* seconds long are kept. Returns a (possibly empty)
    list of MaterialInfo items; all request errors are logged and swallowed.
    """
    aspect = VideoAspect(video_aspect)
    # Target resolution is used only as min_width/min_height API filters below.
    video_width, video_height = aspect.to_resolution()
    api_key = get_api_key("pixabay_api_keys")

    def perform_search(params):
        # Run one Pixabay query and convert its hits to MaterialInfo items.
        params["key"] = api_key
        query_url = f"https://pixabay.com/api/videos/?{urlencode(params)}"
        logger.info(f"Searching videos: {query_url}, with proxies: {config.proxy}")
        try:
            r = requests.get(
                query_url,
                proxies=config.proxy,
                verify=False,  # NOTE(review): TLS verification disabled — confirm intentional
                timeout=(30, 60),
            )
            r.raise_for_status()
            response = r.json()
            if "hits" not in response or not response["hits"]:
                return []

            video_items = []
            for v in response["hits"]:
                duration = v.get("duration")
                if not duration or duration < minimum_duration:
                    continue

                video_files = v.get("videos", {})
                best_video = None
                # Simplified logic to find a suitable video rendition:
                # walk renditions largest-first and take the first one whose
                # orientation matches the requested aspect.
                for size in ["large", "medium", "small", "tiny"]:
                    rendition = video_files.get(size)
                    if not rendition or not rendition.get("url"):
                        continue

                    width = rendition.get("width", 0)
                    height = rendition.get("height", 0)

                    is_portrait = height > width
                    is_landscape = width > height

                    if aspect == VideoAspect.portrait and is_portrait:
                        best_video = rendition
                        break
                    elif aspect != VideoAspect.portrait and is_landscape:
                        best_video = rendition
                        break

                # Fallback to any available rendition if exact aspect not found.
                if not best_video:
                    for size in ["large", "medium", "small", "tiny"]:
                        if video_files.get(size) and video_files.get(size).get("url"):
                            best_video = video_files.get(size)
                            break

                if best_video:
                    item = MaterialInfo()
                    item.provider = "pixabay"
                    item.url = best_video.get("url")
                    item.duration = duration
                    item.path = ""
                    item.start_time = 0.0
                    video_items.append(item)

            return video_items

        except requests.exceptions.RequestException as e:
            logger.error(f"Search videos failed: {str(e)}")
            return []
        except Exception as e:
            logger.error(f"An unexpected error occurred during video search: {str(e)}")
            return []

    # Attempt 1: Strict search with category and editors_choice
    logger.info("Attempt 1: Strict search with category and editors_choice")
    params = {
        "q": search_term,
        "video_type": "film",
        "safesearch": "true",
        "editors_choice": "true",
        "order": "popular",
        "page": 1,
        "per_page": 10,
    }
    if category:
        params["category"] = category
    if video_width > 0:
        params["min_width"] = video_width
    if video_height > 0:
        params["min_height"] = video_height

    results = perform_search(params)
    if results:
        logger.success(f"Found {len(results)} videos on first attempt.")
        return results

    # Attempt 2: Search with editors_choice but without category
    # (params is reused in place; only the dropped filters change.)
    logger.warning("First attempt failed. Attempt 2: Retrying without category.")
    params.pop("category", None)
    results = perform_search(params)
    if results:
        logger.success(f"Found {len(results)} videos on second attempt.")
        return results

    # Attempt 3: Broadest search, without editors_choice
    logger.warning("Second attempt failed. Attempt 3: Retrying with broadest settings.")
    params.pop("editors_choice", None)
    results = perform_search(params)
    if results:
        logger.success(f"Found {len(results)} videos on third attempt.")
    else:
        logger.error("All search attempts failed to find any videos.")

    return results
|
|
|
|
|
|
def _get_video_info_ffprobe(video_path: str) -> dict:
    """Probe *video_path* with ffprobe and return its duration and frame rate.

    Returns a dict {"duration": float, "fps": float}, or None when the file
    has no video stream or probing fails for any reason (ffprobe missing or
    erroring, unparsable JSON/frame-rate output, zero denominator, ...).
    """
    command = [
        "ffprobe",
        "-v", "quiet",
        "-print_format", "json",
        "-show_format",
        "-show_streams",
        video_path
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
        info = json.loads(result.stdout)
        video_stream = next((s for s in info['streams'] if s['codec_type'] == 'video'), None)
        if not video_stream:
            return None

        # Prefer avg_frame_rate, falling back to r_frame_rate; both are
        # "num/den" fraction strings per ffprobe's JSON output.
        fps_str = video_stream.get('avg_frame_rate', video_stream.get('r_frame_rate', '0/1'))
        num, den = map(int, fps_str.split('/'))
        fps = num / den if den != 0 else 0

        return {
            # Stream-level duration is preferred; container duration is the fallback.
            "duration": float(video_stream.get('duration', info['format'].get('duration', 0))),
            "fps": fps
        }
    # Fix: also catch FileNotFoundError (ffprobe binary not installed) and
    # ValueError (non-"num/den" frame-rate strings, non-numeric durations),
    # which previously escaped and crashed the caller.
    except (subprocess.CalledProcessError, FileNotFoundError, json.JSONDecodeError,
            ValueError, StopIteration, KeyError, ZeroDivisionError) as e:
        logger.error(f"Failed to get video info for {video_path} using ffprobe: {e}")
        return None
|
|
|
|
|
|
def save_video(video_url: str, save_dir: str = "") -> str:
    """Download *video_url* into *save_dir* and validate it with ffprobe.

    The file is cached under a name derived from the MD5 of the URL without
    its query string, so signed URLs for the same asset reuse one file.
    Returns the local path on success; returns "" (and deletes the file)
    when the download is not a playable video. Network errors propagate.
    """
    if not save_dir:
        save_dir = utils.storage_dir("cache_videos")

    # Fix: race-safe directory creation instead of exists()+makedirs().
    os.makedirs(save_dir, exist_ok=True)

    url_without_query = video_url.split("?")[0]
    url_hash = utils.md5(url_without_query)
    video_id = f"vid-{url_hash}"
    video_path = f"{save_dir}/{video_id}.mp4"

    # if video already exists, return the path
    if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
        logger.info(f"video already exists: {video_path}")
        return video_path

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
    }

    # Fix: stream the download in chunks instead of buffering the whole
    # response body in memory (stock videos can be tens of megabytes).
    with requests.get(
        video_url,
        headers=headers,
        proxies=config.proxy,
        verify=False,  # NOTE(review): TLS verification disabled — confirm intentional
        timeout=(60, 240),
        stream=True,
    ) as r, open(video_path, "wb") as f:
        for chunk in r.iter_content(chunk_size=1024 * 1024):
            if chunk:
                f.write(chunk)

    if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
        try:
            # Validate the file really is a video with sane duration/fps;
            # a 404 HTML page saved as .mp4 fails this check.
            info = _get_video_info_ffprobe(video_path)
            if info and info.get("duration", 0) > 0 and info.get("fps", 0) > 0:
                logger.info(f"video validated: {video_path}")
                return video_path
            else:
                raise ValueError("Invalid video file, duration or fps is 0.")
        except Exception as e:
            # Remove the corrupt file so the cache check above cannot reuse it.
            try:
                os.remove(video_path)
            except Exception:
                pass
            logger.warning(f"invalid video file: {video_path} => {str(e)}")
    return ""
|
|
|
|
|
|
def download_videos_for_clips(video_search_terms: List[str], num_clips: int, source: str, video_aspect: VideoAspect) -> List[MaterialInfo]:
    """Download *num_clips* video clips for the given search terms.

    Searches *source* ("pexels" or "pixabay"; anything else yields nothing)
    one shuffled term at a time, downloading at most one previously unused
    video per term. If unique clips run out, already-downloaded ones are
    reused to reach *num_clips*; if none could be downloaded at all, an
    empty list is returned.
    """
    import itertools

    logger.info(f"Attempting to download {num_clips} unique video clips for {len(video_search_terms)} terms.")
    downloaded_videos = []
    used_video_urls = set()

    if not video_search_terms:
        logger.error("No video search terms provided. Cannot download videos.")
        return []

    # Expand search terms if not enough for the number of clips
    if len(video_search_terms) < num_clips:
        logger.warning(f"Number of search terms ({len(video_search_terms)}) is less than the required number of clips ({num_clips}). Reusing terms.")
        video_search_terms = list(itertools.islice(itertools.cycle(video_search_terms), num_clips))

    search_term_queue = list(video_search_terms)
    random.shuffle(search_term_queue)

    while len(downloaded_videos) < num_clips and search_term_queue:
        term = search_term_queue.pop(0)
        try:
            if source == "pexels":
                video_items = search_videos_pexels(
                    search_term=term,
                    minimum_duration=5,
                    video_aspect=video_aspect,
                )
            elif source == "pixabay":
                video_items = search_videos_pixabay(
                    search_term=term,
                    minimum_duration=5,
                    video_aspect=video_aspect,
                )
            else:
                video_items = []

            if not video_items:
                logger.warning(f"No video results for term: '{term}'")
                continue

            random.shuffle(video_items)

            for item in video_items:
                if item.url in used_video_urls:
                    continue

                logger.info(f"Downloading video for term '{term}': {item.url}")
                file_path = save_video(item.url)
                if file_path:
                    # Fix: _get_video_info_ffprobe may return None; guard it
                    # so a probe failure doesn't raise AttributeError and
                    # silently discard a successfully downloaded clip.
                    info = _get_video_info_ffprobe(file_path) or {}
                    video_material = MaterialInfo(
                        path=file_path,
                        url=item.url,
                        duration=info.get("duration", 0.0),
                        start_time=0.0
                    )
                    downloaded_videos.append(video_material)
                    used_video_urls.add(item.url)
                    logger.info(f"Video saved: {file_path}")
                    break  # Move to the next search term
                else:
                    logger.warning(f"Video download failed: {item.url}")

        except Exception as e:
            logger.error(f"Error processing search term '{term}': {e}")

    # Fallback: If not enough unique videos were found, reuse the ones we have
    if downloaded_videos and len(downloaded_videos) < num_clips:
        logger.warning(f"Could not find enough unique videos. Required: {num_clips}, Found: {len(downloaded_videos)}. Reusing downloaded videos.")
        needed = num_clips - len(downloaded_videos)
        reused_videos = list(itertools.islice(itertools.cycle(downloaded_videos), needed))
        downloaded_videos.extend(reused_videos)

    if len(downloaded_videos) < num_clips:
        logger.error(f"Failed to download enough videos. Required: {num_clips}, Found: {len(downloaded_videos)}. Aborting.")
        return []

    logger.success(f"Successfully downloaded {len(downloaded_videos)} video clips.")
    return downloaded_videos
|
|
|
|
def download_videos(
    task_id: str,
    video_subject: str,
    search_terms: List[str],
    source: str = "pexels",
    video_aspect: VideoAspect = VideoAspect.portrait,
    video_concat_mode: VideoConcatMode = VideoConcatMode.random,
    audio_duration: float = 0.0,
    max_clip_duration: int = 5,
) -> List[MaterialInfo]:
    """Download enough clips from *source* to cover *audio_duration* seconds.

    Computes the clip count as ceil(audio_duration / max_clip_duration)
    (1 when max_clip_duration is 0) and delegates to
    download_videos_for_clips. Updates the task's status message first.
    NOTE(review): video_subject and video_concat_mode are currently unused
    here — confirm whether callers still rely on passing them.
    """
    # `sm` requires the module-level import `from app.services import state as sm`,
    # which was previously missing and made this call raise NameError.
    sm.state.update_task(task_id, status_message=f"Downloading videos for terms: {search_terms}")
    # One clip per max_clip_duration-second slice of the audio, rounded up.
    num_clips = math.ceil(audio_duration / max_clip_duration) if max_clip_duration > 0 else 1
    logger.info(f"Required audio duration: {audio_duration:.2f}s, max_clip_duration: {max_clip_duration}s. Calculated number of clips: {num_clips}")
    # Fix: video_aspect was not forwarded, which raised TypeError because
    # download_videos_for_clips requires it as a positional parameter.
    return download_videos_for_clips(
        video_search_terms=search_terms,
        num_clips=num_clips,
        source=source,
        video_aspect=video_aspect,
    )
|
|
|
|
|
|
# Debug entry point below; for development/testing only.
if __name__ == "__main__":
    download_videos(
        "test123",
        ["Money Exchange Medium"],
        ["Money Exchange Medium"],
        audio_duration=100,
        source="pixabay",
    )
|