diff --git a/app/services/material.py b/app/services/material.py
index 2dda2b2..fd48b24 100644
--- a/app/services/material.py
+++ b/app/services/material.py
@@ -1,5 +1,5 @@
+import os
 import random
-import time
 from urllib.parse import urlencode
 
 import requests
@@ -80,9 +80,31 @@ def search_videos(search_term: str,
     return []
 
 
-def save_video(video_url: str, save_dir: str) -> str:
-    video_id = f"vid-{str(int(time.time() * 1000))}"
+def save_video(video_url: str, save_dir: str = "") -> str:
+    """
+    Download a video into save_dir, reusing a previous download of the same URL.
+
+    The file name is the MD5 of the URL without its query string, so the same
+    video requested with different query parameters maps to a single file.
+
+    :param video_url: URL of the video to download
+    :param save_dir: target directory; when empty, the "cache_videos" storage directory is used
+    """
+    if not save_dir:
+        save_dir = utils.storage_dir("cache_videos")
+
+    url_without_query = video_url.split("?")[0]
+    url_hash = utils.md5(url_without_query)
+    video_id = f"vid-{url_hash}"
     video_path = f"{save_dir}/{video_id}.mp4"
+
+    # Reuse the cached file only when it is non-empty: open(..., "wb") below creates the
+    # file before the download completes, so a failed attempt can leave an empty file behind.
+    if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
+        logger.info(f"video already exists: {video_path}")
+        return video_path
+
+    # if video does not exist, download it
     proxies = config.pexels.get("proxies", None)
     with open(video_path, "wb") as f:
         f.write(requests.get(video_url, proxies=proxies, verify=False, timeout=(10, 180)).content)
@@ -116,7 +138,15 @@ def download_videos(task_id: str,
     logger.info(
         f"found total videos: {len(valid_video_items)}, required duration: {audio_duration} seconds, found duration: {found_duration} seconds")
     video_paths = []
-    save_dir = utils.task_dir(task_id)
+
+    # Resolve where materials are stored: "task" means the task folder, an existing
+    # directory is used as-is, and "" makes save_video fall back to its default.
+    material_directory = config.app.get("material_directory", "").strip()
+    if material_directory == "task":
+        material_directory = utils.task_dir(task_id)
+    elif material_directory and not os.path.isdir(material_directory):
+        logger.warning(f"material directory not found: {material_directory}, using the default cache directory")
+        material_directory = ""
 
     if video_contact_mode.value == VideoConcatMode.random.value:
         random.shuffle(valid_video_items)
@@ -125,7 +155,8 @@ def download_videos(task_id: str,
     for item in valid_video_items:
         try:
             logger.info(f"downloading video: {item.url}")
-            saved_video_path = save_video(item.url, save_dir)
+            saved_video_path = save_video(video_url=item.url, save_dir=material_directory)
+            logger.info(f"video saved: {saved_video_path}")
             video_paths.append(saved_video_path)
             seconds = min(max_clip_duration, item.duration)
             total_duration += seconds
@@ -136,3 +167,7 @@ def download_videos(task_id: str,
             logger.error(f"failed to download video: {utils.to_json(item)} => {str(e)}")
     logger.success(f"downloaded {len(video_paths)} videos")
     return video_paths
+
+
+if __name__ == "__main__":
+    download_videos("test123", ["cat"], audio_duration=100)
diff --git a/app/utils/utils.py b/app/utils/utils.py
index 5a4f7a1..086e65c 100644
--- a/app/utils/utils.py
+++ b/app/utils/utils.py
@@ -70,6 +70,8 @@ def storage_dir(sub_dir: str = ""):
     d = os.path.join(root_dir(), "storage")
     if sub_dir:
         d = os.path.join(d, sub_dir)
+    # exist_ok=True: no error if the directory already exists, so no separate exists() check
+    os.makedirs(d, exist_ok=True)
     return d
 
 
@@ -169,3 +171,10 @@ def split_string_by_punctuations(s):
             result.append(txt.strip())
             txt = ""
     return result
+
+
+def md5(text: str) -> str:
+    """Return the hexadecimal MD5 digest of text encoded as UTF-8."""
+    import hashlib
+
+    return hashlib.md5(text.encode('utf-8')).hexdigest()
diff --git a/config.example.toml b/config.example.toml
index 19d98f4..fd4adba 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -111,6 +111,19 @@
     endpoint=""
 
 
+    # Video material storage location
+    # material_directory = "" # Indicates that video materials will be downloaded to the default folder, the default folder is ./storage/cache_videos under the current project
+    # material_directory = "/user/harry/videos" # Indicates that video materials will be downloaded to a specified folder
+    # material_directory = "task" # Indicates that video materials will be downloaded to the current task's folder, this method does not allow sharing of already downloaded video materials
+
+    # 视频素材存放位置
+    # material_directory = "" #表示将视频素材下载到默认的文件夹,默认文件夹为当前项目下的 ./storage/cache_videos
+    # material_directory = "/user/harry/videos" #表示将视频素材下载到指定的文件夹中
+    # material_directory = "task" #表示将视频素材下载到当前任务的文件夹中,这种方式无法共享已经下载的视频素材
+
+    material_directory = ""
+
+
 [whisper]
     # Only effective when subtitle_provider is "whisper"
 