From f07e5802f7bbc326806710dd023c6e27f75beb98 Mon Sep 17 00:00:00 2001
From: harry
Date: Fri, 9 May 2025 20:55:12 +0800
Subject: [PATCH] perf: optimize memory usage and processing performance

---
 app/services/video.py | 318 ++++++++++++++++++++++++------------------
 1 file changed, 182 insertions(+), 136 deletions(-)

diff --git a/app/services/video.py b/app/services/video.py
index 751930e..e3a5abe 100644
--- a/app/services/video.py
+++ b/app/services/video.py
@@ -1,8 +1,9 @@
 import glob
 import os
 import random
+import gc
+import shutil
 from typing import List
-
 from loguru import logger
 from moviepy import (
     AudioFileClip,
@@ -29,6 +30,68 @@ from app.models.schema import (
 from app.services.utils import video_effects
 from app.utils import utils
 
+class SubClippedVideoClip:
+    def __init__(self, file_path, start_time, end_time, width=None, height=None):
+        self.file_path = file_path
+        self.start_time = start_time
+        self.end_time = end_time
+        self.width = width
+        self.height = height
+
+    def __str__(self):
+        return f"SubClippedVideoClip(file_path={self.file_path}, start_time={self.start_time}, end_time={self.end_time}, width={self.width}, height={self.height})"
+
+
+audio_codec = "aac"
+video_codec = "libx264"
+fps = 30
+
+def close_clip(clip):
+    if clip is None:
+        return
+
+    try:
+        # close main resources
+        if hasattr(clip, 'reader') and clip.reader is not None:
+            clip.reader.close()
+
+        # close audio resources
+        if hasattr(clip, 'audio') and clip.audio is not None:
+            if hasattr(clip.audio, 'reader') and clip.audio.reader is not None:
+                clip.audio.reader.close()
+            del clip.audio
+
+        # close mask resources
+        if hasattr(clip, 'mask') and clip.mask is not None:
+            if hasattr(clip.mask, 'reader') and clip.mask.reader is not None:
+                clip.mask.reader.close()
+            del clip.mask
+
+        # handle child clips in composite clips
+        if hasattr(clip, 'clips') and clip.clips:
+            for child_clip in clip.clips:
+                if child_clip is not clip:  # avoid possible circular references
+                    close_clip(child_clip)
+
+        # clear clip list
+        if hasattr(clip, 'clips'):
+            clip.clips = []
+
+    except Exception as e:
+        logger.error(f"failed to close clip: {str(e)}")
+
+    del clip
+    gc.collect()
+
+def delete_files(files: List[str] | str):
+    if isinstance(files, str):
+        files = [files]
+
+    for file in files:
+        try:
+            os.remove(file)
+        except:
+            pass
+
 
 def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
     if not bgm_type:
@@ -58,85 +121,76 @@ def combine_videos(
 ) -> str:
     audio_clip = AudioFileClip(audio_file)
     audio_duration = audio_clip.duration
-    logger.info(f"max duration of audio: {audio_duration} seconds")
+    logger.info(f"audio duration: {audio_duration} seconds")
     # Required duration of each clip
     req_dur = audio_duration / len(video_paths)
     req_dur = max_clip_duration
-    logger.info(f"each clip will be maximum {req_dur} seconds long")
+    logger.info(f"maximum clip duration: {req_dur} seconds")
     output_dir = os.path.dirname(combined_video_path)
 
     aspect = VideoAspect(video_aspect)
     video_width, video_height = aspect.to_resolution()
 
-    clips = []
+    clip_files = []
+    subclipped_items = []
     video_duration = 0
-
-    raw_clips = []
     for video_path in video_paths:
-        clip = VideoFileClip(video_path).without_audio()
+        clip = VideoFileClip(video_path)
         clip_duration = clip.duration
+        clip_w, clip_h = clip.size
+        close_clip(clip)
+
         start_time = 0
 
         while start_time < clip_duration:
-            end_time = min(start_time + max_clip_duration, clip_duration)
-            split_clip = clip.subclipped(start_time, end_time)
-            raw_clips.append(split_clip)
-            # logger.info(f"splitting from {start_time:.2f} to {end_time:.2f}, clip duration {clip_duration:.2f}, split_clip duration {split_clip.duration:.2f}")
-            start_time = end_time
+            end_time = min(start_time + max_clip_duration, clip_duration)
+            if clip_duration - start_time > max_clip_duration:
+                subclipped_items.append(SubClippedVideoClip(file_path=video_path, start_time=start_time, end_time=end_time, width=clip_w, height=clip_h))
+            start_time = end_time
             if video_concat_mode.value == VideoConcatMode.sequential.value:
                 break
 
-    # random video_paths order
+    # random subclipped_items order
     if video_concat_mode.value == VideoConcatMode.random.value:
-        random.shuffle(raw_clips)
-
+        random.shuffle(subclipped_items)
+
+    logger.debug(f"total subclipped items: {len(subclipped_items)}")
+
     # Add downloaded clips over and over until the duration of the audio (max_duration) has been reached
-    while video_duration < audio_duration:
-        for clip in raw_clips:
-            # Check if clip is longer than the remaining audio
-            if (audio_duration - video_duration) < clip.duration:
-                clip = clip.subclipped(0, (audio_duration - video_duration))
-            # Only shorten clips if the calculated clip length (req_dur) is shorter than the actual clip to prevent still image
-            elif req_dur < clip.duration:
-                clip = clip.subclipped(0, req_dur)
-            clip = clip.with_fps(30)
-
+    for i, subclipped_item in enumerate(subclipped_items):
+        if video_duration > audio_duration:
+            break
+
+        logger.debug(f"processing clip {i+1}: {subclipped_item.width}x{subclipped_item.height}, current duration: {video_duration:.2f}s, remaining: {audio_duration - video_duration:.2f}s")
+
+        try:
+            clip = VideoFileClip(subclipped_item.file_path).subclipped(subclipped_item.start_time, subclipped_item.end_time)
+            clip_duration = clip.duration
             # Not all videos are same size, so we need to resize them
             clip_w, clip_h = clip.size
             if clip_w != video_width or clip_h != video_height:
                 clip_ratio = clip.w / clip.h
                 video_ratio = video_width / video_height
-
+                logger.debug(f"resizing to {video_width}x{video_height}, source: {clip_w}x{clip_h}, ratio: {clip_ratio:.2f}, target ratio: {video_ratio:.2f}")
+
                 if clip_ratio == video_ratio:
-                    # Resize proportionally
-                    clip = clip.resized((video_width, video_height))
+                    clip = clip.resized(new_size=(video_width, video_height))
                 else:
-                    # Resize proportionally
                     if clip_ratio > video_ratio:
-                        # Resize proportionally based on the target width
                         scale_factor = video_width / clip_w
                     else:
-                        # Resize proportionally based on the target height
                         scale_factor = video_height / clip_h
 
                     new_width = int(clip_w * scale_factor)
                     new_height = int(clip_h * scale_factor)
-                    clip_resized = clip.resized(new_size=(new_width, new_height))
-
-                    background = ColorClip(
-                        size=(video_width, video_height), color=(0, 0, 0)
-                    )
-                    clip = CompositeVideoClip(
-                        [
-                            background.with_duration(clip.duration),
-                            clip_resized.with_position("center"),
-                        ]
-                    )
-
-                logger.info(
-                    f"resizing video to {video_width} x {video_height}, clip size: {clip_w} x {clip_h}"
-                )
+                    background = ColorClip(size=(video_width, video_height), color=(0, 0, 0)).with_duration(clip_duration)
+                    clip_resized = clip.resized(new_size=(new_width, new_height)).with_position("center")
+                    clip = CompositeVideoClip([background, clip_resized])
+
+                    close_clip(clip_resized)
+                    close_clip(background)
+
             shuffle_side = random.choice(["left", "right", "top", "bottom"])
             if video_transition_mode.value == VideoTransitionMode.none.value:
                 clip = clip
@@ -160,24 +214,81 @@ def combine_videos(
             if clip.duration > max_clip_duration:
                 clip = clip.subclipped(0, max_clip_duration)
-
-            clips.append(clip)
+
+            # write clip to temp file
+            clip_file = f"{output_dir}/temp-clip-{i+1}.mp4"
+            clip.write_videofile(clip_file, logger=None, fps=fps, codec=video_codec)
+
+            close_clip(clip)
+
+            clip_files.append(clip_file)
             video_duration += clip.duration
-
-    clips = [CompositeVideoClip([clip]) for clip in clips]
-    video_clip = concatenate_videoclips(clips)
-    video_clip = video_clip.with_fps(30)
-    logger.info("writing")
-    # https://github.com/harry0703/MoneyPrinterTurbo/issues/111#issuecomment-2032354030
-    video_clip.write_videofile(
-        filename=combined_video_path,
-        threads=threads,
-        logger=None,
-        temp_audiofile_path=output_dir,
-        audio_codec="aac",
-        fps=30,
-    )
-    video_clip.close()
-    logger.success("completed")
+
+        except Exception as e:
+            logger.error(f"failed to process clip: {str(e)}")
+
+    # merge video clips progressively to avoid loading all videos at once and running out of memory
+    logger.info("starting clip merging process")
+    if not clip_files:
+        logger.warning("no clips available for merging")
+        return combined_video_path
+
+    # if there is only one clip, use it directly
+    if len(clip_files) == 1:
+        logger.info("using single clip directly")
+        shutil.copy(clip_files[0], combined_video_path)
+        delete_files(clip_files)
+        logger.info("video combining completed")
+        return combined_video_path
+
+    # create initial video file as base
+    base_clip_path = clip_files[0]
+    temp_merged_video = f"{output_dir}/temp-merged-video.mp4"
+    temp_merged_next = f"{output_dir}/temp-merged-next.mp4"
+
+    # copy first clip as initial merged video
+    shutil.copy(base_clip_path, temp_merged_video)
+
+    # merge remaining video clips one by one
+    for i, clip_path in enumerate(clip_files[1:], 1):
+        logger.info(f"merging clip {i}/{len(clip_files)-1}")
+
+        try:
+            # load current base video and next clip to merge
+            base_clip = VideoFileClip(temp_merged_video)
+            next_clip = VideoFileClip(clip_path)
+
+            # merge these two clips
+            merged_clip = concatenate_videoclips([base_clip, next_clip])
+
+            # save merged result to temp file
+            merged_clip.write_videofile(
+                filename=temp_merged_next,
+                threads=threads,
+                logger=None,
+                temp_audiofile_path=output_dir,
+                audio_codec=audio_codec,
+                fps=fps,
+            )
+            close_clip(base_clip)
+            close_clip(next_clip)
+            close_clip(merged_clip)
+
+            # replace base file with new merged file
+            delete_files(temp_merged_video)
+            os.rename(temp_merged_next, temp_merged_video)
+
+        except Exception as e:
+            logger.error(f"failed to merge clip: {str(e)}")
+            continue
+
+    # after merging, rename final result to target file name
+    os.rename(temp_merged_video, combined_video_path)
+
+    # clean up temp files
+    delete_files(clip_files)
+
+    logger.info("video combining completed")
     return combined_video_path
 
 
@@ -194,8 +305,6 @@ def wrap_text(text, max_width, font="Arial", fontsize=60):
     if width <= max_width:
         return text, height
 
-    # logger.warning(f"wrapping text, max_width: {max_width}, text_width: {width}, text: {text}")
-
     processed = True
 
     _wrapped_lines_ = []
@@ -218,7 +327,6 @@ def wrap_text(text, max_width, font="Arial", fontsize=60):
         _wrapped_lines_ = [line.strip() for line in _wrapped_lines_]
         result = "\n".join(_wrapped_lines_).strip()
         height = len(_wrapped_lines_) * height
-        # logger.warning(f"wrapped text: {result}")
         return result, height
 
     _wrapped_lines_ = []
@@ -235,7 +343,6 @@ def wrap_text(text, max_width, font="Arial", fontsize=60):
         _wrapped_lines_.append(_txt_)
     result = "\n".join(_wrapped_lines_).strip()
     height = len(_wrapped_lines_) * height
-    # logger.warning(f"wrapped text: {result}")
     return result, height
@@ -249,7 +356,7 @@ def generate_video(
     aspect = VideoAspect(params.video_aspect)
     video_width, video_height = aspect.to_resolution()
 
-    logger.info(f"start, video size: {video_width} x {video_height}")
+    logger.info(f"generating video: {video_width} x {video_height}")
     logger.info(f"  ① video: {video_path}")
     logger.info(f"  ② audio: {audio_path}")
     logger.info(f"  ③ subtitle: {subtitle_path}")
@@ -268,7 +375,7 @@ def generate_video(
     if os.name == "nt":
         font_path = font_path.replace("\\", "/")
 
-    logger.info(f"using font: {font_path}")
+    logger.info(f"  ⑤ font: {font_path}")
 
     def create_text_clip(subtitle_item):
         params.font_size = int(params.font_size)
@@ -314,7 +421,7 @@ def generate_video(
         _clip = _clip.with_position(("center", "center"))
         return _clip
 
-    video_clip = VideoFileClip(video_path)
+    video_clip = VideoFileClip(video_path).without_audio()
     audio_clip = AudioFileClip(audio_path).with_effects(
         [afx.MultiplyVolume(params.voice_volume)]
     )
@@ -353,15 +460,14 @@ def generate_video(
     video_clip = video_clip.with_audio(audio_clip)
     video_clip.write_videofile(
         output_file,
-        audio_codec="aac",
+        audio_codec=audio_codec,
         temp_audiofile_path=output_dir,
         threads=params.n_threads or 2,
         logger=None,
-        fps=30,
+        fps=fps,
     )
     video_clip.close()
     del video_clip
-    logger.success("completed")
 
 
 def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
@@ -378,7 +484,7 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
         width = clip.size[0]
         height = clip.size[1]
         if width < 480 or height < 480:
-            logger.warning(f"video is too small, width: {width}, height: {height}")
+            logger.warning(f"low resolution material: {width}x{height}, minimum 480x480 required")
            continue
 
         if ext in const.FILE_TYPE_IMAGES:
@@ -408,65 +514,5 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
         final_clip.close()
         del final_clip
         material.url = video_file
-        logger.success(f"completed: {video_file}")
-    return materials
-
-
-if __name__ == "__main__":
-    m = MaterialInfo()
-    m.url = "/Users/harry/Downloads/IMG_2915.JPG"
-    m.provider = "local"
-    materials = preprocess_video([m], clip_duration=4)
-    print(materials)
-
-    # txt_en = "Here's your guide to travel hacks for budget-friendly adventures"
-    # txt_zh = "测试长字段这是您的旅行技巧指南帮助您进行预算友好的冒险"
-    # font = utils.resource_dir() + "/fonts/STHeitiMedium.ttc"
-    # for txt in [txt_en, txt_zh]:
-    #     t, h = wrap_text(text=txt, max_width=1000, font=font, fontsize=60)
-    #     print(t)
-    #
-    # task_id = "aa563149-a7ea-49c2-b39f-8c32cc225baf"
-    # task_dir = utils.task_dir(task_id)
-    # video_file = f"{task_dir}/combined-1.mp4"
-    # audio_file = f"{task_dir}/audio.mp3"
-    # subtitle_file = f"{task_dir}/subtitle.srt"
-    # output_file = f"{task_dir}/final.mp4"
-    #
-    # # video_paths = []
-    # # for file in os.listdir(utils.storage_dir("test")):
-    # #     if file.endswith(".mp4"):
-    # #         video_paths.append(os.path.join(utils.storage_dir("test"), file))
-    # #
-    # # combine_videos(combined_video_path=video_file,
-    # #                audio_file=audio_file,
-    # #                video_paths=video_paths,
-    # #                video_aspect=VideoAspect.portrait,
-    # #                video_concat_mode=VideoConcatMode.random,
-    # #                max_clip_duration=5,
-    # #                threads=2)
-    #
-    # cfg = VideoParams()
-    # cfg.video_aspect = VideoAspect.portrait
-    # cfg.font_name = "STHeitiMedium.ttc"
-    # cfg.font_size = 60
-    # cfg.stroke_color = "#000000"
-    # cfg.stroke_width = 1.5
-    # cfg.text_fore_color = "#FFFFFF"
-    # cfg.text_background_color = "transparent"
-    # cfg.bgm_type = "random"
-    # cfg.bgm_file = ""
-    # cfg.bgm_volume = 1.0
-    # cfg.subtitle_enabled = True
-    # cfg.subtitle_position = "bottom"
-    # cfg.n_threads = 2
-    # cfg.paragraph_number = 1
-    #
-    # cfg.voice_volume = 1.0
-    #
-    # generate_video(video_path=video_file,
-    #                audio_path=audio_file,
-    #                subtitle_path=subtitle_file,
-    #                output_file=output_file,
-    #                params=cfg
-    #                )
+        logger.success(f"image processed: {video_file}")
+    return materials
\ No newline at end of file