diff --git a/app/services/video.py b/app/services/video.py index 1a79e30..4971a69 100644 --- a/app/services/video.py +++ b/app/services/video.py @@ -249,6 +249,9 @@ def combine_videos( # if there is only one clip, use it directly if len(processed_clips) == 1: logger.info("using single clip directly") + # remove existing file to avoid FileExistsError + if os.path.exists(combined_video_path): + delete_files(combined_video_path) shutil.copy(processed_clips[0].file_path, combined_video_path) delete_files(processed_clips) logger.info("video combining completed") @@ -260,6 +263,7 @@ def combine_videos( temp_merged_next = f"{output_dir}/temp-merged-next.mp4" # copy first clip as initial merged video + delete_files([temp_merged_video, temp_merged_next]) shutil.copy(base_clip_path, temp_merged_video) # merge remaining video clips one by one @@ -289,14 +293,16 @@ def combine_videos( # replace base file with new merged file delete_files(temp_merged_video) - os.rename(temp_merged_next, temp_merged_video) + os.replace(temp_merged_next, temp_merged_video) except Exception as e: logger.error(f"failed to merge clip: {str(e)}") continue # after merging, rename final result to target file name - os.rename(temp_merged_video, combined_video_path) + if os.path.exists(combined_video_path): + delete_files(combined_video_path) + os.replace(temp_merged_video, combined_video_path) # clean temp files clip_files = [clip.file_path for clip in processed_clips] @@ -490,42 +496,44 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4): continue ext = utils.parse_extension(material.url) + clip = None try: - clip = VideoFileClip(material.url) - except Exception: - clip = ImageClip(material.url) + try: + clip = VideoFileClip(material.url) + except Exception: + clip = ImageClip(material.url) - width = clip.size[0] - height = clip.size[1] - if width < 480 or height < 480: - logger.warning(f"low resolution material: {width}x{height}, minimum 480x480 required") - continue + width = clip.size[0] + height = clip.size[1] + if width < 480 or height < 480: + logger.warning(f"low resolution material: {width}x{height}, minimum 480x480 required") + continue - if ext in const.FILE_TYPE_IMAGES: - logger.info(f"processing image: {material.url}") - # Create an image clip and set its duration to 3 seconds - clip = ( - ImageClip(material.url) - .with_duration(clip_duration) - .with_position("center") - ) - # Apply a zoom effect using the resize method. - # A lambda function is used to make the zoom effect dynamic over time. - # The zoom effect starts from the original size and gradually scales up to 120%. - # t represents the current time, and clip.duration is the total duration of the clip (3 seconds). - # Note: 1 represents 100% size, so 1.2 represents 120% size. - zoom_clip = clip.resized( - lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration) - ) + if ext in const.FILE_TYPE_IMAGES: + logger.info(f"processing image: {material.url}") + image_clip = None + zoom_clip = None + final_clip = None + try: + image_clip = ( + ImageClip(material.url) + .with_duration(clip_duration) + .with_position("center") + ) + zoom_clip = image_clip.resized( + lambda t: 1 + (clip_duration * 0.03) * (t / image_clip.duration) + ) - # Optionally, create a composite video clip containing the zoomed clip. - # This is useful when you want to add other elements to the video. - final_clip = CompositeVideoClip([zoom_clip]) + final_clip = CompositeVideoClip([zoom_clip]) - # Output the video to a file. - video_file = f"{material.url}.mp4" - final_clip.write_videofile(video_file, fps=30, logger=None) + video_file = f"{material.url}.mp4" + final_clip.write_videofile(video_file, fps=30, logger=None) + material.url = video_file + logger.success(f"image processed: {video_file}") + finally: + close_clip(final_clip) + close_clip(zoom_clip) + close_clip(image_clip) + finally: close_clip(clip) - material.url = video_file - logger.success(f"image processed: {video_file}") return materials \ No newline at end of file diff --git a/app/services/voice.py b/app/services/voice.py index e6b4d59..25cbb5f 100644 --- a/app/services/voice.py +++ b/app/services/voice.py @@ -7,6 +7,7 @@ from xml.sax.saxutils import unescape import edge_tts import requests +from aiohttp import ClientConnectorError from edge_tts import SubMaker, submaker from edge_tts.submaker import mktimestamp from loguru import logger @@ -1115,20 +1116,34 @@ def convert_rate_to_percent(rate: float) -> str: else: return f"{percent}%" +def _ensure_voice_directory(voice_file: str) -> None: + dir_path = os.path.dirname(voice_file) + if dir_path: + os.makedirs(dir_path, exist_ok=True) def azure_tts_v1( text: str, voice_name: str, voice_rate: float, voice_file: str ) -> Union[SubMaker, None]: - voice_name = parse_voice_name(voice_name) + norm_voice_name = parse_voice_name(voice_name) text = text.strip() rate_str = convert_rate_to_percent(voice_rate) + azure_key = config.azure.get("speech_key", "") + azure_region = config.azure.get("speech_region", "") + fallback_available = bool(azure_key and azure_region) + fallback_attempted = False + for i in range(3): try: - logger.info(f"start, voice name: {voice_name}, try: {i + 1}") + logger.info( + f"Edge TTS start, voice name: {norm_voice_name}, try: {i + 1}" + ) async def _do() -> SubMaker: - communicate = edge_tts.Communicate(text, voice_name, rate=rate_str) + communicate = edge_tts.Communicate( + text, norm_voice_name, rate=rate_str + ) sub_maker = edge_tts.SubMaker() + _ensure_voice_directory(voice_file) with open(voice_file, "wb") as file: async for chunk in communicate.stream(): if chunk["type"] == "audio": @@ -1141,13 +1156,53 @@ def azure_tts_v1( sub_maker = asyncio.run(_do()) if not sub_maker or not sub_maker.subs: - logger.warning("failed, sub_maker is None or sub_maker.subs is None") + logger.warning( + "Edge TTS failed, sub_maker is None or sub_maker.subs is None" + ) continue - logger.info(f"completed, output file: {voice_file}") + logger.info(f"Edge TTS completed, output file: {voice_file}") return sub_maker + except (asyncio.TimeoutError, ClientConnectorError) as network_error: + logger.warning( + "Edge TTS encountered a network issue: {}".format(network_error) + ) + if fallback_available and not fallback_attempted: + fallback_attempted = True + fallback_voice_name = f"{norm_voice_name}-V2" + logger.info( + "Attempting Azure Speech SDK fallback with voice: {}".format( + fallback_voice_name + ) + ) + fallback_sub_maker = azure_tts_v2( + text=text, + voice_name=fallback_voice_name, + voice_file=voice_file, + ) + if fallback_sub_maker and getattr( + fallback_sub_maker, "subs", None + ): + logger.info( + f"Azure Speech SDK fallback completed, output file: {voice_file}" + ) + return fallback_sub_maker + logger.error("Azure Speech SDK fallback failed to synthesize audio") + elif not fallback_available: + logger.warning( + "Azure Speech SDK fallback unavailable - missing credentials" + ) except Exception as e: - logger.error(f"failed, error: {str(e)}") + logger.error(f"Edge TTS failed, error: {str(e)}") + + if fallback_available and fallback_attempted: + logger.error( + f"Edge TTS and Azure Speech SDK fallback both failed for voice: {norm_voice_name}" + ) + else: + logger.error( + f"Edge TTS failed for voice: {norm_voice_name} after retries" + ) return None @@ -1211,6 +1266,7 @@ def siliconflow_tts( if response.status_code == 200: # 保存音频文件 + _ensure_voice_directory(voice_file) with open(voice_file, "wb") as f: f.write(response.content) @@ -1341,6 +1397,8 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker, logger.error("Azure speech key or region is not set") return None + _ensure_voice_directory(voice_file) + audio_config = speechsdk.audio.AudioOutputConfig( filename=voice_file, use_default_speaker=True ) diff --git a/test/services/test_video.py b/test/services/test_video.py index d204acc..f2c3803 100644 --- a/test/services/test_video.py +++ b/test/services/test_video.py @@ -41,8 +41,11 @@ class TestVideoService(unittest.TestCase): # moviepy get video info clip = VideoFileClip(materials[0].url) - print(clip) - + try: + print(clip) + finally: + clip.close() + # clean generated test video file if os.path.exists(materials[0].url): os.remove(materials[0].url) diff --git a/test/services/test_voice.py b/test/services/test_voice.py index 31f1799..afcadef 100644 --- a/test/services/test_voice.py +++ b/test/services/test_voice.py @@ -3,6 +3,7 @@ import unittest import os import sys from pathlib import Path +from unittest import mock # add project root to python path sys.path.insert(0, str(Path(__file__).parent.parent.parent)) @@ -101,6 +102,55 @@ class TestVoiceService(unittest.TestCase): self.loop.run_until_complete(_do()) + def test_azure_tts_v1_fallback_to_v2(self): + voice_name = "en-US-JennyNeural-Female" + normalized_voice_name = vs.parse_voice_name(voice_name) + voice_file = f"{temp_dir}/tts-azure-fallback-{normalized_voice_name}.mp3" + fallback_sub_maker = vs.SubMaker() + fallback_sub_maker.subs = ["hello"] + + text_value = " hello world " + + def raise_timeout(coro): + coro.close() + raise asyncio.TimeoutError() + + with mock.patch( + "app.services.voice.asyncio.run", side_effect=raise_timeout + ) as mock_asyncio_run, mock.patch( + "app.services.voice.azure_tts_v2", return_value=fallback_sub_maker + ) as mock_azure_v2: + original_key = vs.config.azure.get("speech_key") + original_region = vs.config.azure.get("speech_region") + vs.config.azure["speech_key"] = "dummy-key" + vs.config.azure["speech_region"] = "dummy-region" + + try: + sub_maker = vs.azure_tts_v1( + text=text_value, + voice_name=voice_name, + voice_rate=1.0, + voice_file=voice_file, + ) + finally: + if original_key is None: + vs.config.azure.pop("speech_key", None) + else: + vs.config.azure["speech_key"] = original_key + + if original_region is None: + vs.config.azure.pop("speech_region", None) + else: + vs.config.azure["speech_region"] = original_region + + self.assertIs(sub_maker, fallback_sub_maker) + mock_asyncio_run.assert_called_once() + mock_azure_v2.assert_called_once_with( + text=text_value.strip(), + voice_name=f"{normalized_voice_name}-V2", + voice_file=voice_file, + ) + if __name__ == "__main__": # python -m unittest test.services.test_voice.TestVoiceService.test_azure_tts_v1 # python -m unittest test.services.test_voice.TestVoiceService.test_azure_tts_v2