mirror of
https://github.com/harry0703/MoneyPrinterTurbo.git
synced 2026-02-21 16:37:21 +08:00
fix: Handle Edge TTS failures with Azure fallback and stabilize media pipeline
This commit is contained in:
parent
6cb5f23487
commit
f76e19ed52
@ -249,6 +249,9 @@ def combine_videos(
|
||||
# if there is only one clip, use it directly
|
||||
if len(processed_clips) == 1:
|
||||
logger.info("using single clip directly")
|
||||
# remove existing file to avoid FileExistsError
|
||||
if os.path.exists(combined_video_path):
|
||||
delete_files(combined_video_path)
|
||||
shutil.copy(processed_clips[0].file_path, combined_video_path)
|
||||
delete_files(processed_clips)
|
||||
logger.info("video combining completed")
|
||||
@ -260,6 +263,7 @@ def combine_videos(
|
||||
temp_merged_next = f"{output_dir}/temp-merged-next.mp4"
|
||||
|
||||
# copy first clip as initial merged video
|
||||
delete_files([temp_merged_video, temp_merged_next])
|
||||
shutil.copy(base_clip_path, temp_merged_video)
|
||||
|
||||
# merge remaining video clips one by one
|
||||
@ -289,14 +293,16 @@ def combine_videos(
|
||||
|
||||
# replace base file with new merged file
|
||||
delete_files(temp_merged_video)
|
||||
os.rename(temp_merged_next, temp_merged_video)
|
||||
os.replace(temp_merged_next, temp_merged_video)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"failed to merge clip: {str(e)}")
|
||||
continue
|
||||
|
||||
# after merging, rename final result to target file name
|
||||
os.rename(temp_merged_video, combined_video_path)
|
||||
if os.path.exists(combined_video_path):
|
||||
delete_files(combined_video_path)
|
||||
os.replace(temp_merged_video, combined_video_path)
|
||||
|
||||
# clean temp files
|
||||
clip_files = [clip.file_path for clip in processed_clips]
|
||||
@ -490,42 +496,44 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
|
||||
continue
|
||||
|
||||
ext = utils.parse_extension(material.url)
|
||||
clip = None
|
||||
try:
|
||||
clip = VideoFileClip(material.url)
|
||||
except Exception:
|
||||
clip = ImageClip(material.url)
|
||||
try:
|
||||
clip = VideoFileClip(material.url)
|
||||
except Exception:
|
||||
clip = ImageClip(material.url)
|
||||
|
||||
width = clip.size[0]
|
||||
height = clip.size[1]
|
||||
if width < 480 or height < 480:
|
||||
logger.warning(f"low resolution material: {width}x{height}, minimum 480x480 required")
|
||||
continue
|
||||
width = clip.size[0]
|
||||
height = clip.size[1]
|
||||
if width < 480 or height < 480:
|
||||
logger.warning(f"low resolution material: {width}x{height}, minimum 480x480 required")
|
||||
continue
|
||||
|
||||
if ext in const.FILE_TYPE_IMAGES:
|
||||
logger.info(f"processing image: {material.url}")
|
||||
# Create an image clip and set its duration to 3 seconds
|
||||
clip = (
|
||||
ImageClip(material.url)
|
||||
.with_duration(clip_duration)
|
||||
.with_position("center")
|
||||
)
|
||||
# Apply a zoom effect using the resize method.
|
||||
# A lambda function is used to make the zoom effect dynamic over time.
|
||||
# The zoom effect starts from the original size and gradually scales up to 120%.
|
||||
# t represents the current time, and clip.duration is the total duration of the clip (3 seconds).
|
||||
# Note: 1 represents 100% size, so 1.2 represents 120% size.
|
||||
zoom_clip = clip.resized(
|
||||
lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration)
|
||||
)
|
||||
if ext in const.FILE_TYPE_IMAGES:
|
||||
logger.info(f"processing image: {material.url}")
|
||||
image_clip = None
|
||||
zoom_clip = None
|
||||
final_clip = None
|
||||
try:
|
||||
image_clip = (
|
||||
ImageClip(material.url)
|
||||
.with_duration(clip_duration)
|
||||
.with_position("center")
|
||||
)
|
||||
zoom_clip = image_clip.resized(
|
||||
lambda t: 1 + (clip_duration * 0.03) * (t / image_clip.duration)
|
||||
)
|
||||
|
||||
# Optionally, create a composite video clip containing the zoomed clip.
|
||||
# This is useful when you want to add other elements to the video.
|
||||
final_clip = CompositeVideoClip([zoom_clip])
|
||||
final_clip = CompositeVideoClip([zoom_clip])
|
||||
|
||||
# Output the video to a file.
|
||||
video_file = f"{material.url}.mp4"
|
||||
final_clip.write_videofile(video_file, fps=30, logger=None)
|
||||
video_file = f"{material.url}.mp4"
|
||||
final_clip.write_videofile(video_file, fps=30, logger=None)
|
||||
material.url = video_file
|
||||
logger.success(f"image processed: {video_file}")
|
||||
finally:
|
||||
close_clip(final_clip)
|
||||
close_clip(zoom_clip)
|
||||
close_clip(image_clip)
|
||||
finally:
|
||||
close_clip(clip)
|
||||
material.url = video_file
|
||||
logger.success(f"image processed: {video_file}")
|
||||
return materials
|
||||
@ -7,6 +7,7 @@ from xml.sax.saxutils import unescape
|
||||
|
||||
import edge_tts
|
||||
import requests
|
||||
from aiohttp import ClientConnectorError
|
||||
from edge_tts import SubMaker, submaker
|
||||
from edge_tts.submaker import mktimestamp
|
||||
from loguru import logger
|
||||
@ -1115,20 +1116,34 @@ def convert_rate_to_percent(rate: float) -> str:
|
||||
else:
|
||||
return f"{percent}%"
|
||||
|
||||
def _ensure_voice_directory(voice_file: str) -> None:
    """Create the parent directory of *voice_file* if it is missing.

    Args:
        voice_file: Path of the audio file that is about to be written.

    A bare file name (no directory component) is left alone, because
    ``os.path.dirname`` returns an empty string for it and there is
    nothing to create.
    """
    dir_path = os.path.dirname(voice_file)
    if dir_path:
        # exist_ok=True keeps repeated calls (e.g. on retries) harmless.
        os.makedirs(dir_path, exist_ok=True)
|
||||
|
||||
def azure_tts_v1(
|
||||
text: str, voice_name: str, voice_rate: float, voice_file: str
|
||||
) -> Union[SubMaker, None]:
|
||||
voice_name = parse_voice_name(voice_name)
|
||||
norm_voice_name = parse_voice_name(voice_name)
|
||||
text = text.strip()
|
||||
rate_str = convert_rate_to_percent(voice_rate)
|
||||
azure_key = config.azure.get("speech_key", "")
|
||||
azure_region = config.azure.get("speech_region", "")
|
||||
fallback_available = bool(azure_key and azure_region)
|
||||
fallback_attempted = False
|
||||
|
||||
for i in range(3):
|
||||
try:
|
||||
logger.info(f"start, voice name: {voice_name}, try: {i + 1}")
|
||||
logger.info(
|
||||
f"Edge TTS start, voice name: {norm_voice_name}, try: {i + 1}"
|
||||
)
|
||||
|
||||
async def _do() -> SubMaker:
|
||||
communicate = edge_tts.Communicate(text, voice_name, rate=rate_str)
|
||||
communicate = edge_tts.Communicate(
|
||||
text, norm_voice_name, rate=rate_str
|
||||
)
|
||||
sub_maker = edge_tts.SubMaker()
|
||||
_ensure_voice_directory(voice_file)
|
||||
with open(voice_file, "wb") as file:
|
||||
async for chunk in communicate.stream():
|
||||
if chunk["type"] == "audio":
|
||||
@ -1141,13 +1156,53 @@ def azure_tts_v1(
|
||||
|
||||
sub_maker = asyncio.run(_do())
|
||||
if not sub_maker or not sub_maker.subs:
|
||||
logger.warning("failed, sub_maker is None or sub_maker.subs is None")
|
||||
logger.warning(
|
||||
"Edge TTS failed, sub_maker is None or sub_maker.subs is None"
|
||||
)
|
||||
continue
|
||||
|
||||
logger.info(f"completed, output file: {voice_file}")
|
||||
logger.info(f"Edge TTS completed, output file: {voice_file}")
|
||||
return sub_maker
|
||||
except (asyncio.TimeoutError, ClientConnectorError) as network_error:
|
||||
logger.warning(
|
||||
"Edge TTS encountered a network issue: {}".format(network_error)
|
||||
)
|
||||
if fallback_available and not fallback_attempted:
|
||||
fallback_attempted = True
|
||||
fallback_voice_name = f"{norm_voice_name}-V2"
|
||||
logger.info(
|
||||
"Attempting Azure Speech SDK fallback with voice: {}".format(
|
||||
fallback_voice_name
|
||||
)
|
||||
)
|
||||
fallback_sub_maker = azure_tts_v2(
|
||||
text=text,
|
||||
voice_name=fallback_voice_name,
|
||||
voice_file=voice_file,
|
||||
)
|
||||
if fallback_sub_maker and getattr(
|
||||
fallback_sub_maker, "subs", None
|
||||
):
|
||||
logger.info(
|
||||
f"Azure Speech SDK fallback completed, output file: {voice_file}"
|
||||
)
|
||||
return fallback_sub_maker
|
||||
logger.error("Azure Speech SDK fallback failed to synthesize audio")
|
||||
elif not fallback_available:
|
||||
logger.warning(
|
||||
"Azure Speech SDK fallback unavailable - missing credentials"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"failed, error: {str(e)}")
|
||||
logger.error(f"Edge TTS failed, error: {str(e)}")
|
||||
|
||||
if fallback_available and fallback_attempted:
|
||||
logger.error(
|
||||
f"Edge TTS and Azure Speech SDK fallback both failed for voice: {norm_voice_name}"
|
||||
)
|
||||
else:
|
||||
logger.error(
|
||||
f"Edge TTS failed for voice: {norm_voice_name} after retries"
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
@ -1211,6 +1266,7 @@ def siliconflow_tts(
|
||||
|
||||
if response.status_code == 200:
|
||||
# 保存音频文件
|
||||
_ensure_voice_directory(voice_file)
|
||||
with open(voice_file, "wb") as f:
|
||||
f.write(response.content)
|
||||
|
||||
@ -1341,6 +1397,8 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker,
|
||||
logger.error("Azure speech key or region is not set")
|
||||
return None
|
||||
|
||||
_ensure_voice_directory(voice_file)
|
||||
|
||||
audio_config = speechsdk.audio.AudioOutputConfig(
|
||||
filename=voice_file, use_default_speaker=True
|
||||
)
|
||||
|
||||
@ -41,8 +41,11 @@ class TestVideoService(unittest.TestCase):
|
||||
|
||||
# moviepy get video info
|
||||
clip = VideoFileClip(materials[0].url)
|
||||
print(clip)
|
||||
|
||||
try:
|
||||
print(clip)
|
||||
finally:
|
||||
clip.close()
|
||||
|
||||
# clean generated test video file
|
||||
if os.path.exists(materials[0].url):
|
||||
os.remove(materials[0].url)
|
||||
|
||||
@ -3,6 +3,7 @@ import unittest
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
# add project root to python path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||
@ -101,6 +102,55 @@ class TestVoiceService(unittest.TestCase):
|
||||
|
||||
self.loop.run_until_complete(_do())
|
||||
|
||||
def test_azure_tts_v1_fallback_to_v2(self):
    """azure_tts_v1 hands over to azure_tts_v2 when Edge TTS times out.

    ``asyncio.run`` is patched to raise ``asyncio.TimeoutError`` and
    ``azure_tts_v2`` is patched to return a prepared SubMaker. The test
    then checks that the prepared object is returned, that Edge TTS was
    tried exactly once, and that the fallback received the stripped text
    and the normalized voice name with a ``-V2`` suffix.
    """
    voice_name = "en-US-JennyNeural-Female"
    normalized_voice_name = vs.parse_voice_name(voice_name)
    voice_file = f"{temp_dir}/tts-azure-fallback-{normalized_voice_name}.mp3"
    fallback_sub_maker = vs.SubMaker()
    fallback_sub_maker.subs = ["hello"]

    # Surrounding whitespace is deliberate: azure_tts_v1 strips the text
    # before passing it on, and the final assertion relies on that.
    text_value = " hello world "

    def raise_timeout(coro):
        # Close the coroutine that was handed to asyncio.run before
        # raising, since it will never be awaited.
        coro.close()
        raise asyncio.TimeoutError()

    # The fallback is only attempted when both Azure credentials are set.
    # patch.dict puts the mapping back (including removing keys that were
    # absent) when the block exits, even if the call raises.
    # NOTE(review): assumes vs.config.azure is a plain dict-like mapping.
    dummy_credentials = {
        "speech_key": "dummy-key",
        "speech_region": "dummy-region",
    }

    with mock.patch.dict(vs.config.azure, dummy_credentials), mock.patch(
        "app.services.voice.asyncio.run", side_effect=raise_timeout
    ) as mock_asyncio_run, mock.patch(
        "app.services.voice.azure_tts_v2", return_value=fallback_sub_maker
    ) as mock_azure_v2:
        sub_maker = vs.azure_tts_v1(
            text=text_value,
            voice_name=voice_name,
            voice_rate=1.0,
            voice_file=voice_file,
        )

    self.assertIs(sub_maker, fallback_sub_maker)
    mock_asyncio_run.assert_called_once()
    mock_azure_v2.assert_called_once_with(
        text=text_value.strip(),
        voice_name=f"{normalized_voice_name}-V2",
        voice_file=voice_file,
    )
|
||||
|
||||
if __name__ == "__main__":
|
||||
# python -m unittest test.services.test_voice.TestVoiceService.test_azure_tts_v1
|
||||
# python -m unittest test.services.test_voice.TestVoiceService.test_azure_tts_v2
|
||||
|
||||
Loading…
Reference in New Issue
Block a user