mirror of
https://github.com/harry0703/MoneyPrinterTurbo.git
synced 2026-02-21 08:27:22 +08:00
Merge pull request #715 from michaeltmk/feat/custom_audio
feat: add custom audio file support
This commit is contained in:
commit
951460b9f1
@ -81,7 +81,8 @@ class VideoParams(BaseModel):
|
||||
video_materials: Optional[List[MaterialInfo]] = (
|
||||
None # Materials used to generate the video
|
||||
)
|
||||
|
||||
|
||||
custom_audio_file: Optional[str] = None # Custom audio file path, will ignore video_script and disable subtitle
|
||||
video_language: Optional[str] = "" # auto detect
|
||||
|
||||
voice_name: Optional[str] = ""
|
||||
|
||||
@ -71,30 +71,66 @@ def save_script_data(task_id, video_script, video_terms, params):
|
||||
|
||||
|
||||
def generate_audio(task_id, params, video_script):
|
||||
'''
|
||||
Generate audio for the video script.
|
||||
If a custom audio file is provided, it will be used directly.
|
||||
There will be no subtitle maker object returned in this case.
|
||||
Otherwise, TTS will be used to generate the audio.
|
||||
Returns:
|
||||
- audio_file: path to the generated or provided audio file
|
||||
- audio_duration: duration of the audio in seconds
|
||||
- sub_maker: subtitle maker object if TTS is used, None otherwise
|
||||
'''
|
||||
logger.info("\n\n## generating audio")
|
||||
audio_file = path.join(utils.task_dir(task_id), "audio.mp3")
|
||||
sub_maker = voice.tts(
|
||||
text=video_script,
|
||||
voice_name=voice.parse_voice_name(params.voice_name),
|
||||
voice_rate=params.voice_rate,
|
||||
voice_file=audio_file,
|
||||
)
|
||||
if sub_maker is None:
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
|
||||
logger.error(
|
||||
"""failed to generate audio:
|
||||
custom_audio_file = params.custom_audio_file
|
||||
if not custom_audio_file or not os.path.exists(custom_audio_file):
|
||||
if custom_audio_file:
|
||||
logger.warning(
|
||||
f"custom audio file not found: {custom_audio_file}, using TTS to generate audio."
|
||||
)
|
||||
else:
|
||||
logger.info("no custom audio file provided, using TTS to generate audio.")
|
||||
audio_file = path.join(utils.task_dir(task_id), "audio.mp3")
|
||||
sub_maker = voice.tts(
|
||||
text=video_script,
|
||||
voice_name=voice.parse_voice_name(params.voice_name),
|
||||
voice_rate=params.voice_rate,
|
||||
voice_file=audio_file,
|
||||
)
|
||||
if sub_maker is None:
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
|
||||
logger.error(
|
||||
"""failed to generate audio:
|
||||
1. check if the language of the voice matches the language of the video script.
|
||||
2. check if the network is available. If you are in China, it is recommended to use a VPN and enable the global traffic mode.
|
||||
""".strip()
|
||||
)
|
||||
return None, None, None
|
||||
|
||||
audio_duration = math.ceil(voice.get_audio_duration(sub_maker))
|
||||
return audio_file, audio_duration, sub_maker
|
||||
|
||||
""".strip()
|
||||
)
|
||||
return None, None, None
|
||||
audio_duration = math.ceil(voice.get_audio_duration(sub_maker))
|
||||
if audio_duration == 0:
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
|
||||
logger.error("failed to get audio duration.")
|
||||
return None, None, None
|
||||
return audio_file, audio_duration, sub_maker
|
||||
else:
|
||||
logger.info(f"using custom audio file: {custom_audio_file}")
|
||||
audio_duration = voice.get_audio_duration(custom_audio_file)
|
||||
if audio_duration == 0:
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
|
||||
logger.error("failed to get audio duration from custom audio file.")
|
||||
return None, None, None
|
||||
return custom_audio_file, audio_duration, None
|
||||
|
||||
def generate_subtitle(task_id, params, video_script, sub_maker, audio_file):
|
||||
if not params.subtitle_enabled:
|
||||
'''
|
||||
Generate subtitle for the video script.
|
||||
If subtitle generation is disabled or no subtitle maker is provided, it will return an empty string.
|
||||
Otherwise, it will generate the subtitle using the specified provider.
|
||||
Returns:
|
||||
- subtitle_path: path to the generated subtitle file
|
||||
'''
|
||||
logger.info("\n\n## generating subtitle")
|
||||
if not params.subtitle_enabled or sub_maker is None:
|
||||
return ""
|
||||
|
||||
subtitle_path = path.join(utils.task_dir(task_id), "subtitle.srt")
|
||||
|
||||
@ -11,6 +11,7 @@ from edge_tts import SubMaker, submaker
|
||||
from edge_tts.submaker import mktimestamp
|
||||
from loguru import logger
|
||||
from moviepy.video.tools import subtitles
|
||||
from moviepy.audio.io.AudioFileClip import AudioFileClip
|
||||
|
||||
from app.config import config
|
||||
from app.utils import utils
|
||||
@ -1660,7 +1661,7 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
|
||||
logger.error(f"failed, error: {str(e)}")
|
||||
|
||||
|
||||
def get_audio_duration(sub_maker: submaker.SubMaker):
|
||||
def _get_audio_duration_from_submaker(sub_maker: submaker.SubMaker):
|
||||
"""
|
||||
获取音频时长
|
||||
"""
|
||||
@ -1668,6 +1669,35 @@ def get_audio_duration(sub_maker: submaker.SubMaker):
|
||||
return 0.0
|
||||
return sub_maker.offset[-1][1] / 10000000
|
||||
|
||||
def _get_audio_duration_from_mp3(mp3_file: str) -> float:
    """
    Return the duration, in seconds, of the audio file at ``mp3_file``.

    The file is opened with moviepy's ``AudioFileClip`` and its ``duration``
    attribute is returned. ``0.0`` is returned (and an error is logged) when
    the path does not exist or when reading the clip raises.
    """
    if os.path.exists(mp3_file):
        try:
            # The clip is used as a context manager so it is closed again
            # before the duration is handed back to the caller.
            with AudioFileClip(mp3_file) as clip:
                seconds = clip.duration
        except Exception as e:
            logger.error(f"Failed to get audio duration from MP3: {str(e)}")
            seconds = 0.0
        return seconds

    logger.error(f"MP3 file does not exist: {mp3_file}")
    return 0.0
|
||||
|
||||
def get_audio_duration(target: Union[str, submaker.SubMaker]) -> float:
    """
    Return the audio duration in seconds for a SubMaker or an audio file.

    Args:
        target: Either a ``SubMaker`` instance, in which case the duration is
            taken from the SubMaker itself, or a string path to an audio
            file on disk, in which case the file is inspected.

    Returns:
        The duration in seconds, or ``0.0`` when ``target`` is neither a
        ``SubMaker`` nor a string, or when the delegated helper cannot
        determine a duration.
    """
    if isinstance(target, submaker.SubMaker):
        return _get_audio_duration_from_submaker(target)

    if isinstance(target, str):
        # Do not gate on a literal, case-sensitive ".mp3" suffix: a path such
        # as "voice.MP3" or "voice.wav" is still a string path, and rejecting
        # it here used to be reported as an "invalid type". The file helper
        # already verifies that the path exists and returns 0.0 if the file
        # cannot be read, so unreadable inputs still yield 0.0.
        return _get_audio_duration_from_mp3(target)

    logger.error(f"Invalid target type: {type(target)}")
    return 0.0
|
||||
|
||||
if __name__ == "__main__":
|
||||
voice_name = "zh-CN-XiaoxiaoMultilingualNeural-V2-Female"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user