mirror of
https://github.com/harry0703/MoneyPrinterTurbo.git
synced 2026-02-21 16:37:21 +08:00
1. Add azure_tts_v1 to control the speed of speech
This commit is contained in:
parent
6853163905
commit
63fb848a17
@ -108,6 +108,7 @@ class VideoParams(BaseModel):
|
||||
|
||||
voice_name: Optional[str] = ""
|
||||
voice_volume: Optional[float] = 1.0
|
||||
voice_rate: Optional[float] = 1.0
|
||||
bgm_type: Optional[str] = "random"
|
||||
bgm_file: Optional[str] = ""
|
||||
bgm_volume: Optional[float] = 0.2
|
||||
|
||||
@ -32,6 +32,7 @@ def start(task_id, params: VideoParams):
|
||||
|
||||
video_subject = params.video_subject
|
||||
voice_name = voice.parse_voice_name(params.voice_name)
|
||||
voice_rate = params.voice_rate
|
||||
paragraph_number = params.paragraph_number
|
||||
n_threads = params.n_threads
|
||||
max_clip_duration = params.video_clip_duration
|
||||
@ -84,7 +85,7 @@ def start(task_id, params: VideoParams):
|
||||
|
||||
logger.info("\n\n## generating audio")
|
||||
audio_file = path.join(utils.task_dir(task_id), f"audio.mp3")
|
||||
sub_maker = voice.tts(text=video_script, voice_name=voice_name, voice_file=audio_file)
|
||||
sub_maker = voice.tts(text=video_script, voice_name=voice_name, voice_rate=voice_rate, voice_file=audio_file)
|
||||
if sub_maker is None:
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
|
||||
logger.error(
|
||||
|
||||
@ -1028,21 +1028,32 @@ def is_azure_v2_voice(voice_name: str):
|
||||
return ""
|
||||
|
||||
|
||||
def tts(text: str, voice_name: str, voice_file: str) -> [SubMaker, None]:
|
||||
def tts(text: str, voice_name: str, voice_rate: float, voice_file: str) -> [SubMaker, None]:
|
||||
if is_azure_v2_voice(voice_name):
|
||||
return azure_tts_v2(text, voice_name, voice_file)
|
||||
return azure_tts_v1(text, voice_name, voice_file)
|
||||
return azure_tts_v1(text, voice_name, voice_rate, voice_file)
|
||||
|
||||
|
||||
def azure_tts_v1(text: str, voice_name: str, voice_file: str) -> [SubMaker, None]:
|
||||
def convert_rate_to_percent(rate: float) -> str:
|
||||
if rate == 1.0:
|
||||
return "+0%"
|
||||
percent = round((rate - 1.0) * 100)
|
||||
if percent > 0:
|
||||
return f"+{percent}%"
|
||||
else:
|
||||
return f"{percent}%"
|
||||
|
||||
|
||||
def azure_tts_v1(text: str, voice_name: str, voice_rate: float, voice_file: str) -> [SubMaker, None]:
|
||||
voice_name = parse_voice_name(voice_name)
|
||||
text = text.strip()
|
||||
rate_str = convert_rate_to_percent(voice_rate)
|
||||
for i in range(3):
|
||||
try:
|
||||
logger.info(f"start, voice name: {voice_name}, try: {i + 1}")
|
||||
|
||||
async def _do() -> SubMaker:
|
||||
communicate = edge_tts.Communicate(text, voice_name)
|
||||
communicate = edge_tts.Communicate(text, voice_name, rate=rate_str)
|
||||
sub_maker = edge_tts.SubMaker()
|
||||
with open(voice_file, "wb") as file:
|
||||
async for chunk in communicate.stream():
|
||||
|
||||
@ -510,11 +510,11 @@ with middle_panel:
|
||||
with st.spinner(tr("Synthesizing Voice")):
|
||||
temp_dir = utils.storage_dir("temp", create=True)
|
||||
audio_file = os.path.join(temp_dir, f"tmp-voice-{str(uuid4())}.mp3")
|
||||
sub_maker = voice.tts(text=play_content, voice_name=voice_name, voice_file=audio_file)
|
||||
sub_maker = voice.tts(text=play_content, voice_name=voice_name, voice_rate=params.voice_rate, voice_file=audio_file)
|
||||
# if the voice file generation failed, try again with a default content.
|
||||
if not sub_maker:
|
||||
play_content = "This is a example voice. if you hear this, the voice synthesis failed with the original content."
|
||||
sub_maker = voice.tts(text=play_content, voice_name=voice_name, voice_file=audio_file)
|
||||
sub_maker = voice.tts(text=play_content, voice_name=voice_name, voice_rate=params.voice_rate, voice_file=audio_file)
|
||||
|
||||
if sub_maker and os.path.exists(audio_file):
|
||||
st.audio(audio_file, format="audio/mp3")
|
||||
@ -531,6 +531,10 @@ with middle_panel:
|
||||
|
||||
params.voice_volume = st.selectbox(tr("Speech Volume"),
|
||||
options=[0.6, 0.8, 1.0, 1.2, 1.5, 2.0, 3.0, 4.0, 5.0], index=2)
|
||||
|
||||
params.voice_rate = st.selectbox(tr("Speech Rate"),
|
||||
options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0], index=2)
|
||||
|
||||
bgm_options = [
|
||||
(tr("No Background Music"), ""),
|
||||
(tr("Random Background Music"), "random"),
|
||||
|
||||
@ -26,6 +26,7 @@
|
||||
"Speech Region": "服务区域 (:red[必填,[点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
|
||||
"Speech Key": "API Key (:red[必填,密钥1 或 密钥2 均可 [点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
|
||||
"Speech Volume": "朗读音量(1.0表示100%)",
|
||||
"Speech Rate": "朗读速度(1.0表示1倍速)",
|
||||
"Male": "男性",
|
||||
"Female": "女性",
|
||||
"Background Music": "背景音乐",
|
||||
|
||||
Loading…
Reference in New Issue
Block a user