From b471a272b6f7b8a2b7e52ca10ea565fa54a7e874 Mon Sep 17 00:00:00 2001 From: harry Date: Sun, 24 Mar 2024 17:50:50 +0800 Subject: [PATCH] 1. Optimize subtitle generation in edge mode; 2. Optimize the LLM prompt to use the same language as the video subject --- app/models/schema.py | 4 ++-- app/services/llm.py | 2 +- app/services/task.py | 10 ++++++++-- app/services/voice.py | 13 +++++++++---- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/app/models/schema.py b/app/models/schema.py index 5dae826..6849735 100644 --- a/app/models/schema.py +++ b/app/models/schema.py @@ -89,7 +89,7 @@ class VideoParams: """ video_subject: str video_script: str = "" # 用于生成视频的脚本 - video_terms: str = "" # 用于生成视频的关键词 + video_terms: Optional[str | list] = None # 用于生成视频的关键词 video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value video_clip_duration: Optional[int] = 5 @@ -98,7 +98,7 @@ class VideoParams: bgm_type: Optional[str] = "random" bgm_file: Optional[str] = "" bgm_volume: Optional[float] = 0.2 - + subtitle_enabled: Optional[bool] = True subtitle_position: Optional[str] = "bottom" # top, bottom, center font_name: Optional[str] = "STHeitiMedium.ttc" diff --git a/app/services/llm.py b/app/services/llm.py index 116f3d0..97f3fff 100644 --- a/app/services/llm.py +++ b/app/services/llm.py @@ -77,13 +77,13 @@ Generate a script for a video, depending on the subject of the video. 5. only return the raw content of the script. 6. do not include "voiceover", "narrator" or similar indicators of what should be spoken at the beginning of each paragraph or line. 7. you must not mention the prompt, or anything about the script itself. also, never talk about the amount of paragraphs or lines. just write the script. +8. respond in the same language as the video subject. ## Output Example: What is the meaning of life. This question has puzzled philosophers. 
# Initialization: - video subject: {video_subject} -- output language: {language} - number of paragraphs: {paragraph_number} """.strip() diff --git a/app/services/task.py b/app/services/task.py index 159e841..af1881b 100644 --- a/app/services/task.py +++ b/app/services/task.py @@ -58,7 +58,13 @@ def start(task_id, params: VideoParams): if not video_terms: video_terms = llm.generate_terms(video_subject=video_subject, video_script=video_script, amount=5) else: - video_terms = [term.strip() for term in re.split(r'[,,]', video_terms)] + if isinstance(video_terms, str): + video_terms = [term.strip() for term in re.split(r'[,,]', video_terms)] + elif isinstance(video_terms, list): + video_terms = [term.strip() for term in video_terms] + else: + raise ValueError("video_terms must be a string or a list of strings.") + logger.debug(f"video terms: {utils.to_json(video_terms)}") script_file = path.join(utils.task_dir(task_id), f"script.json") @@ -95,7 +101,7 @@ def start(task_id, params: VideoParams): else: subtitle_lines = subtitle.file_to_subtitles(subtitle_path) if not subtitle_lines: - logger.warning(f"subtitle file is invalid: {subtitle_path}") + logger.warning(f"subtitle file is invalid, fallback to whisper : {subtitle_path}") subtitle_fallback = True if subtitle_provider == "whisper" or subtitle_fallback: diff --git a/app/services/voice.py b/app/services/voice.py index 6769961..b8a5d5c 100644 --- a/app/services/voice.py +++ b/app/services/voice.py @@ -57,6 +57,8 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str) sub_index = 0 script_lines = utils.split_string_by_punctuations(text) + # remove space in every word + script_lines_without_space = [line.replace(" ", "") for line in script_lines] sub_line = "" for _, (offset, sub) in enumerate(zip(sub_maker.offset, sub_maker.subs)): @@ -66,14 +68,17 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str) sub = unescape(sub) sub_line += sub - if sub_line == 
script_lines[sub_index]: + if sub_line == script_lines[sub_index] or sub_line == script_lines_without_space[sub_index]: + sub_text = script_lines[sub_index] sub_index += 1 - sub_items.append(formatter( + line = formatter( idx=sub_index, start_time=start_time, end_time=end_time, - sub_text=sub_line, - )) + sub_text=sub_text, + ) + # logger.debug(line.strip()) + sub_items.append(line) start_time = -1.0 sub_line = ""