1. Optimize subtitle generation in edge mode

2. Optimize the LLM prompt: respond in the same language as the video subject
This commit is contained in:
harry 2024-03-24 17:50:50 +08:00
parent 296a1370d3
commit b471a272b6
4 changed files with 20 additions and 9 deletions

View File

@@ -89,7 +89,7 @@ class VideoParams:
"""
video_subject: str
video_script: str = "" # 用于生成视频的脚本
video_terms: str = "" # 用于生成视频的关键词
video_terms: Optional[str | list] = None # 用于生成视频的关键词
video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
video_clip_duration: Optional[int] = 5
@@ -98,7 +98,7 @@ class VideoParams:
bgm_type: Optional[str] = "random"
bgm_file: Optional[str] = ""
bgm_volume: Optional[float] = 0.2
subtitle_enabled: Optional[bool] = True
subtitle_position: Optional[str] = "bottom" # top, bottom, center
font_name: Optional[str] = "STHeitiMedium.ttc"

View File

@@ -77,13 +77,13 @@ Generate a script for a video, depending on the subject of the video.
5. only return the raw content of the script.
6. do not include "voiceover", "narrator" or similar indicators of what should be spoken at the beginning of each paragraph or line.
7. you must not mention the prompt, or anything about the script itself. also, never talk about the amount of paragraphs or lines. just write the script.
8. respond in the same language as the video subject.
## Output Example:
What is the meaning of life. This question has puzzled philosophers.
# Initialization:
- video subject: {video_subject}
- output language: {language}
- number of paragraphs: {paragraph_number}
""".strip()

View File

@@ -58,7 +58,13 @@ def start(task_id, params: VideoParams):
if not video_terms:
video_terms = llm.generate_terms(video_subject=video_subject, video_script=video_script, amount=5)
else:
video_terms = [term.strip() for term in re.split(r'[,]', video_terms)]
if isinstance(video_terms, str):
video_terms = [term.strip() for term in re.split(r'[,]', video_terms)]
elif isinstance(video_terms, list):
video_terms = [term.strip() for term in video_terms]
else:
raise ValueError("video_terms must be a string or a list of strings.")
logger.debug(f"video terms: {utils.to_json(video_terms)}")
script_file = path.join(utils.task_dir(task_id), f"script.json")
@@ -95,7 +101,7 @@ def start(task_id, params: VideoParams):
else:
subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
if not subtitle_lines:
logger.warning(f"subtitle file is invalid: {subtitle_path}")
logger.warning(f"subtitle file is invalid, fallback to whisper : {subtitle_path}")
subtitle_fallback = True
if subtitle_provider == "whisper" or subtitle_fallback:

View File

@@ -57,6 +57,8 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
sub_index = 0
script_lines = utils.split_string_by_punctuations(text)
# remove space in every word
script_lines_without_space = [line.replace(" ", "") for line in script_lines]
sub_line = ""
for _, (offset, sub) in enumerate(zip(sub_maker.offset, sub_maker.subs)):
@@ -66,14 +68,17 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
sub = unescape(sub)
sub_line += sub
if sub_line == script_lines[sub_index]:
if sub_line == script_lines[sub_index] or sub_line == script_lines_without_space[sub_index]:
sub_text = script_lines[sub_index]
sub_index += 1
sub_items.append(formatter(
line = formatter(
idx=sub_index,
start_time=start_time,
end_time=end_time,
sub_text=sub_line,
))
sub_text=sub_text,
)
# logger.debug(line.strip())
sub_items.append(line)
start_time = -1.0
sub_line = ""