# streamlit_app.py
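#
# Streamlit app that dubs a YouTube video into another language: it downloads
# the video and its audio, fetches (or translates) the transcript, synthesizes
# new speech for each transcript segment with Coqui TTS, and merges the
# generated audio back into the video. The heavy lifting lives in the
# wildcard-imported helper modules (texttospeech, translation, integrations,
# audiovideo); only the UI and orchestration are defined here.
#
# Run locally with: streamlit run streamlit_app.py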
import streamlit as st

# Callable and List are used in the type hints below; import them explicitly
# rather than relying on the wildcard imports to provide them.
from typing import Callable, List

from texttospeech.tts_coqui import *
from translation.translate_huggingface import *
from integrations.youtube import *
from audiovideo.utilities import *
from playsound import playsound
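
# Languages offered in the UI. For each entry, "model" is the language code
# forwarded to the TTS step (LANGUAGES[...]["model"] becomes the `language`
# argument of _tts_timestamped) and "youtube" is the language code used when
# requesting transcripts/translations from YouTube.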
LANGUAGES = {
    "English": {"model": "en", "youtube": "en"},
    "French": {"model": "fr-fr", "youtube": "fr"},
    "Portuguese": {"model": "pt-br", "youtube": "pt"},
}

def stt_pipeline():
    # Placeholder for a speech-to-text based pipeline (e.g. for videos without
    # an available transcript).
    pass

# End-to-end, non-UI version of the dubbing pipeline (also reachable via the
# commented-out call at the bottom of this file).
def transcript_pipeline(url: str, translation_model: str, voice_model: str):
    print("Downloading video...")
    video = download_video(url)
    print("Downloading audio...")
    audio = download_wav_from_video(url)
    print("Fetching transcript...")
    _, transcript = download_transcript(url)
    print("Splitting audio based on timestamps...")
    audio_timestamped = split_audio_on_transcript_timestamps(audio, transcript)
    print("Translating text...")
    translated_text = translate_timestamped(transcript, translation_model)
    print("Generating voice...")
    wav_files_timestamped = tts_timestamped(translated_text, voice_model, audio_timestamped)
    print("Merging audio...")
    merged_wav_file = merge_timestamped_wav(wav_files_timestamped)
    print("Merging video with audio...")
    merged_video_file = merge_video_and_wav(video, merged_wav_file)
    return merged_video_file
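
# Thin module-local wrappers around the imported helpers. Wrapping them here
# gives Streamlit stable functions to cache (@st.experimental_singleton), and
# the leading underscore on _progress_hooks tells Streamlit's caching not to
# hash that argument.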
@st.experimental_singleton
def _download_video(url):
    return download_video(url)

@st.experimental_singleton
def _download_video_and_extract_wav(url, _progress_hooks=[]):
    return download_video_and_extract_wav(url, _progress_hooks)

def _download_transcript(url, from_language='fr'):
    return download_transcript(url, from_language)

def _split_audio_on_timestamps(audio, data_holder):
    return split_audio_on_timestamps(audio, data_holder)

def _translate_timestamped(transcript, translation_model):
    return translate_timestamped(transcript, translation_model)

def _fetch_translated_transcript(url, to_language='en'):
    return fetch_translated_transcript(url, to_language)

def _tts_timestamped(data_holder: List, model_name: str, update_progress: Callable, speaker_idx: str = None, language: str = "en"):
    return tts_timestamped(data_holder, model_name, update_progress, speaker_idx, language)

def _merge_timestamped_wav(data_holder):
    return merge_timestamped_wav(data_holder)

def _merge_video_and_wav(video, merged_wav_file):
    return merge_video_and_wav(video, merged_wav_file)

def _create_data_holder_from_transcript(transcript):
    return create_data_holder_from_transcript(transcript)

def _create_data_holder_from_translated_transcript(transcript):
    return create_data_holder_from_translated_transcript(transcript)
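
# Development UI: exposes each stage of the pipeline behind its own checkbox so
# intermediate transcripts, translations and audio snippets can be inspected.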
def main_dev():
    st.title("Translate a YouTube video")
    url = st.text_input("YouTube URL", value="https://www.youtube.com/watch?v=LA8L3IvFBvQ")

    transcript_button = st.checkbox("Get transcript")
    if transcript_button:
        clean_transcript, transcript = _download_transcript(url)
        video, audio = _download_video_and_extract_wav(url)  # TODO: Just get the audio from the video file instead of downloading it separately, the download size is the same
        audio_timestamped = _split_audio_on_timestamps(audio, transcript)

        st.header("Transcript")
        for snippet in zip(transcript, audio_timestamped):
            text_snippet = snippet[0]
            audio_snippet = snippet[1]
            st.text_input(str(text_snippet["start"]), value=text_snippet["text"], key=hash("transcript" + str(text_snippet["start"])))
            audio_file = open(audio_snippet["audio"], 'rb')
            st.audio(audio_file)

        translate_button = st.checkbox("Translate")
        if translate_button:
            translated_text = _translate_timestamped(transcript, "SEBIS/legal_t5_small_trans_en_sv_small_finetuned")
            for snippet in translated_text:
                st.text_input(str(snippet["start"]), value=str(snippet["text"]), key=hash("translation" + str(snippet["start"])))

            voice_button = st.checkbox("Convert to speech")
            if voice_button:
                wav_files_timestamped = _tts_timestamped(translated_text, "de/thorsten/tacotron2-DCA", speaker_timestamped=audio_timestamped)
                for snippet in wav_files_timestamped:
                    st.text(str(snippet["start"]))
                    audio_file = open(snippet["audio"], 'rb')
                    st.audio(audio_file)

                create_video_button = st.checkbox("Create video")
                if create_video_button:
                    merged_wav_file = _merge_timestamped_wav(wav_files_timestamped)
                    merged_video_file = _merge_video_and_wav(video, merged_wav_file)
                    video_file = open(merged_video_file, 'rb')
                    st.video(video_file)

# transcript_pipeline("https://www.youtube.com/watch?v=LA8L3IvFBvQ", "Helsinki-NLP/opus-mt-en-de", "de/thorsten/tacotron2-DCA")
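
# Main UI: a single "Translate!" button that runs the whole pipeline, showing
# download and TTS progress along the way and displaying the dubbed video.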
def main():
    st.title("Translate a YouTube video")
    st.warning("The processing time is about 4x the video length, so grab yourself a coffee while waiting ☕")
    url = st.text_input("YouTube URL", value="https://www.youtube.com/watch?v=u_XIDO79zaQ")
    from_language_dropdown = st.selectbox("From language: ", list(LANGUAGES.keys()))
    to_language_dropdown = st.selectbox("To language: ", list(LANGUAGES.keys()))

    if st.button("Translate!"):
        # Get a translated transcript
        # try:
        translated_transcript = _fetch_translated_transcript(url, to_language=LANGUAGES[to_language_dropdown]["youtube"])
        # Create a data holder to keep track of all data throughout the process
        data_holder = _create_data_holder_from_translated_transcript(translated_transcript)
        # except:
        #     transcript = _download_transcript(url, from_language=LANGUAGES[from_language_dropdown]["youtube"])
        #     # Create a data holder to keep track of all data throughout the process
        #     data_holder = _create_data_holder_from_transcript(transcript)
        #     translated_transcript = _translate_timestamped(transcript, "SEBIS/legal_t5_small_trans_en_sv_small_finetuned")
        #     add_translated_transcript(data_holder, translated_transcript)

        # Download audio and video, showing download progress in the UI
        download_progress = st.progress(0)
        download_progress_message = st.empty()

        def update_download_progress(d):
            if d['status'] == 'downloading':
                p = d['_percent_str'].replace('%', '')
                download_progress.progress(float(p) / 100)
                download_progress_message.markdown("Downloading: " + d['filename'] + " | " + d['_percent_str'] + " | ETA: " + d['_eta_str'])

        video, audio = _download_video_and_extract_wav(url, [update_download_progress])
        del download_progress
        del download_progress_message

        # Split audio into timestamps
        _split_audio_on_timestamps(audio, data_holder)

        # Run TTS to generate new audio
        tts_progress = st.progress(0)
        translation_progress_message = st.empty()

        def progressbar_update(progress, message):
            tts_progress.progress(progress)
            translation_progress_message.markdown(message)

        _tts_timestamped(data_holder, "de/thorsten/tacotron2-DCA", progressbar_update, language=LANGUAGES[to_language_dropdown]["model"])

        # Merge the new audio with the video
        merged_wav_file = _merge_timestamped_wav(data_holder)
        merged_video_file = _merge_video_and_wav(video, merged_wav_file)

        # Display the new video
        video_file = open(merged_video_file, 'rb')
        st.video(video_file)


if __name__ == "__main__":
    main()