Spaces:
Build error
Build error
Alex Volkov
committed on
Commit
·
6fd7ef3
1
Parent(s):
8a19f9b
Initial commit, let's see if this works on HF
Browse files- README.md +1 -1
- app.py +117 -4
- download.py +146 -0
- fonts/arial.ttf +0 -0
- utils/__init__.py +0 -0
- utils/subs.py +84 -0
README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
---
|
| 2 |
title: Vidtranslator
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: red
|
| 5 |
colorTo: purple
|
| 6 |
sdk: gradio
|
|
|
|
| 1 |
---
|
| 2 |
title: Vidtranslator
|
| 3 |
+
emoji: 🎥
|
| 4 |
colorFrom: red
|
| 5 |
colorTo: purple
|
| 6 |
sdk: gradio
|
app.py
CHANGED
|
@@ -1,7 +1,120 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
|
| 3 |
-
|
| 4 |
-
return "Hello there " + name + "!!"
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
|
| 3 |
+
from download import check_download, download, download_generator
|
|
|
|
| 4 |
|
| 5 |
+
import anvil.media
|
| 6 |
+
import os
|
| 7 |
+
import pathlib
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from shutil import rmtree
|
| 10 |
+
|
| 11 |
+
# Open the Anvil uplink; the key has to be present in the environment.
anvil.server.connect(os.environ["ANVIL_UPLINK_KEY"])
# Rebound to the object returned by demo.queue() once the UI has been built.
queue_placeholder = None

# Components are created here and attached to the layout later via .render().
url_input = gr.Textbox(
    label="Youtube/Twitter/etc video URL (supports many services)",
    value="https://twitter.com/starsonxh/status/1552945347194142720",
    lines=1,
    elem_id="url_input",
)
download_status = gr.Textbox(
    label="Status:",
    value="",
    lines=1,
    elem_id="download_status",
)
# The media and text outputs start hidden; predownload() reveals them as
# each pipeline step reports a result.
init_video = gr.Video(label="Downloaded video", visible=False)
init_audio = gr.Audio(label="Downloaded audio", visible=False)
output_text = gr.Textbox(
    label="Output text",
    lines=5,
    visible=False,
    max_lines=10,
)
sub_video = gr.Video(label="Subbed video", visible=False)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
@anvil.server.callable
def cleanup_output_dir():
    """Delete everything inside the ``output`` directory next to this file.

    The process working directory is switched to this file's directory first,
    so the relative ``output`` path resolves there.  Files are unlinked and
    sub-directories are removed recursively; ``output`` itself is kept.  A
    missing ``output`` directory is treated as already clean.
    """
    # Make sure we're in the main directory.
    os.chdir(pathlib.Path(__file__).parent.absolute())
    # Snapshot the top-level entries before deleting anything: removing whole
    # directories while a lazy recursive glob is still walking them is fragile.
    # rmtree already takes care of each directory's contents.
    for path in list(Path("output").glob("*")):
        if path.is_file():
            path.unlink()
        elif path.is_dir():
            rmtree(path)
|
| 32 |
+
|
| 33 |
+
@anvil.server.callable
def download_api(url):
    """Run the download pipeline for *url* and return its last status message.

    Every message produced by ``download_generator`` is printed as it
    arrives; an empty string is returned if the generator yields nothing.
    """
    print(f'Request from Anvil with URL {url}')
    # TODO: figure out how to push an incoming event to the queue
    # THIS DOESN'T WORK queue_placeholder.push_event('download', url)
    # TODO: handle errors
    last_message = ''
    for update in download_generator(url):
        last_message = update['message']
        print(last_message)
    return last_message
|
| 44 |
+
|
| 45 |
+
def predownload(url):
    """Translate pipeline progress for *url* into Gradio component updates.

    For each dict yielded by ``download_generator`` this yields a mapping of
    component -> new value.  The status textbox is always updated; the video,
    audio, subtitle text and subbed-video components are revealed only when
    the corresponding key is present in the response.
    """
    for response in download_generator(url):
        meta = response.get('meta')
        updates_object = {download_status: response.get('message', '')}

        if 'video' in response:
            updates_object[init_video] = gr.update(
                visible=True,
                value=response["video"],
                label=f"Init Video: {meta['id']}.{meta['ext']}",
            )
            updates_object[init_audio] = gr.update(
                visible=True,
                value=response["audio"],
                label=f"Extracted audio : {meta['id']}.mp3",
            )

        if 'whisper_result' in response:
            updates_object[output_text] = gr.update(
                value=response['whisper_result'].get('srt'),
                visible=True,
                label=f"Subtitles translated from {response['whisper_result'].get('language')} (detected language)",
            )

        if 'sub_video' in response:
            updates_object[sub_video] = gr.update(
                visible=True,
                value=response["sub_video"],
                label=f"Subbed video: {meta['id']}_translated.mp4",
            )

        yield updates_object
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
# Gates the extra "Download Video" / "Download Srt" buttons in the layout.
subtitled_video = False

# Custom stylesheet passed to gr.Blocks(css=...); selectors match the
# elem_id values given to the components ("submit", "url_input", ...).
css = """
#submit{
position: absolute;
flex:0 !important;
width: 120px;
right: 13px;
top: 40px;
}
#url_input{
font-size: 40px !important;
}
#download_status{
font-size: 40px !important;
}
.gradio-container {background-color: red}
#input_row{
position: relative;
}
.gradio-interface #submit{

}
"""
|
| 88 |
+
# Build the Gradio UI. The components created at module level are attached
# to the layout with .render(); buttons and the checkbox are created inline.
with gr.Blocks(css=css+"") as demo:
|
| 89 |
+
gr.Markdown('# Vid Translator 0.1 - get english subtitles for videos in any language')
|
| 90 |
+
gr.Markdown('### Link to a tweet, youtube or other video and get a translated video with @openAi #whisper, built by [@altryne](https://twitter.com/altryne/)')
|
| 91 |
+
gr.Markdown('### This is used as the backend for [@vidtranslator](https://twitter.com/vidtranslator/)')
|
| 92 |
+
# Input row: URL box plus the "Download" button (elem_id 'submit').
with gr.Row(elem_id="input_row"):
|
| 93 |
+
with gr.Group() as group:
|
| 94 |
+
url_input.render()
|
| 95 |
+
greet_btn = gr.Button("Download", elem_id='submit', variant='primary')
|
| 96 |
+
# NOTE(review): this checkbox is not passed as an input to any handler below — confirm it is meant to be wired up later.
pause_for_editing = gr.Checkbox(label="Pause for editing")
|
| 97 |
+
# Results row: status and downloaded media in one column, subtitles and subbed video in the other.
with gr.Row():
|
| 98 |
+
with gr.Column():
|
| 99 |
+
download_status.render()
|
| 100 |
+
init_video.render()
|
| 101 |
+
init_audio.render()
|
| 102 |
+
with gr.Column():
|
| 103 |
+
with gr.Group() :
|
| 104 |
+
output_text.render()
|
| 105 |
+
# NOTE(review): the next two buttons have no click handler attached in this file — confirm.
gr.Button("Download srt file")
|
| 106 |
+
gr.Button("Bake subtitles into video")
|
| 107 |
+
sub_video.render()
|
| 108 |
+
# subtitled_video is a module-level flag set to False above, so these buttons are not created unless it is changed.
if(subtitled_video):
|
| 109 |
+
download_video = gr.Button("Download Video", variant='primary')
|
| 110 |
+
download_srt = gr.Button("Download Srt", variant='primary')
|
| 111 |
+
|
| 112 |
+
# Both the button click and the textbox submit event run predownload and feed the same five output components.
greet_btn.click(fn=predownload, inputs=[url_input], outputs=[download_status, init_video, init_audio, output_text, sub_video], api_name='predownload')
|
| 113 |
+
url_input.submit(fn=predownload, inputs=[url_input], outputs=[download_status, init_video, init_audio, output_text, sub_video])
|
| 114 |
+
|
| 115 |
+
# Invisible button whose click handler registers cleanup_output_dir under the api_name 'cleanup_output_dir'.
api_button = gr.Button("API", variant='primary', visible=False).click(fn=cleanup_output_dir, inputs=[], outputs=[], api_name='cleanup_output_dir')
|
| 116 |
+
|
| 117 |
+
# Enable queueing and keep the returned object in the module-level placeholder.
queue_placeholder = demo.queue()
|
| 118 |
+
|
| 119 |
+
# Launch only when run as a script, with errors shown and debug mode on.
if __name__ == "__main__":
|
| 120 |
+
demo.launch(show_error=True, debug=True)
|
download.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import time
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
import anvil.server
|
| 5 |
+
import anvil.media
|
| 6 |
+
from whisper.utils import write_srt
|
| 7 |
+
from youtube_dl import YoutubeDL
|
| 8 |
+
from youtube_dl.utils import DownloadError
|
| 9 |
+
import os
|
| 10 |
+
import tempfile
|
| 11 |
+
import json
|
| 12 |
+
import whisper
|
| 13 |
+
from whisper.tokenizer import LANGUAGES
|
| 14 |
+
|
| 15 |
+
import ffmpeg
|
| 16 |
+
from utils.subs import bake_subs
|
| 17 |
+
|
| 18 |
+
# Working directory at import time; the pipeline's error paths chdir back to it.
original_dir = os.getcwd()
# Relative directory under which each video gets its own <id> sub-directory.
output_dir = Path("output")
|
| 20 |
+
|
| 21 |
+
def download_generator(url):
    """Run the whole pipeline for *url*, yielding a status dict per step.

    Steps: probe the URL, download the video and extract its audio,
    transcribe the audio and write an ``.srt`` file, then bake the subtitles
    into the video.  Every yielded dict has a ``"message"`` key; depending on
    the step it also carries ``"meta"``, ``"video"``/``"audio"``,
    ``"whisper_result"`` or ``"sub_video"``.

    Failure handling:
      * probing or transcription fails -> the error text is yielded and the
        generator ends (later steps cannot run without those results);
      * download or subtitle baking fails -> the error text is yielded and
        the exception is re-raised to the consumer.
    """
    ### Step 1 : check if video is available
    yield {"message": f"Checking {url} for videos"}
    try:
        meta = check_download(url)
        yield {"message": f"Found video with {meta['duration']} seconds duration from {meta['extractor']}", "meta": meta}
        # Per-video working directory: output/<video id>. It is not created
        # here (the makedirs call was disabled in the original code).
        tempdir = output_dir / f"{meta['id']}"
    except Exception as e:
        os.chdir(original_dir)
        yield {"message": f"{e}"}
        return

    ### Step 2 : Download video and extract audio
    yield {"message": f"Starting download with URL {url}, this may take a while"}

    try:
        meta, video, audio = download(url, tempdir)
        yield {"message": "Downloaded video and extracted audio", "video": video, "audio": audio, "meta": meta}
    except Exception as e:
        os.chdir(original_dir)
        yield {"message": f"{e}"}
        raise e

    ### Step 3 : Transcribe with whisper
    yield {"message": f"[PLEASE WAIT] Starting whisper transcribe with {meta['id']}.mp3"}
    try:
        whisper_result = transcribe(audio)
        srt_path = tempdir / f"{meta['id']}.srt"
        with open(srt_path, "w", encoding="utf-8") as srt:
            write_srt(whisper_result["segments"], file=srt)

        # Read back with the same encoding the file was written with, rather
        # than the platform default.
        whisper_result["srt"] = srt_path.read_text(encoding="utf-8")
        yield {"message": "Transcribe successful", "whisper_result": whisper_result, "meta": meta}
    except Exception as e:
        os.chdir(original_dir)
        yield {"message": f"{e}"}
        # Without a transcript there is no .srt to bake (srt_path may not
        # even be bound), so stop instead of falling through to step 4.
        return
    # TODO: add return here so users can continue after editing subtitles

    ### Step 4 : Bake subtitles into video with ffmpeg
    yield {"message": "[PLEASE WAIT] baking subtitles into video"}
    try:
        subbed_video_path = tempdir / f"{meta['id']}_translated.mp4"

        fontsdir = Path('fonts')
        bake_subs(video, subbed_video_path.absolute(), srt_path.absolute(), fontsdir)
        yield {"message": "Subtitled video ready!", "sub_video": str(subbed_video_path.absolute()), "meta": meta}
    except ffmpeg.Error as e:
        # Surface ffmpeg's captured output before propagating the failure.
        print('stdout:', e.stdout.decode('utf8'))
        print('stderr:', e.stderr.decode('utf8'))
        raise e
    except Exception as e:
        os.chdir(original_dir)
        print('error', file=sys.stderr)
        # Report the error to the consumer before re-raising, as step 2 does
        # (the original yield sat after the raise and could never run).
        yield {"message": f"{e}"}
        raise e
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def progress_hook(d):
    """Print download progress for a youtube_dl progress-hook status dict.

    This is deliberately a plain function: a hook whose body contains
    ``yield`` merely returns an unstarted generator when called, so nothing
    in it ever executes.

    Args:
        d: status dict passed to the hook.  Reads ``status`` and, depending
            on it, ``downloaded_bytes``, ``total_bytes`` /
            ``total_bytes_estimate`` or ``filename``.
    """
    status = d.get('status')
    if status == 'downloading':
        downloaded = d.get('downloaded_bytes')
        # The total size may be absent or None; fall back to the estimate
        # before giving up on a percentage.
        total = d.get('total_bytes') or d.get('total_bytes_estimate')
        if total and downloaded is not None:
            percent = round(float(downloaded) / float(total) * 100, 1)
            print("downloading " + str(percent) + "%")
        else:
            print("downloading (total size unknown)")
    elif status == 'finished':
        print(d['filename'])
|
| 89 |
+
|
| 90 |
+
def download(url, tempdir):
    """Download the video at *url* into *tempdir* and extract an mp3 track.

    Returns:
        ``(meta, video_path, audio_path)`` - the info dict returned by
        ``extract_info`` plus the resolved paths, as strings, of
        ``<id>.<ext>`` and ``<id>.mp3`` inside *tempdir*.

    A ``DownloadError`` raised by youtube_dl propagates to the caller.
    """
    extract_mp3 = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }
    options = {
        "format": "bestvideo[ext=mp4]+bestaudio/best",
        # Keep the video file after the audio post-processor has run.
        "keepvideo": True,
        'postprocessors': [extract_mp3],
        "skip_download": False,
        "outtmpl": f"{tempdir}/%(id)s.%(ext)s",
        "noplaylist": True,
        "verbose": False,
        "quiet": True,
        "progress_hooks": [progress_hook],
    }
    meta = YoutubeDL(options).extract_info(url, download=True)

    video_path = (tempdir / f"{meta['id']}.{meta['ext']}").resolve()
    audio_path = (tempdir / f"{meta['id']}.mp3").resolve()
    print(str(video_path))
    return meta, str(video_path), str(audio_path)
|
| 120 |
+
|
| 121 |
+
def check_download(url):
    """Look up *url* with youtube_dl without downloading it.

    Returns the info dict produced by ``extract_info(url, download=False)``.
    A ``DownloadError`` raised by youtube_dl propagates to the caller.
    """
    probe_options = {
        "format": "bestvideo[ext=mp4]+bestaudio/best",
        "skip_download": True,
        "verbose": False,
    }
    return YoutubeDL(probe_options).extract_info(url, download=False)
|
| 138 |
+
|
| 139 |
+
def transcribe(audio):
    """Transcribe *audio* with whisper, translating the speech to English.

    The previous body ignored *audio* and returned one fixed, hard-coded
    transcript for every input; this restores the real model call that had
    been commented out.

    Args:
        audio: the audio input handed to ``model.transcribe`` (the pipeline
            passes the path of the extracted mp3).

    Returns:
        The result dict from ``model.transcribe`` with ``"language"``
        replaced by its entry in whisper's ``LANGUAGES`` table.  Callers in
        this file read the ``"text"``, ``"segments"`` and ``"language"``
        keys.
    """
    print('Starting transcribe...')
    # NOTE(review): the model is loaded on every call; cache it at module
    # level if repeated loads turn out to be too slow.
    model = whisper.load_model('medium')
    output = model.transcribe(audio, task="translate")
    # presumably whisper reports a language code here and LANGUAGES maps it
    # to a readable name — verify against the whisper version in use
    output["language"] = LANGUAGES[output["language"]]
    print(f'Finished transcribe from {output["language"]}', output["text"])
    return output
|
fonts/arial.ttf
ADDED
|
Binary file (367 kB). View file
|
|
|
utils/__init__.py
ADDED
|
File without changes
|
utils/subs.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ffmpeg
|
| 2 |
+
import os
|
| 3 |
+
from pathlib import Path, PureWindowsPath
|
| 4 |
+
import anvil.media
|
| 5 |
+
import os
|
| 6 |
+
from typing import Iterator, TextIO
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def bake_subs(input_file, output_file, subs_file, fontsdir):
    """Burn the subtitles in *subs_file* into *input_file* using ffmpeg.

    Args:
        input_file: path of the source video.
        output_file: path of the subtitled video to write (overwritten if it
            already exists).
        subs_file: path of the subtitle file to render.
        fontsdir: directory containing the fonts the subtitle renderer may
            use (the style below asks for Arial).

    The video stream goes through ffmpeg's ``subtitles`` filter and is then
    recombined with the untouched audio stream.  ffmpeg runs quietly.
    NOTE(review): a caller in this project catches ``ffmpeg.Error`` around
    this call — presumably what ``.run()`` raises on failure; confirm.
    """
    print(f"Baking {subs_file} into video... {input_file} -> {output_file}")

    fontstyle = 'Fontsize=18,OutlineColour=&H40000000,BorderStyle=3,FontName=Arial'
    video = ffmpeg.input(input_file)
    audio = video.audio
    (
        ffmpeg
        .concat(
            # `fontsdir` must be the font *directory*; the earlier code
            # passed the path of arial.ttf itself, which is not a directory.
            video.filter('subtitles', str(subs_file), fontsdir=str(fontsdir), force_style=fontstyle),
            audio, v=1, a=1
        )
        .output(filename=str(output_file))
        .run(quiet=True, overwrite_output=True)
    )
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def str2bool(string):
    """Convert the exact strings ``"True"`` / ``"False"`` to a bool.

    Raises:
        ValueError: for any other value (matching is case-sensitive).
    """
    str2val = {"True": True, "False": False}
    if string not in str2val:
        raise ValueError(
            f"Expected one of {set(str2val.keys())}, got {string}")
    return str2val[string]
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def format_timestamp(seconds: float, always_include_hours: bool = False,
                     decimal_marker: str = "."):
    """Format a duration in seconds as ``[HH:]MM:SS<marker>mmm``.

    Args:
        seconds: non-negative time offset in seconds; rounded to the nearest
            millisecond.
        always_include_hours: emit the hours field even when it is zero.
        decimal_marker: separator between seconds and milliseconds
            (``"."`` by default; SubRip files use ``","``).

    Returns:
        The formatted timestamp.  Hours, when present, are zero-padded to at
        least two digits.

    Raises:
        ValueError: if *seconds* is negative.
    """
    # Explicit check rather than `assert`, which is stripped under -O.
    if seconds < 0:
        raise ValueError("non-negative timestamp expected")

    milliseconds = round(seconds * 1000.0)
    hours, milliseconds = divmod(milliseconds, 3_600_000)
    minutes, milliseconds = divmod(milliseconds, 60_000)
    whole_seconds, milliseconds = divmod(milliseconds, 1_000)

    hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
    return f"{hours_marker}{minutes:02d}:{whole_seconds:02d}{decimal_marker}{milliseconds:03d}"
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def write_srt(transcript: Iterator[dict], file: TextIO):
    """Write *transcript* to *file* as SubRip (SRT) cues.

    Args:
        transcript: iterable of segment dicts with ``start`` and ``end``
            (seconds) and ``text`` keys.
        file: writable text stream.

    Each cue is numbered from 1 and followed by a blank line.  Timestamps
    are written as ``HH:MM:SS,mmm``; any ``-->`` inside the cue text is
    rewritten to ``->`` so it cannot be mistaken for the timing arrow.
    """
    for i, segment in enumerate(transcript, start=1):
        # SRT wants a comma before the milliseconds and two-digit hours.
        # "HH:MM:SS,mmm" is 12 characters, so zfill(12) left-pads a
        # single-digit hour and leaves already-padded stamps untouched.
        start, end = (
            format_timestamp(segment[key], always_include_hours=True)
            .replace(".", ",")
            .zfill(12)
            for key in ("start", "end")
        )
        text = segment['text'].strip().replace('-->', '->')
        print(f"{i}\n{start} --> {end}\n{text}\n", file=file)
    # One flush at the end instead of one per cue.
    file.flush()
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def filename(path):
    """Return the last component of *path* with its final extension removed."""
    base = os.path.basename(path)
    stem, _extension = os.path.splitext(base)
    return stem
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
# if __name__ == '__main__':
|
| 73 |
+
# meta = {
|
| 74 |
+
# "id": 1576155093245693954,
|
| 75 |
+
# "ext": 'mp4'
|
| 76 |
+
# }
|
| 77 |
+
# tempdirname = Path(f"encoding/temp/{meta['id']}")
|
| 78 |
+
# video_file_path = f"{meta['id']}.{meta['ext']}"
|
| 79 |
+
# srt_path = f"{meta['id']}.srt"
|
| 80 |
+
# out_path = f"{meta['id']}_translated.mp4"
|
| 81 |
+
# os.chdir(tempdirname)
|
| 82 |
+
# bake_subs(video_file_path, out_path, srt_path)
|
| 83 |
+
# anvil_media = anvil.media.from_file(out_path, 'video/mp4')
|
| 84 |
+
# print(anvil_media)
|