jeanma, kern3l, Alexis-Hz, EC2 Default User, mduppes, Mark Duppenthaler committed
Commit ae238b3 · verified · 0 parents

Omnilingual ASR transcription demo


Co-authored-by: kern3l <[email protected]>
Co-authored-by: Alexis-Hz <[email protected]>
Co-authored-by: EC2 Default User <EC2 Default [email protected]>
Co-authored-by: mduppes <[email protected]>
Co-authored-by: EC2 Default User <EC2 Default [email protected]>
Co-authored-by: Mark Duppenthaler <Mark [email protected]>

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the complete changeset.

Files changed (50)
  1. .dockerignore +96 -0
  2. .gitattributes +36 -0
  3. .gitignore +158 -0
  4. Dockerfile +151 -0
  5. README.md +160 -0
  6. frontend/.env +5 -0
  7. frontend/.gitignore +38 -0
  8. frontend/README.md +60 -0
  9. frontend/index.html +13 -0
  10. frontend/package-lock.json +0 -0
  11. frontend/package.json +32 -0
  12. frontend/postcss.config.js +6 -0
  13. frontend/public/vite.svg +6 -0
  14. frontend/src/App.tsx +28 -0
  15. frontend/src/analytics/Analytics.tsx +181 -0
  16. frontend/src/analytics/CookieBanner.tsx +53 -0
  17. frontend/src/analytics/gaEvents.ts +97 -0
  18. frontend/src/components/CanvasTimeline.tsx +393 -0
  19. frontend/src/components/ErrorBoundary.tsx +131 -0
  20. frontend/src/components/FeedbackCard.tsx +27 -0
  21. frontend/src/components/FullTranscription.tsx +152 -0
  22. frontend/src/components/LanguageSelector.tsx +262 -0
  23. frontend/src/components/MediaDownloadControls.tsx +75 -0
  24. frontend/src/components/MediaEditControls.tsx +113 -0
  25. frontend/src/components/MediaPlayer.tsx +130 -0
  26. frontend/src/components/MediaRecorder.tsx +353 -0
  27. frontend/src/components/MinimapTimeline.tsx +509 -0
  28. frontend/src/components/QuickGuide.tsx +176 -0
  29. frontend/src/components/SegmentEditor.tsx +92 -0
  30. frontend/src/components/ServerStatusIndicator.tsx +241 -0
  31. frontend/src/components/TermsModal.tsx +675 -0
  32. frontend/src/components/TipsNotice.tsx +25 -0
  33. frontend/src/components/TranscriptionControls.tsx +152 -0
  34. frontend/src/components/TranscriptionPlayer.tsx +221 -0
  35. frontend/src/components/TranscriptionSideBar.tsx +172 -0
  36. frontend/src/components/TranscriptionWarningModal.tsx +69 -0
  37. frontend/src/components/WelcomeModal.tsx +94 -0
  38. frontend/src/hooks/useAudioAnalyzer.ts +91 -0
  39. frontend/src/hooks/useDragAndDrop.ts +118 -0
  40. frontend/src/hooks/useMediaTimeSync.ts +69 -0
  41. frontend/src/hooks/useTimelineDragControls.ts +416 -0
  42. frontend/src/hooks/useTimelineGeometry.ts +62 -0
  43. frontend/src/hooks/useTimelineRenderer.ts +254 -0
  44. frontend/src/index.css +7 -0
  45. frontend/src/main.tsx +10 -0
  46. frontend/src/pages/TranscriptionPage.tsx +136 -0
  47. frontend/src/services/transcriptionApi.ts +273 -0
  48. frontend/src/stores/transcriptionStore.ts +1161 -0
  49. frontend/src/utils/languages.ts +0 -0
  50. frontend/src/utils/mediaTypes.ts +60 -0
.dockerignore ADDED
@@ -0,0 +1,96 @@
+ # Models directory - downloaded at runtime
+ server/models/
+ models/
+
+ # Node modules - installed during build
+ frontend/node_modules/
+ **/node_modules/
+
+ # Frontend build artifacts - built during Docker build
+ frontend/dist/
+ frontend/build/
+
+ # Development files
+ .git/
+ .gitignore
+ *.md
+ README*
+
+ # IDE and editor files
+ .vscode/
+ .idea/
+ *.swp
+ *.swo
+ *~
+
+ # OS generated files
+ .DS_Store
+ .DS_Store?
+ ._*
+ .Spotlight-V100
+ .Trashes
+ ehthumbs.db
+ Thumbs.db
+
+ # Python cache and artifacts
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # Virtual environments
+ venv/
+ env/
+ ENV/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # pytest
+ .pytest_cache/
+
+ # Coverage reports
+ htmlcov/
+ .tox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ .hypothesis/
+
+ # Logs
+ *.log
+ logs/
+
+ # Runtime data
+ pids
+ *.pid
+ *.seed
+ *.pid.lock
+
+ # Docker files (optional - uncomment if you don't want to include docker files)
+ # Dockerfile*
+ # docker-compose*
+ # .dockerignore
+
+ # Temporary files
+ tmp/
+ temp/
.gitattributes ADDED
@@ -0,0 +1,36 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,158 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ # wheels/
+ pip-wheel-metadata/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # IDE
+ .vscode/
+ .idea/
+ *.swp
+ *.swo
+ *~
+
+ # OS
+ .DS_Store
+ .DS_Store?
+ ._*
+ .Spotlight-V100
+ .Trashes
+ ehthumbs.db
+ Thumbs.db
+
+ # Logs
+ *.log
+ logs/
+
+ # Temporary files
+ *.tmp
+ *.temp
+
+ # Git backup directories
+ .git.backup
+
+ # Models directory (only at root level)
+ /models/
Dockerfile ADDED
@@ -0,0 +1,151 @@
+ # Frontend build stage
+ FROM node:18-alpine AS frontend-build
+
+ # Google Analytics build args
+ ARG VITE_ENABLE_ANALYTICS
+ ARG VITE_REACT_APP_GOOGLE_ANALYTICS_ID
+ ARG VITE_ALLOW_ALL_LANGUAGES
+
+ # Make build args available as environment variables during build
+ ENV VITE_ENABLE_ANALYTICS=${VITE_ENABLE_ANALYTICS}
+ ENV VITE_REACT_APP_GOOGLE_ANALYTICS_ID=${VITE_REACT_APP_GOOGLE_ANALYTICS_ID}
+ ENV VITE_ALLOW_ALL_LANGUAGES=${VITE_ALLOW_ALL_LANGUAGES}
+
+ WORKDIR /app/frontend
+ COPY frontend/package.json frontend/package-lock.json* ./
+ RUN npm install
+ COPY frontend/ ./
+ RUN npm run build
+
+ # Dockerfile to support the Transcriptions API build - works locally and on Hugging Face Spaces
+ FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04 as base
+
+ ENV PYTHON_VERSION=3.10 \
+     PYTHON_VERSION_SHORT=310
+
+ RUN apt-get update && apt-get upgrade -y
+
+ # Install system packages including audio processing libraries
+ RUN apt-get install -y \
+     build-essential \
+     wget \
+     python${PYTHON_VERSION} \
+     python3-pip \
+     libpq-dev
+
+ # Constants
+ ENV PYTHONUNBUFFERED TRUE
+
+ ARG DEBIAN_FRONTEND=noninteractive
+
+ # Set up user with UID 1000 for HF Spaces compatibility
+ RUN useradd -m -u 1000 user
+
+ # Install base utilities, linux packages, and audio processing libraries
+ RUN apt-get update && \
+     DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+     fakeroot \
+     ca-certificates \
+     curl \
+     vim \
+     ssh \
+     wget \
+     gcc \
+     git \
+     ffmpeg \
+     libsndfile1 \
+     libsox-fmt-all \
+     sox \
+     libavcodec-extra && \
+     apt-get clean && \
+     rm -rf /var/lib/apt/lists/*
+
+ # Install miniconda
+ ENV CONDA_DIR /opt/conda
+ # Put conda in path and install
+ ENV PATH=$CONDA_DIR/bin:$PATH
+ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh \
+     && /bin/bash ~/miniconda.sh -b -p /opt/conda
+
+ RUN conda config --set auto_activate_base false && \
+     conda config --set channel_priority flexible && \
+     mkdir -p ~/.conda && \
+     echo "channel_priority: flexible" > ~/.condarc && \
+     conda config --add channels conda-forge && \
+     conda config --set remote_max_retries 5 && \
+     conda config --set remote_connect_timeout_secs 30 && \
+     conda config --set remote_read_timeout_secs 30 && \
+     conda config --set show_channel_urls True && \
+     conda config --set auto_update_conda False && \
+     conda config --set notify_outdated_conda False && \
+     conda config --set report_errors False && \
+     conda config --set always_yes True && \
+     conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main && \
+     conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r && \
+     conda clean -afy
+
+ RUN conda config --set channel_priority false && \
+     conda create -n transcriptions-api python=${PYTHON_VERSION} -y && \
+     conda install -n transcriptions-api -c conda-forge \
+     libsndfile=1.0.31 \
+     numpy \
+     scipy \
+     -y
+
+ # Enable conda
+ SHELL ["conda", "run", "-n", "transcriptions-api", "/bin/bash", "-c"]
+
+ # Set up working directory and environment for user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ WORKDIR $HOME/app
+
+ # Copy requirements.txt and wheel file before installing dependencies
+ COPY --chown=user server/requirements.txt ./
+ COPY --chown=user server/wheels/omnilingual_asr-0.1.0-py3-none-any.whl ./
+
+ # Install the Omnilingual ASR library from the local wheel file
+ RUN pip install omnilingual_asr-0.1.0-py3-none-any.whl
+
+ # Install Python dependencies with proper conda activation
+ RUN pip install -r requirements.txt
+
+ # Install debugpy for development debugging
+ RUN pip install debugpy
+
+ # Copy server code into the image with proper ownership
+ COPY --chown=user ./server $HOME/app/server
+
+ # Copy frontend build from the frontend-build stage
+ COPY --from=frontend-build --chown=user /app/frontend/dist $HOME/app/frontend/dist
+
+ # Make scripts executable and create directories with proper ownership
+ RUN chmod +x $HOME/app/server/run.sh $HOME/app/server/download_models.sh && \
+     mkdir -p $HOME/app/models && \
+     chown -R user:user $HOME/app && \
+     chmod -R 755 $HOME/app
+
+ # Switch to user for runtime
+ USER user
+
+ # Create /data/models if possible (for HF Spaces)
+ RUN mkdir -p /data/models 2>/dev/null || true
+
+ # Set working directory to server
+ WORKDIR $HOME/app/server
+
+ # Expose port 7860 for HF Spaces (also works locally)
+ EXPOSE 7860
+
+ # For production: pre-download models into the image (optional)
+ # Uncomment the following lines if you want models baked into the production image
+ # RUN mkdir -p $HOME/app/models
+ # RUN cd $HOME/app/models && \
+ #     wget -O ctc_alignment_mling_uroman_model_dict.txt https://dl.fbaipublicfiles.com/mms/torchaudio/ctc_alignment_mling_uroman/dictionary.txt && \
+ #     wget -O ctc_alignment_mling_uroman_model.pt https://dl.fbaipublicfiles.com/mms/torchaudio/ctc_alignment_mling_uroman/model.pt && \
+ #     wget https://dl.fbaipublicfiles.com/mms/mms_1143_langs_tokenizer_spm.model && \
+ #     wget https://dl.fbaipublicfiles.com/mms/mms_XRI.pt
+
+ # Default command - works for both local and HF Spaces
+ CMD ["conda", "run", "--no-capture-output", "-n", "transcriptions-api", "./run.sh"]
README.md ADDED
@@ -0,0 +1,160 @@
+ ---
+ title: Omnilingual ASR Media Transcription
+ emoji: 🌍
+ colorFrom: blue
+ colorTo: green
+ sdk: docker
+ app_port: 7860
+ pinned: false
+ license: mit
+ suggested_hardware: a100-large
+ ---
+
+ # Experimental Omnilingual ASR Media Transcription Demo
+
+ A media transcription tool with a web interface for multilingual audio and video transcription using Meta's Omnilingual ASR model. Transcription is supported for 1600+ languages.
+
+ This application is designed primarily as a **web-based media transcription tool** with an intuitive frontend interface. While you can interact directly with the API endpoints, the recommended usage is through the web interface at `http://localhost:7860`.
+
+ ## HuggingFace Space Configuration
+
+ This application is configured to run as a HuggingFace Space; however, it has resource limitations because it is a public demo. To run your own dedicated Space, clone it with the following recommended specifications:
+
+ - **Hardware**: A100 GPU (80GB) - Required for loading the 7B parameter Omnilingual ASR model
+   - _Alternative_: Machines with lower GPU memory can use smaller models by setting the `MODEL_NAME` environment variable in HuggingFace Space settings, e.g. `omniASR_LLM_300M` (requires ~8GB GPU memory)
+ - **Persistent Storage**: Enabled for model caching and improved loading times. Medium (150GB)
+ - **Docker Runtime**: Uses a custom Dockerfile for fairseq2 and PyTorch integration
+ - **Port**: 7860 (HuggingFace standard)
+
+ The A100 machine is specifically chosen to accommodate the large Omnilingual ASR model (~14GB) in GPU memory, ensuring fast inference and real-time transcription capabilities.
+
+ ## Running Outside HuggingFace
+
+ While this application is designed for HuggingFace Spaces, **it can be run on any machine with Docker and GPU support**, with hardware requirements similar to the HuggingFace machines.
+
+ ## Getting Started
+
+ ### Running with Docker
+
+ 1. Build and run the container:
+
+ ```bash
+ docker build -t omnilingual-asr-transcriptions .
+ docker run --rm -p 7860:7860 --gpus all \
+   -e MODEL_NAME=omniASR_LLM_300M \
+   -v {your cache directory}:/home/user/app/models \
+   omnilingual-asr-transcriptions
+ ```
+
+ The media transcription app will be available at `http://localhost:7860`.
+
+ #### Docker Run Parameters Explained:
+
+ - `--rm`: Automatically remove the container when it exits
+ - `-p 7860:7860`: Map host port 7860 to container port 7860
+ - `--gpus all`: Enable GPU access for CUDA acceleration
+ - `-e MODEL_NAME=omniASR_LLM_300M`: Set the Omnilingual ASR model variant to use
+   - Options: `omniASR_LLM_1B` (default, 1B parameters), `omniASR_LLM_300M` (300M parameters, faster)
+ - `-e ENABLE_TOXIC_FILTERING=true`: Enable filtering of toxic words from transcription results (optional; see the sketch after this list)
+ - `-v {your cache directory}:/home/user/app/models`: Mount a local models directory
+   - **Purpose**: Persist downloaded models between container runs (14GB+ cache)
+   - **Benefits**: Avoid re-downloading models on each container restart
+   - **Path**: Adjust `{your cache directory}` to your local models directory
+
+ ### Available API Endpoints
65
+
66
+ #### Core Transcription Routes
67
+
68
+ - `GET /health` - Comprehensive health check with GPU/CUDA status, FFmpeg availability, and transcription status
69
+ - `GET /status` - Get current transcription status (busy/idle, progress, operation type)
70
+ - `POST /transcribe` - Audio transcription with automatic chunking for files of any length
71
+
72
+ #### Additional Routes
73
+
74
+ - `POST /combine-video-subtitles` - Combine video files with subtitle tracks
75
+ - `GET /` - Serve the web application frontend
76
+ - `GET /assets/<filename>` - Serve frontend static assets
77
+
78
+ ### Usage Recommendations
79
+
80
+ **Primary Usage**: Access the web interface at `http://localhost:7860` for an intuitive media transcription experience with drag-and-drop file upload, real-time progress tracking, and downloadable results.
81
+
82
+ **API Usage**: For programmatic access or integration with other tools, you can call the API endpoints directly as shown in the examples below.
83
+
84
+ ### Environment Variables
85
+
86
+ You are free to change these if you clone the space and set them in the Huggingface space settings or in your own server environment. In the public shared demo these are controled for an optimal experience.
87
+
88
+ #### Server Environment Variables
89
+
90
+ - `API_LOG_LEVEL` - Set logging level (DEBUG, INFO, WARNING, ERROR)
91
+ - `MODEL_NAME` - Omnilingual ASR model to use (default: omniASR_LLM_1B)
92
+ - `USE_CHUNKING` - Enable/disable audio chunking (default: true)
93
+ - `ENABLE_TOXIC_FILTERING` - Enable toxic word filtering from transcription results (default: false)
94
+
95
+ #### Frontend Environment Variables
96
+
97
+ - `VITE_ALLOW_ALL_LANGUAGES` - Set to `true` to show all 1,400+ supported languages in the language selector, or `false` to only show languages with error rates < 10% for public demo (default: false)
98
+ - `VITE_ENABLE_ANALYTICS` - Set to `true` to enable Google Analytics tracking, or `false` to disable analytics (default: false)
99
+ - `VITE_REACT_APP_GOOGLE_ANALYTICS_ID` - Your Google Analytics measurement ID (e.g., `G-XXXXXXXXXX`) for tracking usage when analytics are enabled
100
+
101
+ ### API Examples (For Developers)
102
+
103
+ For programmatic access or integration with other tools, you can call the API endpoints directly:
104
+
105
+ ```bash
106
+ # Health check
107
+ curl http://localhost:7860/health
108
+
109
+ # Get transcription status
110
+ curl http://localhost:7860/status
111
+
112
+ # Transcribe audio file
113
+ curl -X POST http://localhost:7860/transcribe \
114
+ -F "audio=@path/to/your/audio.wav"
115
+ ```
116
+
117
+ ## Project Structure
118
+
119
+ ```
120
+ omnilingual-asr-transcriptions/
121
+ ├── Dockerfile # Multi-stage build with frontend + backend
122
+ ├── README.md
123
+ ├── requirements.txt # Python dependencies
124
+ ├── deploy.sh # Deployment script
125
+ ├── run_docker.sh # Local Docker run script
126
+ ├── frontend/ # Web interface (React/Vite)
127
+ │ ├── package.json
128
+ │ ├── src/
129
+ │ └── dist/ # Built frontend (served by Flask)
130
+ ├── models/ # Model files (automatically downloaded)
131
+ │ ├── ctc_alignment_mling_uroman_model.pt
132
+ │ ├── ctc_alignment_mling_uroman_model_dict.txt
133
+ │ └── [Additional model files downloaded at runtime]
134
+ └── server/ # Flask API backend
135
+ ├── server.py # Main Flask application
136
+ ├── transcriptions_blueprint.py # API routes
137
+ ├── audio_transcription.py # Core transcription logic
138
+ ├── media_transcription_processor.py # Media processing
139
+ ├── transcription_status.py # Status tracking
140
+ ├── env_vars.py # Environment configuration
141
+ ├── run.sh # Production startup script
142
+ ├── download_models.sh # Model download script
143
+ ├── wheels/ # Pre-built Omnilingual ASR wheel packages
144
+ └── inference/ # Model inference components
145
+ ├── mms_model_pipeline.py # Omnilingual ASR model wrapper
146
+ ├── audio_chunker.py # Audio chunking logic
147
+ └── audio_sentence_alignment.py # Forced alignment
148
+ ```
149
+
150
+ ### Key Features
151
+
152
+ - **Simplified Architecture**: Single Docker container with built-in model management
153
+ - **Auto Model Download**: Models are downloaded automatically during container startup
154
+ - **Omnilingual ASR Integration**: Uses the latest Omnilingual ASR library with 1600+ language support
155
+ - **GPU Acceleration**: CUDA-enabled inference with automatic device detection
156
+ - **Web Interface**: Modern React frontend for easy testing and usage
157
+ - **Smart Transcription**: Single endpoint handles files of any length with automatic chunking
158
+ - **Intelligent Processing**: Automatic audio format detection and conversion
159
+
160
+ **Note**: Model files are large (14GB+ total) and are downloaded automatically when the container starts. The first run may take longer due to model downloads.
frontend/.env ADDED
@@ -0,0 +1,5 @@
+ VITE_SERVER_URL=''
+
+ # Set to 'true' to show all languages, 'false' to only show accurate languages for demo
+ VITE_ALLOW_ALL_LANGUAGES=true
+ VITE_ENABLE_ANALYTICS=false
frontend/.gitignore ADDED
@@ -0,0 +1,38 @@
+ # Vite/React/TypeScript frontend
+ node_modules/
+ dist/
+ .vite/
+ .env.local
+ .env.*
+ .DS_Store
+ npm-debug.log*
+ yarn-debug.log*
+ yarn-error.log*
+ pnpm-debug.log*
+ .eslintcache
+ .parcel-cache
+ .turbo/
+ .next/
+ .vercel/
+ .cache/
+ .storybook/
+ .swc/
+ .coverage/
+ coverage/
+ .sass-cache/
+ .nuxt/
+ .output/
+ .firebase/
+ .firebaserc
+ .netlify/
+ netlify.toml
+ .envrc
+ .env.test.local
+ .env.production.local
+ .env.development.local
+
+ # Editor directories and files
+ .idea/
+ .vscode/
+ *.sublime-workspace
+ *.sublime-project
frontend/README.md ADDED
@@ -0,0 +1,60 @@
+ # Project Title
+
+ A brief description of your project and its purpose.
+
+ ## Getting Started
+
+ These instructions will help you set up the project on your local machine for development and testing.
+
+ ### Prerequisites
+
+ - Node.js (version X.X.X or later)
+ - npm (version X.X.X or later)
+
+ ### Installation
+
+ 1. Clone the repository:
+    ```
+    git clone <repository-url>
+    ```
+
+ 2. Navigate to the project directory:
+    ```
+    cd frontend
+    ```
+
+ 3. Install the dependencies:
+    ```
+    npm install
+    ```
+
+ ### Running the Application
+
+ To start the development server, run:
+ ```
+ npm run dev
+ ```
+
+ Open your browser and go to `http://localhost:3000` to see the application in action.
+
+ ### Building for Production
+
+ To create a production build, run:
+ ```
+ npm run build
+ ```
+
+ The built files will be generated in the `dist` directory.
+
+ ## Usage
50
+
51
+ - Upload audio or video files to transcribe.
52
+ - View synchronized transcriptions and download subtitles in various formats.
53
+
54
+ ## Contributing
55
+
56
+ If you would like to contribute to this project, please fork the repository and submit a pull request.
57
+
58
+ ## License
59
+
60
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
frontend/index.html ADDED
@@ -0,0 +1,13 @@
+ <!DOCTYPE html>
+ <html lang="en">
+   <head>
+     <meta charset="UTF-8">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>Omnilingual ASR Media Transcription</title>
+     <link rel="stylesheet" href="/src/index.css">
+   </head>
+   <body class="bg-gray-900 text-white">
+     <div id="root"></div>
+     <script type="module" src="/src/main.tsx"></script>
+   </body>
+ </html>
frontend/package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
frontend/package.json ADDED
@@ -0,0 +1,32 @@
+ {
+   "name": "frontend",
+   "version": "1.0.0",
+   "private": true,
+   "scripts": {
+     "dev": "vite",
+     "build": "vite build",
+     "serve": "vite preview"
+   },
+   "dependencies": {
+     "@heroicons/react": "^2.2.0",
+     "daisyui": "^4.12.23",
+     "debounce": "^2.2.0",
+     "match-sorter": "^8.1.0",
+     "react": "^18.2.0",
+     "react-cookie-consent": "^9.0.0",
+     "react-dom": "^18.2.0",
+     "react-select": "^5.10.2",
+     "react-use": "^17.6.0",
+     "tailwindcss": "^3.0.0",
+     "zustand": "^5.0.7"
+   },
+   "devDependencies": {
+     "@types/react": "^18.2.15",
+     "@types/react-dom": "^18.2.7",
+     "@vitejs/plugin-react": "^4.0.3",
+     "autoprefixer": "^10.0.0",
+     "postcss": "^8.0.0",
+     "typescript": "^5.1.6",
+     "vite": "^4.4.5"
+   }
+ }
frontend/postcss.config.js ADDED
@@ -0,0 +1,6 @@
+ module.exports = {
+   plugins: {
+     tailwindcss: {},
+     autoprefixer: {},
+   },
+ };
frontend/public/vite.svg ADDED
frontend/src/App.tsx ADDED
@@ -0,0 +1,28 @@
+ import React from 'react';
+ import TranscriptionPage from './pages/TranscriptionPage';
+ import WelcomeModal from './components/WelcomeModal';
+ import { useTranscriptionStore } from './stores/transcriptionStore';
+ import { trackWelcomeModalClose } from './analytics/gaEvents';
+ import Analytics from './analytics/Analytics';
+
+ const App: React.FC = () => {
+   const { showWelcomeModal, setShowWelcomeModal } = useTranscriptionStore();
+
+   const handleCloseWelcomeModal = () => {
+     trackWelcomeModalClose();
+     setShowWelcomeModal(false);
+   };
+
+   return (
+     <div className="App">
+       <TranscriptionPage />
+       <WelcomeModal
+         isOpen={showWelcomeModal}
+         onClose={handleCloseWelcomeModal}
+       />
+       <Analytics />
+     </div>
+   );
+ };
+
+ export default App;
frontend/src/analytics/Analytics.tsx ADDED
@@ -0,0 +1,181 @@
+ import React from "react";
+ import CookieBanner from "./CookieBanner";
+
+ export const CONSENT_COOKIE_NAME = "omniasr_transcription_consent";
+
+ // Declare gtag global function
+ declare global {
+   interface Window {
+     gtag: (...args: any[]) => void;
+     dataLayer: any[];
+   }
+ }
+
+ const Analytics = () => {
+   const [analyticsEnabled, setAnalyticsEnabled] = React.useState(false);
+   const [consentState, setConsentState] = React.useState<boolean | null>(null); // In-memory fallback
+   const [showBanner, setShowBanner] = React.useState(false); // Control banner visibility
+
+   // Check if we're in iframe (like HuggingFace Spaces)
+   const isInIframe = () => {
+     try {
+       return window.self !== window.top;
+     } catch (e) {
+       return true;
+     }
+   };
+
+   // Get consent with fallback chain: memory -> localStorage -> sessionStorage -> cookies
+   const getConsent = (): boolean => {
+     // First check in-memory state (for HF spaces that block all storage)
+     if (consentState !== null) {
+       return consentState;
+     }
+
+     // Try localStorage
+     try {
+       const localValue = window.localStorage.getItem(CONSENT_COOKIE_NAME);
+       if (localValue !== null) {
+         return localValue === "true";
+       }
+     } catch (e) {}
+
+     // Try sessionStorage
+     try {
+       const sessionValue = window.sessionStorage.getItem(CONSENT_COOKIE_NAME);
+       if (sessionValue !== null) {
+         return sessionValue === "true";
+       }
+     } catch (e) {}
+
+     // Try cookies
+     try {
+       return document.cookie.includes(`${CONSENT_COOKIE_NAME}=true`);
+     } catch (e) {}
+
+     return false;
+   };
+
+   // Set consent with fallback chain
+   const setConsent = (accepted: boolean) => {
+     // Always set in-memory state first (works in all environments)
+     setConsentState(accepted);
+
+     // Try localStorage
+     try {
+       window.localStorage.setItem(CONSENT_COOKIE_NAME, accepted.toString());
+     } catch (e) {}
+
+     // Try sessionStorage as fallback
+     try {
+       window.sessionStorage.setItem(CONSENT_COOKIE_NAME, accepted.toString());
+     } catch (e) {}
+
+     // Try cookies (mainly for non-iframe environments)
+     if (!isInIframe()) {
+       try {
+         const expires = new Date();
+         expires.setFullYear(expires.getFullYear() + 1);
+         document.cookie = `${CONSENT_COOKIE_NAME}=${accepted}; expires=${expires.toUTCString()}; path=/; SameSite=Lax`;
+       } catch (e) {}
+     }
+   };
+
+   // Load gtag script dynamically
+   const loadGtagScript = (gaId: string) => {
+     return new Promise<void>((resolve, reject) => {
+       // Check if gtag is already loaded
+       if (window.gtag) {
+         resolve();
+         return;
+       }
+
+       // Create script element
+       const script = document.createElement('script');
+       script.async = true;
+       script.src = `https://www.googletagmanager.com/gtag/js?id=${gaId}`;
+
+       script.onload = () => {
+         // Initialize gtag
+         window.dataLayer = window.dataLayer || [];
+         window.gtag = function gtag() {
+           window.dataLayer.push(arguments);
+         };
+         window.gtag('js', new Date());
+         window.gtag('config', gaId, {
+           // Settings for iframe environments
+           send_page_view: false, // We'll send manually
+           cookie_flags: 'max-age=7200;secure;samesite=none', // For iframe support
+         });
+
+         console.log('GA: gtag script loaded');
+         resolve();
+       };
+
+       script.onerror = () => {
+         console.error('❌ Failed to load gtag script');
+         reject(new Error('Failed to load gtag script'));
+       };
+
+       document.head.appendChild(script);
+     });
+   };
+
+   // Enable analytics if consent given
+   const handleAcceptCookie = React.useCallback(() => {
+     console.log('User accepted analytics cookies');
+     setConsent(true);
+     setShowBanner(false); // Hide banner after acceptance
+
+     const gaId = import.meta.env.VITE_REACT_APP_GOOGLE_ANALYTICS_ID;
+     const analyticsEnabled = import.meta.env.VITE_ENABLE_ANALYTICS === 'true';
+
+     if (gaId && analyticsEnabled) {
+       loadGtagScript(gaId)
+         .then(() => {
+           setAnalyticsEnabled(true);
+           console.log('GA initialized successfully');
+
+           // Send initial pageview
+           const pathname = window.location.pathname;
+           window.gtag('event', 'page_view', {
+             page_title: document.title,
+             page_location: window.location.href,
+             page_path: pathname,
+           });
+         })
+         .catch((e) => {
+           console.error('GA initialization failed:', e);
+         });
+     }
+   }, []);
+
+   const handleDeclineCookie = React.useCallback(() => {
+     console.log('User declined analytics cookies');
+     setConsent(false);
+     setShowBanner(false); // Hide banner after decline
+   }, []);
+
+   // Check for existing consent on mount
+   React.useEffect(() => {
+     const existingConsent = getConsent();
+     if (existingConsent) {
+       console.log('GA: Found existing consent, initializing...');
+       handleAcceptCookie();
+       setShowBanner(false); // Don't show banner if consent already exists
+     } else {
+       setShowBanner(true); // Show banner if no consent
+     }
+   }, [handleAcceptCookie]);
+
+   // Note: pageview is now sent directly in handleAcceptCookie when gtag is loaded
+
+   return showBanner ? (
+     <CookieBanner
+       onAccept={handleAcceptCookie}
+       onDecline={handleDeclineCookie}
+     />
+   ) : null;
+ };
+
+ export default Analytics;
frontend/src/analytics/CookieBanner.tsx ADDED
@@ -0,0 +1,53 @@
+ import CookieConsent from "react-cookie-consent";
+ import { CONSENT_COOKIE_NAME } from "./Analytics";
+
+ interface CookieBannerProps {
+   onAccept?: (acceptedByScrolling: boolean) => void;
+   onDecline?: () => void;
+ }
+
+ const CookieBanner = ({ onAccept, onDecline }: CookieBannerProps) => {
+   return (
+     <CookieConsent
+       style={{
+         backgroundColor: "white",
+         color: "black",
+         alignItems: "center",
+         flexDirection: "column",
+       }}
+       contentStyle={{ flex: 1, margin: 0 }}
+       overlayStyle={{ backgroundColor: "rgba(0, 0, 0, .65)" }}
+       overlay
+       cookieName={CONSENT_COOKIE_NAME}
+       disableButtonStyles
+       declineButtonClasses="bg-gray-800 hover:bg-gray-900 text-white border border-gray-600 w-[136px] md:w-[208px] h-[36px] capitalize text-base font-medium rounded px-4 py-2 transition-colors"
+       buttonClasses="bg-blue-600 hover:bg-blue-700 text-white border-blue-600 w-[136px] md:w-[208px] h-[36px] capitalize text-base font-medium rounded px-4 py-2 transition-colors"
+       buttonText="Accept"
+       declineButtonText="Decline"
+       enableDeclineButton={true}
+       onAccept={onAccept}
+       onDecline={onDecline}
+       containerClasses="text-base font-medium p-10 md:p-14 pb-14 text-center" // overriding the default so that the popup isn't hidden by adblockers: https://github.com/Mastermindzh/react-cookie-consent/issues/64
+       contentClasses="max-w-[565px]"
+       buttonWrapperClasses="mt-7 mb-2 flex gap-5"
+     >
+       {" "}
+       Allow the use of cookies from Meta on this browser? To find out more about
+       the use of cookies, see our{" "}
+       <a href="https://www.facebook.com/privacy/policy" target="_blank" rel="noopener noreferrer">
+         <b>Privacy Policy</b>
+       </a>{" "}
+       and{" "}
+       <a
+         href="https://www.facebook.com/privacy/policies/cookies"
+         target="_blank"
+         rel="noopener noreferrer"
+       >
+         <b>Cookies Policy</b>
+       </a>
+       .
+     </CookieConsent>
+   );
+ };
+
+ export default CookieBanner;
frontend/src/analytics/gaEvents.ts ADDED
@@ -0,0 +1,97 @@
+ import { CONSENT_COOKIE_NAME } from "./Analytics";
+
+ // Declare gtag global function
+ declare global {
+   interface Window {
+     gtag: (...args: any[]) => void;
+   }
+ }
+
+ // Check if analytics are enabled and user has consented
+ const isAnalyticsEnabled = (): boolean => {
+   if (import.meta.env.VITE_ENABLE_ANALYTICS !== 'true') {
+     return false;
+   }
+
+   // Check localStorage first (works in iframes like HuggingFace), then cookies
+   try {
+     const localStorageValue = localStorage.getItem(CONSENT_COOKIE_NAME);
+     if (localStorageValue === "true") return true;
+   } catch (e) {}
+
+   return document.cookie.includes(`${CONSENT_COOKIE_NAME}=true`);
+ };
+
+ // Send GA4 event using gtag
+ export const sendGAEvent = (
+   eventCategory: string,
+   eventAction: string,
+   eventLabel?: string,
+   value?: number
+ ) => {
+   if (!isAnalyticsEnabled() || !window.gtag) {
+     return;
+   }
+
+   try {
+     // Use gtag event format for GA4
+     const eventName = `${eventCategory.toLowerCase()}_${eventAction.toLowerCase()}`;
+     const eventParams: any = {
+       event_category: eventCategory,
+       event_label: eventLabel,
+     };
+
+     if (value !== undefined) {
+       eventParams.value = value;
+     }
+
+     window.gtag('event', eventName, eventParams);
+   } catch (error) {
+     console.error("GA Event Error:", error);
+   }
+ };
+
+ // Predefined event functions for common actions
+ export const trackTranscriptionStart = (languageCode: string) => {
+   sendGAEvent("Transcription", "start", languageCode);
+ };
+
+ export const trackTranscriptionComplete = (languageCode: string, duration?: number) => {
+   sendGAEvent("Transcription", "complete", languageCode, duration);
+ };
+
+ export const trackTranscriptionError = (languageCode: string, errorMessage?: string) => {
+   sendGAEvent("Transcription", "error", `${languageCode}${errorMessage ? ` - ${errorMessage}` : ''}`);
+ };
+
+ export const trackFileUpload = (fileType: string, fileSizeMB?: number) => {
+   sendGAEvent("File", "upload", fileType, fileSizeMB);
+ };
+
+ export const trackLanguageChange = (languageCode: string) => {
+   sendGAEvent("Language", "select", languageCode);
+ };
+
+ export const trackDownloadSRT = (languageCode: string) => {
+   sendGAEvent("Download", "srt", languageCode);
+ };
+
+ export const trackDownloadVideoWithSubtitles = (languageCode: string) => {
+   sendGAEvent("Download", "video_with_subtitles", languageCode);
+ };
+
+ export const trackReset = () => {
+   sendGAEvent("App", "reset");
+ };
+
+ export const trackWelcomeModalClose = () => {
+   sendGAEvent("Modal", "welcome_close");
+ };
+
+ export const trackSegmentEdit = (languageCode: string, editType: "text" | "timing") => {
+   sendGAEvent("Edit", editType, languageCode);
+ };
+
+ export const trackSegmentDelete = (languageCode: string) => {
+   sendGAEvent("Edit", "delete_segment", languageCode);
+ };
frontend/src/components/CanvasTimeline.tsx ADDED
@@ -0,0 +1,393 @@
+ import React, {
+   useRef,
+   useEffect,
+   useState,
+   useCallback,
+   forwardRef,
+ } from "react";
+ import {AlignedSegment} from "../services/transcriptionApi";
+ import {useTranscriptionStore} from "../stores/transcriptionStore";
+ import {formatTime} from "../utils/subtitleUtils";
+ import {assignTracksToSegments, getMaxTrackCount} from "../utils/trackUtils";
+ import {useTimelineGeometry} from "../hooks/useTimelineGeometry";
+ import {useTimelineDragControls} from "../hooks/useTimelineDragControls";
+ import {useTimelineRenderer} from "../hooks/useTimelineRenderer";
+ import SegmentEditor from "./SegmentEditor";
+ import MediaDownloadControls from "./MediaDownloadControls";
+ import MediaEditControls from "./MediaEditControls";
+
+ interface CanvasTimelineProps {
+   audioRef: React.RefObject<HTMLAudioElement>;
+   videoRef: React.RefObject<HTMLVideoElement>;
+   onSeekToSegment: (segment: AlignedSegment) => void;
+   onTimeUpdate: () => void;
+   viewport?: {start: number; end: number};
+ }
+
+ const CanvasTimeline = forwardRef<HTMLDivElement, CanvasTimelineProps>(
+   ({audioRef, videoRef}, ref) => {
+     const canvasRef = useRef<HTMLCanvasElement>(null);
+     const containerRef = useRef<HTMLDivElement>(null);
+
+     // Combine the forwarded ref with our internal ref
+     const combinedRef = useCallback(
+       (node: HTMLDivElement | null) => {
+         // Use Object.defineProperty to safely assign to current
+         if (containerRef.current !== node) {
+           Object.defineProperty(containerRef, "current", {
+             value: node,
+             writable: true,
+             configurable: true,
+           });
+         }
+
+         if (typeof ref === "function") {
+           ref(node);
+         } else if (ref) {
+           // Type assertion to overcome readonly constraint
+           const mutableRef = ref as any;
+           mutableRef.current = node;
+         }
+       },
+       [ref]
+     );
+     const [canvasSize, setCanvasSize] = useState({width: 1200, height: 200});
+
+     const {
+       transcription,
+       currentTime,
+       activeSegmentIndex,
+       selectedSegmentIndex,
+       currentSegments,
+       setSelectedSegmentIndex,
+       updateSegmentText,
+       deleteSegment,
+     } = useTranscriptionStore();
+
+     // Constants
+     const constants = {
+       TRACK_HEIGHT: 32,
+       TRACK_PADDING: 4,
+       TIMELINE_PADDING: 0,
+       PIXELS_PER_SECOND: 300, // Increased from 200 to give segments more space
+     };
+
+     // Early return if no transcription
+     if (!transcription) {
+       return null;
+     }
+
+     const displaySegments = currentSegments || transcription.aligned_segments;
+     const segmentsWithTracks = assignTracksToSegments(displaySegments);
+     const trackCount = getMaxTrackCount(segmentsWithTracks);
+
+     // Get actual media duration from audio/video elements
+     const getMediaDuration = useCallback(() => {
+       const audioElement = audioRef.current;
+       const videoElement = videoRef.current;
+
+       if (audioElement && !isNaN(audioElement.duration)) {
+         return audioElement.duration;
+       }
+       if (videoElement && !isNaN(videoElement.duration)) {
+         return videoElement.duration;
+       }
+
+       // Fallback to transcription duration if media duration is not available
+       return transcription.total_duration;
+     }, [audioRef, videoRef, transcription.total_duration]);
+
+     const mediaDuration = getMediaDuration();
+     console.log({mediaDuration});
+
+     // Calculate canvas dimensions based on full media duration
+     const timelineWidth = mediaDuration * constants.PIXELS_PER_SECOND;
+     const timelineHeight =
+       constants.TIMELINE_PADDING * 2 +
+       trackCount * (constants.TRACK_HEIGHT + constants.TRACK_PADDING);
+
+     // Update canvas size when needed
+     useEffect(() => {
+       setCanvasSize({
+         width: timelineWidth, // Canvas internal resolution
+         height: Math.max(timelineHeight, 200),
+       });
+     }, [timelineWidth, timelineHeight, trackCount]);
+
+     // Initialize geometry utilities
+     const geometryUtils = useTimelineGeometry({
+       mediaDuration,
+       constants,
+     });
+
+     // Initialize drag controls
+     const dragControls = useTimelineDragControls({
+       segmentsWithTracks,
+       displaySegments,
+       geometryUtils,
+       canvasRef,
+       containerRef,
+       mediaDuration,
+       constants: {
+         TRACK_HEIGHT: constants.TRACK_HEIGHT,
+         TIMELINE_PADDING: constants.TIMELINE_PADDING,
+       },
+     });
+
+     // Initialize renderer
+     const {draw} = useTimelineRenderer({
+       canvasRef,
+       canvasSize,
+       segmentsWithTracks,
+       displaySegments,
+       currentTime,
+       activeSegmentIndex,
+       selectedSegmentIndex,
+       hoveredSegment: dragControls.hoveredSegment,
+       isDragging: dragControls.isDragging,
+       dragSegmentIndex: dragControls.dragSegmentIndex,
+       mediaDuration,
+       geometryUtils,
+       constants,
+     });
+
+     // State for smooth scrolling animation
+     const scrollAnimationRef = useRef<number | null>(null);
+
+     // Smooth scroll implementation using requestAnimationFrame
+     const smoothScrollTo = useCallback(
+       (
+         container: HTMLDivElement,
+         targetScrollLeft: number,
+         duration = 500
+       ): Promise<void> => {
+         return new Promise((resolve) => {
+           const startScrollLeft = container.scrollLeft;
+           const scrollDistance = targetScrollLeft - startScrollLeft;
+           const startTime = Date.now();
+
+           const animate = () => {
+             const currentTime = Date.now();
+             const elapsedTime = currentTime - startTime;
+             const progress = Math.min(elapsedTime / duration, 1);
+
+             // Use easeOutQuart for smooth deceleration
+             const easeOutQuart = 1 - Math.pow(1 - progress, 4);
+
+             container.scrollLeft =
+               startScrollLeft + scrollDistance * easeOutQuart;
+
+             if (progress < 1) {
+               scrollAnimationRef.current = requestAnimationFrame(animate);
+             } else {
+               scrollAnimationRef.current = null;
+               resolve();
+             }
+           };
+
+           // Cancel any existing animation
+           if (scrollAnimationRef.current) {
+             cancelAnimationFrame(scrollAnimationRef.current);
+           }
+
+           animate();
+         });
+       },
+       []
+     );
+
+     // Cleanup animation on unmount
+     useEffect(() => {
+       return () => {
+         if (scrollAnimationRef.current) {
+           cancelAnimationFrame(scrollAnimationRef.current);
+         }
+       };
+     }, []);
+
+     // Determine if media is playing for auto-scroll behavior
+     const isMediaPlaying = useCallback(() => {
+       const audioElement = audioRef.current;
+       const videoElement = videoRef.current;
+       const mediaElement = audioElement || videoElement;
+       return mediaElement && !mediaElement.paused && !mediaElement.ended;
+     }, [audioRef, videoRef]);
+
+     // Track if we're currently animating scroll to avoid re-triggering
+     const isScrollingRef = useRef(false);
+     const prevCurrentTimeRef = useRef(currentTime);
+
+     // Auto-scroll during playback: only when playing and playhead gets near edges (20%)
+     useEffect(() => {
+       const container = containerRef.current;
+       if (!container || !isMediaPlaying() || isScrollingRef.current) return;
+
+       const timeX = geometryUtils.timeToX(currentTime);
+       const containerWidth = container.clientWidth;
+       const currentScrollLeft = container.scrollLeft;
+       const maxScrollLeft = Math.max(0, container.scrollWidth - containerWidth);
+
+       // Calculate 20% edge boundaries
+       const leftEdge = currentScrollLeft + containerWidth * 0.2;
+       const rightEdge =
+         currentScrollLeft + containerWidth - containerWidth * 0.2;
+
+       // Only scroll if playhead is near edges
+       if (timeX < leftEdge || timeX > rightEdge) {
+         isScrollingRef.current = true;
+
+         // Center the playhead position
+         const targetScrollLeft = Math.max(
+           0,
+           Math.min(maxScrollLeft, timeX - containerWidth / 2)
+         );
+
+         smoothScrollTo(container, targetScrollLeft, 800).then(() => {
+           isScrollingRef.current = false;
+         });
+       }
+     }, [currentTime, geometryUtils, isMediaPlaying, smoothScrollTo]);
+
+     // Handle manual seeking (scrubbing, keyboard shortcuts, etc.)
+     useEffect(() => {
+       const container = containerRef.current;
+       if (!container || isScrollingRef.current) return;
+
+       const timeDifference = Math.abs(currentTime - prevCurrentTimeRef.current);
+       const isSeekOperation = timeDifference > 0.5; // Significant time jump indicates seeking
+
+       if (isSeekOperation) {
+         const timeX = geometryUtils.timeToX(currentTime);
+         const containerWidth = container.clientWidth;
+         const currentScrollLeft = container.scrollLeft;
+         const maxScrollLeft = Math.max(
+           0,
+           container.scrollWidth - containerWidth
+         );
+
+         // Check if the seek position is outside the visible area
+         const visibleStart = currentScrollLeft;
+         const visibleEnd = currentScrollLeft + containerWidth;
+
+         if (timeX < visibleStart || timeX > visibleEnd) {
+           isScrollingRef.current = true;
+
+           // Center the seek position
+           const targetScrollLeft = Math.max(
+             0,
+             Math.min(maxScrollLeft, timeX - containerWidth / 2)
+           );
+
+           smoothScrollTo(container, targetScrollLeft, 600).then(() => {
+             isScrollingRef.current = false;
+           });
+         }
+       }
+
+       prevCurrentTimeRef.current = currentTime;
+     }, [currentTime, geometryUtils, smoothScrollTo]);
+
+     // Redraw on scroll
+     useEffect(() => {
+       const container = containerRef.current;
+       if (!container) return;
+
+       const handleScroll = () => {
+         draw();
+       };
+
+       container.addEventListener("scroll", handleScroll);
+       return () => container.removeEventListener("scroll", handleScroll);
+     }, [draw]);
+
+     return (
+       <div className="flex-1 flex flex-col bg-gray-900 border-t border-gray-700 min-h-32">
+         {/* Header */}
+         <div className="px-4 py-2 bg-gray-800 border-b border-gray-700">
+           {/* Download Buttons - Centered above edit controls */}
+           {/* <div className="flex justify-center mb-2">
+             <MediaDownloadControls />
+           </div> */}
+
+           {/* Edit Controls */}
+           {/* <MediaEditControls /> */}
+         </div>
+
+         {/* Canvas Container */}
+         <div
+           ref={combinedRef}
+           className="flex-1 overflow-auto bg-black border-t border-slate-700"
+           style={{
+             minHeight: "200px",
+             scrollBehavior: "auto", // Changed from 'smooth' to 'auto' for responsive following
+           }}
+         >
+           <canvas
+             ref={canvasRef}
+             onMouseMove={dragControls.handleMouseMove}
+             onMouseDown={dragControls.handleMouseDown}
+             className="block"
+             style={{
+               width: `${canvasSize.width}px`,
+               height: `${canvasSize.height}px`,
+             }}
+           />
+         </div>
+
+         {/* Tooltip for hovered segment */}
+         {dragControls.hoveredSegment !== null &&
+           !dragControls.isDragging &&
+           !dragControls.isTimelineDragging &&
+           (() => {
+             // Find the segment in segmentsWithTracks that corresponds to the hovered original segment
+             const originalSegment =
+               displaySegments[dragControls.hoveredSegment];
+
+             // Safety check: ensure the segment exists
+             if (!originalSegment) return null;
+
+             const hoveredSegmentWithTrack = segmentsWithTracks.find(
+               (s) =>
+                 s.start === originalSegment.start &&
+                 s.end === originalSegment.end &&
+                 s.text === originalSegment.text
+             );
+
+             if (!hoveredSegmentWithTrack) return null;
+
+             return (
+               <div className="absolute bottom-4 left-4 bg-gray-800 text-white text-xs rounded px-2 py-1 pointer-events-none z-30 max-w-xs">
+                 <div className="whitespace-normal break-words">
+                   {hoveredSegmentWithTrack.text}
+                 </div>
+                 <div className="text-gray-400 mt-1">
+                   {formatTime(hoveredSegmentWithTrack.start)} -{" "}
+                   {formatTime(hoveredSegmentWithTrack.end)} (
+                   {hoveredSegmentWithTrack.duration.toFixed(1)}s)
+                 </div>
+                 <div className="text-yellow-400 mt-1 text-xs">
+                   Click to select • Drag to move • Drag edges to resize
+                 </div>
+               </div>
+             );
+           })()}
+
+         {/* Segment Editor at Bottom */}
+         {selectedSegmentIndex !== null &&
+           displaySegments[selectedSegmentIndex] && (
+             <SegmentEditor
+               segment={displaySegments[selectedSegmentIndex]}
+               segmentIndex={selectedSegmentIndex}
+               onUpdateText={updateSegmentText}
+               onDeleteSegment={deleteSegment}
+               onClose={() => setSelectedSegmentIndex(null)}
+             />
+           )}
+       </div>
+     );
+   }
+ );
+
+ CanvasTimeline.displayName = "CanvasTimeline";
+
+ export default CanvasTimeline;
frontend/src/components/ErrorBoundary.tsx ADDED
@@ -0,0 +1,131 @@
1
+ import React, { Component, ReactNode } from 'react';
2
+
3
+ interface Props {
4
+ children: ReactNode;
5
+ componentName?: string;
6
+ }
7
+
8
+ interface State {
9
+ hasError: boolean;
10
+ error: Error | null;
11
+ errorInfo: React.ErrorInfo | null;
12
+ }
13
+
14
+ class ErrorBoundary extends Component<Props, State> {
15
+ constructor(props: Props) {
16
+ super(props);
17
+ this.state = { hasError: false, error: null, errorInfo: null };
18
+ }
19
+
20
+ static getDerivedStateFromError(error: Error): State {
21
+ return { hasError: true, error, errorInfo: null };
22
+ }
23
+
24
+ componentDidCatch(error: Error, errorInfo: React.ErrorInfo) {
25
+ console.error('ErrorBoundary caught an error:', error, errorInfo);
26
+ this.setState({
27
+ error,
28
+ errorInfo
29
+ });
30
+ }
31
+
32
+ handleRetry = () => {
33
+ this.setState({ hasError: false, error: null, errorInfo: null });
34
+ };
35
+
36
+ handleCopyError = () => {
37
+ const { error, errorInfo } = this.state;
38
+ const { componentName } = this.props;
39
+
40
+ const errorText = `
41
+ Component: ${componentName || 'Unknown'}
42
+ Error: ${error?.message || 'Unknown error'}
43
+ Stack: ${error?.stack || 'No stack trace available'}
44
+ Component Stack: ${errorInfo?.componentStack || 'No component stack available'}
45
+ Timestamp: ${new Date().toISOString()}
46
+ `.trim();
47
+
48
+ navigator.clipboard.writeText(errorText).then(() => {
49
+ alert('Error details copied to clipboard!');
50
+ }).catch(() => {
51
+ // Fallback for older browsers
52
+ const textArea = document.createElement('textarea');
53
+ textArea.value = errorText;
54
+ document.body.appendChild(textArea);
55
+ textArea.select();
56
+ document.execCommand('copy');
57
+ document.body.removeChild(textArea);
58
+ alert('Error details copied to clipboard!');
59
+ });
60
+ };
61
+
62
+ render() {
63
+ if (this.state.hasError) {
64
+ const { componentName } = this.props;
65
+ const { error, errorInfo } = this.state;
66
+
67
+ return (
68
+ <div className="flex flex-col items-center justify-center p-8 bg-red-50 border border-red-200 rounded-lg m-4">
69
+ <div className="text-center mb-6">
70
+ <h2 className="text-xl font-semibold text-red-800 mb-2">
71
+ Sorry, something went wrong
72
+ </h2>
73
+ <p className="text-red-600 mb-4">
74
+ {componentName ? `An error occurred in the ${componentName} component.` : 'An unexpected error occurred.'}
75
+ </p>
76
+ <button
77
+ onClick={this.handleRetry}
78
+ className="px-4 py-2 bg-blue-600 text-white rounded hover:bg-blue-700 transition-colors mr-2"
79
+ >
80
+ Try Again
81
+ </button>
82
+ <button
83
+ onClick={this.handleCopyError}
84
+ className="px-4 py-2 bg-gray-600 text-white rounded hover:bg-gray-700 transition-colors"
85
+ >
86
+ Copy Error Details
87
+ </button>
88
+ </div>
89
+
90
+ <details className="w-full max-w-4xl">
91
+ <summary className="cursor-pointer text-red-700 font-medium mb-2 hover:text-red-800">
92
+ Show Error Details (for developers)
93
+ </summary>
94
+ <div className="bg-gray-100 p-4 rounded border text-sm font-mono overflow-auto max-h-96">
95
+ <div className="mb-4">
96
+ <strong className="text-red-700">Error Message:</strong>
97
+ <pre className="mt-1 whitespace-pre-wrap text-red-800">
98
+ {error?.message || 'Unknown error'}
99
+ </pre>
100
+ </div>
101
+
102
+ <div className="mb-4">
103
+ <strong className="text-red-700">Stack Trace:</strong>
104
+ <pre className="mt-1 whitespace-pre-wrap text-gray-800 text-xs">
105
+ {error?.stack || 'No stack trace available'}
106
+ </pre>
107
+ </div>
108
+
109
+ {errorInfo?.componentStack && (
110
+ <div className="mb-4">
111
+ <strong className="text-red-700">Component Stack:</strong>
112
+ <pre className="mt-1 whitespace-pre-wrap text-gray-800 text-xs">
113
+ {errorInfo.componentStack}
114
+ </pre>
115
+ </div>
116
+ )}
117
+
118
+ <div className="text-xs text-gray-600">
119
+ <strong>Timestamp:</strong> {new Date().toISOString()}
120
+ </div>
121
+ </div>
122
+ </details>
123
+ </div>
124
+ );
125
+ }
126
+
127
+ return this.props.children;
128
+ }
129
+ }
130
+
131
+ export default ErrorBoundary;
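Note: a minimal usage sketch (not part of this commit) showing how ErrorBoundary is meant to be consumed; the wrapper component and its componentName value are illustrative.

    import React from 'react';
    import ErrorBoundary from './components/ErrorBoundary';

    // Any risky subtree can be wrapped; componentName labels both the
    // fallback message and the copy-to-clipboard error report.
    const SafeSection: React.FC<{ children: React.ReactNode }> = ({ children }) => (
      <ErrorBoundary componentName="TranscriptionPage">
        {children}
      </ErrorBoundary>
    );

    export default SafeSection;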
frontend/src/components/FeedbackCard.tsx ADDED
@@ -0,0 +1,27 @@
+ import React from 'react';
+
+ const FeedbackCard: React.FC = () => {
+   return (
+     <div className="mb-4 p-3 bg-green-900/30 rounded-lg border border-green-600/50">
+       <div className="mb-2">
+         <h3 className="text-sm font-semibold text-green-300">Help Us Improve</h3>
+       </div>
+
+       <div className="text-xs text-green-100 leading-relaxed">
+         Your feedback is crucial for improving language coverage and model quality for low-resource languages.
+         Please{' '}
+         <a
+           href="https://forms.gle/JZhFWsA36sg2DdtN9"
+           target="_blank"
+           rel="noopener noreferrer"
+           className="text-green-300 hover:text-green-200 underline transition-colors"
+         >
+           provide feedback
+         </a>
+         {' '}to share your experiences, suggestions, and any issues you encounter.
+       </div>
+     </div>
+   );
+ };
+
+ export default FeedbackCard;
frontend/src/components/FullTranscription.tsx ADDED
@@ -0,0 +1,152 @@
+ import React, { useState } from 'react';
+ import { formatTime } from '../utils/subtitleUtils';
+ import { useTranscriptionStore } from '../stores/transcriptionStore';
+ import { InformationCircleIcon } from '@heroicons/react/24/outline';
+
+ const FullTranscription: React.FC = () => {
+   const [showInfoTooltip, setShowInfoTooltip] = useState(false);
+   const [showExpandedChunks, setShowExpandedChunks] = useState(false);
+   const { transcription, selectedLanguage } = useTranscriptionStore();
+
+   const renderInfoTooltip = () => {
+     if (!transcription) return null;
+
+     const chunks = transcription.chunks || [];
+     const maxVisibleChunks = 3;
+     const hasMoreChunks = chunks.length > maxVisibleChunks;
+     const visibleChunks = showExpandedChunks ? chunks : chunks.slice(0, maxVisibleChunks);
+
+     // Center the tooltip in the viewport to avoid edge cuts
+     const tooltipStyle: React.CSSProperties = {
+       position: 'fixed',
+       left: '50%',
+       top: '50%',
+       transform: 'translate(-50%, -50%)',
+       maxHeight: '80vh', // Prevent tooltip from being taller than viewport
+       overflowY: 'auto' as const
+     };
+
+     return (
+       <div
+         className="z-50 p-3 bg-gray-900 text-white text-xs rounded-lg shadow-xl border border-gray-600 min-w-64 max-w-96"
+         style={tooltipStyle}>
+
+         <div className="font-semibold mb-2 text-blue-300">Transcription Details</div>
+
+         <div className="space-y-1">
+           <div className="flex justify-between">
+             <span className="text-gray-300">Model:</span>
+             <span>{transcription.model}</span>
+           </div>
+
+           <div className="flex justify-between">
+             <span className="text-gray-300">Language:</span>
+             <span className="font-mono">
+               {selectedLanguage || 'auto-detect'}
+             </span>
+           </div>
+
+           <div className="flex justify-between">
+             <span className="text-gray-300">Segments:</span>
+             <span>{transcription.num_segments}</span>
+           </div>
+
+           <div className="flex justify-between">
+             <span className="text-gray-300">Duration:</span>
+             <span>{formatTime(transcription.total_duration)}</span>
+           </div>
+
+           <div className="flex justify-between">
+             <span className="text-gray-300">Device:</span>
+             <span className="font-mono">{transcription.device}</span>
+           </div>
+
+           {/* Long-form specific info */}
+           {transcription.num_chunks && (
+             <>
+               <div className="border-t border-gray-600 pt-1 mt-1">
+                 <div className="text-xs text-blue-300 font-semibold mb-1">Long-form Processing</div>
+               </div>
+               <div className="flex justify-between">
+                 <span className="text-gray-300">Chunks:</span>
+                 <span>{transcription.num_chunks}</span>
+               </div>
+             </>
+           )}
+         </div>
+
+         {/* Improved Chunk details */}
+         {chunks.length > 0 && (
+           <div className="mt-2">
+             <div className="flex items-center justify-between mb-1">
+               <div className="text-xs text-blue-300 font-semibold">Chunk Details</div>
+               {hasMoreChunks && (
+                 <button
+                   onClick={() => setShowExpandedChunks(!showExpandedChunks)}
+                   className="text-xs text-blue-400 hover:text-blue-300 underline transition-colors"
+                 >
+                   {showExpandedChunks ? 'Show Less' : `Show All (${chunks.length})`}
+                 </button>
+               )}
+             </div>
+
+             <div className="space-y-1 max-h-48 overflow-y-auto">
+               {visibleChunks.map((chunk, index) => (
+                 <div key={index} className="text-xs bg-gray-700 p-2 rounded border border-gray-600">
+                   <div className="flex justify-between items-start">
+                     <div className="font-medium text-gray-200">
+                       Chunk #{index + 1}
+                     </div>
+                     <div className="text-gray-400 text-xs">
+                       {chunk.duration.toFixed(1)}s
+                     </div>
+                   </div>
+                   <div className="text-gray-300 font-mono mt-1">
+                     {formatTime(chunk.start_time)} → {formatTime(chunk.end_time)}
+                   </div>
+                 </div>
+               ))}
+
+               {hasMoreChunks && !showExpandedChunks && (
+                 <div className="text-center py-1">
+                   <button
+                     onClick={() => setShowExpandedChunks(true)}
+                     className="text-xs text-blue-400 hover:text-blue-300 underline transition-colors"
+                   >
+                     +{chunks.length - maxVisibleChunks} more chunks...
+                   </button>
+                 </div>
+               )}
+             </div>
+           </div>
+         )}
+       </div>
+     );
+   };
+
+   if (!transcription) return null;
+
+   return (
+     <div className="p-4 bg-gray-800 border-t border-gray-700">
+       <div className="flex items-center gap-2 mb-3">
+         <h3 className="text-sm font-semibold text-white">Full Transcription</h3>
+
+         {/* Info tooltip */}
+         <div className="relative">
+           <InformationCircleIcon
+             className="w-4 h-4 text-gray-400 hover:text-gray-200 cursor-help transition-colors"
+             onMouseEnter={() => setShowInfoTooltip(true)}
+             onMouseLeave={() => setShowInfoTooltip(false)}
+           />
+           {showInfoTooltip && renderInfoTooltip()}
+         </div>
+       </div>
+
+       <div className="text-sm max-h-32 overflow-y-auto text-gray-300 font-mono bg-gray-900 p-3 rounded border border-gray-600">
+         {transcription.transcription}
+       </div>
+     </div>
+   );
+ };
+
+ export default FullTranscription;
frontend/src/components/LanguageSelector.tsx ADDED
@@ -0,0 +1,262 @@
+ import React, { useMemo, useState, useEffect } from 'react';
+ import Select, { components, StylesConfig, type SingleValue } from 'react-select';
+ import { matchSorter } from 'match-sorter';
+ import { LANGUAGE_MAP, ACCURATE_LANGUAGES } from '../utils/languages';
+ import { getScriptName, getScriptDescription } from '../utils/scripts';
+ import { getSupportedLanguages } from '../services/transcriptionApi';
+
+ interface LanguageSelectorProps {
+   selectedLanguage: string | null;
+   selectedScript: string | null;
+   onLanguageAndScriptSelect: (language: string | null, script: string | null) => void;
+   disabled?: boolean;
+ }
+
+ interface OptionType {
+   value: string; // The full code_script combination
+   label: string;
+   languageName: string;
+   scriptName: string;
+   languageCode: string;
+   scriptCode: string;
+ }
+
+ const parseLanguage = (languageString: string): OptionType | null => {
+   const parts = languageString.split('_');
+
+   // Always expect format: "eng_Latn"
+   if (parts.length === 2) {
+     const [languageCode, scriptCode] = parts;
+     const languageName = (LANGUAGE_MAP as Record<string, string>)[languageCode] || languageCode;
+     const scriptName = getScriptName(scriptCode);
+
+     return {
+       value: languageString,
+       label: `${languageName} ${scriptName} (${languageString})`,
+       languageName,
+       scriptName,
+       languageCode,
+       scriptCode,
+     };
+   }
+
+   return null;
+ };
+
+ // Custom Option component to show language, script, and code with tooltip
+ const Option = (props: any) => {
+   const scriptDescription = getScriptDescription(props.data.scriptCode);
+
+   return (
+     <components.Option {...props}>
+       <div className="flex flex-col" title={scriptDescription || undefined}>
+         <div className="font-medium text-sm">{props.data.languageName}</div>
+         <div className="text-xs text-gray-400">{props.data.scriptName} ({props.data.value})</div>
+       </div>
+     </components.Option>
+   );
+ };
+
+ // Custom SingleValue component for selected value
+ const SingleValue = (props: any) => (
+   <components.SingleValue {...props}>
+     <div className="flex flex-col">
+       <div className="font-medium text-sm leading-tight">{props.data.languageName}</div>
+       <div className="text-xs text-gray-400 leading-tight">{props.data.scriptName} ({props.data.value})</div>
+     </div>
+   </components.SingleValue>
+ );
+
+ // Custom styles to match the dark theme
+ const customStyles: StylesConfig<OptionType> = {
+   control: (styles, { isDisabled, isFocused }) => ({
+     ...styles,
+     backgroundColor: '#374151', // gray-700 (same whether enabled or disabled)
+     borderColor: isFocused ? '#3b82f6' : '#4b5563', // blue-500 : gray-600
+     borderRadius: '0.375rem',
+     minHeight: '40px',
+     boxShadow: isFocused ? '0 0 0 1px #3b82f6' : 'none',
+     '&:hover': {
+       borderColor: isDisabled ? '#4b5563' : '#6b7280', // gray-600 : gray-500
+       backgroundColor: isDisabled ? '#374151' : '#4b5563', // gray-700 : gray-600
+     },
+     cursor: isDisabled ? 'not-allowed' : 'pointer',
+   }),
+   singleValue: (styles) => ({
+     ...styles,
+     color: '#f9fafb', // gray-50
+   }),
+   placeholder: (styles) => ({
+     ...styles,
+     color: '#9ca3af', // gray-400
+   }),
+   input: (styles) => ({
+     ...styles,
+     color: '#f9fafb', // gray-50
+   }),
+   menu: (styles) => ({
+     ...styles,
+     backgroundColor: '#374151', // gray-700
+     border: '1px solid #4b5563', // gray-600
+     borderRadius: '0.5rem',
+     boxShadow: '0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05)',
+     zIndex: 50,
+   }),
+   menuList: (styles) => ({
+     ...styles,
+     maxHeight: '200px',
+     padding: 0,
+   }),
+   option: (styles, { isFocused, isSelected }) => ({
+     ...styles,
+     backgroundColor: isSelected
+       ? '#2563eb' // blue-600
+       : isFocused
+         ? '#4b5563' // gray-600
+         : 'transparent',
+     color: '#f9fafb', // gray-50
+     cursor: 'pointer',
+     padding: '8px 12px',
+     '&:hover': {
+       backgroundColor: isSelected ? '#2563eb' : '#4b5563', // blue-600 : gray-600
+     },
+   }),
+   indicatorSeparator: (styles) => ({
+     ...styles,
+     backgroundColor: '#4b5563', // gray-600
+   }),
+   dropdownIndicator: (styles, { isDisabled }) => ({
+     ...styles,
+     color: isDisabled ? '#6b7280' : '#9ca3af', // gray-500 : gray-400
+     '&:hover': {
+       color: isDisabled ? '#6b7280' : '#d1d5db', // gray-500 : gray-300
+     },
+   }),
+   clearIndicator: (styles) => ({
+     ...styles,
+     color: '#9ca3af', // gray-400
+     '&:hover': {
+       color: '#d1d5db', // gray-300
+     },
+   }),
+   noOptionsMessage: (styles) => ({
+     ...styles,
+     color: '#9ca3af', // gray-400
+   }),
+ };
+
+ const LanguageSelector: React.FC<LanguageSelectorProps> = ({
+   selectedLanguage,
+   selectedScript,
+   onLanguageAndScriptSelect,
+   disabled = false
+ }) => {
+   const [supportedLanguages, setSupportedLanguages] = useState<string[]>([]);
+   const [isLoading, setIsLoading] = useState(true);
+   const [error, setError] = useState<string | null>(null);
+
+   // Fetch supported languages from API
+   useEffect(() => {
+     const fetchSupportedLanguages = async () => {
+       try {
+         setIsLoading(true);
+         const languages = await getSupportedLanguages();
+         setSupportedLanguages(languages);
+       } catch (err) {
+         console.error('Failed to fetch supported languages:', err);
+         setError('Failed to load supported languages');
+       } finally {
+         setIsLoading(false);
+       }
+     };
+
+     fetchSupportedLanguages();
+   }, []);
+
+   // Convert supported languages to options
+   const languageOptions = useMemo(() => {
+     const allowAllLanguages = import.meta.env.VITE_ALLOW_ALL_LANGUAGES === 'true';
+
+     return supportedLanguages
+       .map(parseLanguage)
+       .filter((option): option is OptionType => option !== null)
+       .filter((option) => {
+         if (allowAllLanguages) {
+           return true;
+         }
+         return ACCURATE_LANGUAGES.includes(option.languageCode);
+       })
+       .sort((a, b) => a.languageName.localeCompare(b.languageName));
+   }, [supportedLanguages]);
+
+   // Find the selected option
+   const selectedOption = useMemo(() => {
+     if (!selectedLanguage || !selectedScript) return null;
+     const combinedValue = `${selectedLanguage}_${selectedScript}`;
+     return languageOptions.find(option => option.value === combinedValue) || null;
+   }, [selectedLanguage, selectedScript, languageOptions]);
+
+   const handleChange = (newValue: SingleValue<OptionType>) => {
+     if (newValue) {
+       onLanguageAndScriptSelect(newValue.languageCode, newValue.scriptCode);
+     } else {
+       onLanguageAndScriptSelect(null, null);
+     }
+   };
+
+   // Custom filterOption function using match-sorter
+   const filterOptions = useMemo(() => {
+     return (option: { label: string; value: string; data: OptionType }, inputValue: string) => {
+       if (!inputValue.trim()) return true;
+
+       // Use match-sorter to check if this individual option matches
+       const matches = matchSorter([option.data], inputValue, {
+         keys: [
+           'languageName', // Primary: language name
+           'scriptName',   // Secondary: script name
+           'languageCode', // Tertiary: language code
+           'scriptCode',   // Quaternary: script code
+           'label',        // Fallback: full label
+         ],
+         threshold: matchSorter.rankings.CONTAINS,
+       });
+
+       return matches.length > 0;
+     };
+   }, []);
+
+   if (error) {
+     return (
+       <div className="text-red-400 text-sm p-2 bg-red-900/20 rounded">
+         {error}
+       </div>
+     );
+   }
+
+   return (
+     <Select<OptionType>
+       value={selectedOption}
+       onChange={handleChange}
+       options={languageOptions}
+       placeholder={isLoading ? "Loading languages..." : "Select language..."}
+       isClearable
+       isDisabled={disabled || isLoading}
+       isSearchable
+       filterOption={(option, inputValue) => filterOptions(option, inputValue)}
+       components={{ Option, SingleValue }}
+       styles={customStyles}
+       menuPortalTarget={document.body}
+       menuPosition="fixed"
+       noOptionsMessage={({ inputValue }) =>
+         `No languages found matching "${inputValue}"`
+       }
+       // Performance optimizations
+       menuIsOpen={undefined} // Let react-select manage this
+       blurInputOnSelect={true}
+       closeMenuOnSelect={true}
+       hideSelectedOptions={false}
+     />
+   );
+ };
+
+ export default LanguageSelector;
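Note: a sketch of the expected parseLanguage behavior under the "code_script" format above. The display names assume LANGUAGE_MAP maps 'eng' to 'English' and getScriptName maps 'Latn' to 'Latin'; both are illustrative, not verified against utils/languages.ts.

    parseLanguage('eng_Latn');
    // => { value: 'eng_Latn', label: 'English Latin (eng_Latn)',
    //      languageName: 'English', scriptName: 'Latin',
    //      languageCode: 'eng', scriptCode: 'Latn' }

    parseLanguage('eng'); // => null — entries without a script suffix are dropped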
frontend/src/components/MediaDownloadControls.tsx ADDED
@@ -0,0 +1,75 @@
+ import React from 'react';
+ import { ArrowDownTrayIcon } from '@heroicons/react/24/outline';
+ import { useTranscriptionStore } from '../stores/transcriptionStore';
+ import { generateSRT, generateWebVTT, downloadSubtitles } from '../utils/subtitleUtils';
+ import { trackDownloadSRT, sendGAEvent } from '../analytics/gaEvents';
+
+ const MediaDownloadControls: React.FC = () => {
+   const {
+     file,
+     transcription,
+     selectedLanguage,
+     isVideoFile,
+     isDownloadingVideo,
+     handleDownloadVideoWithSubtitles,
+   } = useTranscriptionStore();
+
+   if (!transcription) {
+     return null;
+   }
+
+   return (
+     <div className="flex items-center space-x-2">
+       <div className="tooltip tooltip-bottom" data-tip="Download subtitle file in SRT format. Compatible with most video players and editing software">
+         <button
+           onClick={() => {
+             const srtContent = generateSRT(transcription.aligned_segments);
+             const filename = file?.name?.replace(/\.[^/.]+$/, ".srt") || "subtitles.srt";
+             downloadSubtitles(srtContent, filename);
+
+             // Track SRT download
+             if (selectedLanguage) {
+               trackDownloadSRT(selectedLanguage);
+             }
+           }}
+           className="flex items-center gap-1 px-2 py-1 text-xs bg-purple-600 hover:bg-purple-700 rounded transition-colors text-white"
+         >
+           <ArrowDownTrayIcon className="w-3 h-3" />
+           .srt
+         </button>
+       </div>
+       <div className="tooltip tooltip-bottom" data-tip="Download subtitle file in WebVTT format. Ideal for web browsers and HTML5 video players">
+         <button
+           onClick={() => {
+             const vttContent = generateWebVTT(transcription.aligned_segments);
+             const filename = file?.name?.replace(/\.[^/.]+$/, ".vtt") || "subtitles.vtt";
+             downloadSubtitles(vttContent, filename);
+
+             // Track VTT download
+             if (selectedLanguage) {
+               sendGAEvent("Download", "vtt", selectedLanguage);
+             }
+           }}
+           className="flex items-center gap-1 px-2 py-1 text-xs bg-indigo-600 hover:bg-indigo-700 rounded transition-colors text-white"
+         >
+           <ArrowDownTrayIcon className="w-3 h-3" />
+           .vtt
+         </button>
+       </div>
+       {isVideoFile && (
+         <div className="tooltip tooltip-bottom" data-tip="Download video with embedded subtitle track. Selectable from compatible media players">
+           <button
+             onClick={handleDownloadVideoWithSubtitles}
+             disabled={isDownloadingVideo}
+             className="flex items-center gap-1 px-2 py-1 text-xs bg-orange-600 hover:bg-orange-700 disabled:bg-gray-600 rounded transition-colors text-white"
+           >
+             <ArrowDownTrayIcon className="w-3 h-3" />
+             {isDownloadingVideo ? "Creating..." : ".mp4"}
+           </button>
+         </div>
+       )}
+     </div>
+   );
+ };
+
+ export default MediaDownloadControls;
frontend/src/components/MediaEditControls.tsx ADDED
@@ -0,0 +1,113 @@
+ import React, { useState, useEffect } from 'react';
+ import { InformationCircleIcon } from '@heroicons/react/24/outline';
+ import { useTranscriptionStore } from '../stores/transcriptionStore';
+ import { formatTime } from '../utils/subtitleUtils';
+
+ const DEFAULT_MERGE_THRESHOLD = 2;
+ const MAX_MERGE_INTERVAL_SECONDS = 30;
+
+ const MediaEditControls: React.FC = () => {
+   const [mergeThreshold, setMergeThreshold] = useState(DEFAULT_MERGE_THRESHOLD);
+
+   const {
+     transcription,
+     currentTime,
+     activeSegmentIndex,
+     currentSegments,
+     undo,
+     redo,
+     canUndo,
+     canRedo,
+     mergeSegmentsByProximity,
+   } = useTranscriptionStore();
+
+   // Handle merge threshold changes
+   useEffect(() => {
+     mergeSegmentsByProximity(mergeThreshold);
+   }, [mergeThreshold, mergeSegmentsByProximity]);
+
+   // Keyboard shortcuts for undo/redo
+   useEffect(() => {
+     const handleKeyDown = (e: KeyboardEvent) => {
+       if ((e.ctrlKey || e.metaKey) && e.key === 'z' && !e.shiftKey) {
+         e.preventDefault();
+         undo();
+       } else if ((e.ctrlKey || e.metaKey) && (e.key === 'y' || (e.key === 'z' && e.shiftKey))) {
+         e.preventDefault();
+         redo();
+       }
+     };
+
+     document.addEventListener('keydown', handleKeyDown);
+     return () => document.removeEventListener('keydown', handleKeyDown);
+   }, [undo, redo]);
+
+   // Early return must come after the hooks above (Rules of Hooks)
+   if (!transcription) {
+     return null;
+   }
+
+   const displaySegments = currentSegments || transcription.aligned_segments;
+
+   return (
+     <div className="flex items-center justify-center">
+       <div className="flex items-center space-x-4">
+         {/* Current Status Info */}
+         <div className="flex items-center space-x-4 text-xs text-gray-400">
+           <span className="text-green-400">
+             {formatTime(currentTime)} / {formatTime(transcription.total_duration)}
+           </span>
+           <span className="text-blue-400">
+             {activeSegmentIndex !== null
+               ? `Segment ${activeSegmentIndex + 1}/${displaySegments.length}`
+               : "No active segment"}
+           </span>
+         </div>
+
+         {/* Combine Segments Slider */}
+         <div className="flex items-center space-x-2">
+           <label className="text-xs text-gray-300 whitespace-nowrap flex items-center space-x-1">
+             <span>Combine Words:</span>
+             <div className="tooltip" data-tip="Merges nearby words into one segment. The higher the value, the more words are combined">
+               <InformationCircleIcon className="w-4 h-4 text-gray-100 hover:text-gray-300 cursor-help inline ml-1" />
+             </div>
+           </label>
+           <input
+             type="range"
+             min="0"
+             max={MAX_MERGE_INTERVAL_SECONDS}
+             step="0.5"
+             value={mergeThreshold}
+             onChange={(e) => setMergeThreshold(Number(e.target.value))}
+             className="w-20 h-1 bg-gray-600 rounded-lg appearance-none cursor-pointer slider"
+             style={{
+               background: `linear-gradient(to right, #3B82F6 0%, #3B82F6 ${(mergeThreshold / MAX_MERGE_INTERVAL_SECONDS) * 100}%, #4B5563 ${(mergeThreshold / MAX_MERGE_INTERVAL_SECONDS) * 100}%, #4B5563 100%)`
+             }}
+           />
+         </div>
+
+         {/* Undo/Redo Buttons */}
+         <div className="flex items-center space-x-2">
+           <button
+             onClick={undo}
+             disabled={!canUndo}
+             className="px-3 py-1 text-xs bg-blue-600 hover:bg-blue-700 disabled:bg-gray-600 disabled:cursor-not-allowed text-white rounded transition-colors"
+             title="Undo (Ctrl+Z)"
+           >
+             ↶ Undo
+           </button>
+           <button
+             onClick={redo}
+             disabled={!canRedo}
+             className="px-3 py-1 text-xs bg-blue-600 hover:bg-blue-700 disabled:bg-gray-600 disabled:cursor-not-allowed text-white rounded transition-colors"
+             title="Redo (Ctrl+Y)"
+           >
+             ↷ Redo
+           </button>
+         </div>
+       </div>
+     </div>
+   );
+ };
+
+ export default MediaEditControls;
frontend/src/components/MediaPlayer.tsx ADDED
@@ -0,0 +1,130 @@
+ import React from 'react';
+ import { useTranscriptionStore } from '../stores/transcriptionStore';
+ import { generateWebVTT } from '../utils/subtitleUtils';
+ import { LANGUAGE_MAP } from '../utils/languages';
+
+ interface MediaPlayerProps {
+   audioRef: React.RefObject<HTMLAudioElement>;
+   videoRef: React.RefObject<HTMLVideoElement>;
+   onTimeUpdate?: () => void;
+ }
+
+ export default function MediaPlayer({
+   audioRef,
+   videoRef,
+   onTimeUpdate,
+ }: MediaPlayerProps) {
+   const {
+     file,
+     mediaUrl,
+     isVideoFile,
+     currentSegments,
+     selectedLanguage,
+     setCurrentTime
+   } = useTranscriptionStore();
+
+   const handleSeeked = (event: React.SyntheticEvent<HTMLMediaElement>) => {
+     const target = event.target as HTMLMediaElement;
+     setCurrentTime(target.currentTime);
+     // Call onTimeUpdate to trigger segment selection logic
+     if (onTimeUpdate) {
+       onTimeUpdate();
+     }
+   };
+
+   const handleLoadedMetadata = (event: React.SyntheticEvent<HTMLMediaElement>) => {
+     const target = event.target as HTMLMediaElement;
+     setCurrentTime(target.currentTime);
+
+     // Call onTimeUpdate to trigger segment selection logic
+     if (onTimeUpdate) {
+       onTimeUpdate();
+     }
+   };
+
+   // Helper function to encode UTF-8 string to base64
+   const utf8ToBase64 = (str: string): string => {
+     // Convert string to UTF-8 bytes, then to base64
+     const encoder = new TextEncoder();
+     const bytes = encoder.encode(str);
+     let binary = '';
+     bytes.forEach(byte => binary += String.fromCharCode(byte));
+     return btoa(binary);
+   };
+
+   // Get language info for subtitles
+   const getLanguageInfo = () => {
+     if (!selectedLanguage) {
+       return { code: 'en', name: 'English' };
+     }
+     const languageName = (LANGUAGE_MAP as Record<string, string>)[selectedLanguage];
+     return {
+       code: selectedLanguage,
+       name: languageName || 'Unknown'
+     };
+   };
+
+   // Early return if no file is selected
+   if (!file) {
+     return null;
+   }
+
+   // Early return if no media URL is available
+   if (!mediaUrl) {
+     return (
+       <div className="p-6 bg-gray-800">
+         <div className="max-w-4xl mx-auto text-center text-gray-300">
+           Loading media...
+         </div>
+       </div>
+     );
+   }
+
+   return (
+     <div className="p-6 bg-gray-800">
+       <div className="max-w-4xl mx-auto">
+         {isVideoFile ? (
+           <video
+             ref={videoRef}
+             src={mediaUrl || ""}
+             className="w-full max-h-96 rounded-lg"
+             onSeeked={handleSeeked}
+             onLoadedMetadata={handleLoadedMetadata}
+             controls
+             controlsList="nodownload nofullscreen noremoteplayback"
+             disablePictureInPicture
+           >
+             {currentSegments && currentSegments.length > 0 && (() => {
+               const { code, name } = getLanguageInfo();
+               return (
+                 <track
+                   kind="subtitles"
+                   src={`data:text/vtt;base64,${utf8ToBase64(generateWebVTT(currentSegments))}`}
+                   srcLang={code}
+                   label={name}
+                   default
+                 />
+               );
+             })()}
+           </video>
+         ) : (
+           <div className="bg-gray-700 p-8 rounded-lg">
+             <audio
+               ref={audioRef}
+               src={mediaUrl || ""}
+               className="w-full"
+               onSeeked={handleSeeked}
+               onLoadedMetadata={handleLoadedMetadata}
+               controls
+               controlsList="nodownload"
+             />
+             <div className="mt-4 text-center text-gray-300">
+               <div className="text-lg font-medium">Audio File</div>
+               <div className="text-sm">{file.name}</div>
+             </div>
+           </div>
+         )}
+       </div>
+     </div>
+   );
+ }
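Note: the TextEncoder round-trip in utf8ToBase64 exists because btoa() throws an InvalidCharacterError on code points above 0xFF. A self-contained sketch of the same UTF-8-safe encoding path used for the subtitle track (toVttDataUri is an illustrative helper, not part of this commit):

    // Build a data: URI for a <track> element from UTF-8 subtitle text.
    const toVttDataUri = (vtt: string): string => {
      const bytes = new TextEncoder().encode(vtt); // UTF-8 bytes, each 0–255
      let binary = '';
      bytes.forEach((b) => (binary += String.fromCharCode(b)));
      return `data:text/vtt;base64,${btoa(binary)}`;
    };

    // toVttDataUri('WEBVTT\n\n00:00.000 --> 00:02.000\nПривет') works where
    // btoa('Привет') alone would throw.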
frontend/src/components/MediaRecorder.tsx ADDED
@@ -0,0 +1,353 @@
+ import React, { useState, useRef, useEffect } from 'react';
+ import { useTranscriptionStore } from '../stores/transcriptionStore';
+ import { useAudioAnalyzer } from '../hooks/useAudioAnalyzer';
+
+ interface MediaRecorderProps {
+   onComplete: () => void;
+   onCancel: () => void;
+ }
+
+ const MediaRecorder: React.FC<MediaRecorderProps> = ({ onComplete, onCancel }) => {
+   const { recordingType, setRecordedBlob } = useTranscriptionStore();
+
+   const [isRecording, setIsRecording] = useState(false);
+   const [recordingTime, setRecordingTime] = useState(0);
+   const [stream, setStream] = useState<MediaStream | null>(null);
+   const [error, setError] = useState<string | null>(null);
+   const [permissionState, setPermissionState] = useState<'prompt' | 'granted' | 'denied'>('prompt');
+   const [currentMicrophone, setCurrentMicrophone] = useState<string | null>(null);
+
+   const mediaRecorderRef = useRef<MediaRecorder | null>(null);
+   const videoRef = useRef<HTMLVideoElement>(null);
+   const chunksRef = useRef<Blob[]>([]);
+   const timerRef = useRef<number | null>(null);
+
+   const isVideo = recordingType === 'video';
+
+   // Audio analyzer for real-time waveform
+   const { audioData, connectToStream, disconnect } = useAudioAnalyzer(256);
+
+   // Get microphone device info
+   const getMicrophoneInfo = async (mediaStream: MediaStream) => {
+     try {
+       // Get all available audio input devices
+       const devices = await navigator.mediaDevices.enumerateDevices();
+       const audioInputDevices = devices.filter(device => device.kind === 'audioinput');
+
+       // Get the audio track from the current stream
+       const audioTrack = mediaStream.getAudioTracks()[0];
+
+       if (audioTrack) {
+         // Get the device settings
+         const settings = audioTrack.getSettings();
+         const deviceId = settings.deviceId;
+
+         // Find the matching device in our list
+         const currentDevice = audioInputDevices.find(device => device.deviceId === deviceId);
+
+         if (currentDevice && currentDevice.label) {
+           setCurrentMicrophone(currentDevice.label);
+         } else {
+           // Fallback to device ID if label is not available
+           setCurrentMicrophone(`Microphone (${deviceId?.substring(0, 8)}...)`);
+         }
+       }
+     } catch (err) {
+       console.error('Error getting microphone info:', err);
+       setCurrentMicrophone('Unknown microphone');
+     }
+   };
+
+   // Request permissions and setup media stream
+   const requestPermissions = async () => {
+     try {
+       setError(null);
+
+       const constraints: MediaStreamConstraints = {
+         audio: {
+           echoCancellation: true,
+           noiseSuppression: true,
+           autoGainControl: true,
+         },
+         video: isVideo ? {
+           width: { ideal: 1280 },
+           height: { ideal: 720 },
+           facingMode: 'user'
+         } : false
+       };
+
+       const mediaStream = await navigator.mediaDevices.getUserMedia(constraints);
+       setStream(mediaStream);
+       setPermissionState('granted');
+
+       // Get microphone device information
+       await getMicrophoneInfo(mediaStream);
+
+       // Show video preview if recording video
+       if (isVideo && videoRef.current) {
+         videoRef.current.srcObject = mediaStream;
+         videoRef.current.play();
+       }
+
+       // Connect audio analyzer for waveform visualization
+       connectToStream(mediaStream);
+
+     } catch (err) {
+       console.error('Error accessing media devices:', err);
+       setPermissionState('denied');
+
+       if (err instanceof DOMException) {
+         switch (err.name) {
+           case 'NotAllowedError':
+             setError('Permission denied. Please allow access to your microphone' + (isVideo ? ' and camera' : '') + '.');
+             break;
+           case 'NotFoundError':
+             setError('No ' + (isVideo ? 'camera or ' : '') + 'microphone found.');
+             break;
+           case 'NotReadableError':
+             setError('Media device is already in use by another application.');
+             break;
+           default:
+             setError('Failed to access media devices: ' + err.message);
+         }
+       } else {
+         setError('An unexpected error occurred while accessing media devices.');
+       }
+     }
+   };
+
+   // Start recording
+   const startRecording = () => {
+     if (!stream) return;
+
+     try {
+       chunksRef.current = [];
+
+       // Try different MIME types in order of preference
+       const mimeTypes = isVideo
+         ? ['video/webm;codecs=vp9,opus', 'video/webm;codecs=vp8,opus', 'video/webm']
+         : ['audio/webm;codecs=opus', 'audio/webm', 'audio/mp4', ''];
+
+       let selectedMimeType = '';
+       for (const mimeType of mimeTypes) {
+         if (mimeType === '' || window.MediaRecorder.isTypeSupported(mimeType)) {
+           selectedMimeType = mimeType;
+           break;
+         }
+       }
+
+       const options: MediaRecorderOptions = selectedMimeType ? { mimeType: selectedMimeType } : {};
+       const mediaRecorder = new window.MediaRecorder(stream, options);
+       mediaRecorderRef.current = mediaRecorder;
+
+       mediaRecorder.ondataavailable = (event) => {
+         if (event.data.size > 0) {
+           chunksRef.current.push(event.data);
+         }
+       };
+
+       mediaRecorder.onstop = () => {
+         const blob = new Blob(chunksRef.current, {
+           type: isVideo ? 'video/webm' : 'audio/webm'
+         });
+
+         setRecordedBlob(blob);
+         onComplete();
+       };
+
+       mediaRecorder.start();
+       setIsRecording(true);
+       setRecordingTime(0);
+
+       // Start timer (window.setInterval returns a number in the browser)
+       timerRef.current = window.setInterval(() => {
+         setRecordingTime(prev => prev + 1);
+       }, 1000);
+
+     } catch (err) {
+       console.error('Error starting recording:', err);
+       setError('Failed to start recording: ' + (err instanceof Error ? err.message : 'Unknown error'));
+     }
+   };
+
+   // Stop recording
+   const stopRecording = () => {
+     if (mediaRecorderRef.current && isRecording) {
+       mediaRecorderRef.current.stop();
+       setIsRecording(false);
+
+       if (timerRef.current) {
+         clearInterval(timerRef.current);
+         timerRef.current = null;
+       }
+     }
+   };
+
+   // Format recording time
+   const formatTime = (seconds: number) => {
+     const mins = Math.floor(seconds / 60);
+     const secs = seconds % 60;
+     return `${mins.toString().padStart(2, '0')}:${secs.toString().padStart(2, '0')}`;
+   };
+
+   // Cleanup on unmount and when recording stops
+   useEffect(() => {
+     return () => {
+       if (stream) {
+         stream.getTracks().forEach(track => track.stop());
+       }
+       if (timerRef.current) {
+         clearInterval(timerRef.current);
+       }
+     };
+   }, [stream]);
+
+   // Cleanup stream when recording stops externally
+   useEffect(() => {
+     if (!recordingType && stream) {
+       stream.getTracks().forEach(track => track.stop());
+       setStream(null);
+       setCurrentMicrophone(null); // Clear microphone info
+       disconnect(); // Also disconnect audio analyzer
+     }
+   }, [recordingType, stream, disconnect]);
+
+   // Auto-request permissions when component mounts
+   useEffect(() => {
+     if (permissionState === 'prompt') {
+       requestPermissions();
+     }
+   }, []);
+
+   return (
+     <div className="flex flex-col items-center justify-center min-h-[400px] bg-gray-900 rounded-lg border-2 border-dashed border-gray-600 p-8">
+
+       {/* Header */}
+       <div className="mb-6 text-center">
+         <h3 className="text-xl font-semibold text-white mb-2">
+           Record {isVideo ? 'Video' : 'Audio'}
+         </h3>
+         <p className="text-gray-400 text-sm">
+           {permissionState === 'prompt' && 'Requesting permissions...'}
+           {permissionState === 'denied' && 'Permission required to record'}
+           {permissionState === 'granted' && !isRecording && 'Ready to record'}
+           {isRecording && `Recording... ${formatTime(recordingTime)}`}
+         </p>
+
+         {/* Microphone Device Info */}
+         {permissionState === 'granted' && currentMicrophone && (
+           <div className="mt-2 flex items-center justify-center gap-2 text-xs text-gray-300">
+             <svg className="w-4 h-4 text-blue-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+               <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 11a7 7 0 01-7 7m0 0a7 7 0 01-7-7m7 7v4m0 0H8m4 0h4m-4-8a3 3 0 01-3-3V5a3 3 0 116 0v6a3 3 0 01-3 3z" />
+             </svg>
+             <span className="truncate max-w-xs" title={currentMicrophone}>
+               {currentMicrophone}
+             </span>
+           </div>
+         )}
+       </div>
+
+       {/* Video Preview (only for video recording) */}
+       {isVideo && permissionState === 'granted' && (
+         <div className="mb-6">
+           <video
+             ref={videoRef}
+             className="w-80 h-60 bg-black rounded-lg object-cover"
+             muted
+             playsInline
+           />
+         </div>
+       )}
+
+       {/* Audio Visualization */}
+       {permissionState === 'granted' && (
+         <div className="mb-6 flex items-center justify-center">
+           <div className="w-80 h-20 bg-gray-800 rounded-lg flex items-center justify-center">
+             <div className="flex items-center space-x-1">
+               {/* Real-time audio visualization bars */}
+               {Array.from({ length: 32 }, (_, i) => {
+                 // Use a wider frequency range for better distribution
+                 // Map across 60% of the frequency spectrum for voice and some harmonics
+                 const voiceRangeEnd = Math.floor(audioData.length * 0.6);
+                 const dataIndex = Math.floor((i / 32) * voiceRangeEnd);
+                 const amplitude = audioData[dataIndex] || 0;
+
+                 // Apply logarithmic scaling to prevent saturation and better distribute levels
+                 const normalizedAmplitude = amplitude / 255;
+                 const logScaled = Math.log10(1 + normalizedAmplitude * 9); // Log scale 0-1 (log10(10) = 1, so no divisor needed)
+                 const height = Math.max(4, logScaled * 60); // Scale to 4-60px
+
+                 return (
+                   <div
+                     key={i}
+                     className="w-1 bg-blue-500 rounded-full transition-all duration-75"
+                     style={{
+                       height: `${height}px`
+                     }}
+                   />
+                 );
+               })}
+             </div>
+           </div>
+         </div>
+       )}
+
+       {/* Error Display */}
+       {error && (
+         <div className="mb-4 p-3 bg-red-900/20 border border-red-500 rounded-lg">
+           <p className="text-red-300 text-sm">{error}</p>
+         </div>
+       )}
+
+       {/* Controls */}
+       <div className="flex gap-4">
+         {permissionState === 'denied' && (
+           <button
+             onClick={requestPermissions}
+             className="px-6 py-2 bg-blue-600 hover:bg-blue-700 text-white rounded-lg transition-colors"
+           >
+             Request Permission
+           </button>
+         )}
+
+         {permissionState === 'granted' && !isRecording && (
+           <button
+             onClick={startRecording}
+             disabled={!stream}
+             className="px-6 py-2 bg-red-600 hover:bg-red-700 disabled:bg-gray-600 text-white rounded-lg transition-colors flex items-center gap-2"
+           >
+             <div className="w-4 h-4 bg-white rounded-full"></div>
+             Start Recording
+           </button>
+         )}
+
+         {isRecording && (
+           <button
+             onClick={stopRecording}
+             className="px-6 py-2 bg-gray-600 hover:bg-gray-700 text-white rounded-lg transition-colors flex items-center gap-2"
+           >
+             <div className="w-4 h-4 bg-white"></div>
+             Stop Recording
+           </button>
+         )}
+
+         <button
+           onClick={onCancel}
+           disabled={isRecording}
+           className="px-6 py-2 bg-gray-700 hover:bg-gray-600 disabled:bg-gray-800 text-white rounded-lg transition-colors"
+         >
+           Cancel
+         </button>
+       </div>
+
+       {/* Tips */}
+       <div className="mt-6 text-center">
+         <p className="text-gray-400 text-xs max-w-md">
+           Speak clearly and minimize background noise for best transcription results.
+         </p>
+       </div>
+     </div>
+   );
+ };
+
+ export default MediaRecorder;
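Note: the codec fallback in startRecording reduces to a one-liner; a sketch for reviewers (pickSupportedMimeType is an illustrative helper, not part of this commit):

    // First supported candidate wins; '' defers to the browser's default container.
    const pickSupportedMimeType = (candidates: string[]): string =>
      candidates.find((t) => t === '' || window.MediaRecorder.isTypeSupported(t)) ?? '';

    pickSupportedMimeType(['audio/webm;codecs=opus', 'audio/webm', 'audio/mp4', '']);
    // typically 'audio/webm;codecs=opus' on Chrome/Firefox, 'audio/mp4' on Safari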
frontend/src/components/MinimapTimeline.tsx ADDED
@@ -0,0 +1,509 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useRef, useEffect, useState, useCallback } from 'react';
2
+ import { ArrowDownTrayIcon } from '@heroicons/react/24/outline';
3
+ import { useTranscriptionStore } from '../stores/transcriptionStore';
4
+
5
+ interface MinimapTimelineProps {
6
+ audioRef: React.RefObject<HTMLAudioElement>;
7
+ videoRef: React.RefObject<HTMLVideoElement>;
8
+ canvasTimelineRef: React.RefObject<HTMLDivElement>; // Container that scrolls
9
+ }
10
+
11
+ export default function MinimapTimeline({
12
+ audioRef,
13
+ videoRef,
14
+ canvasTimelineRef
15
+ }: MinimapTimelineProps) {
16
+ const canvasRef = useRef<HTMLCanvasElement>(null);
17
+ const containerRef = useRef<HTMLDivElement>(null);
18
+ const [isDragging, setIsDragging] = useState(false);
19
+ const [dragStartX, setDragStartX] = useState(0);
20
+ const [dragStartScrollLeft, setDragStartScrollLeft] = useState(0);
21
+ const [waveformData, setWaveformData] = useState<number[]>([]);
22
+ const [viewport, setViewport] = useState({ start: 0, end: 30, visible: false });
23
+
24
+ const {
25
+ transcription,
26
+ preprocessedAudio,
27
+ currentTime,
28
+ } = useTranscriptionStore();
29
+
30
+ // Constants
31
+ const MINIMAP_HEIGHT = 80;
32
+ const PIXELS_PER_SECOND = 300; // Match the CanvasTimeline scaling
33
+
34
+ // Get media duration
35
+ const getMediaDuration = useCallback(() => {
36
+ const audioElement = audioRef.current;
37
+ const videoElement = videoRef.current;
38
+
39
+ if (audioElement && !isNaN(audioElement.duration)) {
40
+ return audioElement.duration;
41
+ }
42
+ if (videoElement && !isNaN(videoElement.duration)) {
43
+ return videoElement.duration;
44
+ }
45
+
46
+ return transcription?.total_duration || 0;
47
+ }, [audioRef, videoRef, transcription]);
48
+
49
+ const mediaDuration = getMediaDuration();
50
+
51
+ // Canvas width based on container
52
+ const [canvasWidth, setCanvasWidth] = useState(800);
53
+
54
+ // Update canvas width on resize
55
+ useEffect(() => {
56
+ const updateCanvasWidth = () => {
57
+ if (containerRef.current) {
58
+ setCanvasWidth(containerRef.current.clientWidth);
59
+ }
60
+ };
61
+
62
+ updateCanvasWidth();
63
+ window.addEventListener('resize', updateCanvasWidth);
64
+ return () => window.removeEventListener('resize', updateCanvasWidth);
65
+ }, []);
66
+
67
+ // Track Canvas Timeline scroll position and calculate viewport
68
+ const updateViewportFromScroll = useCallback(() => {
69
+ const canvasContainer = canvasTimelineRef.current;
70
+ if (!canvasContainer || mediaDuration === 0) return;
71
+
72
+ const scrollLeft = canvasContainer.scrollLeft;
73
+ const containerWidth = canvasContainer.clientWidth;
74
+ const totalCanvasWidth = mediaDuration * PIXELS_PER_SECOND;
75
+
76
+ // Calculate what time range is currently visible
77
+ const startTime = (scrollLeft / totalCanvasWidth) * mediaDuration;
78
+ const endTime = ((scrollLeft + containerWidth) / totalCanvasWidth) * mediaDuration;
79
+
80
+ setViewport({
81
+ start: Math.max(0, startTime),
82
+ end: Math.min(mediaDuration, endTime),
83
+ visible: true
84
+ });
85
+ }, [canvasTimelineRef, mediaDuration]);
86
+
87
+ // Listen for scroll events on the Canvas Timeline container
88
+ useEffect(() => {
89
+ const canvasContainer = canvasTimelineRef.current;
90
+ if (!canvasContainer) return;
91
+
92
+ const handleScroll = () => {
93
+ updateViewportFromScroll();
94
+ };
95
+
96
+ const handleLoadOrResize = () => {
97
+ // Update viewport when container size changes
98
+ updateViewportFromScroll();
99
+ };
100
+
101
+ // Initial viewport calculation
102
+ updateViewportFromScroll();
103
+
104
+ canvasContainer.addEventListener('scroll', handleScroll);
105
+ window.addEventListener('resize', handleLoadOrResize);
106
+
107
+ return () => {
108
+ canvasContainer.removeEventListener('scroll', handleScroll);
109
+ window.removeEventListener('resize', handleLoadOrResize);
110
+ };
111
+ }, [updateViewportFromScroll]);
112
+
113
+ // Generate waveform data from preprocessed audio
114
+ const generateWaveformFromPreprocessedAudio = useCallback(async () => {
115
+ if (!preprocessedAudio?.data) {
116
+ console.log('No preprocessed audio data available');
117
+ return;
118
+ }
119
+
120
+ try {
121
+ console.log('Generating waveform from preprocessed audio data');
122
+
123
+ // Decode base64 audio data
124
+ const audioBytes = atob(preprocessedAudio.data);
125
+ const audioArrayBuffer = new ArrayBuffer(audioBytes.length);
126
+ const audioUint8Array = new Uint8Array(audioArrayBuffer);
127
+
128
+ for (let i = 0; i < audioBytes.length; i++) {
129
+ audioUint8Array[i] = audioBytes.charCodeAt(i);
130
+ }
131
+
132
+ // Create audio context and decode the WAV data
133
+ const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
134
+ const audioBuffer = await audioContext.decodeAudioData(audioArrayBuffer);
135
+
136
+ // Extract audio data
137
+ const channelData = audioBuffer.getChannelData(0);
138
+ const samples = Math.min(800, canvasWidth); // Limit samples for performance
139
+ const blockSize = Math.floor(channelData.length / samples);
140
+
141
+ const waveform: number[] = [];
142
+ for (let i = 0; i < samples; i++) {
143
+ const start = i * blockSize;
144
+ const end = Math.min(start + blockSize, channelData.length);
145
+
146
+ let sum = 0;
147
+ for (let j = start; j < end; j++) {
148
+ sum += Math.abs(channelData[j]);
149
+ }
150
+
151
+ waveform.push(sum / (end - start));
152
+ }
153
+
154
+ // Normalize waveform
155
+ const max = Math.max(...waveform);
156
+ const normalizedWaveform = max > 0 ? waveform.map(val => val / max) : waveform;
157
+
158
+ setWaveformData(normalizedWaveform);
159
+ console.log(`Generated waveform with ${normalizedWaveform.length} samples from preprocessed audio`);
160
+
161
+ } catch (error) {
162
+ console.error('Error generating waveform from preprocessed audio:', error);
163
+ // Fallback to segment-based visualization
164
+ generateFallbackWaveform();
165
+ }
166
+ }, [preprocessedAudio, canvasWidth]);
167
+
168
+ // Fallback waveform generation from segment data
169
+ const generateFallbackWaveform = useCallback(() => {
170
+ if (!transcription?.aligned_segments || mediaDuration === 0) return;
171
+
172
+ console.log('Using fallback waveform generation from segments');
173
+ const segments = transcription.aligned_segments;
174
+ const samples = Math.min(400, canvasWidth / 2);
175
+ const bars = new Array(samples).fill(0);
176
+
177
+ // Create waveform based on speech activity in segments
178
+ segments.forEach(segment => {
179
+ const startIndex = Math.floor((segment.start / mediaDuration) * samples);
180
+ const endIndex = Math.ceil((segment.end / mediaDuration) * samples);
181
+
182
+ for (let i = startIndex; i < Math.min(endIndex, samples); i++) {
183
+ // Use segment text length and duration to estimate intensity
184
+ const intensity = Math.min(1.0, segment.text.length / 50 + 0.3);
185
+ bars[i] = Math.max(bars[i], intensity * (0.7 + Math.random() * 0.3));
186
+ }
187
+ });
188
+
189
+ setWaveformData(bars);
190
+ console.log(`Generated fallback waveform with ${bars.length} samples`);
191
+ }, [transcription, mediaDuration, canvasWidth]);
192
+
193
+ // Generate waveform when preprocessed audio becomes available
194
+ useEffect(() => {
195
+ if (preprocessedAudio?.data) {
196
+ generateWaveformFromPreprocessedAudio();
197
+ } else if (transcription?.aligned_segments) {
198
+ // Use fallback if we have segments but no preprocessed audio
199
+ generateFallbackWaveform();
200
+ }
201
+ }, [preprocessedAudio, generateWaveformFromPreprocessedAudio, generateFallbackWaveform]);
202
+
203
+ // Draw the minimap
204
+ const draw = useCallback(() => {
205
+ const canvas = canvasRef.current;
206
+ if (!canvas || mediaDuration === 0) return;
207
+
208
+ const ctx = canvas.getContext('2d');
209
+ if (!ctx) return;
210
+
211
+ const { width, height } = canvas;
212
+
213
+ // Clear canvas
214
+ ctx.clearRect(0, 0, width, height);
215
+
216
+ // Draw background
217
+ ctx.fillStyle = '#1a1a1a';
218
+ ctx.fillRect(0, 0, width, height);
219
+
220
+ // Draw waveform
221
+ if (waveformData.length > 0) {
222
+ ctx.fillStyle = '#4a5568';
223
+ const barWidth = width / waveformData.length;
224
+
225
+ waveformData.forEach((amplitude, index) => {
226
+ const barHeight = amplitude * (height - 20);
227
+ const x = index * barWidth;
228
+ const y = (height - barHeight) / 2;
229
+
230
+ ctx.fillRect(x, y, Math.max(1, barWidth - 1), barHeight);
231
+ });
232
+ }
233
+
234
+ // Draw segments as colored bars
235
+ if (transcription?.aligned_segments) {
236
+ transcription.aligned_segments.forEach((segment, index) => {
237
+ const startX = (segment.start / mediaDuration) * width;
238
+ const endX = (segment.end / mediaDuration) * width;
239
+ const segmentWidth = endX - startX;
240
+
241
+ // Alternate colors for segments
242
+ ctx.fillStyle = index % 2 === 0 ? '#3182ce' : '#38a169';
243
+ ctx.fillRect(startX, height - 4, segmentWidth, 4);
244
+ });
245
+ }
246
+
247
+ // Draw current time indicator
248
+ const currentTimeX = (currentTime / mediaDuration) * width;
249
+ ctx.strokeStyle = '#f56565';
250
+ ctx.lineWidth = 2;
251
+ ctx.beginPath();
252
+ ctx.moveTo(currentTimeX, 0);
253
+ ctx.lineTo(currentTimeX, height);
254
+ ctx.stroke();
255
+
256
+ // Draw viewport region (what's visible in Canvas Timeline)
257
+ if (viewport.visible) {
258
+ const viewportStartX = (viewport.start / mediaDuration) * width;
259
+ const viewportEndX = (viewport.end / mediaDuration) * width;
260
+
261
+ // Draw viewport selection area (visible region highlight)
262
+ ctx.fillStyle = 'rgba(66, 153, 225, 0.3)';
263
+ ctx.fillRect(viewportStartX, 0, viewportEndX - viewportStartX, height);
264
+
265
+ // Draw left boundary line (start of visible area)
266
+ ctx.strokeStyle = '#4299e1';
267
+ ctx.lineWidth = 3;
268
+ ctx.beginPath();
269
+ ctx.moveTo(viewportStartX, 0);
270
+ ctx.lineTo(viewportStartX, height);
271
+ ctx.stroke();
272
+
273
+ // Draw right boundary line (end of visible area)
274
+ ctx.beginPath();
275
+ ctx.moveTo(viewportEndX, 0);
276
+ ctx.lineTo(viewportEndX, height);
277
+ ctx.stroke();
278
+
279
+ // Draw border around visible area
280
+ ctx.strokeStyle = '#4299e1';
281
+ ctx.lineWidth = 1;
282
+ ctx.strokeRect(viewportStartX, 0, viewportEndX - viewportStartX, height);
283
+ }
284
+ }, [waveformData, transcription, currentTime, viewport, mediaDuration]);
285
+
286
+ // Update canvas size and redraw
287
+ useEffect(() => {
288
+ const canvas = canvasRef.current;
289
+ if (canvas) {
290
+ canvas.width = canvasWidth;
291
+ canvas.height = MINIMAP_HEIGHT;
292
+ draw();
293
+ }
294
+ }, [canvasWidth, draw]);
295
+
296
+ // Redraw when dependencies change
297
+ useEffect(() => {
298
+ draw();
299
+ }, [draw]);
300
+
301
+ // Utility function to get time from X coordinate
302
+ const getTimeFromX = useCallback((x: number) => {
303
+ return (x / canvasWidth) * mediaDuration;
304
+ }, [canvasWidth, mediaDuration]);
305
+
306
+ // Check if clicking inside the viewport region
307
+ const isClickingViewport = useCallback((x: number) => {
308
+ if (!viewport.visible) return false;
309
+
310
+ const viewportStartX = (viewport.start / mediaDuration) * canvasWidth;
311
+ const viewportEndX = (viewport.end / mediaDuration) * canvasWidth;
312
+
313
+ return x >= viewportStartX && x <= viewportEndX;
314
+ }, [viewport, mediaDuration, canvasWidth]);
315
+
316
+ // Scroll Canvas Timeline to show specific time
317
+ const scrollToTime = useCallback((time: number) => {
318
+ const canvasContainer = canvasTimelineRef.current;
319
+ if (!canvasContainer) return;
320
+
321
+ const totalCanvasWidth = mediaDuration * PIXELS_PER_SECOND;
322
+ const targetScrollLeft = Math.max(0, (time / mediaDuration) * totalCanvasWidth);
323
+
324
+ canvasContainer.scrollLeft = targetScrollLeft;
325
+ }, [canvasTimelineRef, mediaDuration]);
326
+
327
+ // Mouse event handlers
328
+ const handleMouseDown = useCallback((e: React.MouseEvent) => {
329
+ const rect = canvasRef.current?.getBoundingClientRect();
330
+ if (!rect) return;
331
+
332
+ const x = e.clientX - rect.left;
333
+
334
+ if (isClickingViewport(x)) {
335
+ // Start dragging the viewport
336
+ setIsDragging(true);
337
+ setDragStartX(x);
338
+ const canvasContainer = canvasTimelineRef.current;
339
+ if (canvasContainer) {
340
+ setDragStartScrollLeft(canvasContainer.scrollLeft);
341
+ }
342
+ } else {
343
+ // Click outside viewport - jump to that position
344
+ const clickTime = getTimeFromX(x);
345
+ scrollToTime(clickTime);
346
+ }
347
+ }, [isClickingViewport, canvasTimelineRef, getTimeFromX, scrollToTime]);
348
+
349
+ const handleMouseMove = useCallback((e: React.MouseEvent) => {
350
+ if (!isDragging) return;
351
+
352
+ const rect = canvasRef.current?.getBoundingClientRect();
353
+ if (!rect) return;
354
+
355
+ const x = e.clientX - rect.left;
356
+ const deltaX = x - dragStartX;
357
+
358
+ const canvasContainer = canvasTimelineRef.current;
359
+ if (!canvasContainer) return;
360
+
361
+ // Convert deltaX to scroll delta
362
+ const totalCanvasWidth = mediaDuration * PIXELS_PER_SECOND;
363
+ const scrollDelta = (deltaX / canvasWidth) * totalCanvasWidth;
364
+
365
+ const newScrollLeft = Math.max(0, Math.min(
366
+ dragStartScrollLeft + scrollDelta,
367
+ canvasContainer.scrollWidth - canvasContainer.clientWidth
368
+ ));
369
+
370
+ canvasContainer.scrollLeft = newScrollLeft;
371
+ }, [isDragging, dragStartX, dragStartScrollLeft, canvasTimelineRef, mediaDuration, canvasWidth]);
372
+
373
+ const handleMouseUp = useCallback(() => {
374
+ setIsDragging(false);
375
+ }, []);
376
+
377
+ // Add global mouse event listeners when dragging
378
+ useEffect(() => {
379
+ if (isDragging) {
380
+ const handleGlobalMouseMove = (e: MouseEvent) => {
381
+ handleMouseMove(e as unknown as React.MouseEvent); // safe: the handler only reads clientX
382
+ };
383
+ const handleGlobalMouseUp = () => {
384
+ handleMouseUp();
385
+ };
386
+
387
+ document.addEventListener('mousemove', handleGlobalMouseMove);
388
+ document.addEventListener('mouseup', handleGlobalMouseUp);
389
+
390
+ return () => {
391
+ document.removeEventListener('mousemove', handleGlobalMouseMove);
392
+ document.removeEventListener('mouseup', handleGlobalMouseUp);
393
+ };
394
+ }
395
+ }, [isDragging, handleMouseMove, handleMouseUp]);
396
+
397
+ // Change cursor based on hover position
398
+ const handleMouseHover = useCallback((e: React.MouseEvent) => {
399
+ if (isDragging) return;
400
+
401
+ const rect = canvasRef.current?.getBoundingClientRect();
402
+ if (!rect) return;
403
+
404
+ const x = e.clientX - rect.left;
405
+ const canvas = canvasRef.current;
406
+ if (!canvas) return;
407
+
408
+ if (isClickingViewport(x)) {
409
+ canvas.style.cursor = 'move';
410
+ } else {
411
+ canvas.style.cursor = 'pointer';
412
+ }
413
+ }, [isDragging, isClickingViewport]);
414
+
415
+ // Download preprocessed audio as WAV file
416
+ const downloadPreprocessedAudio = useCallback(() => {
417
+ if (!preprocessedAudio?.data) {
418
+ console.error('No preprocessed audio data available');
419
+ return;
420
+ }
421
+
422
+ try {
423
+ // Decode base64 audio data
424
+ const audioBytes = atob(preprocessedAudio.data);
425
+ const audioArrayBuffer = new ArrayBuffer(audioBytes.length);
426
+ const audioUint8Array = new Uint8Array(audioArrayBuffer);
427
+
428
+ for (let i = 0; i < audioBytes.length; i++) {
429
+ audioUint8Array[i] = audioBytes.charCodeAt(i);
430
+ }
431
+
432
+ // Create blob and download
433
+ const blob = new Blob([audioUint8Array], { type: 'audio/wav' });
434
+ const url = URL.createObjectURL(blob);
435
+
436
+ // Get original filename without extension
437
+ const { file } = useTranscriptionStore.getState();
438
+ const originalName = file?.name?.replace(/\.[^/.]+$/, '') || 'audio';
439
+ const filename = `${originalName}_preprocessed_16khz_mono_normalized.wav`;
440
+
441
+ // Create download link
442
+ const link = document.createElement('a');
443
+ link.href = url;
444
+ link.download = filename;
445
+ document.body.appendChild(link);
446
+ link.click();
447
+ document.body.removeChild(link);
448
+
449
+ // Clean up URL
450
+ URL.revokeObjectURL(url);
451
+
452
+ console.log(`Downloaded preprocessed audio: ${filename}`);
453
+ } catch (error) {
454
+ console.error('Error downloading preprocessed audio:', error);
455
+ }
456
+ }, [preprocessedAudio]);
457
+
458
+ if (!transcription || mediaDuration === 0) {
459
+ return null;
460
+ }
461
+
462
+ return (
463
+ <div className="bg-gray-800 border-b border-gray-700">
464
+ <div className="px-4 py-2">
465
+ <div className="flex justify-between items-center text-xs text-gray-400 mb-1">
466
+ <div className="flex items-center gap-2">
467
+ <span>
468
+ Overview - Full Timeline ({Math.round(mediaDuration)}s)
469
+ {preprocessedAudio ? ' • Preprocessed Waveform' : ' • Segment-Based View'}
470
+ </span>
471
+ {preprocessedAudio && (
472
+ <div className="tooltip tooltip-bottom" data-tip="Download preprocessed audio as WAV file (16kHz, mono, layer-normalized). This is the exact audio data processed by the AI transcription model after conversion and standardization from the original file.">
473
+ <button
474
+ onClick={downloadPreprocessedAudio}
475
+ className="flex items-center gap-1 px-1.5 py-0.5 text-xs bg-gray-600 hover:bg-gray-500 rounded transition-colors text-white"
476
+ >
477
+ <ArrowDownTrayIcon className="w-3 h-3" />
478
+ .wav
479
+ </button>
480
+ </div>
481
+ )}
482
+ </div>
483
+ {viewport.visible && (
484
+ <span>
485
+ Visible: {viewport.start.toFixed(1)}s - {viewport.end.toFixed(1)}s
486
+ ({Math.round(viewport.end - viewport.start)}s view)
487
+ </span>
488
+ )}
489
+ </div>
490
+ <div
491
+ ref={containerRef}
492
+ className="relative"
493
+ style={{ height: MINIMAP_HEIGHT }}
494
+ >
495
+ <canvas
496
+ ref={canvasRef}
497
+ onMouseDown={handleMouseDown}
498
+ onMouseMove={handleMouseHover}
499
+ className="block w-full h-full"
500
+ style={{
501
+ width: '100%',
502
+ height: MINIMAP_HEIGHT,
503
+ }}
504
+ />
505
+ </div>
506
+ </div>
507
+ </div>
508
+ );
509
+ }
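
Note: MinimapTimeline maps pixels to media time linearly in both directions, and the drag handler rescales a pixel delta on the minimap into a scroll delta on the much wider main timeline. A minimal sketch of that math as pure helpers, assuming a PIXELS_PER_SECOND of 50 (the real constant is defined elsewhere in the app):

```ts
// Sketch of the minimap coordinate math above (illustration, not the component's API).
const PIXELS_PER_SECOND = 50; // assumed value; the app defines the real constant

// Minimap x-coordinate -> media time in seconds (mirrors getTimeFromX).
function timeFromMinimapX(x: number, minimapWidth: number, mediaDuration: number): number {
  return (x / minimapWidth) * mediaDuration;
}

// Pixel delta dragged on the minimap -> scroll delta on the full timeline
// (mirrors handleMouseMove's scrollDelta computation).
function scrollDeltaFromDrag(deltaX: number, minimapWidth: number, mediaDuration: number): number {
  const totalCanvasWidth = mediaDuration * PIXELS_PER_SECOND;
  return (deltaX / minimapWidth) * totalCanvasWidth;
}

// A 10 px drag on an 800 px minimap over a 60 s file moves the 3000 px timeline by 37.5 px.
console.log(timeFromMinimapX(400, 800, 60)); // 30 (seconds)
console.log(scrollDeltaFromDrag(10, 800, 60)); // 37.5
```
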
frontend/src/components/QuickGuide.tsx ADDED
@@ -0,0 +1,176 @@
1
+ import React from "react";
2
+ import {useTranscriptionStore} from "../stores/transcriptionStore";
3
+
4
+ interface QuickGuideProps {
5
+ currentStep?: string; // Optional override for current step
6
+ }
7
+
8
+ type GuideStep = {
9
+ id: string;
10
+ text: string;
11
+ icon?: string;
12
+ isActive: (state: any) => boolean;
13
+ isCompleted: (state: any) => boolean;
14
+ };
15
+
16
+ const QuickGuide: React.FC<QuickGuideProps> = ({currentStep}) => {
17
+ const {
18
+ file,
19
+ transcription,
20
+ isLoading,
21
+ selectedSegmentIndex,
22
+ currentSegments,
23
+ currentTime
24
+ } = useTranscriptionStore();
25
+
26
+ // Define all the steps with their conditions
27
+ const steps: GuideStep[] = [
28
+ {
29
+ id: "upload",
30
+ text: "Upload or record audio",
31
+ icon: "📁",
32
+ isActive: (state) => !state.file,
33
+ isCompleted: (state) => !!state.file,
34
+ },
35
+ {
36
+ id: "transcribe",
37
+ text: "Click transcribe to process",
38
+ icon: "🎯",
39
+ isActive: (state) =>
40
+ !!state.file && !state.transcription && !state.isLoading,
41
+ isCompleted: (state) => !!state.transcription || state.isLoading,
42
+ },
43
+ {
44
+ id: "play",
45
+ text: "Play media",
46
+ icon: "▶️",
47
+ isActive: (state) => !!state.transcription,
48
+ isCompleted: () => (currentTime ?? 0) > 0, // Completed once playback has started
49
+ },
50
+ // {
51
+ // id: "jump",
52
+ // text: "Click segments to jump",
53
+ // icon: "🎵",
54
+ // isActive: (state) => !!state.transcription,
55
+ // isCompleted: () => false, // Always in progress when transcription available
56
+ // },
57
+ // {
58
+ // id: "drag",
59
+ // text: "Drag segments to move/resize",
60
+ // icon: "↔️",
61
+ // isActive: (state) => !!state.transcription,
62
+ // isCompleted: () => false, // Always in progress when transcription available
63
+ // },
64
+ // {
65
+ // id: "combine",
66
+ // text: "Use slider to combine segments",
67
+ // icon: "🔗",
68
+ // isActive: (state) => !!state.transcription,
69
+ // isCompleted: () => false, // Always in progress when transcription available
70
+ // },
71
+ // {
72
+ // id: "download",
73
+ // text: "Download subtitles",
74
+ // icon: "💾",
75
+ // isActive: (state) => !!state.transcription,
76
+ // isCompleted: () => false, // Always in progress when transcription available
77
+ // },
78
+ ];
79
+
80
+ // Create state object for condition checking
81
+ const storeState = {
82
+ file,
83
+ transcription,
84
+ isLoading,
85
+ selectedSegmentIndex,
86
+ currentSegments,
87
+ };
88
+
89
+ // Determine step states
90
+ const getStepState = (step: GuideStep) => {
91
+ // Override with currentStep prop if provided
92
+ if (currentStep) {
93
+ if (step.id === currentStep) return "active";
94
+ if (step.isCompleted(storeState)) return "completed";
95
+ return "inactive";
96
+ }
97
+
98
+ // Default logic based on store state
99
+ if (step.isCompleted(storeState)) return "completed";
100
+ if (step.isActive(storeState)) return "active";
101
+ return "inactive";
102
+ };
103
+
104
+ // Get the appropriate CSS classes for each step state
105
+ const getStepClasses = (stepState: string) => {
106
+ switch (stepState) {
107
+ case "active":
108
+ return "text-blue-300 bg-blue-900/30 border-blue-500/50 font-medium";
109
+ case "completed":
110
+ return "text-green-300 bg-green-900/20 border-green-500/30";
111
+ default:
112
+ return "text-gray-400 bg-transparent border-transparent";
113
+ }
114
+ };
115
+
116
+ // Get icon for step state
117
+ const getStepIcon = (step: GuideStep, stepState: string) => {
118
+ if (stepState === "completed") return "✓";
119
+ if (stepState === "active") return "→";
120
+ return step.icon || "•";
121
+ };
122
+
123
+ return (
124
+ <div className="border-t border-gray-700 py-3">
125
+ <h3 className="text-xs font-semibold mb-2 text-gray-200">Quick Guide</h3>
126
+ <div className="space-y-1">
127
+ {steps.map((step) => {
128
+ const stepState = getStepState(step);
129
+ const stepClasses = getStepClasses(stepState);
130
+ const icon = getStepIcon(step, stepState);
131
+
132
+ return (
133
+ <div
134
+ key={step.id}
135
+ className={`text-xs px-2 py-1 rounded border transition-all duration-200 ${stepClasses}`}
136
+ >
137
+ <span
138
+ className="inline-block w-4 text-center mr-1"
139
+ aria-label={`Step ${step.id}`}
140
+ >
141
+ {icon}
142
+ </span>
143
+ {step.text}
144
+ </div>
145
+ );
146
+ })}
147
+ </div>
148
+
149
+ {/* Progress indicator */}
150
+ {transcription && (
151
+ <div className="mt-2 pt-2 border-t border-gray-600">
152
+ <div className="text-xs text-gray-400">
153
+ {selectedSegmentIndex !== null ? (
154
+ <span className="text-yellow-400">✏️ Editing mode active</span>
155
+ ) : (
156
+ <span className="text-green-400">
157
+ ✓ Ready for playback & editing
158
+ </span>
159
+ )}
160
+ </div>
161
+ </div>
162
+ )}
163
+
164
+ {/* Loading indicator */}
165
+ {isLoading && (
166
+ <div className="mt-2 pt-2 border-t border-gray-600">
167
+ <div className="text-xs text-blue-400 animate-pulse">
168
+ ⏳ Processing... Please wait
169
+ </div>
170
+ </div>
171
+ )}
172
+ </div>
173
+ );
174
+ };
175
+
176
+ export default QuickGuide;
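
Note: each guide step resolves to one of three states, with "completed" taking precedence over "active". The same precedence rule as a small pure function, using a simplified state shape for illustration:

```ts
// Sketch of QuickGuide's step resolution (simplified state shape; not the component's API).
type StepState = "completed" | "active" | "inactive";

interface GuideState {
  file: File | null;
  transcription: unknown;
  isLoading: boolean;
}

function resolveStepState(
  isActive: (s: GuideState) => boolean,
  isCompleted: (s: GuideState) => boolean,
  state: GuideState
): StepState {
  if (isCompleted(state)) return "completed"; // completed wins over active
  if (isActive(state)) return "active";
  return "inactive";
}

// The "transcribe" step: active once a file exists, completed once loading starts
// or a transcription arrives.
const state: GuideState = { file: new File([], "clip.wav"), transcription: null, isLoading: false };
console.log(
  resolveStepState(
    (s) => !!s.file && !s.transcription && !s.isLoading,
    (s) => !!s.transcription || s.isLoading,
    state
  )
); // "active"
```
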
frontend/src/components/SegmentEditor.tsx ADDED
@@ -0,0 +1,92 @@
1
+ import React, { useEffect, useRef } from 'react';
2
+ import { AlignedSegment } from '../services/transcriptionApi';
3
+ import { formatTime } from '../utils/subtitleUtils';
4
+
5
+ interface SegmentEditorProps {
6
+ segment: AlignedSegment;
7
+ segmentIndex: number;
8
+ onUpdateText: (index: number, text: string) => void;
9
+ onDeleteSegment: (index: number) => void;
10
+ onClose: () => void;
11
+ }
12
+
13
+ export default function SegmentEditor({
14
+ segment,
15
+ segmentIndex,
16
+ onUpdateText,
17
+ onDeleteSegment,
18
+ onClose,
19
+ }: SegmentEditorProps) {
20
+ const textareaRef = useRef<HTMLTextAreaElement>(null);
21
+
22
+ useEffect(() => {
23
+ // Focus the textarea when component mounts
24
+ if (textareaRef.current) {
25
+ textareaRef.current.focus();
26
+ textareaRef.current.select();
27
+ }
28
+ }, []);
29
+
30
+ const handleTextChange = (e: React.ChangeEvent<HTMLTextAreaElement>) => {
31
+ const newText = e.target.value;
32
+ onUpdateText(segmentIndex, newText);
33
+ };
34
+
35
+ const handleDelete = () => {
36
+ onDeleteSegment(segmentIndex);
37
+ onClose();
38
+ };
39
+
40
+ const handleKeyDown = (e: React.KeyboardEvent) => {
41
+ if (e.key === 'Escape') {
42
+ e.preventDefault();
43
+ onClose();
44
+ }
45
+ };
46
+
47
+ return (
48
+ <div className="bg-gray-800 border-b border-gray-700 p-4">
49
+ <div className="flex items-start justify-between mb-3">
50
+ <div className="flex-1">
51
+ <h3 className="text-sm font-semibold text-white mb-1">
52
+ Edit Segment #{segmentIndex + 1}
53
+ </h3>
54
+ <div className="text-xs text-gray-400">
55
+ {formatTime(segment.start)} - {formatTime(segment.end)} ({segment.duration.toFixed(1)}s)
56
+ </div>
57
+ </div>
58
+ <button
59
+ onClick={onClose}
60
+ className="text-gray-400 hover:text-white transition-colors ml-4"
61
+ title="Close editor"
62
+ >
63
+ <svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
64
+ <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
65
+ </svg>
66
+ </button>
67
+ </div>
68
+
69
+ <div className="mb-3">
70
+ <textarea
71
+ ref={textareaRef}
72
+ value={segment.text}
73
+ onChange={handleTextChange}
74
+ onKeyDown={handleKeyDown}
75
+ className="w-full p-2 bg-gray-700 text-white border border-gray-600 rounded resize-none focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent"
76
+ rows={1}
77
+ placeholder="Enter segment text..."
78
+ />
79
+ </div>
80
+
81
+ <div className="flex justify-end">
82
+ <button
83
+ onClick={handleDelete}
84
+ className="px-3 py-1 bg-red-600 hover:bg-red-700 text-white text-sm rounded transition-colors"
85
+ title="Delete this segment"
86
+ >
87
+ Delete
88
+ </button>
89
+ </div>
90
+ </div>
91
+ );
92
+ }
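
Note: SegmentEditor is fully controlled; the parent owns the segment array and supplies the update/delete callbacks. A hypothetical host sketch (not part of this repo) showing the wiring:

```tsx
// Hypothetical parent for SegmentEditor (illustration only).
import React, { useState } from 'react';
import SegmentEditor from './SegmentEditor';
import { AlignedSegment } from '../services/transcriptionApi';

function SegmentEditorHost({ initial }: { initial: AlignedSegment[] }) {
  const [segments, setSegments] = useState(initial);
  const [editing, setEditing] = useState<number | null>(0);

  if (editing === null || !segments[editing]) return null;

  return (
    <SegmentEditor
      segment={segments[editing]}
      segmentIndex={editing}
      onUpdateText={(i, text) =>
        setSegments((prev) => prev.map((s, idx) => (idx === i ? { ...s, text } : s)))
      }
      onDeleteSegment={(i) => setSegments((prev) => prev.filter((_, idx) => idx !== i))}
      onClose={() => setEditing(null)}
    />
  );
}
```
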
frontend/src/components/ServerStatusIndicator.tsx ADDED
@@ -0,0 +1,241 @@
1
+ import React, { useEffect, useState } from 'react';
2
+ import { useTranscriptionStore } from '../stores/transcriptionStore';
3
+ import { CheckCircleIcon, ClockIcon, InformationCircleIcon } from '@heroicons/react/24/outline';
4
+
5
+ const ServerStatusIndicator: React.FC = () => {
6
+ const {
7
+ serverStatus,
8
+ serverHealth,
9
+ startStatusPolling,
10
+ stopStatusPolling,
11
+ fetchServerHealth
12
+ } = useTranscriptionStore();
13
+
14
+ const [showTooltip, setShowTooltip] = useState(false);
15
+
16
+ // Start polling when component mounts
17
+ useEffect(() => {
18
+ startStatusPolling();
19
+ // Also fetch server health initially
20
+ fetchServerHealth();
21
+
22
+ // Cleanup on unmount
23
+ return () => {
24
+ stopStatusPolling();
25
+ };
26
+ }, [startStatusPolling, stopStatusPolling, fetchServerHealth]);
27
+
28
+ if (!serverStatus) {
29
+ return (
30
+ <div className="flex items-center gap-2 text-gray-300 text-sm">
31
+ <div className="w-2 h-2 bg-gray-400 rounded-full animate-pulse"></div>
32
+ <span>Connecting to server...</span>
33
+ </div>
34
+ );
35
+ }
36
+
37
+ const formatDuration = (seconds: number): string => {
38
+ const mins = Math.floor(seconds / 60);
39
+ const secs = Math.floor(seconds % 60);
40
+ return `${mins}:${secs.toString().padStart(2, '0')}`;
41
+ };
42
+
43
+ const getOperationLabel = (operation: string): string => {
44
+ switch (operation) {
45
+ case 'transcribe':
46
+ return 'Short Transcription';
47
+ case 'transcribe_long':
48
+ return 'Long Transcription';
49
+ case 'align':
50
+ return 'Alignment';
51
+ default:
52
+ return 'Processing';
53
+ }
54
+ };
55
+
56
+ const renderHealthTooltip = () => {
57
+ if (!serverHealth) return null;
58
+
59
+ return (
60
+ <div className="fixed z-50 p-3 bg-gray-900 text-white text-xs rounded-lg shadow-xl border border-gray-600 min-w-64 max-w-80"
61
+ style={{
62
+ left: '280px', // Position just outside the sidebar (sidebar width is 256px + padding)
63
+ top: '120px' // Position near the status indicator
64
+ }}>
65
+ <div className="font-semibold mb-2 text-blue-300">Server Health</div>
66
+
67
+ <div className="space-y-1">
68
+ <div className="flex justify-between">
69
+ <span className="text-gray-300">Status:</span>
70
+ <span className="text-green-400">{serverHealth.status}</span>
71
+ </div>
72
+
73
+ <div className="flex justify-between">
74
+ <span className="text-gray-300">Version:</span>
75
+ <span>{serverHealth.version}</span>
76
+ </div>
77
+
78
+ <div className="flex justify-between">
79
+ <span className="text-gray-300">Device:</span>
80
+ <span className="font-mono">{serverHealth.device}</span>
81
+ </div>
82
+
83
+ <div className="flex justify-between">
84
+ <span className="text-gray-300">CUDA:</span>
85
+ <span className={serverHealth.cuda_available ? "text-green-400" : "text-red-400"}>
86
+ {serverHealth.cuda_available ? "Available" : "Unavailable"}
87
+ </span>
88
+ </div>
89
+
90
+ <div className="flex justify-between">
91
+ <span className="text-gray-300">FFmpeg:</span>
92
+ <span className={serverHealth.ffmpeg_available ? "text-green-400" : "text-red-400"}>
93
+ {serverHealth.ffmpeg_available ? "Available" : "Unavailable"}
94
+ </span>
95
+ </div>
96
+
97
+ {serverHealth.gpu_name && (
98
+ <div className="flex justify-between">
99
+ <span className="text-gray-300">GPU:</span>
100
+ <span className="text-blue-400 text-right max-w-32 truncate" title={serverHealth.gpu_name}>
101
+ {serverHealth.gpu_name}
102
+ </span>
103
+ </div>
104
+ )}
105
+
106
+ {serverHealth.gpu_count && serverHealth.gpu_count > 1 && (
107
+ <div className="flex justify-between">
108
+ <span className="text-gray-300">GPU Count:</span>
109
+ <span>{serverHealth.gpu_count}</span>
110
+ </div>
111
+ )}
112
+
113
+ {/* GPU Memory Information */}
114
+ {serverHealth.gpu_memory_total_mb && (
115
+ <>
116
+ <div className="border-t border-gray-600 pt-1 mt-2">
117
+ <div className="text-xs text-blue-300 font-semibold mb-1">GPU Memory</div>
118
+ </div>
119
+
120
+ <div className="flex justify-between">
121
+ <span className="text-gray-300">Used:</span>
122
+ <span className="text-orange-400">
123
+ {serverHealth.gpu_memory_reserved_mb?.toFixed(1)} MB
124
+ </span>
125
+ </div>
126
+
127
+ <div className="flex justify-between">
128
+ <span className="text-gray-300">Total:</span>
129
+ <span className="text-blue-400">
130
+ {serverHealth.gpu_memory_total_mb?.toFixed(1)} MB
131
+ </span>
132
+ </div>
133
+
134
+ <div className="flex justify-between">
135
+ <span className="text-gray-300">Free:</span>
136
+ <span className="text-green-400">
137
+ {serverHealth.gpu_memory_free_mb?.toFixed(1)} MB
138
+ </span>
139
+ </div>
140
+
141
+ {/* Memory usage bar */}
142
+ {serverHealth.gpu_memory_total_mb && serverHealth.gpu_memory_reserved_mb && (
143
+ <div className="mt-1">
144
+ <div className="w-full bg-gray-600 rounded-full h-1.5">
145
+ <div
146
+ className="bg-orange-500 h-1.5 rounded-full transition-all duration-300"
147
+ style={{
148
+ width: `${Math.min(100, (serverHealth.gpu_memory_reserved_mb / serverHealth.gpu_memory_total_mb) * 100)}%`
149
+ }}
150
+ ></div>
151
+ </div>
152
+ <div className="text-xs text-gray-400 mt-0.5 text-center">
153
+ {((serverHealth.gpu_memory_reserved_mb / serverHealth.gpu_memory_total_mb) * 100).toFixed(1)}% used
154
+ </div>
155
+ </div>
156
+ )}
157
+ </>
158
+ )}
159
+ </div>
160
+
161
+ {/* Tooltip arrow pointing to the left (towards the info icon) */}
162
+ <div className="absolute left-0 top-4 w-0 h-0 border-t-4 border-t-transparent border-b-4 border-b-transparent border-r-4 border-r-gray-900 transform -translate-x-full"></div>
163
+ </div>
164
+ );
165
+ };
166
+
167
+ if (serverStatus.is_busy) {
168
+ const progress = serverStatus.progress || 0;
169
+ const progressPercent = Math.round(progress * 100);
170
+
171
+ return (
172
+ <div className="flex flex-col gap-1">
173
+ <div className="flex items-center justify-between">
174
+ <div className="flex items-center gap-2 text-orange-300 text-sm">
175
+ <ClockIcon className="w-4 h-4 animate-spin" />
176
+ <span className="font-medium">Server Busy</span>
177
+ </div>
178
+
179
+ {/* Health info icon with tooltip */}
180
+ <div className="relative">
181
+ <InformationCircleIcon
182
+ className="w-4 h-4 text-gray-400 hover:text-gray-200 cursor-help transition-colors"
183
+ onMouseEnter={() => setShowTooltip(true)}
184
+ onMouseLeave={() => setShowTooltip(false)}
185
+ />
186
+ {showTooltip && renderHealthTooltip()}
187
+ </div>
188
+ </div>
189
+
190
+ <div className="text-xs text-gray-300 space-y-1">
191
+ <div className="flex justify-between items-center">
192
+ <span>{getOperationLabel(serverStatus.current_operation || 'processing')}</span>
193
+ <span className="font-mono text-orange-300">{progressPercent}%</span>
194
+ </div>
195
+
196
+ {serverStatus.duration_seconds && (
197
+ <div className="flex justify-between items-center">
198
+ <span className="text-gray-400">Duration:</span>
199
+ <span className="font-mono">{formatDuration(serverStatus.duration_seconds)}</span>
200
+ </div>
201
+ )}
202
+
203
+ {/* Progress bar */}
204
+ <div className="w-full bg-gray-600 rounded-full h-1.5 mt-2">
205
+ <div
206
+ className="bg-orange-500 h-1.5 rounded-full transition-all duration-300"
207
+ style={{ width: `${progressPercent}%` }}
208
+ ></div>
209
+ </div>
210
+ </div>
211
+ </div>
212
+ );
213
+ }
214
+
215
+ return (
216
+ <div className="flex flex-col gap-1">
217
+ <div className="flex items-center justify-between">
218
+ <div className="flex items-center gap-2 text-green-300 text-sm">
219
+ <CheckCircleIcon className="w-4 h-4" />
220
+ <span className="font-medium">Server Ready</span>
221
+ </div>
222
+
223
+ {/* Health info icon with tooltip */}
224
+ <div className="relative">
225
+ <InformationCircleIcon
226
+ className="w-4 h-4 text-gray-400 hover:text-gray-200 cursor-help transition-colors"
227
+ onMouseEnter={() => setShowTooltip(true)}
228
+ onMouseLeave={() => setShowTooltip(false)}
229
+ />
230
+ {showTooltip && renderHealthTooltip()}
231
+ </div>
232
+ </div>
233
+
234
+ <div className="text-xs text-gray-300">
235
+ <span>Completed: {serverStatus.total_completed} transcriptions</span>
236
+ </div>
237
+ </div>
238
+ );
239
+ };
240
+
241
+ export default ServerStatusIndicator;
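
Note: the component delegates polling to the store, whose implementation is not part of this diff. A hedged sketch of what a guarded polling pair could look like; the endpoint path and the 2-second interval are assumptions:

```ts
// Hypothetical polling helpers in the spirit of startStatusPolling/stopStatusPolling.
let pollHandle: ReturnType<typeof setInterval> | null = null;

async function fetchStatusOnce(onStatus: (status: unknown) => void): Promise<void> {
  try {
    const res = await fetch('/api/status'); // assumed endpoint
    if (res.ok) onStatus(await res.json());
  } catch {
    // On network errors, keep the last known status.
  }
}

function startStatusPolling(onStatus: (status: unknown) => void, intervalMs = 2000): void {
  if (pollHandle !== null) return; // guard: keep a single timer even if mounted twice
  void fetchStatusOnce(onStatus);
  pollHandle = setInterval(() => void fetchStatusOnce(onStatus), intervalMs);
}

function stopStatusPolling(): void {
  if (pollHandle !== null) {
    clearInterval(pollHandle);
    pollHandle = null;
  }
}
```
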
frontend/src/components/TermsModal.tsx ADDED
@@ -0,0 +1,675 @@
1
+ import {XMarkIcon} from "@heroicons/react/24/outline";
2
+
3
+ interface ModalProps {
4
+ isOpen: boolean;
5
+ onClose: () => void;
6
+ }
7
+
8
+ export function TOSModalComponent({isOpen, onClose}: ModalProps) {
9
+ if (!isOpen) return null;
10
+
11
+ return (
12
+ <div className="modal modal-open">
13
+ <div className="modal-box max-w-4xl bg-gray-800 text-white border border-gray-600">
14
+ <div className="flex justify-between items-center mb-6">
15
+ <h3 className="font-bold text-xl text-blue-300">
16
+ Omnilingual ASR Demo Supplemental Terms of Service
17
+ </h3>
18
+ <button
19
+ className="btn btn-sm btn-circle btn-ghost text-gray-300 hover:text-white hover:bg-gray-700"
20
+ onClick={onClose}
21
+ aria-label="Close modal"
22
+ >
23
+ <XMarkIcon className="w-5 h-5" />
24
+ </button>
25
+ </div>
26
+
27
+ <div className="space-y-4 max-h-96 overflow-y-auto">
28
+ <div className="bg-gray-700 p-4 rounded-lg border-l-4 border-blue-500">
29
+ <div className="text-gray-200 leading-relaxed space-y-4">
30
+ <p>
31
+ These Omnilingual ASR Demo Supplemental Terms of Service (these
32
+ "Supplemental Terms") govern your use of the Omnilingual ASR
33
+ demonstration experience hosted on Hugging Face (the "Demo").
34
+ The Demo showcases Meta's Omnilingual ASR research model for
35
+ transcribing audio and video files.
36
+ </p>
37
+ <p>
38
+ By using the Demo, you agree to be bound by these Supplemental
39
+ Terms, as well as Hugging Face's applicable terms and policies,
40
+ including Hugging Face's Terms of Service and Privacy Policy. If
41
+ you access any Meta service in connection with the Demo, Meta's
42
+ Terms of Service and Privacy Policy may also apply to that
43
+ separate access; however, as described below, Meta does not
44
+ receive your Demo data.
45
+ </p>
46
+
47
+ <ol className="list-decimal list-outside ml-6 space-y-4">
48
+ <li>
49
+ <strong>What the Demo Does</strong>
50
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
51
+ <li>
52
+ The Demo uses an automatic speech recognition model
53
+ developed by Meta to generate text transcriptions and
54
+ related metadata (e.g., timestamps) from audio or video
55
+ files you choose to provide ("Transcripts," and together
56
+ with any related metadata, "Outputs").
57
+ </li>
58
+ <li>
59
+ The audio and/or video files and any associated
60
+ information you provide (collectively, "Inputs") are
61
+ processed solely to provide you with Outputs via the Demo.
62
+ </li>
63
+ </ul>
64
+ </li>
65
+
66
+ <li>
67
+ <strong>Hosting; No Data to Meta</strong>
68
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
69
+ <li>
70
+ The Demo is hosted exclusively on Hugging Face
71
+ infrastructure. Meta does not operate the Demo's hosting
72
+ environment, and no Inputs, Outputs, or related usage data
73
+ from or about users of the Demo are transmitted to Meta.
74
+ </li>
75
+ <li>
76
+ Hugging Face (and, where applicable, the Hugging Face
77
+ Space owner) determines the means and purposes of
78
+ processing of any personal data you provide via the Demo.
79
+ Please review Hugging Face's Terms of Service and Privacy
80
+ Policy, which govern your use of the Demo and the handling
81
+ of your data on the platform.
82
+ </li>
83
+ </ul>
84
+ </li>
85
+
86
+ <li>
87
+ <strong>Your Rights and Restrictions</strong>
88
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
89
+ <li>
90
+ Subject to your compliance with these Supplemental Terms
91
+ and any applicable Hugging Face terms, Meta grants you a
92
+ limited, non-exclusive, non-transferable,
93
+ non-sublicensable license to access and use the Demo and
94
+ to access the Outputs made available through the Demo.
95
+ </li>
96
+ <li>
97
+ You may not use the Demo or Outputs for unlawful,
98
+ infringing, deceptive, harmful, or discriminatory
99
+ purposes, to violate others' rights (including privacy,
100
+ publicity, or intellectual property), or to cause or
101
+ attempt to cause harm.
102
+ </li>
103
+ <li>
104
+ You are responsible for ensuring you have all necessary
105
+ rights, permissions, and lawful bases to upload and
106
+ process Inputs, including any third-party content or
107
+ personal data contained in the Inputs. Do not upload
108
+ sensitive personal data unless you have a lawful basis and
109
+ it is permitted under applicable law.
110
+ </li>
111
+ </ul>
112
+ </li>
113
+
114
+ <li>
115
+ <strong>Inputs and Outputs; Licenses</strong>
116
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
117
+ <li>
118
+ As between you and Meta, you retain any rights you have in
119
+ your Inputs and Outputs. Because Meta does not receive
120
+ your Inputs or Outputs from the Demo, you do not grant
121
+ Meta any license to those materials via this Demo.
122
+ </li>
123
+ <li>
124
+ Your relationship regarding content licenses with Hugging
125
+ Face (and/or the Space owner) is governed by Hugging Face
126
+ terms. Meta makes no representations regarding how Hugging
127
+ Face handles or stores Inputs or Outputs.
128
+ </li>
129
+ </ul>
130
+ </li>
131
+
132
+ <li>
133
+ <strong>Feedback</strong>
134
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
135
+ <li>
136
+ If you choose to provide feedback to Meta outside the Demo
137
+ (for example, by emailing Meta or submitting feedback
138
+ through a Meta-managed channel), you grant Meta and its
139
+ affiliates a perpetual, irrevocable, non-exclusive,
140
+ sublicensable, transferable, royalty-free license to use
141
+ that feedback for any purpose. Do not include personal
142
+ data or confidential information in feedback.
143
+ </li>
144
+ </ul>
145
+ </li>
146
+
147
+ <li>
148
+ <strong>Third-Party Platform Terms</strong>
149
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
150
+ <li>
151
+ Your use of the Demo is also subject to Hugging Face's
152
+ Terms of Service and Privacy Policy. Any data collection,
153
+ retention, storage, caching, logging, or sharing related
154
+ to your use of the Demo is handled by Hugging Face (and/or
155
+ the Space owner) under those terms and policies. Meta is
156
+ not responsible for and has no control over Hugging Face's
157
+ processing.
158
+ </li>
159
+ </ul>
160
+ </li>
161
+
162
+ <li>
163
+ <strong>Age; Availability</strong>
164
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
165
+ <li>
166
+ You represent that you are at least the age of majority in
167
+ your jurisdiction. The Demo is made available for research
168
+ and demonstration purposes and may be modified, suspended,
169
+ or discontinued at any time without notice.
170
+ </li>
171
+ </ul>
172
+ </li>
173
+
174
+ <li>
175
+ <strong>Model and Demo Limitations</strong>
176
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
177
+ <li>
178
+ The Demo and Outputs may be inaccurate, incomplete,
179
+ untimely, or inappropriate for your use case. Do not rely
180
+ on the Demo or Outputs for medical, legal, safety, or
181
+ other high-risk purposes. You are solely responsible for
182
+ your use of the Demo and Outputs.
183
+ </li>
184
+ </ul>
185
+ </li>
186
+
187
+ <li>
188
+ <strong>Intellectual Property</strong>
189
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
190
+ <li>
191
+ Except for the limited rights expressly granted here, Meta
192
+ and its licensors retain all right, title, and interest in
193
+ and to the Demo, the Omnilingual ASR model, and all
194
+ related intellectual property.
195
+ </li>
196
+ </ul>
197
+ </li>
198
+
199
+ <li>
200
+ <strong>Termination</strong>
201
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
202
+ <li>
203
+ These Supplemental Terms are effective until terminated.
204
+ Meta may terminate or suspend your access to the Demo or
205
+ terminate these Supplemental Terms at any time for any
206
+ reason. Upon termination, your rights under these
207
+ Supplemental Terms will immediately cease.
208
+ </li>
209
+ </ul>
210
+ </li>
211
+
212
+ <li>
213
+ <strong>Disclaimers</strong>
214
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
215
+ <li>
216
+ The Demo and Outputs are provided "AS IS" and "AS
217
+ AVAILABLE." To the fullest extent permitted by law, Meta
218
+ and its affiliates disclaim all warranties, express or
219
+ implied, including warranties of merchantability, fitness
220
+ for a particular purpose, non-infringement, accuracy, and
221
+ quiet enjoyment. Meta does not warrant that the Demo will
222
+ be uninterrupted, secure, error-free, or that Outputs will
223
+ be accurate or reliable.
224
+ </li>
225
+ </ul>
226
+ </li>
227
+
228
+ <li>
229
+ <strong>Limitation of Liability</strong>
230
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
231
+ <li>
232
+ To the fullest extent permitted by law, in no event will
233
+ Meta or its affiliates, or their respective directors,
234
+ officers, employees, licensors, agents, or assigns be
235
+ liable for any indirect, incidental, consequential,
236
+ special, punitive, or exemplary damages, or lost profits,
237
+ arising out of or relating to the Demo or these
238
+ Supplemental Terms, even if advised of the possibility of
239
+ such damages. Without limiting the foregoing, the maximum
240
+ aggregate liability of Meta and its affiliates for all
241
+ claims arising out of or relating to the Demo or these
242
+ Supplemental Terms is fifty dollars (US $50).
243
+ </li>
244
+ </ul>
245
+ </li>
246
+
247
+ <li>
248
+ <strong>Indemnity</strong>
249
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
250
+ <li>
251
+ You will indemnify and hold harmless Meta and its
252
+ affiliates, and their respective directors, officers,
253
+ employees, licensors, agents, and assigns from and against
254
+ any claims, liabilities, damages, losses, and expenses
255
+ (including reasonable attorneys' fees) arising out of or
256
+ related to: (a) your Inputs or your use of the Demo or
257
+ Outputs; (b) your violation of these Supplemental Terms or
258
+ any applicable law; or (c) your infringement or violation
259
+ of any third-party rights.
260
+ </li>
261
+ </ul>
262
+ </li>
263
+
264
+ <li>
265
+ <strong>Governing Law; Venue</strong>
266
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
267
+ <li>
268
+ These Supplemental Terms and any claim, cause of action,
269
+ or dispute arising out of or relating to them or the Demo
270
+ will be governed by the laws of the State of California,
271
+ without regard to conflict of law principles. You agree
272
+ that all such claims will be brought exclusively in the
273
+ U.S. District Court for the Northern District of
274
+ California or the state courts located in San Mateo
275
+ County, California, and you consent to the personal
276
+ jurisdiction of those courts.
277
+ </li>
278
+ </ul>
279
+ </li>
280
+
281
+ <li>
282
+ <strong>Miscellaneous</strong>
283
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
284
+ <li>
285
+ If any provision of these Supplemental Terms is found
286
+ invalid or unenforceable, that provision will be enforced
287
+ to the maximum extent permissible and the remaining
288
+ provisions will remain in full force and effect. You may
289
+ not assign these Supplemental Terms without Meta's prior
290
+ written consent; any attempted assignment without consent
291
+ is void. Meta may assign these Supplemental Terms without
292
+ restriction. No waiver of any term will be deemed a
293
+ further or continuing waiver of such term or any other
294
+ term. These Supplemental Terms constitute the entire
295
+ agreement between you and Meta regarding the Demo and
296
+ supersede all prior or contemporaneous understandings
297
+ regarding the Demo.
298
+ </li>
299
+ </ul>
300
+ </li>
301
+ </ol>
302
+
303
+ <div className="mt-4">
304
+ <strong>Contact</strong>
305
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
306
+ <li>
307
+ For questions about these Supplemental Terms, contact Meta
308
+ at [email protected]. Note: Do not include personal
309
+ data or any Demo Inputs or Outputs in your communications,
310
+ as Meta does not receive or process Demo data.
311
+ </li>
312
+ </ul>
313
+ </div>
314
+ </div>
315
+ </div>
316
+ </div>
317
+ </div>
318
+ </div>
319
+ );
320
+ }
321
+
322
+ export function AUPModalComponent({isOpen, onClose}: ModalProps) {
323
+ if (!isOpen) return null;
324
+
325
+ return (
326
+ <div className="modal modal-open">
327
+ <div className="modal-box max-w-4xl bg-gray-800 text-white border border-gray-600">
328
+ <div className="flex justify-between items-center mb-6">
329
+ <h3 className="font-bold text-xl text-blue-300">
330
+ Omnilingual ASR Demo Acceptable Use Policy
331
+ </h3>
332
+ <button
333
+ className="btn btn-sm btn-circle btn-ghost text-gray-300 hover:text-white hover:bg-gray-700"
334
+ onClick={onClose}
335
+ aria-label="Close modal"
336
+ >
337
+ <XMarkIcon className="w-5 h-5" />
338
+ </button>
339
+ </div>
340
+
341
+ <div className="space-y-4 max-h-96 overflow-y-auto">
342
+ <div className="bg-gray-700 p-4 rounded-lg border-l-4 border-blue-500">
343
+ <div className="text-gray-200 leading-relaxed space-y-4">
344
+ <p>
345
+ Meta is committed to promoting safe and responsible use of its
346
+ research tools, including the Omnilingual ASR Demo (the "Demo").
347
+ By accessing or using the Demo, you agree to comply with this
348
+ Acceptable Use Policy ("Policy") in addition to any applicable
349
+ Hugging Face terms and policies. The most recent version of this
350
+ Policy will be made available on the Demo's Hugging Face page.
351
+ </p>
352
+
353
+ <h4 className="font-semibold text-blue-200 mt-4">
354
+ Prohibited Uses
355
+ </h4>
356
+ <p>
357
+ To ensure the Demo is used safely and fairly, you agree that you
358
+ will not use, or allow others to use, the Demo or any outputs
359
+ from the Demo to:
360
+ </p>
361
+
362
+ <ol className="list-decimal list-outside ml-6 space-y-4">
363
+ <li>
364
+ <strong>Violate the Law or Others' Rights</strong>
365
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-2">
366
+ <li>
367
+ Engage in, promote, generate, contribute to, encourage,
368
+ plan, incite, or further illegal or unlawful activity or
369
+ content, including but not limited to:
370
+ <ul className="list-[circle] list-outside ml-6 mt-1 space-y-1">
371
+ <li>Violence or terrorism</li>
372
+ <li>
373
+ Exploitation or harm to children, including the
374
+ solicitation, creation, acquisition, or dissemination
375
+ of child exploitative content or failure to report
376
+ Child Sexual Abuse Material
377
+ </li>
378
+ <li>
379
+ Human trafficking, exploitation, and sexual violence
380
+ </li>
381
+ <li>
382
+ The illegal distribution of information or materials
383
+ to minors, including obscene materials, or failure to
384
+ employ legally required age-gating in connection with
385
+ such information or materials
386
+ </li>
387
+ <li>Sexual solicitation</li>
388
+ <li>Any other criminal activity</li>
389
+ </ul>
390
+ </li>
391
+ <li>
392
+ Engage in, promote, incite, or facilitate the harassment,
393
+ abuse, threatening, or bullying of individuals or groups
394
+ </li>
395
+ <li>
396
+ Engage in, promote, incite, or facilitate discrimination
397
+ or other unlawful or harmful conduct in the provision of
398
+ employment, credit, housing, or other essential goods and
399
+ services
400
+ </li>
401
+ <li>
402
+ Collect, process, disclose, generate, or infer health,
403
+ demographic, biometric, or other sensitive personal or
404
+ private information about individuals without all rights
405
+ and consents required by applicable laws
406
+ </li>
407
+ <li>
408
+ Infringe, misappropriate, or otherwise violate any
409
+ third-party rights, including intellectual property,
410
+ privacy, or publicity rights
411
+ </li>
412
+ <li>
413
+ Create, generate, or facilitate the creation of malicious
414
+ code, malware, computer viruses, or do anything else that
415
+ could disable, overburden, interfere with, or impair the
416
+ proper working, integrity, operation, or appearance of a
417
+ website or computer system
418
+ </li>
419
+ </ul>
420
+ </li>
421
+
422
+ <li>
423
+ <strong>Endanger Safety or Security</strong>
424
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-2">
425
+ <li>
426
+ Engage in, promote, incite, facilitate, or assist in the
427
+ planning or development of activities that present a risk
428
+ of death or bodily harm to individuals, including use of
429
+ the Demo related to:
430
+ <ul className="list-[circle] list-outside ml-6 mt-1 space-y-1">
431
+ <li>
432
+ Military, warfare, nuclear industries or applications,
433
+ espionage, or activities subject to the International
434
+ Traffic in Arms Regulations (ITAR)
435
+ </li>
436
+ <li>
437
+ Guns and illegal weapons (including weapon
438
+ development)
439
+ </li>
440
+ <li>
441
+ Illegal drugs and regulated/controlled substances
442
+ </li>
443
+ <li>
444
+ Operation of critical infrastructure, transportation
445
+ technologies, or heavy machinery
446
+ </li>
447
+ <li>
448
+ Self-harm or harm to others, including suicide,
449
+ cutting, and eating disorders
450
+ </li>
451
+ <li>
452
+ Any content intended to incite or promote violence,
453
+ abuse, or infliction of bodily harm
454
+ </li>
455
+ </ul>
456
+ </li>
457
+ </ul>
458
+ </li>
459
+
460
+ <li>
461
+ <strong>Deceive or Mislead Others</strong>
462
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
463
+ <li>
464
+ Generate, promote, or further fraud, scams, phishing,
465
+ multi-level marketing or pyramid schemes, or any other
466
+ fraudulent activities
467
+ </li>
468
+ <li>
469
+ Generate, promote, or further defamatory content,
470
+ including the creation of defamatory statements or other
471
+ content
472
+ </li>
473
+ <li>Generate, promote, or further distribute spam</li>
474
+ <li>
475
+ Impersonate another individual by depiction of their voice
476
+ or likeness without consent, authorization, or legal
477
+ right, including non-consensual sexual imagery
478
+ </li>
479
+ <li>
480
+ Represent that the use of the Demo or its outputs are
481
+ human-generated, or use outputs in a manner intended to
482
+ convince another person that they are communicating with a
483
+ human
484
+ </li>
485
+ <li>
486
+ Generate or facilitate false online engagement, including
487
+ fake reviews or other means of fake online engagement
488
+ </li>
489
+ <li>
490
+ Engage in or facilitate any form of academic dishonesty,
491
+ including plagiarism
492
+ </li>
493
+ </ul>
494
+ </li>
495
+
496
+ <li>
497
+ <strong>High-Risk or Sensitive Use Cases</strong>
498
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
499
+ <li>
500
+ Fail to appropriately disclose to end users any known
501
+ dangers of your AI system
502
+ </li>
503
+ <li>
504
+ Engage in automated government decision-making in
505
+ high-risk contexts, including law enforcement, criminal
506
+ justice, immigration, or asylum, without a qualified
507
+ person reviewing the outputs
508
+ </li>
509
+ <li>
510
+ Use the Demo or its outputs for any decision-making
511
+ related to health, financial, safety, or legal matters
512
+ </li>
513
+ </ul>
514
+ </li>
515
+
516
+ <li>
517
+ <strong>Adult Content</strong>
518
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-2">
519
+ <li>
520
+ Create, develop, access, or disseminate adult content,
521
+ including:
522
+ <ul className="list-[circle] list-outside ml-6 mt-1 space-y-1">
523
+ <li>Erotic, sexual, or romantic chats</li>
524
+ <li>Sexual solicitation</li>
525
+ <li>Pornography</li>
526
+ <li>
527
+ Content that describes or promotes sexual or adult
528
+ services
529
+ </li>
530
+ </ul>
531
+ </li>
532
+ </ul>
533
+ </li>
534
+ </ol>
535
+
536
+ <div className="mt-4">
537
+ <h4 className="font-semibold text-blue-200">
538
+ Reporting Violations
539
+ </h4>
540
+ <p className="mt-2">
541
+ If you become aware of any violation of this Policy, software
542
+ "bug," or other problem that could lead to a violation of this
543
+ Policy, please report it to: [email protected]. Do not
544
+ include personal data or any Demo Inputs or Outputs in your
545
+ communications, as Meta does not receive or process Demo data.
546
+ </p>
547
+ </div>
548
+
549
+ <div className="mt-4">
550
+ <h4 className="font-semibold text-blue-200">Enforcement</h4>
551
+ <p className="mt-2">
552
+ Meta reserves the right to investigate and take appropriate
553
+ action regarding any suspected or actual violation of this
554
+ Policy, including suspending or terminating access to the
555
+ Demo.
556
+ </p>
557
+ </div>
558
+
559
+ <div className="mt-4">
560
+ <h4 className="font-semibold text-blue-200">Contact</h4>
561
+ <p className="mt-2">
562
+ For questions about this Policy, contact
563
564
+ </p>
565
+ </div>
566
+ </div>
567
+ </div>
568
+ </div>
569
+ </div>
570
+ </div>
571
+ );
572
+ }
573
+
574
+ export function ModelReadmeModalComponent({isOpen, onClose}: ModalProps) {
575
+ if (!isOpen) return null;
576
+
577
+ return (
578
+ <div className="modal modal-open">
579
+ <div className="modal-box max-w-4xl bg-gray-800 text-white border border-gray-600">
580
+ <div className="flex justify-between items-center mb-6">
581
+ <h3 className="font-bold text-xl text-blue-300">Model Readme</h3>
582
+ <button
583
+ className="btn btn-sm btn-circle btn-ghost text-gray-300 hover:text-white hover:bg-gray-700"
584
+ onClick={onClose}
585
+ aria-label="Close modal"
586
+ >
587
+ <XMarkIcon className="w-5 h-5" />
588
+ </button>
589
+ </div>
590
+
591
+ <div className="space-y-4 max-h-96 overflow-y-auto">
592
+ <div className="bg-gray-700 p-4 rounded-lg border-l-4 border-blue-500">
593
+ <div className="text-gray-200 leading-relaxed space-y-4">
594
+ <div>
595
+ <h4 className="font-semibold text-blue-200">Intended Use</h4>
596
+ <p className="mt-2">
597
+ The Omnilingual ASR model is designed to transcribe audio and
598
+ video files into text, supporting a wide range of languages.
599
+ Users are encouraged to explore the model's capabilities in a
600
+ responsible manner, consistent with applicable laws and
601
+ ethical guidelines.
602
+ </p>
603
+ </div>
604
+
605
+ <div>
606
+ <h4 className="font-semibold text-blue-200">Out-of-scope</h4>
607
+ <p className="mt-2">
608
+ The Omnilingual ASR model is intended for commercial and
609
+ research use in multiple languages. The following uses are
610
+ considered out of scope for the Omnilingual ASR model and are
611
+ strongly discouraged:
612
+ </p>
613
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-2">
614
+ <li>
615
+ Uses that violate laws or infringe the rights of others,
616
+ including generating, promoting, or disseminating content
617
+ that is illegal, harassing, discriminatory, or otherwise
618
+ harmful.
619
+ </li>
620
+ <li>
621
+ Processing, generating, or inferring sensitive personal
622
+ information (such as health, demographic, biometric, or
623
+ private data) without appropriate rights and consents
624
+ required by law.
625
+ </li>
626
+ <li>
627
+ Use in high-risk or sensitive contexts, including but not
628
+ limited to:
629
+ <ul className="list-[circle] list-outside ml-6 mt-1 space-y-1">
630
+ <li>
631
+ Medical, legal, financial, or safety-critical
632
+ decision-making
633
+ </li>
634
+ <li>
635
+ Law enforcement, criminal justice, immigration, or
636
+ asylum processes
637
+ </li>
638
+ <li>
639
+ Operation of critical infrastructure or heavy machinery
640
+ </li>
641
+ </ul>
642
+ </li>
643
+ <li>
644
+ Generation or dissemination of adult content, including
645
+ erotic, sexual, or pornographic material.
646
+ </li>
647
+ <li>
648
+ Uses intended to deceive, mislead, impersonate others, or
649
+ facilitate fraud, scams, or disinformation.
650
+ </li>
651
+ <li>
652
+ Uses to create, promote, or distribute spam, malware, or
653
+ malicious code.
654
+ </li>
655
+ </ul>
656
+ </div>
657
+
658
+ <div>
659
+ <h4 className="font-semibold text-blue-200">Disclaimer</h4>
660
+ <p className="mt-2">
661
+ This model and its outputs may be inaccurate, incomplete, or
662
+ inappropriate for certain use cases. Users are solely
663
+ responsible for their own use of the model, including
664
+ compliance with applicable laws and regulations. The model
665
+ should not be relied upon for any high-risk or sensitive
666
+ applications.
667
+ </p>
668
+ </div>
669
+ </div>
670
+ </div>
671
+ </div>
672
+ </div>
673
+ </div>
674
+ );
675
+ }
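
Note: all three modals share the same controlled isOpen/onClose contract, so a host needs only one piece of state to decide which modal (if any) is open. A minimal hypothetical wiring sketch:

```tsx
// Hypothetical host for the legal modals (illustration only).
import React, { useState } from 'react';
import { TOSModalComponent, AUPModalComponent } from './TermsModal';

function LegalLinks() {
  const [open, setOpen] = useState<'tos' | 'aup' | null>(null);

  return (
    <>
      <button onClick={() => setOpen('tos')}>Terms of Service</button>
      <button onClick={() => setOpen('aup')}>Acceptable Use Policy</button>
      <TOSModalComponent isOpen={open === 'tos'} onClose={() => setOpen(null)} />
      <AUPModalComponent isOpen={open === 'aup'} onClose={() => setOpen(null)} />
    </>
  );
}
```
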
frontend/src/components/TipsNotice.tsx ADDED
@@ -0,0 +1,25 @@
1
+ import React from 'react';
2
+ import { TRANSCRIPTION_LIMITS } from '../utils/transcriptionWarnings';
3
+
4
+ const TipsNotice: React.FC = () => {
5
+ return (
6
+ <div className="mb-4 p-3 bg-blue-900/30 rounded-lg border border-blue-600/50">
7
+ <div className="mb-2">
8
+ <h3 className="text-sm font-semibold text-blue-300">For Best Results</h3>
9
+ </div>
10
+
11
+ <div className="text-xs text-blue-200 mb-3 space-y-1">
12
+ <div>Duration: {TRANSCRIPTION_LIMITS.MIN_DURATION} seconds to {Math.floor(TRANSCRIPTION_LIMITS.MAX_DURATION / 60)} minutes</div>
13
+ <div>File size: Under {TRANSCRIPTION_LIMITS.MAX_FILE_SIZE}MB</div>
14
+ <div>Quality: Clear speech, minimal background noise</div>
15
+ </div>
16
+
17
+ <div className="text-xs text-blue-300/70 italic">
18
+ Too short: insufficient context for model accuracy<br/>
19
+ Too long/large: may exceed server resources
20
+ </div>
21
+ </div>
22
+ );
23
+ };
24
+
25
+ export default TipsNotice;
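
Note: TipsNotice reads its numbers from TRANSCRIPTION_LIMITS in utils/transcriptionWarnings, which is not included in this excerpt. From the usage above (durations in seconds, file size in MB), the constant plausibly has the shape below; the concrete values are assumptions:

```ts
// Hypothetical shape of TRANSCRIPTION_LIMITS inferred from TipsNotice's usage.
// Units: durations in seconds, file size in megabytes. Values are illustrative only.
export const TRANSCRIPTION_LIMITS = {
  MIN_DURATION: 3,    // seconds
  MAX_DURATION: 600,  // seconds; rendered as minutes in TipsNotice
  MAX_FILE_SIZE: 100, // MB
} as const;
```
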
frontend/src/components/TranscriptionControls.tsx ADDED
@@ -0,0 +1,152 @@
1
+ import React, { useState, useEffect } from 'react';
2
+ import { useTranscriptionStore } from '../stores/transcriptionStore';
3
+ import LanguageSelector from './LanguageSelector';
4
+ import TranscriptionWarningModal from './TranscriptionWarningModal';
5
+ import { checkTranscriptionWarnings, getMediaDuration, WARNING_MESSAGES } from '../utils/transcriptionWarnings';
6
+
7
+ type WarningType = keyof typeof WARNING_MESSAGES;
8
+
9
+ const TranscriptionControls: React.FC = () => {
10
+ const {
11
+ file,
12
+ transcription,
13
+ isLoading,
14
+ isProcessingVideo,
15
+ error,
16
+ serverStatus,
17
+ selectedLanguage,
18
+ selectedScript,
19
+ handleTranscribe,
20
+ setSelectedLanguageAndScript,
21
+ } = useTranscriptionStore();
22
+
23
+ const [showWarningModal, setShowWarningModal] = useState(false);
24
+ const [warnings, setWarnings] = useState<WarningType[]>([]);
25
+ const [mediaDuration, setMediaDuration] = useState<number | undefined>(undefined);
26
+
27
+ // Get media duration when file changes
28
+ useEffect(() => {
29
+ if (file) {
30
+ getMediaDuration(file)
31
+ .then(duration => {
32
+ setMediaDuration(duration);
33
+ })
34
+ .catch(err => { // renamed from `error` to avoid shadowing the store's error field
35
+ console.warn('Could not get media duration:', err);
36
+ setMediaDuration(undefined);
37
+ });
38
+ } else {
39
+ setMediaDuration(undefined);
40
+ }
41
+ }, [file]);
42
+
43
+ // Handle transcribe button click with warning check
44
+ const handleTranscribeClick = async () => {
45
+ if (!file) return;
46
+
47
+ // Check for warnings
48
+ const detectedWarnings = checkTranscriptionWarnings({
49
+ file,
50
+ duration: mediaDuration,
51
+ });
52
+
53
+ if (detectedWarnings.length > 0) {
54
+ setWarnings(detectedWarnings);
55
+ setShowWarningModal(true);
56
+ } else {
57
+ // No warnings, proceed directly
58
+ await handleTranscribe();
59
+ }
60
+ };
61
+
62
+ // Handle warning modal acceptance
63
+ const handleWarningAccept = async () => {
64
+ setShowWarningModal(false);
65
+ await handleTranscribe();
66
+ };
67
+
68
+ // Handle warning modal cancellation
69
+ const handleWarningCancel = () => {
70
+ setShowWarningModal(false);
71
+ setWarnings([]);
72
+ };
73
+
74
+ // Only show controls if file exists but no transcription yet
75
+ if (!file || transcription) {
76
+ return null;
77
+ }
78
+
79
+ return (
80
+ <>
81
+ <div className="bg-gray-800 border-t border-gray-700 p-4">
82
+ <div className="max-w-4xl mx-auto">
83
+ {/* Error Display */}
84
+ {error && (
85
+ <div className="mb-4 p-3 bg-red-600 rounded-lg">
86
+ <div className="text-sm font-medium text-white">Error</div>
87
+ <div className="text-sm text-red-100">{error}</div>
88
+ </div>
89
+ )}
90
+
91
+ {/* Controls Container */}
92
+ <div className="flex flex-col sm:flex-row items-center justify-center gap-4">
93
+ {/* Language Selection */}
94
+ <div className="flex flex-col items-center">
95
+ <div className="tooltip tooltip-bottom" data-tip="Select the primary language spoken in your audio/video file. This helps the AI model optimize its transcription accuracy by using language-specific acoustic models and vocabulary. Choosing the correct language significantly improves transcription quality and word recognition.">
96
+ <label className="text-sm font-medium text-gray-300 mb-2 cursor-help">
97
+ Transcription Language <span className="text-red-400">*</span>
98
+ </label>
99
+ </div>
100
+ <div className="w-80">
101
+ <LanguageSelector
102
+ selectedLanguage={selectedLanguage}
103
+ selectedScript={selectedScript}
104
+ onLanguageAndScriptSelect={setSelectedLanguageAndScript}
105
+ disabled={isLoading || serverStatus?.is_busy}
106
+ />
107
+ </div>
108
+ </div>
109
+
110
+ {/* Transcribe Button */}
111
+ <div className="flex flex-col items-center">
112
+ <label className="text-sm font-medium text-gray-300 mb-2">
113
+ {serverStatus?.is_busy ? (
114
+ <span className="text-orange-400">Server is processing a request</span>
115
+ ) : (
116
+ <span className="opacity-0">Action</span>
117
+ )}
118
+ </label>
119
+ <div className={!selectedLanguage ? "tooltip tooltip-bottom" : ""} data-tip={!selectedLanguage ? "Please select a transcription language to continue" : ""}>
120
+ <button
121
+ onClick={handleTranscribeClick}
122
+ disabled={isLoading || serverStatus?.is_busy || !selectedLanguage}
123
+ className="px-8 py-3 bg-green-600 hover:bg-green-700 disabled:bg-gray-600 rounded-lg text-sm font-medium transition-colors text-white min-w-32"
124
+ >
125
+ {isLoading
126
+ ? isProcessingVideo
127
+ ? "Processing..."
128
+ : "Transcribing..."
129
+ : serverStatus?.is_busy
130
+ ? "Server Busy"
131
+ : !selectedLanguage
132
+ ? "Select Language"
133
+ : "Transcribe"}
134
+ </button>
135
+ </div>
136
+ </div>
137
+ </div>
138
+ </div>
139
+ </div>
140
+
141
+ {/* Warning Modal */}
142
+ <TranscriptionWarningModal
143
+ isOpen={showWarningModal}
144
+ warnings={warnings}
145
+ onAccept={handleWarningAccept}
146
+ onCancel={handleWarningCancel}
147
+ />
148
+ </>
149
+ );
150
+ };
151
+
152
+ export default TranscriptionControls;
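
Note: getMediaDuration (imported from utils/transcriptionWarnings, not shown in this excerpt) only needs media metadata, so one workable implementation loads the file into an off-screen media element via an object URL. A hedged sketch; the repo's actual util may differ:

```ts
// One possible getMediaDuration implementation (sketch, not necessarily the repo's).
function getMediaDuration(file: File): Promise<number> {
  return new Promise((resolve, reject) => {
    const url = URL.createObjectURL(file);
    const el: HTMLMediaElement = file.type.startsWith('video/')
      ? document.createElement('video')
      : document.createElement('audio');
    el.preload = 'metadata'; // duration becomes available once metadata loads
    el.onloadedmetadata = () => {
      URL.revokeObjectURL(url);
      resolve(el.duration);
    };
    el.onerror = () => {
      URL.revokeObjectURL(url);
      reject(new Error('Could not read media metadata'));
    };
    el.src = url;
  });
}
```
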
frontend/src/components/TranscriptionPlayer.tsx ADDED
@@ -0,0 +1,221 @@
+ import {useRef, useEffect, useCallback} from "react";
+ import {AlignedSegment} from "../services/transcriptionApi";
+ import {useTranscriptionStore} from "../stores/transcriptionStore";
+ import {useMediaTimeSync} from "../hooks/useMediaTimeSync";
+ import {
+   SUPPORTED_AUDIO_FORMATS,
+   SUPPORTED_VIDEO_FORMATS,
+   CODEC_INFO,
+ } from "../utils/mediaTypes";
+ import MediaPlayer from "./MediaPlayer";
+ import CanvasTimeline from "./CanvasTimeline";
+ import MinimapTimeline from "./MinimapTimeline";
+ import TranscriptionControls from "./TranscriptionControls";
+ import FullTranscription from "./FullTranscription";
+ import ErrorBoundary from "./ErrorBoundary";
+
+ export default function TranscriptionPlayer() {
+   // Get state from store
+   const {file, mediaUrl, transcription, isLoading, isProcessingVideo} =
+     useTranscriptionStore();
+
+   // Get actions from store
+   const {
+     handleTimeUpdate: updateTimeInStore,
+     setSelectedSegmentIndex,
+     selectedSegmentIndex,
+     setMediaRefs,
+   } = useTranscriptionStore();
+   const audioRef = useRef<HTMLAudioElement>(null);
+   const videoRef = useRef<HTMLVideoElement>(null);
+   const canvasTimelineRef = useRef<HTMLDivElement>(null);
+
+   // Set media refs in store for centralized seeking
+   useEffect(() => {
+     setMediaRefs(audioRef, videoRef);
+   }, [setMediaRefs]);
+
+   const handleTimeUpdate = useCallback(() => {
+     const mediaElement = audioRef.current || videoRef.current;
+     if (mediaElement && transcription?.aligned_segments) {
+       const mediaCurrentTime = mediaElement.currentTime;
+
+       // Find the segment that contains the current playback time
+       const activeIndex = transcription.aligned_segments.findIndex(
+         (segment) =>
+           mediaCurrentTime >= segment.start && mediaCurrentTime <= segment.end
+       );
+
+       // If no exact match, fall back to the closest segment within a small tolerance
+       let finalActiveIndex: number | null = activeIndex;
+       if (activeIndex === -1) {
+         let closestIndex = -1;
+         let minDistance = Infinity;
+
+         transcription.aligned_segments.forEach((segment, index) => {
+           const distance = Math.min(
+             Math.abs(mediaCurrentTime - segment.start),
+             Math.abs(mediaCurrentTime - segment.end)
+           );
+           if (distance < minDistance && distance < 0.5) {
+             // 0.5 second tolerance
+             minDistance = distance;
+             closestIndex = index;
+           }
+         });
+
+         finalActiveIndex = closestIndex >= 0 ? closestIndex : null;
+       }
+
+       updateTimeInStore();
+
+       // Auto-select the active segment only if:
+       // 1. We found an active segment
+       // 2. Either no segment is selected, or the active segment changed
+       if (
+         finalActiveIndex !== null &&
+         selectedSegmentIndex !== finalActiveIndex
+       ) {
+         setSelectedSegmentIndex(finalActiveIndex);
+       }
+     }
+   }, [
+     transcription,
+     updateTimeInStore,
+     selectedSegmentIndex,
+     setSelectedSegmentIndex,
+   ]);
+
+   const handleSeekToSegment = (segment: AlignedSegment) => {
+     const mediaElement = audioRef.current || videoRef.current;
+     if (mediaElement) {
+       mediaElement.currentTime = segment.start;
+       // Immediately update the store to sync the progress indicator
+       handleTimeUpdate();
+     }
+   };
+
+   // Use media time sync hook for continuous time updates during playback
+   useMediaTimeSync({
+     audioRef,
+     videoRef,
+     onTimeUpdate: handleTimeUpdate,
+     transcription,
+   });
+
+   // Cleanup media URL on unmount
+   useEffect(() => {
+     return () => {
+       if (mediaUrl) {
+         URL.revokeObjectURL(mediaUrl);
+       }
+     };
+   }, [mediaUrl]);
+
+   return (
+     <div className="flex-1 min-w-0 flex flex-col bg-black">
+       {/* Media Player */}
+       {file && (
+         <ErrorBoundary componentName="MediaPlayer">
+           <MediaPlayer
+             audioRef={audioRef}
+             videoRef={videoRef}
+             onTimeUpdate={handleTimeUpdate}
+           />
+         </ErrorBoundary>
+       )}
+
+       {/* Transcription Controls */}
+       <ErrorBoundary componentName="TranscriptionControls">
+         <TranscriptionControls />
+       </ErrorBoundary>
+
+       {/* Full Transcription */}
+       <ErrorBoundary componentName="FullTranscription">
+         <FullTranscription />
+       </ErrorBoundary>
+
+       {/* Transcription Timeline */}
+       {transcription && (
+         <>
+           {/* Minimap Timeline */}
+           <ErrorBoundary componentName="MinimapTimeline">
+             <MinimapTimeline
+               audioRef={audioRef}
+               videoRef={videoRef}
+               canvasTimelineRef={canvasTimelineRef}
+             />
+           </ErrorBoundary>
+
+           {/* Canvas Timeline */}
+           {/* <ErrorBoundary componentName="CanvasTimeline">
+             <CanvasTimeline
+               audioRef={audioRef}
+               videoRef={videoRef}
+               onSeekToSegment={handleSeekToSegment}
+               onTimeUpdate={handleTimeUpdate}
+               ref={canvasTimelineRef}
+             />
+           </ErrorBoundary> */}
+         </>
+       )}
+
+       {/* Transcription Loading State */}
+       {file && !transcription && (isLoading || isProcessingVideo) && (
+         <div className="flex-1 flex items-center justify-center bg-gray-900 border-t border-gray-700">
+           <div className="text-center text-white">
+             <div className="mb-4">
+               <div className="animate-spin rounded-full h-12 w-12 border-b-2 border-blue-500 mx-auto"></div>
+             </div>
+             <div className="text-2xl md:text-3xl mb-3 font-semibold">
+               {file?.type.startsWith("video/")
+                 ? "Processing Video..."
+                 : "Transcribing Audio..."}
+             </div>
+             <div className="text-base md:text-lg text-gray-400 max-w-md mx-auto">
+               {file?.type.startsWith("video/")
+                 ? "Server is extracting audio and generating transcription"
+                 : "Converting speech to text"}
+               {/* : "Converting speech to text with timestamps"} */}
+             </div>
+           </div>
+         </div>
+       )}
+
+       {/* No File State */}
+       {!file && (
+         <div className="flex-1 flex items-center justify-center">
+           <div className="text-center text-gray-400">
+             <div className="text-6xl mb-4">🎵</div>
+             <div className="text-xl mb-2">Upload Audio</div>
+             <div className="text-sm mb-4">
+               Choose an audio file from the panel on the left, drag and drop
+               one anywhere, or record audio to get started with transcription
+             </div>
+
+             {/* Supported File Types */}
+             <div className="text-xs text-gray-500 max-w-md mx-auto">
+               {/* Audio formats section */}
+               <div className="text-center mb-3">
+                 <div className="font-medium text-gray-400 mb-1">
+                   Audio Formats
+                 </div>
+                 <div className="text-xs text-gray-500">
+                   {SUPPORTED_AUDIO_FORMATS.join(" • ")}
+                 </div>
+               </div>
+
+               {/* Codec info */}
+               <div className="text-center">
+                 <div className="text-xs text-gray-400 opacity-75">
+                   Recommended: {CODEC_INFO.audio.common.slice(0, 2).join(", ")}{" "}
+                   codecs
+                 </div>
+               </div>
+             </div>
+           </div>
+         </div>
+       )}
+     </div>
+   );
+ }
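
handleTimeUpdate above resolves the active segment in two passes: an exact containment check first, then a nearest-boundary fallback within a 0.5 s tolerance, so short gaps between segments don't drop the highlight. The same lookup as a standalone sketch — the function name is illustrative, and only start/end are needed from AlignedSegment:

// Illustrative restatement of the two-pass lookup (not in the commit).
function findActiveSegment(
  segments: {start: number; end: number}[],
  t: number,
  tolerance = 0.5
): number | null {
  // Pass 1: a segment that contains the playhead wins outright.
  const exact = segments.findIndex((s) => t >= s.start && t <= s.end);
  if (exact !== -1) return exact;

  // Pass 2: otherwise take the segment whose nearest edge is within tolerance.
  let best = -1;
  let bestDist = Infinity;
  segments.forEach((s, i) => {
    const d = Math.min(Math.abs(t - s.start), Math.abs(t - s.end));
    if (d < bestDist && d < tolerance) {
      bestDist = d;
      best = i;
    }
  });
  return best >= 0 ? best : null;
}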
frontend/src/components/TranscriptionSideBar.tsx ADDED
@@ -0,0 +1,172 @@
+ import React, {useRef, useState} from "react";
+ import {useTranscriptionStore} from "../stores/transcriptionStore";
+ import ServerStatusIndicator from "./ServerStatusIndicator";
+ import FeedbackCard from "./FeedbackCard";
+ import TipsNotice from "./TipsNotice";
+ import QuickGuide from "./QuickGuide";
+ import {
+   TOSModalComponent,
+   AUPModalComponent,
+   ModelReadmeModalComponent,
+ } from "./TermsModal";
+
+ const TranscriptionSideBar: React.FC = () => {
+   const fileInputRef = useRef<HTMLInputElement>(null);
+   const [isTOSModalOpen, setIsTOSModalOpen] = useState(false);
+   const [isAUPModalOpen, setIsAUPModalOpen] = useState(false);
+   const [isModelReadmeModalOpen, setIsModelReadmeModalOpen] = useState(false);
+
+   const {
+     file,
+     transcription,
+     error,
+     isRecording,
+     handleFileSelect,
+     startRecording,
+   } = useTranscriptionStore();
+
+   const handleFileInputChange = (
+     event: React.ChangeEvent<HTMLInputElement>
+   ) => {
+     const selectedFile = event.target.files?.[0];
+     if (selectedFile) {
+       handleFileSelect(selectedFile);
+     }
+   };
+
+   return (
+     <div className="h-full p-4 overflow-y-auto">
+       <h2 className="text-lg font-bold mb-4">
+         Omnilingual ASR Media Transcription
+       </h2>
+
+       {/* Server Status Indicator */}
+       <div className="mb-4 p-3 bg-gray-900 rounded-lg border border-gray-600">
+         <ServerStatusIndicator />
+       </div>
+
+       {/* File Upload */}
+       <div className="mb-4">
+         <h3 className="text-sm font-semibold mb-2">Upload Media</h3>
+         <input
+           ref={fileInputRef}
+           type="file"
+           accept="audio/*"
+           onChange={handleFileInputChange}
+           className="hidden"
+         />
+         {!isRecording && (
+           <>
+             <button
+               onClick={() => fileInputRef.current?.click()}
+               className="w-full p-2 bg-blue-600 hover:bg-blue-700 rounded text-sm transition-colors mb-2 flex items-center justify-center gap-2"
+             >
+               <svg className="w-4 h-4" fill="currentColor" viewBox="0 0 20 20">
+                 <path
+                   fillRule="evenodd"
+                   d="M3 17a1 1 0 011-1h12a1 1 0 110 2H4a1 1 0 01-1-1zm3.293-7.707a1 1 0 011.414 0L9 10.586V3a1 1 0 112 0v7.586l1.293-1.293a1 1 0 111.414 1.414l-3 3a1 1 0 01-1.414 0l-3-3a1 1 0 010-1.414z"
+                   clipRule="evenodd"
+                 />
+               </svg>
+               {file ? "Choose Different File" : "Choose File"}
+             </button>
+
+             {/* Recording Buttons */}
+             <div className="space-y-2">
+               <button
+                 onClick={() => startRecording("audio")}
+                 className="w-full p-2 bg-red-700 hover:bg-red-800 rounded text-sm transition-colors flex items-center justify-center gap-2"
+               >
+                 <svg
+                   className="w-4 h-4"
+                   fill="currentColor"
+                   viewBox="0 0 20 20"
+                 >
+                   <path
+                     fillRule="evenodd"
+                     d="M7 4a3 3 0 016 0v4a3 3 0 11-6 0V4zm4 10.93A7.001 7.001 0 0017 8a1 1 0 10-2 0A5 5 0 015 8a1 1 0 00-2 0 7.001 7.001 0 006 6.93V17H6a1 1 0 100 2h8a1 1 0 100-2h-3v-2.07z"
+                     clipRule="evenodd"
+                   />
+                 </svg>
+                 Record Audio
+               </button>
+
+               {/* <button
+                 onClick={() => startRecording("video")}
+                 className="w-full p-2 bg-red-700 hover:bg-red-800 rounded text-sm transition-colors flex items-center justify-center gap-2"
+               >
+                 <svg
+                   className="w-4 h-4"
+                   fill="currentColor"
+                   viewBox="0 0 20 20"
+                 >
+                   <path d="M2 6a2 2 0 012-2h6a2 2 0 012 2v8a2 2 0 01-2 2H4a2 2 0 01-2-2V6zM14.553 7.106A1 1 0 0014 8v4a1 1 0 00.553.894l2 1A1 1 0 0018 13V7a1 1 0 00-1.447-.894l-2 1z" />
+                 </svg>
+                 Record Video
+               </button> */}
+             </div>
+           </>
+         )}
+
+         {file && !isRecording && (
+           <div className="mt-2 p-2 bg-gray-700 rounded">
+             <div className="text-xs font-medium truncate">{file.name}</div>
+             <div className="text-xs text-gray-400">
+               {(file.size / 1024 / 1024).toFixed(1)} MB
+             </div>
+           </div>
+         )}
+       </div>
+
+       {/* Error Display */}
+       {error && transcription && (
+         <div className="mb-4 p-2 bg-red-600 rounded">
+           <div className="text-xs font-medium">Error</div>
+           <div className="text-xs">{error}</div>
+         </div>
+       )}
+
+       <QuickGuide />
+       <TipsNotice />
+       <FeedbackCard />
+
+       {/* Terms and Policy Buttons */}
+       <div className="mt-4 space-y-2">
+         <button
+           onClick={() => setIsTOSModalOpen(true)}
+           className="w-full p-2 bg-green-200 hover:bg-green-300 text-gray-800 rounded text-sm transition-colors"
+         >
+           Terms of Service
+         </button>
+         <button
+           onClick={() => setIsAUPModalOpen(true)}
+           className="w-full p-2 bg-green-200 hover:bg-green-300 text-gray-800 rounded text-sm transition-colors"
+         >
+           Acceptable Use Policy
+         </button>
+         <button
+           onClick={() => setIsModelReadmeModalOpen(true)}
+           className="w-full p-2 bg-green-200 hover:bg-green-300 text-gray-800 rounded text-sm transition-colors"
+         >
+           Model Readme
+         </button>
+       </div>
+
+       {/* Modals */}
+       <TOSModalComponent
+         isOpen={isTOSModalOpen}
+         onClose={() => setIsTOSModalOpen(false)}
+       />
+       <AUPModalComponent
+         isOpen={isAUPModalOpen}
+         onClose={() => setIsAUPModalOpen(false)}
+       />
+       <ModelReadmeModalComponent
+         isOpen={isModelReadmeModalOpen}
+         onClose={() => setIsModelReadmeModalOpen(false)}
+       />
+     </div>
+   );
+ };
+
+ export default TranscriptionSideBar;
frontend/src/components/TranscriptionWarningModal.tsx ADDED
@@ -0,0 +1,69 @@
+ import React from 'react';
+ import { WARNING_MESSAGES } from '../utils/transcriptionWarnings';
+
+ type WarningType = keyof typeof WARNING_MESSAGES;
+
+ interface TranscriptionWarningModalProps {
+   isOpen: boolean;
+   warnings: WarningType[];
+   onAccept: () => void;
+   onCancel: () => void;
+ }
+
+ const TranscriptionWarningModal: React.FC<TranscriptionWarningModalProps> = ({
+   isOpen,
+   warnings,
+   onAccept,
+   onCancel,
+ }) => {
+   if (!isOpen || warnings.length === 0) {
+     return null;
+   }
+
+   return (
+     <div className="fixed inset-0 z-50 flex items-center justify-center bg-black bg-opacity-50">
+       <div className="bg-gray-800 rounded-lg shadow-xl max-w-lg w-full mx-4 border border-gray-600">
+         <div className="p-6">
+           {/* Warning Messages */}
+           <div className="space-y-4 mb-6">
+             {warnings.map((warningType) => {
+               const warning = WARNING_MESSAGES[warningType];
+               return (
+                 <div
+                   key={warningType}
+                   className="p-4 rounded-lg border-l-4 bg-orange-900/20 border-orange-500"
+                 >
+                   <h4 className="font-medium text-sm text-orange-300">
+                     {warning.title}
+                   </h4>
+                   <p className="text-sm text-gray-300 mt-1">
+                     {warning.message}
+                   </p>
+                 </div>
+               );
+             })}
+           </div>
+
+           {/* Actions */}
+           <div className="flex flex-col sm:flex-row gap-3 justify-end">
+             <button
+               onClick={onCancel}
+               className="px-4 py-2 text-sm font-medium text-gray-300 bg-gray-700 hover:bg-gray-600 rounded-lg transition-colors"
+             >
+               Cancel
+             </button>
+             <button
+               onClick={onAccept}
+               className="px-4 py-2 text-sm font-medium text-white bg-orange-600 hover:bg-orange-700 rounded-lg transition-colors"
+             >
+               Proceed Anyway
+             </button>
+           </div>
+         </div>
+       </div>
+     </div>
+   );
+ };
+
+ export default TranscriptionWarningModal;
frontend/src/components/WelcomeModal.tsx ADDED
@@ -0,0 +1,94 @@
+ import { XMarkIcon } from '@heroicons/react/24/outline';
+
+ interface WelcomeModalProps {
+   isOpen: boolean;
+   onClose: () => void;
+ }
+
+ export default function WelcomeModal({ isOpen, onClose }: WelcomeModalProps) {
+   if (!isOpen) return null;
+
+   return (
+     <div className="modal modal-open">
+       <div className="modal-box max-w-4xl bg-gray-800 text-white border border-gray-600">
+         <div className="flex justify-between items-center mb-6">
+           <h3 className="font-bold text-xl text-blue-300">
+             Omnilingual ASR Media Transcription Tool
+           </h3>
+           <button
+             className="btn btn-sm btn-circle btn-ghost text-gray-300 hover:text-white hover:bg-gray-700"
+             onClick={onClose}
+             aria-label="Close modal"
+           >
+             <XMarkIcon className="w-5 h-5" />
+           </button>
+         </div>
+
+         <div className="space-y-4">
+           {/* Main Description */}
+           <div className="bg-gray-700 p-4 rounded-lg border-l-4 border-blue-500">
+             <h4 className="font-semibold text-blue-200 mb-2">About This Tool</h4>
+             <p className="text-gray-200 leading-relaxed">
+               This experimental transcription tool uses the Omnilingual ASR model to transcribe audio and video content
+               for <strong>low-resource languages</strong> around the world. Our goal is to help preserve and
+               document linguistic diversity by making transcription accessible for underrepresented languages.
+             </p>
+           </div>
+
+           {/* Important Disclaimer */}
+           <div className="bg-yellow-900/40 p-4 rounded-lg border-l-4 border-yellow-500">
+             <h4 className="font-semibold text-yellow-200 mb-2">⚠️ Important Disclaimer</h4>
+             <p className="text-yellow-100 leading-relaxed mb-3">
+               <strong>This is experimental software.</strong> While we strive for accuracy, transcriptions
+               are not perfect. You should always <strong>double-check the outputs and make edits accordingly</strong>
+               {" "}to ensure accuracy for your specific use case.
+             </p>
+             <p className="text-yellow-100 leading-relaxed mb-3">
+               <strong>Shared Server Limitations:</strong> Due to resource constraints, we can only process one request at a time on this shared server.
+             </p>
+             <p className="text-yellow-100 leading-relaxed">
+               <strong>Want dedicated access?</strong> Clone this HuggingFace Space or run on your own servers to remove server limitations. See the{' '}
+               <a
+                 href="https://huggingface.co/spaces/facebook/mms-transcriptions/blob/main/README.md"
+                 target="_blank"
+                 rel="noopener noreferrer"
+                 className="text-yellow-300 hover:text-yellow-200 underline transition-colors"
+               >
+                 setup guide
+               </a>
+               {" "}for instructions.
+             </p>
+           </div>
+
+           {/* Language Information */}
+           <div className="bg-gray-700 p-4 rounded-lg border-l-4 border-green-500">
+             <h4 className="font-semibold text-green-200 mb-2">Supported Languages</h4>
+             <p className="text-gray-200 leading-relaxed">
+               For this public demo, we've restricted transcription to low-resource languages with
+               <strong> error rates below 10%</strong>. This ensures the best possible experience while
+               focusing on languages that could benefit most from improved transcription tools.
+             </p>
+           </div>
+
+           {/* Cultural Impact */}
+           <div className="bg-purple-900/40 p-4 rounded-lg border-l-4 border-purple-500">
+             <h4 className="font-semibold text-purple-200 mb-2">Preserving Linguistic Heritage</h4>
+             <p className="text-purple-100 leading-relaxed">
+               Many of the world's languages lack digital transcription tools. By improving these models
+               for low-resource languages, we're contributing to the preservation of cultural heritage
+               and making digital content more accessible to diverse communities.
+             </p>
+           </div>
+         </div>
+
+         <div className="modal-action mt-6">
+           <button className="py-2 px-4 bg-blue-600 hover:bg-blue-700 rounded text-sm transition-colors" onClick={onClose}>
+             Get Started
+           </button>
+         </div>
+       </div>
+     </div>
+   );
+ }
frontend/src/hooks/useAudioAnalyzer.ts ADDED
@@ -0,0 +1,91 @@
+ import { useRef, useEffect, useState } from 'react';
+
+ interface UseAudioAnalyzerReturn {
+   audioData: Uint8Array;
+   analyser: AnalyserNode | null;
+   connectToStream: (stream: MediaStream) => void;
+   disconnect: () => void;
+ }
+
+ export const useAudioAnalyzer = (fftSize: number = 256): UseAudioAnalyzerReturn => {
+   const [audioData, setAudioData] = useState<Uint8Array>(new Uint8Array(fftSize / 2));
+   const [analyser, setAnalyser] = useState<AnalyserNode | null>(null);
+   const audioContextRef = useRef<AudioContext | null>(null);
+   const sourceRef = useRef<MediaStreamAudioSourceNode | null>(null);
+   const animationFrameRef = useRef<number | null>(null);
+
+   const connectToStream = (stream: MediaStream) => {
+     try {
+       // Clean up existing connections
+       disconnect();
+
+       // Create new audio context and analyser
+       const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
+       const analyserNode = audioContext.createAnalyser();
+       analyserNode.fftSize = fftSize;
+       analyserNode.smoothingTimeConstant = 0.8;
+
+       // Connect stream to analyser
+       const source = audioContext.createMediaStreamSource(stream);
+       source.connect(analyserNode);
+
+       // Store references
+       audioContextRef.current = audioContext;
+       sourceRef.current = source;
+       setAnalyser(analyserNode);
+
+       // Start updating audio data
+       const bufferLength = analyserNode.frequencyBinCount;
+       const dataArray = new Uint8Array(bufferLength);
+
+       const updateAudioData = () => {
+         if (analyserNode) {
+           analyserNode.getByteFrequencyData(dataArray);
+           setAudioData(new Uint8Array(dataArray));
+           animationFrameRef.current = requestAnimationFrame(updateAudioData);
+         }
+       };
+
+       updateAudioData();
+     } catch (error) {
+       console.error('Error setting up audio analyzer:', error);
+     }
+   };
+
+   const disconnect = () => {
+     // Cancel animation frame
+     if (animationFrameRef.current) {
+       cancelAnimationFrame(animationFrameRef.current);
+       animationFrameRef.current = null;
+     }
+
+     // Disconnect audio nodes
+     if (sourceRef.current) {
+       sourceRef.current.disconnect();
+       sourceRef.current = null;
+     }
+
+     // Close audio context
+     if (audioContextRef.current) {
+       audioContextRef.current.close();
+       audioContextRef.current = null;
+     }
+
+     setAnalyser(null);
+     setAudioData(new Uint8Array(fftSize / 2));
+   };
+
+   // Cleanup on unmount
+   useEffect(() => {
+     return () => {
+       disconnect();
+     };
+   }, []);
+
+   return {
+     audioData,
+     analyser,
+     connectToStream,
+     disconnect,
+   };
+ };
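
A minimal consumer sketch for the hook above, assuming a component that meters microphone input; the getUserMedia wiring and component name are illustrative, not part of this commit:

// Hypothetical consumer of useAudioAnalyzer (not in the commit).
import {useEffect} from "react";
import {useAudioAnalyzer} from "./useAudioAnalyzer";

function MicLevelMeter() {
  const {audioData, connectToStream, disconnect} = useAudioAnalyzer(256);

  useEffect(() => {
    navigator.mediaDevices
      .getUserMedia({audio: true})
      .then(connectToStream)
      .catch((err) => console.error("Microphone access failed:", err));
    return disconnect;
  }, []);

  // Collapse the frequency bins into a single 0-255 level.
  const level = audioData.reduce((sum, v) => sum + v, 0) / audioData.length;
  return <progress max={255} value={level} />;
}

Note that connectToStream tears down any previous AudioContext before creating a new one, so calling it again (for example when the user switches input devices) is safe.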
frontend/src/hooks/useDragAndDrop.ts ADDED
@@ -0,0 +1,118 @@
+ import { useCallback, useState, DragEvent, useRef } from 'react';
+
+ interface UseDragAndDropOptions {
+   onFileDropped: (file: File) => void;
+   acceptedTypes?: string[];
+ }
+
+ interface UseDragAndDropReturn {
+   isDragActive: boolean;
+   dragProps: {
+     onDrop: (event: DragEvent<HTMLDivElement>) => void;
+     onDragOver: (event: DragEvent<HTMLDivElement>) => void;
+     onDragEnter: (event: DragEvent<HTMLDivElement>) => void;
+     onDragLeave: (event: DragEvent<HTMLDivElement>) => void;
+   };
+ }
+
+ const isValidFileType = (file: File, acceptedTypes?: string[]): boolean => {
+   if (!acceptedTypes || acceptedTypes.length === 0) return true;
+
+   return acceptedTypes.some(type => {
+     if (type.endsWith('/*')) {
+       // Handle MIME type categories like 'video/*' or 'audio/*'
+       const category = type.slice(0, -2);
+       return file.type.startsWith(category + '/');
+     } else {
+       // Handle exact MIME types
+       return file.type === type;
+     }
+   });
+ };
+
+ export const useDragAndDrop = ({
+   onFileDropped,
+   acceptedTypes = ['video/*', 'audio/*']
+ }: UseDragAndDropOptions): UseDragAndDropReturn => {
+   const [isDragActive, setIsDragActive] = useState(false);
+   const dragCounter = useRef(0);
+   const dragLeaveTimeout = useRef<number | null>(null);
+
+   const handleDragEnter = useCallback((event: DragEvent<HTMLDivElement>) => {
+     event.preventDefault();
+     event.stopPropagation();
+
+     // Clear any pending drag leave timeout
+     if (dragLeaveTimeout.current) {
+       clearTimeout(dragLeaveTimeout.current);
+       dragLeaveTimeout.current = null;
+     }
+
+     dragCounter.current += 1;
+
+     if (event.dataTransfer?.items && event.dataTransfer.items.length > 0) {
+       setIsDragActive(true);
+     }
+   }, []);
+
+   const handleDragLeave = useCallback((event: DragEvent<HTMLDivElement>) => {
+     event.preventDefault();
+     event.stopPropagation();
+
+     dragCounter.current -= 1;
+
+     // Use a small timeout to prevent flickering when moving between child elements
+     dragLeaveTimeout.current = window.setTimeout(() => {
+       if (dragCounter.current === 0) {
+         setIsDragActive(false);
+       }
+     }, 10);
+   }, []);
+
+   const handleDragOver = useCallback((event: DragEvent<HTMLDivElement>) => {
+     event.preventDefault();
+     event.stopPropagation();
+
+     // Set the dropEffect to indicate this is a copy operation
+     if (event.dataTransfer) {
+       event.dataTransfer.dropEffect = 'copy';
+     }
+   }, []);
+
+   const handleDrop = useCallback((event: DragEvent<HTMLDivElement>) => {
+     event.preventDefault();
+     event.stopPropagation();
+
+     // Clear timeout and reset state
+     if (dragLeaveTimeout.current) {
+       clearTimeout(dragLeaveTimeout.current);
+       dragLeaveTimeout.current = null;
+     }
+
+     setIsDragActive(false);
+     dragCounter.current = 0;
+
+     const files = Array.from(event.dataTransfer?.files || []);
+
+     if (files.length > 0) {
+       const validFile = files.find(file => isValidFileType(file, acceptedTypes));
+
+       if (validFile) {
+         onFileDropped(validFile);
+       } else {
+         console.warn('Dropped file is not a supported type:', files[0]?.type);
+         // You could also call an onError callback here if needed
+       }
+     }
+   }, [onFileDropped, acceptedTypes]);
+
+   return {
+     isDragActive,
+     dragProps: {
+       onDrop: handleDrop,
+       onDragOver: handleDragOver,
+       onDragEnter: handleDragEnter,
+       onDragLeave: handleDragLeave,
+     },
+   };
+ };
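
The hook is consumed by spreading dragProps onto a container element, as TranscriptionPage does later in this commit. A minimal sketch (component and prop names illustrative, not part of the commit):

// Hypothetical drop zone built on useDragAndDrop (not in the commit).
import {useDragAndDrop} from "./useDragAndDrop";

function AudioDropZone({onFile}: {onFile: (f: File) => void}) {
  const {isDragActive, dragProps} = useDragAndDrop({
    onFileDropped: onFile,
    acceptedTypes: ["audio/*"], // wildcard categories and exact MIME types both work
  });

  return (
    <div
      {...dragProps}
      style={{outline: isDragActive ? "2px dashed #60a5fa" : "none"}}
    >
      Drop an audio file here
    </div>
  );
}

The dragCounter ref plus the 10 ms leave timeout exist because dragenter/dragleave fire once per child element; counting entries and debouncing the leave keeps isDragActive from flickering as the cursor crosses nested nodes.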
frontend/src/hooks/useMediaTimeSync.ts ADDED
@@ -0,0 +1,69 @@
+ import { useEffect } from 'react';
+ import { TranscriptionResponse } from '../services/transcriptionApi';
+
+ interface UseMediaTimeSyncProps {
+   audioRef: React.RefObject<HTMLAudioElement>;
+   videoRef: React.RefObject<HTMLVideoElement>;
+   onTimeUpdate: () => void;
+   transcription: TranscriptionResponse | null;
+ }
+
+ export const useMediaTimeSync = ({
+   audioRef,
+   videoRef,
+   onTimeUpdate,
+   transcription,
+ }: UseMediaTimeSyncProps) => {
+   useEffect(() => {
+     const mediaElement = audioRef.current || videoRef.current;
+     let frameId: number | null = null;
+
+     console.log('Setting up requestAnimationFrame for media element:', mediaElement?.tagName);
+
+     const smoothUpdate = () => {
+       if (mediaElement && !mediaElement.paused && !mediaElement.ended) {
+         onTimeUpdate();
+         frameId = requestAnimationFrame(smoothUpdate);
+       } else {
+         frameId = null;
+       }
+     };
+
+     if (mediaElement && transcription) {
+       const handlePlay = () => {
+         console.log('Media play event triggered');
+         if (frameId) {
+           cancelAnimationFrame(frameId);
+         }
+         frameId = requestAnimationFrame(smoothUpdate);
+       };
+
+       const handlePause = () => {
+         console.log('Media pause event triggered');
+         if (frameId) {
+           cancelAnimationFrame(frameId);
+           frameId = null;
+         }
+       };
+
+       mediaElement.addEventListener('play', handlePlay);
+       mediaElement.addEventListener('pause', handlePause);
+       mediaElement.addEventListener('ended', handlePause);
+
+       // Start if already playing
+       if (!mediaElement.paused) {
+         console.log('Media is already playing, starting animation loop');
+         handlePlay();
+       }
+
+       return () => {
+         if (frameId) {
+           cancelAnimationFrame(frameId);
+         }
+         mediaElement.removeEventListener('play', handlePlay);
+         mediaElement.removeEventListener('pause', handlePause);
+         mediaElement.removeEventListener('ended', handlePause);
+       };
+     }
+   }, [transcription, onTimeUpdate, audioRef, videoRef]);
+ };
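
The hook drives updates with requestAnimationFrame rather than the media element's native timeupdate event, which browsers typically fire only a few times per second; the per-frame loop keeps the playhead and segment highlight moving smoothly while playback is active and parks itself when it stops. The core pattern, reduced to a framework-free sketch (illustrative, not part of the commit):

// Per-frame sync loop gated on playback state (illustrative, not in the commit).
function startSmoothSync(media: HTMLMediaElement, onTick: () => void): () => void {
  let frameId: number | null = null;
  const loop = () => {
    if (!media.paused && !media.ended) {
      onTick(); // runs at display refresh rate, typically ~60 Hz
      frameId = requestAnimationFrame(loop);
    } else {
      frameId = null; // loop stops itself when playback stops
    }
  };
  const start = () => {
    if (frameId === null) frameId = requestAnimationFrame(loop);
  };
  media.addEventListener("play", start);
  if (!media.paused) start();
  return () => {
    media.removeEventListener("play", start);
    if (frameId !== null) cancelAnimationFrame(frameId);
  };
}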
frontend/src/hooks/useTimelineDragControls.ts ADDED
@@ -0,0 +1,416 @@
+ import { useState, useCallback, useEffect } from 'react';
+ import { AlignedSegment } from '../services/transcriptionApi';
+ import { SegmentWithTrack } from '../utils/trackUtils';
+ import { useTranscriptionStore } from '../stores/transcriptionStore';
+
+ interface UseTimelineDragControlsOptions {
+   segmentsWithTracks: SegmentWithTrack[];
+   displaySegments: AlignedSegment[];
+   geometryUtils: {
+     timeToX: (time: number) => number;
+     trackToY: (track: number) => number;
+     canvasXToTime: (canvasX: number) => number;
+     clientXToCanvasX: (clientX: number, canvasRef: React.RefObject<HTMLCanvasElement>) => number;
+     timelineWidth: number;
+   };
+   canvasRef: React.RefObject<HTMLCanvasElement>;
+   containerRef: React.RefObject<HTMLDivElement>;
+   mediaDuration: number;
+   constants: {
+     TRACK_HEIGHT: number;
+     TIMELINE_PADDING: number;
+   };
+ }
+
+ type DragType = 'move' | 'resize-start' | 'resize-end';
+
+ export const useTimelineDragControls = ({
+   segmentsWithTracks,
+   displaySegments,
+   geometryUtils,
+   canvasRef,
+   containerRef,
+   mediaDuration,
+   constants,
+ }: UseTimelineDragControlsOptions) => {
+   const { TRACK_HEIGHT, TIMELINE_PADDING } = constants;
+   const { timeToX, trackToY, canvasXToTime, clientXToCanvasX, timelineWidth } = geometryUtils;
+
+   // Get store actions
+   const {
+     seekToTime,
+     selectedSegmentIndex,
+     setSelectedSegmentIndex,
+     updateSegmentTiming,
+     finalizeSegmentPositioning,
+   } = useTranscriptionStore();
+
+   // Drag state
+   const [isDragging, setIsDragging] = useState(false);
+   const [isTimelineDragging, setIsTimelineDragging] = useState(false);
+   const [dragType, setDragType] = useState<DragType | null>(null);
+   const [dragStartX, setDragStartX] = useState(0);
+   const [dragStartTime, setDragStartTime] = useState(0);
+   const [dragSegmentIndex, setDragSegmentIndex] = useState<number | null>(null);
+   const [hoveredSegment, setHoveredSegment] = useState<number | null>(null);
+
+   // Find segment at a specific time
+   const findSegmentAtTime = useCallback((time: number) => {
+     for (let i = 0; i < displaySegments.length; i++) {
+       const segment = displaySegments[i];
+       if (time >= segment.start && time <= segment.end) {
+         return i;
+       }
+     }
+     return null;
+   }, [displaySegments]);
+
+   // Seek to position using centralized store function
+   const seekToPosition = useCallback((clientX: number) => {
+     const actualCanvasX = clientXToCanvasX(clientX, canvasRef);
+     const clickTime = canvasXToTime(actualCanvasX);
+     const clampedTime = Math.max(0, Math.min(clickTime, mediaDuration));
+
+     seekToTime(clampedTime);
+
+     // Auto-select segment at the current time position
+     const segmentAtTime = findSegmentAtTime(clampedTime);
+     if (segmentAtTime !== null) {
+       setSelectedSegmentIndex(segmentAtTime);
+     }
+   }, [clientXToCanvasX, canvasRef, canvasXToTime, mediaDuration, seekToTime, findSegmentAtTime, setSelectedSegmentIndex]);
+
+   // Find segment at position
+   const findSegmentAtPosition = useCallback((canvasX: number, canvasY: number) => {
+     for (let i = 0; i < segmentsWithTracks.length; i++) {
+       const segment = segmentsWithTracks[i];
+       const segmentX = timeToX(segment.start);
+       // Use actual time-based width for hit detection, with minimum 8px for very short segments
+       const actualWidth = timeToX(segment.end) - segmentX;
+       const segmentWidth = Math.max(actualWidth, 8);
+       const segmentY = trackToY(segment.track);
+
+       if (canvasX >= segmentX && canvasX <= segmentX + segmentWidth &&
+           canvasY >= segmentY && canvasY <= segmentY + TRACK_HEIGHT) {
+         // Find the original segment index in displaySegments
+         const originalIndex = displaySegments.findIndex(s =>
+           s.start === segment.start && s.end === segment.end && s.text === segment.text
+         );
+         return { index: originalIndex, segment };
+       }
+     }
+     return null;
+   }, [segmentsWithTracks, displaySegments, timeToX, trackToY, TRACK_HEIGHT]);
+
+   // Check if position is on resize handle
+   const getResizeHandle = useCallback((canvasX: number, canvasY: number, segmentIndex: number) => {
+     if (selectedSegmentIndex !== segmentIndex) return null;
+
+     // Find the segment in segmentsWithTracks that corresponds to the original segment index
+     const originalSegment = displaySegments[segmentIndex];
+     const segmentWithTrack = segmentsWithTracks.find(s =>
+       s.start === originalSegment.start && s.end === originalSegment.end && s.text === originalSegment.text
+     );
+
+     if (!segmentWithTrack) return null;
+
+     const segmentX = timeToX(segmentWithTrack.start);
+     // Use actual time-based width for resize handle detection, with minimum for very short segments
+     const actualWidth = timeToX(segmentWithTrack.end) - segmentX;
+     const segmentWidth = Math.max(actualWidth, 16); // Minimum 16px to ensure handles are accessible
+     const segmentY = trackToY(segmentWithTrack.track);
+
+     const handleWidth = 8;
+
+     if (canvasX >= segmentX && canvasX <= segmentX + handleWidth &&
+         canvasY >= segmentY && canvasY <= segmentY + TRACK_HEIGHT) {
+       return 'resize-start';
+     }
+     if (canvasX >= segmentX + segmentWidth - handleWidth && canvasX <= segmentX + segmentWidth &&
+         canvasY >= segmentY && canvasY <= segmentY + TRACK_HEIGHT) {
+       return 'resize-end';
+     }
+     return null;
+   }, [selectedSegmentIndex, segmentsWithTracks, displaySegments, timeToX, trackToY, TRACK_HEIGHT]);
+
+   // Edge-based auto-scroll during user interactions
+   const handleEdgeScroll = useCallback((clientX: number) => {
+     const container = containerRef.current;
+     if (!container) return;
+
+     // Use container's bounding rect to get the visible viewport area
+     const containerRect = container.getBoundingClientRect();
+     const containerWidth = container.clientWidth;
+     const edgeThreshold = 80; // Increased from 50px to 80px for better UX
+     const scrollSpeed = 8; // Slightly reduced for smoother scrolling
+
+     // Calculate mouse position relative to the visible container viewport
+     const mouseX = clientX - containerRect.left;
+
+     // Check if mouse is near the edges or outside the container
+     const isLeftOfContainer = mouseX < 0;
+     const isRightOfContainer = mouseX > containerWidth;
+     const isNearLeftEdge = mouseX > 20 && mouseX < edgeThreshold; // Start 20px from edge, trigger within 80px
+     const isNearRightEdge = mouseX > containerWidth - edgeThreshold && mouseX < containerWidth - 20; // End 20px from edge
+
+     // Scroll left if near left edge or dragging to the left of container
+     if ((isNearLeftEdge || isLeftOfContainer) && container.scrollLeft > 0) {
+       let adjustedScrollSpeed = scrollSpeed;
+
+       if (isLeftOfContainer) {
+         // Faster scrolling when outside container
+         const distanceOutside = Math.abs(mouseX);
+         adjustedScrollSpeed = Math.min(scrollSpeed * 2, scrollSpeed + distanceOutside * 0.1);
+       } else {
+         // Variable speed based on distance from edge when inside
+         const distanceFromEdge = mouseX - 20;
+         const scrollMultiplier = 1 - (distanceFromEdge / (edgeThreshold - 20));
+         adjustedScrollSpeed = Math.max(2, scrollSpeed * scrollMultiplier);
+       }
+
+       container.scrollLeft = Math.max(0, container.scrollLeft - adjustedScrollSpeed);
+     }
+     // Scroll right if near right edge or dragging to the right of container
+     else if (isNearRightEdge || isRightOfContainer) {
+       let adjustedScrollSpeed = scrollSpeed;
+
+       if (isRightOfContainer) {
+         // Faster scrolling when outside container
+         const distanceOutside = mouseX - containerWidth;
+         adjustedScrollSpeed = Math.min(scrollSpeed * 2, scrollSpeed + distanceOutside * 0.1);
+       } else {
+         // Variable speed based on distance from edge when inside
+         const distanceFromEdge = (containerWidth - 20) - mouseX;
+         const scrollMultiplier = 1 - (distanceFromEdge / (edgeThreshold - 20));
+         adjustedScrollSpeed = Math.max(2, scrollSpeed * scrollMultiplier);
+       }
+
+       const maxScrollLeft = Math.max(0, timelineWidth - containerWidth);
+       container.scrollLeft = Math.min(maxScrollLeft, container.scrollLeft + adjustedScrollSpeed);
+     }
+   }, [timelineWidth, containerRef]);
+
+   // Handle mouse events
+   const handleMouseMove = useCallback((event: React.MouseEvent<HTMLCanvasElement>) => {
+     // Completely disable hover detection and cursor updates during any drag operation
+     if (isDragging || isTimelineDragging) {
+       return;
+     }
+
+     const canvas = canvasRef.current;
+     if (!canvas) return;
+
+     const rect = canvas.getBoundingClientRect();
+     const actualCanvasX = clientXToCanvasX(event.clientX, canvasRef);
+     const y = event.clientY - rect.top;
+
+     // Find hovered segment
+     const foundSegment = findSegmentAtPosition(actualCanvasX, y);
+     setHoveredSegment(foundSegment?.index ?? null);
+
+     // Update cursor
+     if (canvas) {
+       let cursor = 'default';
+
+       if (foundSegment) {
+         const resizeHandle = getResizeHandle(actualCanvasX, y, foundSegment.index);
+         if (resizeHandle) {
+           cursor = 'ew-resize';
+         } else {
+           cursor = 'move';
+         }
+       }
+
+       canvas.style.cursor = cursor;
+     }
+   }, [isDragging, isTimelineDragging, findSegmentAtPosition, getResizeHandle, clientXToCanvasX, canvasRef]);
+
+   const handleMouseDown = useCallback((event: React.MouseEvent<HTMLCanvasElement>) => {
+     const canvas = canvasRef.current;
+     if (!canvas) return;
+
+     const rect = canvas.getBoundingClientRect();
+     const actualCanvasX = clientXToCanvasX(event.clientX, canvasRef);
+     const y = event.clientY - rect.top;
+
+     // Check if clicking on a segment
+     const foundSegment = findSegmentAtPosition(actualCanvasX, y);
+
+     if (foundSegment) {
+       // Check for resize handles first
+       const resizeHandle = getResizeHandle(actualCanvasX, y, foundSegment.index);
+
+       if (resizeHandle) {
+         // Start resize drag
+         event.preventDefault();
+         setIsDragging(true);
+         setDragType(resizeHandle as DragType);
+         setDragStartX(event.clientX);
+         setDragSegmentIndex(foundSegment.index);
+
+         // Only set selected segment if it's not already selected
+         // This prevents changing selection during drag operations
+         if (selectedSegmentIndex !== foundSegment.index) {
+           setSelectedSegmentIndex(foundSegment.index);
+         }
+
+         const segment = foundSegment.segment;
+         setDragStartTime(resizeHandle === 'resize-end' ? segment.end : segment.start);
+
+         // Update media time using centralized store function
+         seekToTime(resizeHandle === 'resize-end' ? segment.end : segment.start);
+       } else {
+         // Start move drag
+         event.preventDefault();
+         setIsDragging(true);
+         setDragType('move');
+         setDragStartX(event.clientX);
+         setDragSegmentIndex(foundSegment.index);
+
+         // Only set selected segment if it's not already selected
+         // This prevents changing selection during drag operations
+         if (selectedSegmentIndex !== foundSegment.index) {
+           setSelectedSegmentIndex(foundSegment.index);
+         }
+
+         setDragStartTime(foundSegment.segment.start);
+
+         // Update media time to mouse position using centralized store function
+         const clickTime = canvasXToTime(actualCanvasX);
+         seekToTime(clickTime);
+       }
+     } else {
+       // Clicking outside of any segment - deselect the selected segment
+       if (selectedSegmentIndex !== null) {
+         setSelectedSegmentIndex(null);
+       }
+
+       // Timeline click - start timeline drag
+       event.preventDefault();
+       setIsTimelineDragging(true);
+       seekToPosition(event.clientX);
+     }
+   }, [
+     findSegmentAtPosition,
+     getResizeHandle,
+     selectedSegmentIndex,
+     setSelectedSegmentIndex,
+     seekToPosition,
+     canvasXToTime,
+     clientXToCanvasX,
+     canvasRef,
+     seekToTime
+   ]);
+
+   // Global mouse move handler
+   useEffect(() => {
+     const handleGlobalMouseMove = (e: MouseEvent) => {
+       // Handle edge-based scrolling during any drag operation
+       if (isDragging || isTimelineDragging) {
+         handleEdgeScroll(e.clientX);
+
+         // Clear hover state during drag operations to prevent visual confusion
+         if (hoveredSegment !== null) {
+           setHoveredSegment(null);
+         }
+       }
+
+       if (isDragging && dragType && dragSegmentIndex !== null) {
+         const deltaX = e.clientX - dragStartX;
+         const timelineWidthPx = timelineWidth - TIMELINE_PADDING * 2;
+         const deltaTime = (deltaX / timelineWidthPx) * mediaDuration;
+
+         const segment = displaySegments[dragSegmentIndex];
+         let newStart = segment.start;
+         let newEnd = segment.end;
+
+         switch (dragType) {
+           case 'move': {
+             const newStartTime = Math.max(0, dragStartTime + deltaTime);
+             const duration = segment.end - segment.start;
+             newStart = Math.min(newStartTime, mediaDuration - duration);
+             newEnd = newStart + duration;
+
+             // Update media time to follow mouse using centralized store function
+             const actualCanvasX = clientXToCanvasX(e.clientX, canvasRef);
+             const mouseTime = canvasXToTime(actualCanvasX);
+             seekToTime(mouseTime);
+             break;
+           }
+           case 'resize-start':
+             newStart = Math.max(0, Math.min(dragStartTime + deltaTime, segment.end - 0.1));
+             seekToTime(newStart);
+             break;
+           case 'resize-end':
+             newEnd = Math.min(mediaDuration, Math.max(dragStartTime + deltaTime, segment.start + 0.1));
+             seekToTime(newEnd);
+             break;
+         }
+
+         updateSegmentTiming(dragSegmentIndex, newStart, newEnd, true); // deferSorting=true during drag
+       } else if (isTimelineDragging) {
+         seekToPosition(e.clientX);
+       }
+     };
+
+     const handleGlobalMouseUp = () => {
+       // If we were dragging a segment, finalize its positioning (re-sort segments)
+       if (isDragging && dragSegmentIndex !== null) {
+         finalizeSegmentPositioning();
+       }
+
+       setIsDragging(false);
+       setIsTimelineDragging(false);
+       setDragType(null);
+       setDragSegmentIndex(null);
+     };
+
+     if (isDragging || isTimelineDragging) {
+       document.addEventListener('mousemove', handleGlobalMouseMove);
+       document.addEventListener('mouseup', handleGlobalMouseUp);
+
+       return () => {
+         document.removeEventListener('mousemove', handleGlobalMouseMove);
+         document.removeEventListener('mouseup', handleGlobalMouseUp);
+       };
+     }
+   }, [
+     isDragging,
+     isTimelineDragging,
+     dragType,
+     dragSegmentIndex,
+     dragStartX,
+     dragStartTime,
+     displaySegments,
+     mediaDuration,
+     timelineWidth,
+     TIMELINE_PADDING,
+     updateSegmentTiming,
+     seekToPosition,
+     canvasXToTime,
+     clientXToCanvasX,
+     canvasRef,
+     seekToTime,
+     handleEdgeScroll,
+     hoveredSegment
+   ]);
+
+   return {
+     // State
+     isDragging,
+     isTimelineDragging,
+     dragSegmentIndex,
+     hoveredSegment,
+
+     // Actions
+     seekToPosition,
+     findSegmentAtTime,
+
+     // Mouse handlers
+     handleMouseMove,
+     handleMouseDown,
+
+     // Utilities
+     findSegmentAtPosition,
+     getResizeHandle,
+   };
+ };
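
The heart of the drag math above is the pixel-to-seconds conversion, deltaTime = (deltaX / usableWidth) * mediaDuration, followed by clamping so the segment stays inside the media. A worked sketch of the 'move' case as a pure function (names illustrative, not part of the commit):

// Illustrative restatement of the 'move' drag arithmetic (not in the commit).
function moveSegment(
  segment: {start: number; end: number},
  dragStartTime: number,  // segment.start captured at mousedown
  deltaX: number,         // horizontal pixels moved since mousedown
  usableWidthPx: number,  // timelineWidth - 2 * TIMELINE_PADDING
  mediaDuration: number
): {start: number; end: number} {
  const deltaTime = (deltaX / usableWidthPx) * mediaDuration;
  const duration = segment.end - segment.start;
  // Clamp so the whole segment stays inside [0, mediaDuration].
  const start = Math.min(
    Math.max(0, dragStartTime + deltaTime),
    mediaDuration - duration
  );
  return {start, end: start + duration};
}

For example, with a 100 s file laid out over 5000 usable pixels, a 50 px drag shifts the segment by exactly 1 s.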
frontend/src/hooks/useTimelineGeometry.ts ADDED
@@ -0,0 +1,62 @@
+ import { useCallback, useMemo } from 'react';
+
+ interface UseTimelineGeometryOptions {
+   mediaDuration: number;
+   constants: {
+     TRACK_HEIGHT: number;
+     TRACK_PADDING: number;
+     TIMELINE_PADDING: number;
+     PIXELS_PER_SECOND: number;
+   };
+ }
+
+ export const useTimelineGeometry = ({
+   mediaDuration,
+   constants,
+ }: UseTimelineGeometryOptions) => {
+   const { TRACK_HEIGHT, TRACK_PADDING, TIMELINE_PADDING, PIXELS_PER_SECOND } = constants;
+
+   // Calculate timeline dimensions
+   const timelineWidth = useMemo(() =>
+     mediaDuration * PIXELS_PER_SECOND,
+     [mediaDuration, PIXELS_PER_SECOND]
+   );
+
+   // Convert time to x position
+   const timeToX = useCallback((time: number) => {
+     return TIMELINE_PADDING + (time / mediaDuration) * (timelineWidth - TIMELINE_PADDING * 2);
+   }, [mediaDuration, timelineWidth, TIMELINE_PADDING]);
+
+   // Convert x position to time
+   const xToTime = useCallback((x: number) => {
+     return ((x - TIMELINE_PADDING) / (timelineWidth - TIMELINE_PADDING * 2)) * mediaDuration;
+   }, [mediaDuration, timelineWidth, TIMELINE_PADDING]);
+
+   // Convert track to y position
+   const trackToY = useCallback((track: number) => {
+     return TIMELINE_PADDING + track * (TRACK_HEIGHT + TRACK_PADDING);
+   }, [TRACK_HEIGHT, TRACK_PADDING, TIMELINE_PADDING]);
+
+   // Convert canvas coordinates to time (accounting for scroll)
+   const canvasXToTime = useCallback((canvasX: number) => {
+     return xToTime(canvasX);
+   }, [xToTime]);
+
+   // Convert client coordinates to canvas coordinates
+   const clientXToCanvasX = useCallback((clientX: number, canvasRef: React.RefObject<HTMLCanvasElement>) => {
+     const canvas = canvasRef.current;
+     if (!canvas) return 0;
+
+     const rect = canvas.getBoundingClientRect();
+     return clientX - rect.left;
+   }, []);
+
+   return {
+     timelineWidth,
+     timeToX,
+     xToTime,
+     trackToY,
+     canvasXToTime,
+     clientXToCanvasX,
+   };
+ };
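
timeToX and xToTime above are exact linear inverses over the padded pixel range. A quick round-trip check under assumed constants (the numbers are illustrative, not from the commit):

// Round-trip check with assumed constants (not in the commit).
const TIMELINE_PADDING = 20;
const PIXELS_PER_SECOND = 50;
const mediaDuration = 60; // seconds
const timelineWidth = mediaDuration * PIXELS_PER_SECOND; // 3000 px

const timeToX = (t: number) =>
  TIMELINE_PADDING + (t / mediaDuration) * (timelineWidth - TIMELINE_PADDING * 2);
const xToTime = (x: number) =>
  ((x - TIMELINE_PADDING) / (timelineWidth - TIMELINE_PADDING * 2)) * mediaDuration;

console.assert(timeToX(30) === 1500);        // midpoint of the usable range
console.assert(xToTime(timeToX(30)) === 30); // the two maps invert each other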
frontend/src/hooks/useTimelineRenderer.ts ADDED
@@ -0,0 +1,254 @@
+ import { useCallback, useEffect } from 'react';
+ import { AlignedSegment } from '../services/transcriptionApi';
+ import { SegmentWithTrack } from '../utils/trackUtils';
+ import { formatTime } from '../utils/subtitleUtils';
+
+ interface UseTimelineRendererOptions {
+   canvasRef: React.RefObject<HTMLCanvasElement>;
+   canvasSize: { width: number; height: number };
+   segmentsWithTracks: SegmentWithTrack[];
+   displaySegments: AlignedSegment[];
+   currentTime: number;
+   activeSegmentIndex: number | null;
+   selectedSegmentIndex: number | null;
+   hoveredSegment: number | null;
+   isDragging: boolean;
+   dragSegmentIndex: number | null;
+   mediaDuration: number;
+   geometryUtils: {
+     timeToX: (time: number) => number;
+     trackToY: (track: number) => number;
+     timelineWidth: number;
+   };
+   constants: {
+     TRACK_HEIGHT: number;
+     TIMELINE_PADDING: number;
+     PIXELS_PER_SECOND: number;
+   };
+ }
+
+ export const useTimelineRenderer = ({
+   canvasRef,
+   canvasSize,
+   segmentsWithTracks,
+   displaySegments,
+   currentTime,
+   activeSegmentIndex,
+   selectedSegmentIndex,
+   hoveredSegment,
+   isDragging,
+   dragSegmentIndex,
+   mediaDuration,
+   geometryUtils,
+   constants,
+ }: UseTimelineRendererOptions) => {
+   const { timeToX, trackToY, timelineWidth } = geometryUtils;
+   const { TRACK_HEIGHT, TIMELINE_PADDING, PIXELS_PER_SECOND } = constants;
+
+   // Draw the timeline
+   const draw = useCallback(() => {
+     const canvas = canvasRef.current;
+     if (!canvas) return;
+
+     const ctx = canvas.getContext('2d');
+     if (!ctx) return;
+
+     // Set canvas size
+     canvas.width = canvasSize.width;
+     canvas.height = canvasSize.height;
+
+     // Clear canvas
+     ctx.fillStyle = '#0f172a'; // bg-slate-900
+     ctx.fillRect(0, 0, canvas.width, canvas.height);
+
+     // Draw timeline base line
+     ctx.strokeStyle = '#4B5563'; // gray-600
+     ctx.lineWidth = 1;
+     ctx.beginPath();
+     const baseY = canvas.height / 2;
+     ctx.moveTo(TIMELINE_PADDING, baseY);
+     ctx.lineTo(timelineWidth - TIMELINE_PADDING, baseY);
+     ctx.stroke();
+
+     // Draw time markers with dynamic intervals
+     const getOptimalTimeInterval = () => {
+       const pixelsPerSecond = PIXELS_PER_SECOND;
+       const minSpacing = 120; // Increased from 80 to give more space between markers
+
+       // Calculate what time interval gives us reasonable spacing
+       const minTimeInterval = minSpacing / pixelsPerSecond;
+
+       // Choose appropriate intervals based on duration and zoom
+       if (minTimeInterval <= 1) return { major: 5, minor: 1 };
+       if (minTimeInterval <= 5) return { major: 10, minor: 2 };
+       if (minTimeInterval <= 10) return { major: 30, minor: 5 };
+       if (minTimeInterval <= 30) return { major: 60, minor: 10 };
+       if (minTimeInterval <= 60) return { major: 300, minor: 60 }; // 5min major, 1min minor
+       if (minTimeInterval <= 300) return { major: 600, minor: 120 }; // 10min major, 2min minor
+       return { major: 1800, minor: 300 }; // 30min major, 5min minor
+     };
+
+     const { major: majorInterval, minor: minorInterval } = getOptimalTimeInterval();
+
+     // Draw background grid lines for better visual organization
+     ctx.strokeStyle = '#1E293B'; // slate-800 (very subtle)
+     ctx.lineWidth = 1;
+     for (let time = 0; time <= mediaDuration; time += minorInterval) {
+       const x = timeToX(time);
+       ctx.beginPath();
+       ctx.moveTo(x, 0);
+       ctx.lineTo(x, canvas.height - 40); // Don't overlap with time labels
+       ctx.stroke();
+     }
+
+     // Draw minor markers (shorter, more visible than before)
+     ctx.strokeStyle = '#64748B'; // slate-500 (more visible than gray-700)
+     ctx.lineWidth = 1;
+     for (let time = 0; time <= mediaDuration; time += minorInterval) {
+       const x = timeToX(time);
+       ctx.beginPath();
+       ctx.moveTo(x, canvas.height - 15);
+       ctx.lineTo(x, canvas.height - 5);
+       ctx.stroke();
+     }
+
+     // Draw major markers (taller, much more prominent)
+     ctx.strokeStyle = '#94A3B8'; // slate-400 (much more visible)
+     ctx.fillStyle = '#F1F5F9'; // slate-100 (bright white-ish for text)
+     ctx.font = 'bold 13px system-ui'; // Slightly larger and bold
+     ctx.lineWidth = 2; // Thicker lines for major markers
+
+     for (let time = 0; time <= mediaDuration; time += majorInterval) {
+       const x = timeToX(time);
+
+       // Draw time label with special handling for 0:00 to avoid clipping
+       const timeText = formatTime(time);
+       ctx.fillStyle = '#F1F5F9'; // slate-100 (bright white-ish)
+
+       if (time === 0) {
+         // For 0:00, align left and shift right to avoid clipping
+         ctx.textAlign = 'left';
+         ctx.fillText(timeText, x + 4, canvas.height - 20);
+       } else {
+         // For all other times, center align as normal
+         ctx.textAlign = 'center';
+         ctx.fillText(timeText, x, canvas.height - 20);
+       }
+     }
+
+     // Draw segments
+     segmentsWithTracks.forEach((segment) => {
+       // Find the original segment index in displaySegments
+       const originalIndex = displaySegments.findIndex(s =>
+         s.start === segment.start && s.end === segment.end && s.text === segment.text
+       );
+
+       const x = timeToX(segment.start);
+       // Calculate actual width based on time duration, don't enforce minimum width for rendering
+       const actualWidth = timeToX(segment.end) - timeToX(segment.start);
+       const width = Math.max(actualWidth, 2); // Minimum 2px so segments are always visible
+       const y = trackToY(segment.track);
+       const height = TRACK_HEIGHT;
+
+       // Draw all segments (scrolling is handled by container)
+       {
+         // Determine segment color based on original segment index, not track index
+         let fillColor = '#374151'; // gray-700 (default)
+         let strokeColor = '#4B5563'; // gray-600
+         let textColor = '#D1D5DB'; // gray-300
+
+         // Priority order: dragging > selected > active > hovered
+         // Use originalIndex for all comparisons to maintain consistency during drag operations
+         if (isDragging && dragSegmentIndex === originalIndex) {
+           // Special styling for segment being dragged
+           fillColor = '#DC2626'; // red-600 (dragging indicator)
+           strokeColor = '#EF4444'; // red-500
+           textColor = '#FFFFFF';
+         } else if (selectedSegmentIndex === originalIndex) {
+           fillColor = '#D97706'; // yellow-600
+           strokeColor = '#FBBF24'; // yellow-400
+           textColor = '#FFFFFF';
+         } else if (activeSegmentIndex === originalIndex && selectedSegmentIndex === null) {
+           // Don't highlight active segment in blue when there's a selected segment
+           fillColor = '#2563EB'; // blue-600
+           strokeColor = '#3B82F6'; // blue-500
+           textColor = '#FFFFFF';
+         } else if (hoveredSegment === originalIndex && !isDragging) {
+           // Only show hover state when not dragging
+           fillColor = '#4B5563'; // gray-600
+           strokeColor = '#6B7280'; // gray-500
+         }
+
+         // Draw segment rectangle
+         ctx.fillStyle = fillColor;
+         ctx.fillRect(x, y, width, height);
+
+         // Draw segment border
+         ctx.strokeStyle = strokeColor;
+         ctx.lineWidth = 1;
+         ctx.strokeRect(x, y, width, height);
+
+         // Draw segment text
+         ctx.fillStyle = textColor;
+         ctx.font = '12px system-ui';
+         ctx.textAlign = 'left';
+
+         // Clip text to segment width
+         ctx.save();
+         ctx.beginPath();
+         ctx.rect(x + 4, y, width - 8, height);
+         ctx.clip();
+
+         const textY = y + height / 2 + 4; // Center vertically
+         ctx.fillText(segment.text, x + 4, textY);
+         ctx.restore();
+
+         // Draw resize handles for selected segment
+         if (selectedSegmentIndex === originalIndex) {
+           const handleWidth = 8;
+           ctx.fillStyle = '#3B82F6'; // blue-500
+
+           // Left handle
+           ctx.fillRect(x, y, handleWidth, height);
+
+           // Right handle
+           ctx.fillRect(x + width - handleWidth, y, handleWidth, height);
+         }
+       }
+     });
+
+     // Draw progress indicator
+     const progressX = timeToX(currentTime);
+     ctx.strokeStyle = '#EF4444'; // red-500
+     ctx.lineWidth = 2;
+     ctx.beginPath();
+     ctx.moveTo(progressX, 0);
+     ctx.lineTo(progressX, canvas.height);
+     ctx.stroke();
+   }, [
+     canvasRef,
+     canvasSize,
+     segmentsWithTracks,
+     displaySegments,
+     currentTime,
+     activeSegmentIndex,
+     selectedSegmentIndex,
+     hoveredSegment,
+     isDragging,
+     dragSegmentIndex,
+     mediaDuration,
+     timeToX,
+     trackToY,
+     timelineWidth,
+     TRACK_HEIGHT,
+     TIMELINE_PADDING,
+     PIXELS_PER_SECOND,
+   ]);
+
+   // Redraw when dependencies change
+   useEffect(() => {
+     draw();
+   }, [draw]);
+
+   return { draw };
+ };
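
getOptimalTimeInterval above picks tick spacing from the zoom level so that adjacent major labels stay at least ~120 px apart. The same ladder as a standalone function with two worked cases (illustrative, not part of the commit):

// Illustrative restatement of the tick-interval ladder (not in the commit).
function tickIntervals(pixelsPerSecond: number, minSpacingPx = 120) {
  const minTimeInterval = minSpacingPx / pixelsPerSecond; // seconds spanned by 120 px
  if (minTimeInterval <= 1) return {major: 5, minor: 1};
  if (minTimeInterval <= 5) return {major: 10, minor: 2};
  if (minTimeInterval <= 10) return {major: 30, minor: 5};
  if (minTimeInterval <= 30) return {major: 60, minor: 10};
  if (minTimeInterval <= 60) return {major: 300, minor: 60};
  if (minTimeInterval <= 300) return {major: 600, minor: 120};
  return {major: 1800, minor: 300};
}

// At 50 px/s (zoomed in): 120 / 50 = 2.4 s  -> 10 s major / 2 s minor ticks.
// At 2 px/s (zoomed out): 120 / 2  = 60 s  -> 5 min major / 1 min minor ticks.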
frontend/src/index.css ADDED
@@ -0,0 +1,7 @@
+ @tailwind base;
+ @tailwind components;
+ @tailwind utilities;
+
+ body {
+   @apply bg-gray-900 text-white;
+ }
frontend/src/main.tsx ADDED
@@ -0,0 +1,10 @@
+ import React from 'react';
+ import ReactDOM from 'react-dom/client';
+ import App from './App';
+ import './index.css';
+
+ ReactDOM.createRoot(document.getElementById('root') as HTMLElement).render(
+   <React.StrictMode>
+     <App />
+   </React.StrictMode>
+ );
frontend/src/pages/TranscriptionPage.tsx ADDED
@@ -0,0 +1,136 @@
1
+ import React from "react";
2
+ import {useState, useRef, useCallback} from "react";
3
+ import {useTranscriptionStore} from "../stores/transcriptionStore";
4
+ import {
5
+ SUPPORTED_AUDIO_FORMATS,
6
+ SUPPORTED_VIDEO_FORMATS,
7
+ CODEC_INFO,
8
+ } from "../utils/mediaTypes";
9
+ import TranscriptionSideBar from "../components/TranscriptionSideBar";
10
+ import TranscriptionPlayer from "../components/TranscriptionPlayer";
11
+ import MediaRecorder from "../components/MediaRecorder";
12
+ import {useDragAndDrop} from "../hooks/useDragAndDrop";
13
+ import {CloudArrowUpIcon} from "@heroicons/react/24/outline";
14
+ import ErrorBoundary from "../components/ErrorBoundary";
15
+ export default function TranscriptionPage() {
16
+ const {isRecording, setFile, stopRecording} = useTranscriptionStore();
17
+
18
+ // Sidebar resizing state
19
+ const [sidebarWidth, setSidebarWidth] = useState(256); // Default 256px (w-64)
20
+ const [isResizing, setIsResizing] = useState(false);
21
+ const sidebarRef = useRef<HTMLDivElement>(null);
22
+
23
+ // Drag and drop functionality
24
+ const {isDragActive, dragProps} = useDragAndDrop({
25
+ onFileDropped: setFile,
26
+ acceptedTypes: ["audio/*"],
27
+ });
28
+
29
+ // Handle sidebar resizing
30
+ const handleMouseDown = useCallback((e: React.MouseEvent) => {
31
+ e.preventDefault();
32
+ setIsResizing(true);
33
+ }, []);
34
+
35
+ const handleMouseMove = useCallback(
36
+ (e: MouseEvent) => {
37
+ if (!isResizing) return;
38
+
39
+ const newWidth = Math.max(200, Math.min(600, e.clientX)); // Min 200px, max 600px
40
+ setSidebarWidth(newWidth);
41
+ },
42
+ [isResizing]
43
+ );
44
+
45
+ const handleMouseUp = useCallback(() => {
46
+ setIsResizing(false);
47
+ }, []);
48
+
49
+ // Add global mouse event listeners
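+ // Attached to document so the drag keeps tracking even when the pointer leaves the narrow handle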
50
+ React.useEffect(() => {
51
+ if (isResizing) {
52
+ document.addEventListener("mousemove", handleMouseMove);
53
+ document.addEventListener("mouseup", handleMouseUp);
54
+ document.body.style.userSelect = "none"; // Prevent text selection during drag
55
+ document.body.style.cursor = "ew-resize";
56
+ }
57
+
58
+ return () => {
59
+ document.removeEventListener("mousemove", handleMouseMove);
60
+ document.removeEventListener("mouseup", handleMouseUp);
61
+ document.body.style.userSelect = "";
62
+ document.body.style.cursor = "";
63
+ };
64
+ }, [isResizing, handleMouseMove, handleMouseUp]);
65
+
66
+ return (
67
+ <ErrorBoundary componentName="TranscriptionPage">
68
+ <div className="flex h-screen bg-gray-900 relative" {...dragProps}>
69
+ {/* Drag Overlay */}
70
+ {isDragActive && (
71
+ <div className="absolute inset-0 bg-blue-900/80 border-4 border-dashed border-blue-400 z-50 flex items-center justify-center">
72
+ <div className="text-center text-white">
73
+ <CloudArrowUpIcon className="w-16 h-16 mx-auto mb-4 text-blue-300" />
74
+ <div className="text-2xl font-semibold mb-2">
75
+ Drop your audio file here
76
+ </div>
77
+ <div className="text-lg text-blue-200 mb-4">
78
+ Supports audio files only
79
+ </div>
80
+
81
+ {/* Audio formats section */}
82
+ <div className="text-center mb-3">
83
+ <div className="text-sm font-medium text-blue-300 mb-1">
84
+ Audio Formats
85
+ </div>
86
+ <div className="text-xs text-blue-100 opacity-90">
87
+ {SUPPORTED_AUDIO_FORMATS.join(" • ")}
88
+ </div>
89
+ </div>
90
+
91
+ {/* Codec info */}
92
+ <div className="text-center">
93
+ <div className="text-xs text-blue-200 opacity-75">
94
+ Best with standard codecs:{" "}
95
+ {CODEC_INFO.audio.common.slice(0, 2).join(", ")}
96
+ </div>
97
+ </div>
98
+ </div>
99
+ </div>
100
+ )}
101
+
102
+ {/* Sidebar with Resize Handle */}
103
+ <div className="relative flex">
104
+ <div
105
+ ref={sidebarRef}
106
+ className="flex-shrink-0 bg-gray-800 text-white overflow-y-auto"
107
+ style={{width: `${sidebarWidth}px`}}
108
+ >
109
+ <TranscriptionSideBar />
110
+ </div>
111
+
112
+ {/* Drag Handle */}
113
+ <div
114
+ className="w-1 bg-gray-600 hover:bg-gray-500 cursor-ew-resize flex-shrink-0 transition-colors duration-150"
115
+ onMouseDown={handleMouseDown}
116
+ title="Drag to resize sidebar"
117
+ />
118
+ </div>
119
+
120
+ {/* Main Content */}
121
+ <ErrorBoundary componentName="TranscriptionPlayer">
122
+ {isRecording ? (
123
+ <div className="flex-1 flex items-center justify-center bg-gray-900">
124
+ <MediaRecorder
125
+ onComplete={() => stopRecording()}
126
+ onCancel={() => stopRecording()}
127
+ />
128
+ </div>
129
+ ) : (
130
+ <TranscriptionPlayer />
131
+ )}
132
+ </ErrorBoundary>
133
+ </div>
134
+ </ErrorBoundary>
135
+ );
136
+ }
frontend/src/services/transcriptionApi.ts ADDED
@@ -0,0 +1,273 @@
1
+ // API service for transcription functionality
2
+
3
+ // Common API error handling and utilities
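+ // ApiError carries the HTTP status plus an isServerBusy flag so callers can special-case 503 responses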
4
+ class ApiError extends Error {
5
+ constructor(message: string, public status?: number, public isServerBusy: boolean = false) {
6
+ super(message);
7
+ this.name = 'ApiError';
8
+ }
9
+ }
10
+
11
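+ // Base URL comes from the Vite env var VITE_SERVER_URL; an empty string makes requests same-origin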
+ const getServerUrl = (): string => import.meta.env.VITE_SERVER_URL || "";
12
+
13
+ // Centralized fetch wrapper with consistent error handling
14
+ const fetchApi = async (
15
+ endpoint: string,
16
+ options: RequestInit = {},
17
+ expectJson: boolean = true
18
+ ): Promise<any> => {
19
+ const response = await fetch(endpoint, options);
20
+
21
+ if (!response.ok) {
22
+ let errorMessage = `HTTP error! status: ${response.status}`;
23
+ let isServerBusy = false;
24
+
25
+ // Try to extract error details from response
26
+ try {
27
+ const errorData = await response.json();
28
+ errorMessage = errorData?.error || errorMessage;
29
+
30
+ if (response.status === 503) {
31
+ isServerBusy = true;
32
+ errorMessage = `Server busy: ${errorData?.error || 'Server is currently processing another request'}`;
33
+ }
34
+ } catch {
35
+ // If JSON parsing fails, use default error message
36
+ if (response.status === 503) {
37
+ isServerBusy = true;
38
+ errorMessage = 'Server busy: Server is currently processing another request';
39
+ }
40
+ }
41
+
42
+ throw new ApiError(errorMessage, response.status, isServerBusy);
43
+ }
44
+
45
+ if (expectJson) {
46
+ return response.json();
47
+ }
48
+
49
+ return response;
50
+ };
51
+
52
+ // Create form data helper
53
+ const createFormData = (data: Record<string, string | File | Blob>): FormData => {
54
+ const formData = new FormData();
55
+ Object.entries(data).forEach(([key, value]) => {
56
+ formData.append(key, value);
57
+ });
58
+ return formData;
59
+ };
60
+
61
+ export interface AlignedSegment {
62
+ duration: number;
63
+ end: number;
64
+ start: number;
65
+ text: string;
66
+ chunk_index?: number;
67
+ speech_segment_index?: number;
68
+ // Merge history to allow intelligent splitting
69
+ mergedFrom?: AlignedSegment[];
70
+ mergeThreshold?: number; // The threshold used when this merge was created
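+ // e.g. a segment merged at a 10s threshold keeps its source segments in mergedFrom, so lowering the threshold below 10s can split it apart again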
71
+ }
72
+
73
+ export interface ChunkInfo {
74
+ chunk_index: number;
75
+ start_time: number;
76
+ end_time: number;
77
+ duration: number;
78
+ num_segments: number;
79
+ transcription: string;
80
+ }
81
+
82
+ export interface PreprocessedAudio {
83
+ data: string; // base64 encoded audio data
84
+ format: string; // "wav"
85
+ sample_rate: number;
86
+ duration: number;
87
+ size_bytes: number;
88
+ }
89
+
90
+ export interface TranscriptionResponse {
91
+ aligned_segments: AlignedSegment[];
92
+ alignment_available?: boolean;
93
+ device?: string;
94
+ model: string;
95
+ num_segments: number;
96
+ status: string;
97
+ total_duration: number;
98
+ transcription: string;
99
+ // Long-form specific fields
100
+ chunks?: ChunkInfo[];
101
+ num_chunks?: number;
102
+ // Preprocessed audio data
103
+ preprocessed_audio?: PreprocessedAudio;
104
+ }
105
+
106
+ export interface ServerStatus {
107
+ is_busy: boolean;
108
+ current_operation?: string;
109
+ current_filename?: string;
110
+ progress?: number;
111
+ duration_seconds?: number;
112
+ total_completed: number;
113
+ }
114
+
115
+ export interface HealthResponse {
116
+ status: string;
117
+ message: string;
118
+ version: string;
119
+ service: string;
120
+ device: string;
121
+ cuda_available: boolean;
122
+ ffmpeg_available: boolean;
123
+ transcription_status: ServerStatus;
124
+ gpu_count?: number;
125
+ current_device?: number;
126
+ gpu_name?: string;
127
+ gpu_memory_allocated_mb?: number;
128
+ gpu_memory_reserved_mb?: number;
129
+ gpu_memory_total_mb?: number;
130
+ gpu_memory_free_mb?: number;
131
+ }
132
+
133
+ // Main transcription API function
134
+ export const transcribeAudio = async (
135
+ file: File,
136
+ languageCode?: string | null,
137
+ scriptCode?: string | null,
138
+ onVideoProcessing?: (isProcessing: boolean) => void
139
+ ): Promise<TranscriptionResponse> => {
140
+ // Determine if this is a video file for UI feedback
141
+ const isVideoFile = file.type.startsWith("video/");
142
+
143
+ if (isVideoFile) {
144
+ onVideoProcessing?.(true);
145
+ console.log("Processing video file on server side:", file.name);
146
+ }
147
+
148
+ try {
149
+ // Create form data with unified 'media' field
150
+ const formData = createFormData({
151
+ media: file // Single 'media' parameter for all file types
152
+ });
153
+
154
+ // Combine language and script codes for server if both are specified
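+ // e.g. language "eng" with script "Latn" is sent as "eng_Latn"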
155
+ if (languageCode && scriptCode) {
156
+ const combinedLanguage = `${languageCode}_${scriptCode}`;
157
+ formData.append("language", combinedLanguage);
158
+ }
159
+
160
+ // Request preprocessed audio for waveform generation
161
+ formData.append("include_preprocessed", "true");
162
+
163
+ console.log('transcribeAudio - About to make API call with formData:', {
164
+ fileName: file.name,
165
+ fileType: file.type,
166
+ fileSize: file.size,
167
+ hasLanguage: !!languageCode && !!scriptCode,
168
+ combinedLanguage: languageCode && scriptCode ? `${languageCode}_${scriptCode}` : null
169
+ });
170
+
171
+ // Debug: Check if the File object is still valid
172
+ if (file instanceof File) {
173
+ console.log('transcribeAudio - File is valid File object');
174
+
175
+ // Try to read a small portion to ensure it's accessible
176
+ try {
177
+ const slice = file.slice(0, 100);
178
+ const arrayBuffer = await slice.arrayBuffer();
179
+ console.log('transcribeAudio - File slice readable, first 100 bytes length:', arrayBuffer.byteLength);
180
+ } catch (error) {
181
+ console.error('transcribeAudio - File slice read failed:', error);
182
+ }
183
+ } else {
184
+ console.error('transcribeAudio - File is not a valid File object:', file);
185
+ }
186
+
187
+ const result = await fetchApi(`${getServerUrl()}/transcribe`, {
188
+ method: "POST",
189
+ body: formData,
190
+ });
191
+
192
+ if (result.status !== "success") {
193
+ throw new Error("Transcription failed");
194
+ }
195
+
196
+ return result;
197
+
198
+ } finally {
199
+ if (isVideoFile) {
200
+ onVideoProcessing?.(false);
201
+ }
202
+ }
203
+ };
204
+
205
+ // Server status API functions
206
+ export const getServerStatus = async (): Promise<ServerStatus> => {
207
+ return fetchApi(`${getServerUrl()}/status`);
208
+ };
209
+
210
+ export const getServerHealth = async (): Promise<HealthResponse> => {
211
+ return fetchApi(`${getServerUrl()}/health`);
212
+ };
213
+
214
+ // Video + Subtitles combination API function
215
+ export const combineVideoWithSubtitles = async (
216
+ videoFile: File,
217
+ subtitleContent: string,
218
+ language: string = 'eng',
219
+ format: 'srt' | 'webvtt' = 'srt',
220
+ outputFormat: 'mp4' | 'mkv' = 'mp4'
221
+ ): Promise<Blob> => {
222
+ const formData = createFormData({
223
+ video: videoFile,
224
+ subtitles: subtitleContent,
225
+ format,
226
+ output_format: outputFormat,
227
+ language
228
+ });
229
+
230
+ const response = await fetchApi(
231
+ `${getServerUrl()}/combine-video-subtitles`,
232
+ { method: 'POST', body: formData },
233
+ false // Don't expect JSON, expect blob
234
+ );
235
+
236
+ return response.blob();
237
+ };
238
+
239
+ // Cache for supported languages
240
+ let supportedLanguagesCache: string[] | null = null;
241
+ let supportedLanguagesPromise: Promise<string[]> | null = null;
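+ // Caching the in-flight promise lets concurrent callers share one request instead of issuing duplicates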
242
+
243
+ // Get supported languages with caching
244
+ export const getSupportedLanguages = async (): Promise<string[]> => {
245
+ // Return from cache if available
246
+ if (supportedLanguagesCache) {
247
+ return supportedLanguagesCache;
248
+ }
249
+
250
+ // Return existing promise if already in flight
251
+ if (supportedLanguagesPromise) {
252
+ return supportedLanguagesPromise;
253
+ }
254
+
255
+ // Create new promise and cache it
256
+ supportedLanguagesPromise = (async () => {
257
+ try {
258
+ const response = await fetchApi(`${getServerUrl()}/supported-languages`);
259
+ const languages = response.supported_languages;
260
+
261
+ // Cache the result
262
+ supportedLanguagesCache = languages;
263
+
264
+ return languages;
265
+ } catch (error) {
266
+ // Reset promise on error so we can retry
267
+ supportedLanguagesPromise = null;
268
+ throw error;
269
+ }
270
+ })();
271
+
272
+ return supportedLanguagesPromise;
273
+ };
frontend/src/stores/transcriptionStore.ts ADDED
@@ -0,0 +1,1161 @@
1
+ import {create} from "zustand";
2
+ import {devtools} from "zustand/middleware";
3
+ import debounce from "debounce";
4
+ import {
5
+ TranscriptionResponse,
6
+ PreprocessedAudio,
7
+ transcribeAudio,
8
+ AlignedSegment,
9
+ ServerStatus,
10
+ getServerStatus,
11
+ HealthResponse,
12
+ getServerHealth,
13
+ } from "../services/transcriptionApi";
14
+ import {generateSRT, downloadVideoWithSubtitles} from "../utils/subtitleUtils";
15
+ import {
16
+ trackTranscriptionStart,
17
+ trackTranscriptionComplete,
18
+ trackTranscriptionError,
19
+ trackFileUpload,
20
+ trackLanguageChange,
21
+ trackDownloadVideoWithSubtitles,
22
+ } from "../analytics/gaEvents";
23
+
24
+ // Helper function to find the active segment based on current time
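+ // Linear scan; returns the first segment whose inclusive [start, end] range contains the time, or null if none does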
25
+ const findActiveSegmentIndex = (
26
+ segments: AlignedSegment[],
27
+ currentTime: number
28
+ ): number | null => {
29
+ for (let i = 0; i < segments.length; i++) {
30
+ const segment = segments[i];
31
+ if (currentTime >= segment.start && currentTime <= segment.end) {
32
+ return i;
33
+ }
34
+ }
35
+ return null;
36
+ };
37
+
38
+ // Types for our store state
39
+ interface TranscriptionState {
40
+ // File and media state
41
+ file: File | null;
42
+ mediaUrl: string | null;
43
+
44
+ // Recording state
45
+ isRecording: boolean;
46
+ recordingType: "audio" | "video" | null;
47
+ recordedBlob: Blob | null;
48
+
49
+ // Media refs for seeking (set by components)
50
+ audioRef: React.RefObject<HTMLAudioElement> | null;
51
+ videoRef: React.RefObject<HTMLVideoElement> | null;
52
+
53
+ // Transcription state
54
+ transcription: TranscriptionResponse | null;
55
+ preprocessedAudio: PreprocessedAudio | null;
56
+ currentTime: number;
57
+ activeSegmentIndex: number | null;
58
+ currentSegments: AlignedSegment[] | null;
59
+
60
+ // Edit state
61
+ selectedSegmentIndex: number | null;
62
+
63
+ // Viewport state for minimap
64
+ viewportStart: number;
65
+ viewportEnd: number;
66
+
67
+ // History state for undo/redo
68
+ history: AlignedSegment[][];
69
+ historyIndex: number;
70
+
71
+ // Loading and error state
72
+ isLoading: boolean;
73
+ isProcessingVideo: boolean;
74
+ isDownloadingVideo: boolean;
75
+ error: string | null;
76
+
77
+ // Language selection
78
+ selectedLanguage: string | null;
79
+ selectedScript: string | null;
80
+
81
+ // Server status
82
+ serverStatus: ServerStatus | null;
83
+ serverHealth: HealthResponse | null;
84
+ isPollingStatus: boolean;
85
+ statusPollingInterval: number | null;
86
+
87
+ // Modal state
88
+ showWelcomeModal: boolean;
89
+
90
+ // Computed properties
91
+ isVideoFile: boolean;
92
+ hasFile: boolean;
93
+ hasTranscription: boolean;
94
+
95
+ // Actions
96
+ setFile: (file: File | null) => void;
97
+ setTranscription: (transcription: TranscriptionResponse | null) => void;
98
+
99
+ // Recording actions
100
+ startRecording: (type: "audio" | "video") => void;
101
+ stopRecording: () => void;
102
+ setRecordedBlob: (blob: Blob | null) => void;
103
+ setCurrentTime: (time: number) => void;
104
+ setActiveSegmentIndex: (index: number | null) => void;
105
+ setIsLoading: (loading: boolean) => void;
106
+ setIsProcessingVideo: (processing: boolean) => void;
107
+ setIsDownloadingVideo: (downloading: boolean) => void;
108
+ setError: (error: string | null) => void;
109
+ setSelectedLanguage: (language: string | null) => void;
110
+ setSelectedScript: (script: string | null) => void;
111
+ setSelectedLanguageAndScript: (
112
+ language: string | null,
113
+ script: string | null
114
+ ) => void;
115
+
116
+ // Modal actions
117
+ setShowWelcomeModal: (show: boolean) => void;
118
+
119
+ // Media control actions
120
+ setMediaRefs: (
121
+ audioRef: React.RefObject<HTMLAudioElement>,
122
+ videoRef: React.RefObject<HTMLVideoElement>
123
+ ) => void;
124
+ seekToTime: (time: number) => void;
125
+
126
+ // Server status actions
127
+ setServerStatus: (status: ServerStatus | null) => void;
128
+ setServerHealth: (health: HealthResponse | null) => void;
129
+ fetchServerStatus: () => Promise<void>;
130
+ fetchServerHealth: () => Promise<void>;
131
+ startStatusPolling: () => void;
132
+ stopStatusPolling: () => void;
133
+
134
+ // Edit actions
135
+ setSelectedSegmentIndex: (index: number | null) => void;
136
+ updateSegmentTiming: (
137
+ index: number,
138
+ start: number,
139
+ end: number,
140
+ deferSorting?: boolean
141
+ ) => void;
142
+ updateSegmentText: (index: number, text: string) => void;
143
+ deleteSegment: (index: number) => void;
144
+ mergeSegmentsByProximity: (maxDurationSeconds: number) => void;
145
+ finalizeSegmentPositioning: () => void;
146
+
147
+ // Viewport actions
148
+ setViewport: (start: number, end: number) => void;
149
+ initializeViewport: (duration: number) => void;
150
+
151
+ // History actions
152
+ undo: () => void;
153
+ redo: () => void;
154
+ canUndo: boolean;
155
+ canRedo: boolean;
156
+
157
+ // Helper functions
158
+ _recordHistoryImmediate: (segments: AlignedSegment[]) => void;
159
+ _recordHistoryDebounced: (segments: AlignedSegment[]) => void;
+ initializeHistory: () => void; // declared so the implementation below type-checks
160
+
161
+ // Complex actions
162
+ handleFileSelect: (file: File) => void;
163
+ handleTranscribe: () => Promise<void>;
164
+ handleTimeUpdate: () => void;
165
+ handleDownloadVideoWithSubtitles: () => Promise<void>;
166
+ reset: () => void;
167
+ }
168
+
169
+ // Initial state
170
+ const initialState = {
171
+ file: null,
172
+ mediaUrl: null,
173
+ audioRef: null,
174
+ videoRef: null,
175
+
176
+ // Recording state
177
+ isRecording: false,
178
+ recordingType: null,
179
+ recordedBlob: null,
180
+ transcription: null,
181
+ preprocessedAudio: null,
182
+ currentTime: 0,
183
+ activeSegmentIndex: null,
184
+ selectedSegmentIndex: null,
185
+ history: [],
186
+ historyIndex: -1,
187
+ isLoading: false,
188
+ isProcessingVideo: false,
189
+ isDownloadingVideo: false,
190
+ error: null,
191
+ selectedLanguage: null,
192
+ selectedScript: null,
193
+ currentSegments: null,
194
+ viewportStart: 0,
195
+ viewportEnd: 30, // Default to first 30 seconds
196
+ showWelcomeModal: true, // Show modal on app load
197
+ };
198
+
199
+ export const useTranscriptionStore = create<TranscriptionState>()(
200
+ devtools(
201
+ (set, get) => ({
202
+ ...initialState,
203
+ // Server status state
204
+ serverStatus: null,
205
+ serverHealth: null,
206
+ isPollingStatus: false,
207
+ statusPollingInterval: null,
208
+
209
+ // Computed properties - these will be updated when relevant state changes
210
+ isVideoFile: false,
211
+ hasFile: false,
212
+ hasTranscription: false,
213
+
214
+ canUndo: false,
215
+ canRedo: false,
216
+
217
+ // Simple setters
218
+ setFile: (file) => {
219
+ const {mediaUrl, showWelcomeModal} = get();
220
+
221
+ // Clean up previous media URL
222
+ if (mediaUrl) {
223
+ URL.revokeObjectURL(mediaUrl);
224
+ }
225
+
226
+ set({
227
+ ...initialState,
228
+ // Override only file-specific properties
229
+ file,
230
+ mediaUrl: file ? URL.createObjectURL(file) : null,
231
+ isVideoFile: file?.type.startsWith("video/") ?? false,
232
+ hasFile: !!file,
233
+ hasTranscription: false,
234
+ // Preserve the modal state - don't reset it
235
+ showWelcomeModal,
236
+ });
237
+ },
238
+ setTranscription: (transcription) => {
239
+ set({
240
+ transcription,
241
+ preprocessedAudio: transcription?.preprocessed_audio || null,
242
+ hasTranscription: !!transcription,
243
+ currentSegments: transcription?.aligned_segments || null,
244
+ });
245
+
246
+ // Initialize history when transcription is first set
247
+ if (transcription && transcription.aligned_segments) {
248
+ const segments = [...transcription.aligned_segments];
249
+ set({
250
+ history: [segments],
251
+ historyIndex: 0,
252
+ canUndo: false,
253
+ canRedo: false,
254
+ });
255
+ }
256
+ },
257
+
258
+ handleTimeUpdate: () => {
259
+ const {audioRef, videoRef, transcription} = get();
260
+ const mediaElement = audioRef?.current || videoRef?.current;
261
+
262
+ if (mediaElement && transcription) {
263
+ const currentTime = mediaElement.currentTime;
264
+ const activeIndex = findActiveSegmentIndex(
265
+ transcription.aligned_segments,
266
+ currentTime
267
+ );
268
+
269
+ set({
270
+ currentTime,
271
+ activeSegmentIndex: activeIndex,
272
+ });
273
+ }
274
+ },
275
+ setCurrentTime: (currentTime) => set({currentTime}),
276
+ setActiveSegmentIndex: (activeSegmentIndex) => set({activeSegmentIndex}),
277
+ setIsLoading: (isLoading) => set({isLoading}),
278
+ setIsProcessingVideo: (isProcessingVideo) => set({isProcessingVideo}),
279
+ setIsDownloadingVideo: (isDownloadingVideo) => set({isDownloadingVideo}),
280
+ setError: (error) => set({error}),
281
+ setSelectedLanguage: (selectedLanguage) => {
282
+ // Track language selection
283
+ if (selectedLanguage) {
284
+ trackLanguageChange(selectedLanguage);
285
+ }
286
+ set({selectedLanguage});
287
+ },
288
+ setSelectedScript: (selectedScript) => set({selectedScript}),
289
+ setSelectedLanguageAndScript: (selectedLanguage, selectedScript) => {
290
+ // Track language selection
291
+ if (selectedLanguage) {
292
+ trackLanguageChange(selectedLanguage);
293
+ }
294
+ set({selectedLanguage, selectedScript});
295
+ },
296
+
297
+ // Modal actions
298
+ setShowWelcomeModal: (showWelcomeModal) => set({showWelcomeModal}),
299
+
300
+ // Media control actions
301
+ setMediaRefs: (audioRef, videoRef) => set({audioRef, videoRef}),
302
+ seekToTime: (time) => {
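+ // Clamps the target to [0, duration]; duration can be NaN before metadata loads, hence the Infinity fallback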
303
+ const {audioRef, videoRef} = get();
304
+ const mediaElement = audioRef?.current || videoRef?.current;
305
+ if (mediaElement) {
306
+ const seekTime = Math.max(
307
+ 0,
308
+ Math.min(time, mediaElement.duration || Infinity)
309
+ );
310
+ mediaElement.currentTime = seekTime;
311
+ // Immediately update current time to trigger auto-scroll
312
+ set({currentTime: seekTime});
313
+ }
314
+ },
315
+
316
+ // Server status actions
317
+ setServerStatus: (serverStatus) => set({serverStatus}),
318
+ setServerHealth: (serverHealth) => set({serverHealth}),
319
+
320
+ fetchServerStatus: async () => {
321
+ try {
322
+ const status = await getServerStatus();
323
+ set({serverStatus: status});
324
+ } catch (error) {
325
+ console.error("Failed to fetch server status:", error);
326
+ }
327
+ },
328
+
329
+ fetchServerHealth: async () => {
330
+ try {
331
+ const health = await getServerHealth();
332
+ set({serverHealth: health});
333
+ } catch (error) {
334
+ console.error("Failed to fetch server health:", error);
335
+ }
336
+ },
337
+
338
+ startStatusPolling: () => {
339
+ const {isPollingStatus, statusPollingInterval} = get();
340
+
341
+ if (isPollingStatus) {
342
+ return; // Already polling
343
+ }
344
+
345
+ // Clear any existing interval
346
+ if (statusPollingInterval) {
347
+ clearInterval(statusPollingInterval);
348
+ }
349
+
350
+ const {fetchServerStatus} = get();
351
+
352
+ // Fetch immediately
353
+ fetchServerStatus();
354
+
355
+ // Then poll every 2 seconds
356
+ const interval = setInterval(() => {
357
+ fetchServerStatus();
358
+ }, 2000);
359
+
360
+ set({
361
+ isPollingStatus: true,
362
+ statusPollingInterval: interval,
363
+ });
364
+ },
365
+
366
+ stopStatusPolling: () => {
367
+ const {statusPollingInterval} = get();
368
+
369
+ if (statusPollingInterval) {
370
+ clearInterval(statusPollingInterval);
371
+ }
372
+
373
+ set({
374
+ isPollingStatus: false,
375
+ statusPollingInterval: null,
376
+ });
377
+ },
378
+
379
+ // Helper function to record history immediately (for instant actions like delete)
380
+ _recordHistoryImmediate: (segments: AlignedSegment[]) => {
381
+ const {history, historyIndex} = get();
382
+
383
+ // Remove any history after current index (when we're not at the end)
384
+ const newHistory = history.slice(0, historyIndex + 1);
385
+
386
+ // Add new state to history
387
+ newHistory.push([...segments]);
388
+
389
+ // Limit history size to prevent memory issues (keep last 50 states)
390
+ const maxHistorySize = 50;
391
+ const newIndex = newHistory.length - 1;
392
+
393
+ if (newHistory.length > maxHistorySize) {
394
+ newHistory.shift();
395
+ const adjustedIndex = newIndex - 1;
396
+ set({
397
+ history: newHistory,
398
+ historyIndex: adjustedIndex,
399
+ canUndo: adjustedIndex > 0,
400
+ canRedo: false, // Always false when adding new history
401
+ });
402
+ } else {
403
+ set({
404
+ history: newHistory,
405
+ historyIndex: newIndex,
406
+ canUndo: newIndex > 0,
407
+ canRedo: false, // Always false when adding new history
408
+ });
409
+ }
410
+ },
411
+
412
+ // Debounced history recording method
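+ // Coalesces rapid edits (dragging, typing, slider changes) into a single undo step after 500ms of inactivity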
413
+ _recordHistoryDebounced: debounce((segments: AlignedSegment[]) => {
414
+ const {_recordHistoryImmediate} = get();
415
+ _recordHistoryImmediate(segments);
416
+ }, 500),
417
+
418
+ // Edit mode actions
419
+ // Initialize history for undo/redo (called automatically when transcription is set)
420
+ initializeHistory: () => {
421
+ const {transcription, history} = get();
422
+ if (!transcription || history.length > 0) return;
423
+
424
+ const segments = [...transcription.aligned_segments];
425
+ set({
426
+ history: [segments],
427
+ historyIndex: 0,
428
+ canUndo: false,
429
+ canRedo: false,
430
+ });
431
+ },
432
+
433
+ setSelectedSegmentIndex: (selectedSegmentIndex) => {
434
+ set({selectedSegmentIndex});
435
+ },
436
+
437
+ updateSegmentTiming: (
438
+ index: number,
439
+ start: number,
440
+ end: number,
441
+ deferSorting: boolean = false
442
+ ) => {
443
+ const {
444
+ currentSegments,
445
+ transcription,
446
+ selectedSegmentIndex,
447
+ _recordHistoryDebounced,
448
+ } = get();
449
+ if (
450
+ !currentSegments ||
451
+ !transcription ||
452
+ index < 0 ||
453
+ index >= currentSegments.length
454
+ )
455
+ return;
456
+
457
+ const updatedSegments = [...currentSegments];
458
+ updatedSegments[index] = {
459
+ ...updatedSegments[index],
460
+ start,
461
+ end,
462
+ duration: end - start,
463
+ };
464
+
465
+ // If deferSorting is true (during drag operations), just update without re-sorting
466
+ if (deferSorting) {
467
+ // Update both transcription and current segments without re-sorting
468
+ const updatedTranscription = {
469
+ ...transcription,
470
+ aligned_segments: updatedSegments,
471
+ };
472
+
473
+ set({
474
+ transcription: updatedTranscription,
475
+ currentSegments: updatedSegments,
476
+ });
477
+
478
+ // Don't record history during intermediate drag updates
479
+ return;
480
+ }
481
+
482
+ // Normal operation: re-sort segments by start time to maintain chronological order
483
+ const sortedSegments = [...updatedSegments].sort(
484
+ (a, b) => a.start - b.start
485
+ );
486
+
487
+ // Find the new index of the moved segment after sorting
488
+ const movedSegment = updatedSegments[index];
489
+ const newIndex = sortedSegments.findIndex(
490
+ (seg) =>
491
+ seg.start === movedSegment.start &&
492
+ seg.end === movedSegment.end &&
493
+ seg.text === movedSegment.text
494
+ );
495
+
496
+ // Update selected segment index if it was the one being moved
497
+ let newSelectedIndex = selectedSegmentIndex;
498
+ if (selectedSegmentIndex === index) {
499
+ newSelectedIndex = newIndex;
500
+ } else if (selectedSegmentIndex !== null) {
501
+ // Find where the currently selected segment ended up after sorting
502
+ const selectedSegment = updatedSegments[selectedSegmentIndex];
503
+ newSelectedIndex = sortedSegments.findIndex(
504
+ (seg) =>
505
+ seg.start === selectedSegment.start &&
506
+ seg.end === selectedSegment.end &&
507
+ seg.text === selectedSegment.text
508
+ );
509
+ }
510
+
511
+ // Update both transcription and current segments
512
+ const updatedTranscription = {
513
+ ...transcription,
514
+ aligned_segments: sortedSegments,
515
+ };
516
+
517
+ set({
518
+ transcription: updatedTranscription,
519
+ currentSegments: sortedSegments,
520
+ selectedSegmentIndex: newSelectedIndex,
521
+ });
522
+
523
+ // Record history with debounce for drag operations
524
+ _recordHistoryDebounced(sortedSegments);
525
+ },
526
+
527
+ // New method to finalize segment positioning after drag operations
528
+ finalizeSegmentPositioning: () => {
529
+ const {currentSegments, transcription, selectedSegmentIndex} = get();
530
+ if (!currentSegments || !transcription) return;
531
+
532
+ // Re-sort segments by start time
533
+ const sortedSegments = [...currentSegments].sort(
534
+ (a, b) => a.start - b.start
535
+ );
536
+
537
+ // Update selected segment index to reflect new position
538
+ let newSelectedIndex = selectedSegmentIndex;
539
+ if (selectedSegmentIndex !== null) {
540
+ const selectedSegment = currentSegments[selectedSegmentIndex];
541
+ newSelectedIndex = sortedSegments.findIndex(
542
+ (seg) =>
543
+ seg.start === selectedSegment.start &&
544
+ seg.end === selectedSegment.end &&
545
+ seg.text === selectedSegment.text
546
+ );
547
+ }
548
+
549
+ // Update both transcription and current segments
550
+ const updatedTranscription = {
551
+ ...transcription,
552
+ aligned_segments: sortedSegments,
553
+ };
554
+
555
+ set({
556
+ transcription: updatedTranscription,
557
+ currentSegments: sortedSegments,
558
+ selectedSegmentIndex: newSelectedIndex,
559
+ });
560
+ },
561
+
562
+ updateSegmentText: (index: number, text: string) => {
563
+ const {currentSegments, transcription, _recordHistoryDebounced} = get();
564
+ if (
565
+ !currentSegments ||
566
+ !transcription ||
567
+ index < 0 ||
568
+ index >= currentSegments.length
569
+ )
570
+ return;
571
+
572
+ const updatedSegments = [...currentSegments];
573
+ updatedSegments[index] = {
574
+ ...updatedSegments[index],
575
+ text,
576
+ };
577
+
578
+ // Update both transcription and current segments
579
+ const updatedTranscription = {
580
+ ...transcription,
581
+ aligned_segments: updatedSegments,
582
+ };
583
+
584
+ set({
585
+ transcription: updatedTranscription,
586
+ currentSegments: updatedSegments,
587
+ });
588
+
589
+ // Record history with debounce for text changes
590
+ _recordHistoryDebounced(updatedSegments);
591
+ },
592
+
593
+ deleteSegment: (index: number) => {
594
+ const {
595
+ currentSegments,
596
+ transcription,
597
+ selectedSegmentIndex,
598
+ _recordHistoryImmediate,
599
+ } = get();
600
+ if (
601
+ !currentSegments ||
602
+ !transcription ||
603
+ index < 0 ||
604
+ index >= currentSegments.length
605
+ )
606
+ return;
607
+
608
+ const updatedSegments = currentSegments.filter(
609
+ (_: AlignedSegment, i: number) => i !== index
610
+ );
611
+
612
+ // Adjust selected segment index if necessary
613
+ let newSelectedIndex = selectedSegmentIndex;
614
+ if (selectedSegmentIndex === index) {
615
+ newSelectedIndex = null; // Clear selection if we deleted the selected segment
616
+ } else if (
617
+ selectedSegmentIndex !== null &&
618
+ selectedSegmentIndex > index
619
+ ) {
620
+ newSelectedIndex = selectedSegmentIndex - 1; // Adjust index if selected segment was after deleted one
621
+ }
622
+
623
+ // Update both transcription and current segments
624
+ const updatedTranscription = {
625
+ ...transcription,
626
+ aligned_segments: updatedSegments,
627
+ };
628
+
629
+ set({
630
+ transcription: updatedTranscription,
631
+ currentSegments: updatedSegments,
632
+ selectedSegmentIndex: newSelectedIndex,
633
+ });
634
+
635
+ // Record history immediately for deletions since they're instant actions
636
+ _recordHistoryImmediate(updatedSegments);
637
+ },
638
+
639
+ mergeSegmentsByProximity: (maxDurationSeconds: number) => {
640
+ const {
641
+ transcription,
642
+ currentSegments,
644
+ _recordHistoryDebounced,
645
+ } = get();
646
+ if (!transcription) return;
647
+
648
+ console.log(`Merge threshold changed to: ${maxDurationSeconds}s`);
649
+
650
+ // Always use current segments - this is the source of truth for user's changes
651
+ if (!currentSegments) {
652
+ console.warn("No currentSegments available for merging");
653
+ return;
654
+ }
655
+
656
+ // const originalSegments = history.length > 0 ? [...history[0]] : [...transcription.aligned_segments];
657
+
658
+ // // If threshold is 0, reset to original segments (clear all merge history)
659
+ // if (maxDurationSeconds === 0) {
660
+ // console.log(`Resetting to original ${originalSegments.length} segments`);
661
+
662
+ // // Strip merge history from original segments
663
+ // const cleanedOriginals = originalSegments.map(segment => ({
664
+ // ...segment,
665
+ // mergedFrom: undefined,
666
+ // mergeThreshold: undefined,
667
+ // }));
668
+
669
+ // const updatedTranscription = {
670
+ // ...transcription,
671
+ // aligned_segments: cleanedOriginals,
672
+ // };
673
+
674
+ // set({
675
+ // transcription: updatedTranscription,
676
+ // currentSegments: cleanedOriginals,
677
+ // selectedSegmentIndex: null,
678
+ // });
679
+ // return;
680
+ // }
681
+
682
+ // Step 1: Intelligently split segments that were merged at higher thresholds
683
+ const splitSegmentsRecursively = (
684
+ segment: AlignedSegment
685
+ ): AlignedSegment[] => {
686
+ // If this segment has no merge history or was merged at/below current threshold, keep it
687
+ if (
688
+ !segment.mergedFrom ||
689
+ !segment.mergeThreshold ||
690
+ segment.mergeThreshold <= maxDurationSeconds
691
+ ) {
692
+ return [segment];
693
+ }
694
+
695
+ // This segment was merged above the current threshold, split it
696
+ console.log(
697
+ `Splitting segment merged at ${segment.mergeThreshold}s (current threshold: ${maxDurationSeconds}s)`
698
+ );
699
+
700
+ // Calculate the relative positions of constituents within the current segment's timing
701
+ const currentStart = segment.start;
702
+ const currentEnd = segment.end;
703
+ const currentDuration = currentEnd - currentStart;
704
+
705
+ // Get the original combined duration when constituents were first merged
706
+ const constituents = segment.mergedFrom;
707
+ const originalTotalDuration = constituents.reduce(
708
+ (sum, c) => sum + c.duration,
709
+ 0
710
+ );
711
+
712
+ // Position each constituent relative to current segment position
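+ // e.g. constituents of 2s and 1s inside a segment resized to 6s become 4s and 2s, preserving the 2:1 ratio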
713
+ let cumulativeTime = 0;
714
+ const repositionedConstituents = constituents
715
+ .map((constituent) => {
716
+ const relativeStart = cumulativeTime / originalTotalDuration;
717
+ const relativeEnd =
718
+ (cumulativeTime + constituent.duration) / originalTotalDuration;
719
+
720
+ const newStart = currentStart + relativeStart * currentDuration;
721
+ const newEnd = currentStart + relativeEnd * currentDuration;
722
+
723
+ cumulativeTime += constituent.duration;
724
+
725
+ const repositioned: AlignedSegment = {
726
+ ...constituent,
727
+ start: newStart,
728
+ end: newEnd,
729
+ duration: newEnd - newStart,
730
+ };
731
+
732
+ // Recursively split this constituent if needed
733
+ return splitSegmentsRecursively(repositioned);
734
+ })
735
+ .flat();
736
+
737
+ return repositionedConstituents;
738
+ };
739
+
740
+ let segments = currentSegments.flatMap((segment) =>
741
+ splitSegmentsRecursively(segment)
742
+ );
743
+
744
+ console.log(`After splitting: ${segments.length} segments`);
745
+
746
+ // Step 2: Merge segments that can be merged at the current threshold
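+ // Greedy: each pass merges the adjacent pair with the smallest gap whose combined span still fits the threshold, then rescans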
747
+ const originalCount = segments.length;
748
+ let merged = true;
749
+
750
+ while (merged && segments.length > 1) {
751
+ merged = false;
752
+ let closestDistance = Infinity;
753
+ let closestPair = -1;
754
+
755
+ // Find the closest pair of adjacent segments that can be merged
756
+ for (let i = 0; i < segments.length - 1; i++) {
757
+ const segment1 = segments[i];
758
+ const segment2 = segments[i + 1];
759
+ const gap = segment2.start - segment1.end;
760
+
761
+ // Calculate what the duration would be if we merged these segments
762
+ const mergedDuration = segment2.end - segment1.start;
763
+
764
+ // Only consider this pair if the merged duration wouldn't exceed the threshold
765
+ if (mergedDuration <= maxDurationSeconds && gap < closestDistance) {
766
+ closestDistance = gap;
767
+ closestPair = i;
768
+ }
769
+ }
770
+
771
+ // Merge the closest pair if found
772
+ if (closestPair !== -1) {
773
+ const segment1 = segments[closestPair];
774
+ const segment2 = segments[closestPair + 1];
775
+
776
+ // Collect all constituent segments (handle nested merges)
777
+ const getAllConstituents = (
778
+ segment: AlignedSegment
779
+ ): AlignedSegment[] => {
780
+ if (segment.mergedFrom) {
781
+ return segment.mergedFrom.flatMap(getAllConstituents);
782
+ }
783
+ // Return atomic segment without merge history
784
+ return [
785
+ {
786
+ start: segment.start,
787
+ end: segment.end,
788
+ duration: segment.duration,
789
+ text: segment.text,
790
+ chunk_index: segment.chunk_index,
791
+ speech_segment_index: segment.speech_segment_index,
792
+ },
793
+ ];
794
+ };
795
+
796
+ const constituents1 = getAllConstituents(segment1);
797
+ const constituents2 = getAllConstituents(segment2);
798
+ const allConstituents = [...constituents1, ...constituents2];
799
+
800
+ const mergedSegment: AlignedSegment = {
801
+ start: segment1.start,
802
+ end: segment2.end,
803
+ duration: segment2.end - segment1.start,
804
+ text: `${segment1.text} ${segment2.text}`,
805
+ chunk_index: segment1.chunk_index,
806
+ speech_segment_index: segment1.speech_segment_index,
807
+ mergedFrom: allConstituents,
808
+ mergeThreshold: maxDurationSeconds,
809
+ };
810
+
811
+ segments = [
812
+ ...segments.slice(0, closestPair),
813
+ mergedSegment,
814
+ ...segments.slice(closestPair + 2),
815
+ ];
816
+ merged = true;
817
+ console.log(
818
+ `Merged segments: "${segment1.text}" + "${segment2.text}"`
819
+ );
820
+ }
821
+ }
822
+
823
+ console.log(
824
+ `Final result: ${originalCount} → ${segments.length} segments`
825
+ );
826
+
827
+ // Update both transcription and current segments
828
+ const updatedTranscription = {
829
+ ...transcription,
830
+ aligned_segments: segments,
831
+ };
832
+
833
+ set({
834
+ transcription: updatedTranscription,
835
+ currentSegments: segments,
836
+ selectedSegmentIndex: null,
837
+ });
838
+
839
+ // Record history with debounce for merge slider changes
840
+ _recordHistoryDebounced(segments);
841
+ },
842
+
843
+ // Viewport actions
844
+ setViewport: (start: number, end: number) => {
845
+ set({
846
+ viewportStart: start,
847
+ viewportEnd: end,
848
+ });
849
+ },
850
+
851
+ initializeViewport: (duration: number) => {
852
+ const FIXED_VIEWPORT_DURATION = 30; // Fixed viewport window is always 30 seconds
853
+ const viewportDuration = Math.min(FIXED_VIEWPORT_DURATION, duration);
854
+ set({
855
+ viewportStart: 0,
856
+ viewportEnd: viewportDuration,
857
+ });
858
+ },
859
+
860
+ // History actions
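+ // historyIndex points at the snapshot currently shown; undo/redo move the pointer and re-apply that snapshot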
861
+ undo: () => {
862
+ const {history, historyIndex, transcription} = get();
863
+ if (historyIndex > 0) {
864
+ const newIndex = historyIndex - 1;
865
+ const segments = history[newIndex];
866
+
867
+ // Update both transcription and current segments
868
+ const updatedTranscription = {
869
+ ...transcription!,
870
+ aligned_segments: [...segments],
871
+ };
872
+
873
+ set({
874
+ transcription: updatedTranscription,
875
+ currentSegments: [...segments],
876
+ historyIndex: newIndex,
877
+ selectedSegmentIndex: null,
878
+ canUndo: newIndex > 0,
879
+ canRedo: newIndex < history.length - 1,
880
+ });
881
+ }
882
+ },
883
+
884
+ redo: () => {
885
+ const {history, historyIndex, transcription} = get();
886
+ if (historyIndex < history.length - 1) {
887
+ const newIndex = historyIndex + 1;
888
+ const segments = history[newIndex];
889
+
890
+ // Update both transcription and current segments
891
+ const updatedTranscription = {
892
+ ...transcription!,
893
+ aligned_segments: [...segments],
894
+ };
895
+
896
+ set({
897
+ transcription: updatedTranscription,
898
+ currentSegments: [...segments],
899
+ historyIndex: newIndex,
900
+ selectedSegmentIndex: null,
901
+ canUndo: newIndex > 0,
902
+ canRedo: newIndex < history.length - 1,
903
+ });
904
+ }
905
+ },
906
+
907
+ // Complex actions
908
+ handleFileSelect: (selectedFile: File) => {
909
+ // Reject video files - only allow audio
910
+ if (selectedFile.type.startsWith("video/")) {
911
+ set({
912
+ error:
913
+ "Video files are not supported. Please upload an audio file only.",
914
+ });
915
+ return;
916
+ }
917
+
918
+ // Reject non-audio files
919
+ if (!selectedFile.type.startsWith("audio/")) {
920
+ set({
921
+ error: "Invalid file type. Please upload an audio file.",
922
+ });
923
+ return;
924
+ }
925
+
926
+ const {mediaUrl} = get();
927
+
928
+ // Clean up previous media URL
929
+ if (mediaUrl) {
930
+ URL.revokeObjectURL(mediaUrl);
931
+ }
932
+
933
+ // Create new object URL for media playback
934
+ const url = URL.createObjectURL(selectedFile);
935
+
936
+ // Track file upload
937
+ const fileType = "audio";
938
+ const fileSizeMB =
939
+ Math.round((selectedFile.size / (1024 * 1024)) * 10) / 10; // Round to 1 decimal
940
+ trackFileUpload(fileType, fileSizeMB);
941
+
942
+ set({
943
+ file: selectedFile,
944
+ mediaUrl: url,
945
+ transcription: null,
946
+ currentTime: 0,
947
+ activeSegmentIndex: null,
948
+ error: null,
949
+ isVideoFile: false,
950
+ hasFile: true,
951
+ hasTranscription: false,
952
+ currentSegments: null,
953
+ });
954
+ },
955
+
956
+ handleTranscribe: async () => {
957
+ const {file, selectedLanguage, selectedScript, setTranscription} =
958
+ get();
959
+ if (!file) return;
960
+
961
+ set({isLoading: true, error: null});
962
+
963
+ // Track transcription start
964
+ if (selectedLanguage) {
965
+ trackTranscriptionStart(selectedLanguage);
966
+ }
967
+
968
+ const startTime = Date.now();
969
+
970
+ try {
971
+ const result = await transcribeAudio(
972
+ file,
973
+ selectedLanguage,
974
+ selectedScript,
975
+ (isProcessing) => set({isProcessingVideo: isProcessing})
976
+ );
977
+
978
+ // Track transcription completion
979
+ if (selectedLanguage) {
980
+ const duration = Math.round((Date.now() - startTime) / 1000); // Duration in seconds
981
+ trackTranscriptionComplete(selectedLanguage, duration);
982
+ }
983
+
984
+ // Use setTranscription to properly initialize history
985
+ setTranscription(result);
986
+ } catch (err) {
987
+ console.error("Transcription error:", err);
988
+
989
+ // Track transcription error
990
+ if (selectedLanguage) {
991
+ const errorMessage =
992
+ err instanceof Error ? err.message : "Unknown error";
993
+ trackTranscriptionError(selectedLanguage, errorMessage);
994
+ }
995
+
996
+ set({
997
+ error:
998
+ err instanceof Error ? err.message : "An unknown error occurred",
999
+ });
1000
+ } finally {
1001
+ set({isLoading: false, isProcessingVideo: false});
1002
+ }
1003
+ },
1004
+
1005
+ handleDownloadVideoWithSubtitles: async () => {
1006
+ const {
1007
+ file,
1008
+ transcription,
1009
+ selectedLanguage,
1010
+ setIsDownloadingVideo,
1011
+ setError,
1012
+ } = get();
1013
+ if (!file || !transcription) return;
1014
+
1015
+ setIsDownloadingVideo(true);
1016
+ try {
1017
+ const srtContent = generateSRT(transcription.aligned_segments);
1018
+ const filename = file.name.replace(
1019
+ /\.[^/.]+$/,
1020
+ "_with_subtitles.mp4"
1021
+ );
1022
+ // Pass the selected language or fallback to 'eng'
1023
+ const language = selectedLanguage || "eng";
1024
+ await downloadVideoWithSubtitles(
1025
+ file,
1026
+ srtContent,
1027
+ filename,
1028
+ language,
1029
+ "srt",
1030
+ "mp4"
1031
+ );
1032
+
1033
+ // Track video with subtitles download
1034
+ if (selectedLanguage) {
1035
+ trackDownloadVideoWithSubtitles(selectedLanguage);
1036
+ }
1037
+ } catch (err) {
1038
+ console.error("Error creating video with subtitles:", err);
1039
+ setError("Failed to create video with subtitles");
1040
+ } finally {
1041
+ setIsDownloadingVideo(false);
1042
+ }
1043
+ },
1044
+
1045
+ // Recording actions
1046
+ startRecording: (type: "audio" | "video") => {
1047
+ set({
1048
+ isRecording: true,
1049
+ recordingType: type,
1050
+ recordedBlob: null,
1051
+ });
1052
+ },
1053
+
1054
+ stopRecording: () => {
1055
+ set({
1056
+ isRecording: false,
1057
+ recordingType: null,
1058
+ });
1059
+ },
1060
+
1061
+ setRecordedBlob: (blob: Blob | null) => {
1062
+ if (blob) {
1063
+ // Store the current recording type before it gets cleared
1064
+ const currentRecordingType = get().recordingType;
1065
+
1066
+ // Convert blob to ArrayBuffer first, then back to Blob to ensure data persistence
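+ // (MediaRecorder blobs can become unreadable after the recorder is torn down in some browsers)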
1067
+ blob
1068
+ .arrayBuffer()
1069
+ .then((arrayBuffer) => {
1070
+ const {mediaUrl: currentUrl, showWelcomeModal} = get();
1071
+
1072
+ // Clean up previous media URL
1073
+ if (currentUrl) {
1074
+ URL.revokeObjectURL(currentUrl);
1075
+ }
1076
+
1077
+ const extension = "webm"; // recordings are saved as WebM for both audio and video
1079
+ const mimeType =
1080
+ currentRecordingType === "video" ? "video/webm" : "audio/webm";
1081
+
1082
+ // Create a new blob from the ArrayBuffer to ensure data persistence
1083
+ const persistentBlob = new Blob([arrayBuffer], {type: mimeType});
1084
+
1085
+ // Convert to File object
1086
+ const file = new File(
1087
+ [persistentBlob],
1088
+ `recorded_${currentRecordingType}.${extension}`,
1089
+ {
1090
+ type: mimeType,
1091
+ lastModified: Date.now(),
1092
+ }
1093
+ );
1094
+
1095
+ // Create URL from the persistent blob
1096
+ const url = URL.createObjectURL(persistentBlob);
1097
+
1103
+
1104
+ // Track file upload
1105
+ const fileType =
1106
+ currentRecordingType === "video" ? "video" : "audio";
1107
+ const fileSizeMB =
1108
+ Math.round((persistentBlob.size / (1024 * 1024)) * 10) / 10;
1109
+ trackFileUpload(fileType, fileSizeMB);
1110
+
1111
+ // Set all the state - preserve existing media refs!
1112
+ const {audioRef, videoRef} = get();
1113
+ set({
1114
+ ...initialState,
1115
+ audioRef, // Preserve existing audioRef
1116
+ videoRef, // Preserve existing videoRef
1117
+ recordedBlob: persistentBlob, // Store the persistent blob
1118
+ file: file,
1119
+ mediaUrl: url,
1120
+ isRecording: false,
1121
+ recordingType: null,
1122
+ isVideoFile: currentRecordingType === "video",
1123
+ hasFile: true,
1124
+ hasTranscription: false,
1125
+ showWelcomeModal,
1126
+ });
1127
+ })
1128
+ .catch((error) => {
1129
+ console.error("Failed to create persistent blob:", error);
1130
+ set({error: "Failed to process recorded media"});
1131
+ });
1132
+ } else {
1133
+ set({recordedBlob: blob});
1134
+ }
1135
+ },
1136
+
1137
+ reset: () => {
1138
+ const {mediaUrl} = get();
1139
+
1140
+ // Clean up media URL
1141
+ if (mediaUrl) {
1142
+ URL.revokeObjectURL(mediaUrl);
1143
+ }
1144
+
1145
+ set({
1146
+ ...initialState,
1147
+ isVideoFile: false,
1148
+ hasFile: false,
1149
+ hasTranscription: false,
1150
+ currentSegments: null,
+ canUndo: false,
+ canRedo: false,
1151
+ });
1152
+ },
1153
+ }),
1154
+ {
1155
+ name: "transcription-store", // Name for devtools
1156
+ }
1157
+ )
1158
+ );
1159
+
1160
+ // Export the store hook directly - components should use useTranscriptionStore()
1161
+ // and destructure what they need directly from the store
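+ // e.g.: const {file, isLoading, handleTranscribe} = useTranscriptionStore();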
frontend/src/utils/languages.ts ADDED
The diff for this file is too large to render. See raw diff
 
frontend/src/utils/mediaTypes.ts ADDED
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Supported media file types for transcription
3
+ * These should match the server's supported formats in convert_media_to_wav.py
4
+ */
5
+
6
+ export const SUPPORTED_AUDIO_FORMATS = [
7
+ 'WAV', 'MP3', 'M4A', 'AAC', 'FLAC', 'OGG', 'WMA'
8
+ ] as const;
9
+
10
+ export const SUPPORTED_VIDEO_FORMATS = [
11
+ 'MP4', 'AVI', 'MOV', 'MKV'
12
+ ] as const;
13
+
14
+ /**
15
+ * Common codec information for supported formats
16
+ * Note: Some esoteric codecs within these containers may not be supported
17
+ */
18
+ export const CODEC_INFO = {
19
+ // Audio codecs - widely supported
20
+ audio: {
21
+ common: ['AAC', 'MP3', 'PCM', 'FLAC', 'Vorbis'],
22
+ note: 'Most standard audio codecs are supported'
23
+ },
24
+ // Video codecs - audio track extraction
25
+ video: {
26
+ common: ['H.264 (AVC)', 'H.265 (HEVC)', 'VP9', 'AV1'],
27
+ legacy: ['H.263', 'MPEG-4', 'DivX', 'XviD'],
28
+ note: 'Audio is extracted from video - codec compatibility depends on FFmpeg support'
29
+ }
30
+ } as const;
31
+
32
+ export const SUPPORTED_FORMATS = {
33
+ audio: SUPPORTED_AUDIO_FORMATS,
34
+ video: SUPPORTED_VIDEO_FORMATS,
35
+ all: [...SUPPORTED_AUDIO_FORMATS, ...SUPPORTED_VIDEO_FORMATS]
36
+ } as const;
37
+
38
+ /**
40
+ * Get potential compatibility warnings for file types
41
+ */
42
+ export const getCompatibilityWarning = (extension: string): string | null => {
43
+ const ext = extension.toLowerCase().replace('.', '');
44
+
45
+ switch (ext) {
46
+ case 'mp4':
47
+ return 'MP4 files with uncommon codecs (e.g., proprietary or very old codecs) may not process correctly. H.264 video with AAC audio works best.';
48
+ case 'avi':
49
+ return 'AVI files may contain various codecs. If processing fails, try converting to MP4 with H.264/AAC first.';
50
+ case 'mkv':
51
+ return 'MKV is a container format that supports many codecs. Most common codecs work, but some proprietary ones may not.';
52
+ case 'mov':
53
+ return 'MOV files usually work well, but very old or proprietary Apple codecs may cause issues.';
54
+ case 'wma':
55
+ case 'aac':
56
+ return 'Some DRM-protected or uncommon codec variants may not be supported.';
57
+ default:
58
+ return null;
59
+ }
60
+ };