jeanma, kern3l, Alexis-Hz, EC2 Default User, mduppes, Mark Duppenthaler committed
Commit ae238b3 · verified · 0 parents

Omnilingual ASR transcription demo


Co-authored-by: kern3l <[email protected]>
Co-authored-by: Alexis-Hz <[email protected]>
Co-authored-by: EC2 Default User <EC2 Default [email protected]>
Co-authored-by: mduppes <[email protected]>
Co-authored-by: EC2 Default User <EC2 Default [email protected]>
Co-authored-by: Mark Duppenthaler <Mark [email protected]>

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the complete changeset.

Files changed (50)
  1. .dockerignore +96 -0
  2. .gitattributes +36 -0
  3. .gitignore +158 -0
  4. Dockerfile +151 -0
  5. README.md +160 -0
  6. frontend/.env +5 -0
  7. frontend/.gitignore +38 -0
  8. frontend/README.md +60 -0
  9. frontend/index.html +13 -0
  10. frontend/package-lock.json +0 -0
  11. frontend/package.json +32 -0
  12. frontend/postcss.config.js +6 -0
  13. frontend/public/vite.svg +6 -0
  14. frontend/src/App.tsx +28 -0
  15. frontend/src/analytics/Analytics.tsx +181 -0
  16. frontend/src/analytics/CookieBanner.tsx +53 -0
  17. frontend/src/analytics/gaEvents.ts +97 -0
  18. frontend/src/components/CanvasTimeline.tsx +393 -0
  19. frontend/src/components/ErrorBoundary.tsx +131 -0
  20. frontend/src/components/FeedbackCard.tsx +27 -0
  21. frontend/src/components/FullTranscription.tsx +152 -0
  22. frontend/src/components/LanguageSelector.tsx +262 -0
  23. frontend/src/components/MediaDownloadControls.tsx +75 -0
  24. frontend/src/components/MediaEditControls.tsx +113 -0
  25. frontend/src/components/MediaPlayer.tsx +130 -0
  26. frontend/src/components/MediaRecorder.tsx +353 -0
  27. frontend/src/components/MinimapTimeline.tsx +509 -0
  28. frontend/src/components/QuickGuide.tsx +176 -0
  29. frontend/src/components/SegmentEditor.tsx +92 -0
  30. frontend/src/components/ServerStatusIndicator.tsx +241 -0
  31. frontend/src/components/TermsModal.tsx +675 -0
  32. frontend/src/components/TipsNotice.tsx +25 -0
  33. frontend/src/components/TranscriptionControls.tsx +152 -0
  34. frontend/src/components/TranscriptionPlayer.tsx +221 -0
  35. frontend/src/components/TranscriptionSideBar.tsx +172 -0
  36. frontend/src/components/TranscriptionWarningModal.tsx +69 -0
  37. frontend/src/components/WelcomeModal.tsx +94 -0
  38. frontend/src/hooks/useAudioAnalyzer.ts +91 -0
  39. frontend/src/hooks/useDragAndDrop.ts +118 -0
  40. frontend/src/hooks/useMediaTimeSync.ts +69 -0
  41. frontend/src/hooks/useTimelineDragControls.ts +416 -0
  42. frontend/src/hooks/useTimelineGeometry.ts +62 -0
  43. frontend/src/hooks/useTimelineRenderer.ts +254 -0
  44. frontend/src/index.css +7 -0
  45. frontend/src/main.tsx +10 -0
  46. frontend/src/pages/TranscriptionPage.tsx +136 -0
  47. frontend/src/services/transcriptionApi.ts +273 -0
  48. frontend/src/stores/transcriptionStore.ts +1161 -0
  49. frontend/src/utils/languages.ts +0 -0
  50. frontend/src/utils/mediaTypes.ts +60 -0
.dockerignore ADDED
@@ -0,0 +1,96 @@
+ # Models directory - downloaded at runtime
+ server/models/
+ models/
+
+ # Node modules - installed during build
+ frontend/node_modules/
+ **/node_modules/
+
+ # Frontend build artifacts - built during Docker build
+ frontend/dist/
+ frontend/build/
+
+ # Development files
+ .git/
+ .gitignore
+ *.md
+ README*
+
+ # IDE and editor files
+ .vscode/
+ .idea/
+ *.swp
+ *.swo
+ *~
+
+ # OS generated files
+ .DS_Store
+ .DS_Store?
+ ._*
+ .Spotlight-V100
+ .Trashes
+ ehthumbs.db
+ Thumbs.db
+
+ # Python cache and artifacts
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # Virtual environments
+ venv/
+ env/
+ ENV/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # pytest
+ .pytest_cache/
+
+ # Coverage reports
+ htmlcov/
+ .tox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ .hypothesis/
+
+ # Logs
+ *.log
+ logs/
+
+ # Runtime data
+ pids
+ *.pid
+ *.seed
+ *.pid.lock
+
+ # Docker files (optional - uncomment if you don't want to include docker files)
+ # Dockerfile*
+ # docker-compose*
+ # .dockerignore
+
+ # Temporary files
+ tmp/
+ temp/
.gitattributes ADDED
@@ -0,0 +1,36 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,158 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ # wheels/
+ pip-wheel-metadata/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # IDE
+ .vscode/
+ .idea/
+ *.swp
+ *.swo
+ *~
+
+ # OS
+ .DS_Store
+ .DS_Store?
+ ._*
+ .Spotlight-V100
+ .Trashes
+ ehthumbs.db
+ Thumbs.db
+
+ # Logs
+ *.log
+ logs/
+
+ # Temporary files
+ *.tmp
+ *.temp
+
+ # Git backup directories
+ .git.backup
+
+ # Models directory (only at root level)
+ /models/
Dockerfile ADDED
@@ -0,0 +1,151 @@
+ # Frontend build stage
+ FROM node:18-alpine AS frontend-build
+
+ # Google Analytics build args
+ ARG VITE_ENABLE_ANALYTICS
+ ARG VITE_REACT_APP_GOOGLE_ANALYTICS_ID
+ ARG VITE_ALLOW_ALL_LANGUAGES
+
+ # Make build args available as environment variables during build
+ ENV VITE_ENABLE_ANALYTICS=${VITE_ENABLE_ANALYTICS}
+ ENV VITE_REACT_APP_GOOGLE_ANALYTICS_ID=${VITE_REACT_APP_GOOGLE_ANALYTICS_ID}
+ ENV VITE_ALLOW_ALL_LANGUAGES=${VITE_ALLOW_ALL_LANGUAGES}
+
+ WORKDIR /app/frontend
+ COPY frontend/package.json frontend/package-lock.json* ./
+ RUN npm install
+ COPY frontend/ ./
+ RUN npm run build
+
+ # Dockerfile to support the Transcriptions API build - works locally and on Hugging Face Spaces
+ FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04 as base
+
+ ENV PYTHON_VERSION=3.10 \
+     PYTHON_VERSION_SHORT=310
+
+ RUN apt-get update && apt-get upgrade -y
+
+ # Install system packages including audio processing libraries
+ RUN apt-get install -y \
+     build-essential \
+     wget \
+     python${PYTHON_VERSION} \
+     python3-pip \
+     libpq-dev
+
+ # Constants
+ ENV PYTHONUNBUFFERED TRUE
+
+ ARG DEBIAN_FRONTEND=noninteractive
+
+ # Set up user with UID 1000 for HF Spaces compatibility
+ RUN useradd -m -u 1000 user
+
+ # Install base utilities, linux packages, and audio processing libraries
+ RUN apt-get update && \
+     DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+     fakeroot \
+     ca-certificates \
+     curl \
+     vim \
+     ssh \
+     wget \
+     gcc \
+     git \
+     ffmpeg \
+     libsndfile1 \
+     libsox-fmt-all \
+     sox \
+     libavcodec-extra && \
+     apt-get clean && \
+     rm -rf /var/lib/apt/lists/*
+
+ # Install miniconda
+ ENV CONDA_DIR /opt/conda
+ # Put conda in path and install
+ ENV PATH=$CONDA_DIR/bin:$PATH
+ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh \
+     && /bin/bash ~/miniconda.sh -b -p /opt/conda
+
+ RUN conda config --set auto_activate_base false && \
+     conda config --set channel_priority flexible && \
+     mkdir -p ~/.conda && \
+     echo "channel_priority: flexible" > ~/.condarc && \
+     conda config --add channels conda-forge && \
+     conda config --set remote_max_retries 5 && \
+     conda config --set remote_connect_timeout_secs 30 && \
+     conda config --set remote_read_timeout_secs 30 && \
+     conda config --set show_channel_urls True && \
+     conda config --set auto_update_conda False && \
+     conda config --set notify_outdated_conda False && \
+     conda config --set report_errors False && \
+     conda config --set always_yes True && \
+     conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main && \
+     conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r && \
+     conda clean -afy
+
+ RUN conda config --set channel_priority false && \
+     conda create -n transcriptions-api python=${PYTHON_VERSION} -y && \
+     conda install -n transcriptions-api -c conda-forge \
+     libsndfile=1.0.31 \
+     numpy \
+     scipy \
+     -y
+
+ # Enable conda
+ SHELL ["conda", "run", "-n", "transcriptions-api", "/bin/bash", "-c"]
+
+ # Set up working directory and environment for user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ WORKDIR $HOME/app
+
+ # Copy requirements.txt and wheel file before installing dependencies
+ COPY --chown=user server/requirements.txt ./
+ COPY --chown=user server/wheels/omnilingual_asr-0.1.0-py3-none-any.whl ./
+
+ # Install the Omnilingual ASR library from the local wheel file
+ RUN pip install omnilingual_asr-0.1.0-py3-none-any.whl
+
+ # Install Python dependencies with proper conda activation
+ RUN pip install -r requirements.txt
+
+ # Install debugpy for development debugging
+ RUN pip install debugpy
+
+ # Copy server code into the image with proper ownership
+ COPY --chown=user ./server $HOME/app/server
+
+ # Copy frontend build from the frontend-build stage
+ COPY --from=frontend-build --chown=user /app/frontend/dist $HOME/app/frontend/dist
+
+ # Make scripts executable and create directories with proper ownership
+ RUN chmod +x $HOME/app/server/run.sh $HOME/app/server/download_models.sh && \
+     mkdir -p $HOME/app/models && \
+     chown -R user:user $HOME/app && \
+     chmod -R 755 $HOME/app
+
+ # Switch to user for runtime
+ USER user
+
+ # Create /data/models if possible (for HF Spaces)
+ RUN mkdir -p /data/models 2>/dev/null || true
+
+ # Set working directory to server
+ WORKDIR $HOME/app/server
+
+ # Expose port 7860 for HF Spaces (also works locally)
+ EXPOSE 7860
+
+ # For production: pre-download models into the image (optional)
+ # Uncomment the following lines if you want models baked into the production image
+ # RUN mkdir -p $HOME/app/models
+ # RUN cd $HOME/app/models && \
+ #     wget -O ctc_alignment_mling_uroman_model_dict.txt https://dl.fbaipublicfiles.com/mms/torchaudio/ctc_alignment_mling_uroman/dictionary.txt && \
+ #     wget -O ctc_alignment_mling_uroman_model.pt https://dl.fbaipublicfiles.com/mms/torchaudio/ctc_alignment_mling_uroman/model.pt && \
+ #     wget https://dl.fbaipublicfiles.com/mms/mms_1143_langs_tokenizer_spm.model && \
+ #     wget https://dl.fbaipublicfiles.com/mms/mms_XRI.pt
+
+ # Default command - works for both local and HF Spaces
+ CMD ["conda", "run", "--no-capture-output", "-n", "transcriptions-api", "./run.sh"]
README.md ADDED
@@ -0,0 +1,160 @@
+ ---
+ title: Omnilingual ASR Media Transcription
+ emoji: 🌍
+ colorFrom: blue
+ colorTo: green
+ sdk: docker
+ app_port: 7860
+ pinned: false
+ license: mit
+ suggested_hardware: a100-large
+ ---
+
+ # Experimental Omnilingual ASR Media Transcription Demo
+
+ A media transcription tool with a web interface for multilingual audio and video transcription using Meta's Omnilingual ASR model. Transcription is supported for 1600+ languages.
+
+ This application is designed primarily as a **web-based media transcription tool** with an intuitive frontend interface. While you can interact directly with the API endpoints, the recommended usage is through the web interface at `http://localhost:7860`.
+
+ ## HuggingFace Space Configuration
+
+ This application is configured to run as a HuggingFace Space; however, it has resource limitations because it is a public demo. To run your own dedicated Space, clone it with the following recommended specifications:
+
+ - **Hardware**: A100 GPU (80GB) - Required for loading the 7B parameter Omnilingual ASR model
+   - _Alternative_: Machines with lower GPU memory can use smaller models by setting the `MODEL_NAME` environment variable in HuggingFace Space settings, e.g. `omniASR_LLM_300M` (requires ~8GB GPU memory)
+ - **Persistent Storage**: Enabled for model caching and improved loading times. Medium (150GB)
+ - **Docker Runtime**: Uses a custom Dockerfile for fairseq2 and PyTorch integration
+ - **Port**: 7860 (HuggingFace standard)
+
+ The A100 machine is specifically chosen to accommodate the large Omnilingual ASR model (~14GB) in GPU memory, ensuring fast inference and real-time transcription capabilities.
+
+ ## Running Outside HuggingFace
+
+ While this application is designed for HuggingFace Spaces, **it can be run on any machine with Docker and GPU support**, with hardware requirements similar to the HuggingFace machines.
+
+ ## Getting Started
+
+ ### Running with Docker
+
+ 1. Build and run the container:
+
+ ```bash
+ docker build -t omnilingual-asr-transcriptions .
+ docker run --rm -p 7860:7860 --gpus all \
+   -e MODEL_NAME=omniASR_LLM_300M \
+   -v {your cache directory}:/home/user/app/models \
+   omnilingual-asr-transcriptions
+ ```
+
+ The media transcription app will be available at `http://localhost:7860`.
+
+ #### Docker Run Parameters Explained:
+
+ - `--rm`: Automatically remove the container when it exits
+ - `-p 7860:7860`: Map host port 7860 to container port 7860
+ - `--gpus all`: Enable GPU access for CUDA acceleration
+ - `-e MODEL_NAME=omniASR_LLM_300M`: Set the Omnilingual ASR model variant to use
+   - Options: `omniASR_LLM_1B` (default, 1B parameters), `omniASR_LLM_300M` (300M parameters, faster)
+ - `-e ENABLE_TOXIC_FILTERING=true`: Enable filtering of toxic words from transcription results (optional; see the sketch after this list)
+ - `-v {your cache directory}:/home/user/app/models`: Mount a local models directory
+   - **Purpose**: Persist downloaded models between container runs (14GB+ cache)
+   - **Benefits**: Avoid re-downloading models on each container restart
+   - **Path**: Adjust `{your cache directory}` to your local models directory
+
+ ### Available API Endpoints
65
+
66
+ #### Core Transcription Routes
67
+
68
+ - `GET /health` - Comprehensive health check with GPU/CUDA status, FFmpeg availability, and transcription status
69
+ - `GET /status` - Get current transcription status (busy/idle, progress, operation type)
70
+ - `POST /transcribe` - Audio transcription with automatic chunking for files of any length
71
+
72
+ #### Additional Routes
73
+
74
+ - `POST /combine-video-subtitles` - Combine video files with subtitle tracks
75
+ - `GET /` - Serve the web application frontend
76
+ - `GET /assets/<filename>` - Serve frontend static assets
77
+
78
+ ### Usage Recommendations
79
+
80
+ **Primary Usage**: Access the web interface at `http://localhost:7860` for an intuitive media transcription experience with drag-and-drop file upload, real-time progress tracking, and downloadable results.
81
+
82
+ **API Usage**: For programmatic access or integration with other tools, you can call the API endpoints directly as shown in the examples below.
83
+
84
+ ### Environment Variables
85
+
86
+ You are free to change these if you clone the space and set them in the Huggingface space settings or in your own server environment. In the public shared demo these are controled for an optimal experience.
87
+
88
+ #### Server Environment Variables
89
+
90
+ - `API_LOG_LEVEL` - Set logging level (DEBUG, INFO, WARNING, ERROR)
91
+ - `MODEL_NAME` - Omnilingual ASR model to use (default: omniASR_LLM_1B)
92
+ - `USE_CHUNKING` - Enable/disable audio chunking (default: true)
93
+ - `ENABLE_TOXIC_FILTERING` - Enable toxic word filtering from transcription results (default: false)
94
+
95
+ #### Frontend Environment Variables
96
+
97
+ - `VITE_ALLOW_ALL_LANGUAGES` - Set to `true` to show all 1,400+ supported languages in the language selector, or `false` to only show languages with error rates < 10% for public demo (default: false)
98
+ - `VITE_ENABLE_ANALYTICS` - Set to `true` to enable Google Analytics tracking, or `false` to disable analytics (default: false)
99
+ - `VITE_REACT_APP_GOOGLE_ANALYTICS_ID` - Your Google Analytics measurement ID (e.g., `G-XXXXXXXXXX`) for tracking usage when analytics are enabled
100
+
101
+ ### API Examples (For Developers)
102
+
103
+ For programmatic access or integration with other tools, you can call the API endpoints directly:
104
+
105
+ ```bash
106
+ # Health check
107
+ curl http://localhost:7860/health
108
+
109
+ # Get transcription status
110
+ curl http://localhost:7860/status
111
+
112
+ # Transcribe audio file
113
+ curl -X POST http://localhost:7860/transcribe \
114
+ -F "audio=@path/to/your/audio.wav"
115
+ ```
116
+
117
+ ## Project Structure
118
+
119
+ ```
120
+ omnilingual-asr-transcriptions/
121
+ ├── Dockerfile # Multi-stage build with frontend + backend
122
+ ├── README.md
123
+ ├── requirements.txt # Python dependencies
124
+ ├── deploy.sh # Deployment script
125
+ ├── run_docker.sh # Local Docker run script
126
+ ├── frontend/ # Web interface (React/Vite)
127
+ │ ├── package.json
128
+ │ ├── src/
129
+ │ └── dist/ # Built frontend (served by Flask)
130
+ ├── models/ # Model files (automatically downloaded)
131
+ │ ├── ctc_alignment_mling_uroman_model.pt
132
+ │ ├── ctc_alignment_mling_uroman_model_dict.txt
133
+ │ └── [Additional model files downloaded at runtime]
134
+ └── server/ # Flask API backend
135
+ ├── server.py # Main Flask application
136
+ ├── transcriptions_blueprint.py # API routes
137
+ ├── audio_transcription.py # Core transcription logic
138
+ ├── media_transcription_processor.py # Media processing
139
+ ├── transcription_status.py # Status tracking
140
+ ├── env_vars.py # Environment configuration
141
+ ├── run.sh # Production startup script
142
+ ├── download_models.sh # Model download script
143
+ ├── wheels/ # Pre-built Omnilingual ASR wheel packages
144
+ └── inference/ # Model inference components
145
+ ├── mms_model_pipeline.py # Omnilingual ASR model wrapper
146
+ ├── audio_chunker.py # Audio chunking logic
147
+ └── audio_sentence_alignment.py # Forced alignment
148
+ ```
149
+
150
+ ### Key Features
151
+
152
+ - **Simplified Architecture**: Single Docker container with built-in model management
153
+ - **Auto Model Download**: Models are downloaded automatically during container startup
154
+ - **Omnilingual ASR Integration**: Uses the latest Omnilingual ASR library with 1600+ language support
155
+ - **GPU Acceleration**: CUDA-enabled inference with automatic device detection
156
+ - **Web Interface**: Modern React frontend for easy testing and usage
157
+ - **Smart Transcription**: Single endpoint handles files of any length with automatic chunking
158
+ - **Intelligent Processing**: Automatic audio format detection and conversion
159
+
160
+ **Note**: Model files are large (14GB+ total) and are downloaded automatically when the container starts. The first run may take longer due to model downloads.
frontend/.env ADDED
@@ -0,0 +1,5 @@
+ VITE_SERVER_URL=''
+
+ # Set to 'true' to show all languages, 'false' to only show accurate languages for demo
+ VITE_ALLOW_ALL_LANGUAGES=true
+ VITE_ENABLE_ANALYTICS=false
frontend/.gitignore ADDED
@@ -0,0 +1,38 @@
+ # Vite/React/TypeScript frontend
+ node_modules/
+ dist/
+ .vite/
+ .env.local
+ .env.*
+ .DS_Store
+ npm-debug.log*
+ yarn-debug.log*
+ yarn-error.log*
+ pnpm-debug.log*
+ .eslintcache
+ .parcel-cache
+ .turbo/
+ .next/
+ .vercel/
+ .cache/
+ .storybook/
+ .swc/
+ .coverage/
+ coverage/
+ .sass-cache/
+ .nuxt/
+ .output/
+ .firebase/
+ .firebaserc
+ .netlify/
+ netlify.toml
+ .envrc
+ .env.test.local
+ .env.production.local
+ .env.development.local
+
+ # Editor directories and files
+ .idea/
+ .vscode/
+ *.sublime-workspace
+ *.sublime-project
frontend/README.md ADDED
@@ -0,0 +1,60 @@
+ # Project Title
+
+ A brief description of your project and its purpose.
+
+ ## Getting Started
+
+ These instructions will help you set up the project on your local machine for development and testing.
+
+ ### Prerequisites
+
+ - Node.js (version X.X.X or later)
+ - npm (version X.X.X or later)
+
+ ### Installation
+
+ 1. Clone the repository:
+    ```
+    git clone <repository-url>
+    ```
+
+ 2. Navigate to the project directory:
+    ```
+    cd frontend
+    ```
+
+ 3. Install the dependencies:
+    ```
+    npm install
+    ```
+
+ ### Running the Application
+
+ To start the development server, run:
+ ```
+ npm run dev
+ ```
+
+ Open your browser and go to `http://localhost:3000` to see the application in action.
+
+ ### Building for Production
+
+ To create a production build, run:
+ ```
+ npm run build
+ ```
+
+ The built files will be generated in the `dist` directory.
+
+ ## Usage
50
+
51
+ - Upload audio or video files to transcribe.
52
+ - View synchronized transcriptions and download subtitles in various formats.
53
+
54
+ ## Contributing
55
+
56
+ If you would like to contribute to this project, please fork the repository and submit a pull request.
57
+
58
+ ## License
59
+
60
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
frontend/index.html ADDED
@@ -0,0 +1,13 @@
+ <!DOCTYPE html>
+ <html lang="en">
+   <head>
+     <meta charset="UTF-8">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>Omnilingual ASR Media Transcription</title>
+     <link rel="stylesheet" href="/src/index.css">
+   </head>
+   <body class="bg-gray-900 text-white">
+     <div id="root"></div>
+     <script type="module" src="/src/main.tsx"></script>
+   </body>
+ </html>
frontend/package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
frontend/package.json ADDED
@@ -0,0 +1,32 @@
+ {
+   "name": "frontend",
+   "version": "1.0.0",
+   "private": true,
+   "scripts": {
+     "dev": "vite",
+     "build": "vite build",
+     "serve": "vite preview"
+   },
+   "dependencies": {
+     "@heroicons/react": "^2.2.0",
+     "daisyui": "^4.12.23",
+     "debounce": "^2.2.0",
+     "match-sorter": "^8.1.0",
+     "react": "^18.2.0",
+     "react-cookie-consent": "^9.0.0",
+     "react-dom": "^18.2.0",
+     "react-select": "^5.10.2",
+     "react-use": "^17.6.0",
+     "tailwindcss": "^3.0.0",
+     "zustand": "^5.0.7"
+   },
+   "devDependencies": {
+     "@types/react": "^18.2.15",
+     "@types/react-dom": "^18.2.7",
+     "@vitejs/plugin-react": "^4.0.3",
+     "autoprefixer": "^10.0.0",
+     "postcss": "^8.0.0",
+     "typescript": "^5.1.6",
+     "vite": "^4.4.5"
+   }
+ }
frontend/postcss.config.js ADDED
@@ -0,0 +1,6 @@
+ module.exports = {
+   plugins: {
+     tailwindcss: {},
+     autoprefixer: {},
+   },
+ };
frontend/public/vite.svg ADDED
frontend/src/App.tsx ADDED
@@ -0,0 +1,28 @@
+ import React from 'react';
+ import TranscriptionPage from './pages/TranscriptionPage';
+ import WelcomeModal from './components/WelcomeModal';
+ import { useTranscriptionStore } from './stores/transcriptionStore';
+ import { trackWelcomeModalClose } from './analytics/gaEvents';
+ import Analytics from './analytics/Analytics';
+
+ const App: React.FC = () => {
+   const { showWelcomeModal, setShowWelcomeModal } = useTranscriptionStore();
+
+   const handleCloseWelcomeModal = () => {
+     trackWelcomeModalClose();
+     setShowWelcomeModal(false);
+   };
+
+   return (
+     <div className="App">
+       <TranscriptionPage />
+       <WelcomeModal
+         isOpen={showWelcomeModal}
+         onClose={handleCloseWelcomeModal}
+       />
+       <Analytics />
+     </div>
+   );
+ };
+
+ export default App;
frontend/src/analytics/Analytics.tsx ADDED
@@ -0,0 +1,181 @@
+ import React from "react";
+ import CookieBanner from "./CookieBanner";
+
+ export const CONSENT_COOKIE_NAME = "omniasr_transcription_consent";
+
+ // Declare gtag global function
+ declare global {
+   interface Window {
+     gtag: (...args: any[]) => void;
+     dataLayer: any[];
+   }
+ }
+
+ const Analytics = () => {
+   const [analyticsEnabled, setAnalyticsEnabled] = React.useState(false);
+   const [consentState, setConsentState] = React.useState<boolean | null>(null); // In-memory fallback
+   const [showBanner, setShowBanner] = React.useState(false); // Control banner visibility
+
+   // Check if we're in iframe (like HuggingFace Spaces)
+   const isInIframe = () => {
+     try {
+       return window.self !== window.top;
+     } catch (e) {
+       return true;
+     }
+   };
+
+   // Get consent with fallback chain: memory -> localStorage -> sessionStorage -> cookies
+   const getConsent = (): boolean => {
+     // First check in-memory state (for HF spaces that block all storage)
+     if (consentState !== null) {
+       return consentState;
+     }
+
+     // Try localStorage
+     try {
+       const localValue = window.localStorage.getItem(CONSENT_COOKIE_NAME);
+       if (localValue !== null) {
+         return localValue === "true";
+       }
+     } catch (e) {}
+
+     // Try sessionStorage
+     try {
+       const sessionValue = window.sessionStorage.getItem(CONSENT_COOKIE_NAME);
+       if (sessionValue !== null) {
+         return sessionValue === "true";
+       }
+     } catch (e) {}
+
+     // Try cookies
+     try {
+       return document.cookie.includes(`${CONSENT_COOKIE_NAME}=true`);
+     } catch (e) {}
+
+     return false;
+   };
+
+   // Set consent with fallback chain
+   const setConsent = (accepted: boolean) => {
+     // Always set in-memory state first (works in all environments)
+     setConsentState(accepted);
+
+     // Try localStorage
+     try {
+       window.localStorage.setItem(CONSENT_COOKIE_NAME, accepted.toString());
+     } catch (e) {}
+
+     // Try sessionStorage as fallback
+     try {
+       window.sessionStorage.setItem(CONSENT_COOKIE_NAME, accepted.toString());
+     } catch (e) {}
+
+     // Try cookies (mainly for non-iframe environments)
+     if (!isInIframe()) {
+       try {
+         const expires = new Date();
+         expires.setFullYear(expires.getFullYear() + 1);
+         document.cookie = `${CONSENT_COOKIE_NAME}=${accepted}; expires=${expires.toUTCString()}; path=/; SameSite=Lax`;
+       } catch (e) {}
+     }
+   };
+
+   // Load gtag script dynamically
+   const loadGtagScript = (gaId: string) => {
+     return new Promise<void>((resolve, reject) => {
+       // Check if gtag is already loaded
+       if (window.gtag) {
+         resolve();
+         return;
+       }
+
+       // Create script element
+       const script = document.createElement('script');
+       script.async = true;
+       script.src = `https://www.googletagmanager.com/gtag/js?id=${gaId}`;
+
+       script.onload = () => {
+         // Initialize gtag
+         window.dataLayer = window.dataLayer || [];
+         window.gtag = function gtag() {
+           window.dataLayer.push(arguments);
+         };
+         window.gtag('js', new Date());
+         window.gtag('config', gaId, {
+           // Settings for iframe environments
+           send_page_view: false, // We'll send manually
+           cookie_flags: 'max-age=7200;secure;samesite=none', // For iframe support
+         });
+
+         console.log('GA: gtag script loaded');
+         resolve();
+       };
+
+       script.onerror = () => {
+         console.error('❌ Failed to load gtag script');
+         reject(new Error('Failed to load gtag script'));
+       };
+
+       document.head.appendChild(script);
+     });
+   };
+
+   // Enable analytics if consent given
+   const handleAcceptCookie = React.useCallback(() => {
+     console.log('User accepted analytics cookies');
+     setConsent(true);
+     setShowBanner(false); // Hide banner after acceptance
+
+     const gaId = import.meta.env.VITE_REACT_APP_GOOGLE_ANALYTICS_ID;
+     const analyticsEnabled = import.meta.env.VITE_ENABLE_ANALYTICS === 'true';
+
+     if (gaId && analyticsEnabled) {
+       loadGtagScript(gaId)
+         .then(() => {
+           setAnalyticsEnabled(true);
+           console.log('GA initialized successfully');
+
+           // Send initial pageview
+           const pathname = window.location.pathname;
+           window.gtag('event', 'page_view', {
+             page_title: document.title,
+             page_location: window.location.href,
+             page_path: pathname,
+           });
+         })
+         .catch((e) => {
+           console.error('GA initialization failed:', e);
+         });
+     }
+   }, []);
+
+   const handleDeclineCookie = React.useCallback(() => {
+     console.log('User declined analytics cookies');
+     setConsent(false);
+     setShowBanner(false); // Hide banner after decline
+   }, []);
+
+   // Check for existing consent on mount
+   React.useEffect(() => {
+     const existingConsent = getConsent();
+     if (existingConsent) {
+       console.log('GA: Found existing consent, initializing...');
+       handleAcceptCookie();
+       setShowBanner(false); // Don't show banner if consent already exists
+     } else {
+       setShowBanner(true); // Show banner if no consent
+     }
+   }, [handleAcceptCookie]);
+
+   // Note: pageview is now sent directly in handleAcceptCookie when gtag is loaded
+
+   return showBanner ? (
+     <CookieBanner
+       onAccept={handleAcceptCookie}
+       onDecline={handleDeclineCookie}
+     />
+   ) : null;
+ };
+
+ export default Analytics;
frontend/src/analytics/CookieBanner.tsx ADDED
@@ -0,0 +1,53 @@
+ import CookieConsent from "react-cookie-consent";
+ import { CONSENT_COOKIE_NAME } from "./Analytics";
+
+ interface CookieBannerProps {
+   onAccept?: (acceptedByScrolling: boolean) => void;
+   onDecline?: () => void;
+ }
+
+ const CookieBanner = ({ onAccept, onDecline }: CookieBannerProps) => {
+   return (
+     <CookieConsent
+       style={{
+         backgroundColor: "white",
+         color: "black",
+         alignItems: "center",
+         flexDirection: "column",
+       }}
+       contentStyle={{ flex: 1, margin: 0 }}
+       overlayStyle={{ backgroundColor: "rgba(0, 0, 0, .65)" }}
+       overlay
+       cookieName={CONSENT_COOKIE_NAME}
+       disableButtonStyles
+       declineButtonClasses="bg-gray-800 hover:bg-gray-900 text-white border border-gray-600 w-[136px] md:w-[208px] h-[36px] capitalize text-base font-medium rounded px-4 py-2 transition-colors"
+       buttonClasses="bg-blue-600 hover:bg-blue-700 text-white border-blue-600 w-[136px] md:w-[208px] h-[36px] capitalize text-base font-medium rounded px-4 py-2 transition-colors"
+       buttonText="Accept"
+       declineButtonText="Decline"
+       enableDeclineButton={true}
+       onAccept={onAccept}
+       onDecline={onDecline}
+       containerClasses="text-base font-medium p-10 md:p-14 pb-14 text-center" // overriding the default so that the popup isn't hidden by adblockers: https://github.com/Mastermindzh/react-cookie-consent/issues/64
+       contentClasses="max-w-[565px]"
+       buttonWrapperClasses="mt-7 mb-2 flex gap-5"
+     >
+       {" "}
+       Allow the use of cookies from Meta on this browser? To find out more about
+       the use of cookies, see our{" "}
+       <a href="https://www.facebook.com/privacy/policy" target="_blank" rel="noopener noreferrer">
+         <b>Privacy Policy</b>
+       </a>{" "}
+       and{" "}
+       <a
+         href="https://www.facebook.com/privacy/policies/cookies"
+         target="_blank"
+         rel="noopener noreferrer"
+       >
+         <b>Cookies Policy</b>
+       </a>
+       .
+     </CookieConsent>
+   );
+ };
+
+ export default CookieBanner;
frontend/src/analytics/gaEvents.ts ADDED
@@ -0,0 +1,97 @@
+ import { CONSENT_COOKIE_NAME } from "./Analytics";
+
+ // Declare gtag global function
+ declare global {
+   interface Window {
+     gtag: (...args: any[]) => void;
+   }
+ }
+
+ // Check if analytics are enabled and user has consented
+ const isAnalyticsEnabled = (): boolean => {
+   if (import.meta.env.VITE_ENABLE_ANALYTICS !== 'true') {
+     return false;
+   }
+
+   // Check localStorage first (works in iframes like HuggingFace), then cookies
+   try {
+     const localStorageValue = localStorage.getItem(CONSENT_COOKIE_NAME);
+     if (localStorageValue === "true") return true;
+   } catch (e) {}
+
+   return document.cookie.includes(`${CONSENT_COOKIE_NAME}=true`);
+ };
+
+ // Send GA4 event using gtag
+ export const sendGAEvent = (
+   eventCategory: string,
+   eventAction: string,
+   eventLabel?: string,
+   value?: number
+ ) => {
+   if (!isAnalyticsEnabled() || !window.gtag) {
+     return;
+   }
+
+   try {
+     // Use gtag event format for GA4
+     const eventName = `${eventCategory.toLowerCase()}_${eventAction.toLowerCase()}`;
+     const eventParams: any = {
+       event_category: eventCategory,
+       event_label: eventLabel,
+     };
+
+     if (value !== undefined) {
+       eventParams.value = value;
+     }
+
+     window.gtag('event', eventName, eventParams);
+   } catch (error) {
+     console.error("GA Event Error:", error);
+   }
+ };
+
+ // Predefined event functions for common actions
+ export const trackTranscriptionStart = (languageCode: string) => {
+   sendGAEvent("Transcription", "start", languageCode);
+ };
+
+ export const trackTranscriptionComplete = (languageCode: string, duration?: number) => {
+   sendGAEvent("Transcription", "complete", languageCode, duration);
+ };
+
+ export const trackTranscriptionError = (languageCode: string, errorMessage?: string) => {
+   sendGAEvent("Transcription", "error", `${languageCode}${errorMessage ? ` - ${errorMessage}` : ''}`);
+ };
+
+ export const trackFileUpload = (fileType: string, fileSizeMB?: number) => {
+   sendGAEvent("File", "upload", fileType, fileSizeMB);
+ };
+
+ export const trackLanguageChange = (languageCode: string) => {
+   sendGAEvent("Language", "select", languageCode);
+ };
+
+ export const trackDownloadSRT = (languageCode: string) => {
+   sendGAEvent("Download", "srt", languageCode);
+ };
+
+ export const trackDownloadVideoWithSubtitles = (languageCode: string) => {
+   sendGAEvent("Download", "video_with_subtitles", languageCode);
+ };
+
+ export const trackReset = () => {
+   sendGAEvent("App", "reset");
+ };
+
+ export const trackWelcomeModalClose = () => {
+   sendGAEvent("Modal", "welcome_close");
+ };
+
+ export const trackSegmentEdit = (languageCode: string, editType: "text" | "timing") => {
+   sendGAEvent("Edit", editType, languageCode);
+ };
+
+ export const trackSegmentDelete = (languageCode: string) => {
+   sendGAEvent("Edit", "delete_segment", languageCode);
+ };
frontend/src/components/CanvasTimeline.tsx ADDED
@@ -0,0 +1,393 @@
+ import React, {
+   useRef,
+   useEffect,
+   useState,
+   useCallback,
+   forwardRef,
+ } from "react";
+ import {AlignedSegment} from "../services/transcriptionApi";
+ import {useTranscriptionStore} from "../stores/transcriptionStore";
+ import {formatTime} from "../utils/subtitleUtils";
+ import {assignTracksToSegments, getMaxTrackCount} from "../utils/trackUtils";
+ import {useTimelineGeometry} from "../hooks/useTimelineGeometry";
+ import {useTimelineDragControls} from "../hooks/useTimelineDragControls";
+ import {useTimelineRenderer} from "../hooks/useTimelineRenderer";
+ import SegmentEditor from "./SegmentEditor";
+ import MediaDownloadControls from "./MediaDownloadControls";
+ import MediaEditControls from "./MediaEditControls";
+
+ interface CanvasTimelineProps {
+   audioRef: React.RefObject<HTMLAudioElement>;
+   videoRef: React.RefObject<HTMLVideoElement>;
+   onSeekToSegment: (segment: AlignedSegment) => void;
+   onTimeUpdate: () => void;
+   viewport?: {start: number; end: number};
+ }
+
+ const CanvasTimeline = forwardRef<HTMLDivElement, CanvasTimelineProps>(
+   ({audioRef, videoRef}, ref) => {
+     const canvasRef = useRef<HTMLCanvasElement>(null);
+     const containerRef = useRef<HTMLDivElement>(null);
+
+     // Combine the forwarded ref with our internal ref
+     const combinedRef = useCallback(
+       (node: HTMLDivElement | null) => {
+         // Use Object.defineProperty to safely assign to current
+         if (containerRef.current !== node) {
+           Object.defineProperty(containerRef, "current", {
+             value: node,
+             writable: true,
+             configurable: true,
+           });
+         }
+
+         if (typeof ref === "function") {
+           ref(node);
+         } else if (ref) {
+           // Type assertion to overcome readonly constraint
+           const mutableRef = ref as any;
+           mutableRef.current = node;
+         }
+       },
+       [ref]
+     );
+     const [canvasSize, setCanvasSize] = useState({width: 1200, height: 200});
+
+     const {
+       transcription,
+       currentTime,
+       activeSegmentIndex,
+       selectedSegmentIndex,
+       currentSegments,
+       setSelectedSegmentIndex,
+       updateSegmentText,
+       deleteSegment,
+     } = useTranscriptionStore();
+
+     // Constants
+     const constants = {
+       TRACK_HEIGHT: 32,
+       TRACK_PADDING: 4,
+       TIMELINE_PADDING: 0,
+       PIXELS_PER_SECOND: 300, // Increased from 200 to give segments more space
+     };
+
+     // Early return if no transcription
+     if (!transcription) {
+       return null;
+     }
+
+     const displaySegments = currentSegments || transcription.aligned_segments;
+     const segmentsWithTracks = assignTracksToSegments(displaySegments);
+     const trackCount = getMaxTrackCount(segmentsWithTracks);
+
+     // Get actual media duration from audio/video elements
+     const getMediaDuration = useCallback(() => {
+       const audioElement = audioRef.current;
+       const videoElement = videoRef.current;
+
+       if (audioElement && !isNaN(audioElement.duration)) {
+         return audioElement.duration;
+       }
+       if (videoElement && !isNaN(videoElement.duration)) {
+         return videoElement.duration;
+       }
+
+       // Fallback to transcription duration if media duration is not available
+       return transcription.total_duration;
+     }, [audioRef, videoRef, transcription.total_duration]);
+
+     const mediaDuration = getMediaDuration();
+     console.log({mediaDuration});
+
+     // Calculate canvas dimensions based on full media duration
+     const timelineWidth = mediaDuration * constants.PIXELS_PER_SECOND;
+     const timelineHeight =
+       constants.TIMELINE_PADDING * 2 +
+       trackCount * (constants.TRACK_HEIGHT + constants.TRACK_PADDING);
+
+     // Update canvas size when needed
+     useEffect(() => {
+       setCanvasSize({
+         width: timelineWidth, // Canvas internal resolution
+         height: Math.max(timelineHeight, 200),
+       });
+     }, [timelineWidth, timelineHeight, trackCount]);
+
+     // Initialize geometry utilities
+     const geometryUtils = useTimelineGeometry({
+       mediaDuration,
+       constants,
+     });
+
+     // Initialize drag controls
+     const dragControls = useTimelineDragControls({
+       segmentsWithTracks,
+       displaySegments,
+       geometryUtils,
+       canvasRef,
+       containerRef,
+       mediaDuration,
+       constants: {
+         TRACK_HEIGHT: constants.TRACK_HEIGHT,
+         TIMELINE_PADDING: constants.TIMELINE_PADDING,
+       },
+     });
+
+     // Initialize renderer
+     const {draw} = useTimelineRenderer({
+       canvasRef,
+       canvasSize,
+       segmentsWithTracks,
+       displaySegments,
+       currentTime,
+       activeSegmentIndex,
+       selectedSegmentIndex,
+       hoveredSegment: dragControls.hoveredSegment,
+       isDragging: dragControls.isDragging,
+       dragSegmentIndex: dragControls.dragSegmentIndex,
+       mediaDuration,
+       geometryUtils,
+       constants,
+     });
+
+     // State for smooth scrolling animation
+     const scrollAnimationRef = useRef<number | null>(null);
+
+     // Smooth scroll implementation using requestAnimationFrame
+     const smoothScrollTo = useCallback(
+       (
+         container: HTMLDivElement,
+         targetScrollLeft: number,
+         duration = 500
+       ): Promise<void> => {
+         return new Promise((resolve) => {
+           const startScrollLeft = container.scrollLeft;
+           const scrollDistance = targetScrollLeft - startScrollLeft;
+           const startTime = Date.now();
+
+           const animate = () => {
+             const currentTime = Date.now();
+             const elapsedTime = currentTime - startTime;
+             const progress = Math.min(elapsedTime / duration, 1);
+
+             // Use easeOutQuart for smooth deceleration
+             const easeOutQuart = 1 - Math.pow(1 - progress, 4);
+
+             container.scrollLeft =
+               startScrollLeft + scrollDistance * easeOutQuart;
+
+             if (progress < 1) {
+               scrollAnimationRef.current = requestAnimationFrame(animate);
+             } else {
+               scrollAnimationRef.current = null;
+               resolve();
+             }
+           };
+
+           // Cancel any existing animation
+           if (scrollAnimationRef.current) {
+             cancelAnimationFrame(scrollAnimationRef.current);
+           }
+
+           animate();
+         });
+       },
+       []
+     );
+
+     // Cleanup animation on unmount
+     useEffect(() => {
+       return () => {
+         if (scrollAnimationRef.current) {
+           cancelAnimationFrame(scrollAnimationRef.current);
+         }
+       };
+     }, []);
+
+     // Determine if media is playing for auto-scroll behavior
+     const isMediaPlaying = useCallback(() => {
+       const audioElement = audioRef.current;
+       const videoElement = videoRef.current;
+       const mediaElement = audioElement || videoElement;
+       return mediaElement && !mediaElement.paused && !mediaElement.ended;
+     }, [audioRef, videoRef]);
+
+     // Track if we're currently animating scroll to avoid re-triggering
+     const isScrollingRef = useRef(false);
+     const prevCurrentTimeRef = useRef(currentTime);
+
+     // Auto-scroll during playback: only when playing and playhead gets near edges (20%)
+     useEffect(() => {
+       const container = containerRef.current;
+       if (!container || !isMediaPlaying() || isScrollingRef.current) return;
+
+       const timeX = geometryUtils.timeToX(currentTime);
+       const containerWidth = container.clientWidth;
+       const currentScrollLeft = container.scrollLeft;
+       const maxScrollLeft = Math.max(0, container.scrollWidth - containerWidth);
+
+       // Calculate 20% edge boundaries
+       const leftEdge = currentScrollLeft + containerWidth * 0.2;
+       const rightEdge =
+         currentScrollLeft + containerWidth - containerWidth * 0.2;
+
+       // Only scroll if playhead is near edges
+       if (timeX < leftEdge || timeX > rightEdge) {
+         isScrollingRef.current = true;
+
+         // Center the playhead position
+         const targetScrollLeft = Math.max(
+           0,
+           Math.min(maxScrollLeft, timeX - containerWidth / 2)
+         );
+
+         smoothScrollTo(container, targetScrollLeft, 800).then(() => {
+           isScrollingRef.current = false;
+         });
+       }
+     }, [currentTime, geometryUtils, isMediaPlaying, smoothScrollTo]);
+
+     // Handle manual seeking (scrubbing, keyboard shortcuts, etc.)
+     useEffect(() => {
+       const container = containerRef.current;
+       if (!container || isScrollingRef.current) return;
+
+       const timeDifference = Math.abs(currentTime - prevCurrentTimeRef.current);
+       const isSeekOperation = timeDifference > 0.5; // Significant time jump indicates seeking
+
+       if (isSeekOperation) {
+         const timeX = geometryUtils.timeToX(currentTime);
+         const containerWidth = container.clientWidth;
+         const currentScrollLeft = container.scrollLeft;
+         const maxScrollLeft = Math.max(
+           0,
+           container.scrollWidth - containerWidth
+         );
+
+         // Check if the seek position is outside the visible area
+         const visibleStart = currentScrollLeft;
+         const visibleEnd = currentScrollLeft + containerWidth;
+
+         if (timeX < visibleStart || timeX > visibleEnd) {
+           isScrollingRef.current = true;
+
+           // Center the seek position
+           const targetScrollLeft = Math.max(
+             0,
+             Math.min(maxScrollLeft, timeX - containerWidth / 2)
+           );
+
+           smoothScrollTo(container, targetScrollLeft, 600).then(() => {
+             isScrollingRef.current = false;
+           });
+         }
+       }
+
+       prevCurrentTimeRef.current = currentTime;
+     }, [currentTime, geometryUtils, smoothScrollTo]);
+
+     // Redraw on scroll
+     useEffect(() => {
+       const container = containerRef.current;
+       if (!container) return;
+
+       const handleScroll = () => {
+         draw();
+       };
+
+       container.addEventListener("scroll", handleScroll);
+       return () => container.removeEventListener("scroll", handleScroll);
+     }, [draw]);
+
+     return (
+       <div className="flex-1 flex flex-col bg-gray-900 border-t border-gray-700 min-h-32">
+         {/* Header */}
+         <div className="px-4 py-2 bg-gray-800 border-b border-gray-700">
+           {/* Download Buttons - Centered above edit controls */}
+           {/* <div className="flex justify-center mb-2">
+             <MediaDownloadControls />
+           </div> */}
+
+           {/* Edit Controls */}
+           {/* <MediaEditControls /> */}
+         </div>
+
+         {/* Canvas Container */}
+         <div
+           ref={combinedRef}
+           className="flex-1 overflow-auto bg-black border-t border-slate-700"
+           style={{
+             minHeight: "200px",
+             scrollBehavior: "auto", // Changed from 'smooth' to 'auto' for responsive following
+           }}
+         >
+           <canvas
+             ref={canvasRef}
+             onMouseMove={dragControls.handleMouseMove}
+             onMouseDown={dragControls.handleMouseDown}
+             className="block"
+             style={{
+               width: `${canvasSize.width}px`,
+               height: `${canvasSize.height}px`,
+             }}
+           />
+         </div>
+
+         {/* Tooltip for hovered segment */}
+         {dragControls.hoveredSegment !== null &&
+           !dragControls.isDragging &&
+           !dragControls.isTimelineDragging &&
+           (() => {
+             // Find the segment in segmentsWithTracks that corresponds to the hovered original segment
+             const originalSegment =
+               displaySegments[dragControls.hoveredSegment];
+
+             // Safety check: ensure the segment exists
+             if (!originalSegment) return null;
+
+             const hoveredSegmentWithTrack = segmentsWithTracks.find(
+               (s) =>
+                 s.start === originalSegment.start &&
+                 s.end === originalSegment.end &&
+                 s.text === originalSegment.text
+             );
+
+             if (!hoveredSegmentWithTrack) return null;
+
+             return (
+               <div className="absolute bottom-4 left-4 bg-gray-800 text-white text-xs rounded px-2 py-1 pointer-events-none z-30 max-w-xs">
+                 <div className="whitespace-normal break-words">
+                   {hoveredSegmentWithTrack.text}
+                 </div>
+                 <div className="text-gray-400 mt-1">
+                   {formatTime(hoveredSegmentWithTrack.start)} -{" "}
+                   {formatTime(hoveredSegmentWithTrack.end)} (
+                   {hoveredSegmentWithTrack.duration.toFixed(1)}s)
+                 </div>
+                 <div className="text-yellow-400 mt-1 text-xs">
+                   Click to select • Drag to move • Drag edges to resize
+                 </div>
+               </div>
+             );
+           })()}
+
+         {/* Segment Editor at Bottom */}
+         {selectedSegmentIndex !== null &&
+           displaySegments[selectedSegmentIndex] && (
+             <SegmentEditor
+               segment={displaySegments[selectedSegmentIndex]}
+               segmentIndex={selectedSegmentIndex}
+               onUpdateText={updateSegmentText}
+               onDeleteSegment={deleteSegment}
+               onClose={() => setSelectedSegmentIndex(null)}
+             />
+           )}
+       </div>
+     );
+   }
+ );
+
+ CanvasTimeline.displayName = "CanvasTimeline";
+
+ export default CanvasTimeline;
frontend/src/components/ErrorBoundary.tsx ADDED
@@ -0,0 +1,131 @@
1
+ import React, { Component, ReactNode } from 'react';
2
+
3
+ interface Props {
4
+ children: ReactNode;
5
+ componentName?: string;
6
+ }
7
+
8
+ interface State {
9
+ hasError: boolean;
10
+ error: Error | null;
11
+ errorInfo: React.ErrorInfo | null;
12
+ }
13
+
14
+ class ErrorBoundary extends Component<Props, State> {
15
+ constructor(props: Props) {
16
+ super(props);
17
+ this.state = { hasError: false, error: null, errorInfo: null };
18
+ }
19
+
20
+ static getDerivedStateFromError(error: Error): State {
21
+ return { hasError: true, error, errorInfo: null };
22
+ }
23
+
24
+ componentDidCatch(error: Error, errorInfo: React.ErrorInfo) {
25
+ console.error('ErrorBoundary caught an error:', error, errorInfo);
26
+ this.setState({
27
+ error,
28
+ errorInfo
29
+ });
30
+ }
31
+
32
+ handleRetry = () => {
33
+ this.setState({ hasError: false, error: null, errorInfo: null });
34
+ };
35
+
36
+ handleCopyError = () => {
37
+ const { error, errorInfo } = this.state;
38
+ const { componentName } = this.props;
39
+
40
+ const errorText = `
41
+ Component: ${componentName || 'Unknown'}
42
+ Error: ${error?.message || 'Unknown error'}
43
+ Stack: ${error?.stack || 'No stack trace available'}
44
+ Component Stack: ${errorInfo?.componentStack || 'No component stack available'}
45
+ Timestamp: ${new Date().toISOString()}
46
+ `.trim();
47
+
48
+ navigator.clipboard.writeText(errorText).then(() => {
49
+ alert('Error details copied to clipboard!');
50
+ }).catch(() => {
51
+ // Fallback for older browsers
52
+ const textArea = document.createElement('textarea');
53
+ textArea.value = errorText;
54
+ document.body.appendChild(textArea);
55
+ textArea.select();
56
+ document.execCommand('copy');
57
+ document.body.removeChild(textArea);
58
+ alert('Error details copied to clipboard!');
59
+ });
60
+ };
61
+
62
+ render() {
63
+ if (this.state.hasError) {
64
+ const { componentName } = this.props;
65
+ const { error, errorInfo } = this.state;
66
+
67
+ return (
68
+ <div className="flex flex-col items-center justify-center p-8 bg-red-50 border border-red-200 rounded-lg m-4">
69
+ <div className="text-center mb-6">
70
+ <h2 className="text-xl font-semibold text-red-800 mb-2">
71
+ Sorry, something went wrong
72
+ </h2>
73
+ <p className="text-red-600 mb-4">
74
+ {componentName ? `An error occurred in the ${componentName} component.` : 'An unexpected error occurred.'}
75
+ </p>
76
+ <button
77
+ onClick={this.handleRetry}
78
+ className="px-4 py-2 bg-blue-600 text-white rounded hover:bg-blue-700 transition-colors mr-2"
79
+ >
80
+ Try Again
81
+ </button>
82
+ <button
83
+ onClick={this.handleCopyError}
84
+ className="px-4 py-2 bg-gray-600 text-white rounded hover:bg-gray-700 transition-colors"
85
+ >
86
+ Copy Error Details
87
+ </button>
88
+ </div>
89
+
90
+ <details className="w-full max-w-4xl">
91
+ <summary className="cursor-pointer text-red-700 font-medium mb-2 hover:text-red-800">
92
+ Show Error Details (for developers)
93
+ </summary>
94
+ <div className="bg-gray-100 p-4 rounded border text-sm font-mono overflow-auto max-h-96">
95
+ <div className="mb-4">
96
+ <strong className="text-red-700">Error Message:</strong>
97
+ <pre className="mt-1 whitespace-pre-wrap text-red-800">
98
+ {error?.message || 'Unknown error'}
99
+ </pre>
100
+ </div>
101
+
102
+ <div className="mb-4">
103
+ <strong className="text-red-700">Stack Trace:</strong>
104
+ <pre className="mt-1 whitespace-pre-wrap text-gray-800 text-xs">
105
+ {error?.stack || 'No stack trace available'}
106
+ </pre>
107
+ </div>
108
+
109
+ {errorInfo?.componentStack && (
110
+ <div className="mb-4">
111
+ <strong className="text-red-700">Component Stack:</strong>
112
+ <pre className="mt-1 whitespace-pre-wrap text-gray-800 text-xs">
113
+ {errorInfo.componentStack}
114
+ </pre>
115
+ </div>
116
+ )}
117
+
118
+ <div className="text-xs text-gray-600">
119
+ <strong>Timestamp:</strong> {new Date().toISOString()}
120
+ </div>
121
+ </div>
122
+ </details>
123
+ </div>
124
+ );
125
+ }
126
+
127
+ return this.props.children;
128
+ }
129
+ }
130
+
131
+ export default ErrorBoundary;
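Note: a minimal usage sketch (not part of this commit) showing how ErrorBoundary is meant to be consumed; the wrapper component and its componentName value are illustrative.

    import React from 'react';
    import ErrorBoundary from './components/ErrorBoundary';

    // Any risky subtree can be wrapped; componentName labels both the
    // fallback message and the copy-to-clipboard error report.
    const SafeSection: React.FC<{ children: React.ReactNode }> = ({ children }) => (
      <ErrorBoundary componentName="TranscriptionPage">
        {children}
      </ErrorBoundary>
    );

    export default SafeSection;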
frontend/src/components/FeedbackCard.tsx ADDED
@@ -0,0 +1,27 @@
+ import React from 'react';
+
+ const FeedbackCard: React.FC = () => {
+   return (
+     <div className="mb-4 p-3 bg-green-900/30 rounded-lg border border-green-600/50">
+       <div className="mb-2">
+         <h3 className="text-sm font-semibold text-green-300">Help Us Improve</h3>
+       </div>
+
+       <div className="text-xs text-green-100 leading-relaxed">
+         Your feedback is crucial for improving language coverage and model quality for low-resource languages.
+         Please{' '}
+         <a
+           href="https://forms.gle/JZhFWsA36sg2DdtN9"
+           target="_blank"
+           rel="noopener noreferrer"
+           className="text-green-300 hover:text-green-200 underline transition-colors"
+         >
+           provide feedback
+         </a>
+         {' '}to share your experiences, suggestions, and any issues you encounter.
+       </div>
+     </div>
+   );
+ };
+
+ export default FeedbackCard;
frontend/src/components/FullTranscription.tsx ADDED
@@ -0,0 +1,152 @@
+ import React, { useState } from 'react';
+ import { formatTime } from '../utils/subtitleUtils';
+ import { useTranscriptionStore } from '../stores/transcriptionStore';
+ import { InformationCircleIcon } from '@heroicons/react/24/outline';
+
+ const FullTranscription: React.FC = () => {
+   const [showInfoTooltip, setShowInfoTooltip] = useState(false);
+   const [showExpandedChunks, setShowExpandedChunks] = useState(false);
+   const { transcription, selectedLanguage } = useTranscriptionStore();
+
+   const renderInfoTooltip = () => {
+     if (!transcription) return null;
+
+     const chunks = transcription.chunks || [];
+     const maxVisibleChunks = 3;
+     const hasMoreChunks = chunks.length > maxVisibleChunks;
+     const visibleChunks = showExpandedChunks ? chunks : chunks.slice(0, maxVisibleChunks);
+
+     // Center the tooltip in the viewport to avoid edge cuts
+     const tooltipStyle: React.CSSProperties = {
+       position: 'fixed',
+       left: '50%',
+       top: '50%',
+       transform: 'translate(-50%, -50%)',
+       maxHeight: '80vh', // Prevent tooltip from being taller than viewport
+       overflowY: 'auto' as const
+     };
+
+     return (
+       <div
+         className="z-50 p-3 bg-gray-900 text-white text-xs rounded-lg shadow-xl border border-gray-600 min-w-64 max-w-96"
+         style={tooltipStyle}>
+
+         <div className="font-semibold mb-2 text-blue-300">Transcription Details</div>
+
+         <div className="space-y-1">
+           <div className="flex justify-between">
+             <span className="text-gray-300">Model:</span>
+             <span>{transcription.model}</span>
+           </div>
+
+           <div className="flex justify-between">
+             <span className="text-gray-300">Language:</span>
+             <span className="font-mono">
+               {selectedLanguage || 'auto-detect'}
+             </span>
+           </div>
+
+           <div className="flex justify-between">
+             <span className="text-gray-300">Segments:</span>
+             <span>{transcription.num_segments}</span>
+           </div>
+
+           <div className="flex justify-between">
+             <span className="text-gray-300">Duration:</span>
+             <span>{formatTime(transcription.total_duration)}</span>
+           </div>
+
+           <div className="flex justify-between">
+             <span className="text-gray-300">Device:</span>
+             <span className="font-mono">{transcription.device}</span>
+           </div>
+
+           {/* Long-form specific info */}
+           {transcription.num_chunks && (
+             <>
+               <div className="border-t border-gray-600 pt-1 mt-1">
+                 <div className="text-xs text-blue-300 font-semibold mb-1">Long-form Processing</div>
+               </div>
+               <div className="flex justify-between">
+                 <span className="text-gray-300">Chunks:</span>
+                 <span>{transcription.num_chunks}</span>
+               </div>
+             </>
+           )}
+         </div>
+
+         {/* Improved Chunk details */}
+         {chunks.length > 0 && (
+           <div className="mt-2">
+             <div className="flex items-center justify-between mb-1">
+               <div className="text-xs text-blue-300 font-semibold">Chunk Details</div>
+               {hasMoreChunks && (
+                 <button
+                   onClick={() => setShowExpandedChunks(!showExpandedChunks)}
+                   className="text-xs text-blue-400 hover:text-blue-300 underline transition-colors"
+                 >
+                   {showExpandedChunks ? 'Show Less' : `Show All (${chunks.length})`}
+                 </button>
+               )}
+             </div>
+
+             <div className="space-y-1 max-h-48 overflow-y-auto">
+               {visibleChunks.map((chunk, index) => (
+                 <div key={index} className="text-xs bg-gray-700 p-2 rounded border border-gray-600">
+                   <div className="flex justify-between items-start">
+                     <div className="font-medium text-gray-200">
+                       Chunk #{index + 1}
+                     </div>
+                     <div className="text-gray-400 text-xs">
+                       {chunk.duration.toFixed(1)}s
+                     </div>
+                   </div>
+                   <div className="text-gray-300 font-mono mt-1">
+                     {formatTime(chunk.start_time)} → {formatTime(chunk.end_time)}
+                   </div>
+                 </div>
+               ))}
+
+               {hasMoreChunks && !showExpandedChunks && (
+                 <div className="text-center py-1">
+                   <button
+                     onClick={() => setShowExpandedChunks(true)}
+                     className="text-xs text-blue-400 hover:text-blue-300 underline transition-colors"
+                   >
+                     +{chunks.length - maxVisibleChunks} more chunks...
+                   </button>
+                 </div>
+               )}
+             </div>
+           </div>
+         )}
+       </div>
+     );
+   };
+
+   if (!transcription) return null;
+
+   return (
+     <div className="p-4 bg-gray-800 border-t border-gray-700">
+       <div className="flex items-center gap-2 mb-3">
+         <h3 className="text-sm font-semibold text-white">Full Transcription</h3>
+
+         {/* Info tooltip */}
+         <div className="relative">
+           <InformationCircleIcon
+             className="w-4 h-4 text-gray-400 hover:text-gray-200 cursor-help transition-colors"
+             onMouseEnter={() => setShowInfoTooltip(true)}
+             onMouseLeave={() => setShowInfoTooltip(false)}
+           />
+           {showInfoTooltip && renderInfoTooltip()}
+         </div>
+       </div>
+
+       <div className="text-sm max-h-32 overflow-y-auto text-gray-300 font-mono bg-gray-900 p-3 rounded border border-gray-600">
+         {transcription.transcription}
+       </div>
+     </div>
+   );
+ };
+
+ export default FullTranscription;
frontend/src/components/LanguageSelector.tsx ADDED
@@ -0,0 +1,262 @@
+ import React, { useMemo, useState, useEffect } from 'react';
+ import Select, { components, StylesConfig, type SingleValue } from 'react-select';
+ import { matchSorter } from 'match-sorter';
+ import { LANGUAGE_MAP, ACCURATE_LANGUAGES } from '../utils/languages';
+ import { getScriptName, getScriptDescription } from '../utils/scripts';
+ import { getSupportedLanguages } from '../services/transcriptionApi';
+
+ interface LanguageSelectorProps {
+   selectedLanguage: string | null;
+   selectedScript: string | null;
+   onLanguageAndScriptSelect: (language: string | null, script: string | null) => void;
+   disabled?: boolean;
+ }
+
+ interface OptionType {
+   value: string; // The full code_script combination
+   label: string;
+   languageName: string;
+   scriptName: string;
+   languageCode: string;
+   scriptCode: string;
+ }
+
+ const parseLanguage = (languageString: string): OptionType | null => {
+   const parts = languageString.split('_');
+
+   // Always expect format: "eng_Latn"
+   if (parts.length === 2) {
+     const [languageCode, scriptCode] = parts;
+     const languageName = (LANGUAGE_MAP as Record<string, string>)[languageCode] || languageCode;
+     const scriptName = getScriptName(scriptCode);
+
+     return {
+       value: languageString,
+       label: `${languageName} ${scriptName} (${languageString})`,
+       languageName,
+       scriptName,
+       languageCode,
+       scriptCode,
+     };
+   }
+
+   return null;
+ };
+
+ // Custom Option component to show language, script, and code with tooltip
+ const Option = (props: any) => {
+   const scriptDescription = getScriptDescription(props.data.scriptCode);
+
+   return (
+     <components.Option {...props}>
+       <div className="flex flex-col" title={scriptDescription || undefined}>
+         <div className="font-medium text-sm">{props.data.languageName}</div>
+         <div className="text-xs text-gray-400">{props.data.scriptName} ({props.data.value})</div>
+       </div>
+     </components.Option>
+   );
+ };
+
+ // Custom SingleValue component for selected value
+ const SingleValue = (props: any) => (
+   <components.SingleValue {...props}>
+     <div className="flex flex-col">
+       <div className="font-medium text-sm leading-tight">{props.data.languageName}</div>
+       <div className="text-xs text-gray-400 leading-tight">{props.data.scriptName} ({props.data.value})</div>
+     </div>
+   </components.SingleValue>
+ );
+
+ // Custom styles to match the dark theme
+ const customStyles: StylesConfig<OptionType> = {
+   control: (styles, { isDisabled, isFocused }) => ({
+     ...styles,
+     backgroundColor: '#374151', // gray-700 (same whether enabled or disabled)
+     borderColor: isFocused ? '#3b82f6' : '#4b5563', // blue-500 : gray-600
+     borderRadius: '0.375rem',
+     minHeight: '40px',
+     boxShadow: isFocused ? '0 0 0 1px #3b82f6' : 'none',
+     '&:hover': {
+       borderColor: isDisabled ? '#4b5563' : '#6b7280', // gray-600 : gray-500
+       backgroundColor: isDisabled ? '#374151' : '#4b5563', // gray-700 : gray-600
+     },
+     cursor: isDisabled ? 'not-allowed' : 'pointer',
+   }),
+   singleValue: (styles) => ({
+     ...styles,
+     color: '#f9fafb', // gray-50
+   }),
+   placeholder: (styles) => ({
+     ...styles,
+     color: '#9ca3af', // gray-400
+   }),
+   input: (styles) => ({
+     ...styles,
+     color: '#f9fafb', // gray-50
+   }),
+   menu: (styles) => ({
+     ...styles,
+     backgroundColor: '#374151', // gray-700
+     border: '1px solid #4b5563', // gray-600
+     borderRadius: '0.5rem',
+     boxShadow: '0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05)',
+     zIndex: 50,
+   }),
+   menuList: (styles) => ({
+     ...styles,
+     maxHeight: '200px',
+     padding: 0,
+   }),
+   option: (styles, { isFocused, isSelected }) => ({
+     ...styles,
+     backgroundColor: isSelected
+       ? '#2563eb' // blue-600
+       : isFocused
+         ? '#4b5563' // gray-600
+         : 'transparent',
+     color: '#f9fafb', // gray-50
+     cursor: 'pointer',
+     padding: '8px 12px',
+     '&:hover': {
+       backgroundColor: isSelected ? '#2563eb' : '#4b5563', // blue-600 : gray-600
+     },
+   }),
+   indicatorSeparator: (styles) => ({
+     ...styles,
+     backgroundColor: '#4b5563', // gray-600
+   }),
+   dropdownIndicator: (styles, { isDisabled }) => ({
+     ...styles,
+     color: isDisabled ? '#6b7280' : '#9ca3af', // gray-500 : gray-400
+     '&:hover': {
+       color: isDisabled ? '#6b7280' : '#d1d5db', // gray-500 : gray-300
+     },
+   }),
+   clearIndicator: (styles) => ({
+     ...styles,
+     color: '#9ca3af', // gray-400
+     '&:hover': {
+       color: '#d1d5db', // gray-300
+     },
+   }),
+   noOptionsMessage: (styles) => ({
+     ...styles,
+     color: '#9ca3af', // gray-400
+   }),
+ };
+
+ const LanguageSelector: React.FC<LanguageSelectorProps> = ({
+   selectedLanguage,
+   selectedScript,
+   onLanguageAndScriptSelect,
+   disabled = false
+ }) => {
+   const [supportedLanguages, setSupportedLanguages] = useState<string[]>([]);
+   const [isLoading, setIsLoading] = useState(true);
+   const [error, setError] = useState<string | null>(null);
+
+   // Fetch supported languages from API
+   useEffect(() => {
+     const fetchSupportedLanguages = async () => {
+       try {
+         setIsLoading(true);
+         const languages = await getSupportedLanguages();
+         setSupportedLanguages(languages);
+       } catch (err) {
+         console.error('Failed to fetch supported languages:', err);
+         setError('Failed to load supported languages');
+       } finally {
+         setIsLoading(false);
+       }
+     };
+
+     fetchSupportedLanguages();
+   }, []);
+
+   // Convert supported languages to options
+   const languageOptions = useMemo(() => {
+     const allowAllLanguages = import.meta.env.VITE_ALLOW_ALL_LANGUAGES === 'true';
+
+     return supportedLanguages
+       .map(parseLanguage)
+       .filter((option): option is OptionType => option !== null)
+       .filter((option) => {
+         if (allowAllLanguages) {
+           return true;
+         }
+         return ACCURATE_LANGUAGES.includes(option.languageCode);
+       })
+       .sort((a, b) => a.languageName.localeCompare(b.languageName));
+   }, [supportedLanguages]);
+
+   // Find the selected option
+   const selectedOption = useMemo(() => {
+     if (!selectedLanguage || !selectedScript) return null;
+     const combinedValue = `${selectedLanguage}_${selectedScript}`;
+     return languageOptions.find(option => option.value === combinedValue) || null;
+   }, [selectedLanguage, selectedScript, languageOptions]);
+
+   const handleChange = (newValue: SingleValue<OptionType>) => {
+     if (newValue) {
+       onLanguageAndScriptSelect(newValue.languageCode, newValue.scriptCode);
+     } else {
+       onLanguageAndScriptSelect(null, null);
+     }
+   };
+
+   // Custom filterOption function using match-sorter
+   const filterOptions = useMemo(() => {
+     return (option: { label: string; value: string; data: OptionType }, inputValue: string) => {
+       if (!inputValue.trim()) return true;
+
+       // Use match-sorter to check if this individual option matches
+       const matches = matchSorter([option.data], inputValue, {
+         keys: [
+           'languageName', // Primary: language name
+           'scriptName',   // Secondary: script name
+           'languageCode', // Tertiary: language code
+           'scriptCode',   // Quaternary: script code
+           'label',        // Fallback: full label
+         ],
+         threshold: matchSorter.rankings.CONTAINS,
+       });
+
+       return matches.length > 0;
+     };
+   }, []);
+
+   if (error) {
+     return (
+       <div className="text-red-400 text-sm p-2 bg-red-900/20 rounded">
+         {error}
+       </div>
+     );
+   }
+
+   return (
+     <Select<OptionType>
+       value={selectedOption}
+       onChange={handleChange}
+       options={languageOptions}
+       placeholder={isLoading ? "Loading languages..." : "Select language..."}
+       isClearable
+       isDisabled={disabled || isLoading}
+       isSearchable
+       filterOption={(option, inputValue) => filterOptions(option, inputValue)}
+       components={{ Option, SingleValue }}
+       styles={customStyles}
+       menuPortalTarget={document.body}
+       menuPosition="fixed"
+       noOptionsMessage={({ inputValue }) =>
+         `No languages found matching "${inputValue}"`
+       }
+       // Performance optimizations
+       menuIsOpen={undefined} // Let react-select manage this
+       blurInputOnSelect={true}
+       closeMenuOnSelect={true}
+       hideSelectedOptions={false}
+     />
+   );
+ };
+
+ export default LanguageSelector;
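Note: a sketch of the expected parseLanguage behavior under the "code_script" format above. The display names assume LANGUAGE_MAP maps 'eng' to 'English' and getScriptName maps 'Latn' to 'Latin'; both are illustrative, not verified against utils/languages.ts.

    parseLanguage('eng_Latn');
    // => { value: 'eng_Latn', label: 'English Latin (eng_Latn)',
    //      languageName: 'English', scriptName: 'Latin',
    //      languageCode: 'eng', scriptCode: 'Latn' }

    parseLanguage('eng'); // => null — entries without a script suffix are dropped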
frontend/src/components/MediaDownloadControls.tsx ADDED
@@ -0,0 +1,75 @@
+ import React from 'react';
+ import { ArrowDownTrayIcon } from '@heroicons/react/24/outline';
+ import { useTranscriptionStore } from '../stores/transcriptionStore';
+ import { generateSRT, generateWebVTT, downloadSubtitles } from '../utils/subtitleUtils';
+ import { trackDownloadSRT, sendGAEvent } from '../analytics/gaEvents';
+
+ const MediaDownloadControls: React.FC = () => {
+   const {
+     file,
+     transcription,
+     selectedLanguage,
+     isVideoFile,
+     isDownloadingVideo,
+     handleDownloadVideoWithSubtitles,
+   } = useTranscriptionStore();
+
+   if (!transcription) {
+     return null;
+   }
+
+   return (
+     <div className="flex items-center space-x-2">
+       <div className="tooltip tooltip-bottom" data-tip="Download subtitle file in SRT format. Compatible with most video players and editing software">
+         <button
+           onClick={() => {
+             const srtContent = generateSRT(transcription.aligned_segments);
+             const filename = file?.name?.replace(/\.[^/.]+$/, ".srt") || "subtitles.srt";
+             downloadSubtitles(srtContent, filename);
+
+             // Track SRT download
+             if (selectedLanguage) {
+               trackDownloadSRT(selectedLanguage);
+             }
+           }}
+           className="flex items-center gap-1 px-2 py-1 text-xs bg-purple-600 hover:bg-purple-700 rounded transition-colors text-white"
+         >
+           <ArrowDownTrayIcon className="w-3 h-3" />
+           .srt
+         </button>
+       </div>
+       <div className="tooltip tooltip-bottom" data-tip="Download subtitle file in WebVTT format. Ideal for web browsers and HTML5 video players">
+         <button
+           onClick={() => {
+             const vttContent = generateWebVTT(transcription.aligned_segments);
+             const filename = file?.name?.replace(/\.[^/.]+$/, ".vtt") || "subtitles.vtt";
+             downloadSubtitles(vttContent, filename);
+
+             // Track VTT download
+             if (selectedLanguage) {
+               sendGAEvent("Download", "vtt", selectedLanguage);
+             }
+           }}
+           className="flex items-center gap-1 px-2 py-1 text-xs bg-indigo-600 hover:bg-indigo-700 rounded transition-colors text-white"
+         >
+           <ArrowDownTrayIcon className="w-3 h-3" />
+           .vtt
+         </button>
+       </div>
+       {isVideoFile && (
+         <div className="tooltip tooltip-bottom" data-tip="Download video with embedded subtitle track. Selectable from compatible media players">
+           <button
+             onClick={handleDownloadVideoWithSubtitles}
+             disabled={isDownloadingVideo}
+             className="flex items-center gap-1 px-2 py-1 text-xs bg-orange-600 hover:bg-orange-700 disabled:bg-gray-600 rounded transition-colors text-white"
+           >
+             <ArrowDownTrayIcon className="w-3 h-3" />
+             {isDownloadingVideo ? "Creating..." : ".mp4"}
+           </button>
+         </div>
+       )}
+     </div>
+   );
+ };
+
+ export default MediaDownloadControls;
frontend/src/components/MediaEditControls.tsx ADDED
@@ -0,0 +1,113 @@
+ import React, { useState, useEffect } from 'react';
+ import { InformationCircleIcon } from '@heroicons/react/24/outline';
+ import { useTranscriptionStore } from '../stores/transcriptionStore';
+ import { formatTime } from '../utils/subtitleUtils';
+
+ const DEFAULT_MERGE_THRESHOLD = 2;
+ const MAX_MERGE_INTERVAL_SECONDS = 30;
+
+ const MediaEditControls: React.FC = () => {
+   const [mergeThreshold, setMergeThreshold] = useState(DEFAULT_MERGE_THRESHOLD);
+
+   const {
+     transcription,
+     currentTime,
+     activeSegmentIndex,
+     currentSegments,
+     undo,
+     redo,
+     canUndo,
+     canRedo,
+     mergeSegmentsByProximity,
+   } = useTranscriptionStore();
+
+   // Handle merge threshold changes
+   useEffect(() => {
+     mergeSegmentsByProximity(mergeThreshold);
+   }, [mergeThreshold, mergeSegmentsByProximity]);
+
+   // Keyboard shortcuts for undo/redo
+   useEffect(() => {
+     const handleKeyDown = (e: KeyboardEvent) => {
+       if ((e.ctrlKey || e.metaKey) && e.key === 'z' && !e.shiftKey) {
+         e.preventDefault();
+         undo();
+       } else if ((e.ctrlKey || e.metaKey) && (e.key === 'y' || (e.key === 'z' && e.shiftKey))) {
+         e.preventDefault();
+         redo();
+       }
+     };
+
+     document.addEventListener('keydown', handleKeyDown);
+     return () => document.removeEventListener('keydown', handleKeyDown);
+   }, [undo, redo]);
+
+   // Early return must come after the hooks above (Rules of Hooks)
+   if (!transcription) {
+     return null;
+   }
+
+   const displaySegments = currentSegments || transcription.aligned_segments;
+
+   return (
+     <div className="flex items-center justify-center">
+       <div className="flex items-center space-x-4">
+         {/* Current Status Info */}
+         <div className="flex items-center space-x-4 text-xs text-gray-400">
+           <span className="text-green-400">
+             {formatTime(currentTime)} / {formatTime(transcription.total_duration)}
+           </span>
+           <span className="text-blue-400">
+             {activeSegmentIndex !== null
+               ? `Segment ${activeSegmentIndex + 1}/${displaySegments.length}`
+               : "No active segment"}
+           </span>
+         </div>
+
+         {/* Combine Segments Slider */}
+         <div className="flex items-center space-x-2">
+           <label className="text-xs text-gray-300 whitespace-nowrap flex items-center space-x-1">
+             <span>Combine Words:</span>
+             <div className="tooltip" data-tip="Merges nearby words into one segment. The higher the value, the more words are combined">
+               <InformationCircleIcon className="w-4 h-4 text-gray-100 hover:text-gray-300 cursor-help inline ml-1" />
+             </div>
+           </label>
+           <input
+             type="range"
+             min="0"
+             max={MAX_MERGE_INTERVAL_SECONDS}
+             step="0.5"
+             value={mergeThreshold}
+             onChange={(e) => setMergeThreshold(Number(e.target.value))}
+             className="w-20 h-1 bg-gray-600 rounded-lg appearance-none cursor-pointer slider"
+             style={{
+               background: `linear-gradient(to right, #3B82F6 0%, #3B82F6 ${(mergeThreshold / MAX_MERGE_INTERVAL_SECONDS) * 100}%, #4B5563 ${(mergeThreshold / MAX_MERGE_INTERVAL_SECONDS) * 100}%, #4B5563 100%)`
+             }}
+           />
+         </div>
+
+         {/* Undo/Redo Buttons */}
+         <div className="flex items-center space-x-2">
+           <button
+             onClick={undo}
+             disabled={!canUndo}
+             className="px-3 py-1 text-xs bg-blue-600 hover:bg-blue-700 disabled:bg-gray-600 disabled:cursor-not-allowed text-white rounded transition-colors"
+             title="Undo (Ctrl+Z)"
+           >
+             ↶ Undo
+           </button>
+           <button
+             onClick={redo}
+             disabled={!canRedo}
+             className="px-3 py-1 text-xs bg-blue-600 hover:bg-blue-700 disabled:bg-gray-600 disabled:cursor-not-allowed text-white rounded transition-colors"
+             title="Redo (Ctrl+Y)"
+           >
+             ↷ Redo
+           </button>
+         </div>
+       </div>
+     </div>
+   );
+ };
+
+ export default MediaEditControls;
frontend/src/components/MediaPlayer.tsx ADDED
@@ -0,0 +1,130 @@
+ import React from 'react';
+ import { useTranscriptionStore } from '../stores/transcriptionStore';
+ import { generateWebVTT } from '../utils/subtitleUtils';
+ import { LANGUAGE_MAP } from '../utils/languages';
+
+ interface MediaPlayerProps {
+   audioRef: React.RefObject<HTMLAudioElement>;
+   videoRef: React.RefObject<HTMLVideoElement>;
+   onTimeUpdate?: () => void;
+ }
+
+ export default function MediaPlayer({
+   audioRef,
+   videoRef,
+   onTimeUpdate,
+ }: MediaPlayerProps) {
+   const {
+     file,
+     mediaUrl,
+     isVideoFile,
+     currentSegments,
+     selectedLanguage,
+     setCurrentTime
+   } = useTranscriptionStore();
+
+   const handleSeeked = (event: React.SyntheticEvent<HTMLMediaElement>) => {
+     const target = event.target as HTMLMediaElement;
+     setCurrentTime(target.currentTime);
+     // Call onTimeUpdate to trigger segment selection logic
+     if (onTimeUpdate) {
+       onTimeUpdate();
+     }
+   };
+
+   const handleLoadedMetadata = (event: React.SyntheticEvent<HTMLMediaElement>) => {
+     const target = event.target as HTMLMediaElement;
+     setCurrentTime(target.currentTime);
+
+     // Call onTimeUpdate to trigger segment selection logic
+     if (onTimeUpdate) {
+       onTimeUpdate();
+     }
+   };
+
+   // Helper function to encode UTF-8 string to base64
+   const utf8ToBase64 = (str: string): string => {
+     // Convert string to UTF-8 bytes, then to base64
+     const encoder = new TextEncoder();
+     const bytes = encoder.encode(str);
+     let binary = '';
+     bytes.forEach(byte => binary += String.fromCharCode(byte));
+     return btoa(binary);
+   };
+
+   // Get language info for subtitles
+   const getLanguageInfo = () => {
+     if (!selectedLanguage) {
+       return { code: 'en', name: 'English' };
+     }
+     const languageName = (LANGUAGE_MAP as Record<string, string>)[selectedLanguage];
+     return {
+       code: selectedLanguage,
+       name: languageName || 'Unknown'
+     };
+   };
+
+   // Early return if no file is selected
+   if (!file) {
+     return null;
+   }
+
+   // Early return if no media URL is available
+   if (!mediaUrl) {
+     return (
+       <div className="p-6 bg-gray-800">
+         <div className="max-w-4xl mx-auto text-center text-gray-300">
+           Loading media...
+         </div>
+       </div>
+     );
+   }
+
+   return (
+     <div className="p-6 bg-gray-800">
+       <div className="max-w-4xl mx-auto">
+         {isVideoFile ? (
+           <video
+             ref={videoRef}
+             src={mediaUrl || ""}
+             className="w-full max-h-96 rounded-lg"
+             onSeeked={handleSeeked}
+             onLoadedMetadata={handleLoadedMetadata}
+             controls
+             controlsList="nodownload nofullscreen noremoteplayback"
+             disablePictureInPicture
+           >
+             {currentSegments && currentSegments.length > 0 && (() => {
+               const { code, name } = getLanguageInfo();
+               return (
+                 <track
+                   kind="subtitles"
+                   src={`data:text/vtt;base64,${utf8ToBase64(generateWebVTT(currentSegments))}`}
+                   srcLang={code}
+                   label={name}
+                   default
+                 />
+               );
+             })()}
+           </video>
+         ) : (
+           <div className="bg-gray-700 p-8 rounded-lg">
+             <audio
+               ref={audioRef}
+               src={mediaUrl || ""}
+               className="w-full"
+               onSeeked={handleSeeked}
+               onLoadedMetadata={handleLoadedMetadata}
+               controls
+               controlsList="nodownload"
+             />
+             <div className="mt-4 text-center text-gray-300">
+               <div className="text-lg font-medium">Audio File</div>
+               <div className="text-sm">{file.name}</div>
+             </div>
+           </div>
+         )}
+       </div>
+     </div>
+   );
+ }
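Note: the TextEncoder round-trip in utf8ToBase64 exists because btoa() throws an InvalidCharacterError on code points above 0xFF. A self-contained sketch of the same UTF-8-safe encoding path used for the subtitle track (toVttDataUri is an illustrative helper, not part of this commit):

    // Build a data: URI for a <track> element from UTF-8 subtitle text.
    const toVttDataUri = (vtt: string): string => {
      const bytes = new TextEncoder().encode(vtt); // UTF-8 bytes, each 0–255
      let binary = '';
      bytes.forEach((b) => (binary += String.fromCharCode(b)));
      return `data:text/vtt;base64,${btoa(binary)}`;
    };

    // toVttDataUri('WEBVTT\n\n00:00.000 --> 00:02.000\nПривет') works where
    // btoa('Привет') alone would throw.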
frontend/src/components/MediaRecorder.tsx ADDED
@@ -0,0 +1,353 @@
+ import React, { useState, useRef, useEffect } from 'react';
+ import { useTranscriptionStore } from '../stores/transcriptionStore';
+ import { useAudioAnalyzer } from '../hooks/useAudioAnalyzer';
+
+ interface MediaRecorderProps {
+   onComplete: () => void;
+   onCancel: () => void;
+ }
+
+ const MediaRecorder: React.FC<MediaRecorderProps> = ({ onComplete, onCancel }) => {
+   const { recordingType, setRecordedBlob } = useTranscriptionStore();
+
+   const [isRecording, setIsRecording] = useState(false);
+   const [recordingTime, setRecordingTime] = useState(0);
+   const [stream, setStream] = useState<MediaStream | null>(null);
+   const [error, setError] = useState<string | null>(null);
+   const [permissionState, setPermissionState] = useState<'prompt' | 'granted' | 'denied'>('prompt');
+   const [currentMicrophone, setCurrentMicrophone] = useState<string | null>(null);
+
+   const mediaRecorderRef = useRef<MediaRecorder | null>(null);
+   const videoRef = useRef<HTMLVideoElement>(null);
+   const chunksRef = useRef<Blob[]>([]);
+   const timerRef = useRef<number | null>(null);
+
+   const isVideo = recordingType === 'video';
+
+   // Audio analyzer for real-time waveform
+   const { audioData, connectToStream, disconnect } = useAudioAnalyzer(256);
+
+   // Get microphone device info
+   const getMicrophoneInfo = async (mediaStream: MediaStream) => {
+     try {
+       // Get all available audio input devices
+       const devices = await navigator.mediaDevices.enumerateDevices();
+       const audioInputDevices = devices.filter(device => device.kind === 'audioinput');
+
+       // Get the audio track from the current stream
+       const audioTrack = mediaStream.getAudioTracks()[0];
+
+       if (audioTrack) {
+         // Get the device settings
+         const settings = audioTrack.getSettings();
+         const deviceId = settings.deviceId;
+
+         // Find the matching device in our list
+         const currentDevice = audioInputDevices.find(device => device.deviceId === deviceId);
+
+         if (currentDevice && currentDevice.label) {
+           setCurrentMicrophone(currentDevice.label);
+         } else {
+           // Fallback to device ID if label is not available
+           setCurrentMicrophone(`Microphone (${deviceId?.substring(0, 8)}...)`);
+         }
+       }
+     } catch (err) {
+       console.error('Error getting microphone info:', err);
+       setCurrentMicrophone('Unknown microphone');
+     }
+   };
+
+   // Request permissions and setup media stream
+   const requestPermissions = async () => {
+     try {
+       setError(null);
+
+       const constraints: MediaStreamConstraints = {
+         audio: {
+           echoCancellation: true,
+           noiseSuppression: true,
+           autoGainControl: true,
+         },
+         video: isVideo ? {
+           width: { ideal: 1280 },
+           height: { ideal: 720 },
+           facingMode: 'user'
+         } : false
+       };
+
+       const mediaStream = await navigator.mediaDevices.getUserMedia(constraints);
+       setStream(mediaStream);
+       setPermissionState('granted');
+
+       // Get microphone device information
+       await getMicrophoneInfo(mediaStream);
+
+       // Show video preview if recording video
+       if (isVideo && videoRef.current) {
+         videoRef.current.srcObject = mediaStream;
+         videoRef.current.play();
+       }
+
+       // Connect audio analyzer for waveform visualization
+       connectToStream(mediaStream);
+
+     } catch (err) {
+       console.error('Error accessing media devices:', err);
+       setPermissionState('denied');
+
+       if (err instanceof DOMException) {
+         switch (err.name) {
+           case 'NotAllowedError':
+             setError('Permission denied. Please allow access to your microphone' + (isVideo ? ' and camera' : '') + '.');
+             break;
+           case 'NotFoundError':
+             setError('No ' + (isVideo ? 'camera or ' : '') + 'microphone found.');
+             break;
+           case 'NotReadableError':
+             setError('Media device is already in use by another application.');
+             break;
+           default:
+             setError('Failed to access media devices: ' + err.message);
+         }
+       } else {
+         setError('An unexpected error occurred while accessing media devices.');
+       }
+     }
+   };
+
+   // Start recording
+   const startRecording = () => {
+     if (!stream) return;
+
+     try {
+       chunksRef.current = [];
+
+       // Try different MIME types in order of preference
+       const mimeTypes = isVideo
+         ? ['video/webm;codecs=vp9,opus', 'video/webm;codecs=vp8,opus', 'video/webm']
+         : ['audio/webm;codecs=opus', 'audio/webm', 'audio/mp4', ''];
+
+       let selectedMimeType = '';
+       for (const mimeType of mimeTypes) {
+         if (mimeType === '' || window.MediaRecorder.isTypeSupported(mimeType)) {
+           selectedMimeType = mimeType;
+           break;
+         }
+       }
+
+       const options: MediaRecorderOptions = selectedMimeType ? { mimeType: selectedMimeType } : {};
+       const mediaRecorder = new window.MediaRecorder(stream, options);
+       mediaRecorderRef.current = mediaRecorder;
+
+       mediaRecorder.ondataavailable = (event) => {
+         if (event.data.size > 0) {
+           chunksRef.current.push(event.data);
+         }
+       };
+
+       mediaRecorder.onstop = () => {
+         const blob = new Blob(chunksRef.current, {
+           type: isVideo ? 'video/webm' : 'audio/webm'
+         });
+
+         setRecordedBlob(blob);
+         onComplete();
+       };
+
+       mediaRecorder.start();
+       setIsRecording(true);
+       setRecordingTime(0);
+
+       // Start timer (window.setInterval returns a number in the browser)
+       timerRef.current = window.setInterval(() => {
+         setRecordingTime(prev => prev + 1);
+       }, 1000);
+
+     } catch (err) {
+       console.error('Error starting recording:', err);
+       setError('Failed to start recording: ' + (err instanceof Error ? err.message : 'Unknown error'));
+     }
+   };
+
+   // Stop recording
+   const stopRecording = () => {
+     if (mediaRecorderRef.current && isRecording) {
+       mediaRecorderRef.current.stop();
+       setIsRecording(false);
+
+       if (timerRef.current) {
+         clearInterval(timerRef.current);
+         timerRef.current = null;
+       }
+     }
+   };
+
+   // Format recording time
+   const formatTime = (seconds: number) => {
+     const mins = Math.floor(seconds / 60);
+     const secs = seconds % 60;
+     return `${mins.toString().padStart(2, '0')}:${secs.toString().padStart(2, '0')}`;
+   };
+
+   // Cleanup on unmount and when recording stops
+   useEffect(() => {
+     return () => {
+       if (stream) {
+         stream.getTracks().forEach(track => track.stop());
+       }
+       if (timerRef.current) {
+         clearInterval(timerRef.current);
+       }
+     };
+   }, [stream]);
+
+   // Cleanup stream when recording stops externally
+   useEffect(() => {
+     if (!recordingType && stream) {
+       stream.getTracks().forEach(track => track.stop());
+       setStream(null);
+       setCurrentMicrophone(null); // Clear microphone info
+       disconnect(); // Also disconnect audio analyzer
+     }
+   }, [recordingType, stream, disconnect]);
+
+   // Auto-request permissions when component mounts
+   useEffect(() => {
+     if (permissionState === 'prompt') {
+       requestPermissions();
+     }
+   }, []);
+
+   return (
+     <div className="flex flex-col items-center justify-center min-h-[400px] bg-gray-900 rounded-lg border-2 border-dashed border-gray-600 p-8">
+
+       {/* Header */}
+       <div className="mb-6 text-center">
+         <h3 className="text-xl font-semibold text-white mb-2">
+           Record {isVideo ? 'Video' : 'Audio'}
+         </h3>
+         <p className="text-gray-400 text-sm">
+           {permissionState === 'prompt' && 'Requesting permissions...'}
+           {permissionState === 'denied' && 'Permission required to record'}
+           {permissionState === 'granted' && !isRecording && 'Ready to record'}
+           {isRecording && `Recording... ${formatTime(recordingTime)}`}
+         </p>
+
+         {/* Microphone Device Info */}
+         {permissionState === 'granted' && currentMicrophone && (
+           <div className="mt-2 flex items-center justify-center gap-2 text-xs text-gray-300">
+             <svg className="w-4 h-4 text-blue-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+               <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 11a7 7 0 01-7 7m0 0a7 7 0 01-7-7m7 7v4m0 0H8m4 0h4m-4-8a3 3 0 01-3-3V5a3 3 0 116 0v6a3 3 0 01-3 3z" />
+             </svg>
+             <span className="truncate max-w-xs" title={currentMicrophone}>
+               {currentMicrophone}
+             </span>
+           </div>
+         )}
+       </div>
+
+       {/* Video Preview (only for video recording) */}
+       {isVideo && permissionState === 'granted' && (
+         <div className="mb-6">
+           <video
+             ref={videoRef}
+             className="w-80 h-60 bg-black rounded-lg object-cover"
+             muted
+             playsInline
+           />
+         </div>
+       )}
+
+       {/* Audio Visualization */}
+       {permissionState === 'granted' && (
+         <div className="mb-6 flex items-center justify-center">
+           <div className="w-80 h-20 bg-gray-800 rounded-lg flex items-center justify-center">
+             <div className="flex items-center space-x-1">
+               {/* Real-time audio visualization bars */}
+               {Array.from({ length: 32 }, (_, i) => {
+                 // Use a wider frequency range for better distribution
+                 // Map across 60% of the frequency spectrum for voice and some harmonics
+                 const voiceRangeEnd = Math.floor(audioData.length * 0.6);
+                 const dataIndex = Math.floor((i / 32) * voiceRangeEnd);
+                 const amplitude = audioData[dataIndex] || 0;
+
+                 // Apply logarithmic scaling to prevent saturation and better distribute levels
+                 const normalizedAmplitude = amplitude / 255;
+                 const logScaled = Math.log10(1 + normalizedAmplitude * 9); // Log scale 0-1 (log10(10) = 1, so no divisor needed)
+                 const height = Math.max(4, logScaled * 60); // Scale to 4-60px
+
+                 return (
+                   <div
+                     key={i}
+                     className="w-1 bg-blue-500 rounded-full transition-all duration-75"
+                     style={{
+                       height: `${height}px`
+                     }}
+                   />
+                 );
+               })}
+             </div>
+           </div>
+         </div>
+       )}
+
+       {/* Error Display */}
+       {error && (
+         <div className="mb-4 p-3 bg-red-900/20 border border-red-500 rounded-lg">
+           <p className="text-red-300 text-sm">{error}</p>
+         </div>
+       )}
+
+       {/* Controls */}
+       <div className="flex gap-4">
+         {permissionState === 'denied' && (
+           <button
+             onClick={requestPermissions}
+             className="px-6 py-2 bg-blue-600 hover:bg-blue-700 text-white rounded-lg transition-colors"
+           >
+             Request Permission
+           </button>
+         )}
+
+         {permissionState === 'granted' && !isRecording && (
+           <button
+             onClick={startRecording}
+             disabled={!stream}
+             className="px-6 py-2 bg-red-600 hover:bg-red-700 disabled:bg-gray-600 text-white rounded-lg transition-colors flex items-center gap-2"
+           >
+             <div className="w-4 h-4 bg-white rounded-full"></div>
+             Start Recording
+           </button>
+         )}
+
+         {isRecording && (
+           <button
+             onClick={stopRecording}
+             className="px-6 py-2 bg-gray-600 hover:bg-gray-700 text-white rounded-lg transition-colors flex items-center gap-2"
+           >
+             <div className="w-4 h-4 bg-white"></div>
+             Stop Recording
+           </button>
+         )}
+
+         <button
+           onClick={onCancel}
+           disabled={isRecording}
+           className="px-6 py-2 bg-gray-700 hover:bg-gray-600 disabled:bg-gray-800 text-white rounded-lg transition-colors"
+         >
+           Cancel
+         </button>
+       </div>
+
+       {/* Tips */}
+       <div className="mt-6 text-center">
+         <p className="text-gray-400 text-xs max-w-md">
+           Speak clearly and minimize background noise for best transcription results.
+         </p>
+       </div>
+     </div>
+   );
+ };
+
+ export default MediaRecorder;
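Note: the codec fallback in startRecording reduces to a one-liner; a sketch for reviewers (pickSupportedMimeType is an illustrative helper, not part of this commit):

    // First supported candidate wins; '' defers to the browser's default container.
    const pickSupportedMimeType = (candidates: string[]): string =>
      candidates.find((t) => t === '' || window.MediaRecorder.isTypeSupported(t)) ?? '';

    pickSupportedMimeType(['audio/webm;codecs=opus', 'audio/webm', 'audio/mp4', '']);
    // typically 'audio/webm;codecs=opus' on Chrome/Firefox, 'audio/mp4' on Safari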
frontend/src/components/MinimapTimeline.tsx ADDED
@@ -0,0 +1,509 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useRef, useEffect, useState, useCallback } from 'react';
2
+ import { ArrowDownTrayIcon } from '@heroicons/react/24/outline';
3
+ import { useTranscriptionStore } from '../stores/transcriptionStore';
4
+
5
+ interface MinimapTimelineProps {
6
+ audioRef: React.RefObject<HTMLAudioElement>;
7
+ videoRef: React.RefObject<HTMLVideoElement>;
8
+ canvasTimelineRef: React.RefObject<HTMLDivElement>; // Container that scrolls
9
+ }
10
+
11
+ export default function MinimapTimeline({
12
+ audioRef,
13
+ videoRef,
14
+ canvasTimelineRef
15
+ }: MinimapTimelineProps) {
16
+ const canvasRef = useRef<HTMLCanvasElement>(null);
17
+ const containerRef = useRef<HTMLDivElement>(null);
18
+ const [isDragging, setIsDragging] = useState(false);
19
+ const [dragStartX, setDragStartX] = useState(0);
20
+ const [dragStartScrollLeft, setDragStartScrollLeft] = useState(0);
21
+ const [waveformData, setWaveformData] = useState<number[]>([]);
22
+ const [viewport, setViewport] = useState({ start: 0, end: 30, visible: false });
23
+
24
+ const {
25
+ transcription,
26
+ preprocessedAudio,
27
+ currentTime,
28
+ } = useTranscriptionStore();
29
+
30
+ // Constants
31
+ const MINIMAP_HEIGHT = 80;
32
+ const PIXELS_PER_SECOND = 300; // Match the CanvasTimeline scaling
33
+
34
+ // Get media duration
35
+ const getMediaDuration = useCallback(() => {
36
+ const audioElement = audioRef.current;
37
+ const videoElement = videoRef.current;
38
+
39
+ if (audioElement && !isNaN(audioElement.duration)) {
40
+ return audioElement.duration;
41
+ }
42
+ if (videoElement && !isNaN(videoElement.duration)) {
43
+ return videoElement.duration;
44
+ }
45
+
46
+ return transcription?.total_duration || 0;
47
+ }, [audioRef, videoRef, transcription]);
48
+
49
+ const mediaDuration = getMediaDuration();
50
+
51
+ // Canvas width based on container
52
+ const [canvasWidth, setCanvasWidth] = useState(800);
53
+
54
+ // Update canvas width on resize
55
+ useEffect(() => {
56
+ const updateCanvasWidth = () => {
57
+ if (containerRef.current) {
58
+ setCanvasWidth(containerRef.current.clientWidth);
59
+ }
60
+ };
61
+
62
+ updateCanvasWidth();
63
+ window.addEventListener('resize', updateCanvasWidth);
64
+ return () => window.removeEventListener('resize', updateCanvasWidth);
65
+ }, []);
66
+
67
+ // Track Canvas Timeline scroll position and calculate viewport
68
+ const updateViewportFromScroll = useCallback(() => {
69
+ const canvasContainer = canvasTimelineRef.current;
70
+ if (!canvasContainer || mediaDuration === 0) return;
71
+
72
+ const scrollLeft = canvasContainer.scrollLeft;
73
+ const containerWidth = canvasContainer.clientWidth;
74
+ const totalCanvasWidth = mediaDuration * PIXELS_PER_SECOND;
75
+
76
+ // Calculate what time range is currently visible
77
+ const startTime = (scrollLeft / totalCanvasWidth) * mediaDuration;
78
+ const endTime = ((scrollLeft + containerWidth) / totalCanvasWidth) * mediaDuration;
79
+
80
+ setViewport({
81
+ start: Math.max(0, startTime),
82
+ end: Math.min(mediaDuration, endTime),
83
+ visible: true
84
+ });
85
+ }, [canvasTimelineRef, mediaDuration]);
86
+
87
+ // Listen for scroll events on the Canvas Timeline container
88
+ useEffect(() => {
89
+ const canvasContainer = canvasTimelineRef.current;
90
+ if (!canvasContainer) return;
91
+
92
+ const handleScroll = () => {
93
+ updateViewportFromScroll();
94
+ };
95
+
96
+ const handleLoadOrResize = () => {
97
+ // Update viewport when container size changes
98
+ updateViewportFromScroll();
99
+ };
100
+
101
+ // Initial viewport calculation
102
+ updateViewportFromScroll();
103
+
104
+ canvasContainer.addEventListener('scroll', handleScroll);
105
+ window.addEventListener('resize', handleLoadOrResize);
106
+
107
+ return () => {
108
+ canvasContainer.removeEventListener('scroll', handleScroll);
109
+ window.removeEventListener('resize', handleLoadOrResize);
110
+ };
111
+ }, [updateViewportFromScroll]);
112
+
113
+ // Generate waveform data from preprocessed audio
114
+ const generateWaveformFromPreprocessedAudio = useCallback(async () => {
115
+ if (!preprocessedAudio?.data) {
116
+ console.log('No preprocessed audio data available');
117
+ return;
118
+ }
119
+
120
+ try {
121
+ console.log('Generating waveform from preprocessed audio data');
122
+
123
+ // Decode base64 audio data
124
+ const audioBytes = atob(preprocessedAudio.data);
125
+ const audioArrayBuffer = new ArrayBuffer(audioBytes.length);
126
+ const audioUint8Array = new Uint8Array(audioArrayBuffer);
127
+
128
+ for (let i = 0; i < audioBytes.length; i++) {
129
+ audioUint8Array[i] = audioBytes.charCodeAt(i);
130
+ }
131
+
132
+ // Create audio context and decode the WAV data
133
+ const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
134
+ const audioBuffer = await audioContext.decodeAudioData(audioArrayBuffer);
135
+
136
+ // Extract audio data
137
+ const channelData = audioBuffer.getChannelData(0);
138
+ const samples = Math.min(800, canvasWidth); // Limit samples for performance
139
+ const blockSize = Math.floor(channelData.length / samples);
140
+
141
+ const waveform: number[] = [];
142
+ for (let i = 0; i < samples; i++) {
143
+ const start = i * blockSize;
144
+ const end = Math.min(start + blockSize, channelData.length);
145
+
146
+ let sum = 0;
147
+ for (let j = start; j < end; j++) {
148
+ sum += Math.abs(channelData[j]);
149
+ }
150
+
151
+ waveform.push(sum / (end - start));
152
+ }
153
+
154
+ // Normalize waveform
155
+ const max = Math.max(...waveform);
156
+ const normalizedWaveform = max > 0 ? waveform.map(val => val / max) : waveform;
157
+
158
+ setWaveformData(normalizedWaveform);
159
+ console.log(`Generated waveform with ${normalizedWaveform.length} samples from preprocessed audio`);
160
+
161
+ } catch (error) {
162
+ console.error('Error generating waveform from preprocessed audio:', error);
163
+ // Fallback to segment-based visualization
164
+ generateFallbackWaveform();
165
+ }
166
+ }, [preprocessedAudio, canvasWidth]);
167
+
168
+ // Fallback waveform generation from segment data
169
+ const generateFallbackWaveform = useCallback(() => {
170
+ if (!transcription?.aligned_segments || mediaDuration === 0) return;
171
+
172
+ console.log('Using fallback waveform generation from segments');
173
+ const segments = transcription.aligned_segments;
174
+ const samples = Math.min(400, canvasWidth / 2);
175
+ const bars = new Array(samples).fill(0);
176
+
177
+ // Create waveform based on speech activity in segments
178
+ segments.forEach(segment => {
179
+ const startIndex = Math.floor((segment.start / mediaDuration) * samples);
180
+ const endIndex = Math.ceil((segment.end / mediaDuration) * samples);
181
+
182
+ for (let i = startIndex; i < Math.min(endIndex, samples); i++) {
183
+ // Use segment text length and duration to estimate intensity
184
+ const intensity = Math.min(1.0, segment.text.length / 50 + 0.3);
185
+ bars[i] = Math.max(bars[i], intensity * (0.7 + Math.random() * 0.3));
186
+ }
187
+ });
188
+
189
+ setWaveformData(bars);
190
+ console.log(`Generated fallback waveform with ${bars.length} samples`);
191
+ }, [transcription, mediaDuration, canvasWidth]);
192
+
193
+ // Generate waveform when preprocessed audio becomes available
194
+ useEffect(() => {
195
+ if (preprocessedAudio?.data) {
196
+ generateWaveformFromPreprocessedAudio();
197
+ } else if (transcription?.aligned_segments) {
198
+ // Use fallback if we have segments but no preprocessed audio
199
+ generateFallbackWaveform();
200
+ }
201
+ }, [preprocessedAudio, generateWaveformFromPreprocessedAudio, generateFallbackWaveform]);
202
+
203
+ // Draw the minimap
204
+ const draw = useCallback(() => {
205
+ const canvas = canvasRef.current;
206
+ if (!canvas || mediaDuration === 0) return;
207
+
208
+ const ctx = canvas.getContext('2d');
209
+ if (!ctx) return;
210
+
211
+ const { width, height } = canvas;
212
+
213
+ // Clear canvas
214
+ ctx.clearRect(0, 0, width, height);
215
+
216
+ // Draw background
217
+ ctx.fillStyle = '#1a1a1a';
218
+ ctx.fillRect(0, 0, width, height);
219
+
220
+ // Draw waveform
221
+ if (waveformData.length > 0) {
222
+ ctx.fillStyle = '#4a5568';
223
+ const barWidth = width / waveformData.length;
224
+
225
+ waveformData.forEach((amplitude, index) => {
226
+ const barHeight = amplitude * (height - 20);
227
+ const x = index * barWidth;
228
+ const y = (height - barHeight) / 2;
229
+
230
+ ctx.fillRect(x, y, Math.max(1, barWidth - 1), barHeight);
231
+ });
232
+ }
233
+
234
+ // Draw segments as colored bars
235
+ if (transcription?.aligned_segments) {
236
+ transcription.aligned_segments.forEach((segment, index) => {
237
+ const startX = (segment.start / mediaDuration) * width;
238
+ const endX = (segment.end / mediaDuration) * width;
239
+ const segmentWidth = endX - startX;
240
+
241
+ // Alternate colors for segments
242
+ ctx.fillStyle = index % 2 === 0 ? '#3182ce' : '#38a169';
243
+ ctx.fillRect(startX, height - 4, segmentWidth, 4);
244
+ });
245
+ }
246
+
247
+ // Draw current time indicator
248
+ const currentTimeX = (currentTime / mediaDuration) * width;
249
+ ctx.strokeStyle = '#f56565';
250
+ ctx.lineWidth = 2;
251
+ ctx.beginPath();
252
+ ctx.moveTo(currentTimeX, 0);
253
+ ctx.lineTo(currentTimeX, height);
254
+ ctx.stroke();
255
+
256
+ // Draw viewport region (what's visible in Canvas Timeline)
257
+ if (viewport.visible) {
258
+ const viewportStartX = (viewport.start / mediaDuration) * width;
259
+ const viewportEndX = (viewport.end / mediaDuration) * width;
260
+
261
+ // Draw viewport selection area (visible region highlight)
262
+ ctx.fillStyle = 'rgba(66, 153, 225, 0.3)';
263
+ ctx.fillRect(viewportStartX, 0, viewportEndX - viewportStartX, height);
264
+
265
+ // Draw left boundary line (start of visible area)
266
+ ctx.strokeStyle = '#4299e1';
267
+ ctx.lineWidth = 3;
268
+ ctx.beginPath();
269
+ ctx.moveTo(viewportStartX, 0);
270
+ ctx.lineTo(viewportStartX, height);
271
+ ctx.stroke();
272
+
273
+ // Draw right boundary line (end of visible area)
274
+ ctx.beginPath();
275
+ ctx.moveTo(viewportEndX, 0);
276
+ ctx.lineTo(viewportEndX, height);
277
+ ctx.stroke();
278
+
279
+ // Draw border around visible area
280
+ ctx.strokeStyle = '#4299e1';
281
+ ctx.lineWidth = 1;
282
+ ctx.strokeRect(viewportStartX, 0, viewportEndX - viewportStartX, height);
283
+ }
284
+ }, [waveformData, transcription, currentTime, viewport, mediaDuration]);
285
+
286
+ // Update canvas size and redraw
287
+ useEffect(() => {
288
+ const canvas = canvasRef.current;
289
+ if (canvas) {
290
+ canvas.width = canvasWidth;
291
+ canvas.height = MINIMAP_HEIGHT;
292
+ draw();
293
+ }
294
+ }, [canvasWidth, draw]);
295
+
296
+ // Redraw when dependencies change
297
+ useEffect(() => {
298
+ draw();
299
+ }, [draw]);
300
+
301
+ // Utility function to get time from X coordinate
302
+ const getTimeFromX = useCallback((x: number) => {
303
+ return (x / canvasWidth) * mediaDuration;
304
+ }, [canvasWidth, mediaDuration]);
305
+
306
+ // Check if clicking inside the viewport region
307
+ const isClickingViewport = useCallback((x: number) => {
308
+ if (!viewport.visible) return false;
309
+
310
+ const viewportStartX = (viewport.start / mediaDuration) * canvasWidth;
311
+ const viewportEndX = (viewport.end / mediaDuration) * canvasWidth;
312
+
313
+ return x >= viewportStartX && x <= viewportEndX;
314
+ }, [viewport, mediaDuration, canvasWidth]);
315
+
316
+ // Scroll Canvas Timeline to show specific time
317
+ const scrollToTime = useCallback((time: number) => {
318
+ const canvasContainer = canvasTimelineRef.current;
319
+ if (!canvasContainer) return;
320
+
321
+ const totalCanvasWidth = mediaDuration * PIXELS_PER_SECOND;
322
+ const targetScrollLeft = Math.max(0, (time / mediaDuration) * totalCanvasWidth);
323
+
324
+ canvasContainer.scrollLeft = targetScrollLeft;
325
+ }, [canvasTimelineRef, mediaDuration]);
326
+
327
+ // Mouse event handlers
328
+ const handleMouseDown = useCallback((e: React.MouseEvent) => {
329
+ const rect = canvasRef.current?.getBoundingClientRect();
330
+ if (!rect) return;
331
+
332
+ const x = e.clientX - rect.left;
333
+
334
+ if (isClickingViewport(x)) {
335
+ // Start dragging the viewport
336
+ setIsDragging(true);
337
+ setDragStartX(x);
338
+ const canvasContainer = canvasTimelineRef.current;
339
+ if (canvasContainer) {
340
+ setDragStartScrollLeft(canvasContainer.scrollLeft);
341
+ }
342
+ } else {
343
+ // Click outside viewport - jump to that position
344
+ const clickTime = getTimeFromX(x);
345
+ scrollToTime(clickTime);
346
+ }
347
+ }, [isClickingViewport, canvasTimelineRef, getTimeFromX, scrollToTime]);
348
+
349
+ const handleMouseMove = useCallback((e: React.MouseEvent) => {
350
+ if (!isDragging) return;
351
+
352
+ const rect = canvasRef.current?.getBoundingClientRect();
353
+ if (!rect) return;
354
+
355
+ const x = e.clientX - rect.left;
356
+ const deltaX = x - dragStartX;
357
+
358
+ const canvasContainer = canvasTimelineRef.current;
359
+ if (!canvasContainer) return;
360
+
361
+ // Convert deltaX to scroll delta
362
+ const totalCanvasWidth = mediaDuration * PIXELS_PER_SECOND;
363
+ const scrollDelta = (deltaX / canvasWidth) * totalCanvasWidth;
364
+
365
+ const newScrollLeft = Math.max(0, Math.min(
366
+ dragStartScrollLeft + scrollDelta,
367
+ canvasContainer.scrollWidth - canvasContainer.clientWidth
368
+ ));
369
+
370
+ canvasContainer.scrollLeft = newScrollLeft;
371
+ }, [isDragging, dragStartX, dragStartScrollLeft, canvasTimelineRef, mediaDuration, canvasWidth]);
372
+
373
+ const handleMouseUp = useCallback(() => {
374
+ setIsDragging(false);
375
+ }, []);
376
+
377
+ // Add global mouse event listeners when dragging
378
+ useEffect(() => {
379
+ if (isDragging) {
380
+ const handleGlobalMouseMove = (e: MouseEvent) => {
381
+ handleMouseMove(e as unknown as React.MouseEvent); // safe: the handler only reads clientX
382
+ };
383
+ const handleGlobalMouseUp = () => {
384
+ handleMouseUp();
385
+ };
386
+
387
+ document.addEventListener('mousemove', handleGlobalMouseMove);
388
+ document.addEventListener('mouseup', handleGlobalMouseUp);
389
+
390
+ return () => {
391
+ document.removeEventListener('mousemove', handleGlobalMouseMove);
392
+ document.removeEventListener('mouseup', handleGlobalMouseUp);
393
+ };
394
+ }
395
+ }, [isDragging, handleMouseMove, handleMouseUp]);
396
+
397
+ // Change cursor based on hover position
398
+ const handleMouseHover = useCallback((e: React.MouseEvent) => {
399
+ if (isDragging) return;
400
+
401
+ const rect = canvasRef.current?.getBoundingClientRect();
402
+ if (!rect) return;
403
+
404
+ const x = e.clientX - rect.left;
405
+ const canvas = canvasRef.current;
406
+ if (!canvas) return;
407
+
408
+ if (isClickingViewport(x)) {
409
+ canvas.style.cursor = 'move';
410
+ } else {
411
+ canvas.style.cursor = 'pointer';
412
+ }
413
+ }, [isDragging, isClickingViewport]);
414
+
415
+ // Download preprocessed audio as WAV file
416
+ const downloadPreprocessedAudio = useCallback(() => {
417
+ if (!preprocessedAudio?.data) {
418
+ console.error('No preprocessed audio data available');
419
+ return;
420
+ }
421
+
422
+ try {
423
+ // Decode base64 audio data
424
+ const audioBytes = atob(preprocessedAudio.data);
425
+ const audioArrayBuffer = new ArrayBuffer(audioBytes.length);
426
+ const audioUint8Array = new Uint8Array(audioArrayBuffer);
427
+
428
+ for (let i = 0; i < audioBytes.length; i++) {
429
+ audioUint8Array[i] = audioBytes.charCodeAt(i);
430
+ }
431
+
432
+ // Create blob and download
433
+ const blob = new Blob([audioUint8Array], { type: 'audio/wav' });
434
+ const url = URL.createObjectURL(blob);
435
+
436
+ // Get original filename without extension
437
+ const { file } = useTranscriptionStore.getState();
438
+ const originalName = file?.name?.replace(/\.[^/.]+$/, '') || 'audio';
439
+ const filename = `${originalName}_preprocessed_16khz_mono_normalized.wav`;
440
+
441
+ // Create download link
442
+ const link = document.createElement('a');
443
+ link.href = url;
444
+ link.download = filename;
445
+ document.body.appendChild(link);
446
+ link.click();
447
+ document.body.removeChild(link);
448
+
449
+ // Clean up URL
450
+ URL.revokeObjectURL(url);
451
+
452
+ console.log(`Downloaded preprocessed audio: ${filename}`);
453
+ } catch (error) {
454
+ console.error('Error downloading preprocessed audio:', error);
455
+ }
456
+ }, [preprocessedAudio]);
457
+
458
+ if (!transcription || mediaDuration === 0) {
459
+ return null;
460
+ }
461
+
462
+ return (
463
+ <div className="bg-gray-800 border-b border-gray-700">
464
+ <div className="px-4 py-2">
465
+ <div className="flex justify-between items-center text-xs text-gray-400 mb-1">
466
+ <div className="flex items-center gap-2">
467
+ <span>
468
+ Overview - Full Timeline ({Math.round(mediaDuration)}s)
469
+ {preprocessedAudio ? ' • Preprocessed Waveform' : ' • Segment-Based View'}
470
+ </span>
471
+ {preprocessedAudio && (
472
+ <div className="tooltip tooltip-bottom" data-tip="Download preprocessed audio as WAV file (16kHz, mono, layer-normalized). This is the exact audio data processed by the AI transcription model after conversion and standardization from the original file.">
473
+ <button
474
+ onClick={downloadPreprocessedAudio}
475
+ className="flex items-center gap-1 px-1.5 py-0.5 text-xs bg-gray-600 hover:bg-gray-500 rounded transition-colors text-white"
476
+ >
477
+ <ArrowDownTrayIcon className="w-3 h-3" />
478
+ .wav
479
+ </button>
480
+ </div>
481
+ )}
482
+ </div>
483
+ {viewport.visible && (
484
+ <span>
485
+ Visible: {viewport.start.toFixed(1)}s - {viewport.end.toFixed(1)}s
486
+ ({Math.round(viewport.end - viewport.start)}s view)
487
+ </span>
488
+ )}
489
+ </div>
490
+ <div
491
+ ref={containerRef}
492
+ className="relative"
493
+ style={{ height: MINIMAP_HEIGHT }}
494
+ >
495
+ <canvas
496
+ ref={canvasRef}
497
+ onMouseDown={handleMouseDown}
498
+ onMouseMove={handleMouseHover}
499
+ className="block w-full h-full"
500
+ style={{
501
+ width: '100%',
502
+ height: MINIMAP_HEIGHT,
503
+ }}
504
+ />
505
+ </div>
506
+ </div>
507
+ </div>
508
+ );
509
+ }
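
Note: MinimapTimeline maps pixels to media time linearly in both directions, and the drag handler rescales a pixel delta on the minimap into a scroll delta on the much wider main timeline. A minimal sketch of that math as pure helpers, assuming a PIXELS_PER_SECOND of 50 (the real constant is defined elsewhere in the app):

```ts
// Sketch of the minimap coordinate math above (illustration, not the component's API).
const PIXELS_PER_SECOND = 50; // assumed value; the app defines the real constant

// Minimap x-coordinate -> media time in seconds (mirrors getTimeFromX).
function timeFromMinimapX(x: number, minimapWidth: number, mediaDuration: number): number {
  return (x / minimapWidth) * mediaDuration;
}

// Pixel delta dragged on the minimap -> scroll delta on the full timeline
// (mirrors handleMouseMove's scrollDelta computation).
function scrollDeltaFromDrag(deltaX: number, minimapWidth: number, mediaDuration: number): number {
  const totalCanvasWidth = mediaDuration * PIXELS_PER_SECOND;
  return (deltaX / minimapWidth) * totalCanvasWidth;
}

// A 10 px drag on an 800 px minimap over a 60 s file moves the 3000 px timeline by 37.5 px.
console.log(timeFromMinimapX(400, 800, 60)); // 30 (seconds)
console.log(scrollDeltaFromDrag(10, 800, 60)); // 37.5
```
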
frontend/src/components/QuickGuide.tsx ADDED
@@ -0,0 +1,176 @@
1
+ import React from "react";
2
+ import {useTranscriptionStore} from "../stores/transcriptionStore";
3
+
4
+ interface QuickGuideProps {
5
+ currentStep?: string; // Optional override for current step
6
+ }
7
+
8
+ type GuideStep = {
9
+ id: string;
10
+ text: string;
11
+ icon?: string;
12
+ isActive: (state: any) => boolean;
13
+ isCompleted: (state: any) => boolean;
14
+ };
15
+
16
+ const QuickGuide: React.FC<QuickGuideProps> = ({currentStep}) => {
17
+ const {
18
+ file,
19
+ transcription,
20
+ isLoading,
21
+ selectedSegmentIndex,
22
+ currentSegments,
23
+ currentTime
24
+ } = useTranscriptionStore();
25
+
26
+ // Define all the steps with their conditions
27
+ const steps: GuideStep[] = [
28
+ {
29
+ id: "upload",
30
+ text: "Upload or record audio",
31
+ icon: "📁",
32
+ isActive: (state) => !state.file,
33
+ isCompleted: (state) => !!state.file,
34
+ },
35
+ {
36
+ id: "transcribe",
37
+ text: "Click transcribe to process",
38
+ icon: "🎯",
39
+ isActive: (state) =>
40
+ !!state.file && !state.transcription && !state.isLoading,
41
+ isCompleted: (state) => !!state.transcription || state.isLoading,
42
+ },
43
+ {
44
+ id: "play",
45
+ text: "Play media",
46
+ icon: "▶️",
47
+ isActive: (state) => !!state.transcription,
48
+ isCompleted: () => (currentTime ?? 0) > 0, // Completed once playback has started
49
+ },
50
+ // {
51
+ // id: "jump",
52
+ // text: "Click segments to jump",
53
+ // icon: "🎵",
54
+ // isActive: (state) => !!state.transcription,
55
+ // isCompleted: () => false, // Always in progress when transcription available
56
+ // },
57
+ // {
58
+ // id: "drag",
59
+ // text: "Drag segments to move/resize",
60
+ // icon: "↔️",
61
+ // isActive: (state) => !!state.transcription,
62
+ // isCompleted: () => false, // Always in progress when transcription available
63
+ // },
64
+ // {
65
+ // id: "combine",
66
+ // text: "Use slider to combine segments",
67
+ // icon: "🔗",
68
+ // isActive: (state) => !!state.transcription,
69
+ // isCompleted: () => false, // Always in progress when transcription available
70
+ // },
71
+ // {
72
+ // id: "download",
73
+ // text: "Download subtitles",
74
+ // icon: "💾",
75
+ // isActive: (state) => !!state.transcription,
76
+ // isCompleted: () => false, // Always in progress when transcription available
77
+ // },
78
+ ];
79
+
80
+ // Create state object for condition checking
81
+ const storeState = {
82
+ file,
83
+ transcription,
84
+ isLoading,
85
+ selectedSegmentIndex,
86
+ currentSegments,
87
+ };
88
+
89
+ // Determine step states
90
+ const getStepState = (step: GuideStep) => {
91
+ // Override with currentStep prop if provided
92
+ if (currentStep) {
93
+ if (step.id === currentStep) return "active";
94
+ if (step.isCompleted(storeState)) return "completed";
95
+ return "inactive";
96
+ }
97
+
98
+ // Default logic based on store state
99
+ if (step.isCompleted(storeState)) return "completed";
100
+ if (step.isActive(storeState)) return "active";
101
+ return "inactive";
102
+ };
103
+
104
+ // Get the appropriate CSS classes for each step state
105
+ const getStepClasses = (stepState: string) => {
106
+ switch (stepState) {
107
+ case "active":
108
+ return "text-blue-300 bg-blue-900/30 border-blue-500/50 font-medium";
109
+ case "completed":
110
+ return "text-green-300 bg-green-900/20 border-green-500/30";
111
+ default:
112
+ return "text-gray-400 bg-transparent border-transparent";
113
+ }
114
+ };
115
+
116
+ // Get icon for step state
117
+ const getStepIcon = (step: GuideStep, stepState: string) => {
118
+ if (stepState === "completed") return "✓";
119
+ if (stepState === "active") return "→";
120
+ return step.icon || "•";
121
+ };
122
+
123
+ return (
124
+ <div className="border-t border-gray-700 py-3">
125
+ <h3 className="text-xs font-semibold mb-2 text-gray-200">Quick Guide</h3>
126
+ <div className="space-y-1">
127
+ {steps.map((step) => {
128
+ const stepState = getStepState(step);
129
+ const stepClasses = getStepClasses(stepState);
130
+ const icon = getStepIcon(step, stepState);
131
+
132
+ return (
133
+ <div
134
+ key={step.id}
135
+ className={`text-xs px-2 py-1 rounded border transition-all duration-200 ${stepClasses}`}
136
+ >
137
+ <span
138
+ className="inline-block w-4 text-center mr-1"
139
+ aria-label={`Step ${step.id}`}
140
+ >
141
+ {icon}
142
+ </span>
143
+ {step.text}
144
+ </div>
145
+ );
146
+ })}
147
+ </div>
148
+
149
+ {/* Progress indicator */}
150
+ {transcription && (
151
+ <div className="mt-2 pt-2 border-t border-gray-600">
152
+ <div className="text-xs text-gray-400">
153
+ {selectedSegmentIndex !== null ? (
154
+ <span className="text-yellow-400">✏️ Editing mode active</span>
155
+ ) : (
156
+ <span className="text-green-400">
157
+ ✓ Ready for playback & editing
158
+ </span>
159
+ )}
160
+ </div>
161
+ </div>
162
+ )}
163
+
164
+ {/* Loading indicator */}
165
+ {isLoading && (
166
+ <div className="mt-2 pt-2 border-t border-gray-600">
167
+ <div className="text-xs text-blue-400 animate-pulse">
168
+ ⏳ Processing... Please wait
169
+ </div>
170
+ </div>
171
+ )}
172
+ </div>
173
+ );
174
+ };
175
+
176
+ export default QuickGuide;
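
Note: each guide step resolves to one of three states, with "completed" taking precedence over "active". The same precedence rule as a small pure function, using a simplified state shape for illustration:

```ts
// Sketch of QuickGuide's step resolution (simplified state shape; not the component's API).
type StepState = "completed" | "active" | "inactive";

interface GuideState {
  file: File | null;
  transcription: unknown;
  isLoading: boolean;
}

function resolveStepState(
  isActive: (s: GuideState) => boolean,
  isCompleted: (s: GuideState) => boolean,
  state: GuideState
): StepState {
  if (isCompleted(state)) return "completed"; // completed wins over active
  if (isActive(state)) return "active";
  return "inactive";
}

// The "transcribe" step: active once a file exists, completed once loading starts
// or a transcription arrives.
const state: GuideState = { file: new File([], "clip.wav"), transcription: null, isLoading: false };
console.log(
  resolveStepState(
    (s) => !!s.file && !s.transcription && !s.isLoading,
    (s) => !!s.transcription || s.isLoading,
    state
  )
); // "active"
```
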
frontend/src/components/SegmentEditor.tsx ADDED
@@ -0,0 +1,92 @@
1
+ import React, { useEffect, useRef } from 'react';
2
+ import { AlignedSegment } from '../services/transcriptionApi';
3
+ import { formatTime } from '../utils/subtitleUtils';
4
+
5
+ interface SegmentEditorProps {
6
+ segment: AlignedSegment;
7
+ segmentIndex: number;
8
+ onUpdateText: (index: number, text: string) => void;
9
+ onDeleteSegment: (index: number) => void;
10
+ onClose: () => void;
11
+ }
12
+
13
+ export default function SegmentEditor({
14
+ segment,
15
+ segmentIndex,
16
+ onUpdateText,
17
+ onDeleteSegment,
18
+ onClose,
19
+ }: SegmentEditorProps) {
20
+ const textareaRef = useRef<HTMLTextAreaElement>(null);
21
+
22
+ useEffect(() => {
23
+ // Focus the textarea when component mounts
24
+ if (textareaRef.current) {
25
+ textareaRef.current.focus();
26
+ textareaRef.current.select();
27
+ }
28
+ }, []);
29
+
30
+ const handleTextChange = (e: React.ChangeEvent<HTMLTextAreaElement>) => {
31
+ const newText = e.target.value;
32
+ onUpdateText(segmentIndex, newText);
33
+ };
34
+
35
+ const handleDelete = () => {
36
+ onDeleteSegment(segmentIndex);
37
+ onClose();
38
+ };
39
+
40
+ const handleKeyDown = (e: React.KeyboardEvent) => {
41
+ if (e.key === 'Escape') {
42
+ e.preventDefault();
43
+ onClose();
44
+ }
45
+ };
46
+
47
+ return (
48
+ <div className="bg-gray-800 border-b border-gray-700 p-4">
49
+ <div className="flex items-start justify-between mb-3">
50
+ <div className="flex-1">
51
+ <h3 className="text-sm font-semibold text-white mb-1">
52
+ Edit Segment #{segmentIndex + 1}
53
+ </h3>
54
+ <div className="text-xs text-gray-400">
55
+ {formatTime(segment.start)} - {formatTime(segment.end)} ({segment.duration.toFixed(1)}s)
56
+ </div>
57
+ </div>
58
+ <button
59
+ onClick={onClose}
60
+ className="text-gray-400 hover:text-white transition-colors ml-4"
61
+ title="Close editor"
62
+ >
63
+ <svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
64
+ <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
65
+ </svg>
66
+ </button>
67
+ </div>
68
+
69
+ <div className="mb-3">
70
+ <textarea
71
+ ref={textareaRef}
72
+ value={segment.text}
73
+ onChange={handleTextChange}
74
+ onKeyDown={handleKeyDown}
75
+ className="w-full p-2 bg-gray-700 text-white border border-gray-600 rounded resize-none focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent"
76
+ rows={1}
77
+ placeholder="Enter segment text..."
78
+ />
79
+ </div>
80
+
81
+ <div className="flex justify-end">
82
+ <button
83
+ onClick={handleDelete}
84
+ className="px-3 py-1 bg-red-600 hover:bg-red-700 text-white text-sm rounded transition-colors"
85
+ title="Delete this segment"
86
+ >
87
+ Delete
88
+ </button>
89
+ </div>
90
+ </div>
91
+ );
92
+ }
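
Note: SegmentEditor is fully controlled; the parent owns the segment array and supplies the update/delete callbacks. A hypothetical host sketch (not part of this repo) showing the wiring:

```tsx
// Hypothetical parent for SegmentEditor (illustration only).
import React, { useState } from 'react';
import SegmentEditor from './SegmentEditor';
import { AlignedSegment } from '../services/transcriptionApi';

function SegmentEditorHost({ initial }: { initial: AlignedSegment[] }) {
  const [segments, setSegments] = useState(initial);
  const [editing, setEditing] = useState<number | null>(0);

  if (editing === null || !segments[editing]) return null;

  return (
    <SegmentEditor
      segment={segments[editing]}
      segmentIndex={editing}
      onUpdateText={(i, text) =>
        setSegments((prev) => prev.map((s, idx) => (idx === i ? { ...s, text } : s)))
      }
      onDeleteSegment={(i) => setSegments((prev) => prev.filter((_, idx) => idx !== i))}
      onClose={() => setEditing(null)}
    />
  );
}
```
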
frontend/src/components/ServerStatusIndicator.tsx ADDED
@@ -0,0 +1,241 @@
1
+ import React, { useEffect, useState } from 'react';
2
+ import { useTranscriptionStore } from '../stores/transcriptionStore';
3
+ import { CheckCircleIcon, ClockIcon, InformationCircleIcon } from '@heroicons/react/24/outline';
4
+
5
+ const ServerStatusIndicator: React.FC = () => {
6
+ const {
7
+ serverStatus,
8
+ serverHealth,
9
+ startStatusPolling,
10
+ stopStatusPolling,
11
+ fetchServerHealth
12
+ } = useTranscriptionStore();
13
+
14
+ const [showTooltip, setShowTooltip] = useState(false);
15
+
16
+ // Start polling when component mounts
17
+ useEffect(() => {
18
+ startStatusPolling();
19
+ // Also fetch server health initially
20
+ fetchServerHealth();
21
+
22
+ // Cleanup on unmount
23
+ return () => {
24
+ stopStatusPolling();
25
+ };
26
+ }, [startStatusPolling, stopStatusPolling, fetchServerHealth]);
27
+
28
+ if (!serverStatus) {
29
+ return (
30
+ <div className="flex items-center gap-2 text-gray-300 text-sm">
31
+ <div className="w-2 h-2 bg-gray-400 rounded-full animate-pulse"></div>
32
+ <span>Connecting to server...</span>
33
+ </div>
34
+ );
35
+ }
36
+
37
+ const formatDuration = (seconds: number): string => {
38
+ const mins = Math.floor(seconds / 60);
39
+ const secs = Math.floor(seconds % 60);
40
+ return `${mins}:${secs.toString().padStart(2, '0')}`;
41
+ };
42
+
43
+ const getOperationLabel = (operation: string): string => {
44
+ switch (operation) {
45
+ case 'transcribe':
46
+ return 'Short Transcription';
47
+ case 'transcribe_long':
48
+ return 'Long Transcription';
49
+ case 'align':
50
+ return 'Alignment';
51
+ default:
52
+ return 'Processing';
53
+ }
54
+ };
55
+
56
+ const renderHealthTooltip = () => {
57
+ if (!serverHealth) return null;
58
+
59
+ return (
60
+ <div className="fixed z-50 p-3 bg-gray-900 text-white text-xs rounded-lg shadow-xl border border-gray-600 min-w-64 max-w-80"
61
+ style={{
62
+ left: '280px', // Position just outside the sidebar (sidebar width is 256px + padding)
63
+ top: '120px' // Position near the status indicator
64
+ }}>
65
+ <div className="font-semibold mb-2 text-blue-300">Server Health</div>
66
+
67
+ <div className="space-y-1">
68
+ <div className="flex justify-between">
69
+ <span className="text-gray-300">Status:</span>
70
+ <span className="text-green-400">{serverHealth.status}</span>
71
+ </div>
72
+
73
+ <div className="flex justify-between">
74
+ <span className="text-gray-300">Version:</span>
75
+ <span>{serverHealth.version}</span>
76
+ </div>
77
+
78
+ <div className="flex justify-between">
79
+ <span className="text-gray-300">Device:</span>
80
+ <span className="font-mono">{serverHealth.device}</span>
81
+ </div>
82
+
83
+ <div className="flex justify-between">
84
+ <span className="text-gray-300">CUDA:</span>
85
+ <span className={serverHealth.cuda_available ? "text-green-400" : "text-red-400"}>
86
+ {serverHealth.cuda_available ? "Available" : "Unavailable"}
87
+ </span>
88
+ </div>
89
+
90
+ <div className="flex justify-between">
91
+ <span className="text-gray-300">FFmpeg:</span>
92
+ <span className={serverHealth.ffmpeg_available ? "text-green-400" : "text-red-400"}>
93
+ {serverHealth.ffmpeg_available ? "Available" : "Unavailable"}
94
+ </span>
95
+ </div>
96
+
97
+ {serverHealth.gpu_name && (
98
+ <div className="flex justify-between">
99
+ <span className="text-gray-300">GPU:</span>
100
+ <span className="text-blue-400 text-right max-w-32 truncate" title={serverHealth.gpu_name}>
101
+ {serverHealth.gpu_name}
102
+ </span>
103
+ </div>
104
+ )}
105
+
106
+ {serverHealth.gpu_count && serverHealth.gpu_count > 1 && (
107
+ <div className="flex justify-between">
108
+ <span className="text-gray-300">GPU Count:</span>
109
+ <span>{serverHealth.gpu_count}</span>
110
+ </div>
111
+ )}
112
+
113
+ {/* GPU Memory Information */}
114
+ {serverHealth.gpu_memory_total_mb && (
115
+ <>
116
+ <div className="border-t border-gray-600 pt-1 mt-2">
117
+ <div className="text-xs text-blue-300 font-semibold mb-1">GPU Memory</div>
118
+ </div>
119
+
120
+ <div className="flex justify-between">
121
+ <span className="text-gray-300">Used:</span>
122
+ <span className="text-orange-400">
123
+ {serverHealth.gpu_memory_reserved_mb?.toFixed(1)} MB
124
+ </span>
125
+ </div>
126
+
127
+ <div className="flex justify-between">
128
+ <span className="text-gray-300">Total:</span>
129
+ <span className="text-blue-400">
130
+ {serverHealth.gpu_memory_total_mb?.toFixed(1)} MB
131
+ </span>
132
+ </div>
133
+
134
+ <div className="flex justify-between">
135
+ <span className="text-gray-300">Free:</span>
136
+ <span className="text-green-400">
137
+ {serverHealth.gpu_memory_free_mb?.toFixed(1)} MB
138
+ </span>
139
+ </div>
140
+
141
+ {/* Memory usage bar */}
142
+ {serverHealth.gpu_memory_total_mb && serverHealth.gpu_memory_reserved_mb && (
143
+ <div className="mt-1">
144
+ <div className="w-full bg-gray-600 rounded-full h-1.5">
145
+ <div
146
+ className="bg-orange-500 h-1.5 rounded-full transition-all duration-300"
147
+ style={{
148
+ width: `${Math.min(100, (serverHealth.gpu_memory_reserved_mb / serverHealth.gpu_memory_total_mb) * 100)}%`
149
+ }}
150
+ ></div>
151
+ </div>
152
+ <div className="text-xs text-gray-400 mt-0.5 text-center">
153
+ {((serverHealth.gpu_memory_reserved_mb / serverHealth.gpu_memory_total_mb) * 100).toFixed(1)}% used
154
+ </div>
155
+ </div>
156
+ )}
157
+ </>
158
+ )}
159
+ </div>
160
+
161
+ {/* Tooltip arrow pointing to the left (towards the info icon) */}
162
+ <div className="absolute left-0 top-4 w-0 h-0 border-t-4 border-t-transparent border-b-4 border-b-transparent border-r-4 border-r-gray-900 transform -translate-x-full"></div>
163
+ </div>
164
+ );
165
+ };
166
+
167
+ if (serverStatus.is_busy) {
168
+ const progress = serverStatus.progress || 0;
169
+ const progressPercent = Math.round(progress * 100);
170
+
171
+ return (
172
+ <div className="flex flex-col gap-1">
173
+ <div className="flex items-center justify-between">
174
+ <div className="flex items-center gap-2 text-orange-300 text-sm">
175
+ <ClockIcon className="w-4 h-4 animate-spin" />
176
+ <span className="font-medium">Server Busy</span>
177
+ </div>
178
+
179
+ {/* Health info icon with tooltip */}
180
+ <div className="relative">
181
+ <InformationCircleIcon
182
+ className="w-4 h-4 text-gray-400 hover:text-gray-200 cursor-help transition-colors"
183
+ onMouseEnter={() => setShowTooltip(true)}
184
+ onMouseLeave={() => setShowTooltip(false)}
185
+ />
186
+ {showTooltip && renderHealthTooltip()}
187
+ </div>
188
+ </div>
189
+
190
+ <div className="text-xs text-gray-300 space-y-1">
191
+ <div className="flex justify-between items-center">
192
+ <span>{getOperationLabel(serverStatus.current_operation || 'processing')}</span>
193
+ <span className="font-mono text-orange-300">{progressPercent}%</span>
194
+ </div>
195
+
196
+ {serverStatus.duration_seconds && (
197
+ <div className="flex justify-between items-center">
198
+ <span className="text-gray-400">Duration:</span>
199
+ <span className="font-mono">{formatDuration(serverStatus.duration_seconds)}</span>
200
+ </div>
201
+ )}
202
+
203
+ {/* Progress bar */}
204
+ <div className="w-full bg-gray-600 rounded-full h-1.5 mt-2">
205
+ <div
206
+ className="bg-orange-500 h-1.5 rounded-full transition-all duration-300"
207
+ style={{ width: `${progressPercent}%` }}
208
+ ></div>
209
+ </div>
210
+ </div>
211
+ </div>
212
+ );
213
+ }
214
+
215
+ return (
216
+ <div className="flex flex-col gap-1">
217
+ <div className="flex items-center justify-between">
218
+ <div className="flex items-center gap-2 text-green-300 text-sm">
219
+ <CheckCircleIcon className="w-4 h-4" />
220
+ <span className="font-medium">Server Ready</span>
221
+ </div>
222
+
223
+ {/* Health info icon with tooltip */}
224
+ <div className="relative">
225
+ <InformationCircleIcon
226
+ className="w-4 h-4 text-gray-400 hover:text-gray-200 cursor-help transition-colors"
227
+ onMouseEnter={() => setShowTooltip(true)}
228
+ onMouseLeave={() => setShowTooltip(false)}
229
+ />
230
+ {showTooltip && renderHealthTooltip()}
231
+ </div>
232
+ </div>
233
+
234
+ <div className="text-xs text-gray-300">
235
+ <span>Completed: {serverStatus.total_completed} transcriptions</span>
236
+ </div>
237
+ </div>
238
+ );
239
+ };
240
+
241
+ export default ServerStatusIndicator;
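
Note: the component delegates polling to the store, whose implementation is not part of this diff. A hedged sketch of what a guarded polling pair could look like; the endpoint path and the 2-second interval are assumptions:

```ts
// Hypothetical polling helpers in the spirit of startStatusPolling/stopStatusPolling.
let pollHandle: ReturnType<typeof setInterval> | null = null;

async function fetchStatusOnce(onStatus: (status: unknown) => void): Promise<void> {
  try {
    const res = await fetch('/api/status'); // assumed endpoint
    if (res.ok) onStatus(await res.json());
  } catch {
    // On network errors, keep the last known status.
  }
}

function startStatusPolling(onStatus: (status: unknown) => void, intervalMs = 2000): void {
  if (pollHandle !== null) return; // guard: keep a single timer even if mounted twice
  void fetchStatusOnce(onStatus);
  pollHandle = setInterval(() => void fetchStatusOnce(onStatus), intervalMs);
}

function stopStatusPolling(): void {
  if (pollHandle !== null) {
    clearInterval(pollHandle);
    pollHandle = null;
  }
}
```
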
frontend/src/components/TermsModal.tsx ADDED
@@ -0,0 +1,675 @@
1
+ import {XMarkIcon} from "@heroicons/react/24/outline";
2
+
3
+ interface ModalProps {
4
+ isOpen: boolean;
5
+ onClose: () => void;
6
+ }
7
+
8
+ export function TOSModalComponent({isOpen, onClose}: ModalProps) {
9
+ if (!isOpen) return null;
10
+
11
+ return (
12
+ <div className="modal modal-open">
13
+ <div className="modal-box max-w-4xl bg-gray-800 text-white border border-gray-600">
14
+ <div className="flex justify-between items-center mb-6">
15
+ <h3 className="font-bold text-xl text-blue-300">
16
+ Omnilingual ASR Demo Supplemental Terms of Service
17
+ </h3>
18
+ <button
19
+ className="btn btn-sm btn-circle btn-ghost text-gray-300 hover:text-white hover:bg-gray-700"
20
+ onClick={onClose}
21
+ aria-label="Close modal"
22
+ >
23
+ <XMarkIcon className="w-5 h-5" />
24
+ </button>
25
+ </div>
26
+
27
+ <div className="space-y-4 max-h-96 overflow-y-auto">
28
+ <div className="bg-gray-700 p-4 rounded-lg border-l-4 border-blue-500">
29
+ <div className="text-gray-200 leading-relaxed space-y-4">
30
+ <p>
31
+ These Omnilingual ASR Demo Supplemental Terms of Service (these
32
+ "Supplemental Terms") govern your use of the Omnilingual ASR
33
+ demonstration experience hosted on Hugging Face (the "Demo").
34
+ The Demo showcases Meta's Omnilingual ASR research model for
35
+ transcribing audio and video files.
36
+ </p>
37
+ <p>
38
+ By using the Demo, you agree to be bound by these Supplemental
39
+ Terms, as well as Hugging Face's applicable terms and policies,
40
+ including Hugging Face's Terms of Service and Privacy Policy. If
41
+ you access any Meta service in connection with the Demo, Meta's
42
+ Terms of Service and Privacy Policy may also apply to that
43
+ separate access; however, as described below, Meta does not
44
+ receive your Demo data.
45
+ </p>
46
+
47
+ <ol className="list-decimal list-outside ml-6 space-y-4">
48
+ <li>
49
+ <strong>What the Demo Does</strong>
50
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
51
+ <li>
52
+ The Demo uses an automatic speech recognition model
53
+ developed by Meta to generate text transcriptions and
54
+ related metadata (e.g., timestamps) from audio or video
55
+ files you choose to provide ("Transcripts," and together
56
+ with any related metadata, "Outputs").
57
+ </li>
58
+ <li>
59
+ The audio and/or video files and any associated
60
+ information you provide (collectively, "Inputs") are
61
+ processed solely to provide you with Outputs via the Demo.
62
+ </li>
63
+ </ul>
64
+ </li>
65
+
66
+ <li>
67
+ <strong>Hosting; No Data to Meta</strong>
68
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
69
+ <li>
70
+ The Demo is hosted exclusively on Hugging Face
71
+ infrastructure. Meta does not operate the Demo's hosting
72
+ environment, and no Inputs, Outputs, or related usage data
73
+ from or about users of the Demo are transmitted to Meta.
74
+ </li>
75
+ <li>
76
+ Hugging Face (and, where applicable, the Hugging Face
77
+ Space owner) determines the means and purposes of
78
+ processing of any personal data you provide via the Demo.
79
+ Please review Hugging Face's Terms of Service and Privacy
80
+ Policy, which govern your use of the Demo and the handling
81
+ of your data on the platform.
82
+ </li>
83
+ </ul>
84
+ </li>
85
+
86
+ <li>
87
+ <strong>Your Rights and Restrictions</strong>
88
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
89
+ <li>
90
+ Subject to your compliance with these Supplemental Terms
91
+ and any applicable Hugging Face terms, Meta grants you a
92
+ limited, non-exclusive, non-transferable,
93
+ non-sublicensable license to access and use the Demo and
94
+ to access the Outputs made available through the Demo.
95
+ </li>
96
+ <li>
97
+ You may not use the Demo or Outputs for unlawful,
98
+ infringing, deceptive, harmful, or discriminatory
99
+ purposes, to violate others' rights (including privacy,
100
+ publicity, or intellectual property), or to cause or
101
+ attempt to cause harm.
102
+ </li>
103
+ <li>
104
+ You are responsible for ensuring you have all necessary
105
+ rights, permissions, and lawful bases to upload and
106
+ process Inputs, including any third-party content or
107
+ personal data contained in the Inputs. Do not upload
108
+ sensitive personal data unless you have a lawful basis and
109
+ it is permitted under applicable law.
110
+ </li>
111
+ </ul>
112
+ </li>
113
+
114
+ <li>
115
+ <strong>Inputs and Outputs; Licenses</strong>
116
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
117
+ <li>
118
+ As between you and Meta, you retain any rights you have in
119
+ your Inputs and Outputs. Because Meta does not receive
120
+ your Inputs or Outputs from the Demo, you do not grant
121
+ Meta any license to those materials via this Demo.
122
+ </li>
123
+ <li>
124
+ Your relationship regarding content licenses with Hugging
125
+ Face (and/or the Space owner) is governed by Hugging Face
126
+ terms. Meta makes no representations regarding how Hugging
127
+ Face handles or stores Inputs or Outputs.
128
+ </li>
129
+ </ul>
130
+ </li>
131
+
132
+ <li>
133
+ <strong>Feedback</strong>
134
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
135
+ <li>
136
+ If you choose to provide feedback to Meta outside the Demo
137
+ (for example, by emailing Meta or submitting feedback
138
+ through a Meta-managed channel), you grant Meta and its
139
+ affiliates a perpetual, irrevocable, non-exclusive,
140
+ sublicensable, transferable, royalty-free license to use
141
+ that feedback for any purpose. Do not include personal
142
+ data or confidential information in feedback.
143
+ </li>
144
+ </ul>
145
+ </li>
146
+
147
+ <li>
148
+ <strong>Third-Party Platform Terms</strong>
149
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
150
+ <li>
151
+ Your use of the Demo is also subject to Hugging Face's
152
+ Terms of Service and Privacy Policy. Any data collection,
153
+ retention, storage, caching, logging, or sharing related
154
+ to your use of the Demo is handled by Hugging Face (and/or
155
+ the Space owner) under those terms and policies. Meta is
156
+ not responsible for and has no control over Hugging Face's
157
+ processing.
158
+ </li>
159
+ </ul>
160
+ </li>
161
+
162
+ <li>
163
+ <strong>Age; Availability</strong>
164
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
165
+ <li>
166
+ You represent that you are at least the age of majority in
167
+ your jurisdiction. The Demo is made available for research
168
+ and demonstration purposes and may be modified, suspended,
169
+ or discontinued at any time without notice.
170
+ </li>
171
+ </ul>
172
+ </li>
173
+
174
+ <li>
175
+ <strong>Model and Demo Limitations</strong>
176
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
177
+ <li>
178
+ The Demo and Outputs may be inaccurate, incomplete,
179
+ untimely, or inappropriate for your use case. Do not rely
180
+ on the Demo or Outputs for medical, legal, safety, or
181
+ other high-risk purposes. You are solely responsible for
182
+ your use of the Demo and Outputs.
183
+ </li>
184
+ </ul>
185
+ </li>
186
+
187
+ <li>
188
+ <strong>Intellectual Property</strong>
189
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
190
+ <li>
191
+ Except for the limited rights expressly granted here, Meta
192
+ and its licensors retain all right, title, and interest in
193
+ and to the Demo, the Omnilingual ASR model, and all
194
+ related intellectual property.
195
+ </li>
196
+ </ul>
197
+ </li>
198
+
199
+ <li>
200
+ <strong>Termination</strong>
201
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
202
+ <li>
203
+ These Supplemental Terms are effective until terminated.
204
+ Meta may terminate or suspend your access to the Demo or
205
+ terminate these Supplemental Terms at any time for any
206
+ reason. Upon termination, your rights under these
207
+ Supplemental Terms will immediately cease.
208
+ </li>
209
+ </ul>
210
+ </li>
211
+
212
+ <li>
213
+ <strong>Disclaimers</strong>
214
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
215
+ <li>
216
+ The Demo and Outputs are provided "AS IS" and "AS
217
+ AVAILABLE." To the fullest extent permitted by law, Meta
218
+ and its affiliates disclaim all warranties, express or
219
+ implied, including warranties of merchantability, fitness
220
+ for a particular purpose, non-infringement, accuracy, and
221
+ quiet enjoyment. Meta does not warrant that the Demo will
222
+ be uninterrupted, secure, error-free, or that Outputs will
223
+ be accurate or reliable.
224
+ </li>
225
+ </ul>
226
+ </li>
227
+
228
+ <li>
229
+ <strong>Limitation of Liability</strong>
230
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
231
+ <li>
232
+ To the fullest extent permitted by law, in no event will
233
+ Meta or its affiliates, or their respective directors,
234
+ officers, employees, licensors, agents, or assigns be
235
+ liable for any indirect, incidental, consequential,
236
+ special, punitive, or exemplary damages, or lost profits,
237
+ arising out of or relating to the Demo or these
238
+ Supplemental Terms, even if advised of the possibility of
239
+ such damages. Without limiting the foregoing, the maximum
240
+ aggregate liability of Meta and its affiliates for all
241
+ claims arising out of or relating to the Demo or these
242
+ Supplemental Terms is fifty dollars (US $50).
243
+ </li>
244
+ </ul>
245
+ </li>
246
+
247
+ <li>
248
+ <strong>Indemnity</strong>
249
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
250
+ <li>
251
+ You will indemnify and hold harmless Meta and its
252
+ affiliates, and their respective directors, officers,
253
+ employees, licensors, agents, and assigns from and against
254
+ any claims, liabilities, damages, losses, and expenses
255
+ (including reasonable attorneys' fees) arising out of or
256
+ related to: (a) your Inputs or your use of the Demo or
257
+ Outputs; (b) your violation of these Supplemental Terms or
258
+ any applicable law; or (c) your infringement or violation
259
+ of any third-party rights.
260
+ </li>
261
+ </ul>
262
+ </li>
263
+
264
+ <li>
265
+ <strong>Governing Law; Venue</strong>
266
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
267
+ <li>
268
+ These Supplemental Terms and any claim, cause of action,
269
+ or dispute arising out of or relating to them or the Demo
270
+ will be governed by the laws of the State of California,
271
+ without regard to conflict of law principles. You agree
272
+ that all such claims will be brought exclusively in the
273
+ U.S. District Court for the Northern District of
274
+ California or the state courts located in San Mateo
275
+ County, California, and you consent to the personal
276
+ jurisdiction of those courts.
277
+ </li>
278
+ </ul>
279
+ </li>
280
+
281
+ <li>
282
+ <strong>Miscellaneous</strong>
283
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
284
+ <li>
285
+ If any provision of these Supplemental Terms is found
286
+ invalid or unenforceable, that provision will be enforced
287
+ to the maximum extent permissible and the remaining
288
+ provisions will remain in full force and effect. You may
289
+ not assign these Supplemental Terms without Meta's prior
290
+ written consent; any attempted assignment without consent
291
+ is void. Meta may assign these Supplemental Terms without
292
+ restriction. No waiver of any term will be deemed a
293
+ further or continuing waiver of such term or any other
294
+ term. These Supplemental Terms constitute the entire
295
+ agreement between you and Meta regarding the Demo and
296
+ supersede all prior or contemporaneous understandings
297
+ regarding the Demo.
298
+ </li>
299
+ </ul>
300
+ </li>
301
+ </ol>
302
+
303
+ <div className="mt-4">
304
+ <strong>Contact</strong>
305
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
306
+ <li>
307
+ For questions about these Supplemental Terms, contact Meta
308
+ at [email protected]. Note: Do not include personal
309
+ data or any Demo Inputs or Outputs in your communications,
310
+ as Meta does not receive or process Demo data.
311
+ </li>
312
+ </ul>
313
+ </div>
314
+ </div>
315
+ </div>
316
+ </div>
317
+ </div>
318
+ </div>
319
+ );
320
+ }
321
+
322
+ export function AUPModalComponent({isOpen, onClose}: ModalProps) {
323
+ if (!isOpen) return null;
324
+
325
+ return (
326
+ <div className="modal modal-open">
327
+ <div className="modal-box max-w-4xl bg-gray-800 text-white border border-gray-600">
328
+ <div className="flex justify-between items-center mb-6">
329
+ <h3 className="font-bold text-xl text-blue-300">
330
+ Omnilingual ASR Demo Acceptable Use Policy
331
+ </h3>
332
+ <button
333
+ className="btn btn-sm btn-circle btn-ghost text-gray-300 hover:text-white hover:bg-gray-700"
334
+ onClick={onClose}
335
+ aria-label="Close modal"
336
+ >
337
+ <XMarkIcon className="w-5 h-5" />
338
+ </button>
339
+ </div>
340
+
341
+ <div className="space-y-4 max-h-96 overflow-y-auto">
342
+ <div className="bg-gray-700 p-4 rounded-lg border-l-4 border-blue-500">
343
+ <div className="text-gray-200 leading-relaxed space-y-4">
344
+ <p>
345
+ Meta is committed to promoting safe and responsible use of its
346
+ research tools, including the Omnilingual ASR Demo (the "Demo").
347
+ By accessing or using the Demo, you agree to comply with this
348
+ Acceptable Use Policy ("Policy") in addition to any applicable
349
+ Hugging Face terms and policies. The most recent version of this
350
+ Policy will be made available on the Demo's Hugging Face page.
351
+ </p>
352
+
353
+ <h4 className="font-semibold text-blue-200 mt-4">
354
+ Prohibited Uses
355
+ </h4>
356
+ <p>
357
+ To ensure the Demo is used safely and fairly, you agree that you
358
+ will not use, or allow others to use, the Demo or any outputs
359
+ from the Demo to:
360
+ </p>
361
+
362
+ <ol className="list-decimal list-outside ml-6 space-y-4">
363
+ <li>
364
+ <strong>Violate the Law or Others' Rights</strong>
365
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-2">
366
+ <li>
367
+ Engage in, promote, generate, contribute to, encourage,
368
+ plan, incite, or further illegal or unlawful activity or
369
+ content, including but not limited to:
370
+ <ul className="list-[circle] list-outside ml-6 mt-1 space-y-1">
371
+ <li>Violence or terrorism</li>
372
+ <li>
373
+ Exploitation or harm to children, including the
374
+ solicitation, creation, acquisition, or dissemination
375
+ of child exploitative content or failure to report
376
+ Child Sexual Abuse Material
377
+ </li>
378
+ <li>
379
+ Human trafficking, exploitation, and sexual violence
380
+ </li>
381
+ <li>
382
+ The illegal distribution of information or materials
383
+ to minors, including obscene materials, or failure to
384
+ employ legally required age-gating in connection with
385
+ such information or materials
386
+ </li>
387
+ <li>Sexual solicitation</li>
388
+ <li>Any other criminal activity</li>
389
+ </ul>
390
+ </li>
391
+ <li>
392
+ Engage in, promote, incite, or facilitate the harassment,
393
+ abuse, threatening, or bullying of individuals or groups
394
+ </li>
395
+ <li>
396
+ Engage in, promote, incite, or facilitate discrimination
397
+ or other unlawful or harmful conduct in the provision of
398
+ employment, credit, housing, or other essential goods and
399
+ services
400
+ </li>
401
+ <li>
402
+ Collect, process, disclose, generate, or infer health,
403
+ demographic, biometric, or other sensitive personal or
404
+ private information about individuals without all rights
405
+ and consents required by applicable laws
406
+ </li>
407
+ <li>
408
+ Infringe, misappropriate, or otherwise violate any
409
+ third-party rights, including intellectual property,
410
+ privacy, or publicity rights
411
+ </li>
412
+ <li>
413
+ Create, generate, or facilitate the creation of malicious
414
+ code, malware, computer viruses, or do anything else that
415
+ could disable, overburden, interfere with, or impair the
416
+ proper working, integrity, operation, or appearance of a
417
+ website or computer system
418
+ </li>
419
+ </ul>
420
+ </li>
421
+
422
+ <li>
423
+ <strong>Endanger Safety or Security</strong>
424
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-2">
425
+ <li>
426
+ Engage in, promote, incite, facilitate, or assist in the
427
+ planning or development of activities that present a risk
428
+ of death or bodily harm to individuals, including use of
429
+ the Demo related to:
430
+ <ul className="list-[circle] list-outside ml-6 mt-1 space-y-1">
431
+ <li>
432
+ Military, warfare, nuclear industries or applications,
433
+ espionage, or activities subject to the International
434
+ Traffic in Arms Regulations (ITAR)
435
+ </li>
436
+ <li>
437
+ Guns and illegal weapons (including weapon
438
+ development)
439
+ </li>
440
+ <li>
441
+ Illegal drugs and regulated/controlled substances
442
+ </li>
443
+ <li>
444
+ Operation of critical infrastructure, transportation
445
+ technologies, or heavy machinery
446
+ </li>
447
+ <li>
448
+ Self-harm or harm to others, including suicide,
449
+ cutting, and eating disorders
450
+ </li>
451
+ <li>
452
+ Any content intended to incite or promote violence,
453
+ abuse, or infliction of bodily harm
454
+ </li>
455
+ </ul>
456
+ </li>
457
+ </ul>
458
+ </li>
459
+
460
+ <li>
461
+ <strong>Deceive or Mislead Others</strong>
462
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
463
+ <li>
464
+ Generate, promote, or further fraud, scams, phishing,
465
+ multi-level marketing or pyramid schemes, or any other
466
+ fraudulent activities
467
+ </li>
468
+ <li>
469
+ Generate, promote, or further defamatory content,
470
+ including the creation of defamatory statements or other
471
+ content
472
+ </li>
473
+ <li>Generate, promote, or further distribute spam</li>
474
+ <li>
475
+ Impersonate another individual by depiction of their voice
476
+ or likeness without consent, authorization, or legal
477
+ right, including non-consensual sexual imagery
478
+ </li>
479
+ <li>
480
+ Represent that the use of the Demo or its outputs are
481
+ human-generated, or use outputs in a manner intended to
482
+ convince another person that they are communicating with a
483
+ human
484
+ </li>
485
+ <li>
486
+ Generate or facilitate false online engagement, including
487
+ fake reviews or other means of fake online engagement
488
+ </li>
489
+ <li>
490
+ Engage in or facilitate any form of academic dishonesty,
491
+ including plagiarism
492
+ </li>
493
+ </ul>
494
+ </li>
495
+
496
+ <li>
497
+ <strong>High-Risk or Sensitive Use Cases</strong>
498
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-1">
499
+ <li>
500
+ Fail to appropriately disclose to end users any known
501
+ dangers of your AI system
502
+ </li>
503
+ <li>
504
+ Engage in automated government decision-making in
505
+ high-risk contexts, including law enforcement, criminal
506
+ justice, immigration, or asylum, without a qualified
507
+ person reviewing the outputs
508
+ </li>
509
+ <li>
510
+ Use the Demo or its outputs for any decision-making
511
+ related to health, financial, safety, or legal matters
512
+ </li>
513
+ </ul>
514
+ </li>
515
+
516
+ <li>
517
+ <strong>Adult Content</strong>
518
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-2">
519
+ <li>
520
+ Create, develop, access, or disseminate adult content,
521
+ including:
522
+ <ul className="list-[circle] list-outside ml-6 mt-1 space-y-1">
523
+ <li>Erotic, sexual, or romantic chats</li>
524
+ <li>Sexual solicitation</li>
525
+ <li>Pornography</li>
526
+ <li>
527
+ Content that describes or promotes sexual or adult
528
+ services
529
+ </li>
530
+ </ul>
531
+ </li>
532
+ </ul>
533
+ </li>
534
+ </ol>
535
+
536
+ <div className="mt-4">
537
+ <h4 className="font-semibold text-blue-200">
538
+ Reporting Violations
539
+ </h4>
540
+ <p className="mt-2">
541
+ If you become aware of any violation of this Policy, software
542
+ "bug," or other problem that could lead to a violation of this
543
+ Policy, please report it to: [email protected]. Do not
544
+ include personal data or any Demo Inputs or Outputs in your
545
+ communications, as Meta does not receive or process Demo data.
546
+ </p>
547
+ </div>
548
+
549
+ <div className="mt-4">
550
+ <h4 className="font-semibold text-blue-200">Enforcement</h4>
551
+ <p className="mt-2">
552
+ Meta reserves the right to investigate and take appropriate
553
+ action regarding any suspected or actual violation of this
554
+ Policy, including suspending or terminating access to the
555
+ Demo.
556
+ </p>
557
+ </div>
558
+
559
+ <div className="mt-4">
560
+ <h4 className="font-semibold text-blue-200">Contact</h4>
561
+ <p className="mt-2">
562
+ For questions about this Policy, contact
563
564
+ </p>
565
+ </div>
566
+ </div>
567
+ </div>
568
+ </div>
569
+ </div>
570
+ </div>
571
+ );
572
+ }
573
+
574
+ export function ModelReadmeModalComponent({isOpen, onClose}: ModalProps) {
575
+ if (!isOpen) return null;
576
+
577
+ return (
578
+ <div className="modal modal-open">
579
+ <div className="modal-box max-w-4xl bg-gray-800 text-white border border-gray-600">
580
+ <div className="flex justify-between items-center mb-6">
581
+ <h3 className="font-bold text-xl text-blue-300">Model Readme</h3>
582
+ <button
583
+ className="btn btn-sm btn-circle btn-ghost text-gray-300 hover:text-white hover:bg-gray-700"
584
+ onClick={onClose}
585
+ aria-label="Close modal"
586
+ >
587
+ <XMarkIcon className="w-5 h-5" />
588
+ </button>
589
+ </div>
590
+
591
+ <div className="space-y-4 max-h-96 overflow-y-auto">
592
+ <div className="bg-gray-700 p-4 rounded-lg border-l-4 border-blue-500">
593
+ <div className="text-gray-200 leading-relaxed space-y-4">
594
+ <div>
595
+ <h4 className="font-semibold text-blue-200">Intended Use</h4>
596
+ <p className="mt-2">
597
+ The Omnilingual ASR model is designed to transcribe audio and
598
+ video files into text, supporting a wide range of languages.
599
+ Users are encouraged to explore the model's capabilities in a
600
+ responsible manner, consistent with applicable laws and
601
+ ethical guidelines.
602
+ </p>
603
+ </div>
604
+
605
+ <div>
606
+ <h4 className="font-semibold text-blue-200">Out-of-scope</h4>
607
+ <p className="mt-2">
608
+ The Omnilingual ASR model is intended for commercial and
609
+ research use in multiple languages. The following uses are
610
+ considered out of scope for the Omnilingual ASR model and are
611
+ strongly discouraged:
612
+ </p>
613
+ <ul className="list-disc list-outside ml-6 mt-2 space-y-2">
614
+ <li>
615
+ Uses that violate laws or infringe the rights of others,
616
+ including generating, promoting, or disseminating content
617
+ that is illegal, harassing, discriminatory, or otherwise
618
+ harmful.
619
+ </li>
620
+ <li>
621
+ Processing, generating, or inferring sensitive personal
622
+ information (such as health, demographic, biometric, or
623
+ private data) without appropriate rights and consents
624
+ required by law.
625
+ </li>
626
+ <li>
627
+ Use in high-risk or sensitive contexts, including but not
628
+ limited to:
629
+ <ul className="list-[circle] list-outside ml-6 mt-1 space-y-1">
630
+ <li>
631
+ Medical, legal, financial, or safety-critical
632
+ decision-making
633
+ </li>
634
+ <li>
635
+ Law enforcement, criminal justice, immigration, or
636
+ asylum processes
637
+ </li>
638
+ <li>
639
+ Operation of critical infrastructure or heavy machinery
640
+ </li>
641
+ </ul>
642
+ </li>
643
+ <li>
644
+ Generation or dissemination of adult content, including
645
+ erotic, sexual, or pornographic material.
646
+ </li>
647
+ <li>
648
+ Uses intended to deceive, mislead, impersonate others, or
649
+ facilitate fraud, scams, or disinformation.
650
+ </li>
651
+ <li>
652
+ Uses to create, promote, or distribute spam, malware, or
653
+ malicious code.
654
+ </li>
655
+ </ul>
656
+ </div>
657
+
658
+ <div>
659
+ <h4 className="font-semibold text-blue-200">Disclaimer</h4>
660
+ <p className="mt-2">
661
+ This model and its outputs may be inaccurate, incomplete, or
662
+ inappropriate for certain use cases. Users are solely
663
+ responsible for their own use of the model, including
664
+ compliance with applicable laws and regulations. The model
665
+ should not be relied upon for any high-risk or sensitive
666
+ applications.
667
+ </p>
668
+ </div>
669
+ </div>
670
+ </div>
671
+ </div>
672
+ </div>
673
+ </div>
674
+ );
675
+ }
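
Note: all three modals share the same controlled isOpen/onClose contract, so a host needs only one piece of state to decide which modal (if any) is open. A minimal hypothetical wiring sketch:

```tsx
// Hypothetical host for the legal modals (illustration only).
import React, { useState } from 'react';
import { TOSModalComponent, AUPModalComponent } from './TermsModal';

function LegalLinks() {
  const [open, setOpen] = useState<'tos' | 'aup' | null>(null);

  return (
    <>
      <button onClick={() => setOpen('tos')}>Terms of Service</button>
      <button onClick={() => setOpen('aup')}>Acceptable Use Policy</button>
      <TOSModalComponent isOpen={open === 'tos'} onClose={() => setOpen(null)} />
      <AUPModalComponent isOpen={open === 'aup'} onClose={() => setOpen(null)} />
    </>
  );
}
```
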
frontend/src/components/TipsNotice.tsx ADDED
@@ -0,0 +1,25 @@
1
+ import React from 'react';
2
+ import { TRANSCRIPTION_LIMITS } from '../utils/transcriptionWarnings';
3
+
4
+ const TipsNotice: React.FC = () => {
5
+ return (
6
+ <div className="mb-4 p-3 bg-blue-900/30 rounded-lg border border-blue-600/50">
7
+ <div className="mb-2">
8
+ <h3 className="text-sm font-semibold text-blue-300">For Best Results</h3>
9
+ </div>
10
+
11
+ <div className="text-xs text-blue-200 mb-3 space-y-1">
12
+ <div>Duration: {TRANSCRIPTION_LIMITS.MIN_DURATION} seconds to {Math.floor(TRANSCRIPTION_LIMITS.MAX_DURATION / 60)} minutes</div>
13
+ <div>File size: Under {TRANSCRIPTION_LIMITS.MAX_FILE_SIZE}MB</div>
14
+ <div>Quality: Clear speech, minimal background noise</div>
15
+ </div>
16
+
17
+ <div className="text-xs text-blue-300/70 italic">
18
+ Too short: insufficient context for model accuracy<br/>
19
+ Too long/large: may exceed server resources
20
+ </div>
21
+ </div>
22
+ );
23
+ };
24
+
25
+ export default TipsNotice;
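
Note: TipsNotice reads its numbers from TRANSCRIPTION_LIMITS in utils/transcriptionWarnings, which is not included in this excerpt. From the usage above (durations in seconds, file size in MB), the constant plausibly has the shape below; the concrete values are assumptions:

```ts
// Hypothetical shape of TRANSCRIPTION_LIMITS inferred from TipsNotice's usage.
// Units: durations in seconds, file size in megabytes. Values are illustrative only.
export const TRANSCRIPTION_LIMITS = {
  MIN_DURATION: 3,    // seconds
  MAX_DURATION: 600,  // seconds; rendered as minutes in TipsNotice
  MAX_FILE_SIZE: 100, // MB
} as const;
```
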
frontend/src/components/TranscriptionControls.tsx ADDED
@@ -0,0 +1,152 @@
1
+ import React, { useState, useEffect } from 'react';
2
+ import { useTranscriptionStore } from '../stores/transcriptionStore';
3
+ import LanguageSelector from './LanguageSelector';
4
+ import TranscriptionWarningModal from './TranscriptionWarningModal';
5
+ import { checkTranscriptionWarnings, getMediaDuration, WARNING_MESSAGES } from '../utils/transcriptionWarnings';
6
+
7
+ type WarningType = keyof typeof WARNING_MESSAGES;
8
+
9
+ const TranscriptionControls: React.FC = () => {
10
+ const {
11
+ file,
12
+ transcription,
13
+ isLoading,
14
+ isProcessingVideo,
15
+ error,
16
+ serverStatus,
17
+ selectedLanguage,
18
+ selectedScript,
19
+ handleTranscribe,
20
+ setSelectedLanguageAndScript,
21
+ } = useTranscriptionStore();
22
+
23
+ const [showWarningModal, setShowWarningModal] = useState(false);
24
+ const [warnings, setWarnings] = useState<WarningType[]>([]);
25
+ const [mediaDuration, setMediaDuration] = useState<number | undefined>(undefined);
26
+
27
+ // Get media duration when file changes
28
+ useEffect(() => {
29
+ if (file) {
30
+ getMediaDuration(file)
31
+ .then(duration => {
32
+ setMediaDuration(duration);
33
+ })
34
+ .catch(err => { // renamed from `error` to avoid shadowing the store's error field
35
+ console.warn('Could not get media duration:', err);
36
+ setMediaDuration(undefined);
37
+ });
38
+ } else {
39
+ setMediaDuration(undefined);
40
+ }
41
+ }, [file]);
42
+
43
+ // Handle transcribe button click with warning check
44
+ const handleTranscribeClick = async () => {
45
+ if (!file) return;
46
+
47
+ // Check for warnings
48
+ const detectedWarnings = checkTranscriptionWarnings({
49
+ file,
50
+ duration: mediaDuration,
51
+ });
52
+
53
+ if (detectedWarnings.length > 0) {
54
+ setWarnings(detectedWarnings);
55
+ setShowWarningModal(true);
56
+ } else {
57
+ // No warnings, proceed directly
58
+ await handleTranscribe();
59
+ }
60
+ };
61
+
62
+ // Handle warning modal acceptance
63
+ const handleWarningAccept = async () => {
64
+ setShowWarningModal(false);
65
+ await handleTranscribe();
66
+ };
67
+
68
+ // Handle warning modal cancellation
69
+ const handleWarningCancel = () => {
70
+ setShowWarningModal(false);
71
+ setWarnings([]);
72
+ };
73
+
74
+ // Only show controls if file exists but no transcription yet
75
+ if (!file || transcription) {
76
+ return null;
77
+ }
78
+
79
+ return (
80
+ <>
81
+ <div className="bg-gray-800 border-t border-gray-700 p-4">
82
+ <div className="max-w-4xl mx-auto">
83
+ {/* Error Display */}
84
+ {error && (
85
+ <div className="mb-4 p-3 bg-red-600 rounded-lg">
86
+ <div className="text-sm font-medium text-white">Error</div>
87
+ <div className="text-sm text-red-100">{error}</div>
88
+ </div>
89
+ )}
90
+
91
+ {/* Controls Container */}
92
+ <div className="flex flex-col sm:flex-row items-center justify-center gap-4">
93
+ {/* Language Selection */}
94
+ <div className="flex flex-col items-center">
95
+ <div className="tooltip tooltip-bottom" data-tip="Select the primary language spoken in your audio/video file. This helps the AI model optimize its transcription accuracy by using language-specific acoustic models and vocabulary. Choosing the correct language significantly improves transcription quality and word recognition.">
96
+ <label className="text-sm font-medium text-gray-300 mb-2 cursor-help">
97
+ Transcription Language <span className="text-red-400">*</span>
98
+ </label>
99
+ </div>
100
+ <div className="w-80">
101
+ <LanguageSelector
102
+ selectedLanguage={selectedLanguage}
103
+ selectedScript={selectedScript}
104
+ onLanguageAndScriptSelect={setSelectedLanguageAndScript}
105
+ disabled={isLoading || serverStatus?.is_busy}
106
+ />
107
+ </div>
108
+ </div>
109
+
110
+ {/* Transcribe Button */}
111
+ <div className="flex flex-col items-center">
112
+ <label className="text-sm font-medium text-gray-300 mb-2">
113
+ {serverStatus?.is_busy ? (
114
+ <span className="text-orange-400">Server is processing a request</span>
115
+ ) : (
116
+ <span className="opacity-0">Action</span>
117
+ )}
118
+ </label>
119
+ <div className={!selectedLanguage ? "tooltip tooltip-bottom" : ""} data-tip={!selectedLanguage ? "Please select a transcription language to continue" : ""}>
120
+ <button
121
+ onClick={handleTranscribeClick}
122
+ disabled={isLoading || serverStatus?.is_busy || !selectedLanguage}
123
+ className="px-8 py-3 bg-green-600 hover:bg-green-700 disabled:bg-gray-600 rounded-lg text-sm font-medium transition-colors text-white min-w-32"
124
+ >
125
+ {isLoading
126
+ ? isProcessingVideo
127
+ ? "Processing..."
128
+ : "Transcribing..."
129
+ : serverStatus?.is_busy
130
+ ? "Server Busy"
131
+ : !selectedLanguage
132
+ ? "Select Language"
133
+ : "Transcribe"}
134
+ </button>
135
+ </div>
136
+ </div>
137
+ </div>
138
+ </div>
139
+ </div>
140
+
141
+ {/* Warning Modal */}
142
+ <TranscriptionWarningModal
143
+ isOpen={showWarningModal}
144
+ warnings={warnings}
145
+ onAccept={handleWarningAccept}
146
+ onCancel={handleWarningCancel}
147
+ />
148
+ </>
149
+ );
150
+ };
151
+
152
+ export default TranscriptionControls;
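
Note: getMediaDuration (imported from utils/transcriptionWarnings, not shown in this excerpt) only needs media metadata, so one workable implementation loads the file into an off-screen media element via an object URL. A hedged sketch; the repo's actual util may differ:

```ts
// One possible getMediaDuration implementation (sketch, not necessarily the repo's).
function getMediaDuration(file: File): Promise<number> {
  return new Promise((resolve, reject) => {
    const url = URL.createObjectURL(file);
    const el: HTMLMediaElement = file.type.startsWith('video/')
      ? document.createElement('video')
      : document.createElement('audio');
    el.preload = 'metadata'; // duration becomes available once metadata loads
    el.onloadedmetadata = () => {
      URL.revokeObjectURL(url);
      resolve(el.duration);
    };
    el.onerror = () => {
      URL.revokeObjectURL(url);
      reject(new Error('Could not read media metadata'));
    };
    el.src = url;
  });
}
```
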
frontend/src/components/TranscriptionPlayer.tsx ADDED
@@ -0,0 +1,221 @@
+ import {useRef, useEffect, useCallback} from "react";
+ import {AlignedSegment} from "../services/transcriptionApi";
+ import {useTranscriptionStore} from "../stores/transcriptionStore";
+ import {useMediaTimeSync} from "../hooks/useMediaTimeSync";
+ import {
+   SUPPORTED_AUDIO_FORMATS,
+   SUPPORTED_VIDEO_FORMATS,
+   CODEC_INFO,
+ } from "../utils/mediaTypes";
+ import MediaPlayer from "./MediaPlayer";
+ import CanvasTimeline from "./CanvasTimeline";
+ import MinimapTimeline from "./MinimapTimeline";
+ import TranscriptionControls from "./TranscriptionControls";
+ import FullTranscription from "./FullTranscription";
+ import ErrorBoundary from "./ErrorBoundary";
+
+ export default function TranscriptionPlayer() {
+   // Get state from store
+   const {file, mediaUrl, transcription, isLoading, isProcessingVideo} =
+     useTranscriptionStore();
+
+   // Get actions from store
+   const {
+     handleTimeUpdate: updateTimeInStore,
+     setSelectedSegmentIndex,
+     selectedSegmentIndex,
+     setMediaRefs,
+   } = useTranscriptionStore();
+   const audioRef = useRef<HTMLAudioElement>(null);
+   const videoRef = useRef<HTMLVideoElement>(null);
+   const canvasTimelineRef = useRef<HTMLDivElement>(null);
+
+   // Set media refs in store for centralized seeking
+   useEffect(() => {
+     setMediaRefs(audioRef, videoRef);
+   }, [setMediaRefs]);
+
+   const handleTimeUpdate = useCallback(() => {
+     const mediaElement = audioRef.current || videoRef.current;
+     if (mediaElement && transcription?.aligned_segments) {
+       const mediaCurrentTime = mediaElement.currentTime;
+
+       // Find the segment that contains the current playback time
+       const activeIndex = transcription.aligned_segments.findIndex(
+         (segment) =>
+           mediaCurrentTime >= segment.start && mediaCurrentTime <= segment.end
+       );
+
+       // If no exact match, fall back to the closest segment within a small tolerance
+       let finalActiveIndex: number | null = activeIndex;
+       if (activeIndex === -1) {
+         let closestIndex = -1;
+         let minDistance = Infinity;
+
+         transcription.aligned_segments.forEach((segment, index) => {
+           const distance = Math.min(
+             Math.abs(mediaCurrentTime - segment.start),
+             Math.abs(mediaCurrentTime - segment.end)
+           );
+           if (distance < minDistance && distance < 0.5) {
+             // 0.5 second tolerance
+             minDistance = distance;
+             closestIndex = index;
+           }
+         });
+
+         finalActiveIndex = closestIndex >= 0 ? closestIndex : null;
+       }
+
+       updateTimeInStore();
+
+       // Auto-select the active segment only if:
+       // 1. We found an active segment
+       // 2. Either no segment is selected, or the active segment changed
+       if (
+         finalActiveIndex !== null &&
+         selectedSegmentIndex !== finalActiveIndex
+       ) {
+         setSelectedSegmentIndex(finalActiveIndex);
+       }
+     }
+   }, [
+     transcription,
+     updateTimeInStore,
+     selectedSegmentIndex,
+     setSelectedSegmentIndex,
+   ]);
+
+   const handleSeekToSegment = (segment: AlignedSegment) => {
+     const mediaElement = audioRef.current || videoRef.current;
+     if (mediaElement) {
+       mediaElement.currentTime = segment.start;
+       // Immediately update the store to sync the progress indicator
+       handleTimeUpdate();
+     }
+   };
+
+   // Use media time sync hook for continuous time updates during playback
+   useMediaTimeSync({
+     audioRef,
+     videoRef,
+     onTimeUpdate: handleTimeUpdate,
+     transcription,
+   });
+
+   // Cleanup media URL on unmount
+   useEffect(() => {
+     return () => {
+       if (mediaUrl) {
+         URL.revokeObjectURL(mediaUrl);
+       }
+     };
+   }, [mediaUrl]);
+
+   return (
+     <div className="flex-1 min-w-0 flex flex-col bg-black">
+       {/* Media Player */}
+       {file && (
+         <ErrorBoundary componentName="MediaPlayer">
+           <MediaPlayer
+             audioRef={audioRef}
+             videoRef={videoRef}
+             onTimeUpdate={handleTimeUpdate}
+           />
+         </ErrorBoundary>
+       )}
+
+       {/* Transcription Controls */}
+       <ErrorBoundary componentName="TranscriptionControls">
+         <TranscriptionControls />
+       </ErrorBoundary>
+
+       {/* Full Transcription */}
+       <ErrorBoundary componentName="FullTranscription">
+         <FullTranscription />
+       </ErrorBoundary>
+
+       {/* Transcription Timeline */}
+       {transcription && (
+         <>
+           {/* Minimap Timeline */}
+           <ErrorBoundary componentName="MinimapTimeline">
+             <MinimapTimeline
+               audioRef={audioRef}
+               videoRef={videoRef}
+               canvasTimelineRef={canvasTimelineRef}
+             />
+           </ErrorBoundary>
+
+           {/* Canvas Timeline */}
+           {/* <ErrorBoundary componentName="CanvasTimeline">
+             <CanvasTimeline
+               audioRef={audioRef}
+               videoRef={videoRef}
+               onSeekToSegment={handleSeekToSegment}
+               onTimeUpdate={handleTimeUpdate}
+               ref={canvasTimelineRef}
+             />
+           </ErrorBoundary> */}
+         </>
+       )}
+
+       {/* Transcription Loading State */}
+       {file && !transcription && (isLoading || isProcessingVideo) && (
+         <div className="flex-1 flex items-center justify-center bg-gray-900 border-t border-gray-700">
+           <div className="text-center text-white">
+             <div className="mb-4">
+               <div className="animate-spin rounded-full h-12 w-12 border-b-2 border-blue-500 mx-auto"></div>
+             </div>
+             <div className="text-2xl md:text-3xl mb-3 font-semibold">
+               {file?.type.startsWith("video/")
+                 ? "Processing Video..."
+                 : "Transcribing Audio..."}
+             </div>
+             <div className="text-base md:text-lg text-gray-400 max-w-md mx-auto">
+               {file?.type.startsWith("video/")
+                 ? "Server is extracting audio and generating transcription"
+                 : "Converting speech to text"}
+               {/* : "Converting speech to text with timestamps"} */}
+             </div>
+           </div>
+         </div>
+       )}
+
+       {/* No File State */}
+       {!file && (
+         <div className="flex-1 flex items-center justify-center">
+           <div className="text-center text-gray-400">
+             <div className="text-6xl mb-4">🎵</div>
+             <div className="text-xl mb-2">Upload Audio</div>
+             <div className="text-sm mb-4">
+               Choose an audio file from the panel on the left, drag and drop
+               one anywhere, or record audio to get started with transcription
+             </div>
+
+             {/* Supported File Types */}
+             <div className="text-xs text-gray-500 max-w-md mx-auto">
+               {/* Audio formats section */}
+               <div className="text-center mb-3">
+                 <div className="font-medium text-gray-400 mb-1">
+                   Audio Formats
+                 </div>
+                 <div className="text-xs text-gray-500">
+                   {SUPPORTED_AUDIO_FORMATS.join(" • ")}
+                 </div>
+               </div>
+
+               {/* Codec info */}
+               <div className="text-center">
+                 <div className="text-xs text-gray-400 opacity-75">
+                   Recommended: {CODEC_INFO.audio.common.slice(0, 2).join(", ")}{" "}
+                   codecs
+                 </div>
+               </div>
+             </div>
+           </div>
+         </div>
+       )}
+     </div>
+   );
+ }
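
handleTimeUpdate above resolves the active segment in two passes: an exact containment check first, then a nearest-boundary fallback within a 0.5 s tolerance, so short gaps between segments don't drop the highlight. The same lookup as a standalone sketch — the function name is illustrative, and only start/end are needed from AlignedSegment:

// Illustrative restatement of the two-pass lookup (not in the commit).
function findActiveSegment(
  segments: {start: number; end: number}[],
  t: number,
  tolerance = 0.5
): number | null {
  // Pass 1: a segment that contains the playhead wins outright.
  const exact = segments.findIndex((s) => t >= s.start && t <= s.end);
  if (exact !== -1) return exact;

  // Pass 2: otherwise take the segment whose nearest edge is within tolerance.
  let best = -1;
  let bestDist = Infinity;
  segments.forEach((s, i) => {
    const d = Math.min(Math.abs(t - s.start), Math.abs(t - s.end));
    if (d < bestDist && d < tolerance) {
      bestDist = d;
      best = i;
    }
  });
  return best >= 0 ? best : null;
}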
frontend/src/components/TranscriptionSideBar.tsx ADDED
@@ -0,0 +1,172 @@
+ import React, {useRef, useState} from "react";
+ import {useTranscriptionStore} from "../stores/transcriptionStore";
+ import ServerStatusIndicator from "./ServerStatusIndicator";
+ import FeedbackCard from "./FeedbackCard";
+ import TipsNotice from "./TipsNotice";
+ import QuickGuide from "./QuickGuide";
+ import {
+   TOSModalComponent,
+   AUPModalComponent,
+   ModelReadmeModalComponent,
+ } from "./TermsModal";
+
+ const TranscriptionSideBar: React.FC = () => {
+   const fileInputRef = useRef<HTMLInputElement>(null);
+   const [isTOSModalOpen, setIsTOSModalOpen] = useState(false);
+   const [isAUPModalOpen, setIsAUPModalOpen] = useState(false);
+   const [isModelReadmeModalOpen, setIsModelReadmeModalOpen] = useState(false);
+
+   const {
+     file,
+     transcription,
+     error,
+     isRecording,
+     handleFileSelect,
+     startRecording,
+   } = useTranscriptionStore();
+
+   const handleFileInputChange = (
+     event: React.ChangeEvent<HTMLInputElement>
+   ) => {
+     const selectedFile = event.target.files?.[0];
+     if (selectedFile) {
+       handleFileSelect(selectedFile);
+     }
+   };
+
+   return (
+     <div className="h-full p-4 overflow-y-auto">
+       <h2 className="text-lg font-bold mb-4">
+         Omnilingual ASR Media Transcription
+       </h2>
+
+       {/* Server Status Indicator */}
+       <div className="mb-4 p-3 bg-gray-900 rounded-lg border border-gray-600">
+         <ServerStatusIndicator />
+       </div>
+
+       {/* File Upload */}
+       <div className="mb-4">
+         <h3 className="text-sm font-semibold mb-2">Upload Media</h3>
+         <input
+           ref={fileInputRef}
+           type="file"
+           accept="audio/*"
+           onChange={handleFileInputChange}
+           className="hidden"
+         />
+         {!isRecording && (
+           <>
+             <button
+               onClick={() => fileInputRef.current?.click()}
+               className="w-full p-2 bg-blue-600 hover:bg-blue-700 rounded text-sm transition-colors mb-2 flex items-center justify-center gap-2"
+             >
+               <svg className="w-4 h-4" fill="currentColor" viewBox="0 0 20 20">
+                 <path
+                   fillRule="evenodd"
+                   d="M3 17a1 1 0 011-1h12a1 1 0 110 2H4a1 1 0 01-1-1zm3.293-7.707a1 1 0 011.414 0L9 10.586V3a1 1 0 112 0v7.586l1.293-1.293a1 1 0 111.414 1.414l-3 3a1 1 0 01-1.414 0l-3-3a1 1 0 010-1.414z"
+                   clipRule="evenodd"
+                 />
+               </svg>
+               {file ? "Choose Different File" : "Choose File"}
+             </button>
+
+             {/* Recording Buttons */}
+             <div className="space-y-2">
+               <button
+                 onClick={() => startRecording("audio")}
+                 className="w-full p-2 bg-red-700 hover:bg-red-800 rounded text-sm transition-colors flex items-center justify-center gap-2"
+               >
+                 <svg
+                   className="w-4 h-4"
+                   fill="currentColor"
+                   viewBox="0 0 20 20"
+                 >
+                   <path
+                     fillRule="evenodd"
+                     d="M7 4a3 3 0 016 0v4a3 3 0 11-6 0V4zm4 10.93A7.001 7.001 0 0017 8a1 1 0 10-2 0A5 5 0 015 8a1 1 0 00-2 0 7.001 7.001 0 006 6.93V17H6a1 1 0 100 2h8a1 1 0 100-2h-3v-2.07z"
+                     clipRule="evenodd"
+                   />
+                 </svg>
+                 Record Audio
+               </button>
+
+               {/* <button
+                 onClick={() => startRecording("video")}
+                 className="w-full p-2 bg-red-700 hover:bg-red-800 rounded text-sm transition-colors flex items-center justify-center gap-2"
+               >
+                 <svg
+                   className="w-4 h-4"
+                   fill="currentColor"
+                   viewBox="0 0 20 20"
+                 >
+                   <path d="M2 6a2 2 0 012-2h6a2 2 0 012 2v8a2 2 0 01-2 2H4a2 2 0 01-2-2V6zM14.553 7.106A1 1 0 0014 8v4a1 1 0 00.553.894l2 1A1 1 0 0018 13V7a1 1 0 00-1.447-.894l-2 1z" />
+                 </svg>
+                 Record Video
+               </button> */}
+             </div>
+           </>
+         )}
+
+         {file && !isRecording && (
+           <div className="mt-2 p-2 bg-gray-700 rounded">
+             <div className="text-xs font-medium truncate">{file.name}</div>
+             <div className="text-xs text-gray-400">
+               {(file.size / 1024 / 1024).toFixed(1)} MB
+             </div>
+           </div>
+         )}
+       </div>
+
+       {/* Error Display */}
+       {error && transcription && (
+         <div className="mb-4 p-2 bg-red-600 rounded">
+           <div className="text-xs font-medium">Error</div>
+           <div className="text-xs">{error}</div>
+         </div>
+       )}
+
+       <QuickGuide />
+       <TipsNotice />
+       <FeedbackCard />
+
+       {/* Terms and Policy Buttons */}
+       <div className="mt-4 space-y-2">
+         <button
+           onClick={() => setIsTOSModalOpen(true)}
+           className="w-full p-2 bg-green-200 hover:bg-green-300 text-gray-800 rounded text-sm transition-colors"
+         >
+           Terms of Service
+         </button>
+         <button
+           onClick={() => setIsAUPModalOpen(true)}
+           className="w-full p-2 bg-green-200 hover:bg-green-300 text-gray-800 rounded text-sm transition-colors"
+         >
+           Acceptable Use Policy
+         </button>
+         <button
+           onClick={() => setIsModelReadmeModalOpen(true)}
+           className="w-full p-2 bg-green-200 hover:bg-green-300 text-gray-800 rounded text-sm transition-colors"
+         >
+           Model Readme
+         </button>
+       </div>
+
+       {/* Modals */}
+       <TOSModalComponent
+         isOpen={isTOSModalOpen}
+         onClose={() => setIsTOSModalOpen(false)}
+       />
+       <AUPModalComponent
+         isOpen={isAUPModalOpen}
+         onClose={() => setIsAUPModalOpen(false)}
+       />
+       <ModelReadmeModalComponent
+         isOpen={isModelReadmeModalOpen}
+         onClose={() => setIsModelReadmeModalOpen(false)}
+       />
+     </div>
+   );
+ };
+
+ export default TranscriptionSideBar;
frontend/src/components/TranscriptionWarningModal.tsx ADDED
@@ -0,0 +1,69 @@
+ import React from 'react';
+ import { WARNING_MESSAGES } from '../utils/transcriptionWarnings';
+
+ type WarningType = keyof typeof WARNING_MESSAGES;
+
+ interface TranscriptionWarningModalProps {
+   isOpen: boolean;
+   warnings: WarningType[];
+   onAccept: () => void;
+   onCancel: () => void;
+ }
+
+ const TranscriptionWarningModal: React.FC<TranscriptionWarningModalProps> = ({
+   isOpen,
+   warnings,
+   onAccept,
+   onCancel,
+ }) => {
+   if (!isOpen || warnings.length === 0) {
+     return null;
+   }
+
+   return (
+     <div className="fixed inset-0 z-50 flex items-center justify-center bg-black bg-opacity-50">
+       <div className="bg-gray-800 rounded-lg shadow-xl max-w-lg w-full mx-4 border border-gray-600">
+         <div className="p-6">
+           {/* Warning Messages */}
+           <div className="space-y-4 mb-6">
+             {warnings.map((warningType) => {
+               const warning = WARNING_MESSAGES[warningType];
+               return (
+                 <div
+                   key={warningType}
+                   className="p-4 rounded-lg border-l-4 bg-orange-900/20 border-orange-500"
+                 >
+                   <h4 className="font-medium text-sm text-orange-300">
+                     {warning.title}
+                   </h4>
+                   <p className="text-sm text-gray-300 mt-1">
+                     {warning.message}
+                   </p>
+                 </div>
+               );
+             })}
+           </div>
+
+           {/* Actions */}
+           <div className="flex flex-col sm:flex-row gap-3 justify-end">
+             <button
+               onClick={onCancel}
+               className="px-4 py-2 text-sm font-medium text-gray-300 bg-gray-700 hover:bg-gray-600 rounded-lg transition-colors"
+             >
+               Cancel
+             </button>
+             <button
+               onClick={onAccept}
+               className="px-4 py-2 text-sm font-medium text-white bg-orange-600 hover:bg-orange-700 rounded-lg transition-colors"
+             >
+               Proceed Anyway
+             </button>
+           </div>
+         </div>
+       </div>
+     </div>
+   );
+ };
+
+ export default TranscriptionWarningModal;
frontend/src/components/WelcomeModal.tsx ADDED
@@ -0,0 +1,94 @@
+ import { XMarkIcon } from '@heroicons/react/24/outline';
+
+ interface WelcomeModalProps {
+   isOpen: boolean;
+   onClose: () => void;
+ }
+
+ export default function WelcomeModal({ isOpen, onClose }: WelcomeModalProps) {
+   if (!isOpen) return null;
+
+   return (
+     <div className="modal modal-open">
+       <div className="modal-box max-w-4xl bg-gray-800 text-white border border-gray-600">
+         <div className="flex justify-between items-center mb-6">
+           <h3 className="font-bold text-xl text-blue-300">
+             Omnilingual ASR Media Transcription Tool
+           </h3>
+           <button
+             className="btn btn-sm btn-circle btn-ghost text-gray-300 hover:text-white hover:bg-gray-700"
+             onClick={onClose}
+             aria-label="Close modal"
+           >
+             <XMarkIcon className="w-5 h-5" />
+           </button>
+         </div>
+
+         <div className="space-y-4">
+           {/* Main Description */}
+           <div className="bg-gray-700 p-4 rounded-lg border-l-4 border-blue-500">
+             <h4 className="font-semibold text-blue-200 mb-2">About This Tool</h4>
+             <p className="text-gray-200 leading-relaxed">
+               This experimental transcription tool uses the Omnilingual ASR model to transcribe audio and video content
+               for <strong>low-resource languages</strong> around the world. Our goal is to help preserve and
+               document linguistic diversity by making transcription accessible for underrepresented languages.
+             </p>
+           </div>
+
+           {/* Important Disclaimer */}
+           <div className="bg-yellow-900/40 p-4 rounded-lg border-l-4 border-yellow-500">
+             <h4 className="font-semibold text-yellow-200 mb-2">⚠️ Important Disclaimer</h4>
+             <p className="text-yellow-100 leading-relaxed mb-3">
+               <strong>This is experimental software.</strong> While we strive for accuracy, transcriptions
+               are not perfect. You should always <strong>double-check the outputs and make edits accordingly</strong>
+               {" "}to ensure accuracy for your specific use case.
+             </p>
+             <p className="text-yellow-100 leading-relaxed mb-3">
+               <strong>Shared Server Limitations:</strong> Due to resource constraints, we can only process one request at a time on this shared server.
+             </p>
+             <p className="text-yellow-100 leading-relaxed">
+               <strong>Want dedicated access?</strong> Clone this HuggingFace Space or run on your own servers to remove server limitations. See the{' '}
+               <a
+                 href="https://huggingface.co/spaces/facebook/mms-transcriptions/blob/main/README.md"
+                 target="_blank"
+                 rel="noopener noreferrer"
+                 className="text-yellow-300 hover:text-yellow-200 underline transition-colors"
+               >
+                 setup guide
+               </a>
+               {" "}for instructions.
+             </p>
+           </div>
+
+           {/* Language Information */}
+           <div className="bg-gray-700 p-4 rounded-lg border-l-4 border-green-500">
+             <h4 className="font-semibold text-green-200 mb-2">Supported Languages</h4>
+             <p className="text-gray-200 leading-relaxed">
+               For this public demo, we've restricted transcription to low-resource languages with
+               <strong> error rates below 10%</strong>. This ensures the best possible experience while
+               focusing on languages that could benefit most from improved transcription tools.
+             </p>
+           </div>
+
+           {/* Cultural Impact */}
+           <div className="bg-purple-900/40 p-4 rounded-lg border-l-4 border-purple-500">
+             <h4 className="font-semibold text-purple-200 mb-2">Preserving Linguistic Heritage</h4>
+             <p className="text-purple-100 leading-relaxed">
+               Many of the world's languages lack digital transcription tools. By improving these models
+               for low-resource languages, we're contributing to the preservation of cultural heritage
+               and making digital content more accessible to diverse communities.
+             </p>
+           </div>
+         </div>
+
+         <div className="modal-action mt-6">
+           <button className="py-2 px-4 bg-blue-600 hover:bg-blue-700 rounded text-sm transition-colors" onClick={onClose}>
+             Get Started
+           </button>
+         </div>
+       </div>
+     </div>
+   );
+ }
frontend/src/hooks/useAudioAnalyzer.ts ADDED
@@ -0,0 +1,91 @@
+ import { useRef, useEffect, useState } from 'react';
+
+ interface UseAudioAnalyzerReturn {
+   audioData: Uint8Array;
+   analyser: AnalyserNode | null;
+   connectToStream: (stream: MediaStream) => void;
+   disconnect: () => void;
+ }
+
+ export const useAudioAnalyzer = (fftSize: number = 256): UseAudioAnalyzerReturn => {
+   const [audioData, setAudioData] = useState<Uint8Array>(new Uint8Array(fftSize / 2));
+   const [analyser, setAnalyser] = useState<AnalyserNode | null>(null);
+   const audioContextRef = useRef<AudioContext | null>(null);
+   const sourceRef = useRef<MediaStreamAudioSourceNode | null>(null);
+   const animationFrameRef = useRef<number | null>(null);
+
+   const connectToStream = (stream: MediaStream) => {
+     try {
+       // Clean up existing connections
+       disconnect();
+
+       // Create new audio context and analyser
+       const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
+       const analyserNode = audioContext.createAnalyser();
+       analyserNode.fftSize = fftSize;
+       analyserNode.smoothingTimeConstant = 0.8;
+
+       // Connect stream to analyser
+       const source = audioContext.createMediaStreamSource(stream);
+       source.connect(analyserNode);
+
+       // Store references
+       audioContextRef.current = audioContext;
+       sourceRef.current = source;
+       setAnalyser(analyserNode);
+
+       // Start updating audio data
+       const bufferLength = analyserNode.frequencyBinCount;
+       const dataArray = new Uint8Array(bufferLength);
+
+       const updateAudioData = () => {
+         if (analyserNode) {
+           analyserNode.getByteFrequencyData(dataArray);
+           setAudioData(new Uint8Array(dataArray));
+           animationFrameRef.current = requestAnimationFrame(updateAudioData);
+         }
+       };
+
+       updateAudioData();
+     } catch (error) {
+       console.error('Error setting up audio analyzer:', error);
+     }
+   };
+
+   const disconnect = () => {
+     // Cancel animation frame
+     if (animationFrameRef.current) {
+       cancelAnimationFrame(animationFrameRef.current);
+       animationFrameRef.current = null;
+     }
+
+     // Disconnect audio nodes
+     if (sourceRef.current) {
+       sourceRef.current.disconnect();
+       sourceRef.current = null;
+     }
+
+     // Close audio context
+     if (audioContextRef.current) {
+       audioContextRef.current.close();
+       audioContextRef.current = null;
+     }
+
+     setAnalyser(null);
+     setAudioData(new Uint8Array(fftSize / 2));
+   };
+
+   // Cleanup on unmount
+   useEffect(() => {
+     return () => {
+       disconnect();
+     };
+   }, []);
+
+   return {
+     audioData,
+     analyser,
+     connectToStream,
+     disconnect,
+   };
+ };
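
A minimal consumer sketch for the hook above, assuming a component that meters microphone input; the getUserMedia wiring and component name are illustrative, not part of this commit:

// Hypothetical consumer of useAudioAnalyzer (not in the commit).
import {useEffect} from "react";
import {useAudioAnalyzer} from "./useAudioAnalyzer";

function MicLevelMeter() {
  const {audioData, connectToStream, disconnect} = useAudioAnalyzer(256);

  useEffect(() => {
    navigator.mediaDevices
      .getUserMedia({audio: true})
      .then(connectToStream)
      .catch((err) => console.error("Microphone access failed:", err));
    return disconnect;
  }, []);

  // Collapse the frequency bins into a single 0-255 level.
  const level = audioData.reduce((sum, v) => sum + v, 0) / audioData.length;
  return <progress max={255} value={level} />;
}

Note that connectToStream tears down any previous AudioContext before creating a new one, so calling it again (for example when the user switches input devices) is safe.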
frontend/src/hooks/useDragAndDrop.ts ADDED
@@ -0,0 +1,118 @@
+ import { useCallback, useState, DragEvent, useRef } from 'react';
+
+ interface UseDragAndDropOptions {
+   onFileDropped: (file: File) => void;
+   acceptedTypes?: string[];
+ }
+
+ interface UseDragAndDropReturn {
+   isDragActive: boolean;
+   dragProps: {
+     onDrop: (event: DragEvent<HTMLDivElement>) => void;
+     onDragOver: (event: DragEvent<HTMLDivElement>) => void;
+     onDragEnter: (event: DragEvent<HTMLDivElement>) => void;
+     onDragLeave: (event: DragEvent<HTMLDivElement>) => void;
+   };
+ }
+
+ const isValidFileType = (file: File, acceptedTypes?: string[]): boolean => {
+   if (!acceptedTypes || acceptedTypes.length === 0) return true;
+
+   return acceptedTypes.some(type => {
+     if (type.endsWith('/*')) {
+       // Handle MIME type categories like 'video/*' or 'audio/*'
+       const category = type.slice(0, -2);
+       return file.type.startsWith(category + '/');
+     } else {
+       // Handle exact MIME types
+       return file.type === type;
+     }
+   });
+ };
+
+ export const useDragAndDrop = ({
+   onFileDropped,
+   acceptedTypes = ['video/*', 'audio/*']
+ }: UseDragAndDropOptions): UseDragAndDropReturn => {
+   const [isDragActive, setIsDragActive] = useState(false);
+   const dragCounter = useRef(0);
+   const dragLeaveTimeout = useRef<number | null>(null);
+
+   const handleDragEnter = useCallback((event: DragEvent<HTMLDivElement>) => {
+     event.preventDefault();
+     event.stopPropagation();
+
+     // Clear any pending drag leave timeout
+     if (dragLeaveTimeout.current) {
+       clearTimeout(dragLeaveTimeout.current);
+       dragLeaveTimeout.current = null;
+     }
+
+     dragCounter.current += 1;
+
+     if (event.dataTransfer?.items && event.dataTransfer.items.length > 0) {
+       setIsDragActive(true);
+     }
+   }, []);
+
+   const handleDragLeave = useCallback((event: DragEvent<HTMLDivElement>) => {
+     event.preventDefault();
+     event.stopPropagation();
+
+     dragCounter.current -= 1;
+
+     // Use a small timeout to prevent flickering when moving between child elements
+     dragLeaveTimeout.current = window.setTimeout(() => {
+       if (dragCounter.current === 0) {
+         setIsDragActive(false);
+       }
+     }, 10);
+   }, []);
+
+   const handleDragOver = useCallback((event: DragEvent<HTMLDivElement>) => {
+     event.preventDefault();
+     event.stopPropagation();
+
+     // Set the dropEffect to indicate this is a copy operation
+     if (event.dataTransfer) {
+       event.dataTransfer.dropEffect = 'copy';
+     }
+   }, []);
+
+   const handleDrop = useCallback((event: DragEvent<HTMLDivElement>) => {
+     event.preventDefault();
+     event.stopPropagation();
+
+     // Clear timeout and reset state
+     if (dragLeaveTimeout.current) {
+       clearTimeout(dragLeaveTimeout.current);
+       dragLeaveTimeout.current = null;
+     }
+
+     setIsDragActive(false);
+     dragCounter.current = 0;
+
+     const files = Array.from(event.dataTransfer?.files || []);
+
+     if (files.length > 0) {
+       const validFile = files.find(file => isValidFileType(file, acceptedTypes));
+
+       if (validFile) {
+         onFileDropped(validFile);
+       } else {
+         console.warn('Dropped file is not a supported type:', files[0]?.type);
+         // You could also call an onError callback here if needed
+       }
+     }
+   }, [onFileDropped, acceptedTypes]);
+
+   return {
+     isDragActive,
+     dragProps: {
+       onDrop: handleDrop,
+       onDragOver: handleDragOver,
+       onDragEnter: handleDragEnter,
+       onDragLeave: handleDragLeave,
+     },
+   };
+ };
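
The hook is consumed by spreading dragProps onto a container element, as TranscriptionPage does later in this commit. A minimal sketch (component and prop names illustrative, not part of the commit):

// Hypothetical drop zone built on useDragAndDrop (not in the commit).
import {useDragAndDrop} from "./useDragAndDrop";

function AudioDropZone({onFile}: {onFile: (f: File) => void}) {
  const {isDragActive, dragProps} = useDragAndDrop({
    onFileDropped: onFile,
    acceptedTypes: ["audio/*"], // wildcard categories and exact MIME types both work
  });

  return (
    <div
      {...dragProps}
      style={{outline: isDragActive ? "2px dashed #60a5fa" : "none"}}
    >
      Drop an audio file here
    </div>
  );
}

The dragCounter ref plus the 10 ms leave timeout exist because dragenter/dragleave fire once per child element; counting entries and debouncing the leave keeps isDragActive from flickering as the cursor crosses nested nodes.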
frontend/src/hooks/useMediaTimeSync.ts ADDED
@@ -0,0 +1,69 @@
+ import { useEffect } from 'react';
+ import { TranscriptionResponse } from '../services/transcriptionApi';
+
+ interface UseMediaTimeSyncProps {
+   audioRef: React.RefObject<HTMLAudioElement>;
+   videoRef: React.RefObject<HTMLVideoElement>;
+   onTimeUpdate: () => void;
+   transcription: TranscriptionResponse | null;
+ }
+
+ export const useMediaTimeSync = ({
+   audioRef,
+   videoRef,
+   onTimeUpdate,
+   transcription,
+ }: UseMediaTimeSyncProps) => {
+   useEffect(() => {
+     const mediaElement = audioRef.current || videoRef.current;
+     let frameId: number | null = null;
+
+     console.log('Setting up requestAnimationFrame for media element:', mediaElement?.tagName);
+
+     const smoothUpdate = () => {
+       if (mediaElement && !mediaElement.paused && !mediaElement.ended) {
+         onTimeUpdate();
+         frameId = requestAnimationFrame(smoothUpdate);
+       } else {
+         frameId = null;
+       }
+     };
+
+     if (mediaElement && transcription) {
+       const handlePlay = () => {
+         console.log('Media play event triggered');
+         if (frameId) {
+           cancelAnimationFrame(frameId);
+         }
+         frameId = requestAnimationFrame(smoothUpdate);
+       };
+
+       const handlePause = () => {
+         console.log('Media pause event triggered');
+         if (frameId) {
+           cancelAnimationFrame(frameId);
+           frameId = null;
+         }
+       };
+
+       mediaElement.addEventListener('play', handlePlay);
+       mediaElement.addEventListener('pause', handlePause);
+       mediaElement.addEventListener('ended', handlePause);
+
+       // Start if already playing
+       if (!mediaElement.paused) {
+         console.log('Media is already playing, starting animation loop');
+         handlePlay();
+       }
+
+       return () => {
+         if (frameId) {
+           cancelAnimationFrame(frameId);
+         }
+         mediaElement.removeEventListener('play', handlePlay);
+         mediaElement.removeEventListener('pause', handlePause);
+         mediaElement.removeEventListener('ended', handlePause);
+       };
+     }
+   }, [transcription, onTimeUpdate, audioRef, videoRef]);
+ };
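
The hook drives updates with requestAnimationFrame rather than the media element's native timeupdate event, which browsers typically fire only a few times per second; the per-frame loop keeps the playhead and segment highlight moving smoothly while playback is active and parks itself when it stops. The core pattern, reduced to a framework-free sketch (illustrative, not part of the commit):

// Per-frame sync loop gated on playback state (illustrative, not in the commit).
function startSmoothSync(media: HTMLMediaElement, onTick: () => void): () => void {
  let frameId: number | null = null;
  const loop = () => {
    if (!media.paused && !media.ended) {
      onTick(); // runs at display refresh rate, typically ~60 Hz
      frameId = requestAnimationFrame(loop);
    } else {
      frameId = null; // loop stops itself when playback stops
    }
  };
  const start = () => {
    if (frameId === null) frameId = requestAnimationFrame(loop);
  };
  media.addEventListener("play", start);
  if (!media.paused) start();
  return () => {
    media.removeEventListener("play", start);
    if (frameId !== null) cancelAnimationFrame(frameId);
  };
}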
frontend/src/hooks/useTimelineDragControls.ts ADDED
@@ -0,0 +1,416 @@
+ import { useState, useCallback, useEffect } from 'react';
+ import { AlignedSegment } from '../services/transcriptionApi';
+ import { SegmentWithTrack } from '../utils/trackUtils';
+ import { useTranscriptionStore } from '../stores/transcriptionStore';
+
+ interface UseTimelineDragControlsOptions {
+   segmentsWithTracks: SegmentWithTrack[];
+   displaySegments: AlignedSegment[];
+   geometryUtils: {
+     timeToX: (time: number) => number;
+     trackToY: (track: number) => number;
+     canvasXToTime: (canvasX: number) => number;
+     clientXToCanvasX: (clientX: number, canvasRef: React.RefObject<HTMLCanvasElement>) => number;
+     timelineWidth: number;
+   };
+   canvasRef: React.RefObject<HTMLCanvasElement>;
+   containerRef: React.RefObject<HTMLDivElement>;
+   mediaDuration: number;
+   constants: {
+     TRACK_HEIGHT: number;
+     TIMELINE_PADDING: number;
+   };
+ }
+
+ type DragType = 'move' | 'resize-start' | 'resize-end';
+
+ export const useTimelineDragControls = ({
+   segmentsWithTracks,
+   displaySegments,
+   geometryUtils,
+   canvasRef,
+   containerRef,
+   mediaDuration,
+   constants,
+ }: UseTimelineDragControlsOptions) => {
+   const { TRACK_HEIGHT, TIMELINE_PADDING } = constants;
+   const { timeToX, trackToY, canvasXToTime, clientXToCanvasX, timelineWidth } = geometryUtils;
+
+   // Get store actions
+   const {
+     seekToTime,
+     selectedSegmentIndex,
+     setSelectedSegmentIndex,
+     updateSegmentTiming,
+     finalizeSegmentPositioning,
+   } = useTranscriptionStore();
+
+   // Drag state
+   const [isDragging, setIsDragging] = useState(false);
+   const [isTimelineDragging, setIsTimelineDragging] = useState(false);
+   const [dragType, setDragType] = useState<DragType | null>(null);
+   const [dragStartX, setDragStartX] = useState(0);
+   const [dragStartTime, setDragStartTime] = useState(0);
+   const [dragSegmentIndex, setDragSegmentIndex] = useState<number | null>(null);
+   const [hoveredSegment, setHoveredSegment] = useState<number | null>(null);
+
+   // Find segment at a specific time
+   const findSegmentAtTime = useCallback((time: number) => {
+     for (let i = 0; i < displaySegments.length; i++) {
+       const segment = displaySegments[i];
+       if (time >= segment.start && time <= segment.end) {
+         return i;
+       }
+     }
+     return null;
+   }, [displaySegments]);
+
+   // Seek to position using centralized store function
+   const seekToPosition = useCallback((clientX: number) => {
+     const actualCanvasX = clientXToCanvasX(clientX, canvasRef);
+     const clickTime = canvasXToTime(actualCanvasX);
+     const clampedTime = Math.max(0, Math.min(clickTime, mediaDuration));
+
+     seekToTime(clampedTime);
+
+     // Auto-select segment at the current time position
+     const segmentAtTime = findSegmentAtTime(clampedTime);
+     if (segmentAtTime !== null) {
+       setSelectedSegmentIndex(segmentAtTime);
+     }
+   }, [clientXToCanvasX, canvasRef, canvasXToTime, mediaDuration, seekToTime, findSegmentAtTime, setSelectedSegmentIndex]);
+
+   // Find segment at position
+   const findSegmentAtPosition = useCallback((canvasX: number, canvasY: number) => {
+     for (let i = 0; i < segmentsWithTracks.length; i++) {
+       const segment = segmentsWithTracks[i];
+       const segmentX = timeToX(segment.start);
+       // Use actual time-based width for hit detection, with minimum 8px for very short segments
+       const actualWidth = timeToX(segment.end) - segmentX;
+       const segmentWidth = Math.max(actualWidth, 8);
+       const segmentY = trackToY(segment.track);
+
+       if (canvasX >= segmentX && canvasX <= segmentX + segmentWidth &&
+           canvasY >= segmentY && canvasY <= segmentY + TRACK_HEIGHT) {
+         // Find the original segment index in displaySegments
+         const originalIndex = displaySegments.findIndex(s =>
+           s.start === segment.start && s.end === segment.end && s.text === segment.text
+         );
+         return { index: originalIndex, segment };
+       }
+     }
+     return null;
+   }, [segmentsWithTracks, displaySegments, timeToX, trackToY, TRACK_HEIGHT]);
+
+   // Check if position is on resize handle
+   const getResizeHandle = useCallback((canvasX: number, canvasY: number, segmentIndex: number) => {
+     if (selectedSegmentIndex !== segmentIndex) return null;
+
+     // Find the segment in segmentsWithTracks that corresponds to the original segment index
+     const originalSegment = displaySegments[segmentIndex];
+     const segmentWithTrack = segmentsWithTracks.find(s =>
+       s.start === originalSegment.start && s.end === originalSegment.end && s.text === originalSegment.text
+     );
+
+     if (!segmentWithTrack) return null;
+
+     const segmentX = timeToX(segmentWithTrack.start);
+     // Use actual time-based width for resize handle detection, with minimum for very short segments
+     const actualWidth = timeToX(segmentWithTrack.end) - segmentX;
+     const segmentWidth = Math.max(actualWidth, 16); // Minimum 16px to ensure handles are accessible
+     const segmentY = trackToY(segmentWithTrack.track);
+
+     const handleWidth = 8;
+
+     if (canvasX >= segmentX && canvasX <= segmentX + handleWidth &&
+         canvasY >= segmentY && canvasY <= segmentY + TRACK_HEIGHT) {
+       return 'resize-start';
+     }
+     if (canvasX >= segmentX + segmentWidth - handleWidth && canvasX <= segmentX + segmentWidth &&
+         canvasY >= segmentY && canvasY <= segmentY + TRACK_HEIGHT) {
+       return 'resize-end';
+     }
+     return null;
+   }, [selectedSegmentIndex, segmentsWithTracks, displaySegments, timeToX, trackToY, TRACK_HEIGHT]);
+
+   // Edge-based auto-scroll during user interactions
+   const handleEdgeScroll = useCallback((clientX: number) => {
+     const container = containerRef.current;
+     if (!container) return;
+
+     // Use container's bounding rect to get the visible viewport area
+     const containerRect = container.getBoundingClientRect();
+     const containerWidth = container.clientWidth;
+     const edgeThreshold = 80; // Increased from 50px to 80px for better UX
+     const scrollSpeed = 8; // Slightly reduced for smoother scrolling
+
+     // Calculate mouse position relative to the visible container viewport
+     const mouseX = clientX - containerRect.left;
+
+     // Check if mouse is near the edges or outside the container
+     const isLeftOfContainer = mouseX < 0;
+     const isRightOfContainer = mouseX > containerWidth;
+     const isNearLeftEdge = mouseX > 20 && mouseX < edgeThreshold; // Start 20px from edge, trigger within 80px
+     const isNearRightEdge = mouseX > containerWidth - edgeThreshold && mouseX < containerWidth - 20; // End 20px from edge
+
+     // Scroll left if near left edge or dragging to the left of container
+     if ((isNearLeftEdge || isLeftOfContainer) && container.scrollLeft > 0) {
+       let adjustedScrollSpeed = scrollSpeed;
+
+       if (isLeftOfContainer) {
+         // Faster scrolling when outside container
+         const distanceOutside = Math.abs(mouseX);
+         adjustedScrollSpeed = Math.min(scrollSpeed * 2, scrollSpeed + distanceOutside * 0.1);
+       } else {
+         // Variable speed based on distance from edge when inside
+         const distanceFromEdge = mouseX - 20;
+         const scrollMultiplier = 1 - (distanceFromEdge / (edgeThreshold - 20));
+         adjustedScrollSpeed = Math.max(2, scrollSpeed * scrollMultiplier);
+       }
+
+       container.scrollLeft = Math.max(0, container.scrollLeft - adjustedScrollSpeed);
+     }
+     // Scroll right if near right edge or dragging to the right of container
+     else if (isNearRightEdge || isRightOfContainer) {
+       let adjustedScrollSpeed = scrollSpeed;
+
+       if (isRightOfContainer) {
+         // Faster scrolling when outside container
+         const distanceOutside = mouseX - containerWidth;
+         adjustedScrollSpeed = Math.min(scrollSpeed * 2, scrollSpeed + distanceOutside * 0.1);
+       } else {
+         // Variable speed based on distance from edge when inside
+         const distanceFromEdge = (containerWidth - 20) - mouseX;
+         const scrollMultiplier = 1 - (distanceFromEdge / (edgeThreshold - 20));
+         adjustedScrollSpeed = Math.max(2, scrollSpeed * scrollMultiplier);
+       }
+
+       const maxScrollLeft = Math.max(0, timelineWidth - containerWidth);
+       container.scrollLeft = Math.min(maxScrollLeft, container.scrollLeft + adjustedScrollSpeed);
+     }
+   }, [timelineWidth, containerRef]);
+
+   // Handle mouse events
+   const handleMouseMove = useCallback((event: React.MouseEvent<HTMLCanvasElement>) => {
+     // Completely disable hover detection and cursor updates during any drag operation
+     if (isDragging || isTimelineDragging) {
+       return;
+     }
+
+     const canvas = canvasRef.current;
+     if (!canvas) return;
+
+     const rect = canvas.getBoundingClientRect();
+     const actualCanvasX = clientXToCanvasX(event.clientX, canvasRef);
+     const y = event.clientY - rect.top;
+
+     // Find hovered segment
+     const foundSegment = findSegmentAtPosition(actualCanvasX, y);
+     setHoveredSegment(foundSegment?.index ?? null);
+
+     // Update cursor
+     if (canvas) {
+       let cursor = 'default';
+
+       if (foundSegment) {
+         const resizeHandle = getResizeHandle(actualCanvasX, y, foundSegment.index);
+         if (resizeHandle) {
+           cursor = 'ew-resize';
+         } else {
+           cursor = 'move';
+         }
+       }
+
+       canvas.style.cursor = cursor;
+     }
+   }, [isDragging, isTimelineDragging, findSegmentAtPosition, getResizeHandle, clientXToCanvasX, canvasRef]);
+
+   const handleMouseDown = useCallback((event: React.MouseEvent<HTMLCanvasElement>) => {
+     const canvas = canvasRef.current;
+     if (!canvas) return;
+
+     const rect = canvas.getBoundingClientRect();
+     const actualCanvasX = clientXToCanvasX(event.clientX, canvasRef);
+     const y = event.clientY - rect.top;
+
+     // Check if clicking on a segment
+     const foundSegment = findSegmentAtPosition(actualCanvasX, y);
+
+     if (foundSegment) {
+       // Check for resize handles first
+       const resizeHandle = getResizeHandle(actualCanvasX, y, foundSegment.index);
+
+       if (resizeHandle) {
+         // Start resize drag
+         event.preventDefault();
+         setIsDragging(true);
+         setDragType(resizeHandle as DragType);
+         setDragStartX(event.clientX);
+         setDragSegmentIndex(foundSegment.index);
+
+         // Only set selected segment if it's not already selected
+         // This prevents changing selection during drag operations
+         if (selectedSegmentIndex !== foundSegment.index) {
+           setSelectedSegmentIndex(foundSegment.index);
+         }
+
+         const segment = foundSegment.segment;
+         setDragStartTime(resizeHandle === 'resize-end' ? segment.end : segment.start);
+
+         // Update media time using centralized store function
+         seekToTime(resizeHandle === 'resize-end' ? segment.end : segment.start);
+       } else {
+         // Start move drag
+         event.preventDefault();
+         setIsDragging(true);
+         setDragType('move');
+         setDragStartX(event.clientX);
+         setDragSegmentIndex(foundSegment.index);
+
+         // Only set selected segment if it's not already selected
+         // This prevents changing selection during drag operations
+         if (selectedSegmentIndex !== foundSegment.index) {
+           setSelectedSegmentIndex(foundSegment.index);
+         }
+
+         setDragStartTime(foundSegment.segment.start);
+
+         // Update media time to mouse position using centralized store function
+         const clickTime = canvasXToTime(actualCanvasX);
+         seekToTime(clickTime);
+       }
+     } else {
+       // Clicking outside of any segment - deselect the selected segment
+       if (selectedSegmentIndex !== null) {
+         setSelectedSegmentIndex(null);
+       }
+
+       // Timeline click - start timeline drag
+       event.preventDefault();
+       setIsTimelineDragging(true);
+       seekToPosition(event.clientX);
+     }
+   }, [
+     findSegmentAtPosition,
+     getResizeHandle,
+     selectedSegmentIndex,
+     setSelectedSegmentIndex,
+     seekToPosition,
+     canvasXToTime,
+     clientXToCanvasX,
+     canvasRef,
+     seekToTime
+   ]);
+
+   // Global mouse move handler
+   useEffect(() => {
+     const handleGlobalMouseMove = (e: MouseEvent) => {
+       // Handle edge-based scrolling during any drag operation
+       if (isDragging || isTimelineDragging) {
+         handleEdgeScroll(e.clientX);
+
+         // Clear hover state during drag operations to prevent visual confusion
+         if (hoveredSegment !== null) {
+           setHoveredSegment(null);
+         }
+       }
+
+       if (isDragging && dragType && dragSegmentIndex !== null) {
+         const deltaX = e.clientX - dragStartX;
+         const timelineWidthPx = timelineWidth - TIMELINE_PADDING * 2;
+         const deltaTime = (deltaX / timelineWidthPx) * mediaDuration;
+
+         const segment = displaySegments[dragSegmentIndex];
+         let newStart = segment.start;
+         let newEnd = segment.end;
+
+         switch (dragType) {
+           case 'move': {
+             const newStartTime = Math.max(0, dragStartTime + deltaTime);
+             const duration = segment.end - segment.start;
+             newStart = Math.min(newStartTime, mediaDuration - duration);
+             newEnd = newStart + duration;
+
+             // Update media time to follow mouse using centralized store function
+             const actualCanvasX = clientXToCanvasX(e.clientX, canvasRef);
+             const mouseTime = canvasXToTime(actualCanvasX);
+             seekToTime(mouseTime);
+             break;
+           }
+           case 'resize-start':
+             newStart = Math.max(0, Math.min(dragStartTime + deltaTime, segment.end - 0.1));
+             seekToTime(newStart);
+             break;
+           case 'resize-end':
+             newEnd = Math.min(mediaDuration, Math.max(dragStartTime + deltaTime, segment.start + 0.1));
+             seekToTime(newEnd);
+             break;
+         }
+
+         updateSegmentTiming(dragSegmentIndex, newStart, newEnd, true); // deferSorting=true during drag
+       } else if (isTimelineDragging) {
+         seekToPosition(e.clientX);
+       }
+     };
+
+     const handleGlobalMouseUp = () => {
+       // If we were dragging a segment, finalize its positioning (re-sort segments)
+       if (isDragging && dragSegmentIndex !== null) {
+         finalizeSegmentPositioning();
+       }
+
+       setIsDragging(false);
+       setIsTimelineDragging(false);
+       setDragType(null);
+       setDragSegmentIndex(null);
+     };
+
+     if (isDragging || isTimelineDragging) {
+       document.addEventListener('mousemove', handleGlobalMouseMove);
+       document.addEventListener('mouseup', handleGlobalMouseUp);
+
+       return () => {
+         document.removeEventListener('mousemove', handleGlobalMouseMove);
+         document.removeEventListener('mouseup', handleGlobalMouseUp);
+       };
+     }
+   }, [
+     isDragging,
+     isTimelineDragging,
+     dragType,
+     dragSegmentIndex,
+     dragStartX,
+     dragStartTime,
+     displaySegments,
+     mediaDuration,
+     timelineWidth,
+     TIMELINE_PADDING,
+     updateSegmentTiming,
+     seekToPosition,
+     canvasXToTime,
+     clientXToCanvasX,
+     canvasRef,
+     seekToTime,
+     handleEdgeScroll,
+     hoveredSegment
+   ]);
+
+   return {
+     // State
+     isDragging,
+     isTimelineDragging,
+     dragSegmentIndex,
+     hoveredSegment,
+
+     // Actions
+     seekToPosition,
+     findSegmentAtTime,
+
+     // Mouse handlers
+     handleMouseMove,
+     handleMouseDown,
+
+     // Utilities
+     findSegmentAtPosition,
+     getResizeHandle,
+   };
+ };
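
The heart of the drag math above is the pixel-to-seconds conversion, deltaTime = (deltaX / usableWidth) * mediaDuration, followed by clamping so the segment stays inside the media. A worked sketch of the 'move' case as a pure function (names illustrative, not part of the commit):

// Illustrative restatement of the 'move' drag arithmetic (not in the commit).
function moveSegment(
  segment: {start: number; end: number},
  dragStartTime: number,  // segment.start captured at mousedown
  deltaX: number,         // horizontal pixels moved since mousedown
  usableWidthPx: number,  // timelineWidth - 2 * TIMELINE_PADDING
  mediaDuration: number
): {start: number; end: number} {
  const deltaTime = (deltaX / usableWidthPx) * mediaDuration;
  const duration = segment.end - segment.start;
  // Clamp so the whole segment stays inside [0, mediaDuration].
  const start = Math.min(
    Math.max(0, dragStartTime + deltaTime),
    mediaDuration - duration
  );
  return {start, end: start + duration};
}

For example, with a 100 s file laid out over 5000 usable pixels, a 50 px drag shifts the segment by exactly 1 s.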
frontend/src/hooks/useTimelineGeometry.ts ADDED
@@ -0,0 +1,62 @@
+ import { useCallback, useMemo } from 'react';
+
+ interface UseTimelineGeometryOptions {
+   mediaDuration: number;
+   constants: {
+     TRACK_HEIGHT: number;
+     TRACK_PADDING: number;
+     TIMELINE_PADDING: number;
+     PIXELS_PER_SECOND: number;
+   };
+ }
+
+ export const useTimelineGeometry = ({
+   mediaDuration,
+   constants,
+ }: UseTimelineGeometryOptions) => {
+   const { TRACK_HEIGHT, TRACK_PADDING, TIMELINE_PADDING, PIXELS_PER_SECOND } = constants;
+
+   // Calculate timeline dimensions
+   const timelineWidth = useMemo(() =>
+     mediaDuration * PIXELS_PER_SECOND,
+     [mediaDuration, PIXELS_PER_SECOND]
+   );
+
+   // Convert time to x position
+   const timeToX = useCallback((time: number) => {
+     return TIMELINE_PADDING + (time / mediaDuration) * (timelineWidth - TIMELINE_PADDING * 2);
+   }, [mediaDuration, timelineWidth, TIMELINE_PADDING]);
+
+   // Convert x position to time
+   const xToTime = useCallback((x: number) => {
+     return ((x - TIMELINE_PADDING) / (timelineWidth - TIMELINE_PADDING * 2)) * mediaDuration;
+   }, [mediaDuration, timelineWidth, TIMELINE_PADDING]);
+
+   // Convert track to y position
+   const trackToY = useCallback((track: number) => {
+     return TIMELINE_PADDING + track * (TRACK_HEIGHT + TRACK_PADDING);
+   }, [TRACK_HEIGHT, TRACK_PADDING, TIMELINE_PADDING]);
+
+   // Convert canvas coordinates to time (accounting for scroll)
+   const canvasXToTime = useCallback((canvasX: number) => {
+     return xToTime(canvasX);
+   }, [xToTime]);
+
+   // Convert client coordinates to canvas coordinates
+   const clientXToCanvasX = useCallback((clientX: number, canvasRef: React.RefObject<HTMLCanvasElement>) => {
+     const canvas = canvasRef.current;
+     if (!canvas) return 0;
+
+     const rect = canvas.getBoundingClientRect();
+     return clientX - rect.left;
+   }, []);
+
+   return {
+     timelineWidth,
+     timeToX,
+     xToTime,
+     trackToY,
+     canvasXToTime,
+     clientXToCanvasX,
+   };
+ };
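
timeToX and xToTime above are exact linear inverses over the padded pixel range. A quick round-trip check under assumed constants (the numbers are illustrative, not from the commit):

// Round-trip check with assumed constants (not in the commit).
const TIMELINE_PADDING = 20;
const PIXELS_PER_SECOND = 50;
const mediaDuration = 60; // seconds
const timelineWidth = mediaDuration * PIXELS_PER_SECOND; // 3000 px

const timeToX = (t: number) =>
  TIMELINE_PADDING + (t / mediaDuration) * (timelineWidth - TIMELINE_PADDING * 2);
const xToTime = (x: number) =>
  ((x - TIMELINE_PADDING) / (timelineWidth - TIMELINE_PADDING * 2)) * mediaDuration;

console.assert(timeToX(30) === 1500);        // midpoint of the usable range
console.assert(xToTime(timeToX(30)) === 30); // the two maps invert each other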
frontend/src/hooks/useTimelineRenderer.ts ADDED
@@ -0,0 +1,254 @@
+ import { useCallback, useEffect } from 'react';
+ import { AlignedSegment } from '../services/transcriptionApi';
+ import { SegmentWithTrack } from '../utils/trackUtils';
+ import { formatTime } from '../utils/subtitleUtils';
+
+ interface UseTimelineRendererOptions {
+   canvasRef: React.RefObject<HTMLCanvasElement>;
+   canvasSize: { width: number; height: number };
+   segmentsWithTracks: SegmentWithTrack[];
+   displaySegments: AlignedSegment[];
+   currentTime: number;
+   activeSegmentIndex: number | null;
+   selectedSegmentIndex: number | null;
+   hoveredSegment: number | null;
+   isDragging: boolean;
+   dragSegmentIndex: number | null;
+   mediaDuration: number;
+   geometryUtils: {
+     timeToX: (time: number) => number;
+     trackToY: (track: number) => number;
+     timelineWidth: number;
+   };
+   constants: {
+     TRACK_HEIGHT: number;
+     TIMELINE_PADDING: number;
+     PIXELS_PER_SECOND: number;
+   };
+ }
+
+ export const useTimelineRenderer = ({
+   canvasRef,
+   canvasSize,
+   segmentsWithTracks,
+   displaySegments,
+   currentTime,
+   activeSegmentIndex,
+   selectedSegmentIndex,
+   hoveredSegment,
+   isDragging,
+   dragSegmentIndex,
+   mediaDuration,
+   geometryUtils,
+   constants,
+ }: UseTimelineRendererOptions) => {
+   const { timeToX, trackToY, timelineWidth } = geometryUtils;
+   const { TRACK_HEIGHT, TIMELINE_PADDING, PIXELS_PER_SECOND } = constants;
+
+   // Draw the timeline
+   const draw = useCallback(() => {
+     const canvas = canvasRef.current;
+     if (!canvas) return;
+
+     const ctx = canvas.getContext('2d');
+     if (!ctx) return;
+
+     // Set canvas size
+     canvas.width = canvasSize.width;
+     canvas.height = canvasSize.height;
+
+     // Clear canvas
+     ctx.fillStyle = '#0f172a'; // bg-slate-900
+     ctx.fillRect(0, 0, canvas.width, canvas.height);
+
+     // Draw timeline base line
+     ctx.strokeStyle = '#4B5563'; // gray-600
+     ctx.lineWidth = 1;
+     ctx.beginPath();
+     const baseY = canvas.height / 2;
+     ctx.moveTo(TIMELINE_PADDING, baseY);
+     ctx.lineTo(timelineWidth - TIMELINE_PADDING, baseY);
+     ctx.stroke();
+
+     // Draw time markers with dynamic intervals
+     const getOptimalTimeInterval = () => {
+       const pixelsPerSecond = PIXELS_PER_SECOND;
+       const minSpacing = 120; // Increased from 80 to give more space between markers
+
+       // Calculate what time interval gives us reasonable spacing
+       const minTimeInterval = minSpacing / pixelsPerSecond;
+
+       // Choose appropriate intervals based on duration and zoom
+       if (minTimeInterval <= 1) return { major: 5, minor: 1 };
+       if (minTimeInterval <= 5) return { major: 10, minor: 2 };
+       if (minTimeInterval <= 10) return { major: 30, minor: 5 };
+       if (minTimeInterval <= 30) return { major: 60, minor: 10 };
+       if (minTimeInterval <= 60) return { major: 300, minor: 60 }; // 5min major, 1min minor
+       if (minTimeInterval <= 300) return { major: 600, minor: 120 }; // 10min major, 2min minor
+       return { major: 1800, minor: 300 }; // 30min major, 5min minor
+     };
+
+     const { major: majorInterval, minor: minorInterval } = getOptimalTimeInterval();
+
+     // Draw background grid lines for better visual organization
+     ctx.strokeStyle = '#1E293B'; // slate-800 (very subtle)
+     ctx.lineWidth = 1;
+     for (let time = 0; time <= mediaDuration; time += minorInterval) {
+       const x = timeToX(time);
+       ctx.beginPath();
+       ctx.moveTo(x, 0);
+       ctx.lineTo(x, canvas.height - 40); // Don't overlap with time labels
+       ctx.stroke();
+     }
+
+     // Draw minor markers (shorter, more visible than before)
+     ctx.strokeStyle = '#64748B'; // slate-500 (more visible than gray-700)
+     ctx.lineWidth = 1;
+     for (let time = 0; time <= mediaDuration; time += minorInterval) {
+       const x = timeToX(time);
+       ctx.beginPath();
+       ctx.moveTo(x, canvas.height - 15);
+       ctx.lineTo(x, canvas.height - 5);
+       ctx.stroke();
+     }
+
+     // Draw major markers (taller, much more prominent)
+     ctx.strokeStyle = '#94A3B8'; // slate-400 (much more visible)
+     ctx.fillStyle = '#F1F5F9'; // slate-100 (bright white-ish for text)
+     ctx.font = 'bold 13px system-ui'; // Slightly larger and bold
+     ctx.lineWidth = 2; // Thicker lines for major markers
+
+     for (let time = 0; time <= mediaDuration; time += majorInterval) {
+       const x = timeToX(time);
+
+       // Draw time label with special handling for 0:00 to avoid clipping
+       const timeText = formatTime(time);
+       ctx.fillStyle = '#F1F5F9'; // slate-100 (bright white-ish)
+
+       if (time === 0) {
+         // For 0:00, align left and shift right to avoid clipping
+         ctx.textAlign = 'left';
+         ctx.fillText(timeText, x + 4, canvas.height - 20);
+       } else {
+         // For all other times, center align as normal
+         ctx.textAlign = 'center';
+         ctx.fillText(timeText, x, canvas.height - 20);
+       }
+     }
+
+     // Draw segments
+     segmentsWithTracks.forEach((segment) => {
+       // Find the original segment index in displaySegments
+       const originalIndex = displaySegments.findIndex(s =>
+         s.start === segment.start && s.end === segment.end && s.text === segment.text
+       );
+
+       const x = timeToX(segment.start);
+       // Calculate actual width based on time duration, don't enforce minimum width for rendering
+       const actualWidth = timeToX(segment.end) - timeToX(segment.start);
+       const width = Math.max(actualWidth, 2); // Minimum 2px so segments are always visible
+       const y = trackToY(segment.track);
+       const height = TRACK_HEIGHT;
+
+       // Draw all segments (scrolling is handled by container)
+       {
+         // Determine segment color based on original segment index, not track index
+         let fillColor = '#374151'; // gray-700 (default)
+         let strokeColor = '#4B5563'; // gray-600
+         let textColor = '#D1D5DB'; // gray-300
+
+         // Priority order: dragging > selected > active > hovered
+         // Use originalIndex for all comparisons to maintain consistency during drag operations
+         if (isDragging && dragSegmentIndex === originalIndex) {
+           // Special styling for segment being dragged
+           fillColor = '#DC2626'; // red-600 (dragging indicator)
+           strokeColor = '#EF4444'; // red-500
+           textColor = '#FFFFFF';
+         } else if (selectedSegmentIndex === originalIndex) {
+           fillColor = '#D97706'; // yellow-600
+           strokeColor = '#FBBF24'; // yellow-400
+           textColor = '#FFFFFF';
+         } else if (activeSegmentIndex === originalIndex && selectedSegmentIndex === null) {
+           // Don't highlight active segment in blue when there's a selected segment
+           fillColor = '#2563EB'; // blue-600
+           strokeColor = '#3B82F6'; // blue-500
+           textColor = '#FFFFFF';
+         } else if (hoveredSegment === originalIndex && !isDragging) {
+           // Only show hover state when not dragging
+           fillColor = '#4B5563'; // gray-600
+           strokeColor = '#6B7280'; // gray-500
+         }
+
+         // Draw segment rectangle
+         ctx.fillStyle = fillColor;
+         ctx.fillRect(x, y, width, height);
+
+         // Draw segment border
+         ctx.strokeStyle = strokeColor;
+         ctx.lineWidth = 1;
+         ctx.strokeRect(x, y, width, height);
+
+         // Draw segment text
+         ctx.fillStyle = textColor;
+         ctx.font = '12px system-ui';
+         ctx.textAlign = 'left';
+
+         // Clip text to segment width
+         ctx.save();
+         ctx.beginPath();
+         ctx.rect(x + 4, y, width - 8, height);
+         ctx.clip();
+
+         const textY = y + height / 2 + 4; // Center vertically
+         ctx.fillText(segment.text, x + 4, textY);
+         ctx.restore();
+
+         // Draw resize handles for selected segment
+         if (selectedSegmentIndex === originalIndex) {
+           const handleWidth = 8;
+           ctx.fillStyle = '#3B82F6'; // blue-500
+
+           // Left handle
+           ctx.fillRect(x, y, handleWidth, height);
+
+           // Right handle
+           ctx.fillRect(x + width - handleWidth, y, handleWidth, height);
+         }
+       }
+     });
+
+     // Draw progress indicator
+     const progressX = timeToX(currentTime);
+     ctx.strokeStyle = '#EF4444'; // red-500
+     ctx.lineWidth = 2;
+     ctx.beginPath();
+     ctx.moveTo(progressX, 0);
+     ctx.lineTo(progressX, canvas.height);
+     ctx.stroke();
+   }, [
+     canvasRef,
+     canvasSize,
+     segmentsWithTracks,
+     displaySegments,
+     currentTime,
+     activeSegmentIndex,
+     selectedSegmentIndex,
+     hoveredSegment,
+     isDragging,
+     dragSegmentIndex,
+     mediaDuration,
+     timeToX,
+     trackToY,
+     timelineWidth,
+     TRACK_HEIGHT,
+     TIMELINE_PADDING,
+     PIXELS_PER_SECOND,
+   ]);
+
+   // Redraw when dependencies change
+   useEffect(() => {
+     draw();
+   }, [draw]);
+
+   return { draw };
+ };
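
getOptimalTimeInterval above picks tick spacing from the zoom level so that adjacent major labels stay at least ~120 px apart. The same ladder as a standalone function with two worked cases (illustrative, not part of the commit):

// Illustrative restatement of the tick-interval ladder (not in the commit).
function tickIntervals(pixelsPerSecond: number, minSpacingPx = 120) {
  const minTimeInterval = minSpacingPx / pixelsPerSecond; // seconds spanned by 120 px
  if (minTimeInterval <= 1) return {major: 5, minor: 1};
  if (minTimeInterval <= 5) return {major: 10, minor: 2};
  if (minTimeInterval <= 10) return {major: 30, minor: 5};
  if (minTimeInterval <= 30) return {major: 60, minor: 10};
  if (minTimeInterval <= 60) return {major: 300, minor: 60};
  if (minTimeInterval <= 300) return {major: 600, minor: 120};
  return {major: 1800, minor: 300};
}

// At 50 px/s (zoomed in): 120 / 50 = 2.4 s  -> 10 s major / 2 s minor ticks.
// At 2 px/s (zoomed out): 120 / 2  = 60 s  -> 5 min major / 1 min minor ticks.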
frontend/src/index.css ADDED
@@ -0,0 +1,7 @@
+ @tailwind base;
+ @tailwind components;
+ @tailwind utilities;
+
+ body {
+   @apply bg-gray-900 text-white;
+ }
frontend/src/main.tsx ADDED
@@ -0,0 +1,10 @@
+ import React from 'react';
+ import ReactDOM from 'react-dom/client';
+ import App from './App';
+ import './index.css';
+
+ ReactDOM.createRoot(document.getElementById('root') as HTMLElement).render(
+   <React.StrictMode>
+     <App />
+   </React.StrictMode>
+ );
frontend/src/pages/TranscriptionPage.tsx ADDED
@@ -0,0 +1,136 @@
1
+ import React from "react";
2
+ import {useState, useRef, useCallback} from "react";
3
+ import {useTranscriptionStore} from "../stores/transcriptionStore";
4
+ import {
5
+ SUPPORTED_AUDIO_FORMATS,
6
+ SUPPORTED_VIDEO_FORMATS,
7
+ CODEC_INFO,
8
+ } from "../utils/mediaTypes";
9
+ import TranscriptionSideBar from "../components/TranscriptionSideBar";
10
+ import TranscriptionPlayer from "../components/TranscriptionPlayer";
11
+ import MediaRecorder from "../components/MediaRecorder";
12
+ import {useDragAndDrop} from "../hooks/useDragAndDrop";
13
+ import {CloudArrowUpIcon} from "@heroicons/react/24/outline";
14
+ import ErrorBoundary from "../components/ErrorBoundary";
15
+ export default function TranscriptionPage() {
16
+ const {isRecording, setFile, stopRecording} = useTranscriptionStore();
17
+
18
+ // Sidebar resizing state
19
+ const [sidebarWidth, setSidebarWidth] = useState(256); // Default 256px (w-64)
20
+ const [isResizing, setIsResizing] = useState(false);
21
+ const sidebarRef = useRef<HTMLDivElement>(null);
22
+
23
+ // Drag and drop functionality
24
+ const {isDragActive, dragProps} = useDragAndDrop({
25
+ onFileDropped: setFile,
26
+ acceptedTypes: ["audio/*"],
27
+ });
28
+
29
+ // Handle sidebar resizing
30
+ const handleMouseDown = useCallback((e: React.MouseEvent) => {
31
+ e.preventDefault();
32
+ setIsResizing(true);
33
+ }, []);
34
+
35
+ const handleMouseMove = useCallback(
36
+ (e: MouseEvent) => {
37
+ if (!isResizing) return;
38
+
39
+ const newWidth = Math.max(200, Math.min(600, e.clientX)); // Min 200px, max 600px
40
+ setSidebarWidth(newWidth);
41
+ },
42
+ [isResizing]
43
+ );
44
+
45
+ const handleMouseUp = useCallback(() => {
46
+ setIsResizing(false);
47
+ }, []);
48
+
49
+ // Add global mouse event listeners
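+ // Attached to document so the drag keeps tracking even when the pointer leaves the narrow handle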
50
+ React.useEffect(() => {
51
+ if (isResizing) {
52
+ document.addEventListener("mousemove", handleMouseMove);
53
+ document.addEventListener("mouseup", handleMouseUp);
54
+ document.body.style.userSelect = "none"; // Prevent text selection during drag
55
+ document.body.style.cursor = "ew-resize";
56
+ }
57
+
58
+ return () => {
59
+ document.removeEventListener("mousemove", handleMouseMove);
60
+ document.removeEventListener("mouseup", handleMouseUp);
61
+ document.body.style.userSelect = "";
62
+ document.body.style.cursor = "";
63
+ };
64
+ }, [isResizing, handleMouseMove, handleMouseUp]);
65
+
66
+ return (
67
+ <ErrorBoundary componentName="TranscriptionPage">
68
+ <div className="flex h-screen bg-gray-900 relative" {...dragProps}>
69
+ {/* Drag Overlay */}
70
+ {isDragActive && (
71
+ <div className="absolute inset-0 bg-blue-900/80 border-4 border-dashed border-blue-400 z-50 flex items-center justify-center">
72
+ <div className="text-center text-white">
73
+ <CloudArrowUpIcon className="w-16 h-16 mx-auto mb-4 text-blue-300" />
74
+ <div className="text-2xl font-semibold mb-2">
75
+ Drop your audio file here
76
+ </div>
77
+ <div className="text-lg text-blue-200 mb-4">
78
+ Supports audio files only
79
+ </div>
80
+
81
+ {/* Audio formats section */}
82
+ <div className="text-center mb-3">
83
+ <div className="text-sm font-medium text-blue-300 mb-1">
84
+ Audio Formats
85
+ </div>
86
+ <div className="text-xs text-blue-100 opacity-90">
87
+ {SUPPORTED_AUDIO_FORMATS.join(" • ")}
88
+ </div>
89
+ </div>
90
+
91
+ {/* Codec info */}
92
+ <div className="text-center">
93
+ <div className="text-xs text-blue-200 opacity-75">
94
+ Best with standard codecs:{" "}
95
+ {CODEC_INFO.audio.common.slice(0, 2).join(", ")}
96
+ </div>
97
+ </div>
98
+ </div>
99
+ </div>
100
+ )}
101
+
102
+ {/* Sidebar with Resize Handle */}
103
+ <div className="relative flex">
104
+ <div
105
+ ref={sidebarRef}
106
+ className="flex-shrink-0 bg-gray-800 text-white overflow-y-auto"
107
+ style={{width: `${sidebarWidth}px`}}
108
+ >
109
+ <TranscriptionSideBar />
110
+ </div>
111
+
112
+ {/* Drag Handle */}
113
+ <div
114
+ className="w-1 bg-gray-600 hover:bg-gray-500 cursor-ew-resize flex-shrink-0 transition-colors duration-150"
115
+ onMouseDown={handleMouseDown}
116
+ title="Drag to resize sidebar"
117
+ />
118
+ </div>
119
+
120
+ {/* Main Content */}
121
+ <ErrorBoundary componentName="TranscriptionPlayer">
122
+ {isRecording ? (
123
+ <div className="flex-1 flex items-center justify-center bg-gray-900">
124
+ <MediaRecorder
125
+ onComplete={() => stopRecording()}
126
+ onCancel={() => stopRecording()}
127
+ />
128
+ </div>
129
+ ) : (
130
+ <TranscriptionPlayer />
131
+ )}
132
+ </ErrorBoundary>
133
+ </div>
134
+ </ErrorBoundary>
135
+ );
136
+ }
frontend/src/services/transcriptionApi.ts ADDED
@@ -0,0 +1,273 @@
1
+ // API service for transcription functionality
2
+
3
+ // Common API error handling and utilities
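+ // ApiError carries the HTTP status plus an isServerBusy flag so callers can special-case 503 responses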
4
+ class ApiError extends Error {
5
+ constructor(message: string, public status?: number, public isServerBusy: boolean = false) {
6
+ super(message);
7
+ this.name = 'ApiError';
8
+ }
9
+ }
10
+
11
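+ // Base URL comes from the Vite env var VITE_SERVER_URL; an empty string makes requests same-origin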
+ const getServerUrl = (): string => import.meta.env.VITE_SERVER_URL || "";
12
+
13
+ // Centralized fetch wrapper with consistent error handling
14
+ const fetchApi = async (
15
+ endpoint: string,
16
+ options: RequestInit = {},
17
+ expectJson: boolean = true
18
+ ): Promise<any> => {
19
+ const response = await fetch(endpoint, options);
20
+
21
+ if (!response.ok) {
22
+ let errorMessage = `HTTP error! status: ${response.status}`;
23
+ let isServerBusy = false;
24
+
25
+ // Try to extract error details from response
26
+ try {
27
+ const errorData = await response.json();
28
+ errorMessage = errorData?.error || errorMessage;
29
+
30
+ if (response.status === 503) {
31
+ isServerBusy = true;
32
+ errorMessage = `Server busy: ${errorData?.error || 'Server is currently processing another request'}`;
33
+ }
34
+ } catch {
35
+ // If JSON parsing fails, use default error message
36
+ if (response.status === 503) {
37
+ isServerBusy = true;
38
+ errorMessage = 'Server busy: Server is currently processing another request';
39
+ }
40
+ }
41
+
42
+ throw new ApiError(errorMessage, response.status, isServerBusy);
43
+ }
44
+
45
+ if (expectJson) {
46
+ return response.json();
47
+ }
48
+
49
+ return response;
50
+ };
51
+
52
+ // Create form data helper
53
+ const createFormData = (data: Record<string, string | File | Blob>): FormData => {
54
+ const formData = new FormData();
55
+ Object.entries(data).forEach(([key, value]) => {
56
+ formData.append(key, value);
57
+ });
58
+ return formData;
59
+ };
60
+
61
+ export interface AlignedSegment {
62
+ duration: number;
63
+ end: number;
64
+ start: number;
65
+ text: string;
66
+ chunk_index?: number;
67
+ speech_segment_index?: number;
68
+ // Merge history to allow intelligent splitting
69
+ mergedFrom?: AlignedSegment[];
70
+ mergeThreshold?: number; // The threshold used when this merge was created
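+ // e.g. a segment merged at a 10s threshold keeps its source segments in mergedFrom, so lowering the threshold below 10s can split it apart again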
71
+ }
72
+
73
+ export interface ChunkInfo {
74
+ chunk_index: number;
75
+ start_time: number;
76
+ end_time: number;
77
+ duration: number;
78
+ num_segments: number;
79
+ transcription: string;
80
+ }
81
+
82
+ export interface PreprocessedAudio {
83
+ data: string; // base64 encoded audio data
84
+ format: string; // "wav"
85
+ sample_rate: number;
86
+ duration: number;
87
+ size_bytes: number;
88
+ }
89
+
90
+ export interface TranscriptionResponse {
91
+ aligned_segments: AlignedSegment[];
92
+ alignment_available?: boolean;
93
+ device?: string;
94
+ model: string;
95
+ num_segments: number;
96
+ status: string;
97
+ total_duration: number;
98
+ transcription: string;
99
+ // Long-form specific fields
100
+ chunks?: ChunkInfo[];
101
+ num_chunks?: number;
102
+ // Preprocessed audio data
103
+ preprocessed_audio?: PreprocessedAudio;
104
+ }
105
+
106
+ export interface ServerStatus {
107
+ is_busy: boolean;
108
+ current_operation?: string;
109
+ current_filename?: string;
110
+ progress?: number;
111
+ duration_seconds?: number;
112
+ total_completed: number;
113
+ }
114
+
115
+ export interface HealthResponse {
116
+ status: string;
117
+ message: string;
118
+ version: string;
119
+ service: string;
120
+ device: string;
121
+ cuda_available: boolean;
122
+ ffmpeg_available: boolean;
123
+ transcription_status: ServerStatus;
124
+ gpu_count?: number;
125
+ current_device?: number;
126
+ gpu_name?: string;
127
+ gpu_memory_allocated_mb?: number;
128
+ gpu_memory_reserved_mb?: number;
129
+ gpu_memory_total_mb?: number;
130
+ gpu_memory_free_mb?: number;
131
+ }
132
+
133
+ // Main transcription API function
134
+ export const transcribeAudio = async (
135
+ file: File,
136
+ languageCode?: string | null,
137
+ scriptCode?: string | null,
138
+ onVideoProcessing?: (isProcessing: boolean) => void
139
+ ): Promise<TranscriptionResponse> => {
140
+ // Determine if this is a video file for UI feedback
141
+ const isVideoFile = file.type.startsWith("video/");
142
+
143
+ if (isVideoFile) {
144
+ onVideoProcessing?.(true);
145
+ console.log("Processing video file on server side:", file.name);
146
+ }
147
+
148
+ try {
149
+ // Create form data with unified 'media' field
150
+ const formData = createFormData({
151
+ media: file // Single 'media' parameter for all file types
152
+ });
153
+
154
+ // Combine language and script codes for server if both are specified
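+ // e.g. language "eng" with script "Latn" is sent as "eng_Latn"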
155
+ if (languageCode && scriptCode) {
156
+ const combinedLanguage = `${languageCode}_${scriptCode}`;
157
+ formData.append("language", combinedLanguage);
158
+ }
159
+
160
+ // Request preprocessed audio for waveform generation
161
+ formData.append("include_preprocessed", "true");
162
+
163
+ console.log('transcribeAudio - About to make API call with formData:', {
164
+ fileName: file.name,
165
+ fileType: file.type,
166
+ fileSize: file.size,
167
+ hasLanguage: !!languageCode && !!scriptCode,
168
+ combinedLanguage: languageCode && scriptCode ? `${languageCode}_${scriptCode}` : null
169
+ });
170
+
171
+ // Debug: Check if the File object is still valid
172
+ if (file instanceof File) {
173
+ console.log('transcribeAudio - File is valid File object');
174
+
175
+ // Try to read a small portion to ensure it's accessible
176
+ try {
177
+ const slice = file.slice(0, 100);
178
+ const arrayBuffer = await slice.arrayBuffer();
179
+ console.log('transcribeAudio - File slice readable, first 100 bytes length:', arrayBuffer.byteLength);
180
+ } catch (error) {
181
+ console.error('transcribeAudio - File slice read failed:', error);
182
+ }
183
+ } else {
184
+ console.error('transcribeAudio - File is not a valid File object:', file);
185
+ }
186
+
187
+ const result = await fetchApi(`${getServerUrl()}/transcribe`, {
188
+ method: "POST",
189
+ body: formData,
190
+ });
191
+
192
+ if (result.status !== "success") {
193
+ throw new Error("Transcription failed");
194
+ }
195
+
196
+ return result;
197
+
198
+ } finally {
199
+ if (isVideoFile) {
200
+ onVideoProcessing?.(false);
201
+ }
202
+ }
203
+ };
204
+
205
+ // Server status API functions
206
+ export const getServerStatus = async (): Promise<ServerStatus> => {
207
+ return fetchApi(`${getServerUrl()}/status`);
208
+ };
209
+
210
+ export const getServerHealth = async (): Promise<HealthResponse> => {
211
+ return fetchApi(`${getServerUrl()}/health`);
212
+ };
213
+
214
+ // Video + Subtitles combination API function
215
+ export const combineVideoWithSubtitles = async (
216
+ videoFile: File,
217
+ subtitleContent: string,
218
+ language: string = 'eng',
219
+ format: 'srt' | 'webvtt' = 'srt',
220
+ outputFormat: 'mp4' | 'mkv' = 'mp4'
221
+ ): Promise<Blob> => {
222
+ const formData = createFormData({
223
+ video: videoFile,
224
+ subtitles: subtitleContent,
225
+ format,
226
+ output_format: outputFormat,
227
+ language
228
+ });
229
+
230
+ const response = await fetchApi(
231
+ `${getServerUrl()}/combine-video-subtitles`,
232
+ { method: 'POST', body: formData },
233
+ false // Don't expect JSON, expect blob
234
+ );
235
+
236
+ return response.blob();
237
+ };
238
+
239
+ // Cache for supported languages
240
+ let supportedLanguagesCache: string[] | null = null;
241
+ let supportedLanguagesPromise: Promise<string[]> | null = null;
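+ // Caching the in-flight promise lets concurrent callers share one request instead of issuing duplicates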
242
+
243
+ // Get supported languages with caching
244
+ export const getSupportedLanguages = async (): Promise<string[]> => {
245
+ // Return from cache if available
246
+ if (supportedLanguagesCache) {
247
+ return supportedLanguagesCache;
248
+ }
249
+
250
+ // Return existing promise if already in flight
251
+ if (supportedLanguagesPromise) {
252
+ return supportedLanguagesPromise;
253
+ }
254
+
255
+ // Create new promise and cache it
256
+ supportedLanguagesPromise = (async () => {
257
+ try {
258
+ const response = await fetchApi(`${getServerUrl()}/supported-languages`);
259
+ const languages = response.supported_languages;
260
+
261
+ // Cache the result
262
+ supportedLanguagesCache = languages;
263
+
264
+ return languages;
265
+ } catch (error) {
266
+ // Reset promise on error so we can retry
267
+ supportedLanguagesPromise = null;
268
+ throw error;
269
+ }
270
+ })();
271
+
272
+ return supportedLanguagesPromise;
273
+ };
frontend/src/stores/transcriptionStore.ts ADDED
@@ -0,0 +1,1161 @@
1
+ import {create} from "zustand";
2
+ import {devtools} from "zustand/middleware";
3
+ import debounce from "debounce";
4
+ import {
5
+ TranscriptionResponse,
6
+ PreprocessedAudio,
7
+ transcribeAudio,
8
+ AlignedSegment,
9
+ ServerStatus,
10
+ getServerStatus,
11
+ HealthResponse,
12
+ getServerHealth,
13
+ } from "../services/transcriptionApi";
14
+ import {generateSRT, downloadVideoWithSubtitles} from "../utils/subtitleUtils";
15
+ import {
16
+ trackTranscriptionStart,
17
+ trackTranscriptionComplete,
18
+ trackTranscriptionError,
19
+ trackFileUpload,
20
+ trackLanguageChange,
21
+ trackDownloadVideoWithSubtitles,
22
+ } from "../analytics/gaEvents";
23
+
24
+ // Helper function to find the active segment based on current time
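+ // Linear scan; returns the first segment whose inclusive [start, end] range contains the time, or null if none does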
25
+ const findActiveSegmentIndex = (
26
+ segments: AlignedSegment[],
27
+ currentTime: number
28
+ ): number | null => {
29
+ for (let i = 0; i < segments.length; i++) {
30
+ const segment = segments[i];
31
+ if (currentTime >= segment.start && currentTime <= segment.end) {
32
+ return i;
33
+ }
34
+ }
35
+ return null;
36
+ };
37
+
38
+ // Types for our store state
39
+ interface TranscriptionState {
40
+ // File and media state
41
+ file: File | null;
42
+ mediaUrl: string | null;
43
+
44
+ // Recording state
45
+ isRecording: boolean;
46
+ recordingType: "audio" | "video" | null;
47
+ recordedBlob: Blob | null;
48
+
49
+ // Media refs for seeking (set by components)
50
+ audioRef: React.RefObject<HTMLAudioElement> | null;
51
+ videoRef: React.RefObject<HTMLVideoElement> | null;
52
+
53
+ // Transcription state
54
+ transcription: TranscriptionResponse | null;
55
+ preprocessedAudio: PreprocessedAudio | null;
56
+ currentTime: number;
57
+ activeSegmentIndex: number | null;
58
+ currentSegments: AlignedSegment[] | null;
59
+
60
+ // Edit state
61
+ selectedSegmentIndex: number | null;
62
+
63
+ // Viewport state for minimap
64
+ viewportStart: number;
65
+ viewportEnd: number;
66
+
67
+ // History state for undo/redo
68
+ history: AlignedSegment[][];
69
+ historyIndex: number;
70
+
71
+ // Loading and error state
72
+ isLoading: boolean;
73
+ isProcessingVideo: boolean;
74
+ isDownloadingVideo: boolean;
75
+ error: string | null;
76
+
77
+ // Language selection
78
+ selectedLanguage: string | null;
79
+ selectedScript: string | null;
80
+
81
+ // Server status
82
+ serverStatus: ServerStatus | null;
83
+ serverHealth: HealthResponse | null;
84
+ isPollingStatus: boolean;
85
+ statusPollingInterval: number | null;
86
+
87
+ // Modal state
88
+ showWelcomeModal: boolean;
89
+
90
+ // Computed properties
91
+ isVideoFile: boolean;
92
+ hasFile: boolean;
93
+ hasTranscription: boolean;
94
+
95
+ // Actions
96
+ setFile: (file: File | null) => void;
97
+ setTranscription: (transcription: TranscriptionResponse | null) => void;
98
+
99
+ // Recording actions
100
+ startRecording: (type: "audio" | "video") => void;
101
+ stopRecording: () => void;
102
+ setRecordedBlob: (blob: Blob | null) => void;
103
+ setCurrentTime: (time: number) => void;
104
+ setActiveSegmentIndex: (index: number | null) => void;
105
+ setIsLoading: (loading: boolean) => void;
106
+ setIsProcessingVideo: (processing: boolean) => void;
107
+ setIsDownloadingVideo: (downloading: boolean) => void;
108
+ setError: (error: string | null) => void;
109
+ setSelectedLanguage: (language: string | null) => void;
110
+ setSelectedScript: (script: string | null) => void;
111
+ setSelectedLanguageAndScript: (
112
+ language: string | null,
113
+ script: string | null
114
+ ) => void;
115
+
116
+ // Modal actions
117
+ setShowWelcomeModal: (show: boolean) => void;
118
+
119
+ // Media control actions
120
+ setMediaRefs: (
121
+ audioRef: React.RefObject<HTMLAudioElement>,
122
+ videoRef: React.RefObject<HTMLVideoElement>
123
+ ) => void;
124
+ seekToTime: (time: number) => void;
125
+
126
+ // Server status actions
127
+ setServerStatus: (status: ServerStatus | null) => void;
128
+ setServerHealth: (health: HealthResponse | null) => void;
129
+ fetchServerStatus: () => Promise<void>;
130
+ fetchServerHealth: () => Promise<void>;
131
+ startStatusPolling: () => void;
132
+ stopStatusPolling: () => void;
133
+
134
+ // Edit actions
135
+ setSelectedSegmentIndex: (index: number | null) => void;
136
+ updateSegmentTiming: (
137
+ index: number,
138
+ start: number,
139
+ end: number,
140
+ deferSorting?: boolean
141
+ ) => void;
142
+ updateSegmentText: (index: number, text: string) => void;
143
+ deleteSegment: (index: number) => void;
144
+ mergeSegmentsByProximity: (maxDurationSeconds: number) => void;
145
+ finalizeSegmentPositioning: () => void;
146
+
147
+ // Viewport actions
148
+ setViewport: (start: number, end: number) => void;
149
+ initializeViewport: (duration: number) => void;
150
+
151
+ // History actions
152
+ undo: () => void;
153
+ redo: () => void;
154
+ canUndo: boolean;
155
+ canRedo: boolean;
156
+
157
+ // Helper functions
158
+ _recordHistoryImmediate: (segments: AlignedSegment[]) => void;
159
+ _recordHistoryDebounced: (segments: AlignedSegment[]) => void;
+ initializeHistory: () => void; // declared so the implementation below type-checks
160
+
161
+ // Complex actions
162
+ handleFileSelect: (file: File) => void;
163
+ handleTranscribe: () => Promise<void>;
164
+ handleTimeUpdate: () => void;
165
+ handleDownloadVideoWithSubtitles: () => Promise<void>;
166
+ reset: () => void;
167
+ }
168
+
169
+ // Initial state
170
+ const initialState = {
171
+ file: null,
172
+ mediaUrl: null,
173
+ audioRef: null,
174
+ videoRef: null,
175
+
176
+ // Recording state
177
+ isRecording: false,
178
+ recordingType: null,
179
+ recordedBlob: null,
180
+ transcription: null,
181
+ preprocessedAudio: null,
182
+ currentTime: 0,
183
+ activeSegmentIndex: null,
184
+ selectedSegmentIndex: null,
185
+ history: [],
186
+ historyIndex: -1,
187
+ isLoading: false,
188
+ isProcessingVideo: false,
189
+ isDownloadingVideo: false,
190
+ error: null,
191
+ selectedLanguage: null,
192
+ selectedScript: null,
193
+ currentSegments: null,
194
+ viewportStart: 0,
195
+ viewportEnd: 30, // Default to first 30 seconds
196
+ showWelcomeModal: true, // Show modal on app load
197
+ };
198
+
199
+ export const useTranscriptionStore = create<TranscriptionState>()(
200
+ devtools(
201
+ (set, get) => ({
202
+ ...initialState,
203
+ // Server status state
204
+ serverStatus: null,
205
+ serverHealth: null,
206
+ isPollingStatus: false,
207
+ statusPollingInterval: null,
208
+
209
+ // Computed properties - these will be updated when relevant state changes
210
+ isVideoFile: false,
211
+ hasFile: false,
212
+ hasTranscription: false,
213
+
214
+ canUndo: false,
215
+ canRedo: false,
216
+
217
+ // Simple setters
218
+ setFile: (file) => {
219
+ const {mediaUrl, showWelcomeModal} = get();
220
+
221
+ // Clean up previous media URL
222
+ if (mediaUrl) {
223
+ URL.revokeObjectURL(mediaUrl);
224
+ }
225
+
226
+ set({
227
+ ...initialState,
228
+ // Override only file-specific properties
229
+ file,
230
+ mediaUrl: file ? URL.createObjectURL(file) : null,
231
+ isVideoFile: file?.type.startsWith("video/") ?? false,
232
+ hasFile: !!file,
233
+ hasTranscription: false,
234
+ // Preserve the modal state - don't reset it
235
+ showWelcomeModal,
236
+ });
237
+ },
238
+ setTranscription: (transcription) => {
239
+ set({
240
+ transcription,
241
+ preprocessedAudio: transcription?.preprocessed_audio || null,
242
+ hasTranscription: !!transcription,
243
+ currentSegments: transcription?.aligned_segments || null,
244
+ });
245
+
246
+ // Initialize history when transcription is first set
247
+ if (transcription && transcription.aligned_segments) {
248
+ const segments = [...transcription.aligned_segments];
249
+ set({
250
+ history: [segments],
251
+ historyIndex: 0,
252
+ canUndo: false,
253
+ canRedo: false,
254
+ });
255
+ }
256
+ },
257
+
258
+ handleTimeUpdate: () => {
259
+ const {audioRef, videoRef, transcription} = get();
260
+ const mediaElement = audioRef?.current || videoRef?.current;
261
+
262
+ if (mediaElement && transcription) {
263
+ const currentTime = mediaElement.currentTime;
264
+ const activeIndex = findActiveSegmentIndex(
265
+ transcription.aligned_segments,
266
+ currentTime
267
+ );
268
+
269
+ set({
270
+ currentTime,
271
+ activeSegmentIndex: activeIndex,
272
+ });
273
+ }
274
+ },
275
+ setCurrentTime: (currentTime) => set({currentTime}),
276
+ setActiveSegmentIndex: (activeSegmentIndex) => set({activeSegmentIndex}),
277
+ setIsLoading: (isLoading) => set({isLoading}),
278
+ setIsProcessingVideo: (isProcessingVideo) => set({isProcessingVideo}),
279
+ setIsDownloadingVideo: (isDownloadingVideo) => set({isDownloadingVideo}),
280
+ setError: (error) => set({error}),
281
+ setSelectedLanguage: (selectedLanguage) => {
282
+ // Track language selection
283
+ if (selectedLanguage) {
284
+ trackLanguageChange(selectedLanguage);
285
+ }
286
+ set({selectedLanguage});
287
+ },
288
+ setSelectedScript: (selectedScript) => set({selectedScript}),
289
+ setSelectedLanguageAndScript: (selectedLanguage, selectedScript) => {
290
+ // Track language selection
291
+ if (selectedLanguage) {
292
+ trackLanguageChange(selectedLanguage);
293
+ }
294
+ set({selectedLanguage, selectedScript});
295
+ },
296
+
297
+ // Modal actions
298
+ setShowWelcomeModal: (showWelcomeModal) => set({showWelcomeModal}),
299
+
300
+ // Media control actions
301
+ setMediaRefs: (audioRef, videoRef) => set({audioRef, videoRef}),
302
+ seekToTime: (time) => {
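+ // Clamps the target to [0, duration]; duration can be NaN before metadata loads, hence the Infinity fallback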
303
+ const {audioRef, videoRef} = get();
304
+ const mediaElement = audioRef?.current || videoRef?.current;
305
+ if (mediaElement) {
306
+ const seekTime = Math.max(
307
+ 0,
308
+ Math.min(time, mediaElement.duration || Infinity)
309
+ );
310
+ mediaElement.currentTime = seekTime;
311
+ // Immediately update current time to trigger auto-scroll
312
+ set({currentTime: seekTime});
313
+ }
314
+ },
315
+
316
+ // Server status actions
317
+ setServerStatus: (serverStatus) => set({serverStatus}),
318
+ setServerHealth: (serverHealth) => set({serverHealth}),
319
+
320
+ fetchServerStatus: async () => {
321
+ try {
322
+ const status = await getServerStatus();
323
+ set({serverStatus: status});
324
+ } catch (error) {
325
+ console.error("Failed to fetch server status:", error);
326
+ }
327
+ },
328
+
329
+ fetchServerHealth: async () => {
330
+ try {
331
+ const health = await getServerHealth();
332
+ set({serverHealth: health});
333
+ } catch (error) {
334
+ console.error("Failed to fetch server health:", error);
335
+ }
336
+ },
337
+
338
+ startStatusPolling: () => {
339
+ const {isPollingStatus, statusPollingInterval} = get();
340
+
341
+ if (isPollingStatus) {
342
+ return; // Already polling
343
+ }
344
+
345
+ // Clear any existing interval
346
+ if (statusPollingInterval) {
347
+ clearInterval(statusPollingInterval);
348
+ }
349
+
350
+ const {fetchServerStatus} = get();
351
+
352
+ // Fetch immediately
353
+ fetchServerStatus();
354
+
355
+ // Then poll every 2 seconds
356
+ const interval = setInterval(() => {
357
+ fetchServerStatus();
358
+ }, 2000);
359
+
360
+ set({
361
+ isPollingStatus: true,
362
+ statusPollingInterval: interval,
363
+ });
364
+ },
365
+
366
+ stopStatusPolling: () => {
367
+ const {statusPollingInterval} = get();
368
+
369
+ if (statusPollingInterval) {
370
+ clearInterval(statusPollingInterval);
371
+ }
372
+
373
+ set({
374
+ isPollingStatus: false,
375
+ statusPollingInterval: null,
376
+ });
377
+ },
378
+
379
+ // Helper function to record history immediately (for instant actions like delete)
380
+ _recordHistoryImmediate: (segments: AlignedSegment[]) => {
381
+ const {history, historyIndex} = get();
382
+
383
+ // Remove any history after current index (when we're not at the end)
384
+ const newHistory = history.slice(0, historyIndex + 1);
385
+
386
+ // Add new state to history
387
+ newHistory.push([...segments]);
388
+
389
+ // Limit history size to prevent memory issues (keep last 50 states)
390
+ const maxHistorySize = 50;
391
+ const newIndex = newHistory.length - 1;
392
+
393
+ if (newHistory.length > maxHistorySize) {
394
+ newHistory.shift();
395
+ const adjustedIndex = newIndex - 1;
396
+ set({
397
+ history: newHistory,
398
+ historyIndex: adjustedIndex,
399
+ canUndo: adjustedIndex > 0,
400
+ canRedo: false, // Always false when adding new history
401
+ });
402
+ } else {
403
+ set({
404
+ history: newHistory,
405
+ historyIndex: newIndex,
406
+ canUndo: newIndex > 0,
407
+ canRedo: false, // Always false when adding new history
408
+ });
409
+ }
410
+ },
411
+
412
+ // Debounced history recording method
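+ // Coalesces rapid edits (dragging, typing, slider changes) into a single undo step after 500ms of inactivity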
413
+ _recordHistoryDebounced: debounce((segments: AlignedSegment[]) => {
414
+ const {_recordHistoryImmediate} = get();
415
+ _recordHistoryImmediate(segments);
416
+ }, 500),
417
+
418
+ // Edit mode actions
419
+ // Initialize history for undo/redo (called automatically when transcription is set)
420
+ initializeHistory: () => {
421
+ const {transcription, history} = get();
422
+ if (!transcription || history.length > 0) return;
423
+
424
+ const segments = [...transcription.aligned_segments];
425
+ set({
426
+ history: [segments],
427
+ historyIndex: 0,
428
+ canUndo: false,
429
+ canRedo: false,
430
+ });
431
+ },
432
+
433
+ setSelectedSegmentIndex: (selectedSegmentIndex) => {
434
+ set({selectedSegmentIndex});
435
+ },
436
+
437
+ updateSegmentTiming: (
438
+ index: number,
439
+ start: number,
440
+ end: number,
441
+ deferSorting: boolean = false
442
+ ) => {
443
+ const {
444
+ currentSegments,
445
+ transcription,
446
+ selectedSegmentIndex,
447
+ _recordHistoryDebounced,
448
+ } = get();
449
+ if (
450
+ !currentSegments ||
451
+ !transcription ||
452
+ index < 0 ||
453
+ index >= currentSegments.length
454
+ )
455
+ return;
456
+
457
+ const updatedSegments = [...currentSegments];
458
+ updatedSegments[index] = {
459
+ ...updatedSegments[index],
460
+ start,
461
+ end,
462
+ duration: end - start,
463
+ };
464
+
465
+ // If deferSorting is true (during drag operations), just update without re-sorting
466
+ if (deferSorting) {
467
+ // Update both transcription and current segments without re-sorting
468
+ const updatedTranscription = {
469
+ ...transcription,
470
+ aligned_segments: updatedSegments,
471
+ };
472
+
473
+ set({
474
+ transcription: updatedTranscription,
475
+ currentSegments: updatedSegments,
476
+ });
477
+
478
+ // Don't record history during intermediate drag updates
479
+ return;
480
+ }
481
+
482
+ // Normal operation: re-sort segments by start time to maintain chronological order
483
+ const sortedSegments = [...updatedSegments].sort(
484
+ (a, b) => a.start - b.start
485
+ );
486
+
487
+ // Find the new index of the moved segment after sorting
488
+ const movedSegment = updatedSegments[index];
489
+ const newIndex = sortedSegments.findIndex(
490
+ (seg) =>
491
+ seg.start === movedSegment.start &&
492
+ seg.end === movedSegment.end &&
493
+ seg.text === movedSegment.text
494
+ );
495
+
496
+ // Update selected segment index if it was the one being moved
497
+ let newSelectedIndex = selectedSegmentIndex;
498
+ if (selectedSegmentIndex === index) {
499
+ newSelectedIndex = newIndex;
500
+ } else if (selectedSegmentIndex !== null) {
501
+ // Find where the currently selected segment ended up after sorting
502
+ const selectedSegment = updatedSegments[selectedSegmentIndex];
503
+ newSelectedIndex = sortedSegments.findIndex(
504
+ (seg) =>
505
+ seg.start === selectedSegment.start &&
506
+ seg.end === selectedSegment.end &&
507
+ seg.text === selectedSegment.text
508
+ );
509
+ }
510
+
511
+ // Update both transcription and current segments
512
+ const updatedTranscription = {
513
+ ...transcription,
514
+ aligned_segments: sortedSegments,
515
+ };
516
+
517
+ set({
518
+ transcription: updatedTranscription,
519
+ currentSegments: sortedSegments,
520
+ selectedSegmentIndex: newSelectedIndex,
521
+ });
522
+
523
+ // Record history with debounce for drag operations
524
+ _recordHistoryDebounced(sortedSegments);
525
+ },
526
+
527
+ // New method to finalize segment positioning after drag operations
528
+ finalizeSegmentPositioning: () => {
529
+ const {currentSegments, transcription, selectedSegmentIndex} = get();
530
+ if (!currentSegments || !transcription) return;
531
+
532
+ // Re-sort segments by start time
533
+ const sortedSegments = [...currentSegments].sort(
534
+ (a, b) => a.start - b.start
535
+ );
536
+
537
+ // Update selected segment index to reflect new position
538
+ let newSelectedIndex = selectedSegmentIndex;
539
+ if (selectedSegmentIndex !== null) {
540
+ const selectedSegment = currentSegments[selectedSegmentIndex];
541
+ newSelectedIndex = sortedSegments.findIndex(
542
+ (seg) =>
543
+ seg.start === selectedSegment.start &&
544
+ seg.end === selectedSegment.end &&
545
+ seg.text === selectedSegment.text
546
+ );
547
+ }
548
+
549
+ // Update both transcription and current segments
550
+ const updatedTranscription = {
551
+ ...transcription,
552
+ aligned_segments: sortedSegments,
553
+ };
554
+
555
+ set({
556
+ transcription: updatedTranscription,
557
+ currentSegments: sortedSegments,
558
+ selectedSegmentIndex: newSelectedIndex,
559
+ });
560
+ },
561
+
562
+ updateSegmentText: (index: number, text: string) => {
563
+ const {currentSegments, transcription, _recordHistoryDebounced} = get();
564
+ if (
565
+ !currentSegments ||
566
+ !transcription ||
567
+ index < 0 ||
568
+ index >= currentSegments.length
569
+ )
570
+ return;
571
+
572
+ const updatedSegments = [...currentSegments];
573
+ updatedSegments[index] = {
574
+ ...updatedSegments[index],
575
+ text,
576
+ };
577
+
578
+ // Update both transcription and current segments
579
+ const updatedTranscription = {
580
+ ...transcription,
581
+ aligned_segments: updatedSegments,
582
+ };
583
+
584
+ set({
585
+ transcription: updatedTranscription,
586
+ currentSegments: updatedSegments,
587
+ });
588
+
589
+ // Record history with debounce for text changes
590
+ _recordHistoryDebounced(updatedSegments);
591
+ },
592
+
593
+ deleteSegment: (index: number) => {
594
+ const {
595
+ currentSegments,
596
+ transcription,
597
+ selectedSegmentIndex,
598
+ _recordHistoryImmediate,
599
+ } = get();
600
+ if (
601
+ !currentSegments ||
602
+ !transcription ||
603
+ index < 0 ||
604
+ index >= currentSegments.length
605
+ )
606
+ return;
607
+
608
+ const updatedSegments = currentSegments.filter(
609
+ (_: AlignedSegment, i: number) => i !== index
610
+ );
611
+
612
+ // Adjust selected segment index if necessary
613
+ let newSelectedIndex = selectedSegmentIndex;
614
+ if (selectedSegmentIndex === index) {
615
+ newSelectedIndex = null; // Clear selection if we deleted the selected segment
616
+ } else if (
617
+ selectedSegmentIndex !== null &&
618
+ selectedSegmentIndex > index
619
+ ) {
620
+ newSelectedIndex = selectedSegmentIndex - 1; // Adjust index if selected segment was after deleted one
621
+ }
622
+
623
+ // Update both transcription and current segments
624
+ const updatedTranscription = {
625
+ ...transcription,
626
+ aligned_segments: updatedSegments,
627
+ };
628
+
629
+ set({
630
+ transcription: updatedTranscription,
631
+ currentSegments: updatedSegments,
632
+ selectedSegmentIndex: newSelectedIndex,
633
+ });
634
+
635
+ // Record history immediately for deletions since they're instant actions
636
+ _recordHistoryImmediate(updatedSegments);
637
+ },
638
+
639
+ mergeSegmentsByProximity: (maxDurationSeconds: number) => {
640
+ const {
641
+ transcription,
642
+ currentSegments,
644
+ _recordHistoryDebounced,
645
+ } = get();
646
+ if (!transcription) return;
647
+
648
+ console.log(`Merge threshold changed to: ${maxDurationSeconds}s`);
649
+
650
+ // Always use current segments - this is the source of truth for user's changes
651
+ if (!currentSegments) {
652
+ console.warn("No currentSegments available for merging");
653
+ return;
654
+ }
655
+
656
+ // const originalSegments = history.length > 0 ? [...history[0]] : [...transcription.aligned_segments];
657
+
658
+ // // If threshold is 0, reset to original segments (clear all merge history)
659
+ // if (maxDurationSeconds === 0) {
660
+ // console.log(`Resetting to original ${originalSegments.length} segments`);
661
+
662
+ // // Strip merge history from original segments
663
+ // const cleanedOriginals = originalSegments.map(segment => ({
664
+ // ...segment,
665
+ // mergedFrom: undefined,
666
+ // mergeThreshold: undefined,
667
+ // }));
668
+
669
+ // const updatedTranscription = {
670
+ // ...transcription,
671
+ // aligned_segments: cleanedOriginals,
672
+ // };
673
+
674
+ // set({
675
+ // transcription: updatedTranscription,
676
+ // currentSegments: cleanedOriginals,
677
+ // selectedSegmentIndex: null,
678
+ // });
679
+ // return;
680
+ // }
681
+
682
+ // Step 1: Intelligently split segments that were merged at higher thresholds
683
+ const splitSegmentsRecursively = (
684
+ segment: AlignedSegment
685
+ ): AlignedSegment[] => {
686
+ // If this segment has no merge history or was merged at/below current threshold, keep it
687
+ if (
688
+ !segment.mergedFrom ||
689
+ !segment.mergeThreshold ||
690
+ segment.mergeThreshold <= maxDurationSeconds
691
+ ) {
692
+ return [segment];
693
+ }
694
+
695
+ // This segment was merged above the current threshold, split it
696
+ console.log(
697
+ `Splitting segment merged at ${segment.mergeThreshold}s (current threshold: ${maxDurationSeconds}s)`
698
+ );
699
+
700
+ // Calculate the relative positions of constituents within the current segment's timing
701
+ const currentStart = segment.start;
702
+ const currentEnd = segment.end;
703
+ const currentDuration = currentEnd - currentStart;
704
+
705
+ // Get the original combined duration when constituents were first merged
706
+ const constituents = segment.mergedFrom;
707
+ const originalTotalDuration = constituents.reduce(
708
+ (sum, c) => sum + c.duration,
709
+ 0
710
+ );
711
+
712
+ // Position each constituent relative to current segment position
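+ // e.g. constituents of 2s and 1s inside a segment resized to 6s become 4s and 2s, preserving the 2:1 ratio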
713
+ let cumulativeTime = 0;
714
+ const repositionedConstituents = constituents
715
+ .map((constituent) => {
716
+ const relativeStart = cumulativeTime / originalTotalDuration;
717
+ const relativeEnd =
718
+ (cumulativeTime + constituent.duration) / originalTotalDuration;
719
+
720
+ const newStart = currentStart + relativeStart * currentDuration;
721
+ const newEnd = currentStart + relativeEnd * currentDuration;
722
+
723
+ cumulativeTime += constituent.duration;
724
+
725
+ const repositioned: AlignedSegment = {
726
+ ...constituent,
727
+ start: newStart,
728
+ end: newEnd,
729
+ duration: newEnd - newStart,
730
+ };
731
+
732
+ // Recursively split this constituent if needed
733
+ return splitSegmentsRecursively(repositioned);
734
+ })
735
+ .flat();
736
+
737
+ return repositionedConstituents;
738
+ };
739
+
740
+ let segments = currentSegments.flatMap((segment) =>
741
+ splitSegmentsRecursively(segment)
742
+ );
743
+
744
+ console.log(`After splitting: ${segments.length} segments`);
745
+
746
+ // Step 2: Merge segments that can be merged at the current threshold
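+ // Greedy: each pass merges the adjacent pair with the smallest gap whose combined span still fits the threshold, then rescans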
747
+ const originalCount = segments.length;
748
+ let merged = true;
749
+
750
+ while (merged && segments.length > 1) {
751
+ merged = false;
752
+ let closestDistance = Infinity;
753
+ let closestPair = -1;
754
+
755
+ // Find the closest pair of adjacent segments that can be merged
756
+ for (let i = 0; i < segments.length - 1; i++) {
757
+ const segment1 = segments[i];
758
+ const segment2 = segments[i + 1];
759
+ const gap = segment2.start - segment1.end;
760
+
761
+ // Calculate what the duration would be if we merged these segments
762
+ const mergedDuration = segment2.end - segment1.start;
763
+
764
+ // Only consider this pair if the merged duration wouldn't exceed the threshold
765
+ if (mergedDuration <= maxDurationSeconds && gap < closestDistance) {
766
+ closestDistance = gap;
767
+ closestPair = i;
768
+ }
769
+ }
770
+
771
+ // Merge the closest pair if found
772
+ if (closestPair !== -1) {
773
+ const segment1 = segments[closestPair];
774
+ const segment2 = segments[closestPair + 1];
775
+
776
+ // Collect all constituent segments (handle nested merges)
777
+ const getAllConstituents = (
778
+ segment: AlignedSegment
779
+ ): AlignedSegment[] => {
780
+ if (segment.mergedFrom) {
781
+ return segment.mergedFrom.flatMap(getAllConstituents);
782
+ }
783
+ // Return atomic segment without merge history
784
+ return [
785
+ {
786
+ start: segment.start,
787
+ end: segment.end,
788
+ duration: segment.duration,
789
+ text: segment.text,
790
+ chunk_index: segment.chunk_index,
791
+ speech_segment_index: segment.speech_segment_index,
792
+ },
793
+ ];
794
+ };
795
+
796
+ const constituents1 = getAllConstituents(segment1);
797
+ const constituents2 = getAllConstituents(segment2);
798
+ const allConstituents = [...constituents1, ...constituents2];
799
+
800
+ const mergedSegment: AlignedSegment = {
801
+ start: segment1.start,
802
+ end: segment2.end,
803
+ duration: segment2.end - segment1.start,
804
+ text: `${segment1.text} ${segment2.text}`,
805
+ chunk_index: segment1.chunk_index,
806
+ speech_segment_index: segment1.speech_segment_index,
807
+ mergedFrom: allConstituents,
808
+ mergeThreshold: maxDurationSeconds,
809
+ };
810
+
811
+ segments = [
812
+ ...segments.slice(0, closestPair),
813
+ mergedSegment,
814
+ ...segments.slice(closestPair + 2),
815
+ ];
816
+ merged = true;
817
+ console.log(
818
+ `Merged segments: "${segment1.text}" + "${segment2.text}"`
819
+ );
820
+ }
821
+ }
822
+
823
+ console.log(
824
+ `Final result: ${originalCount} → ${segments.length} segments`
825
+ );
826
+
827
+ // Update both transcription and current segments
828
+ const updatedTranscription = {
829
+ ...transcription,
830
+ aligned_segments: segments,
831
+ };
832
+
833
+ set({
834
+ transcription: updatedTranscription,
835
+ currentSegments: segments,
836
+ selectedSegmentIndex: null,
837
+ });
838
+
839
+ // Record history with debounce for merge slider changes
840
+ _recordHistoryDebounced(segments);
841
+ },
842
+
843
+ // Viewport actions
844
+ setViewport: (start: number, end: number) => {
845
+ set({
846
+ viewportStart: start,
847
+ viewportEnd: end,
848
+ });
849
+ },
850
+
851
+ initializeViewport: (duration: number) => {
852
+ const FIXED_VIEWPORT_DURATION = 30; // Fixed viewport window is always 30 seconds
853
+ const viewportDuration = Math.min(FIXED_VIEWPORT_DURATION, duration);
854
+ set({
855
+ viewportStart: 0,
856
+ viewportEnd: viewportDuration,
857
+ });
858
+ },
859
+
860
+ // History actions
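+ // historyIndex points at the snapshot currently shown; undo/redo move the pointer and re-apply that snapshot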
861
+ undo: () => {
862
+ const {history, historyIndex, transcription} = get();
863
+ if (historyIndex > 0) {
864
+ const newIndex = historyIndex - 1;
865
+ const segments = history[newIndex];
866
+
867
+ // Update both transcription and current segments
868
+ const updatedTranscription = {
869
+ ...transcription!,
870
+ aligned_segments: [...segments],
871
+ };
872
+
873
+ set({
874
+ transcription: updatedTranscription,
875
+ currentSegments: [...segments],
876
+ historyIndex: newIndex,
877
+ selectedSegmentIndex: null,
878
+ canUndo: newIndex > 0,
879
+ canRedo: newIndex < history.length - 1,
880
+ });
881
+ }
882
+ },
883
+
884
+ redo: () => {
885
+ const {history, historyIndex, transcription} = get();
886
+ if (historyIndex < history.length - 1) {
887
+ const newIndex = historyIndex + 1;
888
+ const segments = history[newIndex];
889
+
890
+ // Update both transcription and current segments
891
+ const updatedTranscription = {
892
+ ...transcription!,
893
+ aligned_segments: [...segments],
894
+ };
895
+
896
+ set({
897
+ transcription: updatedTranscription,
898
+ currentSegments: [...segments],
899
+ historyIndex: newIndex,
900
+ selectedSegmentIndex: null,
901
+ canUndo: newIndex > 0,
902
+ canRedo: newIndex < history.length - 1,
903
+ });
904
+ }
905
+ },
906
+
907
+ // Complex actions
908
+ handleFileSelect: (selectedFile: File) => {
909
+ // Reject video files - only allow audio
910
+ if (selectedFile.type.startsWith("video/")) {
911
+ set({
912
+ error:
913
+ "Video files are not supported. Please upload an audio file only.",
914
+ });
915
+ return;
916
+ }
917
+
918
+ // Reject non-audio files
919
+ if (!selectedFile.type.startsWith("audio/")) {
920
+ set({
921
+ error: "Invalid file type. Please upload an audio file.",
922
+ });
923
+ return;
924
+ }
925
+
926
+ const {mediaUrl} = get();
927
+
928
+ // Clean up previous media URL
929
+ if (mediaUrl) {
930
+ URL.revokeObjectURL(mediaUrl);
931
+ }
932
+
933
+ // Create new object URL for media playback
934
+ const url = URL.createObjectURL(selectedFile);
935
+
936
+ // Track file upload
937
+ const fileType = "audio";
938
+ const fileSizeMB =
939
+ Math.round((selectedFile.size / (1024 * 1024)) * 10) / 10; // Round to 1 decimal
940
+ trackFileUpload(fileType, fileSizeMB);
941
+
942
+ set({
943
+ file: selectedFile,
944
+ mediaUrl: url,
945
+ transcription: null,
946
+ currentTime: 0,
947
+ activeSegmentIndex: null,
948
+ error: null,
949
+ isVideoFile: false,
950
+ hasFile: true,
951
+ hasTranscription: false,
952
+ currentSegments: null,
953
+ });
954
+ },
955
+
956
+ handleTranscribe: async () => {
957
+ const {file, selectedLanguage, selectedScript, setTranscription} =
958
+ get();
959
+ if (!file) return;
960
+
961
+ set({isLoading: true, error: null});
962
+
963
+ // Track transcription start
964
+ if (selectedLanguage) {
965
+ trackTranscriptionStart(selectedLanguage);
966
+ }
967
+
968
+ const startTime = Date.now();
969
+
970
+ try {
971
+ const result = await transcribeAudio(
972
+ file,
973
+ selectedLanguage,
974
+ selectedScript,
975
+ (isProcessing) => set({isProcessingVideo: isProcessing})
976
+ );
977
+
978
+ // Track transcription completion
979
+ if (selectedLanguage) {
980
+ const duration = Math.round((Date.now() - startTime) / 1000); // Duration in seconds
981
+ trackTranscriptionComplete(selectedLanguage, duration);
982
+ }
983
+
984
+ // Use setTranscription to properly initialize history
985
+ setTranscription(result);
986
+ } catch (err) {
987
+ console.error("Transcription error:", err);
988
+
989
+ // Track transcription error
990
+ if (selectedLanguage) {
991
+ const errorMessage =
992
+ err instanceof Error ? err.message : "Unknown error";
993
+ trackTranscriptionError(selectedLanguage, errorMessage);
994
+ }
995
+
996
+ set({
997
+ error:
998
+ err instanceof Error ? err.message : "An unknown error occurred",
999
+ });
1000
+ } finally {
1001
+ set({isLoading: false, isProcessingVideo: false});
1002
+ }
1003
+ },
1004
+
1005
+ handleDownloadVideoWithSubtitles: async () => {
1006
+ const {
1007
+ file,
1008
+ transcription,
1009
+ selectedLanguage,
1010
+ setIsDownloadingVideo,
1011
+ setError,
1012
+ } = get();
1013
+ if (!file || !transcription) return;
1014
+
1015
+ setIsDownloadingVideo(true);
1016
+ try {
1017
+ const srtContent = generateSRT(transcription.aligned_segments);
1018
+ const filename = file.name.replace(
1019
+ /\.[^/.]+$/,
1020
+ "_with_subtitles.mp4"
1021
+ );
1022
+ // Pass the selected language or fallback to 'eng'
1023
+ const language = selectedLanguage || "eng";
1024
+ await downloadVideoWithSubtitles(
1025
+ file,
1026
+ srtContent,
1027
+ filename,
1028
+ language,
1029
+ "srt",
1030
+ "mp4"
1031
+ );
1032
+
1033
+ // Track video with subtitles download
1034
+ if (selectedLanguage) {
1035
+ trackDownloadVideoWithSubtitles(selectedLanguage);
1036
+ }
1037
+ } catch (err) {
1038
+ console.error("Error creating video with subtitles:", err);
1039
+ setError("Failed to create video with subtitles");
1040
+ } finally {
1041
+ setIsDownloadingVideo(false);
1042
+ }
1043
+ },
1044
+
1045
+ // Recording actions
1046
+ startRecording: (type: "audio" | "video") => {
1047
+ set({
1048
+ isRecording: true,
1049
+ recordingType: type,
1050
+ recordedBlob: null,
1051
+ });
1052
+ },
1053
+
1054
+ stopRecording: () => {
1055
+ set({
1056
+ isRecording: false,
1057
+ recordingType: null,
1058
+ });
1059
+ },
1060
+
1061
+ setRecordedBlob: (blob: Blob | null) => {
1062
+ if (blob) {
1063
+ // Store the current recording type before it gets cleared
1064
+ const currentRecordingType = get().recordingType;
1065
+
1066
+ // Convert blob to ArrayBuffer first, then back to Blob to ensure data persistence
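+ // (MediaRecorder blobs can become unreadable after the recorder is torn down in some browsers)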
1067
+ blob
1068
+ .arrayBuffer()
1069
+ .then((arrayBuffer) => {
1070
+ const {mediaUrl: currentUrl, showWelcomeModal} = get();
1071
+
1072
+ // Clean up previous media URL
1073
+ if (currentUrl) {
1074
+ URL.revokeObjectURL(currentUrl);
1075
+ }
1076
+
1077
+ const extension = "webm"; // recordings are saved as WebM for both audio and video
1079
+ const mimeType =
1080
+ currentRecordingType === "video" ? "video/webm" : "audio/webm";
1081
+
1082
+ // Create a new blob from the ArrayBuffer to ensure data persistence
1083
+ const persistentBlob = new Blob([arrayBuffer], {type: mimeType});
1084
+
1085
+ // Convert to File object
1086
+ const file = new File(
1087
+ [persistentBlob],
1088
+ `recorded_${currentRecordingType}.${extension}`,
1089
+ {
1090
+ type: mimeType,
1091
+ lastModified: Date.now(),
1092
+ }
1093
+ );
1094
+
1095
+ // Create URL from the persistent blob
1096
+ const url = URL.createObjectURL(persistentBlob);
1097
+
1103
+
1104
+ // Track file upload
1105
+ const fileType =
1106
+ currentRecordingType === "video" ? "video" : "audio";
1107
+ const fileSizeMB =
1108
+ Math.round((persistentBlob.size / (1024 * 1024)) * 10) / 10;
1109
+ trackFileUpload(fileType, fileSizeMB);
1110
+
1111
+ // Set all the state - preserve existing media refs!
1112
+ const {audioRef, videoRef} = get();
1113
+ set({
1114
+ ...initialState,
1115
+ audioRef, // Preserve existing audioRef
1116
+ videoRef, // Preserve existing videoRef
1117
+ recordedBlob: persistentBlob, // Store the persistent blob
1118
+ file: file,
1119
+ mediaUrl: url,
1120
+ isRecording: false,
1121
+ recordingType: null,
1122
+ isVideoFile: currentRecordingType === "video",
1123
+ hasFile: true,
1124
+ hasTranscription: false,
1125
+ showWelcomeModal,
1126
+ });
1127
+ })
1128
+ .catch((error) => {
1129
+ console.error("Failed to create persistent blob:", error);
1130
+ set({error: "Failed to process recorded media"});
1131
+ });
1132
+ } else {
1133
+ set({recordedBlob: blob});
1134
+ }
1135
+ },
1136
+
1137
+ reset: () => {
1138
+ const {mediaUrl} = get();
1139
+
1140
+ // Clean up media URL
1141
+ if (mediaUrl) {
1142
+ URL.revokeObjectURL(mediaUrl);
1143
+ }
1144
+
1145
+ set({
1146
+ ...initialState,
1147
+ isVideoFile: false,
1148
+ hasFile: false,
1149
+ hasTranscription: false,
1150
+ currentSegments: null,
+ canUndo: false,
+ canRedo: false,
1151
+ });
1152
+ },
1153
+ }),
1154
+ {
1155
+ name: "transcription-store", // Name for devtools
1156
+ }
1157
+ )
1158
+ );
1159
+
1160
+ // Export the store hook directly - components should use useTranscriptionStore()
1161
+ // and destructure what they need directly from the store
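+ // e.g.: const {file, isLoading, handleTranscribe} = useTranscriptionStore();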
frontend/src/utils/languages.ts ADDED
The diff for this file is too large to render. See raw diff
 
frontend/src/utils/mediaTypes.ts ADDED
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Supported media file types for transcription
3
+ * These should match the server's supported formats in convert_media_to_wav.py
4
+ */
5
+
6
+ export const SUPPORTED_AUDIO_FORMATS = [
7
+ 'WAV', 'MP3', 'M4A', 'AAC', 'FLAC', 'OGG', 'WMA'
8
+ ] as const;
9
+
10
+ export const SUPPORTED_VIDEO_FORMATS = [
11
+ 'MP4', 'AVI', 'MOV', 'MKV'
12
+ ] as const;
13
+
14
+ /**
15
+ * Common codec information for supported formats
16
+ * Note: Some esoteric codecs within these containers may not be supported
17
+ */
18
+ export const CODEC_INFO = {
19
+ // Audio codecs - widely supported
20
+ audio: {
21
+ common: ['AAC', 'MP3', 'PCM', 'FLAC', 'Vorbis'],
22
+ note: 'Most standard audio codecs are supported'
23
+ },
24
+ // Video codecs - audio track extraction
25
+ video: {
26
+ common: ['H.264 (AVC)', 'H.265 (HEVC)', 'VP9', 'AV1'],
27
+ legacy: ['H.263', 'MPEG-4', 'DivX', 'XviD'],
28
+ note: 'Audio is extracted from video - codec compatibility depends on FFmpeg support'
29
+ }
30
+ } as const;
31
+
32
+ export const SUPPORTED_FORMATS = {
33
+ audio: SUPPORTED_AUDIO_FORMATS,
34
+ video: SUPPORTED_VIDEO_FORMATS,
35
+ all: [...SUPPORTED_AUDIO_FORMATS, ...SUPPORTED_VIDEO_FORMATS]
36
+ } as const;
37
+
38
+ /**
40
+ * Get potential compatibility warnings for file types
41
+ */
42
+ export const getCompatibilityWarning = (extension: string): string | null => {
43
+ const ext = extension.toLowerCase().replace('.', '');
44
+
45
+ switch (ext) {
46
+ case 'mp4':
47
+ return 'MP4 files with uncommon codecs (e.g., proprietary or very old codecs) may not process correctly. H.264 video with AAC audio works best.';
48
+ case 'avi':
49
+ return 'AVI files may contain various codecs. If processing fails, try converting to MP4 with H.264/AAC first.';
50
+ case 'mkv':
51
+ return 'MKV is a container format that supports many codecs. Most common codecs work, but some proprietary ones may not.';
52
+ case 'mov':
53
+ return 'MOV files usually work well, but very old or proprietary Apple codecs may cause issues.';
54
+ case 'wma':
55
+ case 'aac':
56
+ return 'Some DRM-protected or uncommon codec variants may not be supported.';
57
+ default:
58
+ return null;
59
+ }
60
+ };