# Dockerfile for Scaleway L40S GPU Instance
# Uses NVIDIA CUDA base image for optimal GPU support
# Updated to CUDA 12.6.3 (latest stable as of 2025)

# Base image: the `runtime` flavor of NVIDIA's CUDA image on Ubuntu 22.04.
# NOTE(review): per NVIDIA's image docs the `runtime` flavor carries the CUDA
# runtime libraries but not the compiler toolchain (that is `devel`) - confirm
# nothing in requirements.txt needs to compile CUDA code at install time.
# The tag is version-pinned but not digest-pinned, so rebuilds may pick up a
# re-published image under the same tag.
FROM nvidia/cuda:12.6.3-runtime-ubuntu22.04

# Install Python 3.11 and system dependencies.
# - --no-install-recommends keeps the layer small; ca-certificates is listed
#   explicitly because it is otherwise only pulled in as a recommendation.
# - DEBIAN_FRONTEND is set inline so it does not leak into the runtime env.
# - curl stays in the image because the HEALTHCHECK below uses it.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        python3.11 \
        python3.11-venv \
    && rm -rf /var/lib/apt/lists/*

# Make Python 3.11 the default through a virtual environment instead of
# re-pointing /usr/bin/python3 with update-alternatives: Ubuntu 22.04's own
# Python-based tooling expects the distribution interpreter (3.10) behind that
# path. With the venv first on PATH, `python`, `python3` and `pip` all resolve
# to 3.11, and the venv ships its own pip so python3-pip is not needed.
RUN python3.11 -m venv /opt/venv
ENV VIRTUAL_ENV=/opt/venv \
    PATH=/opt/venv/bin:$PATH

# Create an unprivileged user to run the service, plus the directories it must
# be able to write to: the app directory and the HuggingFace model cache.
# A fixed numeric UID/GID lets orchestrators verify the container is non-root.
# NOTE(review): if /data is bind-mounted at runtime, the host directory must be
# writable by UID 10001 - confirm against the deployment configuration.
RUN groupadd --gid 10001 app \
    && useradd --uid 10001 --gid app --create-home --shell /usr/sbin/nologin app \
    && mkdir -p /app /data/.huggingface \
    && chown -R app:app /app /data

# Set working directory
WORKDIR /app

# Copy the dependency manifest on its own so the install layer below is reused
# from cache until requirements.txt changes.
COPY --chown=app:app requirements.txt .

# Install with `python -m pip` so packages land in the same interpreter that
# the container command runs. Done as root, before dropping privileges.
RUN python -m pip install --no-cache-dir -r requirements.txt

# Copy application file (inline configuration for Scaleway)
COPY --chown=app:app app.py .

# Drop root privileges for everything that follows, including the running
# container process.
USER 10001:10001

# Runtime environment, declared in a single instruction:
#   PYTHONPATH           - adds /app to Python's module search path
#   HF_HOME              - HuggingFace cache root
#   APP_PORT             - presumably read by app.py (TODO confirm); the
#                          uvicorn command in this file passes its port explicitly
#   OMP_NUM_THREADS      - OpenMP thread count
#   CUDA_VISIBLE_DEVICES - GPU index list made visible to CUDA
ENV PYTHONPATH=/app \
    HF_HOME=/data/.huggingface \
    APP_PORT=7860 \
    OMP_NUM_THREADS=8 \
    CUDA_VISIBLE_DEVICES=0

# Document the TCP port the server listens on (EXPOSE does not publish it).
EXPOSE 7860/tcp

# Liveness probe against the app's /health endpoint. The 300s start period
# gives the service time to come up before failed probes count towards the
# retry limit.
HEALTHCHECK --start-period=300s --interval=30s --timeout=30s --retries=3 \
    CMD curl --fail http://localhost:7860/health || exit 1

# Start the ASGI app `app` from module `app` under uvicorn, listening on all
# interfaces. Exec form, so no shell sits between Docker and the server.
CMD ["python", "-m", "uvicorn", "app:app", \
     "--host", "0.0.0.0", "--port", "7860"]