# Dockerfile for a Scaleway L40S GPU Instance.
# Built on the NVIDIA CUDA runtime base image (CUDA 12.6.3, Ubuntu 22.04) for GPU support.
FROM nvidia/cuda:12.6.3-runtime-ubuntu22.04

# Install Python 3.11 and system dependencies.
# - DEBIAN_FRONTEND is set inline so it is not baked into the runtime environment.
# - --no-install-recommends keeps the layer small; ca-certificates is listed
#   explicitly because it would otherwise only arrive as a recommended package.
# - curl is required by the HEALTHCHECK below.
# - The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        python3.11 \
        python3.11-venv \
    && rm -rf /var/lib/apt/lists/*

# Use a dedicated Python 3.11 virtual environment instead of repointing the
# system /usr/bin/python3 with update-alternatives: the distribution's own
# tooling expects the system interpreter to stay untouched, and the venv
# guarantees that `python` and `pip` both refer to Python 3.11.
ENV VIRTUAL_ENV=/opt/venv
RUN python3.11 -m venv "${VIRTUAL_ENV}"
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Create an unprivileged user with a stable numeric UID/GID, plus the
# directories it must be able to write to:
#   /app               - application working directory
#   /data/.huggingface - cache directory for HuggingFace models (HF_HOME)
# NOTE: if /data is bind-mounted at runtime, the host directory must be
# writable by UID 10001.
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app --create-home --home-dir /home/app app \
    && mkdir -p /app /data/.huggingface \
    && chown -R app:app /app /data

# Set working directory
WORKDIR /app

# Copy requirements first and install Python dependencies, so this layer stays
# cached until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application file (inline configuration for Scaleway)
COPY --chown=app:app app.py .

# Runtime environment.
# APP_PORT is not read by the CMD below (the port is hard-coded there);
# presumably app.py consumes it - verify before changing either value.
ENV PYTHONPATH=/app \
    HF_HOME=/data/.huggingface \
    APP_PORT=7860 \
    OMP_NUM_THREADS=8 \
    CUDA_VISIBLE_DEVICES=0

# Drop root privileges for everything that runs in the container.
USER app

# Expose port (documentation only; publishing is done at `docker run` time)
EXPOSE 7860

# Health check. The 300s start period gives the service time to come up before
# failures count. Assumes app.py serves GET /health - verify against the app.
HEALTHCHECK --interval=30s --timeout=30s --start-period=300s --retries=3 \
    CMD curl -fsS http://localhost:7860/health || exit 1

# Run the application (exec form, so uvicorn is PID 1 and receives SIGTERM)
CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]