dragonllm-finance-models / deployment_config.py
jeanbaptdzd's picture
feat: Clean deployment to HuggingFace Space with model config test endpoint
8c0b652
#!/usr/bin/env python3
"""
Deployment Configuration for LinguaCustodia Financial AI API
Consolidated deployment settings and utilities.
"""
import logging
import os
import sys
from typing import Any, Dict, Optional

from dotenv import load_dotenv
from pydantic import BaseModel, Field
# Load environment variables
# Runs at import time, i.e. before any of the factory functions in this
# module call os.getenv. Presumably this picks up a local .env file via
# python-dotenv -- confirm the lookup/override rules against that library.
load_dotenv()
# Module-level logger named after this module; used by the validation helper.
logger = logging.getLogger(__name__)
class DeploymentConfig(BaseModel):
    """Deployment configuration for different platforms."""

    # Platform-independent settings for the API service. Every field carries
    # a default, so the model can be constructed without any arguments.

    # -- Platform ---------------------------------------------------------
    platform: str = Field(default="huggingface", description="Deployment platform")
    environment: str = Field(
        default="production",
        description="Environment (production, staging, development)",
    )

    # -- Application ------------------------------------------------------
    app_name: str = Field(default="lingua-custodia-api", description="Application name")
    app_port: int = Field(default=8000, description="Application port")
    app_host: str = Field(default="0.0.0.0", description="Application host")

    # -- Model / inference ------------------------------------------------
    default_model: str = Field(default="llama3.1-8b", description="Default model to use")
    max_tokens: int = Field(default=2048, description="Maximum tokens for inference")
    temperature: float = Field(default=0.6, description="Temperature for generation")
    timeout_seconds: int = Field(default=300, description="Request timeout in seconds")

    # -- Logging ----------------------------------------------------------
    log_level: str = Field(default="INFO", description="Logging level")
    log_format: str = Field(default="json", description="Log format")

    # -- Performance ------------------------------------------------------
    worker_processes: int = Field(default=1, description="Number of worker processes")
    worker_threads: int = Field(default=4, description="Number of worker threads")
    max_connections: int = Field(default=100, description="Maximum connections")

    # -- Security ---------------------------------------------------------
    # secret_key is the only optional field; it stays None unless provided.
    secret_key: Optional[str] = Field(default=None, description="Secret key for security")
    # Stored as one string; the default shows a comma-separated host list.
    allowed_hosts: str = Field(default="localhost,127.0.0.1", description="Allowed hosts")
class ScalewayConfig(BaseModel):
    """Scaleway-specific configuration."""

    # The three credential fields below are required (no default); everything
    # else falls back to the defaults declared here.

    # -- Authentication ---------------------------------------------------
    access_key: str = Field(default=..., description="Scaleway access key")
    secret_key: str = Field(default=..., description="Scaleway secret key")
    project_id: str = Field(default=..., description="Scaleway project ID")
    organization_id: Optional[str] = Field(
        default=None,
        description="Scaleway organization ID",
    )
    region: str = Field(default="fr-par", description="Scaleway region")

    # -- Deployment naming ------------------------------------------------
    namespace_name: str = Field(
        default="lingua-custodia",
        description="Container namespace name",
    )
    container_name: str = Field(default="lingua-custodia-api", description="Container name")
    function_name: str = Field(default="lingua-custodia-api", description="Function name")

    # -- Resources and scaling --------------------------------------------
    memory_limit: int = Field(
        default=16384,
        description="Memory limit in MB (16GB for 8B models)",
    )
    cpu_limit: int = Field(default=4000, description="CPU limit in mCPU (4 vCPUs)")
    min_scale: int = Field(default=1, description="Minimum scale")
    max_scale: int = Field(default=3, description="Maximum scale")
    timeout: int = Field(
        default=600,
        description="Timeout in seconds (10min for model loading)",
    )

    # -- Exposure ---------------------------------------------------------
    privacy: str = Field(default="public", description="Privacy setting")
    http_option: str = Field(default="enabled", description="HTTP option")
class HuggingFaceConfig(BaseModel):
    """HuggingFace Spaces configuration."""

    # Both tokens are required (no default); the Space and storage settings
    # fall back to the defaults declared here.

    # -- Authentication ---------------------------------------------------
    hf_token: str = Field(default=..., description="HuggingFace token")
    hf_token_lc: str = Field(default=..., description="LinguaCustodia token")

    # -- Space ------------------------------------------------------------
    space_name: str = Field(
        default="linguacustodia-financial-api",
        description="Space name",
    )
    space_type: str = Field(default="docker", description="Space type")
    hardware: str = Field(default="t4-medium", description="Hardware type")

    # -- Storage ----------------------------------------------------------
    persistent_storage: bool = Field(default=True, description="Enable persistent storage")
    # Kept as free-form text (size plus unit), not parsed into a number.
    storage_size: str = Field(default="150GB", description="Storage size")
class KoyebConfig(BaseModel):
    """Koyeb-specific configuration."""

    # Only the API token is required (no default); the remaining fields fall
    # back to the defaults declared here.

    # -- Authentication ---------------------------------------------------
    api_token: str = Field(default=..., description="Koyeb API token")
    region: str = Field(default="fra", description="Koyeb region")

    # -- Application ------------------------------------------------------
    app_name: str = Field(
        default="lingua-custodia-inference",
        description="Application name",
    )
    service_name: str = Field(default="lingua-custodia-api", description="Service name")

    # -- Instances --------------------------------------------------------
    instance_type: str = Field(default="small", description="Instance type")
    min_instances: int = Field(default=1, description="Minimum instances")
    max_instances: int = Field(default=3, description="Maximum instances")
def get_deployment_config() -> DeploymentConfig:
    """Build the general deployment settings from the process environment.

    Each field is read from one environment variable and falls back to a
    hard-coded default when that variable is unset. Numeric settings are
    converted with ``int`` / ``float`` before being handed to the model.

    Returns:
        DeploymentConfig: The populated settings model.

    Raises:
        ValueError: If a numeric variable holds text that ``int`` or
            ``float`` cannot parse.
    """
    env = os.getenv

    # Collected in the same order as the model's fields, then unpacked.
    settings = {
        "platform": env("DEPLOYMENT_PLATFORM", "huggingface"),
        "environment": env("ENVIRONMENT", "production"),
        "app_name": env("APP_NAME", "lingua-custodia-api"),
        "app_port": int(env("APP_PORT", 8000)),
        "app_host": env("APP_HOST", "0.0.0.0"),
        "default_model": env("DEFAULT_MODEL", "llama3.1-8b"),
        "max_tokens": int(env("MAX_TOKENS", 2048)),
        "temperature": float(env("TEMPERATURE", 0.6)),
        "timeout_seconds": int(env("TIMEOUT_SECONDS", 300)),
        "log_level": env("LOG_LEVEL", "INFO"),
        "log_format": env("LOG_FORMAT", "json"),
        "worker_processes": int(env("WORKER_PROCESSES", 1)),
        "worker_threads": int(env("WORKER_THREADS", 4)),
        "max_connections": int(env("MAX_CONNECTIONS", 100)),
        # No fallback: stays None when SECRET_KEY is not set.
        "secret_key": env("SECRET_KEY"),
        "allowed_hosts": env("ALLOWED_HOSTS", "localhost,127.0.0.1"),
    }
    return DeploymentConfig(**settings)
def get_scaleway_config() -> ScalewayConfig:
    """Build the Scaleway settings from ``SCW_*`` environment variables.

    Unset variables fall back to hard-coded defaults. The three credential
    variables fall back to an empty string, so a missing credential is passed
    to the model as ``""`` rather than being omitted.

    Returns:
        ScalewayConfig: The populated settings model.

    Raises:
        ValueError: If a resource/scaling variable holds text that ``int``
            cannot parse.
    """
    read = os.getenv

    credentials = {
        "access_key": read("SCW_ACCESS_KEY", ""),
        "secret_key": read("SCW_SECRET_KEY", ""),
        "project_id": read("SCW_DEFAULT_PROJECT_ID", ""),
        # Optional: None when the variable is not set.
        "organization_id": read("SCW_DEFAULT_ORGANIZATION_ID"),
    }
    placement = {
        "region": read("SCW_REGION", "fr-par"),
        "namespace_name": read("SCW_NAMESPACE_NAME", "lingua-custodia"),
        "container_name": read("SCW_CONTAINER_NAME", "lingua-custodia-api"),
        "function_name": read("SCW_FUNCTION_NAME", "lingua-custodia-api"),
    }
    resources = {
        "memory_limit": int(read("SCW_MEMORY_LIMIT", 16384)),
        "cpu_limit": int(read("SCW_CPU_LIMIT", 4000)),
        "min_scale": int(read("SCW_MIN_SCALE", 1)),
        "max_scale": int(read("SCW_MAX_SCALE", 3)),
        "timeout": int(read("SCW_TIMEOUT", 600)),
    }
    exposure = {
        "privacy": read("SCW_PRIVACY", "public"),
        "http_option": read("SCW_HTTP_OPTION", "enabled"),
    }

    return ScalewayConfig(**credentials, **placement, **resources, **exposure)
def get_huggingface_config() -> HuggingFaceConfig:
    """Build the HuggingFace Spaces settings from environment variables.

    Unset variables fall back to hard-coded defaults; both tokens fall back
    to an empty string.

    Returns:
        HuggingFaceConfig: The populated settings model.
    """
    token = os.getenv("HF_TOKEN", "")
    token_lc = os.getenv("HF_TOKEN_LC", "")
    space = os.getenv("HF_SPACE_NAME", "linguacustodia-financial-api")
    kind = os.getenv("HF_SPACE_TYPE", "docker")
    hw = os.getenv("HF_HARDWARE", "t4-medium")

    # Only the text "true" (any letter case) enables persistent storage;
    # every other value, including "1" or "yes", disables it.
    storage_flag = os.getenv("HF_PERSISTENT_STORAGE", "true")
    wants_storage = storage_flag.lower() == "true"

    size = os.getenv("HF_STORAGE_SIZE", "150GB")

    return HuggingFaceConfig(
        hf_token=token,
        hf_token_lc=token_lc,
        space_name=space,
        space_type=kind,
        hardware=hw,
        persistent_storage=wants_storage,
        storage_size=size,
    )
def get_koyeb_config() -> KoyebConfig:
    """Build the Koyeb settings from ``KOYEB_*`` environment variables.

    Unset variables fall back to hard-coded defaults; the API token falls
    back to an empty string.

    Returns:
        KoyebConfig: The populated settings model.

    Raises:
        ValueError: If an instance-count variable holds text that ``int``
            cannot parse.
    """
    # field name -> (environment variable, fallback) for the plain-text fields
    text_sources = {
        "api_token": ("KOYEB_API_TOKEN", ""),
        "region": ("KOYEB_REGION", "fra"),
        "app_name": ("KOYEB_APP_NAME", "lingua-custodia-inference"),
        "service_name": ("KOYEB_SERVICE_NAME", "lingua-custodia-api"),
        "instance_type": ("KOYEB_INSTANCE_TYPE", "small"),
    }
    kwargs = {
        field: os.getenv(variable, fallback)
        for field, (variable, fallback) in text_sources.items()
    }

    # Instance counts are converted to integers after the text fields.
    kwargs["min_instances"] = int(os.getenv("KOYEB_MIN_INSTANCES", 1))
    kwargs["max_instances"] = int(os.getenv("KOYEB_MAX_INSTANCES", 3))

    return KoyebConfig(**kwargs)
def validate_deployment_config(config: DeploymentConfig) -> bool:
    """Check a deployment configuration for obviously invalid values.

    The checks run in a fixed order and stop at the first failure:
    non-empty app name, port in 1..65535, temperature in 0..2, and a
    positive ``max_tokens``. Each failure is logged at error level.

    Args:
        config: The configuration to inspect.

    Returns:
        bool: ``True`` when every check passes; ``False`` on the first failed
        check or when inspecting ``config`` raises any exception (the
        exception is logged, not propagated).
    """
    # (violation predicate, message) pairs, evaluated lazily and in order so
    # later attributes are only read once the earlier checks have passed.
    rules = (
        (lambda c: not c.app_name, "App name is required"),
        (lambda c: c.app_port <= 0 or c.app_port > 65535, "Invalid app port"),
        (
            lambda c: c.temperature < 0 or c.temperature > 2,
            "Temperature must be between 0 and 2",
        ),
        (lambda c: c.max_tokens <= 0, "Max tokens must be positive"),
    )

    try:
        for is_violated, message in rules:
            if is_violated(config):
                logger.error(message)
                return False
    except Exception as e:
        logger.error(f"❌ Configuration validation failed: {e}")
        return False
    else:
        logger.info("✅ Deployment configuration is valid")
        return True
def get_environment_info() -> Dict[str, Any]:
    """Collect a snapshot of the runtime environment for debugging.

    Returns:
        Dict[str, Any]: A dictionary with four entries:

        * ``"python_version"``: the interpreter version string.
        * ``"current_directory"``: the current working directory.
        * ``"environment_variables"``: the values of a fixed set of
          application variables (``None`` for any that are unset).
        * ``"file_system"``: names of entries in the current directory,
          filtered into ``"app_files"`` (name starts with ``app``),
          ``"deployment_files"`` (name starts with ``deploy``) and
          ``"config_files"`` (name contains ``config``, case-insensitive).

    Raises:
        OSError: If the current directory cannot be determined or listed.
    """
    # Variables worth reporting; listed in the order they appear in the output.
    reported_variables = (
        "APP_NAME",
        "APP_PORT",
        "DEFAULT_MODEL",
        "DEPLOYMENT_PLATFORM",
        "ENVIRONMENT",
        "LOG_LEVEL",
    )

    # List the directory once so all three filters see the same snapshot
    # (and the directory is not scanned three times).
    entries = os.listdir('.')

    return {
        # Use the sys module directly; ``os.sys`` only works because the os
        # module happens to import sys and is not a documented attribute.
        "python_version": sys.version,
        "current_directory": os.getcwd(),
        "environment_variables": {
            name: os.getenv(name) for name in reported_variables
        },
        "file_system": {
            "app_files": [f for f in entries if f.startswith('app')],
            "deployment_files": [f for f in entries if f.startswith('deploy')],
            "config_files": [f for f in entries if 'config' in f.lower()],
        },
    }