#!/usr/bin/env python3
"""
Deployment Configuration for LinguaCustodia Financial AI API
Consolidated deployment settings and utilities.
"""
import os
import sys
import logging
from typing import Dict, Any, Optional
from dotenv import load_dotenv
from pydantic import BaseModel, Field

# Load environment variables
load_dotenv()

logger = logging.getLogger(__name__)


class DeploymentConfig(BaseModel):
    """Deployment configuration for different platforms."""

    # Platform settings
    platform: str = Field("huggingface", description="Deployment platform")
    environment: str = Field("production", description="Environment (production, staging, development)")

    # Application settings
    app_name: str = Field("lingua-custodia-api", description="Application name")
    app_port: int = Field(8000, description="Application port")
    app_host: str = Field("0.0.0.0", description="Application host")

    # Model settings
    default_model: str = Field("llama3.1-8b", description="Default model to use")
    max_tokens: int = Field(2048, description="Maximum tokens for inference")
    temperature: float = Field(0.6, description="Temperature for generation")
    timeout_seconds: int = Field(300, description="Request timeout in seconds")

    # Logging settings
    log_level: str = Field("INFO", description="Logging level")
    log_format: str = Field("json", description="Log format")

    # Performance settings
    worker_processes: int = Field(1, description="Number of worker processes")
    worker_threads: int = Field(4, description="Number of worker threads")
    max_connections: int = Field(100, description="Maximum connections")

    # Security settings
    secret_key: Optional[str] = Field(None, description="Secret key for security")
    allowed_hosts: str = Field("localhost,127.0.0.1", description="Allowed hosts")


class ScalewayConfig(BaseModel):
    """Scaleway-specific configuration."""

    # Authentication
    access_key: str = Field(..., description="Scaleway access key")
    secret_key: str = Field(..., description="Scaleway secret key")
    project_id: str = Field(..., description="Scaleway project ID")
    organization_id: Optional[str] = Field(None, description="Scaleway organization ID")
    region: str = Field("fr-par", description="Scaleway region")

    # Deployment settings
    namespace_name: str = Field("lingua-custodia", description="Container namespace name")
    container_name: str = Field("lingua-custodia-api", description="Container name")
    function_name: str = Field("lingua-custodia-api", description="Function name")

    # Resource settings
    memory_limit: int = Field(16384, description="Memory limit in MB (16GB for 8B models)")
    cpu_limit: int = Field(4000, description="CPU limit in mCPU (4 vCPUs)")
    min_scale: int = Field(1, description="Minimum scale")
    max_scale: int = Field(3, description="Maximum scale")
    timeout: int = Field(600, description="Timeout in seconds (10min for model loading)")

    # Privacy settings
    privacy: str = Field("public", description="Privacy setting")
    http_option: str = Field("enabled", description="HTTP option")


class HuggingFaceConfig(BaseModel):
    """HuggingFace Spaces configuration."""

    # Authentication
    hf_token: str = Field(..., description="HuggingFace token")
    hf_token_lc: str = Field(..., description="LinguaCustodia token")

    # Space settings
    space_name: str = Field("linguacustodia-financial-api", description="Space name")
    space_type: str = Field("docker", description="Space type")
    hardware: str = Field("t4-medium", description="Hardware type")

    # Storage settings
    persistent_storage: bool = Field(True, description="Enable persistent storage")
    storage_size: str = Field("150GB", description="Storage size")


class KoyebConfig(BaseModel):
    """Koyeb-specific configuration."""

    # Authentication
    api_token: str = Field(..., description="Koyeb API token")
    region: str = Field("fra", description="Koyeb region")

    # Application settings
    app_name: str = Field("lingua-custodia-inference", description="Application name")
    service_name: str = Field("lingua-custodia-api", description="Service name")

    # Instance settings
    instance_type: str = Field("small", description="Instance type")
    min_instances: int = Field(1, description="Minimum instances")
    max_instances: int = Field(3, description="Maximum instances")


def get_deployment_config() -> DeploymentConfig:
    """Get deployment configuration from environment variables."""
    return DeploymentConfig(
        platform=os.getenv("DEPLOYMENT_PLATFORM", "huggingface"),
        environment=os.getenv("ENVIRONMENT", "production"),
        app_name=os.getenv("APP_NAME", "lingua-custodia-api"),
        app_port=int(os.getenv("APP_PORT", 8000)),
        app_host=os.getenv("APP_HOST", "0.0.0.0"),
        default_model=os.getenv("DEFAULT_MODEL", "llama3.1-8b"),
        max_tokens=int(os.getenv("MAX_TOKENS", 2048)),
        temperature=float(os.getenv("TEMPERATURE", 0.6)),
        timeout_seconds=int(os.getenv("TIMEOUT_SECONDS", 300)),
        log_level=os.getenv("LOG_LEVEL", "INFO"),
        log_format=os.getenv("LOG_FORMAT", "json"),
        worker_processes=int(os.getenv("WORKER_PROCESSES", 1)),
        worker_threads=int(os.getenv("WORKER_THREADS", 4)),
        max_connections=int(os.getenv("MAX_CONNECTIONS", 100)),
        secret_key=os.getenv("SECRET_KEY"),
        allowed_hosts=os.getenv("ALLOWED_HOSTS", "localhost,127.0.0.1")
    )


def get_scaleway_config() -> ScalewayConfig:
    """Get Scaleway configuration from environment variables."""
    return ScalewayConfig(
        access_key=os.getenv("SCW_ACCESS_KEY", ""),
        secret_key=os.getenv("SCW_SECRET_KEY", ""),
        project_id=os.getenv("SCW_DEFAULT_PROJECT_ID", ""),
        organization_id=os.getenv("SCW_DEFAULT_ORGANIZATION_ID"),
        region=os.getenv("SCW_REGION", "fr-par"),
        namespace_name=os.getenv("SCW_NAMESPACE_NAME", "lingua-custodia"),
        container_name=os.getenv("SCW_CONTAINER_NAME", "lingua-custodia-api"),
        function_name=os.getenv("SCW_FUNCTION_NAME", "lingua-custodia-api"),
        memory_limit=int(os.getenv("SCW_MEMORY_LIMIT", 16384)),
        cpu_limit=int(os.getenv("SCW_CPU_LIMIT", 4000)),
        min_scale=int(os.getenv("SCW_MIN_SCALE", 1)),
        max_scale=int(os.getenv("SCW_MAX_SCALE", 3)),
        timeout=int(os.getenv("SCW_TIMEOUT", 600)),
        privacy=os.getenv("SCW_PRIVACY", "public"),
        http_option=os.getenv("SCW_HTTP_OPTION", "enabled")
    )


def get_huggingface_config() -> HuggingFaceConfig:
    """Get HuggingFace configuration from environment variables."""
    return HuggingFaceConfig(
        hf_token=os.getenv("HF_TOKEN", ""),
        hf_token_lc=os.getenv("HF_TOKEN_LC", ""),
        space_name=os.getenv("HF_SPACE_NAME", "linguacustodia-financial-api"),
        space_type=os.getenv("HF_SPACE_TYPE", "docker"),
        hardware=os.getenv("HF_HARDWARE", "t4-medium"),
        persistent_storage=os.getenv("HF_PERSISTENT_STORAGE", "true").lower() == "true",
        storage_size=os.getenv("HF_STORAGE_SIZE", "150GB")
    )


def get_koyeb_config() -> KoyebConfig:
    """Get Koyeb configuration from environment variables."""
    return KoyebConfig(
        api_token=os.getenv("KOYEB_API_TOKEN", ""),
        region=os.getenv("KOYEB_REGION", "fra"),
        app_name=os.getenv("KOYEB_APP_NAME", "lingua-custodia-inference"),
        service_name=os.getenv("KOYEB_SERVICE_NAME", "lingua-custodia-api"),
        instance_type=os.getenv("KOYEB_INSTANCE_TYPE", "small"),
        min_instances=int(os.getenv("KOYEB_MIN_INSTANCES", 1)),
        max_instances=int(os.getenv("KOYEB_MAX_INSTANCES", 3))
    )


def validate_deployment_config(config: DeploymentConfig) -> bool:
    """Validate deployment configuration."""
    try:
        # Basic validation
        if not config.app_name:
            logger.error("App name is required")
            return False
        if config.app_port <= 0 or config.app_port > 65535:
            logger.error("Invalid app port")
            return False
        if config.temperature < 0 or config.temperature > 2:
            logger.error("Temperature must be between 0 and 2")
            return False
        if config.max_tokens <= 0:
            logger.error("Max tokens must be positive")
            return False
        logger.info("✅ Deployment configuration is valid")
        return True
    except Exception as e:
        logger.error(f"❌ Configuration validation failed: {e}")
        return False


def get_environment_info() -> Dict[str, Any]:
    """Get environment information for debugging."""
    return {
        "python_version": sys.version,
        "current_directory": os.getcwd(),
        "environment_variables": {
            "APP_NAME": os.getenv("APP_NAME"),
            "APP_PORT": os.getenv("APP_PORT"),
            "DEFAULT_MODEL": os.getenv("DEFAULT_MODEL"),
            "DEPLOYMENT_PLATFORM": os.getenv("DEPLOYMENT_PLATFORM"),
            "ENVIRONMENT": os.getenv("ENVIRONMENT"),
            "LOG_LEVEL": os.getenv("LOG_LEVEL")
        },
        "file_system": {
            "app_files": [f for f in os.listdir('.') if f.startswith('app')],
            "deployment_files": [f for f in os.listdir('.') if f.startswith('deploy')],
            "config_files": [f for f in os.listdir('.') if 'config' in f.lower()]
        }
    }
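

# Minimal usage sketch (illustrative only, not invoked by the API itself):
# builds the generic deployment config, validates it, and dumps environment
# info for debugging. It assumes the script is run from the project root so
# the file listings in get_environment_info() resolve as expected; credentials
# for the platform-specific configs default to "" when their environment
# variables are not set.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    config = get_deployment_config()
    if validate_deployment_config(config):
        logger.info(
            f"Deploying {config.app_name} on {config.platform} "
            f"({config.app_host}:{config.app_port}, model={config.default_model})"
        )

    # Only fetch the platform-specific settings for the selected platform.
    if config.platform == "huggingface":
        hf_config = get_huggingface_config()
        logger.info(f"Target Space: {hf_config.space_name} ({hf_config.hardware})")

    logger.info(f"Environment info: {get_environment_info()}")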