dragonllm-finance-models / monitor_deployment.py
jeanbaptdzd's picture
feat: Clean deployment to HuggingFace Space with model config test endpoint
8c0b652
#!/usr/bin/env python3
"""
Monitor HuggingFace Space deployment status.
Run this to check when the API endpoints are ready.
"""
import requests
import time
import sys
# Base URL of the deployment; test_endpoint() appends each endpoint path
# (e.g. '/health') directly to this string.
# NOTE(review): this is the Space's page on huggingface.co. Spaces usually
# serve the running app from a separate "*.hf.space" host -- confirm that the
# API endpoints are actually reachable under this URL.
SPACE_URL = 'https://huggingface.co/spaces/jeanbaptdzd/linguacustodia-financial-api'
def test_endpoint(endpoint_path: str, endpoint_name: str) -> bool:
    """Probe one endpoint of the Space and print a short status report.

    Args:
        endpoint_path: Path appended verbatim to ``SPACE_URL`` (e.g. ``'/health'``).
        endpoint_name: Human-readable label used in the printed messages.

    Returns:
        ``True`` when the endpoint answered with HTTP 200 (whether or not the
        body could be parsed as JSON); ``False`` for any other status code,
        a timeout, or any other failure while performing the request.
    """
    url = f'{SPACE_URL}{endpoint_path}'

    # Only the network call lives in this try block, so that a problem in the
    # reporting code below is never mistaken for "endpoint not ready".
    try:
        response = requests.get(url, timeout=10)
    except requests.exceptions.Timeout:
        print(f'⏳ {endpoint_name}: Timeout (still building)')
        return False
    except Exception as e:
        # Deliberately broad: for a polling probe, any failure to reach the
        # Space simply means "not ready yet". KeyboardInterrupt/SystemExit are
        # not subclasses of Exception and therefore still propagate.
        print(f'⏳ {endpoint_name}: {str(e)[:50]}')
        return False

    if response.status_code == 404:
        print(f'⏳ {endpoint_name}: Not ready yet (404)')
        return False
    if response.status_code != 200:
        print(f'⚠️ {endpoint_name}: Status {response.status_code}')
        return False

    print(f'✅ {endpoint_name}: Working!')
    try:
        data = response.json()
        if endpoint_path == '/health':
            print(f' - Model loaded: {data.get("model_loaded", False)}')
            print(f' - Current model: {data.get("current_model", "unknown")}')
            print(f' - Status: {data.get("status", "unknown")}')
        elif endpoint_path == '/':
            print(f' - Message: {data.get("message", "")[:60]}...')
            print(f' - Version: {data.get("version", "unknown")}')
    except (ValueError, AttributeError, TypeError):
        # ValueError: the body is not valid JSON.
        # AttributeError / TypeError: valid JSON, but not the expected shape
        # (e.g. a list instead of an object, or a non-string "message").
        # A bare ``except:`` here would also swallow Ctrl-C, so only these
        # specific failures fall back to showing the raw body.
        print(f' - Response: {response.text[:100]}...')
    return True
def main():
    """Poll the Space until every probed endpoint responds, or give up.

    Performs up to 20 rounds of checks with a 30-second pause between rounds.
    Each round probes the root, health and models endpoints through
    ``test_endpoint``.

    Returns:
        0 as soon as a single round finds all endpoints ready, otherwise 1
        once the rounds are exhausted.
    """
    separator = '=' * 60
    probes = (
        ('/', 'Root endpoint'),
        ('/health', 'Health endpoint'),
        ('/models', 'Models endpoint'),
    )
    total_rounds = 20  # 20 rounds with 30-second pauses: roughly 10 minutes

    banner = (
        '🔍 Monitoring HuggingFace Space Deployment',
        f'Space: {SPACE_URL}',
        separator,
        '',
    )
    for line in banner:
        print(line)

    for round_no in range(1, total_rounds + 1):
        print(f'\n📊 Check #{round_no}:')

        # Build the full list first: every endpoint is probed (and reported)
        # each round, even when an earlier one is not ready yet.
        outcomes = [test_endpoint(path, label) for path, label in probes]

        if all(outcomes):
            success_report = (
                '',
                separator,
                '🎉 SUCCESS! All endpoints are working!',
                '',
                'Available endpoints:',
                f' - GET {SPACE_URL}/',
                f' - GET {SPACE_URL}/health',
                f' - GET {SPACE_URL}/models',
                f' - POST {SPACE_URL}/inference',
                f' - GET {SPACE_URL}/docs',
                '',
                'Test inference:',
                f' curl -X POST "{SPACE_URL}/inference" \\',
                ' -H "Content-Type: application/json" \\',
                ' -d \'{"prompt": "What is SFCR?", "max_new_tokens": 150, "temperature": 0.6}\'',
            )
            for line in success_report:
                print(line)
            return 0

        # No point in sleeping after the final round.
        if round_no < total_rounds:
            print('\n⏳ Waiting 30 seconds before next check...')
            time.sleep(30)

    gave_up_report = (
        '',
        separator,
        '⚠️ Deployment still in progress after 10 minutes.',
        'This is normal for first deployment or major updates.',
        'Check the Space logs at:',
        f'{SPACE_URL}',
    )
    for line in gave_up_report:
        print(line)
    return 1
if __name__ == '__main__':
    # Script entry point: the process exit status is main()'s return value,
    # or 1 when the user aborts the monitoring with Ctrl-C.
    try:
        exit_status = main()
    except KeyboardInterrupt:
        print('\n\n⚠️ Monitoring interrupted by user.')
        exit_status = 1
    sys.exit(exit_status)