Spaces:
Runtime error
Runtime error
| """ | |
| Inference functionality tests | |
| """ | |
| import pytest | |
| import requests | |
| from typing import Dict, Any | |
class TestInference:
    """Integration tests for the ``/inference`` HTTP endpoint.

    Every test sends a real POST request to the service at ``base_url``,
    so the service must already be running for the tests to pass.
    """

    # Seconds passed to ``requests`` as the timeout. Without a timeout a
    # hung server would block the test run indefinitely. ``requests``
    # applies this value to the connect phase and to each read, not as a
    # total deadline, so it is a safety net rather than a latency budget.
    REQUEST_TIMEOUT = 120

    # Upper bound, in seconds, accepted by the performance test.
    MAX_RESPONSE_SECONDS = 30

    @pytest.fixture
    def base_url(self):
        """Provide the root URL of the service under test.

        This must be a fixture: the test methods receive ``base_url`` as
        an argument, and pytest only injects arguments that are
        registered fixtures.
        """
        return "http://localhost:8000"

    def _post_inference(self, base_url: str, payload: Dict[str, Any]):
        """POST ``payload`` as JSON to ``{base_url}/inference``.

        Not collected by pytest because the name does not start with
        ``test``.

        Args:
            base_url: Root URL of the service, without a trailing slash.
            payload: JSON-serializable request body.

        Returns:
            The ``requests`` response object.
        """
        return requests.post(
            f"{base_url}/inference",
            json=payload,
            timeout=self.REQUEST_TIMEOUT,
        )

    def test_basic_inference(self, base_url):
        """A simple prompt returns 200 with a non-empty response."""
        payload = {
            "prompt": "What is EBITDA?",
            "max_new_tokens": 50,
            "temperature": 0.6,
        }
        response = self._post_inference(base_url, payload)
        assert response.status_code == 200
        data = response.json()
        assert "response" in data
        assert "model_used" in data
        assert len(data["response"]) > 0

    def test_inference_with_different_models(self, base_url):
        """A longer, lower-temperature request returns a substantial answer."""
        payload = {
            "prompt": "Explain financial risk management",
            "max_new_tokens": 100,
            "temperature": 0.3,
        }
        response = self._post_inference(base_url, payload)
        assert response.status_code == 200
        data = response.json()
        assert "response" in data
        assert len(data["response"]) > 50  # Should be substantial response

    def test_inference_error_handling(self, base_url):
        """An empty prompt is handled without a server error."""
        # Test with invalid parameters
        payload = {
            "prompt": "",  # Empty prompt
            "max_new_tokens": 50,
        }
        response = self._post_inference(base_url, payload)
        # Should handle gracefully (either 400 or 200 with error message)
        assert response.status_code in [200, 400]

    def test_inference_performance(self, base_url):
        """A short query completes within ``MAX_RESPONSE_SECONDS``."""
        # Kept as a local import, as in the original, so this block does
        # not depend on a file-level ``import time``.
        import time

        payload = {
            "prompt": "What is the current ratio?",
            "max_new_tokens": 30,
            "temperature": 0.5,
        }
        # perf_counter is monotonic, so the measured interval cannot be
        # skewed by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        response = self._post_inference(base_url, payload)
        response_time = time.perf_counter() - start_time
        assert response.status_code == 200
        # Should respond within reasonable time (adjust based on your setup)
        assert response_time < self.MAX_RESPONSE_SECONDS