import json
import os
import re

from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Load environment variables from a .env file if one exists.
load_dotenv()

# Local mode is enabled only when BOTH BASE_URL and TOKEN are set.
BASE_URL = os.getenv('BASE_URL')
LOCAL_TOKEN = os.getenv('TOKEN')
LOCAL_MODE = bool(BASE_URL and LOCAL_TOKEN)
MODEL_NAME = os.getenv('MODEL_NAME', 'openai/gpt-oss-20b')

client = None

# Choose the client based on whether we're running locally or in the cloud.
if LOCAL_MODE:
    # Local inference server: BASE_URL is passed as the client's model.
    client = InferenceClient(model=BASE_URL, token=LOCAL_TOKEN)
else:
    # HF_TOKEN is mandatory on this path; a missing variable raises KeyError
    # while the module is being imported.
    hf_token = os.environ["HF_TOKEN"]
    client = InferenceClient(token=hf_token, model=MODEL_NAME)

# Artifacts stripped from the raw text before any further processing.
_CHANNEL_TAG_RE = re.compile(
    r'<\|channel\|>commentary to=assistant', re.IGNORECASE
)
_CONSTRAIN_TAG_RE = re.compile(
    r'<\|constrain\|>json<\|message\|>', re.IGNORECASE
)
_SPECIAL_TOKEN_RE = re.compile(r'<\|.*?\|>')  # any remaining <|...|> marker

# A {... "response": "<text>" ...} object embedded in surrounding text. The
# capture group accepts backslash escapes, so an escaped quote (\") inside the
# message does not end the match early.
_RESPONSE_FIELD_RE = re.compile(
    r'\{[^}]*"response"\s*:\s*"((?:[^"\\]|\\.)*)"[^}]*\}', re.DOTALL
)

# A line break followed by whitespace-only content up to the next line break.
_BLANK_LINES_RE = re.compile(r'\n\s*\n')


def _extract_json_message(text):
    """Return the string stored under "response" in JSON found in *text*.

    Returns None when *text* carries no usable "response" string, so the
    caller can fall back to plain-text clean-up.
    """
    # Prefer a real JSON parse: it decodes every escape sequence correctly.
    # strict=False also tolerates raw control characters (e.g. literal
    # newlines) inside string values.
    try:
        parsed = json.loads(text.strip(), strict=False)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        parsed = None
    if isinstance(parsed, dict):
        value = parsed.get("response")
        # Only a string is a usable message; anything else (null, numbers,
        # nested objects) is left to the fallbacks below.
        if isinstance(value, str):
            print(f"🔍 DEBUG - Parsed JSON response: {value}")
            return value

    # Otherwise look for a JSON object embedded in surrounding text.
    match = _RESPONSE_FIELD_RE.search(text)
    if match is None:
        return None
    raw = match.group(1)
    try:
        # Decode the captured body as a JSON string literal so that \" and \n
        # style escapes become the characters they stand for.
        value = json.loads(f'"{raw}"', strict=False)
    except ValueError:
        # Not valid JSON escaping: keep the text and only expand \n.
        value = raw.replace("\\n", "\n")
    print(f"🔍 DEBUG - Extracted from JSON: {value}")
    return value


def clean_response(response):
    """Strip metadata and formatting artifacts from a raw model response.

    Processing order:

    1. Remove ``<|...|>`` markers, including the
       ``<|channel|>commentary to=assistant`` and
       ``<|constrain|>json<|message|>`` prefixes.
    2. If the remaining text is, or contains, a JSON object with a string
       ``"response"`` field, return that field's decoded value unchanged.
    3. Otherwise turn literal ``\\n`` sequences into newlines, collapse blank
       lines, and trim surrounding whitespace.

    Args:
        response: Raw text produced by the model. ``None`` and the empty
            string are both accepted.

    Returns:
        The cleaned message as a string; ``""`` for empty or ``None`` input.
    """
    if not response:
        return ""

    text = _CHANNEL_TAG_RE.sub('', response)
    text = _CONSTRAIN_TAG_RE.sub('', text)
    text = _SPECIAL_TOKEN_RE.sub('', text)

    # JSON extraction must see the text BEFORE "\n" is expanded: expanding
    # first would put raw newlines inside JSON strings and corrupt escapes.
    message = _extract_json_message(text)
    if message is not None:
        return message

    # Plain-text path.
    text = text.replace("\\n", "\n")
    text = _BLANK_LINES_RE.sub('\n', text)  # collapse runs of empty lines
    return text.strip()