# services/ai_assistant.py
import io
import base64
import os
import asyncio

from dotenv import load_dotenv
from huggingface_hub import InferenceClient

load_dotenv()


class HuggingFaceAssistantService:
    def __init__(self):
        token = os.getenv("HUGGINGFACE_API_TOKEN")
        if not token:
            raise RuntimeError("HUGGINGFACE_API_TOKEN not set")
        self.client = InferenceClient(api_key=token)

    async def ask(self, question: str, model: str | None):
        model_repo = model or "meta-llama/Llama-3.2-3B-Instruct"

        if model_repo.startswith("black-forest-labs/"):
            # Generate image in a thread
            image = await asyncio.to_thread(
                self.client.text_to_image,
                prompt=question,
                model=model_repo,
            )
            # Convert to base64
            buf = io.BytesIO()
            image.save(buf, format="PNG")
            img_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
            return {"type": "image", "content": img_b64}
        else:
            completion = await asyncio.to_thread(
                self.client.chat.completions.create,
                model=model_repo,
                messages=[{"role": "user", "content": question}],
            )
            return {"type": "text", "content": completion.choices[0].message.content}
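

# --- Usage sketch (illustrative only, not part of the service itself) ---
# A minimal way to exercise the service from a script, assuming
# HUGGINGFACE_API_TOKEN is available via the environment or a .env file.
# The prompts and the "black-forest-labs/FLUX.1-schnell" model name below
# are hypothetical examples chosen for demonstration.
if __name__ == "__main__":

    async def _demo() -> None:
        service = HuggingFaceAssistantService()

        # Text completion via the default chat model.
        reply = await service.ask(
            "Summarize asyncio.to_thread in one sentence.", model=None
        )
        print(reply["type"], reply["content"])

        # Repos under "black-forest-labs/" are routed to text_to_image
        # and returned as a base64-encoded PNG.
        picture = await service.ask(
            "A lighthouse at dusk, watercolor style",
            model="black-forest-labs/FLUX.1-schnell",
        )
        print(picture["type"], len(picture["content"]), "base64 characters")

    asyncio.run(_demo())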