from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load the model and tokenizer (TinyLlama, ~1.1B parameters)
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

def get_llm_reply(user_input: str) -> str:
    prompt = f"""You are 'FreightBot', an AI freight agent that helps with truckload bookings, price estimates, and delivery updates. Respond in a short, professional style like a real broker. Always sound confident.

User: {user_input}
FreightBot:"""
    inputs = tokenizer(prompt, return_tensors="pt")
    # Inference only, so skip gradient tracking to save memory.
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.7,
        )
    # Strip the prompt by slicing off its tokens rather than string-replacing it:
    # decoding does not always reproduce the prompt text byte-for-byte, so
    # response.replace(prompt, "") can silently leave the prompt in the output.
    new_tokens = output[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
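
# A minimal smoke test for the function above. This is an illustrative
# addition, not part of the original snippet: the sample question is made up,
# and it assumes the model weights above have already been downloaded.
if __name__ == "__main__":
    reply = get_llm_reply(
        "Can you give me a price estimate for a truckload from Dallas to Atlanta?"
    )
    print(reply)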