from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load the model (TinyLlama)
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

def get_llm_reply(user_input: str) -> str:
    # Wrap the user message in a simple User/Assistant prompt
    prompt = f"User: {user_input}\nAssistant:"
    inputs = tokenizer(prompt, return_tensors="pt")
    # Sample up to 100 new tokens at a moderate temperature
    output = model.generate(
        **inputs,
        max_new_tokens=100,
        do_sample=True,
        temperature=0.7,
    )
    # Decode the full sequence, then strip the prompt so only the reply remains
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response.replace(prompt, "").strip()
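
# A minimal usage sketch, assuming the model above has finished loading.
# The example question is illustrative only, not part of the original.
if __name__ == "__main__":
    reply = get_llm_reply("What is the capital of France?")
    print(reply)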