"""Shiba Kamran AI — model setup, language detection and reply generation."""

import tempfile

import gradio as gr
import torch
from gtts import gTTS
from langdetect import DetectorFactory, LangDetectException, detect
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Make langdetect deterministic — by default its results can vary between runs.
DetectorFactory.seed = 0

MODEL_NAME = "facebook/blenderbot-90M"  # lightweight model for Hugging Face Spaces
ASSISTANT_NAME = "Shiba Kamran AI"
MAX_CONTEXT_EXCHANGES = 6  # number of recent (user, assistant) pairs kept in the prompt

# Load model and tokenizer once at startup (downloads on first run).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)


def detect_language(text):
    """Return the ISO 639-1 language code detected for *text*.

    Falls back to "en" when detection fails (empty or ambiguous input
    makes langdetect raise LangDetectException).
    """
    try:
        return detect(text)
    except LangDetectException:
        return "en"


def choose_tts_lang(code):
    """Map a detected language code to a gTTS-supported code, defaulting to "en"."""
    mapping = {
        "en": "en",
        "ur": "ur",
        "hi": "hi",
        "fr": "fr",
        "tr": "tr",
        "ja": "ja",
        "jp": "ja",  # non-standard code sometimes produced for Japanese
        "ar": "ar",
        "es": "es",
        "pt": "pt",
        "de": "de",
    }
    return mapping.get(code, "en")


def build_prompt(history, user_message):
    """Build the model prompt: persona + recent conversation + the new user turn.

    *history* is a list of (user, assistant) string pairs; only the last
    MAX_CONTEXT_EXCHANGES pairs are included to bound the prompt length.
    """
    persona = (
        f"You are {ASSISTANT_NAME}. "
        "You are calm, wise, friendly and professional. "
        "Answer clearly and helpfully. "
        "Match the user's language and tone. "
        "Keep replies concise and polite."
    )
    recent = history[-MAX_CONTEXT_EXCHANGES:]
    convo_lines = []
    for u, a in recent:
        convo_lines.append(f"User: {u}")
        convo_lines.append(f"Assistant: {a}")
    convo_text = "\n".join(convo_lines)
    return f"{persona}\n\nConversation:\n{convo_text}\nUser: {user_message}\nAssistant:"


def generate_reply(prompt):
    """Generate the assistant's reply for *prompt*; never returns an empty string."""
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    with torch.no_grad():  # inference only — no gradients needed
        out = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_length=100,
        )
    reply = tokenizer.decode(out[0], skip_special_tokens=True)
    return reply.strip() or "Sorry, I could not form an answer. Can you ask in another way?"
def make_tts(text, lang_code):
    """Synthesize *text* to an mp3 with gTTS and return the file path.

    Returns None on any failure (network error, unsupported language) —
    TTS is best-effort and must never break the chat flow.
    """
    try:
        tts = gTTS(text=text, lang=lang_code)
        tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        tmp_file.close()
        tts.save(tmp_file.name)
        return tmp_file.name
    except Exception:
        return None


def chat(user_message, history):
    """Run one chat turn: detect language, generate a reply, append to history.

    Returns (reply_text, audio_path_or_None, updated_history). On empty
    input, returns the history unchanged (normalized to a list so the
    state component never receives None).
    """
    history = history or []
    if not user_message or user_message.strip() == "":
        return "", None, history
    lang_code = detect_language(user_message)
    prompt = build_prompt(history, user_message)
    reply = generate_reply(prompt)
    history.append((user_message, reply))
    audio_path = make_tts(reply, choose_tts_lang(lang_code))
    return reply, audio_path, history


with gr.Blocks() as demo:
    gr.Markdown("# Shiba Kamran AI")
    gr.Markdown("A calm, wise, friendly and professional multilingual assistant. Ask in any language. Voice replies included.")

    user_input = gr.Textbox(placeholder="Type your message here")
    send = gr.Button("Send")
    clear = gr.Button("New Chat")
    # NOTE(review): `source=` is the Gradio 3 API; Gradio 4 renamed it to
    # `sources=["microphone"]` — confirm against the pinned gradio version.
    audio_in = gr.Audio(source="microphone", type="filepath", label="Or speak to Shiba (optional)")
    state = gr.State([])
    reply_box = gr.Textbox(label="Shiba's reply", interactive=False)
    audio_out = gr.Audio(label="Shiba's voice reply", type="filepath")
    history_box = gr.Textbox(label="Conversation history (recent)", interactive=False)

    def handle_submit(text, audio_file, history):
        """Handle a send/record event: optionally transcribe audio, then chat.

        If an audio file is present, tries to transcribe it with
        speech_recognition; on any failure (missing package, unreadable
        audio, API error) silently falls back to the typed text.
        """
        if audio_file:
            try:
                import speech_recognition as sr  # optional dependency

                r = sr.Recognizer()
                with sr.AudioFile(audio_file) as source:
                    audio_data = r.record(source)
                text = r.recognize_google(audio_data)
            except Exception:
                pass  # fallback: use typed text
        reply, audio_path, new_history = chat(text, history)
        history_display = "\n\n".join(
            f"{i+1}. You: {u}\n{ASSISTANT_NAME}: {a}"
            for i, (u, a) in enumerate(new_history[-20:])
        )
        return new_history, reply, audio_path, history_display

    send.click(
        handle_submit,
        inputs=[user_input, audio_in, state],
        outputs=[state, reply_box, audio_out, history_box],
    )
    audio_in.change(
        handle_submit,
        inputs=[user_input, audio_in, state],
        outputs=[state, reply_box, audio_out, history_box],
    )
    # Reset all four outputs for a fresh conversation.
    clear.click(lambda: ([], "", None, ""), outputs=[state, reply_box, audio_out, history_box])

demo.launch()