"""Shiba Kamran AI — model setup, language detection and reply generation."""

import tempfile

import gradio as gr
import torch
from gtts import gTTS
from langdetect import DetectorFactory, LangDetectException, detect
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Make langdetect deterministic — by default its results can vary between runs.
DetectorFactory.seed = 0

MODEL_NAME = "facebook/blenderbot-90M"  # lightweight model for Hugging Face Spaces
ASSISTANT_NAME = "Shiba Kamran AI"
MAX_CONTEXT_EXCHANGES = 6  # number of recent (user, assistant) pairs kept in the prompt

# Load model and tokenizer once at startup (downloads on first run).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)


def detect_language(text):
    """Return the ISO 639-1 language code detected for *text*.

    Falls back to "en" when detection fails (empty or ambiguous input
    makes langdetect raise LangDetectException).
    """
    try:
        return detect(text)
    except LangDetectException:
        return "en"


def choose_tts_lang(code):
    """Map a detected language code to a gTTS-supported code, defaulting to "en"."""
    mapping = {
        "en": "en",
        "ur": "ur",
        "hi": "hi",
        "fr": "fr",
        "tr": "tr",
        "ja": "ja",
        "jp": "ja",  # non-standard code sometimes produced for Japanese
        "ar": "ar",
        "es": "es",
        "pt": "pt",
        "de": "de",
    }
    return mapping.get(code, "en")


def build_prompt(history, user_message):
    """Build the model prompt: persona + recent conversation + the new user turn.

    *history* is a list of (user, assistant) string pairs; only the last
    MAX_CONTEXT_EXCHANGES pairs are included to bound the prompt length.
    """
    persona = (
        f"You are {ASSISTANT_NAME}. "
        "You are calm, wise, friendly and professional. "
        "Answer clearly and helpfully. "
        "Match the user's language and tone. "
        "Keep replies concise and polite."
    )
    recent = history[-MAX_CONTEXT_EXCHANGES:]
    convo_lines = []
    for u, a in recent:
        convo_lines.append(f"User: {u}")
        convo_lines.append(f"Assistant: {a}")
    convo_text = "\n".join(convo_lines)
    return f"{persona}\n\nConversation:\n{convo_text}\nUser: {user_message}\nAssistant:"


def generate_reply(prompt):
    """Generate the assistant's reply for *prompt*; never returns an empty string."""
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    with torch.no_grad():  # inference only — no gradients needed
        out = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_length=100,
        )
    reply = tokenizer.decode(out[0], skip_special_tokens=True)
    return reply.strip() or "Sorry, I could not form an answer. Can you ask in another way?"
def make_tts(text, lang_code):
    """Synthesize *text* to an mp3 with gTTS and return the file path.

    Returns None on any failure (network error, unsupported language) —
    TTS is best-effort and must never break the chat flow.
    """
    try:
        tts = gTTS(text=text, lang=lang_code)
        tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        tmp_file.close()
        tts.save(tmp_file.name)
        return tmp_file.name
    except Exception:
        return None


def chat(user_message, history):
    """Run one chat turn: detect language, generate a reply, append to history.

    Returns (reply_text, audio_path_or_None, updated_history). On empty
    input, returns the history unchanged (normalized to a list so the
    state component never receives None).
    """
    history = history or []
    if not user_message or user_message.strip() == "":
        return "", None, history
    lang_code = detect_language(user_message)
    prompt = build_prompt(history, user_message)
    reply = generate_reply(prompt)
    history.append((user_message, reply))
    audio_path = make_tts(reply, choose_tts_lang(lang_code))
    return reply, audio_path, history


with gr.Blocks() as demo:
    gr.Markdown("# Shiba Kamran AI")
    gr.Markdown("A calm, wise, friendly and professional multilingual assistant. Ask in any language. Voice replies included.")

    user_input = gr.Textbox(placeholder="Type your message here")
    send = gr.Button("Send")
    clear = gr.Button("New Chat")
    # NOTE(review): `source=` is the Gradio 3 API; Gradio 4 renamed it to
    # `sources=["microphone"]` — confirm against the pinned gradio version.
    audio_in = gr.Audio(source="microphone", type="filepath", label="Or speak to Shiba (optional)")
    state = gr.State([])
    reply_box = gr.Textbox(label="Shiba's reply", interactive=False)
    audio_out = gr.Audio(label="Shiba's voice reply", type="filepath")
    history_box = gr.Textbox(label="Conversation history (recent)", interactive=False)

    def handle_submit(text, audio_file, history):
        """Handle a send/record event: optionally transcribe audio, then chat.

        If an audio file is present, tries to transcribe it with
        speech_recognition; on any failure (missing package, unreadable
        audio, API error) silently falls back to the typed text.
        """
        if audio_file:
            try:
                import speech_recognition as sr  # optional dependency

                r = sr.Recognizer()
                with sr.AudioFile(audio_file) as source:
                    audio_data = r.record(source)
                text = r.recognize_google(audio_data)
            except Exception:
                pass  # fallback: use typed text
        reply, audio_path, new_history = chat(text, history)
        history_display = "\n\n".join(
            f"{i+1}. You: {u}\n{ASSISTANT_NAME}: {a}"
            for i, (u, a) in enumerate(new_history[-20:])
        )
        return new_history, reply, audio_path, history_display

    send.click(
        handle_submit,
        inputs=[user_input, audio_in, state],
        outputs=[state, reply_box, audio_out, history_box],
    )
    audio_in.change(
        handle_submit,
        inputs=[user_input, audio_in, state],
        outputs=[state, reply_box, audio_out, history_box],
    )
    # Reset all four outputs for a fresh conversation.
    clear.click(lambda: ([], "", None, ""), outputs=[state, reply_box, audio_out, history_box])

demo.launch()