import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import time
import random

# Load tiny model
tokenizer = AutoTokenizer.from_pretrained("nilq/mistral-1L-tiny")
model = AutoModelForCausalLM.from_pretrained("nilq/mistral-1L-tiny")
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

chat_history = []


def chaotic_ai(user_input):
    global chat_history
    # Trim history to the last 5 messages before appending the new one
    chat_history = chat_history[-5:]
    chat_history.append({"role": "user", "content": user_input})

    try:
        inputs = tokenizer.apply_chat_template(
            chat_history,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(device)
    except Exception as e:
        yield f"[!] Tokenizer crashed: {e}\n"
        return

    output_ids = inputs["input_ids"].clone()
    generated_text = ""

    for _ in range(20):  # low token budget: at most 20 new tokens
        try:
            # 10% chance: skip this step entirely (no token generated)
            if random.random() < 0.1:
                continue

            # 5% chance: repeat the last token instead of sampling a new one
            # (check sequence length, not batch size, on the 2-D tensor)
            if random.random() < 0.05 and output_ids.shape[1] > 0:
                next_token_id = output_ids[0, -1].unsqueeze(0)
            else:
                new_tokens = model.generate(
                    input_ids=output_ids,
                    attention_mask=torch.ones_like(output_ids),
                    max_new_tokens=1,
                    do_sample=True,
                    temperature=1.5,
                    top_k=50,
                    top_p=0.95,
                    pad_token_id=tokenizer.eos_token_id,
                )
                next_token_id = new_tokens[0, -1].unsqueeze(0)

            # 5% chance: raise a deliberate error mid-generation
            if random.random() < 0.05:
                raise RuntimeError("Random token generation failure!")

            output_ids = torch.cat([output_ids, next_token_id.unsqueeze(0)], dim=1)

            # Decode only the latest token
            token_str = tokenizer.decode(next_token_id)

            # Glitch each character with 15% probability
            token_glitch = "".join(
                c if random.random() > 0.15 else random.choice("@#$%&?")
                for c in token_str
            )

            # 5% chance: truncate the token mid-way
            if random.random() < 0.05:
                token_glitch = token_glitch[: max(1, len(token_glitch) // 2)]

            # 5% chance: erase 1-3 previously emitted characters
            if random.random() < 0.05 and len(generated_text) > 0:
                erase_len = random.randint(1, min(3, len(generated_text)))
                generated_text = generated_text[:-erase_len]

            generated_text += token_glitch

            # Random slowdown: 0.4s to 1.0s per token
            time.sleep(0.4 + random.random() * 0.6)

            # Stream the partial text back to the UI
            yield generated_text

        except Exception as e:
            yield f"[!] Crash: {e}\n"

    # 10% chance: store a duplicated response in history
    if random.random() < 0.1:
        chat_history.append({"role": "assistant", "content": generated_text * 2})
    else:
        chat_history.append({"role": "assistant", "content": generated_text})


iface = gr.Interface(
    fn=chaotic_ai,
    inputs=gr.Textbox(label="You"),
    outputs=gr.Textbox(label="AI"),
    title="💀 Insane Chaotic Tiny AI",
    description="Slow, glitchy, buggy, erasing, repeating, chaotic AI. Every terrible behavior is real.",
    live=True,
)

iface.launch()
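
# Usage sketch (the filename below is hypothetical; assumes `gradio`,
# `transformers`, and `torch` are installed):
#
#   python chaotic_ai.py
#
# launch() serves the demo locally at http://127.0.0.1:7860 by default.
# Note that live=True makes gr.Interface re-run chaotic_ai on every change
# to the textbox rather than waiting for a submit, so the slow streaming
# generator restarts on each keystroke -- part of the intended chaos.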