import gradio as gr
from transformers import pipeline

# ---------- Default Models ----------
DEFAULT_MODELS = {
    "prompt_refiner": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "code_model": "codellama/CodeLlama-7b-Instruct-hf",
    "book_model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "critic_1": "google/gemma-2-9b-it",
    "critic_2": "meta-llama/Meta-Llama-3-8B-Instruct"
}

# ---------- Model Descriptions ----------
MODEL_INFO = {
    "mistralai/Mixtral-8x7B-Instruct-v0.1": "Balanced generalist; strong in structured reasoning and storytelling.",
    "codellama/CodeLlama-7b-Instruct-hf": "Excellent code generator; best for pseudocode expansion and logic clarity.",
    "tiiuae/falcon-7b-instruct": "Fast and lightweight; good for simple creative text, but less technical precision.",
    "google/gemma-2-9b-it": "Analytical critic that provides detailed, structured feedback.",
    "meta-llama/Meta-Llama-3-8B-Instruct": "Balanced critic; creative and nuanced, slightly more lenient.",
    "phind/Phind-CodeLlama-34B-v2": "Expert coder model; verbose but deeply logical and precise.",
    "stabilityai/stablelm-2-12b": "Fluent natural-language generator; great for fiction and tone consistency.",
    "gpt2-small": "117M params: Very light text generation for prototyping.",
    "tinybert-4": "14M params: Extremely compact, best for classification or short text outputs.",
    "smollm2-135m": "135M params: Small generative tasks with lightweight footprint.",
    "qwen2.5-0.5b-instruct": "Approx 500M params: Instruction-following moderate model.",
    "tinyllama-1.1b": "Approx 1.1B params: General purpose small LLM, story & code generation.",
    "llama3.2-1b": "Approx 1B params: Balanced small LLM for chat and generation."
}

# ---------- Helpers ----------
def load_pipeline(model_name):
    return pipeline("text-generation", model=model_name)

# ---------- Core Logic ----------
def refine_prompt(idea, model_name):
    model = load_pipeline(model_name)
    refined = model(
        f"Refine this creative idea into a concise, high-quality prompt: {idea}",
        max_new_tokens=200
    )[0]["generated_text"]
    return refined.strip()

def generate_code(prompt, model_name):
    model = load_pipeline(model_name)
    pseudo = model(f"Create simple pseudocode for: {prompt}", max_new_tokens=200)[0]["generated_text"]
    simple = model(f"Expand this pseudocode into a simple code snippet:\n{pseudo}", max_new_tokens=300)[0]["generated_text"]
    full = model(f"Turn this snippet into a complete, functional program:\n{simple}", max_new_tokens=700)[0]["generated_text"]
    return pseudo.strip(), simple.strip(), full.strip()

def generate_book(prompt, model_name):
    model = load_pipeline(model_name)
    structure = ["Start", "Development", "Climax", "Conclusion", "End"]
    parts = []
    for section in structure:
        part = model(f"Write the {section} section of a short book based on this idea: {prompt}", max_new_tokens=400)[0]["generated_text"]
        parts.append(f"### {section}\n{part.strip()}\n")
    return "\n".join(parts)

def refine_output(output_text, model_name):
    model = load_pipeline(model_name)
    refined = model(f"Improve this text/code while preserving meaning and coherence:\n{output_text}", max_new_tokens=700)[0]["generated_text"]
    return refined.strip()

def get_critic_feedback(output_text, model1_name, model2_name):
    critic1 = load_pipeline(model1_name)
    critic2 = load_pipeline(model2_name)
    critique_1 = critic1(f"Rate this text from 0 to 100 and justify the score briefly:\n{output_text}", max_new_tokens=200)[0]["generated_text"]
    critique_2 = critic2(f"Rate this text from 0 to 100 and justify the score briefly:\n{output_text}", max_new_tokens=200)[0]["generated_text"]
    return critique_1.strip(), critique_2.strip()

# ---------- Workflow ----------
def workflow(idea, mode, prompt_model, code_model, book_model, critic1_model, critic2_model):
    refined_prompt = refine_prompt(idea, prompt_model)

    if mode == "Code mode":
        pseudo, simple, full = generate_code(refined_prompt, code_model)
        generated_output = f"## Refined Prompt\n{refined_prompt}\n\n### Pseudocode\n{pseudo}\n\n### Simple Code\n{simple}\n\n### Final Code\n{full}"
        refined_final = refine_output(full, prompt_model)
    else:
        book_text = generate_book(refined_prompt, book_model)
        generated_output = f"## Refined Prompt\n{refined_prompt}\n\n{book_text}"
        refined_final = refine_output(book_text, prompt_model)

    # Critics now evaluate the REFINED version
    feedback1, feedback2 = get_critic_feedback(refined_final, critic1_model, critic2_model)

    return refined_prompt, generated_output, refined_final, feedback1, feedback2


import gradio as gr
from transformers import pipeline

# -------------------------------
# Model configuration dictionary
# -------------------------------
MODEL_INFO = {
    "gpt2": "117M params: Classic small model for text generation, coherent short outputs.",
    "tiiuae/falcon-rw-1b": "1B params: Lightweight general model, good for creative text or simple logic.",
    "microsoft/phi-2": "2.7B params: Compact and strong for reasoning or code, moderate GPU load.",
    "Qwen/Qwen2.5-0.5B-Instruct": "0.5B params: Efficient instruction model, performs well for structured prompts.",
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0": "1.1B params: Balanced, fast, and decent for storytelling and small code snippets.",
    "SmolLM2-135M": "135M params: Extremely light, suitable for quick text generation with limited coherence.",
}

def get_model_description(model_name):
    """Return description of selected model."""
    return MODEL_INFO.get(model_name, "Select a model to view details.")

# -------------------------------
# Pipelines Setup
# -------------------------------
def load_pipeline(model_name):
    """Load a text generation pipeline for a given model."""
    return pipeline("text-generation", model=model_name, device_map="auto")

# Default base models for specific roles
REFINER_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
CRITIC_MODEL_1 = "Qwen/Qwen2.5-0.5B-Instruct"
CRITIC_MODEL_2 = "tiiuae/falcon-rw-1b"

# Preload pipelines for speed
refiner_pipe = load_pipeline(REFINER_MODEL)
critic_pipe_1 = load_pipeline(CRITIC_MODEL_1)
critic_pipe_2 = load_pipeline(CRITIC_MODEL_2)

# -------------------------------
# Core Logic
# -------------------------------
def workflow(idea, mode, model_name):
    # Step 1: Refine the idea
    ref_prompt = f"Refine this idea into a clear, specific prompt for {mode}:\n\n{idea}"
    refined = refiner_pipe(ref_prompt, max_new_tokens=120, temperature=0.7)[0]["generated_text"]

    # Step 2: Generate output
    gen_pipe = load_pipeline(model_name)
    if mode == "Code mode":
        code_prompt = f"Create complete working code for this idea:\n\n{refined}\nInclude comments and clear structure."
    else:
        code_prompt = f"Write a short book with sections: Start, Development, Climax, Conclusion, and End. The theme:\n\n{refined}"

    output = gen_pipe(code_prompt, max_new_tokens=500, temperature=0.8, do_sample=True)[0]["generated_text"]

    # Step 3: Critics
    critique_prompt = (
        f"Rate the following {mode} output from 0 to 100, and explain weaknesses and improvements:\n\n{output}"
    )
    feedback_1 = critic_pipe_1(critique_prompt, max_new_tokens=200)[0]["generated_text"]
    feedback_2 = critic_pipe_2(critique_prompt, max_new_tokens=200)[0]["generated_text"]

    # Try to extract a numeric rating
    def extract_score(text):
        import re
        match = re.search(r"(\d{1,3})", text)
        if match:
            score = int(match.group(1))
            return min(100, max(0, score))
        return 50

    score1 = extract_score(feedback_1)
    score2 = extract_score(feedback_2)
    avg_score = (score1 + score2) / 2

    # Step 4: Refine based on critics
    refine_final_prompt = f"Refine this output based on these two critics' feedbacks:\n\nCritic 1: {feedback_1}\n\nCritic 2: {feedback_2}\n\nOriginal Output:\n{output}"
    final_output = refiner_pipe(refine_final_prompt, max_new_tokens=400)[0]["generated_text"]

    combined_feedback = f"Critic 1 ({score1}/100): {feedback_1}\n\nCritic 2 ({score2}/100): {feedback_2}"

    return refined, output, avg_score, combined_feedback, final_output


# -------------------------------
# Gradio Interface
# -------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🧩 AI Idea Refinement & Creation Workflow")

    idea_input = gr.Textbox(
        label="💡 Your Idea",
        placeholder="Type your idea (e.g. 'A tool that teaches programming using natural language puzzles')"
    )

    mode = gr.Radio(["Code mode", "Book mode"], label="Select Mode", value="Code mode")

    with gr.Accordion("⚙️ Advanced Options", open=False):
        model_dropdown = gr.Dropdown(
            choices=list(MODEL_INFO.keys()),
            value="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            label="Model for Main Generation"
        )
        model_desc = gr.Markdown(get_model_description("TinyLlama/TinyLlama-1.1B-Chat-v1.0"))
        model_dropdown.change(fn=get_model_description, inputs=model_dropdown, outputs=model_desc)

    refined_prompt = gr.Textbox(label="🧠 Refined Prompt", interactive=False)
    initial_output = gr.Textbox(label="🧾 Generated Output (Pre-Critics)", lines=10)
    critic_score = gr.Number(label="📊 Average Score (0–100)", interactive=False)
    critic_feedback = gr.Textbox(label="🧩 Critics’ Combined Feedback", lines=10)
    refined_output = gr.Textbox(label="💎 Final Refined Output (Post-Critics)", lines=10)

    run_button = gr.Button("🚀 Run Full Workflow")

    run_button.click(
        fn=workflow,
        inputs=[idea_input, mode, model_dropdown],
        outputs=[refined_prompt, initial_output, critic_score, critic_feedback, refined_output]
    )

demo.launch()