File size: 3,382 Bytes
a1da7b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d989bfa
 
 
 
 
 
 
a1da7b7
 
 
 
 
 
d989bfa
 
 
 
a1da7b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# app.py
import base64
import mimetypes
import os
from io import BytesIO

import gradio as gr
from openai import OpenAI
from PIL import Image

# --- Config ---
# OpenRouter model slug for Google's Gemini 2.5 Flash Image preview.
MODEL = "google/gemini-2.5-flash-image-preview"
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")  # set in HF Space secrets

# OpenRouter exposes an OpenAI-compatible API, so the official OpenAI client
# is pointed at its base URL.
# NOTE(review): if the secret is unset, api_key is None and every request
# fails at call time with an auth error — consider failing fast here.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=OPENROUTER_API_KEY,
)

def analyze(prompt, img):
    """Send *prompt* (and optionally an image) to the model via OpenRouter.

    Parameters
    ----------
    prompt : str
        User text prompt.
    img : str | None
        Optional path to an image file (Gradio ``type="filepath"`` hands
        the callback a path string).

    Returns
    -------
    tuple
        ``(response_text, image_obj)`` — the model's text reply (or an
        ``"Error: ..."`` string on failure) and a decoded ``PIL.Image``
        if the response contained one, else ``None``.
    """
    msg_content = [{"type": "text", "text": prompt}]
    if img:
        # Encode the uploaded file as a base64 data URL.  Guess the real
        # MIME type instead of hard-coding PNG so JPEG/WebP uploads are
        # labelled correctly; fall back to PNG when the type is unknown.
        mime, _ = mimetypes.guess_type(img)
        mime = mime or "image/png"
        with open(img, "rb") as f:
            b64data = base64.b64encode(f.read()).decode("utf-8")
        msg_content.append({
            "type": "image_url",
            "image_url": {"url": f"data:{mime};base64,{b64data}"},
        })

    try:
        # Log the request so it shows up in the Space logs for debugging.
        print("=== USER PROMPT ===")
        print(prompt)
        print("Has image?", bool(img))

        completion = client.chat.completions.create(
            model=MODEL,
            messages=[{"role": "user", "content": msg_content}],
        )
        msg = completion.choices[0].message
        response_text = msg.content

        # Log the raw text reply.
        print("=== MODEL RESPONSE ===")
        print(response_text)

        # --- Extract an image from the response, if present ---
        # NOTE(review): generated images appear to come back on a
        # non-standard ``message.images`` list of
        # {"image_url": {"url": "data:image/...;base64,..."}} dicts —
        # confirm against OpenRouter's image-output docs.
        image_obj = None
        if getattr(msg, "images", None):
            url = msg.images[0].get("image_url", {}).get("url")
            if url and url.startswith("data:image"):
                _header, b64 = url.split(",", 1)
                image_obj = Image.open(BytesIO(base64.b64decode(b64)))

        return response_text, image_obj
    except Exception as e:
        # Surface the failure in the UI rather than crashing the Space.
        return f"Error: {e}", None


# --- UI layout: header text, chat history, prompt + optional image, output ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:

    gr.Markdown("## Gemini 2.5 Flash Image (Preview) — via OpenRouter")
    gr.Markdown(
        "Generate **and** edit images with Google’s latest Gemini 2.5 Flash Image model "
        "(a.k.a. *nano-banana*). Try text-based edits (remove objects, restyle, relight), "
        "maintain character consistency, or **fuse multiple images** into one.\n\n"
        "**Notes:** The model is in preview; quality and output format may evolve. "
        "Images include Google’s SynthID watermark for provenance.\n\n"
        "— built with ❤️ by [xhaheen](https://www.linkedin.com/in/sallu-mandya/)"
    )
    with gr.Row():
        chatbot = gr.Chatbot(label="Conversation", show_copy_button=True)
    with gr.Row():
        txt = gr.Textbox(label="Prompt", scale=4)
        # type="filepath" → the click handler receives a path string,
        # which is what analyze() expects.
        img = gr.Image(type="filepath", label="Optional Image", scale=1)
        send_btn = gr.Button("Send")
    output_image = gr.Image(label="Generated Image", interactive=False)

    def user_interact(history, prompt, image):
        """Send-button handler: run the model, append (prompt, reply) to chat.

        Returns (updated chat history, generated image or None, cleared
        textbox value).
        """
        # Nothing to send: keep the history as-is and clear nothing.
        if not prompt and not image:
            return gr.update(value=history), None, ""
        response, gen_img = analyze(prompt, image)
        history = history or []  # history is None before the first turn
        history.append((prompt, response))
        return history, gen_img, ""

    send_btn.click(
        user_interact,
        inputs=[chatbot, txt, img],
        outputs=[chatbot, output_image, txt],
    )

if __name__ == "__main__":
    demo.queue()  # enable Gradio's request queue before launching
    demo.launch(debug=True)