Spaces:

Ghaymah
/

Vision-Q-A-GenAI-Ghaymah

Running

File size: 4,428 Bytes

import base64
import io
import os
from dotenv import load_dotenv
from PIL import Image
import gradio as gr
from openai import OpenAI
import re

# Config: load_dotenv() runs first so values from a .env file are visible to
# the os.getenv() calls below.
load_dotenv()
APP_Name    = os.getenv("APP_Name", "Ghaymah Vision QA")
APP_Version = os.getenv("APP_Version", "1.0.0")
API_KEY     = os.getenv("API_KEY")  # no default: None when the variable is unset
BASE_URL    = os.getenv("BASE_URL", "https://genai.ghaymah.systems")

# Stylesheet for the header banner (classes app-header / app-title / app-sub).
CSS = """
.app-header{display:flex;align-items:center;gap:12px;justify-content:center;margin:6px 0 16px}
.app-header img{height:60px;border-radius:12px}
.app-title{font-weight:800;font-size:28px;line-height:1.1}
.app-sub{opacity:.7;font-size:14px}
"""

# Branding
COMPANY_LOGO = "download.jpeg"   # logo image path (relative)
OWNER_NAME   = "ENG. Ahmed Yasser El Sharkawy"

# Use BASE_URL rather than a second hard-coded copy of the endpoint, so that a
# BASE_URL override from the environment/.env actually takes effect. With no
# override the value is the same URL as before.
client = OpenAI(api_key=API_KEY, base_url=BASE_URL)

# Map PIL format names to the MIME type declared in the data URL.
PIL_TO_MIME = {
    "JPEG": "image/jpeg",
    "PNG": "image/png",
    "WEBP": "image/webp",
    "GIF": "image/gif",
    "BMP": "image/bmp",
    "TIFF": "image/tiff",
}

def encode_image_to_data_url(pil_image: "Image.Image") -> str:
    """Serialize *pil_image* into a base64 ``data:`` URL.

    The image is written in its own format when that format has an entry in
    ``PIL_TO_MIME``. An image with no format, or with a format missing from
    the table, is written as PNG, so the MIME type declared in the URL always
    matches the encoded bytes.

    Args:
        pil_image: Image to encode. Only ``format``, ``mode``, ``convert``
            and ``save`` are used.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.
    """
    fmt = (pil_image.format or "PNG").upper()
    if fmt not in PIL_TO_MIME:
        # Previously the bytes were still saved in the unknown format while
        # the URL claimed image/png; re-encode as PNG so both agree.
        fmt = "PNG"
    mime = PIL_TO_MIME[fmt]

    # Modes other than RGB / L (e.g. RGBA, P) are converted before JPEG encoding.
    if fmt == "JPEG" and pil_image.mode not in ("RGB", "L"):
        pil_image = pil_image.convert("RGB")

    buf = io.BytesIO()
    pil_image.save(buf, format=fmt)
    b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
    return f"data:{mime};base64,{b64}"

def logo_data_uri(path: str) -> str:
    """Return the file at *path* as a base64 ``data:`` URI.

    Args:
        path: Location of the image file.

    Returns:
        ``data:<mime>;base64,<payload>``, where the MIME type is chosen from
        the file extension (``image/png`` for unrecognized extensions), or an
        empty string when *path* is not an existing regular file.
    """
    # isfile rather than exists: a directory at *path* passes an exists()
    # check and then makes open() raise.
    if not os.path.isfile(path):
        return ""
    ext = os.path.splitext(path)[1].lower()
    mime = {
        ".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg",
        ".webp": "image/webp", ".gif": "image/gif"
    }.get(ext, "image/png")
    with open(path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode("utf-8")
    return f"data:{mime};base64,{b64}"


def to_plain_text(s: str) -> str:
    """Strip common Markdown markup from *s* and return plain text.

    Handles list markers (``-`` / ``*`` become ``•``), ``**bold**``,
    ``*italic*`` and backtick code spans/fences. Other Markdown is left as is.

    Args:
        s: Markdown-flavoured text.

    Returns:
        The text with the markup above removed.
    """
    # Bullets go first so a leading "* " list marker is never consumed as an
    # emphasis delimiter. Only horizontal whitespace is matched, which keeps
    # blank lines before a list intact.
    s = re.sub(r'^[ \t]*[-*][ \t]+', '• ', s, flags=re.M)
    s = re.sub(r'\*\*(.*?)\*\*', r'\1', s)  # bold
    # Italics: the text must start and end with a non-space character, so
    # free-standing asterisks such as "2 * 3 * 4" are left alone.
    s = re.sub(r'\*(\S(?:.*?\S)?)\*', r'\1', s)
    s = re.sub(r'`{1,3}(.*?)`{1,3}', r'\1', s, flags=re.S)  # code
    return s

def ask_image_question(image: Image.Image, question: str):
    """Ask the vision model a question about *image* and return its answer.

    Args:
        image: The uploaded picture, or None when nothing was uploaded.
        question: The user's question; an empty or whitespace-only value is
            replaced by a generic "describe" prompt.

    Returns:
        The model's reply passed through ``to_plain_text``, a warning string
        when no image was given, or an error string if anything in the
        request path raises.
    """
    if image is None:
        return "⚠️ Please upload an image first."

    # Fall back to a default prompt when the question is empty or blank.
    prompt = (question or "").strip() or "Describe this image."

    try:
        user_message = {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {"url": encode_image_to_data_url(image)},
                },
            ],
        }
        completion = client.chat.completions.create(
            model="gemma-3-4b-it",
            messages=[user_message],
            max_tokens=5000,
            temperature=0.2,
        )
        reply = completion.choices[0].message.content
        return to_plain_text(reply or "")
    except Exception as exc:
        # UI boundary: surface the failure in the answer box instead of raising.
        return f"❌ Error: {exc}"

# Gradio UI: header banner, then a row with inputs (left) and the answer (right).
with gr.Blocks(title=f"{APP_Name} v{APP_Version}", css=CSS) as demo:
    # Embed the logo as a data URI; omit the <img> tag when no logo was loaded.
    header_logo_src = logo_data_uri(COMPANY_LOGO)
    if header_logo_src:
        logo_html = f"<img src='{header_logo_src}' alt='logo'>"
    else:
        logo_html = ""
    header_html = f"""
    <div class="app-header">
        {logo_html}
        <div class="app-header-text">
            <div class="app-title">{APP_Name}</div>
            <div class="app-sub">v{APP_Version}  •  {OWNER_NAME}</div>
        </div>
    </div>
    """
    gr.HTML(header_html)

    with gr.Row():
        # Left column: image -> question -> ask button
        with gr.Column(scale=3):
            image_in = gr.Image(
                type="pil",
                label="Upload image",
                sources=["upload", "clipboard"],
            )
            question_in = gr.Textbox(
                label="Your question",
                placeholder="e.g., What objects do you see? What is happening?",
                lines=3,
            )
            ask_btn = gr.Button("Ask", variant="primary")

        # Right column: logo -> answer box
        with gr.Column(scale=2, min_width=320):
            if os.path.exists(COMPANY_LOGO):
                gr.Image(COMPANY_LOGO, show_label=False, container=False, height=96)
            answer_out = gr.Textbox(
                label="Answer",
                lines=14,
                interactive=False,
                show_copy_button=True,
            )

    # The button click and the textbox submit event run the same handler.
    for register in (ask_btn.click, question_in.submit):
        register(ask_image_question, [image_in, question_in], [answer_out])


# Start the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch(debug=True)