File size: 4,428 Bytes
33a5595
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c4b529
33a5595
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import base64
import io
import os
from dotenv import load_dotenv
from PIL import Image
import gradio as gr
from openai import OpenAI
import re

# Config
# Settings come from the process environment; load_dotenv() is called first so
# values from a local .env file (if any) are visible to os.getenv below.
load_dotenv()
APP_Name    = os.getenv("APP_Name", "Ghaymah Vision QA")
APP_Version = os.getenv("APP_Version", "1.0.0")
API_KEY     = os.getenv("API_KEY")
BASE_URL    = os.getenv("BASE_URL", "https://genai.ghaymah.systems")

# Extra stylesheet handed to gr.Blocks; styles the custom HTML header.
CSS = """
.app-header{display:flex;align-items:center;gap:12px;justify-content:center;margin:6px 0 16px}
.app-header img{height:60px;border-radius:12px}
.app-title{font-weight:800;font-size:28px;line-height:1.1}
.app-sub{opacity:.7;font-size:14px}
"""

# Branding
COMPANY_LOGO = "download.jpeg"  # logo file path, relative to the working directory
OWNER_NAME   = "ENG. Ahmed Yasser El Sharkawy"

# Use the configurable BASE_URL (previously the URL was hard-coded here, so the
# BASE_URL environment variable was read but silently ignored).
client = OpenAI(api_key=API_KEY, base_url=BASE_URL)

# Lookup table: Pillow format name (upper-case) -> MIME type for data URLs.
PIL_TO_MIME = dict(
    JPEG="image/jpeg",
    PNG="image/png",
    WEBP="image/webp",
    GIF="image/gif",
    BMP="image/bmp",
    TIFF="image/tiff",
)

def encode_image_to_data_url(pil_image: Image.Image) -> str:
    """Serialize a PIL image into a base64 ``data:`` URL.

    The image is written in its own format when that format has an entry in
    ``PIL_TO_MIME``; otherwise (including when ``pil_image.format`` is unset)
    it is written as PNG.

    Args:
        pil_image: The image to encode.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.
    """
    fmt = (pil_image.format or "PNG").upper()
    if fmt not in PIL_TO_MIME:
        # Previously an unmapped format was still used for saving while the
        # MIME type fell back to image/png, producing a mislabeled payload.
        # Re-encode as PNG so the declared type matches the bytes.
        fmt = "PNG"
    mime = PIL_TO_MIME[fmt]

    # Modes other than RGB / L (e.g. with alpha or a palette) are converted
    # to RGB before being written as JPEG.
    if fmt == "JPEG" and pil_image.mode not in ("RGB", "L"):
        pil_image = pil_image.convert("RGB")

    buf = io.BytesIO()
    pil_image.save(buf, format=fmt)
    b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
    return f"data:{mime};base64,{b64}"

def logo_data_uri(path: str) -> str:
    """Return the file at *path* as a base64 ``data:`` URI, or ``""`` if unreadable.

    The MIME type is chosen from the file extension (case-insensitive);
    unrecognised extensions are labelled ``image/png``.

    Args:
        path: Filesystem path of the logo image.

    Returns:
        ``data:<mime>;base64,<payload>``, or an empty string when the file
        does not exist or cannot be read (e.g. it is a directory).
    """
    # Try to read directly instead of checking os.path.exists() first: the
    # existence check let directories and unreadable files through, which then
    # raised from open() while the UI was being built.
    try:
        with open(path, "rb") as f:
            raw = f.read()
    except (OSError, ValueError):
        return ""

    ext = os.path.splitext(path)[1].lower()
    mime = {
        ".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg",
        ".webp": "image/webp", ".gif": "image/gif",
    }.get(ext, "image/png")
    b64 = base64.b64encode(raw).decode("utf-8")
    return f"data:{mime};base64,{b64}"


def to_plain_text(s: str) -> str:
    """Strip common Markdown markup from *s* for display in a plain textbox.

    Handles, in order: list markers (``-`` / ``*`` at line start become
    ``•``), ``**bold**``, ``*italic*`` and backtick-delimited code.

    Args:
        s: Markdown-flavoured text.

    Returns:
        The text with the markup characters removed.
    """
    # Bullets are converted first. If emphasis were stripped first, the
    # leading "*" of a list item would pair with a later "*" on the same line
    # and corrupt it ("* item with *emph*"). Only spaces/tabs are consumed
    # around the marker so blank lines before a list are preserved.
    s = re.sub(r'^[ \t]*[-*][ \t]+', '• ', s, flags=re.M)   # bullets
    s = re.sub(r'\*\*(.*?)\*\*', r'\1', s)                  # bold
    s = re.sub(r'\*(.*?)\*', r'\1', s)                      # italics
    s = re.sub(r'`{1,3}(.*?)`{1,3}', r'\1', s, flags=re.S)  # code
    return s

def ask_image_question(image: Image.Image, question: str):
    """Send an image and a question to the vision model and return its reply.

    Args:
        image: The uploaded picture, or ``None`` when nothing was uploaded.
        question: The user's question; blank input falls back to a generic
            "describe" prompt.

    Returns:
        The model's answer with Markdown markup stripped, or a user-facing
        warning / error string.
    """
    # Guard: nothing to analyse without an image.
    if image is None:
        return "⚠️ Please upload an image first."

    prompt = (question or "").strip() or "Describe this image."

    try:
        image_part = {
            "type": "image_url",
            "image_url": {"url": encode_image_to_data_url(image)},
        }
        text_part = {"type": "text", "text": prompt}
        completion = client.chat.completions.create(
            model="gemma-3-4b-it",
            messages=[{"role": "user", "content": [text_part, image_part]}],
            max_tokens=5000,
            temperature=0.2,
        )
        answer = completion.choices[0].message.content
        return to_plain_text(answer or "")
    except Exception as e:
        # UI boundary: report any failure as text in the answer box.
        return f"❌ Error: {e}"

# Gradio UI
with gr.Blocks(title=f"{APP_Name} v{APP_Version}", css=CSS) as demo:
    # Header: optional inline logo (embedded as a data URI) plus title/subtitle.
    header_logo_src = logo_data_uri(COMPANY_LOGO)
    logo_html = f"<img src='{header_logo_src}' alt='logo'>" if header_logo_src else ""
    # The subtitle separates version and owner with " · "; they were previously
    # concatenated directly and rendered as e.g. "v1.0.0ENG. ...".
    gr.HTML(f"""
    <div class="app-header">
        {logo_html}
        <div class="app-header-text">
            <div class="app-title">{APP_Name}</div>
            <div class="app-sub">v{APP_Version} · {OWNER_NAME}</div>
        </div>
    </div>
    """)

    with gr.Row():
        # Left column: image -> question -> ask button
        with gr.Column(scale=3):
            image_in = gr.Image(type="pil", label="Upload image", sources=["upload", "clipboard"])
            question_in = gr.Textbox(label="Your question",
                                     placeholder="e.g., What objects do you see? What is happening?",
                                     lines=3)
            ask_btn = gr.Button("Ask", variant="primary")

        # Right column: logo -> answer box
        with gr.Column(scale=2, min_width=320):
            # Only add the logo component when the file is actually present.
            if os.path.exists(COMPANY_LOGO):
                gr.Image(COMPANY_LOGO, show_label=False, container=False, height=96)
            answer_out = gr.Textbox(label="Answer", lines=14, interactive=False, show_copy_button=True)

    # Both the button click and pressing Enter in the question box run the query.
    ask_btn.click(ask_image_question, [image_in, question_in], [answer_out])
    question_in.submit(ask_image_question, [image_in, question_in], [answer_out])


def _launch() -> None:
    """Launch the Gradio app with debug mode enabled."""
    demo.launch(debug=True)


# Only launch when executed as a script, not when imported.
if __name__ == "__main__":
    _launch()