# Ahmed-El-Sharkawy's picture
# Update app.py
# 6c4b529 verified
import base64
import io
import os
from dotenv import load_dotenv
from PIL import Image
import gradio as gr
from openai import OpenAI
import re
# Config
# Values come from the process environment (optionally populated from a .env
# file by load_dotenv) and fall back to the defaults given here.
load_dotenv()
APP_Name = os.getenv("APP_Name", "Ghaymah Vision QA")
APP_Version = os.getenv("APP_Version", "1.0.0")
API_KEY = os.getenv("API_KEY")  # no default: None when the variable is unset
BASE_URL = os.getenv("BASE_URL", "https://genai.ghaymah.systems")

# Stylesheet passed to gr.Blocks; styles the header rendered by the UI section.
CSS = """
.app-header{display:flex;align-items:center;gap:12px;justify-content:center;margin:6px 0 16px}
.app-header img{height:60px;border-radius:12px}
.app-title{font-weight:800;font-size:28px;line-height:1.1}
.app-sub{opacity:.7;font-size:14px}
"""

# Branding
COMPANY_LOGO = "download.jpeg"
OWNER_NAME = "ENG. Ahmed Yasser El Sharkawy"

# Honour the BASE_URL setting instead of repeating the literal endpoint, so an
# override in the environment actually takes effect.
# NOTE(review): API_KEY may be None here; confirm how the OpenAI client behaves
# in that case for the installed library version.
client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
# Lookup table from PIL format names to MIME types.
# Every supported format maps to "image/<lower-cased format name>", so the
# table is derived from the ordered list of format names.
PIL_TO_MIME = {
    fmt_name: f"image/{fmt_name.lower()}"
    for fmt_name in ("JPEG", "PNG", "WEBP", "GIF", "BMP", "TIFF")
}
def encode_image_to_data_url(pil_image: Image.Image) -> str:
    """Serialize a PIL image into a base64 ``data:`` URL.

    The image is re-encoded in its own format when that format is listed in
    ``PIL_TO_MIME``; otherwise (no format recorded, or an unlisted one) it is
    encoded as PNG so that the declared MIME type always matches the bytes.

    Args:
        pil_image: The image to encode.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.
    """
    fmt = (pil_image.format or "PNG").upper()
    if fmt not in PIL_TO_MIME:
        # Previously an unlisted format was still used for saving while the
        # URL claimed image/png; fall back to PNG for both instead.
        fmt = "PNG"
    mime = PIL_TO_MIME[fmt]

    # JPEG output is restricted to RGB / greyscale here; other modes
    # (e.g. with alpha or a palette) are converted to RGB first.
    if fmt == "JPEG" and pil_image.mode not in ("RGB", "L"):
        pil_image = pil_image.convert("RGB")

    buf = io.BytesIO()
    pil_image.save(buf, format=fmt)
    b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
    return f"data:{mime};base64,{b64}"
def logo_data_uri(path: str) -> str:
    """Return the file at *path* as a base64 ``data:`` URI, or ``""`` if unreadable.

    The MIME type is chosen from the file extension (case-insensitive) and
    defaults to ``image/png`` for unknown extensions.

    Args:
        path: Filesystem path of the logo image.

    Returns:
        ``data:<mime>;base64,<payload>``, or an empty string when the file is
        missing, is a directory, or cannot be read.
    """
    # Try the read directly rather than checking existence first: this also
    # covers directories and permission errors, and avoids a check/open race.
    try:
        with open(path, "rb") as f:
            raw = f.read()
    except OSError:
        return ""

    ext = os.path.splitext(path)[1].lower()
    mime = {
        ".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg",
        ".webp": "image/webp", ".gif": "image/gif",
    }.get(ext, "image/png")
    b64 = base64.b64encode(raw).decode("utf-8")
    return f"data:{mime};base64,{b64}"
def to_plain_text(s: str) -> str:
    """Strip a small subset of Markdown so the text reads well as plain text.

    Handles list bullets (``-`` / ``*`` at line start become ``•``), bold
    (``**x**``), italics (``*x*``) and inline/fenced code (1-3 backticks).

    Args:
        s: Markdown-flavoured text.

    Returns:
        The text with those markers removed.
    """
    # Bullets must be rewritten before italics; otherwise the leading "*" of a
    # bullet pairs with the first "*" of an emphasised word on the same line.
    # Only horizontal whitespace is matched so blank lines before a list (and
    # line breaks after a bare marker) are not swallowed.
    s = re.sub(r'^[ \t]*[-*][ \t]+', '• ', s, flags=re.M)  # bullets
    s = re.sub(r'\*\*(.*?)\*\*', r'\1', s)  # bold
    s = re.sub(r'\*(.*?)\*', r'\1', s)  # italics
    s = re.sub(r'`{1,3}(.*?)`{1,3}', r'\1', s, flags=re.S)  # code
    return s
def ask_image_question(image: Image.Image, question: str):
    """Send an image plus a question to the chat model and return its answer.

    Args:
        image: The uploaded image, or None when nothing was uploaded.
        question: The user's question; blank input falls back to a generic
            "describe" prompt.

    Returns:
        The model's reply passed through ``to_plain_text``, or a short
        warning / error message string.
    """
    # Guard: nothing to analyse without an image.
    if image is None:
        return "⚠️ Please upload an image first."

    # Empty or whitespace-only questions are replaced by the default prompt.
    prompt = (question or "").strip() or "Describe this image."

    try:
        image_part = {
            "type": "image_url",
            "image_url": {"url": encode_image_to_data_url(image)},
        }
        text_part = {"type": "text", "text": prompt}
        user_message = {"role": "user", "content": [text_part, image_part]}

        completion = client.chat.completions.create(
            model="gemma-3-4b-it",
            messages=[user_message],
            max_tokens=5000,
            temperature=0.2,
        )
        answer = completion.choices[0].message.content
        return to_plain_text(answer or "")
    except Exception as e:
        # UI boundary: surface any failure as text in the answer box.
        return f"❌ Error: {e}"
# Gradio UI
# Layout: a centred header (logo, app name, version and owner), then a row
# with the inputs on the left and the answer on the right.
with gr.Blocks(title=f"{APP_Name} v{APP_Version}", css=CSS) as demo:
    # Embed the logo as a data URI; the <img> is omitted when no logo is found.
    header_logo_src = logo_data_uri(COMPANY_LOGO)
    logo_html = f"<img src='{header_logo_src}' alt='logo'>" if header_logo_src else ""
    # A separator is placed between version and owner so they no longer render
    # fused together (previously "v1.0.0ENG. ...").
    gr.HTML(f"""
<div class="app-header">
{logo_html}
<div class="app-header-text">
<div class="app-title">{APP_Name}</div>
<div class="app-sub">v{APP_Version} • {OWNER_NAME}</div>
</div>
</div>
""")
    with gr.Row():
        # Left column: image -> question -> ask button
        with gr.Column(scale=3):
            image_in = gr.Image(type="pil", label="Upload image", sources=["upload", "clipboard"])
            question_in = gr.Textbox(
                label="Your question",
                placeholder="e.g., What objects do you see? What is happening?",
                lines=3,
            )
            ask_btn = gr.Button("Ask", variant="primary")
        # Right column: logo -> answer box
        with gr.Column(scale=2, min_width=320):
            if os.path.exists(COMPANY_LOGO):
                gr.Image(COMPANY_LOGO, show_label=False, container=False, height=96)
            answer_out = gr.Textbox(label="Answer", lines=14, interactive=False, show_copy_button=True)

    # Both the button and pressing Enter in the question box run the query.
    ask_btn.click(ask_image_question, [image_in, question_in], [answer_out])
    question_in.submit(ask_image_question, [image_in, question_in], [answer_out])

if __name__ == "__main__":
    demo.launch(debug=True)