"""Gradio app that sends an uploaded image plus a question to a vision model.

The image is embedded in the request as a base64 data URL and sent through an
OpenAI-compatible chat-completions endpoint; the model's Markdown answer is
flattened to plain text before being shown in the UI.
"""

import base64
import io
import os
import re

import gradio as gr
from dotenv import load_dotenv
from openai import OpenAI
from PIL import Image

# Config
load_dotenv()
APP_Name = os.getenv("APP_Name", "Ghaymah Vision QA")
APP_Version = os.getenv("APP_Version", "1.0.0")
API_KEY = os.getenv("API_KEY")
BASE_URL = os.getenv("BASE_URL", "https://genai.ghaymah.systems")

CSS = """
.app-header{display:flex;align-items:center;gap:12px;justify-content:center;margin:6px 0 16px}
.app-header img{height:60px;border-radius:12px}
.app-title{font-weight:800;font-size:28px;line-height:1.1}
.app-sub{opacity:.7;font-size:14px}
"""

# Branding
COMPANY_LOGO = "download.jpeg"
OWNER_NAME = "ENG. Ahmed Yasser El Sharkawy"

# Honour the BASE_URL setting instead of a hard-coded endpoint, so the
# environment/.env override actually takes effect.
client = OpenAI(api_key=API_KEY, base_url=BASE_URL)

# Map PIL formats to MIME types
PIL_TO_MIME = {
    "JPEG": "image/jpeg",
    "PNG": "image/png",
    "WEBP": "image/webp",
    "GIF": "image/gif",
    "BMP": "image/bmp",
    "TIFF": "image/tiff",
}

# Markdown patterns used by to_plain_text().
_BULLET_RE = re.compile(r"^[ \t]*[-*][ \t]+", flags=re.M)
_BOLD_RE = re.compile(r"\*\*(.*?)\*\*")
# Emphasis must hug its text (no space just inside the stars) so that spaced
# arithmetic such as "2 * 3 * 4" is left alone.
_ITALIC_RE = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*")
_CODE_RE = re.compile(r"`{1,3}(.*?)`{1,3}", flags=re.S)


def encode_image_to_data_url(pil_image: Image.Image) -> str:
    """Serialize *pil_image* and return it as a ``data:<mime>;base64,...`` URL.

    The image is saved in its own format when that format has an entry in
    ``PIL_TO_MIME``; otherwise (unknown or missing format) it is saved as PNG,
    so the declared MIME type always matches the encoded bytes.
    """
    fmt = (pil_image.format or "PNG").upper()
    if fmt not in PIL_TO_MIME:
        # Previously the bytes were written in the unmapped format while the
        # URL claimed image/png; re-encode as PNG so the two agree.
        fmt = "PNG"
    mime = PIL_TO_MIME[fmt]

    # Modes other than RGB/L (e.g. RGBA, P) are converted before JPEG encoding.
    if fmt == "JPEG" and pil_image.mode not in ("RGB", "L"):
        pil_image = pil_image.convert("RGB")

    buf = io.BytesIO()
    pil_image.save(buf, format=fmt)
    b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
    return f"data:{mime};base64,{b64}"


def logo_data_uri(path: str) -> str:
    """Return the file at *path* as a base64 data URI, or ``""`` if it is not a file.

    The MIME type is chosen from the file extension and defaults to
    ``image/png`` for unrecognized extensions.
    """
    # isfile (not exists) so a directory with that name does not reach open().
    if not os.path.isfile(path):
        return ""
    ext = os.path.splitext(path)[1].lower()
    mime = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".webp": "image/webp",
        ".gif": "image/gif",
    }.get(ext, "image/png")
    with open(path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode("utf-8")
    return f"data:{mime};base64,{b64}"


def to_plain_text(s: str) -> str:
    """Strip common Markdown markup from *s* and return plain text.

    List markers (``-`` or ``*`` at the start of a line) become ``• ``; bold,
    italic and backtick code delimiters are removed while their content is kept.
    """
    # Bullets go first: a leading "* " must be turned into "• " before the
    # italics pass, otherwise "* item with *emphasis*" pairs the list marker
    # with the first emphasis star and mangles the line.
    s = _BULLET_RE.sub("• ", s)   # bullets
    s = _BOLD_RE.sub(r"\1", s)    # bold
    s = _ITALIC_RE.sub(r"\1", s)  # italics
    s = _CODE_RE.sub(r"\1", s)    # code
    return s


def ask_image_question(image: Image.Image, question: str) -> str:
    """Ask the vision model *question* about *image* and return the answer text.

    Returns a warning string when no image is supplied, and an error string
    (rather than raising) when the request fails, so the UI always has
    something to display. A blank question defaults to "Describe this image."
    """
    if image is None:
        return "⚠️ Please upload an image first."
    if not question or not question.strip():
        question = "Describe this image."

    # Broad catch is deliberate: this is the UI boundary, and any failure
    # (encoding, network, API) is reported to the user in the answer box.
    try:
        data_url = encode_image_to_data_url(image)
        msg_content = [
            {"type": "text", "text": question.strip()},
            {"type": "image_url", "image_url": {"url": data_url}},
        ]
        resp = client.chat.completions.create(
            model="gemma-3-4b-it",
            messages=[{"role": "user", "content": msg_content}],
            max_tokens=5000,
            temperature=0.2,
        )
        return to_plain_text(resp.choices[0].message.content or "")
    except Exception as e:
        return f"❌ Error: {e}"


# Gradio UI
with gr.Blocks(title=f"{APP_Name} v{APP_Version}", css=CSS) as demo:
    header_logo_src = logo_data_uri(COMPANY_LOGO)
    # Embed the logo as an <img> so the `.app-header img` CSS rule applies;
    # omitted entirely when the logo file is missing.
    logo_html = f"<img src='{header_logo_src}' alt='logo'>" if header_logo_src else ""
    gr.HTML(f"""
<div class="app-header">
  {logo_html}
  <div>
    <div class="app-title">{APP_Name}</div>
    <div class="app-sub">v{APP_Version} • {OWNER_NAME}</div>
  </div>
</div>
""")

    with gr.Row():
        # Left column: image -> question -> ask button
        with gr.Column(scale=3):
            image_in = gr.Image(
                type="pil",
                label="Upload image",
                sources=["upload", "clipboard"],
            )
            question_in = gr.Textbox(
                label="Your question",
                placeholder="e.g., What objects do you see? What is happening?",
                lines=3,
            )
            ask_btn = gr.Button("Ask", variant="primary")

        # Right column: logo -> answer box
        with gr.Column(scale=2, min_width=320):
            if os.path.isfile(COMPANY_LOGO):
                gr.Image(COMPANY_LOGO, show_label=False, container=False, height=96)
            answer_out = gr.Textbox(
                label="Answer",
                lines=14,
                interactive=False,
                show_copy_button=True,
            )

    # Both the button and pressing Enter in the question box submit the query.
    ask_btn.click(ask_image_question, [image_in, question_in], [answer_out])
    question_in.submit(ask_image_question, [image_in, question_in], [answer_out])


if __name__ == "__main__":
    demo.launch(debug=True)