"""Gradio demo that turns one text prompt into an image and a spoken voiceover.

The image comes from Stable Diffusion XL and the audio from a
text-to-speech pipeline; both models are loaded once at import time.
"""

import gradio as gr
import numpy as np
import torch
from diffusers import StableDiffusionXLPipeline
from transformers import pipeline

# Load the Stable Diffusion XL model. The SDXL checkpoint must be loaded with
# the XL pipeline class: it ships two text encoders and extra UNet
# conditioning that the plain StableDiffusionPipeline does not handle.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0"
)
pipe.to("cpu")

# Load the text-to-speech model.
# NOTE(review): confirm that this checkpoint is loadable through the
# transformers "text-to-speech" pipeline; not verifiable from this file.
tts = pipeline("text-to-speech", model="nineninesix/kani-tts-370m")


def generate_media(prompt):
    """Generate an image and a matching voiceover for *prompt*.

    Args:
        prompt: Text used both as the image prompt and as the sentence
            to be spoken.

    Returns:
        A pair ``(image, (sampling_rate, waveform))``: the first generated
        image, and the audio in the ``(sample_rate, numpy_array)`` form
        that ``gr.Audio`` accepts.

    Raises:
        gr.Error: If *prompt* is empty or only whitespace.
    """
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a prompt.")

    image = pipe(prompt).images[0]

    speech = tts(prompt)
    # Drop any leading batch/channel axis of size 1 so the audio component
    # receives a plain sample array.
    waveform = np.squeeze(np.asarray(speech["audio"]))
    # gr.Audio needs the sampling rate alongside the samples; a bare
    # one-element tuple with only the waveform cannot be rendered.
    return image, (speech["sampling_rate"], waveform)


# Gradio UI
demo = gr.Interface(
    fn=generate_media,
    inputs=gr.Textbox(label="Enter your prompt"),
    outputs=[gr.Image(label="Generated Image"), gr.Audio(label="AI Voice")],
    title="AI Image + Voice Generator",
    description="Generates an image with a matching AI voiceover using Stable Diffusion XL and KaniTTS",
)

demo.launch()