Spaces:
Running
Running
import gradio as gr
import torch
from diffusers import StableDiffusionPipeline, StableDiffusionXLPipeline
from transformers import pipeline
# Load the Stable Diffusion XL text-to-image pipeline.
# "stabilityai/stable-diffusion-xl-base-1.0" is an SDXL checkpoint (it ships a
# second text encoder/tokenizer), so it has to be loaded with the SDXL pipeline
# class; the SD 1.x/2.x StableDiffusionPipeline cannot run it.
pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
pipe.to("cpu")  # inference is pinned to the CPU

# Load the text-to-speech model through the transformers pipeline API.
tts = pipeline("text-to-speech", model="nineninesix/kani-tts-370m")
# Image + voice generation function
def generate_media(prompt):
    """Generate an image and a spoken rendition of ``prompt``.

    Args:
        prompt: Text used both as the diffusion prompt and as the TTS input.

    Returns:
        A ``(image, (sampling_rate, waveform))`` pair: the first image produced
        by the module-level ``pipe`` and the synthesized speech from the
        module-level ``tts``, packed in the ``(sample_rate, data)`` tuple form
        that ``gr.Audio`` accepts as an output value.
    """
    image = pipe(prompt).images[0]
    speech = tts(prompt)

    # gr.Audio needs the sample rate alongside the samples; returning only the
    # waveform (as a 1-tuple) is not a valid audio value.
    waveform = speech["audio"].squeeze()
    # NOTE(review): transformers documents the TTS output as
    # (nb_channels, audio_length); Gradio wants (samples,) or
    # (samples, channels), so transpose anything still 2-D after squeeze.
    if waveform.ndim == 2:
        waveform = waveform.T
    return image, (speech["sampling_rate"], waveform)
# Gradio UI: one prompt textbox in; generated image and narrated audio out.
prompt_box = gr.Textbox(label="Enter your prompt")
image_out = gr.Image(label="Generated Image")
voice_out = gr.Audio(label="AI Voice")

demo = gr.Interface(
    fn=generate_media,
    inputs=prompt_box,
    outputs=[image_out, voice_out],
    title="AI Image + Voice Generator",
    description="Generates an image with a matching AI voiceover using Stable Diffusion XL and KaniTTS",
)

# Start the web server for the interface.
demo.launch()