Spaces:

farjadmalik
/

fromWordsToMedia

Running

farjadmalik committed on Aug 25

Commit

f9630b2

1 Parent(s): 42b79ab

Fix spaces

Files changed (4) hide show

README.md CHANGED Viewed

@@ -2,6 +2,22 @@
 This project generates social media posts, including an image and a caption, from a user-provided text prompt. It leverages deep learning models for both text-to-image synthesis and text generation to create engaging content.
 ## How it Works
 The process is orchestrated by the `main.py` script and follows these steps:

 This project generates social media posts, including an image and a caption, from a user-provided text prompt. It leverages deep learning models for both text-to-image synthesis and text generation to create engaging content.
+## Spaces
+```
+title: FromWordsToMedia
+emoji: 🖼
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.25.2
+app_file: app.py
+pinned: false
+license: mit
+short_description: Generates an image and a caption for social media posts
+```
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 ## How it Works
 The process is orchestrated by the `main.py` script and follows these steps:

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 # External library imports
 from datetime import datetime
 import gradio as gr
 # Internal imports
 from src.visual_synthesizer import VisualSynthesizer
@@ -11,6 +12,11 @@ from utils.config import *
 from utils.helpers import richify_prompt, save_caption, save_image
 def compose(prompt: str, filename: str = "generated_post"):
     """
     Main function to compose an Instagram post from a given prompt.
@@ -23,15 +29,11 @@ def compose(prompt: str, filename: str = "generated_post"):
     timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
     filename = f"{timestamp}_{filename}"
-    # Initialize the visual synthesizer
-    image_gen = VisualSynthesizer()
     # Generate the image
     image = image_gen.generate_image(prompt=richify_prompt(prompt))
     # Save the image
     image_path = save_image(image, filename=filename)
     print(f"Image saved at: {image_path}")
-    # Create a caption for the post
-    text_gen = TextSynthesizer()
     caption = text_gen.generate_caption(prompt=prompt)
     # Save the caption
     caption_path = save_caption(caption, filename=filename)
@@ -40,6 +42,7 @@ def compose(prompt: str, filename: str = "generated_post"):
 if __name__ == '__main__':
     iface = gr.Interface(
         fn=compose,
         inputs=gr.Textbox(lines=5, label="Prompt", placeholder="Enter your prompt here..."),

 # External library imports
 from datetime import datetime
 import gradio as gr
+import asyncio
 # Internal imports
 from src.visual_synthesizer import VisualSynthesizer
 from utils.helpers import richify_prompt, save_caption, save_image
+# Initialize the text and visual synthesizers
+image_gen = VisualSynthesizer()
+text_gen = TextSynthesizer()
 def compose(prompt: str, filename: str = "generated_post"):
     """
     Main function to compose an Instagram post from a given prompt.
     timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
     filename = f"{timestamp}_{filename}"
     # Generate the image
     image = image_gen.generate_image(prompt=richify_prompt(prompt))
     # Save the image
     image_path = save_image(image, filename=filename)
     print(f"Image saved at: {image_path}")
     caption = text_gen.generate_caption(prompt=prompt)
     # Save the caption
     caption_path = save_caption(caption, filename=filename)
 if __name__ == '__main__':
     iface = gr.Interface(
         fn=compose,
         inputs=gr.Textbox(lines=5, label="Prompt", placeholder="Enter your prompt here..."),

src/visual_synthesizer.py CHANGED Viewed

@@ -43,9 +43,22 @@ class VisualSynthesizer:
         # self.video_pipe.enable_model_cpu_offload()
-    def generate_image(self, prompt: str,
                        negative_prompt: str = "blurry, distorted, poorly drawn, watermark",
-                       num_inference_steps: int = 50, guidance_scale: float = 7.5):
         image = self.image_pipe(prompt,
                           negative_prompt=negative_prompt,
                           num_inference_steps=num_inference_steps,

         # self.video_pipe.enable_model_cpu_offload()
+    def generate_image(self,
+                       prompt: str,
                        negative_prompt: str = "blurry, distorted, poorly drawn, watermark",
+                       num_inference_steps: int = 50,
+                       guidance_scale: float = 7.5):
+        """
+        Generates an image from a text prompt.
+        Args:
+            prompt (str): Text prompt to guide image generation.
+            negative_prompt (str): Optional negative prompts to avoid certain features.
+            num_inference_steps (int): Number of inference steps for generation.
+            guidance_scale (float): Guidance scale for generation.
+        Returns:
+            PIL.Image: Generated image.
+        """
+        # use the pipeline to generate an image based on the prompt and other parameters
         image = self.image_pipe(prompt,
                           negative_prompt=negative_prompt,
                           num_inference_steps=num_inference_steps,

utils/config.py CHANGED Viewed

@@ -17,7 +17,6 @@ VIDEO_MODEL_NAME = "cerspense/zeroscope_v2_XL"  # Placeholder for video model
 # Other models to try # Qwen/Qwen-Image # CompVis/stable-diffusion-v1-4
 # "segmind/SSD-1B" # Or "kandinsky-community/kandinsky-3", "warp-ai/wuerstchen"
 # Video generation models # cerspense/zeroscope_v2_576w # Wan‑Video/Wan2.1
-DEVICE = "cuda"  # Change to "cpu" if no GPU available
 # Font path for overlay text
 # FONT_PATH = "./fonts/arial.ttf"

 # Other models to try # Qwen/Qwen-Image # CompVis/stable-diffusion-v1-4
 # "segmind/SSD-1B" # Or "kandinsky-community/kandinsky-3", "warp-ai/wuerstchen"
 # Video generation models # cerspense/zeroscope_v2_576w # Wan‑Video/Wan2.1
 # Font path for overlay text
 # FONT_PATH = "./fonts/arial.ttf"