Spaces:

wavespeed
/

hidream-arena

Paused

App Files Files Community

chengzeyi committed on Apr 13

Commit

6380d03

1 Parent(s): 67635cf

add more limit

Browse files

Files changed (1) hide show

app.py +143 -69

app.py CHANGED Viewed

@@ -64,9 +64,10 @@ class BackendStatus:
         self.status = "completed"
         self.progress = 100
         self.end_time = time.time()
-        self.history.append(
-            {"timestamp": datetime.now(), "duration": self.end_time - self.start_time}
-        )
     def fail(self):
         self.status = "failed"
@@ -93,10 +94,8 @@ class SessionManager:
         with cls._lock:
             to_remove = []
             for session_id, manager in cls._instances.items():
-                if (
-                    hasattr(manager, "last_activity")
-                    and current_time - manager.last_activity > max_age
-                ):
                     to_remove.append(session_id)
             for session_id in to_remove:
@@ -106,7 +105,10 @@ class SessionManager:
 class GenerationManager:
     def __init__(self):
-        self.backend_statuses = {backend: BackendStatus() for backend in BACKENDS}
         self.last_activity = time.time()
         self.request_timestamps = []  # Track timestamps of requests
@@ -116,7 +118,8 @@ class GenerationManager:
     def add_request_timestamp(self):
         self.request_timestamps.append(time.time())
-    def has_exceeded_limit(self, limit=10):  # Default limit: 10 requests per hour
         current_time = time.time()
         # Filter timestamps to only include those within the last hour
         self.request_timestamps = [
@@ -144,17 +147,14 @@ class GenerationManager:
                         text=[f"{avg_duration:.2f}s"],  # Show time in seconds
                         textposition="auto",
                         width=[0.5],  # Make bars narrower
-                    )
-                )
         # Set a minimum y-axis range if we have data
         if has_data:
-            max_duration = max(
-                [
-                    max([h["duration"] for h in status.history] or [0])
-                    for status in self.backend_statuses.values()
-                ]
-            )
             # Add 20% padding to the top
             y_max = max_duration * 1.2
             # Ensure the y-axis always starts at 0
@@ -209,15 +209,19 @@ class GenerationManager:
             # Use aiohttp instead of requests for async
             async with aiohttp.ClientSession() as session:
-                async with session.post(url, headers=headers, json=payload) as response:
                     if response.status == 200:
                         result = await response.json()
                         request_id = result["data"]["id"]
-                        print(f"Task submitted successfully. Request ID: {request_id}")
                         return request_id
                     else:
                         text = await response.text()
-                        raise Exception(f"API error: {response.status}, {text}")
         except Exception as e:
             status.fail()
@@ -231,6 +235,59 @@ class GenerationManager:
         return self
 # Helper function to create error images as data URIs
 def create_error_image(backend, error_message):
     try:
@@ -305,9 +362,9 @@ async def poll_once(manager, backend, request_id):
                         # It's base64 data - format it as a data URI if needed
                         try:
                             # Format as data URI for Gradio to display directly
-                            if isinstance(output, str) and not output.startswith(
-                                "data:image"
-                            ):
                                 # Convert raw base64 to data URI format
                                 return f"data:image/jpeg;base64,{output}"
                             else:
@@ -315,7 +372,8 @@ async def poll_once(manager, backend, request_id):
                                 return output
                         except Exception as e:
                             print(f"Error processing base64 image: {e}")
-                            raise Exception(f"Failed to process base64 image: {str(e)}")
                 elif current_status == "failed":
                     manager.backend_statuses[backend].fail()
@@ -347,17 +405,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🌊 WaveSpeedAI HiDream Arena")
     # Add the introduction with link to WaveSpeedAI
-    gr.Markdown(
-        """
         [WaveSpeedAI](https://wavespeed.ai/) is the global pioneer in accelerating AI-powered video and image generation.
         Our in-house inference accelerator provides lossless speedup on image & video generation based on our rich inference optimization software stack, including our in-house inference compiler, CUDA kernel libraries and parallel computing libraries.
-        """
-    )
-    gr.Markdown(
-        """
         This demo showcases the performance and outputs of leading image generation models, including HiDream and Flux, on our accelerated inference platform.
-        """
-    )
     with gr.Row():
         with gr.Column(scale=3):
@@ -375,18 +429,20 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         with gr.Column(scale=1):
             generate_btn = gr.Button("Generate", variant="primary")
-    example_dropdown.change(
-        lambda ex: ex, inputs=[example_dropdown], outputs=[input_text]
-    )
     # Two status boxes - small (default) and big (during generation)
-    small_status_box = gr.Markdown("Ready to generate images", elem_id="small-status")
     # Big status box in its own row with styling
     with gr.Row(elem_id="big-status-row"):
-        big_status_box = gr.Markdown(
-            "", elem_id="big-status", visible=False, elem_classes="big-status-box"
-        )
     with gr.Row():
         with gr.Column():
@@ -399,27 +455,27 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     performance_plot = gr.Plot(label="Performance Metrics")
     with gr.Accordion("Recent Generations (last 16)", open=False):
-        recent_gallery = gr.Gallery(
-            label="Prompt and Output", columns=3, interactive=False
-        )
     def get_recent_gallery_items():
         gallery_items = []
         for r in reversed(recent_generations):
             gallery_items.append((r["flux-dev"], f"FLUX-dev: {r['prompt']}"))
-            gallery_items.append((r["hidream-dev"], f"HiDream-dev: {r['prompt']}"))
-            gallery_items.append((r["hidream-full"], f"HiDream-full: {r['prompt']}"))
         return gallery_items
     def update_recent_gallery(prompt, results):
-        recent_generations.append(
-            {
-                "prompt": prompt,
-                "flux-dev": results["flux-dev"],
-                "hidream-dev": results["hidream-dev"],
-                "hidream-full": results["hidream-full"],
-            }
-        )
         if len(recent_generations) > 16:
             recent_generations.pop(0)
         gallery_items = get_recent_gallery_items()
@@ -470,7 +526,30 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.HTML(f"<style>{css}</style>")
     # Update the generation function to use session manager
-    async def generate_all_backends_with_status_boxes(prompt, current_session_id):
         """Generate images with big status box during generation"""
         # Get or create a session manager
         session_id, manager = SessionManager.get_manager(current_session_id)
@@ -478,8 +557,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         # Check if the user has exceeded the request limit
         if manager.has_exceeded_limit(
-            limit=10
-        ):  # Set the limit to 10 requests per hour
             error_message = "❌ You have exceeded the limit of 10 requests per hour. Please try again later."
             yield (
                 error_message,
@@ -557,7 +635,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             poll_attempt = 0
             # Main polling loop
-            while len(completed_backends) < 3 and poll_attempt < max_poll_attempts:
                 poll_attempt += 1
                 # Poll each pending backend
@@ -569,9 +648,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                         # Only do actual API calls every few attempts to reduce load
                         if poll_attempt % 2 == 0 or backend == "flux-dev":
                             # Use the session manager instead of global manager
-                            result = await poll_once(
-                                manager, backend, request_ids[backend]
-                            )
                             if result:  # Backend completed
                                 results[backend] = result
                                 completed_backends.add(backend)
@@ -585,11 +663,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                                     results["flux-dev"],
                                     results["hidream-dev"],
                                     results["hidream-full"],
-                                    (
-                                        manager.get_performance_plot()
-                                        if any(completed_backends)
-                                        else None
-                                    ),
                                     session_id,
                                     None,
                                 )
@@ -600,11 +675,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 await asyncio.sleep(0.1)
             # Final status
-            final_status = (
-                "✅ All generations completed!"
-                if len(completed_backends) == 3
-                else "⚠️ Some generations timed out"
-            )
             gallery_update = update_recent_gallery(prompt, results)
@@ -641,6 +714,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     # Schedule periodic cleanup of old sessions
     def cleanup_task():
         SessionManager.cleanup_old_sessions()
         # Schedule the next cleanup
         threading.Timer(3600, cleanup_task).start()  # Run every hour

         self.status = "completed"
         self.progress = 100
         self.end_time = time.time()
+        self.history.append({
+            "timestamp": datetime.now(),
+            "duration": self.end_time - self.start_time
+        })
     def fail(self):
         self.status = "failed"
         with cls._lock:
             to_remove = []
             for session_id, manager in cls._instances.items():
+                if (hasattr(manager, "last_activity")
+                        and current_time - manager.last_activity > max_age):
                     to_remove.append(session_id)
             for session_id in to_remove:
 class GenerationManager:
     def __init__(self):
+        self.backend_statuses = {
+            backend: BackendStatus()
+            for backend in BACKENDS
+        }
         self.last_activity = time.time()
         self.request_timestamps = []  # Track timestamps of requests
     def add_request_timestamp(self):
         self.request_timestamps.append(time.time())
+    def has_exceeded_limit(self,
+                           limit=10):  # Default limit: 10 requests per hour
         current_time = time.time()
         # Filter timestamps to only include those within the last hour
         self.request_timestamps = [
                         text=[f"{avg_duration:.2f}s"],  # Show time in seconds
                         textposition="auto",
                         width=[0.5],  # Make bars narrower
+                    ))
         # Set a minimum y-axis range if we have data
         if has_data:
+            max_duration = max([
+                max([h["duration"] for h in status.history] or [0])
+                for status in self.backend_statuses.values()
+            ])
             # Add 20% padding to the top
             y_max = max_duration * 1.2
             # Ensure the y-axis always starts at 0
             # Use aiohttp instead of requests for async
             async with aiohttp.ClientSession() as session:
+                async with session.post(url, headers=headers,
+                                        json=payload) as response:
                     if response.status == 200:
                         result = await response.json()
                         request_id = result["data"]["id"]
+                        print(
+                            f"Task submitted successfully. Request ID: {request_id}"
+                        )
                         return request_id
                     else:
                         text = await response.text()
+                        raise Exception(
+                            f"API error: {response.status}, {text}")
         except Exception as e:
             status.fail()
         return self
class ClientManager:
    """Class-level registry of per-client generation managers.

    Every access to the shared ``_instances`` mapping happens while holding
    ``_lock``.
    """

    _instances = {}
    _lock = threading.Lock()

    @classmethod
    def get_manager(cls, client_id=None):
        """Return the manager registered for ``client_id``, creating it if needed.

        A falsy ``client_id`` is replaced by a freshly generated UUID string,
        so such a call always registers and returns a new manager.
        """
        key = client_id or str(uuid.uuid4())
        with cls._lock:
            registry = cls._instances
            if key in registry:
                return registry[key]
            created = ClientGenerationManager()
            registry[key] = created
            return created

    @classmethod
    def cleanup_old_clients(cls, max_age=3600):  # 1 hour default
        """Drop managers whose ``last_activity`` is more than ``max_age`` seconds old.

        Managers that have no ``last_activity`` attribute are left in place.
        """
        now = time.time()
        with cls._lock:
            # Collect the keys first; the mapping must not change size while
            # it is being iterated.
            stale_keys = [
                key for key, tracker in cls._instances.items()
                if hasattr(tracker, "last_activity")
                and now - tracker.last_activity > max_age
            ]
            for key in stale_keys:
                del cls._instances[key]
class ClientGenerationManager:
    """Per-client request tracker used to enforce an hourly request limit.

    All mutable state (``last_activity`` and ``request_timestamps``) is read
    and written while holding ``self.lock``.
    """

    def __init__(self):
        self.lock = threading.Lock()
        # Initialise the tracked state up front. Without this,
        # has_exceeded_limit() and add_request_timestamp() raise
        # AttributeError on a fresh instance, and an instance that was never
        # touched by update_activity() has no ``last_activity`` for the
        # periodic cleanup to compare against.
        self.last_activity = time.time()
        self.request_timestamps = []  # time.time() values of recorded requests

    def update_activity(self):
        """Record the current time as this client's last activity."""
        with self.lock:
            self.last_activity = time.time()

    def add_request_timestamp(self):
        """Record that a request was made right now."""
        with self.lock:
            self.request_timestamps.append(time.time())

    def has_exceeded_limit(self,
                           limit=100):  # Default limit: 100 requests per hour
        """Return True if at least ``limit`` requests were recorded in the last hour.

        As a side effect, timestamps older than 3600 seconds are discarded.
        """
        with self.lock:
            current_time = time.time()
            # Keep only the timestamps that fall within the last hour
            self.request_timestamps = [
                ts for ts in self.request_timestamps
                if current_time - ts <= 3600
            ]
            return len(self.request_timestamps) >= limit
 # Helper function to create error images as data URIs
 def create_error_image(backend, error_message):
     try:
                         # It's base64 data - format it as a data URI if needed
                         try:
                             # Format as data URI for Gradio to display directly
+                            if isinstance(
+                                    output, str
+                            ) and not output.startswith("data:image"):
                                 # Convert raw base64 to data URI format
                                 return f"data:image/jpeg;base64,{output}"
                             else:
                                 return output
                         except Exception as e:
                             print(f"Error processing base64 image: {e}")
+                            raise Exception(
+                                f"Failed to process base64 image: {str(e)}")
                 elif current_status == "failed":
                     manager.backend_statuses[backend].fail()
     gr.Markdown("# 🌊 WaveSpeedAI HiDream Arena")
     # Add the introduction with link to WaveSpeedAI
+    gr.Markdown("""
         [WaveSpeedAI](https://wavespeed.ai/) is the global pioneer in accelerating AI-powered video and image generation.
         Our in-house inference accelerator provides lossless speedup on image & video generation based on our rich inference optimization software stack, including our in-house inference compiler, CUDA kernel libraries and parallel computing libraries.
+        """)
+    gr.Markdown("""
         This demo showcases the performance and outputs of leading image generation models, including HiDream and Flux, on our accelerated inference platform.
+        """)
     with gr.Row():
         with gr.Column(scale=3):
         with gr.Column(scale=1):
             generate_btn = gr.Button("Generate", variant="primary")
+    example_dropdown.change(lambda ex: ex,
+                            inputs=[example_dropdown],
+                            outputs=[input_text])
     # Two status boxes - small (default) and big (during generation)
+    small_status_box = gr.Markdown("Ready to generate images",
+                                   elem_id="small-status")
     # Big status box in its own row with styling
     with gr.Row(elem_id="big-status-row"):
+        big_status_box = gr.Markdown("",
+                                     elem_id="big-status",
+                                     visible=False,
+                                     elem_classes="big-status-box")
     with gr.Row():
         with gr.Column():
     performance_plot = gr.Plot(label="Performance Metrics")
     with gr.Accordion("Recent Generations (last 16)", open=False):
+        recent_gallery = gr.Gallery(label="Prompt and Output",
+                                    columns=3,
+                                    interactive=False)
     def get_recent_gallery_items():
         gallery_items = []
         for r in reversed(recent_generations):
             gallery_items.append((r["flux-dev"], f"FLUX-dev: {r['prompt']}"))
+            gallery_items.append(
+                (r["hidream-dev"], f"HiDream-dev: {r['prompt']}"))
+            gallery_items.append(
+                (r["hidream-full"], f"HiDream-full: {r['prompt']}"))
         return gallery_items
     def update_recent_gallery(prompt, results):
+        recent_generations.append({
+            "prompt": prompt,
+            "flux-dev": results["flux-dev"],
+            "hidream-dev": results["hidream-dev"],
+            "hidream-full": results["hidream-full"],
+        })
         if len(recent_generations) > 16:
             recent_generations.pop(0)
         gallery_items = get_recent_gallery_items()
     gr.HTML(f"<style>{css}</style>")
     # Update the generation function to use session manager
+    async def generate_all_backends_with_status_boxes(prompt,
+                                                      current_session_id,
+                                                      request: gr.Request):
+        client_ip = request.client.host
+        print(f"Client IP: {client_ip}")
+        client_generation_manager = ClientManager.get_manager(client_ip)
+        client_generation_manager.update_activity()
+        if client_generation_manager.has_exceeded_limit(limit=100):
+            error_message = "❌ Your network has exceeded the limit of 100 requests per hour. Please try again later."
+            yield (
+                error_message,
+                error_message,
+                gr.update(visible=False),
+                gr.update(visible=True),
+                None,
+                None,
+                None,
+                None,
+                current_session_id,  # Return the session ID
+                None,
+            )
+            return
+        client_generation_manager.add_request_timestamp()
         """Generate images with big status box during generation"""
         # Get or create a session manager
         session_id, manager = SessionManager.get_manager(current_session_id)
         # Check if the user has exceeded the request limit
         if manager.has_exceeded_limit(
+                limit=10):  # Set the limit to 10 requests per hour
             error_message = "❌ You have exceeded the limit of 10 requests per hour. Please try again later."
             yield (
                 error_message,
             poll_attempt = 0
             # Main polling loop
+            while len(completed_backends
+                      ) < 3 and poll_attempt < max_poll_attempts:
                 poll_attempt += 1
                 # Poll each pending backend
                         # Only do actual API calls every few attempts to reduce load
                         if poll_attempt % 2 == 0 or backend == "flux-dev":
                             # Use the session manager instead of global manager
+                            result = await poll_once(manager, backend,
+                                                     request_ids[backend])
                             if result:  # Backend completed
                                 results[backend] = result
                                 completed_backends.add(backend)
                                     results["flux-dev"],
                                     results["hidream-dev"],
                                     results["hidream-full"],
+                                    (manager.get_performance_plot()
+                                     if any(completed_backends) else None),
                                     session_id,
                                     None,
                                 )
                 await asyncio.sleep(0.1)
             # Final status
+            final_status = ("✅ All generations completed!"
+                            if len(completed_backends) == 3 else
+                            "⚠️ Some generations timed out")
             gallery_update = update_recent_gallery(prompt, results)
     # Schedule periodic cleanup of old sessions
     def cleanup_task():
         SessionManager.cleanup_old_sessions()
+        ClientManager.cleanup_old_clients()
         # Schedule the next cleanup
         threading.Timer(3600, cleanup_task).start()  # Run every hour