Gary Simmons committed on
Commit
4e00399
·
1 Parent(s): 561fce6

reduce default RPM in RateLimitedModel to 8 and update backoff_multiplier to 4 for enhanced retry logic

Browse files
Files changed (1) hide show
  1. app.py +9 -5
app.py CHANGED
@@ -61,7 +61,7 @@ class TokenBucketRateLimiter:
61
  class RateLimitedModel:
62
  """Wraps a model-like callable and enforces a TokenBucketRateLimiter before each call."""
63
 
64
- def __init__(self, model_obj, rpm: int = 10, burst: int | None = None):
65
  self._model = model_obj
66
  # rpm -> tokens per minute
67
  capacity = burst if burst is not None else max(1, rpm)
@@ -118,9 +118,9 @@ class RateLimitedModel:
118
  return attr
119
 
120
 
121
- # Wrap the model with a rate-limiter. Default RPM is 10
122
  # but can be configured via the MODEL_RPM environment variable.
123
- _configured_rpm = int(os.getenv("MODEL_RPM", "10"))
124
  _configured_burst = None
125
  model = RateLimitedModel(
126
  LiteLLMModel(model_id="gemini/gemini-2.5-flash", temperature=0.2),
@@ -162,12 +162,16 @@ class BasicAgent:
162
  return f"AGENT ERROR: {e}"
163
 
164
 
 
 
 
 
165
  def call_model_with_retry(
166
- callable_fn, *args, max_retries=15, initial_delay=10.0, backoff_multiplier=3, **kwargs
167
  ):
168
  """
169
  Calls a function with retry logic and exponential backoff.
170
- The backoff multiplier is configurable (default=3 for more aggressive backoff).
171
  """
172
  delay = initial_delay
173
  for attempt in range(1, max_retries + 1):
 
61
  class RateLimitedModel:
62
  """Wraps a model-like callable and enforces a TokenBucketRateLimiter before each call."""
63
 
64
+ def __init__(self, model_obj, rpm: int = 8, burst: int | None = None):
65
  self._model = model_obj
66
  # rpm -> tokens per minute
67
  capacity = burst if burst is not None else max(1, rpm)
 
118
  return attr
119
 
120
 
121
+ # Wrap the model with a rate-limiter. Default RPM is reduced to 8
122
  # but can be configured via the MODEL_RPM environment variable.
123
+ _configured_rpm = int(os.getenv("MODEL_RPM", "8"))
124
  _configured_burst = None
125
  model = RateLimitedModel(
126
  LiteLLMModel(model_id="gemini/gemini-2.5-flash", temperature=0.2),
 
162
  return f"AGENT ERROR: {e}"
163
 
164
 
165
+ # Note: The backoff_multiplier was changed from 3 to 4, which increases the delay between retries exponentially.
166
+ # This means that after each failed attempt, the wait time before the next retry will grow more rapidly,
167
+ # potentially resulting in significantly longer total retry durations.
168
+
169
  def call_model_with_retry(
170
+ callable_fn, *args, max_retries=15, initial_delay=10.0, backoff_multiplier=4, **kwargs
171
  ):
172
  """
173
  Calls a function with retry logic and exponential backoff.
174
+ The backoff multiplier is configurable (default=4 for more aggressive backoff).
175
  """
176
  delay = initial_delay
177
  for attempt in range(1, max_retries + 1):