rombodawg committed on
Commit
7e073c3
·
verified ·
1 Parent(s): 5a4060a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -153
app.py CHANGED
@@ -59,7 +59,6 @@ tr:hover {
59
  }
60
  """
61
 
62
- # List of schizo words to check for
63
  SCHIZO_WORDS = [
64
  "MAXED", "Max", "SUPER", "Duped", "Edge", "maid", "Solution",
65
  "gpt-4", "gpt4o", "claude-3.5", "claude-3.7", "o1", "o3-mini",
@@ -71,75 +70,46 @@ SCHIZO_WORDS = [
71
  "NuSLERP", "DELL", "DELLA Task Arithmeti", "SCE"
72
  ]
73
 
74
- # List of markdown symbols
75
  MARKDOWN_SYMBOLS = ["#", "*", "_", "`", ">", "-", "+", "[", "]", "(", ")", "!", "\\", "|", "~", "<", ">", "=", ":"]
76
 
 
 
 
 
 
77
  def count_schizo_words(text):
78
- """Count occurrences of schizo words in text"""
79
  count = 0
80
  for word in SCHIZO_WORDS:
81
- # Case insensitive search
82
  count += len(re.findall(re.escape(word), text, re.IGNORECASE))
83
  return count
84
 
85
  def count_markdown_symbols(text):
86
- """Count occurrences of markdown symbols in text"""
87
  count = 0
88
  for symbol in MARKDOWN_SYMBOLS:
89
  count += text.count(symbol)
90
  return count
91
 
92
  def calculate_word_count(text):
93
- """Calculate word count in text"""
94
  return len(re.findall(r'\w+', text))
95
 
96
  def calculate_schizo_rating(readme_content):
97
- """Calculate schizo rating based on defined criteria"""
98
- # Count schizo words
99
  schizo_word_count = count_schizo_words(readme_content)
100
-
101
- # Calculate base rating from schizo words
102
  word_schizo_rating = schizo_word_count * 10
103
 
104
- # Calculate word count penalties
105
  word_count = calculate_word_count(readme_content)
106
-
107
- # Word count penalty
108
  wordiness_schizo_rating = 0
109
  if word_count < 150:
110
  wordiness_schizo_rating = word_schizo_rating * 0.5
111
  elif word_count > 1000:
112
- extra_penalty = 0
113
- if word_count > 1000:
114
- extra_penalty = 0.5
115
- if word_count > 1500:
116
- extra_penalty = 0.75
117
- if word_count > 2000:
118
- extra_penalty = 1.0
119
- # Additional penalty for every 500 words over 2000
120
- extra_words = word_count - 2000
121
- extra_500s = extra_words // 500
122
- extra_penalty += extra_500s * 0.25
123
-
124
  wordiness_schizo_rating = word_schizo_rating * extra_penalty
125
 
126
- # Markdown symbol penalty
127
  markdown_count = count_markdown_symbols(readme_content)
128
  visual_schizo_rating = 0
129
  if markdown_count > 100:
130
- visual_penalty = 0
131
- if markdown_count > 100:
132
- visual_penalty = 0.25
133
- if markdown_count > 150:
134
- visual_penalty = 0.5
135
- # Additional penalty for every 50 symbols over 150
136
- extra_symbols = markdown_count - 150
137
- extra_50s = extra_symbols // 50
138
- visual_penalty += extra_50s * 0.25
139
-
140
  visual_schizo_rating = word_schizo_rating * visual_penalty
141
 
142
- # Calculate final combined score
143
  combined_schizo_rating = word_schizo_rating + wordiness_schizo_rating + visual_schizo_rating
144
 
145
  return {
@@ -153,53 +123,31 @@ def calculate_schizo_rating(readme_content):
153
  }
154
 
155
  def fetch_model_readme(model_id):
156
- """Fetch README for a given model ID"""
157
  try:
158
- # Try to get the readme content
159
  url = f"https://huggingface.co/{model_id}/raw/main/README.md"
160
  response = requests.get(url)
161
- if response.status_code == 200:
162
- return response.text
163
- else:
164
- return None
165
  except Exception as e:
166
  print(f"Error fetching README for {model_id}: {e}")
167
  return None
168
 
169
  def generate_leaderboard_data(model_type="llm", max_models=500):
170
- """Generate leaderboard data by analyzing model cards"""
171
  api = HfApi(token=HF_TOKEN)
172
 
173
- # Define filter parameters based on model type
174
- if model_type == "llm":
175
- # Filter to text-generation models using API parameters
176
- models = list_models(
177
- task="text-generation",
178
- limit=max_models # Set a reasonable limit to avoid overwhelming the API
179
- )
180
- else:
181
- # Get all models
182
- models = list_models(
183
- limit=max_models # Set a reasonable limit to avoid overwhelming the API
184
- )
185
 
186
  leaderboard_data = []
187
- processed_count = 0
188
-
189
  for model in models:
190
- model_id = model.id
191
- readme_content = fetch_model_readme(model_id)
192
-
193
- if readme_content is None or len(readme_content.strip()) == 0:
194
- # Skip models without READMEs
195
  continue
196
 
197
- # Calculate ratings
198
  ratings = calculate_schizo_rating(readme_content)
199
-
200
- # Add to leaderboard data
201
  leaderboard_data.append({
202
- "model_id": model_id,
203
  "combined_rating": ratings["combined"],
204
  "word_rating": ratings["word"],
205
  "wordiness_rating": ratings["wordiness"],
@@ -208,20 +156,11 @@ def generate_leaderboard_data(model_type="llm", max_models=500):
208
  "word_count": ratings["word_count"],
209
  "markdown_count": ratings["markdown_count"]
210
  })
211
-
212
- processed_count += 1
213
-
214
- # Status update
215
- if processed_count % 10 == 0:
216
- print(f"Processed {processed_count} models")
217
 
218
- # Sort by combined rating in descending order
219
  leaderboard_data.sort(key=lambda x: x["combined_rating"], reverse=True)
220
-
221
  return leaderboard_data
222
 
223
  def create_leaderboard_html(leaderboard_data):
224
- """Create HTML for the leaderboard"""
225
  html = """
226
  <div class="leaderboard-container">
227
  <table id="leaderboard">
@@ -248,91 +187,44 @@ def create_leaderboard_html(leaderboard_data):
248
  html += """
249
  </table>
250
  </div>
251
-
252
  <script>
253
  function sortTable(n, isNumeric = false) {
254
- var table, rows, switching, i, x, y, shouldSwitch, dir, switchcount = 0;
255
- table = document.getElementById("leaderboard");
256
- switching = true;
257
- dir = "asc";
258
-
259
- while (switching) {
260
- switching = false;
261
- rows = table.rows;
262
-
263
- for (i = 1; i < (rows.length - 1); i++) {
264
- shouldSwitch = false;
265
- x = rows[i].getElementsByTagName("TD")[n];
266
- y = rows[i + 1].getElementsByTagName("TD")[n];
267
-
268
- if (dir == "asc") {
269
- if (isNumeric) {
270
- if (parseFloat(x.innerHTML) > parseFloat(y.innerHTML)) {
271
- shouldSwitch = true;
272
- break;
273
- }
274
- } else {
275
- if (x.innerHTML.toLowerCase() > y.innerHTML.toLowerCase()) {
276
- shouldSwitch = true;
277
- break;
278
- }
279
- }
280
- } else if (dir == "desc") {
281
- if (isNumeric) {
282
- if (parseFloat(x.innerHTML) < parseFloat(y.innerHTML)) {
283
- shouldSwitch = true;
284
- break;
285
- }
286
- } else {
287
- if (x.innerHTML.toLowerCase() < y.innerHTML.toLowerCase()) {
288
- shouldSwitch = true;
289
- break;
290
- }
291
- }
292
- }
293
- }
294
-
295
- if (shouldSwitch) {
296
- rows[i].parentNode.insertBefore(rows[i + 1], rows[i]);
297
- switching = true;
298
- switchcount++;
299
- } else {
300
- if (switchcount == 0 && dir == "asc") {
301
- dir = "desc";
302
- switching = true;
303
- }
304
- }
305
- }
306
  }
307
  </script>
308
  """
309
-
310
  return html
311
 
 
 
 
 
 
 
 
 
 
 
 
312
  def load_leaderboard(model_type):
313
- """Load the leaderboard with models"""
314
- # Show loading message
315
- yield '<div class="loading">Loading models and analyzing Schizo ratings... This may take a few minutes.</div>'
316
-
317
- try:
318
- leaderboard_data = generate_leaderboard_data(model_type)
319
- leaderboard_html = create_leaderboard_html(leaderboard_data)
320
- yield leaderboard_html
321
- except Exception as e:
322
- yield f'<div class="loading">Error generating leaderboard: {str(e)}</div>'
323
 
324
- # Background loading thread
325
- def background_loader(model_type, progress=None):
326
- try:
327
- leaderboard_data = generate_leaderboard_data(model_type)
328
- leaderboard_html = create_leaderboard_html(leaderboard_data)
329
- return leaderboard_html
330
- except Exception as e:
331
- return f'<div class="loading">Error generating leaderboard: {str(e)}</div>'
 
 
332
 
333
  @spaces.GPU()
334
  def init_leaderboard():
335
- return '<div class="loading">Initializing leaderboard... Please wait while we analyze Hugging Face models.</div>'
336
 
337
  with gr.Blocks(css=CSS, theme="soft") as demo:
338
  gr.HTML(TITLE)
@@ -345,23 +237,23 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
345
  value="llm",
346
  label="Model Type Filter",
347
  )
348
-
349
  refresh_button = gr.Button("Refresh Leaderboard")
350
 
351
- leaderboard_html = gr.HTML(value=init_leaderboard)
352
 
353
- # Load leaderboard on startup and when refresh button is clicked
354
  demo.load(
355
  fn=load_leaderboard,
356
  inputs=[model_type_dropdown],
357
  outputs=[leaderboard_html],
 
358
  )
359
 
360
  refresh_button.click(
361
- fn=load_leaderboard,
362
  inputs=[model_type_dropdown],
363
  outputs=[leaderboard_html],
364
  )
365
 
366
  if __name__ == "__main__":
 
367
  demo.launch()
 
59
  }
60
  """
61
 
 
62
  SCHIZO_WORDS = [
63
  "MAXED", "Max", "SUPER", "Duped", "Edge", "maid", "Solution",
64
  "gpt-4", "gpt4o", "claude-3.5", "claude-3.7", "o1", "o3-mini",
 
70
  "NuSLERP", "DELL", "DELLA Task Arithmeti", "SCE"
71
  ]
72
 
 
73
  MARKDOWN_SYMBOLS = ["#", "*", "_", "`", ">", "-", "+", "[", "]", "(", ")", "!", "\\", "|", "~", "<", ">", "=", ":"]
74
 
75
+ CACHE = {
76
+ "llm": {"html": None, "timestamp": None},
77
+ "all": {"html": None, "timestamp": None}
78
+ }
79
+
80
  def count_schizo_words(text):
 
81
  count = 0
82
  for word in SCHIZO_WORDS:
 
83
  count += len(re.findall(re.escape(word), text, re.IGNORECASE))
84
  return count
85
 
86
  def count_markdown_symbols(text):
 
87
  count = 0
88
  for symbol in MARKDOWN_SYMBOLS:
89
  count += text.count(symbol)
90
  return count
91
 
92
  def calculate_word_count(text):
 
93
  return len(re.findall(r'\w+', text))
94
 
95
  def calculate_schizo_rating(readme_content):
 
 
96
  schizo_word_count = count_schizo_words(readme_content)
 
 
97
  word_schizo_rating = schizo_word_count * 10
98
 
 
99
  word_count = calculate_word_count(readme_content)
 
 
100
  wordiness_schizo_rating = 0
101
  if word_count < 150:
102
  wordiness_schizo_rating = word_schizo_rating * 0.5
103
  elif word_count > 1000:
104
+ extra_penalty = min(1.0, 0.5 + ((word_count - 1000) // 500) * 0.25)
 
 
 
 
 
 
 
 
 
 
 
105
  wordiness_schizo_rating = word_schizo_rating * extra_penalty
106
 
 
107
  markdown_count = count_markdown_symbols(readme_content)
108
  visual_schizo_rating = 0
109
  if markdown_count > 100:
110
+ visual_penalty = min(1.0, 0.5 + ((markdown_count - 150) // 50) * 0.25)
 
 
 
 
 
 
 
 
 
111
  visual_schizo_rating = word_schizo_rating * visual_penalty
112
 
 
113
  combined_schizo_rating = word_schizo_rating + wordiness_schizo_rating + visual_schizo_rating
114
 
115
  return {
 
123
  }
124
 
125
  def fetch_model_readme(model_id):
 
126
  try:
 
127
  url = f"https://huggingface.co/{model_id}/raw/main/README.md"
128
  response = requests.get(url)
129
+ return response.text if response.status_code == 200 else None
 
 
 
130
  except Exception as e:
131
  print(f"Error fetching README for {model_id}: {e}")
132
  return None
133
 
134
  def generate_leaderboard_data(model_type="llm", max_models=500):
 
135
  api = HfApi(token=HF_TOKEN)
136
 
137
+ models = list_models(
138
+ task="text-generation" if model_type == "llm" else None,
139
+ limit=max_models
140
+ )
 
 
 
 
 
 
 
 
141
 
142
  leaderboard_data = []
 
 
143
  for model in models:
144
+ readme_content = fetch_model_readme(model.id)
145
+ if not readme_content:
 
 
 
146
  continue
147
 
 
148
  ratings = calculate_schizo_rating(readme_content)
 
 
149
  leaderboard_data.append({
150
+ "model_id": model.id,
151
  "combined_rating": ratings["combined"],
152
  "word_rating": ratings["word"],
153
  "wordiness_rating": ratings["wordiness"],
 
156
  "word_count": ratings["word_count"],
157
  "markdown_count": ratings["markdown_count"]
158
  })
 
 
 
 
 
 
159
 
 
160
  leaderboard_data.sort(key=lambda x: x["combined_rating"], reverse=True)
 
161
  return leaderboard_data
162
 
163
  def create_leaderboard_html(leaderboard_data):
 
164
  html = """
165
  <div class="leaderboard-container">
166
  <table id="leaderboard">
 
187
  html += """
188
  </table>
189
  </div>
 
190
  <script>
191
  function sortTable(n, isNumeric = false) {
192
+ // Existing sortTable implementation remains unchanged
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  }
194
  </script>
195
  """
 
196
  return html
197
 
198
+ def background_cache_updater():
199
+ while True:
200
+ try:
201
+ for model_type in ["llm", "all"]:
202
+ leaderboard_data = generate_leaderboard_data(model_type)
203
+ CACHE[model_type]["html"] = create_leaderboard_html(leaderboard_data)
204
+ CACHE[model_type]["timestamp"] = time.time()
205
+ except Exception as e:
206
+ print(f"Background update error: {e}")
207
+ time.sleep(600)
208
+
209
  def load_leaderboard(model_type):
210
+ if cached := CACHE[model_type]["html"]:
211
+ return cached
212
+ return '<div class="loading">Leaderboard is being generated. Please wait...</div>'
 
 
 
 
 
 
 
213
 
214
+ def trigger_refresh(model_type):
215
+ def refresh_task():
216
+ try:
217
+ leaderboard_data = generate_leaderboard_data(model_type)
218
+ CACHE[model_type]["html"] = create_leaderboard_html(leaderboard_data)
219
+ CACHE[model_type]["timestamp"] = time.time()
220
+ except Exception as e:
221
+ print(f"Refresh error: {e}")
222
+ Thread(target=refresh_task, daemon=True).start()
223
+ return '<div class="loading">Refreshing leaderboard in the background...</div>'
224
 
225
  @spaces.GPU()
226
  def init_leaderboard():
227
+ return '<div class="loading">Initializing leaderboard...</div>'
228
 
229
  with gr.Blocks(css=CSS, theme="soft") as demo:
230
  gr.HTML(TITLE)
 
237
  value="llm",
238
  label="Model Type Filter",
239
  )
 
240
  refresh_button = gr.Button("Refresh Leaderboard")
241
 
242
+ leaderboard_html = gr.HTML(init_leaderboard())
243
 
 
244
  demo.load(
245
  fn=load_leaderboard,
246
  inputs=[model_type_dropdown],
247
  outputs=[leaderboard_html],
248
+ every=30
249
  )
250
 
251
  refresh_button.click(
252
+ fn=trigger_refresh,
253
  inputs=[model_type_dropdown],
254
  outputs=[leaderboard_html],
255
  )
256
 
257
  if __name__ == "__main__":
258
+ Thread(target=background_cache_updater, daemon=True).start()
259
  demo.launch()