Open-Schizo-Leaderboard

Running on Zero

App Files Files Community

rombodawg committed on Mar 26

Commit

7e073c3

verified ·

1 Parent(s): 5a4060a

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -153

app.py CHANGED Viewed

@@ -59,7 +59,6 @@ tr:hover {
 }
 """
-# List of schizo words to check for
 SCHIZO_WORDS = [
     "MAXED", "Max", "SUPER", "Duped", "Edge", "maid", "Solution",
     "gpt-4", "gpt4o", "claude-3.5", "claude-3.7", "o1", "o3-mini",
@@ -71,75 +70,46 @@ SCHIZO_WORDS = [
     "NuSLERP", "DELL", "DELLA Task Arithmeti", "SCE"
 ]
-# List of markdown symbols
 MARKDOWN_SYMBOLS = ["#", "*", "_", "`", ">", "-", "+", "[", "]", "(", ")", "!", "\\", "|", "~", "<", ">", "=", ":"]
 def count_schizo_words(text):
-    """Count occurrences of schizo words in text"""
     count = 0
     for word in SCHIZO_WORDS:
-        # Case insensitive search
         count += len(re.findall(re.escape(word), text, re.IGNORECASE))
     return count
 def count_markdown_symbols(text):
-    """Count occurrences of markdown symbols in text"""
     count = 0
     for symbol in MARKDOWN_SYMBOLS:
         count += text.count(symbol)
     return count
 def calculate_word_count(text):
-    """Calculate word count in text"""
     return len(re.findall(r'\w+', text))
 def calculate_schizo_rating(readme_content):
-    """Calculate schizo rating based on defined criteria"""
-    # Count schizo words
     schizo_word_count = count_schizo_words(readme_content)
-    # Calculate base rating from schizo words
     word_schizo_rating = schizo_word_count * 10
-    # Calculate word count penalties
     word_count = calculate_word_count(readme_content)
-    # Word count penalty
     wordiness_schizo_rating = 0
     if word_count < 150:
         wordiness_schizo_rating = word_schizo_rating * 0.5
     elif word_count > 1000:
-        extra_penalty = 0
-        if word_count > 1000:
-            extra_penalty = 0.5
-        if word_count > 1500:
-            extra_penalty = 0.75
-        if word_count > 2000:
-            extra_penalty = 1.0
-            # Additional penalty for every 500 words over 2000
-            extra_words = word_count - 2000
-            extra_500s = extra_words // 500
-            extra_penalty += extra_500s * 0.25
         wordiness_schizo_rating = word_schizo_rating * extra_penalty
-    # Markdown symbol penalty
     markdown_count = count_markdown_symbols(readme_content)
     visual_schizo_rating = 0
     if markdown_count > 100:
-        visual_penalty = 0
-        if markdown_count > 100:
-            visual_penalty = 0.25
-        if markdown_count > 150:
-            visual_penalty = 0.5
-            # Additional penalty for every 50 symbols over 150
-            extra_symbols = markdown_count - 150
-            extra_50s = extra_symbols // 50
-            visual_penalty += extra_50s * 0.25
         visual_schizo_rating = word_schizo_rating * visual_penalty
-    # Calculate final combined score
     combined_schizo_rating = word_schizo_rating + wordiness_schizo_rating + visual_schizo_rating
     return {
@@ -153,53 +123,31 @@ def calculate_schizo_rating(readme_content):
     }
 def fetch_model_readme(model_id):
-    """Fetch README for a given model ID"""
     try:
-        # Try to get the readme content
         url = f"https://huggingface.co/{model_id}/raw/main/README.md"
         response = requests.get(url)
-        if response.status_code == 200:
-            return response.text
-        else:
-            return None
     except Exception as e:
         print(f"Error fetching README for {model_id}: {e}")
         return None
 def generate_leaderboard_data(model_type="llm", max_models=500):
-    """Generate leaderboard data by analyzing model cards"""
     api = HfApi(token=HF_TOKEN)
-    # Define filter parameters based on model type
-    if model_type == "llm":
-        # Filter to text-generation models using API parameters
-        models = list_models(
-            task="text-generation",
-            limit=max_models  # Set a reasonable limit to avoid overwhelming the API
-        )
-    else:
-        # Get all models
-        models = list_models(
-            limit=max_models  # Set a reasonable limit to avoid overwhelming the API
-        )
     leaderboard_data = []
-    processed_count = 0
     for model in models:
-        model_id = model.id
-        readme_content = fetch_model_readme(model_id)
-        if readme_content is None or len(readme_content.strip()) == 0:
-            # Skip models without READMEs
             continue
-        # Calculate ratings
         ratings = calculate_schizo_rating(readme_content)
-        # Add to leaderboard data
         leaderboard_data.append({
-            "model_id": model_id,
             "combined_rating": ratings["combined"],
             "word_rating": ratings["word"],
             "wordiness_rating": ratings["wordiness"],
@@ -208,20 +156,11 @@ def generate_leaderboard_data(model_type="llm", max_models=500):
             "word_count": ratings["word_count"],
             "markdown_count": ratings["markdown_count"]
         })
-        processed_count += 1
-        # Status update
-        if processed_count % 10 == 0:
-            print(f"Processed {processed_count} models")
-    # Sort by combined rating in descending order
     leaderboard_data.sort(key=lambda x: x["combined_rating"], reverse=True)
     return leaderboard_data
 def create_leaderboard_html(leaderboard_data):
-    """Create HTML for the leaderboard"""
     html = """
     <div class="leaderboard-container">
         <table id="leaderboard">
@@ -248,91 +187,44 @@ def create_leaderboard_html(leaderboard_data):
     html += """
         </table>
     </div>
     <script>
     function sortTable(n, isNumeric = false) {
-        var table, rows, switching, i, x, y, shouldSwitch, dir, switchcount = 0;
-        table = document.getElementById("leaderboard");
-        switching = true;
-        dir = "asc";
-        while (switching) {
-            switching = false;
-            rows = table.rows;
-            for (i = 1; i < (rows.length - 1); i++) {
-                shouldSwitch = false;
-                x = rows[i].getElementsByTagName("TD")[n];
-                y = rows[i + 1].getElementsByTagName("TD")[n];
-                if (dir == "asc") {
-                    if (isNumeric) {
-                        if (parseFloat(x.innerHTML) > parseFloat(y.innerHTML)) {
-                            shouldSwitch = true;
-                            break;
-                        }
-                    } else {
-                        if (x.innerHTML.toLowerCase() > y.innerHTML.toLowerCase()) {
-                            shouldSwitch = true;
-                            break;
-                        }
-                    }
-                } else if (dir == "desc") {
-                    if (isNumeric) {
-                        if (parseFloat(x.innerHTML) < parseFloat(y.innerHTML)) {
-                            shouldSwitch = true;
-                            break;
-                        }
-                    } else {
-                        if (x.innerHTML.toLowerCase() < y.innerHTML.toLowerCase()) {
-                            shouldSwitch = true;
-                            break;
-                        }
-                    }
-                }
-            }
-            if (shouldSwitch) {
-                rows[i].parentNode.insertBefore(rows[i + 1], rows[i]);
-                switching = true;
-                switchcount++;
-            } else {
-                if (switchcount == 0 && dir == "asc") {
-                    dir = "desc";
-                    switching = true;
-                }
-            }
-        }
     }
     </script>
     """
     return html
 def load_leaderboard(model_type):
-    """Load the leaderboard with models"""
-    # Show loading message
-    yield '<div class="loading">Loading models and analyzing Schizo ratings... This may take a few minutes.</div>'
-    try:
-        leaderboard_data = generate_leaderboard_data(model_type)
-        leaderboard_html = create_leaderboard_html(leaderboard_data)
-        yield leaderboard_html
-    except Exception as e:
-        yield f'<div class="loading">Error generating leaderboard: {str(e)}</div>'
-# Background loading thread
-def background_loader(model_type, progress=None):
-    try:
-        leaderboard_data = generate_leaderboard_data(model_type)
-        leaderboard_html = create_leaderboard_html(leaderboard_data)
-        return leaderboard_html
-    except Exception as e:
-        return f'<div class="loading">Error generating leaderboard: {str(e)}</div>'
 @spaces.GPU()
 def init_leaderboard():
-    return '<div class="loading">Initializing leaderboard... Please wait while we analyze Hugging Face models.</div>'
 with gr.Blocks(css=CSS, theme="soft") as demo:
     gr.HTML(TITLE)
@@ -345,23 +237,23 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
                 value="llm",
                 label="Model Type Filter",
             )
             refresh_button = gr.Button("Refresh Leaderboard")
-    leaderboard_html = gr.HTML(value=init_leaderboard)
-    # Load leaderboard on startup and when refresh button is clicked
     demo.load(
         fn=load_leaderboard,
         inputs=[model_type_dropdown],
         outputs=[leaderboard_html],
     )
     refresh_button.click(
-        fn=load_leaderboard,
         inputs=[model_type_dropdown],
         outputs=[leaderboard_html],
     )
 if __name__ == "__main__":
     demo.launch()

 }
 """
 SCHIZO_WORDS = [
     "MAXED", "Max", "SUPER", "Duped", "Edge", "maid", "Solution",
     "gpt-4", "gpt4o", "claude-3.5", "claude-3.7", "o1", "o3-mini",
     "NuSLERP", "DELL", "DELLA Task Arithmeti", "SCE"
 ]
 # Characters treated as markdown "noise" by count_markdown_symbols().
 # Every symbol is listed exactly once: ">" used to appear twice, which made
 # each ">" in a README count double.
 MARKDOWN_SYMBOLS = ["#", "*", "_", "`", ">", "-", "+", "[", "]", "(", ")", "!", "\\", "|", "~", "<", "=", ":"]
 # Last rendered leaderboard HTML per model-type filter, plus the time.time()
 # value recorded when it was stored. None means "not generated yet".
 CACHE = {
     "llm": {"html": None, "timestamp": None},
     "all": {"html": None, "timestamp": None},
 }
 def count_schizo_words(text):
     """Return the total number of SCHIZO_WORDS hits found in ``text``.

     Every entry is matched literally (regex metacharacters are escaped) and
     case-insensitively, with no word-boundary check, so an entry also matches
     inside longer words. The hits of all entries are added together.
     """
     return sum(
         len(re.findall(re.escape(term), text, re.IGNORECASE))
         for term in SCHIZO_WORDS
     )
 def count_markdown_symbols(text):
     """Return the summed occurrence count in ``text`` of every MARKDOWN_SYMBOLS entry."""
     return sum(text.count(mark) for mark in MARKDOWN_SYMBOLS)
 def calculate_word_count(text):
     """Return the number of words in ``text``.

     A word is a maximal run of regex word characters, so punctuation and
     whitespace act as separators while underscores do not.
     """
     words = re.compile(r'\w+').findall(text)
     return len(words)
 def calculate_schizo_rating(readme_content):
     schizo_word_count = count_schizo_words(readme_content)
     word_schizo_rating = schizo_word_count * 10
     word_count = calculate_word_count(readme_content)
     wordiness_schizo_rating = 0
     if word_count < 150:
         wordiness_schizo_rating = word_schizo_rating * 0.5
     elif word_count > 1000:
+        extra_penalty = min(1.0, 0.5 + ((word_count - 1000) // 500) * 0.25)
         wordiness_schizo_rating = word_schizo_rating * extra_penalty
     markdown_count = count_markdown_symbols(readme_content)
     visual_schizo_rating = 0
     if markdown_count > 100:
+        visual_penalty = min(1.0, 0.5 + ((markdown_count - 150) // 50) * 0.25)
         visual_schizo_rating = word_schizo_rating * visual_penalty
     combined_schizo_rating = word_schizo_rating + wordiness_schizo_rating + visual_schizo_rating
     return {
     }
 def fetch_model_readme(model_id, timeout=10):
     """Download the raw README.md of a Hugging Face model repository.

     Args:
         model_id: Repository id; it is interpolated into the
             ``https://huggingface.co/<model_id>/raw/main/README.md`` URL.
         timeout: Seconds to wait for the server before giving up. Without a
             timeout a single stalled connection would block the caller
             indefinitely.

     Returns:
         The README text when the server answers with HTTP 200, otherwise
         ``None`` (non-200 status, timeout, or any other request failure).
     """
     url = f"https://huggingface.co/{model_id}/raw/main/README.md"
     try:
         response = requests.get(url, timeout=timeout)
     except requests.RequestException as e:
         # Best effort: a model whose README cannot be fetched is simply skipped.
         print(f"Error fetching README for {model_id}: {e}")
         return None
     return response.text if response.status_code == 200 else None
 def generate_leaderboard_data(model_type="llm", max_models=500):
     api = HfApi(token=HF_TOKEN)
+    models = list_models(
+        task="text-generation" if model_type == "llm" else None,
+        limit=max_models
+    )
     leaderboard_data = []
     for model in models:
+        readme_content = fetch_model_readme(model.id)
+        if not readme_content:
             continue
         ratings = calculate_schizo_rating(readme_content)
         leaderboard_data.append({
+            "model_id": model.id,
             "combined_rating": ratings["combined"],
             "word_rating": ratings["word"],
             "wordiness_rating": ratings["wordiness"],
             "word_count": ratings["word_count"],
             "markdown_count": ratings["markdown_count"]
         })
     leaderboard_data.sort(key=lambda x: x["combined_rating"], reverse=True)
     return leaderboard_data
 def create_leaderboard_html(leaderboard_data):
     html = """
     <div class="leaderboard-container">
         <table id="leaderboard">
     html += """
         </table>
     </div>
     <script>
     function sortTable(n, isNumeric = false) {
         // Sort the rows of the #leaderboard table by column n.
         //   n         - zero-based index of the TD cell to sort on
         //   isNumeric - compare cells with parseFloat instead of as lowercase text
         // Rows are sorted ascending; if the column is already in ascending
         // order the call sorts it descending instead.
         var table = document.getElementById("leaderboard");
         // Row 0 is treated as the header row and is never moved.
         var rows = Array.prototype.slice.call(table.rows, 1);

         function cellKey(row) {
             var html = row.getElementsByTagName("TD")[n].innerHTML;
             return isNumeric ? parseFloat(html) : html.toLowerCase();
         }

         function ascending(a, b) {
             var x = cellKey(a), y = cellKey(b);
             if (x > y) return 1;
             if (x < y) return -1;
             return 0;
         }

         var alreadyAscending = true;
         for (var i = 1; i < rows.length; i++) {
             if (ascending(rows[i - 1], rows[i]) > 0) {
                 alreadyAscending = false;
                 break;
             }
         }

         rows.sort(alreadyAscending
             ? function (a, b) { return ascending(b, a); }
             : ascending);

         // appendChild moves an existing node, so re-appending the rows in
         // sorted order rearranges them below the header.
         rows.forEach(function (row) {
             row.parentNode.appendChild(row);
         });
     }
     </script>
     """
     return html
+def background_cache_updater():
+    while True:
+        try:
+            for model_type in ["llm", "all"]:
+                leaderboard_data = generate_leaderboard_data(model_type)
+                CACHE[model_type]["html"] = create_leaderboard_html(leaderboard_data)
+                CACHE[model_type]["timestamp"] = time.time()
+        except Exception as e:
+            print(f"Background update error: {e}")
+        time.sleep(600)
 def load_leaderboard(model_type):
     """Return the cached leaderboard HTML for ``model_type`` without blocking.

     Args:
         model_type: CACHE key for the requested leaderboard ("llm" or "all").

     Returns:
         The cached HTML when one has been generated, otherwise a "please
         wait" placeholder. An unknown or ``None`` key also yields the
         placeholder instead of raising ``KeyError``.
     """
     entry = CACHE.get(model_type) or {}
     if cached := entry.get("html"):
         return cached
     return '<div class="loading">Leaderboard is being generated. Please wait...</div>'
+def trigger_refresh(model_type):
+    def refresh_task():
+        try:
+            leaderboard_data = generate_leaderboard_data(model_type)
+            CACHE[model_type]["html"] = create_leaderboard_html(leaderboard_data)
+            CACHE[model_type]["timestamp"] = time.time()
+        except Exception as e:
+            print(f"Refresh error: {e}")
+    Thread(target=refresh_task, daemon=True).start()
+    return '<div class="loading">Refreshing leaderboard in the background...</div>'
 @spaces.GPU()
 def init_leaderboard():
     """Return the static placeholder HTML used as the initial leaderboard content."""
     placeholder = '<div class="loading">Initializing leaderboard...</div>'
     return placeholder
 with gr.Blocks(css=CSS, theme="soft") as demo:
     gr.HTML(TITLE)
                 value="llm",
                 label="Model Type Filter",
             )
             refresh_button = gr.Button("Refresh Leaderboard")
+    leaderboard_html = gr.HTML(init_leaderboard())
     demo.load(
         fn=load_leaderboard,
         inputs=[model_type_dropdown],
         outputs=[leaderboard_html],
+        every=30
     )
     refresh_button.click(
+        fn=trigger_refresh,
         inputs=[model_type_dropdown],
         outputs=[leaderboard_html],
     )
 if __name__ == "__main__":
     # Start the periodic cache rebuild first. It runs as a daemon thread, so
     # it does not keep the process alive once the app itself exits.
+    Thread(target=background_cache_updater, daemon=True).start()
     # Serve the Gradio UI.
     demo.launch()