Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| from pathlib import Path | |
| from leaderboard_tab import ( | |
| create_leaderboard_tab, | |
| search_leaderboard, | |
| update_columns_to_show, | |
| ) | |
| from utils import load_json_results | |
# Constants
# Markdown body for the leaderboard's "About" panel. This text is rendered
# verbatim in the UI, so do not reflow or edit the string content itself.
RERANKER_ABOUT_SECTION = """
## About Reranking Evaluation
The reranking evaluation assesses a model's ability to improve search quality by reordering initially retrieved results. Models are evaluated across multiple unseen Arabic datasets to ensure robust performance.
### Evaluation Metrics
- **MRR@10 (Mean Reciprocal Rank at 10)**: Measures the ranking quality focusing on the first relevant result in top-10
- **NDCG@10 (Normalized DCG at 10)**: Evaluates the ranking quality of all relevant results in top-10
- **MAP (Mean Average Precision)**: Measures the overall precision across all relevant documents
All metrics are averaged across multiple evaluation datasets to provide a comprehensive assessment of model performance.
### Model Requirements
- Must accept query-document pairs as input
- Should output relevance scores for reranking (has cross-attention or similar mechanism for query-document matching)
- Support for Arabic text processing
### Evaluation Process
1. Models are tested on multiple unseen Arabic datasets
2. For each dataset:
- Initial candidate documents are provided
- Model reranks the candidates
- MRR@10, NDCG@10, and MAP are calculated
3. Final scores are averaged across all datasets
4. Models are ranked based on overall performance
### How to Prepare Your Model
- Model should be public on HuggingFace Hub (private models are not supported yet)
- Make sure it works coherently with `sentence-transformers` library
"""
# Global variables
# Module-level cache for the reranking leaderboard dataframe. Starts as None
# and is populated lazily by load_reranking_leaderboard() (triggered from
# create_reranking_tab() on first use).
reranking_df = None
def load_reranking_leaderboard():
    """Build the display-ready reranking leaderboard dataframe.

    Reads ``results/reranking_results.json`` located next to this module,
    sorts by "Average Score", drops the "Revision" and "Task" bookkeeping
    columns, prepends a 1-based "Rank" column, caches the result in the
    module-level ``reranking_df``, and returns it.
    """
    global reranking_df

    results_file = Path(__file__).parent / "results" / "reranking_results.json"
    df = load_json_results(
        results_file,
        prepare_for_display=True,
        sort_col="Average Score",
        drop_cols=["Revision", "Task"],
    )
    # Rows arrive already sorted, so rank is simply 1..N in order.
    df.insert(0, "Rank", range(1, len(df) + 1))

    reranking_df = df
    return reranking_df
def reranking_search_leaderboard(model_name, columns_to_show):
    """Search the cached reranking leaderboard.

    Thin adapter that binds the module-level ``reranking_df`` to the shared
    ``search_leaderboard`` helper so the UI callback only supplies the query
    and the columns to display.
    """
    return search_leaderboard(
        reranking_df,
        model_name,
        columns_to_show,
    )
def update_reranker_columns_to_show(columns_to_show):
    """Change which columns the reranking leaderboard displays.

    Thin adapter that binds the module-level ``reranking_df`` to the shared
    ``update_columns_to_show`` helper.
    """
    return update_columns_to_show(
        reranking_df,
        columns_to_show,
    )
def create_reranking_tab():
    """Assemble and return the reranking leaderboard tab.

    Lazily loads the leaderboard data on first invocation (subsequent calls
    reuse the module-level ``reranking_df`` cache), then delegates widget
    construction to the shared ``create_leaderboard_tab`` factory.
    """
    global reranking_df

    # First call: populate the cache; later calls reuse it as-is.
    if reranking_df is None:
        reranking_df = load_reranking_leaderboard()

    # Columns visible before the user customizes the view.
    initial_columns = [
        "Rank",
        "Model",
        "Average Score",
        "Model Size (MB)",
        "Context Length",
        "Embedding Dimension",
        "Namaa Global Knowledge",
        "Navid General Knowledge",
    ]

    return create_leaderboard_tab(
        df=reranking_df,
        initial_columns_to_show=initial_columns,
        search_function=reranking_search_leaderboard,
        update_function=update_reranker_columns_to_show,
        about_section=RERANKER_ABOUT_SECTION,
        task_type="Reranker",
    )