Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| import gradio as gr | |
| import pandas as pd | |
| import json | |
| from pathlib import Path | |
| from huggingface_hub import HfApi | |
| from huggingface_hub.errors import RepositoryNotFoundError | |
| from datasets import load_dataset | |
# Shared Hub client, reused for model lookups and request uploads.
api = HfApi()

# Hub namespace and the dataset repository that holds the evaluation requests.
OWNER = "Navid-AI"
DATASET_REPO_ID = OWNER + "/requests-dataset-rag"

# "results" directory located next to this module.
results_dir = Path(__file__).parent.joinpath("results")
| # Add a helper to load JSON results with optional formatting. | |
def load_json_results(
    file_path: Path, prepare_for_display=False, sort_col=None, drop_cols=None
):
    """Load a JSON results file into a DataFrame, optionally formatted for display.

    Args:
        file_path: Location of the JSON file to read.
        prepare_for_display: When true, wrap every value of the ``Model``
            column in an HTML link to its Hugging Face page, then apply
            ``drop_cols`` and ``sort_col``.
        sort_col: Column (or columns) to sort by, in descending order. Only
            used when ``prepare_for_display`` is true.
        drop_cols: Column label(s) to remove. Only used when
            ``prepare_for_display`` is true.

    Returns:
        The loaded (and possibly formatted) DataFrame.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    if not file_path.exists():
        raise FileNotFoundError(f"File '{file_path}' not found.")

    df = pd.read_json(file_path)
    if not prepare_for_display:
        return df

    # Series.map exists in every pandas release, whereas the DataFrame.map
    # used previously is only available from pandas 2.1 onwards.
    df["Model"] = df["Model"].map(
        lambda name: f'<a href="https://huggingface.co/{name}" target="_blank">{name}</a>'
    )

    # NOTE(review): drop_cols / sort_col are honoured only in display mode;
    # confirm no caller expects them to apply to the raw load as well.
    if drop_cols is not None:
        df = df.drop(columns=drop_cols)
    if sort_col is not None:
        df = df.sort_values(sort_col, ascending=False)
    return df
def fetch_model_information(model_id):
    """Look up Hub metadata for ``model_id`` to pre-fill the submission form.

    Args:
        model_id: Repository id of the model on the Hugging Face Hub.

    Returns:
        A 5-tuple ``(precision_update, license, num_parameters, num_downloads,
        num_likes)``. ``precision_update`` is a ``gr.update`` carrying the
        precision choices and the selected value; the other items are strings.
        Any value that cannot be determined is ``"N/A"``, and the precision
        choices fall back to ``["BF16"]``.
    """
    try:
        model_info = api.model_info(model_id)
    except Exception as e:
        # Best-effort lookup: report the problem and fall back to placeholders.
        print(f"Error fetching model info for '{model_id}': {e}")
        model_info = None

    safetensors = model_info.safetensors if model_info else None
    card_data = model_info.card_data if model_info else None

    # Parameter count is reported in millions.
    num_parameters = str(round(safetensors.total / 1e6)) if safetensors else "N/A"
    num_downloads = (
        str(model_info.downloads)
        if model_info and model_info.downloads is not None
        else "N/A"
    )
    num_likes = (
        str(model_info.likes)
        if model_info and model_info.likes is not None
        else "N/A"
    )

    # card_data may be None, in which case the membership test used to raise
    # TypeError; treat that the same as a card without a license entry.
    if card_data is not None and "license" in card_data:
        model_license = str(card_data["license"])
    else:
        model_license = "N/A"

    # An empty parameter mapping would make the [0] lookup below fail, so it
    # falls back to the same default as a model without safetensors metadata.
    supported_precisions = list(safetensors.parameters.keys()) if safetensors else []
    if not supported_precisions:
        supported_precisions = ["BF16"]

    return (
        gr.update(choices=supported_precisions, value=supported_precisions[0]),
        model_license,
        num_parameters,
        num_downloads,
        num_likes,
    )
def load_requests(status_folder, task_type=None):
    """Return the evaluation requests with a given status and, optionally, task.

    Args:
        status_folder: Status name; it is upper-cased before being compared
            with the ``status`` column.
        task_type: When truthy, only rows whose ``task`` column equals this
            value are kept.

    Returns:
        A DataFrame of the matching rows without the ``status`` and ``task``
        columns.
    """
    # Fetch the "test" split of the requests dataset from the Hub.
    all_requests = load_dataset(DATASET_REPO_ID, split="test").to_pandas()

    wanted_status = status_folder.upper()
    selected = all_requests[all_requests["status"] == wanted_status]
    if task_type:
        selected = selected[selected["task"] == task_type]

    # The filter columns carry no extra information once the rows are selected.
    return selected.drop(columns=["status", "task"])
def _status_banner(color, message):
    """Wrap ``message`` in the centred, coloured ``<h2>`` used for submission feedback."""
    return f"<h2 style='color:{color}; text-align:center;'>{message}</h2>"


def submit_model(model_name, revision, precision, params, license, task, model_param_limit):
    """Validate a submission and queue it as a pending evaluation request.

    Args:
        model_name: Full Hub repository id in ``org/model`` form.
        revision: Model revision; blank values fall back to ``"main"``.
        precision: Precision label; stored lower-cased.
        params: Parameter count in millions as text, or ``"N/A"`` when the
            auto-fetch step could not determine it.
        license: License text stored with the request.
        task: Task name used to filter the request tables and in the file name.
        model_param_limit: Maximum accepted size in millions of parameters;
            ``None`` disables the size check.

    Returns:
        A ``(html_message, pending_requests)`` tuple: an HTML status banner and
        the DataFrame of pending requests for ``task``.
    """
    from html import escape  # local import keeps this block self-contained

    # Every return path feeds the pending table, so it is always loaded first.
    df_pending = load_requests("pending", task_type=task)

    # Normalise the free-text inputs once so later checks cannot crash on
    # None or stray whitespace.
    model_name = (model_name or "").strip()
    revision = (revision or "").strip() or "main"
    precision = (precision or "").strip().lower()
    # The name is typed by the user and echoed into HTML, so escape it there.
    shown_name = escape(model_name)

    # Cheap format check first: no point querying the Hub for a malformed id.
    org_model = model_name.split("/")
    if len(org_model) != 2 or not all(org_model):
        return (
            _status_banner(
                "red",
                "❌ Please enter the full model name including the organization or username, e.g., 'intfloat/multilingual-e5-large-instruct'.",
            ),
            df_pending,
        )
    org, model_id = org_model

    # Single Hub lookup; any failure is reported instead of crashing the handler.
    try:
        api.model_info(model_name)
    except RepositoryNotFoundError:
        return (
            _status_banner("red", f"❌ Model '{shown_name}' not found on HuggingFace Hub."),
            df_pending,
        )
    except Exception as e:
        print(f"Error fetching model info: {e}")
        return (
            _status_banner("red", f"🤷♂️ Model {shown_name} not found on HuggingFace Hub."),
            df_pending,
        )

    # Check if the auto-fetch feature couldn't fetch model info.
    if params == "N/A":
        return (
            _status_banner(
                "red",
                "❌ I think the auto-fetch feature couldn't fetch model info."
                "If your model is not suitable for this task evaluation then this is expected, but if it's suitable and this behavior happened with you then please open a community discussion so we can fix your problem ASAP.",
            ),
            df_pending,
        )

    # The params box is editable, so it may hold blank or non-numeric text.
    try:
        params_in_millions = float(params)
    except (TypeError, ValueError):
        return (
            _status_banner(
                "red",
                "❌ Params must be a number (in millions). Please use the auto-fetch button or enter the value manually.",
            ),
            df_pending,
        )

    # Check if model size is in valid range.
    if model_param_limit is not None and params_in_millions > model_param_limit:
        return (
            _status_banner(
                "red",
                f"❌ Model size should be less than {model_param_limit} million parameters. Please check the model size and try again.",
            ),
            df_pending,
        )

    def model_exists_in_df(df):
        """Tell whether this model/revision/precision is already listed in ``df``."""
        if df.empty:
            return False
        return (
            (df["model_name"] == model_name)
            & (df["revision"] == revision)
            & (df["precision"] == precision)
        ).any()

    if model_exists_in_df(df_pending):
        return (
            _status_banner(
                "green",
                f"👍 Model {shown_name} is already in the evaluation queue as a {task}.",
            ),
            df_pending,
        )

    # The finished and failed tables are only needed for the duplicate checks,
    # so they are fetched lazily rather than before every validation step.
    if model_exists_in_df(load_requests("finished", task_type=task)):
        return (
            _status_banner(
                "green",
                f"👍 Model {shown_name} has already been evaluated as a {task}.",
            ),
            df_pending,
        )
    if model_exists_in_df(load_requests("failed", task_type=task)):
        return (
            _status_banner(
                "red",
                f"❌ Model {shown_name} has previously failed evaluation as a {task}.",
            ),
            df_pending,
        )

    # Build the request record and serialise it to JSON.
    submission = {
        "model_name": model_name,
        "license": license,
        "revision": revision,
        "precision": precision,
        "status": "PENDING",
        "params": params,
        "task": task,
    }
    submission_json = json.dumps(submission, indent=2)

    precision_str = precision if precision else "missing"
    file_path_in_repo = f"pending/{org}/{model_id}_eval_request_{revision}_{precision_str}_{task.lower()}.json"

    # Upload the request file to the dataset repository.
    try:
        api.upload_file(
            path_or_fileobj=submission_json.encode("utf-8"),
            path_in_repo=file_path_in_repo,
            repo_id=DATASET_REPO_ID,
            repo_type="dataset",
        )
    except Exception as e:
        print(f"Error uploading file: {e}")
        return (
            _status_banner(
                "red", f"❌ Could not submit model '{shown_name}' for evaluation."
            ),
            df_pending,
        )

    # Reload the pending table for display after the upload.
    df_pending = load_requests("pending", task_type=task)
    return (
        _status_banner(
            "green",
            f"✅ Model {shown_name} has been submitted successfully as a {task}.",
        ),
        df_pending,
    )
def submit_gradio_module(task_type, model_param_limit):
    """Build the model-submission form and the evaluation-status tables.

    Creates the input widgets, wires the auto-fetch and submit handlers, and
    renders the pending / finished / failed request tables for ``task_type``.

    Args:
        task_type: Task name used to filter the request tables; also passed to
            ``submit_model`` through a ``gr.State``.
        model_param_limit: Size limit passed to ``submit_model`` through a
            ``gr.State``.

    NOTE(review): presumably called inside an active Gradio layout context;
    confirm against the caller.
    """
    # Per-session values handed to the submit handler.
    task_state = gr.State(value=task_type)
    limit_state = gr.State(value=model_param_limit)

    # Row 1: model name plus the auto-fetch trigger.
    with gr.Row(equal_height=True):
        model_name_input = gr.Textbox(
            label="Model",
            placeholder="Enter the full model name from HuggingFace Hub (e.g., intfloat/multilingual-e5-large-instruct)",
            scale=4,
        )
        fetch_data_button = gr.Button(
            value="Auto Fetch Model Info", variant="secondary"
        )

    # Row 2: precision, license and revision.
    with gr.Row():
        precision_input = gr.Dropdown(
            choices=["F16", "F32", "BF16", "I8", "U8", "I16"],
            label="Precision",
            value="F16",
        )
        license_input = gr.Textbox(
            label="License",
            placeholder="Enter the license type (Generic one is 'Open' in case no License is provided)",
            value="Open",
        )
        revision_input = gr.Textbox(label="Revision", placeholder="main", value="main")

    # Row 3: model statistics; only the parameter count is editable.
    with gr.Row():
        params_input = gr.Textbox(label="Params (in Millions)", interactive=True)
        num_downloads_input = gr.Textbox(label="Number of Downloads", interactive=False)
        num_likes_input = gr.Textbox(label="Number of Likes", interactive=False)

    submit_button = gr.Button("Submit Model", variant="primary")
    submission_result = gr.HTML(label="Submission Result")

    # Clicking the button and pressing Enter in the model box both run the fetch.
    fetch_targets = [
        precision_input,
        license_input,
        params_input,
        num_downloads_input,
        num_likes_input,
    ]
    for register in (fetch_data_button.click, model_name_input.submit):
        register(
            fetch_model_information,
            inputs=[model_name_input],
            outputs=fetch_targets,
        )

    # Load pending, finished, and failed requests.
    pending_requests, finished_requests, failed_requests = [
        load_requests(status, task_type)
        for status in ("pending", "finished", "failed")
    ]

    # Display the tables.
    gr.Markdown("## Evaluation Status")
    with gr.Accordion(f"Pending Evaluations ({len(pending_requests)})", open=True):
        pending_table = gr.Dataframe(pending_requests)

    # Finished and failed sections show a table, or a note when there are no rows.
    for title, requests in (("Finished", finished_requests), ("Failed", failed_requests)):
        with gr.Accordion(f"{title} Evaluations ({len(requests)})", open=False):
            if requests.empty:
                gr.Markdown(f"No {title.lower()} evaluations.")
            else:
                gr.Dataframe(requests)

    submit_button.click(
        submit_model,
        inputs=[
            model_name_input,
            revision_input,
            precision_input,
            params_input,
            license_input,
            task_state,
            limit_state,
        ],
        outputs=[submission_result, pending_table],
    )