# NOTE: "Spaces: Runtime error" — page-header residue from the Hugging Face
# Space this script was captured from; not part of the program itself.
import json
import os
from pprint import pprint

import pandas as pd
from datasets import load_dataset

# Route all pandas .plot calls through plotly instead of the default matplotlib.
pd.options.plotting.backend = "plotly"
# Models whose lighteval detail datasets are pulled in; names use the
# Hub's "org__model" double-underscore separator.
MODELS = [
    "mistralai__Mistral-7B-Instruct-v0.2",
    # "HuggingFaceH4__zephyr-7b-beta",
    # "meta-llama__Llama-2-7b-chat-hf",
    # "01-ai__Yi-34B-Chat",
]

# Hub token for the private detail datasets (None when unset).
HF_TOKEN = os.getenv("HF_TOKEN")

# Maps a 0-based turn index to the metric name holding that turn's score.
score_turn = {1: "multi_turn", 0: "single_turn"}
def get_dataframe_lighteval() -> pd.DataFrame:
    """Build a flat DataFrame of per-turn MT-Bench details for every model in MODELS.

    Each row is one conversation turn and carries: the model name, the 0-based
    turn index, the prompt actually shown to the model, the model's response,
    the judge prompt and judgement for that turn, the turn's score (looked up
    via ``score_turn``), and the question id.

    Returns:
        pd.DataFrame: one row per (model, question, turn).
    """
    samples = []
    for model in MODELS:
        details_lighteval = load_dataset(
            f"SaylorTwift/details_{model}_private",
            "extended_mt_bench_0",
            split="latest",
            token=HF_TOKEN,
        )
        for detail in details_lighteval:
            # NOTE: "judement_prompt" (sic) is the dataset's actual key — keep it.
            judge_prompts = detail["judement_prompt"]
            judgements = detail["judgement"]
            predictions = detail["predictions"][0]
            prompts = detail["full_prompt"]
            for turn, prediction in enumerate(predictions):
                if turn == 1:
                    # The second-turn template embeds the model's first-turn answer.
                    prompt = prompts[turn].format(model_response=predictions[turn - 1])
                else:
                    prompt = prompts[turn]
                samples.append(
                    {
                        "model": model,
                        "turn": turn,
                        "prompt": prompt,
                        "response": prediction,
                        "judgement_prompt": judge_prompts[turn],
                        "judgment": judgements[turn],
                        "score": detail["metrics"][score_turn[turn]],
                        "question_id": detail["specifics"]["id"],
                    }
                )
    return pd.DataFrame(samples)
def construct_dataframe() -> pd.DataFrame:
    """Assemble the per-turn lighteval samples into a question-indexed DataFrame.

    Returns a DataFrame indexed by ``question_id`` with NaN rows dropped and
    the model column reduced to the bare model name.
    """
    lighteval = get_dataframe_lighteval()
    # Keep only the model name after the "org__model" separator.
    lighteval["model"] = lighteval["model"].apply(lambda name: name.split("__")[1])
    # Round-trip through a MultiIndex so question_id/turn/model lead the
    # column order, then index rows by question_id alone.
    all_samples = lighteval.set_index(["question_id", "turn", "model"]).reset_index()
    all_samples = all_samples.set_index("question_id")
    return all_samples.dropna()
def create_plot(model: str, dataframe: pd.DataFrame):
    """Return a grouped plotly bar chart of turn-1 scores for *model*.

    NOTE(review): this expects ``score_lighteval`` and ``score_mt_bench``
    columns — presumably from a merge performed elsewhere; the dataframe
    built by ``construct_dataframe`` in this file only has ``score``.
    Confirm against the caller.
    """
    subset = dataframe[dataframe["model"] == model].dropna()
    subset = subset[subset["turn"] == 1]
    subset["score_lighteval"] = subset["score_lighteval"].astype(int)
    subset["score_mt_bench"] = subset["score_mt_bench"].astype(int)
    subset = subset[["score_lighteval", "score_mt_bench"]]
    subset.index = subset.index.astype(str)
    # labels/barmode are plotly-backend kwargs (see module-level backend setting).
    return subset.plot.bar(
        title="Scores",
        labels={"index": "Index", "value": "Score"},
        barmode="group",
    )
def get_scores(dataframe):
    """Mean score per model.

    Averages scores within each (model, turn) first, then averages those
    per-turn means per model — so both turns weigh equally regardless of
    how many samples each has.
    """
    cleaned = dataframe.dropna()
    cleaned["score"] = cleaned["score"].astype(int)
    per_turn = cleaned[["score", "turn", "model"]].groupby(["model", "turn"]).mean()
    return per_turn.groupby(["model"]).mean()
if __name__ == "__main__":
    # pprint is already imported at module top; the original re-imported it here.
    df = construct_dataframe()
    pprint(df)
    # print(df.iloc[130])
    # model = "zephyr-7b-beta"
    # fig = create_plot(model, df)
    # fig.show()