Spaces:
Running
Running
| import json | |
| DATA_PATH = "groundtruths/gaia-2023-validation.jsonl" | |
| class Verifier: | |
| def __init__(self, data_path: str | None=None): | |
| if data_path is None: | |
| data_path = DATA_PATH | |
| self.data_path = data_path | |
| self.data: dict = self.load_data() | |
| self.correct_cnt = 0 | |
| self.total_cnt = 0 | |
| def load_data(self) -> dict: | |
| data = {} | |
| with open(self.data_path, "r") as f: | |
| for line in f: | |
| record = json.loads(line) | |
| data[record["task_id"]] = record | |
| return data | |
| def verify(self, task_id: str, answer: str) -> None | tuple[bool, str]: | |
| record = self.data.get(task_id) | |
| if not record: | |
| print(f"Task ID {task_id} not found.") | |
| return None | |
| self.total_cnt += 1 | |
| if record["Final answer"] == answer: | |
| self.correct_cnt += 1 | |
| return True, record["Final answer"] | |
| return False, record["Final answer"] | |
| def get_answer(self, task_id: str) -> str: | |
| record = self.data.get(task_id) | |
| if not record: | |
| print(f"Task ID {task_id} not found.") | |
| return "" | |
| return record["Final answer"] | |
| def get_accuracy(self) -> float: | |
| if self.total_cnt == 0: | |
| return 0.0 | |
| return self.correct_cnt / self.total_cnt | |
| def get_output(self) -> str: | |
| return f"**Correct:** {self.correct_cnt}/{self.total_cnt} ({self.get_accuracy():.2%})" | |
| if __name__ == "__main__": | |
| verifier = Verifier() | |
| print(f"Loaded {len(verifier.data)} records from {verifier.data_path}") | |
| for record in verifier.data: | |
| print(record) |