import yaml
from loguru import logger


def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
    """Creates the base config dictionary"""
    return {
        "hf_configuration": {
            "token": "$HF_TOKEN",
            "private": True,
            "hf_organization": hf_org,
            "hf_dataset_name": hf_dataset_name,
        },
        "model_list": [
            {
                "model_name": "meta-llama/Llama-3.3-70B-Instruct",
                "provider": "novita",
                "max_concurrent_requests": 32,
            },
            {
                "model_name": "Qwen/Qwen2.5-72B-Instruct",
                "provider": "novita",
                "max_concurrent_requests": 32,
            },
        ],
        "model_roles": {
            "ingestion": ["meta-llama/Llama-3.3-70B-Instruct"],
            "summarization": ["Qwen/Qwen2.5-72B-Instruct"],
            "single_shot_question_generation": ["meta-llama/Llama-3.3-70B-Instruct"],
            "multi_hop_question_generation": ["meta-llama/Llama-3.3-70B-Instruct"],
            "answer_generation": ["Qwen/Qwen2.5-72B-Instruct"],
            "judge_answers": ["meta-llama/Llama-3.3-70B-Instruct"],
        },
        "pipeline": {
            "ingestion": {
                "source_documents_dir": f"/app/{session_uid}/uploaded_files/",
                "output_dir": f"/app/{session_uid}/ingested",
                "run": True,
            },
            "upload_ingest_to_hub": {
                "source_documents_dir": f"/app/{session_uid}/ingested",
                "run": True,
            },
            "summarization": {"run": True},
            "chunking": {
                "chunking_configuration": {
                    "l_min_tokens": 64,
                    "l_max_tokens": 128,
                    "tau_threshold": 0.3,
                    "h_min": 2,
                    "h_max": 4,
                },
                "run": True,
            },
            "single_shot_question_generation": {
                "diversification_seed": "24 year old adult",
                "run": True,
            },
            "multi_hop_question_generation": {"run": False},
            "answer_generation": {
                "question_type": "single_shot",
                "run": True,
                "strategies": [
                    {
                        "name": "zeroshot",
                        "prompt": "ZEROSHOT_QA_USER_PROMPT",
                        "model_name": "meta-llama/Llama-3.3-70B-Instruct",
                    },
                    {
                        "name": "gold",
                        "prompt": "GOLD_QA_USER_PROMPT",
                        "model_name": "meta-llama/Llama-3.3-70B-Instruct",
                    },
                ],
            },
            "judge_answers": {
                "run": False,  # to change when fixed
                "comparing_strategies": [["zeroshot", "gold"]],
                "chunk_column_index": 0,
                "random_seed": 42,
            },
        },
    }


def save_yaml_file(config: dict, path: str):
    """Saves the given config dictionary to a YAML file"""
    with open(path, "w") as file:
        yaml.dump(config, file, default_flow_style=False, sort_keys=False)
    return path
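

# For reference, yaml.dump with default_flow_style=False and sort_keys=False
# emits block-style YAML with keys in insertion order, e.g. (abridged, with a
# hypothetical organization name):
#
#   hf_configuration:
#     token: $HF_TOKEN
#     private: true
#     hf_organization: my-org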


def generate_and_save_config(hf_org: str, hf_name: str, session_uid: str, config_path: str):
    """Generates and saves the YAML configuration file"""
    logger.debug(f"Generating config with org: {hf_org}, dataset name: {hf_name}")
    config = generate_base_config(hf_org, hf_name, session_uid)
    file_path = save_yaml_file(config, config_path)
    logger.success(f"Config saved at: {file_path}")
    return file_path
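

# Minimal usage sketch (hypothetical values; in the actual Space the session
# UID, organization, dataset name, and config path come from the app's own
# state, which is not shown here):
if __name__ == "__main__":
    from uuid import uuid4

    session_uid = str(uuid4())
    generate_and_save_config(
        hf_org="my-org",            # hypothetical HF organization
        hf_name="demo-dataset",     # hypothetical dataset name
        session_uid=session_uid,
        config_path="config.yml",   # hypothetical output path
    )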