Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
import gc
import json
import os
import shutil

import gradio as gr
import torch
import safetensors
# hack to load safetensors.torch
from safetensors.torch import save_file
from huggingface_hub import hf_hub_download
def check_simple_file(st_weights_path, torch_weights_path):
    """Compare one safetensors file against its torch (.bin) counterpart.

    Args:
        st_weights_path: local path to a ``.safetensors`` state-dict file.
        torch_weights_path: local path to the matching ``pytorch_model*.bin``.

    Returns:
        A list of error messages/exceptions; empty when both files contain
        the same keys and numerically-close tensors.
    """
    st_weights = safetensors.torch.load_file(st_weights_path)
    # weights_only=True stops torch.load from unpickling arbitrary objects —
    # the .bin file comes from a public, user-writable Hub repo.
    torch_weights = torch.load(torch_weights_path, map_location=torch.device('cpu'), weights_only=True)
    total_errors = []
    # check if keys are the same
    if st_weights.keys() != torch_weights.keys():
        # Symmetric difference surfaces keys missing on EITHER side
        # (the old `st - torch` only showed extra safetensors keys).
        unexpected_keys = st_weights.keys() ^ torch_weights.keys()
        # Return a list (not a bare string) so callers can safely `+=` it
        # into their own error accumulator.
        total_errors.append(
            f"keys are not the same ! Conversion failed - unexpected keys are: {unexpected_keys} for the file {st_weights_path}"
        )
        return total_errors
    # check all weights are the same
    for key, value in st_weights.items():
        try:
            # assert_close raises when tensors differ beyond the tolerances.
            torch.testing.assert_close(torch_weights[key], value, rtol=1e-5, atol=1e-5)
        except Exception as e:
            total_errors.append(e)
    # Free the (potentially multi-GB) state dicts before returning.
    del st_weights
    del torch_weights
    gc.collect()
    return total_errors
def run(pr_number, model_id):
    """Verify that the safetensors weights opened in a Hub PR match the
    repo's original torch weights, for single-file and sharded checkpoints.

    Args:
        pr_number: number of the PR holding the safetensors files
            (downloaded from revision ``refs/pr/{pr_number}``).
        model_id: Hub repo id, e.g. ``"bert-base-uncased"``.

    Returns:
        A human-readable verdict string (rendered as markdown in the UI).
    """

    def _normalize(errors):
        # check_simple_file historically returned a bare string on key
        # mismatch; normalize so accumulation with `+=` stays correct.
        return errors if isinstance(errors, list) else [errors]

    # Sharded checkpoints ship index json files; probe for them first.
    is_sharded = False
    try:
        st_sharded_index_file = hf_hub_download(repo_id=model_id, filename="model.safetensors.index.json", revision=f"refs/pr/{pr_number}")
        torch_sharded_index_file = hf_hub_download(repo_id=model_id, filename="pytorch_model.bin.index.json")
        is_sharded = True
    except Exception:
        # Missing index files simply means the checkpoint is not sharded.
        pass
    if not is_sharded:
        try:
            st_weights_path = hf_hub_download(repo_id=model_id, filename="model.safetensors", revision=f"refs/pr/{pr_number}")
            torch_weights_path = hf_hub_download(repo_id=model_id, filename="pytorch_model.bin")
        except Exception as e:
            return f"Error: {e} | \n Maybe you specified model ids or PRs that does not exist or does not contain any `model.safetensors` or `pytorch_model.bin` files"
        total_errors = _normalize(check_simple_file(st_weights_path, torch_weights_path))
    else:
        total_errors = []
        with open(st_sharded_index_file, "r") as f:
            total_st_files = set(json.load(f)["weight_map"].values())
        with open(torch_sharded_index_file, "r") as f:
            total_pt_files = set(json.load(f)["weight_map"].values())
        if len(total_st_files) != len(total_pt_files):
            return f"weights are not the same there are {len(total_st_files)} files in safetensors and {len(total_pt_files)} files in torch ! Conversion failed - {len(total_errors)} errors : {total_errors}"
        # Every torch shard must map to a correspondingly named safetensors shard.
        if not all(pt_file.replace("pytorch_model", "model").replace(".bin", ".safetensors") in total_st_files for pt_file in total_pt_files):
            return "Conversion failed! Safetensors files are not the same as torch files - make sure you have the correct files in the PR"
        for pt_file in total_pt_files:
            st_file = pt_file.replace("pytorch_model", "model").replace(".bin", ".safetensors")
            st_weights_path = hf_hub_download(repo_id=model_id, filename=st_file, revision=f"refs/pr/{pr_number}")
            torch_weights_path = hf_hub_download(repo_id=model_id, filename=pt_file)
            total_errors += _normalize(check_simple_file(st_weights_path, torch_weights_path))
            # Delete each shard once checked to keep disk usage bounded.
            # These are FILES, so os.remove — shutil.rmtree only works on
            # directories and would raise NotADirectoryError here.
            for path in (st_weights_path, torch_weights_path):
                try:
                    os.remove(path)
                except OSError:
                    # Best-effort cleanup; never fail the check over it.
                    pass
    if total_errors:
        return f"weights are not the same ! Conversion failed - {len(total_errors)} errors : {total_errors}"
    return "Safetensors and torch weights are the same! Conversion successful - you can safely merge the PR"
# Markdown help text rendered at the top of the Gradio interface.
DESCRIPTION = """
The steps are the following:
- You got tagged in a Safetensors PR? Check if it works!
- Identify the PR number that you want to check.
- Paste the model id and the PR number below
- Click "Submit"
- That's it! You'll get feedback if the user successfully converted a model in `safetensors` format or not!
This checker also supports sharded weights.
"""
# Build the Gradio UI: two single-line text inputs feeding `run`,
# whose returned verdict string is shown as markdown output.
demo = gr.Interface(
    title="SafeTensors Checker",
    description=DESCRIPTION,
    allow_flagging="never",
    article="Check out the [Safetensors repo on GitHub](https://github.com/huggingface/safetensors)",
    inputs=[
        # NOTE: input order must match run(pr_number, model_id).
        gr.Text(max_lines=1, label="PR number"),
        gr.Text(max_lines=1, label="model_id"),
    ],
    outputs=[gr.Markdown(label="output")],
    fn=run,
).queue()  # enable Gradio's request queue before launching
demo.launch()