Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
import gc
import json
import os
import shutil

import gradio as gr
import torch
import safetensors
# hack to load safetensors.torch
from safetensors.torch import save_file
from huggingface_hub import hf_hub_download
def check_simple_file(st_weights_path, torch_weights_path):
    """Compare one safetensors file against its torch (.bin) counterpart.

    Args:
        st_weights_path: local path to a ``.safetensors`` state-dict file.
        torch_weights_path: local path to the matching ``pytorch_model*.bin``.

    Returns:
        A list of error messages/exceptions; empty when both files contain
        the same keys and numerically-close tensors.
    """
    st_weights = safetensors.torch.load_file(st_weights_path)
    # weights_only=True stops torch.load from unpickling arbitrary objects —
    # the .bin file comes from a public, user-writable Hub repo.
    torch_weights = torch.load(torch_weights_path, map_location=torch.device('cpu'), weights_only=True)
    total_errors = []
    # check if keys are the same
    if st_weights.keys() != torch_weights.keys():
        # Symmetric difference surfaces keys missing on EITHER side
        # (the old `st - torch` only showed extra safetensors keys).
        unexpected_keys = st_weights.keys() ^ torch_weights.keys()
        # Return a list (not a bare string) so callers can safely `+=` it
        # into their own error accumulator.
        total_errors.append(
            f"keys are not the same ! Conversion failed - unexpected keys are: {unexpected_keys} for the file {st_weights_path}"
        )
        return total_errors
    # check all weights are the same
    for key, value in st_weights.items():
        try:
            # assert_close raises when tensors differ beyond the tolerances.
            torch.testing.assert_close(torch_weights[key], value, rtol=1e-5, atol=1e-5)
        except Exception as e:
            total_errors.append(e)
    # Free the (potentially multi-GB) state dicts before returning.
    del st_weights
    del torch_weights
    gc.collect()
    return total_errors
def run(pr_number, model_id):
    """Verify that the safetensors weights opened in a Hub PR match the
    repo's original torch weights, for single-file and sharded checkpoints.

    Args:
        pr_number: number of the PR holding the safetensors files
            (downloaded from revision ``refs/pr/{pr_number}``).
        model_id: Hub repo id, e.g. ``"bert-base-uncased"``.

    Returns:
        A human-readable verdict string (rendered as markdown in the UI).
    """

    def _normalize(errors):
        # check_simple_file historically returned a bare string on key
        # mismatch; normalize so accumulation with `+=` stays correct.
        return errors if isinstance(errors, list) else [errors]

    # Sharded checkpoints ship index json files; probe for them first.
    is_sharded = False
    try:
        st_sharded_index_file = hf_hub_download(repo_id=model_id, filename="model.safetensors.index.json", revision=f"refs/pr/{pr_number}")
        torch_sharded_index_file = hf_hub_download(repo_id=model_id, filename="pytorch_model.bin.index.json")
        is_sharded = True
    except Exception:
        # Missing index files simply means the checkpoint is not sharded.
        pass
    if not is_sharded:
        try:
            st_weights_path = hf_hub_download(repo_id=model_id, filename="model.safetensors", revision=f"refs/pr/{pr_number}")
            torch_weights_path = hf_hub_download(repo_id=model_id, filename="pytorch_model.bin")
        except Exception as e:
            return f"Error: {e} | \n Maybe you specified model ids or PRs that does not exist or does not contain any `model.safetensors` or `pytorch_model.bin` files"
        total_errors = _normalize(check_simple_file(st_weights_path, torch_weights_path))
    else:
        total_errors = []
        with open(st_sharded_index_file, "r") as f:
            total_st_files = set(json.load(f)["weight_map"].values())
        with open(torch_sharded_index_file, "r") as f:
            total_pt_files = set(json.load(f)["weight_map"].values())
        if len(total_st_files) != len(total_pt_files):
            return f"weights are not the same there are {len(total_st_files)} files in safetensors and {len(total_pt_files)} files in torch ! Conversion failed - {len(total_errors)} errors : {total_errors}"
        # Every torch shard must map to a correspondingly named safetensors shard.
        if not all(pt_file.replace("pytorch_model", "model").replace(".bin", ".safetensors") in total_st_files for pt_file in total_pt_files):
            return "Conversion failed! Safetensors files are not the same as torch files - make sure you have the correct files in the PR"
        for pt_file in total_pt_files:
            st_file = pt_file.replace("pytorch_model", "model").replace(".bin", ".safetensors")
            st_weights_path = hf_hub_download(repo_id=model_id, filename=st_file, revision=f"refs/pr/{pr_number}")
            torch_weights_path = hf_hub_download(repo_id=model_id, filename=pt_file)
            total_errors += _normalize(check_simple_file(st_weights_path, torch_weights_path))
            # Delete each shard once checked to keep disk usage bounded.
            # These are FILES, so os.remove — shutil.rmtree only works on
            # directories and would raise NotADirectoryError here.
            for path in (st_weights_path, torch_weights_path):
                try:
                    os.remove(path)
                except OSError:
                    # Best-effort cleanup; never fail the check over it.
                    pass
    if total_errors:
        return f"weights are not the same ! Conversion failed - {len(total_errors)} errors : {total_errors}"
    return "Safetensors and torch weights are the same! Conversion successful - you can safely merge the PR"
# Markdown help text rendered at the top of the Gradio interface.
DESCRIPTION = """
The steps are the following:
- You got tagged in a Safetensors PR? Check if it works!
- Identify the PR number that you want to check.
- Paste the model id and the PR number below
- Click "Submit"
- That's it! You'll get feedback if the user successfully converted a model in `safetensors` format or not!
This checker also supports sharded weights.
"""
# Build the Gradio UI: two single-line text inputs feeding `run`,
# whose returned verdict string is shown as markdown output.
demo = gr.Interface(
    title="SafeTensors Checker",
    description=DESCRIPTION,
    allow_flagging="never",
    article="Check out the [Safetensors repo on GitHub](https://github.com/huggingface/safetensors)",
    inputs=[
        # NOTE: input order must match run(pr_number, model_id).
        gr.Text(max_lines=1, label="PR number"),
        gr.Text(max_lines=1, label="model_id"),
    ],
    outputs=[gr.Markdown(label="output")],
    fn=run,
).queue()  # enable Gradio's request queue before launching
demo.launch()