Spaces:
Running
Running
Upload 2 files
Browse files
- app.py +32 -13
- requirements.txt +2 -0
app.py
CHANGED
|
@@ -21,6 +21,9 @@ HF_USER = os.environ.get("HF_USER") if os.environ.get("HF_USER") else "" # set y
|
|
| 21 |
# A bare repo id of the form "<namespace>/<name>"; each part is made of word
# characters, "-" or ".".
REGEX_HF_REPO = r'^[\w_\-\.]+/[\w_\-\.]+$'
# A repo id optionally followed by "/<path inside the repo>".
# The optional tail has to be the non-capturing group "(?:/.+)?". The earlier
# spelling "(/?:.+)?" demanded a literal ":" after an optional slash, so every
# "<namespace>/<name>/<subfolder>" value failed validation.
REGEX_HF_PATH = r'^[\w_\-\.]+/[\w_\-\.]+(?:/.+)?$'
|
| 23 |
|
|
|
|
|
|
|
|
|
|
| 24 |
def is_valid_reponame(repo_id: str):
    """Check *repo_id* against ``REGEX_HF_REPO``.

    Returns the ``re.Match`` object when the entire string matches and
    ``None`` otherwise, so the result can be used directly in a truth test.
    """
    matched = re.fullmatch(REGEX_HF_REPO, repo_id)
    return matched
|
| 26 |
|
|
@@ -38,9 +41,10 @@ def extract_src_reponame(source_repo: str):
|
|
| 38 |
else:
|
| 39 |
source_repo, target = re.findall(r'^(?:http.+\.co/)?(?:datasets)?(?:spaces)?([\w_\-\.]+/[\w_\-\.]+)/?(?:blob/main/)?(?:resolve/main/)?(.+)?$', source_repo)[0]
|
| 40 |
target = urllib.parse.unquote(target.removesuffix("/"))
|
|
|
|
| 41 |
return source_repo, target
|
| 42 |
except Exception as e:
|
| 43 |
-
|
| 44 |
return source_repo, ""
|
| 45 |
|
| 46 |
def extract_dst_reponame(dst_repo: str):
|
|
@@ -49,9 +53,10 @@ def extract_dst_reponame(dst_repo: str):
|
|
| 49 |
else:
|
| 50 |
dst_repo, subfolder = re.findall(r'^([\w_\-\.]+/[\w_\-\.]+)/?(.+)?$', dst_repo)[0]
|
| 51 |
subfolder = subfolder.removesuffix("/")
|
|
|
|
| 52 |
return dst_repo, subfolder
|
| 53 |
except Exception as e:
|
| 54 |
-
|
| 55 |
return dst_repo, ""
|
| 56 |
|
| 57 |
def remove_repo_tags(repo_id: str, tags: list[str], repo_type: str, hf_token: str):
|
|
@@ -76,12 +81,12 @@ def duplicate(source_repo, dst_repo, repo_type, private, overwrite, auto_dir, re
|
|
| 76 |
except Exception as e:
|
| 77 |
raise gr.Error(f"""Oops, you forgot to login. Please use the loggin button on the top left to migrate your repo {e}""")
|
| 78 |
|
| 79 |
-
if not is_valid_path(dst_repo): raise gr.Error(f"Invalid dst_repo: {dst_repo}")
|
| 80 |
|
| 81 |
try:
|
| 82 |
source_repo, target = extract_src_reponame(source_repo)
|
| 83 |
dst_repo, subfolder = extract_dst_reponame(dst_repo)
|
| 84 |
-
if auto_dir: subfolder = source_repo
|
| 85 |
|
| 86 |
if not overwrite and api.repo_exists(repo_id=dst_repo, repo_type=repo_type, token=hf_token): raise gr.Error(f"Repo already exists {dst_repo}")
|
| 87 |
|
|
@@ -90,12 +95,18 @@ def duplicate(source_repo, dst_repo, repo_type, private, overwrite, auto_dir, re
|
|
| 90 |
create_repo(dst_repo, repo_type, private, hf_token)
|
| 91 |
for path in api.list_repo_files(repo_id=source_repo, repo_type=repo_type, token=hf_token):
|
| 92 |
if target and target not in path: continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
file = hf_hub_download(repo_id=source_repo, filename=path, repo_type=repo_type, local_dir=temp_dir, token=hf_token)
|
|
|
|
| 94 |
if not Path(file).exists(): continue
|
| 95 |
if Path(file).is_dir(): # unused for now
|
| 96 |
-
api.upload_folder(repo_id=dst_repo, folder_path=file, path_in_repo=
|
| 97 |
elif Path(file).is_file():
|
| 98 |
-
api.upload_file(repo_id=dst_repo, path_or_fileobj=file, path_in_repo=
|
| 99 |
if Path(file).exists(): Path(file).unlink()
|
| 100 |
if repo_type == "dataset": repo_url = f"https://huggingface.co/datasets/{dst_repo}"
|
| 101 |
elif repo_type == "space": repo_url = f"https://huggingface.co/spaces/{dst_repo}"
|
|
@@ -152,7 +163,7 @@ def duplicate_m2o(source_repos_str, dst_repo, repo_type, private, overwrite, oau
|
|
| 152 |
except Exception as e:
|
| 153 |
raise gr.Error(f"""Oops, you forgot to login. Please use the loggin button on the top left to migrate your repo {e}""")
|
| 154 |
|
| 155 |
-
if not is_valid_path(dst_repo): raise gr.Error(f"Invalid dst_repo: {dst_repo}")
|
| 156 |
|
| 157 |
try:
|
| 158 |
dst_repo, subfolder_prefix = extract_dst_reponame(dst_repo)
|
|
@@ -160,8 +171,7 @@ def duplicate_m2o(source_repos_str, dst_repo, repo_type, private, overwrite, oau
|
|
| 160 |
source_repos = parse_repos(source_repos_str)
|
| 161 |
for source_repo in source_repos:
|
| 162 |
source_repo, target = extract_src_reponame(source_repo)
|
| 163 |
-
|
| 164 |
-
subfolder = subfolder_prefix + "/" + source_repo if subfolder_prefix else source_repo
|
| 165 |
|
| 166 |
temp_dir = tempfile.mkdtemp()
|
| 167 |
create_repo(dst_repo, repo_type, private, hf_token)
|
|
@@ -169,10 +179,12 @@ def duplicate_m2o(source_repos_str, dst_repo, repo_type, private, overwrite, oau
|
|
| 169 |
if target and target not in path: continue
|
| 170 |
path_in_repo = f"{subfolder}/{path}" if subfolder else path
|
| 171 |
if is_same_file_hf(source_repo, path, repo_type, dst_repo, path_in_repo, repo_type, hf_token):
|
| 172 |
-
|
|
|
|
| 173 |
continue
|
| 174 |
file = hf_hub_download(repo_id=source_repo, filename=path, repo_type=repo_type, local_dir=temp_dir, token=hf_token)
|
| 175 |
if not Path(file).exists(): continue
|
|
|
|
| 176 |
if Path(file).is_dir(): # unused for now
|
| 177 |
api.upload_folder(repo_id=dst_repo, folder_path=file, path_in_repo=path_in_repo, repo_type=repo_type, token=hf_token)
|
| 178 |
elif Path(file).is_file():
|
|
@@ -270,6 +282,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 270 |
with gr.Row():
|
| 271 |
submit_button = gr.Button("Submit", variant="primary")
|
| 272 |
clear_button = gr.Button("Clear", variant="secondary")
|
|
|
|
| 273 |
with gr.Column():
|
| 274 |
output_md = gr.Markdown(label="output")
|
| 275 |
output_image = gr.Image(show_label=False)
|
|
@@ -292,6 +305,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 292 |
with gr.Row():
|
| 293 |
m2o_submit_button = gr.Button("Submit", variant="primary")
|
| 294 |
m2o_clear_button = gr.Button("Clear", variant="secondary")
|
|
|
|
| 295 |
with gr.Column():
|
| 296 |
m2o_output_md = gr.Markdown(label="output")
|
| 297 |
m2o_output_image = gr.Image(show_label=False)
|
|
@@ -318,20 +332,25 @@ with gr.Blocks(css=css) as demo:
|
|
| 318 |
with gr.Row():
|
| 319 |
m2m_submit_button = gr.Button("Submit", variant="primary")
|
| 320 |
m2m_clear_button = gr.Button("Clear", variant="secondary")
|
|
|
|
| 321 |
with gr.Column():
|
| 322 |
m2m_output_md = gr.Markdown(label="output")
|
| 323 |
m2m_output_image = gr.Image(show_label=False)
|
| 324 |
demo.load(fn=swap_visibilty, outputs=main_ui)
|
| 325 |
-
submit_button.click(duplicate, [search, dst_repo, repo_type, is_private, is_overwrite, is_subdir, is_remtag], [output_md, output_image])
|
|
|
|
| 326 |
clear_button.click(lambda: ("", HF_REPO, "model", True, True, True, True), None, [search, dst_repo, repo_type, is_private, is_overwrite, is_subdir, is_remtag], queue=False)
|
| 327 |
m2o_search.submit(add_repo_text, [m2o_search, m2o_source_repos], [m2o_source_repos], queue=False)
|
| 328 |
-
m2o_submit_button.click(duplicate_m2o, [m2o_source_repos, m2o_dst_repo, m2o_repo_type, m2o_is_private, m2o_is_overwrite], [m2o_output_md, m2o_output_image])
|
|
|
|
| 329 |
m2o_clear_button.click(lambda: ("", HF_REPO, "model", True, True, ""), None,
|
| 330 |
[m2o_search, m2o_dst_repo, m2o_repo_type, m2o_is_private, m2o_is_overwrite, m2o_source_repos], queue=False)
|
| 331 |
m2m_search.submit(add_repo_text, [m2m_search, m2m_source_repos], [m2m_source_repos], queue=False)
|
| 332 |
-
m2m_submit_button.click(duplicate_m2m, [m2m_source_repos, m2m_user, m2m_repo_type, m2m_is_private, m2m_is_overwrite, m2m_is_remtag, m2m_prefix, m2m_suffix],
|
| 333 |
[m2m_output_md, m2m_output_image])
|
|
|
|
| 334 |
m2m_clear_button.click(lambda: ("", HF_USER, "model", True, False, True, "", HF_REPO_PREFIX, HF_REPO_SUFFIX), None,
|
| 335 |
[m2m_search, m2m_user, m2m_repo_type, m2m_is_private, m2m_is_overwrite, m2m_is_remtag, m2m_source_repos, m2m_prefix, m2m_suffix], queue=False)
|
|
|
|
| 336 |
|
| 337 |
demo.queue().launch()
|
|
|
|
| 21 |
# A bare repo id of the form "<namespace>/<name>"; each part is made of word
# characters, "-" or ".".
REGEX_HF_REPO = r'^[\w_\-\.]+/[\w_\-\.]+$'
# A repo id optionally followed by "/<path inside the repo>".
# The optional tail has to be the non-capturing group "(?:/.+)?". The earlier
# spelling "(/?:.+)?" demanded a literal ":" after an optional slash, so every
# "<namespace>/<name>/<subfolder>" value failed validation.
REGEX_HF_PATH = r'^[\w_\-\.]+/[\w_\-\.]+(?:/.+)?$'
|
| 23 |
|
| 24 |
+
def debug(s: object):
    """Print *s* to standard output as a lightweight debug trace.

    The parameter is annotated ``object`` rather than ``str`` because the
    ``except`` handlers in this file pass exception instances straight in;
    ``print`` stringifies whatever it receives.
    """
    print(s)
|
| 26 |
+
|
| 27 |
def is_valid_reponame(repo_id: str):
    """Check *repo_id* against ``REGEX_HF_REPO``.

    Returns the ``re.Match`` object when the entire string matches and
    ``None`` otherwise, so the result can be used directly in a truth test.
    """
    matched = re.fullmatch(REGEX_HF_REPO, repo_id)
    return matched
|
| 29 |
|
|
|
|
| 41 |
else:
|
| 42 |
source_repo, target = re.findall(r'^(?:http.+\.co/)?(?:datasets)?(?:spaces)?([\w_\-\.]+/[\w_\-\.]+)/?(?:blob/main/)?(?:resolve/main/)?(.+)?$', source_repo)[0]
|
| 43 |
target = urllib.parse.unquote(target.removesuffix("/"))
|
| 44 |
+
debug(f"Sourece repo_id:{source_repo} folder:{target}")
|
| 45 |
return source_repo, target
|
| 46 |
except Exception as e:
|
| 47 |
+
debug(e)
|
| 48 |
return source_repo, ""
|
| 49 |
|
| 50 |
def extract_dst_reponame(dst_repo: str):
|
|
|
|
| 53 |
else:
|
| 54 |
dst_repo, subfolder = re.findall(r'^([\w_\-\.]+/[\w_\-\.]+)/?(.+)?$', dst_repo)[0]
|
| 55 |
subfolder = subfolder.removesuffix("/")
|
| 56 |
+
debug(f"Destination repo_id:{dst_repo} folder:{subfolder}")
|
| 57 |
return dst_repo, subfolder
|
| 58 |
except Exception as e:
|
| 59 |
+
debug(e)
|
| 60 |
return dst_repo, ""
|
| 61 |
|
| 62 |
def remove_repo_tags(repo_id: str, tags: list[str], repo_type: str, hf_token: str):
|
|
|
|
| 81 |
except Exception as e:
|
| 82 |
raise gr.Error(f"""Oops, you forgot to login. Please use the loggin button on the top left to migrate your repo {e}""")
|
| 83 |
|
| 84 |
+
if not is_valid_path(extract_dst_reponame(dst_repo)[0]): raise gr.Error(f"Invalid dst_repo: {dst_repo}")
|
| 85 |
|
| 86 |
try:
|
| 87 |
source_repo, target = extract_src_reponame(source_repo)
|
| 88 |
dst_repo, subfolder = extract_dst_reponame(dst_repo)
|
| 89 |
+
if auto_dir: subfolder = f"{subfolder}/{source_repo}" if subfolder else source_repo
|
| 90 |
|
| 91 |
if not overwrite and api.repo_exists(repo_id=dst_repo, repo_type=repo_type, token=hf_token): raise gr.Error(f"Repo already exists {dst_repo}")
|
| 92 |
|
|
|
|
| 95 |
create_repo(dst_repo, repo_type, private, hf_token)
|
| 96 |
for path in api.list_repo_files(repo_id=source_repo, repo_type=repo_type, token=hf_token):
|
| 97 |
if target and target not in path: continue
|
| 98 |
+
path_in_repo = f"{subfolder}/{path}" if subfolder else path
|
| 99 |
+
if is_same_file_hf(source_repo, path, repo_type, dst_repo, path_in_repo, repo_type, hf_token):
|
| 100 |
+
debug(f"{dst_repo}/{path_in_repo} is already exists. Skipping.")
|
| 101 |
+
progress(0, desc=f"{dst_repo}/{path_in_repo} is already exists. Skipping.")
|
| 102 |
+
continue
|
| 103 |
file = hf_hub_download(repo_id=source_repo, filename=path, repo_type=repo_type, local_dir=temp_dir, token=hf_token)
|
| 104 |
+
debug(f"Uploading {file} to {path_in_repo}")
|
| 105 |
if not Path(file).exists(): continue
|
| 106 |
if Path(file).is_dir(): # unused for now
|
| 107 |
+
api.upload_folder(repo_id=dst_repo, folder_path=file, path_in_repo=path_in_repo, repo_type=repo_type, token=hf_token)
|
| 108 |
elif Path(file).is_file():
|
| 109 |
+
api.upload_file(repo_id=dst_repo, path_or_fileobj=file, path_in_repo=path_in_repo, repo_type=repo_type, token=hf_token)
|
| 110 |
if Path(file).exists(): Path(file).unlink()
|
| 111 |
if repo_type == "dataset": repo_url = f"https://huggingface.co/datasets/{dst_repo}"
|
| 112 |
elif repo_type == "space": repo_url = f"https://huggingface.co/spaces/{dst_repo}"
|
|
|
|
| 163 |
except Exception as e:
|
| 164 |
raise gr.Error(f"""Oops, you forgot to login. Please use the loggin button on the top left to migrate your repo {e}""")
|
| 165 |
|
| 166 |
+
if not is_valid_path(extract_dst_reponame(dst_repo)[0]): raise gr.Error(f"Invalid dst_repo: {dst_repo}")
|
| 167 |
|
| 168 |
try:
|
| 169 |
dst_repo, subfolder_prefix = extract_dst_reponame(dst_repo)
|
|
|
|
| 171 |
source_repos = parse_repos(source_repos_str)
|
| 172 |
for source_repo in source_repos:
|
| 173 |
source_repo, target = extract_src_reponame(source_repo)
|
| 174 |
+
subfolder = f"{subfolder_prefix}/{source_repo}" if subfolder_prefix else source_repo
|
|
|
|
| 175 |
|
| 176 |
temp_dir = tempfile.mkdtemp()
|
| 177 |
create_repo(dst_repo, repo_type, private, hf_token)
|
|
|
|
| 179 |
if target and target not in path: continue
|
| 180 |
path_in_repo = f"{subfolder}/{path}" if subfolder else path
|
| 181 |
if is_same_file_hf(source_repo, path, repo_type, dst_repo, path_in_repo, repo_type, hf_token):
|
| 182 |
+
debug(f"{dst_repo}/{path_in_repo} is already exists. Skipping.")
|
| 183 |
+
progress(0, desc=f"{dst_repo}/{path_in_repo} is already exists. Skipping.")
|
| 184 |
continue
|
| 185 |
file = hf_hub_download(repo_id=source_repo, filename=path, repo_type=repo_type, local_dir=temp_dir, token=hf_token)
|
| 186 |
if not Path(file).exists(): continue
|
| 187 |
+
debug(f"Uploading {file} to {path_in_repo}")
|
| 188 |
if Path(file).is_dir(): # unused for now
|
| 189 |
api.upload_folder(repo_id=dst_repo, folder_path=file, path_in_repo=path_in_repo, repo_type=repo_type, token=hf_token)
|
| 190 |
elif Path(file).is_file():
|
|
|
|
| 282 |
with gr.Row():
|
| 283 |
submit_button = gr.Button("Submit", variant="primary")
|
| 284 |
clear_button = gr.Button("Clear", variant="secondary")
|
| 285 |
+
stop_button = gr.Button("Stop", variant="stop")
|
| 286 |
with gr.Column():
|
| 287 |
output_md = gr.Markdown(label="output")
|
| 288 |
output_image = gr.Image(show_label=False)
|
|
|
|
| 305 |
with gr.Row():
|
| 306 |
m2o_submit_button = gr.Button("Submit", variant="primary")
|
| 307 |
m2o_clear_button = gr.Button("Clear", variant="secondary")
|
| 308 |
+
m2o_stop_button = gr.Button("Stop", variant="stop")
|
| 309 |
with gr.Column():
|
| 310 |
m2o_output_md = gr.Markdown(label="output")
|
| 311 |
m2o_output_image = gr.Image(show_label=False)
|
|
|
|
| 332 |
with gr.Row():
|
| 333 |
m2m_submit_button = gr.Button("Submit", variant="primary")
|
| 334 |
m2m_clear_button = gr.Button("Clear", variant="secondary")
|
| 335 |
+
m2m_stop_button = gr.Button("Stop", variant="stop")
|
| 336 |
with gr.Column():
|
| 337 |
m2m_output_md = gr.Markdown(label="output")
|
| 338 |
m2m_output_image = gr.Image(show_label=False)
|
| 339 |
demo.load(fn=swap_visibilty, outputs=main_ui)
|
| 340 |
+
submit_event = submit_button.click(duplicate, [search, dst_repo, repo_type, is_private, is_overwrite, is_subdir, is_remtag], [output_md, output_image])
|
| 341 |
+
stop_button.click(fn=None, inputs=None, outputs=None, cancels=[submit_event])
|
| 342 |
clear_button.click(lambda: ("", HF_REPO, "model", True, True, True, True), None, [search, dst_repo, repo_type, is_private, is_overwrite, is_subdir, is_remtag], queue=False)
|
| 343 |
m2o_search.submit(add_repo_text, [m2o_search, m2o_source_repos], [m2o_source_repos], queue=False)
|
| 344 |
+
m2o_submit_event = m2o_submit_button.click(duplicate_m2o, [m2o_source_repos, m2o_dst_repo, m2o_repo_type, m2o_is_private, m2o_is_overwrite], [m2o_output_md, m2o_output_image])
|
| 345 |
+
m2o_stop_button.click(fn=None, inputs=None, outputs=None, cancels=[m2o_submit_event])
|
| 346 |
m2o_clear_button.click(lambda: ("", HF_REPO, "model", True, True, ""), None,
|
| 347 |
[m2o_search, m2o_dst_repo, m2o_repo_type, m2o_is_private, m2o_is_overwrite, m2o_source_repos], queue=False)
|
| 348 |
m2m_search.submit(add_repo_text, [m2m_search, m2m_source_repos], [m2m_source_repos], queue=False)
|
| 349 |
+
m2m_submit_event = m2m_submit_button.click(duplicate_m2m, [m2m_source_repos, m2m_user, m2m_repo_type, m2m_is_private, m2m_is_overwrite, m2m_is_remtag, m2m_prefix, m2m_suffix],
|
| 350 |
[m2m_output_md, m2m_output_image])
|
| 351 |
+
m2m_stop_button.click(fn=None, inputs=None, outputs=None, cancels=[m2m_submit_event])
|
| 352 |
m2m_clear_button.click(lambda: ("", HF_USER, "model", True, False, True, "", HF_REPO_PREFIX, HF_REPO_SUFFIX), None,
|
| 353 |
[m2m_search, m2m_user, m2m_repo_type, m2m_is_private, m2m_is_overwrite, m2m_is_remtag, m2m_source_repos, m2m_prefix, m2m_suffix], queue=False)
|
| 354 |
+
|
| 355 |
|
| 356 |
demo.queue().launch()
|
requirements.txt
CHANGED
|
@@ -1,3 +1,5 @@
|
|
| 1 |
huggingface_hub>=0.22.2
|
|
|
|
|
|
|
| 2 |
gradio_huggingfacehub_search==0.0.7
|
| 3 |
pydantic==2.10.6
|
|
|
|
| 1 |
huggingface_hub>=0.22.2
|
| 2 |
+
hf_transfer
|
| 3 |
+
hf_xet
|
| 4 |
gradio_huggingfacehub_search==0.0.7
|
| 5 |
pydantic==2.10.6
|