# Captured from a Hugging Face Space file view (revision 6a88c2d, file size 4,986 bytes).
# The viewer's status header and line-number gutter were page chrome, not program text.
import gradio as gr
import json
import requests
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO
import os, shutil
import zipfile
import glob
from unidecode import unidecode
def extract_json_from_script(scripts, key_fragment):
    """Return the JSON value assigned in the first script containing *key_fragment*.

    Each element of *scripts* must expose a ``string`` attribute holding the
    script source (or ``None``).  The first script whose source contains
    *key_fragment* is selected; the text after its first ``' = '`` is parsed
    as JSON, and whatever JavaScript follows the JSON value (for example a
    trailing ``;``) is ignored.

    Args:
        scripts: Iterable of script nodes with a ``string`` attribute.
        key_fragment: Substring that identifies the script of interest.

    Returns:
        The decoded JSON value, or ``None`` when no script matches or the
        payload cannot be parsed.
    """
    source = next(
        (s.string for s in scripts if s.string and key_fragment in s.string),
        None,
    )
    if source is None:
        return None

    try:
        payload = source.split(' = ', 1)[1].lstrip()
        # raw_decode() parses exactly one JSON value and reports where it
        # ended, so the remainder of the script is simply left unread.
        # Unlike counting '{' and '}' by hand, it is not confused by braces
        # that appear inside string values.
        value, _end = json.JSONDecoder().raw_decode(payload)
    except (IndexError, ValueError) as e:
        # IndexError: the script has no ' = '.  ValueError: invalid JSON
        # (json.JSONDecodeError is a ValueError subclass).
        print(f"❌ JSON extraction failed: {e}")
        return None
    return value
def designer_to_shows(designer):
    """Return the collection titles listed on a designer's Vogue Runway page.

    The designer name is turned into a URL slug (spaces and dots become
    dashes, ``&`` and ``+`` are dropped, one ``--`` collapse, lower-cased,
    transliterated to ASCII), the designer page is fetched, and the ``hed``
    of every entry in its embedded ``designerCollections`` list is returned.

    Args:
        designer: Designer name as typed by the user, e.g. ``"Gucci"``.

    Returns:
        A list of collection titles; an empty list when the page cannot be
        fetched or does not contain the expected data.
    """
    designer = unidecode(
        designer.replace(' ', '-')
        .replace('.', '-')
        .replace('&', '')
        .replace('+', '')
        .replace('--', '-')
        .lower()
    )
    url = f"https://www.vogue.com/fashion-shows/designer/{designer}"

    try:
        # Without a timeout a stalled connection would block this UI
        # callback indefinitely.
        r = requests.get(url, timeout=30)
    except requests.RequestException as e:
        print(f"❌ Request failed: {e}")
        return []

    soup = BeautifulSoup(r.content, 'html5lib')
    data = extract_json_from_script(
        soup.find_all('script', type='text/javascript'),
        'window.__PRELOADED_STATE__',
    )
    if not data:
        print("❌ Could not find JSON script")
        return []

    try:
        collections = data['transformed']['runwayDesignerContent']['designerCollections']
        return [show['hed'] for show in collections]
    except (KeyError, TypeError) as e:
        # The embedded state did not have the expected shape.
        print(f"❌ Failed to parse shows list: {e}")
        return []
def designer_show_to_download_images(designer, show, progress):
    """Download the gallery images of one runway show and bundle them in a ZIP.

    The show page is fetched from Vogue Runway, the first gallery embedded in
    its ``runwayShowGalleries`` data is read, and each item's ``md`` image is
    downloaded and saved as a PNG under ``./images``.  The saved files are
    then written to ``<designer>-<show>.zip`` in the working directory.

    NOTE: ``./images`` is wiped at the start of every call, so overlapping
    calls would overwrite each other's files.

    Args:
        designer: Designer name as typed by the user, e.g. ``"Gucci"``.
        show: Collection title (as returned by ``designer_to_shows``), or
            ``None``/empty when nothing has been selected.
        progress: Object exposing ``tqdm(iterable)``; used to report
            per-image progress.

    Returns:
        ``(zip_file_name, image_path_list)`` on success, or ``(None, [])``
        when no show was given, the page could not be fetched, or no gallery
        data was found.
    """
    if not show:
        # The dropdown yields None until a collection is picked.
        print("❌ No collection selected")
        return None, []

    show = unidecode(show.replace(' ', '-').lower())
    designer = unidecode(
        designer.replace(' ', '-')
        .replace('.', '-')
        .replace('&', '')
        .replace('+', '')
        .replace('--', '-')
        .lower()
    )
    url = f"https://www.vogue.com/fashion-shows/{show}/{designer}"

    try:
        # Without a timeout a stalled connection would block the callback.
        r = requests.get(url, timeout=30)
    except requests.RequestException as e:
        print(f"❌ Request failed: {e}")
        return None, []

    soup = BeautifulSoup(r.content, 'html5lib')
    data = extract_json_from_script(
        soup.find_all('script', type='text/javascript'),
        'runwayShowGalleries',
    )
    if not data:
        print("❌ Could not extract image data")
        return None, []

    try:
        items = data['transformed']['runwayShowGalleries']['galleries'][0]['items']
    except (KeyError, IndexError, TypeError) as e:
        print(f"❌ Could not find gallery images: {e}")
        return None, []

    # Start from an empty scratch directory so old downloads never leak into
    # the new result.
    save_path = './images'
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

    image_path_list = []
    for i, item in enumerate(progress.tqdm(items)):
        # Deliberately best-effort: one bad item must not abort the batch,
        # so any failure is reported and the loop moves on.
        try:
            img_url = item['image']['sources']['md']['url']
            response = requests.get(img_url, timeout=30)
            # Fail with the HTTP status instead of an opaque image-decoding
            # error when the server returns an error page.
            response.raise_for_status()
            img = Image.open(BytesIO(response.content))
            export_path = os.path.join(save_path, f"{designer}-{show}-{i+1}.png")
            img.save(export_path)
            image_path_list.append(export_path)
            print(f"✅ Downloaded: {img_url}")
        except Exception as e:
            print(f"⚠️ Error downloading image {i+1}: {e}")

    zip_file_name = f"{designer}-{show}.zip"
    with zipfile.ZipFile(zip_file_name, 'w') as archive:
        # Only files that were saved successfully are archived; a partially
        # written file left behind by a failed save is skipped.
        for path in image_path_list:
            archive.write(path)

    return zip_file_name, image_path_list
def get_collections(designer):
    """Build the dropdown update listing the collections found for *designer*.

    Args:
        designer: Designer name taken from the textbox.

    Returns:
        A Gradio update that replaces the dropdown's ``choices`` with the
        titles returned by ``designer_to_shows`` (possibly an empty list).
    """
    new_options = designer_to_shows(designer)
    # gr.update() patches the output component's properties and is available
    # across Gradio 3.x and later; the per-component Dropdown.update()
    # classmethod no longer exists in Gradio 4+.
    return gr.update(choices=new_options)
def download_images(designer, collection, progress=gr.Progress(track_tqdm=True)):
    """Gradio callback: download the chosen collection and return its outputs.

    The ``progress`` default is what the click handler runs with when only
    the two inputs are supplied; it is forwarded unchanged.

    Args:
        designer: Designer name taken from the textbox.
        collection: Collection title chosen in the dropdown.
        progress: Progress tracker forwarded to the downloader.

    Returns:
        Whatever ``designer_show_to_download_images`` returns for these
        arguments.
    """
    outputs = designer_show_to_download_images(designer, collection, progress)
    return outputs
# Intro text rendered at the top of the page.
_INTRO_MARKDOWN = """
# Vogue Runway Scraper
by [Tony Assi](https://www.tonyassi.com/)
This space scrapes images from [Vogue Runway](https://www.vogue.com/fashion-shows)

1. Enter the name of the designer (all designers can be found [here](https://huggingface.co/spaces/tonyassi/vogue-runway-scraper-dev/blob/main/designers.txt) or [here](https://www.vogue.com/fashion-shows))
2. Click "Get Collection" button
3. Choose the collection from the dropdown
4. Click "Download Images" button
"""

with gr.Blocks() as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    # Components are created in the order they appear on the page.
    designer_textbox = gr.Text(value="Gucci", label="1) Designer")
    get_collections_button = gr.Button("2) Get Collections")
    collections_dropdown = gr.Dropdown(
        label='3) Collections',
        choices=[],
        interactive=True,
    )
    download_button = gr.Button("4) Download Images")
    output_file = gr.File(label='ZIP file')
    image_gallery = gr.Gallery(columns=4, preview=True)

    # Step 2: look up the designer's collections and fill the dropdown.
    get_collections_button.click(
        fn=get_collections,
        inputs=designer_textbox,
        outputs=collections_dropdown,
    )
    # Step 4: download the selected collection; the callback's two return
    # values go to the file output and the gallery, in that order.
    download_button.click(
        fn=download_images,
        inputs=[designer_textbox, collections_dropdown],
        outputs=[output_file, image_gallery],
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
# End of file (the trailing "|" was a file-viewer table delimiter, not program text).