# File size: 4,986 Bytes
# 6a88c2d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import gradio as gr
import json
import requests
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO
import os, shutil
import zipfile
import glob
from unidecode import unidecode


def extract_json_from_script(scripts, key_fragment):
    """Find the first <script> whose text contains *key_fragment* and parse
    the JSON object assigned inside it (e.g. ``window.X = {...};``).

    Parameters
    ----------
    scripts : iterable
        BeautifulSoup <script> tags (anything with a ``.string`` attribute).
    key_fragment : str
        Substring identifying the script of interest.

    Returns
    -------
    dict | None
        The decoded JSON object, or None when no script matches or the
        assignment cannot be parsed (an error is printed in that case).
    """
    for script in scripts:
        if script.string and key_fragment in script.string:
            js = script.string
            break
    else:
        return None

    try:
        # Take everything after the assignment, then let the JSON decoder
        # find the end of the object itself. Unlike naive brace counting,
        # raw_decode correctly handles '{' / '}' inside string values.
        js_clean = js.split(' = ', 1)[1]
        start = js_clean.index('{')
        obj, _end = json.JSONDecoder().raw_decode(js_clean[start:])
        return obj
    except Exception as e:
        print(f"❌ JSON extraction failed: {e}")
        return None


def designer_to_shows(designer):
    """Return the list of collection names ("hed" fields) for *designer*.

    The designer name is slugified the same way Vogue's URLs expect, the
    designer page is fetched, and the preloaded-state JSON embedded in the
    page is mined for the collection list. Returns [] on any failure.
    """
    cleaned = (designer.replace(' ', '-').replace('.', '-')
                       .replace('&', '').replace('+', '')
                       .replace('--', '-').lower())
    slug = unidecode(cleaned)

    page = requests.get(f"https://www.vogue.com/fashion-shows/designer/{slug}")
    soup = BeautifulSoup(page.content, 'html5lib')

    state = extract_json_from_script(
        soup.find_all('script', type='text/javascript'),
        'window.__PRELOADED_STATE__',
    )
    if not state:
        print("❌ Could not find JSON script")
        return []

    try:
        collections = state['transformed']['runwayDesignerContent']['designerCollections']
    except Exception as e:
        print(f"❌ Failed to parse shows list: {e}")
        return []
    return [collection['hed'] for collection in collections]


def designer_show_to_download_images(designer, show, progress):
    """Download every runway image for *designer*'s *show* from Vogue.

    Images are saved as PNGs under ./images (the directory is recreated
    from scratch each call) and bundled into a ZIP in the working
    directory. *progress* is a Gradio Progress object whose ``tqdm``
    wrapper reports per-image progress.

    Returns (zip_file_name, image_path_list), or (None, []) when the
    gallery data cannot be located.
    """
    show_slug = unidecode(show.replace(' ', '-').lower())
    designer_slug = unidecode(designer.replace(' ', '-').replace('.', '-')
                                      .replace('&', '').replace('+', '')
                                      .replace('--', '-').lower())

    page = requests.get(f"https://www.vogue.com/fashion-shows/{show_slug}/{designer_slug}")
    soup = BeautifulSoup(page.content, 'html5lib')

    data = extract_json_from_script(
        soup.find_all('script', type='text/javascript'),
        'runwayShowGalleries',
    )
    if not data:
        print("❌ Could not extract image data")
        return None, []

    try:
        items = data['transformed']['runwayShowGalleries']['galleries'][0]['items']
    except Exception as e:
        print(f"❌ Could not find gallery images: {e}")
        return None, []

    # Recreate the output directory so stale images never leak into the ZIP.
    save_path = './images'
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

    image_path_list = []
    for idx, item in enumerate(progress.tqdm(items), start=1):
        try:
            img_url = item['image']['sources']['md']['url']
            payload = requests.get(img_url).content
            picture = Image.open(BytesIO(payload))
            export_path = os.path.join(save_path, f"{designer_slug}-{show_slug}-{idx}.png")
            picture.save(export_path)
            image_path_list.append(export_path)
            print(f"βœ… Downloaded: {img_url}")
        except Exception as e:
            # A single bad image shouldn't abort the whole gallery.
            print(f"⚠️ Error downloading image {idx}: {e}")

    zip_file_name = f"{designer_slug}-{show_slug}.zip"
    with zipfile.ZipFile(zip_file_name, 'w') as archive:
        for file in glob.glob(save_path + '/*'):
            archive.write(file)

    return zip_file_name, image_path_list


def get_collections(designer):
    """Gradio handler: repopulate the collections dropdown for *designer*."""
    return gr.Dropdown.update(choices=designer_to_shows(designer))


def download_images(designer, collection, progress=gr.Progress(track_tqdm=True)):
    """Gradio click handler: download *collection* images for *designer*.

    Returns the (zip_file_name, image_path_list) pair from
    designer_show_to_download_images, which feeds the File and Gallery
    outputs wired up in the Blocks UI below.

    NOTE(review): the gr.Progress default is evaluated once at import
    time — presumably Gradio injects a fresh tracker per call when
    track_tqdm=True; confirm against the installed Gradio version.
    """
    return designer_show_to_download_images(designer, collection, progress)


# Gradio UI: a four-step flow — enter designer, fetch collections,
# pick one from the dropdown, download its images as a ZIP + gallery.
with gr.Blocks() as demo:
    gr.Markdown(
    """
    # Vogue Runway Scraper

    by [Tony Assi](https://www.tonyassi.com/)

    This space scrapes images from [Vogue Runway](https://www.vogue.com/fashion-shows)

    ![gucci-spring-2018-ready-to-wear-1](https://github.com/TonyAssi/Vogue-Runway-Scraper/assets/42156881/081f2c82-fbc5-419f-a0e8-52f8f1a8cdcd)

    1. Enter the name of the designer (all designers can be found [here](https://huggingface.co/spaces/tonyassi/vogue-runway-scraper-dev/blob/main/designers.txt) or [here](https://www.vogue.com/fashion-shows))
    2. Click "Get Collection" button
    3. Choose the collection from the dropdown
    4. Click "Download Images" button
    """)

    # Widgets in the order the user interacts with them.
    designer_textbox = gr.Text(label="1) Designer", value="Gucci")
    get_collections_button = gr.Button("2) Get Collections")
    collections_dropdown = gr.Dropdown(choices=[], label='3) Collections', interactive=True)
    download_button = gr.Button("4) Download Images")
    output_file = gr.File(label='ZIP file')
    image_gallery = gr.Gallery(columns=4, preview=True)

    # Button 2 repopulates the dropdown; button 4 produces the ZIP file
    # and fills the image gallery.
    get_collections_button.click(get_collections, inputs=designer_textbox, outputs=collections_dropdown)
    download_button.click(download_images, inputs=[designer_textbox, collections_dropdown], outputs=[output_file, image_gallery])


if __name__ == "__main__":
    demo.launch()