Spaces:

tonyassi
/

vogue-runway

Sleeping

File size: 4,986 Bytes

6a88c2d

import gradio as gr
import json
import requests
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO
import os, shutil
import zipfile
import glob
from unidecode import unidecode


def extract_json_from_script(scripts, key_fragment):
    """Return the JSON value assigned in the first script containing *key_fragment*.

    Args:
        scripts: Iterable of objects exposing a ``.string`` attribute holding
            the script's text (or ``None`` / empty when there is no text).
        key_fragment: Substring used to pick the script of interest.

    Returns:
        The decoded JSON value found to the right of the first ``' = '`` in
        the matching script, or ``None`` when no script matches, the script
        has no ``' = '`` assignment, or the payload is not valid JSON.
    """
    for script in scripts:
        if script.string and key_fragment in script.string:
            js = script.string
            break
    else:
        # No script mentions the fragment at all.
        return None

    try:
        # Everything to the right of the first ' = ' is the assigned value.
        payload = js.split(' = ', 1)[1].lstrip()
        # raw_decode parses exactly one JSON value and ignores whatever
        # follows it (e.g. a trailing ';' or further statements). Unlike
        # counting braces by hand, it is not fooled by '{' or '}' characters
        # that appear inside JSON string literals.
        data, _end = json.JSONDecoder().raw_decode(payload)
        return data
    except (IndexError, ValueError, RecursionError) as e:
        # IndexError: no ' = ' in the script; ValueError: malformed JSON
        # (json.JSONDecodeError is a ValueError); RecursionError: absurdly
        # deep nesting.
        print(f"❌ JSON extraction failed: {e}")
        return None


def designer_to_shows(designer, timeout=30):
    """Return the list of show titles Vogue Runway lists for *designer*.

    Args:
        designer: Designer name as typed by the user (e.g. ``"Gucci"``). It
            is converted to a URL slug: spaces and dots become dashes, ``&``
            and ``+`` are dropped, one ``--`` collapse is applied, the result
            is lower-cased and transliterated with ``unidecode``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list with the ``hed`` value of every entry in the page's
        ``designerCollections`` data, or ``[]`` when the request fails or the
        expected data cannot be found.
    """
    designer = unidecode(designer.replace(' ','-').replace('.','-').replace('&','').replace('+','').replace('--','-').lower())
    URL = f"https://www.vogue.com/fashion-shows/designer/{designer}"
    try:
        # Without a timeout requests may block forever on a stalled server.
        r = requests.get(URL, timeout=timeout)
    except requests.RequestException as e:
        print(f"❌ Request failed: {e}")
        return []
    soup = BeautifulSoup(r.content, 'html5lib')

    data = extract_json_from_script(soup.find_all('script', type='text/javascript'), 'window.__PRELOADED_STATE__')
    if not data:
        print("❌ Could not find JSON script")
        return []

    try:
        collections = data['transformed']['runwayDesignerContent']['designerCollections']
        return [show['hed'] for show in collections]
    except (KeyError, TypeError, IndexError) as e:
        # The page state did not have the expected nesting / entry shape.
        print(f"❌ Failed to parse shows list: {e}")
        return []


def designer_show_to_download_images(designer, show, progress, timeout=30):
    """Download every gallery image of a show and bundle the files into a ZIP.

    Args:
        designer: Designer name as typed by the user; slugified the same way
            as in the designer page URL.
        show: Show title (e.g. as returned in the collections list); spaces
            become dashes and the result is lower-cased and transliterated.
        progress: Object exposing ``tqdm(iterable)``, used to report progress
            while iterating over the gallery items.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        ``(zip_file_name, image_path_list)`` on success, where the ZIP is
        written to the current working directory and the list holds the paths
        of the images saved under ``./images``. ``(None, [])`` when the show
        page cannot be fetched or its gallery data cannot be found.

    Side effects:
        Deletes and recreates ``./images`` on every successful page parse.
    """
    show = unidecode(show.replace(' ','-').lower())
    designer = unidecode(designer.replace(' ','-').replace('.','-').replace('&','').replace('+','').replace('--','-').lower())

    url = f"https://www.vogue.com/fashion-shows/{show}/{designer}"
    try:
        # Without a timeout requests may block forever on a stalled server.
        r = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"❌ Request failed: {e}")
        return None, []
    soup = BeautifulSoup(r.content, 'html5lib')

    data = extract_json_from_script(soup.find_all('script', type='text/javascript'), 'runwayShowGalleries')
    if not data:
        print("❌ Could not extract image data")
        return None, []

    try:
        # Only the first gallery of the show is downloaded.
        items = data['transformed']['runwayShowGalleries']['galleries'][0]['items']
    except (KeyError, IndexError, TypeError) as e:
        print(f"❌ Could not find gallery images: {e}")
        return None, []

    # Start from an empty output directory so stale images from a previous
    # run never leak into this run's results.
    save_path = './images'
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

    image_path_list = []
    for number, item in enumerate(progress.tqdm(items), start=1):
        try:
            img_url = item['image']['sources']['md']['url']
            response = requests.get(img_url, timeout=timeout)
            export_path = os.path.join(save_path, f"{designer}-{show}-{number}.png")
            # Context manager releases the decoder's resources promptly.
            with Image.open(BytesIO(response.content)) as img:
                img.save(export_path)
            image_path_list.append(export_path)
            print(f"✅ Downloaded: {img_url}")
        except Exception as e:
            # Deliberately broad: one bad item (missing key, network error,
            # undecodable image) must not abort the rest of the download.
            print(f"⚠️ Error downloading image {number}: {e}")

    zip_file_name = f"{designer}-{show}.zip"
    with zipfile.ZipFile(zip_file_name, 'w') as archive:
        # Archive exactly the images that were saved successfully, in gallery
        # order, rather than whatever happens to be in the directory.
        for image_path in image_path_list:
            archive.write(image_path)

    return zip_file_name, image_path_list


def get_collections(designer):
    """Gradio click handler: refresh the collections dropdown for *designer*.

    Args:
        designer: Designer name taken from the designer textbox.

    Returns:
        A Gradio update that replaces the dropdown's ``choices`` with the
        show titles found for the designer (an empty list when none are
        found).
    """
    new_options = designer_to_shows(designer)
    # gr.update is available in both Gradio 3.x and 4.x, whereas the
    # per-component gr.Dropdown.update classmethod was removed in Gradio 4.
    return gr.update(choices=new_options)


def download_images(designer, collection, progress=gr.Progress(track_tqdm=True)):
    """Gradio click handler: download the chosen collection's images.

    Forwards the designer name, the selected collection and the Gradio
    progress tracker to ``designer_show_to_download_images`` and hands its
    result back unchanged for the file and gallery outputs.
    """
    outcome = designer_show_to_download_images(designer, collection, progress)
    return outcome


# Build the Gradio UI: an instructions panel, the four numbered input/action
# widgets, and the two outputs (ZIP download and image gallery).
with gr.Blocks() as demo:
    # Instructions shown at the top of the page.
    gr.Markdown(
    """
    # Vogue Runway Scraper

    by [Tony Assi](https://www.tonyassi.com/)

    This space scrapes images from [Vogue Runway](https://www.vogue.com/fashion-shows)

    ![gucci-spring-2018-ready-to-wear-1](https://github.com/TonyAssi/Vogue-Runway-Scraper/assets/42156881/081f2c82-fbc5-419f-a0e8-52f8f1a8cdcd)

    1. Enter the name of the designer (all designers can be found [here](https://huggingface.co/spaces/tonyassi/vogue-runway-scraper-dev/blob/main/designers.txt) or [here](https://www.vogue.com/fashion-shows))
    2. Click "Get Collection" button
    3. Choose the collection from the dropdown
    4. Click "Download Images" button
    """)

    # Widgets, labelled with the step number from the instructions above.
    designer_textbox = gr.Text(label="1) Designer", value="Gucci")
    get_collections_button = gr.Button("2) Get Collections")
    # Starts empty; its choices are filled in by get_collections.
    collections_dropdown = gr.Dropdown(choices=[], label='3) Collections', interactive=True)
    download_button = gr.Button("4) Download Images")
    output_file = gr.File(label='ZIP file')
    image_gallery = gr.Gallery(columns=4, preview=True)

    # Step 2: look up the designer's shows and populate the dropdown.
    get_collections_button.click(get_collections, inputs=designer_textbox, outputs=collections_dropdown)
    # Step 4: download the selected show; the handler's two return values feed
    # the ZIP file component and the gallery, in that order.
    download_button.click(download_images, inputs=[designer_textbox, collections_dropdown], outputs=[output_file, image_gallery])


# Start the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()