Spaces:
Build error
Build error
| import os | |
| import pandas as pd | |
| import plotly.express as px | |
| import gradio as gr | |
| import urllib.parse | |
| import plotly.graph_objects as go | |
| import numpy as np | |
def read_google_sheet(sheet_id, sheet_name):
    """Download one worksheet of a Google Sheets document as a DataFrame.

    The worksheet is requested through the spreadsheet's ``gviz`` CSV export
    URL and parsed with ``pd.read_csv``.

    Args:
        sheet_id: Identifier of the spreadsheet, inserted into the URL path.
        sheet_name: Name of the worksheet; it is percent-encoded before being
            placed in the ``sheet`` query parameter.

    Returns:
        The parsed DataFrame, or ``None`` when fetching or parsing raised an
        exception (the error is printed instead of propagated).
    """
    # Sheet names may contain spaces ("8 nodes"), so they must be URL-encoded.
    sheet_param = urllib.parse.quote(sheet_name)
    csv_url = (
        "https://docs.google.com/spreadsheets/d/"
        f"{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_param}"
    )

    try:
        return pd.read_csv(csv_url)
    except Exception as e:
        # Best-effort loader: report the problem and let the caller decide.
        print(f"An error occurred: {e}")
        return None
def log2_ticks(values):
    """Generate integer tick positions and power-of-two labels for a log2 axis.

    Args:
        values: Array-like (NumPy array, pandas Series, list) of values that
            are already log2-transformed. NaN and +/-inf entries are ignored.

    Returns:
        A ``(tick_vals, tick_text)`` tuple. ``tick_vals`` is a NumPy array of
        consecutive integer exponents spanning ``floor(min)`` to ``ceil(max)``
        inclusive; ``tick_text`` is the list of matching labels showing
        ``2 ** exponent``. Both are empty when ``values`` has no finite entry.
    """
    exponents = np.asarray(values, dtype=float)
    # log2(0) is -inf and missing measurements are NaN; neither can bound a
    # tick range (np.arange would raise), so only finite values are used.
    finite = exponents[np.isfinite(exponents)]
    if finite.size == 0:
        return np.array([]), []

    min_val, max_val = np.floor(finite.min()), np.ceil(finite.max())
    tick_vals = np.arange(min_val, max_val + 1)
    # Non-negative exponents give whole numbers; negative ones are fractions,
    # which ".0f" would misleadingly round to "0" or "1".
    tick_text = [
        f"{2.0 ** val:.0f}" if val >= 0 else f"{2.0 ** val:g}"
        for val in tick_vals
    ]
    return tick_vals, tick_text
# Load data
# Multiplier used to turn per-GPU throughput into a total
# (presumably the number of GPUs per node -- confirm against the benchmark setup).
GPUS_PER_NODE = 8

sheet_id = "1g07tdGf9ocOZ8XZgLGepI5Q4u6ZH961J0T9O9P64rYw"
# One worksheet per node count, named "1 node", "8 nodes", ...
sheet_names = [f"{i} node" if i == 1 else f"{i} nodes" for i in [1, 8]]

# read_google_sheet returns None on failure. pd.concat would drop such entries
# silently, so report every missing sheet and fail with a clear message when
# nothing at all could be loaded.
loaded_frames = []
for sheet_name in sheet_names:
    sheet_df = read_google_sheet(sheet_id, sheet_name)
    if sheet_df is None:
        print(f"Sheet {sheet_name!r} could not be loaded and is skipped")
    else:
        loaded_frames.append(sheet_df)

if not loaded_frames:
    raise ValueError(
        f"None of the sheets {sheet_names} could be loaded from spreadsheet {sheet_id}"
    )

df = pd.concat(loaded_frames)
df = df.rename(columns={"micro_batch_size": "mbs", "batch_accumulation_per_replica": "gradacc"})
# -1 looks like a sentinel for runs without a measurement (TODO confirm);
# it is mapped to 0 so it does not produce a negative throughput.
df["tok/s/gpu"] = df["tok/s/gpu"].replace(-1, 0)
df["throughput"] = df["tok/s/gpu"] * df["nnodes"] * GPUS_PER_NODE
def get_figure(nodes, hide_nans):
    """Build the parallel-coordinates figure for one node count.

    Rows of the module-level ``df`` whose ``nnodes`` equals ``nodes`` are
    plotted with one axis per parallelism setting (``dp``, ``tp``, ``pp``,
    ``mbs``, ``gradacc``; all on a log2 scale) plus a linear ``throughput``
    axis. Lines are coloured by throughput.

    Args:
        nodes: Node count to select; compared against the ``nnodes`` column.
        hide_nans: When truthy, rows containing a NaN in any column are
            dropped before plotting.

    Returns:
        A ``plotly.graph_objects.Figure``. When no rows remain after
        filtering, the figure carries only the title and a "no data" note
        instead of raising while computing axis ranges.
    """
    figure_title = "3D parallel setup throughput "

    # Keep only the rows measured on the requested number of nodes.
    df_tmp = df[df["nnodes"] == nodes].reset_index(drop=True)
    if hide_nans:
        df_tmp = df_tmp.dropna()

    if df_tmp.empty:
        # Axis ranges and ticks are undefined without data; show a
        # placeholder rather than letting the UI callback fail.
        fig = go.Figure()
        fig.add_annotation(
            text="No data available for this selection",
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
        )
        fig.update_layout(
            title=figure_title,
            plot_bgcolor='white',
            paper_bgcolor='white',
        )
        return fig

    # Apply log2 scale to all columns except throughput
    log_columns = ['dp', 'tp', 'pp', 'mbs', 'gradacc']
    dimensions = []
    for col in log_columns:
        log_values = np.log2(df_tmp[col])
        ticks, labels = log2_ticks(log_values)
        dimensions.append(
            dict(
                range=[log_values.min(), log_values.max()],
                label=col,
                values=log_values,
                tickvals=ticks,
                ticktext=labels,
            )
        )

    # Add throughput dimension (not log-scaled)
    throughput = df_tmp['throughput']
    throughput_min, throughput_max = throughput.min(), throughput.max()
    dimensions.append(
        dict(
            range=[throughput_min, throughput_max],
            label='throughput',
            values=throughput,
        )
    )

    fig = go.Figure(
        data=go.Parcoords(
            line=dict(
                color=throughput,
                colorscale='GnBu',
                showscale=True,
                cmin=throughput_min,
                cmax=throughput_max,
            ),
            dimensions=dimensions,
        )
    )
    fig.update_layout(
        title=figure_title,
        plot_bgcolor='white',
        paper_bgcolor='white',
    )
    return fig
# Gradio UI: two dropdowns drive a single plot that is redrawn by get_figure.
with gr.Blocks() as demo:
    title = gr.Markdown("# 3D parallel benchmark")

    with gr.Row():
        nnodes = gr.Dropdown(choices=[1, 8], label="Number of nodes", value=8)
        hide_nan = gr.Dropdown(choices=[False, True], label="Hide NaNs", value=False)

    plot = gr.Plot()

    # Every trigger passes the current value of both dropdowns to get_figure.
    figure_inputs = [nnodes, hide_nan]

    # Draw once when the page loads, then again whenever a dropdown changes.
    demo.load(get_figure, figure_inputs, [plot])
    for control in figure_inputs:
        control.change(get_figure, figure_inputs, [plot])

demo.launch(show_api=False)