# check out the Dash docs - https://dash.plotly.com/
# check out the Plotly docs - https://plotly.com/python/
from dash import Dash, Input, Output, callback, dcc, html
import plotly.express as px
import pandas as pd
# you can download the data here if you'd like: https://docs.google.com/spreadsheets/d/1O_zxndHKhKMIfJ9e7_M5L7b4F3S__d1nVnUS8iZn8yE/edit?gid=0#gid=0
# Load the runner results straight from the raw CSV hosted on GitHub.
df = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/Coding-with-Adam/Dash-by-Plotly/refs/heads/master/Other/NYC%20Marathon%20Results%2C%202024%20-%20Marathon%20Runner%20Results.csv",
)
# Convert `pace` column from string format (minutes:seconds) to numeric (float) in minutes
def convert_pace_to_minutes(pace_str):
    """Convert a ``minutes:seconds`` pace string to fractional minutes.

    Args:
        pace_str: Pace text such as ``"7:30"``. Values that are not strings
            (for example ``None`` or a float NaN) are accepted and treated
            as unparseable.

    Returns:
        The pace as a number of minutes (``minutes + seconds / 60``), or
        ``None`` when the value cannot be parsed as exactly two integer
        fields separated by ``:``.
    """
    try:
        minutes, seconds = map(int, pace_str.split(':'))
    except (AttributeError, ValueError):
        # AttributeError: the value has no ``split`` (not a string).
        # ValueError: a field is not an integer, or there are not exactly
        # two fields to unpack.
        return None
    return minutes + seconds / 60
# Add a numeric `pace_minutes` column derived from the `pace` strings
df['pace_minutes'] = df['pace'].apply(convert_pace_to_minutes)
# Keep only the rows whose pace could be converted (non-missing `pace_minutes`)
cleaned_data = df[df['pace_minutes'].notna()]
# # Define age groups
# bins = [10, 20, 30, 40, 50, 60, 70, 80, 90]
# labels = ['10-20', '20-30', '30-40', '40-50', '50-60', '60-70', '70-80', '80-90']
# # Create a new column for age groups
# cleaned_data['age_group'] = pd.cut(cleaned_data['age'], bins=bins, labels=labels, right=False)
# print(cleaned_data)
# Count the non-null `runnerId` values for each `countryCode`
counts = cleaned_data.groupby('countryCode')['runnerId'].count()
print(f'counts\n{counts}')
# Retain only the countries whose count exceeds 500
filtered = counts.loc[counts.gt(500)]
print(filtered)
# Restrict the cleaned rows to those countries
in_filtered_country = cleaned_data['countryCode'].isin(filtered.index)
filtered_group = cleaned_data.loc[in_filtered_country]
# Build the Dash application and its static layout: an image, an age range
# slider, and the graph that the 'graph' callback fills in.
app = Dash(__name__)
app.layout = html.Div([
    html.Img(
        src='https://secure.meetupstatic.com/photos/event/d/a/c/8/clean_518156008.webp',
        width='200px',
        # Dash style dictionaries use camelCased CSS property names.
        # NOTE(review): justifyContent only affects flex/grid containers, so
        # on an <img> it probably has no visible effect - confirm the
        # intended alignment.
        style={'justifyContent': 'right'},
    ),
    html.Div(children=[
        # Age filter from 15 to 99 in steps of 1; marks are disabled and the
        # selected bounds are shown in an always-visible tooltip.
        dcc.RangeSlider(
            15, 99, 1,
            value=[15, 99],
            marks=None,
            id='age-range-slider',
            tooltip={"placement": "bottom", "always_visible": True},
        ),
    ]),
    html.Div(children=[
        # Target of the callback that outputs the 'figure' property.
        dcc.Graph(id='graph'),
    ]),
])
@app.callback(
    Output("graph", "figure"),
    Input("age-range-slider", "value"))
def update_graph(values):
    """Rebuild the bar chart of mean pace per country for the chosen ages.

    Args:
        values: The slider's current value; its first two items are used as
            the lower and upper age bounds (both inclusive, via ``between``).

    Returns:
        A Plotly Express bar figure with one bar per ``countryCode`` showing
        the mean ``pace_minutes`` of the matching rows in ``filtered_group``.
    """
    age_low, age_high = values[0], values[1]

    # Select the age range, then average the pace within each country.
    in_age_range = filtered_group['age'].between(age_low, age_high)
    stats = (
        filtered_group[in_age_range]
        .groupby('countryCode')['pace_minutes']
        .mean()
        .reset_index()
    )
    print(stats)

    axis_labels = {
        'pace_minutes': 'Pace (minutes per mile)',
        'age': 'Age',
        'countryCode': 'Country',
    }
    fig = px.bar(
        stats,
        x='countryCode',
        y='pace_minutes',
        title=f'Comparing Pace by Country, Filtered by Age {age_low}-{age_high}',
        labels=axis_labels,
    )
    # Centre the title horizontally.
    fig.update_layout(title_x=0.5)
    return fig