import pandas as pd
import numpy as np
import plotly.express as px
import dash
import dash_bootstrap_components as dbc
from dash import Input, Output, dcc, html
# Load the marathon results CSV; the path is resolved relative to the
# current working directory.
RESULTS_CSV = "NYC Marathon Results, 2024 - Marathon Runner Results.csv"
df = pd.read_csv(RESULTS_CSV)
def convert_time_to_minutes(time_str):
    """Convert a colon-separated clock string into minutes.

    Two layouts are accepted: ``HH:MM:SS`` and ``MM:SS``. Every field must
    parse as an integer; seconds contribute as a fraction of a minute.

    Args:
        time_str: The clock string to convert.

    Returns:
        The duration expressed in minutes.

    Raises:
        ValueError: If the string does not split into two or three fields,
            or if a field is not a valid integer.
    """
    fields = time_str.split(':')
    # Reject unknown layouts before attempting any integer conversion.
    if len(fields) not in (2, 3):
        raise ValueError("Formato de tiempo no reconocido")

    # The optional hour field, when present, is captured as a 1-item list.
    *hour_field, minutes, seconds = (int(field) for field in fields)
    if hour_field:
        return hour_field[0] * 60 + minutes + seconds / 60
    return minutes + seconds / 60
# Age bands used to categorize runners: (exclusive upper bound, label),
# in ascending order. Ages at or above the last bound fall into '71+'.
_AGE_BANDS = (
    (20, '18-19'),
    (25, '20-24'),
    (30, '25-29'),
    (35, '30-34'),
    (40, '35-39'),
    (45, '40-44'),
    (50, '45-49'),
    (55, '50-54'),
    (60, '55-59'),
    (65, '60-64'),
    (71, '65-70'),
)


def categorize_age(age):
    """Return the age-band label for a runner's age.

    Integer ages from 18 upwards map to the same labels as before
    ('18-19', '20-24', ..., '65-70', '71+'). Inputs that previously fell
    through to the catch-all branch are now labelled explicitly instead of
    being reported as '71+':

    * ages below 18 return 'Under 18';
    * NaN (a missing age) returns 'Unknown';
    * fractional ages between two bands (e.g. 19.5) return the lower band.

    Args:
        age: Numeric age (int or float, NaN allowed).

    Returns:
        The label of the band containing ``age``.
    """
    # NaN is the only numeric value that is not equal to itself.
    if age != age:
        return 'Unknown'
    if age < 18:
        return 'Under 18'
    # Half-open intervals leave no gaps between consecutive bands.
    for upper_bound, label in _AGE_BANDS:
        if age < upper_bound:
            return label
    return '71+'
# Derive numeric (minutes, rounded to 2 decimals) versions of the
# clock-formatted columns, plus the age band of every runner.
df['pace_minutes'] = df['pace'].apply(convert_time_to_minutes).round(2)
df['overallTime_minutes'] = df['overallTime'].apply(convert_time_to_minutes).round(2)
df['ageGradeTime_minutes'] = df['ageGradeTime'].apply(convert_time_to_minutes).round(2)
df['ageCategory'] = df['age'].apply(categorize_age)

# Rows whose `iaaf` code is 'BRA' (presumably the federation / nationality
# code -- confirm against the dataset). Take an explicit copy so the column
# assignments below modify an independent frame instead of writing into a
# slice of `df` (which triggers pandas' SettingWithCopyWarning).
brasil_df = df.query("iaaf == 'BRA'").copy()

# Human-readable gender labels. "Women" matches the key used by the charts'
# symbol_map and the wording of the dashboard text.
gender_dict = {"M": "Men", "W": "Women"}
brasil_df['gender'] = brasil_df['gender'].map(gender_dict)
def categorize_country(country_code):
    """Collapse a country code into one of three buckets.

    'BRA' and 'USA' are returned unchanged; every other value is reported
    as "Other Country".
    """
    kept_codes = ('BRA', 'USA')
    return country_code if country_code in kept_codes else "Other Country"
# Bucket each runner's `countryCode` into 'BRA', 'USA' or "Other Country"
# (presumably the country of residence -- confirm against the dataset).
# `assign` returns a new frame, so nothing is written into a slice that may
# still be linked to `df`.
brasil_df = brasil_df.assign(
    country_living=brasil_df['countryCode'].apply(categorize_country)
)
def brasil_pie_chart():
    """Build the donut chart of `country_living` counts for `brasil_df`.

    Returns:
        A Plotly figure with one slice per distinct `country_living` value.
    """
    residence_counts = brasil_df.country_living.value_counts()

    # Shared background colour for the paper and the plotting area.
    background = 'rgb(252, 248, 202)'
    legend_style = dict(
        title=None,
        orientation="h",
        y=1.1,
        yanchor="bottom",
        x=0.1,
        xanchor="center",
        font=dict(size=16),
    )

    fig = px.pie(
        residence_counts,
        values=residence_counts.values,
        names=residence_counts.index,
        hole=0.65,
        color_discrete_sequence=px.colors.sequential.Viridis_r,
        template='plotly_white',
    )
    fig.update_layout(
        legend=legend_style,
        paper_bgcolor=background,
        plot_bgcolor=background,
    )
    return fig
def brasil_runners_bar():
    """Build the stacked bar chart of runner counts per age band and gender.

    Counts `runnerId` in `brasil_df` for each (`ageCategory`, `gender`)
    pair and draws one bar segment per gender, with the count printed
    outside each segment. The y axis is hidden.

    Returns:
        A Plotly figure.
    """
    # Include every band categorize_age can emit; a band missing from this
    # list would be appended after '71+' instead of sitting in age order.
    category_orders = {'ageCategory': ['18-19', '20-24', '25-29', '30-34', '35-39', '40-44',
                                       '45-49', '50-54', '55-59', '60-64', '65-70', '71+']}
    data = brasil_df.groupby(['ageCategory', 'gender'], as_index=False).agg({'runnerId': 'count'})
    fig = px.bar(
        data, x='ageCategory', y='runnerId', color='gender',
        category_orders=category_orders, barmode='relative',
        color_discrete_sequence=px.colors.qualitative.Vivid,
        labels={'ageCategory': ''},
        opacity=0.70,
        template='plotly_white', text_auto=True, range_y=[0, 230],
    )
    fig.update_layout(
        legend=dict(title=None, orientation="h", y=1.03, yanchor="bottom",
                    x=0.2, xanchor="center", font=dict(size=18)),
        xaxis=dict(showline=True, linewidth=2.5, linecolor='lightgray',
                   zeroline=True, zerolinewidth=2, zerolinecolor='red',
                   tickfont=dict(size=18)),
        paper_bgcolor='rgb(252, 248, 202)', plot_bgcolor='rgb(252, 248, 202)',
    )
    # cliponaxis=False keeps the outside labels visible past the fixed y range.
    fig.update_traces(textfont_size=14, textangle=0, textposition="outside", cliponaxis=False)
    fig.update_yaxes(visible=False)
    return fig
def pace_minutes_chart():
    """Build the line chart of mean `pace_minutes` per age band and gender.

    Averages `pace_minutes` in `brasil_df` for each (`ageCategory`,
    `gender`) pair, rounds to two decimals, and draws one labelled line per
    gender. The y axis is hidden; the values are printed next to the markers.

    Returns:
        A Plotly figure.
    """
    # Include every band categorize_age can emit; a band missing from this
    # list would be appended after '71+' instead of sitting in age order.
    category_orders = {'ageCategory': ['18-19', '20-24', '25-29', '30-34', '35-39', '40-44',
                                       '45-49', '50-54', '55-59', '60-64', '65-70', '71+']}
    data1 = brasil_df.groupby(['ageCategory', 'gender'], as_index=False).agg({'pace_minutes': 'mean'}).round(2)
    fig = px.line(
        data1, x='ageCategory', y='pace_minutes', color='gender',
        category_orders=category_orders, markers=True,
        color_discrete_sequence=px.colors.qualitative.Vivid,
        labels={'ageCategory': 'Age Range'},
        template='plotly_white', text='pace_minutes',
        symbol='gender',
        # The gender column may carry either spelling of the women's label;
        # map both so the diamond marker is actually applied.
        symbol_map={'Men': 'circle', 'Woman': 'diamond', 'Women': 'diamond'},
    )
    fig.update_layout(
        legend=dict(title=None, orientation="h", y=0.95, yanchor="bottom",
                    x=0.1, xanchor="center", font=dict(size=18)),
        xaxis=dict(showline=True, linewidth=2.5, linecolor='lightgray',
                   zeroline=True, zerolinewidth=2, zerolinecolor='red',
                   title_font=dict(size=20), tickfont=dict(size=18)),
        paper_bgcolor='rgb(252, 248, 202)', plot_bgcolor='rgb(252, 248, 202)',
    )
    fig.update_traces(textfont_size=16, textposition="top left",
                      cliponaxis=False, marker=dict(size=15))
    fig.update_yaxes(visible=False)
    return fig
def overall_time_chart():
    """Build the line chart of mean `overallTime_minutes` per age band and gender.

    Averages `overallTime_minutes` in `brasil_df` for each (`ageCategory`,
    `gender`) pair, rounds to two decimals, and draws one labelled line per
    gender. The y axis is hidden; the values are printed next to the markers.

    Returns:
        A Plotly figure.
    """
    # Include every band categorize_age can emit; a band missing from this
    # list would be appended after '71+' instead of sitting in age order.
    category_orders = {'ageCategory': ['18-19', '20-24', '25-29', '30-34', '35-39', '40-44',
                                       '45-49', '50-54', '55-59', '60-64', '65-70', '71+']}
    data1 = brasil_df.groupby(['ageCategory', 'gender'], as_index=False).agg({'overallTime_minutes': 'mean'}).round(2)
    fig = px.line(
        data1, x='ageCategory', y='overallTime_minutes', color='gender',
        category_orders=category_orders, markers=True,
        color_discrete_sequence=px.colors.qualitative.Vivid,
        template='plotly_white', labels={'ageCategory': 'Age Range'},
        text='overallTime_minutes',
        symbol='gender',
        # The gender column may carry either spelling of the women's label;
        # map both so the diamond marker is actually applied.
        symbol_map={'Men': 'circle', 'Woman': 'diamond', 'Women': 'diamond'},
    )
    fig.update_layout(
        legend=dict(title=None, orientation="h", y=0.95, yanchor="bottom",
                    x=0.1, xanchor="center", font=dict(size=18)),
        xaxis=dict(showline=True, linewidth=2.5, linecolor='lightgray',
                   zeroline=True, zerolinewidth=2, zerolinecolor='red',
                   title_font=dict(size=20), tickfont=dict(size=18)),
        paper_bgcolor='rgb(252, 248, 202)', plot_bgcolor='rgb(252, 248, 202)',
    )
    fig.update_traces(textfont_size=16, textposition="top left",
                      cliponaxis=False, marker=dict(size=15))
    fig.update_yaxes(visible=False)
    return fig
# Dash application: a static, single-page layout. Every figure is built once
# at import time by the chart functions above; no callbacks are registered
# in this section.
app = dash.Dash(external_stylesheets=[dbc.themes.SANDSTONE])
app.title = "New York Marathon 2024"
app.layout = dbc.Container(
    [
        # Page heading. Style keys are camelCase throughout, as Dash expects.
        html.H2("The Brazilian Runners Performance at the NYC Marathon 2024",
                style={'textAlign': 'center', 'padding': '10px', 'margin': '10px', 'fontWeight': 'bold'}),
        html.H3("Exploring Age, Pace, and Origin",
                style={'textAlign': 'center', 'padding': '10px', 'margin': '10px'}),
        html.Hr(),
        # Top row: country-of-living donut (5/12) next to the age bar chart (7/12).
        dbc.Row([
            dbc.Col(
                [
                    html.H5("A significant portion (nearly 20%) of Brazilian runners in the NY Marathon are based abroad",
                            style={'textAlign': 'center', 'padding': '5px', 'margin': '5px'}),
                    dcc.Graph(id='brasil-pie', figure=brasil_pie_chart())
                ], width=5),
            dbc.Col(
                [
                    html.H5("Age Distribution of 827 Brazilian who competed in this Edition",
                            style={'textAlign': 'center', 'padding': '5px', 'margin': '5px'}),
                    dcc.Graph(id='runners-bar', figure=brasil_runners_bar())
                ], width=7)
        ]),
        # Bottom section: one tab per metric.
        dbc.Tabs([
            dbc.Tab(label="Pace Minutes per Mile by Age Range",
                    children=[
                        html.H5("How Fast They Ran: Average Pace in Minutes per Mile",
                                style={'textAlign': 'center', 'padding': '5px', 'margin': '5px'}),
                        html.H6("Men Winner Pace Time = 4.88, Women Winner Pace Time = 5.52 ",
                                style={'textAlign': 'center', 'padding': '5px', 'margin': '5px', 'fontStyle': 'italic'}),
                        dbc.Col(
                            dbc.Card(dcc.Graph(id='pace_minutes-bar', figure=pace_minutes_chart()))
                        )
                    ]),
            # This tab shows total finishing time in minutes (not a per-mile
            # pace), so its label and winner caption say "Time", not "Pace".
            dbc.Tab(label="Overall Time in Minutes by Age Range",
                    children=[
                        html.H5("Average Time to Complete the Marathon for Brazilian Athletes (Minutes)",
                                style={'textAlign': 'center', 'padding': '0px', 'margin': '5px'}),
                        html.H6("Men Winner Time = 127.65, Women Winner Time = 144.58 ",
                                style={'textAlign': 'center', 'padding': '5px', 'margin': '5px', 'fontStyle': 'italic'}),
                        dbc.Col(
                            dbc.Card(dcc.Graph(id='overall_minutes-bar', figure=overall_time_chart()))
                        )
                    ])
        ])
    ],
    style={"backgroundColor": '#fcf8ca'}
)
if __name__ == "__main__":
    # Start the development server. Recent Dash releases expose `app.run`
    # and no longer provide `run_server`; fall back to `run_server` only on
    # older installs that lack `run`.
    start_server = getattr(app, "run", None) or app.run_server
    start_server(debug=True)