Py.Cafe

dm2173/

NYC-Plotly-Meetup-March

Age-Based Running Performance Analysis

Docs | Pricing
  • app.py
  • requirements.txt
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# check out the Dash docs - https://dash.plotly.com/ 
# check out the Plotly docs - https://plotly.com/python/

from dash import Dash, Input, Output, callback, dcc, html
import plotly.express as px
import pandas as pd

# The same data can also be downloaded from this Google Sheet:
# https://docs.google.com/spreadsheets/d/1O_zxndHKhKMIfJ9e7_M5L7b4F3S__d1nVnUS8iZn8yE/edit?gid=0#gid=0
# Read the marathon runner results CSV from GitHub into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/Coding-with-Adam/Dash-by-Plotly/refs/heads/master/Other/NYC%20Marathon%20Results%2C%202024%20-%20Marathon%20Runner%20Results.csv",
)

# Convert `pace` column from string format (minutes:seconds) to numeric (float) in minutes
def convert_pace_to_minutes(pace_str):
    """Convert a ``"minutes:seconds"`` pace string to fractional minutes.

    Args:
        pace_str: Pace formatted as two colon-separated integers, e.g.
            ``"8:30"``. Non-string input (``None``, a float ``NaN`` from a
            missing cell, ...) is tolerated and treated as unparseable.

    Returns:
        The pace in minutes as a number (``"8:30"`` -> ``8.5``), or ``None``
        when the input cannot be parsed as exactly two integers.
    """
    try:
        minutes, seconds = map(int, pace_str.split(':'))
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: input is not a str (e.g. None or NaN).
        # ValueError: non-numeric parts, or not exactly two ':' fields.
        return None
    return minutes + seconds / 60

# Derive a `pace_minutes` column by running the converter over every `pace` value
df['pace_minutes'] = df['pace'].map(convert_pace_to_minutes)

# Keep only the rows for which a `pace_minutes` value is present
cleaned_data = df[df['pace_minutes'].notna()]

# # Define age groups
# bins = [10, 20, 30, 40, 50, 60, 70, 80, 90]
# labels = ['10-20', '20-30', '30-40', '40-50', '50-60', '60-70', '70-80', '80-90']

# # Create a new column for age groups
# cleaned_data['age_group'] = pd.cut(cleaned_data['age'], bins=bins, labels=labels, right=False)
# print(cleaned_data)

# Count the non-null `runnerId` entries recorded under each country code
counts = cleaned_data['runnerId'].groupby(cleaned_data['countryCode']).count()
print(f'counts\n{counts}')
# Retain only the countries whose count exceeds 500
filtered = counts.loc[counts.gt(500)]
print(filtered)
# Restrict the cleaned rows to the countries that passed the threshold
filtered_group = cleaned_data.loc[cleaned_data['countryCode'].isin(filtered.index)]


# Create the Dash application and declare its page layout:
# an image, an age range slider, and the graph updated by the callback.
app = Dash(__name__)
app.layout = html.Div(
    children=[
        html.Img(
            src='https://secure.meetupstatic.com/photos/event/d/a/c/8/clean_518156008.webp',
            width='200px',
            style={'justify-content': 'right'},
        ),
        # Two-handle slider over ages 15-99; starts with the full range selected.
        html.Div(
            children=[
                dcc.RangeSlider(
                    min=15,
                    max=99,
                    step=1,
                    value=[15, 99],
                    marks=None,
                    id='age-range-slider',
                    tooltip={"placement": "bottom", "always_visible": True},
                ),
            ],
        ),
        # Placeholder graph; its figure is supplied by the `graph` callback output.
        html.Div(
            children=[
                dcc.Graph(id='graph'),
            ],
        ),
    ],
)

@app.callback(
    Output("graph", "figure"),
    Input("age-range-slider", "value"),
)
def update_graph(values):
    """Rebuild the bar chart for the age range chosen on the slider.

    Args:
        values: Current slider value; ``values[0]`` and ``values[1]`` are
            used as the lower and upper age bounds (both inclusive).

    Returns:
        A Plotly Express bar figure showing the mean ``pace_minutes`` per
        ``countryCode`` for the rows of ``filtered_group`` in that age range.
    """
    in_age_range = filtered_group['age'].between(values[0], values[1])
    # Mean pace per country, flattened back into a two-column DataFrame.
    stats = (
        filtered_group[in_age_range]
        .groupby('countryCode')['pace_minutes']
        .mean()
        .reset_index()
    )
    print(stats)

    axis_labels = {
        'pace_minutes': 'Pace (minutes per mile)',
        'age': 'Age',
        'countryCode': 'Country',
    }
    fig = px.bar(
        stats,
        x='countryCode',
        y='pace_minutes',
        title=f'Comparing Pace by Country, Filtered by Age {values[0]}-{values[1]}',
        labels=axis_labels,
    )
    # Centre the title horizontally.
    fig.update_layout(title_x=0.5)
    return fig