import plotly.graph_objects as go
import pandas as pd
import numpy as np
from dash import Dash, html, dcc, Input, Output, callback
import dash_bootstrap_components as dbc
df = pd.read_csv('https://raw.githubusercontent.com/plotly/Figure-Friday/refs/heads/main/2024/week-51/ors-limited-dataset.csv')
df["ESTIMATE"] = pd.to_numeric(df["ESTIMATE"], errors='coerce')
df = df[(df['ESTIMATE TEXT'] == 'Hours of the day that workers were required to sit, mean') | (df['ESTIMATE TEXT'] == 'Hours of the day that workers were required to stand, mean')]
# Drop these two occupations because of a glitch in reporting, dropped all workers too, it's a special case.
df = df[(df['OCCUPATION'] != 'All workers') & (df['OCCUPATION'] != 'Firefighters') & (df['OCCUPATION'] != 'First-line supervisors of fire fighting and prevention workers')]
# # Add a temporary column for sorting priority
# df['estimate_priority'] = df['ESTIMATE TEXT'].map({'Hours of the day that workers were required to sit, mean': 0, 'Hours of the day that workers were required to stand, mean': 1})
# sorted_df = df.sort_values(by=['estimate_priority', 'ESTIMATE'], ascending=[True, False])
# # Drop the temporary column
# sorted_df = sorted_df.drop(columns=['estimate_priority','estima'])
##input df = from adam's sourcecode
df = df.drop(columns = ['ESTIMATE TEXT', 'DATATYPE'])
### Let's assume a stand/sit ratio of 1 is really the healthiest option
#some occupations do not have both stand and sit estimates
#do not remove like below, but fix it in the loop where you query sit/stand
# vc = df['OCCUPATION'].value_counts()
##occupations with both a sit and stand row
# occupations = vc[vc==2].index.tolist()
occupations = df['OCCUPATION'].unique()
#add occupations as first column
dfp = pd.DataFrame({'Occupation': occupations})
dfp["Ratio"] = float(0)
#decide on the color of the dot. Boundaries are doing more than 5 hours something and doing more than 6 hours
#something. The values below are comparing values for ratio and ratio is division stand/sit.
red_up = float(6/2)
red_low = float(2/6)
orange_up = float(5/3)
orange_low = float(3/5)
for i, r in dfp.iterrows():
occupation = r['Occupation']
#sitting = df.query('OCCUPATION == @occupation & CATEGORY == "Sitting"', inplace=False)['ESTIMATE'].values[0]
#standing = df.query('OCCUPATION == @occupation & CATEGORY == "Standing"', inplace=False)['ESTIMATE'].values[0]
df_sitting = df.query('OCCUPATION == @occupation & CATEGORY == "Sitting"', inplace=False)
if (len(df_sitting) != 0):
sitting = df_sitting['ESTIMATE'].values[0]
else:
sitting= 1 #correct this to 0 when ratio is done for the scatterplot
df_standing = df.query('OCCUPATION == @occupation & CATEGORY == "Standing"', inplace=False)
if (len(df_standing) != 0):
standing = df_standing['ESTIMATE'].values[0]
else:
standing= 0 #correct this to 0 when ratio is done for the scatterplot
if (len(df_sitting) == 0 and sitting == 1):
ratio = round(100*standing,2)
elif (len(df_standing) == 0):
ratio=round(100*sitting)
else:
ratio = round(standing/sitting,2)
#reset sitting for the visual and reset only if there was no result for the sitting query
if (len(df_sitting) == 0): sitting = 0
dfp.at[i,'Sitting'] = sitting
dfp.at[i,'Standing'] = standing
dfp.at[i,'Ratio'] = ratio
#add colorcolumn for styling dots
dfp['colors'] = dfp['Ratio'].apply(lambda x: 'red' if x > red_up or x < red_low else ('orange' if x > orange_up or x < orange_low else 'green'))
dfp['markersize'] =6
def create_scatter(dfp, selectedvalue):
fig=go.Figure()
#apparently, if you want extra data in the hover, you have to create lists
#occupations is already there, but check if nothing changed. This is to add occupations
#but if you want to add ratio two you have to create a 2dim array
#I used ratio to see if the tooltip made sense, removed it because it does not make
#sense to people who have no idea.
mycustomdata = np.stack((dfp['Occupation'], dfp['Ratio']), axis=-1)
#painting the dots
fig.add_trace(go.Scatter(x=dfp['Sitting'],
y= dfp['Standing'],
mode = 'markers',
marker=
dict(
color=dfp.colors,
size = dfp.markersize)
))
#creating the custom tooltip
fig.update_traces(customdata=mycustomdata,
#hovertemplate = "<b>%{customdata[0]} - ratio %{customdata[1]}<br>"+\
hovertemplate = "<b>%{customdata[0]} <br>"+\
"Avg. sitting: %{x} hrs<br>"+\
"Avg. standing: %{y} hrs");
#add line perfect balance
fig.update_layout(shapes=[
dict(
type= 'line',
yref= 'y', y0= 0, y1= 8,
xref= 'x', x0= 0, x1= 8,
name="lowerboundary",
line=dict(
color="#70d158",
width=1,
dash="dot",
)
),
])
#remove those horrible margins
fig.update_layout(
margin=dict(l=5, r=5, t=5, b=5),
)
#add some annotations instead of text along the axis
fig.add_annotation(x=1.5, y=8.5,
text="<b>↑</b> mean standing hours",font=dict(family = "Arial",
size=16,color="#333333"), showarrow=False)
fig.add_annotation(x=8, y=2,
text="mean sitting hours →",font=dict(family = "Arial",
size=16,color="#333333"), showarrow=False)
fig.add_annotation(x=8.5, y=8.5,
text="perfect balance", showarrow=False, font=dict(family = "Arial",
size=16,color="green"),
)
return fig
def create_listgroup(df):
listChildren = []
for i, r in df.iterrows():
listChildren.append(dbc.ListGroupItem(r['Occupation']))
listgroup = dbc.ListGroup( children = listChildren, flush=True)
return listgroup
#healthy card shows occupations nearest to ratio 1
def healthy_card(dfp):
#compare with one as the optimal value for ratio, find nearest 6 occupations
#thanks to stack overflow
df_output = dfp.iloc[(dfp['Ratio']-1).abs().argsort()[:6]]
occ_list = create_listgroup(df_output)
healthycard = dbc.Card(
[
dbc.CardBody(
[
html.H4("Looking for an occupation with a nice sit/stand balance?", className="card-title"),
html.H5('Try:'),
html.Div(occ_list)
]
),
], style={"marginBottom": "2rem"}
)
return healthycard
def not_healthy_card(dfp):
#pick head(3) and tail(3) of dataframe ordered on ratio.
#head and tail have the most "red" values. It could be that
#it could be head(4), tail(2) are better, or another combi,
#because the head values are more extreme (or tail)
#sort ratio
df_sort = dfp.sort_values(by=['Ratio'])
#combine head and tail to make creating a list easier.
df_output = pd.concat([df_sort.head(3), df_sort.tail(3)], ignore_index=True)
#loop and create an html list, I have not found a list component I like out of
#the box.
occ_list = create_listgroup(df_output)
not_healthycard = dbc.Card(
[
dbc.CardBody(
[
html.H4("Take very good care of your health if this is your occupation:", className="card-title"),
#dynamic listgroup
html.Div(occ_list)
]
),
]
)
return not_healthycard
def how_about_me(dfp, selected_job):
#queries the df for the selected job, returns one row
q= dfp.query('Occupation == @selected_job', inplace=False)
dot_path = ""
dot_path = "assets/"+q['colors'].values[0]+"_dot.png"
styledot= {
"width":'24px',
"height": '24px'
}
#create table output, this should be a css grid with divs, but this is faster.
row1 = html.Tr([html.Td(html.Img(src=dot_path, style=styledot)), \
html.Td('Avg. sitting ' + str(q['Sitting'].values[0]) + ' hrs a workingday'), \
html.Td('Avg. standing ' + str(q['Standing'].values[0]) + ' hrs a workingday')])
stylestring= {
"borderColor": q['colors'],
"fontSize": '18px'
}
#finally understand how you can dynamically create a stylestring and insert it
#this one sets the bordercolor of the table and enlarges the font for no reason :-)
this_about_you = dbc.Table([html.Tbody([row1])], bordered=True, style=stylestring)
return this_about_you
def legend_table():
dotstyle = {"width": "12px","height": "12px"}
row = html.Tr([
html.Td('Sit:stand ratio'),
html.Td ([html.Img(src='assets/green_dot.png', style=dotstyle),
html.Span(' = between 4:4 & 5:3 hours')]),
html.Td ([html.Img(src='assets/orange_dot.png', style=dotstyle),
html.Span(' = between 5:3 & 6:2 hours')]),
html.Td ([html.Img(src='assets/red_dot.png', style=dotstyle),
html.Span(' = 6:2 or worse')])
])
legend = dbc.Table(html.Tbody([row]), bordered=False)
return legend
dbc_css = "https://cdn.jsdelivr.net/gh/AnnMarieW/dash-bootstrap-templates/dbc.min.css"
app = Dash(__name__, external_stylesheets=[dbc.themes.SANDSTONE, dbc_css])
app.layout = dbc.Container(
[ dbc.Row([
dbc.Col([
html.H2('How healthy is your Occupation?'),
html.P('Some people say the perfect occupation has a workingday divided in 50% standing and 50% \
sitting.'),
html.P('In the graph below every dot represents the sit/stand balance for an occupation. The green dashed\
line represents balance perfection. '),
html.Div(legend_table()),
dcc.Graph(id="scatter-plot", figure = create_scatter(dfp, '')),
html.H2('How about me?', style={"marginTop":"2rem"}),
#ok, this should not have a default value, it should be empty and no tablerow displayed at first.
html.Div([
#dropdown from occupationslist
dcc.Dropdown(
id='search_job',
options=occupations,
searchable = True,
value='Software developers',
placeholder="Find occupation..."
),
], style={'marginBottom':'2rem', 'MarginTop':'2rem'}),
html.Div(id='jobdata')
], className = 'col-md-8'),
dbc.Col([
#if you comment out these two card functions, this code will work
#without dangerous html, I hope.
healthy_card(dfp),
not_healthy_card(dfp)
], className = 'col-md-4')
])
], style={"marginTop": "2rem"})
@app.callback( Output('jobdata', 'children'),
Output('scatter-plot', 'figure'),
Input(component_id='search_job', component_property='value'))
def update_job_data(job):
jobtable = how_about_me(dfp, job)
#reset all markersizes to remove old data
dfp['markersize'] = 6
#update for the selected value
dfp.loc[dfp['Occupation'] == job, 'markersize'] = 15
plot = create_scatter(dfp, job)
return jobtable, plot
app.run_server(debug=True)