# -*- coding: utf-8 -*-
"""
Created on Sat Jun 7 06:43:13 2025
@author: win11
"""
import dash
from dash import Dash, dcc, html,Input, Output, callback
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import dash_bootstrap_components as dbc
# Stylesheet with the .dbc class to style dcc, DataTable and AG Grid
# components with a Bootstrap theme.
dbc_css = "https://cdn.jsdelivr.net/gh/AnnMarieW/dash-bootstrap-templates/dbc.min.css"

# The survey responses are downloaded once, when this module is imported.
df = pd.read_csv("https://raw.githubusercontent.com/plotly/Figure-Friday/refs/heads/main/2025/week-23/steak-risk-survey.csv")

# --- Cleaning ---------------------------------------------------------------
# Yes/no question columns, selected by position: from the third column up to
# (not including) the last six.
columns_yn = df.columns[2:-6]
# Original question texts, captured before the columns are renamed below.
questions_txt = columns_yn

# Encode the answers as 1 for "Yes" and 0 for anything else (missing included).
df[columns_yn] = (df[columns_yn] == "Yes").astype(int)

# Rename those columns; the full question texts are too long for labels.
df = df.rename(columns={
    'Do you ever smoke cigarettes?': 'Cigarettes',
    'Do you ever drink alcohol?': 'Alcohol',
    'Do you ever gamble?': 'Gamble',
    'Have you ever been skydiving?': 'Skydiving',
    'Do you ever drive above the speed limit?': 'Exceed speed limit',
    'Have you ever cheated on your significant other?': 'Cheated significant other',
    'Do you eat steak?': 'Steak',
    'Consider the following hypothetical situations: <br>In Lottery A, you have a 50% chance of success, with a payout of $100. <br>In Lottery B, you have a 90% chance of success, with a payout of $20. <br><br>Assuming you have $10 to bet, would you play Lottery A or Lottery B?': 'Lottery',
})

# Drop the columns the dashboard does not use.
df = df.drop(columns=['How do you like your steak prepared?', 'Location (Census Region)'])

# Remove rows with a missing value in any of the demographic columns.
df = df.dropna(subset=['Gender', 'Age', 'Household Income', 'Education'])

# Remove "Less than high school degree"; not nice to do, but it is only
# two persons.
df = df.loc[df['Education'] != 'Less than high school degree'].reset_index(drop=True)

# Define custom ordering for categorical variables
income_order = [
    '$0 - $24,999',
    '$25,000 - $49,999',
    '$50,000 - $99,999',
    '$100,000 - $149,999',
    '$150,000+',
]
education_order = [
    'High school degree',
    'Some college or Associate degree',
    'Bachelor degree',
    'Graduate degree',
]

# Apply custom ordering
df['Household Income'] = pd.Categorical(df['Household Income'], categories=income_order, ordered=True)
df['Education'] = pd.Categorical(df['Education'], categories=education_order, ordered=True)

# Habit columns after renaming/dropping, again selected by position: from the
# third column up to (not including) the last four.
columns_badhabits = df.columns[2:-4]
# UI/UX
# One colour per gender; the opacity grows with the number of habits a
# respondent answered "Yes" to (keys 0 through 6).
_ALPHA_STEPS = (".2", ".3", ".4", ".5", ".6", ".7", "1")

# Teal shades used for the male bars.
color_discrete_map_m = {
    level: f"rgba(0,128,157,{alpha})"
    for level, alpha in enumerate(_ALPHA_STEPS)
}

# Pink shades used for the female bars.
color_discrete_map_f = {
    level: f"rgba(201, 87, 146,{alpha})"
    for level, alpha in enumerate(_ALPHA_STEPS)
}
# Radio buttons that choose which demographic column the chart is grouped by.
# Each pair is (label shown to the user, column name sent to the callback).
_VIEW_CHOICES = (
    ("Age", 'Age'),
    ("Household income", 'Household Income'),
    ("Education", 'Education'),
)
select_view = html.Div([
    html.H2("Select view"),
    dbc.RadioItems(
        id="view_input",
        options=[
            {"label": shown, "value": column} for shown, column in _VIEW_CHOICES
        ],
        value='Age',
    ),
])
# Multi-select dropdown listing every habit column; all are selected initially.
select_habits = html.Div([
    html.H2("Select habits"),
    html.P('Segments in the barchart reflect the number of habits people responded positive about.'),
    dcc.Dropdown(
        id='habits_input',
        options=[{'label': habit, 'value': habit} for habit in columns_badhabits],
        multi=True,
        # Pass a plain list rather than the pandas Index so the prop is a
        # native JSON-serialisable value.
        value=list(columns_badhabits),
    ),
])
def _axis_limit(*totals, minimum=100, step=20, padding=20):
    """Return a symmetric x-axis limit wide enough for the largest total.

    The limit never drops below ``minimum`` and is rounded up to a multiple
    of ``step``; ``padding`` leaves room for the total label beside the bar.
    """
    peaks = [s.max() for s in (t.dropna() for t in totals) if not s.empty]
    largest = int(max(peaks, default=0))
    needed = -(-(largest + padding) // step) * step  # ceil to a multiple of step
    return max(minimum, needed)


def _gender_trace(df_gender, view_group, *, name, symbol, color_map, direction):
    """Build the horizontal bar trace for one gender.

    ``direction`` is +1 to draw the bars to the right of zero and -1 to draw
    them to the left. The hover text always reads the (positive) count from
    ``customdata`` so it is correct for mirrored bars too.
    """
    counts = df_gender['RespondentCount']
    # A habit count beyond the palette reuses its strongest shade instead of
    # resolving to None.
    strongest = color_map[max(color_map)]
    colors = df_gender['Risky habits #'].map(lambda n: color_map.get(n, strongest))
    return go.Bar(
        x=direction * counts,
        y=df_gender[view_group],
        marker_color=colors,
        orientation='h',
        name=name,
        customdata=np.stack([
            df_gender[view_group],
            df_gender['PositiveText'],
            counts,
            df_gender['Percentage'].round(1),
        ], axis=-1),
        hovertemplate=(
            symbol + " " + name + ": %{customdata[2]}, %{customdata[3]}%<br>"
            "Group: %{customdata[0]}<br>"
            "%{customdata[1]}<extra></extra>"
        ),
    )


def create_viz(dfg, view_group):
    """Build the mirrored bar chart of respondent counts by gender.

    Female bars extend to the right of zero and male bars to the left; each
    bar segment is one habit count, shaded by that count.

    Args:
        dfg: Aggregated frame with the columns 'Gender', ``view_group``,
            'Risky habits #', 'RespondentCount', 'Percentage' and
            'PositiveText'.
        view_group: Name of the column whose values form the y-axis groups.

    Returns:
        A ``dcc.Graph`` wrapping the figure.
    """
    # Subset data
    df_female = dfg[dfg['Gender'] == 'Female']
    df_male = dfg[dfg['Gender'] == 'Male']
    # observed=False keeps a (zero) total for every category of a categorical
    # grouping column, independent of the pandas default.
    female_totals = df_female.groupby(view_group, observed=False)['RespondentCount'].sum()
    male_totals = df_male.groupby(view_group, observed=False)['RespondentCount'].sum()

    # The axis stays at +/-100 unless a group total needs more room.
    tick_step = 20
    limit = _axis_limit(female_totals, male_totals, step=tick_step)
    tickvals = list(range(-limit, limit + 1, tick_step))

    # Create figure
    fig = go.Figure()
    fig.add_trace(_gender_trace(
        df_female, view_group,
        name='Female', symbol='♀', color_map=color_discrete_map_f, direction=1,
    ))
    fig.add_trace(_gender_trace(
        df_male, view_group,
        name='Male', symbol='♂', color_map=color_discrete_map_m, direction=-1,
    ))

    # Layout update
    fig.update_layout(
        xaxis=dict(
            title="<b>Number of respondents</b>",
            title_standoff=10,
            tickmode='array',
            tickvals=tickvals,
            # Both sides of the mirrored axis are labelled with positive numbers.
            ticktext=[str(abs(v)) for v in tickvals],
            range=[-limit, limit],
            showticklabels=False,
        ),
        yaxis=dict(
            tickvals=[],  # remove tick labels
            showticklabels=False,
        ),
        barmode='relative',
        bargap=0.1,
        showlegend=False,
        height=500,
        paper_bgcolor='rgba(0,0,0,0)',  # outer background
        plot_bgcolor='rgba(0,0,0,0)',  # inner plot background
    )

    # Group names are written on the zero line, replacing the y tick labels.
    for group in dfg[view_group].unique():
        fig.add_annotation(
            x=0,
            y=group,
            text=f"<b>{group}</b>",
            showarrow=False,
            font=dict(size=12),
            xanchor='center',
            yanchor='middle',
        )

    # Column headers above the plot area (y is in normalized paper units).
    for x_pos, header in ((-limit / 2, "Male"), (0, view_group), (limit / 2, "Female")):
        fig.add_annotation(
            x=x_pos,
            y=1.05,
            xref='x',
            yref='paper',
            text=f"<b>{header}</b>",
            showarrow=False,
            font=dict(size=14),
            xanchor='center',
        )

    # Female totals (right of bar)
    for group, total in female_totals.items():
        fig.add_annotation(
            x=total + 5,
            y=group,
            text=f"♀ <b>{total}</b>",
            showarrow=False,
            font=dict(size=14),
            xanchor='left',
            yanchor='middle',
        )

    # Male totals (left of bar)
    for group, total in male_totals.items():
        fig.add_annotation(
            x=-total - 5,
            y=group,
            text=f"<b>{total}</b> ♂",
            showarrow=False,
            font=dict(size=14),
            xanchor='right',
            yanchor='middle',
        )

    return dcc.Graph(figure=fig)
def _build_layout():
    """Assemble the page: heading, chart placeholder, then the two controls."""
    heading_row = dbc.Row([
        dbc.Col(
            [
                html.H1('Habits from different viewpoints'),
                html.P('Based on responses from approx. 430 people from the USA.'),
            ],
            style={'textAlign': 'center'},
        ),
    ])
    # The callback fills this container with the chart.
    chart_row = dbc.Row([dbc.Col([html.Div(id='visual')])])
    controls_row = dbc.Row([dbc.Col(select_view), dbc.Col(select_habits)])
    return [dbc.Container([heading_row, chart_row, controls_row])]


# Initialize Dash app
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app.layout = _build_layout()
@app.callback(
    Output("visual", "children"),
    Input("view_input", "value"),
    Input("habits_input", "value"),
)
def update_visual(view, habits):
    """Rebuild the chart for the selected view and habits.

    Args:
        view: Column to group by ('Age', 'Household Income' or 'Education');
            ``None`` falls back to 'Age'.
        habits: Habit columns to count per respondent; ``None`` means all
            habit columns, an empty list means none.

    Returns:
        The ``dcc.Graph`` produced by ``create_viz``.
    """
    if view is None:
        view = "Age"
    selected = list(columns_badhabits) if habits is None else list(habits)
    tot = len(selected)

    # Work on a copy: the module-level frame is shared by every request, so
    # writing the per-request habit count into it would leak state between
    # callbacks. The int cast keeps the count integral for an empty selection,
    # where the row sum is otherwise a float.
    scored = df.assign(**{'Risky habits #': df[selected].sum(axis=1).astype(int)})

    # Count respondents per gender, view group and habit count.
    # observed=False keeps every category of a categorical view column,
    # independent of the pandas default.
    dfg = scored.groupby(['Gender', view, 'Risky habits #'], observed=False).agg(
        RespondentCount=('RespondentID', 'count')
    ).reset_index()

    # Share of each segment within its gender/view group, in percent.
    totals = dfg.groupby(['Gender', view], observed=False)['RespondentCount'].transform('sum')
    dfg['Percentage'] = dfg['RespondentCount'] / totals * 100

    # Hover text, e.g. "3 out of 7 positive".
    dfg['PositiveText'] = dfg['Risky habits #'].astype(str) + f" out of {tot} positive"
    return create_viz(dfg, view)
# Start the Dash development server (debug mode on) only when this file is
# executed as a script, not when it is imported.
if __name__ == "__main__":
    app.run(debug=True)