# -*- coding: utf-8 -*-
"""
Created on Fri May 16 21:01:26 2025
@author: Marie-Anne Melis
"""
import pandas as pd
import dash
from dash import dcc, html
import plotly.graph_objects as go
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc
from dash_bootstrap_templates import load_figure_template
from plotly.subplots import make_subplots
# Magenta (184,0,127) at four increasing opacities. create_summary uses the
# whole list as per-bar colours; create_tab picks one entry per hazard class.
colors_class = [
    f'rgba(184,0,127,{opacity})' for opacity in ('.2', '.4', '.7', '1')
]
# One-off preprocessing, kept for reference (disabled): reduce the full
# 'nation.csv' export to the five columns this app reads and save the result
# as 'nation-dams.csv'.
#df_raw = pd.read_csv('nation.csv')
#df = df_raw[['Federal ID','Hazard Potential Classification','Primary Owner Type','Primary Purpose', 'EAP Prepared']].reset_index()
#df.to_csv('nation-dams.csv')
# Dam records used by every aggregation and figure below; the file is read
# relative to the current working directory.
df=pd.read_csv('nation-dams.csv')
# stylesheet with the .dbc class to style dcc, DataTable and AG Grid components with a Bootstrap theme
dbc_css = "https://cdn.jsdelivr.net/gh/AnnMarieW/dash-bootstrap-templates/dbc.min.css"
# if using the vizro theme
vizro_bootstrap = "https://cdn.jsdelivr.net/gh/mckinsey/vizro@main/vizro-core/src/vizro/static/css/vizro-bootstrap.min.css?v=2"
# default dark mode (disabled; `pio` is not among the imports above)
#pio.templates.default = "vizro_dark"
# Presumably registers the "vizro" / "vizro_dark" figure templates with
# Plotly - TODO confirm. NOTE(review): the figures below set
# template="plotly_dark" explicitly rather than one of these.
load_figure_template(["vizro", "vizro_dark"])
#subplot reference https://fronkan.hashnode.dev/different-barmodes-simultaneously-in-plotly-subplots-python


def _count_by_hazard(frame, breakdown_col, share_col):
    """Count dams per (hazard class, breakdown value, EAP status).

    Returns a frame with the three grouping columns plus:
    - "Number": number of 'Federal ID' entries in the group;
    - "Percentage Hazard": share of the group within its hazard class;
    - share_col: share of the group within its (hazard class, breakdown
      value) pair.
    Both percentages are rounded to one decimal.
    """
    hazard_col = 'Hazard Potential Classification'
    counts = (
        frame.groupby([hazard_col, breakdown_col, 'EAP Prepared'])['Federal ID']
        .count()
        .reset_index(name="Number")
    )
    # Calculate percentage within each hazard group
    hazard_totals = counts.groupby(hazard_col)["Number"].transform("sum")
    counts["Percentage Hazard"] = (counts["Number"] / hazard_totals * 100).round(1)
    # Calculate percentage within each hazard group / breakdown value pair
    pair_totals = counts.groupby([hazard_col, breakdown_col])["Number"].transform("sum")
    counts[share_col] = (counts["Number"] / pair_totals * 100).round(1)
    return counts


#dff6 == PURPOSE
dff6 = _count_by_hazard(df, 'Primary Purpose', "Percentage Hazard Purpose")
#dff2 == OWNER
dff2 = _count_by_hazard(df, 'Primary Owner Type', "Percentage Hazard Owner")
# Radio selector that switches the drill-down between the owner view and
# the purpose view; "Owner" is preselected.
radioitems = html.Div(
    children=[
        dbc.Label("Zoom in on:"),
        dbc.RadioItems(
            id="radioitems-input",
            options=[
                {"label": f"{choice} dam", "value": choice}
                for choice in ("Owner", "Purpose")
            ],
            value="Owner",
            inline=True,
        ),
    ],
)
def create_summary(df):
    """Build the general overview of hazard classification and EAP status.

    The figure holds two horizontal-bar subplots whose rows are the hazard
    potential classifications:

    * subplot 1 - number of dams per classification;
    * subplot 2 - stacked percentage of each 'EAP Prepared' status within
      the classification, with a highlight rectangle over the combined
      'No' + 'Not Required' share of the 'High' and 'Significant' rows.

    The rectangle widths are derived from the data instead of being fixed
    numbers, so they stay correct when the input file changes.

    Parameters
    ----------
    df : pandas.DataFrame
        Reads the columns 'Federal ID', 'Hazard Potential Classification'
        and 'EAP Prepared'. The frame is not modified.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    hazard_col = "Hazard Potential Classification"
    # Desired row order (bottom to top) and EAP status order / colors
    hazard_order = ["Undetermined", "Low", "Significant", "High"]
    eap_statuses = ['No', 'Not Required', 'Yes']
    colors = ['rgba(0, 61, 84,1)',
              'rgba(56, 111, 152, 1)',
              'rgba(188, 212, 230, 1)']

    # Aggregate: number of dams per (classification, EAP status)
    dff4 = df.groupby([hazard_col, 'EAP Prepared'])['Federal ID'].count().reset_index()
    dff4.rename(columns={"Federal ID": "Number"}, inplace=True)
    # Percentages within each classification
    dff4["Percentage"] = round(
        dff4["Number"] / dff4.groupby(hazard_col)["Number"].transform("sum") * 100, 1
    )
    dff4[hazard_col] = pd.Categorical(
        dff4[hazard_col], categories=hazard_order, ordered=True
    )
    dff4 = dff4.sort_values(hazard_col)

    # dfs contains the number of dams per classification. The column names
    # are set explicitly because the names produced by a bare
    # value_counts().reset_index() differ between pandas versions.
    dfs = (
        df[hazard_col]
        .value_counts()
        .rename_axis(hazard_col)
        .reset_index(name="count")
    )
    dfs[hazard_col] = pd.Categorical(
        dfs[hazard_col], categories=hazard_order, ordered=True
    )
    dfs = dfs.sort_values(hazard_col)

    # Creating two subplots
    fig = make_subplots(rows=1, cols=2, specs=[[{}, {}]], shared_xaxes=True,
                        shared_yaxes=False, vertical_spacing=0.01,
                        subplot_titles=("USA: number of dams per Hazard Potential Classification",
                                        "Emergency Action Plan prepared?"))

    # Build traces for subplot 2, the percentages (stacked in status order)
    for status, color in zip(eap_statuses, colors):
        subset = dff4[dff4["EAP Prepared"] == status]
        fig.add_trace(
            go.Bar(
                y=subset[hazard_col],
                x=subset["Percentage"],
                name=status,
                orientation="h",
                offsetgroup=1,
                marker_color=color,
                text=subset["Percentage"].astype(str) + '%',
                textposition="inside",
                insidetextanchor="middle",
            ),
            row=1,
            col=2,
        )

    # Subplot1 - Overview
    fig.add_trace(
        go.Bar(
            x=dfs['count'],
            y=dfs[hazard_col],
            orientation="h",
            marker_color=colors_class,
            offsetgroup=0,
            showlegend=False,
            text=dfs['count'],
            textposition="outside",
            insidetextanchor="end",
        ),
        row=1,
        col=1,
    )

    # Warning rectangles for class High and Significant covering the
    # 'No' + 'Not Required' part of the stacked bar. Those two statuses are
    # the first two traces, so their combined share starts at x=0.
    without_plan = dff4[dff4["EAP Prepared"].isin(['No', 'Not Required'])]
    for hazard_class in ("High", "Significant"):
        share = without_plan.loc[
            without_plan[hazard_col] == hazard_class, "Percentage"
        ].sum()
        share = round(float(share), 1)
        if share <= 0:
            # Nothing to highlight for this class
            continue
        # Rows sit at their index in hazard_order (see yaxis2 below)
        row_position = hazard_order.index(hazard_class)
        fig.add_shape(type="rect",
                      x0=0, y0=row_position - 0.4, x1=share, y1=row_position + 0.4,
                      line=dict(
                          color=colors_class[3],
                          width=1,
                      ),
                      fillcolor=colors_class[1],
                      row=1,
                      col=2,
                      )

    fig.update_layout(
        barmode="stack",
        yaxis_title=None,
        xaxis_title=None,
        xaxis2_title=None,
        template="plotly_dark",
        height=350,
        yaxis=dict(
            categoryorder="array",
            categoryarray=hazard_order
        ),
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.05,
            xanchor="right",
            x=1,
            title_text="EAP?"
        ),
        yaxis2=dict(
            showgrid=False,
            showline=True,
            showticklabels=False,
            # Tick labels are hidden here, so pin the same row order as
            # subplot 1 instead of relying on the order of the trace data.
            categoryorder="array",
            categoryarray=hazard_order,
        ),
        margin=dict(l=20, r=20, t=50, b=20),
        showlegend=True  # Set to False if you want to hide the legend
    )
    return fig
def create_tab(df, view, hazard_class):
    """Build the drill-down graph for one hazard potential classification.

    The figure holds two horizontal-bar subplots whose rows are either the
    owner types or the primary purposes:

    * subplot 1 - total number of dams per row;
    * subplot 2 - stacked percentage of each 'EAP Prepared' status per row.

    Parameters
    ----------
    df : pandas.DataFrame
        Aggregated frame (dff2 for owner, dff6 for purpose), already
        filtered on one hazard potential classification. Reads 'Number',
        'EAP Prepared' and the row / percentage columns selected by
        ``view``. The frame passed in is not modified.
    view : str
        'Owner' selects 'Primary Owner Type' / 'Percentage Hazard Owner';
        any other value selects 'Primary Purpose' /
        'Percentage Hazard Purpose'.
    hazard_class : str
        Classification being shown; only used to pick the bar color of
        subplot 1.

    Returns
    -------
    dash.dcc.Graph
        Graph component wrapping the figure.
    """
    if view == 'Owner':
        colname = 'Primary Owner Type'
        colnameperc = 'Percentage Hazard Owner'
    else:
        colname = 'Primary Purpose'
        colnameperc = 'Percentage Hazard Purpose'

    # Aggregate for the total numbers per view
    dfg = (
        df.groupby(colname)['Number']
        .sum()
        .reset_index()
        .sort_values(colname, ascending=False)
    )

    # Bar color of subplot 1 follows the hazard class; anything that is not
    # High / Significant / Low gets the lightest shade.
    class_colors = {
        'High': colors_class[3],
        'Significant': colors_class[2],
        'Low': colors_class[1],
    }
    single_bar_color = class_colors.get(hazard_class, colors_class[0])

    # Desired order and colors
    eap_statuses = ['No', 'Not Required', 'Yes']
    colors = ['rgba(0, 61, 84,1)',
              'rgba(56, 111, 152, 1)',
              'rgba(188, 212, 230, 1)']

    # Row order for the y-axes: reverse of the order in which the values
    # appear in df. assign() builds a new frame, so the caller's (filtered)
    # frame is left untouched and no chained-assignment warning is raised.
    y_order = df[colname].unique().tolist()[::-1]
    df = df.assign(
        **{colname: pd.Categorical(df[colname], categories=y_order, ordered=True)}
    ).sort_values(colname)

    # Creating two subplots
    fig = make_subplots(rows=1, cols=2, specs=[[{}, {}]], shared_xaxes=True,
                        shared_yaxes=False, vertical_spacing=0.001,
                        subplot_titles=("Number of dams", "Emergency Action Plan prepared?"))

    # Build traces => subplot 2 % yes, not required, no
    for status, color in zip(eap_statuses, colors):
        subset = df[df["EAP Prepared"] == status]
        fig.add_trace(
            go.Bar(
                y=subset[colname],
                x=subset[colnameperc],
                name=status,
                orientation="h",
                offsetgroup=1,
                marker_color=color,
                text=subset[colnameperc].astype(str) + '%',
                textposition="inside",
                insidetextanchor="middle",
            ),
            row=1,
            col=2,
        )

    # Subplot 1 - Grouped numbers
    fig.add_trace(
        go.Bar(
            x=dfg['Number'],
            y=dfg[colname],
            orientation="h",
            showlegend=False,
            marker_color=single_bar_color,
            offsetgroup=0,
            text=dfg['Number'],
            textposition="outside",
            insidetextanchor="end",
        ),
        row=1,
        col=1,
    )

    # Subplot 2 hides its tick labels and relies on lining up with
    # subplot 1, so both axes get the same explicit category order. Without
    # it the order follows the trace data, and a row that is missing from
    # the first EAP status would shift the rows of subplot 2.
    shared_rows = dict(categoryorder="array", categoryarray=y_order)
    fig.update_layout(
        barmode="stack",
        yaxis_title=None,
        xaxis_title=None,
        xaxis2_title=None,
        template="plotly_dark",
        yaxis=dict(**shared_rows),
        yaxis2=dict(
            showgrid=False,
            showline=True,
            showticklabels=False,
            **shared_rows,
        ),
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.05,
            xanchor="right",
            x=1,
            title_text="EAP?"
        ),
        showlegend=True  # Set to False if you want to hide the legend
    )
    return dcc.Graph(figure=fig)
# Initialize Dash app with the Bootstrap theme plus the two extra stylesheets
app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP, dbc_css, vizro_bootstrap],
)

# Layout: overview figure on top, then a row with the drill-down title and
# the owner/purpose selector, then the container filled by the callback.
app.layout = dbc.Container(
    children=[
        dcc.Graph(id='general-overview', figure=create_summary(df)),
        dbc.Row(
            children=[
                dbc.Col(html.H2(id='click-data')),
                dbc.Col(radioitems),
            ],
            style={"marginTop": "2rem", "marginBottom": "1rem"},
        ),
        html.Div(id='owner-overview'),
    ],
)
@app.callback(
    Output('click-data', 'children'),
    Output('owner-overview', 'children'),
    Input('general-overview', 'clickData'),
    Input("radioitems-input", "value"))
def display_click_data(clickData, value):
    """Refresh the drill-down title and graph after a click or view switch.

    Parameters
    ----------
    clickData : dict or None
        Click payload of the overview graph. The hazard class is read from
        the ``label`` of the first clicked point; with no click yet, 'High'
        is shown.
    value : str or None
        Selected view of the radio items ('Owner' or 'Purpose'). ``None``
        is treated as 'Owner'.

    Returns
    -------
    tuple
        (title text, graph component built by create_tab).
    """
    # Normalise a missing selection up front. Passing None on to create_tab
    # would make it look for the purpose columns in the owner table.
    if value is None:
        value = "Owner"

    points = clickData.get("points") if clickData else None
    hazard_class = points[0]["label"] if points else 'High'

    # dff2 holds the owner breakdown, dff6 the purpose breakdown
    source = dff2 if value == "Owner" else dff6
    selection = source[source['Hazard Potential Classification'] == hazard_class]

    h2_title = f"{hazard_class} Hazard Potential, {value}"
    return h2_title, create_tab(selection, value, hazard_class)
# Start the development server only when the file is executed as a script
if __name__ == "__main__":
    app.run(debug=True)