# -*- coding: utf-8 -*-
"""
Created on Thu Feb 20 15:00:14 2025
@author: win11
"""
import dash
from dash import dcc, html, callback,clientside_callback, Input, Output, Patch
import pandas as pd
import dash_bootstrap_components as dbc
import plotly.graph_objects as go
import numpy as np
#LAYOUT STUFF
from dash_bootstrap_templates import load_figure_template
import plotly.io as pio
# Load the Vizro light and dark figure templates so figures can refer to them by name.
load_figure_template(["vizro", "vizro_dark"])

# Vizro's Bootstrap stylesheet, fetched from the jsDelivr CDN.
vizro_bootstrap = "https://cdn.jsdelivr.net/gh/mckinsey/vizro@main/vizro-core/src/vizro/static/css/vizro-bootstrap.min.css?v=2"

# Colour-mode toggle: a switch sitting between a moon icon and a sun icon.
# Both labels point at the switch through ``html_for``.
color_mode_switch = html.Span(
    children=[
        dbc.Label(html_for="vizro-switch", className="fa fa-moon"),
        dbc.Switch(value=False, id="vizro-switch", className="d-inline-block ms-1"),
        dbc.Label(html_for="vizro-switch", className="fa fa-sun"),
    ],
    style={"textAlign": "right"},
)
# READ AND PROCESS DATA STATIONS
stations_all = pd.read_csv('stops.txt')

# Keep only the stops whose URL mentions the metro. ``na=False`` makes rows
# without a URL evaluate to False directly, instead of producing NaN that then
# has to be masked out with a separate ``notna()`` test.
is_metro_stop = stations_all['stop_url'].str.contains('metro', na=False)
stations_metro = stations_all[is_metro_stop].copy()

# Drop the first eight characters of every name (the length of "Station ").
# NOTE(review): assumes every metro stop name starts with that prefix - confirm.
stations_metro['stop_name'] = stations_metro['stop_name'].str[8:]

# These stop ids get a name that also carries the line, so that stops sharing
# a station name (Berri-UQAM, Snowdon, Jean-Talon) stay distinguishable.
map_stations = {
    "9999114": "Berri-UQAM Ligne jaune",
    "9999492": "Snowdon Ligne orange",
    "9999112": "Berri-UQAM Ligne orange",
    "9999052": "Jean-Talon Ligne orange",
    "9999495": "Snowdon Ligne bleue",
    "9999055": "Jean-Talon Ligne bleue",
    "9999111": "Berri-UQAM Ligne verte",
}

# The keys above are strings, so look the ids up as strings whatever dtype
# pandas inferred for the column; with an integer column the lookup would
# otherwise match nothing and the renaming would be skipped silently.
stations_metro["stop_name"] = (
    stations_metro["stop_id"]
    .astype(str)
    .map(map_stations)
    .fillna(stations_metro["stop_name"])
)

# Convert stop_id to int so it can be matched against the integer lieu_id.
stations_metro["stop_id"] = stations_metro["stop_id"].astype(int)
# READ DATA INCIDENTS
df = pd.read_csv('Incidents-du-reseau-du-metro.csv')

# Columns that the dashboard never reads.
unused_columns = [
    "Heure de l'incident",
    "Heure de reprise",
    'Année civile',
    'Année civile/mois',
    'Mois calendrier',
    'Jour du mois',
    'Jour de la semaine',
]
df = df.drop(columns=unused_columns)

# Parse the incident date (unparseable values become NaT) and derive the
# month and year periods from it.
incident_day = pd.to_datetime(df["Jour calendaire"], errors='coerce')
df["Jour calendaire"] = incident_day
df['year-month'] = incident_day.dt.to_period('M')
df['year'] = incident_day.dt.to_period('Y')

# For the three station names listed here, append the line name to the
# location so that it reads "<station> <line>".
transfer_stations = ['Snowdon', 'Berri-UQAM', 'Jean-Talon']
is_transfer_station = df['Code de lieu'].isin(transfer_stations)
df['Code de lieu'] = np.where(
    is_transfer_station,
    df['Code de lieu'] + " " + df['Ligne'].astype(str),
    df['Code de lieu'],
)

# Station name -> stop_id lookup, used to attach a stop_id to each incident
# so incidents can be mapped to a line/station instead of a free-text place.
dict_name_to_id = stations_metro.set_index('stop_name')['stop_id'].to_dict()

# Hand-maintained additions to the lookup.
# NOTE(review): presumably spellings used in the incident file that differ
# from the stop names - verify against the data.
append_name_to_id = {
    "St-Laurent": 30,
    "Place St-Henri": 46,
    "Square-Victoria": 14,
    "Place-D'Armes": 13,
    "Iberville": 63,
    "De l'Eglise": 39,
    "Berri L-2": 9999112,
    "St-Michel": 64,
    "Côte Ste-Catherine": 50,
    "Champs-de-Mars": 12,
    "Honorée-Beaugrand": 18,
    "Edouard Montpetit": 57,
    "Cartier": 66,
    "L'Assomption": 22,
    "Université de Montréal": 56,
    "Longueuil": 44,
    "Montmorency": 68,
    "De La Concorde": 67,
}
dict_name_to_id = {**dict_name_to_id, **append_name_to_id}

# Every known stop id; an incident is kept only when its lieu_id is in here.
list_stopids = list(dict_name_to_id.values())

# Locations without a match get lieu_id 0.
df['lieu_id'] = df['Code de lieu'].map(dict_name_to_id).fillna(0).astype(int)

# Base dataframe: incidents with a known lieu_id and a primary cause.
has_known_station = df['lieu_id'].isin(list_stopids)
dfb = df[has_known_station].copy().reset_index(drop=True)
dfb = dfb[dfb['Cause primaire'].notna()]

# Incident counts per month-end, incident type, line, location, primary cause
# and delay bucket; the line chart aggregates this table further.
dfg = (
    dfb.groupby(
        [
            pd.Grouper(key='Jour calendaire', freq="ME"),
            "Type d'incident",
            'Ligne',
            'Code de lieu',
            'Cause primaire',
            'Incident en minutes',
        ]
    )["Numero d'incident"]
    .count()
    .reset_index()
)
# FILTER ELEMENTS

# Dropdown: year. All options are strings, matching both the string default
# below and the string value the select passes to the callback.
years = ['All years'] + [str(year) for year in range(2019, 2026)]
select_year = html.Div(
    dbc.Select(
        years,
        '2024',
        id="select-year",
    ),
    className="py-2",
)

# Dropdown: delay. Missing values are dropped so that no NaN option is
# offered, and ``tolist()`` yields plain Python values rather than numpy scalars.
delays = ["All delays"] + dfb['Incident en minutes'].dropna().unique().tolist()
select_delay = html.Div(
    dbc.Select(
        delays,
        "All delays",  # "All" is the default selection
        id="select-delay",
    ),
    className="py-2",
)

# Dropdown: primary cause.
causes = ["All causes"] + dfb['Cause primaire'].dropna().unique().tolist()
select_cause = html.Div(
    dbc.Select(
        causes,
        "All causes",  # "All" is the default selection
        id="select-cause",
    ),
    className="py-2",
)

# Radio buttons choosing which breakdown the line chart shows.
select_linechart = html.Div(
    [
        dbc.RadioItems(
            options=[
                {"label": "# of incidents", "value": "incidents"},
                {"label": "# of primary causes", "value": "causes"},
                {"label": "# of delays", "value": "delays"},
            ],
            value="incidents",
            id="select-linechart",
            inline=True,
        ),
    ]
)
# Initialize the Dash app. Callback exceptions are suppressed because the
# graphs targeted by some callbacks are created by other callbacks and are
# therefore absent from the initial layout.
app = dash.Dash(
    __name__,
    suppress_callback_exceptions=True,
    external_stylesheets=[vizro_bootstrap, dbc.icons.FONT_AWESOME],
)

# Inline CSS for the metro-map thumbnail, positioned absolutely near the
# bottom-left corner of its (relatively positioned) container.
metromap_thumbnail_style = {
    "width": "175px",
    "height": "auto",
    "position": "absolute",
    "bottom": "20px",
    "left": "20px",
    "z-index": "1000",
    "border": "2px solid #333",
    "border-radius": "8px",
    "background": "white",
    "box-shadow": "0 4px 8px rgba(0,0,0,0.3)",
    "padding": "5px",
}

# Left half: placeholder for the incident map with the thumbnail on top.
map_column = dbc.Col(
    [
        html.Div(
            [
                html.Div(id="density-div"),
                html.Img(src='assets/metromap.jpg', style=metromap_thumbnail_style),
            ],
            # The height matches the height of the map figure.
            style={"position": "relative", "width": "100%", "height": "750px"},
        )
    ],
    width=6,
)

# Right half, upper card: the three map filters side by side.
filter_card = dbc.Card(
    dbc.CardBody(
        [
            html.H2("Filter datapoints on the map", className="card-title"),
            dbc.Row(
                [
                    dbc.Col(select_year, width=4),
                    dbc.Col(select_delay, width=4),
                    dbc.Col(select_cause, width=4),
                ]
            ),
        ]
    ),
    style={"width": "100%", "marginBottom": "2rem"},
)

# Right half, lower card: breakdown selector and placeholder for the line chart.
linechart_card = dbc.Card(
    dbc.CardBody(
        [
            html.H2(
                "Month over month (Not influenced by selected filters)",
                className="card-title",
            ),
            select_linechart,
            html.Div(id="linechart-div"),
        ]
    ),
    style={"width": "100%"},
)

# Define the app layout: a header row, then the map next to the two cards.
app.layout = dbc.Container(
    [
        dbc.Row(
            [
                color_mode_switch,
                html.H1(
                    "Montreal Metro Incidents 2019 (jan 1) -2025 (may 1)",
                    style={'textAlign': 'left', 'marginBottom': '20px'},
                ),
            ]
        ),
        dbc.Row(
            [
                map_column,
                dbc.Col([filter_card, linechart_card], width=6),
            ]
        ),
    ],
    fluid=True,
    style={'padding': '20px'},
)
def _filter_incidents(incidents, year, delay, cause):
    """Return the rows of *incidents* that match the dropdown selections.

    A selection of ``None`` or the corresponding "All ..." entry leaves that
    dimension unfiltered.

    Args:
        incidents: Dataframe with the columns 'Jour calendaire' (datetime),
            'Incident en minutes' and 'Cause primaire'.
        year: Selected year (string or int), "All years" or None.
        delay: Selected delay value, "All delays" or None.
        cause: Selected primary cause, "All causes" or None.

    Returns:
        The matching subset of *incidents* (a new dataframe).
    """
    keep = pd.Series(True, index=incidents.index)

    if year is not None and year != "All years":
        try:
            wanted_year = int(year)
        except (TypeError, ValueError):
            # A selection that is not a year matches no incident.
            keep &= False
        else:
            # Compare calendar years as integers, so the result does not
            # depend on whether the dropdown delivered '2024' or 2024.
            keep &= incidents['Jour calendaire'].dt.year == wanted_year

    if delay is not None and delay != "All delays":
        # The select delivers its value as text, so compare as text; this also
        # works when the column holds numbers.
        keep &= incidents['Incident en minutes'].astype(str) == str(delay)

    if cause is not None and cause != "All causes":
        keep &= incidents['Cause primaire'] == cause

    return incidents[keep]


def _station_map_figure(stations):
    """Build the map figure with one marker per station.

    Args:
        stations: Dataframe with 'stop_name', 'stop_lat', 'stop_lon' and the
            incident count in "Numero d'incident". May be empty, in which case
            only the base map is drawn.

    Returns:
        A ``go.Figure`` whose marker size and colour follow the incident count.
    """
    fig = go.Figure()

    if not stations.empty:
        incident_counts = stations["Numero d'incident"]
        fig.add_trace(
            go.Scattermapbox(
                customdata=stations['stop_name'],
                lat=stations['stop_lat'],
                lon=stations['stop_lon'],
                mode='markers',
                marker=dict(
                    # Scale the diameter down; sizemin keeps small counts visible.
                    size=incident_counts / 8,
                    color=incident_counts,
                    opacity=0.6,
                    sizemode='diameter',
                    sizemin=8,
                ),
                hovertemplate='<b>Station:</b> %{customdata}<br>' +
                              '<b>Incidents:</b> %{marker.color}<br>' +
                              '<extra></extra>',
            )
        )

    # Mapbox layout (tile-based map) rather than a geo layout.
    fig.update_layout(
        mapbox=dict(
            style='carto-positron',
            center=dict(lat=45.52354030, lon=-73.6261896),
            zoom=11,
        ),
        height=750,
        showlegend=False,
        template="vizro",
        margin=dict(r=0, t=0, l=0, b=0),
    )
    return fig


@callback(
    Output("density-div", "children"),
    Input("select-year", "value"),
    Input("select-delay", "value"),
    Input("select-cause", "value"),
)
def update_figures(year, delay, cause):
    """Redraw the station map for the selected year, delay and cause.

    Args:
        year: Value of the year dropdown.
        delay: Value of the delay dropdown.
        cause: Value of the cause dropdown.

    Returns:
        A ``dcc.Graph`` with id 'density-map' showing the incident count per
        station for the filtered incidents.
    """
    selected = _filter_incidents(dfb, year, delay, cause)

    # Count incidents per station id; the map needs one row per station.
    per_station = selected.groupby('lieu_id')["Numero d'incident"].count().reset_index()

    # Attach station name and coordinates, then drop stations that have none.
    located = per_station.merge(
        stations_metro, left_on='lieu_id', right_on='stop_id', how='left'
    )
    located = located.dropna(subset=['stop_lat', 'stop_lon'])

    return dcc.Graph(figure=_station_map_figure(located), id='density-map')
# Trace colour per metro line and per primary cause. The keys also define
# which categories are drawn, and in which order.
_LINE_COLORS = {
    'Ligne bleue': '#0095E6',
    'Ligne orange': '#D95700',
    'Ligne verte': '#00B300',
    'Ligne jaune': '#FFD900',
}
_CAUSE_COLORS = {
    'Autres': '#ffa700',
    'Clientèle': '#cc7b00',
    'Matériel roulant': '#d0a800',
    'Équipements fixes': '#bf9a00',
    'Exploitation trains': '#9c7e00',
}


def _monthly_counts(frame, column, value):
    """Return month-end incident totals for the rows where *column* == *value*.

    Months with no rows between the category's first and last month are
    reported as 0, so every trace has a point for each month in its range.

    Args:
        frame: Dataframe with 'Jour calendaire', *column* and "Numero d'incident".
        column: Name of the category column.
        value: Category to select.

    Returns:
        A Series of totals indexed by month-end date (empty if nothing matches).
    """
    subset = frame.loc[frame[column] == value]
    counts = subset.set_index('Jour calendaire')["Numero d'incident"]
    if counts.empty:
        return counts
    return counts.resample('ME').sum()


@callback(
    Output("linechart-div", "children"),
    Input("select-linechart", "value"),
)
def update_linechart(show):
    """Draw the stacked month-over-month chart for the chosen breakdown.

    Args:
        show: 'incidents' (one trace per line), 'causes' (one trace per
            primary cause) or anything else (one trace per delay value).
            ``None`` is treated as 'incidents'.

    Returns:
        A ``dcc.Graph`` with id 'line-chart'.
    """
    if show is None:
        show = 'incidents'

    if show == 'incidents':
        column, title, stackgroup = 'Ligne', 'Number of incidents by month', 'incidents'
        palette = _LINE_COLORS
        categories = list(palette)
    elif show == 'causes':
        column, title, stackgroup = 'Cause primaire', 'Primary cause', 'causes'
        palette = _CAUSE_COLORS
        categories = list(palette)
    else:
        column, title, stackgroup = 'Incident en minutes', 'Duration', 'delays'
        palette = {}  # no fixed colours: the template's colour cycle is used
        categories = dfg['Incident en minutes'].unique()

    # Monthly totals per category of the chosen column.
    data = (
        dfg.groupby([pd.Grouper(key='Jour calendaire', freq="ME"), column])["Numero d'incident"]
        .sum()
        .reset_index()
    )

    fig = go.Figure()
    for category in categories:
        monthly = _monthly_counts(data, column, category)

        # Only pass a colour when one is defined for this category.
        trace_options = {}
        if category in palette:
            trace_options['marker_color'] = palette[category]

        fig.add_trace(
            go.Scatter(
                x=monthly.index,
                y=monthly.to_numpy(),
                mode='lines+markers',
                name=str(category),
                stackgroup=stackgroup,
                **trace_options,
            )
        )

    fig.update_layout(title=title, template="vizro")
    return dcc.Graph(figure=fig, id='line-chart')
# Callback switching the line chart between the light and dark template.
@callback(
    Output("line-chart", "figure"),
    Input("vizro-switch", "value"),
)
def update_figure_template(switch_on):
    """Apply the figure template that matches the colour-mode switch.

    Args:
        switch_on: Value of the switch; truthy selects "vizro", otherwise
            "vizro_dark" is used.

    Returns:
        A ``Patch`` that replaces only ``layout.template`` of the figure.
    """
    template_name = "vizro" if switch_on else "vizro_dark"

    figure_patch = Patch()
    figure_patch["layout"]["template"] = pio.templates[template_name]
    return figure_patch
# JavaScript executed in the browser whenever the switch changes: it sets the
# `data-bs-theme` attribute on the document root to 'light' (switch on) or
# 'dark' (switch off) and returns no_update, leaving the declared output as is.
_SYNC_BOOTSTRAP_THEME_JS = """
(switchOn) => {
switchOn
? document.documentElement.setAttribute('data-bs-theme', 'light')
: document.documentElement.setAttribute('data-bs-theme', 'dark')
return window.dash_clientside.no_update
}
"""

# The switch's own "id" is declared as the output, and the script never
# changes it.
clientside_callback(
    _SYNC_BOOTSTRAP_THEME_JS,
    Output("vizro-switch", "id"),
    Input("vizro-switch", "value"),
)
# Start the server, in debug mode, only when this file is executed as a script.
if __name__ == "__main__":
    app.run(debug=True)