import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html, Input, Output
import dash_bootstrap_components as dbc
# Improved data loading with optimized column types
def load_data(path="MTA_Daily_Ridership.csv"):
    """Read the MTA daily ridership CSV and add a combined ridership column.

    Args:
        path: Location of the CSV file (anything ``pandas.read_csv`` accepts).
            Defaults to ``"MTA_Daily_Ridership.csv"`` in the working
            directory, which is what the dashboard loads at startup.

    Returns:
        A DataFrame with ``Date`` parsed as datetimes, the ridership / trips /
        traffic / percentage columns read as ``float32``, and an extra
        ``'Total Estimated Ridership'`` column.
    """
    # Explicit dtypes skip pandas' type inference and halve the memory of the
    # numeric columns compared with the float64 default.
    dtype_dict = {
        'Subways: Total Estimated Ridership': 'float32',
        'Buses: Total Estimated Ridership': 'float32',
        'LIRR: Total Estimated Ridership': 'float32',
        'Metro-North: Total Estimated Ridership': 'float32',
        'Staten Island Railway: Total Estimated Ridership': 'float32',
        'Access-A-Ride: Total Scheduled Trips': 'float32',
        'Bridges and Tunnels: Total Traffic': 'float32',
        'Subways: % of Comparable Pre-Pandemic Day': 'float32',
        'Buses: % of Comparable Pre-Pandemic Day': 'float32',
        'LIRR: % of Comparable Pre-Pandemic Day': 'float32',
        'Metro-North: % of Comparable Pre-Pandemic Day': 'float32',
        'Staten Island Railway: % of Comparable Pre-Pandemic Day': 'float32',
        'Access-A-Ride: % of Comparable Pre-Pandemic Day': 'float32',
        'Bridges and Tunnels: % of Comparable Pre-Pandemic Day': 'float32',
    }
    mta_df = pd.read_csv(path, parse_dates=['Date'], dtype=dtype_dict)

    # Precompute the combined ridership once so it is not re-summed per use.
    # Only the five "Total Estimated Ridership" columns are included;
    # Access-A-Ride trips and Bridges and Tunnels traffic are left out.
    # A missing value in any of the five makes that row's total NaN.
    mta_df['Total Estimated Ridership'] = (
        mta_df['Subways: Total Estimated Ridership']
        + mta_df['Buses: Total Estimated Ridership']
        + mta_df['LIRR: Total Estimated Ridership']
        + mta_df['Metro-North: Total Estimated Ridership']
        + mta_df['Staten Island Railway: Total Estimated Ridership']
    )
    return mta_df
# Load data once at startup: the file is read at import time and the resulting
# module-level frame is shared by the layout (date picker bounds) and the
# dashboard callback, which only ever filters it.
mta_df = load_data()
# Lookup from raw CSV column headers to the short mode labels used in the UI.
# Every mode owns two headers -- a volume column and a "% of pre-pandemic"
# column -- and both resolve to the same label.
_MODE_COLUMNS = (
    # (CSV prefix, volume metric, display label)
    ('Subways', 'Total Estimated Ridership', 'Subways'),
    ('Buses', 'Total Estimated Ridership', 'Buses'),
    ('LIRR', 'Total Estimated Ridership', 'Long Island Rails'),
    ('Metro-North', 'Total Estimated Ridership', 'Metro-North'),
    ('Staten Island Railway', 'Total Estimated Ridership', 'Staten Island Railway'),
    ('Access-A-Ride', 'Total Scheduled Trips', 'Access-A-Ride'),
    ('Bridges and Tunnels', 'Total Traffic', 'Bridges and Tunnels'),
)
COLUMN_MAPPING = {
    # Volume headers first, then the percentage headers, in mode order.
    **{
        f'{prefix}: {metric}': label
        for prefix, metric, label in _MODE_COLUMNS
    },
    **{
        f'{prefix}: % of Comparable Pre-Pandemic Day': label
        for prefix, _metric, label in _MODE_COLUMNS
    },
}
# Trace colour per transport mode, keyed by the display label that the charts
# use as the series name.
COLOR_MAP = {
    "Subways": "#1f77b4",                # blue
    "Buses": "#ff7f0e",                  # orange
    "Long Island Rails": "#2ca02c",      # green
    "Metro-North": "#9467bd",            # purple
    "Staten Island Railway": "#d62728",  # red
    "Access-A-Ride": "#17becf",          # cyan
    "Bridges and Tunnels": "#8c564b",    # brown
}
# Shared palette for the page chrome (headers, cards, chart backgrounds).
# Kept separate from COLOR_MAP, which colours the data series themselves.
THEME_COLORS = dict(
    primary='#0466c8',
    secondary='#979dac',
    accent='#ff7f0e',
    background='#f8f9fa',
    card='#ffffff',
    text='#212529',
    border='#dee2e6',
)
# Inline CSS fragments for the layout, keyed by the role of the element they
# are applied to. Values repeated across several roles are named once here.
_CARD_SHADOW = '0 4px 6px rgba(0, 0, 0, 0.1)'
_DIVIDER = f'1px solid {THEME_COLORS["border"]}'
_BORDERLESS_CARD = {'boxShadow': _CARD_SHADOW, 'border': 'none'}

STYLES = {
    # Outermost page container
    'container': {
        'backgroundColor': THEME_COLORS['background'],
        'padding': '20px',
    },
    # Page title
    'header': {
        'color': THEME_COLORS['primary'],
        'textAlign': 'center',
        'marginBottom': '30px',
        'paddingBottom': '10px',
        'borderBottom': _DIVIDER,
    },
    # Card variants: all share the same shadow and have no border
    'card': {'marginBottom': '20px', **_BORDERLESS_CARD},
    'graph_card': dict(_BORDERLESS_CARD),
    'stat_card': {'height': '100%', 'textAlign': 'center', **_BORDERLESS_CARD},
    'control_section': {
        'backgroundColor': THEME_COLORS['card'],
        'padding': '15px',
        'borderRadius': '8px',
        'boxShadow': '0 2px 4px rgba(0, 0, 0, 0.1)',
        'marginBottom': '20px',
    },
    # Sub-heading inside the controls card
    'section_header': {
        'fontWeight': 'bold',
        'fontSize': '16px',
        'color': THEME_COLORS['primary'],
        'marginBottom': '15px',
        'borderBottom': _DIVIDER,
        'paddingBottom': '8px',
    },
    'checklist_item': {
        'marginBottom': '8px',
        'fontSize': '14px',
    },
}
# Initialize the Dash app, loading the Bootstrap "Minty" theme stylesheet from
# dash-bootstrap-components so the dbc.* layout components are styled.
app = Dash(__name__, external_stylesheets=[dbc.themes.MINTY])
# Page title for the app.
app.title = "MTA Ridership Dashboard"
# Helper functions
# (divisor, suffix, decimal places), ordered from smallest to largest unit.
_NUMBER_SCALES = ((1.0, "", 0), (1e3, "K", 1), (1e6, "M", 1), (1e9, "B", 1))


def format_number(value):
    """Format a number compactly using K, M and B suffixes.

    Values below one thousand are shown as whole numbers; larger values are
    scaled and shown with one decimal place. The sign is preserved, so
    negative values are abbreviated the same way as positive ones.

    Args:
        value: Any real number (Python or NumPy scalar).

    Returns:
        The formatted string, e.g. ``"1.5M"``, ``"-2.5K"`` or ``"999"``.
    """
    sign = "-" if value < 0 else ""
    magnitude = abs(value)

    # Pick the largest unit the magnitude reaches; index 0 (no suffix) is the
    # fallback for anything below 1000, including NaN.
    tier = 0
    for index, (divisor, _suffix, _decimals) in enumerate(_NUMBER_SCALES):
        if magnitude >= divisor:
            tier = index

    divisor, suffix, decimals = _NUMBER_SCALES[tier]
    text = f"{magnitude / divisor:.{decimals}f}"

    # Rounding can carry the displayed figure up to 1000 of the current unit
    # (999_960 -> "1000.0K"); promote it to the next unit ("1.0M") instead.
    if tier + 1 < len(_NUMBER_SCALES) and float(text) >= 1000:
        divisor, suffix, decimals = _NUMBER_SCALES[tier + 1]
        text = f"{magnitude / divisor:.{decimals}f}"

    return f"{sign}{text}{suffix}"
def format_title(modes, values):
    """Build a chart title listing each mode next to its abbreviated value.

    ``modes`` and ``values`` are paired positionally (extra items in the
    longer one are ignored) and the ``"<mode>: <value>"`` entries are joined
    with ``" | "``. Values are abbreviated with ``format_number``.
    """
    return " | ".join(
        f"{label}: {format_number(amount)}"
        for label, amount in zip(modes, values)
    )
def format_percentage_title(modes, percentages):
    """Build a chart title listing each mode next to its percentage.

    ``modes`` and ``percentages`` are paired positionally (extra items in the
    longer one are ignored); each percentage is shown with one decimal place
    and the ``"<mode>: <pct>%"`` entries are joined with ``" | "``.
    """
    return " | ".join(
        f"{label}: {share:.1f}%"
        for label, share in zip(modes, percentages)
    )
# Check if a date is within the filtered date range
def is_date_in_range(date_str, start_date, end_date):
    """Return True when a date lies inside an inclusive date range.

    Args:
        date_str: The date to test; anything ``pd.to_datetime`` can parse.
        start_date: Inclusive lower bound, or ``None`` for no lower bound.
        end_date: Inclusive upper bound, or ``None`` for no upper bound.

    Returns:
        ``True`` if ``start_date <= date_str <= end_date`` (ignoring any
        bound that is ``None``), otherwise ``False``.
    """
    moment = pd.to_datetime(date_str)
    # pd.to_datetime(None) is None, and comparing a Timestamp with None raises
    # TypeError, so a missing bound is treated as open-ended instead.
    after_start = start_date is None or moment >= pd.to_datetime(start_date)
    before_end = end_date is None or moment <= pd.to_datetime(end_date)
    return bool(after_start and before_end)
# App layout with improved organization and responsive design.
# The page is assembled from small builders so each repeated structure
# (section heading, mode checklist, stat card, graph card) is spelled out once.
def _section_heading(text):
    """Underlined sub-heading used inside the controls card."""
    return html.Div(
        html.H6(text, className="text-primary"),
        style=STYLES['section_header'],
    )


def _mode_checklist(component_id, modes, preselected):
    """Half-width column holding one checklist of transport modes.

    Each option's label is the mode name with a leading space; its value is
    the mode name itself.
    """
    return dbc.Col([
        dbc.Checklist(
            id=component_id,
            options=[{'label': f' {mode}', 'value': mode} for mode in modes],
            value=preselected,
            style={'lineHeight': '1.8'},
            inputClassName="me-2",
        ),
    ], md=6)


def _stat_card(caption, output_id):
    """Third-width column with a captioned card whose body is filled by the callback."""
    return dbc.Col([
        dbc.Card([
            dbc.CardHeader(caption, style={'fontWeight': 'bold', 'padding': '10px'}),
            dbc.CardBody(html.Div(id=output_id)),
        ], style=STYLES['stat_card']),
    ], md=4, sm=12)


def _graph_card(caption, graph_id, **card_props):
    """Card with a centred caption above a graph; extra props go to the card."""
    return dbc.Card([
        dbc.CardHeader(html.H5(caption, className="text-center")),
        dbc.CardBody(dcc.Graph(id=graph_id)),
    ], style=STYLES['graph_card'], **card_props)


# Transport mode selection, split into a rail column and a road column
_mode_controls = dbc.Col([
    _section_heading("Transport Modes"),
    dbc.Row([
        _mode_checklist(
            'mta-checklist-rail',
            ['Subways', 'Metro-North', 'Long Island Rails', 'Staten Island Railway'],
            ['Subways'],
        ),
        _mode_checklist(
            'mta-checklist-road',
            ['Buses', 'Access-A-Ride', 'Bridges and Tunnels'],
            ['Buses', 'Bridges and Tunnels'],
        ),
    ]),
], md=6, sm=12)

# Time aggregation; the option values are passed to DataFrame.resample by the callback
_aggregation_picker = html.Div([
    html.Label("Time Aggregation:", className="mb-2 text-muted"),
    dbc.RadioItems(
        id='date-radioitems',
        options=[
            {'label': ' Daily', 'value': 'D'},
            {'label': ' Weekly', 'value': 'W'},
            {'label': ' Monthly', 'value': 'ME'},
            {'label': ' Quarterly', 'value': 'QE'},
            {'label': ' Yearly', 'value': 'YE'},
        ],
        value='W',
        inline=True,
        className="mb-3",
        inputClassName="me-1",
    ),
])

# Date range, initialised to the full span of the loaded data
_date_range_picker = html.Div([
    html.Label("Date Range:", className="mb-2 text-muted"),
    dcc.DatePickerRange(
        id='date-picker-range',
        start_date=mta_df['Date'].min(),
        end_date=mta_df['Date'].max(),
        display_format='YYYY-MM-DD',
        style={'width': '100%'},
        className="mb-3",
    ),
])

_time_controls = dbc.Col([
    _section_heading("Time Settings"),
    _aggregation_picker,
    _date_range_picker,
], md=6, sm=12)

# Controls section
_controls_card = dbc.Card([
    dbc.CardHeader(html.H5("Dashboard Controls", className="mb-0")),
    dbc.CardBody([
        dbc.Row([_mode_controls, _time_controls]),
    ]),
], style=STYLES['card'])

# Stats cards
_stats_row = dbc.Row([
    _stat_card("RIDERSHIP", 'avg-ridership'),
    _stat_card("SCHEDULED TRIPS", 'avg-scheduled-trips'),
    _stat_card("TRAFFIC VOLUME", 'avg-traffic'),
], className="my-4")

app.layout = dbc.Container([
    # Header
    html.H3("MTA Data Dashboard: Analyzing Public Transport Trends", style=STYLES['header']),
    _controls_card,
    _stats_row,
    # Graphs
    _graph_card("Trends in Public Transport Ridership", 'mta-area', className="mb-4"),
    _graph_card("Recovery vs. Pre-Pandemic (%)", 'mta-percentage'),
    # Footer
    html.Footer(
        html.P(
            "Data source: Metropolitan Transportation Authority (MTA)",
            className="text-center text-muted mt-4",
        )
    ),
], fluid=True, style=STYLES['container'])
# Columns consumed by the dashboard callback, grouped by what they feed.
_VOLUME_COLUMNS = [
    'Subways: Total Estimated Ridership',
    'Buses: Total Estimated Ridership',
    'LIRR: Total Estimated Ridership',
    'Metro-North: Total Estimated Ridership',
    'Staten Island Railway: Total Estimated Ridership',
    'Access-A-Ride: Total Scheduled Trips',
    'Bridges and Tunnels: Total Traffic',
]
_PERCENTAGE_COLUMNS = [
    'Subways: % of Comparable Pre-Pandemic Day',
    'Buses: % of Comparable Pre-Pandemic Day',
    'LIRR: % of Comparable Pre-Pandemic Day',
    'Metro-North: % of Comparable Pre-Pandemic Day',
    'Staten Island Railway: % of Comparable Pre-Pandemic Day',
    'Access-A-Ride: % of Comparable Pre-Pandemic Day',
    'Bridges and Tunnels: % of Comparable Pre-Pandemic Day',
]
_STAT_COLUMNS = [
    'Total Estimated Ridership',
    'Access-A-Ride: Total Scheduled Trips',
    'Bridges and Tunnels: Total Traffic',
]
# Date the "Start of Pandemic" annotation is pinned to on both charts.
_PANDEMIC_START = '2020-03-01'


def _frame_by_mode(frame, columns):
    """Return *columns* of *frame*, indexed by Date and renamed to mode labels."""
    return frame.set_index("Date")[columns].rename(columns=COLUMN_MAPPING)


def _apply_chart_layout(fig, title_text, start_date, end_date):
    """Apply the title, legend, colours and fixed x-range shared by both charts."""
    fig.update_layout(
        title={
            'text': title_text,
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': {'size': 16, 'color': THEME_COLORS['text']},
        },
        legend={
            'orientation': 'h',
            'y': -0.15,
            'x': 0.5,
            'xanchor': 'center',
        },
        margin={'l': 40, 'r': 40, 't': 80, 'b': 80},
        plot_bgcolor=THEME_COLORS['background'],
        paper_bgcolor=THEME_COLORS['card'],
        hovermode='x unified',
        # Pin the x-axis to the selected date range rather than the data extent
        xaxis={
            'range': [start_date, end_date],
            'autorange': False,
        },
    )


def _annotate_pandemic_start(fig, plotted, start_date, end_date):
    """Label the pandemic start on *fig* when that date is inside the range.

    The label sits 10% above the largest single value in *plotted*.
    """
    if not is_date_in_range(_PANDEMIC_START, start_date, end_date):
        return
    fig.add_annotation(
        x=_PANDEMIC_START,
        y=plotted.max().max() * 1.1,  # Add some padding
        text="Start of Pandemic",
        showarrow=True,
        arrowhead=2,
        arrowcolor=THEME_COLORS['accent'],
        arrowwidth=2,
        bgcolor='rgba(255, 255, 255, 0.8)',
        bordercolor=THEME_COLORS['border'],
        borderwidth=1,
        borderpad=4,
        font={'color': THEME_COLORS['text']},
    )


def _stat_summary(stats, column):
    """Render the Avg / Min / Max lines of one metric card."""
    return html.Div([
        html.P(f"Avg: {format_number(stats.loc['mean', column])}", className="mb-1"),
        html.P(f"Min: {format_number(stats.loc['min', column])}", className="mb-1"),
        html.P(f"Max: {format_number(stats.loc['max', column])}", className="mb-0"),
    ])


@app.callback(
    [Output('avg-ridership', 'children'),
     Output('avg-scheduled-trips', 'children'),
     Output('avg-traffic', 'children'),
     Output("mta-area", "figure"),
     Output("mta-percentage", "figure")],
    [Input("mta-checklist-rail", "value"),
     Input("mta-checklist-road", "value"),
     Input("date-radioitems", "value"),
     Input("date-picker-range", "start_date"),
     Input("date-picker-range", "end_date")]
)
def update_dashboard(rail_modes, road_modes, date_aggregation, start_date, end_date):
    """Main callback to update all dashboard components based on user selections.

    Args:
        rail_modes: Mode labels ticked in the rail checklist (may be empty).
        road_modes: Mode labels ticked in the road checklist (may be empty).
        date_aggregation: Resample rule from the radio items
            (``'D'``, ``'W'``, ``'ME'``, ``'QE'`` or ``'YE'``).
        start_date: Lower bound of the date picker, or ``None`` if unset.
        end_date: Upper bound of the date picker, or ``None`` if unset.

    Returns:
        A 5-tuple: the contents of the ridership, scheduled-trips and traffic
        cards, then the area figure and the percentage line figure. When the
        date range holds no rows, the cards read "No data available" and both
        figures are empty. When no mode is selected, the cards are still
        filled in and both figures are empty.
    """
    # Combine transport modes; tolerate a checklist that reports no value
    transport_modes = list(rail_modes or []) + list(road_modes or [])

    # A missing picker bound would make the comparisons below raise, so fall
    # back to the corresponding end of the data.
    if start_date is None:
        start_date = mta_df['Date'].min()
    if end_date is None:
        end_date = mta_df['Date'].max()

    # Filter data by date range (inclusive on both ends)
    mask = (mta_df['Date'] >= start_date) & (mta_df['Date'] <= end_date)
    filtered_df = mta_df[mask]
    if filtered_df.empty:
        return "No data available", "No data available", "No data available", {}, {}

    # Metric cards summarise the unresampled daily rows and do not depend on
    # which modes are selected.
    stats = filtered_df[_STAT_COLUMNS].agg(['mean', 'min', 'max']).astype('int')
    ridership_stats, trips_stats, traffic_stats = (
        _stat_summary(stats, column) for column in _STAT_COLUMNS
    )

    # With nothing selected there is no series to plot; show empty charts
    # instead of asking plotly to draw zero columns.
    if not transport_modes:
        return ridership_stats, trips_stats, traffic_stats, {}, {}

    # Volumes are summed per period; percentages are averaged per period
    transportation_er = _frame_by_mode(filtered_df, _VOLUME_COLUMNS).resample(date_aggregation).sum()
    percentage_er = _frame_by_mode(filtered_df, _PERCENTAGE_COLUMNS).resample(date_aggregation).mean()

    # Calculate totals and percentages for the chart titles
    selected_data = transportation_er[transport_modes]
    total_values = [selected_data[mode].sum() for mode in transport_modes]
    percentage_values = [percentage_er[mode].mean() for mode in transport_modes]

    # Area chart of volumes per mode
    area_fig = px.area(
        transportation_er,
        x=transportation_er.index,
        y=transport_modes,
        color_discrete_map=COLOR_MAP,
        markers=True,
        labels={'value': 'Ridership', 'Date': '', 'variable': 'Mode'},
        template='plotly_white'
    )
    _apply_chart_layout(
        area_fig, format_title(transport_modes, total_values), start_date, end_date
    )
    _annotate_pandemic_start(area_fig, selected_data, start_date, end_date)

    # Line chart of recovery percentages per mode
    line_fig = px.line(
        percentage_er,
        x=percentage_er.index,
        y=transport_modes,
        color_discrete_map=COLOR_MAP,
        markers=True,
        labels={'value': '% vs Pre-Pandemic', 'Date': '', 'variable': 'Mode'},
        template='plotly_white'
    )
    _apply_chart_layout(
        line_fig,
        format_percentage_title(transport_modes, percentage_values),
        start_date,
        end_date,
    )
    _annotate_pandemic_start(
        line_fig, percentage_er[transport_modes], start_date, end_date
    )

    return ridership_stats, trips_stats, traffic_stats, area_fig, line_fig