PyCafe - Dash - mta_daily

MTA_Daily_Ridership.csv
app.py
mta_ridership_analysis.py
requirements.txt
mta_ridership_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html, Input, Output
import dash_bootstrap_components as dbc

# Improved data loading with optimized column types
def load_data(csv_path="MTA_Daily_Ridership.csv"):
    """Load the MTA daily ridership CSV and add a combined ridership column.

    Args:
        csv_path: Location of the CSV file, passed straight to
            ``pandas.read_csv``. Defaults to ``"MTA_Daily_Ridership.csv"``,
            which is resolved against the current working directory.

    Returns:
        A DataFrame with ``Date`` parsed as datetimes, the fourteen count and
        percentage columns read as ``float32``, and an extra
        ``'Total Estimated Ridership'`` column. The total adds the subway,
        bus, LIRR, Metro-North and Staten Island Railway ridership columns;
        Access-A-Ride trips and Bridges and Tunnels traffic are not included.
    """
    # Every numeric column is read as float32 to keep the frame small.
    numeric_columns = [
        'Subways: Total Estimated Ridership',
        'Buses: Total Estimated Ridership',
        'LIRR: Total Estimated Ridership',
        'Metro-North: Total Estimated Ridership',
        'Staten Island Railway: Total Estimated Ridership',
        'Access-A-Ride: Total Scheduled Trips',
        'Bridges and Tunnels: Total Traffic',
        'Subways: % of Comparable Pre-Pandemic Day',
        'Buses: % of Comparable Pre-Pandemic Day',
        'LIRR: % of Comparable Pre-Pandemic Day',
        'Metro-North: % of Comparable Pre-Pandemic Day',
        'Staten Island Railway: % of Comparable Pre-Pandemic Day',
        'Access-A-Ride: % of Comparable Pre-Pandemic Day',
        'Bridges and Tunnels: % of Comparable Pre-Pandemic Day',
    ]
    dtype_dict = dict.fromkeys(numeric_columns, 'float32')

    mta_df = pd.read_csv(csv_path, parse_dates=['Date'], dtype=dtype_dict)

    # Precompute the combined ridership once so callbacks do not redo it.
    # Plain `+` is used on purpose: a missing value in any mode leaves the
    # total missing for that day instead of silently under-counting.
    mta_df['Total Estimated Ridership'] = (
        mta_df['Subways: Total Estimated Ridership'] +
        mta_df['Buses: Total Estimated Ridership'] +
        mta_df['LIRR: Total Estimated Ridership'] +
        mta_df['Metro-North: Total Estimated Ridership'] +
        mta_df['Staten Island Railway: Total Estimated Ridership']
    )

    return mta_df

# Load the dataset once at import time; the layout and the dashboard callback
# both read this module-level frame instead of re-reading the CSV per request.
mta_df = load_data()

# Display label for every raw CSV column. The count column and the percentage
# column of a given mode map to the same label, so either family can be
# renamed with this one table.
_COUNT_COLUMN_LABELS = {
    'Subways: Total Estimated Ridership': 'Subways',
    'Buses: Total Estimated Ridership': 'Buses',
    'LIRR: Total Estimated Ridership': 'Long Island Rails',
    'Metro-North: Total Estimated Ridership': 'Metro-North',
    'Staten Island Railway: Total Estimated Ridership': 'Staten Island Railway',
    'Access-A-Ride: Total Scheduled Trips': 'Access-A-Ride',
    'Bridges and Tunnels: Total Traffic': 'Bridges and Tunnels',
}
_PERCENT_COLUMN_LABELS = {
    'Subways: % of Comparable Pre-Pandemic Day': 'Subways',
    'Buses: % of Comparable Pre-Pandemic Day': 'Buses',
    'LIRR: % of Comparable Pre-Pandemic Day': 'Long Island Rails',
    'Metro-North: % of Comparable Pre-Pandemic Day': 'Metro-North',
    'Staten Island Railway: % of Comparable Pre-Pandemic Day': 'Staten Island Railway',
    'Access-A-Ride: % of Comparable Pre-Pandemic Day': 'Access-A-Ride',
    'Bridges and Tunnels: % of Comparable Pre-Pandemic Day': 'Bridges and Tunnels',
}
# Counts first, then percentages.
COLUMN_MAPPING = {**_COUNT_COLUMN_LABELS, **_PERCENT_COLUMN_LABELS}

# Fixed trace colour per transport-mode label, so a mode keeps its colour
# regardless of which other modes are selected.
COLOR_MAP = dict([
    ('Subways', '#1f77b4'),
    ('Buses', '#ff7f0e'),
    ('Long Island Rails', '#2ca02c'),
    ('Metro-North', '#9467bd'),
    ('Staten Island Railway', '#d62728'),
    ('Access-A-Ride', '#17becf'),
    ('Bridges and Tunnels', '#8c564b'),
])

# Named colours shared by the inline styles and the chart layouts.
THEME_COLORS = dict([
    ('primary', '#0466c8'),
    ('secondary', '#979dac'),
    ('accent', '#ff7f0e'),
    ('background', '#f8f9fa'),
    ('card', '#ffffff'),
    ('text', '#212529'),
    ('border', '#dee2e6'),
])

# Inline style dictionaries, keyed by the layout role they are applied to.
# Fragments that repeat across several roles are named once here.
_CARD_SHADOW = '0 4px 6px rgba(0, 0, 0, 0.1)'
_THIN_RULE = f'1px solid {THEME_COLORS["border"]}'
_FLAT_CARD = {'boxShadow': _CARD_SHADOW, 'border': 'none'}

STYLES = {
    'container': {
        'backgroundColor': THEME_COLORS['background'],
        'padding': '20px',
    },
    'header': {
        'color': THEME_COLORS['primary'],
        'textAlign': 'center',
        'marginBottom': '30px',
        'paddingBottom': '10px',
        'borderBottom': _THIN_RULE,
    },
    # Each card role gets its own dict object built from the shared fragment.
    'card': {'marginBottom': '20px', **_FLAT_CARD},
    'graph_card': {**_FLAT_CARD},
    'stat_card': {'height': '100%', 'textAlign': 'center', **_FLAT_CARD},
    'control_section': {
        'backgroundColor': THEME_COLORS['card'],
        'padding': '15px',
        'borderRadius': '8px',
        'boxShadow': '0 2px 4px rgba(0, 0, 0, 0.1)',
        'marginBottom': '20px',
    },
    'section_header': {
        'fontWeight': 'bold',
        'fontSize': '16px',
        'color': THEME_COLORS['primary'],
        'marginBottom': '15px',
        'borderBottom': _THIN_RULE,
        'paddingBottom': '8px',
    },
    'checklist_item': {
        'marginBottom': '8px',
        'fontSize': '14px',
    },
}

# Create the Dash application with the Bootstrap "MINTY" stylesheet and set
# the title attribute on the app object.
_EXTERNAL_STYLESHEETS = [dbc.themes.MINTY]
app = Dash(__name__, external_stylesheets=_EXTERNAL_STYLESHEETS)
app.title = "MTA Ridership Dashboard"

# Helper functions
def format_number(value):
    """Abbreviate a number with a K, M or B suffix.

    Values whose magnitude is at least 1e3 are shown with one decimal place
    and the matching suffix; smaller values are shown as whole numbers.
    The unit is chosen from the magnitude, so negative numbers are
    abbreviated the same way as positive ones.

    Args:
        value: A real number (int, float or NumPy scalar).

    Returns:
        The formatted string, e.g. ``"1.5K"``, ``"-2.5M"``, ``"999"``.
    """
    # (scale, suffix, decimals) from largest to smallest; the last entry is
    # the catch-all for values below one thousand.
    units = ((1e9, "B", 1), (1e6, "M", 1), (1e3, "K", 1), (1.0, "", 0))
    magnitude = abs(value)
    last = len(units) - 1

    for index, (scale, suffix, decimals) in enumerate(units):
        if magnitude >= scale or index == last:
            # Rounding can carry a value such as 999,960 up to "1000.0K";
            # when that happens, move to the next larger unit ("1.0M").
            if index > 0 and round(magnitude / scale, decimals) >= 1000:
                scale, suffix, decimals = units[index - 1]
            return f"{value / scale:.{decimals}f}{suffix}"

def format_title(modes, values):
    """Join "<mode>: <abbreviated value>" entries into one title string.

    Modes and values are paired positionally; pairing stops at the shorter
    of the two sequences. Entries are separated by " | ".
    """
    return " | ".join(
        f"{mode}: {format_number(value)}"
        for mode, value in zip(modes, values)
    )

def format_percentage_title(modes, percentages):
    """Join "<mode>: <percentage>%" entries into one title string.

    Each percentage is shown with one decimal place. Modes and percentages
    are paired positionally and entries are separated by " | ".
    """
    parts = []
    for mode, value in zip(modes, percentages):
        parts.append(f"{mode}: {value:.1f}%")
    separator = " | "
    return separator.join(parts)

# Check if a date is within the filtered date range
def is_date_in_range(date_str, start_date, end_date):
    """Return whether a date lies inside an inclusive date range.

    Args:
        date_str: The date to test, in any form ``pandas.to_datetime`` accepts.
        start_date: Inclusive lower bound, or ``None`` for no lower bound.
        end_date: Inclusive upper bound, or ``None`` for no upper bound.

    Returns:
        ``True`` when the date is on or after ``start_date`` and on or before
        ``end_date``; ``False`` otherwise.
    """
    date = pd.to_datetime(date_str)

    # A missing bound is treated as open-ended; comparing a Timestamp with
    # None would raise TypeError.
    if start_date is not None and not (date >= pd.to_datetime(start_date)):
        return False
    if end_date is not None and not (date <= pd.to_datetime(end_date)):
        return False
    return True

# Page layout, top to bottom: header, controls card, three stat cards, the two
# graph cards and a footer. The component ids declared here
# ('mta-checklist-rail', 'mta-checklist-road', 'date-radioitems',
# 'date-picker-range', 'avg-ridership', 'avg-scheduled-trips', 'avg-traffic',
# 'mta-area', 'mta-percentage') are the ones wired up by the callback.
app.layout = dbc.Container([
    # Header
    html.H3("MTA Data Dashboard: Analyzing Public Transport Trends", style=STYLES['header']),
    
    # Controls section: mode checklists on the left, time settings on the right
    dbc.Card([
    dbc.CardHeader(html.H5("Dashboard Controls", className="mb-0")),
    dbc.CardBody([
        dbc.Row([
            # Transport mode selection, split into a rail and a road checklist.
            # The option values are the display labels used as column names
            # after renaming with COLUMN_MAPPING.
            dbc.Col([
                html.Div(html.H6("Transport Modes", className="text-primary"), 
                         style=STYLES['section_header']),
                dbc.Row([
                    dbc.Col([
                        dbc.Checklist(
                            id='mta-checklist-rail',
                            options=[
                                {'label': ' Subways', 'value': 'Subways'},
                                {'label': ' Metro-North', 'value': 'Metro-North'},
                                {'label': ' Long Island Rails', 'value': 'Long Island Rails'},
                                {'label': ' Staten Island Railway', 'value': 'Staten Island Railway'},
                            ],
                            # Initially only Subways is ticked
                            value=['Subways'],
                            style={'lineHeight': '1.8'},
                            inputClassName="me-2"
                        ),
                    ], md=6),
                    dbc.Col([
                        dbc.Checklist(
                            id='mta-checklist-road',
                            options=[
                                {'label': ' Buses', 'value': 'Buses'},
                                {'label': ' Access-A-Ride', 'value': 'Access-A-Ride'},
                                {'label': ' Bridges and Tunnels', 'value': 'Bridges and Tunnels'},
                            ],
                            # Initially Buses and Bridges and Tunnels are ticked
                            value=['Buses', 'Bridges and Tunnels'],
                            style={'lineHeight': '1.8'},
                            inputClassName="me-2"
                        ),
                    ], md=6),
                ]),
            ], md=6, sm=12),
            
            # Time aggregation and date range
            dbc.Col([
                html.Div(html.H6("Time Settings", className="text-primary"), 
                         style=STYLES['section_header']),
                
                # Time aggregation: each option value is handed unchanged to
                # DataFrame.resample() in the callback
                html.Div([
                    html.Label("Time Aggregation:", className="mb-2 text-muted"),
                    dbc.RadioItems(
                        id='date-radioitems',
                        options=[
                            {'label': ' Daily', 'value': 'D'},
                            {'label': ' Weekly', 'value': 'W'},
                            {'label': ' Monthly', 'value': 'ME'},
                            {'label': ' Quarterly', 'value': 'QE'},
                            {'label': ' Yearly', 'value': 'YE'}
                        ],
                        # Weekly is the initial aggregation
                        value='W',
                        inline=True,
                        className="mb-3",
                        inputClassName="me-1"
                    ),
                ]),
                
                # Date range: initially spans the full extent of the loaded data
                html.Div([
                    html.Label("Date Range:", className="mb-2 text-muted"),
                    dcc.DatePickerRange(
                        id='date-picker-range',
                        start_date=mta_df['Date'].min(),
                        end_date=mta_df['Date'].max(),
                        display_format='YYYY-MM-DD',
                        style={'width': '100%'},
                        className="mb-3"
                    ),
                ]),
            ], md=6, sm=12),
        ])
    ])
], style=STYLES['card']),
    
    # Stats cards: the inner Divs are empty here and receive their children
    # from the callback
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardHeader("RIDERSHIP", style={'fontWeight': 'bold', 'padding': '10px'}),
                dbc.CardBody(html.Div(id='avg-ridership'))
            ], style=STYLES['stat_card'])
        ], md=4, sm=12),
        
        dbc.Col([
            dbc.Card([
                dbc.CardHeader("SCHEDULED TRIPS", style={'fontWeight': 'bold', 'padding': '10px'}),
                dbc.CardBody(html.Div(id='avg-scheduled-trips'))
            ], style=STYLES['stat_card'])
        ], md=4, sm=12),
        
        dbc.Col([
            dbc.Card([
                dbc.CardHeader("TRAFFIC VOLUME", style={'fontWeight': 'bold', 'padding': '10px'}),
                dbc.CardBody(html.Div(id='avg-traffic'))
            ], style=STYLES['stat_card'])
        ], md=4, sm=12),
    ], className="my-4"),
    
    # Graphs: figures are supplied by the callback
    dbc.Card([
        dbc.CardHeader(html.H5("Trends in Public Transport Ridership", className="text-center")),
        dbc.CardBody(dcc.Graph(id='mta-area'))
    ], style=STYLES['graph_card'], className="mb-4"),
    
    dbc.Card([
        dbc.CardHeader(html.H5("Recovery vs. Pre-Pandemic (%)", className="text-center")),
        dbc.CardBody(dcc.Graph(id='mta-percentage'))
    ], style=STYLES['graph_card']),
    
    # Footer
    html.Footer(
        html.P("Data source: Metropolitan Transportation Authority (MTA)", className="text-center text-muted mt-4")
    )
], fluid=True, style=STYLES['container'])

# Raw CSV columns holding absolute counts, one per transport mode.
_RIDERSHIP_COLUMNS = [
    'Subways: Total Estimated Ridership',
    'Buses: Total Estimated Ridership',
    'LIRR: Total Estimated Ridership',
    'Metro-North: Total Estimated Ridership',
    'Staten Island Railway: Total Estimated Ridership',
    'Access-A-Ride: Total Scheduled Trips',
    'Bridges and Tunnels: Total Traffic'
]

# Raw CSV columns holding the percentage of the comparable pre-pandemic day.
_PERCENTAGE_COLUMNS = [
    'Subways: % of Comparable Pre-Pandemic Day',
    'Buses: % of Comparable Pre-Pandemic Day',
    'LIRR: % of Comparable Pre-Pandemic Day',
    'Metro-North: % of Comparable Pre-Pandemic Day',
    'Staten Island Railway: % of Comparable Pre-Pandemic Day',
    'Access-A-Ride: % of Comparable Pre-Pandemic Day',
    'Bridges and Tunnels: % of Comparable Pre-Pandemic Day'
]

# Columns summarised in the three stat cards.
_STAT_COLUMNS = [
    'Total Estimated Ridership',
    'Access-A-Ride: Total Scheduled Trips',
    'Bridges and Tunnels: Total Traffic'
]

# Date marked on both charts when it falls inside the selected range.
_PANDEMIC_START = '2020-03-01'


def _resample_by_mode(frame, columns, rule):
    """Return a resampler over `columns`, indexed by Date and renamed to mode labels."""
    by_mode = frame.set_index("Date")[columns].rename(columns=COLUMN_MAPPING)
    return by_mode.resample(rule)


def _apply_chart_layout(fig, title_text, start_date, end_date):
    """Apply the shared title, legend, margin and x-axis settings to a chart."""
    fig.update_layout(
        title={
            'text': title_text,
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': {'size': 16, 'color': THEME_COLORS['text']}
        },
        legend={
            'orientation': 'h',
            'y': -0.15,
            'x': 0.5,
            'xanchor': 'center'
        },
        margin={'l': 40, 'r': 40, 't': 80, 'b': 80},
        plot_bgcolor=THEME_COLORS['background'],
        paper_bgcolor=THEME_COLORS['card'],
        hovermode='x unified',
        # Set x-axis range to match the selected date range
        xaxis={
            'range': [start_date, end_date],
            'autorange': False
        }
    )


def _add_pandemic_annotation(fig, y_value):
    """Add the "Start of Pandemic" marker to a chart at height `y_value`."""
    fig.add_annotation(
        x=_PANDEMIC_START,
        y=y_value,
        text="Start of Pandemic",
        showarrow=True,
        arrowhead=2,
        arrowcolor=THEME_COLORS['accent'],
        arrowwidth=2,
        bgcolor='rgba(255, 255, 255, 0.8)',
        bordercolor=THEME_COLORS['border'],
        borderwidth=1,
        borderpad=4,
        font={'color': THEME_COLORS['text']}
    )


def _stat_summary(stats, column):
    """Build the Avg/Min/Max lines for one stat card from the aggregated `stats` frame."""
    return html.Div([
        html.P(f"Avg: {format_number(stats.loc['mean', column])}", className="mb-1"),
        html.P(f"Min: {format_number(stats.loc['min', column])}", className="mb-1"),
        html.P(f"Max: {format_number(stats.loc['max', column])}", className="mb-0")
    ])


def _placeholder_figure(message):
    """Return a minimal figure dict that only shows `message` as its title."""
    return {'layout': {'title': {'text': message, 'x': 0.5, 'xanchor': 'center'}}}


@app.callback(
    [Output('avg-ridership', 'children'),
     Output('avg-scheduled-trips', 'children'),
     Output('avg-traffic', 'children'),
     Output("mta-area", "figure"),
     Output("mta-percentage", "figure")],
    [Input("mta-checklist-rail", "value"),
     Input("mta-checklist-road", "value"),
     Input("date-radioitems", "value"),
     Input("date-picker-range", "start_date"),
     Input("date-picker-range", "end_date")]
)
def update_dashboard(rail_modes, road_modes, date_aggregation, start_date, end_date):
    """Recompute the stat cards and both charts from the current control values.

    Args:
        rail_modes: Selected labels from the rail checklist (may be empty or None).
        road_modes: Selected labels from the road checklist (may be empty or None).
        date_aggregation: Resample rule passed to ``DataFrame.resample``.
        start_date: Lower bound of the date filter; ``None`` falls back to the
            earliest date in the data.
        end_date: Upper bound of the date filter; ``None`` falls back to the
            latest date in the data.

    Returns:
        A 5-tuple: ridership card content, scheduled-trips card content,
        traffic card content, the area-chart figure and the percentage-chart
        figure. When the date filter matches no rows, the three cards read
        "No data available" and both figures are empty dicts.
    """
    # Combine transport modes; either checklist may contribute nothing.
    transport_modes = list(rail_modes or []) + list(road_modes or [])

    # A None bound cannot be compared against the Date column, so fall back
    # to the extent of the loaded data.
    # NOTE(review): None is assumed to be what the picker sends when a date is
    # cleared — confirm against the Dash DatePickerRange documentation.
    if start_date is None:
        start_date = mta_df['Date'].min().isoformat()
    if end_date is None:
        end_date = mta_df['Date'].max().isoformat()

    # Filter data by date range
    mask = (mta_df['Date'] >= start_date) & (mta_df['Date'] <= end_date)
    filtered_df = mta_df[mask]

    if filtered_df.empty:
        return "No data available", "No data available", "No data available", {}, {}

    # The stat cards depend only on the date filter, not on the selected modes.
    stats = filtered_df[_STAT_COLUMNS].agg(['mean', 'min', 'max']).astype('int')
    ridership_stats = _stat_summary(stats, 'Total Estimated Ridership')
    trips_stats = _stat_summary(stats, 'Access-A-Ride: Total Scheduled Trips')
    traffic_stats = _stat_summary(stats, 'Bridges and Tunnels: Total Traffic')

    # With nothing selected there is no series to plot; show a prompt instead
    # of handing plotly an empty column list.
    if not transport_modes:
        prompt = "Select at least one transport mode"
        return (ridership_stats, trips_stats, traffic_stats,
                _placeholder_figure(prompt), _placeholder_figure(prompt))

    # Counts are summed per period; percentages are averaged per period.
    transportation_er = _resample_by_mode(filtered_df, _RIDERSHIP_COLUMNS, date_aggregation).sum()
    percentage_er = _resample_by_mode(filtered_df, _PERCENTAGE_COLUMNS, date_aggregation).mean()

    # Calculate totals and percentages for the chart titles
    selected_data = transportation_er[transport_modes]
    total_values = [selected_data[mode].sum() for mode in transport_modes]
    percentage_values = [percentage_er[mode].mean() for mode in transport_modes]

    # Only annotate the pandemic start if it falls within the date range
    show_pandemic_marker = is_date_in_range(_PANDEMIC_START, start_date, end_date)

    # Create the area chart
    area_fig = px.area(
        transportation_er,
        x=transportation_er.index,
        y=transport_modes,
        color_discrete_map=COLOR_MAP,
        markers=True,
        labels={'value': 'Ridership', 'Date': '', 'variable': 'Mode'},
        template='plotly_white'
    )
    _apply_chart_layout(area_fig, format_title(transport_modes, total_values),
                        start_date, end_date)
    if show_pandemic_marker:
        # px.area stacks the series, so the top of the chart is the per-period
        # sum across modes, not the largest single series. Add 10% padding.
        stacked_peak = selected_data.sum(axis=1).max() * 1.1
        _add_pandemic_annotation(area_fig, stacked_peak)

    # Create the percentage line chart
    line_fig = px.line(
        percentage_er,
        x=percentage_er.index,
        y=transport_modes,
        color_discrete_map=COLOR_MAP,
        markers=True,
        labels={'value': '% vs Pre-Pandemic', 'Date': '', 'variable': 'Mode'},
        template='plotly_white'
    )
    _apply_chart_layout(line_fig, format_percentage_title(transport_modes, percentage_values),
                        start_date, end_date)
    if show_pandemic_marker:
        # Lines are not stacked, so the largest single value plus 10% is the top.
        max_pct = percentage_er[transport_modes].max().max() * 1.1
        _add_pandemic_annotation(line_fig, max_pct)

    return ridership_stats, trips_stats, traffic_stats, area_fig, line_fig
Py.Cafe

mta_daily_ridership