import pandas as pd
import dash
from dash import dcc, html, Input, Output, State
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import plotly.figure_factory as ff
# ---------- LOAD AND PREPARE DATA ----------
# Load the application dataset. The path is relative to the working directory
# (replace it with your own file path). 'App Date' is requested as a date
# column at read time.
df = pd.read_csv('TLC_New_Driver_Application.csv', parse_dates=['App Date'])
# Coerce 'App Date' to datetime once more.
# NOTE(review): presumably a guard for the case where read_csv leaves the
# column unparsed — confirm; otherwise this call is redundant.
df['App Date'] = pd.to_datetime(df['App Date'])
# ---------- PREPARE DATA FOR ML MODEL ----------
def prepare_data_for_ml(df):
    """Build the feature matrix and target used by the approval model.

    Args:
        df: Applications DataFrame. Must contain a 'Status' column and a
            datetime 'App Date' column. The stage columns listed in
            ``stages`` are used when present and skipped otherwise.

    Returns:
        A tuple ``(X, y, model_df)``:
        ``X`` holds one ``<stage>_completed`` 0/1 flag per available stage
        plus ``app_month`` and ``app_day_of_week``; ``y`` is the binary
        ``is_approved`` target; ``model_df`` is a copy of ``df`` with every
        engineered column added (the input frame is not modified).
    """
    model_df = df.copy()

    # Target: 1 when the status text contains 'Approved'. na=False maps a
    # missing status to "not approved" instead of leaving a NaN that would
    # make astype(int) raise.
    model_df['is_approved'] = (
        model_df['Status'].str.contains('Approved', na=False).astype(int)
    )

    # Calendar features derived from the application date
    model_df['app_month'] = model_df['App Date'].dt.month
    model_df['app_day_of_week'] = model_df['App Date'].dt.dayofweek

    # Stage completion flags (completed=1, anything else=0)
    stages = ['FRU Interview Scheduled', 'Drug Test', 'WAV Course',
              'Defensive Driving', 'Driver Exam', 'Medical Clearance Form', 'Other Requirements']
    completed_values = ['completed', 'yes', 'done', 'complete']
    for stage in stages:
        if stage not in model_df.columns:
            continue
        # Coerce to str and strip so that NaN / non-string columns and values
        # with stray whitespace are handled the same way the funnel chart
        # handles them.
        normalized = model_df[stage].astype(str).str.strip().str.lower()
        model_df[f'{stage}_completed'] = normalized.isin(completed_values).astype(int)

    # Model inputs: every *_completed flag followed by the calendar features
    feature_cols = [col for col in model_df.columns if col.endswith('_completed')]
    feature_cols += ['app_month', 'app_day_of_week']
    X = model_df[feature_cols]
    y = model_df['is_approved']
    return X, y, model_df
# ---------- TRAIN LOGISTIC REGRESSION MODEL ----------
def train_logistic_regression(X, y):
    """Fit a logistic regression classifier and rank its features.

    The data is split 70/30 (fixed seed), the model is fitted on the training
    part and scored on the held-out part.

    Args:
        X: Feature DataFrame.
        y: Binary target aligned with ``X``.

    Returns:
        ``(model, feature_importance, accuracy, X_test, y_test)`` where
        ``feature_importance`` is a DataFrame with columns 'Feature' and
        'Importance' (absolute coefficient value), sorted descending.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    classifier = LogisticRegression(max_iter=1000, random_state=42)
    classifier.fit(X_train, y_train)

    # Accuracy on the held-out 30%
    accuracy = accuracy_score(y_test, classifier.predict(X_test))

    # Importance is taken as the magnitude of each fitted coefficient
    coefficient_magnitudes = np.abs(classifier.coef_[0])
    ranking = pd.DataFrame({
        'Feature': X.columns,
        'Importance': coefficient_magnitudes
    })
    ranking = ranking.sort_values('Importance', ascending=False)

    return classifier, ranking, accuracy, X_test, y_test
# ---------- PREPARE ML DATA AND TRAIN MODEL ----------
# Runs once at import time: build the feature matrix from the loaded data and
# fit the model. The resulting globals (model, feature_importance,
# model_accuracy, model_df) are read by the ML callbacks further down.
X, y, model_df = prepare_data_for_ml(df)
model, feature_importance, model_accuracy, X_test, y_test = train_logistic_regression(X, y)
# ---------- INITIALIZE DASH APPLICATION ----------
# Bootstrap 5 CSS is pulled from a CDN; the layout relies on its utility
# classes (d-none, row, col-*, card, btn, ...).
app = dash.Dash(__name__,
                external_stylesheets=[
                    'https://cdn.jsdelivr.net/npm/bootstrap@5.2.3/dist/css/bootstrap.min.css'
                ])
server = app.server # For production deployment
# ---------- FUNCTIONS TO CREATE CHARTS ----------
def create_funnel_chart(df):
    """Create a funnel chart of application counts per process stage.

    The funnel starts with the total number of applications, shows one bar
    per stage (applications whose stage value is a "complete" or a
    "not applicable" value) and ends with the applications whose status is
    'Approved - License Issued'. Hovering a stage shows the breakdown into
    completed / pending / not applicable.

    Args:
        df: Applications DataFrame with a 'Status' column. Stage columns
            that are missing contribute a count of zero.

    Returns:
        A ``plotly.graph_objects.Figure`` containing the funnel.
    """
    stages = ['FRU Interview Scheduled', 'Defensive Driving', 'Medical Clearance Form', 'WAV Course', 'Drug Test',
              'Driver Exam', 'Other Requirements']

    # Recognised raw values per status bucket, already lower-cased. Defined
    # once here instead of being rebuilt for every stage.
    status_values = {
        'Complete': ['completed', 'yes', 'done', 'complete'],
        'Needed': ['needed', 'pending', 'required'],
        'Not Applicable': ['not applicable', 'n/a', 'na'],
    }

    counts = [len(df)]  # First bar: total applications
    stage_details = {}  # Per-stage breakdown used for the hover text

    for stage in stages:
        if stage in df.columns:
            # Normalise the column once (no full-frame copy needed)
            normalized = df[stage].astype(str).str.lower().str.strip()
            details = {
                label: int(normalized.isin(values).sum())
                for label, values in status_values.items()
            }
        else:
            details = dict.fromkeys(status_values, 0)

        # "Not applicable" counts as having passed the stage
        counts.append(details['Complete'] + details['Not Applicable'])
        stage_details[stage] = details

    # Final stage: approved applications
    approved_values = ['approved - license issued']
    approved = int(df['Status'].str.lower().isin(approved_values).sum())
    counts.append(approved)

    # Names to display on the chart
    display_stages = ['Total Applications'] + stages + ['Approved']

    # Hover text, one entry per bar in display order
    custom_data = [f"<b>Total applications:</b> {counts[0]}"]
    for stage in stages:
        details = stage_details[stage]
        custom_data.append(
            f"<b>{stage}:</b><br>"
            f"Completed: {details['Complete']}<br>"
            f"Pending: {details['Needed']}<br>"
            f"Not Applicable: {details['Not Applicable']}"
        )
    custom_data.append(f"<b>Approved applications:</b> {counts[-1]}")

    # Taxi/Limo color scheme (one color per bar)
    colors = ["#FFCC00", "#FFD633", "#FFE066", "#FFEB99", "#FFF5CC", "#F0CF65", "#E6C44C", "#D9B833", "#FFCC00"]

    fig = go.Figure(go.Funnel(
        y=display_stages,
        x=counts,
        textinfo="value+percent initial",
        marker={"color": colors},
        customdata=custom_data,
        hovertemplate="%{customdata}<extra></extra>"
    ))
    fig.update_layout(
        font=dict(size=12),
        height=700,  # Tall chart so the funnel is prominent
        margin=dict(t=50, b=50, l=50, r=50),
        hoverlabel=dict(
            bgcolor="white",
            font_size=12,
            font_family="Arial"
        ),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)'
    )
    return fig
def create_trend_chart(df):
    """Create a line chart of applications per day.

    One line shows all applications; up to three more lines show the most
    frequent statuses. The input frame is only read — no helper column is
    written back to it.

    Args:
        df: Applications DataFrame with a datetime 'App Date' column and a
            'Status' column.

    Returns:
        A ``plotly.graph_objects.Figure`` with the trend lines.
    """
    # Day label per row, kept as a separate Series so the caller's frame is
    # not mutated (it may be a filtered slice of the global data).
    day_labels = df['App Date'].dt.strftime('%Y-%m-%d').rename('Day')
    daily_counts = df.groupby(day_labels).size().reset_index(name='Count')

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=daily_counts['Day'],
        y=daily_counts['Count'],
        mode='lines+markers',
        name='Total Applications',
        line=dict(color='#FFCC00', width=3)
    ))

    # One line per status for the three most common statuses
    common_statuses = df['Status'].value_counts().nlargest(3).index.tolist()
    colors = ['#006400', '#8B0000', '#0000CD']  # Green, dark red, dark blue
    for status, color in zip(common_statuses, colors):
        status_days = day_labels[df['Status'] == status]
        status_counts = status_days.groupby(status_days).size().reset_index(name='Count')
        if status_counts.empty:
            continue
        fig.add_trace(go.Scatter(
            x=status_counts['Day'],
            y=status_counts['Count'],
            mode='lines+markers',
            name=status,
            line=dict(color=color, width=2)
        ))

    fig.update_layout(
        yaxis_title="Number of Applications",
        # Horizontal legend centred below the plot
        legend=dict(
            orientation="h",
            y=-0.2,
            x=0.5,
            xanchor="center"
        ),
        height=700,
        margin=dict(t=50, b=100, l=50, r=50),  # Extra bottom margin for the legend
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)'
    )
    # Date tick formatting
    fig.update_xaxes(
        tickangle=45,
        tickmode='auto',
        nticks=20,
        tickformat='%Y-%m-%d'
    )
    return fig
def create_bottleneck_chart(df):
    """Create a stacked bar chart of completed vs. pending share per stage.

    Args:
        df: Applications DataFrame. Stage columns that are missing are
            treated as 0% completed. An empty frame yields 0% for both
            series instead of raising.

    Returns:
        A ``plotly.graph_objects.Figure`` with two stacked bar traces
        ('Completed' and 'Pending'), values in percent.
    """
    stages = ['FRU Interview Scheduled', 'Defensive Driving', 'Medical Clearance Form', 'WAV Course', 'Drug Test',
              'Driver Exam', 'Other Requirements']
    # Values indicating completion (compared against lower-cased data)
    completed_values = ['completed', 'yes', 'done', 'complete']
    total = len(df)

    completion_rates = []
    pending_rates = []
    for stage in stages:
        if stage in df.columns:
            # Same normalisation as the funnel chart: tolerate NaN,
            # non-string values and surrounding whitespace.
            normalized = df[stage].astype(str).str.strip().str.lower()
            completed_count = int(normalized.isin(completed_values).sum())
        else:
            completed_count = 0

        if total:
            completion_rate = (completed_count / total) * 100
            pending_rate = ((total - completed_count) / total) * 100
        else:
            # Filters can leave no rows at all; avoid dividing by zero
            completion_rate = 0.0
            pending_rate = 0.0
        completion_rates.append(completion_rate)
        pending_rates.append(pending_rate)

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=stages,
        y=completion_rates,
        name='Completed',
        marker_color='#FFCC00',  # Taxi yellow
        text=[f"{rate:.1f}%" for rate in completion_rates],
        textposition='auto'
    ))
    # Pending bars use a hatched pattern rather than a solid fill
    fig.add_trace(go.Bar(
        x=stages,
        y=pending_rates,
        name='Pending',
        marker=dict(
            color='#E0E0E0',  # Light gray base color
            pattern=dict(
                shape="/",  # Diagonal lines pattern
                bgcolor="#505050",  # Pattern color
                solidity=0.85  # Pattern density
            )
        ),
        text=[f"{rate:.1f}%" for rate in pending_rates],
        textposition='auto'
    ))
    fig.update_layout(
        yaxis_title="Percentage",
        barmode='stack',
        legend=dict(orientation="h", y=1.1),
        height=700,
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)'
    )
    # Slanted stage names so long labels stay readable
    fig.update_xaxes(tickangle=45, tickfont=dict(size=12))
    return fig
def create_status_cards(df):
    """Build one summary card per distinct status.

    Args:
        df: Applications DataFrame with a 'Status' column.

    Returns:
        A list of ``html.Div`` cards, ordered by descending count, each
        showing the count and the status name with a colored left border.
    """
    # Accent color per known status (taxi theme); anything else is gray
    accent_by_status = {
        'Approved': '#006400',  # Green
        'Approved - Active': '#008000',  # Green
        'Pending': '#FFCC00',  # Yellow (taxi)
        'In Review': '#303030',  # Dark gray
        'Incomplete': '#8B0000',  # Dark red
        'Rejected': '#606060',  # Gray
    }
    fallback_accent = '#606060'

    def build_card(status, count):
        accent = accent_by_status.get(status, fallback_accent)
        count_part = html.Div(
            count,
            className='card-body',
            style={'fontSize': '24px', 'fontWeight': 'bold', 'textAlign': 'center'},
        )
        label_part = html.Div(
            status,
            className='card-footer bg-transparent',
            style={'textAlign': 'center', 'fontWeight': '500'},
        )
        return html.Div(
            [count_part, label_part],
            className='card mb-3 border-0',
            style={
                'backgroundColor': 'white',
                'borderLeft': f'4px solid {accent}',
                'margin': '10px 0',
                'borderRadius': '5px',
                'boxShadow': '0 2px 4px rgba(0,0,0,0.1)'
            },
        )

    # Two-column table: status name and how many rows carry it
    status_counts = df['Status'].value_counts().reset_index()
    status_counts.columns = ['Status', 'Count']

    return [
        build_card(row['Status'], row['Count'])
        for _, row in status_counts.iterrows()
    ]
# ---------- NEW AND MODIFIED FUNCTIONS FOR ML VISUALIZATION ----------
def create_feature_importance_chart(feature_importance):
    """Create a horizontal bar chart of the ten most important features.

    Args:
        feature_importance: DataFrame with 'Feature' and 'Importance'
            columns, expected to be sorted by importance descending (the
            first ten rows are plotted).

    Returns:
        A ``plotly.graph_objects.Figure`` with one horizontal bar per feature.
    """
    top_features = feature_importance.head(10)
    importances = top_features['Importance']

    bars = go.Bar(
        y=top_features['Feature'],
        x=importances,
        orientation='h',
        marker_color='#FFCC00',  # Taxi yellow
        text=[f"{value:.3f}" for value in importances],
        textposition='auto'
    )
    fig = go.Figure()
    fig.add_trace(bars)

    fig.update_layout(
        title="Top Features for Approval Prediction",
        xaxis_title="Coefficient Magnitude (Importance)",
        yaxis=dict(
            title="Feature",
            categoryorder='total ascending'  # Order the bars by value
        ),
        height=600,
        margin=dict(t=50, b=50, l=200, r=50),  # Wide left margin for feature names
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)'
    )
    return fig
# Prediction gauge: a simplified indicator that shows the predicted approval probability
def create_prediction_gauge(model, X_sample):
    """Create a gauge chart with the approval probability for one sample.

    Args:
        model: Fitted classifier exposing ``decision_function``.
        X_sample: pandas Series holding one row of features, indexed by
            feature name.

    Returns:
        A ``plotly.graph_objects.Figure`` with the gauge (0-100%), a box
        listing up to three completed and three pending stages, and a
        one-sentence interpretation of the probability.
    """
    # Pass a one-row DataFrame rather than a bare list so the feature names
    # travel with the values; a model fitted on a DataFrame otherwise warns
    # about missing feature names on every prediction.
    sample_frame = X_sample.to_frame().T
    log_odds = model.decision_function(sample_frame)[0]
    # Logistic function: log-odds -> probability
    probability = 1 / (1 + np.exp(-log_odds))

    prediction = "Approved" if probability > 0.5 else "Not Approved"

    # Probability bands, highest first: (lower bound, bar color, explanation).
    # A single table keeps the bar color and the interpretation in sync.
    bands = (
        (0.8, "green",
         "This application is <b>highly likely to be approved</b>."),
        (0.5, "#FFCC00",  # Taxi yellow
         "This application is <b>likely to be approved</b>, but requires attention."),
        (0.2, "orange",
         "This application is <b>likely to be rejected</b> without addressing pending requirements."),
    )
    color = "red"
    interpretation = "This application is <b>highly likely to be rejected</b>."
    for lower_bound, band_color, band_text in bands:
        if probability > lower_bound:
            color, interpretation = band_color, band_text
            break

    fig = go.Figure(go.Indicator(
        mode="gauge+number",
        value=probability * 100,  # Shown as a percentage
        domain={"x": [0, 1], "y": [0, 1]},
        title={
            "text": f"<b>Prediction: {prediction}</b>",
            "font": {"size": 24}
        },
        gauge={
            "axis": {"range": [0, 100], "tickwidth": 1, "tickcolor": "darkblue"},
            "bar": {"color": color},
            "bgcolor": "white",
            "borderwidth": 2,
            "bordercolor": "gray",
            "steps": [
                {"range": [0, 20], "color": "red"},
                {"range": [20, 50], "color": "orange"},
                {"range": [50, 80], "color": "#FFCC00"},
                {"range": [80, 100], "color": "green"}
            ],
            "threshold": {
                "line": {"color": "black", "width": 4},
                "thickness": 0.75,
                "value": 50  # The decision threshold
            }
        }
    ))

    # Split the stage flags into completed (1) and pending (0); any other
    # value (e.g. a placeholder sample) is listed in neither group.
    approval_factors = []
    rejection_factors = []
    for feature, value in X_sample.items():
        if not feature.endswith('_completed'):
            continue
        stage_name = feature.replace('_completed', '')
        if value == 1:
            approval_factors.append(stage_name)
        elif value == 0:
            rejection_factors.append(stage_name)

    # Key-factor box: at most three stages per group
    annotation_text = "<b>Key Factors:</b><br>"
    if approval_factors:
        annotation_text += "<span style='color:green'>✓ Completed: " + ", ".join(approval_factors[:3]) + "</span><br>"
    if rejection_factors:
        annotation_text += "<span style='color:red'>✗ Pending: " + ", ".join(rejection_factors[:3]) + "</span>"
    fig.add_annotation(
        x=0.01,
        y=0,
        xref="paper",
        yref="paper",
        text=annotation_text,
        showarrow=False,
        font=dict(size=12),
        align="left",
        bordercolor="black",
        borderwidth=1,
        borderpad=4,
        bgcolor="white",
        opacity=0.8
    )

    # Plain-language interpretation below the gauge
    fig.add_annotation(
        x=0.5,
        y=-0.15,
        xref="paper",
        yref="paper",
        text=interpretation,
        showarrow=False,
        font=dict(size=14),
        align="center"
    )
    fig.update_layout(
        height=600,
        margin=dict(t=100, b=150, l=100, r=50),  # Bottom margin leaves room for the interpretation
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)'
    )
    return fig
def create_ml_overview(model, feature_importance, accuracy):
    """Build the sidebar panel that summarises the model.

    Args:
        model: Unused; kept so the signature stays stable for callers.
        feature_importance: Unused; kept for the same reason.
        accuracy: Model accuracy as a fraction (rendered as a percentage
            with one decimal).

    Returns:
        An ``html.Div`` with a heading, an accuracy card and a description
        card.
    """
    accuracy_card = html.Div(
        [
            html.Div(
                [
                    html.H4(f"{accuracy:.1%}", className="text-center display-4"),
                    html.P("Model Accuracy", className="text-center text-muted"),
                ],
                className="card-body",
            )
        ],
        className="card bg-white shadow-sm mb-4",
    )

    description_card = html.Div(
        [
            html.H5("Model Description", className="card-header bg-light"),
            html.Div(
                [
                    html.P("This logistic regression model predicts whether a TLC driver application will be approved based on completion of different process stages."),
                    html.P("The features with the highest impact on approval probability are shown in the feature importance chart."),
                ],
                className="card-body",
            ),
        ],
        className="card bg-white shadow-sm",
    )

    heading = html.H3("Logistic Regression Model", className="text-center mb-3")
    return html.Div([
        heading,
        html.Div([accuracy_card, description_card]),
    ])
# ---------- DASHBOARD LAYOUT ----------
# Page structure: header, filter row, a chart column holding four view
# containers (only one is visible at a time; the others carry the Bootstrap
# 'd-none' class), a sidebar with status cards and the ML overview, and a footer.
app.layout = html.Div([
    # Header Section
    html.Div([
        html.Div([
            html.H1("NY TLC Driver Application Analysis Dashboard",
                    className="display-5 fw-bold text-center mb-2"),
            html.P("Process flow analysis and approval prediction for TLC driver applications",
                   className="lead text-center text-muted mb-4")
        ], className="py-3")
    ], className="container-fluid bg-white shadow-sm"),
    # Main Content Section
    html.Div([
        html.Div([
            # Filters Row
            html.Div([
                html.Div([
                    html.Label("Filter by Status:", className="form-label"),
                    dcc.Dropdown(
                        id='status-filter',
                        options=[{'label': 'All', 'value': 'all'}] +
                                [{'label': s, 'value': s} for s in df['Status'].unique()],
                        value='all',
                        clearable=False,
                        className="form-select"
                    )
                ], className='col-md-3'),
                html.Div([
                    html.Label("Date Range:", className="form-label"),
                    dcc.DatePickerRange(
                        id='date-range',
                        min_date_allowed=df['App Date'].min(),
                        max_date_allowed=df['App Date'].max(),
                        start_date=df['App Date'].min(),
                        end_date=df['App Date'].max(),
                        display_format='YYYY-MM-DD',
                        className="form-control"
                    )
                ], className='col-md-3')
            ], className='row mb-4 bg-light p-3 rounded shadow-sm'),
            # Main Content Row (Charts + Sidebar)
            html.Div([
                # Charts Column
                html.Div([
                    # Chart selector buttons, one per view (including the ML prediction view)
                    html.Div([
                        html.Button('TLC Process Stages', id='btn-funnel', n_clicks=1,
                                    className="btn me-2 fw-bold"),
                        html.Button('Application Timeline', id='btn-trend', n_clicks=0,
                                    className="btn me-2 fw-bold"),
                        html.Button('Process Bottlenecks', id='btn-bottleneck', n_clicks=0,
                                    className="btn me-2 fw-bold"),
                        html.Button('Approval Prediction', id='btn-ml', n_clicks=0,
                                    className="btn fw-bold")
                    ], className="d-flex justify-content-center mb-4"),
                    # Chart Containers
                    html.Div([
                        html.H2("Application Flow by Stage", className="text-center mb-3"),
                        dcc.Graph(id='funnel-chart')
                    ], id='funnel-container', className="bg-white p-3 rounded shadow"),
                    html.Div([
                        html.H2("Application Trend Over Time", className="text-center mb-3"),
                        dcc.Graph(id='trend-chart')
                    ], id='trend-container', className="bg-white p-3 rounded shadow d-none"),
                    html.Div([
                        html.H2("Completion Status by Stage", className="text-center mb-3"),
                        dcc.Graph(id='bottleneck-chart')
                    ], id='bottleneck-container', className="bg-white p-3 rounded shadow d-none"),
                    # ML container: the waterfall chart was replaced by a gauge
                    html.Div([
                        html.H2("Application Approval Prediction", className="text-center mb-3"),
                        # Sample selection dropdown (ten placeholder entries; the
                        # options are replaced by a callback when filters change)
                        html.Div([
                            html.Label("Select Application Sample:", className="form-label"),
                            dcc.Dropdown(
                                id='sample-selector',
                                options=[{'label': f'Sample {i+1}', 'value': i} for i in range(10)],
                                value=0,
                                clearable=False,
                                className="form-select mb-4"
                            )
                        ], className="mb-4"),
                        # ML visualizations
                        html.Div([
                            # Left side: Feature importance
                            html.Div([
                                html.H4("Feature Importance", className="text-center mb-3"),
                                dcc.Graph(id='feature-importance-chart')
                            ], className="col-md-6"),
                            # Right side: prediction gauge chart
                            html.Div([
                                html.H4("Approval Prediction", className="text-center mb-3"),
                                dcc.Graph(id='prediction-gauge-chart')
                            ], className="col-md-6"),
                        ], className="row")
                    ], id='ml-container', className="bg-white p-3 rounded shadow d-none"),
                ], className='col-lg-9'),
                # Sidebar Column
                html.Div([
                    # Status cards
                    html.Div([
                        html.H3("Status Distribution", className="text-center mb-3"),
                        html.Div(id='status-cards', className="px-2")
                    ], className="bg-white rounded shadow p-3 mb-4"),
                    # ML Overview Section (only shown together with the prediction view)
                    html.Div(
                        id='ml-overview',
                        className="bg-white rounded shadow p-3 sticky-top d-none" # Initially hidden
                    )
                ], className='col-lg-3')
            ], className='row')
        ], className='container-fluid py-4')
    ], className="bg-light min-vh-100"),
    # Footer Section
    html.Footer([
        html.Hr(),
        html.Div([
            html.P("TLC Driver Application Analysis Dashboard © 2025",
                   className="text-center text-muted")
        ], className="container py-3")
    ], className="bg-white shadow-sm mt-auto")
], className="d-flex flex-column min-vh-100")
# ---------- CALLBACKS FOR INTERACTIVITY ----------
# Show exactly one view container and highlight its button
@app.callback(
    [Output('funnel-container', 'className'),
     Output('trend-container', 'className'),
     Output('bottleneck-container', 'className'),
     Output('ml-container', 'className'),
     Output('btn-funnel', 'className'),
     Output('btn-trend', 'className'),
     Output('btn-bottleneck', 'className'),
     Output('btn-ml', 'className'),
     Output('ml-overview', 'className')],  # ML overview is only visible with the ML view
    [Input('btn-funnel', 'n_clicks'),
     Input('btn-trend', 'n_clicks'),
     Input('btn-bottleneck', 'n_clicks'),
     Input('btn-ml', 'n_clicks')]
)
def toggle_chart_visibility(funnel_clicks, trend_clicks, bottleneck_clicks, ml_clicks):
    """Return the CSS classes that show the selected view and mark its button.

    The click counts are not inspected; the pressed button is read from the
    callback context. Without a trigger the funnel view is shown.

    Returns:
        A 9-tuple of class strings: the four containers (funnel, trend,
        bottleneck, ML), the four buttons in the same order, and the ML
        overview panel.
    """
    ctx = dash.callback_context
    if ctx.triggered:
        pressed = ctx.triggered[0]['prop_id'].split('.')[0]
    else:
        pressed = 'btn-funnel'

    # Any id other than the three chart buttons selects the ML view
    chart_buttons = ('btn-funnel', 'btn-trend', 'btn-bottleneck')
    active = pressed if pressed in chart_buttons else 'btn-ml'

    def container_class(button):
        visible = "bg-white p-3 rounded shadow"
        return visible if button == active else visible + " d-none"

    def button_class(button, base):
        # Active button gets the taxi-yellow style, the rest an outline
        suffix = " btn-warning" if button == active else " btn-outline-dark"
        return base + suffix

    ml_overview_class = "bg-white rounded shadow p-3 sticky-top"
    if active != 'btn-ml':
        ml_overview_class += " d-none"

    return (
        container_class('btn-funnel'),
        container_class('btn-trend'),
        container_class('btn-bottleneck'),
        container_class('btn-ml'),
        button_class('btn-funnel', "btn me-2 fw-bold"),
        button_class('btn-trend', "btn me-2 fw-bold"),
        button_class('btn-bottleneck', "btn me-2 fw-bold"),
        button_class('btn-ml', "btn fw-bold"),
        ml_overview_class,
    )
# Callback that refreshes the three charts and the status cards whenever a filter changes
@app.callback(
    [Output('funnel-chart', 'figure'),
     Output('trend-chart', 'figure'),
     Output('bottleneck-chart', 'figure'),
     Output('status-cards', 'children')],
    [Input('status-filter', 'value'),
     Input('date-range', 'start_date'),
     Input('date-range', 'end_date')]
)
def update_charts(status, start_date, end_date):
    """Rebuild the funnel, trend and bottleneck charts and the status cards.

    Args:
        status: Selected status, or 'all' / None for no status filter.
        start_date: Start of the date range (as delivered by the date
            picker); the date filter is applied only when both ends are set.
        end_date: End of the date range, inclusive.

    Returns:
        ``(funnel_fig, trend_fig, bottleneck_fig, cards)``.
    """
    # Work on a copy so nothing downstream can touch the global data
    filtered_df = df.copy()

    # Status filter; None is treated like 'all', as in the ML callbacks
    if status is not None and status != 'all':
        filtered_df = filtered_df[filtered_df['Status'] == status]

    # Date range filter (both bounds inclusive)
    if start_date and end_date:
        in_range = ((filtered_df['App Date'] >= start_date) &
                    (filtered_df['App Date'] <= end_date))
        filtered_df = filtered_df[in_range]

    funnel_fig = create_funnel_chart(filtered_df)
    trend_fig = create_trend_chart(filtered_df)
    bottleneck_fig = create_bottleneck_chart(filtered_df)
    cards = create_status_cards(filtered_df)
    return funnel_fig, trend_fig, bottleneck_fig, cards
# ---------- ML SAMPLE SELECTION AND CALLBACKS ----------
# Both ML callbacks derive the selectable samples from the same helper, so the
# dropdown entries always refer to the rows the gauge will actually use.
_MAX_SAMPLES = 10
_MAX_APPROVED_SAMPLES = 7


def _select_ml_samples(status, start_date, end_date):
    """Return the model rows offered in the sample selector, in display order.

    The status filter is mapped onto ``is_approved`` (a status containing
    'Approved' keeps approved rows, any other status keeps the rest); the
    date filter applies only when both bounds are set. Up to 7 approved rows
    come first, followed by not-approved rows up to a total of 10. Sampling
    uses a fixed seed, so the same filters always yield the same rows.
    """
    rows = model_df
    if status is not None and status != 'all':
        wanted = 1 if 'Approved' in status else 0
        rows = rows[rows['is_approved'] == wanted]
    if start_date and end_date:
        rows = rows[(rows['App Date'] >= start_date) &
                    (rows['App Date'] <= end_date)]

    approved = rows[rows['is_approved'] == 1]
    not_approved = rows[rows['is_approved'] == 0]
    num_approved = min(_MAX_APPROVED_SAMPLES, len(approved))
    num_not_approved = min(_MAX_SAMPLES - num_approved, len(not_approved))

    picked = []
    if num_approved:
        picked.append(approved.sample(n=num_approved, random_state=42))
    if num_not_approved:
        picked.append(not_approved.sample(n=num_not_approved, random_state=42))
    if not picked:
        # Empty frame that still carries the model columns
        return rows.iloc[:0]
    return pd.concat(picked)


# Callback that refreshes the ML visualizations for the selected sample
@app.callback(
    [Output('feature-importance-chart', 'figure'),
     Output('prediction-gauge-chart', 'figure'),
     Output('ml-overview', 'children')],
    [Input('sample-selector', 'value')],
    [State('status-filter', 'value'),
     State('date-range', 'start_date'),
     State('date-range', 'end_date')]
)
def update_ml_visualizations(sample_index, status_filter, start_date, end_date):
    """Build the feature-importance chart, the gauge and the model overview.

    The filters are read as State, so this callback runs when the sample
    selection changes rather than on every filter change.

    Args:
        sample_index: Position of the chosen sample in the selectable rows;
            an out-of-range or missing value falls back to the first sample.
        status_filter: Current status filter value.
        start_date: Current start of the date range.
        end_date: Current end of the date range.

    Returns:
        ``(feature_importance_fig, prediction_gauge_fig, ml_overview_content)``.
    """
    # Feature importance does not depend on the selected sample
    feature_importance_fig = create_feature_importance_chart(feature_importance)

    sample_df = _select_ml_samples(status_filter, start_date, end_date)
    feature_cols = [col for col in model_df.columns if col.endswith('_completed')]
    feature_cols += ['app_month', 'app_day_of_week']

    if sample_df.empty:
        # No rows match the filters: fall back to a neutral placeholder sample
        X_sample = pd.Series([0.5] * len(feature_cols), index=feature_cols)
    else:
        if sample_index is None or not 0 <= sample_index < len(sample_df):
            sample_index = 0
        X_sample = sample_df[feature_cols].iloc[sample_index]

    prediction_gauge_fig = create_prediction_gauge(model, X_sample)
    ml_overview_content = create_ml_overview(model, feature_importance, model_accuracy)
    return feature_importance_fig, prediction_gauge_fig, ml_overview_content


# Callback that rebuilds the sample dropdown when the filters change
@app.callback(
    [Output('sample-selector', 'options'),
     Output('sample-selector', 'value')],
    [Input('status-filter', 'value'),
     Input('date-range', 'start_date'),
     Input('date-range', 'end_date')],
    [State('sample-selector', 'value')]
)
def update_sample_options(status, start_date, end_date, current_value):
    """Return the dropdown options for the current filters and the value to select.

    Each option's value is the sample's position in the selectable rows and
    its label states whether that row is approved, so labels and values
    always match the rows used by ``update_ml_visualizations``. When no row
    matches, a single generic option (value 0) is offered.

    Returns:
        ``(options, new_value)``; the current selection is kept when it is
        still a valid position, otherwise it resets to 0.
    """
    sample_df = _select_ml_samples(status, start_date, end_date)

    options = [
        {
            'label': f"Sample {position + 1} ({'Approved' if approved else 'Not Approved'})",
            'value': position,
        }
        for position, approved in enumerate(sample_df['is_approved'])
    ]
    if not options:
        options = [{'label': 'Sample 1', 'value': 0}]

    keep_current = current_value is not None and 0 <= current_value < len(options)
    new_value = current_value if keep_current else 0
    return options, new_value
# ---------- CUSTOM PAGE TEMPLATE (INLINE CSS) ----------
# Replaces the HTML index template used by the Dash app. The {%...%}
# placeholders are left for Dash to fill in. The template adds the taxi-themed
# styles (button colors, card hover effect, checkerboard stripe), a stripe
# element above the app entry point, and the Bootstrap JS bundle from a CDN.
app.index_string = '''
<!DOCTYPE html>
<html>
<head>
{%metas%}
<title>TLC Driver Application Dashboard</title>
{%favicon%}
{%css%}
<style>
:root {
--taxi-yellow: #FFCC00;
--taxi-dark: #303030;
}
body {
font-family: 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
}
.btn-warning {
background-color: var(--taxi-yellow);
border-color: var(--taxi-yellow);
}
.btn-outline-dark {
color: var(--taxi-dark);
border-color: var(--taxi-dark);
}
.btn-outline-dark:hover {
background-color: var(--taxi-dark);
color: white;
}
/* Taxi checkerboard pattern for header */
.taxi-header {
background: linear-gradient(45deg, var(--taxi-yellow) 25%, var(--taxi-dark) 25%, var(--taxi-dark) 50%,
var(--taxi-yellow) 50%, var(--taxi-yellow) 75%, var(--taxi-dark) 75%, var(--taxi-dark) 100%);
background-size: 10px 10px;
padding: 5px 0;
}
/* Card hover effects */
.card {
transition: transform 0.3s ease, box-shadow 0.3s ease;
}
.card:hover {
transform: translateY(-5px);
box-shadow: 0 8px 16px rgba(0,0,0,0.2) !important;
}
/* Footer styling */
footer {
display: block !important;
visibility: visible !important;
}
</style>
</head>
<body>
<!-- Taxi checkerboard pattern header stripe -->
<div class="taxi-header"></div>
{%app_entry%}
<footer>
{%config%}
{%scripts%}
{%renderer%}
<!-- Bootstrap JS -->
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.2.3/dist/js/bootstrap.bundle.min.js"></script>
</footer>
</body>
</html>
'''