import dash
from dash import dcc, html, Input, Output
import dash_bootstrap_components as dbc
import plotly.express as px
import pandas as pd
import random
from functools import lru_cache
# Load the film dataset a single time at import; the functions below all read
# this module-level frame instead of re-reading the CSV.
DATA_FILE = "marvel_movies_pca_cluster.csv"
df = pd.read_csv(DATA_FILE)
# Per-cluster summary statistics, computed on first use and then cached.
@lru_cache(maxsize=1)
def get_cluster_analysis():
    """Summarise each distinct value of ``df['model_clusters_pca']``.

    Returns:
        A dict keyed by cluster label, in order of first appearance in ``df``.
        Each value is a dict with the keys ``count`` (number of rows),
        ``avg_gross``, ``avg_critic``, ``avg_audience``, ``avg_profit``
        (column means) and ``top_film`` (the ``film`` value of the row with
        the largest ``worldwide gross``, or ``"N/A"`` when the cluster has
        no rows).

    The result is cached, so every call returns the same dict object.
    """
    def summarise(rows):
        # Only look up the best-grossing row when there is at least one row.
        if len(rows) > 0:
            best_row = rows.loc[rows['worldwide gross'].idxmax()]
            best_title = best_row['film']
        else:
            best_title = "N/A"
        return {
            'count': len(rows),
            'avg_gross': rows['worldwide gross'].mean(),
            'avg_critic': rows['critics % score'].mean(),
            'avg_audience': rows['audience % score'].mean(),
            'avg_profit': rows['profit'].mean(),
            'top_film': best_title,
        }

    labels = df['model_clusters_pca']
    return {label: summarise(df[labels == label]) for label in labels.unique()}
# Presentation settings for the clusters, derived once at import time.
CLUSTER_COLORS = ["#E74C3C", "#3498DB", "#F39C12", "#9B59B6"]
CLUSTER_DESCRIPTIONS = {
    "Consistent & Solid Performers": (
        "Well-balanced films with steady commercial success and strong critical reception across the board."
    ),
    "Critically Challenged & Financially Subdued (by Marvel Standards)": (
        "Films that underperformed both critically and financially compared to typical Marvel standards."
    ),
    "Global Blockbuster Powerhouses": (
        "Massive commercial successes with exceptional worldwide box office performance and critical acclaim."
    ),
    "Underperforming (for Marvel) & Audience-Disappointing Films": (
        "Movies that failed to meet Marvel's typical audience engagement and commercial expectations."
    ),
}

# Cached per-cluster statistics; its key order drives the colour assignment.
CLUSTER_ANALYSIS = get_cluster_analysis()
colors = CLUSTER_COLORS  # module-level alias kept from the original code

# Description and colour for each cluster. Colours are handed out in key order
# and wrap around when there are more clusters than palette entries; labels
# without a description entry get the generic fallback text.
CLUSTER_INFO = {
    name: {
        'description': CLUSTER_DESCRIPTIONS.get(
            name, "Marvel film cluster with unique performance characteristics."
        ),
        'color': colors[position % len(colors)],
    }
    for position, name in enumerate(CLUSTER_ANALYSIS)
}
CLUSTER_ORDER = list(CLUSTER_INFO)
COLOR_MAP = {name: CLUSTER_INFO[name]['color'] for name in CLUSTER_ORDER}
# Pool of facts shown by the sidebar's "Random Fact" button. Each entry is a
# single string; adjacent literals inside the parentheses are concatenated.
MARVEL_TRIVIA = [
    ("Iron Man (2008) launched the Marvel Cinematic Universe "
     "and changed superhero movies forever."),
    ("Avengers: Endgame became the highest-grossing film of all time "
     "with over $2.7 billion worldwide."),
    ("Stan Lee made cameo appearances in nearly every Marvel movie "
     "until his passing in 2018."),
    ("The Marvel Cinematic Universe spans over 25 films "
     "and has grossed over $25 billion worldwide."),
    ("Black Panther was the first superhero film nominated "
     "for Best Picture at the Academy Awards."),
    ("Spider-Man has been portrayed by three different actors in the MCU: "
     "Tobey Maguire, Andrew Garfield, and Tom Holland."),
    ("The Infinity Stones storyline was planned across multiple films "
     "over more than a decade."),
    ("Guardians of the Galaxy introduced audiences "
     "to the cosmic side of the Marvel Universe."),
    ("Captain America: The Winter Soldier was praised "
     "for its political thriller approach to superhero storytelling."),
    ("Thor: Ragnarok completely reinvented the Thor franchise "
     "with humor and vibrant visuals."),
]
# Create the Dash application.
FONT_AWESOME_CSS = 'https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css'

app = dash.Dash(
    __name__,
    # Bootstrap "Sketchy" theme plus Font Awesome for the "fas fa-*" icons.
    external_stylesheets=[dbc.themes.SKETCHY, FONT_AWESOME_CSS],
    suppress_callback_exceptions=True,
    # App-wide default for each callback's prevent_initial_call option.
    prevent_initial_callbacks=True,
)
app.title = "Marvel Films Analytics"
# Shared Plotly layout settings, built once and reused by every chart.
@lru_cache(maxsize=10)
def get_base_figure_layout():
    """Return the layout options common to all figures in this dashboard.

    The function takes no arguments, so the cache holds a single entry and
    every call returns the same dict object; treat it as read-only.
    """
    def axis_style():
        # A fresh dict per call so the x and y settings are separate objects.
        return {
            'showgrid': True,
            'gridcolor': 'rgba(220, 220, 220, 0.8)',
            'tickfont': {'size': 12},
            'title_font': {'size': 16, 'color': '#505050'},
        }

    # Horizontal legend centred just above the plotting area.
    legend_style = {
        'orientation': "h",
        'yanchor': "bottom",
        'y': 1.02,
        'xanchor': "center",
        'x': 0.5,
        'font': {'size': 16},
        'title_font': {'size': 16},
    }
    hover_style = {
        'bgcolor': "white",
        'font_size': 14,
        'font_family': "Nunito, sans-serif",
        'bordercolor': "gray",
    }
    return {
        'plot_bgcolor': 'rgba(240, 240, 240, 0.5)',
        'paper_bgcolor': 'white',
        'font': {'family': 'Nunito, sans-serif', 'size': 14},
        'margin': {'l': 40, 'r': 40, 't': 40, 'b': 60},
        'legend': legend_style,
        'xaxis': axis_style(),
        'yaxis': axis_style(),
        'hoverlabel': hover_style,
    }
def style_figure(fig):
    """Apply the shared base layout to ``fig`` in place and return ``fig``."""
    shared_layout = get_base_figure_layout()
    fig.update_layout(shared_layout)
    return fig
# UI building blocks
def create_header():
    """Build the page banner: title and tagline on the left, three icons on the right."""
    heading = html.Div(
        [
            html.H2("Marvel Films Analytics", className="display-4 fw-bold"),
            html.P("Exploring the Marvel Cinematic Universe through data", className="lead"),
        ],
        className="container py-4",
    )
    icon_classes = ("fas fa-mask me-2", "fas fa-bolt me-2", "fas fa-star me-2")
    icon_strip = html.Div(
        [html.I(className=css) for css in icon_classes],
        className="d-flex justify-content-end align-items-center",
    )
    return html.Div(
        [heading, icon_strip],
        className="bg-danger text-white mb-4 d-flex justify-content-between",
    )
def _nav_button(icon_class, label, button_id):
    """Return a full-width outlined top-level sidebar button with a leading icon."""
    return dbc.Button(
        [html.I(className=icon_class), label],
        id=button_id,
        color="danger",
        outline=True,
        className="mb-2 w-100 text-start shadow-sm",
    )


def _submenu(container_id, entries):
    """Return an initially hidden group of link-style buttons.

    ``entries`` is an iterable of ``(label, button_id)`` pairs, rendered in order.
    """
    links = [
        dbc.Button(
            label,
            id=button_id,
            color="link",
            className="text-decoration-none d-block text-start ps-2 py-2 fw-normal",
        )
        for label, button_id in entries
    ]
    indented = html.Div(
        className="ps-3 mt-2 mb-3 border-start border-3 border-danger",
        children=links,
    )
    return html.Div([indented], id=container_id, style={"display": "none"})


def create_sidebar():
    """Build the sidebar card: navigation buttons, their submenus and the trivia box."""
    navigation = html.Div(
        [
            _nav_button("fas fa-chart-pie me-2", "Film Groupings", "btn-cluster-overview"),
            _nav_button("fas fa-chart-line me-2", "Box Office Trends", "btn-trend-metrics"),
            # Trend options (hidden until their parent button is clicked)
            _submenu("trend-options", [
                ("Worldwide Gross", "btn-worldwide"),
                ("Domestic Gross", "btn-domestic"),
                ("International Gross", "btn-international"),
            ]),
            _nav_button("fas fa-chart-bar me-2", "Performance Metrics", "btn-film-metrics"),
            # Metric options (hidden until their parent button is clicked)
            _submenu("metrics-options", [
                ("Box Office Performance", "btn-box-office"),
                ("Budget Recovery", "btn-budget-recovery"),
                ("Profit Analysis", "btn-profit"),
                ("Opening Weekend", "btn-opening"),
            ]),
            _nav_button("fas fa-clock me-2", "Timeline", "btn-movie-timeline"),
        ],
        className="mb-4",
    )

    # Trivia section
    trivia = html.Div([
        html.H5("Marvel Trivia", className="text-danger mb-3 fw-bold"),
        dbc.Button("Random Fact", id='trivia-button', color="danger", className="w-100 shadow-sm"),
        html.Div(id='trivia-section', className="p-3 mt-3 border rounded bg-light shadow-sm"),
    ])

    card = dbc.Card(
        [
            dbc.CardHeader(html.H4("Data Visualizations", className="text-danger fw-bold")),
            dbc.CardBody([navigation, html.Hr(), trivia]),
        ],
        className="sticky-top shadow-sm",
    )
    return html.Div(
        [card],
        style={'position': 'sticky', 'top': '0', 'height': '100vh', 'overflowY': 'auto'},
    )
# Page layout: header banner, sidebar beside the main content column, footer.
def _build_layout():
    """Assemble the root component tree that is assigned to ``app.layout``."""
    sidebar_column = dbc.Col(create_sidebar(), width=3, className="mb-4")
    main_column = dbc.Col(
        [
            # Title and content are filled in by callbacks targeting these ids.
            html.H3(id="graph-title", className="mb-4 text-danger fw-bold"),
            dcc.Loading(
                id="loading",
                type="default",
                children=html.Div(id="content-area", className="mb-4 shadow-sm"),
            ),
        ],
        width=9,
    )
    body = dbc.Container([dbc.Row([sidebar_column, main_column])], fluid=True)
    footer = html.Footer(
        [html.Div("© 2025 Marvel Films Analysis Dashboard", className="text-center py-3")],
        className="bg-light mt-4 border-top",
    )
    return html.Div(
        [create_header(), body, footer],
        style={'backgroundColor': '#F8F9FA', 'fontFamily': '"Nunito", sans-serif'},
    )


app.layout = _build_layout()
# Content builders. The cluster overview reuses the pre-computed statistics.
def _stat_tile(value_text, caption):
    """Return one column showing a highlighted value above a small caption."""
    return html.Div(
        [
            html.H3(value_text, className="fw-bold text-danger text-center mb-1"),
            html.P(caption, className="text-muted text-center small"),
        ],
        className="col",
    )


def _cluster_card(cluster_name, stats):
    """Return the half-width summary card for one cluster.

    ``stats`` is that cluster's entry from ``CLUSTER_ANALYSIS``.
    """
    appearance = CLUSTER_INFO[cluster_name]
    header = dbc.CardHeader(
        html.H5(cluster_name, className="fw-bold m-0"),
        style={"backgroundColor": appearance["color"], "color": "white"},
    )
    tiles = html.Div(
        [
            _stat_tile(f"{stats['count']}", "Films"),
            _stat_tile(f"${stats['avg_gross']:.0f}M", "Avg. Gross"),
            _stat_tile(f"{stats['avg_critic']:.0f}%", "Critics"),
            _stat_tile(f"{stats['avg_audience']:.0f}%", "Audience"),
        ],
        className="row mb-3",
    )
    top_performer = html.Div(
        className="border-top pt-3 mt-2",
        children=[
            html.P(
                [
                    html.Span("Top performer: ", className="text-muted"),
                    html.Span(f"{stats['top_film']}", className="fw-bold"),
                ],
                className="text-center mb-0",
            )
        ],
    )
    body = dbc.CardBody([
        html.P(appearance["description"], className="mb-4"),
        tiles,
        top_performer,
    ])
    return dbc.Col(
        dbc.Card([header, body], className="h-100 shadow"),
        width=6,
        className="mb-4",
    )


@lru_cache(maxsize=1)
def generate_cluster_overview():
    """Build the cluster overview: one card per cluster plus a scatter plot.

    Cards follow ``CLUSTER_ORDER`` and clusters whose ``count`` is 0 are
    skipped. The scatter plots profit against critics score for every row of
    ``df``, sized by worldwide gross and coloured by cluster. The result is
    cached, so repeated calls return the same component object.
    """
    cards = [
        _cluster_card(name, CLUSTER_ANALYSIS[name])
        for name in CLUSTER_ORDER
        if CLUSTER_ANALYSIS[name]['count'] != 0
    ]

    axis_labels = {
        'profit': 'Profit ($M)',
        'critics % score': 'Critics Score (%)',
        'model_clusters_pca': 'Cluster Group',
        'worldwide gross': 'Worldwide Gross ($M)',
    }
    # False hides a column from the hover box; a string is a number format.
    hover_fields = {
        'film': False,
        'profit': ':.1f',
        'critics % score': True,
        'audience % score': True,
        'worldwide gross': ':.0f',
        'model_clusters_pca': False,
    }
    figure = px.scatter(
        df,
        x='profit',
        y='critics % score',
        color='model_clusters_pca',
        size='worldwide gross',
        size_max=30,
        color_discrete_map=COLOR_MAP,
        labels=axis_labels,
        hover_name='film',
        hover_data=hover_fields,
    )
    figure.update_traces(marker=dict(opacity=0.85, line=dict(width=1, color='white')))
    figure = style_figure(figure)
    figure.update_layout(legend_title_text='Marvel Film Groups')

    graph_card = dbc.Card(
        [dbc.CardBody([dcc.Graph(figure=figure, style={"height": "600px"})])],
        className="shadow",
    )
    return html.Div([dbc.Row(cards, className="g-3 mb-4"), graph_card])
# Remaining chart builders
def generate_trend_chart(metric_key):
    """Build a card with a per-cluster line chart of ``metric_key`` over release year.

    Args:
        metric_key: Column of ``df`` to plot on the y axis; must be a key of
            the local ``metric_labels`` mapping.

    Returns:
        A ``dbc.Card`` wrapping a 600px-high ``dcc.Graph``.

    Raises:
        KeyError: If ``metric_key`` has no entry in ``metric_labels``.
    """
    metric_labels = {
        'worldwide gross': 'Worldwide Gross ($M)',
        'domestic gross ($m)': 'Domestic Gross ($M)',
        'international gross ($m)': 'International Gross ($M)'
    }

    # Rows are ordered by year before plotting so each line runs left to right.
    by_year = df.sort_values(by='year')
    axis_labels = {
        'year': 'Release Year',
        metric_key: metric_labels[metric_key],
        'model_clusters_pca': 'Cluster Group',
    }
    hover_fields = {
        'film': False,
        'year': True,
        metric_key: ':.0f',
        'model_clusters_pca': False,
    }
    chart = px.line(
        by_year,
        x='year',
        y=metric_key,
        color='model_clusters_pca',
        color_discrete_map=COLOR_MAP,
        line_shape='spline',
        markers=True,
        labels=axis_labels,
        hover_name='film',
        hover_data=hover_fields,
    )
    chart.update_traces(
        line=dict(width=4),
        marker=dict(size=12, line=dict(width=1, color='white')),
    )
    chart = style_figure(chart)
    chart.update_layout(legend_title_text='Marvel Film Groups')

    graph = dcc.Graph(figure=chart, style={"height": "600px"})
    return dbc.Card(dbc.CardBody([graph]), className="shadow")
def generate_performance_chart(metric_key):
    """Build a card with a per-film bar chart of ``metric_key``, largest value first.

    Args:
        metric_key: Column of ``df`` to plot on the y axis; must be a key of
            the local ``metric_labels`` mapping.

    Returns:
        A ``dbc.Card`` wrapping a 600px-high ``dcc.Graph``.

    Raises:
        KeyError: If ``metric_key`` is not a column of ``df`` or has no entry
            in ``metric_labels``.
    """
    metric_labels = {
        'worldwide gross': 'Worldwide Gross ($M)',
        'budget_recovered': 'Budget Recovery (%)',
        'profit': 'Profit ($M)',
        'opening weekend ($m)': 'Opening Weekend ($M)'
    }
    # Sort so the films run from the highest to the lowest metric value.
    df_plot = df.sort_values(by=metric_key, ascending=False)
    fig = px.bar(
        df_plot,
        x='film',
        y=metric_key,
        color='model_clusters_pca',
        color_discrete_map=COLOR_MAP,
        labels={
            'film': 'Movie',
            metric_key: metric_labels[metric_key],
            'model_clusters_pca': 'Cluster Group'
        },
        hover_data={
            'year': True,
            metric_key: ':.1f',
            'critics % score': True,
            'audience % score': True,
            'model_clusters_pca': False,
            'film': False
        }
    )
    fig.update_traces(
        marker=dict(line=dict(width=1, color='white'), opacity=0.9),
        width=0.7
    )
    fig = style_figure(fig)
    # Colouring by cluster yields one trace per cluster, and a categorical
    # axis otherwise lists films trace by trace (bars grouped by cluster).
    # Pin the axis to the sorted film order so the bars are globally sorted.
    film_order = df_plot['film'].drop_duplicates().tolist()
    fig.update_xaxes(categoryorder='array', categoryarray=film_order)
    fig.update_xaxes(tickangle=45, tickfont=dict(size=12))
    fig.update_layout(legend_title_text='Marvel Film Groups')
    return dbc.Card(dbc.CardBody([
        dcc.Graph(figure=fig, style={"height": "600px"})
    ]), className="shadow")
def generate_timeline():
    """Build a card with a bubble chart of critics score against release year.

    Every row of ``df`` becomes one marker, sized by worldwide gross and
    coloured by cluster.

    Returns:
        A ``dbc.Card`` wrapping a 600px-high ``dcc.Graph``.
    """
    axis_labels = {
        'year': 'Release Year',
        'worldwide gross': 'Worldwide Gross ($M)',
        'critics % score': 'Critics Score (%)',
        'model_clusters_pca': 'Cluster Group',
    }
    hover_fields = {
        'year': True,
        'worldwide gross': ':.0f',
        'critics % score': True,
        'audience % score': True,
        'film': False,
        'model_clusters_pca': False,
    }
    timeline = px.scatter(
        df,
        x='year',
        y='critics % score',
        size='worldwide gross',
        size_max=35,
        color='model_clusters_pca',
        color_discrete_map=COLOR_MAP,
        labels=axis_labels,
        hover_name='film',
        hover_data=hover_fields,
    )
    timeline.update_traces(marker=dict(opacity=0.9, line=dict(width=1, color='white')))
    timeline = style_figure(timeline)
    timeline.update_layout(legend_title_text='Marvel Film Groups')

    graph = dcc.Graph(figure=timeline, style={"height": "600px"})
    return dbc.Card(dbc.CardBody([graph]), className="shadow")
# Callbacks
@app.callback(
    [Output("trend-options", "style"), Output("metrics-options", "style"),
     Output("btn-cluster-overview", "color"), Output("btn-trend-metrics", "color"),
     Output("btn-film-metrics", "color"), Output("btn-movie-timeline", "color"),
     Output("btn-cluster-overview", "outline"), Output("btn-trend-metrics", "outline"),
     Output("btn-film-metrics", "outline"), Output("btn-movie-timeline", "outline")],
    [Input("btn-trend-metrics", "n_clicks"), Input("btn-film-metrics", "n_clicks"),
     Input("btn-cluster-overview", "n_clicks"), Input("btn-movie-timeline", "n_clicks")],
    prevent_initial_call=True
)
def toggle_options(n1, n2, n3, n4):
    """Show the clicked button's submenu and mark that button as active.

    The click counts are unused; the triggering component id is read from
    ``dash.callback_context``.

    Returns:
        Ten values matching the declared outputs: the two submenu styles,
        four button colours (always ``"danger"``) and four ``outline`` flags
        (``False`` only for the clicked button).
    """
    ctx = dash.callback_context
    if not ctx.triggered:
        return [
            {"display": "none"}, {"display": "none"},
            "danger", "danger", "danger", "danger",
            True, True, True, True,
        ]

    clicked = ctx.triggered[0]['prop_id'].split('.')[0]

    # Position of each top-level button within the colour/outline output groups.
    slot_of = {
        "btn-cluster-overview": 0,
        "btn-trend-metrics": 1,
        "btn-film-metrics": 2,
        "btn-movie-timeline": 3,
    }
    outline_flags = [True] * 4
    if clicked in slot_of:
        outline_flags[slot_of[clicked]] = False

    # Only the two parent buttons reveal a submenu; any other click hides both.
    trend_display = "block" if clicked == "btn-trend-metrics" else "none"
    metrics_display = "block" if clicked == "btn-film-metrics" else "none"
    button_colors = ["danger"] * 4

    return (
        {"display": trend_display},
        {"display": metrics_display},
        *button_colors,
        *outline_flags,
    )
@app.callback(
    Output('graph-title', 'children'),
    [Input('btn-cluster-overview', 'n_clicks'), Input('btn-trend-metrics', 'n_clicks'),
     Input('btn-worldwide', 'n_clicks'), Input('btn-domestic', 'n_clicks'),
     Input('btn-international', 'n_clicks'), Input('btn-film-metrics', 'n_clicks'),
     Input('btn-box-office', 'n_clicks'), Input('btn-budget-recovery', 'n_clicks'),
     Input('btn-profit', 'n_clicks'), Input('btn-opening', 'n_clicks'),
     Input('btn-movie-timeline', 'n_clicks')],
    prevent_initial_call=True
)
def update_graph_title(*args):
    """Return the heading text for whichever button triggered the callback.

    The click counts in ``args`` are unused. Buttons without an entry in the
    mapping below (the two submenu parents) yield the generic title.
    """
    fallback = 'Marvel Films Analytics'
    triggered = dash.callback_context.triggered
    if not triggered:
        return fallback

    source_id = triggered[0]['prop_id'].split('.')[0]
    heading_for = {
        'btn-cluster-overview': 'Marvel Film Grouping Overview',
        'btn-worldwide': 'Worldwide Gross Trends',
        'btn-domestic': 'Domestic Gross Trends',
        'btn-international': 'International Gross Trends',
        'btn-box-office': 'Box Office Performance',
        'btn-budget-recovery': 'Budget Recovery Analysis',
        'btn-profit': 'Profit Analysis',
        'btn-opening': 'Opening Weekend Performance',
        'btn-movie-timeline': 'Marvel Movies Timeline',
    }
    if source_id in heading_for:
        return heading_for[source_id]
    return fallback
@app.callback(
    Output('trivia-section', 'children'),
    Input('trivia-button', 'n_clicks'),
    # The app is created with prevent_initial_callbacks=True, which would
    # suppress the page-load call that renders the prompt text below and
    # leave the trivia box empty until the first click. Opt back in here.
    prevent_initial_call=False
)
def update_trivia(n_clicks):
    """Return the trivia box content.

    Args:
        n_clicks: Click count of the "Random Fact" button; ``None`` or 0
            before the first click.

    Returns:
        A paragraph with a randomly chosen entry of ``MARVEL_TRIVIA`` once the
        button has been clicked, otherwise a muted prompt paragraph.
    """
    if n_clicks:
        return html.P(random.choice(MARVEL_TRIVIA), className="mb-0 fst-italic")
    return html.P("Click for a fun Marvel fact!", className="mb-0 text-muted")
@app.callback(
    Output('content-area', 'children'),
    [Input('btn-cluster-overview', 'n_clicks'), Input('btn-worldwide', 'n_clicks'),
     Input('btn-domestic', 'n_clicks'), Input('btn-international', 'n_clicks'),
     Input('btn-box-office', 'n_clicks'), Input('btn-budget-recovery', 'n_clicks'),
     Input('btn-profit', 'n_clicks'), Input('btn-opening', 'n_clicks'),
     Input('btn-movie-timeline', 'n_clicks')],
    prevent_initial_call=True
)
def update_content(*args):
    """Render the visualization that belongs to the button that was clicked.

    The click counts in ``args`` are unused; the triggering component id is
    read from ``dash.callback_context``. When nothing triggered the callback,
    or the id has no view, a placeholder message is returned.
    """
    def placeholder():
        return html.Div("Select a visualization using the buttons on the left",
                        className="text-center text-muted my-5 py-5")

    triggered = dash.callback_context.triggered
    if not triggered:
        return placeholder()

    source_id = triggered[0]['prop_id'].split('.')[0]

    # Button id -> (builder function, positional arguments for it).
    views = {
        'btn-cluster-overview': (generate_cluster_overview, ()),
        'btn-worldwide': (generate_trend_chart, ('worldwide gross',)),
        'btn-domestic': (generate_trend_chart, ('domestic gross ($m)',)),
        'btn-international': (generate_trend_chart, ('international gross ($m)',)),
        'btn-box-office': (generate_performance_chart, ('worldwide gross',)),
        'btn-budget-recovery': (generate_performance_chart, ('budget_recovered',)),
        'btn-profit': (generate_performance_chart, ('profit',)),
        'btn-opening': (generate_performance_chart, ('opening weekend ($m)',)),
        'btn-movie-timeline': (generate_timeline, ()),
    }
    selected = views.get(source_id)
    if selected is None:
        return placeholder()

    builder, builder_args = selected
    return builder(*builder_args)