# Vizro is an open-source toolkit for creating modular data visualization applications.
# check out https://github.com/mckinsey/vizro for more info about Vizro
# and check out https://vizro.readthedocs.io/en/stable/ for documentation.
import vizro.plotly.express as px
from vizro import Vizro
import vizro.models as vm
import pandas as pd
import glob
import os
# =========================================================================
# 1. LOAD AND COMBINE THE NETFLIX ENGAGEMENT REPORT FILES
# =========================================================================
# Half-yearly engagement report exports, looked up relative to the current
# working directory. Files that are missing are skipped with a warning.
file_names = [
    "What_We_Watched_A_Netflix_Engagement_Report_2023Jan-Jun (1).csv",
    "What_We_Watched_A_Netflix_Engagement_Report_2023Jul-Dec (1).csv",
    "What_We_Watched_A_Netflix_Engagement_Report_2024Jan-Jun (1).csv",
    "What_We_Watched_A_Netflix_Engagement_Report_2024Jul-Dec (1).csv",
    "What_We_Watched_A_Netflix_Engagement_Report_2025Jan-Jun (1).csv",
]

# Column names used by the dashboard. CSV headers are whitespace-stripped on
# load, so these names never carry leading or trailing spaces.
TITLE_COLUMN = "Title"
HOURS_COLUMN = "Hours Viewed"
LANGUAGE_COLUMN = "Language of Origin"
PERIOD_COLUMN = "Report Period"
REQUIRED_COLUMNS = [TITLE_COLUMN, HOURS_COLUMN, LANGUAGE_COLUMN, PERIOD_COLUMN]


def _extract_report_period(filename):
    """Return the reporting period encoded at the end of a report filename.

    The period is the last underscore-separated token of the file stem, with
    any trailing download suffix such as " (1)" removed. For example,
    "What_We_Watched_A_Netflix_Engagement_Report_2023Jan-Jun (1).csv"
    yields "2023Jan-Jun".
    """
    stem = os.path.splitext(os.path.basename(filename))[0]
    last_token = stem.rsplit("_", 1)[-1]
    pieces = last_token.split()
    return pieces[0] if pieces else last_token


def _load_report(filename):
    """Read one report CSV and tag every row with its reporting period."""
    # header=2 skips the two preamble rows so the third row supplies the headers.
    report = pd.read_csv(filename, header=2)
    # Strip stray whitespace from headers so one set of names works everywhere.
    report.columns = report.columns.astype(str).str.strip()
    report[PERIOD_COLUMN] = _extract_report_period(filename)
    return report


def _to_numeric_hours(values):
    """Coerce an hours column to numbers, tolerating thousands separators.

    Values that cannot be parsed become NaN. Already-numeric input is
    returned unchanged.
    """
    if pd.api.types.is_numeric_dtype(values):
        return values
    cleaned = values.astype(str).str.replace(",", "", regex=False).str.strip()
    return pd.to_numeric(cleaned, errors="coerce")


df_list = []
print("Attempting to load files...")
for filename in file_names:
    if not os.path.exists(filename):
        print(f"Warning: File not found: {filename}")
        continue
    df_list.append(_load_report(filename))
    print(f"Loaded: {filename}")

if df_list:
    # Concatenate all DataFrames into one master DataFrame.
    netflix_df = pd.concat(df_list, ignore_index=True)

    # Fail fast with a readable message instead of an opaque error further down
    # when the exports do not contain the columns the charts and filters use.
    missing_columns = [name for name in REQUIRED_COLUMNS if name not in netflix_df.columns]
    if missing_columns:
        raise ValueError(
            f"Loaded reports are missing required columns {missing_columns}; "
            f"available columns: {list(netflix_df.columns)}"
        )

    # Data cleaning: rows without a title cannot be plotted meaningfully.
    netflix_df = netflix_df.dropna(subset=[TITLE_COLUMN])
    # Hours may be exported as text (e.g. "1,234,567"); the scatter axis and
    # the range filter need real numbers.
    netflix_df[HOURS_COLUMN] = _to_numeric_hours(netflix_df[HOURS_COLUMN])
    print(f"Successfully merged {len(df_list)} files into one DataFrame.")
else:
    print("Error: No data files were loaded. Dashboard will be empty.")
    # Minimal placeholder frame with the same columns the dashboard expects,
    # so the page can still be built when no report file is present.
    netflix_df = pd.DataFrame(
        {
            TITLE_COLUMN: ["No Data"],
            HOURS_COLUMN: [0],
            LANGUAGE_COLUMN: ["N/A"],
            PERIOD_COLUMN: ["N/A"],
        }
    )

# =========================================================================
# 2. CONFIGURE THE PAGE
# =========================================================================
page = vm.Page(
    title="Netflix Engagement Report",
    # One grid index per component: the card on top, then each chart
    # spanning two rows so it gets twice the card's minimum height.
    layout=vm.Grid(grid=[[0], [1], [1], [2], [2]], row_min_height="140px"),
    components=[
        vm.Card(
            text="""
### What is the Netflix Engagement Report?
Analyzing global viewing patterns across multiple reporting periods (2023-2025), this dashboard provides key metrics
on title performance, regional engagement, and content trends."""
        ),
        # Graph 1: hours viewed per title, colored by language of origin.
        vm.Graph(
            id="scatter_chart",
            figure=px.scatter(
                netflix_df,
                x=HOURS_COLUMN,
                y=TITLE_COLUMN,
                color=LANGUAGE_COLUMN,
            ),
        ),
        # Graph 2: number of rows per reporting period, split by language.
        vm.Graph(
            id="hist_chart",
            figure=px.histogram(
                netflix_df,
                x=PERIOD_COLUMN,
                color=LANGUAGE_COLUMN,
            ),
        ),
    ],
    # Filters share the same normalized column names as the charts.
    controls=[
        vm.Filter(column=PERIOD_COLUMN),
        vm.Filter(column=LANGUAGE_COLUMN),
        vm.Filter(column=HOURS_COLUMN),
    ],
)

# =========================================================================
# 3. BUILD AND RUN THE DASHBOARD
# =========================================================================
dashboard = vm.Dashboard(pages=[page])
Vizro().build(dashboard).run()