Py.Cafe

alem781/

Netflix Engagement Example

Vizro-PyCafe Iris Data Visualization

DocsPricing
  • Reports /
  • What_We_Watched_A_Netflix_Engagement_Report_2023Jan-Jun (1).csv
  • What_We_Watched_A_Netflix_Engagement_Report_2023Jul-Dec (1).csv
  • What_We_Watched_A_Netflix_Engagement_Report_2024Jan-Jun (1).csv
  • What_We_Watched_A_Netflix_Engagement_Report_2024Jul-Dec (1).csv
  • What_We_Watched_A_Netflix_Engagement_Report_2025Jan-Jun (1).csv
  • app.py
  • requirements.txt
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# Vizro is an open-source toolkit for creating modular data visualization applications.
# Check out https://github.com/mckinsey/vizro for more info about Vizro
# and check out https://vizro.readthedocs.io/en/stable/ for documentation.

import vizro.plotly.express as px
from vizro import Vizro
import vizro.models as vm
import pandas as pd
import glob
import os

# =========================================================================
# 1. LOAD AND COMBINE YOUR SPECIFIC DATA FILES
# =========================================================================

# List of all your specific files
file_names = [
    "What_We_Watched_A_Netflix_Engagement_Report_2023Jan-Jun (1).csv",
    "What_We_Watched_A_Netflix_Engagement_Report_2023Jul-Dec (1).csv",
    "What_We_Watched_A_Netflix_Engagement_Report_2024Jan-Jun (1).csv",
    "What_We_Watched_A_Netflix_Engagement_Report_2024Jul-Dec (1).csv",
    "What_We_Watched_A_Netflix_Engagement_Report_2025Jan-Jun (1).csv",
]


def report_period_from_filename(filename):
    """Return the report-period token of a report file name.

    The period is the text after the last underscore of the file name, with
    the extension and anything following the first space (for example a
    " (1)" suffix) removed.

    >>> report_period_from_filename(
    ...     "What_We_Watched_A_Netflix_Engagement_Report_2023Jan-Jun (1).csv")
    '2023Jan-Jun'
    """
    stem = os.path.splitext(os.path.basename(filename))[0]
    # Splitting on every '_' and taking a fixed index is fragile: index 4 of
    # these names is 'Netflix'. The period is always the LAST underscore token.
    last_token = stem.rsplit('_', 1)[-1]
    return last_token.split(' ', 1)[0]


df_list = []
print("Attempting to load files...")

for filename in file_names:
    if not os.path.exists(filename):
        print(f"Warning: File not found: {filename}")
        continue

    # header=2: the third line of the file is used as the header row; the
    # lines before it are skipped.
    df_temp = pd.read_csv(filename, header=2)

    # Tag every row with its report period ('2023Jan-Jun', '2023Jul-Dec', ...)
    # so the dashboard can group and filter by it.
    df_temp['Report Period'] = report_period_from_filename(filename)

    df_list.append(df_temp)
    print(f"Loaded: {filename}")

# Concatenate all DataFrames into one master DataFrame
if df_list:
    # Strip stray whitespace from the headers of every frame BEFORE
    # concatenating. This way 'Hours Viewed ' and 'Hours Viewed' land in the
    # same column, and the rest of the script can use one space-free name per
    # column regardless of how the CSV headers were written.
    cleaned_frames = [
        frame.rename(columns=lambda name: name.strip() if isinstance(name, str) else name)
        for frame in df_list
    ]
    netflix_df = pd.concat(cleaned_frames, ignore_index=True)

    # 🧹 DATA CLEANING: drop rows that have no title.
    netflix_df = netflix_df.dropna(subset=['Title'])
    print(f"Successfully merged {len(df_list)} files into one DataFrame.")
else:
    print("Error: No data files were loaded. Dashboard will be empty.")
    # Minimal placeholder frame. Its column names must match the names used by
    # the graphs and filters below, otherwise the page fails to build.
    netflix_df = pd.DataFrame({
        'Title': ['No Data'],
        'Hours Viewed': [0],
        'Language of Origin': ['N/A'],
        'Report Period': ['N/A'],
    })

# =========================================================================
# 2. Configure the Page
# =========================================================================

page = vm.Page(
    title="Netflix Engagement Report",
    # One grid area per component (card, scatter, histogram). The grid must
    # not reference more areas than there are components. Each chart spans
    # two rows so it gets twice the minimum row height.
    layout=vm.Grid(grid=[[0], [1], [1], [2], [2]], row_min_height="140px"),
    components=[
        vm.Card(
            text="""
                ### What is the Netflix Engagement Report?
                Analyzing global viewing patterns across multiple reporting periods (2023-2025), this dashboard provides key metrics 
                on title performance, regional engagement, and content trends."""
        ),
        # GRAPH 1: hours viewed per title, colored by language of origin.
        vm.Graph(
            id="scatter_chart",
            figure=px.scatter(
                netflix_df,
                x="Hours Viewed",
                y="Title",
                color="Language of Origin",
            ),
        ),
        # GRAPH 2: number of rows per report period, split by language of origin.
        vm.Graph(
            id="hist_chart",
            figure=px.histogram(
                netflix_df,
                x="Report Period",
                # Same space-free column name as the scatter chart and the filter.
                color="Language of Origin",
            ),
        ),
    ],
    # FILTERS: every column name here must exist in netflix_df.
    controls=[
        vm.Filter(column="Report Period"),
        vm.Filter(column="Language of Origin"),
        # NOTE(review): if 'Hours Viewed' is read as text (e.g. values written
        # with thousands separators) this filter will not behave as a numeric
        # range - confirm the column dtype.
        vm.Filter(column="Hours Viewed"),
    ],
)
dashboard = vm.Dashboard(pages=[page])
Vizro().build(dashboard).run()