# PyCafe - Vizro - 24H2 Netflix
#
# Project files:
#   24H2_Netflix Report_Movies.csv
#   24H2_Netflix Report_TV.csv
#   app.py
#   requirements.txt
#
# app.py
import pandas as pd
import vizro.models as vm
import vizro.plotly.express as px
import vizro.tables as vt # Import vizro.tables
from vizro import Vizro
from vizro.models.types import capture

# --- 1. Data Loading and Preprocessing ---

# CSV exports of the two report sheets, expected next to this script.
SHOWS_FILE = "24H2_Netflix Report_TV.csv"
MOVIES_FILE = "24H2_Netflix Report_Movies.csv"

# Raw column names used to build an empty stand-in frame when a file is missing.
EXPECTED_COLUMNS = ['Title', 'Available Globally?', 'Release Date', 'Hours Viewed', 'Runtime', 'Views']


def load_report(path):
    """Read one report CSV, falling back to an empty frame if it is missing.

    Each file is loaded independently so that one missing file does not
    discard the data that was successfully read from the other.

    Args:
        path: Location of the CSV file (anything ``pd.read_csv`` accepts).

    Returns:
        The parsed DataFrame, or an empty DataFrame with ``EXPECTED_COLUMNS``
        when the file does not exist.
    """
    try:
        # thousands=',' lets values such as '313,000,000' parse as numbers.
        return pd.read_csv(path, thousands=',')
    except FileNotFoundError:
        print(f"Error: data file {path!r} was not found. Please ensure the file path is correct.")
        # Best-effort fallback: keep the expected columns so later column lookups still resolve.
        return pd.DataFrame(columns=EXPECTED_COLUMNS)


# Load dataframes
df_shows = load_report(SHOWS_FILE)
df_movies = load_report(MOVIES_FILE)

def preprocess_data(df):
    """Clean and transform a raw report frame for visualization.

    The frame is modified in place and also returned. The function:

    * parses 'Release Date' to datetime (unparseable values become NaT);
    * adds 'Release Month' as 'YYYY-MM', or 'TBD' when the date is missing;
    * adds 'Runtime (Hours)' derived from the 'H:MM' strings in 'Runtime';
    * renames 'Available Globally?' and 'Hours Viewed' to underscore names;
    * coerces 'Hours_Viewed' and 'Views' (when present) to numbers, using 0
      for values that cannot be parsed.

    Args:
        df: DataFrame containing at least 'Release Date' and 'Runtime'.

    Returns:
        The same DataFrame object, after the in-place changes above.
    """
    release_dates = pd.to_datetime(df['Release Date'], errors='coerce')
    df['Release Date'] = release_dates
    # Converting periods to str renders missing dates as the text 'NaT', which
    # fillna() cannot detect, so the 'TBD' marker is applied via the date mask.
    month_labels = release_dates.dt.to_period('M').astype(str)
    df['Release Month'] = month_labels.where(release_dates.notna(), 'TBD')

    def runtime_to_hours(runtime_str):
        """Convert an 'H:MM' runtime to hours; missing/unparseable values give 0."""
        if pd.isna(runtime_str):
            return 0
        # A value that was already parsed as a number is passed through unchanged.
        if isinstance(runtime_str, (int, float)):
            return runtime_str
        try:
            h, m = map(int, str(runtime_str).split(':'))
        except ValueError:
            # Wrong number of ':'-separated parts, or a non-integer part.
            return 0
        return round(h + m / 60, 2)

    df['Runtime (Hours)'] = df['Runtime'].apply(runtime_to_hours)

    # Standardize column names for easier use in Vizro/Plotly
    df.rename(columns={
        "Available Globally?": "Available_Globally",
        "Hours Viewed": "Hours_Viewed",
    }, inplace=True)

    # Guarantee numeric dtypes for the measure columns, whatever read_csv inferred.
    for col in ['Hours_Viewed', 'Views']:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

    return df

# Run both raw frames through the shared cleaning step (shows first, then movies).
df_shows, df_movies = preprocess_data(df_shows), preprocess_data(df_movies)

# Pre-computed top-10 snapshots: shows ranked by hours viewed, movies by views.
shows_top_10 = (
    df_shows
    .sort_values(by="Hours_Viewed", ascending=False)
    .iloc[:10]
    .reset_index(drop=True)
)
movies_top_10 = (
    df_movies
    .sort_values(by="Views", ascending=False)
    .iloc[:10]
    .reset_index(drop=True)
)

# --- 2. Vizro Components (Charts and Tables) ---

# Columns shown in the raw-data table on each page.
TABLE_COLUMNS = ['Title', 'Release Date', 'Available_Globally', 'Hours_Viewed', 'Views', 'Runtime (Hours)']


@capture("graph")
def top_n_bar(data_frame, x, y, n=10, color=None, labels=None, template=None):
    """Bar chart of the ``n`` rows of ``data_frame`` with the largest ``y``.

    The ranking happens inside the captured function and the chart is fed the
    full frame, so the ranking is recomputed from whatever ``data_frame`` the
    function receives rather than from a fixed, pre-sliced top-10 frame.

    Args:
        data_frame: Frame containing the ``x``, ``y`` and (optional) ``color`` columns.
        x: Column shown on the x axis.
        y: Column used both for ranking and for bar height.
        n: Number of rows to keep.
        color, labels, template: Passed through to ``px.bar``.

    Returns:
        The bar figure.
    """
    ranked = data_frame.sort_values(y, ascending=False).head(n)
    return px.bar(ranked, x=x, y=y, color=color, labels=labels, template=template)


@capture("graph")
def monthly_bar(data_frame, y, agg="sum", labels=None, template=None):
    """Bar chart of ``y`` aggregated per 'Release Month'.

    Args:
        data_frame: Frame containing 'Release Month' and the ``y`` column.
        y: Column to aggregate.
        agg: Aggregation applied within each month (e.g. "sum", "mean").
        labels, template: Passed through to ``px.bar``.

    Returns:
        The bar figure, with months in ascending label order.
    """
    per_month = (
        data_frame.groupby("Release Month", as_index=False)[y]
        .agg(agg)
        .sort_values("Release Month")
    )
    return px.bar(per_month, x="Release Month", y=y, labels=labels, template=template)


# --- Shows Components ---
shows_top_hours_chart = vm.Graph(
    title="Top 10 Shows by Hours Viewed",
    header="Shows are ranked by their total **Hours Viewed** in the second half of 2024. This metric is the primary indicator of content consumption.",
    footer="The top performers, contributing significantly to overall viewing hours.",
    figure=top_n_bar(
        data_frame=df_shows,
        x="Title",
        y="Hours_Viewed",
        color="Available_Globally",
        template="vizro_dark",
        # Values are plotted as loaded (not divided by 1e6), so no unit suffix is claimed.
        labels={"Hours_Viewed": "Hours Viewed", "Available_Globally": "Global Release"},
    ),
)

shows_month_hours_chart = vm.Graph(
    title="Total Hours Viewed by Release Month",
    header="Analysis of total hours viewed grouped by the original content **release month** (H2 2024).",
    footer="Each bar is the sum of hours viewed across all shows released in that month.",
    figure=monthly_bar(
        data_frame=df_shows,
        y="Hours_Viewed",
        agg="sum",
        template="vizro_dark",
        labels={"Hours_Viewed": "Total Hours Viewed"},
    ),
)

shows_table = vm.Table(
    id="shows_data_table",
    title="All 2024 H2 Shows Data",
    figure=vt.dash_data_table(data_frame=df_shows[TABLE_COLUMNS]),
)

# --- Movies Components ---
movies_top_views_chart = vm.Graph(
    title="Top 10 Movies by Views",
    header="Movies are ranked by their total **Views** in the second half of 2024. This indicates overall audience reach and popularity.",
    # NOTE(review): this footer names a specific title; confirm it against the loaded data.
    footer="The movie 'Back in Action' dominated H2 2024 in terms of views, signaling a high-demand title with strong initial audience pull.",
    figure=top_n_bar(
        data_frame=df_movies,
        x="Title",
        y="Views",
        color="Available_Globally",
        template="vizro_dark",
        labels={"Views": "Views", "Available_Globally": "Global Release"},
    ),
)

movies_avg_runtime_chart = vm.Graph(
    title="Average Runtime by Release Month",
    header="The average runtime (in hours) for movies released in each month of H2 2024.",
    footer="The consistency in average runtime suggests a controlled release strategy, with no dramatic variation in film length across the half-year.",
    figure=monthly_bar(
        data_frame=df_movies,
        y="Runtime (Hours)",
        agg="mean",
        template="vizro_dark",
        labels={"Runtime (Hours)": "Avg Runtime (Hours)"},
    ),
)

movies_table = vm.Table(
    id="movies_data_table",
    title="All H2 2024 Movies Data",
    figure=vt.dash_data_table(data_frame=df_movies[TABLE_COLUMNS]),
)

# --- 3. Vizro Pages and Dashboard ---


def _build_analysis_page(title, components):
    """Assemble one analysis page from two charts and one table.

    Args:
        title: Page title shown in the dashboard.
        components: Three components in grid order: left chart, right chart, table.

    Returns:
        A ``vm.Page`` with the shared layout and a fresh pair of filters.
    """
    return vm.Page(
        title=title,
        # Zero-based grid: components 0 and 1 share the top row, component 2 spans the bottom row.
        layout=vm.Layout(grid=[[0, 1], [2, 2]]),
        components=components,
        controls=[
            vm.Filter(column="Available_Globally", selector=vm.Dropdown(title="Global Availability")),
            # No explicit selector: Vizro chooses its default selector for this date column.
            vm.Filter(column="Release Date"),
        ],
    )


shows_page = _build_analysis_page(
    "TV Shows Analysis",
    [shows_top_hours_chart, shows_month_hours_chart, shows_table],
)

movies_page = _build_analysis_page(
    "Movies Analysis",
    [movies_top_views_chart, movies_avg_runtime_chart, movies_table],
)

# Both pages make up the final dashboard, shows first.
dashboard = vm.Dashboard(title="Netflix 2024 H2 Report Dashboard", pages=[shows_page, movies_page])

# Build the app from the dashboard model and start serving it.
Vizro().build(dashboard).run()