PyCafe - Vizro - netflix views

23H1_Netflix Report.csv
23H2_Netflix Report_Movies.csv
23H2_Netflix Report_TV.csv
24H1_Netflix Report_Film.csv
24H1_Netflix Report_Shows.csv
24H2_Netflix Report_Movies.csv
24H2_Netflix Report_TV.csv
25_H1 Netflix Report_Movies.csv
25_H1 Netflix Report_Shows.csv
app.py
requirements.txt
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import pandas as pd
import vizro.plotly.express as px
from vizro import Vizro
import vizro.models as vm
import vizro.tables as vt # Import vizro.tables

# --- 1. Data Loading and Preprocessing ---

# CSV files for the 2025 H1 report, one for shows and one for movies.
SHOWS_FILE = "25_H1 Netflix Report_Shows.csv"
MOVIES_FILE = "25_H1 Netflix Report_Movies.csv"

# Read both reports in order (shows first, then movies).
# thousands="," lets figures written like '313,000,000' load as numbers.
try:
    df_shows, df_movies = (
        pd.read_csv(report_path, thousands=",")
        for report_path in (SHOWS_FILE, MOVIES_FILE)
    )
except FileNotFoundError:
    print("Error: One or both data files were not found. Please ensure the file paths are correct.")
    # Fall back to two independent empty frames carrying the raw column
    # names, so the rest of the script can still run without data.
    df_shows = pd.DataFrame(
        columns=['Title', 'Available Globally?', 'Release Date', 'Hours Viewed', 'Runtime', 'Views']
    )
    df_movies = df_shows.copy()

def preprocess_data(df):
    """Clean and transform a raw report frame for visualization.

    The frame is modified in place and the same object is returned.

    Changes applied:
      * 'Release Date' is parsed to datetime; unparseable or missing values
        become NaT.
      * 'Release Month' is added as a 'YYYY-MM' string, or 'TBD' when the
        release date is NaT.
      * 'Runtime (Hours)' is added, derived from the 'H:MM' text in 'Runtime'.
      * 'Available Globally?' and 'Hours Viewed' are renamed to
        'Available_Globally' and 'Hours_Viewed'.
      * 'Hours_Viewed' and 'Views' (when present) are coerced to numbers,
        with unparseable values replaced by 0.
    """
    df['Release Date'] = pd.to_datetime(df['Release Date'], errors='coerce')
    df['Release Month'] = df['Release Date'].dt.to_period('M').astype(str)
    # astype(str) renders a missing period as the literal text 'NaT', so a
    # fillna() placed after it never matches. Label the rows from the date
    # column's own missing-value mask instead.
    df.loc[df['Release Date'].isna(), 'Release Month'] = 'TBD'

    def runtime_to_hours(runtime_str):
        """Convert an 'H:MM' value to hours as a float; 0.0 when unusable."""
        if pd.isna(runtime_str):
            return 0.0
        # A value that was already parsed as a number is passed through.
        if isinstance(runtime_str, (int, float)):
            return runtime_str
        try:
            hours, minutes = map(int, str(runtime_str).split(':'))
        except ValueError:
            # Wrong number of ':'-separated parts, or a non-integer part.
            return 0.0
        return round(hours + minutes / 60, 2)

    df['Runtime (Hours)'] = df['Runtime'].apply(runtime_to_hours)

    # Standardize column names for easier use in Vizro/Plotly
    df.rename(columns={
        "Available Globally?": "Available_Globally",
        "Hours Viewed": "Hours_Viewed",
    }, inplace=True)

    # Make sure the metric columns are numeric; anything unparseable counts as 0.
    for col in ['Hours_Viewed', 'Views']:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

    return df

# Clean both frames (shows first, then movies).
df_shows, df_movies = preprocess_data(df_shows), preprocess_data(df_movies)

# Top-10 slices are computed once here, ahead of chart construction:
# shows are ranked by hours viewed, movies by views.
shows_top_10 = (
    df_shows.sort_values(by="Hours_Viewed", ascending=False)
    .iloc[:10]
    .reset_index(drop=True)
)
movies_top_10 = (
    df_movies.sort_values(by="Views", ascending=False)
    .iloc[:10]
    .reset_index(drop=True)
)

# --- 2. Vizro Components (Charts and Tables) ---

# Columns shown in the per-page data tables.
_TABLE_COLUMNS = ['Title', 'Release Date', 'Available_Globally', 'Hours_Viewed', 'Views', 'Runtime (Hours)']


def _in_millions(frame, column):
    """Return a copy of ``frame`` with ``column`` divided by one million.

    Several chart axes below are labelled "(Millions)"; plotting the
    unscaled column would put raw counts under that label. All other
    columns are kept unchanged.

    NOTE(review): assumes the CSV columns hold raw counts rather than
    values already expressed in millions - confirm against the data.
    """
    return frame.assign(**{column: frame[column] / 1_000_000})


# --- Shows Components ---
# NOTE(review): this chart is built from the pre-computed top-10 frame, so page
# filters presumably narrow those ten rows rather than re-ranking - confirm.
shows_top_hours_chart = vm.Graph(
    title="Top 10 Shows by Hours Viewed",
    # Header and footer give the reader context for the chart.
    header="Shows are ranked by their total **Hours Viewed** in the first half of 2025. This metric is the primary indicator of content consumption.",
    footer="Data reveals 'Adolescence: Limited Series' and 'Squid Game: Season 2' were the top performers, contributing significantly to overall viewing hours.",
    figure=px.bar(
        _in_millions(shows_top_10, "Hours_Viewed"),
        x="Title",
        y="Hours_Viewed",
        color="Available_Globally",
        template="vizro_dark",
        labels={"Hours_Viewed": "Hours Viewed (Millions)", "Available_Globally": "Global Release"}
    )
)

# The aggregated frame holds only 'Release Month' and 'Hours_Viewed'.
shows_month_hours_chart = vm.Graph(
    title="Total Hours Viewed by Release Month",
    header="Analysis of total hours viewed grouped by the original content **release month** (H1 2025).",
    footer="Viewing hours show high peaks in January and March, suggesting successful rollouts of major titles during these periods.",
    figure=px.bar(
        _in_millions(
            df_shows.groupby("Release Month", as_index=False)["Hours_Viewed"].sum().sort_values("Release Month"),
            "Hours_Viewed",
        ),
        x="Release Month",
        y="Hours_Viewed",
        template="vizro_dark",
        labels={"Hours_Viewed": "Total Hours Viewed (Millions)"}
    )
)

# The table keeps the unscaled values.
shows_table = vm.Table(
    id="shows_data_table",
    title="All 2025 H1 Shows Data",
    figure=vt.dash_data_table(data_frame=df_shows[_TABLE_COLUMNS])
)

# --- Movies Components ---
movies_top_views_chart = vm.Graph(
    title="Top 10 Movies by Views",
    header="Movies are ranked by their total **Views** in the first half of 2025. This indicates overall audience reach and popularity.",
    footer="The movie 'Back in Action' dominated H1 in terms of views, signaling a high-demand title with strong initial audience pull.",
    figure=px.bar(
        _in_millions(movies_top_10, "Views"),
        x="Title",
        y="Views",
        color="Available_Globally",
        template="vizro_dark",
        labels={"Views": "Views (Millions)", "Available_Globally": "Global Release"}
    )
)

# Runtime is already in hours, so no rescaling is applied here.
movies_avg_runtime_chart = vm.Graph(
    title="Average Runtime by Release Month",
    header="The average runtime (in hours) for movies released in each month of H1 2025.",
    footer="The consistency in average runtime suggests a controlled release strategy, with no dramatic variation in film length across the half-year.",
    figure=px.bar(
        df_movies.groupby("Release Month", as_index=False)["Runtime (Hours)"].mean().sort_values("Release Month"),
        x="Release Month",
        y="Runtime (Hours)",
        template="vizro_dark",
        labels={"Runtime (Hours)": "Avg Runtime (Hours)"}
    )
)

movies_table = vm.Table(
    id="movies_data_table",
    title="All 2025 H1 Movies Data",
    figure=vt.dash_data_table(data_frame=df_movies[_TABLE_COLUMNS])
)

# --- 3. Vizro Pages and Dashboard ---

def _charts_over_table_layout():
    """Build a fresh layout: components 0 and 1 on the top row, component 2 across the bottom row.

    Grid indices are zero-based positions in the page's ``components`` list.
    """
    return vm.Layout(grid=[[0, 1], [2, 2]])


def _availability_and_release_filters():
    """Build a fresh list of the two filters that every page uses.

    Filters are applied to all components on the page that use the
    respective columns. A new list of new models is returned on each call.
    """
    return [
        vm.Filter(column="Available_Globally", selector=vm.Dropdown(title="Global Availability")),
        # No explicit selector here, so Vizro can choose the date-range
        # picker for the temporal column automatically.
        vm.Filter(column="Release Date"),
    ]


shows_page = vm.Page(
    title="TV Shows Analysis",
    layout=_charts_over_table_layout(),
    components=[shows_top_hours_chart, shows_month_hours_chart, shows_table],
    controls=_availability_and_release_filters(),
)

movies_page = vm.Page(
    title="Movies Analysis",
    layout=_charts_over_table_layout(),
    components=[movies_top_views_chart, movies_avg_runtime_chart, movies_table],
    controls=_availability_and_release_filters(),
)

# Assemble the dashboard: the shows page comes first, then the movies page.
dashboard = vm.Dashboard(
    pages=[shows_page, movies_page],
    title="Netflix 2025 H1 Report Dashboard",
)

# Build the app from the dashboard definition, then start serving it.
vizro_app = Vizro().build(dashboard)
vizro_app.run()
Py.Cafe

25H1 Netflix_vWIP

netflix views