Py.Cafe

alem781/

netflix-content-analysis-h1-2025

Netflix Content Analysis (H1 2025)

DocsPricing
  • 25_H1 Netflix Report_Movies.csv
  • 25_H1 Netflix Report_Shows.csv
  • app.py
  • requirements.txt
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
import pandas as pd
import plotly.express as px
from shiny import App, render, ui, reactive

# --- Data Loading and Preparation (Global Scope) ---

# CSV reports expected to sit next to this script.
FILE_MOVIES = "25_H1 Netflix Report_Movies.csv"
FILE_SHOWS = "25_H1 Netflix Report_Shows.csv"

# Module-level reactive holders. They start out as empty DataFrames and are
# filled in by the server function; being global, the server can both read
# and update them.
r_movies = reactive.Value(pd.DataFrame())
r_shows = reactive.Value(pd.DataFrame())

# Maps the content-type label shown in the UI to the reactive value that
# holds the matching DataFrame. Insertion order is the order of the choices.
ALL_DATA_R = dict(Movies=r_movies, Shows=r_shows)

def load_and_clean_data(file_name: str) -> pd.DataFrame:
    """Load a report CSV and add a numeric, sortable view count.

    The raw 'Views' column is read as text, thousands separators (',') and
    footnote markers ('*') are stripped, and the result is stored as a float
    column named 'Views_Numeric'. Rows are then sorted by that column in
    descending order and the index is reset.

    Cells that still cannot be parsed as a number become NaN (and therefore
    sort last) instead of aborting the whole load, so one malformed row no
    longer discards the entire dataset.

    Args:
        file_name: Path of the CSV file to read.

    Returns:
        The cleaned, sorted DataFrame, or an empty DataFrame when the file
        cannot be read or has no 'Views' column (the error is printed).
    """
    try:
        df_load = pd.read_csv(file_name)

        # Strip formatting characters before numeric conversion.
        views_text = (
            df_load['Views'].astype(str)
            .str.replace(r'[,\*]', '', regex=True)
        )
        # errors='coerce' maps unparseable cells to NaN; astype(float) keeps
        # the column dtype float even when every value is a whole number.
        df_load['Views_Numeric'] = (
            pd.to_numeric(views_text, errors='coerce').astype(float)
        )

        # Highest view counts first; NaN values are placed at the end.
        return (
            df_load.sort_values('Views_Numeric', ascending=False)
            .reset_index(drop=True)
        )

    except Exception as e:
        # Deliberate best-effort: the app treats an empty frame as
        # "nothing to show" rather than crashing on a bad or missing file.
        print(f"Error loading {file_name}: {e}")
        return pd.DataFrame()

# Choices for the "Available Globally?" dropdown. The UI is built before any
# data has been loaded, so the options are a fixed list rather than being
# derived from the CSV contents. "All" means no filtering; the other entries
# are compared against the 'Available Globally?' column by the server.
GLOBAL_OPTIONS = ["All", "Yes", "No"]


# --- Shiny UI Definition (Front-end Layout) ---

app_ui = ui.page_fluid(
    # Load Tailwind CSS for modern styling
    ui.head_content(
        ui.tags.script(src="https://cdn.tailwindcss.com"),
        ui.tags.style("""
            @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap');
            body { font-family: 'Inter', sans-serif; background-color: #f3f4f6; }
            .container-main { max-width: 1200px; margin: 0 auto; padding: 20px; }
            /* Basic Spinner CSS for loading state */
            .loader {
                border: 4px solid #f3f3f3;
                border-top: 4px solid #ef4444; /* Tailwind red-500 */
                border-radius: 50%;
                width: 30px;
                height: 30px;
                animation: spin 1s linear infinite;
                margin: 20px auto;
            }
            @keyframes spin {
                0% { transform: rotate(0deg); }
                100% { transform: rotate(360deg); }
            }
        """)
    ),

    ui.div(
        {"class": "container-main bg-white shadow-2xl rounded-xl p-8 my-8"},

        # Header content outside the sidebar layout
        ui.h2(
            "Netflix Content Viewership Analysis (H1 2025)",
            {"class": "text-4xl font-extrabold text-red-600 mb-2"}
        ),
        ui.p(
            "Explore the top titles by Views for Movies and Shows. Use the filters to refine the visualization.",
            {"class": "text-gray-600 mb-8"}
        ),

        # layout_sidebar() is the sidebar component meant for embedding
        # inside an existing page; page_sidebar() builds a whole page and
        # must not be nested inside page_fluid().
        ui.layout_sidebar(
            ui.sidebar(
                {"class": "bg-gray-50 p-6 rounded-xl shadow-lg border border-gray-200"},

                ui.h4("Filters", {"class": "text-2xl font-bold mb-4 text-gray-800"}),

                # Control 1: Content Type Radio Buttons
                ui.input_radio_buttons(
                    "content_type",
                    ui.tags.span("Select Content Type", {"class": "font-semibold text-lg text-gray-700"}),
                    choices=list(ALL_DATA_R.keys()),
                    selected='Movies',
                    inline=True
                ),

                ui.tags.hr({"class": "my-4 border-gray-300"}),

                # Control 2: Global Availability Select
                ui.input_select(
                    "global_filter",
                    ui.tags.span("Available Globally?", {"class": "font-semibold text-lg text-gray-700"}),
                    choices=GLOBAL_OPTIONS,
                    selected="All"
                ),

                ui.tags.hr({"class": "my-4 border-gray-300"}),

                # Dynamic record count display
                ui.output_ui("record_count_display")
            ),

            # Main content area. The chart is delivered as HTML (see
            # views_plot in the server), so the placeholder is output_ui;
            # output_plot only accepts static images. The wrapper is a plain
            # block so the chart can take the full available width.
            ui.div(
                {"class": "p-2 min-h-[550px]"},
                ui.output_ui("views_plot")
            )
        )
    )
)


# --- Shiny Server Logic (Back-end) ---

def server(input, output, session):
    """Wire the inputs to the data, the chart, and the record counter.

    Per session this function:
      * loads both CSV files into the module-level reactive values,
      * derives the DataFrame for the selected content type and applies the
        'Available Globally?' filter (top 20 rows),
      * renders the Plotly bar chart and the record-count text.

    Args:
        input: Shiny input accessor ('content_type', 'global_filter').
        output: Shiny output registry.
        session: The Shiny session (unused directly).
    """

    # Per-session flag that lets the outputs show a placeholder until the
    # load effect below has finished.
    is_data_loaded = reactive.Value(False)

    # 1. DATA LOADING - runs as an effect once the session has started.
    @reactive.Effect
    async def load_all_data():
        print("Starting asynchronous data loading...")

        # The loader is called directly. reactive.isolate() is a
        # zero-argument context manager, not an awaitable executor, so it
        # cannot be used to run a function "in the background".
        # NOTE: r_movies / r_shows are module-level, so every new session
        # re-reads the files and updates the shared values.
        loaded_movies = load_and_clean_data(FILE_MOVIES)
        loaded_shows = load_and_clean_data(FILE_SHOWS)

        # Update reactive values, which triggers downstream calculations
        r_movies.set(loaded_movies)
        r_shows.set(loaded_shows)
        is_data_loaded.set(True)
        print("Data loading complete.")

    # 2. Reactive calculation selecting the base dataframe (Movies or Shows)
    @reactive.Calc
    def selected_base_df():
        """Return the DataFrame for the current 'content_type' selection."""
        # Unknown selections fall back to the movies data.
        df_reactive_value = ALL_DATA_R.get(input.content_type(), r_movies)
        return df_reactive_value()

    # 3. Reactive calculation to filter the data based on user input
    @reactive.Calc
    def filtered_data():
        """Return the top 20 rows after the 'Available Globally?' filter."""
        base_df = selected_base_df()

        if base_df.empty:
            return base_df  # Nothing loaded (yet, or the load failed)

        selected_value = input.global_filter()

        if selected_value == "All":
            return base_df.head(20)

        # A boolean mask avoids having to quote the '?' in the column name.
        matches = base_df["Available Globally?"] == selected_value
        return base_df[matches].head(20)

    # 4. Render function for the Plotly bar chart.
    # shiny.render has no `plotly` renderer, and this file does not depend
    # on shinywidgets, so the figure is exported as a standalone HTML
    # document and shown in an iframe. The iframe loads plotly.js itself
    # (from the CDN), which keeps script loading order reliable.
    @output
    @render.ui
    def views_plot():
        """Return the chart, or a placeholder while loading / when empty."""
        if not is_data_loaded():
            # Show a loading message while data is being prepared
            return ui.div(
                ui.div({"class": "loader"}),
                ui.p("Loading data in background...", {"class": "text-center text-gray-500 mt-4"})
            )

        plot_df = filtered_data()
        content_type = input.content_type()

        if plot_df.empty:
            return ui.div(
                ui.p(f"No {content_type} titles found matching the filter.", {"class": "text-center text-gray-500 mt-4"})
            )

        # Number of records actually displayed (at most 20)
        num_records = len(plot_df)

        fig = px.bar(
            plot_df,
            x="Title",
            y="Views_Numeric",
            color="Available Globally?",
            title=f"Top {num_records} Viewed {content_type} (Filtered)",
            height=550,
            template="plotly_white",  # Use a clean template
            text_auto=True  # Automatically display the view count on the bars
        )

        # Improve visual aesthetics and readability
        fig.update_layout(
            xaxis_title="Title",
            yaxis_title="Views (Numeric)",
            title_font_size=24,
            yaxis={'tickformat': '.3s'}  # Compact large numbers (e.g., 100M)
        )

        # Customize the hover information
        fig.update_traces(
            hovertemplate="<b>%{x}</b><br>Views: %{y}<extra></extra>"
        )

        # Rotate labels for better fit on x-axis
        fig.update_xaxes(tickangle=45, tickfont=dict(size=10))

        # Slightly taller than the 550px figure to leave room for the
        # embedded document's default body margin (avoids a scrollbar).
        return ui.tags.iframe(
            srcdoc=fig.to_html(full_html=True, include_plotlyjs="cdn"),
            title="Bar chart of top 20 titles by views.",
            style="width: 100%; height: 590px; border: 0;"
        )

    # 5. Dynamic record count display UI
    @output
    @render.ui
    def record_count_display():
        """Render the total row count for the selected content type."""
        if not is_data_loaded():
            return ui.p("Waiting for data to load...", {"class": "text-sm text-gray-500 mt-2 italic"})

        df_current = selected_base_df()
        content_type = input.content_type()
        return ui.p(
            f"Loaded {len(df_current):,} total {content_type} records from file.",
            {"class": "text-sm text-gray-500 mt-2 italic"}
        )


# --- Run the App ---
# Bind the UI definition and the server function into the Shiny application
# object assigned to the module-level name `app`.
app = App(app_ui, server)