PyCafe - Dash - Billboard Hot 100

app.py
data.csv
requirements.txt
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 15 17:03:19 2025

@author: win11
"""

import pandas as pd
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output, callback
import dash_bootstrap_components as dbc
from dash_bootstrap_templates import load_figure_template
from plotly.subplots import make_subplots
import dash_ag_grid as dag

#load figure template
load_figure_template("cyborg")

#load theme bootstrap css
dbc_css = "https://cdn.jsdelivr.net/gh/AnnMarieW/dash-bootstrap-templates/dbc.min.css"

data_raw = pd.read_csv('data.csv')

columns = data_raw.columns

df = data_raw.iloc[:, [0,1,2,3,8,14,15,41,42,43,44,45,46,76]].copy()

#helperfunction to convert the date column
def parse_mixed_date(s):
    # Parse using two-digit year format
    dt = pd.to_datetime(s, format="%d-%b-%y", errors="coerce")
    if pd.isna(dt):
        return pd.NaT
    
    year = dt.year
    # Assume anything before 1970 is actually 2000+
    if year > 2025:
        dt = dt.replace(year=year - 100)
    return dt

#convert datatypes
df['Date'] = df['Date'].apply(parse_mixed_date)
df['Year'] = df['Date'].dt.year

# Fixed the dropdown_list (added missing closing quote)
dropdown_list = ['Overall Rating','Energy','Danceability','Happiness', 'Loudness (dB)','Length (Sec)','BPM']

def card_content(title, dfin):
    
    dff = dfin.loc[dfin[title] == dfin[title].max()]
    row=dff.sample(n=1)
    footer=f"One out of {len(dff)}" if len(dff) > 1 else ""
    
    card_content = [
        dbc.CardHeader(f"Highest {title}"),
        dbc.CardBody(
            [
                html.H5(row['Song'], className="card-title"),
                html.P(
                    row['Artist'],
                    className="card-text",
                ),
            ]
        ),
        dbc.CardFooter(f"{footer}")

    ]
    return card_content










#grouped df by year for some scatterplots
#date/year is connected to the first day the song reached
#the number one position
# Only aggregate columns that exist in dropdown_list
agg_dict = {'Song':'count', 'Weeks at Number One': 'mean'}
for col in dropdown_list:
    if col in df.columns:
        agg_dict[col] = 'mean'

dfg = df.groupby('Year').agg(agg_dict).reset_index()

# Initialize the app
app = Dash(__name__, suppress_callback_exceptions=True,external_stylesheets=[dbc.themes.CYBORG, dbc.icons.FONT_AWESOME, dbc_css])

# Define the layout with two dropdowns and a graph
app.layout = dbc.Container([

    

    
    dbc.Row([
        dbc.Col([
            html.H1("Billboard hot 100 number 1 songs", className="mb-4", style={"fontSize":"32px"}),
            html.P("Thank you to Chris Dalla Riva for the data.", className="mb-4"),
            dbc.Row([
                dbc.Col([
                    dbc.Label("Select X-axis variable:"),
                    dcc.Dropdown(
                        id='x-dropdown',
                        options=[{'label': col, 'value': col} for col in dropdown_list],
                        value='Overall Rating',  # Default value
                        clearable=False
                    )
                ], width=4),
                
                dbc.Col([
                    dbc.Label("Select Y-axis variable:"),
                    dcc.Dropdown(
                        id='y-dropdown',
                        options=[{'label': col, 'value': col} for col in dropdown_list if col != 'Overall Rating'],
                        value='Happiness',  # Default value
                        clearable=False
                    )
                ], width=4)
            ], className="mb-4"),
            html.H3("Song trends based on yearly average values", className="mb-3", style={"fontSize":"24px"}),
            html.P("Markersize indicates only the number of songs, click on a marker to see the songs.", className="mb-3", style={"fontSize":"16px"}),
            dcc.Graph(id='scatter-plot')
        ], width=6),
        dbc.Col([
            dbc.Row([
                dbc.Col([
                    html.H3(id='songs-title', className="text-center mb-3", style={"fontSize":"24px"}),
                    html.Div(id="static-cards"),
                    
                    dag.AgGrid(
                        id='songs-grid',
                        columnDefs=[],
                        rowData=[],
                        dashGridOptions={
                            "animateRows": True,
                            "pagination": True,
                            "paginationPageSize": 10,
                            "domLayout": "autoHeight"
                        },
                        style={"height": None},
                        className="ag-theme-alpine-dark"
                    )
                ], width=11)
            ], className="mb-5", style={"display": "none"}, id="grid-section"),

            
        ], width=6)
    ], className="mb-5"),
    

    

], style={'margin':'2rem'},fluid=True)

# Callback to update y-dropdown options based on x-dropdown selection
@app.callback(
    Output('y-dropdown', 'options'),
    Output('y-dropdown', 'value'),
    Input('x-dropdown', 'value')
)
def update_y_dropdown(x_value):
    # Filter out the selected x value from y dropdown options
    y_options = [{'label': col, 'value': col} for col in dropdown_list if col != x_value]
    
    # If current y value is same as x value, change it to the first available option
    current_y = 'Happiness' if x_value != 'Happiness' else dropdown_list[0] if dropdown_list[0] != x_value else dropdown_list[1]
    
    return y_options, current_y

# Callback to update the scatter plot based on dropdown selections
@app.callback(
    Output('scatter-plot', 'figure'),
    Input('x-dropdown', 'value'),
    Input('y-dropdown', 'value')
)
def update_scatter_plot(x_col, y_col):
    # Check if selected columns exist in dfg
    if x_col not in dfg.columns or y_col not in dfg.columns:
        # Return empty figure with error message
        fig = go.Figure()
        fig.add_annotation(
            text=f"Error: Column '{x_col}' or '{y_col}' not found in aggregated data",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False
        )
        return fig
    
    # Create the scatter plot with year information
    fig = go.Figure(data=go.Scatter(
        x=dfg[x_col],
        y=dfg[y_col],

        mode='markers',
        marker=dict(
            color=dfg['Year'], #set color equal to a variable
            colorscale='Viridis', # one of plotly colorscales
            showscale=True,
            size=dfg['Song'],
    ),
        text=[f"Year: {year}<br>{x_col}: {x:.2f}<br>{y_col}: {y:.2f}<br>Number of Songs: {songs}" 
              for year, x, y, songs in zip(dfg['Year'], dfg[x_col], dfg[y_col], dfg['Song'])],
        hovertemplate='%{text}<extra></extra>',
        customdata=dfg[['Year']].values  # Pass year as customdata
    ))
    
    # Update layout
    fig.update_layout(
        title=dict(
            text=f"{y_col} vs {x_col} by Year",
            font=dict(size=20)
        ),
        xaxis=dict(
            title=dict(
                text=x_col,
                font=dict(size=14)
            )
        ),
        yaxis=dict(
            title=dict(
                text=y_col,
                font=dict(size=14)
            )
        ),
        width=800,
        height=600,
        hovermode='closest',
        clickmode='event+select'
    )
    
    # Set appropriate axis ranges based on the selected variables
    if x_col == 'Overall Rating':
        fig.update_xaxes(range=[0, 10])
    elif x_col in ['Energy', 'Danceability', 'Happiness']:
        fig.update_xaxes(range=[0, 100])
    elif x_col == 'Loudness (dB)':
        fig.update_xaxes(range=[-20, 0])
    elif x_col == 'Length (Sec)':
        fig.update_xaxes(range=[100, 400])
    
    if y_col == 'Overall Rating':
        fig.update_yaxes(range=[0, 10])
    elif y_col in ['Energy', 'Danceability', 'Happiness']:
        fig.update_yaxes(range=[0, 100])
    elif y_col == 'Loudness (dB)':
        fig.update_yaxes(range=[-20, 0])
    elif y_col == 'Length (Sec)':
        fig.update_yaxes(range=[100, 400])
    
    return fig

# Callback to update AG Grid based on scatter plot click
@app.callback(
    Output('songs-grid', 'rowData'),
    Output('songs-grid', 'columnDefs'),
    Output('grid-section', 'style'),
    Output('songs-title', 'children'),
    Output('static-cards','children'),
    Input('scatter-plot', 'clickData')
)
def display_click_data(clickData):
    
    
    # Create column definitions for AG Grid
    columnDefs = []
    for col in df.columns:
        col_def = {
            "field": col,
            "headerName": col,
            "sortable": True,
            "filter": True,
            "resizable": True,
            "minWidth": 50
        }
        
        # Format numeric columns
        if col in ['Overall Rating', 'Energy', 'Danceability', 'Happiness', 'Loudness (dB)', 'Length (Sec)']:
            col_def["valueFormatter"] = {"function": "d3.format('.2f')(params.value)"}
        
        # Make some columns wider
        if col in ['Song', 'Artist']:
            col_def["minWidth"] = 200
        
        columnDefs.append(col_def)
    
 
    
    if clickData is None:
        
        
        static_cards = dbc.Row(
                [
                    dbc.Col(dbc.Card(card_content('Overall Rating',df), color="primary", outline=True)),
                    dbc.Col(dbc.Card(card_content('Happiness',df), color="secondary", outline=True)),
                    dbc.Col(dbc.Card(card_content('Danceability',df), color="info", outline=True)),
                ],
                className="mb-4",
            )
        
        
        # Convert Date to string for display
        # if 'Date' in df.columns:
        #     df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')
        return df.to_dict('records'), columnDefs, {"display": "block"}, f"All {len(df)} songs since 1958", static_cards
    
    # Extract the year from the clicked point
    # The customdata is passed as a list, so we need to access the first element
    point = clickData['points'][0]

    
    # Get year from customdata or from the marker color (which represents year)
    if 'customdata' in point:
        year = int(point['customdata'][0])
    else:
        # Fallback: extract from the text field using string parsing
        text = point.get('text', '')
        year_line = [line for line in text.split('<br>') if line.startswith('Year:')]
        if year_line:
            year = int(year_line[0].split(':')[1].strip())
        else:
            # Last fallback: use marker color which represents year
            year = int(point.get('marker.color', 0))
    
    # Filter dataframe for the selected year
    filtered_df = df[df['Year'] == year].copy()
      

    
    # Convert dataframe to records for AG Grid
    data = filtered_df.to_dict('records')
    
    title = f"Songs from {year} ({len(filtered_df)} songs)"
    
    static_cards = dbc.Row(
                [
                    dbc.Col(dbc.Card(card_content('Overall Rating',filtered_df), color="primary", outline=True)),
                    dbc.Col(dbc.Card(card_content('Happiness',filtered_df), color="secondary", outline=True)),
                    dbc.Col(dbc.Card(card_content('Danceability',filtered_df), color="info", outline=True)),
                ],
                className="mb-4",
            )
    
    
    return data, columnDefs, {"display": "block"}, title,static_cards



if __name__ == '__main__':
    app.run(debug=True)
Py.Cafe

2025-w44-figurefriday

Billboard Hot 100